ai-data-science-team 0.0.0.9014__py3-none-any.whl → 0.0.0.9015__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/data_visualization_agent.py +172 -129
- ai_data_science_team/agents/data_wrangling_agent.py +1 -0
- ai_data_science_team/multiagents/pandas_data_analyst.py +5 -5
- ai_data_science_team/multiagents/sql_data_analyst.py +7 -18
- ai_data_science_team/tools/eda.py +2 -0
- {ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/METADATA +5 -4
- {ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/RECORD +11 -11
- {ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/WHEEL +1 -1
- {ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/top_level.txt +0 -0
ai_data_science_team/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.0.0.9014"
|
1
|
+
__version__ = "0.0.0.9015"
|
@@ -4,7 +4,6 @@
|
|
4
4
|
# * Agents: Data Visualization Agent
|
5
5
|
|
6
6
|
|
7
|
-
|
8
7
|
# Libraries
|
9
8
|
from typing import TypedDict, Annotated, Sequence, Literal
|
10
9
|
import operator
|
@@ -17,25 +16,25 @@ from langgraph.checkpoint.memory import MemorySaver
|
|
17
16
|
from langgraph.types import Checkpointer
|
18
17
|
|
19
18
|
import os
|
20
|
-
import json
|
19
|
+
import json
|
21
20
|
import pandas as pd
|
22
21
|
|
23
22
|
from IPython.display import Markdown
|
24
23
|
|
25
|
-
from ai_data_science_team.templates import(
|
26
|
-
node_func_execute_agent_code_on_data,
|
24
|
+
from ai_data_science_team.templates import (
|
25
|
+
node_func_execute_agent_code_on_data,
|
27
26
|
node_func_human_review,
|
28
|
-
node_func_fix_agent_code,
|
27
|
+
node_func_fix_agent_code,
|
29
28
|
node_func_report_agent_outputs,
|
30
29
|
create_coding_agent_graph,
|
31
30
|
BaseAgent,
|
32
31
|
)
|
33
32
|
from ai_data_science_team.parsers.parsers import PythonOutputParser
|
34
33
|
from ai_data_science_team.utils.regex import (
|
35
|
-
relocate_imports_inside_function,
|
36
|
-
add_comments_to_top,
|
37
|
-
format_agent_name,
|
38
|
-
format_recommended_steps,
|
34
|
+
relocate_imports_inside_function,
|
35
|
+
add_comments_to_top,
|
36
|
+
format_agent_name,
|
37
|
+
format_recommended_steps,
|
39
38
|
get_generic_summary,
|
40
39
|
)
|
41
40
|
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
@@ -48,11 +47,12 @@ LOG_PATH = os.path.join(os.getcwd(), "logs/")
|
|
48
47
|
|
49
48
|
# Class
|
50
49
|
|
50
|
+
|
51
51
|
class DataVisualizationAgent(BaseAgent):
|
52
52
|
"""
|
53
53
|
Creates a data visualization agent that can generate Plotly charts based on user-defined instructions or
|
54
|
-
default visualization steps (if any). The agent generates a Python function to produce the visualization,
|
55
|
-
executes it, and logs the process, including code and errors. It is designed to facilitate reproducible
|
54
|
+
default visualization steps (if any). The agent generates a Python function to produce the visualization,
|
55
|
+
executes it, and logs the process, including code and errors. It is designed to facilitate reproducible
|
56
56
|
and customizable data visualization workflows.
|
57
57
|
|
58
58
|
The agent may use default instructions for creating charts unless instructed otherwise, such as:
|
@@ -124,10 +124,10 @@ class DataVisualizationAgent(BaseAgent):
|
|
124
124
|
llm = ChatOpenAI(model="gpt-4o-mini")
|
125
125
|
|
126
126
|
data_visualization_agent = DataVisualizationAgent(
|
127
|
-
model=llm,
|
127
|
+
model=llm,
|
128
128
|
n_samples=30,
|
129
|
-
log=True,
|
130
|
-
log_path="logs",
|
129
|
+
log=True,
|
130
|
+
log_path="logs",
|
131
131
|
human_in_the_loop=True
|
132
132
|
)
|
133
133
|
|
@@ -141,7 +141,7 @@ class DataVisualizationAgent(BaseAgent):
|
|
141
141
|
)
|
142
142
|
|
143
143
|
plotly_graph_dict = data_visualization_agent.get_plotly_graph()
|
144
|
-
# You can render plotly_graph_dict with plotly.io.from_json or
|
144
|
+
# You can render plotly_graph_dict with plotly.io.from_json or
|
145
145
|
# something similar in a Jupyter Notebook.
|
146
146
|
|
147
147
|
response = data_visualization_agent.get_response()
|
@@ -149,21 +149,21 @@ class DataVisualizationAgent(BaseAgent):
|
|
149
149
|
|
150
150
|
Returns
|
151
151
|
--------
|
152
|
-
DataVisualizationAgent : langchain.graphs.CompiledStateGraph
|
153
|
-
A data visualization agent implemented as a compiled state graph.
|
152
|
+
DataVisualizationAgent : langchain.graphs.CompiledStateGraph
|
153
|
+
A data visualization agent implemented as a compiled state graph.
|
154
154
|
"""
|
155
155
|
|
156
156
|
def __init__(
|
157
|
-
self,
|
158
|
-
model,
|
159
|
-
n_samples=30,
|
160
|
-
log=False,
|
161
|
-
log_path=None,
|
162
|
-
file_name="data_visualization.py",
|
157
|
+
self,
|
158
|
+
model,
|
159
|
+
n_samples=30,
|
160
|
+
log=False,
|
161
|
+
log_path=None,
|
162
|
+
file_name="data_visualization.py",
|
163
163
|
function_name="data_visualization",
|
164
|
-
overwrite=True,
|
165
|
-
human_in_the_loop=False,
|
166
|
-
bypass_recommended_steps=False,
|
164
|
+
overwrite=True,
|
165
|
+
human_in_the_loop=False,
|
166
|
+
bypass_recommended_steps=False,
|
167
167
|
bypass_explain_code=False,
|
168
168
|
checkpointer=None,
|
169
169
|
):
|
@@ -185,7 +185,7 @@ class DataVisualizationAgent(BaseAgent):
|
|
185
185
|
|
186
186
|
def _make_compiled_graph(self):
|
187
187
|
"""
|
188
|
-
Create the compiled graph for the data visualization agent.
|
188
|
+
Create the compiled graph for the data visualization agent.
|
189
189
|
Running this method will reset the response to None.
|
190
190
|
"""
|
191
191
|
self.response = None
|
@@ -201,9 +201,16 @@ class DataVisualizationAgent(BaseAgent):
|
|
201
201
|
# Rebuild the compiled graph
|
202
202
|
self._compiled_graph = self._make_compiled_graph()
|
203
203
|
|
204
|
-
async def ainvoke_agent(
|
204
|
+
async def ainvoke_agent(
|
205
|
+
self,
|
206
|
+
data_raw: pd.DataFrame,
|
207
|
+
user_instructions: str = None,
|
208
|
+
max_retries: int = 3,
|
209
|
+
retry_count: int = 0,
|
210
|
+
**kwargs,
|
211
|
+
):
|
205
212
|
"""
|
206
|
-
Asynchronously invokes the agent to generate a visualization.
|
213
|
+
Asynchronously invokes the agent to generate a visualization.
|
207
214
|
The response is stored in the 'response' attribute.
|
208
215
|
|
209
216
|
Parameters
|
@@ -223,18 +230,28 @@ class DataVisualizationAgent(BaseAgent):
|
|
223
230
|
-------
|
224
231
|
None
|
225
232
|
"""
|
226
|
-
response = await self._compiled_graph.ainvoke(
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
233
|
+
response = await self._compiled_graph.ainvoke(
|
234
|
+
{
|
235
|
+
"user_instructions": user_instructions,
|
236
|
+
"data_raw": data_raw.to_dict(),
|
237
|
+
"max_retries": max_retries,
|
238
|
+
"retry_count": retry_count,
|
239
|
+
},
|
240
|
+
**kwargs,
|
241
|
+
)
|
232
242
|
self.response = response
|
233
243
|
return None
|
234
244
|
|
235
|
-
def invoke_agent(
|
245
|
+
def invoke_agent(
|
246
|
+
self,
|
247
|
+
data_raw: pd.DataFrame,
|
248
|
+
user_instructions: str = None,
|
249
|
+
max_retries: int = 3,
|
250
|
+
retry_count: int = 0,
|
251
|
+
**kwargs,
|
252
|
+
):
|
236
253
|
"""
|
237
|
-
Synchronously invokes the agent to generate a visualization.
|
254
|
+
Synchronously invokes the agent to generate a visualization.
|
238
255
|
The response is stored in the 'response' attribute.
|
239
256
|
|
240
257
|
Parameters
|
@@ -254,12 +271,15 @@ class DataVisualizationAgent(BaseAgent):
|
|
254
271
|
-------
|
255
272
|
None
|
256
273
|
"""
|
257
|
-
response = self._compiled_graph.invoke(
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
274
|
+
response = self._compiled_graph.invoke(
|
275
|
+
{
|
276
|
+
"user_instructions": user_instructions,
|
277
|
+
"data_raw": data_raw.to_dict(),
|
278
|
+
"max_retries": max_retries,
|
279
|
+
"retry_count": retry_count,
|
280
|
+
},
|
281
|
+
**kwargs,
|
282
|
+
)
|
263
283
|
self.response = response
|
264
284
|
return None
|
265
285
|
|
@@ -268,7 +288,9 @@ class DataVisualizationAgent(BaseAgent):
|
|
268
288
|
Retrieves the agent's workflow summary, if logging is enabled.
|
269
289
|
"""
|
270
290
|
if self.response and self.response.get("messages"):
|
271
|
-
summary = get_generic_summary(
|
291
|
+
summary = get_generic_summary(
|
292
|
+
json.loads(self.response.get("messages")[-1].content)
|
293
|
+
)
|
272
294
|
if markdown:
|
273
295
|
return Markdown(summary)
|
274
296
|
else:
|
@@ -279,7 +301,7 @@ class DataVisualizationAgent(BaseAgent):
|
|
279
301
|
Logs a summary of the agent's operations, if logging is enabled.
|
280
302
|
"""
|
281
303
|
if self.response:
|
282
|
-
if self.response.get(
|
304
|
+
if self.response.get("data_visualization_function_path"):
|
283
305
|
log_details = f"""
|
284
306
|
## Data Visualization Agent Log Summary:
|
285
307
|
|
@@ -288,7 +310,7 @@ Function Path: {self.response.get('data_visualization_function_path')}
|
|
288
310
|
Function Name: {self.response.get('data_visualization_function_name')}
|
289
311
|
"""
|
290
312
|
if markdown:
|
291
|
-
return Markdown(log_details)
|
313
|
+
return Markdown(log_details)
|
292
314
|
else:
|
293
315
|
return log_details
|
294
316
|
|
@@ -380,16 +402,17 @@ Function Name: {self.response.get('data_visualization_function_name')}
|
|
380
402
|
|
381
403
|
# Agent
|
382
404
|
|
405
|
+
|
383
406
|
def make_data_visualization_agent(
|
384
|
-
model,
|
407
|
+
model,
|
385
408
|
n_samples=30,
|
386
|
-
log=False,
|
387
|
-
log_path=None,
|
409
|
+
log=False,
|
410
|
+
log_path=None,
|
388
411
|
file_name="data_visualization.py",
|
389
412
|
function_name="data_visualization",
|
390
|
-
overwrite=True,
|
391
|
-
human_in_the_loop=False,
|
392
|
-
bypass_recommended_steps=False,
|
413
|
+
overwrite=True,
|
414
|
+
human_in_the_loop=False,
|
415
|
+
bypass_recommended_steps=False,
|
393
416
|
bypass_explain_code=False,
|
394
417
|
checkpointer=None,
|
395
418
|
):
|
@@ -460,25 +483,27 @@ def make_data_visualization_agent(
|
|
460
483
|
app : langchain.graphs.CompiledStateGraph
|
461
484
|
The data visualization agent as a state graph.
|
462
485
|
"""
|
463
|
-
|
486
|
+
|
464
487
|
llm = model
|
465
|
-
|
488
|
+
|
466
489
|
if human_in_the_loop:
|
467
490
|
if checkpointer is None:
|
468
|
-
print(
|
491
|
+
print(
|
492
|
+
"Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver()."
|
493
|
+
)
|
469
494
|
checkpointer = MemorySaver()
|
470
|
-
|
495
|
+
|
471
496
|
# Human in th loop requires recommended steps
|
472
497
|
if bypass_recommended_steps and human_in_the_loop:
|
473
498
|
bypass_recommended_steps = False
|
474
499
|
print("Bypass recommended steps set to False to enable human in the loop.")
|
475
|
-
|
500
|
+
|
476
501
|
# Setup Log Directory
|
477
502
|
if log:
|
478
503
|
if log_path is None:
|
479
504
|
log_path = LOG_PATH
|
480
505
|
if not os.path.exists(log_path):
|
481
|
-
os.makedirs(log_path)
|
506
|
+
os.makedirs(log_path)
|
482
507
|
|
483
508
|
# Define GraphState for the router
|
484
509
|
class GraphState(TypedDict):
|
@@ -496,12 +521,11 @@ def make_data_visualization_agent(
|
|
496
521
|
data_visualization_error: str
|
497
522
|
max_retries: int
|
498
523
|
retry_count: int
|
499
|
-
|
524
|
+
|
500
525
|
def chart_instructor(state: GraphState):
|
501
|
-
|
502
526
|
print(format_agent_name(AGENT_NAME))
|
503
527
|
print(" * CREATE CHART GENERATOR INSTRUCTIONS")
|
504
|
-
|
528
|
+
|
505
529
|
recommend_steps_prompt = PromptTemplate(
|
506
530
|
template="""
|
507
531
|
You are a supervisor that is an expert in providing instructions to a chart generator agent for plotting.
|
@@ -514,25 +538,23 @@ def make_data_visualization_agent(
|
|
514
538
|
Previously Recommended Instructions (if any):
|
515
539
|
{recommended_steps}
|
516
540
|
|
517
|
-
DATA:
|
541
|
+
DATA SUMMARY:
|
518
542
|
{all_datasets_summary}
|
519
543
|
|
520
|
-
|
544
|
+
IMPORTANT:
|
545
|
+
|
546
|
+
- Formulate chart generator instructions by informing the chart generator of what type of plotly plot to use (e.g. bar, line, scatter, etc) to best represent the data.
|
547
|
+
- Think about how best to convey the information in the data to the user.
|
548
|
+
- If the user does not specify a type of plot, select the appropriate chart type based on the data summary provided and the user's question and how best to show the results.
|
549
|
+
- Come up with an informative title from the user's question and data provided. Also provide X and Y axis titles.
|
550
|
+
|
551
|
+
CHART TYPE SELECTION TIPS:
|
521
552
|
|
522
|
-
|
553
|
+
- If a numeric column has less than 10 unique values, consider this column to be treated as a categorical column. Pick a chart that is appropriate for categorical data.
|
554
|
+
- If a numeric column has more than 10 unique values, consider this column to be treated as a continuous column. Pick a chart that is appropriate for continuous data.
|
523
555
|
|
524
|
-
Instruct the chart generator to use the following theme colors, sizes, etc:
|
525
556
|
|
526
|
-
|
527
|
-
- Use a white background
|
528
|
-
- Use this color for bars and lines:
|
529
|
-
'blue': '#3381ff',
|
530
|
-
- Base Font Size: 8.8 (Used for x and y axes tickfont, any annotations, hovertips)
|
531
|
-
- Title Font Size: 13.2
|
532
|
-
- Line Size: 0.65 (specify these within the xaxis and yaxis dictionaries)
|
533
|
-
- Add smoothers or trendlines to scatter plots unless not desired by the user
|
534
|
-
- Do not use color_discrete_map (this will result in an error)
|
535
|
-
- Hover tip size: 8.8
|
557
|
+
RETURN FORMAT:
|
536
558
|
|
537
559
|
Return your instructions in the following format:
|
538
560
|
CHART GENERATOR INSTRUCTIONS:
|
@@ -542,51 +564,61 @@ def make_data_visualization_agent(
|
|
542
564
|
1. Do not include steps to save files.
|
543
565
|
2. Do not include unrelated user instructions that are not related to the chart generation.
|
544
566
|
""",
|
545
|
-
input_variables=[
|
546
|
-
|
567
|
+
input_variables=[
|
568
|
+
"user_instructions",
|
569
|
+
"recommended_steps",
|
570
|
+
"all_datasets_summary",
|
571
|
+
],
|
547
572
|
)
|
548
|
-
|
573
|
+
|
549
574
|
data_raw = state.get("data_raw")
|
550
575
|
df = pd.DataFrame.from_dict(data_raw)
|
551
576
|
|
552
|
-
all_datasets_summary = get_dataframe_summary(
|
553
|
-
|
577
|
+
all_datasets_summary = get_dataframe_summary(
|
578
|
+
[df], n_sample=n_samples, skip_stats=False
|
579
|
+
)
|
580
|
+
|
554
581
|
all_datasets_summary_str = "\n\n".join(all_datasets_summary)
|
555
582
|
|
556
|
-
chart_instructor = recommend_steps_prompt | llm
|
557
|
-
|
558
|
-
recommended_steps = chart_instructor.invoke(
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
583
|
+
chart_instructor = recommend_steps_prompt | llm
|
584
|
+
|
585
|
+
recommended_steps = chart_instructor.invoke(
|
586
|
+
{
|
587
|
+
"user_instructions": state.get("user_instructions"),
|
588
|
+
"recommended_steps": state.get("recommended_steps"),
|
589
|
+
"all_datasets_summary": all_datasets_summary_str,
|
590
|
+
}
|
591
|
+
)
|
592
|
+
|
564
593
|
return {
|
565
|
-
"recommended_steps": format_recommended_steps(
|
566
|
-
|
594
|
+
"recommended_steps": format_recommended_steps(
|
595
|
+
recommended_steps.content.strip(),
|
596
|
+
heading="# Recommended Data Cleaning Steps:",
|
597
|
+
),
|
598
|
+
"all_datasets_summary": all_datasets_summary_str,
|
567
599
|
}
|
568
|
-
|
600
|
+
|
569
601
|
def chart_generator(state: GraphState):
|
570
|
-
|
571
602
|
print(" * CREATE DATA VISUALIZATION CODE")
|
572
603
|
|
573
|
-
|
574
604
|
if bypass_recommended_steps:
|
575
605
|
print(format_agent_name(AGENT_NAME))
|
576
|
-
|
606
|
+
|
577
607
|
data_raw = state.get("data_raw")
|
578
608
|
df = pd.DataFrame.from_dict(data_raw)
|
579
609
|
|
580
|
-
all_datasets_summary = get_dataframe_summary(
|
581
|
-
|
610
|
+
all_datasets_summary = get_dataframe_summary(
|
611
|
+
[df], n_sample=n_samples, skip_stats=False
|
612
|
+
)
|
613
|
+
|
582
614
|
all_datasets_summary_str = "\n\n".join(all_datasets_summary)
|
583
|
-
|
615
|
+
|
584
616
|
chart_generator_instructions = state.get("user_instructions")
|
585
|
-
|
617
|
+
|
586
618
|
else:
|
587
619
|
all_datasets_summary_str = state.get("all_datasets_summary")
|
588
620
|
chart_generator_instructions = state.get("recommended_steps")
|
589
|
-
|
621
|
+
|
590
622
|
prompt_template = PromptTemplate(
|
591
623
|
template="""
|
592
624
|
You are a chart generator agent that is an expert in generating plotly charts. You must use plotly or plotly.express to produce plots.
|
@@ -628,65 +660,76 @@ def make_data_visualization_agent(
|
|
628
660
|
2. Do not include unrelated user instructions that are not related to the chart generation.
|
629
661
|
|
630
662
|
""",
|
631
|
-
input_variables=[
|
663
|
+
input_variables=[
|
664
|
+
"chart_generator_instructions",
|
665
|
+
"all_datasets_summary",
|
666
|
+
"function_name",
|
667
|
+
],
|
632
668
|
)
|
633
669
|
|
634
670
|
data_visualization_agent = prompt_template | llm | PythonOutputParser()
|
635
|
-
|
636
|
-
response = data_visualization_agent.invoke(
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
671
|
+
|
672
|
+
response = data_visualization_agent.invoke(
|
673
|
+
{
|
674
|
+
"chart_generator_instructions": chart_generator_instructions,
|
675
|
+
"all_datasets_summary": all_datasets_summary_str,
|
676
|
+
"function_name": function_name,
|
677
|
+
}
|
678
|
+
)
|
679
|
+
|
642
680
|
response = relocate_imports_inside_function(response)
|
643
681
|
response = add_comments_to_top(response, agent_name=AGENT_NAME)
|
644
|
-
|
682
|
+
|
645
683
|
# For logging: store the code generated:
|
646
684
|
file_path, file_name_2 = log_ai_function(
|
647
685
|
response=response,
|
648
686
|
file_name=file_name,
|
649
687
|
log=log,
|
650
688
|
log_path=log_path,
|
651
|
-
overwrite=overwrite
|
689
|
+
overwrite=overwrite,
|
652
690
|
)
|
653
|
-
|
691
|
+
|
654
692
|
return {
|
655
693
|
"data_visualization_function": response,
|
656
694
|
"data_visualization_function_path": file_path,
|
657
695
|
"data_visualization_function_file_name": file_name_2,
|
658
696
|
"data_visualization_function_name": function_name,
|
659
|
-
"all_datasets_summary": all_datasets_summary_str
|
697
|
+
"all_datasets_summary": all_datasets_summary_str,
|
660
698
|
}
|
661
|
-
|
699
|
+
|
662
700
|
# Human Review
|
663
|
-
|
701
|
+
|
664
702
|
prompt_text_human_review = "Are the following data visualization instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
|
665
|
-
|
703
|
+
|
666
704
|
if not bypass_explain_code:
|
667
|
-
|
705
|
+
|
706
|
+
def human_review(
|
707
|
+
state: GraphState,
|
708
|
+
) -> Command[Literal["chart_instructor", "explain_data_visualization_code"]]:
|
668
709
|
return node_func_human_review(
|
669
710
|
state=state,
|
670
711
|
prompt_text=prompt_text_human_review,
|
671
|
-
yes_goto=
|
712
|
+
yes_goto="explain_data_visualization_code",
|
672
713
|
no_goto="chart_instructor",
|
673
714
|
user_instructions_key="user_instructions",
|
674
715
|
recommended_steps_key="recommended_steps",
|
675
716
|
code_snippet_key="data_visualization_function",
|
676
717
|
)
|
677
718
|
else:
|
678
|
-
|
719
|
+
|
720
|
+
def human_review(
|
721
|
+
state: GraphState,
|
722
|
+
) -> Command[Literal["chart_instructor", "__end__"]]:
|
679
723
|
return node_func_human_review(
|
680
724
|
state=state,
|
681
725
|
prompt_text=prompt_text_human_review,
|
682
|
-
yes_goto=
|
726
|
+
yes_goto="__end__",
|
683
727
|
no_goto="chart_instructor",
|
684
728
|
user_instructions_key="user_instructions",
|
685
729
|
recommended_steps_key="recommended_steps",
|
686
|
-
code_snippet_key="data_visualization_function",
|
730
|
+
code_snippet_key="data_visualization_function",
|
687
731
|
)
|
688
|
-
|
689
|
-
|
732
|
+
|
690
733
|
def execute_data_visualization_code(state):
|
691
734
|
return node_func_execute_agent_code_on_data(
|
692
735
|
state=state,
|
@@ -697,9 +740,9 @@ def make_data_visualization_agent(
|
|
697
740
|
agent_function_name=state.get("data_visualization_function_name"),
|
698
741
|
pre_processing=lambda data: pd.DataFrame.from_dict(data),
|
699
742
|
# post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
|
700
|
-
error_message_prefix="An error occurred during data visualization: "
|
743
|
+
error_message_prefix="An error occurred during data visualization: ",
|
701
744
|
)
|
702
|
-
|
745
|
+
|
703
746
|
def fix_data_visualization_code(state: GraphState):
|
704
747
|
prompt = """
|
705
748
|
You are a Data Visualization Agent. Your job is to create a {function_name}() function that can be run on the data provided. The function is currently broken and needs to be fixed.
|
@@ -719,14 +762,14 @@ def make_data_visualization_agent(
|
|
719
762
|
state=state,
|
720
763
|
code_snippet_key="data_visualization_function",
|
721
764
|
error_key="data_visualization_error",
|
722
|
-
llm=llm,
|
765
|
+
llm=llm,
|
723
766
|
prompt_template=prompt,
|
724
767
|
agent_name=AGENT_NAME,
|
725
768
|
log=log,
|
726
769
|
file_path=state.get("data_visualization_function_path"),
|
727
770
|
function_name=state.get("data_visualization_function_name"),
|
728
771
|
)
|
729
|
-
|
772
|
+
|
730
773
|
# Final reporting node
|
731
774
|
def report_agent_outputs(state: GraphState):
|
732
775
|
return node_func_report_agent_outputs(
|
@@ -740,9 +783,9 @@ def make_data_visualization_agent(
|
|
740
783
|
],
|
741
784
|
result_key="messages",
|
742
785
|
role=AGENT_NAME,
|
743
|
-
custom_title="Data Visualization Agent Outputs"
|
786
|
+
custom_title="Data Visualization Agent Outputs",
|
744
787
|
)
|
745
|
-
|
788
|
+
|
746
789
|
# Define the graph
|
747
790
|
node_functions = {
|
748
791
|
"chart_instructor": chart_instructor,
|
@@ -752,7 +795,7 @@ def make_data_visualization_agent(
|
|
752
795
|
"fix_data_visualization_code": fix_data_visualization_code,
|
753
796
|
"report_agent_outputs": report_agent_outputs,
|
754
797
|
}
|
755
|
-
|
798
|
+
|
756
799
|
app = create_coding_agent_graph(
|
757
800
|
GraphState=GraphState,
|
758
801
|
node_functions=node_functions,
|
@@ -769,5 +812,5 @@ def make_data_visualization_agent(
|
|
769
812
|
bypass_explain_code=bypass_explain_code,
|
770
813
|
agent_name=AGENT_NAME,
|
771
814
|
)
|
772
|
-
|
815
|
+
|
773
816
|
return app
|
@@ -702,6 +702,7 @@ def make_data_wrangling_agent(
|
|
702
702
|
|
703
703
|
Important Notes:
|
704
704
|
1. Do Not use Print statements to display the data. Return the data frame instead with the data wrangling operation performed.
|
705
|
+
2. Do not plot graphs. Only return the data frame.
|
705
706
|
|
706
707
|
Make sure to explain any non-trivial steps with inline comments. Follow user instructions. Comment code thoroughly.
|
707
708
|
|
@@ -132,10 +132,10 @@ class PandasDataAnalyst(BaseAgent):
|
|
132
132
|
"""Returns a summary of the workflow."""
|
133
133
|
if self.response and self.response.get("messages"):
|
134
134
|
agents = [msg.role for msg in self.response["messages"]]
|
135
|
-
agent_labels = [f"- **Agent {i+1}:** {role}" for i, role in enumerate(agents)]
|
135
|
+
agent_labels = [f"- **Agent {i+1}:** {role}\n" for i, role in enumerate(agents)]
|
136
136
|
header = f"# Pandas Data Analyst Workflow Summary\n\nThis workflow contains {len(agents)} agents:\n\n" + "\n".join(agent_labels)
|
137
137
|
reports = [get_generic_summary(json.loads(msg.content)) for msg in self.response["messages"]]
|
138
|
-
summary = "\n" +header + "\n\n".join(reports)
|
138
|
+
summary = "\n\n" + header + "\n\n".join(reports)
|
139
139
|
return Markdown(summary) if markdown else summary
|
140
140
|
|
141
141
|
@staticmethod
|
@@ -177,15 +177,15 @@ def make_pandas_data_analyst(
|
|
177
177
|
|
178
178
|
routing_preprocessor_prompt = PromptTemplate(
|
179
179
|
template="""
|
180
|
-
You are an expert in routing decisions for a Pandas Data Manipulation Wrangling Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to:
|
180
|
+
You are an expert in routing decisions for a Pandas Data Manipulation Wrangling Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to tell the agents which actions to perform and determine the correct routing for the incoming user question:
|
181
181
|
|
182
|
-
1. Determine what the correct format for a Users Question should be for use with a Pandas Data Wrangling Agent based on the incoming user question. Anything related to data wrangling and manipulation should be passed along.
|
182
|
+
1. Determine what the correct format for a Users Question should be for use with a Pandas Data Wrangling Agent based on the incoming user question. Anything related to data wrangling and manipulation should be passed along. Anything related to data analysis can be handled by the Pandas Agent. Anything that uses Pandas can be passed along. Tables can be returned from this agent. Don't pass along anything about plotting or visualization.
|
183
183
|
2. Determine whether or not a chart should be generated or a table should be returned based on the users question.
|
184
184
|
3. If a chart is requested, determine the correct format of a Users Question should be used with a Data Visualization Agent. Anything related to plotting and visualization should be passed along.
|
185
185
|
|
186
186
|
Use the following criteria on how to route the the initial user question:
|
187
187
|
|
188
|
-
From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the
|
188
|
+
From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the Pandas Data Wrangling and Transformation agent. This will be the 'user_instructions_data_wrangling'. If 'None' is found, return the original user question.
|
189
189
|
|
190
190
|
Next, determine if the user would like a data visualization ('chart') or a 'table' returned with the results of the Data Wrangling Agent. If unknown, not specified or 'None' is found, then select 'table'.
|
191
191
|
|
@@ -301,24 +301,13 @@ class SQLDataAnalyst(BaseAgent):
|
|
301
301
|
markdown: bool
|
302
302
|
If True, returns the summary as a Markdown-formatted string.
|
303
303
|
"""
|
304
|
-
if self.response and self.
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
# Construct header
|
313
|
-
header = f"# SQL Data Analyst Workflow Summary Report\n\nThis agentic workflow contains {len(agents)} agents:\n\n" + "\n".join(agent_labels)
|
314
|
-
|
315
|
-
reports = []
|
316
|
-
for msg in self.get_response()['messages']:
|
317
|
-
reports.append(get_generic_summary(json.loads(msg.content)))
|
318
|
-
|
319
|
-
if markdown:
|
320
|
-
return Markdown(header + "\n\n".join(reports))
|
321
|
-
return "\n\n".join(reports)
|
304
|
+
if self.response and self.response.get("messages"):
|
305
|
+
agents = [msg.role for msg in self.response["messages"]]
|
306
|
+
agent_labels = [f"- **Agent {i+1}:** {role}\n" for i, role in enumerate(agents)]
|
307
|
+
header = f"# SQL Data Analyst Workflow Summary\n\nThis workflow contains {len(agents)} agents:\n\n" + "\n".join(agent_labels)
|
308
|
+
reports = [get_generic_summary(json.loads(msg.content)) for msg in self.response["messages"]]
|
309
|
+
summary = "\n\n" + header + "\n\n".join(reports)
|
310
|
+
return Markdown(summary) if markdown else summary
|
322
311
|
|
323
312
|
|
324
313
|
|
@@ -235,6 +235,7 @@ def correlation_funnel(
|
|
235
235
|
df_correlated = df_binarized.correlate(target=full_target, method=corr_method)
|
236
236
|
|
237
237
|
# Attempt to generate a static plot.
|
238
|
+
encoded = None
|
238
239
|
try:
|
239
240
|
# Here we assume that your DataFrame has a method plot_correlation_funnel.
|
240
241
|
fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
|
@@ -248,6 +249,7 @@ def correlation_funnel(
|
|
248
249
|
encoded = {"error": str(e)}
|
249
250
|
|
250
251
|
# Attempt to generate a Plotly plot.
|
252
|
+
fig_dict = None
|
251
253
|
try:
|
252
254
|
fig = df_correlated.plot_correlation_funnel(engine='plotly')
|
253
255
|
fig_json = pio.to_json(fig)
|
{ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: ai-data-science-team
|
3
|
-
Version: 0.0.0.9014
|
3
|
+
Version: 0.0.0.9015
|
4
4
|
Summary: Build and run an AI-powered data science team.
|
5
5
|
Home-page: https://github.com/business-science/ai-data-science-team
|
6
6
|
Author: Matt Dancho
|
@@ -136,9 +136,9 @@ This project is a work in progress. New data science agents will be released soo
|
|
136
136
|
|
137
137
|
### NEW: Multi-Agents
|
138
138
|
|
139
|
-
|
139
|
+
**🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
|
140
140
|
|
141
|
-

|
142
142
|
|
143
143
|
### Data Science Apps
|
144
144
|
|
@@ -182,7 +182,8 @@ This is a top secret project I'm working on. It's a multi-agent data science app
|
|
182
182
|
|
183
183
|
#### Multi-Agents
|
184
184
|
|
185
|
-
1.
|
185
|
+
1. **🔥🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/pandas_data_analyst.ipynb)
|
186
|
+
2. **🔥🔥 SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
|
186
187
|
|
187
188
|
### Agents Coming Soon
|
188
189
|
|
{ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/RECORD
RENAMED
@@ -1,11 +1,11 @@
|
|
1
1
|
ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
|
2
|
-
ai_data_science_team/_version.py,sha256=
|
2
|
+
ai_data_science_team/_version.py,sha256=c-XrUvZG3E6SWR9NMQqLxISzMZJUpsnK0FlIEMHAOls,27
|
3
3
|
ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
|
4
4
|
ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
|
5
5
|
ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
|
6
6
|
ai_data_science_team/agents/data_loader_tools_agent.py,sha256=TFKzYqV6cvU-sMbfL-hg8-NgF_Hz3nysGFldvb5K3fM,9327
|
7
|
-
ai_data_science_team/agents/data_visualization_agent.py,sha256=
|
8
|
-
ai_data_science_team/agents/data_wrangling_agent.py,sha256=
|
7
|
+
ai_data_science_team/agents/data_visualization_agent.py,sha256=IHNagAVY4XIRfyKKj3jdJZV0vUpzBqqnQBVbzP1lZj0,29829
|
8
|
+
ai_data_science_team/agents/data_wrangling_agent.py,sha256=jyBrEfLsgIqSF6xcmRgnkzvNqJfkXdjn6FDefQij62o,33439
|
9
9
|
ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
|
10
10
|
ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
|
11
11
|
ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
|
@@ -16,8 +16,8 @@ ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NX
|
|
16
16
|
ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=QImaZnS8hPdrU7GI6pZ0dUDO-LXx40MSA3XyMDppIh0,12003
|
18
18
|
ai_data_science_team/multiagents/__init__.py,sha256=5tpmZBQ_UT5SKDCS_NivZhN19HEStKIcstiqSXPXDl0,208
|
19
|
-
ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=
|
20
|
-
ai_data_science_team/multiagents/sql_data_analyst.py,sha256=
|
19
|
+
ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=6JvcGFvDH7_ozRo-RQvjA_to5R27c7ZSEdKt4VQGL6U,13935
|
20
|
+
ai_data_science_team/multiagents/sql_data_analyst.py,sha256=ZZx3Edzff6zf27iPl8lUGoqaZkPaJQtCJIgNx9wdCZY,18232
|
21
21
|
ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
|
22
22
|
ai_data_science_team/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
ai_data_science_team/parsers/parsers.py,sha256=hIsMZXRHz9hqs8R1ebymKA7D6NxOf5UVMpDAr_gGhE8,2027
|
@@ -26,7 +26,7 @@ ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J
|
|
26
26
|
ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
27
|
ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
|
28
28
|
ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
|
29
|
-
ai_data_science_team/tools/eda.py,sha256=
|
29
|
+
ai_data_science_team/tools/eda.py,sha256=orabE8qaYj5TC5n7CRS6rHOPkyBVxr488631AwkVKVg,12726
|
30
30
|
ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
|
31
31
|
ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
|
32
32
|
ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
|
@@ -37,8 +37,8 @@ ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcU
|
|
37
37
|
ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
|
38
38
|
ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
|
39
39
|
ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
|
40
|
-
ai_data_science_team-0.0.0.
|
41
|
-
ai_data_science_team-0.0.0.
|
42
|
-
ai_data_science_team-0.0.0.
|
43
|
-
ai_data_science_team-0.0.0.
|
44
|
-
ai_data_science_team-0.0.0.
|
40
|
+
ai_data_science_team-0.0.0.9015.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
|
41
|
+
ai_data_science_team-0.0.0.9015.dist-info/METADATA,sha256=tIcThz7trmAG6TZAnDHxy8ntBslXMKS5xSUbvaTygyQ,13164
|
42
|
+
ai_data_science_team-0.0.0.9015.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
43
|
+
ai_data_science_team-0.0.0.9015.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
|
44
|
+
ai_data_science_team-0.0.0.9015.dist-info/RECORD,,
|
{ai_data_science_team-0.0.0.9014.dist-info → ai_data_science_team-0.0.0.9015.dist-info}/LICENSE
RENAMED
File without changes
|
File without changes
|