ai-data-science-team 0.0.0.9014__py3-none-any.whl → 0.0.0.9015__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "0.0.0.9014"
1
+ __version__ = "0.0.0.9015"
@@ -4,7 +4,6 @@
4
4
  # * Agents: Data Visualization Agent
5
5
 
6
6
 
7
-
8
7
  # Libraries
9
8
  from typing import TypedDict, Annotated, Sequence, Literal
10
9
  import operator
@@ -17,25 +16,25 @@ from langgraph.checkpoint.memory import MemorySaver
17
16
  from langgraph.types import Checkpointer
18
17
 
19
18
  import os
20
- import json
19
+ import json
21
20
  import pandas as pd
22
21
 
23
22
  from IPython.display import Markdown
24
23
 
25
- from ai_data_science_team.templates import(
26
- node_func_execute_agent_code_on_data,
24
+ from ai_data_science_team.templates import (
25
+ node_func_execute_agent_code_on_data,
27
26
  node_func_human_review,
28
- node_func_fix_agent_code,
27
+ node_func_fix_agent_code,
29
28
  node_func_report_agent_outputs,
30
29
  create_coding_agent_graph,
31
30
  BaseAgent,
32
31
  )
33
32
  from ai_data_science_team.parsers.parsers import PythonOutputParser
34
33
  from ai_data_science_team.utils.regex import (
35
- relocate_imports_inside_function,
36
- add_comments_to_top,
37
- format_agent_name,
38
- format_recommended_steps,
34
+ relocate_imports_inside_function,
35
+ add_comments_to_top,
36
+ format_agent_name,
37
+ format_recommended_steps,
39
38
  get_generic_summary,
40
39
  )
41
40
  from ai_data_science_team.tools.dataframe import get_dataframe_summary
@@ -48,11 +47,12 @@ LOG_PATH = os.path.join(os.getcwd(), "logs/")
48
47
 
49
48
  # Class
50
49
 
50
+
51
51
  class DataVisualizationAgent(BaseAgent):
52
52
  """
53
53
  Creates a data visualization agent that can generate Plotly charts based on user-defined instructions or
54
- default visualization steps (if any). The agent generates a Python function to produce the visualization,
55
- executes it, and logs the process, including code and errors. It is designed to facilitate reproducible
54
+ default visualization steps (if any). The agent generates a Python function to produce the visualization,
55
+ executes it, and logs the process, including code and errors. It is designed to facilitate reproducible
56
56
  and customizable data visualization workflows.
57
57
 
58
58
  The agent may use default instructions for creating charts unless instructed otherwise, such as:
@@ -124,10 +124,10 @@ class DataVisualizationAgent(BaseAgent):
124
124
  llm = ChatOpenAI(model="gpt-4o-mini")
125
125
 
126
126
  data_visualization_agent = DataVisualizationAgent(
127
- model=llm,
127
+ model=llm,
128
128
  n_samples=30,
129
- log=True,
130
- log_path="logs",
129
+ log=True,
130
+ log_path="logs",
131
131
  human_in_the_loop=True
132
132
  )
133
133
 
@@ -141,7 +141,7 @@ class DataVisualizationAgent(BaseAgent):
141
141
  )
142
142
 
143
143
  plotly_graph_dict = data_visualization_agent.get_plotly_graph()
144
- # You can render plotly_graph_dict with plotly.io.from_json or
144
+ # You can render plotly_graph_dict with plotly.io.from_json or
145
145
  # something similar in a Jupyter Notebook.
146
146
 
147
147
  response = data_visualization_agent.get_response()
@@ -149,21 +149,21 @@ class DataVisualizationAgent(BaseAgent):
149
149
 
150
150
  Returns
151
151
  --------
152
- DataVisualizationAgent : langchain.graphs.CompiledStateGraph
153
- A data visualization agent implemented as a compiled state graph.
152
+ DataVisualizationAgent : langchain.graphs.CompiledStateGraph
153
+ A data visualization agent implemented as a compiled state graph.
154
154
  """
155
155
 
156
156
  def __init__(
157
- self,
158
- model,
159
- n_samples=30,
160
- log=False,
161
- log_path=None,
162
- file_name="data_visualization.py",
157
+ self,
158
+ model,
159
+ n_samples=30,
160
+ log=False,
161
+ log_path=None,
162
+ file_name="data_visualization.py",
163
163
  function_name="data_visualization",
164
- overwrite=True,
165
- human_in_the_loop=False,
166
- bypass_recommended_steps=False,
164
+ overwrite=True,
165
+ human_in_the_loop=False,
166
+ bypass_recommended_steps=False,
167
167
  bypass_explain_code=False,
168
168
  checkpointer=None,
169
169
  ):
@@ -185,7 +185,7 @@ class DataVisualizationAgent(BaseAgent):
185
185
 
186
186
  def _make_compiled_graph(self):
187
187
  """
188
- Create the compiled graph for the data visualization agent.
188
+ Create the compiled graph for the data visualization agent.
189
189
  Running this method will reset the response to None.
190
190
  """
191
191
  self.response = None
@@ -201,9 +201,16 @@ class DataVisualizationAgent(BaseAgent):
201
201
  # Rebuild the compiled graph
202
202
  self._compiled_graph = self._make_compiled_graph()
203
203
 
204
- async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
204
+ async def ainvoke_agent(
205
+ self,
206
+ data_raw: pd.DataFrame,
207
+ user_instructions: str = None,
208
+ max_retries: int = 3,
209
+ retry_count: int = 0,
210
+ **kwargs,
211
+ ):
205
212
  """
206
- Asynchronously invokes the agent to generate a visualization.
213
+ Asynchronously invokes the agent to generate a visualization.
207
214
  The response is stored in the 'response' attribute.
208
215
 
209
216
  Parameters
@@ -223,18 +230,28 @@ class DataVisualizationAgent(BaseAgent):
223
230
  -------
224
231
  None
225
232
  """
226
- response = await self._compiled_graph.ainvoke({
227
- "user_instructions": user_instructions,
228
- "data_raw": data_raw.to_dict(),
229
- "max_retries": max_retries,
230
- "retry_count": retry_count,
231
- }, **kwargs)
233
+ response = await self._compiled_graph.ainvoke(
234
+ {
235
+ "user_instructions": user_instructions,
236
+ "data_raw": data_raw.to_dict(),
237
+ "max_retries": max_retries,
238
+ "retry_count": retry_count,
239
+ },
240
+ **kwargs,
241
+ )
232
242
  self.response = response
233
243
  return None
234
244
 
235
- def invoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
245
+ def invoke_agent(
246
+ self,
247
+ data_raw: pd.DataFrame,
248
+ user_instructions: str = None,
249
+ max_retries: int = 3,
250
+ retry_count: int = 0,
251
+ **kwargs,
252
+ ):
236
253
  """
237
- Synchronously invokes the agent to generate a visualization.
254
+ Synchronously invokes the agent to generate a visualization.
238
255
  The response is stored in the 'response' attribute.
239
256
 
240
257
  Parameters
@@ -254,12 +271,15 @@ class DataVisualizationAgent(BaseAgent):
254
271
  -------
255
272
  None
256
273
  """
257
- response = self._compiled_graph.invoke({
258
- "user_instructions": user_instructions,
259
- "data_raw": data_raw.to_dict(),
260
- "max_retries": max_retries,
261
- "retry_count": retry_count,
262
- }, **kwargs)
274
+ response = self._compiled_graph.invoke(
275
+ {
276
+ "user_instructions": user_instructions,
277
+ "data_raw": data_raw.to_dict(),
278
+ "max_retries": max_retries,
279
+ "retry_count": retry_count,
280
+ },
281
+ **kwargs,
282
+ )
263
283
  self.response = response
264
284
  return None
265
285
 
@@ -268,7 +288,9 @@ class DataVisualizationAgent(BaseAgent):
268
288
  Retrieves the agent's workflow summary, if logging is enabled.
269
289
  """
270
290
  if self.response and self.response.get("messages"):
271
- summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
291
+ summary = get_generic_summary(
292
+ json.loads(self.response.get("messages")[-1].content)
293
+ )
272
294
  if markdown:
273
295
  return Markdown(summary)
274
296
  else:
@@ -279,7 +301,7 @@ class DataVisualizationAgent(BaseAgent):
279
301
  Logs a summary of the agent's operations, if logging is enabled.
280
302
  """
281
303
  if self.response:
282
- if self.response.get('data_visualization_function_path'):
304
+ if self.response.get("data_visualization_function_path"):
283
305
  log_details = f"""
284
306
  ## Data Visualization Agent Log Summary:
285
307
 
@@ -288,7 +310,7 @@ Function Path: {self.response.get('data_visualization_function_path')}
288
310
  Function Name: {self.response.get('data_visualization_function_name')}
289
311
  """
290
312
  if markdown:
291
- return Markdown(log_details)
313
+ return Markdown(log_details)
292
314
  else:
293
315
  return log_details
294
316
 
@@ -380,16 +402,17 @@ Function Name: {self.response.get('data_visualization_function_name')}
380
402
 
381
403
  # Agent
382
404
 
405
+
383
406
  def make_data_visualization_agent(
384
- model,
407
+ model,
385
408
  n_samples=30,
386
- log=False,
387
- log_path=None,
409
+ log=False,
410
+ log_path=None,
388
411
  file_name="data_visualization.py",
389
412
  function_name="data_visualization",
390
- overwrite=True,
391
- human_in_the_loop=False,
392
- bypass_recommended_steps=False,
413
+ overwrite=True,
414
+ human_in_the_loop=False,
415
+ bypass_recommended_steps=False,
393
416
  bypass_explain_code=False,
394
417
  checkpointer=None,
395
418
  ):
@@ -460,25 +483,27 @@ def make_data_visualization_agent(
460
483
  app : langchain.graphs.CompiledStateGraph
461
484
  The data visualization agent as a state graph.
462
485
  """
463
-
486
+
464
487
  llm = model
465
-
488
+
466
489
  if human_in_the_loop:
467
490
  if checkpointer is None:
468
- print("Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver().")
491
+ print(
492
+ "Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver()."
493
+ )
469
494
  checkpointer = MemorySaver()
470
-
495
+
471
496
  # Human in th loop requires recommended steps
472
497
  if bypass_recommended_steps and human_in_the_loop:
473
498
  bypass_recommended_steps = False
474
499
  print("Bypass recommended steps set to False to enable human in the loop.")
475
-
500
+
476
501
  # Setup Log Directory
477
502
  if log:
478
503
  if log_path is None:
479
504
  log_path = LOG_PATH
480
505
  if not os.path.exists(log_path):
481
- os.makedirs(log_path)
506
+ os.makedirs(log_path)
482
507
 
483
508
  # Define GraphState for the router
484
509
  class GraphState(TypedDict):
@@ -496,12 +521,11 @@ def make_data_visualization_agent(
496
521
  data_visualization_error: str
497
522
  max_retries: int
498
523
  retry_count: int
499
-
524
+
500
525
  def chart_instructor(state: GraphState):
501
-
502
526
  print(format_agent_name(AGENT_NAME))
503
527
  print(" * CREATE CHART GENERATOR INSTRUCTIONS")
504
-
528
+
505
529
  recommend_steps_prompt = PromptTemplate(
506
530
  template="""
507
531
  You are a supervisor that is an expert in providing instructions to a chart generator agent for plotting.
@@ -514,25 +538,23 @@ def make_data_visualization_agent(
514
538
  Previously Recommended Instructions (if any):
515
539
  {recommended_steps}
516
540
 
517
- DATA:
541
+ DATA SUMMARY:
518
542
  {all_datasets_summary}
519
543
 
520
- Formulate chart generator instructions by informing the chart generator of what type of plotly plot to use (e.g. bar, line, scatter, etc) to best represent the data.
544
+ IMPORTANT:
545
+
546
+ - Formulate chart generator instructions by informing the chart generator of what type of plotly plot to use (e.g. bar, line, scatter, etc) to best represent the data.
547
+ - Think about how best to convey the information in the data to the user.
548
+ - If the user does not specify a type of plot, select the appropriate chart type based on the data summary provided and the user's question and how best to show the results.
549
+ - Come up with an informative title from the user's question and data provided. Also provide X and Y axis titles.
550
+
551
+ CHART TYPE SELECTION TIPS:
521
552
 
522
- Come up with an informative title from the user's question and data provided. Also provide X and Y axis titles.
553
+ - If a numeric column has less than 10 unique values, consider this column to be treated as a categorical column. Pick a chart that is appropriate for categorical data.
554
+ - If a numeric column has more than 10 unique values, consider this column to be treated as a continuous column. Pick a chart that is appropriate for continuous data.
523
555
 
524
- Instruct the chart generator to use the following theme colors, sizes, etc:
525
556
 
526
- - Start with the "plotly_white" template
527
- - Use a white background
528
- - Use this color for bars and lines:
529
- 'blue': '#3381ff',
530
- - Base Font Size: 8.8 (Used for x and y axes tickfont, any annotations, hovertips)
531
- - Title Font Size: 13.2
532
- - Line Size: 0.65 (specify these within the xaxis and yaxis dictionaries)
533
- - Add smoothers or trendlines to scatter plots unless not desired by the user
534
- - Do not use color_discrete_map (this will result in an error)
535
- - Hover tip size: 8.8
557
+ RETURN FORMAT:
536
558
 
537
559
  Return your instructions in the following format:
538
560
  CHART GENERATOR INSTRUCTIONS:
@@ -542,51 +564,61 @@ def make_data_visualization_agent(
542
564
  1. Do not include steps to save files.
543
565
  2. Do not include unrelated user instructions that are not related to the chart generation.
544
566
  """,
545
- input_variables=["user_instructions", "recommended_steps", "all_datasets_summary"]
546
-
567
+ input_variables=[
568
+ "user_instructions",
569
+ "recommended_steps",
570
+ "all_datasets_summary",
571
+ ],
547
572
  )
548
-
573
+
549
574
  data_raw = state.get("data_raw")
550
575
  df = pd.DataFrame.from_dict(data_raw)
551
576
 
552
- all_datasets_summary = get_dataframe_summary([df], n_sample=n_samples, skip_stats=False)
553
-
577
+ all_datasets_summary = get_dataframe_summary(
578
+ [df], n_sample=n_samples, skip_stats=False
579
+ )
580
+
554
581
  all_datasets_summary_str = "\n\n".join(all_datasets_summary)
555
582
 
556
- chart_instructor = recommend_steps_prompt | llm
557
-
558
- recommended_steps = chart_instructor.invoke({
559
- "user_instructions": state.get("user_instructions"),
560
- "recommended_steps": state.get("recommended_steps"),
561
- "all_datasets_summary": all_datasets_summary_str
562
- })
563
-
583
+ chart_instructor = recommend_steps_prompt | llm
584
+
585
+ recommended_steps = chart_instructor.invoke(
586
+ {
587
+ "user_instructions": state.get("user_instructions"),
588
+ "recommended_steps": state.get("recommended_steps"),
589
+ "all_datasets_summary": all_datasets_summary_str,
590
+ }
591
+ )
592
+
564
593
  return {
565
- "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended Data Cleaning Steps:"),
566
- "all_datasets_summary": all_datasets_summary_str
594
+ "recommended_steps": format_recommended_steps(
595
+ recommended_steps.content.strip(),
596
+ heading="# Recommended Data Cleaning Steps:",
597
+ ),
598
+ "all_datasets_summary": all_datasets_summary_str,
567
599
  }
568
-
600
+
569
601
  def chart_generator(state: GraphState):
570
-
571
602
  print(" * CREATE DATA VISUALIZATION CODE")
572
603
 
573
-
574
604
  if bypass_recommended_steps:
575
605
  print(format_agent_name(AGENT_NAME))
576
-
606
+
577
607
  data_raw = state.get("data_raw")
578
608
  df = pd.DataFrame.from_dict(data_raw)
579
609
 
580
- all_datasets_summary = get_dataframe_summary([df], n_sample=n_samples, skip_stats=False)
581
-
610
+ all_datasets_summary = get_dataframe_summary(
611
+ [df], n_sample=n_samples, skip_stats=False
612
+ )
613
+
582
614
  all_datasets_summary_str = "\n\n".join(all_datasets_summary)
583
-
615
+
584
616
  chart_generator_instructions = state.get("user_instructions")
585
-
617
+
586
618
  else:
587
619
  all_datasets_summary_str = state.get("all_datasets_summary")
588
620
  chart_generator_instructions = state.get("recommended_steps")
589
-
621
+
590
622
  prompt_template = PromptTemplate(
591
623
  template="""
592
624
  You are a chart generator agent that is an expert in generating plotly charts. You must use plotly or plotly.express to produce plots.
@@ -628,65 +660,76 @@ def make_data_visualization_agent(
628
660
  2. Do not include unrelated user instructions that are not related to the chart generation.
629
661
 
630
662
  """,
631
- input_variables=["chart_generator_instructions", "all_datasets_summary", "function_name"]
663
+ input_variables=[
664
+ "chart_generator_instructions",
665
+ "all_datasets_summary",
666
+ "function_name",
667
+ ],
632
668
  )
633
669
 
634
670
  data_visualization_agent = prompt_template | llm | PythonOutputParser()
635
-
636
- response = data_visualization_agent.invoke({
637
- "chart_generator_instructions": chart_generator_instructions,
638
- "all_datasets_summary": all_datasets_summary_str,
639
- "function_name": function_name
640
- })
641
-
671
+
672
+ response = data_visualization_agent.invoke(
673
+ {
674
+ "chart_generator_instructions": chart_generator_instructions,
675
+ "all_datasets_summary": all_datasets_summary_str,
676
+ "function_name": function_name,
677
+ }
678
+ )
679
+
642
680
  response = relocate_imports_inside_function(response)
643
681
  response = add_comments_to_top(response, agent_name=AGENT_NAME)
644
-
682
+
645
683
  # For logging: store the code generated:
646
684
  file_path, file_name_2 = log_ai_function(
647
685
  response=response,
648
686
  file_name=file_name,
649
687
  log=log,
650
688
  log_path=log_path,
651
- overwrite=overwrite
689
+ overwrite=overwrite,
652
690
  )
653
-
691
+
654
692
  return {
655
693
  "data_visualization_function": response,
656
694
  "data_visualization_function_path": file_path,
657
695
  "data_visualization_function_file_name": file_name_2,
658
696
  "data_visualization_function_name": function_name,
659
- "all_datasets_summary": all_datasets_summary_str
697
+ "all_datasets_summary": all_datasets_summary_str,
660
698
  }
661
-
699
+
662
700
  # Human Review
663
-
701
+
664
702
  prompt_text_human_review = "Are the following data visualization instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
665
-
703
+
666
704
  if not bypass_explain_code:
667
- def human_review(state: GraphState) -> Command[Literal["chart_instructor", "explain_data_visualization_code"]]:
705
+
706
+ def human_review(
707
+ state: GraphState,
708
+ ) -> Command[Literal["chart_instructor", "explain_data_visualization_code"]]:
668
709
  return node_func_human_review(
669
710
  state=state,
670
711
  prompt_text=prompt_text_human_review,
671
- yes_goto= 'explain_data_visualization_code',
712
+ yes_goto="explain_data_visualization_code",
672
713
  no_goto="chart_instructor",
673
714
  user_instructions_key="user_instructions",
674
715
  recommended_steps_key="recommended_steps",
675
716
  code_snippet_key="data_visualization_function",
676
717
  )
677
718
  else:
678
- def human_review(state: GraphState) -> Command[Literal["chart_instructor", "__end__"]]:
719
+
720
+ def human_review(
721
+ state: GraphState,
722
+ ) -> Command[Literal["chart_instructor", "__end__"]]:
679
723
  return node_func_human_review(
680
724
  state=state,
681
725
  prompt_text=prompt_text_human_review,
682
- yes_goto= '__end__',
726
+ yes_goto="__end__",
683
727
  no_goto="chart_instructor",
684
728
  user_instructions_key="user_instructions",
685
729
  recommended_steps_key="recommended_steps",
686
- code_snippet_key="data_visualization_function",
730
+ code_snippet_key="data_visualization_function",
687
731
  )
688
-
689
-
732
+
690
733
  def execute_data_visualization_code(state):
691
734
  return node_func_execute_agent_code_on_data(
692
735
  state=state,
@@ -697,9 +740,9 @@ def make_data_visualization_agent(
697
740
  agent_function_name=state.get("data_visualization_function_name"),
698
741
  pre_processing=lambda data: pd.DataFrame.from_dict(data),
699
742
  # post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
700
- error_message_prefix="An error occurred during data visualization: "
743
+ error_message_prefix="An error occurred during data visualization: ",
701
744
  )
702
-
745
+
703
746
  def fix_data_visualization_code(state: GraphState):
704
747
  prompt = """
705
748
  You are a Data Visualization Agent. Your job is to create a {function_name}() function that can be run on the data provided. The function is currently broken and needs to be fixed.
@@ -719,14 +762,14 @@ def make_data_visualization_agent(
719
762
  state=state,
720
763
  code_snippet_key="data_visualization_function",
721
764
  error_key="data_visualization_error",
722
- llm=llm,
765
+ llm=llm,
723
766
  prompt_template=prompt,
724
767
  agent_name=AGENT_NAME,
725
768
  log=log,
726
769
  file_path=state.get("data_visualization_function_path"),
727
770
  function_name=state.get("data_visualization_function_name"),
728
771
  )
729
-
772
+
730
773
  # Final reporting node
731
774
  def report_agent_outputs(state: GraphState):
732
775
  return node_func_report_agent_outputs(
@@ -740,9 +783,9 @@ def make_data_visualization_agent(
740
783
  ],
741
784
  result_key="messages",
742
785
  role=AGENT_NAME,
743
- custom_title="Data Visualization Agent Outputs"
786
+ custom_title="Data Visualization Agent Outputs",
744
787
  )
745
-
788
+
746
789
  # Define the graph
747
790
  node_functions = {
748
791
  "chart_instructor": chart_instructor,
@@ -752,7 +795,7 @@ def make_data_visualization_agent(
752
795
  "fix_data_visualization_code": fix_data_visualization_code,
753
796
  "report_agent_outputs": report_agent_outputs,
754
797
  }
755
-
798
+
756
799
  app = create_coding_agent_graph(
757
800
  GraphState=GraphState,
758
801
  node_functions=node_functions,
@@ -769,5 +812,5 @@ def make_data_visualization_agent(
769
812
  bypass_explain_code=bypass_explain_code,
770
813
  agent_name=AGENT_NAME,
771
814
  )
772
-
815
+
773
816
  return app
@@ -702,6 +702,7 @@ def make_data_wrangling_agent(
702
702
 
703
703
  Important Notes:
704
704
  1. Do Not use Print statements to display the data. Return the data frame instead with the data wrangling operation performed.
705
+ 2. Do not plot graphs. Only return the data frame.
705
706
 
706
707
  Make sure to explain any non-trivial steps with inline comments. Follow user instructions. Comment code thoroughly.
707
708
 
@@ -132,10 +132,10 @@ class PandasDataAnalyst(BaseAgent):
132
132
  """Returns a summary of the workflow."""
133
133
  if self.response and self.response.get("messages"):
134
134
  agents = [msg.role for msg in self.response["messages"]]
135
- agent_labels = [f"- **Agent {i+1}:** {role}" for i, role in enumerate(agents)]
135
+ agent_labels = [f"- **Agent {i+1}:** {role}\n" for i, role in enumerate(agents)]
136
136
  header = f"# Pandas Data Analyst Workflow Summary\n\nThis workflow contains {len(agents)} agents:\n\n" + "\n".join(agent_labels)
137
137
  reports = [get_generic_summary(json.loads(msg.content)) for msg in self.response["messages"]]
138
- summary = "\n" +header + "\n\n".join(reports)
138
+ summary = "\n\n" + header + "\n\n".join(reports)
139
139
  return Markdown(summary) if markdown else summary
140
140
 
141
141
  @staticmethod
@@ -177,15 +177,15 @@ def make_pandas_data_analyst(
177
177
 
178
178
  routing_preprocessor_prompt = PromptTemplate(
179
179
  template="""
180
- You are an expert in routing decisions for a Pandas Data Manipulation Wrangling Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to:
180
+ You are an expert in routing decisions for a Pandas Data Manipulation Wrangling Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to tell the agents which actions to perform and determine the correct routing for the incoming user question:
181
181
 
182
- 1. Determine what the correct format for a Users Question should be for use with a Pandas Data Wrangling Agent based on the incoming user question. Anything related to data wrangling and manipulation should be passed along.
182
+ 1. Determine what the correct format for a Users Question should be for use with a Pandas Data Wrangling Agent based on the incoming user question. Anything related to data wrangling and manipulation should be passed along. Anything related to data analysis can be handled by the Pandas Agent. Anything that uses Pandas can be passed along. Tables can be returned from this agent. Don't pass along anything about plotting or visualization.
183
183
  2. Determine whether or not a chart should be generated or a table should be returned based on the users question.
184
184
  3. If a chart is requested, determine the correct format of a Users Question should be used with a Data Visualization Agent. Anything related to plotting and visualization should be passed along.
185
185
 
186
186
  Use the following criteria on how to route the the initial user question:
187
187
 
188
- From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the SQL generator agent. This will be the 'user_instructions_data_wrangling'. If 'None' is found, return the original user question.
188
+ From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the Pandas Data Wrangling and Transformation agent. This will be the 'user_instructions_data_wrangling'. If 'None' is found, return the original user question.
189
189
 
190
190
  Next, determine if the user would like a data visualization ('chart') or a 'table' returned with the results of the Data Wrangling Agent. If unknown, not specified or 'None' is found, then select 'table'.
191
191
 
@@ -301,24 +301,13 @@ class SQLDataAnalyst(BaseAgent):
301
301
  markdown: bool
302
302
  If True, returns the summary as a Markdown-formatted string.
303
303
  """
304
- if self.response and self.get_response()['messages']:
305
-
306
- agents = [self.get_response()['messages'][i].role for i in range(len(self.get_response()['messages']))]
307
-
308
- agent_labels = []
309
- for i in range(len(agents)):
310
- agent_labels.append(f"- **Agent {i+1}:** {agents[i]}")
311
-
312
- # Construct header
313
- header = f"# SQL Data Analyst Workflow Summary Report\n\nThis agentic workflow contains {len(agents)} agents:\n\n" + "\n".join(agent_labels)
314
-
315
- reports = []
316
- for msg in self.get_response()['messages']:
317
- reports.append(get_generic_summary(json.loads(msg.content)))
318
-
319
- if markdown:
320
- return Markdown(header + "\n\n".join(reports))
321
- return "\n\n".join(reports)
304
+ if self.response and self.response.get("messages"):
305
+ agents = [msg.role for msg in self.response["messages"]]
306
+ agent_labels = [f"- **Agent {i+1}:** {role}\n" for i, role in enumerate(agents)]
307
+ header = f"# SQL Data Analyst Workflow Summary\n\nThis workflow contains {len(agents)} agents:\n\n" + "\n".join(agent_labels)
308
+ reports = [get_generic_summary(json.loads(msg.content)) for msg in self.response["messages"]]
309
+ summary = "\n\n" + header + "\n\n".join(reports)
310
+ return Markdown(summary) if markdown else summary
322
311
 
323
312
 
324
313
 
@@ -235,6 +235,7 @@ def correlation_funnel(
235
235
  df_correlated = df_binarized.correlate(target=full_target, method=corr_method)
236
236
 
237
237
  # Attempt to generate a static plot.
238
+ encoded = None
238
239
  try:
239
240
  # Here we assume that your DataFrame has a method plot_correlation_funnel.
240
241
  fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
@@ -248,6 +249,7 @@ def correlation_funnel(
248
249
  encoded = {"error": str(e)}
249
250
 
250
251
  # Attempt to generate a Plotly plot.
252
+ fig_dict = None
251
253
  try:
252
254
  fig = df_correlated.plot_correlation_funnel(engine='plotly')
253
255
  fig_json = pio.to_json(fig)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9014
3
+ Version: 0.0.0.9015
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -136,9 +136,9 @@ This project is a work in progress. New data science agents will be released soo
136
136
 
137
137
  ### NEW: Multi-Agents
138
138
 
139
- This is the internals of the SQL Data Analyst Agent that connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction, performs Joins, Aggregations, and other SQL Query operations. And it includes a Data Visualization Agent that creates visualizations to help you understand your data.:
139
+ **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
140
140
 
141
- ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
141
+ ![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
142
142
 
143
143
  ### Data Science Apps
144
144
 
@@ -182,7 +182,8 @@ This is a top secret project I'm working on. It's a multi-agent data science app
182
182
 
183
183
  #### Multi-Agents
184
184
 
185
- 1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
185
+ 1. **🔥🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/pandas_data_analyst.ipynb)
186
+ 2. **🔥🔥 SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
186
187
 
187
188
  ### Agents Coming Soon
188
189
 
@@ -1,11 +1,11 @@
1
1
  ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
2
- ai_data_science_team/_version.py,sha256=D4dUl-fYnimOU_VSzvrmJm30_IoaF_9m9dTLp8HE6rQ,26
2
+ ai_data_science_team/_version.py,sha256=c-XrUvZG3E6SWR9NMQqLxISzMZJUpsnK0FlIEMHAOls,27
3
3
  ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
4
  ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
5
5
  ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
6
6
  ai_data_science_team/agents/data_loader_tools_agent.py,sha256=TFKzYqV6cvU-sMbfL-hg8-NgF_Hz3nysGFldvb5K3fM,9327
7
- ai_data_science_team/agents/data_visualization_agent.py,sha256=eUSTzTOm5aLJ6Cqnk-hRuXeVbYyy0RIzN8_0LLy0P9o,29387
8
- ai_data_science_team/agents/data_wrangling_agent.py,sha256=6tiDO1i-5s2Ju6_MsLoJMflUuRSf_1oTSsSKcLlgzEc,33376
7
+ ai_data_science_team/agents/data_visualization_agent.py,sha256=IHNagAVY4XIRfyKKj3jdJZV0vUpzBqqnQBVbzP1lZj0,29829
8
+ ai_data_science_team/agents/data_wrangling_agent.py,sha256=jyBrEfLsgIqSF6xcmRgnkzvNqJfkXdjn6FDefQij62o,33439
9
9
  ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
10
10
  ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
11
11
  ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
@@ -16,8 +16,8 @@ ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NX
16
16
  ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=QImaZnS8hPdrU7GI6pZ0dUDO-LXx40MSA3XyMDppIh0,12003
18
18
  ai_data_science_team/multiagents/__init__.py,sha256=5tpmZBQ_UT5SKDCS_NivZhN19HEStKIcstiqSXPXDl0,208
19
- ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=O662v-75tLqHHrVNjncsPeR2FB4MWSWruJRIF-YO-fg,13581
20
- ai_data_science_team/multiagents/sql_data_analyst.py,sha256=Fpue6WcX9x18kEH3kfEi8kkFoG9HhQ1AZiWw6Y6FXOo,18502
19
+ ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=6JvcGFvDH7_ozRo-RQvjA_to5R27c7ZSEdKt4VQGL6U,13935
20
+ ai_data_science_team/multiagents/sql_data_analyst.py,sha256=ZZx3Edzff6zf27iPl8lUGoqaZkPaJQtCJIgNx9wdCZY,18232
21
21
  ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
22
22
  ai_data_science_team/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  ai_data_science_team/parsers/parsers.py,sha256=hIsMZXRHz9hqs8R1ebymKA7D6NxOf5UVMpDAr_gGhE8,2027
@@ -26,7 +26,7 @@ ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J
26
26
  ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
28
28
  ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
29
- ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
29
+ ai_data_science_team/tools/eda.py,sha256=orabE8qaYj5TC5n7CRS6rHOPkyBVxr488631AwkVKVg,12726
30
30
  ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
31
31
  ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
32
32
  ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -37,8 +37,8 @@ ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcU
37
37
  ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
38
38
  ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
39
39
  ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
40
- ai_data_science_team-0.0.0.9014.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
41
- ai_data_science_team-0.0.0.9014.dist-info/METADATA,sha256=a35LbXyxf_XiP82m_P5HLFwPrmuzXkNWbyfzGge7dHA,13021
42
- ai_data_science_team-0.0.0.9014.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
43
- ai_data_science_team-0.0.0.9014.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
44
- ai_data_science_team-0.0.0.9014.dist-info/RECORD,,
40
+ ai_data_science_team-0.0.0.9015.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
41
+ ai_data_science_team-0.0.0.9015.dist-info/METADATA,sha256=tIcThz7trmAG6TZAnDHxy8ntBslXMKS5xSUbvaTygyQ,13164
42
+ ai_data_science_team-0.0.0.9015.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
43
+ ai_data_science_team-0.0.0.9015.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
44
+ ai_data_science_team-0.0.0.9015.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5