ai-data-science-team 0.0.0.9008__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "0.0.0.9008"
1
+ __version__ = "0.0.0.9009"
@@ -3,4 +3,3 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
-
@@ -14,7 +14,7 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
17
+ import json
18
18
  import pandas as pd
19
19
 
20
20
  from IPython.display import Markdown
@@ -23,12 +23,18 @@ from ai_data_science_team.templates import(
23
23
  node_func_execute_agent_code_on_data,
24
24
  node_func_human_review,
25
25
  node_func_fix_agent_code,
26
- node_func_explain_agent_code,
26
+ node_func_report_agent_outputs,
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
30
  from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
31
+ from ai_data_science_team.tools.regex import (
32
+ relocate_imports_inside_function,
33
+ add_comments_to_top,
34
+ format_agent_name,
35
+ format_recommended_steps,
36
+ get_generic_summary,
37
+ )
32
38
  from ai_data_science_team.tools.metadata import get_dataframe_summary
33
39
  from ai_data_science_team.tools.logging import log_ai_function
34
40
 
@@ -37,7 +43,6 @@ AGENT_NAME = "data_cleaning_agent"
37
43
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
38
44
 
39
45
 
40
-
41
46
  # Class
42
47
  class DataCleaningAgent(BaseAgent):
43
48
  """
@@ -89,8 +94,8 @@ class DataCleaningAgent(BaseAgent):
89
94
  Cleans the provided dataset asynchronously based on user instructions.
90
95
  invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
91
96
  Cleans the provided dataset synchronously based on user instructions.
92
- explain_cleaning_steps()
93
- Returns an explanation of the cleaning steps performed by the agent.
97
+ get_workflow_summary()
98
+ Retrieves a summary of the agent's workflow.
94
99
  get_log_summary()
95
100
  Retrieves a summary of logged operations if logging is enabled.
96
101
  get_state_keys()
@@ -178,7 +183,6 @@ class DataCleaningAgent(BaseAgent):
178
183
  self.response=None
179
184
  return make_data_cleaning_agent(**self._params)
180
185
 
181
-
182
186
  def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
183
187
  """
184
188
  Asynchronously invokes the agent. The response is stored in the response attribute.
@@ -239,15 +243,16 @@ class DataCleaningAgent(BaseAgent):
239
243
  self.response = response
240
244
  return None
241
245
 
242
- def explain_cleaning_steps(self):
246
+ def get_workflow_summary(self, markdown=False):
243
247
  """
244
- Provides an explanation of the cleaning steps performed by the agent.
245
-
246
- Returns:
247
- str: Explanation of the cleaning steps.
248
+ Retrieves the agent's workflow summary, built from the last message of the response, if a response with messages is available.
248
249
  """
249
- messages = self.response.get("messages", [])
250
- return messages
250
+ if self.response and self.response.get("messages"):
251
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
252
+ if markdown:
253
+ return Markdown(summary)
254
+ else:
255
+ return summary
251
256
 
252
257
  def get_log_summary(self, markdown=False):
253
258
  """
@@ -255,7 +260,13 @@ class DataCleaningAgent(BaseAgent):
255
260
  """
256
261
  if self.response:
257
262
  if self.response.get('data_cleaner_function_path'):
258
- log_details = f"Log Path: {self.response.get('data_cleaner_function_path')}"
263
+ log_details = f"""
264
+ ## Data Cleaning Agent Log Summary:
265
+
266
+ Function Path: {self.response.get('data_cleaner_function_path')}
267
+
268
+ Function Name: {self.response.get('data_cleaner_function_name')}
269
+ """
259
270
  if markdown:
260
271
  return Markdown(log_details)
261
272
  else:
@@ -462,7 +473,7 @@ def make_data_cleaning_agent(
462
473
  Below are summaries of all datasets provided:
463
474
  {all_datasets_summary}
464
475
 
465
- Return the steps as a bullet point list (no code, just the steps).
476
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
466
477
 
467
478
  Avoid these:
468
479
  1. Do not include steps to save files.
@@ -633,32 +644,31 @@ def make_data_cleaning_agent(
633
644
  function_name=state.get("data_cleaner_function_name"),
634
645
  )
635
646
 
636
- def explain_data_cleaner_code(state: GraphState):
637
- return node_func_explain_agent_code(
647
+ # Final reporting node
648
+ def report_agent_outputs(state: GraphState):
649
+ return node_func_report_agent_outputs(
638
650
  state=state,
639
- code_snippet_key="data_cleaner_function",
651
+ keys_to_include=[
652
+ "recommended_steps",
653
+ "data_cleaner_function",
654
+ "data_cleaner_function_path",
655
+ "data_cleaner_function_name",
656
+ "data_cleaner_error",
657
+ ],
640
658
  result_key="messages",
641
- error_key="data_cleaner_error",
642
- llm=llm,
643
659
  role=AGENT_NAME,
644
- explanation_prompt_template="""
645
- Explain the data cleaning steps that the data cleaning agent performed in this function.
646
- Keep the summary succinct and to the point.\n\n# Data Cleaning Agent:\n\n{code}
647
- """,
648
- success_prefix="# Data Cleaning Agent:\n\n ",
649
- error_message="The Data Cleaning Agent encountered an error during data cleaning. Data could not be explained."
660
+ custom_title="Data Cleaning Agent Outputs"
650
661
  )
651
-
652
- # Define the graph
662
+
653
663
  node_functions = {
654
664
  "recommend_cleaning_steps": recommend_cleaning_steps,
655
665
  "human_review": human_review,
656
666
  "create_data_cleaner_code": create_data_cleaner_code,
657
667
  "execute_data_cleaner_code": execute_data_cleaner_code,
658
668
  "fix_data_cleaner_code": fix_data_cleaner_code,
659
- "explain_data_cleaner_code": explain_data_cleaner_code
669
+ "report_agent_outputs": report_agent_outputs,
660
670
  }
661
-
671
+
662
672
  app = create_coding_agent_graph(
663
673
  GraphState=GraphState,
664
674
  node_functions=node_functions,
@@ -666,16 +676,17 @@ def make_data_cleaning_agent(
666
676
  create_code_node_name="create_data_cleaner_code",
667
677
  execute_code_node_name="execute_data_cleaner_code",
668
678
  fix_code_node_name="fix_data_cleaner_code",
669
- explain_code_node_name="explain_data_cleaner_code",
679
+ explain_code_node_name="report_agent_outputs",
670
680
  error_key="data_cleaner_error",
671
- human_in_the_loop=human_in_the_loop, # or False
681
+ human_in_the_loop=human_in_the_loop,
672
682
  human_review_node_name="human_review",
673
683
  checkpointer=MemorySaver() if human_in_the_loop else None,
674
684
  bypass_recommended_steps=bypass_recommended_steps,
675
685
  bypass_explain_code=bypass_explain_code,
676
686
  )
677
-
687
+
678
688
  return app
689
+
679
690
 
680
691
 
681
692
 
@@ -17,6 +17,7 @@ from langgraph.types import Command
17
17
  from langgraph.checkpoint.memory import MemorySaver
18
18
 
19
19
  import os
20
+ import json
20
21
  import pandas as pd
21
22
 
22
23
  from IPython.display import Markdown
@@ -25,7 +26,7 @@ from ai_data_science_team.templates import(
25
26
  node_func_execute_agent_code_on_data,
26
27
  node_func_human_review,
27
28
  node_func_fix_agent_code,
28
- node_func_explain_agent_code,
29
+ node_func_report_agent_outputs,
29
30
  create_coding_agent_graph,
30
31
  BaseAgent,
31
32
  )
@@ -34,7 +35,8 @@ from ai_data_science_team.tools.regex import (
34
35
  relocate_imports_inside_function,
35
36
  add_comments_to_top,
36
37
  format_agent_name,
37
- format_recommended_steps
38
+ format_recommended_steps,
39
+ get_generic_summary,
38
40
  )
39
41
  from ai_data_science_team.tools.metadata import get_dataframe_summary
40
42
  from ai_data_science_team.tools.logging import log_ai_function
@@ -93,8 +95,8 @@ class DataVisualizationAgent(BaseAgent):
93
95
  Asynchronously generates a visualization based on user instructions.
94
96
  invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
95
97
  Synchronously generates a visualization based on user instructions.
96
- explain_visualization_steps()
97
- Returns an explanation of the visualization steps performed by the agent.
98
+ get_workflow_summary()
99
+ Retrieves a summary of the agent's workflow.
98
100
  get_log_summary()
99
101
  Retrieves a summary of logged operations if logging is enabled.
100
102
  get_plotly_graph()
@@ -257,40 +259,34 @@ class DataVisualizationAgent(BaseAgent):
257
259
  self.response = response
258
260
  return None
259
261
 
260
- def explain_visualization_steps(self):
262
+ def get_workflow_summary(self, markdown=False):
261
263
  """
262
- Provides an explanation of the visualization steps performed by the agent.
263
-
264
- Returns
265
- -------
266
- str
267
- Explanation of the visualization steps, if any are available.
264
+ Retrieves the agent's workflow summary, built from the last message of the response, if a response with messages is available.
268
265
  """
269
- if self.response:
270
- return self.response.get("messages", [])
271
- return []
266
+ if self.response and self.response.get("messages"):
267
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
268
+ if markdown:
269
+ return Markdown(summary)
270
+ else:
271
+ return summary
272
272
 
273
273
  def get_log_summary(self, markdown=False):
274
274
  """
275
275
  Logs a summary of the agent's operations, if logging is enabled.
276
+ """
277
+ if self.response:
278
+ if self.response.get('data_visualization_function_path'):
279
+ log_details = f"""
280
+ ## Data Visualization Agent Log Summary:
276
281
 
277
- Parameters
278
- ----------
279
- markdown : bool, optional
280
- If True, returns Markdown-formatted output.
282
+ Function Path: {self.response.get('data_visualization_function_path')}
281
283
 
282
- Returns
283
- -------
284
- str or None
285
- Summary of logs or None if no logs are available.
286
- """
287
- if self.response and self.response.get('data_visualization_function_path'):
288
- log_details = f"Log Path: {self.response.get('data_visualization_function_path')}"
289
- if markdown:
290
- return Markdown(log_details)
291
- else:
292
- return log_details
293
- return None
284
+ Function Name: {self.response.get('data_visualization_function_name')}
285
+ """
286
+ if markdown:
287
+ return Markdown(log_details)
288
+ else:
289
+ return log_details
294
290
 
295
291
  def get_plotly_graph(self):
296
292
  """
@@ -719,20 +715,20 @@ def make_data_visualization_agent(
719
715
  function_name=state.get("data_visualization_function_name"),
720
716
  )
721
717
 
722
- def explain_data_visualization_code(state: GraphState):
723
- return node_func_explain_agent_code(
718
+ # Final reporting node
719
+ def report_agent_outputs(state: GraphState):
720
+ return node_func_report_agent_outputs(
724
721
  state=state,
725
- code_snippet_key="data_visualization_function",
722
+ keys_to_include=[
723
+ "recommended_steps",
724
+ "data_visualization_function",
725
+ "data_visualization_function_path",
726
+ "data_visualization_function_name",
727
+ "data_visualization_error",
728
+ ],
726
729
  result_key="messages",
727
- error_key="data_visualization_error",
728
- llm=llm,
729
730
  role=AGENT_NAME,
730
- explanation_prompt_template="""
731
- Explain the data visualization steps that the data visualization agent performed in this function.
732
- Keep the summary succinct and to the point.\n\n# Data Visualization Agent:\n\n{code}
733
- """,
734
- success_prefix="# Data Visualization Agent:\n\n ",
735
- error_message="The Data Visualization Agent encountered an error during data visualization. No explanation could be provided."
731
+ custom_title="Data Visualization Agent Outputs"
736
732
  )
737
733
 
738
734
  # Define the graph
@@ -742,7 +738,7 @@ def make_data_visualization_agent(
742
738
  "chart_generator": chart_generator,
743
739
  "execute_data_visualization_code": execute_data_visualization_code,
744
740
  "fix_data_visualization_code": fix_data_visualization_code,
745
- "explain_data_visualization_code": explain_data_visualization_code
741
+ "report_agent_outputs": report_agent_outputs,
746
742
  }
747
743
 
748
744
  app = create_coding_agent_graph(
@@ -752,7 +748,7 @@ def make_data_visualization_agent(
752
748
  create_code_node_name="chart_generator",
753
749
  execute_code_node_name="execute_data_visualization_code",
754
750
  fix_code_node_name="fix_data_visualization_code",
755
- explain_code_node_name="explain_data_visualization_code",
751
+ explain_code_node_name="report_agent_outputs",
756
752
  error_key="data_visualization_error",
757
753
  human_in_the_loop=human_in_the_loop, # or False
758
754
  human_review_node_name="human_review",
@@ -7,6 +7,7 @@
7
7
  from typing import TypedDict, Annotated, Sequence, Literal, Union, Optional
8
8
  import operator
9
9
  import os
10
+ import json
10
11
  import pandas as pd
11
12
  from IPython.display import Markdown
12
13
 
@@ -19,12 +20,18 @@ from ai_data_science_team.templates import(
19
20
  node_func_execute_agent_code_on_data,
20
21
  node_func_human_review,
21
22
  node_func_fix_agent_code,
22
- node_func_explain_agent_code,
23
+ node_func_report_agent_outputs,
23
24
  create_coding_agent_graph,
24
25
  BaseAgent,
25
26
  )
26
27
  from ai_data_science_team.tools.parsers import PythonOutputParser
27
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
28
+ from ai_data_science_team.tools.regex import (
29
+ relocate_imports_inside_function,
30
+ add_comments_to_top,
31
+ format_agent_name,
32
+ format_recommended_steps,
33
+ get_generic_summary,
34
+ )
28
35
  from ai_data_science_team.tools.metadata import get_dataframe_summary
29
36
  from ai_data_science_team.tools.logging import log_ai_function
30
37
 
@@ -88,8 +95,8 @@ class DataWranglingAgent(BaseAgent):
88
95
  invoke_agent(user_instructions: str, data_raw: Union[dict, list], max_retries=3, retry_count=0)
89
96
  Synchronously wrangles the provided dataset(s) based on user instructions.
90
97
 
91
- explain_wrangling_steps()
92
- Returns an explanation of the wrangling steps performed by the agent.
98
+ get_workflow_summary()
99
+ Retrieves a summary of the agent's workflow.
93
100
 
94
101
  get_log_summary()
95
102
  Retrieves a summary of logged operations if logging is enabled.
@@ -287,40 +294,34 @@ class DataWranglingAgent(BaseAgent):
287
294
  self.response = response
288
295
  return None
289
296
 
290
- def explain_wrangling_steps(self):
297
+ def get_workflow_summary(self, markdown=False):
291
298
  """
292
- Provides an explanation of the wrangling steps performed by the agent.
293
-
294
- Returns
295
- -------
296
- str or list
297
- Explanation of the data wrangling steps.
299
+ Retrieves the agent's workflow summary, built from the last message of the response, if a response with messages is available.
298
300
  """
299
- if self.response:
300
- return self.response.get("messages", [])
301
- return []
301
+ if self.response and self.response.get("messages"):
302
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
303
+ if markdown:
304
+ return Markdown(summary)
305
+ else:
306
+ return summary
302
307
 
303
308
  def get_log_summary(self, markdown=False):
304
309
  """
305
310
  Logs a summary of the agent's operations, if logging is enabled.
311
+ """
312
+ if self.response:
313
+ if self.response.get('data_wrangler_function_path'):
314
+ log_details = f"""
315
+ ## Data Wrangling Agent Log Summary:
306
316
 
307
- Parameters
308
- ----------
309
- markdown : bool, optional
310
- If True, returns the summary in Markdown.
317
+ Function Path: {self.response.get('data_wrangler_function_path')}
311
318
 
312
- Returns
313
- -------
314
- str or None
315
- The log details, or None if not available.
316
- """
317
- if self.response and self.response.get("data_wrangler_function_path"):
318
- log_details = f"Log Path: {self.response.get('data_wrangler_function_path')}"
319
- if markdown:
320
- return Markdown(log_details)
321
- else:
322
- return log_details
323
- return None
319
+ Function Name: {self.response.get('data_wrangler_function_name')}
320
+ """
321
+ if markdown:
322
+ return Markdown(log_details)
323
+ else:
324
+ return log_details
324
325
 
325
326
  def get_data_wrangled(self) -> Optional[pd.DataFrame]:
326
327
  """
@@ -597,7 +598,7 @@ def make_data_wrangling_agent(
597
598
  Below are summaries of all datasets provided:
598
599
  {all_datasets_summary}
599
600
 
600
- Return your recommended steps as a numbered point list, explaining briefly why each step is needed.
601
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
601
602
 
602
603
  Avoid these:
603
604
  1. Do not include steps to save files.
@@ -797,20 +798,20 @@ def make_data_wrangling_agent(
797
798
  function_name=state.get("data_wrangler_function_name"),
798
799
  )
799
800
 
800
- def explain_data_wrangler_code(state: GraphState):
801
- return node_func_explain_agent_code(
801
+ # Final reporting node
802
+ def report_agent_outputs(state: GraphState):
803
+ return node_func_report_agent_outputs(
802
804
  state=state,
803
- code_snippet_key="data_wrangler_function",
805
+ keys_to_include=[
806
+ "recommended_steps",
807
+ "data_wrangler_function",
808
+ "data_wrangler_function_path",
809
+ "data_wrangler_function_name",
810
+ "data_wrangler_error",
811
+ ],
804
812
  result_key="messages",
805
- error_key="data_wrangler_error",
806
- llm=llm,
807
813
  role=AGENT_NAME,
808
- explanation_prompt_template="""
809
- Explain the data wrangling steps that the data wrangling agent performed in this function.
810
- Keep the summary succinct and to the point.\n\n# Data Wrangling Agent:\n\n{code}
811
- """,
812
- success_prefix="# Data Wrangling Agent:\n\n ",
813
- error_message="The Data Wrangling Agent encountered an error during data wrangling. Data could not be explained."
814
+ custom_title="Data Wrangling Agent Outputs"
814
815
  )
815
816
 
816
817
  # Define the graph
@@ -820,7 +821,7 @@ def make_data_wrangling_agent(
820
821
  "create_data_wrangler_code": create_data_wrangler_code,
821
822
  "execute_data_wrangler_code": execute_data_wrangler_code,
822
823
  "fix_data_wrangler_code": fix_data_wrangler_code,
823
- "explain_data_wrangler_code": explain_data_wrangler_code
824
+ "report_agent_outputs": report_agent_outputs,
824
825
  }
825
826
 
826
827
  app = create_coding_agent_graph(
@@ -830,7 +831,7 @@ def make_data_wrangling_agent(
830
831
  create_code_node_name="create_data_wrangler_code",
831
832
  execute_code_node_name="execute_data_wrangler_code",
832
833
  fix_code_node_name="fix_data_wrangler_code",
833
- explain_code_node_name="explain_data_wrangler_code",
834
+ explain_code_node_name="report_agent_outputs",
834
835
  error_key="data_wrangler_error",
835
836
  human_in_the_loop=human_in_the_loop,
836
837
  human_review_node_name="human_review",