ai-data-science-team 0.0.0.9008__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1 +1 @@
1
- __version__ = "0.0.0.9008"
1
+ __version__ = "0.0.0.9009"
@@ -3,4 +3,3 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
-
@@ -14,7 +14,7 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
17
+ import json
18
18
  import pandas as pd
19
19
 
20
20
  from IPython.display import Markdown
@@ -23,12 +23,18 @@ from ai_data_science_team.templates import(
23
23
  node_func_execute_agent_code_on_data,
24
24
  node_func_human_review,
25
25
  node_func_fix_agent_code,
26
- node_func_explain_agent_code,
26
+ node_func_report_agent_outputs,
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
30
  from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
31
+ from ai_data_science_team.tools.regex import (
32
+ relocate_imports_inside_function,
33
+ add_comments_to_top,
34
+ format_agent_name,
35
+ format_recommended_steps,
36
+ get_generic_summary,
37
+ )
32
38
  from ai_data_science_team.tools.metadata import get_dataframe_summary
33
39
  from ai_data_science_team.tools.logging import log_ai_function
34
40
 
@@ -37,7 +43,6 @@ AGENT_NAME = "data_cleaning_agent"
37
43
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
38
44
 
39
45
 
40
-
41
46
  # Class
42
47
  class DataCleaningAgent(BaseAgent):
43
48
  """
@@ -89,8 +94,8 @@ class DataCleaningAgent(BaseAgent):
89
94
  Cleans the provided dataset asynchronously based on user instructions.
90
95
  invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
91
96
  Cleans the provided dataset synchronously based on user instructions.
92
- explain_cleaning_steps()
93
- Returns an explanation of the cleaning steps performed by the agent.
97
+ get_workflow_summary()
98
+ Retrieves a summary of the agent's workflow.
94
99
  get_log_summary()
95
100
  Retrieves a summary of logged operations if logging is enabled.
96
101
  get_state_keys()
@@ -178,7 +183,6 @@ class DataCleaningAgent(BaseAgent):
178
183
  self.response=None
179
184
  return make_data_cleaning_agent(**self._params)
180
185
 
181
-
182
186
  def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
183
187
  """
184
188
  Asynchronously invokes the agent. The response is stored in the response attribute.
@@ -239,15 +243,16 @@ class DataCleaningAgent(BaseAgent):
239
243
  self.response = response
240
244
  return None
241
245
 
242
- def explain_cleaning_steps(self):
246
+ def get_workflow_summary(self, markdown=False):
243
247
  """
244
- Provides an explanation of the cleaning steps performed by the agent.
245
-
246
- Returns:
247
- str: Explanation of the cleaning steps.
248
+ Retrieves the agent's workflow summary, if logging is enabled.
248
249
  """
249
- messages = self.response.get("messages", [])
250
- return messages
250
+ if self.response and self.response.get("messages"):
251
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
252
+ if markdown:
253
+ return Markdown(summary)
254
+ else:
255
+ return summary
251
256
 
252
257
  def get_log_summary(self, markdown=False):
253
258
  """
@@ -255,7 +260,13 @@ class DataCleaningAgent(BaseAgent):
255
260
  """
256
261
  if self.response:
257
262
  if self.response.get('data_cleaner_function_path'):
258
- log_details = f"Log Path: {self.response.get('data_cleaner_function_path')}"
263
+ log_details = f"""
264
+ ## Data Cleaning Agent Log Summary:
265
+
266
+ Function Path: {self.response.get('data_cleaner_function_path')}
267
+
268
+ Function Name: {self.response.get('data_cleaner_function_name')}
269
+ """
259
270
  if markdown:
260
271
  return Markdown(log_details)
261
272
  else:
@@ -462,7 +473,7 @@ def make_data_cleaning_agent(
462
473
  Below are summaries of all datasets provided:
463
474
  {all_datasets_summary}
464
475
 
465
- Return the steps as a bullet point list (no code, just the steps).
476
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
466
477
 
467
478
  Avoid these:
468
479
  1. Do not include steps to save files.
@@ -633,32 +644,31 @@ def make_data_cleaning_agent(
633
644
  function_name=state.get("data_cleaner_function_name"),
634
645
  )
635
646
 
636
- def explain_data_cleaner_code(state: GraphState):
637
- return node_func_explain_agent_code(
647
+ # Final reporting node
648
+ def report_agent_outputs(state: GraphState):
649
+ return node_func_report_agent_outputs(
638
650
  state=state,
639
- code_snippet_key="data_cleaner_function",
651
+ keys_to_include=[
652
+ "recommended_steps",
653
+ "data_cleaner_function",
654
+ "data_cleaner_function_path",
655
+ "data_cleaner_function_name",
656
+ "data_cleaner_error",
657
+ ],
640
658
  result_key="messages",
641
- error_key="data_cleaner_error",
642
- llm=llm,
643
659
  role=AGENT_NAME,
644
- explanation_prompt_template="""
645
- Explain the data cleaning steps that the data cleaning agent performed in this function.
646
- Keep the summary succinct and to the point.\n\n# Data Cleaning Agent:\n\n{code}
647
- """,
648
- success_prefix="# Data Cleaning Agent:\n\n ",
649
- error_message="The Data Cleaning Agent encountered an error during data cleaning. Data could not be explained."
660
+ custom_title="Data Cleaning Agent Outputs"
650
661
  )
651
-
652
- # Define the graph
662
+
653
663
  node_functions = {
654
664
  "recommend_cleaning_steps": recommend_cleaning_steps,
655
665
  "human_review": human_review,
656
666
  "create_data_cleaner_code": create_data_cleaner_code,
657
667
  "execute_data_cleaner_code": execute_data_cleaner_code,
658
668
  "fix_data_cleaner_code": fix_data_cleaner_code,
659
- "explain_data_cleaner_code": explain_data_cleaner_code
669
+ "report_agent_outputs": report_agent_outputs,
660
670
  }
661
-
671
+
662
672
  app = create_coding_agent_graph(
663
673
  GraphState=GraphState,
664
674
  node_functions=node_functions,
@@ -666,16 +676,17 @@ def make_data_cleaning_agent(
666
676
  create_code_node_name="create_data_cleaner_code",
667
677
  execute_code_node_name="execute_data_cleaner_code",
668
678
  fix_code_node_name="fix_data_cleaner_code",
669
- explain_code_node_name="explain_data_cleaner_code",
679
+ explain_code_node_name="report_agent_outputs",
670
680
  error_key="data_cleaner_error",
671
- human_in_the_loop=human_in_the_loop, # or False
681
+ human_in_the_loop=human_in_the_loop,
672
682
  human_review_node_name="human_review",
673
683
  checkpointer=MemorySaver() if human_in_the_loop else None,
674
684
  bypass_recommended_steps=bypass_recommended_steps,
675
685
  bypass_explain_code=bypass_explain_code,
676
686
  )
677
-
687
+
678
688
  return app
689
+
679
690
 
680
691
 
681
692
 
@@ -17,6 +17,7 @@ from langgraph.types import Command
17
17
  from langgraph.checkpoint.memory import MemorySaver
18
18
 
19
19
  import os
20
+ import json
20
21
  import pandas as pd
21
22
 
22
23
  from IPython.display import Markdown
@@ -25,7 +26,7 @@ from ai_data_science_team.templates import(
25
26
  node_func_execute_agent_code_on_data,
26
27
  node_func_human_review,
27
28
  node_func_fix_agent_code,
28
- node_func_explain_agent_code,
29
+ node_func_report_agent_outputs,
29
30
  create_coding_agent_graph,
30
31
  BaseAgent,
31
32
  )
@@ -34,7 +35,8 @@ from ai_data_science_team.tools.regex import (
34
35
  relocate_imports_inside_function,
35
36
  add_comments_to_top,
36
37
  format_agent_name,
37
- format_recommended_steps
38
+ format_recommended_steps,
39
+ get_generic_summary,
38
40
  )
39
41
  from ai_data_science_team.tools.metadata import get_dataframe_summary
40
42
  from ai_data_science_team.tools.logging import log_ai_function
@@ -93,8 +95,8 @@ class DataVisualizationAgent(BaseAgent):
93
95
  Asynchronously generates a visualization based on user instructions.
94
96
  invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
95
97
  Synchronously generates a visualization based on user instructions.
96
- explain_visualization_steps()
97
- Returns an explanation of the visualization steps performed by the agent.
98
+ get_workflow_summary()
99
+ Retrieves a summary of the agent's workflow.
98
100
  get_log_summary()
99
101
  Retrieves a summary of logged operations if logging is enabled.
100
102
  get_plotly_graph()
@@ -257,40 +259,34 @@ class DataVisualizationAgent(BaseAgent):
257
259
  self.response = response
258
260
  return None
259
261
 
260
- def explain_visualization_steps(self):
262
+ def get_workflow_summary(self, markdown=False):
261
263
  """
262
- Provides an explanation of the visualization steps performed by the agent.
263
-
264
- Returns
265
- -------
266
- str
267
- Explanation of the visualization steps, if any are available.
264
+ Retrieves the agent's workflow summary, if logging is enabled.
268
265
  """
269
- if self.response:
270
- return self.response.get("messages", [])
271
- return []
266
+ if self.response and self.response.get("messages"):
267
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
268
+ if markdown:
269
+ return Markdown(summary)
270
+ else:
271
+ return summary
272
272
 
273
273
  def get_log_summary(self, markdown=False):
274
274
  """
275
275
  Logs a summary of the agent's operations, if logging is enabled.
276
+ """
277
+ if self.response:
278
+ if self.response.get('data_visualization_function_path'):
279
+ log_details = f"""
280
+ ## Data Visualization Agent Log Summary:
276
281
 
277
- Parameters
278
- ----------
279
- markdown : bool, optional
280
- If True, returns Markdown-formatted output.
282
+ Function Path: {self.response.get('data_visualization_function_path')}
281
283
 
282
- Returns
283
- -------
284
- str or None
285
- Summary of logs or None if no logs are available.
286
- """
287
- if self.response and self.response.get('data_visualization_function_path'):
288
- log_details = f"Log Path: {self.response.get('data_visualization_function_path')}"
289
- if markdown:
290
- return Markdown(log_details)
291
- else:
292
- return log_details
293
- return None
284
+ Function Name: {self.response.get('data_visualization_function_name')}
285
+ """
286
+ if markdown:
287
+ return Markdown(log_details)
288
+ else:
289
+ return log_details
294
290
 
295
291
  def get_plotly_graph(self):
296
292
  """
@@ -719,20 +715,20 @@ def make_data_visualization_agent(
719
715
  function_name=state.get("data_visualization_function_name"),
720
716
  )
721
717
 
722
- def explain_data_visualization_code(state: GraphState):
723
- return node_func_explain_agent_code(
718
+ # Final reporting node
719
+ def report_agent_outputs(state: GraphState):
720
+ return node_func_report_agent_outputs(
724
721
  state=state,
725
- code_snippet_key="data_visualization_function",
722
+ keys_to_include=[
723
+ "recommended_steps",
724
+ "data_visualization_function",
725
+ "data_visualization_function_path",
726
+ "data_visualization_function_name",
727
+ "data_visualization_error",
728
+ ],
726
729
  result_key="messages",
727
- error_key="data_visualization_error",
728
- llm=llm,
729
730
  role=AGENT_NAME,
730
- explanation_prompt_template="""
731
- Explain the data visualization steps that the data visualization agent performed in this function.
732
- Keep the summary succinct and to the point.\n\n# Data Visualization Agent:\n\n{code}
733
- """,
734
- success_prefix="# Data Visualization Agent:\n\n ",
735
- error_message="The Data Visualization Agent encountered an error during data visualization. No explanation could be provided."
731
+ custom_title="Data Visualization Agent Outputs"
736
732
  )
737
733
 
738
734
  # Define the graph
@@ -742,7 +738,7 @@ def make_data_visualization_agent(
742
738
  "chart_generator": chart_generator,
743
739
  "execute_data_visualization_code": execute_data_visualization_code,
744
740
  "fix_data_visualization_code": fix_data_visualization_code,
745
- "explain_data_visualization_code": explain_data_visualization_code
741
+ "report_agent_outputs": report_agent_outputs,
746
742
  }
747
743
 
748
744
  app = create_coding_agent_graph(
@@ -752,7 +748,7 @@ def make_data_visualization_agent(
752
748
  create_code_node_name="chart_generator",
753
749
  execute_code_node_name="execute_data_visualization_code",
754
750
  fix_code_node_name="fix_data_visualization_code",
755
- explain_code_node_name="explain_data_visualization_code",
751
+ explain_code_node_name="report_agent_outputs",
756
752
  error_key="data_visualization_error",
757
753
  human_in_the_loop=human_in_the_loop, # or False
758
754
  human_review_node_name="human_review",
@@ -7,6 +7,7 @@
7
7
  from typing import TypedDict, Annotated, Sequence, Literal, Union, Optional
8
8
  import operator
9
9
  import os
10
+ import json
10
11
  import pandas as pd
11
12
  from IPython.display import Markdown
12
13
 
@@ -19,12 +20,18 @@ from ai_data_science_team.templates import(
19
20
  node_func_execute_agent_code_on_data,
20
21
  node_func_human_review,
21
22
  node_func_fix_agent_code,
22
- node_func_explain_agent_code,
23
+ node_func_report_agent_outputs,
23
24
  create_coding_agent_graph,
24
25
  BaseAgent,
25
26
  )
26
27
  from ai_data_science_team.tools.parsers import PythonOutputParser
27
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
28
+ from ai_data_science_team.tools.regex import (
29
+ relocate_imports_inside_function,
30
+ add_comments_to_top,
31
+ format_agent_name,
32
+ format_recommended_steps,
33
+ get_generic_summary,
34
+ )
28
35
  from ai_data_science_team.tools.metadata import get_dataframe_summary
29
36
  from ai_data_science_team.tools.logging import log_ai_function
30
37
 
@@ -88,8 +95,8 @@ class DataWranglingAgent(BaseAgent):
88
95
  invoke_agent(user_instructions: str, data_raw: Union[dict, list], max_retries=3, retry_count=0)
89
96
  Synchronously wrangles the provided dataset(s) based on user instructions.
90
97
 
91
- explain_wrangling_steps()
92
- Returns an explanation of the wrangling steps performed by the agent.
98
+ get_workflow_summary()
99
+ Retrieves a summary of the agent's workflow.
93
100
 
94
101
  get_log_summary()
95
102
  Retrieves a summary of logged operations if logging is enabled.
@@ -287,40 +294,34 @@ class DataWranglingAgent(BaseAgent):
287
294
  self.response = response
288
295
  return None
289
296
 
290
- def explain_wrangling_steps(self):
297
+ def get_workflow_summary(self, markdown=False):
291
298
  """
292
- Provides an explanation of the wrangling steps performed by the agent.
293
-
294
- Returns
295
- -------
296
- str or list
297
- Explanation of the data wrangling steps.
299
+ Retrieves the agent's workflow summary, if logging is enabled.
298
300
  """
299
- if self.response:
300
- return self.response.get("messages", [])
301
- return []
301
+ if self.response and self.response.get("messages"):
302
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
303
+ if markdown:
304
+ return Markdown(summary)
305
+ else:
306
+ return summary
302
307
 
303
308
  def get_log_summary(self, markdown=False):
304
309
  """
305
310
  Logs a summary of the agent's operations, if logging is enabled.
311
+ """
312
+ if self.response:
313
+ if self.response.get('data_wrangler_function_path'):
314
+ log_details = f"""
315
+ ## Data Wrangling Agent Log Summary:
306
316
 
307
- Parameters
308
- ----------
309
- markdown : bool, optional
310
- If True, returns the summary in Markdown.
317
+ Function Path: {self.response.get('data_wrangler_function_path')}
311
318
 
312
- Returns
313
- -------
314
- str or None
315
- The log details, or None if not available.
316
- """
317
- if self.response and self.response.get("data_wrangler_function_path"):
318
- log_details = f"Log Path: {self.response.get('data_wrangler_function_path')}"
319
- if markdown:
320
- return Markdown(log_details)
321
- else:
322
- return log_details
323
- return None
319
+ Function Name: {self.response.get('data_wrangler_function_name')}
320
+ """
321
+ if markdown:
322
+ return Markdown(log_details)
323
+ else:
324
+ return log_details
324
325
 
325
326
  def get_data_wrangled(self) -> Optional[pd.DataFrame]:
326
327
  """
@@ -597,7 +598,7 @@ def make_data_wrangling_agent(
597
598
  Below are summaries of all datasets provided:
598
599
  {all_datasets_summary}
599
600
 
600
- Return your recommended steps as a numbered point list, explaining briefly why each step is needed.
601
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
601
602
 
602
603
  Avoid these:
603
604
  1. Do not include steps to save files.
@@ -797,20 +798,20 @@ def make_data_wrangling_agent(
797
798
  function_name=state.get("data_wrangler_function_name"),
798
799
  )
799
800
 
800
- def explain_data_wrangler_code(state: GraphState):
801
- return node_func_explain_agent_code(
801
+ # Final reporting node
802
+ def report_agent_outputs(state: GraphState):
803
+ return node_func_report_agent_outputs(
802
804
  state=state,
803
- code_snippet_key="data_wrangler_function",
805
+ keys_to_include=[
806
+ "recommended_steps",
807
+ "data_wrangler_function",
808
+ "data_wrangler_function_path",
809
+ "data_wrangler_function_name",
810
+ "data_wrangler_error",
811
+ ],
804
812
  result_key="messages",
805
- error_key="data_wrangler_error",
806
- llm=llm,
807
813
  role=AGENT_NAME,
808
- explanation_prompt_template="""
809
- Explain the data wrangling steps that the data wrangling agent performed in this function.
810
- Keep the summary succinct and to the point.\n\n# Data Wrangling Agent:\n\n{code}
811
- """,
812
- success_prefix="# Data Wrangling Agent:\n\n ",
813
- error_message="The Data Wrangling Agent encountered an error during data wrangling. Data could not be explained."
814
+ custom_title="Data Wrangling Agent Outputs"
814
815
  )
815
816
 
816
817
  # Define the graph
@@ -820,7 +821,7 @@ def make_data_wrangling_agent(
820
821
  "create_data_wrangler_code": create_data_wrangler_code,
821
822
  "execute_data_wrangler_code": execute_data_wrangler_code,
822
823
  "fix_data_wrangler_code": fix_data_wrangler_code,
823
- "explain_data_wrangler_code": explain_data_wrangler_code
824
+ "report_agent_outputs": report_agent_outputs,
824
825
  }
825
826
 
826
827
  app = create_coding_agent_graph(
@@ -830,7 +831,7 @@ def make_data_wrangling_agent(
830
831
  create_code_node_name="create_data_wrangler_code",
831
832
  execute_code_node_name="execute_data_wrangler_code",
832
833
  fix_code_node_name="fix_data_wrangler_code",
833
- explain_code_node_name="explain_data_wrangler_code",
834
+ explain_code_node_name="report_agent_outputs",
834
835
  error_key="data_wrangler_error",
835
836
  human_in_the_loop=human_in_the_loop,
836
837
  human_review_node_name="human_review",