PyPI - ai-data-science-team - Versions diffs - 0.0.0.9013__py3-none-any.whl → 0.0.0.9015__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9013__py3-none-any.whl → 0.0.0.9015__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

ai_data_science_team/agents/data_wrangling_agent.py CHANGED Viewed

@@ -13,7 +13,7 @@ from IPython.display import Markdown
 from langchain.prompts import PromptTemplate
 from langchain_core.messages import BaseMessage
-from langgraph.types import Command
+from langgraph.types import Command, Checkpointer
 from langgraph.checkpoint.memory import MemorySaver
 from ai_data_science_team.templates import(
@@ -83,6 +83,8 @@ class DataWranglingAgent(BaseAgent):
         If True, skips the step that generates recommended data wrangling steps. Defaults to False.
     bypass_explain_code : bool, optional
         If True, skips the step that provides code explanations. Defaults to False.
+    checkpointer : Checkpointer, optional
+        A checkpointer object to save and load the agent's state. Defaults to None.
     Methods
     -------
@@ -180,7 +182,8 @@ class DataWranglingAgent(BaseAgent):
         overwrite=True,
         human_in_the_loop=False,
         bypass_recommended_steps=False,
-        bypass_explain_code=False
+        bypass_explain_code=False,
+        checkpointer=None,
     ):
         self._params = {
             "model": model,
@@ -192,7 +195,8 @@ class DataWranglingAgent(BaseAgent):
             "overwrite": overwrite,
             "human_in_the_loop": human_in_the_loop,
             "bypass_recommended_steps": bypass_recommended_steps,
-            "bypass_explain_code": bypass_explain_code
+            "bypass_explain_code": bypass_explain_code,
+            "checkpointer": checkpointer,
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
@@ -443,7 +447,8 @@ def make_data_wrangling_agent(
     overwrite=True,
     human_in_the_loop=False,
     bypass_recommended_steps=False,
-    bypass_explain_code=False
+    bypass_explain_code=False,
+    checkpointer=None,
 ):
     """
     Creates a data wrangling agent that can be run on one or more datasets. The agent can be
@@ -488,6 +493,8 @@ def make_data_wrangling_agent(
         Bypass the recommendation step, by default False
     bypass_explain_code : bool, optional
         Bypass the code explanation step, by default False.
+    checkpointer : Checkpointer, optional
+        A checkpointer object to save and load the agent's state. Defaults to None.
     Example
     -------
@@ -520,6 +527,11 @@ def make_data_wrangling_agent(
     """
     llm = model
+    if human_in_the_loop:
+        if checkpointer is None:
+            print("Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver().")
+            checkpointer = MemorySaver()
     # Human in the loop requires recommended steps
     if bypass_recommended_steps and human_in_the_loop:
         bypass_recommended_steps = False
@@ -569,7 +581,7 @@ def make_data_wrangling_agent(
         # Create a summary for all datasets
         # We'll include a short sample and info for each dataset
-        all_datasets_summary = get_dataframe_summary(dataframes, n_sample=n_samples)
+        all_datasets_summary = get_dataframe_summary(dataframes, n_sample=n_samples, skip_stats=True)
         # Join all datasets summaries into one big text block
         all_datasets_summary_str = "\n\n".join(all_datasets_summary)
@@ -642,7 +654,7 @@ def make_data_wrangling_agent(
             # Create a summary for all datasets
             # We'll include a short sample and info for each dataset
-            all_datasets_summary = get_dataframe_summary(dataframes, n_sample=n_samples)
+            all_datasets_summary = get_dataframe_summary(dataframes, n_sample=n_samples, skip_stats=True)
             # Join all datasets summaries into one big text block
             all_datasets_summary_str = "\n\n".join(all_datasets_summary)
@@ -654,9 +666,12 @@ def make_data_wrangling_agent(
         data_wrangling_prompt = PromptTemplate(
             template="""
-            You are a Data Wrangling Coding Agent. Your job is to create a {function_name}() function that can be run on the provided data.
+            You are a Pandas Data Wrangling Coding Agent. Your job is to create a {function_name}() function that can be run on the provided data. You should use Pandas and NumPy for data wrangling operations.
+            User instructions:
+            {user_instructions}
-            Follow these recommended steps:
+            Follow these recommended steps (if present):
             {recommended_steps}
             If multiple datasets are provided, you may need to merge or join them. Make sure to handle that scenario based on the recommended steps and user instructions.
@@ -685,17 +700,22 @@ def make_data_wrangling_agent(
             1. If the incoming data is not a list. Convert it to a list first.
             2. Do not specify data types inside the function arguments.
+            Important Notes:
+            1. Do Not use Print statements to display the data. Return the data frame instead with the data wrangling operation performed.
+            2. Do not plot graphs. Only return the data frame.
             Make sure to explain any non-trivial steps with inline comments. Follow user instructions. Comment code thoroughly.
             """,
-            input_variables=["recommended_steps", "all_datasets_summary", "function_name"]
+            input_variables=["recommended_steps", "user_instructions", "all_datasets_summary", "function_name"]
         )
         data_wrangling_agent = data_wrangling_prompt | llm | PythonOutputParser()
         response = data_wrangling_agent.invoke({
             "recommended_steps": state.get("recommended_steps"),
+            "user_instructions": state.get("user_instructions"),
             "all_datasets_summary": all_datasets_summary_str,
             "function_name": function_name
         })
@@ -835,9 +855,10 @@ def make_data_wrangling_agent(
         error_key="data_wrangler_error",
         human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",
-        checkpointer=MemorySaver() if human_in_the_loop else None,
+        checkpointer=checkpointer,
         bypass_recommended_steps=bypass_recommended_steps,
         bypass_explain_code=bypass_explain_code,
+        agent_name=AGENT_NAME,
     )
     return app

ai_data_science_team/agents/feature_engineering_agent.py CHANGED Viewed

@@ -10,7 +10,7 @@ import operator
 from langchain.prompts import PromptTemplate
 from langchain_core.messages import BaseMessage
-from langgraph.types import Command
+from langgraph.types import Command, Checkpointer
 from langgraph.checkpoint.memory import MemorySaver
 import os
@@ -84,6 +84,8 @@ class FeatureEngineeringAgent(BaseAgent):
         If True, skips the default recommended steps. Defaults to False.
     bypass_explain_code : bool, optional
         If True, skips the step that provides code explanations. Defaults to False.
+    checkpointer : Checkpointer, optional
+        Checkpointer to save and load the agent's state. Defaults to None.
     Methods
     -------
@@ -170,7 +172,8 @@ class FeatureEngineeringAgent(BaseAgent):
         overwrite=True,
         human_in_the_loop=False,
         bypass_recommended_steps=False,
-        bypass_explain_code=False
+        bypass_explain_code=False,
+        checkpointer=None,
     ):
         self._params = {
             "model": model,
@@ -182,7 +185,8 @@ class FeatureEngineeringAgent(BaseAgent):
             "overwrite": overwrite,
             "human_in_the_loop": human_in_the_loop,
             "bypass_recommended_steps": bypass_recommended_steps,
-            "bypass_explain_code": bypass_explain_code
+            "bypass_explain_code": bypass_explain_code,
+            "checkpointer": checkpointer,
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
@@ -400,6 +404,7 @@ def make_feature_engineering_agent(
     human_in_the_loop=False,
     bypass_recommended_steps=False,
     bypass_explain_code=False,
+    checkpointer=None,
 ):
     """
     Creates a feature engineering agent that can be run on a dataset. The agent applies various feature engineering
@@ -448,6 +453,8 @@ def make_feature_engineering_agent(
         Bypass the recommendation step, by default False
     bypass_explain_code : bool, optional
         Bypass the code explanation step, by default False.
+    checkpointer : Checkpointer, optional
+        Checkpointer to save and load the agent's state. Defaults to None.
     Examples
     -------
@@ -480,6 +487,11 @@ def make_feature_engineering_agent(
     """
     llm = model
+    if human_in_the_loop:
+        if checkpointer is None:
+            print("Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver().")
+            checkpointer = MemorySaver()
     # Human in the loop requires recommended steps
     if bypass_recommended_steps and human_in_the_loop:
         bypass_recommended_steps = False
@@ -782,9 +794,10 @@ def make_feature_engineering_agent(
         retry_count_key = "retry_count",
         human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",
-        checkpointer=MemorySaver(),
+        checkpointer=checkpointer,
         bypass_recommended_steps=bypass_recommended_steps,
         bypass_explain_code=bypass_explain_code,
+        agent_name=AGENT_NAME,
     )
     return app

ai_data_science_team/agents/sql_database_agent.py CHANGED Viewed

@@ -7,7 +7,7 @@ from langchain.prompts import PromptTemplate
 from langchain_core.messages import BaseMessage
 from langchain_core.output_parsers import JsonOutputParser
-from langgraph.types import Command
+from langgraph.types import Command, Checkpointer
 from langgraph.checkpoint.memory import MemorySaver
 import os
@@ -75,6 +75,8 @@ class SQLDatabaseAgent(BaseAgent):
         If True, skips the step that generates recommended SQL steps. Defaults to False.
     bypass_explain_code : bool, optional
         If True, skips the step that provides code explanations. Defaults to False.
+    checkpointer : Checkpointer, optional
+        A checkpointer to save and load the agent's state. Defaults to None.
     smart_schema_pruning : bool, optional
         If True, filters the tables and columns based on the user instructions and recommended steps. Defaults to False.
@@ -157,6 +159,7 @@ class SQLDatabaseAgent(BaseAgent):
         human_in_the_loop=False,
         bypass_recommended_steps=False,
         bypass_explain_code=False,
+        checkpointer=None,
         smart_schema_pruning=False,
     ):
         self._params = {
@@ -171,6 +174,7 @@ class SQLDatabaseAgent(BaseAgent):
             "human_in_the_loop": human_in_the_loop,
             "bypass_recommended_steps": bypass_recommended_steps,
             "bypass_explain_code": bypass_explain_code,
+            "checkpointer": checkpointer,
             "smart_schema_pruning": smart_schema_pruning,
         }
         self._compiled_graph = self._make_compiled_graph()
@@ -365,6 +369,7 @@ def make_sql_database_agent(
     human_in_the_loop=False,
     bypass_recommended_steps=False,
     bypass_explain_code=False,
+    checkpointer=None,
     smart_schema_pruning=False,
 ):
     """
@@ -394,6 +399,8 @@ def make_sql_database_agent(
         Bypass the recommendation step, by default False
     bypass_explain_code : bool, optional
         Bypass the code explanation step, by default False.
+    checkpointer : Checkpointer, optional
+        A checkpointer to save and load the agent's state. Defaults to None.
     smart_schema_pruning : bool, optional
         If True, filters the tables and columns with an extra LLM step to reduce tokens for large databases. Increases processing time but can avoid errors due to hitting max token limits with large databases. Defaults to False.
@@ -432,6 +439,11 @@ def make_sql_database_agent(
     llm = model
+    if human_in_the_loop:
+        if checkpointer is None:
+            print("Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver().")
+            checkpointer = MemorySaver()
     # Human in the loop requires recommended steps
     if bypass_recommended_steps and human_in_the_loop:
         bypass_recommended_steps = False
@@ -742,9 +754,10 @@ def {function_name}(connection):
         error_key="sql_database_error",
         human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",
-        checkpointer=MemorySaver() if human_in_the_loop else None,
+        checkpointer=checkpointer,
         bypass_recommended_steps=bypass_recommended_steps,
         bypass_explain_code=bypass_explain_code,
+        agent_name=AGENT_NAME,
     )
     return app

ai_data_science_team/ds_agents/eda_tools_agent.py CHANGED Viewed

@@ -1,12 +1,8 @@
-from typing import Any, Optional, Annotated, Sequence, List, Dict, Tuple
+from typing import Any, Optional, Annotated, Sequence, Dict
 import operator
 import pandas as pd
-import os
-from io import StringIO, BytesIO
-import base64
-import matplotlib.pyplot as plt
 from IPython.display import Markdown
@@ -14,6 +10,7 @@ from langchain_core.messages import BaseMessage, AIMessage
 from langgraph.prebuilt import create_react_agent, ToolNode
 from langgraph.prebuilt.chat_agent_executor import AgentState
 from langgraph.graph import START, END, StateGraph
+from langgraph.types import Checkpointer
 from ai_data_science_team.templates import BaseAgent
 from ai_data_science_team.utils.regex import format_agent_name
@@ -52,6 +49,8 @@ class EDAToolsAgent(BaseAgent):
         Additional kwargs for create_react_agent.
     invoke_react_agent_kwargs : dict
         Additional kwargs for agent invocation.
+    checkpointer : Checkpointer, optional
+        The checkpointer for the agent.
     """
     def __init__(
@@ -59,11 +58,13 @@ class EDAToolsAgent(BaseAgent):
         model: Any,
         create_react_agent_kwargs: Optional[Dict] = {},
         invoke_react_agent_kwargs: Optional[Dict] = {},
+        checkpointer: Optional[Checkpointer] = None,
     ):
         self._params = {
             "model": model,
             "create_react_agent_kwargs": create_react_agent_kwargs,
             "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
+            "checkpointer": checkpointer
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
@@ -176,6 +177,7 @@ def make_eda_tools_agent(
     model: Any,
     create_react_agent_kwargs: Optional[Dict] = {},
     invoke_react_agent_kwargs: Optional[Dict] = {},
+    checkpointer: Optional[Checkpointer] = None,
 ):
     """
     Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
@@ -188,6 +190,8 @@ def make_eda_tools_agent(
         Additional kwargs for create_react_agent.
     invoke_react_agent_kwargs : dict
         Additional kwargs for agent invocation.
+    checkpointer : Checkpointer, optional
+        The checkpointer for the agent.
     Returns:
     -------
@@ -215,6 +219,7 @@ def make_eda_tools_agent(
             tools=tool_node,
             state_schema=GraphState,
             **create_react_agent_kwargs,
+            checkpointer=checkpointer,
         )
         response = eda_agent.invoke(
@@ -254,5 +259,9 @@ def make_eda_tools_agent(
     workflow.add_edge(START, "exploratory_agent")
     workflow.add_edge("exploratory_agent", END)
-    app = workflow.compile()
+    app = workflow.compile(
+        checkpointer=checkpointer,
+        name=AGENT_NAME,
+    )
     return app

ai_data_science_team/ml_agents/h2o_ml_agent.py CHANGED Viewed

@@ -5,7 +5,7 @@
 import os
 import json
-from typing import TypedDict, Annotated, Sequence, Literal
+from typing import TypedDict, Annotated, Sequence, Literal, Optional
 import operator
 import pandas as pd
@@ -14,7 +14,7 @@ from IPython.display import Markdown
 from langchain.prompts import PromptTemplate
 from langchain_core.messages import BaseMessage
-from langgraph.types import Command
+from langgraph.types import Command, Checkpointer
 from langgraph.checkpoint.memory import MemorySaver
 from ai_data_science_team.templates import(
@@ -79,6 +79,8 @@ class H2OMLAgent(BaseAgent):
         Name of the MLflow experiment (created if doesn't exist).
     mlflow_run_name : str, default None
         A custom name for the MLflow run.
+    checkpointer : Checkpointer, optional
+        A checkpointer object for saving the agent's state. Defaults to None.
     Methods
@@ -176,6 +178,7 @@ class H2OMLAgent(BaseAgent):
         mlflow_tracking_uri=None,
         mlflow_experiment_name="H2O AutoML",
         mlflow_run_name=None,
+        checkpointer: Optional[Checkpointer]=None,
     ):
         self._params = {
             "model": model,
@@ -193,6 +196,7 @@ class H2OMLAgent(BaseAgent):
             "mlflow_tracking_uri": mlflow_tracking_uri,
             "mlflow_experiment_name": mlflow_experiment_name,
             "mlflow_run_name": mlflow_run_name,
+            "checkpointer": checkpointer,
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
@@ -350,6 +354,7 @@ def make_h2o_ml_agent(
     mlflow_tracking_uri=None,
     mlflow_experiment_name="H2O AutoML",
     mlflow_run_name=None,
+    checkpointer=None,
 ):
     """
     Creates a machine learning agent that uses H2O for AutoML.
@@ -384,6 +389,12 @@ def make_h2o_ml_agent(
             "    pip install h2o\n\n"
             "Visit https://docs.h2o.ai/h2o/latest-stable/h2o-docs/downloading.html for details."
         ) from e
+    if human_in_the_loop:
+        if checkpointer is None:
+            print("Human in the loop is enabled. A checkpointer is required. Setting to MemorySaver().")
+            checkpointer = MemorySaver()
     # Define GraphState
     class GraphState(TypedDict):
@@ -844,9 +855,10 @@ def make_h2o_ml_agent(
         retry_count_key="retry_count",
         human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",
-        checkpointer=MemorySaver(),
+        checkpointer=checkpointer,
         bypass_recommended_steps=bypass_recommended_steps,
         bypass_explain_code=bypass_explain_code,
+        agent_name=AGENT_NAME,
     )
     return app

ai_data_science_team/ml_agents/mlflow_tools_agent.py CHANGED Viewed

@@ -10,6 +10,7 @@ from langchain_core.messages import BaseMessage, AIMessage
 from langgraph.prebuilt import create_react_agent, ToolNode
 from langgraph.prebuilt.chat_agent_executor import AgentState
+from langgraph.types import Checkpointer
 from langgraph.graph import START, END, StateGraph
 from ai_data_science_team.templates import BaseAgent
@@ -68,6 +69,8 @@ class MLflowToolsAgent(BaseAgent):
         Additional keyword arguments to pass to the create_react_agent function.
     invoke_react_agent_kwargs : dict
         Additional keyword arguments to pass to the invoke method of the react agent.
+    checkpointer : Checkpointer, optional
+        A checkpointer to use for saving and loading the agent's state. Defaults to None.
     Methods:
     --------
@@ -119,6 +122,7 @@ class MLflowToolsAgent(BaseAgent):
         mlflow_registry_uri: Optional[str]=None,
         create_react_agent_kwargs: Optional[Dict]={},
         invoke_react_agent_kwargs: Optional[Dict]={},
+        checkpointer: Optional[Checkpointer]=None,
     ):
         self._params = {
             "model": model,
@@ -126,6 +130,7 @@ class MLflowToolsAgent(BaseAgent):
             "mlflow_registry_uri": mlflow_registry_uri,
             "create_react_agent_kwargs": create_react_agent_kwargs,
             "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
+            "checkpointer": checkpointer,
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
@@ -245,6 +250,7 @@ def make_mlflow_tools_agent(
     mlflow_registry_uri: str=None,
     create_react_agent_kwargs: Optional[Dict]={},
     invoke_react_agent_kwargs: Optional[Dict]={},
+    checkpointer: Optional[Checkpointer]=None,
 ):
     """
     MLflow Tool Calling Agent
@@ -261,6 +267,8 @@ def make_mlflow_tools_agent(
         Additional keyword arguments to pass to the agent's create_react_agent method.
     invoke_react_agent_kwargs : dict, optional
         Additional keyword arguments to pass to the agent's invoke method.
+    checkpointer : Checkpointer, optional
+        A checkpointer to use for saving and loading the agent's state. Defaults to None.
     Returns
     -------
@@ -303,6 +311,7 @@ def make_mlflow_tools_agent(
             model,
             tools=tool_node,
             state_schema=GraphState,
+            checkpointer=checkpointer,
             **create_react_agent_kwargs,
         )
@@ -354,7 +363,10 @@ def make_mlflow_tools_agent(
     workflow.add_edge(START, "mlflow_tools_agent")
     workflow.add_edge("mlflow_tools_agent", END)
-    app = workflow.compile()
+    app = workflow.compile(
+        checkpointer=checkpointer,
+        name=AGENT_NAME,
+    )
     return app

ai_data_science_team/multiagents/__init__.py CHANGED Viewed

@@ -1 +1,2 @@
-from ai_data_science_team.multiagents.sql_data_analyst import SQLDataAnalyst, make_sql_data_analyst
+from ai_data_science_team.multiagents.sql_data_analyst import SQLDataAnalyst, make_sql_data_analyst
+from ai_data_science_team.multiagents.pandas_data_analyst import PandasDataAnalyst, make_pandas_data_analyst

ai-data-science-team 0.0.0.9013__py3-none-any.whl → 0.0.0.9015__py3-none-any.whl

ai-data-science-team 0.0.0.9013__py3-none-any.whl → 0.0.0.9015__py3-none-any.whl