PyPI - ai-data-science-team - Versions diffs - 0.0.0.9010__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9010__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

ai_data_science_team/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.0.9010"
1	+ __version__ = "0.0.0.9012"

ai_data_science_team/agents/__init__.py CHANGED Viewed

@@ -3,3 +3,4 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
 from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
 from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
 from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
+from ai_data_science_team.agents.data_loader_tools_agent import make_data_loader_tools_agent, DataLoaderToolsAgent

ai_data_science_team/agents/data_loader_tools_agent.py CHANGED Viewed

@@ -37,11 +37,150 @@ tools = [
     search_files_by_pattern,
 ]
+class DataLoaderToolsAgent(BaseAgent):
+    """
+    A Data Loader Agent that can interact with data loading tools and search for files in your file system.
+    Parameters:
+    ----------
+    model : langchain.llms.base.LLM
+        The language model used to generate the tool calling agent.
+    create_react_agent_kwargs : dict, optional
+        Additional keyword arguments to pass to the create_react_agent function.
+        None (the default) is treated as an empty dict.
+    invoke_react_agent_kwargs : dict, optional
+        Extra options forwarded to the invoke call of the react agent.
+        None (the default) is treated as an empty dict.
+    Methods:
+    --------
+    update_params(**kwargs)
+        Updates the agent's parameters and rebuilds the compiled graph.
+    ainvoke_agent(user_instructions: str=None, **kwargs)
+        Runs the agent with the given user instructions asynchronously.
+    invoke_agent(user_instructions: str=None, **kwargs)
+        Runs the agent with the given user instructions.
+    get_internal_messages(markdown: bool=False)
+        Returns the internal messages from the agent's response.
+    get_artifacts(as_dataframe: bool=False)
+        Returns the data loader artifacts from the agent's response.
+    get_ai_message(markdown: bool=False)
+        Returns the AI message from the agent's response.
+    """
+    def __init__(
+        self,
+        model: Any,
+        create_react_agent_kwargs: Optional[Dict]=None,
+        invoke_react_agent_kwargs: Optional[Dict]=None,
+    ):
+        # Normalise None to a fresh dict: avoids a shared mutable default and
+        # keeps the later `**create_react_agent_kwargs` unpacking from failing.
+        self._params = {
+            "model": model,
+            "create_react_agent_kwargs": create_react_agent_kwargs or {},
+            "invoke_react_agent_kwargs": invoke_react_agent_kwargs or {},
+        }
+        self._compiled_graph = self._make_compiled_graph()
+        self.response = None
+    def _make_compiled_graph(self):
+        """
+        Creates the compiled graph for the agent and clears any previous response.
+        """
+        self.response = None
+        return make_data_loader_tools_agent(**self._params)
+    def update_params(self, **kwargs):
+        """
+        Updates the agent's parameters and rebuilds the compiled graph.
+        """
+        self._params.update(kwargs)
+        self._compiled_graph = self._make_compiled_graph()
+    async def ainvoke_agent(
+        self,
+        user_instructions: str=None,
+        **kwargs
+    ):
+        """
+        Runs the agent with the given user instructions asynchronously.
+        The result is stored on ``self.response``; nothing is returned.
+        Parameters:
+        ----------
+        user_instructions : str, optional
+            The user instructions to pass to the agent.
+        kwargs : dict, optional
+            Additional keyword arguments to pass to the agents ainvoke method.
+        """
+        self.response = await self._compiled_graph.ainvoke(
+            {
+                "user_instructions": user_instructions,
+            },
+            **kwargs
+        )
+        return None
+    def invoke_agent(
+        self,
+        user_instructions: str=None,
+        **kwargs
+    ):
+        """
+        Runs the agent with the given user instructions.
+        The result is stored on ``self.response``; nothing is returned.
+        Parameters:
+        ----------
+        user_instructions : str, optional
+            The user instructions to pass to the agent.
+        kwargs : dict, optional
+            Additional keyword arguments to pass to the agents invoke method.
+        """
+        self.response = self._compiled_graph.invoke(
+            {
+                "user_instructions": user_instructions,
+            },
+            **kwargs
+        )
+        return None
+    def get_internal_messages(self, markdown: bool=False):
+        """
+        Returns the internal messages from the agent's response.
+        Returns None if the agent has not been run yet. With markdown=True the
+        messages are rendered into a single IPython Markdown object.
+        """
+        if self.response is None:
+            return None
+        messages = self.response["internal_messages"]
+        if not markdown:
+            return messages
+        # Only build the formatted text when it is actually requested.
+        pretty_print = "\n\n".join([f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}" for msg in messages])
+        return Markdown(pretty_print)
+    def get_artifacts(self, as_dataframe: bool=False):
+        """
+        Returns the data loader artifacts from the agent's response.
+        Returns None if the agent has not been run yet. With as_dataframe=True
+        the artifact is wrapped in a pandas DataFrame.
+        """
+        if self.response is None:
+            return None
+        artifacts = self.response["data_loader_artifacts"]
+        if as_dataframe:
+            return pd.DataFrame(artifacts)
+        return artifacts
+    def get_ai_message(self, markdown: bool=False):
+        """
+        Returns the AI message from the agent's response.
+        Returns None if the agent has not been run yet.
+        """
+        if self.response is None:
+            return None
+        content = self.response["messages"][0].content
+        if markdown:
+            return Markdown(content)
+        return content
 def make_data_loader_tools_agent(
     model: Any,
-    directory: Optional[str] = os.getcwd(),
+    create_react_agent_kwargs: Optional[Dict]={},
+    invoke_react_agent_kwargs: Optional[Dict]={},
 ):
     """
     Creates a Data Loader Agent that can interact with data loading tools.
@@ -50,20 +189,84 @@ def make_data_loader_tools_agent(
     ----------
     model : langchain.llms.base.LLM
         The language model used to generate the tool calling agent.
-    directory : str, optional
-        The directory to search for files. Defaults to the current working directory.
+    react_agent_kwargs : dict
+        Additional keyword arguments to pass to the create_react_agent function.
+    invoke_react_agent_kwargs : dict
+        Additional keyword arguments to pass to the invoke method of the react agent.
     Returns:
     --------
-    Data Loader Agent
+    app : langchain.graphs.CompiledStateGraph
         An agent that can interact with data loading tools.
     """
     class GraphState(AgentState):
         internal_messages: Annotated[Sequence[BaseMessage], operator.add]
-        directory: str
         user_instructions: str
-        data_artifacts: dict
+        data_loader_artifacts: dict
+    def data_loader_agent(state):
+        """
+        Graph node: runs a react tool-calling agent on state["user_instructions"]
+        and returns the final AI message, the full message trace, and the
+        artifact attached to the second-to-last message (if any).
+        """
+        print(format_agent_name(AGENT_NAME))
+        print("    ")
+        print("    * RUN REACT TOOL-CALLING AGENT")
+        tool_node = ToolNode(
+            tools=tools
+        )
+        # `or {}` guards against an explicit None, which would break `**` unpacking.
+        react_agent = create_react_agent(
+            model,
+            tools=tool_node,
+            state_schema=GraphState,
+            **(create_react_agent_kwargs or {}),
+        )
+        response = react_agent.invoke(
+            {
+                "messages": [("user", state["user_instructions"])],
+            },
+            invoke_react_agent_kwargs,
+        )
+        print("    * POST-PROCESS RESULTS")
+        internal_messages = response['messages']
+        # Ensure there is at least one AI message
+        if not internal_messages:
+            # Use the key declared on GraphState so the artifact field is
+            # always present in the final state.
+            return {
+                "internal_messages": [],
+                "data_loader_artifacts": None,
+            }
+        # Get the last AI message
+        last_ai_message = AIMessage(internal_messages[-1].content, role = AGENT_NAME)
+        # Get the last tool artifact safely
+        last_tool_artifact = None
+        if len(internal_messages) > 1:
+            last_message = internal_messages[-2]  # Get second-to-last message
+            if hasattr(last_message, "artifact"):  # Check if it has an "artifact"
+                last_tool_artifact = last_message.artifact
+            elif isinstance(last_message, dict) and "artifact" in last_message:
+                last_tool_artifact = last_message["artifact"]
+        return {
+            "messages": [last_ai_message],
+            "internal_messages": internal_messages,
+            "data_loader_artifacts": last_tool_artifact,
+        }
+    workflow = StateGraph(GraphState)
-    pass
+    workflow.add_node("data_loader_agent", data_loader_agent)
+    workflow.add_edge(START, "data_loader_agent")
+    workflow.add_edge("data_loader_agent", END)
+    app = workflow.compile()
+    return app

ai_data_science_team/ds_agents/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from ai_data_science_team.ds_agents.eda_tools_agent import EDAToolsAgent, make_eda_tools_agent

ai_data_science_team/ds_agents/eda_tools_agent.py ADDED Viewed

@@ -0,0 +1,245 @@
+from typing import Any, Optional, Annotated, Sequence, List, Dict, Tuple
+import operator
+import pandas as pd
+import os
+from io import StringIO, BytesIO
+import base64
+import matplotlib.pyplot as plt
+from IPython.display import Markdown
+from langchain_core.messages import BaseMessage, AIMessage
+from langgraph.prebuilt import create_react_agent, ToolNode
+from langgraph.prebuilt.chat_agent_executor import AgentState
+from langgraph.graph import START, END, StateGraph
+from ai_data_science_team.templates import BaseAgent
+from ai_data_science_team.utils.regex import format_agent_name
+from ai_data_science_team.tools.eda import (
+    describe_dataset,
+    visualize_missing,
+    correlation_funnel,
+    generate_sweetviz_report,
+)
+# Name of this agent: printed through format_agent_name when the graph node
+# runs, and set as the role of the final AI message.
+AGENT_NAME = "exploratory_data_analyst_agent"
+# Tools made available to the EDA react agent; make_eda_tools_agent wraps
+# this list in a ToolNode.
+EDA_TOOLS = [
+    describe_dataset,
+    visualize_missing,
+    correlation_funnel,
+    generate_sweetviz_report,
+]
+class EDAToolsAgent(BaseAgent):
+    """
+    An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
+    missing data visualizations, correlation funnels, EDA reports, etc.
+    Parameters:
+    ----------
+    model : langchain.llms.base.LLM
+        The language model for generating the tool-calling agent.
+    create_react_agent_kwargs : dict, optional
+        Additional kwargs for create_react_agent. None (the default) is treated as an empty dict.
+    invoke_react_agent_kwargs : dict, optional
+        Additional kwargs for agent invocation. None (the default) is treated as an empty dict.
+    """
+    def __init__(
+        self,
+        model: Any,
+        create_react_agent_kwargs: Optional[Dict] = None,
+        invoke_react_agent_kwargs: Optional[Dict] = None,
+    ):
+        # Normalise None to a fresh dict: avoids a shared mutable default and
+        # keeps the later `**create_react_agent_kwargs` unpacking from failing.
+        self._params = {
+            "model": model,
+            "create_react_agent_kwargs": create_react_agent_kwargs or {},
+            "invoke_react_agent_kwargs": invoke_react_agent_kwargs or {},
+        }
+        self._compiled_graph = self._make_compiled_graph()
+        self.response = None
+    def _make_compiled_graph(self):
+        """
+        Creates the compiled state graph for the EDA agent and clears any previous response.
+        """
+        self.response = None
+        return make_eda_tools_agent(**self._params)
+    def update_params(self, **kwargs):
+        """
+        Updates the agent's parameters and rebuilds the compiled graph.
+        """
+        self._params.update(kwargs)
+        self._compiled_graph = self._make_compiled_graph()
+    async def ainvoke_agent(
+        self,
+        user_instructions: str = None,
+        data_raw: pd.DataFrame = None,
+        **kwargs
+    ):
+        """
+        Asynchronously runs the agent with user instructions and data.
+        The result is stored on ``self.response``; nothing is returned.
+        Parameters:
+        ----------
+        user_instructions : str, optional
+            The instructions for the agent.
+        data_raw : pd.DataFrame, optional
+            The input data as a DataFrame. Converted with ``to_dict()`` before
+            being placed in the graph state.
+        kwargs : dict, optional
+            Additional keyword arguments passed to the compiled graph's ainvoke method.
+        """
+        graph_input = {
+            "user_instructions": user_instructions,
+            "data_raw": data_raw.to_dict() if data_raw is not None else None,
+        }
+        self.response = await self._compiled_graph.ainvoke(graph_input, **kwargs)
+        return None
+    def invoke_agent(
+        self,
+        user_instructions: str = None,
+        data_raw: pd.DataFrame = None,
+        **kwargs
+    ):
+        """
+        Synchronously runs the agent with user instructions and data.
+        The result is stored on ``self.response``; nothing is returned.
+        Parameters:
+        ----------
+        user_instructions : str, optional
+            The instructions for the agent.
+        data_raw : pd.DataFrame, optional
+            The input data as a DataFrame. Converted with ``to_dict()`` before
+            being placed in the graph state.
+        kwargs : dict, optional
+            Additional keyword arguments passed to the compiled graph's invoke method.
+        """
+        graph_input = {
+            "user_instructions": user_instructions,
+            "data_raw": data_raw.to_dict() if data_raw is not None else None,
+        }
+        self.response = self._compiled_graph.invoke(graph_input, **kwargs)
+        return None
+    def get_internal_messages(self, markdown: bool = False):
+        """
+        Returns internal messages from the agent response.
+        Returns None if the agent has not been run yet. With markdown=True the
+        messages are rendered into a single IPython Markdown object.
+        """
+        if self.response is None:
+            return None
+        messages = self.response["internal_messages"]
+        if not markdown:
+            return messages
+        # Only build the formatted text when it is actually requested.
+        pretty_print = "\n\n".join(
+            [f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
+             for msg in messages]
+        )
+        return Markdown(pretty_print)
+    def get_artifacts(self, as_dataframe: bool = False):
+        """
+        Returns the EDA artifacts from the agent response.
+        Returns None if the agent has not been run yet. With as_dataframe=True
+        the artifact is wrapped in a pandas DataFrame.
+        """
+        if self.response is None:
+            return None
+        artifacts = self.response["eda_artifacts"]
+        if as_dataframe:
+            return pd.DataFrame(artifacts)
+        return artifacts
+    def get_ai_message(self, markdown: bool = False):
+        """
+        Returns the AI message from the agent response.
+        Returns None if the agent has not been run yet.
+        """
+        if self.response is None:
+            return None
+        content = self.response["messages"][0].content
+        if markdown:
+            return Markdown(content)
+        return content
+def make_eda_tools_agent(
+    model: Any,
+    create_react_agent_kwargs: Optional[Dict] = None,
+    invoke_react_agent_kwargs: Optional[Dict] = None,
+):
+    """
+    Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
+    Parameters:
+    ----------
+    model : Any
+        The language model used for tool-calling.
+    create_react_agent_kwargs : dict, optional
+        Additional kwargs for create_react_agent. None (the default) is treated as an empty dict.
+    invoke_react_agent_kwargs : dict, optional
+        Additional options for agent invocation; passed as the second positional
+        argument of the react agent's invoke call. None (the default) is treated as an empty dict.
+    Returns:
+    -------
+    app : langgraph.graph.CompiledStateGraph
+        The compiled state graph for the EDA agent.
+    """
+    # Normalise None to a fresh dict: avoids a shared mutable default and
+    # keeps the `**` unpacking below from raising on an explicit None.
+    create_react_agent_kwargs = create_react_agent_kwargs or {}
+    invoke_react_agent_kwargs = invoke_react_agent_kwargs or {}
+    class GraphState(AgentState):
+        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
+        user_instructions: str
+        data_raw: dict
+        eda_artifacts: dict
+    def exploratory_agent(state):
+        """
+        Graph node: runs a react tool-calling agent over the EDA tools and
+        returns the final AI message, the full message trace, and the artifact
+        attached to the second-to-last message (if any).
+        """
+        print(format_agent_name(AGENT_NAME))
+        print("    * RUN REACT TOOL-CALLING AGENT FOR EDA")
+        tool_node = ToolNode(
+            tools=EDA_TOOLS
+        )
+        eda_agent = create_react_agent(
+            model,
+            tools=tool_node,
+            state_schema=GraphState,
+            **create_react_agent_kwargs,
+        )
+        response = eda_agent.invoke(
+            {
+                "messages": [("user", state["user_instructions"])],
+                "data_raw": state["data_raw"],
+            },
+            invoke_react_agent_kwargs,
+        )
+        print("    * POST-PROCESSING EDA RESULTS")
+        internal_messages = response['messages']
+        # Nothing came back: report an empty trace and no artifact.
+        if not internal_messages:
+            return {"internal_messages": [], "eda_artifacts": None}
+        # Re-wrap the final message so it carries this agent's name as its role.
+        last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)
+        # The artifact, when present, is read from the message just before the final one.
+        last_tool_artifact = None
+        if len(internal_messages) > 1:
+            last_message = internal_messages[-2]
+            if hasattr(last_message, "artifact"):
+                last_tool_artifact = last_message.artifact
+            elif isinstance(last_message, dict) and "artifact" in last_message:
+                last_tool_artifact = last_message["artifact"]
+        return {
+            "messages": [last_ai_message],
+            "internal_messages": internal_messages,
+            "eda_artifacts": last_tool_artifact,
+        }
+    # Single-node graph: START -> exploratory_agent -> END.
+    workflow = StateGraph(GraphState)
+    workflow.add_node("exploratory_agent", exploratory_agent)
+    workflow.add_edge(START, "exploratory_agent")
+    workflow.add_edge("exploratory_agent", END)
+    app = workflow.compile()
+    return app

ai_data_science_team/ds_agents/modeling_tools_agent.py ADDED Viewed

File without changes

ai_data_science_team/ml_agents/h2o_ml_agent.py CHANGED Viewed

@@ -506,6 +506,7 @@ def make_h2o_ml_agent(
             while remaining flexible to user instructions.
             - Return a dict with keys: leaderboard, best_model_id, model_path, and model_results.
             - If enable_mlfow is True, log the top metrics and save the model as an artifact. (See example function)
+            - IMPORTANT: if enable_mlflow is True, make sure to set enable_mlflow to True in the function definition.
             Initial User Instructions (Disregard any instructions that are unrelated to modeling):
                 {user_instructions}
@@ -533,7 +534,7 @@ def make_h2o_ml_agent(
                 sort_metric: str ,
                 model_directory: Optional[str] = None,
                 log_path: Optional[str] = None,
-                enable_mlflow: bool,
+                enable_mlflow: bool, # If use has specified to enable MLflow, make sure to make this True
                 mlflow_tracking_uri: Optional[str],
                 mlflow_experiment_name: str,
                 mlflow_run_name: str,

ai_data_science_team/ml_agents/h2o_ml_tools_agent.py ADDED Viewed

File without changes

ai_data_science_team/ml_agents/mlflow_tools_agent.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from typing import Any, Optional, Annotated, Sequence
+from typing import Any, Optional, Annotated, Sequence, Dict
 import operator
 import pandas as pd
@@ -63,8 +63,10 @@ class MLflowToolsAgent(BaseAgent):
         The tracking URI for MLflow. Defaults to None.
     mlflow_registry_uri : str, optional
         The registry URI for MLflow. Defaults to None.
-    **react_agent_kwargs : dict, optional
-        Additional keyword arguments to pass to the agent's react agent.
+    react_agent_kwargs : dict
+        Additional keyword arguments to pass to the create_react_agent function.
+    invoke_react_agent_kwargs : dict
+        Additional keyword arguments to pass to the invoke method of the react agent.
     Methods:
     --------
@@ -114,13 +116,15 @@ class MLflowToolsAgent(BaseAgent):
         model: Any,
         mlflow_tracking_uri: Optional[str]=None,
         mlflow_registry_uri: Optional[str]=None,
-        **react_agent_kwargs,
+        create_react_agent_kwargs: Optional[Dict]={},
+        invoke_react_agent_kwargs: Optional[Dict]={},
     ):
         self._params = {
             "model": model,
             "mlflow_tracking_uri": mlflow_tracking_uri,
             "mlflow_registry_uri": mlflow_registry_uri,
-            **react_agent_kwargs,
+            "create_react_agent_kwargs": create_react_agent_kwargs,
+            "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
@@ -185,8 +189,6 @@ class MLflowToolsAgent(BaseAgent):
             The user instructions to pass to the agent.
         data_raw : pd.DataFrame, optional
             The raw data to pass to the agent. Used for prediction and tool calls where data is required.
-        kwargs : dict, optional
-            Additional keyword arguments to pass to the agents invoke method.
         """
         response = self._compiled_graph.invoke(
@@ -234,10 +236,30 @@ def make_mlflow_tools_agent(
     model: Any,
     mlflow_tracking_uri: str=None,
     mlflow_registry_uri: str=None,
-    **react_agent_kwargs,
+    create_react_agent_kwargs: Optional[Dict]={},
+    invoke_react_agent_kwargs: Optional[Dict]={},
 ):
     """
     MLflow Tool Calling Agent
+    Parameters:
+    ----------
+    model : Any
+        The language model used to generate the agent.
+    mlflow_tracking_uri : str, optional
+        The tracking URI for MLflow. Defaults to None.
+    mlflow_registry_uri : str, optional
+        The registry URI for MLflow. Defaults to None.
+    create_react_agent_kwargs : dict, optional
+        Additional keyword arguments to pass to the agent's create_react_agent method.
+    invoke_react_agent_kwargs : dict, optional
+        Additional keyword arguments to pass to the agent's invoke method.
+    Returns
+    -------
+    app : langchain.graphs.CompiledStateGraph
+        A compiled state graph for the MLflow Tool Calling Agent.
     """
     try:
@@ -274,7 +296,7 @@ def make_mlflow_tools_agent(
             model,
             tools=tool_node,
             state_schema=GraphState,
-            **react_agent_kwargs,
+            **create_react_agent_kwargs,
         )
         response = mlflow_agent.invoke(
@@ -282,6 +304,7 @@ def make_mlflow_tools_agent(
                 "messages": [("user", state["user_instructions"])],
                 "data_raw": state["data_raw"],
             },
+            invoke_react_agent_kwargs,
         )
         print("    * POST-PROCESS RESULTS")

ai-data-science-team 0.0.0.9010__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl

ai-data-science-team 0.0.0.9010__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl