PyPI - ai-data-science-team - Versions diffs - 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

ai_data_science_team/multiagents/sql_data_analyst.py ADDED Viewed

@@ -0,0 +1,286 @@
+from langchain_core.messages import BaseMessage
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.types import Checkpointer
+from langgraph.graph import START, END, StateGraph
+from langgraph.graph.state import CompiledStateGraph
+from langgraph.types import Command
+from typing import TypedDict, Annotated, Sequence
+import operator
+from typing_extensions import TypedDict, Literal
+import pandas as pd
+from IPython.display import Markdown
+from ai_data_science_team.templates import BaseAgent
+from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
+from ai_data_science_team.utils.plotly import plotly_from_dict
+class SQLDataAnalyst(BaseAgent):
+    """
+    SQLDataAnalyst is a multi-agent class that combines SQL database querying and data visualization capabilities.
+    Parameters:
+    -----------
+    model:
+        The language model to be used for the agents.
+    sql_database_agent: SQLDatabaseAgent
+        The SQL Database Agent.
+    data_visualization_agent: DataVisualizationAgent
+        The Data Visualization Agent.
+    Methods:
+    --------
+    ainvoke_agent(user_instructions, **kwargs)
+        Asynchronously invokes the SQL Data Analyst Multi-Agent with the given user instructions.
+    invoke_agent(user_instructions, **kwargs)
+        Invokes the SQL Data Analyst Multi-Agent with the given user instructions.
+    get_data_sql()
+        Returns the SQL data as a Pandas DataFrame.
+    get_plotly_graph()
+        Returns the Plotly graph as a Plotly object.
+    get_sql_query_code(markdown=False)
+        Returns the SQL query code as a string, optionally formatted as a Markdown code block.
+    get_sql_database_function(markdown=False)
+        Returns the SQL database function as a string, optionally formatted as a Markdown code block.
+    get_data_visualization_function(markdown=False)
+        Returns the data visualization function as a string, optionally formatted as a Markdown code block.
+    """
+    def __init__(
+        self,
+        model,
+        sql_database_agent: SQLDatabaseAgent,
+        data_visualization_agent: DataVisualizationAgent,
+        checkpointer: Checkpointer = None,
+    ):
+        self._params = {
+            "model": model,
+            "sql_database_agent": sql_database_agent,
+            "data_visualization_agent": data_visualization_agent,
+            "checkpointer": checkpointer,
+        }
+        self._compiled_graph = self._make_compiled_graph()
+        self.response = None
+    def _make_compiled_graph(self):
+        """
+        Create or rebuild the compiled graph for the SQL Data Analyst Multi-Agent.
+        Running this method resets the response to None.
+        """
+        self.response = None
+        return make_sql_data_analyst(
+            model=self._params["model"],
+            sql_database_agent=self._params["sql_database_agent"]._compiled_graph,
+            data_visualization_agent=self._params["data_visualization_agent"]._compiled_graph,
+            checkpointer=self._params["checkpointer"],
+        )
+    def update_params(self, **kwargs):
+        """
+        Updates the agent's parameters (e.g. model, sql_database_agent, etc.)
+        and rebuilds the compiled graph.
+        """
+        for k, v in kwargs.items():
+            self._params[k] = v
+        self._compiled_graph = self._make_compiled_graph()
+    def ainvoke_agent(self, user_instructions, **kwargs):
+        """
+        Asynchronosly nvokes the SQL Data Analyst Multi-Agent.
+        Parameters:
+        ----------
+        user_instructions: str
+            The user's instructions for the combined SQL and (optionally) Data Visualization agents.
+        **kwargs:
+            Additional keyword arguments to pass to the compiled graph's `ainvoke` method.
+        Returns:
+        -------
+        None. The response is stored in the `response` attribute.
+        Example:
+        --------
+        ``` python
+        # TODO
+        ```
+        """
+        response = self._compiled_graph.ainvoke({
+            "user_instructions": user_instructions,
+        }, **kwargs)
+        self.response = response
+    def invoke_agent(self, user_instructions, **kwargs):
+        """
+        Invokes the SQL Data Analyst Multi-Agent.
+        Parameters:
+        ----------
+        user_instructions: str
+            The user's instructions for the combined SQL and (optionally) Data Visualization agents.
+        **kwargs:
+            Additional keyword arguments to pass to the compiled graph's `invoke` method.
+        Returns:
+        -------
+        None. The response is stored in the `response` attribute.
+        Example:
+        --------
+        ``` python
+        # TODO
+        ```
+        """
+        response = self._compiled_graph.invoke({
+            "user_instructions": user_instructions,
+        }, **kwargs)
+        self.response = response
+    def get_data_sql(self):
+        """
+        Returns the SQL data as a Pandas DataFrame.
+        """
+        if self.response:
+            if self.response.get("data_sql"):
+                return pd.DataFrame(self.response.get("data_sql"))
+    def get_plotly_graph(self):
+        """
+        Returns the Plotly graph as a Plotly object.
+        """
+        if self.response:
+            if self.response.get("plotly_graph"):
+                return plotly_from_dict(self.response.get("plotly_graph"))
+    def get_sql_query_code(self, markdown=False):
+        """
+        Returns the SQL query code as a string.
+        Parameters:
+        ----------
+        markdown: bool
+            If True, returns the code as a Markdown code block for Jupyter (IPython).
+            For streamlit, use `st.code()` instead.
+        """
+        if self.response:
+            if self.response.get("sql_query_code"):
+                if markdown:
+                    return Markdown(f"```sql\n{self.response.get('sql_query_code')}\n```")
+                return self.response.get("sql_query_code")
+    def get_sql_database_function(self, markdown=False):
+        """
+        Returns the SQL database function as a string.
+        Parameters:
+        ----------
+        markdown: bool
+            If True, returns the function as a Markdown code block for Jupyter (IPython).
+            For streamlit, use `st.code()` instead.
+        """
+        if self.response:
+            if self.response.get("sql_database_function"):
+                if markdown:
+                    return Markdown(f"```python\n{self.response.get('sql_database_function')}\n```")
+                return self.response.get("sql_database_function")
+    def get_data_visualization_function(self, markdown=False):
+        """
+        Returns the data visualization function as a string.
+        Parameters:
+        ----------
+        markdown: bool
+            If True, returns the function as a Markdown code block for Jupyter (IPython).
+            For streamlit, use `st.code()` instead.
+        """
+        if self.response:
+            if self.response.get("data_visualization_function"):
+                if markdown:
+                    return Markdown(f"```python\n{self.response.get('data_visualization_function')}\n```")
+                return self.response.get("data_visualization_function")
+def make_sql_data_analyst(
+    model,
+    sql_database_agent: CompiledStateGraph,
+    data_visualization_agent: CompiledStateGraph,
+    checkpointer: Checkpointer = None
+):
+    """
+    Creates a multi-agent system that takes in a SQL query and returns a plot or table.
+    - Agent 1: SQL Database Agent made with `make_sql_database_agent()`
+    - Agent 2: Data Visualization Agent made with `make_data_visualization_agent()`
+    Parameters:
+    ----------
+    model:
+        The language model to be used for the agents.
+    sql_database_agent: CompiledStateGraph
+        The SQL Database Agent made with `make_sql_database_agent()`.
+    data_visualization_agent: CompiledStateGraph
+        The Data Visualization Agent made with `make_data_visualization_agent()`.
+    checkpointer: Checkpointer (optional)
+        The checkpointer to save the state of the multi-agent system.
+        Default: None
+    Returns:
+    -------
+    CompiledStateGraph
+        The compiled multi-agent system.
+    """
+    llm = model
+    class PrimaryState(TypedDict):
+        messages: Annotated[Sequence[BaseMessage], operator.add]
+        user_instructions: str
+        sql_query_code: str
+        sql_database_function: str
+        data_sql: dict
+        data_raw: dict
+        plot_required: bool
+        data_visualization_function: str
+        plotly_graph: dict
+    def route_to_visualization(state) -> Command[Literal["data_visualization_agent", "__end__"]]:
+        response = llm.invoke(f"Respond in 1 word ('plot' or 'table'). Is the user requesting a plot? If unknown, select 'table'. \n\n User Instructions:\n{state.get('user_instructions')}")
+        if response.content == 'plot':
+            plot_required = True
+            goto="data_visualization_agent"
+        else:
+            plot_required = False
+            goto="__end__"
+        return Command(
+            update={
+                'data_raw': state.get("data_sql"),
+                'plot_required': plot_required,
+            },
+            goto=goto
+        )
+    workflow = StateGraph(PrimaryState)
+    workflow.add_node("sql_database_agent", sql_database_agent)
+    workflow.add_node("route_to_visualization", route_to_visualization)
+    workflow.add_node("data_visualization_agent", data_visualization_agent)
+    workflow.add_edge(START, "sql_database_agent")
+    workflow.add_edge("sql_database_agent", "route_to_visualization")
+    workflow.add_edge("data_visualization_agent", END)
+    app = workflow.compile(checkpointer=checkpointer)
+    return app

ai_data_science_team/multiagents/supervised_data_analyst.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # TODO: Implement the supervised data analyst agent
2	+ # https://langchain-ai.github.io/langgraph/tutorials/multi_agent/agent_supervisor/#create-agent-supervisor

ai_data_science_team/templates/__init__.py CHANGED Viewed

@@ -0,0 +1,9 @@
+from ai_data_science_team.templates.agent_templates import(
+    node_func_execute_agent_code_on_data,
+    node_func_human_review,
+    node_func_fix_agent_code,
+    node_func_explain_agent_code,
+    node_func_execute_agent_from_sql_connection,
+    create_coding_agent_graph,
+    BaseAgent,
+)

ai_data_science_team/templates/agent_templates.py CHANGED Viewed

@@ -1,15 +1,202 @@
 from langchain_core.messages import AIMessage
 from langgraph.graph import StateGraph, END
 from langgraph.types import interrupt, Command
+from langgraph.graph.state import CompiledStateGraph
+from langchain_core.runnables import RunnableConfig
+from langgraph.pregel.types import StreamMode
 import pandas as pd
 import sqlalchemy as sql
-from typing import Any, Callable, Dict, Type, Optional
+from typing import Any, Callable, Dict, Type, Optional, Union
 from ai_data_science_team.tools.parsers import PythonOutputParser
 from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top
+from IPython.display import Image, display
+import pandas as pd
+class BaseAgent(CompiledStateGraph):
+    """
+    A generic base class for agents that interact with compiled state graphs.
+    Provides shared functionality for handling parameters, responses, and state
+    graph operations.
+    """
+    def __init__(self, **params):
+        """
+        Initialize the agent with provided parameters.
+        Parameters:
+            **params: Arbitrary keyword arguments representing the agent's parameters.
+        """
+        self._params = params
+        self._compiled_graph = self._make_compiled_graph()
+        self.response = None
+    def _make_compiled_graph(self):
+        """
+        Subclasses should override this method to create a specific compiled graph.
+        """
+        raise NotImplementedError("Subclasses must implement the `_make_compiled_graph` method.")
+    def update_params(self, **kwargs):
+        """
+        Update one or more parameters and rebuild the compiled graph.
+        Parameters:
+            **kwargs: Parameters to update.
+        """
+        self._params.update(kwargs)
+        self._compiled_graph = self._make_compiled_graph()
+    def __getattr__(self, name: str):
+        """
+        Delegate attribute access to the compiled graph if the attribute is not found.
+        Parameters:
+            name (str): The attribute name.
+        Returns:
+            Any: The attribute from the compiled graph.
+        """
+        return getattr(self._compiled_graph, name)
+    def invoke(
+        self,
+        input: Union[dict[str, Any], Any],
+        config: Optional[RunnableConfig] = None,
+        **kwargs
+    ):
+        """
+        Wrapper for self._compiled_graph.invoke()
+        Parameters:
+            input: The input data for the graph. It can be a dictionary or any other type.
+            config: Optional. The configuration for the graph run.
+            **kwarg: Arguments to pass to self._compiled_graph.invoke()
+        Returns:
+            Any: The agent's response.
+        """
+        self.response = self._compiled_graph.invoke(input=input, config=config,**kwargs)
+        return self.response
+    def ainvoke(
+        self,
+        input: Union[dict[str, Any], Any],
+        config: Optional[RunnableConfig] = None,
+        **kwargs
+    ):
+        """
+        Wrapper for self._compiled_graph.ainvoke()
+        Parameters:
+            input: The input data for the graph. It can be a dictionary or any other type.
+            config: Optional. The configuration for the graph run.
+            **kwarg: Arguments to pass to self._compiled_graph.ainvoke()
+        Returns:
+            Any: The agent's response.
+        """
+        self.response = self._compiled_graph.ainvoke(input=input, config=config,**kwargs)
+        return self.response
+    def stream(
+        self,
+        input: dict[str, Any] | Any,
+        config: RunnableConfig | None = None,
+        stream_mode: StreamMode | list[StreamMode] | None = None,
+        **kwargs
+    ):
+        """
+        Wrapper for self._compiled_graph.stream()
+        Parameters:
+            input: The input to the graph.
+            config: The configuration to use for the run.
+            stream_mode: The mode to stream output, defaults to self.stream_mode.
+                Options are 'values', 'updates', and 'debug'.
+                values: Emit the current values of the state for each step.
+                updates: Emit only the updates to the state for each step.
+                    Output is a dict with the node name as key and the updated values as value.
+                debug: Emit debug events for each step.
+            **kwarg: Arguments to pass to self._compiled_graph.stream()
+        Returns:
+            Any: The agent's response.
+        """
+        self.response = self._compiled_graph.stream(input=input, config=config, stream_mode=stream_mode, **kwargs)
+        return self.response
+    def astream(
+        self,
+        input: dict[str, Any] | Any,
+        config: RunnableConfig | None = None,
+        stream_mode: StreamMode | list[StreamMode] | None = None,
+        **kwargs
+    ):
+        """
+        Wrapper for self._compiled_graph.astream()
+        Parameters:
+            input: The input to the graph.
+            config: The configuration to use for the run.
+            stream_mode: The mode to stream output, defaults to self.stream_mode.
+                Options are 'values', 'updates', and 'debug'.
+                values: Emit the current values of the state for each step.
+                updates: Emit only the updates to the state for each step.
+                    Output is a dict with the node name as key and the updated values as value.
+                debug: Emit debug events for each step.
+            **kwarg: Arguments to pass to self._compiled_graph.astream()
+        Returns:
+            Any: The agent's response.
+        """
+        self.response = self._compiled_graph.astream(input=input, config=config, stream_mode=stream_mode, **kwargs)
+        return self.response
+    def get_state_keys(self):
+        """
+        Returns a list of keys that the state graph response contains.
+        Returns:
+            list: A list of keys in the response.
+        """
+        return list(self.get_output_jsonschema()['properties'].keys())
+    def get_state_properties(self):
+        """
+        Returns detailed properties of the state graph response.
+        Returns:
+            dict: The properties of the response.
+        """
+        return self.get_output_jsonschema()['properties']
+    def get_response(self):
+        """
+        Returns the response generated by the agent.
+        Returns:
+            Any: The agent's response.
+        """
+        return self.response
+    def show(self, xray: int = 0):
+        """
+        Displays the agent's state graph as a Mermaid diagram.
+        Parameters:
+            xray (int): If set to 1, displays subgraph levels. Defaults to 0.
+        """
+        display(Image(self.get_graph(xray=xray).draw_mermaid_png()))
 def create_coding_agent_graph(
     GraphState: Type,
     node_functions: Dict[str, Callable],
@@ -79,35 +266,37 @@ def create_coding_agent_graph(
     workflow = StateGraph(GraphState)
-    # Conditionally add the recommended-steps node
-    if not bypass_recommended_steps:
-        workflow.add_node(recommended_steps_node_name, node_functions[recommended_steps_node_name])
+    # * NODES
     # Always add create, execute, and fix nodes
     workflow.add_node(create_code_node_name, node_functions[create_code_node_name])
     workflow.add_node(execute_code_node_name, node_functions[execute_code_node_name])
     workflow.add_node(fix_code_node_name, node_functions[fix_code_node_name])
+    # Conditionally add the recommended-steps node
+    if not bypass_recommended_steps:
+        workflow.add_node(recommended_steps_node_name, node_functions[recommended_steps_node_name])
+    # Conditionally add the human review node
+    if human_in_the_loop:
+        workflow.add_node(human_review_node_name, node_functions[human_review_node_name])
     # Conditionally add the explanation node
     if not bypass_explain_code:
         workflow.add_node(explain_code_node_name, node_functions[explain_code_node_name])
+    # * EDGES
     # Set the entry point
     entry_point = create_code_node_name if bypass_recommended_steps else recommended_steps_node_name
     workflow.set_entry_point(entry_point)
-    # Add edges for recommended steps
     if not bypass_recommended_steps:
-        if human_in_the_loop:
-            workflow.add_edge(recommended_steps_node_name, human_review_node_name)
-        else:
-            workflow.add_edge(recommended_steps_node_name, create_code_node_name)
-    elif human_in_the_loop:
-        # Skip recommended steps but still include human review
-        workflow.add_edge(create_code_node_name, human_review_node_name)
+        workflow.add_edge(recommended_steps_node_name, create_code_node_name)
-    # Create -> Execute
     workflow.add_edge(create_code_node_name, execute_code_node_name)
+    workflow.add_edge(fix_code_node_name, execute_code_node_name)
     # Define a helper to check if we have an error & can still retry
     def error_and_can_retry(state):
@@ -117,39 +306,43 @@ def create_coding_agent_graph(
             and state.get(max_retries_key) is not None
             and state[retry_count_key] < state[max_retries_key]
         )
-    # ---- Split into two branches for bypass_explain_code ----
-    if not bypass_explain_code:
-        # If we are NOT bypassing explain, the next node is fix_code if error,
-        # else explain_code. Then we wire explain_code -> END afterward.
+    # If human in the loop, add a branch for human review
+    if human_in_the_loop:
         workflow.add_conditional_edges(
             execute_code_node_name,
-            lambda s: "fix_code" if error_and_can_retry(s) else "explain_code",
+            lambda s: "fix_code" if error_and_can_retry(s) else "human_review",
             {
+                "human_review": human_review_node_name,
                 "fix_code": fix_code_node_name,
-                "explain_code": explain_code_node_name,
             },
         )
-        # Fix code -> Execute again
-        workflow.add_edge(fix_code_node_name, execute_code_node_name)
-        # explain_code -> END
-        workflow.add_edge(explain_code_node_name, END)
     else:
-        # If we ARE bypassing explain_code, the next node is fix_code if error,
-        # else straight to END.
-        workflow.add_conditional_edges(
-            execute_code_node_name,
-            lambda s: "fix_code" if error_and_can_retry(s) else "END",
-            {
-                "fix_code": fix_code_node_name,
-                "END": END,
-            },
-        )
-        # Fix code -> Execute again
-        workflow.add_edge(fix_code_node_name, execute_code_node_name)
+        # If no human review, the next node is fix_code if error, else explain_code.
+        if not bypass_explain_code:
+            workflow.add_conditional_edges(
+                execute_code_node_name,
+                lambda s: "fix_code" if error_and_can_retry(s) else "explain_code",
+                {
+                    "fix_code": fix_code_node_name,
+                    "explain_code": explain_code_node_name,
+                },
+            )
+        else:
+            workflow.add_conditional_edges(
+                execute_code_node_name,
+                lambda s: "fix_code" if error_and_can_retry(s) else "END",
+                {
+                    "fix_code": fix_code_node_name,
+                    "END": END,
+                },
+            )
+    if not bypass_explain_code:
+        workflow.add_edge(explain_code_node_name, END)
     # Finally, compile
-    if human_in_the_loop and checkpointer is not None:
+    if human_in_the_loop:
         app = workflow.compile(checkpointer=checkpointer)
     else:
         app = workflow.compile()
@@ -165,6 +358,8 @@ def node_func_human_review(
     no_goto: str,
     user_instructions_key: str = "user_instructions",
     recommended_steps_key: str = "recommended_steps",
+    code_snippet_key: str = "code_snippet",
+    code_type: str = "python"
 ) -> Command[str]:
     """
     A generic function to handle human review steps.
@@ -183,6 +378,10 @@ def node_func_human_review(
         The key in the state to store user instructions.
     recommended_steps_key : str, optional
         The key in the state to store recommended steps.
+    code_snippet_key : str, optional
+        The key in the state to store the code snippet.
+    code_type : str, optional
+        The type of code snippet to display (e.g., "python").
     Returns
     -------
@@ -190,9 +389,11 @@ def node_func_human_review(
         A Command object directing the next state and updates to the state.
     """
     print("    * HUMAN REVIEW")
+    code_markdown=f"```{code_type}\n" + state.get(code_snippet_key)+"\n```"
     # Display instructions and get user response
-    user_input = interrupt(value=prompt_text.format(steps=state.get(recommended_steps_key, '')))
+    user_input = interrupt(value=prompt_text.format(steps=state.get(recommended_steps_key, '') + "\n\n" + code_markdown))
     # Decide next steps based on user input
     if user_input.strip().lower() == "yes":
@@ -200,11 +401,11 @@ def node_func_human_review(
         update = {}
     else:
         goto = no_goto
-        modifications = "Modifications: \n" + user_input
+        modifications = "User Has Requested Modifications To Previous Code: \n" + user_input
         if state.get(user_instructions_key) is None:
-            update = {user_instructions_key: modifications}
+            update = {user_instructions_key: modifications + "\n\nPrevious Code:\n" + code_markdown}
         else:
-            update = {user_instructions_key: state.get(user_instructions_key) + modifications}
+            update = {user_instructions_key: state.get(user_instructions_key) + modifications + "\n\nPrevious Code:\n" + code_markdown}
     return Command(goto=goto, update=update)
@@ -394,6 +595,7 @@ def node_func_fix_agent_code(
     retry_count_key: str = "retry_count",
     log: bool = False,
     file_path: str = "logs/agent_function.py",
+    function_name: str = "agent_function"
 ) -> dict:
     """
     Generic function to fix a given piece of agent code using an LLM and a prompt template.
@@ -420,6 +622,8 @@ def node_func_fix_agent_code(
         Whether to log the returned code to a file.
     file_path : str, optional
         The path to the file where the code will be logged.
+    function_name : str, optional
+        The name of the function in the code snippet that will be fixed.
     Returns
     -------
@@ -436,7 +640,8 @@ def node_func_fix_agent_code(
     # Format the prompt with the code snippet and the error
     prompt = prompt_template.format(
         code_snippet=code_snippet,
-        error=error_message
+        error=error_message,
+        function_name=function_name,
     )
     # Execute the prompt with the LLM

ai-data-science-team 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl

ai-data-science-team 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl