PyPI - ai-data-science-team - Versions diffs - 0.0.0.9008__py3-none-any.whl → 0.0.0.9010__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9008__py3-none-any.whl → 0.0.0.9010__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

ai_data_science_team/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.0.9008"
1	+ __version__ = "0.0.0.9010"

ai_data_science_team/agents/__init__.py CHANGED Viewed

@@ -3,4 +3,3 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
 from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
 from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
 from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent

ai_data_science_team/agents/data_cleaning_agent.py CHANGED Viewed

@@ -14,7 +14,7 @@ from langgraph.types import Command
 from langgraph.checkpoint.memory import MemorySaver
 import os
-import io
+import json
 import pandas as pd
 from IPython.display import Markdown
@@ -23,21 +23,26 @@ from ai_data_science_team.templates import(
     node_func_execute_agent_code_on_data,
     node_func_human_review,
     node_func_fix_agent_code,
-    node_func_explain_agent_code,
+    node_func_report_agent_outputs,
     create_coding_agent_graph,
     BaseAgent,
 )
-from ai_data_science_team.tools.parsers import PythonOutputParser
-from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
-from ai_data_science_team.tools.metadata import get_dataframe_summary
-from ai_data_science_team.tools.logging import log_ai_function
+from ai_data_science_team.parsers.parsers import PythonOutputParser
+from ai_data_science_team.utils.regex import (
+    relocate_imports_inside_function,
+    add_comments_to_top,
+    format_agent_name,
+    format_recommended_steps,
+    get_generic_summary,
+)
+from ai_data_science_team.tools.dataframe import get_dataframe_summary
+from ai_data_science_team.utils.logging import log_ai_function
 # Setup
 AGENT_NAME = "data_cleaning_agent"
 LOG_PATH = os.path.join(os.getcwd(), "logs/")
 # Class
 class DataCleaningAgent(BaseAgent):
     """
@@ -89,8 +94,8 @@ class DataCleaningAgent(BaseAgent):
         Cleans the provided dataset asynchronously based on user instructions.
     invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
         Cleans the provided dataset synchronously based on user instructions.
-    explain_cleaning_steps()
-        Returns an explanation of the cleaning steps performed by the agent.
+    get_workflow_summary()
+        Retrieves a summary of the agent's workflow.
     get_log_summary()
         Retrieves a summary of logged operations if logging is enabled.
     get_state_keys()
@@ -178,8 +183,7 @@ class DataCleaningAgent(BaseAgent):
         self.response=None
         return make_data_cleaning_agent(**self._params)
-    def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
+    async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
         """
         Asynchronously invokes the agent. The response is stored in the response attribute.
@@ -200,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
         --------
             None. The response is stored in the response attribute.
         """
-        response = self._compiled_graph.ainvoke({
+        response = await self._compiled_graph.ainvoke({
             "user_instructions": user_instructions,
             "data_raw": data_raw.to_dict(),
             "max_retries": max_retries,
@@ -239,15 +243,16 @@ class DataCleaningAgent(BaseAgent):
         self.response = response
         return None
-    def explain_cleaning_steps(self):
+    def get_workflow_summary(self, markdown=False):
         """
-        Provides an explanation of the cleaning steps performed by the agent.
-        Returns:
-            str: Explanation of the cleaning steps.
+        Retrieves the agent's workflow summary, if logging is enabled.
         """
-        messages = self.response.get("messages", [])
-        return messages
+        if self.response and self.response.get("messages"):
+            summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
+            if markdown:
+                return Markdown(summary)
+            else:
+                return summary
     def get_log_summary(self, markdown=False):
         """
@@ -255,7 +260,13 @@ class DataCleaningAgent(BaseAgent):
         """
         if self.response:
             if self.response.get('data_cleaner_function_path'):
-                log_details = f"Log Path: {self.response.get('data_cleaner_function_path')}"
+                log_details = f"""
+## Data Cleaning Agent Log Summary:
+Function Path: {self.response.get('data_cleaner_function_path')}
+Function Name: {self.response.get('data_cleaner_function_name')}
+                """
                 if markdown:
                     return Markdown(log_details)
                 else:
@@ -462,7 +473,7 @@ def make_data_cleaning_agent(
             Below are summaries of all datasets provided:
             {all_datasets_summary}
-            Return the steps as a bullet point list (no code, just the steps).
+            Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
             Avoid these:
             1. Do not include steps to save files.
@@ -633,32 +644,31 @@ def make_data_cleaning_agent(
             function_name=state.get("data_cleaner_function_name"),
         )
-    def explain_data_cleaner_code(state: GraphState):
-        return node_func_explain_agent_code(
+    # Final reporting node
+    def report_agent_outputs(state: GraphState):
+        return node_func_report_agent_outputs(
             state=state,
-            code_snippet_key="data_cleaner_function",
+            keys_to_include=[
+                "recommended_steps",
+                "data_cleaner_function",
+                "data_cleaner_function_path",
+                "data_cleaner_function_name",
+                "data_cleaner_error",
+            ],
             result_key="messages",
-            error_key="data_cleaner_error",
-            llm=llm,
             role=AGENT_NAME,
-            explanation_prompt_template="""
-            Explain the data cleaning steps that the data cleaning agent performed in this function.
-            Keep the summary succinct and to the point.\n\n# Data Cleaning Agent:\n\n{code}
-            """,
-            success_prefix="# Data Cleaning Agent:\n\n ",
-            error_message="The Data Cleaning Agent encountered an error during data cleaning. Data could not be explained."
+            custom_title="Data Cleaning Agent Outputs"
         )
-    # Define the graph
     node_functions = {
         "recommend_cleaning_steps": recommend_cleaning_steps,
         "human_review": human_review,
         "create_data_cleaner_code": create_data_cleaner_code,
         "execute_data_cleaner_code": execute_data_cleaner_code,
         "fix_data_cleaner_code": fix_data_cleaner_code,
-        "explain_data_cleaner_code": explain_data_cleaner_code
+        "report_agent_outputs": report_agent_outputs,
     }
     app = create_coding_agent_graph(
         GraphState=GraphState,
         node_functions=node_functions,
@@ -666,16 +676,17 @@ def make_data_cleaning_agent(
         create_code_node_name="create_data_cleaner_code",
         execute_code_node_name="execute_data_cleaner_code",
         fix_code_node_name="fix_data_cleaner_code",
-        explain_code_node_name="explain_data_cleaner_code",
+        explain_code_node_name="report_agent_outputs",
         error_key="data_cleaner_error",
-        human_in_the_loop=human_in_the_loop,  # or False
+        human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",
         checkpointer=MemorySaver() if human_in_the_loop else None,
         bypass_recommended_steps=bypass_recommended_steps,
         bypass_explain_code=bypass_explain_code,
     )
     return app

ai_data_science_team/agents/data_loader_tools_agent.py ADDED Viewed

@@ -0,0 +1,69 @@
+from typing import Any, Optional, Annotated, Sequence, List, Dict
+import operator
+import pandas as pd
+import os
+from IPython.display import Markdown
+from langchain_core.messages import BaseMessage, AIMessage
+from langgraph.prebuilt import create_react_agent, ToolNode
+from langgraph.prebuilt.chat_agent_executor import AgentState
+from langgraph.graph import START, END, StateGraph
+from ai_data_science_team.templates import BaseAgent
+from ai_data_science_team.utils.regex import format_agent_name
+from ai_data_science_team.tools.data_loader import (
+    load_directory,
+    load_file,
+    list_directory_contents,
+    list_directory_recursive,
+    get_file_info,
+    search_files_by_pattern,
+)
+AGENT_NAME = "data_loader_tools_agent"
+tools = [
+    load_directory,
+    load_file,
+    list_directory_contents,
+    list_directory_recursive,
+    get_file_info,
+    search_files_by_pattern,
+]
+def make_data_loader_tools_agent(
+    model: Any,
+    directory: Optional[str] = os.getcwd(),
+):
+    """
+    Creates a Data Loader Agent that can interact with data loading tools.
+    Parameters:
+    ----------
+    model : langchain.llms.base.LLM
+        The language model used to generate the tool calling agent.
+    directory : str, optional
+        The directory to search for files. Defaults to the current working directory.
+    Returns:
+    --------
+    Data Loader Agent
+        An agent that can interact with data loading tools.
+    """
+    class GraphState(AgentState):
+        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
+        directory: str
+        user_instructions: str
+        data_artifacts: dict
+    pass

ai_data_science_team/agents/data_visualization_agent.py CHANGED Viewed

@@ -10,13 +10,13 @@ from typing import TypedDict, Annotated, Sequence, Literal
 import operator
 from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
 from langchain_core.messages import BaseMessage
 from langgraph.types import Command
 from langgraph.checkpoint.memory import MemorySaver
 import os
+import json
 import pandas as pd
 from IPython.display import Markdown
@@ -25,19 +25,20 @@ from ai_data_science_team.templates import(
     node_func_execute_agent_code_on_data,
     node_func_human_review,
     node_func_fix_agent_code,
-    node_func_explain_agent_code,
+    node_func_report_agent_outputs,
     create_coding_agent_graph,
     BaseAgent,
 )
-from ai_data_science_team.tools.parsers import PythonOutputParser
-from ai_data_science_team.tools.regex import (
+from ai_data_science_team.parsers.parsers import PythonOutputParser
+from ai_data_science_team.utils.regex import (
     relocate_imports_inside_function,
     add_comments_to_top,
     format_agent_name,
-    format_recommended_steps
+    format_recommended_steps,
+    get_generic_summary,
 )
-from ai_data_science_team.tools.metadata import get_dataframe_summary
-from ai_data_science_team.tools.logging import log_ai_function
+from ai_data_science_team.tools.dataframe import get_dataframe_summary
+from ai_data_science_team.utils.logging import log_ai_function
 from ai_data_science_team.utils.plotly import plotly_from_dict
 # Setup
@@ -93,8 +94,8 @@ class DataVisualizationAgent(BaseAgent):
         Asynchronously generates a visualization based on user instructions.
     invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
         Synchronously generates a visualization based on user instructions.
-    explain_visualization_steps()
-        Returns an explanation of the visualization steps performed by the agent.
+    get_workflow_summary()
+        Retrieves a summary of the agent's workflow.
     get_log_summary()
         Retrieves a summary of logged operations if logging is enabled.
     get_plotly_graph()
@@ -195,7 +196,7 @@ class DataVisualizationAgent(BaseAgent):
         # Rebuild the compiled graph
         self._compiled_graph = self._make_compiled_graph()
-    def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
+    async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
         """
         Asynchronously invokes the agent to generate a visualization.
         The response is stored in the 'response' attribute.
@@ -217,7 +218,7 @@ class DataVisualizationAgent(BaseAgent):
         -------
         None
         """
-        response = self._compiled_graph.ainvoke({
+        response = await self._compiled_graph.ainvoke({
             "user_instructions": user_instructions,
             "data_raw": data_raw.to_dict(),
             "max_retries": max_retries,
@@ -257,40 +258,34 @@ class DataVisualizationAgent(BaseAgent):
         self.response = response
         return None
-    def explain_visualization_steps(self):
+    def get_workflow_summary(self, markdown=False):
         """
-        Provides an explanation of the visualization steps performed by the agent.
-        Returns
-        -------
-        str
-            Explanation of the visualization steps, if any are available.
+        Retrieves the agent's workflow summary, if logging is enabled.
         """
-        if self.response:
-            return self.response.get("messages", [])
-        return []
+        if self.response and self.response.get("messages"):
+            summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
+            if markdown:
+                return Markdown(summary)
+            else:
+                return summary
     def get_log_summary(self, markdown=False):
         """
         Logs a summary of the agent's operations, if logging is enabled.
+        """
+        if self.response:
+            if self.response.get('data_visualization_function_path'):
+                log_details = f"""
+## Data Visualization Agent Log Summary:
-        Parameters
-        ----------
-        markdown : bool, optional
-            If True, returns Markdown-formatted output.
+Function Path: {self.response.get('data_visualization_function_path')}
-        Returns
-        -------
-        str or None
-            Summary of logs or None if no logs are available.
-        """
-        if self.response and self.response.get('data_visualization_function_path'):
-            log_details = f"Log Path: {self.response.get('data_visualization_function_path')}"
-            if markdown:
-                return Markdown(log_details)
-            else:
-                return log_details
-        return None
+Function Name: {self.response.get('data_visualization_function_name')}
+                """
+                if markdown:
+                    return Markdown(log_details)
+                else:
+                    return log_details
     def get_plotly_graph(self):
         """
@@ -719,20 +714,20 @@ def make_data_visualization_agent(
             function_name=state.get("data_visualization_function_name"),
         )
-    def explain_data_visualization_code(state: GraphState):
-        return node_func_explain_agent_code(
+    # Final reporting node
+    def report_agent_outputs(state: GraphState):
+        return node_func_report_agent_outputs(
             state=state,
-            code_snippet_key="data_visualization_function",
+            keys_to_include=[
+                "recommended_steps",
+                "data_visualization_function",
+                "data_visualization_function_path",
+                "data_visualization_function_name",
+                "data_visualization_error",
+            ],
             result_key="messages",
-            error_key="data_visualization_error",
-            llm=llm,
             role=AGENT_NAME,
-            explanation_prompt_template="""
-            Explain the data visualization steps that the data visualization agent performed in this function.
-            Keep the summary succinct and to the point.\n\n# Data Visualization Agent:\n\n{code}
-            """,
-            success_prefix="# Data Visualization Agent:\n\n ",
-            error_message="The Data Visualization Agent encountered an error during data visualization. No explanation could be provided."
+            custom_title="Data Visualization Agent Outputs"
         )
     # Define the graph
@@ -742,7 +737,7 @@ def make_data_visualization_agent(
         "chart_generator": chart_generator,
         "execute_data_visualization_code": execute_data_visualization_code,
         "fix_data_visualization_code": fix_data_visualization_code,
-        "explain_data_visualization_code": explain_data_visualization_code
+        "report_agent_outputs": report_agent_outputs,
     }
     app = create_coding_agent_graph(
@@ -752,7 +747,7 @@ def make_data_visualization_agent(
         create_code_node_name="chart_generator",
         execute_code_node_name="execute_data_visualization_code",
         fix_code_node_name="fix_data_visualization_code",
-        explain_code_node_name="explain_data_visualization_code",
+        explain_code_node_name="report_agent_outputs",
         error_key="data_visualization_error",
         human_in_the_loop=human_in_the_loop,  # or False
         human_review_node_name="human_review",

ai_data_science_team/agents/data_wrangling_agent.py CHANGED Viewed

@@ -7,6 +7,7 @@
 from typing import TypedDict, Annotated, Sequence, Literal, Union, Optional
 import operator
 import os
+import json
 import pandas as pd
 from IPython.display import Markdown
@@ -19,14 +20,20 @@ from ai_data_science_team.templates import(
     node_func_execute_agent_code_on_data,
     node_func_human_review,
     node_func_fix_agent_code,
-    node_func_explain_agent_code,
+    node_func_report_agent_outputs,
     create_coding_agent_graph,
     BaseAgent,
 )
-from ai_data_science_team.tools.parsers import PythonOutputParser
-from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
-from ai_data_science_team.tools.metadata import get_dataframe_summary
-from ai_data_science_team.tools.logging import log_ai_function
+from ai_data_science_team.parsers.parsers import PythonOutputParser
+from ai_data_science_team.utils.regex import (
+    relocate_imports_inside_function,
+    add_comments_to_top,
+    format_agent_name,
+    format_recommended_steps,
+    get_generic_summary,
+)
+from ai_data_science_team.tools.dataframe import get_dataframe_summary
+from ai_data_science_team.utils.logging import log_ai_function
 # Setup Logging Path
 AGENT_NAME = "data_wrangling_agent"
@@ -88,8 +95,8 @@ class DataWranglingAgent(BaseAgent):
     invoke_agent(user_instructions: str, data_raw: Union[dict, list], max_retries=3, retry_count=0)
         Synchronously wrangles the provided dataset(s) based on user instructions.
-    explain_wrangling_steps()
-        Returns an explanation of the wrangling steps performed by the agent.
+    get_workflow_summary()
+        Retrieves a summary of the agent's workflow.
     get_log_summary()
         Retrieves a summary of logged operations if logging is enabled.
@@ -206,7 +213,7 @@ class DataWranglingAgent(BaseAgent):
             self._params[k] = v
         self._compiled_graph = self._make_compiled_graph()
-    def ainvoke_agent(
+    async def ainvoke_agent(
         self,
         data_raw: Union[pd.DataFrame, dict, list],
         user_instructions: str=None,
@@ -238,7 +245,7 @@ class DataWranglingAgent(BaseAgent):
         None
         """
         data_input = self._convert_data_input(data_raw)
-        response = self._compiled_graph.ainvoke({
+        response = await self._compiled_graph.ainvoke({
             "user_instructions": user_instructions,
             "data_raw": data_input,
             "max_retries": max_retries,
@@ -287,40 +294,34 @@ class DataWranglingAgent(BaseAgent):
         self.response = response
         return None
-    def explain_wrangling_steps(self):
+    def get_workflow_summary(self, markdown=False):
         """
-        Provides an explanation of the wrangling steps performed by the agent.
-        Returns
-        -------
-        str or list
-            Explanation of the data wrangling steps.
+        Retrieves the agent's workflow summary, if logging is enabled.
         """
-        if self.response:
-            return self.response.get("messages", [])
-        return []
+        if self.response and self.response.get("messages"):
+            summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
+            if markdown:
+                return Markdown(summary)
+            else:
+                return summary
     def get_log_summary(self, markdown=False):
         """
         Logs a summary of the agent's operations, if logging is enabled.
+        """
+        if self.response:
+            if self.response.get('data_wrangler_function_path'):
+                log_details = f"""
+## Data Wrangling Agent Log Summary:
-        Parameters
-        ----------
-        markdown : bool, optional
-            If True, returns the summary in Markdown.
+Function Path: {self.response.get('data_wrangler_function_path')}
-        Returns
-        -------
-        str or None
-            The log details, or None if not available.
-        """
-        if self.response and self.response.get("data_wrangler_function_path"):
-            log_details = f"Log Path: {self.response.get('data_wrangler_function_path')}"
-            if markdown:
-                return Markdown(log_details)
-            else:
-                return log_details
-        return None
+Function Name: {self.response.get('data_wrangler_function_name')}
+                """
+                if markdown:
+                    return Markdown(log_details)
+                else:
+                    return log_details
     def get_data_wrangled(self) -> Optional[pd.DataFrame]:
         """
@@ -597,7 +598,7 @@ def make_data_wrangling_agent(
             Below are summaries of all datasets provided:
             {all_datasets_summary}
-            Return your recommended steps as a numbered point list, explaining briefly why each step is needed.
+            Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
             Avoid these:
             1. Do not include steps to save files.
@@ -797,20 +798,20 @@ def make_data_wrangling_agent(
             function_name=state.get("data_wrangler_function_name"),
         )
-    def explain_data_wrangler_code(state: GraphState):
-        return node_func_explain_agent_code(
+    # Final reporting node
+    def report_agent_outputs(state: GraphState):
+        return node_func_report_agent_outputs(
             state=state,
-            code_snippet_key="data_wrangler_function",
+            keys_to_include=[
+                "recommended_steps",
+                "data_wrangler_function",
+                "data_wrangler_function_path",
+                "data_wrangler_function_name",
+                "data_wrangler_error",
+            ],
             result_key="messages",
-            error_key="data_wrangler_error",
-            llm=llm,
             role=AGENT_NAME,
-            explanation_prompt_template="""
-            Explain the data wrangling steps that the data wrangling agent performed in this function.
-            Keep the summary succinct and to the point.\n\n# Data Wrangling Agent:\n\n{code}
-            """,
-            success_prefix="# Data Wrangling Agent:\n\n ",
-            error_message="The Data Wrangling Agent encountered an error during data wrangling. Data could not be explained."
+            custom_title="Data Wrangling Agent Outputs"
         )
     # Define the graph
@@ -820,7 +821,7 @@ def make_data_wrangling_agent(
         "create_data_wrangler_code": create_data_wrangler_code,
         "execute_data_wrangler_code": execute_data_wrangler_code,
         "fix_data_wrangler_code": fix_data_wrangler_code,
-        "explain_data_wrangler_code": explain_data_wrangler_code
+        "report_agent_outputs": report_agent_outputs,
     }
     app = create_coding_agent_graph(
@@ -830,7 +831,7 @@ def make_data_wrangling_agent(
         create_code_node_name="create_data_wrangler_code",
         execute_code_node_name="execute_data_wrangler_code",
         fix_code_node_name="fix_data_wrangler_code",
-        explain_code_node_name="explain_data_wrangler_code",
+        explain_code_node_name="report_agent_outputs",
         error_key="data_wrangler_error",
         human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",

ai-data-science-team 0.0.0.9008__py3-none-any.whl → 0.0.0.9010__py3-none-any.whl

ai-data-science-team 0.0.0.9008__py3-none-any.whl → 0.0.0.9010__py3-none-any.whl