ai-data-science-team 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_data_science_team/_version.py +1 -1
 - ai_data_science_team/agents/__init__.py +4 -5
 - ai_data_science_team/agents/data_cleaning_agent.py +268 -116
 - ai_data_science_team/agents/data_visualization_agent.py +470 -41
 - ai_data_science_team/agents/data_wrangling_agent.py +471 -31
 - ai_data_science_team/agents/feature_engineering_agent.py +426 -41
 - ai_data_science_team/agents/sql_database_agent.py +458 -58
 - ai_data_science_team/ml_agents/__init__.py +1 -0
 - ai_data_science_team/ml_agents/h2o_ml_agent.py +1032 -0
 - ai_data_science_team/multiagents/__init__.py +1 -0
 - ai_data_science_team/multiagents/sql_data_analyst.py +398 -0
 - ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
 - ai_data_science_team/templates/__init__.py +3 -1
 - ai_data_science_team/templates/agent_templates.py +319 -43
 - ai_data_science_team/tools/metadata.py +94 -62
 - ai_data_science_team/tools/regex.py +86 -1
 - ai_data_science_team/utils/__init__.py +0 -0
 - ai_data_science_team/utils/plotly.py +24 -0
 - ai_data_science_team-0.0.0.9009.dist-info/METADATA +245 -0
 - ai_data_science_team-0.0.0.9009.dist-info/RECORD +28 -0
 - ai_data_science_team-0.0.0.9007.dist-info/METADATA +0 -183
 - ai_data_science_team-0.0.0.9007.dist-info/RECORD +0 -21
 - {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE +0 -0
 - {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL +0 -0
 - {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/top_level.txt +0 -0
 
    
        ai_data_science_team/_version.py
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            __version__ = "0.0.0.9007"
     | 
| 
      
 1 
     | 
    
         
            +
            __version__ = "0.0.0.9009"
         
     | 
| 
         @@ -1,6 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            from ai_data_science_team.agents.data_cleaning_agent import make_data_cleaning_agent, DataCleaningAgent
         
     | 
| 
       2 
     | 
    
         
            -
            from ai_data_science_team.agents.feature_engineering_agent import make_feature_engineering_agent
         
     | 
| 
       3 
     | 
    
         
            -
            from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent
         
     | 
| 
       4 
     | 
    
         
            -
            from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent
         
     | 
| 
       5 
     | 
    
         
            -
            from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
            from ai_data_science_team.agents.feature_engineering_agent import make_feature_engineering_agent, FeatureEngineeringAgent
         
     | 
| 
      
 3 
     | 
    
         
            +
            from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
         
     | 
| 
      
 4 
     | 
    
         
            +
            from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
         
     | 
| 
      
 5 
     | 
    
         
            +
            from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
         
     | 
| 
         @@ -13,21 +13,28 @@ from langchain_core.messages import BaseMessage 
     | 
|
| 
       13 
13 
     | 
    
         
             
            from langgraph.types import Command
         
     | 
| 
       14 
14 
     | 
    
         
             
            from langgraph.checkpoint.memory import MemorySaver
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
            from langgraph.graph.state import CompiledStateGraph
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
16 
     | 
    
         
             
            import os
         
     | 
| 
       19 
     | 
    
         
            -
            import  
     | 
| 
      
 17 
     | 
    
         
            +
            import json
         
     | 
| 
       20 
18 
     | 
    
         
             
            import pandas as pd
         
     | 
| 
       21 
19 
     | 
    
         | 
| 
      
 20 
     | 
    
         
            +
            from IPython.display import Markdown
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
       22 
22 
     | 
    
         
             
            from ai_data_science_team.templates import(
         
     | 
| 
       23 
23 
     | 
    
         
             
                node_func_execute_agent_code_on_data, 
         
     | 
| 
       24 
24 
     | 
    
         
             
                node_func_human_review,
         
     | 
| 
       25 
25 
     | 
    
         
             
                node_func_fix_agent_code, 
         
     | 
| 
       26 
     | 
    
         
            -
                 
     | 
| 
       27 
     | 
    
         
            -
                create_coding_agent_graph
         
     | 
| 
      
 26 
     | 
    
         
            +
                node_func_report_agent_outputs,
         
     | 
| 
      
 27 
     | 
    
         
            +
                create_coding_agent_graph,
         
     | 
| 
      
 28 
     | 
    
         
            +
                BaseAgent,
         
     | 
| 
       28 
29 
     | 
    
         
             
            )
         
     | 
| 
       29 
30 
     | 
    
         
             
            from ai_data_science_team.tools.parsers import PythonOutputParser
         
     | 
| 
       30 
     | 
    
         
            -
            from ai_data_science_team.tools.regex import  
     | 
| 
      
 31 
     | 
    
         
            +
            from ai_data_science_team.tools.regex import (
         
     | 
| 
      
 32 
     | 
    
         
            +
                relocate_imports_inside_function, 
         
     | 
| 
      
 33 
     | 
    
         
            +
                add_comments_to_top, 
         
     | 
| 
      
 34 
     | 
    
         
            +
                format_agent_name, 
         
     | 
| 
      
 35 
     | 
    
         
            +
                format_recommended_steps, 
         
     | 
| 
      
 36 
     | 
    
         
            +
                get_generic_summary,
         
     | 
| 
      
 37 
     | 
    
         
            +
            )
         
     | 
| 
       31 
38 
     | 
    
         
             
            from ai_data_science_team.tools.metadata import get_dataframe_summary
         
     | 
| 
       32 
39 
     | 
    
         
             
            from ai_data_science_team.tools.logging import log_ai_function
         
     | 
| 
       33 
40 
     | 
    
         | 
| 
         @@ -36,9 +43,110 @@ AGENT_NAME = "data_cleaning_agent" 
     | 
|
| 
       36 
43 
     | 
    
         
             
            LOG_PATH = os.path.join(os.getcwd(), "logs/")
         
     | 
| 
       37 
44 
     | 
    
         | 
| 
       38 
45 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
46 
     | 
    
         
             
            # Class
         
     | 
| 
       41 
     | 
    
         
            -
            class DataCleaningAgent(CompiledStateGraph):
     | 
| 
      
 47 
     | 
    
         
            +
            class DataCleaningAgent(BaseAgent):
         
     | 
| 
      
 48 
     | 
    
         
            +
                """
         
     | 
| 
      
 49 
     | 
    
         
            +
                Creates a data cleaning agent that can process datasets based on user-defined instructions or default cleaning steps. 
         
     | 
| 
      
 50 
     | 
    
         
            +
                The agent generates a Python function to clean the dataset, performs the cleaning, and logs the process, including code 
         
     | 
| 
      
 51 
     | 
    
         
            +
                and errors. It is designed to facilitate reproducible and customizable data cleaning workflows.
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                The agent performs the following default cleaning steps unless instructed otherwise:
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
                - Removing columns with more than 40% missing values.
         
     | 
| 
      
 56 
     | 
    
         
            +
                - Imputing missing values with the mean for numeric columns.
         
     | 
| 
      
 57 
     | 
    
         
            +
                - Imputing missing values with the mode for categorical columns.
         
     | 
| 
      
 58 
     | 
    
         
            +
                - Converting columns to appropriate data types.
         
     | 
| 
      
 59 
     | 
    
         
            +
                - Removing duplicate rows.
         
     | 
| 
      
 60 
     | 
    
         
            +
                - Removing rows with missing values.
         
     | 
| 
      
 61 
     | 
    
         
            +
                - Removing rows with extreme outliers (values 3x the interquartile range).
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
                User instructions can modify, add, or remove any of these steps to tailor the cleaning process.
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
                Parameters
         
     | 
| 
      
 66 
     | 
    
         
            +
                ----------
         
     | 
| 
      
 67 
     | 
    
         
            +
                model : langchain.llms.base.LLM
         
     | 
| 
      
 68 
     | 
    
         
            +
                    The language model used to generate the data cleaning function.
         
     | 
| 
      
 69 
     | 
    
         
            +
                n_samples : int, optional
         
     | 
| 
      
 70 
     | 
    
         
            +
                    Number of samples used when summarizing the dataset. Defaults to 30. Reducing this number can help 
         
     | 
| 
      
 71 
     | 
    
         
            +
                    avoid exceeding the model's token limits.
         
     | 
| 
      
 72 
     | 
    
         
            +
                log : bool, optional
         
     | 
| 
      
 73 
     | 
    
         
            +
                    Whether to log the generated code and errors. Defaults to False.
         
     | 
| 
      
 74 
     | 
    
         
            +
                log_path : str, optional
         
     | 
| 
      
 75 
     | 
    
         
            +
                    Directory path for storing log files. Defaults to None.
         
     | 
| 
      
 76 
     | 
    
         
            +
                file_name : str, optional
         
     | 
| 
      
 77 
     | 
    
         
            +
                    Name of the file for saving the generated response. Defaults to "data_cleaner.py".
         
     | 
| 
      
 78 
     | 
    
         
            +
                function_name : str, optional
         
     | 
| 
      
 79 
     | 
    
         
            +
                    Name of the generated data cleaning function. Defaults to "data_cleaner".
         
     | 
| 
      
 80 
     | 
    
         
            +
                overwrite : bool, optional
         
     | 
| 
      
 81 
     | 
    
         
            +
                    Whether to overwrite the log file if it exists. If False, a unique file name is created. Defaults to True.
         
     | 
| 
      
 82 
     | 
    
         
            +
                human_in_the_loop : bool, optional
         
     | 
| 
      
 83 
     | 
    
         
            +
                    Enables user review of data cleaning instructions. Defaults to False.
         
     | 
| 
      
 84 
     | 
    
         
            +
                bypass_recommended_steps : bool, optional
         
     | 
| 
      
 85 
     | 
    
         
            +
                    If True, skips the default recommended cleaning steps. Defaults to False.
         
     | 
| 
      
 86 
     | 
    
         
            +
                bypass_explain_code : bool, optional
         
     | 
| 
      
 87 
     | 
    
         
            +
                    If True, skips the step that provides code explanations. Defaults to False.
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
                Methods
         
     | 
| 
      
 90 
     | 
    
         
            +
                -------
         
     | 
| 
      
 91 
     | 
    
         
            +
                update_params(**kwargs)
         
     | 
| 
      
 92 
     | 
    
         
            +
                    Updates the agent's parameters and rebuilds the compiled state graph.
         
     | 
| 
      
 93 
     | 
    
         
            +
                ainvoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
         
     | 
| 
      
 94 
     | 
    
         
            +
                    Cleans the provided dataset asynchronously based on user instructions.
         
     | 
| 
      
 95 
     | 
    
         
            +
                invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
         
     | 
| 
      
 96 
     | 
    
         
            +
                    Cleans the provided dataset synchronously based on user instructions.
         
     | 
| 
      
 97 
     | 
    
         
            +
                get_workflow_summary()
         
     | 
| 
      
 98 
     | 
    
         
            +
                    Retrieves a summary of the agent's workflow.
         
     | 
| 
      
 99 
     | 
    
         
            +
                get_log_summary()
         
     | 
| 
      
 100 
     | 
    
         
            +
                    Retrieves a summary of logged operations if logging is enabled.
         
     | 
| 
      
 101 
     | 
    
         
            +
                get_state_keys()
         
     | 
| 
      
 102 
     | 
    
         
            +
                    Returns a list of keys from the state graph response.
         
     | 
| 
      
 103 
     | 
    
         
            +
                get_state_properties()
         
     | 
| 
      
 104 
     | 
    
         
            +
                    Returns detailed properties of the state graph response.
         
     | 
| 
      
 105 
     | 
    
         
            +
                get_data_cleaned()
         
     | 
| 
      
 106 
     | 
    
         
            +
                    Retrieves the cleaned dataset as a pandas DataFrame.
         
     | 
| 
      
 107 
     | 
    
         
            +
                get_data_raw()
         
     | 
| 
      
 108 
     | 
    
         
            +
                    Retrieves the raw dataset as a pandas DataFrame.
         
     | 
| 
      
 109 
     | 
    
         
            +
                get_data_cleaner_function()
         
     | 
| 
      
 110 
     | 
    
         
            +
                    Retrieves the generated Python function used for cleaning the data.
         
     | 
| 
      
 111 
     | 
    
         
            +
                get_recommended_cleaning_steps()
         
     | 
| 
      
 112 
     | 
    
         
            +
                    Retrieves the agent's recommended cleaning steps.
         
     | 
| 
      
 113 
     | 
    
         
            +
                get_response()
         
     | 
| 
      
 114 
     | 
    
         
            +
                    Returns the response from the agent as a dictionary.
         
     | 
| 
      
 115 
     | 
    
         
            +
                show()
         
     | 
| 
      
 116 
     | 
    
         
            +
                    Displays the agent's mermaid diagram.
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
                Examples
         
     | 
| 
      
 119 
     | 
    
         
            +
                --------
         
     | 
| 
      
 120 
     | 
    
         
            +
                ```python
         
     | 
| 
      
 121 
     | 
    
         
            +
                import pandas as pd
         
     | 
| 
      
 122 
     | 
    
         
            +
                from langchain_openai import ChatOpenAI
         
     | 
| 
      
 123 
     | 
    
         
            +
                from ai_data_science_team.agents import DataCleaningAgent
         
     | 
| 
      
 124 
     | 
    
         
            +
             
     | 
| 
      
 125 
     | 
    
         
            +
                llm = ChatOpenAI(model="gpt-4o-mini")
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
                data_cleaning_agent = DataCleaningAgent(
         
     | 
| 
      
 128 
     | 
    
         
            +
                    model=llm, n_samples=50, log=True, log_path="logs", human_in_the_loop=True
         
     | 
| 
      
 129 
     | 
    
         
            +
                )
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
                df = pd.read_csv("https://raw.githubusercontent.com/business-science/ai-data-science-team/refs/heads/master/data/churn_data.csv")
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                data_cleaning_agent.invoke_agent(
         
     | 
| 
      
 134 
     | 
    
         
            +
                    user_instructions="Don't remove outliers when cleaning the data.",
         
     | 
| 
      
 135 
     | 
    
         
            +
                    data_raw=df,
         
     | 
| 
      
 136 
     | 
    
         
            +
                    max_retries=3,
         
     | 
| 
      
 137 
     | 
    
         
            +
                    retry_count=0
         
     | 
| 
      
 138 
     | 
    
         
            +
                )
         
     | 
| 
      
 139 
     | 
    
         
            +
             
     | 
| 
      
 140 
     | 
    
         
            +
                cleaned_data = data_cleaning_agent.get_data_cleaned()
         
     | 
| 
      
 141 
     | 
    
         
            +
                
         
     | 
| 
      
 142 
     | 
    
         
            +
                response = data_cleaning_agent.response
         
     | 
| 
      
 143 
     | 
    
         
            +
                ```
         
     | 
| 
      
 144 
     | 
    
         
            +
                
         
     | 
| 
      
 145 
     | 
    
         
            +
                Returns
         
     | 
| 
      
 146 
     | 
    
         
            +
                --------
         
     | 
| 
      
 147 
     | 
    
         
            +
                DataCleaningAgent : langchain.graphs.CompiledStateGraph 
         
     | 
| 
      
 148 
     | 
    
         
            +
                    A data cleaning agent implemented as a compiled state graph. 
         
     | 
| 
      
 149 
     | 
    
         
            +
                """
         
     | 
| 
       42 
150 
     | 
    
         | 
| 
       43 
151 
     | 
    
         
             
                def __init__(
         
     | 
| 
       44 
152 
     | 
    
         
             
                    self, 
         
     | 
| 
         @@ -47,6 +155,7 @@ class DataCleaningAgent(CompiledStateGraph): 
     | 
|
| 
       47 
155 
     | 
    
         
             
                    log=False, 
         
     | 
| 
       48 
156 
     | 
    
         
             
                    log_path=None, 
         
     | 
| 
       49 
157 
     | 
    
         
             
                    file_name="data_cleaner.py", 
         
     | 
| 
      
 158 
     | 
    
         
            +
                    function_name="data_cleaner",
         
     | 
| 
       50 
159 
     | 
    
         
             
                    overwrite=True, 
         
     | 
| 
       51 
160 
     | 
    
         
             
                    human_in_the_loop=False, 
         
     | 
| 
       52 
161 
     | 
    
         
             
                    bypass_recommended_steps=False, 
         
     | 
| 
         @@ -58,6 +167,7 @@ class DataCleaningAgent(CompiledStateGraph): 
     | 
|
| 
       58 
167 
     | 
    
         
             
                        "log": log,
         
     | 
| 
       59 
168 
     | 
    
         
             
                        "log_path": log_path,
         
     | 
| 
       60 
169 
     | 
    
         
             
                        "file_name": file_name,
         
     | 
| 
      
 170 
     | 
    
         
            +
                        "function_name": function_name,
         
     | 
| 
       61 
171 
     | 
    
         
             
                        "overwrite": overwrite,
         
     | 
| 
       62 
172 
     | 
    
         
             
                        "human_in_the_loop": human_in_the_loop,
         
     | 
| 
       63 
173 
     | 
    
         
             
                        "bypass_recommended_steps": bypass_recommended_steps,
         
     | 
| 
         @@ -67,102 +177,104 @@ class DataCleaningAgent(CompiledStateGraph): 
     | 
|
| 
       67 
177 
     | 
    
         
             
                    self.response = None
         
     | 
| 
       68 
178 
     | 
    
         | 
| 
       69 
179 
     | 
    
         
             
                def _make_compiled_graph(self):
         
     | 
| 
       70 
     | 
    
         
            -
                    self.response = None
         
     | 
| 
       71 
     | 
    
         
            -
                    return make_data_cleaning_agent(**self._params)
         
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
                def update_params(self, **kwargs):
         
     | 
| 
       74 
180 
     | 
    
         
             
                    """
         
     | 
| 
       75 
     | 
    
         
            -
                     
     | 
| 
       76 
     | 
    
         
            -
                    e.g. agent.update_params(model=new_llm, n_samples=100)
         
     | 
| 
      
 181 
     | 
    
         
            +
                    Create the compiled graph for the data cleaning agent. Running this method will reset the response to None.
         
     | 
| 
       77 
182 
     | 
    
         
             
                    """
         
     | 
| 
       78 
     | 
    
         
            -
                    self. 
     | 
| 
       79 
     | 
    
         
            -
                     
     | 
| 
      
 183 
     | 
    
         
            +
                    self.response=None
         
     | 
| 
      
 184 
     | 
    
         
            +
                    return make_data_cleaning_agent(**self._params)
         
     | 
| 
       80 
185 
     | 
    
         | 
| 
       81 
     | 
    
         
            -
                def  
     | 
| 
       82 
     | 
    
         
            -
                    """
         
     | 
| 
       83 
     | 
    
         
            -
                    Delegate attribute access to `_compiled_graph` if `name` is not
         
     | 
| 
       84 
     | 
    
         
            -
                    found in this instance. This 'inherits' methods from the compiled graph.
         
     | 
| 
       85 
     | 
    
         
            -
                    """
         
     | 
| 
       86 
     | 
    
         
            -
                    return getattr(self._compiled_graph, name)
         
     | 
| 
       87 
     | 
    
         
            -
                
         
     | 
| 
       88 
     | 
    
         
            -
                def ainvoke(self, user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0):
         
     | 
| 
      
 186 
     | 
    
         
            +
                def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
         
     | 
| 
       89 
187 
     | 
    
         
             
                    """
         
     | 
| 
       90 
     | 
    
         
            -
                     
     | 
| 
      
 188 
     | 
    
         
            +
                    Asynchronously invokes the agent. The response is stored in the response attribute.
         
     | 
| 
       91 
189 
     | 
    
         | 
| 
       92 
190 
     | 
    
         
             
                    Parameters:
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
                        data_raw (pd.DataFrame):  
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
                         
     | 
| 
      
 191 
     | 
    
         
            +
                    ----------
         
     | 
| 
      
 192 
     | 
    
         
            +
                        data_raw (pd.DataFrame): 
         
     | 
| 
      
 193 
     | 
    
         
            +
                            The raw dataset to be cleaned.
         
     | 
| 
      
 194 
     | 
    
         
            +
                        user_instructions (str): 
         
     | 
| 
      
 195 
     | 
    
         
            +
                            Instructions for data cleaning agent.
         
     | 
| 
      
 196 
     | 
    
         
            +
                        max_retries (int): 
         
     | 
| 
      
 197 
     | 
    
         
            +
                            Maximum retry attempts for cleaning.
         
     | 
| 
      
 198 
     | 
    
         
            +
                        retry_count (int): 
         
     | 
| 
      
 199 
     | 
    
         
            +
                            Current retry attempt.
         
     | 
| 
      
 200 
     | 
    
         
            +
                        **kwargs
         
     | 
| 
      
 201 
     | 
    
         
            +
                            Additional keyword arguments to pass to ainvoke().
         
     | 
| 
       97 
202 
     | 
    
         | 
| 
       98 
203 
     | 
    
         
             
                    Returns:
         
     | 
| 
      
 204 
     | 
    
         
            +
                    --------
         
     | 
| 
       99 
205 
     | 
    
         
             
                        None. The response is stored in the response attribute.
         
     | 
| 
       100 
206 
     | 
    
         
             
                    """
         
     | 
| 
       101 
     | 
    
         
            -
                    response = self.ainvoke({
         
     | 
| 
      
 207 
     | 
    
         
            +
                    response = self._compiled_graph.ainvoke({
         
     | 
| 
       102 
208 
     | 
    
         
             
                        "user_instructions": user_instructions,
         
     | 
| 
       103 
209 
     | 
    
         
             
                        "data_raw": data_raw.to_dict(),
         
     | 
| 
       104 
210 
     | 
    
         
             
                        "max_retries": max_retries,
         
     | 
| 
       105 
211 
     | 
    
         
             
                        "retry_count": retry_count,
         
     | 
| 
       106 
     | 
    
         
            -
                    })
         
     | 
| 
      
 212 
     | 
    
         
            +
                    }, **kwargs)
         
     | 
| 
       107 
213 
     | 
    
         
             
                    self.response = response
         
     | 
| 
       108 
214 
     | 
    
         
             
                    return None
         
     | 
| 
       109 
215 
     | 
    
         | 
| 
       110 
     | 
    
         
            -
                def invoke(self, user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0):
     | 
| 
      
 216 
     | 
    
         
            +
                def invoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
         
     | 
| 
       111 
217 
     | 
    
         
             
                    """
         
     | 
| 
       112 
     | 
    
         
            -
                     
     | 
| 
      
 218 
     | 
    
         
            +
                    Invokes the agent. The response is stored in the response attribute.
         
     | 
| 
       113 
219 
     | 
    
         | 
| 
       114 
220 
     | 
    
         
             
                    Parameters:
         
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                        data_raw (pd.DataFrame):  
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
                         
     | 
| 
      
 221 
     | 
    
         
            +
                    ----------
         
     | 
| 
      
 222 
     | 
    
         
            +
                        data_raw (pd.DataFrame): 
         
     | 
| 
      
 223 
     | 
    
         
            +
                            The raw dataset to be cleaned.
         
     | 
| 
      
 224 
     | 
    
         
            +
                        user_instructions (str): 
         
     | 
| 
      
 225 
     | 
    
         
            +
                            Instructions for data cleaning agent.
         
     | 
| 
      
 226 
     | 
    
         
            +
                        max_retries (int): 
         
     | 
| 
      
 227 
     | 
    
         
            +
                            Maximum retry attempts for cleaning.
         
     | 
| 
      
 228 
     | 
    
         
            +
                        retry_count (int): 
         
     | 
| 
      
 229 
     | 
    
         
            +
                            Current retry attempt.
         
     | 
| 
      
 230 
     | 
    
         
            +
                        **kwargs
         
     | 
| 
      
 231 
     | 
    
         
            +
                            Additional keyword arguments to pass to invoke().
         
     | 
| 
       119 
232 
     | 
    
         | 
| 
       120 
233 
     | 
    
         
             
                    Returns:
         
     | 
| 
      
 234 
     | 
    
         
            +
                    --------
         
     | 
| 
       121 
235 
     | 
    
         
             
                        None. The response is stored in the response attribute.
         
     | 
| 
       122 
236 
     | 
    
         
             
                    """
         
     | 
| 
       123 
     | 
    
         
            -
                    response = self.invoke({
         
     | 
| 
      
 237 
     | 
    
         
            +
                    response = self._compiled_graph.invoke({
         
     | 
| 
       124 
238 
     | 
    
         
             
                        "user_instructions": user_instructions,
         
     | 
| 
       125 
239 
     | 
    
         
             
                        "data_raw": data_raw.to_dict(),
         
     | 
| 
       126 
240 
     | 
    
         
             
                        "max_retries": max_retries,
         
     | 
| 
       127 
241 
     | 
    
         
             
                        "retry_count": retry_count,
         
     | 
| 
       128 
     | 
    
         
            -
                    })
         
     | 
| 
      
 242 
     | 
    
         
            +
                    },**kwargs)
         
     | 
| 
       129 
243 
     | 
    
         
             
                    self.response = response
         
     | 
| 
       130 
244 
     | 
    
         
             
                    return None
         
     | 
| 
       131 
245 
     | 
    
         | 
| 
       132 
     | 
    
         
            -
                def  
     | 
| 
      
 246 
     | 
    
         
            +
                def get_workflow_summary(self, markdown=False):
         
     | 
| 
       133 
247 
     | 
    
         
             
                    """
         
     | 
| 
       134 
     | 
    
         
            -
                     
     | 
| 
       135 
     | 
    
         
            -
             
     | 
| 
       136 
     | 
    
         
            -
                    Returns:
         
     | 
| 
       137 
     | 
    
         
            -
                        str: Explanation of the cleaning steps.
         
     | 
| 
      
 248 
     | 
    
         
            +
                    Retrieves the agent's workflow summary, if logging is enabled.
         
     | 
| 
       138 
249 
     | 
    
         
             
                    """
         
     | 
| 
       139 
     | 
    
         
            -
                     
     | 
| 
       140 
     | 
    
         
            -
             
     | 
| 
       141 
     | 
    
         
            -
             
     | 
| 
       142 
     | 
    
         
            -
             
     | 
| 
      
 250 
     | 
    
         
            +
                    if self.response and self.response.get("messages"):
         
     | 
| 
      
 251 
     | 
    
         
            +
                        summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
         
     | 
| 
      
 252 
     | 
    
         
            +
                        if markdown:
         
     | 
| 
      
 253 
     | 
    
         
            +
                            return Markdown(summary)
         
     | 
| 
      
 254 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 255 
     | 
    
         
            +
                            return summary
         
     | 
| 
      
 256 
     | 
    
         
            +
             
     | 
| 
      
 257 
     | 
    
         
            +
                def get_log_summary(self, markdown=False):
         
     | 
| 
       143 
258 
     | 
    
         
             
                    """
         
     | 
| 
       144 
259 
     | 
    
         
             
                    Logs a summary of the agent's operations, if logging is enabled.
         
     | 
| 
       145 
260 
     | 
    
         
             
                    """
         
     | 
| 
       146 
261 
     | 
    
         
             
                    if self.response:
         
     | 
| 
       147 
     | 
    
         
            -
                        if self. 
     | 
| 
       148 
     | 
    
         
            -
                            log_details = f" 
     | 
| 
       149 
     | 
    
         
            -
             
     | 
| 
       150 
     | 
    
         
            -
             
     | 
| 
       151 
     | 
    
         
            -
             
     | 
| 
       152 
     | 
    
         
            -
             
     | 
| 
       153 
     | 
    
         
            -
             
     | 
| 
       154 
     | 
    
         
            -
             
     | 
| 
       155 
     | 
    
         
            -
             
     | 
| 
       156 
     | 
    
         
            -
             
     | 
| 
       157 
     | 
    
         
            -
             
     | 
| 
       158 
     | 
    
         
            -
             
     | 
| 
       159 
     | 
    
         
            -
                    Returns a list of keys that the state graph returns in a response.
         
     | 
| 
       160 
     | 
    
         
            -
                    """
         
     | 
| 
       161 
     | 
    
         
            -
                    return self.get_output_jsonschema()['properties']
         
     | 
| 
      
 262 
     | 
    
         
            +
                        if self.response.get('data_cleaner_function_path'):
         
     | 
| 
      
 263 
     | 
    
         
            +
                            log_details = f"""
         
     | 
| 
      
 264 
     | 
    
         
            +
            ## Data Cleaning Agent Log Summary:
         
     | 
| 
      
 265 
     | 
    
         
            +
             
     | 
| 
      
 266 
     | 
    
         
            +
            Function Path: {self.response.get('data_cleaner_function_path')}
         
     | 
| 
      
 267 
     | 
    
         
            +
             
     | 
| 
      
 268 
     | 
    
         
            +
            Function Name: {self.response.get('data_cleaner_function_name')}
         
     | 
| 
      
 269 
     | 
    
         
            +
                            """
         
     | 
| 
      
 270 
     | 
    
         
            +
                            if markdown:
         
     | 
| 
      
 271 
     | 
    
         
            +
                                return Markdown(log_details) 
         
     | 
| 
      
 272 
     | 
    
         
            +
                            else:
         
     | 
| 
      
 273 
     | 
    
         
            +
                                return log_details
         
     | 
| 
       162 
274 
     | 
    
         | 
| 
       163 
275 
     | 
    
         
             
                def get_data_cleaned(self):
         
     | 
| 
       164 
276 
     | 
    
         
             
                    """
         
     | 
| 
       165 
     | 
    
         
            -
                    Retrieves the cleaned data stored after running  
     | 
| 
      
 277 
     | 
    
         
            +
                    Retrieves the cleaned data stored after running invoke_agent or clean_data methods.
         
     | 
| 
       166 
278 
     | 
    
         
             
                    """
         
     | 
| 
       167 
279 
     | 
    
         
             
                    if self.response:
         
     | 
| 
       168 
280 
     | 
    
         
             
                        return pd.DataFrame(self.response.get("data_cleaned"))
         
     | 
| 
         @@ -174,15 +286,25 @@ class DataCleaningAgent(CompiledStateGraph): 
     | 
|
| 
       174 
286 
     | 
    
         
             
                    if self.response:
         
     | 
| 
       175 
287 
     | 
    
         
             
                        return pd.DataFrame(self.response.get("data_raw"))
         
     | 
| 
       176 
288 
     | 
    
         | 
| 
       177 
     | 
    
         
            -
                def get_data_cleaner_function(self):
         
     | 
| 
      
 289 
     | 
    
         
            +
                def get_data_cleaner_function(self, markdown=False):
         
     | 
| 
       178 
290 
     | 
    
         
             
                    """
         
     | 
| 
       179 
291 
     | 
    
         
             
                    Retrieves the agent's pipeline function.
         
     | 
| 
       180 
292 
     | 
    
         
             
                    """
         
     | 
| 
       181 
293 
     | 
    
         
             
                    if self.response:
         
     | 
| 
       182 
     | 
    
         
            -
                         
     | 
| 
       183 
     | 
    
         
            -
             
     | 
| 
       184 
     | 
    
         
            -
             
     | 
| 
       185 
     | 
    
         
            -
             
     | 
| 
      
 294 
     | 
    
         
            +
                        if markdown:
         
     | 
| 
      
 295 
     | 
    
         
            +
                            return Markdown(f"```python\n{self.response.get('data_cleaner_function')}\n```")
         
     | 
| 
      
 296 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 297 
     | 
    
         
            +
                            return self.response.get("data_cleaner_function")
         
     | 
| 
      
 298 
     | 
    
         
            +
                        
         
     | 
| 
      
 299 
     | 
    
         
            +
                def get_recommended_cleaning_steps(self, markdown=False):
         
     | 
| 
      
 300 
     | 
    
         
            +
                    """
         
     | 
| 
      
 301 
     | 
    
         
            +
                    Retrieves the agent's recommended cleaning steps
         
     | 
| 
      
 302 
     | 
    
         
            +
                    """
         
     | 
| 
      
 303 
     | 
    
         
            +
                    if self.response:
         
     | 
| 
      
 304 
     | 
    
         
            +
                        if markdown:
         
     | 
| 
      
 305 
     | 
    
         
            +
                            return Markdown(self.response.get('recommended_steps'))
         
     | 
| 
      
 306 
     | 
    
         
            +
                        else:
         
     | 
| 
      
 307 
     | 
    
         
            +
                            return self.response.get('recommended_steps')
         
     | 
| 
       186 
308 
     | 
    
         | 
| 
       187 
309 
     | 
    
         | 
| 
       188 
310 
     | 
    
         | 
| 
         @@ -194,6 +316,7 @@ def make_data_cleaning_agent( 
     | 
|
| 
       194 
316 
     | 
    
         
             
                log=False, 
         
     | 
| 
       195 
317 
     | 
    
         
             
                log_path=None, 
         
     | 
| 
       196 
318 
     | 
    
         
             
                file_name="data_cleaner.py",
         
     | 
| 
      
 319 
     | 
    
         
            +
                function_name="data_cleaner",
         
     | 
| 
       197 
320 
     | 
    
         
             
                overwrite = True, 
         
     | 
| 
       198 
321 
     | 
    
         
             
                human_in_the_loop=False, 
         
     | 
| 
       199 
322 
     | 
    
         
             
                bypass_recommended_steps=False, 
         
     | 
| 
         @@ -235,6 +358,8 @@ def make_data_cleaning_agent( 
     | 
|
| 
       235 
358 
     | 
    
         
             
                    "logs/".
         
     | 
| 
       236 
359 
     | 
    
         
             
                file_name : str, optional
         
     | 
| 
       237 
360 
     | 
    
         
             
                    The name of the file to save the response to. Defaults to "data_cleaner.py".
         
     | 
| 
      
 361 
     | 
    
         
            +
                function_name : str, optional
         
     | 
| 
      
 362 
     | 
    
         
            +
                    The name of the function that will be generated to clean the data. Defaults to "data_cleaner".
         
     | 
| 
       238 
363 
     | 
    
         
             
                overwrite : bool, optional
         
     | 
| 
       239 
364 
     | 
    
         
             
                    Whether or not to overwrite the log file if it already exists. If False, a unique file name will be created. 
         
     | 
| 
       240 
365 
     | 
    
         
             
                    Defaults to True.
         
     | 
| 
         @@ -275,6 +400,11 @@ def make_data_cleaning_agent( 
     | 
|
| 
       275 
400 
     | 
    
         
             
                """
         
     | 
| 
       276 
401 
     | 
    
         
             
                llm = model
         
     | 
| 
       277 
402 
     | 
    
         | 
| 
      
 403 
     | 
    
         
            +
                # Human in th loop requires recommended steps
         
     | 
| 
      
 404 
     | 
    
         
            +
                if bypass_recommended_steps and human_in_the_loop:
         
     | 
| 
      
 405 
     | 
    
         
            +
                    bypass_recommended_steps = False
         
     | 
| 
      
 406 
     | 
    
         
            +
                    print("Bypass recommended steps set to False to enable human in the loop.")
         
     | 
| 
      
 407 
     | 
    
         
            +
                
         
     | 
| 
       278 
408 
     | 
    
         
             
                # Setup Log Directory
         
     | 
| 
       279 
409 
     | 
    
         
             
                if log:
         
     | 
| 
       280 
410 
     | 
    
         
             
                    if log_path is None:
         
     | 
| 
         @@ -292,6 +422,7 @@ def make_data_cleaning_agent( 
     | 
|
| 
       292 
422 
     | 
    
         
             
                    all_datasets_summary: str
         
     | 
| 
       293 
423 
     | 
    
         
             
                    data_cleaner_function: str
         
     | 
| 
       294 
424 
     | 
    
         
             
                    data_cleaner_function_path: str
         
     | 
| 
      
 425 
     | 
    
         
            +
                    data_cleaner_file_name: str
         
     | 
| 
       295 
426 
     | 
    
         
             
                    data_cleaner_function_name: str
         
     | 
| 
       296 
427 
     | 
    
         
             
                    data_cleaner_error: str
         
     | 
| 
       297 
428 
     | 
    
         
             
                    max_retries: int
         
     | 
| 
         @@ -342,7 +473,7 @@ def make_data_cleaning_agent( 
     | 
|
| 
       342 
473 
     | 
    
         
             
                        Below are summaries of all datasets provided:
         
     | 
| 
       343 
474 
     | 
    
         
             
                        {all_datasets_summary}
         
     | 
| 
       344 
475 
     | 
    
         | 
| 
       345 
     | 
    
         
            -
                        Return  
     | 
| 
      
 476 
     | 
    
         
            +
                        Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
         
     | 
| 
       346 
477 
     | 
    
         | 
| 
       347 
478 
     | 
    
         
             
                        Avoid these:
         
     | 
| 
       348 
479 
     | 
    
         
             
                        1. Do not include steps to save files.
         
     | 
| 
         @@ -366,7 +497,7 @@ def make_data_cleaning_agent( 
     | 
|
| 
       366 
497 
     | 
    
         
             
                    }) 
         
     | 
| 
       367 
498 
     | 
    
         | 
| 
       368 
499 
     | 
    
         
             
                    return {
         
     | 
| 
       369 
     | 
    
         
            -
                        "recommended_steps": " 
     | 
| 
      
 500 
     | 
    
         
            +
                        "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended Data Cleaning Steps:"),
         
     | 
| 
       370 
501 
     | 
    
         
             
                        "all_datasets_summary": all_datasets_summary_str
         
     | 
| 
       371 
502 
     | 
    
         
             
                    }
         
     | 
| 
       372 
503 
     | 
    
         | 
| 
         @@ -386,42 +517,44 @@ def make_data_cleaning_agent( 
     | 
|
| 
       386 
517 
     | 
    
         
             
                    else:
         
     | 
| 
       387 
518 
     | 
    
         
             
                        all_datasets_summary_str = state.get("all_datasets_summary")
         
     | 
| 
       388 
519 
     | 
    
         | 
| 
      
 520 
     | 
    
         
            +
                    
         
     | 
| 
       389 
521 
     | 
    
         
             
                    data_cleaning_prompt = PromptTemplate(
         
     | 
| 
       390 
522 
     | 
    
         
             
                        template="""
         
     | 
| 
       391 
     | 
    
         
            -
                        You are a Data Cleaning Agent. Your job is to create a  
     | 
| 
       392 
     | 
    
         
            -
             
     | 
| 
      
 523 
     | 
    
         
            +
                        You are a Data Cleaning Agent. Your job is to create a {function_name}() function that can be run on the data provided using the following recommended steps.
         
     | 
| 
      
 524 
     | 
    
         
            +
             
     | 
| 
       393 
525 
     | 
    
         
             
                        Recommended Steps:
         
     | 
| 
       394 
526 
     | 
    
         
             
                        {recommended_steps}
         
     | 
| 
       395 
     | 
    
         
            -
             
     | 
| 
      
 527 
     | 
    
         
            +
             
     | 
| 
       396 
528 
     | 
    
         
             
                        You can use Pandas, Numpy, and Scikit Learn libraries to clean the data.
         
     | 
| 
       397 
     | 
    
         
            -
             
     | 
| 
      
 529 
     | 
    
         
            +
             
     | 
| 
       398 
530 
     | 
    
         
             
                        Below are summaries of all datasets provided. Use this information about the data to help determine how to clean the data:
         
     | 
| 
       399 
531 
     | 
    
         | 
| 
       400 
532 
     | 
    
         
             
                        {all_datasets_summary}
         
     | 
| 
       401 
     | 
    
         
            -
             
     | 
| 
       402 
     | 
    
         
            -
                        Return Python code in ```python 
     | 
| 
       403 
     | 
    
         
            -
             
     | 
| 
      
 533 
     | 
    
         
            +
             
     | 
| 
      
 534 
     | 
    
         
            +
                        Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), that includes all imports inside the function.
         
     | 
| 
      
 535 
     | 
    
         
            +
             
     | 
| 
       404 
536 
     | 
    
         
             
                        Return code to provide the data cleaning function:
         
     | 
| 
       405 
     | 
    
         
            -
             
     | 
| 
       406 
     | 
    
         
            -
                        def  
     | 
| 
      
 537 
     | 
    
         
            +
             
     | 
| 
      
 538 
     | 
    
         
            +
                        def {function_name}(data_raw):
         
     | 
| 
       407 
539 
     | 
    
         
             
                            import pandas as pd
         
     | 
| 
       408 
540 
     | 
    
         
             
                            import numpy as np
         
     | 
| 
       409 
541 
     | 
    
         
             
                            ...
         
     | 
| 
       410 
542 
     | 
    
         
             
                            return data_cleaned
         
     | 
| 
       411 
     | 
    
         
            -
             
     | 
| 
      
 543 
     | 
    
         
            +
             
     | 
| 
       412 
544 
     | 
    
         
             
                        Best Practices and Error Preventions:
         
     | 
| 
       413 
     | 
    
         
            -
             
     | 
| 
      
 545 
     | 
    
         
            +
             
     | 
| 
       414 
546 
     | 
    
         
             
                        Always ensure that when assigning the output of fit_transform() from SimpleImputer to a Pandas DataFrame column, you call .ravel() or flatten the array, because fit_transform() returns a 2D array while a DataFrame column is 1D.
         
     | 
| 
       415 
547 
     | 
    
         | 
| 
       416 
548 
     | 
    
         
             
                        """,
         
     | 
| 
       417 
     | 
    
         
            -
                        input_variables=["recommended_steps", "all_datasets_summary"]
         
     | 
| 
      
 549 
     | 
    
         
            +
                        input_variables=["recommended_steps", "all_datasets_summary", "function_name"]
         
     | 
| 
       418 
550 
     | 
    
         
             
                    )
         
     | 
| 
       419 
551 
     | 
    
         | 
| 
       420 
552 
     | 
    
         
             
                    data_cleaning_agent = data_cleaning_prompt | llm | PythonOutputParser()
         
     | 
| 
       421 
553 
     | 
    
         | 
| 
       422 
554 
     | 
    
         
             
                    response = data_cleaning_agent.invoke({
         
     | 
| 
       423 
555 
     | 
    
         
             
                        "recommended_steps": state.get("recommended_steps"),
         
     | 
| 
       424 
     | 
    
         
            -
                        "all_datasets_summary": all_datasets_summary_str
         
     | 
| 
      
 556 
     | 
    
         
            +
                        "all_datasets_summary": all_datasets_summary_str,
         
     | 
| 
      
 557 
     | 
    
         
            +
                        "function_name": function_name
         
     | 
| 
       425 
558 
     | 
    
         
             
                    })
         
     | 
| 
       426 
559 
     | 
    
         | 
| 
       427 
560 
     | 
    
         
             
                    response = relocate_imports_inside_function(response)
         
     | 
| 
         @@ -439,19 +572,37 @@ def make_data_cleaning_agent( 
     | 
|
| 
       439 
572 
     | 
    
         
             
                    return {
         
     | 
| 
       440 
573 
     | 
    
         
             
                        "data_cleaner_function" : response,
         
     | 
| 
       441 
574 
     | 
    
         
             
                        "data_cleaner_function_path": file_path,
         
     | 
| 
       442 
     | 
    
         
            -
                        " 
     | 
| 
      
 575 
     | 
    
         
            +
                        "data_cleaner_file_name": file_name_2,
         
     | 
| 
      
 576 
     | 
    
         
            +
                        "data_cleaner_function_name": function_name,
         
     | 
| 
       443 
577 
     | 
    
         
             
                        "all_datasets_summary": all_datasets_summary_str
         
     | 
| 
       444 
578 
     | 
    
         
             
                    }
         
     | 
| 
      
 579 
     | 
    
         
            +
                    
         
     | 
| 
      
 580 
     | 
    
         
            +
                # Human Review
         
     | 
| 
      
 581 
     | 
    
         
            +
                    
         
     | 
| 
      
 582 
     | 
    
         
            +
                prompt_text_human_review = "Are the following data cleaning instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
         
     | 
| 
       445 
583 
     | 
    
         | 
| 
       446 
     | 
    
         
            -
                 
     | 
| 
       447 
     | 
    
         
            -
                     
     | 
| 
       448 
     | 
    
         
            -
                         
     | 
| 
       449 
     | 
    
         
            -
             
     | 
| 
       450 
     | 
    
         
            -
             
     | 
| 
       451 
     | 
    
         
            -
             
     | 
| 
       452 
     | 
    
         
            -
             
     | 
| 
       453 
     | 
    
         
            -
             
     | 
| 
       454 
     | 
    
         
            -
             
     | 
| 
      
 584 
     | 
    
         
            +
                if not bypass_explain_code:
         
     | 
| 
      
 585 
     | 
    
         
            +
                    def human_review(state: GraphState) -> Command[Literal["recommend_cleaning_steps", "explain_data_cleaner_code"]]:
         
     | 
| 
      
 586 
     | 
    
         
            +
                        return node_func_human_review(
         
     | 
| 
      
 587 
     | 
    
         
            +
                            state=state,
         
     | 
| 
      
 588 
     | 
    
         
            +
                            prompt_text=prompt_text_human_review,
         
     | 
| 
      
 589 
     | 
    
         
            +
                            yes_goto= 'explain_data_cleaner_code',
         
     | 
| 
      
 590 
     | 
    
         
            +
                            no_goto="recommend_cleaning_steps",
         
     | 
| 
      
 591 
     | 
    
         
            +
                            user_instructions_key="user_instructions",
         
     | 
| 
      
 592 
     | 
    
         
            +
                            recommended_steps_key="recommended_steps",
         
     | 
| 
      
 593 
     | 
    
         
            +
                            code_snippet_key="data_cleaner_function",
         
     | 
| 
      
 594 
     | 
    
         
            +
                        )
         
     | 
| 
      
 595 
     | 
    
         
            +
                else:
         
     | 
| 
      
 596 
     | 
    
         
            +
                    def human_review(state: GraphState) -> Command[Literal["recommend_cleaning_steps", "__end__"]]:
         
     | 
| 
      
 597 
     | 
    
         
            +
                        return node_func_human_review(
         
     | 
| 
      
 598 
     | 
    
         
            +
                            state=state,
         
     | 
| 
      
 599 
     | 
    
         
            +
                            prompt_text=prompt_text_human_review,
         
     | 
| 
      
 600 
     | 
    
         
            +
                            yes_goto= '__end__',
         
     | 
| 
      
 601 
     | 
    
         
            +
                            no_goto="recommend_cleaning_steps",
         
     | 
| 
      
 602 
     | 
    
         
            +
                            user_instructions_key="user_instructions",
         
     | 
| 
      
 603 
     | 
    
         
            +
                            recommended_steps_key="recommended_steps",
         
     | 
| 
      
 604 
     | 
    
         
            +
                            code_snippet_key="data_cleaner_function", 
         
     | 
| 
      
 605 
     | 
    
         
            +
                        )
         
     | 
| 
       455 
606 
     | 
    
         | 
| 
       456 
607 
     | 
    
         
             
                def execute_data_cleaner_code(state):
         
     | 
| 
       457 
608 
     | 
    
         
             
                    return node_func_execute_agent_code_on_data(
         
     | 
| 
         @@ -460,7 +611,7 @@ def make_data_cleaning_agent( 
     | 
|
| 
       460 
611 
     | 
    
         
             
                        result_key="data_cleaned",
         
     | 
| 
       461 
612 
     | 
    
         
             
                        error_key="data_cleaner_error",
         
     | 
| 
       462 
613 
     | 
    
         
             
                        code_snippet_key="data_cleaner_function",
         
     | 
| 
       463 
     | 
    
         
            -
                        agent_function_name=" 
     | 
| 
      
 614 
     | 
    
         
            +
                        agent_function_name=state.get("data_cleaner_function_name"),
         
     | 
| 
       464 
615 
     | 
    
         
             
                        pre_processing=lambda data: pd.DataFrame.from_dict(data),
         
     | 
| 
       465 
616 
     | 
    
         
             
                        post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
         
     | 
| 
       466 
617 
     | 
    
         
             
                        error_message_prefix="An error occurred during data cleaning: "
         
     | 
| 
         @@ -468,11 +619,11 @@ def make_data_cleaning_agent( 
     | 
|
| 
       468 
619 
     | 
    
         | 
| 
       469 
620 
     | 
    
         
             
                def fix_data_cleaner_code(state: GraphState):
         
     | 
| 
       470 
621 
     | 
    
         
             
                    data_cleaner_prompt = """
         
     | 
| 
       471 
     | 
    
         
            -
                    You are a Data Cleaning Agent. Your job is to create a  
     | 
| 
      
 622 
     | 
    
         
            +
                    You are a Data Cleaning Agent. Your job is to create a {function_name}() function that can be run on the data provided. The function is currently broken and needs to be fixed.
         
     | 
| 
       472 
623 
     | 
    
         | 
| 
       473 
     | 
    
         
            -
                    Make sure to only return the function definition for  
     | 
| 
      
 624 
     | 
    
         
            +
                    Make sure to only return the function definition for {function_name}().
         
     | 
| 
       474 
625 
     | 
    
         | 
| 
       475 
     | 
    
         
            -
                    Return Python code in ```python``` format with a single function definition,  
     | 
| 
      
 626 
     | 
    
         
            +
                    Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), that includes all imports inside the function.
         
     | 
| 
       476 
627 
     | 
    
         | 
| 
       477 
628 
     | 
    
         
             
                    This is the broken code (please fix): 
         
     | 
| 
       478 
629 
     | 
    
         
             
                    {code_snippet}
         
     | 
| 
         @@ -490,34 +641,34 @@ def make_data_cleaning_agent( 
     | 
|
| 
       490 
641 
     | 
    
         
             
                        agent_name=AGENT_NAME,
         
     | 
| 
       491 
642 
     | 
    
         
             
                        log=log,
         
     | 
| 
       492 
643 
     | 
    
         
             
                        file_path=state.get("data_cleaner_function_path"),
         
     | 
| 
      
 644 
     | 
    
         
            +
                        function_name=state.get("data_cleaner_function_name"),
         
     | 
| 
       493 
645 
     | 
    
         
             
                    )
         
     | 
| 
       494 
646 
     | 
    
         | 
| 
       495 
     | 
    
         
            -
                 
     | 
| 
       496 
     | 
    
         
            -
             
     | 
| 
      
 647 
     | 
    
         
            +
                # Final reporting node
         
     | 
| 
      
 648 
     | 
    
         
            +
                def report_agent_outputs(state: GraphState):
         
     | 
| 
      
 649 
     | 
    
         
            +
                    return node_func_report_agent_outputs(
         
     | 
| 
       497 
650 
     | 
    
         
             
                        state=state,
         
     | 
| 
       498 
     | 
    
         
            -
                         
     | 
| 
      
 651 
     | 
    
         
            +
                        keys_to_include=[
         
     | 
| 
      
 652 
     | 
    
         
            +
                            "recommended_steps",
         
     | 
| 
      
 653 
     | 
    
         
            +
                            "data_cleaner_function",
         
     | 
| 
      
 654 
     | 
    
         
            +
                            "data_cleaner_function_path",
         
     | 
| 
      
 655 
     | 
    
         
            +
                            "data_cleaner_function_name",
         
     | 
| 
      
 656 
     | 
    
         
            +
                            "data_cleaner_error",
         
     | 
| 
      
 657 
     | 
    
         
            +
                        ],
         
     | 
| 
       499 
658 
     | 
    
         
             
                        result_key="messages",
         
     | 
| 
       500 
     | 
    
         
            -
                        error_key="data_cleaner_error",
         
     | 
| 
       501 
     | 
    
         
            -
                        llm=llm,  
         
     | 
| 
       502 
659 
     | 
    
         
             
                        role=AGENT_NAME,
         
     | 
| 
       503 
     | 
    
         
            -
                         
     | 
| 
       504 
     | 
    
         
            -
                        Explain the data cleaning steps that the data cleaning agent performed in this function. 
         
     | 
| 
       505 
     | 
    
         
            -
                        Keep the summary succinct and to the point.\n\n# Data Cleaning Agent:\n\n{code}
         
     | 
| 
       506 
     | 
    
         
            -
                        """,
         
     | 
| 
       507 
     | 
    
         
            -
                        success_prefix="# Data Cleaning Agent:\n\n ",
         
     | 
| 
       508 
     | 
    
         
            -
                        error_message="The Data Cleaning Agent encountered an error during data cleaning. Data could not be explained."
         
     | 
| 
      
 660 
     | 
    
         
            +
                        custom_title="Data Cleaning Agent Outputs"
         
     | 
| 
       509 
661 
     | 
    
         
             
                    )
         
     | 
| 
       510 
     | 
    
         
            -
             
     | 
| 
       511 
     | 
    
         
            -
                # Define the graph
         
     | 
| 
      
 662 
     | 
    
         
            +
             
     | 
| 
       512 
663 
     | 
    
         
             
                node_functions = {
         
     | 
| 
       513 
664 
     | 
    
         
             
                    "recommend_cleaning_steps": recommend_cleaning_steps,
         
     | 
| 
       514 
665 
     | 
    
         
             
                    "human_review": human_review,
         
     | 
| 
       515 
666 
     | 
    
         
             
                    "create_data_cleaner_code": create_data_cleaner_code,
         
     | 
| 
       516 
667 
     | 
    
         
             
                    "execute_data_cleaner_code": execute_data_cleaner_code,
         
     | 
| 
       517 
668 
     | 
    
         
             
                    "fix_data_cleaner_code": fix_data_cleaner_code,
         
     | 
| 
       518 
     | 
    
         
            -
                    " 
     | 
| 
      
 669 
     | 
    
         
            +
                    "report_agent_outputs": report_agent_outputs, 
         
     | 
| 
       519 
670 
     | 
    
         
             
                }
         
     | 
| 
       520 
     | 
    
         
            -
             
     | 
| 
      
 671 
     | 
    
         
            +
             
     | 
| 
       521 
672 
     | 
    
         
             
                app = create_coding_agent_graph(
         
     | 
| 
       522 
673 
     | 
    
         
             
                    GraphState=GraphState,
         
     | 
| 
       523 
674 
     | 
    
         
             
                    node_functions=node_functions,
         
     | 
| 
         @@ -525,16 +676,17 @@ def make_data_cleaning_agent( 
     | 
|
| 
       525 
676 
     | 
    
         
             
                    create_code_node_name="create_data_cleaner_code",
         
     | 
| 
       526 
677 
     | 
    
         
             
                    execute_code_node_name="execute_data_cleaner_code",
         
     | 
| 
       527 
678 
     | 
    
         
             
                    fix_code_node_name="fix_data_cleaner_code",
         
     | 
| 
       528 
     | 
    
         
            -
                    explain_code_node_name=" 
     | 
| 
      
 679 
     | 
    
         
            +
                    explain_code_node_name="report_agent_outputs", 
         
     | 
| 
       529 
680 
     | 
    
         
             
                    error_key="data_cleaner_error",
         
     | 
| 
       530 
     | 
    
         
            -
                    human_in_the_loop=human_in_the_loop, 
     | 
| 
      
 681 
     | 
    
         
            +
                    human_in_the_loop=human_in_the_loop,
         
     | 
| 
       531 
682 
     | 
    
         
             
                    human_review_node_name="human_review",
         
     | 
| 
       532 
683 
     | 
    
         
             
                    checkpointer=MemorySaver() if human_in_the_loop else None,
         
     | 
| 
       533 
684 
     | 
    
         
             
                    bypass_recommended_steps=bypass_recommended_steps,
         
     | 
| 
       534 
685 
     | 
    
         
             
                    bypass_explain_code=bypass_explain_code,
         
     | 
| 
       535 
686 
     | 
    
         
             
                )
         
     | 
| 
       536 
     | 
    
         
            -
             
     | 
| 
      
 687 
     | 
    
         
            +
             
     | 
| 
       537 
688 
     | 
    
         
             
                return app
         
     | 
| 
      
 689 
     | 
    
         
            +
                 
         
     | 
| 
       538 
690 
     | 
    
         | 
| 
       539 
691 
     | 
    
         | 
| 
       540 
692 
     | 
    
         |