PyPI - ai-data-science-team - Versions diffs - 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9007py3-none-any.whl → 0.0.0.9009py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

ai_data_science_team/_version.py +1 -1
ai_data_science_team/agents/__init__.py +4 -5
ai_data_science_team/agents/data_cleaning_agent.py +268 -116
ai_data_science_team/agents/data_visualization_agent.py +470 -41
ai_data_science_team/agents/data_wrangling_agent.py +471 -31
ai_data_science_team/agents/feature_engineering_agent.py +426 -41
ai_data_science_team/agents/sql_database_agent.py +458 -58
ai_data_science_team/ml_agents/__init__.py +1 -0
ai_data_science_team/ml_agents/h2o_ml_agent.py +1032 -0
ai_data_science_team/multiagents/__init__.py +1 -0
ai_data_science_team/multiagents/sql_data_analyst.py +398 -0
ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
ai_data_science_team/templates/__init__.py +3 -1
ai_data_science_team/templates/agent_templates.py +319 -43
ai_data_science_team/tools/metadata.py +94 -62
ai_data_science_team/tools/regex.py +86 -1
ai_data_science_team/utils/__init__.py +0 -0
ai_data_science_team/utils/plotly.py +24 -0
ai_data_science_team-0.0.0.9009.dist-info/METADATA +245 -0
ai_data_science_team-0.0.0.9009.dist-info/RECORD +28 -0
ai_data_science_team-0.0.0.9007.dist-info/METADATA +0 -183
ai_data_science_team-0.0.0.9007.dist-info/RECORD +0 -21
{ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE +0 -0
{ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL +0 -0
{ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/top_level.txt +0 -0

ai_data_science_team/agents/sql_database_agent.py CHANGED Viewed

@@ -5,24 +5,33 @@ import operator
 from langchain.prompts import PromptTemplate
 from langchain_core.messages import BaseMessage
+from langchain_core.output_parsers import JsonOutputParser
 from langgraph.types import Command
 from langgraph.checkpoint.memory import MemorySaver
 import os
-import io
+import json
 import pandas as pd
 import sqlalchemy as sql
+from IPython.display import Markdown
 from ai_data_science_team.templates import(
     node_func_execute_agent_from_sql_connection,
     node_func_human_review,
     node_func_fix_agent_code,
-    node_func_explain_agent_code,
-    create_coding_agent_graph
+    node_func_report_agent_outputs,
+    create_coding_agent_graph,
+    BaseAgent,
+)
+from ai_data_science_team.tools.parsers import SQLOutputParser
+from ai_data_science_team.tools.regex import (
+    add_comments_to_top,
+    format_agent_name,
+    format_recommended_steps,
+    get_generic_summary,
 )
-from ai_data_science_team.tools.parsers import PythonOutputParser, SQLOutputParser
-from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name
 from ai_data_science_team.tools.metadata import get_database_metadata
 from ai_data_science_team.tools.logging import log_ai_function
@@ -30,16 +39,333 @@ from ai_data_science_team.tools.logging import log_ai_function
 AGENT_NAME = "sql_database_agent"
 LOG_PATH = os.path.join(os.getcwd(), "logs/")
+# Class
+class SQLDatabaseAgent(BaseAgent):
+    """
+    Creates a SQL Database Agent that can recommend SQL steps and generate SQL code to query a database.
+    The agent can:
+    - Propose recommended steps to answer a user's query or instructions.
+    - Generate a SQL query based on the recommended steps and user instructions.
+    - Execute that SQL query against the provided database connection.
+    - Return the resulting data as a dictionary, suitable for conversion to a DataFrame or other structures.
+    - Log generated code and errors if enabled.
+    Parameters
+    ----------
+    model : ChatOpenAI or langchain.llms.base.LLM
+        The language model used to generate the SQL code.
+    connection : sqlalchemy.engine.base.Engine or sqlalchemy.engine.base.Connection
+        The SQLAlchemy connection (or engine) to the database.
+    n_samples : int, optional
+        Number of sample rows (per column) to retrieve when summarizing database metadata. Defaults to 1.
+    log : bool, optional
+        Whether to log the generated code and errors. Defaults to False.
+    log_path : str, optional
+        Directory path for storing log files. Defaults to None.
+    file_name : str, optional
+        Name of the file for saving the generated response. Defaults to "sql_database.py".
+    function_name : str, optional
+        Name of the Python function that executes the SQL query. Defaults to "sql_database_pipeline".
+    overwrite : bool, optional
+        Whether to overwrite the log file if it exists. If False, a unique file name is created. Defaults to True.
+    human_in_the_loop : bool, optional
+        Enables user review of the recommended steps before generating code. Defaults to False.
+    bypass_recommended_steps : bool, optional
+        If True, skips the step that generates recommended SQL steps. Defaults to False.
+    bypass_explain_code : bool, optional
+        If True, skips the step that provides code explanations. Defaults to False.
+    smart_schema_pruning : bool, optional
+        If True, filters the tables and columns based on the user instructions and recommended steps. Defaults to False.
+    Methods
+    -------
+    update_params(**kwargs)
+        Updates the agent's parameters and rebuilds the compiled state graph.
+    ainvoke_agent(user_instructions: str, max_retries=3, retry_count=0)
+        Asynchronously runs the agent to generate and execute a SQL query based on user instructions.
+    invoke_agent(user_instructions: str, max_retries=3, retry_count=0)
+        Synchronously runs the agent to generate and execute a SQL query based on user instructions.
+    get_workflow_summary()
+        Retrieves a summary of the agent's workflow.
+    get_log_summary()
+        Retrieves a summary of logged operations if logging is enabled.
+    get_data_sql()
+        Retrieves the resulting data from the SQL query as a dictionary.
+        (You can convert this to a DataFrame if desired.)
+    get_sql_query_code()
+        Retrieves the exact SQL query generated by the agent.
+    get_sql_database_function()
+        Retrieves the Python function that executes the SQL query.
+    get_recommended_sql_steps()
+        Retrieves the recommended steps for querying the SQL database.
+    get_response()
+        Returns the full response dictionary from the agent.
+    show()
+        Displays the agent's mermaid diagram for visual inspection of the compiled graph.
+    Examples
+    --------
+    ```python
+    import sqlalchemy as sql
+    from langchain_openai import ChatOpenAI
+    from ai_data_science_team.agents import SQLDatabaseAgent
+    # Create the engine/connection
+    sql_engine = sql.create_engine("sqlite:///data/my_database.db")
+    conn = sql_engine.connect()
+    llm = ChatOpenAI(model="gpt-4o-mini")
+    sql_database_agent = SQLDatabaseAgent(
+        model=llm,
+        connection=conn,
+        n_samples=10,
+        log=True,
+        log_path="logs",
+        human_in_the_loop=True
+    )
+    # Example usage
+    sql_database_agent.invoke_agent(
+        user_instructions="List all the tables in the database.",
+        max_retries=3,
+        retry_count=0
+    )
+    data_result = sql_database_agent.get_data_sql()  # dictionary of rows returned
+    sql_code = sql_database_agent.get_sql_query_code()
+    response = sql_database_agent.get_response()
+    ```
+    Returns
+    -------
+    SQLDatabaseAgent : langchain.graphs.CompiledStateGraph
+        A SQL database agent implemented as a compiled state graph.
+    """
+    def __init__(
+        self,
+        model,
+        connection,
+        n_samples=1,
+        log=False,
+        log_path=None,
+        file_name="sql_database.py",
+        function_name="sql_database_pipeline",
+        overwrite=True,
+        human_in_the_loop=False,
+        bypass_recommended_steps=False,
+        bypass_explain_code=False,
+        smart_schema_pruning=False,
+    ):
+        self._params = {
+            "model": model,
+            "connection": connection,
+            "n_samples": n_samples,
+            "log": log,
+            "log_path": log_path,
+            "file_name": file_name,
+            "function_name": function_name,
+            "overwrite": overwrite,
+            "human_in_the_loop": human_in_the_loop,
+            "bypass_recommended_steps": bypass_recommended_steps,
+            "bypass_explain_code": bypass_explain_code,
+            "smart_schema_pruning": smart_schema_pruning,
+        }
+        self._compiled_graph = self._make_compiled_graph()
+        self.response = None
+    def _make_compiled_graph(self):
+        """
+        Create or rebuild the compiled graph for the SQL Database Agent.
+        Running this method resets the response to None.
+        """
+        self.response = None
+        return make_sql_database_agent(**self._params)
+    def update_params(self, **kwargs):
+        """
+        Updates the agent's parameters (e.g. connection, n_samples, log, etc.)
+        and rebuilds the compiled graph.
+        """
+        for k, v in kwargs.items():
+            self._params[k] = v
+        self._compiled_graph = self._make_compiled_graph()
+    def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
+        """
+        Asynchronously runs the SQL Database Agent based on user instructions.
+        Parameters
+        ----------
+        user_instructions : str
+            Instructions for the SQL query or metadata request.
+        max_retries : int, optional
+            Maximum retry attempts. Defaults to 3.
+        retry_count : int, optional
+            Current retry count. Defaults to 0.
+        kwargs : dict
+            Additional keyword arguments to pass to ainvoke().
+        Returns
+        -------
+        None
+        """
+        response = self._compiled_graph.ainvoke({
+            "user_instructions": user_instructions,
+            "max_retries": max_retries,
+            "retry_count": retry_count
+        }, **kwargs)
+        self.response = response
+    def invoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
+        """
+        Synchronously runs the SQL Database Agent based on user instructions.
+        Parameters
+        ----------
+        user_instructions : str
+            Instructions for the SQL query or metadata request.
+        max_retries : int, optional
+            Maximum retry attempts. Defaults to 3.
+        retry_count : int, optional
+            Current retry count. Defaults to 0.
+        kwargs : dict
+            Additional keyword arguments to pass to invoke().
+        Returns
+        -------
+        None
+        """
+        response = self._compiled_graph.invoke({
+            "user_instructions": user_instructions,
+            "max_retries": max_retries,
+            "retry_count": retry_count
+        }, **kwargs)
+        self.response = response
+    def get_workflow_summary(self, markdown=False):
+        """
+        Retrieves the agent's workflow summary, if logging is enabled.
+        """
+        if self.response and self.response.get("messages"):
+            summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
+            if markdown:
+                return Markdown(summary)
+            else:
+                return summary
+    def get_log_summary(self, markdown=False):
+        """
+        Logs a summary of the agent's operations, if logging is enabled.
+        """
+        if self.response:
+            if self.response.get('sql_database_function_path'):
+                log_details = f"""
+## SQL Database Agent Log Summary:
+Function Path: {self.response.get('sql_database_function_path')}
+Function Name: {self.response.get('sql_database_function_name')}
+                """
+                if markdown:
+                    return Markdown(log_details)
+                else:
+                    return log_details
+    def get_data_sql(self):
+        """
+        Retrieves the SQL query result from the agent's response.
+        Returns
+        -------
+        dict or None
+            The returned data as a dictionary of column -> list_of_values,
+            or None if no data is found.
+        """
+        if self.response and "data_sql" in self.response:
+            return pd.DataFrame(self.response["data_sql"])
+        return None
+    def get_sql_query_code(self, markdown=False):
+        """
+        Retrieves the raw SQL query code generated by the agent (if available).
+        Parameters
+        ----------
+        markdown : bool, optional
+            If True, returns the code in a Markdown code block.
+        Returns
+        -------
+        str or None
+            The SQL query as a string, or None if not available.
+        """
+        if self.response and "sql_query_code" in self.response:
+            if markdown:
+                return Markdown(f"```sql\n{self.response['sql_query_code']}\n```")
+            return self.response["sql_query_code"]
+        return None
+    def get_sql_database_function(self, markdown=False):
+        """
+        Retrieves the Python function code used to execute the SQL query.
+        Parameters
+        ----------
+        markdown : bool, optional
+            If True, returns the code in a Markdown code block.
+        Returns
+        -------
+        str or None
+            The function code if available, otherwise None.
+        """
+        if self.response and "sql_database_function" in self.response:
+            code = self.response["sql_database_function"]
+            if markdown:
+                return Markdown(f"```python\n{code}\n```")
+            return code
+        return None
+    def get_recommended_sql_steps(self, markdown=False):
+        """
+        Retrieves the recommended SQL steps from the agent's response.
+        Parameters
+        ----------
+        markdown : bool, optional
+            If True, returns the steps in Markdown format.
+        Returns
+        -------
+        str or None
+            Recommended steps or None if not available.
+        """
+        if self.response and "recommended_steps" in self.response:
+            if markdown:
+                return Markdown(self.response["recommended_steps"])
+            return self.response["recommended_steps"]
+        return None
+# Function
 def make_sql_database_agent(
-    model, connection,
-    n_samples = 10,
+    model,
+    connection,
+    n_samples=1,
     log=False,
     log_path=None,
     file_name="sql_database.py",
+    function_name="sql_database_pipeline",
     overwrite = True,
-    human_in_the_loop=False, bypass_recommended_steps=False,
-    bypass_explain_code=False
+    human_in_the_loop=False,
+    bypass_recommended_steps=False,
+    bypass_explain_code=False,
+    smart_schema_pruning=False,
 ):
     """
     Creates a SQL Database Agent that can recommend SQL steps and generate SQL code to query a database.
@@ -51,7 +377,7 @@ def make_sql_database_agent(
     connection : sqlalchemy.engine.base.Engine
         The connection to the SQL database.
     n_samples : int, optional
-        The number of samples to retrieve for each column, by default 10.
+        The number of samples to retrieve for each column, by default 1.
         If you get an error due to maximum tokens, try reducing this number.
         > "This model's maximum context length is 128000 tokens. However, your messages resulted in 333858 tokens. Please reduce the length of the messages."
     log : bool, optional
@@ -68,6 +394,8 @@ def make_sql_database_agent(
         Bypass the recommendation step, by default False
     bypass_explain_code : bool, optional
         Bypass the code explanation step, by default False.
+    smart_schema_pruning : bool, optional
+        If True, filters the tables and columns with an extra LLM step to reduce tokens for large databases. Increases processing time but can avoid errors due to hitting max token limits with large databases. Defaults to False.
     Returns
     -------
@@ -100,20 +428,26 @@ def make_sql_database_agent(
         "retry_count":0
     })
     ```
     """
-    is_engine = isinstance(connection, sql.engine.base.Engine)
-    conn = connection.connect() if is_engine else connection
     llm = model
+    # Human in th loop requires recommended steps
+    if bypass_recommended_steps and human_in_the_loop:
+        bypass_recommended_steps = False
+        print("Bypass recommended steps set to False to enable human in the loop.")
     # Setup Log Directory
     if log:
         if log_path is None:
             log_path = LOG_PATH
         if not os.path.exists(log_path):
             os.makedirs(log_path)
+    # Get the database metadata
+    is_engine = isinstance(connection, sql.engine.base.Engine)
+    conn = connection.connect() if is_engine else connection
     class GraphState(TypedDict):
         messages: Annotated[Sequence[BaseMessage], operator.add]
@@ -124,6 +458,7 @@ def make_sql_database_agent(
         sql_query_code: str
         sql_database_function: str
         sql_database_function_path: str
+        sql_database_function_file_name: str
         sql_database_function_name: str
         sql_database_error: str
         max_retries: int
@@ -132,6 +467,16 @@ def make_sql_database_agent(
     def recommend_sql_steps(state: GraphState):
         print(format_agent_name(AGENT_NAME))
+        all_sql_database_summary = get_database_metadata(conn, n_samples=n_samples)
+        all_sql_database_summary = smart_schema_filter(
+            llm,
+            state.get("user_instructions"),
+            all_sql_database_summary,
+            smart_filtering=smart_schema_pruning
+        )
         print("    * RECOMMEND STEPS")
@@ -148,6 +493,7 @@ def make_sql_database_agent(
             - If no user instructions are provided, just return the steps needed to understand the database.
             - Take into account the database dialect and the tables and columns in the database.
             - Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+            - IMPORTANT: Pay attention to the table names and column names in the database. Make sure to use the correct table and column names in the SQL code. If a space is present in the table name or column name, make sure to account for it.
             User instructions / Question:
@@ -159,7 +505,7 @@ def make_sql_database_agent(
             Below are summaries of the database metadata and the SQL tables:
             {all_sql_database_summary}
-            Return the steps as a numbered point list (no code, just the steps).
+            Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
             Consider these:
@@ -178,13 +524,6 @@ def make_sql_database_agent(
             input_variables=["user_instructions", "recommended_steps", "all_sql_database_summary"]
         )
-        # Create a connection if needed
-        is_engine = isinstance(connection, sql.engine.base.Engine)
-        conn = connection.connect() if is_engine else connection
-        # Get the database metadata
-        all_sql_database_summary = get_database_metadata(conn, n_samples=n_samples)
         steps_agent = recommend_steps_prompt | llm
         recommended_steps = steps_agent.invoke({
@@ -194,13 +533,22 @@ def make_sql_database_agent(
         })
         return {
-            "recommended_steps": "\n\n# Recommended SQL Database Steps:\n" + recommended_steps.content.strip(),
+            "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended SQL Database Steps:"),
             "all_sql_database_summary": all_sql_database_summary
         }
     def create_sql_query_code(state: GraphState):
         if bypass_recommended_steps:
             print(format_agent_name(AGENT_NAME))
+            all_sql_database_summary = get_database_metadata(conn, n_samples=n_samples)
+            all_sql_database_summary = smart_schema_filter(
+                llm,
+                state.get("user_instructions"),
+                all_sql_database_summary,
+                smart_filtering=smart_schema_pruning
+            )
+        else:
+            all_sql_database_summary = state.get("all_sql_database_summary")
         print("    * CREATE SQL QUERY CODE")
         # Prompt to get the SQL code from the LLM
@@ -216,6 +564,7 @@ def make_sql_database_agent(
             - Only return a single query if possible.
             - Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
             - Pay attention to the SQL dialect from the database summary metadata. Write the SQL code according to the dialect specified.
+            - IMPORTANT: Pay attention to the table names and column names in the database. Make sure to use the correct table and column names in the SQL code. If a space is present in the table name or column name, make sure to account for it.
             User instructions / Question:
@@ -240,13 +589,6 @@ def make_sql_database_agent(
             input_variables=["user_instructions", "recommended_steps", "all_sql_database_summary"]
         )
-        # Create a connection if needed
-        is_engine = isinstance(connection, sql.engine.base.Engine)
-        conn = connection.connect() if is_engine else connection
-        # Get the database metadata
-        all_sql_database_summary = get_database_metadata(conn, n_samples=n_samples)
         sql_query_code_agent = sql_query_code_prompt | llm | SQLOutputParser()
         sql_query_code = sql_query_code_agent.invoke({
@@ -258,7 +600,7 @@ def make_sql_database_agent(
         print("    * CREATE PYTHON FUNCTION TO RUN SQL CODE")
         response = f"""
-def sql_database_pipeline(connection):
+def {function_name}(connection):
     import pandas as pd
     import sqlalchemy as sql
@@ -288,19 +630,37 @@ def sql_database_pipeline(connection):
             "sql_query_code": sql_query_code,
             "sql_database_function": response,
             "sql_database_function_path": file_path,
-            "sql_database_function_name": file_name_2,
+            "sql_database_function_file_name": file_name_2,
+            "sql_database_function_name": function_name,
             "all_sql_database_summary": all_sql_database_summary
         }
-    def human_review(state: GraphState) -> Command[Literal["recommend_sql_steps", "create_sql_query_code"]]:
-        return node_func_human_review(
-            state=state,
-            prompt_text="Are the following SQL database querying steps correct? (Answer 'yes' or provide modifications)\n{steps}",
-            yes_goto="create_sql_query_code",
-            no_goto="recommend_sql_steps",
-            user_instructions_key="user_instructions",
-            recommended_steps_key="recommended_steps"
-        )
+    # Human Review
+    prompt_text_human_review = "Are the following SQL agent instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
+    if not bypass_explain_code:
+        def human_review(state: GraphState) -> Command[Literal["recommend_sql_steps", "explain_sql_database_code"]]:
+            return node_func_human_review(
+                state=state,
+                prompt_text=prompt_text_human_review,
+                yes_goto= 'explain_sql_database_code',
+                no_goto="recommend_sql_steps",
+                user_instructions_key="user_instructions",
+                recommended_steps_key="recommended_steps",
+                code_snippet_key="sql_database_function",
+            )
+    else:
+        def human_review(state: GraphState) -> Command[Literal["recommend_sql_steps", "__end__"]]:
+            return node_func_human_review(
+                state=state,
+                prompt_text=prompt_text_human_review,
+                yes_goto= '__end__',
+                no_goto="recommend_sql_steps",
+                user_instructions_key="user_instructions",
+                recommended_steps_key="recommended_steps",
+                code_snippet_key="sql_database_function",
+            )
     def execute_sql_database_code(state: GraphState):
@@ -313,18 +673,18 @@ def sql_database_pipeline(connection):
             result_key="data_sql",
             error_key="sql_database_error",
             code_snippet_key="sql_database_function",
-            agent_function_name="sql_database_pipeline",
+            agent_function_name=state.get("sql_database_function_name"),
             post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
             error_message_prefix="An error occurred during executing the sql database pipeline: "
         )
     def fix_sql_database_code(state: GraphState):
         prompt = """
-        You are a SQL Database Agent code fixer. Your job is to create a sql_database_pipeline(connection) function that can be run on a sql connection. The function is currently broken and needs to be fixed.
+        You are a SQL Database Agent code fixer. Your job is to create a {function_name}(connection) function that can be run on a sql connection. The function is currently broken and needs to be fixed.
-        Make sure to only return the function definition for sql_database_pipeline().
+        Make sure to only return the function definition for {function_name}().
-        Return Python code in ```python``` format with a single function definition, sql_database_pipeline(connection), that includes all imports inside the function. The connection object is a SQLAlchemy connection object. Don't specify the class of the connection object, just use it as an argument to the function.
+        Return Python code in ```python``` format with a single function definition, {function_name}(connection), that includes all imports inside the function. The connection object is a SQLAlchemy connection object. Don't specify the class of the connection object, just use it as an argument to the function.
         This is the broken code (please fix):
         {code_snippet}
@@ -342,22 +702,23 @@ def sql_database_pipeline(connection):
             agent_name=AGENT_NAME,
             log=log,
             file_path=state.get("sql_database_function_path", None),
+            function_name=state.get("sql_database_function_name"),
         )
-    def explain_sql_database_code(state: GraphState):
-        return node_func_explain_agent_code(
+    # Final reporting node
+    def report_agent_outputs(state: GraphState):
+        return node_func_report_agent_outputs(
             state=state,
-            code_snippet_key="sql_database_function",
+            keys_to_include=[
+                "recommended_steps",
+                "sql_database_function",
+                "sql_database_function_path",
+                "sql_database_function_name",
+                "sql_database_error",
+            ],
             result_key="messages",
-            error_key="sql_database_error",
-            llm=llm,
             role=AGENT_NAME,
-            explanation_prompt_template="""
-            Explain the SQL steps that the SQL Database agent performed in this function.
-            Keep the summary succinct and to the point.\n\n# SQL Database Agent:\n\n{code}
-            """,
-            success_prefix="# SQL Database Agent:\n\n",
-            error_message="The SQL Database Agent encountered an error during SQL Query Analysis. No SQL function explanation is returned."
+            custom_title="SQL Database Agent Outputs"
         )
     # Create the graph
@@ -367,7 +728,7 @@ def sql_database_pipeline(connection):
         "create_sql_query_code": create_sql_query_code,
         "execute_sql_database_code": execute_sql_database_code,
         "fix_sql_database_code": fix_sql_database_code,
-        "explain_sql_database_code": explain_sql_database_code
+        "report_agent_outputs": report_agent_outputs,
     }
     app = create_coding_agent_graph(
@@ -377,7 +738,7 @@ def sql_database_pipeline(connection):
         create_code_node_name="create_sql_query_code",
         execute_code_node_name="execute_sql_database_code",
         fix_code_node_name="fix_sql_database_code",
-        explain_code_node_name="explain_sql_database_code",
+        explain_code_node_name="report_agent_outputs",
         error_key="sql_database_error",
         human_in_the_loop=human_in_the_loop,
         human_review_node_name="human_review",
@@ -391,7 +752,46 @@ def sql_database_pipeline(connection):
+def smart_schema_filter(llm, user_instructions, all_sql_database_summary, smart_filtering = True):
+    """
+    This function filters the tables and columns based on the user instructions and the recommended steps.
+    """
+    # Smart schema filtering
+    if smart_filtering:
+        print("    * SMART FILTER SCHEMA")
+        filter_schema_prompt = PromptTemplate(
+            template="""
+            You are a highly skilled data engineer. The user question is:
+                "{user_instructions}"
+            You have the full database metadata in JSON format below:
+                {all_sql_database_summary}
+            Please return ONLY the subset of this metadata that is relevant to answering the user’s question.
+            - Preserve the same JSON structure for "schemas" -> "tables" -> "columns".
+            - If any schemas/tables are irrelevant, omit them entirely.
+            - If some columns in a relevant table are not needed, you can still keep them if you aren't sure.
+            - However, try to keep only the minimum amount of data required to answer the user’s question.
+            Return a valid JSON object. Do not include any additional explanation or text outside of the JSON.
+            """,
+            input_variables=["user_instructions", "full_metadata_json"]
+        )
+        filter_schema_agent = filter_schema_prompt | llm | JsonOutputParser()
+        response = filter_schema_agent.invoke({
+            "user_instructions": user_instructions,
+            "all_sql_database_summary": all_sql_database_summary
+        })
+        return response
+    else:
+        return all_sql_database_summary

ai-data-science-team 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl

ai-data-science-team 0.0.0.9007py3-none-any.whl → 0.0.0.9009py3-none-any.whl