PyPI - ai-data-science-team - Versions diffs - 0.0.0.9012__py3-none-any.whl → 0.0.0.9013__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

ai_data_science_team/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.0.9012"
1	+ __version__ = "0.0.0.9013"

ai_data_science_team/agents/data_loader_tools_agent.py CHANGED Viewed

@@ -25,6 +25,7 @@ from ai_data_science_team.tools.data_loader import (
     get_file_info,
     search_files_by_pattern,
 )
+from ai_data_science_team.utils.messages import get_tool_call_names
 AGENT_NAME = "data_loader_tools_agent"
@@ -174,6 +175,12 @@ class DataLoaderToolsAgent(BaseAgent):
             return Markdown(self.response["messages"][0].content)
         else:
             return self.response["messages"][0].content
+    def get_tool_calls(self):
+        """
+        Returns the tool calls made by the agent.
+        """
+        return self.response["tool_calls"]
@@ -204,6 +211,7 @@ def make_data_loader_tools_agent(
         internal_messages: Annotated[Sequence[BaseMessage], operator.add]
         user_instructions: str
         data_loader_artifacts: dict
+        tool_calls: List[str]
     def data_loader_agent(state):
@@ -253,10 +261,13 @@ def make_data_loader_tools_agent(
             elif isinstance(last_message, dict) and "artifact" in last_message:
                 last_tool_artifact = last_message["artifact"]
+        tool_calls = get_tool_call_names(internal_messages)
         return {
             "messages": [last_ai_message],
             "internal_messages": internal_messages,
             "data_loader_artifacts": last_tool_artifact,
+            "tool_calls": tool_calls,
         }
     workflow = StateGraph(GraphState)

ai_data_science_team/ds_agents/eda_tools_agent.py CHANGED Viewed

@@ -19,17 +19,20 @@ from ai_data_science_team.templates import BaseAgent
 from ai_data_science_team.utils.regex import format_agent_name
 from ai_data_science_team.tools.eda import (
+    explain_data,
     describe_dataset,
     visualize_missing,
     correlation_funnel,
     generate_sweetviz_report,
 )
+from ai_data_science_team.utils.messages import get_tool_call_names
 AGENT_NAME = "exploratory_data_analyst_agent"
 # Updated tool list for EDA
 EDA_TOOLS = [
+    explain_data,
     describe_dataset,
     visualize_missing,
     correlation_funnel,
@@ -162,6 +165,12 @@ class EDAToolsAgent(BaseAgent):
             return Markdown(self.response["messages"][0].content)
         else:
             return self.response["messages"][0].content
+    def get_tool_calls(self):
+        """
+        Returns the tool calls made by the agent.
+        """
+        return self.response["tool_calls"]
 def make_eda_tools_agent(
     model: Any,
@@ -191,6 +200,7 @@ def make_eda_tools_agent(
         user_instructions: str
         data_raw: dict
         eda_artifacts: dict
+        tool_calls: list
     def exploratory_agent(state):
         print(format_agent_name(AGENT_NAME))
@@ -229,11 +239,14 @@ def make_eda_tools_agent(
                 last_tool_artifact = last_message.artifact
             elif isinstance(last_message, dict) and "artifact" in last_message:
                 last_tool_artifact = last_message["artifact"]
+        tool_calls = get_tool_call_names(internal_messages)
         return {
             "messages": [last_ai_message],
             "internal_messages": internal_messages,
             "eda_artifacts": last_tool_artifact,
+            "tool_calls": tool_calls,
         }
     workflow = StateGraph(GraphState)

ai_data_science_team/ml_agents/mlflow_tools_agent.py CHANGED Viewed

@@ -27,6 +27,7 @@ from ai_data_science_team.tools.mlflow import (
     mlflow_search_registered_models,
     mlflow_get_model_version_details,
 )
+from ai_data_science_team.utils.messages import get_tool_call_names
 AGENT_NAME = "mlflow_tools_agent"
@@ -228,6 +229,12 @@ class MLflowToolsAgent(BaseAgent):
             return Markdown(self.response["messages"][0].content)
         else:
             return self.response["messages"][0].content
+    def get_tool_calls(self):
+        """
+        Returns the tool calls made by the agent.
+        """
+        return self.response["tool_calls"]
@@ -330,10 +337,13 @@ def make_mlflow_tools_agent(
             elif isinstance(last_message, dict) and "artifact" in last_message:
                 last_tool_artifact = last_message["artifact"]
+        tool_calls = get_tool_call_names(internal_messages)
         return {
             "messages": [last_ai_message],
             "internal_messages": internal_messages,
             "mlflow_artifacts": last_tool_artifact,
+            "tool_calls": tool_calls,
         }

ai_data_science_team/tools/dataframe.py CHANGED Viewed

@@ -74,7 +74,12 @@ def get_dataframe_summary(
     return summaries
-def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
+def _summarize_dataframe(
+    df: pd.DataFrame,
+    dataset_name: str,
+    n_sample=30,
+    skip_stats=False
+) -> str:
     """Generate a summary string for a single DataFrame."""
     # 1. Convert dictionary-type cells to strings
     #    This prevents unhashable dict errors during df.nunique().

ai_data_science_team/tools/eda.py CHANGED Viewed

@@ -2,11 +2,44 @@
 from typing import Annotated, Dict, Tuple, Union
 import os
+import tempfile
 from langchain.tools import tool
 from langgraph.prebuilt import InjectedState
+from ai_data_science_team.tools.dataframe import get_dataframe_summary
+@tool(response_format='content')
+def explain_data(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    n_sample: int = 30,
+    skip_stats: bool = False,
+):
+    """
+    Tool: explain_data
+    Description:
+        Provides an extensive, narrative summary of a DataFrame including its shape, column types,
+        missing value percentages, unique counts, sample rows, and (if not skipped) descriptive stats/info.
+    Parameters:
+        data_raw (dict): Raw data.
+        n_sample (int, default=30): Number of rows to display.
+        skip_stats (bool, default=False): If True, omit descriptive stats/info.
+    LLM Guidance:
+        Use when a detailed, human-readable explanation is needed—i.e., a full overview is preferred over a concise numerical summary.
+    Returns:
+        str: Detailed DataFrame summary.
+    """
+    print("    * Tool: explain_data")
+    import pandas as pd
+    result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
+    return result
 @tool(response_format='content_and_artifact')
 def describe_dataset(
@@ -15,21 +48,33 @@ def describe_dataset(
     """
     Tool: describe_dataset
     Description:
-        Describe the dataset by computing summary
-        statistics using the DataFrame's describe() method.
+        Compute and return summary statistics for the dataset using pandas' describe() method.
+        The tool provides both a textual summary and a structured artifact (a dictionary) for further processing.
+    Parameters:
+    -----------
+    data_raw : dict
+        The raw data in dictionary format.
+    LLM Selection Guidance:
+    ------------------------
+    Use this tool when:
+      - The request emphasizes numerical descriptive statistics (e.g., count, mean, std, min, quartiles, max).
+      - The user needs a concise statistical snapshot rather than a detailed narrative.
+      - Both a brief text explanation and a structured data artifact (for downstream tasks) are required.
     Returns:
     -------
     Tuple[str, Dict]:
-        content: A textual summary of the DataFrame's descriptive statistics.
-        artifact: A dictionary (from DataFrame.describe()) for further inspection.
+        - content: A textual summary indicating that summary statistics have been computed.
+        - artifact: A dictionary (derived from DataFrame.describe()) containing detailed statistical measures.
     """
     print("    * Tool: describe_dataset")
     import pandas as pd
     df = pd.DataFrame(data_raw)
     description_df = df.describe(include='all')
     content = "Summary statistics computed using pandas describe()."
-    artifact = description_df.to_dict()
+    artifact = {'describe_df': description_df.to_dict()}
     return content, artifact
@@ -226,8 +271,8 @@ def generate_sweetviz_report(
     data_raw: Annotated[dict, InjectedState("data_raw")],
     target: str = None,
     report_name: str = "sweetviz_report.html",
-    report_directory: str = os.path.join(os.getcwd(), "reports"),
-    open_browser: bool = True,
+    report_directory: str = None,  # <-- Default to None
+    open_browser: bool = False,
 ) -> Tuple[str, Dict]:
     """
     Tool: generate_sweetviz_report
@@ -243,9 +288,10 @@ def generate_sweetviz_report(
     report_name : str, optional
         The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
     report_directory : str, optional
-        The directory where the report should be saved. Defaults to a 'reports' directory in the current working directory.
+        The directory where the report should be saved.
+        If None, a temporary directory is created and used.
     open_browser : bool, optional
-        Whether to open the report in a web browser. Default is True.
+        Whether to open the report in a web browser. Default is False.
     Returns:
     --------
@@ -254,28 +300,37 @@ def generate_sweetviz_report(
         artifact: A dictionary with the report file path and optionally the report's HTML content.
     """
     print("    * Tool: generate_sweetviz_report")
+    # Import sweetviz
     try:
         import sweetviz as sv
     except ImportError:
         raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
     import pandas as pd
     # Convert injected raw data to a DataFrame.
     df = pd.DataFrame(data_raw)
+    # If no directory is specified, use a temporary directory.
+    if not report_directory:
+        report_directory = tempfile.mkdtemp()
+        print(f"    * Using temporary directory: {report_directory}")
+    else:
+        # Ensure user-specified directory exists.
+        if not os.path.exists(report_directory):
+            os.makedirs(report_directory)
     # Create the Sweetviz report.
     report = sv.analyze(df, target_feat=target)
-    # Ensure the directory exists; default is os.getcwd()/reports
-    if not os.path.exists(report_directory):
-        os.makedirs(report_directory)
     # Determine the full path for the report.
     full_report_path = os.path.join(report_directory, report_name)
     # Save the report to the specified HTML file.
     report.show_html(
         filepath=full_report_path,
-        open_browser=True,
+        open_browser=open_browser,
     )
     # Optionally, read the HTML content (if desired to pass along in the artifact).
@@ -285,9 +340,13 @@ def generate_sweetviz_report(
     except Exception:
         html_content = None
-    content = f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'."
+    content = (
+        f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
+        f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
+    )
     artifact = {
         "report_file": os.path.abspath(full_report_path),
         "report_html": html_content,
     }
     return content, artifact

ai_data_science_team/utils/messages.py ADDED Viewed

@@ -0,0 +1,27 @@
+def get_tool_call_names(messages):
+    """
+    Method to extract the tool call names from a list of LangChain messages.
+    Parameters:
+    ----------
+    messages : list
+        A list of LangChain messages.
+    Returns:
+    -------
+    tool_calls : list
+        A list of tool call names.
+    """
+    tool_calls = []
+    for message in messages:
+        try:
+            if "tool_call_id" in list(dict(message).keys()):
+                tool_calls.append(message.name)
+        except:
+            pass
+    return tool_calls

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9013.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ai-data-science-team
-Version: 0.0.0.9012
+Version: 0.0.0.9013
 Summary: Build and run an AI-powered data science team.
 Home-page: https://github.com/business-science/ai-data-science-team
 Author: Matt Dancho
@@ -152,7 +152,11 @@ This is a top secret project I'm working on. It's a multi-agent data science app
 #### 🔥 Agentic Applications
-1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
+1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
+![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
+2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
 ### Agents Available Now

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9013.dist-info}/RECORD RENAMED Viewed

@@ -1,20 +1,20 @@
 ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ai_data_science_team/_version.py,sha256=BybGt-zGNDZsdJxDMV3xmjghiRF8jmwG3ov_dt_rM7E,26
+ai_data_science_team/_version.py,sha256=8mQbNYWB914j3xlCMQYaR14g26vq-2SV31Xf8uer_L0,26
 ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
 ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
 ai_data_science_team/agents/data_cleaning_agent.py,sha256=V5tJMwGJK0JwrF_H-7r3S0E8UkAY6ci4BGxqjhZiGBI,27352
-ai_data_science_team/agents/data_loader_tools_agent.py,sha256=fnkOvmrXzvTTt1mnAyTlsF_7ZGrkp3P97YU_LgeffMg,8445
+ai_data_science_team/agents/data_loader_tools_agent.py,sha256=23Uuqt-oaJfj3CFRKT7NErNkodXpraXl0HOWvXjMcJs,8802
 ai_data_science_team/agents/data_visualization_agent.py,sha256=tJy9Ehnh9mvAu6H--TXI8esSHmK1RW_L1RDAdn7Xek4,28821
 ai_data_science_team/agents/data_wrangling_agent.py,sha256=LxzphH-TmrFG0GjejGOjulhPq4SsWFo5Y9tk4WEuN4M,32347
 ai_data_science_team/agents/feature_engineering_agent.py,sha256=KmPBkj7WUBz6LFUlDDfQHMi7ujXwsH5P9LWRS-F4tdM,31026
 ai_data_science_team/agents/sql_database_agent.py,sha256=1K2o3NiuKgGKdbMz_Tq9IeQ8xhXjpfGOxx9lArZh1yE,31173
 ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
-ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=y65lsBXhQNOGwWealEho6uFxGSTW7FNfvTUZnW8_XNY,7609
+ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=VJkqyQCNxoV0kvUTpUZh8SXTTZ0K1tUlg3jq6LDnpPQ,8009
 ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
 ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=DamR72agrTKfdcdhablmP2mpbj0CqtMonP-QU8p7o9w,33394
 ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=zbT0KIsmQp_sEyxzXRguhqx5913Q2yPYyKGU6TUWEM8,11067
+ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=bRTT53_pHV0qAYl07iZcwUEYffGH_ZfJICdrLeOUPn4,11394
 ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
 ai_data_science_team/multiagents/sql_data_analyst.py,sha256=kmmED3gLf5STWWY6ZVJYd7_Pt8NMl6SHyBocuQzRDGk,14193
 ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
@@ -24,8 +24,8 @@ ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP
 ai_data_science_team/templates/agent_templates.py,sha256=Lezp0ugtIP3m5WUOmjLwghNnjjyQVQecysONeIHWwi0,29133
 ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
-ai_data_science_team/tools/dataframe.py,sha256=qSflGDByqqCXv4TjuvOFvGPZmegzeOesb0Y4i4Y0gdQ,4551
-ai_data_science_team/tools/eda.py,sha256=UGD6PC12RsB_UmStvR4TmSqv0noxjM4DkzY-kHjI0-E,10591
+ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
+ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
 ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
 ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
 ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -33,10 +33,11 @@ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
 ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
 ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
+ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
 ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
 ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
-ai_data_science_team-0.0.0.9012.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
-ai_data_science_team-0.0.0.9012.dist-info/METADATA,sha256=geRCFLG3YO9uprp_CGKiqCTSThg06L2U6WxVqYKzyM8,12704
-ai_data_science_team-0.0.0.9012.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ai_data_science_team-0.0.0.9012.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
-ai_data_science_team-0.0.0.9012.dist-info/RECORD,,
+ai_data_science_team-0.0.0.9013.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ai_data_science_team-0.0.0.9013.dist-info/METADATA,sha256=z18MmCwNdEgovskYmYmd4CS1I4WKTvh_mSnmzKOaHZs,13021
+ai_data_science_team-0.0.0.9013.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ai_data_science_team-0.0.0.9013.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ai_data_science_team-0.0.0.9013.dist-info/RECORD,,

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9013.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9013.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9013.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9013__py3-none-any.whl

ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9013__py3-none-any.whl