PyPI - ai-data-science-team - Versions diffs - 0.0.0.9015__py3-none-any.whl → 0.0.0.9016__py3-none-any.whl - Mend

ai-data-science-team 0.0.0.9015__py3-none-any.whl → 0.0.0.9016__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

ai_data_science_team/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.0.9015"
1	+ __version__ = "0.0.0.9016"

ai_data_science_team/ds_agents/eda_tools_agent.py CHANGED Viewed

@@ -1,5 +1,3 @@
 from typing import Any, Optional, Annotated, Sequence, Dict
 import operator
 import pandas as pd
@@ -17,10 +15,11 @@ from ai_data_science_team.utils.regex import format_agent_name
 from ai_data_science_team.tools.eda import (
     explain_data,
-    describe_dataset,
-    visualize_missing,
-    correlation_funnel,
+    describe_dataset,
+    visualize_missing,
+    generate_correlation_funnel,
     generate_sweetviz_report,
+    generate_dtale_report,
 )
 from ai_data_science_team.utils.messages import get_tool_call_names
@@ -32,15 +31,17 @@ EDA_TOOLS = [
     explain_data,
     describe_dataset,
     visualize_missing,
-    correlation_funnel,
+    generate_correlation_funnel,
     generate_sweetviz_report,
+    generate_dtale_report,
 ]
 class EDAToolsAgent(BaseAgent):
     """
     An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
     missing data visualizations, correlation funnels, EDA reports, etc.
     Parameters:
     ----------
     model : langchain.llms.base.LLM
@@ -52,9 +53,9 @@ class EDAToolsAgent(BaseAgent):
     checkpointer : Checkpointer, optional
         The checkpointer for the agent.
     """
     def __init__(
-        self,
+        self,
         model: Any,
         create_react_agent_kwargs: Optional[Dict] = {},
         invoke_react_agent_kwargs: Optional[Dict] = {},
@@ -64,18 +65,18 @@ class EDAToolsAgent(BaseAgent):
             "model": model,
             "create_react_agent_kwargs": create_react_agent_kwargs,
             "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
-            "checkpointer": checkpointer
+            "checkpointer": checkpointer,
         }
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
     def _make_compiled_graph(self):
         """
         Creates the compiled state graph for the EDA agent.
         """
         self.response = None
         return make_eda_tools_agent(**self._params)
     def update_params(self, **kwargs):
         """
         Updates the agent's parameters and rebuilds the compiled graph.
@@ -83,16 +84,13 @@ class EDAToolsAgent(BaseAgent):
         for k, v in kwargs.items():
             self._params[k] = v
         self._compiled_graph = self._make_compiled_graph()
     async def ainvoke_agent(
-        self,
-        user_instructions: str = None,
-        data_raw: pd.DataFrame = None,
-        **kwargs
+        self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
     ):
         """
         Asynchronously runs the agent with user instructions and data.
         Parameters:
         ----------
         user_instructions : str, optional
@@ -105,20 +103,17 @@ class EDAToolsAgent(BaseAgent):
                 "user_instructions": user_instructions,
                 "data_raw": data_raw.to_dict() if data_raw is not None else None,
             },
-            **kwargs
+            **kwargs,
         )
         self.response = response
         return None
     def invoke_agent(
-        self,
-        user_instructions: str = None,
-        data_raw: pd.DataFrame = None,
-        **kwargs
+        self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
     ):
         """
         Synchronously runs the agent with user instructions and data.
         Parameters:
         ----------
         user_instructions : str, optional
@@ -131,24 +126,26 @@ class EDAToolsAgent(BaseAgent):
                 "user_instructions": user_instructions,
                 "data_raw": data_raw.to_dict() if data_raw is not None else None,
             },
-            **kwargs
+            **kwargs,
         )
         self.response = response
         return None
     def get_internal_messages(self, markdown: bool = False):
         """
         Returns internal messages from the agent response.
         """
         pretty_print = "\n\n".join(
-            [f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
-             for msg in self.response["internal_messages"]]
+            [
+                f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
+                for msg in self.response["internal_messages"]
+            ]
         )
         if markdown:
             return Markdown(pretty_print)
         else:
             return self.response["internal_messages"]
     def get_artifacts(self, as_dataframe: bool = False):
         """
         Returns the EDA artifacts from the agent response.
@@ -157,7 +154,7 @@ class EDAToolsAgent(BaseAgent):
             return pd.DataFrame(self.response["eda_artifacts"])
         else:
             return self.response["eda_artifacts"]
     def get_ai_message(self, markdown: bool = False):
         """
         Returns the AI message from the agent response.
@@ -166,13 +163,14 @@ class EDAToolsAgent(BaseAgent):
             return Markdown(self.response["messages"][0].content)
         else:
             return self.response["messages"][0].content
     def get_tool_calls(self):
         """
         Returns the tool calls made by the agent.
         """
         return self.response["tool_calls"]
 def make_eda_tools_agent(
     model: Any,
     create_react_agent_kwargs: Optional[Dict] = {},
@@ -181,7 +179,7 @@ def make_eda_tools_agent(
 ):
     """
     Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
     Parameters:
     ----------
     model : Any
@@ -192,13 +190,13 @@ def make_eda_tools_agent(
         Additional kwargs for agent invocation.
     checkpointer : Checkpointer, optional
         The checkpointer for the agent.
     Returns:
     -------
     app : langgraph.graph.CompiledStateGraph
         The compiled state graph for the EDA agent.
     """
     class GraphState(AgentState):
         internal_messages: Annotated[Sequence[BaseMessage], operator.add]
         user_instructions: str
@@ -209,11 +207,9 @@ def make_eda_tools_agent(
     def exploratory_agent(state):
         print(format_agent_name(AGENT_NAME))
         print("    * RUN REACT TOOL-CALLING AGENT FOR EDA")
-        tool_node = ToolNode(
-            tools=EDA_TOOLS
-        )
+        tool_node = ToolNode(tools=EDA_TOOLS)
         eda_agent = create_react_agent(
             model,
             tools=tool_node,
@@ -221,7 +217,7 @@ def make_eda_tools_agent(
             **create_react_agent_kwargs,
             checkpointer=checkpointer,
         )
         response = eda_agent.invoke(
             {
                 "messages": [("user", state["user_instructions"])],
@@ -229,13 +225,13 @@ def make_eda_tools_agent(
             },
             invoke_react_agent_kwargs,
         )
         print("    * POST-PROCESSING EDA RESULTS")
-        internal_messages = response['messages']
+        internal_messages = response["messages"]
         if not internal_messages:
             return {"internal_messages": [], "eda_artifacts": None}
         last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)
         last_tool_artifact = None
         if len(internal_messages) > 1:
@@ -244,24 +240,24 @@ def make_eda_tools_agent(
                 last_tool_artifact = last_message.artifact
             elif isinstance(last_message, dict) and "artifact" in last_message:
                 last_tool_artifact = last_message["artifact"]
         tool_calls = get_tool_call_names(internal_messages)
         return {
             "messages": [last_ai_message],
             "internal_messages": internal_messages,
             "eda_artifacts": last_tool_artifact,
             "tool_calls": tool_calls,
         }
     workflow = StateGraph(GraphState)
     workflow.add_node("exploratory_agent", exploratory_agent)
     workflow.add_edge(START, "exploratory_agent")
     workflow.add_edge("exploratory_agent", END)
     app = workflow.compile(
         checkpointer=checkpointer,
         name=AGENT_NAME,
     )
     return app

ai_data_science_team/tools/eda.py CHANGED Viewed

@@ -1,4 +1,3 @@
 from typing import Annotated, Dict, Tuple, Union
 import os
@@ -6,12 +5,12 @@ import tempfile
 from langchain.tools import tool
-from langgraph.prebuilt import InjectedState
+from langgraph.prebuilt import InjectedState
 from ai_data_science_team.tools.dataframe import get_dataframe_summary
-@tool(response_format='content')
+@tool(response_format="content")
 def explain_data(
     data_raw: Annotated[dict, InjectedState("data_raw")],
     n_sample: int = 30,
@@ -36,14 +35,17 @@ def explain_data(
     """
     print("    * Tool: explain_data")
     import pandas as pd
-    result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
+    result = get_dataframe_summary(
+        pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats
+    )
     return result
-@tool(response_format='content_and_artifact')
+@tool(response_format="content_and_artifact")
 def describe_dataset(
-    data_raw: Annotated[dict, InjectedState("data_raw")]
+    data_raw: Annotated[dict, InjectedState("data_raw")],
 ) -> Tuple[str, Dict]:
     """
     Tool: describe_dataset
@@ -71,30 +73,30 @@ def describe_dataset(
     """
     print("    * Tool: describe_dataset")
     import pandas as pd
     df = pd.DataFrame(data_raw)
-    description_df = df.describe(include='all')
+    description_df = df.describe(include="all")
     content = "Summary statistics computed using pandas describe()."
-    artifact = {'describe_df': description_df.to_dict()}
+    artifact = {"describe_df": description_df.to_dict()}
     return content, artifact
-@tool(response_format='content_and_artifact')
+@tool(response_format="content_and_artifact")
 def visualize_missing(
-    data_raw: Annotated[dict, InjectedState("data_raw")],
-    n_sample: int = None
+    data_raw: Annotated[dict, InjectedState("data_raw")], n_sample: int = None
 ) -> Tuple[str, Dict]:
     """
     Tool: visualize_missing
     Description:
         Missing value analysis using the missingno library. Generates a matrix plot, bar plot, and heatmap plot.
     Parameters:
     -----------
     data_raw : dict
         The raw data in dictionary format.
     n_sample : int, optional (default: None)
         The number of rows to sample from the dataset if it is large.
     Returns:
     -------
     Tuple[str, Dict]:
@@ -103,12 +105,14 @@ def visualize_missing(
                   corresponding base64 encoded PNG image.
     """
     print("    * Tool: visualize_missing")
     try:
         import missingno as msno  # Ensure missingno is installed
     except ImportError:
-        raise ImportError("Please install the 'missingno' package to use this tool. pip install missingno")
+        raise ImportError(
+            "Please install the 'missingno' package to use this tool. pip install missingno"
+        )
     import pandas as pd
     import base64
     from io import BytesIO
@@ -136,21 +140,22 @@ def visualize_missing(
     # Create and encode the matrix plot.
     encoded_plots["matrix_plot"] = create_and_encode_plot(msno.matrix, "matrix")
     # Create and encode the bar plot.
     encoded_plots["bar_plot"] = create_and_encode_plot(msno.bar, "bar")
     # Create and encode the heatmap plot.
     encoded_plots["heatmap_plot"] = create_and_encode_plot(msno.heatmap, "heatmap")
-    content = "Missing data visualizations (matrix, bar, and heatmap) have been generated."
+    content = (
+        "Missing data visualizations (matrix, bar, and heatmap) have been generated."
+    )
     artifact = encoded_plots
     return content, artifact
-@tool(response_format='content_and_artifact')
-def correlation_funnel(
+@tool(response_format="content_and_artifact")
+def generate_correlation_funnel(
     data_raw: Annotated[dict, InjectedState("data_raw")],
     target: str,
     target_bin_index: Union[int, str] = -1,
@@ -160,10 +165,10 @@ def correlation_funnel(
     name_infreq: str = "-OTHER",
 ) -> Tuple[str, Dict]:
     """
-    Tool: correlation_funnel
+    Tool: generate_correlation_funnel
     Description:
         Correlation analysis using the correlation funnel method. The tool binarizes the data and computes correlation versus a target column.
     Parameters:
     ----------
     target : str
@@ -171,8 +176,8 @@ def correlation_funnel(
         with this string followed by '__' (e.g., 'Member_Status__Gold', 'Member_Status__Platinum').
     target_bin_index : int or str, default -1
         If an integer, selects the target level by position from the matching columns.
-        If a string (e.g., "Yes"), attempts to match to the suffix of a column name
-        (i.e., 'target__Yes').
+        If a string (e.g., "Yes"), attempts to match to the suffix of a column name
+        (i.e., 'target__Yes').
     corr_method : str
         The correlation method ('pearson', 'kendall', or 'spearman'). Default is 'pearson'.
     n_bins : int
@@ -182,34 +187,36 @@ def correlation_funnel(
     name_infreq : str
         The name to use for infrequent levels. Default is '-OTHER'.
     """
-    print("    * Tool: correlation_funnel")
+    print("    * Tool: generate_correlation_funnel")
     try:
         import pytimetk as tk
     except ImportError:
-        raise ImportError("Please install the 'pytimetk' package to use this tool. pip install pytimetk")
+        raise ImportError(
+            "Please install the 'pytimetk' package to use this tool. pip install pytimetk"
+        )
     import pandas as pd
     import base64
     from io import BytesIO
     import matplotlib.pyplot as plt
     import json
-    import plotly.graph_objects as go
     import plotly.io as pio
-    from typing import Union
     # Convert the raw injected state into a DataFrame.
     df = pd.DataFrame(data_raw)
     # Apply the binarization method.
     df_binarized = df.binarize(
-        n_bins=n_bins,
-        thresh_infreq=thresh_infreq,
-        name_infreq=name_infreq,
-        one_hot=True
+        n_bins=n_bins,
+        thresh_infreq=thresh_infreq,
+        name_infreq=name_infreq,
+        one_hot=True,
     )
     # Determine the full target column name.
     # Look for all columns that start with "target__"
-    matching_columns = [col for col in df_binarized.columns if col.startswith(f"{target}__")]
+    matching_columns = [
+        col for col in df_binarized.columns if col.startswith(f"{target}__")
+    ]
     if not matching_columns:
         # If no matching columns are found, warn and use the provided target as-is.
         full_target = target
@@ -230,15 +237,15 @@ def correlation_funnel(
             except IndexError:
                 # If index is out of bounds, use the last matching column.
                 full_target = matching_columns[-1]
     # Compute correlation funnel using the full target column name.
     df_correlated = df_binarized.correlate(target=full_target, method=corr_method)
     # Attempt to generate a static plot.
     encoded = None
     try:
         # Here we assume that your DataFrame has a method plot_correlation_funnel.
-        fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
+        fig = df_correlated.plot_correlation_funnel(engine="plotnine", height=600)
         buf = BytesIO()
         # Use the appropriate save method for your figure object.
         fig.save(buf, format="png")
@@ -247,18 +254,21 @@ def correlation_funnel(
         encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
     except Exception as e:
         encoded = {"error": str(e)}
     # Attempt to generate a Plotly plot.
     fig_dict = None
     try:
-        fig = df_correlated.plot_correlation_funnel(engine='plotly')
+        fig = df_correlated.plot_correlation_funnel(engine="plotly", base_size=14)
         fig_json = pio.to_json(fig)
         fig_dict = json.loads(fig_json)
     except Exception as e:
         fig_dict = {"error": str(e)}
-    content = (f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
-               f"Base target was '{target}' with target_bin_index '{target_bin_index}'.")
+    content = (
+        f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
+        f"Base target was '{target}' with target_bin_index '{target_bin_index}'."
+    )
     artifact = {
         "correlation_data": df_correlated.to_dict(orient="list"),
         "plot_image": encoded,
@@ -267,8 +277,7 @@ def correlation_funnel(
     return content, artifact
-@tool(response_format='content_and_artifact')
+@tool(response_format="content_and_artifact")
 def generate_sweetviz_report(
     data_raw: Annotated[dict, InjectedState("data_raw")],
     target: str = None,
@@ -280,7 +289,7 @@ def generate_sweetviz_report(
     Tool: generate_sweetviz_report
     Description:
         Make an Exploratory Data Analysis (EDA) report using the Sweetviz library.
     Parameters:
     -----------
     data_raw : dict
@@ -290,11 +299,11 @@ def generate_sweetviz_report(
     report_name : str, optional
         The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
     report_directory : str, optional
-        The directory where the report should be saved.
+        The directory where the report should be saved.
         If None, a temporary directory is created and used.
     open_browser : bool, optional
         Whether to open the report in a web browser. Default is False.
     Returns:
     --------
     Tuple[str, Dict]:
@@ -307,13 +316,15 @@ def generate_sweetviz_report(
     try:
         import sweetviz as sv
     except ImportError:
-        raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
+        raise ImportError(
+            "Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz"
+        )
     import pandas as pd
     # Convert injected raw data to a DataFrame.
     df = pd.DataFrame(data_raw)
     # If no directory is specified, use a temporary directory.
     if not report_directory:
         report_directory = tempfile.mkdtemp()
@@ -322,26 +333,26 @@ def generate_sweetviz_report(
         # Ensure user-specified directory exists.
         if not os.path.exists(report_directory):
             os.makedirs(report_directory)
     # Create the Sweetviz report.
     report = sv.analyze(df, target_feat=target)
     # Determine the full path for the report.
     full_report_path = os.path.join(report_directory, report_name)
     # Save the report to the specified HTML file.
     report.show_html(
         filepath=full_report_path,
         open_browser=open_browser,
     )
     # Optionally, read the HTML content (if desired to pass along in the artifact).
     try:
         with open(full_report_path, "r", encoding="utf-8") as f:
             html_content = f.read()
     except Exception:
         html_content = None
     content = (
         f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
         f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
@@ -352,3 +363,53 @@ def generate_sweetviz_report(
     }
     return content, artifact
+@tool(response_format="content_and_artifact")
+def generate_dtale_report(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    host: str = "localhost",
+    port: int = 40000,
+    open_browser: bool = False,
+) -> Tuple[str, Dict]:
+    """
+    Tool: generate_dtale_report
+    Description:
+        Creates an interactive data exploration report using the dtale library.
+    Parameters:
+    -----------
+    data_raw : dict
+        The raw data in dictionary format.
+    host : str, optional
+        The host IP address to serve the dtale app. Default is "localhost".
+    port : int, optional
+        The port number to serve the dtale app. Default is 40000.
+    open_browser : bool, optional
+        Whether to open the report in a web browser. Default is False.
+    Returns:
+    --------
+    Tuple[str, Dict]:
+        content: A summary message describing the dtale report.
+        artifact: A dictionary containing the URL of the dtale report.
+    """
+    print("    * Tool: generate_dtale_report")
+    try:
+        import dtale
+    except ImportError:
+        raise ImportError(
+            "Please install the 'dtale' package to use this tool. Run: pip install dtale"
+        )
+    import pandas as pd
+    df = pd.DataFrame(data_raw)
+    # Create the dtale report
+    d = dtale.show(df, host=host, port=port, open_browser=open_browser)
+    content = f"Dtale report generated and available at: {d.main_url()}"
+    artifact = {"dtale_url": d.main_url()}
+    return content, artifact

{ai_data_science_team-0.0.0.9015.dist-info → ai_data_science_team-0.0.0.9016.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: ai-data-science-team
-Version: 0.0.0.9015
+Version: 0.0.0.9016
 Summary: Build and run an AI-powered data science team.
 Home-page: https://github.com/business-science/ai-data-science-team
 Author: Matt Dancho
@@ -47,6 +47,7 @@ Dynamic: classifier
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
+Dynamic: license-file
 Dynamic: provides-extra
 Dynamic: requires-dist
 Dynamic: requires-python
@@ -97,9 +98,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
   - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
   - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
   - [Data Science Agents](#data-science-agents)
+    - [🔥 NEW: Data Science Apps](#-new-data-science-apps)
     - [NEW: Multi-Agents](#new-multi-agents)
-    - [Data Science Apps](#data-science-apps)
-    - [Apps Available Now](#apps-available-now)
       - [🔥 Agentic Applications](#-agentic-applications)
     - [Agents Available Now](#agents-available-now)
       - [Standard Agents](#standard-agents)
@@ -110,11 +110,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
   - [Disclaimer](#disclaimer)
   - [Installation](#installation)
   - [Usage](#usage)
-    - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
-    - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
+    - [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
   - [Contributing](#contributing)
   - [License](#license)
 - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
+- [⭐️ Star History](#️-star-history)
 ## Companies That Want A Custom AI Data Science Team (And AI Apps)
@@ -134,21 +134,24 @@ This project is a work in progress. New data science agents will be released soo
 ![AI Data Science Team](/img/ai_data_science_team.jpg)
-### NEW: Multi-Agents
+### 🔥 NEW: Data Science Apps
-**🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
+**🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
+![Pandas Data Analyst App](/img/apps/ai_pandas_data_analyst_app.jpg)
+**🔥 SQL Database Agent:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
-![Business Intelligence SQL Agent](/img/multi_agent_pandas_data_analyst.jpg)
+**🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
-### Data Science Apps
+[See all available apps here](/apps)
-This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
+### NEW: Multi-Agents
-![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
+**🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
-### Apps Available Now
+![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
-[See all available apps here](/apps)
 #### 🔥 Agentic Applications
@@ -205,6 +208,14 @@ By using this software, you agree to use it solely for learning purposes.
 ## Installation
+You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
+``` bash
+pip install ai-data-science-team
+```
+Or, if you want the latest version from GitHub:
 ``` bash
 pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
 ```
@@ -213,55 +224,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
 [See all examples here.](/examples)
-### Example 1: Feature Engineering with the Feature Engineering Agent
+### Example: H2O Machine Learning Agent
-[See the full example here.](/examples/feature_engineering_agent.ipynb)
+[See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
 ``` python
-feature_engineering_agent = FeatureEngineeringAgent(model = llm)
-feature_engineering_agent.invoke_agent(
-    data_raw = df,
-    user_instructions = "Make sure to scale and center numeric features",
-    target_variable = "Churn",
-    max_retries = 3,
+# Import libraries
+from langchain_openai import ChatOpenAI
+import pandas as pd
+import h2o
+import os
+from ai_data_science_team.ml_agents import H2OMLAgent
+# Load the data
+df = pd.read_csv("data/churn_data.csv")
+df
+# Initialize the language model
+os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
+llm = ChatOpenAI(model=MODEL)
+llm
+# Initialize the H2O ML Agent
+ml_agent = H2OMLAgent(
+    model=llm,
+    log=True,
+    log_path="logs/",
+    model_directory="h2o_models/",
+    enable_mlflow=True, # Use this if you wish to log models to MLflow
 )
-```
-``` bash
----FEATURE ENGINEERING AGENT----
-    * CREATE FEATURE ENGINEER CODE
-    * EXECUTING AGENT CODE
-    * EXPLAIN AGENT CODE
-```
-``` python
-feature_engineering_agent.get_data_engineered()
-```
-### Example 2: Cleaning Data with the Data Cleaning Agent
-[See the full example here.](/examples/data_cleaning_agent.ipynb)
-``` python
-data_cleaning_agent = DataCleaningAgent(model = llm)
+ml_agent
-response = data_cleaning_agent.invoke_agent(
-    data_raw = df,
-    user_instructions = "Don't remove outliers when cleaning the data.",
-    max_retries = 3,
+# Run the agent
+ml_agent.invoke_agent(
+    data_raw=df.drop(columns=["customerID"]),
+    user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
+    target_variable="Churn"
 )
-```
-``` bash
----DATA CLEANING AGENT----
-    * CREATE DATA CLEANER CODE
-    * EXECUTING AGENT CODE
-    * EXPLAIN AGENT CODE
-```
-``` python
-data_cleaning_agent.get_data_cleaned()
+# Retrieve and display the leaderboard of models
+ml_agent.get_leaderboard()
 ```
 ## Contributing
@@ -282,4 +284,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
 I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
+# ⭐️ Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=business-science/ai-data-science-team&type=Date)](https://star-history.com/#)
+[**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)

{ai_data_science_team-0.0.0.9015.dist-info → ai_data_science_team-0.0.0.9016.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
-ai_data_science_team/_version.py,sha256=c-XrUvZG3E6SWR9NMQqLxISzMZJUpsnK0FlIEMHAOls,27
+ai_data_science_team/_version.py,sha256=CuRBSRSns8bxBgkn7Hp4BqQhLmZGuLWdyc2Xq7zO6ww,27
 ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
 ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
 ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
@@ -9,7 +9,7 @@ ai_data_science_team/agents/data_wrangling_agent.py,sha256=jyBrEfLsgIqSF6xcmRgnk
 ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
 ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
 ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
-ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=x0kTwDo0BNbYzgA0YamMWdqRjx0upZgeXp9nF6C6_8E,8364
+ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=RiwpAp2dIZyN1kRNk7WBUI5KsiP14dLuHm8fhOCsKCk,8228
 ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
 ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NXrNHb723NeF2rns0Y0,33934
@@ -26,7 +26,7 @@ ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J
 ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
 ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
-ai_data_science_team/tools/eda.py,sha256=orabE8qaYj5TC5n7CRS6rHOPkyBVxr488631AwkVKVg,12726
+ai_data_science_team/tools/eda.py,sha256=ycE_VAgeDoJyZpt6jjprID-D3ocseYTdzlry-qiSc5w,14201
 ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
 ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
 ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -37,8 +37,8 @@ ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcU
 ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
 ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
 ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
-ai_data_science_team-0.0.0.9015.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
-ai_data_science_team-0.0.0.9015.dist-info/METADATA,sha256=tIcThz7trmAG6TZAnDHxy8ntBslXMKS5xSUbvaTygyQ,13164
-ai_data_science_team-0.0.0.9015.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-ai_data_science_team-0.0.0.9015.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
-ai_data_science_team-0.0.0.9015.dist-info/RECORD,,
+ai_data_science_team-0.0.0.9016.dist-info/licenses/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ai_data_science_team-0.0.0.9016.dist-info/METADATA,sha256=Fxmv56STouZdBJurMyf98VgpATeLYajJlmIDtgsbPXg,13746
+ai_data_science_team-0.0.0.9016.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ai_data_science_team-0.0.0.9016.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ai_data_science_team-0.0.0.9016.dist-info/RECORD,,

{ai_data_science_team-0.0.0.9015.dist-info → ai_data_science_team-0.0.0.9016.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{ai_data_science_team-0.0.0.9015.dist-info → ai_data_science_team-0.0.0.9016.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{ai_data_science_team-0.0.0.9015.dist-info → ai_data_science_team-0.0.0.9016.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-data-science-team 0.0.0.9015__py3-none-any.whl → 0.0.0.9016__py3-none-any.whl

ai-data-science-team 0.0.0.9015__py3-none-any.whl → 0.0.0.9016__py3-none-any.whl