ai-data-science-team 0.0.0.9010__tar.gz → 0.0.0.9011__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. {ai_data_science_team-0.0.0.9010/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9011}/PKG-INFO +13 -10
  2. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/README.md +12 -9
  3. ai_data_science_team-0.0.0.9011/ai_data_science_team/_version.py +1 -0
  4. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/agents/__init__.py +1 -0
  5. ai_data_science_team-0.0.0.9011/ai_data_science_team/agents/data_loader_tools_agent.py +272 -0
  6. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/ml_agents/h2o_ml_agent.py +2 -1
  7. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/ml_agents/mlflow_tools_agent.py +32 -9
  8. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/tools/data_loader.py +95 -25
  9. ai_data_science_team-0.0.0.9011/ai_data_science_team/utils/__init__.py +0 -0
  10. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011/ai_data_science_team.egg-info}/PKG-INFO +13 -10
  11. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team.egg-info/SOURCES.txt +1 -0
  12. ai_data_science_team-0.0.0.9010/ai_data_science_team/_version.py +0 -1
  13. ai_data_science_team-0.0.0.9010/ai_data_science_team/agents/data_loader_tools_agent.py +0 -69
  14. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/LICENSE +0 -0
  15. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/MANIFEST.in +0 -0
  16. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/__init__.py +0 -0
  17. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/agents/data_cleaning_agent.py +0 -0
  18. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/agents/data_visualization_agent.py +0 -0
  19. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/agents/data_wrangling_agent.py +0 -0
  20. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/agents/feature_engineering_agent.py +0 -0
  21. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/agents/sql_database_agent.py +0 -0
  22. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/ml_agents/__init__.py +0 -0
  23. /ai_data_science_team-0.0.0.9010/ai_data_science_team/parsers/__init__.py → /ai_data_science_team-0.0.0.9011/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  24. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/multiagents/__init__.py +0 -0
  25. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/multiagents/sql_data_analyst.py +0 -0
  26. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
  27. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/orchestration.py +0 -0
  28. {ai_data_science_team-0.0.0.9010/ai_data_science_team/tools → ai_data_science_team-0.0.0.9011/ai_data_science_team/parsers}/__init__.py +0 -0
  29. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/parsers/parsers.py +0 -0
  30. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/templates/__init__.py +0 -0
  31. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/templates/agent_templates.py +0 -0
  32. {ai_data_science_team-0.0.0.9010/ai_data_science_team/utils → ai_data_science_team-0.0.0.9011/ai_data_science_team/tools}/__init__.py +0 -0
  33. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/tools/dataframe.py +0 -0
  34. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/tools/h2o.py +0 -0
  35. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/tools/mlflow.py +0 -0
  36. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/tools/sql.py +0 -0
  37. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/utils/logging.py +0 -0
  38. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/utils/plotly.py +0 -0
  39. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team/utils/regex.py +0 -0
  40. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
  41. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team.egg-info/requires.txt +0 -0
  42. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/ai_data_science_team.egg-info/top_level.txt +0 -0
  43. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/requirements.txt +0 -0
  44. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/setup.cfg +0 -0
  45. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9011}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9010
3
+ Version: 0.0.0.9011
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -93,8 +93,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
93
  - [Apps Available Now](#apps-available-now)
94
94
  - [🔥 Agentic Applications](#-agentic-applications)
95
95
  - [Agents Available Now](#agents-available-now)
96
+ - [Agents](#agents)
96
97
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
97
- - [Data Science Agents](#data-science-agents-1)
98
98
  - [Multi-Agents](#multi-agents)
99
99
  - [Agents Coming Soon](#agents-coming-soon)
100
100
  - [Disclaimer](#disclaimer)
@@ -122,7 +122,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
122
 
123
123
  This project is a work in progress. New data science agents will be released soon.
124
124
 
125
- ![Data Science Team](/img/ai_data_science_team.jpg)
125
+ ![AI Data Science Team](/img/ai_data_science_team_.jpg)
126
126
 
127
127
  ### NEW: Multi-Agents
128
128
 
@@ -146,18 +146,21 @@ This is a top secret project I'm working on. It's a multi-agent data science app
146
146
 
147
147
  ### Agents Available Now
148
148
 
149
+ #### Agents
150
+
151
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
152
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
153
+ 3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
154
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
155
+ 5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
156
+ 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
157
+
158
+
149
159
  #### 🔥🔥 NEW! Machine Learning Agents
150
160
 
151
161
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
152
162
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
153
163
 
154
- #### Data Science Agents
155
-
156
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
157
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
158
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
159
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
160
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
161
164
 
162
165
  #### Multi-Agents
163
166
 
@@ -46,8 +46,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
46
46
  - [Apps Available Now](#apps-available-now)
47
47
  - [🔥 Agentic Applications](#-agentic-applications)
48
48
  - [Agents Available Now](#agents-available-now)
49
+ - [Agents](#agents)
49
50
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
50
- - [Data Science Agents](#data-science-agents-1)
51
51
  - [Multi-Agents](#multi-agents)
52
52
  - [Agents Coming Soon](#agents-coming-soon)
53
53
  - [Disclaimer](#disclaimer)
@@ -75,7 +75,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
75
75
 
76
76
  This project is a work in progress. New data science agents will be released soon.
77
77
 
78
- ![Data Science Team](/img/ai_data_science_team.jpg)
78
+ ![AI Data Science Team](/img/ai_data_science_team_.jpg)
79
79
 
80
80
  ### NEW: Multi-Agents
81
81
 
@@ -99,18 +99,21 @@ This is a top secret project I'm working on. It's a multi-agent data science app
99
99
 
100
100
  ### Agents Available Now
101
101
 
102
+ #### Agents
103
+
104
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
105
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
106
+ 3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
107
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
108
+ 5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
109
+ 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
110
+
111
+
102
112
  #### 🔥🔥 NEW! Machine Learning Agents
103
113
 
104
114
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
105
115
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
106
116
 
107
- #### Data Science Agents
108
-
109
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
110
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
111
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
112
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
113
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
114
117
 
115
118
  #### Multi-Agents
116
119
 
@@ -0,0 +1 @@
1
+ __version__ = "0.0.0.9011"
@@ -3,3 +3,4 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
+ from ai_data_science_team.agents.data_loader_tools_agent import make_data_loader_tools_agent, DataLoaderToolsAgent
@@ -0,0 +1,272 @@
1
+
2
+
3
+
4
+ from typing import Any, Optional, Annotated, Sequence, List, Dict
5
+ import operator
6
+
7
+ import pandas as pd
8
+ import os
9
+
10
+ from IPython.display import Markdown
11
+
12
+ from langchain_core.messages import BaseMessage, AIMessage
13
+
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+ from ai_data_science_team.tools.data_loader import (
21
+ load_directory,
22
+ load_file,
23
+ list_directory_contents,
24
+ list_directory_recursive,
25
+ get_file_info,
26
+ search_files_by_pattern,
27
+ )
28
+
29
+ AGENT_NAME = "data_loader_tools_agent"
30
+
31
+ tools = [
32
+ load_directory,
33
+ load_file,
34
+ list_directory_contents,
35
+ list_directory_recursive,
36
+ get_file_info,
37
+ search_files_by_pattern,
38
+ ]
39
+
40
+ class DataLoaderToolsAgent(BaseAgent):
41
+ """
42
+ A Data Loader Agent that can interact with data loading tools and search for files in your file system.
43
+
44
+ Parameters:
45
+ ----------
46
+ model : langchain.llms.base.LLM
47
+ The language model used to generate the tool calling agent.
48
+ create_react_agent_kwargs : dict
49
+ Additional keyword arguments to pass to the create_react_agent function.
50
+ invoke_react_agent_kwargs : dict
51
+ Additional keyword arguments to pass to the invoke method of the react agent.
52
+
53
+ Methods:
54
+ --------
55
+ update_params(**kwargs)
56
+ Updates the agent's parameters and rebuilds the compiled graph.
57
+ ainvoke_agent(user_instructions: str=None, **kwargs)
58
+ Runs the agent with the given user instructions asynchronously.
59
+ invoke_agent(user_instructions: str=None, **kwargs)
60
+ Runs the agent with the given user instructions.
61
+ get_internal_messages(markdown: bool=False)
62
+ Returns the internal messages from the agent's response.
63
+ get_artifacts(as_dataframe: bool=False)
64
+ Returns the data loader artifacts from the agent's response.
65
+ get_ai_message(markdown: bool=False)
66
+ Returns the AI message from the agent's response.
67
+
68
+ """
69
+
70
+ def __init__(
71
+ self,
72
+ model: Any,
73
+ create_react_agent_kwargs: Optional[Dict]={},
74
+ invoke_react_agent_kwargs: Optional[Dict]={},
75
+ ):
76
+ self._params = {
77
+ "model": model,
78
+ "create_react_agent_kwargs": create_react_agent_kwargs,
79
+ "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
80
+ }
81
+ self._compiled_graph = self._make_compiled_graph()
82
+ self.response = None
83
+
84
+ def _make_compiled_graph(self):
85
+ """
86
+ Creates the compiled graph for the agent.
87
+ """
88
+ self.response = None
89
+ return make_data_loader_tools_agent(**self._params)
90
+
91
+
92
+ def update_params(self, **kwargs):
93
+ """
94
+ Updates the agent's parameters and rebuilds the compiled graph.
95
+ """
96
+ for k, v in kwargs.items():
97
+ self._params[k] = v
98
+ self._compiled_graph = self._make_compiled_graph()
99
+
100
+ async def ainvoke_agent(
101
+ self,
102
+ user_instructions: str=None,
103
+ **kwargs
104
+ ):
105
+ """
106
+ Runs the agent with the given user instructions asynchronously.
107
+
108
+ Parameters:
109
+ ----------
110
+ user_instructions : str, optional
111
+ The user instructions to pass to the agent.
112
+ kwargs : dict, optional
113
+ Additional keyword arguments to pass to the agent's ainvoke method.
114
+
115
+ """
116
+ response = await self._compiled_graph.ainvoke(
117
+ {
118
+ "user_instructions": user_instructions,
119
+ },
120
+ **kwargs
121
+ )
122
+ self.response = response
123
+ return None
124
+
125
+ def invoke_agent(
126
+ self,
127
+ user_instructions: str=None,
128
+ **kwargs
129
+ ):
130
+ """
131
+ Runs the agent with the given user instructions.
132
+
133
+ Parameters:
134
+ ----------
135
+ user_instructions : str, optional
136
+ The user instructions to pass to the agent.
137
+ kwargs : dict, optional
138
+ Additional keyword arguments to pass to the agent's invoke method.
139
+
140
+ """
141
+ response = self._compiled_graph.invoke(
142
+ {
143
+ "user_instructions": user_instructions,
144
+ },
145
+ **kwargs
146
+ )
147
+ self.response = response
148
+ return None
149
+
150
+ def get_internal_messages(self, markdown: bool=False):
151
+ """
152
+ Returns the internal messages from the agent's response.
153
+ """
154
+ pretty_print = "\n\n".join([f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}" for msg in self.response["internal_messages"]])
155
+ if markdown:
156
+ return Markdown(pretty_print)
157
+ else:
158
+ return self.response["internal_messages"]
159
+
160
+ def get_artifacts(self, as_dataframe: bool=False):
161
+ """
162
+ Returns the data loader artifacts from the agent's response.
163
+ """
164
+ if as_dataframe:
165
+ return pd.DataFrame(self.response["data_loader_artifacts"])
166
+ else:
167
+ return self.response["data_loader_artifacts"]
168
+
169
+ def get_ai_message(self, markdown: bool=False):
170
+ """
171
+ Returns the AI message from the agent's response.
172
+ """
173
+ if markdown:
174
+ return Markdown(self.response["messages"][0].content)
175
+ else:
176
+ return self.response["messages"][0].content
177
+
178
+
179
+
180
+ def make_data_loader_tools_agent(
181
+ model: Any,
182
+ create_react_agent_kwargs: Optional[Dict]={},
183
+ invoke_react_agent_kwargs: Optional[Dict]={},
184
+ ):
185
+ """
186
+ Creates a Data Loader Agent that can interact with data loading tools.
187
+
188
+ Parameters:
189
+ ----------
190
+ model : langchain.llms.base.LLM
191
+ The language model used to generate the tool calling agent.
192
+ create_react_agent_kwargs : dict
193
+ Additional keyword arguments to pass to the create_react_agent function.
194
+ invoke_react_agent_kwargs : dict
195
+ Additional keyword arguments to pass to the invoke method of the react agent.
196
+
197
+ Returns:
198
+ --------
199
+ app : langchain.graphs.CompiledStateGraph
200
+ An agent that can interact with data loading tools.
201
+ """
202
+
203
+ class GraphState(AgentState):
204
+ internal_messages: Annotated[Sequence[BaseMessage], operator.add]
205
+ user_instructions: str
206
+ data_loader_artifacts: dict
207
+
208
+ def data_loader_agent(state):
209
+
210
+ print(format_agent_name(AGENT_NAME))
211
+ print(" ")
212
+
213
+ print(" * RUN REACT TOOL-CALLING AGENT")
214
+
215
+ tool_node = ToolNode(
216
+ tools=tools
217
+ )
218
+
219
+ data_loader_agent = create_react_agent(
220
+ model,
221
+ tools=tool_node,
222
+ state_schema=GraphState,
223
+ **create_react_agent_kwargs,
224
+ )
225
+
226
+ response = data_loader_agent.invoke(
227
+ {
228
+ "messages": [("user", state["user_instructions"])],
229
+ },
230
+ invoke_react_agent_kwargs,
231
+ )
232
+
233
+ print(" * POST-PROCESS RESULTS")
234
+
235
+ internal_messages = response['messages']
236
+
237
+ # Ensure there is at least one AI message
238
+ if not internal_messages:
239
+ return {
240
+ "internal_messages": [],
241
+ "mlflow_artifacts": None,
242
+ }
243
+
244
+ # Get the last AI message
245
+ last_ai_message = AIMessage(internal_messages[-1].content, role = AGENT_NAME)
246
+
247
+ # Get the last tool artifact safely
248
+ last_tool_artifact = None
249
+ if len(internal_messages) > 1:
250
+ last_message = internal_messages[-2] # Get second-to-last message
251
+ if hasattr(last_message, "artifact"): # Check if it has an "artifact"
252
+ last_tool_artifact = last_message.artifact
253
+ elif isinstance(last_message, dict) and "artifact" in last_message:
254
+ last_tool_artifact = last_message["artifact"]
255
+
256
+ return {
257
+ "messages": [last_ai_message],
258
+ "internal_messages": internal_messages,
259
+ "data_loader_artifacts": last_tool_artifact,
260
+ }
261
+
262
+ workflow = StateGraph(GraphState)
263
+
264
+ workflow.add_node("data_loader_agent", data_loader_agent)
265
+
266
+ workflow.add_edge(START, "data_loader_agent")
267
+ workflow.add_edge("data_loader_agent", END)
268
+
269
+ app = workflow.compile()
270
+
271
+ return app
272
+
@@ -506,6 +506,7 @@ def make_h2o_ml_agent(
506
506
  while remaining flexible to user instructions.
507
507
  - Return a dict with keys: leaderboard, best_model_id, model_path, and model_results.
508
508
  - If enable_mlfow is True, log the top metrics and save the model as an artifact. (See example function)
509
+ - IMPORTANT: if enable_mlflow is True, make sure to set enable_mlflow to True in the function definition.
509
510
 
510
511
  Initial User Instructions (Disregard any instructions that are unrelated to modeling):
511
512
  {user_instructions}
@@ -533,7 +534,7 @@ def make_h2o_ml_agent(
533
534
  sort_metric: str ,
534
535
  model_directory: Optional[str] = None,
535
536
  log_path: Optional[str] = None,
536
- enable_mlflow: bool,
537
+ enable_mlflow: bool, # If the user has specified to enable MLflow, make sure to make this True
537
538
  mlflow_tracking_uri: Optional[str],
538
539
  mlflow_experiment_name: str,
539
540
  mlflow_run_name: str,
@@ -1,5 +1,5 @@
1
1
 
2
- from typing import Any, Optional, Annotated, Sequence
2
+ from typing import Any, Optional, Annotated, Sequence, Dict
3
3
  import operator
4
4
 
5
5
  import pandas as pd
@@ -63,8 +63,10 @@ class MLflowToolsAgent(BaseAgent):
63
63
  The tracking URI for MLflow. Defaults to None.
64
64
  mlflow_registry_uri : str, optional
65
65
  The registry URI for MLflow. Defaults to None.
66
- **react_agent_kwargs : dict, optional
67
- Additional keyword arguments to pass to the agent's react agent.
66
+ create_react_agent_kwargs : dict
67
+ Additional keyword arguments to pass to the create_react_agent function.
68
+ invoke_react_agent_kwargs : dict
69
+ Additional keyword arguments to pass to the invoke method of the react agent.
68
70
 
69
71
  Methods:
70
72
  --------
@@ -114,13 +116,15 @@ class MLflowToolsAgent(BaseAgent):
114
116
  model: Any,
115
117
  mlflow_tracking_uri: Optional[str]=None,
116
118
  mlflow_registry_uri: Optional[str]=None,
117
- **react_agent_kwargs,
119
+ create_react_agent_kwargs: Optional[Dict]={},
120
+ invoke_react_agent_kwargs: Optional[Dict]={},
118
121
  ):
119
122
  self._params = {
120
123
  "model": model,
121
124
  "mlflow_tracking_uri": mlflow_tracking_uri,
122
125
  "mlflow_registry_uri": mlflow_registry_uri,
123
- **react_agent_kwargs,
126
+ "create_react_agent_kwargs": create_react_agent_kwargs,
127
+ "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
124
128
  }
125
129
  self._compiled_graph = self._make_compiled_graph()
126
130
  self.response = None
@@ -185,8 +189,6 @@ class MLflowToolsAgent(BaseAgent):
185
189
  The user instructions to pass to the agent.
186
190
  data_raw : pd.DataFrame, optional
187
191
  The raw data to pass to the agent. Used for prediction and tool calls where data is required.
188
- kwargs : dict, optional
189
- Additional keyword arguments to pass to the agents invoke method.
190
192
 
191
193
  """
192
194
  response = self._compiled_graph.invoke(
@@ -234,10 +236,30 @@ def make_mlflow_tools_agent(
234
236
  model: Any,
235
237
  mlflow_tracking_uri: str=None,
236
238
  mlflow_registry_uri: str=None,
237
- **react_agent_kwargs,
239
+ create_react_agent_kwargs: Optional[Dict]={},
240
+ invoke_react_agent_kwargs: Optional[Dict]={},
238
241
  ):
239
242
  """
240
243
  MLflow Tool Calling Agent
244
+
245
+ Parameters:
246
+ ----------
247
+ model : Any
248
+ The language model used to generate the agent.
249
+ mlflow_tracking_uri : str, optional
250
+ The tracking URI for MLflow. Defaults to None.
251
+ mlflow_registry_uri : str, optional
252
+ The registry URI for MLflow. Defaults to None.
253
+ create_react_agent_kwargs : dict, optional
254
+ Additional keyword arguments to pass to the agent's create_react_agent method.
255
+ invoke_react_agent_kwargs : dict, optional
256
+ Additional keyword arguments to pass to the agent's invoke method.
257
+
258
+ Returns
259
+ -------
260
+ app : langchain.graphs.CompiledStateGraph
261
+ A compiled state graph for the MLflow Tool Calling Agent.
262
+
241
263
  """
242
264
 
243
265
  try:
@@ -274,7 +296,7 @@ def make_mlflow_tools_agent(
274
296
  model,
275
297
  tools=tool_node,
276
298
  state_schema=GraphState,
277
- **react_agent_kwargs,
299
+ **create_react_agent_kwargs,
278
300
  )
279
301
 
280
302
  response = mlflow_agent.invoke(
@@ -282,6 +304,7 @@ def make_mlflow_tools_agent(
282
304
  "messages": [("user", state["user_instructions"])],
283
305
  "data_raw": state["data_raw"],
284
306
  },
307
+ invoke_react_agent_kwargs,
285
308
  )
286
309
 
287
310
  print(" * POST-PROCESS RESULTS")
@@ -1,41 +1,77 @@
1
1
 
2
2
  from langchain.tools import tool
3
+ from langgraph.prebuilt import InjectedState
3
4
 
4
5
  import pandas as pd
6
+ import os
5
7
 
6
- from typing import Tuple, List, Dict
8
+ from typing import Tuple, List, Dict, Optional, Annotated
7
9
 
8
10
 
9
11
  @tool(response_format='content_and_artifact')
10
- def load_directory(dir_path: str) -> Tuple[str, Dict]:
12
+ def load_directory(
13
+ directory_path: str = os.getcwd(),
14
+ file_type: Optional[str] = None
15
+ ) -> Tuple[str, Dict]:
11
16
  """
12
17
  Tool: load_directory
13
- Description: Loads all recognized tabular files in a directory.
18
+ Description: Loads all recognized tabular files in a directory.
19
+ If file_type is specified (e.g., 'csv'), only files
20
+ with that extension are loaded.
14
21
 
15
22
  Parameters:
16
23
  ----------
17
- dir_path : str
18
- The path to the directory to load.
24
+ directory_path : str
25
+ The path to the directory to load. Defaults to the current working directory.
26
+
27
+ file_type : str, optional
28
+ The extension of the file type you want to load exclusively
29
+ (e.g., 'csv', 'xlsx', 'parquet'). If None or not provided,
30
+ attempts to load all recognized tabular files.
19
31
 
20
32
  Returns:
21
33
  -------
22
34
  Tuple[str, Dict]
23
35
  A tuple containing a message and a dictionary of data frames.
24
36
  """
25
- print(" * Tool: load_directory")
37
+ print(f" * Tool: load_directory | {directory_path}")
38
+
26
39
  import os
27
40
  import pandas as pd
41
+
42
+ if directory_path is None:
43
+ return "No directory path provided.", {}
44
+
45
+ if not os.path.isdir(directory_path):
46
+ return f"Directory not found: {directory_path}", {}
47
+
28
48
  data_frames = {}
29
- for filename in os.listdir(dir_path):
30
- file_path = os.path.join(dir_path, filename)
49
+
50
+ for filename in os.listdir(directory_path):
51
+ file_path = os.path.join(directory_path, filename)
52
+
31
53
  # Skip directories
32
54
  if os.path.isdir(file_path):
33
55
  continue
56
+
57
+ # If file_type is specified, only process files that match.
58
+ if file_type:
59
+ # Make sure extension check is case-insensitive
60
+ if not filename.lower().endswith(f".{file_type.lower()}"):
61
+ continue
62
+
34
63
  try:
64
+ # Attempt to auto-detect and load the file
35
65
  data_frames[filename] = auto_load_file(file_path).to_dict()
36
66
  except Exception as e:
67
+ # If loading fails, record the error message
37
68
  data_frames[filename] = f"Error loading file: {e}"
38
- return f"Returned the following data frames: {list(data_frames.keys())}", data_frames
69
+
70
+ return (
71
+ f"Returned the following data frames: {list(data_frames.keys())}",
72
+ data_frames
73
+ )
74
+
39
75
 
40
76
  @tool(response_format='content_and_artifact')
41
77
  def load_file(file_path: str) -> Tuple[str, Dict]:
@@ -52,12 +88,15 @@ def load_file(file_path: str) -> Tuple[str, Dict]:
52
88
  Tuple[str, Dict]
53
89
  A tuple containing a message and a dictionary of the data frame.
54
90
  """
55
- print(" * Tool: load_file")
91
+ print(f" * Tool: load_file | {file_path}")
56
92
  return f"Returned the following data frame from this file: {file_path}", auto_load_file(file_path).to_dict()
57
93
 
58
94
 
59
95
  @tool(response_format='content_and_artifact')
60
- def list_directory_contents(directory_path: str, show_hidden: bool = False) -> Tuple[List[str], List[Dict]]:
96
+ def list_directory_contents(
97
+ directory_path: str = os.getcwd(),
98
+ show_hidden: bool = False
99
+ ) -> Tuple[List[str], List[Dict]]:
61
100
  """
62
101
  Tool: list_directory_contents
63
102
  Description: Lists all files and folders in the specified directory.
@@ -67,30 +106,51 @@ def list_directory_contents(directory_path: str, show_hidden: bool = False) -> T
67
106
  Returns:
68
107
  tuple:
69
108
  - content (list[str]): A list of filenames/folders (suitable for display)
70
- - artifact (list[dict]): A list of dictionaries where each dict has keys like {"filename": <name>}.
71
- This structure can be easily converted to a pandas DataFrame.
109
+ - artifact (list[dict]): A list of dictionaries where each dict includes
110
+ the keys {"filename": <name>, "type": <'file' or 'directory'>}.
111
+ This structure can be easily converted to a pandas DataFrame.
72
112
  """
73
- print(" * Tool: list_directory_contents")
113
+ print(f" * Tool: list_directory_contents | {directory_path}")
74
114
  import os
75
-
115
+
116
+ if directory_path is None:
117
+ return "No directory path provided.", []
118
+
119
+ if not os.path.isdir(directory_path):
120
+ return f"Directory not found: {directory_path}", []
121
+
76
122
  items = []
77
123
  for item in os.listdir(directory_path):
78
124
  # If show_hidden is False, skip items starting with '.'
79
125
  if not show_hidden and item.startswith('.'):
80
126
  continue
81
127
  items.append(item)
128
+ items.reverse()
82
129
 
83
- # content: just the raw list of filenames
84
- content = items
85
-
86
- # artifact: list of dicts (each row is {"filename": ...}), easily turned into a DataFrame
87
- artifact = [{"filename": item} for item in items]
130
+ # content: just the raw list of item names (files/folders).
131
+ content = items.copy()
132
+
133
+ content.append(f"Total items: {len(items)}")
134
+ content.append(f"Directory: {directory_path}")
135
+
136
+ # artifact: list of dicts with both "filename" and "type" keys.
137
+ artifact = []
138
+ for item in items:
139
+ item_path = os.path.join(directory_path, item)
140
+ artifact.append({
141
+ "filename": item,
142
+ "type": "directory" if os.path.isdir(item_path) else "file"
143
+ })
88
144
 
89
145
  return content, artifact
90
146
 
91
147
 
148
+
92
149
  @tool(response_format='content_and_artifact')
93
- def list_directory_recursive(directory_path: str, show_hidden: bool = False) -> Tuple[str, List[Dict]]:
150
+ def list_directory_recursive(
151
+ directory_path: str = os.getcwd(),
152
+ show_hidden: bool = False
153
+ ) -> Tuple[str, List[Dict]]:
94
154
  """
95
155
  Tool: list_directory_recursive
96
156
  Description:
@@ -111,13 +171,19 @@ def list_directory_recursive(directory_path: str, show_hidden: bool = False) ->
111
171
  Example:
112
172
  content, artifact = list_directory_recursive("/path/to/folder", show_hidden=False)
113
173
  """
114
- print(" * Tool: list_directory_recursive")
174
+ print(f" * Tool: list_directory_recursive | {directory_path}")
115
175
 
116
176
  # We'll store two things as we recurse:
117
177
  # 1) lines for building the "tree" string
118
178
  # 2) records in a list of dicts for easy DataFrame creation
119
179
  import os
120
180
 
181
+ if directory_path is None:
182
+ return "No directory path provided.", {}
183
+
184
+ if not os.path.isdir(directory_path):
185
+ return f"Directory not found: {directory_path}", {}
186
+
121
187
  lines = []
122
188
  records = []
123
189
 
@@ -210,7 +276,7 @@ def get_file_info(file_path: str) -> Tuple[str, List[Dict]]:
210
276
  Example:
211
277
  content, artifact = get_file_info("/path/to/mydata.csv")
212
278
  """
213
- print(" * Tool: get_file_info")
279
+ print(f" * Tool: get_file_info | {file_path}")
214
280
 
215
281
  # Ensure the file exists
216
282
  import os
@@ -244,7 +310,11 @@ def get_file_info(file_path: str) -> Tuple[str, List[Dict]]:
244
310
 
245
311
 
246
312
  @tool(response_format='content_and_artifact')
247
- def search_files_by_pattern(directory_path: str, pattern: str = "*.csv", recursive: bool = False) -> Tuple[str, List[Dict]]:
313
+ def search_files_by_pattern(
314
+ directory_path: str = os.getcwd(),
315
+ pattern: str = "*.csv",
316
+ recursive: bool = False
317
+ ) -> Tuple[str, List[Dict]]:
248
318
  """
249
319
  Tool: search_files_by_pattern
250
320
  Description:
@@ -266,7 +336,7 @@ def search_files_by_pattern(directory_path: str, pattern: str = "*.csv", recursi
266
336
  Example:
267
337
  content, artifact = search_files_by_pattern("/path/to/folder", "*.csv", recursive=True)
268
338
  """
269
- print(" * Tool: search_files_by_pattern")
339
+ print(f" * Tool: search_files_by_pattern | {directory_path}")
270
340
 
271
341
  import os
272
342
  import fnmatch
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9010
3
+ Version: 0.0.0.9011
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -93,8 +93,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
93
  - [Apps Available Now](#apps-available-now)
94
94
  - [🔥 Agentic Applications](#-agentic-applications)
95
95
  - [Agents Available Now](#agents-available-now)
96
+ - [Agents](#agents)
96
97
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
97
- - [Data Science Agents](#data-science-agents-1)
98
98
  - [Multi-Agents](#multi-agents)
99
99
  - [Agents Coming Soon](#agents-coming-soon)
100
100
  - [Disclaimer](#disclaimer)
@@ -122,7 +122,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
122
 
123
123
  This project is a work in progress. New data science agents will be released soon.
124
124
 
125
- ![Data Science Team](/img/ai_data_science_team.jpg)
125
+ ![AI Data Science Team](/img/ai_data_science_team_.jpg)
126
126
 
127
127
  ### NEW: Multi-Agents
128
128
 
@@ -146,18 +146,21 @@ This is a top secret project I'm working on. It's a multi-agent data science app
146
146
 
147
147
  ### Agents Available Now
148
148
 
149
+ #### Agents
150
+
151
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
152
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
153
+ 3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
154
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
155
+ 5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
156
+ 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
157
+
158
+
149
159
  #### 🔥🔥 NEW! Machine Learning Agents
150
160
 
151
161
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
152
162
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
153
163
 
154
- #### Data Science Agents
155
-
156
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
157
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
158
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
159
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
160
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
161
164
 
162
165
  #### Multi-Agents
163
166
 
@@ -20,6 +20,7 @@ ai_data_science_team/agents/feature_engineering_agent.py
20
20
  ai_data_science_team/agents/sql_database_agent.py
21
21
  ai_data_science_team/ml_agents/__init__.py
22
22
  ai_data_science_team/ml_agents/h2o_ml_agent.py
23
+ ai_data_science_team/ml_agents/h2o_ml_tools_agent.py
23
24
  ai_data_science_team/ml_agents/mlflow_tools_agent.py
24
25
  ai_data_science_team/multiagents/__init__.py
25
26
  ai_data_science_team/multiagents/sql_data_analyst.py
@@ -1 +0,0 @@
1
- __version__ = "0.0.0.9010"
@@ -1,69 +0,0 @@
1
-
2
-
3
-
4
- from typing import Any, Optional, Annotated, Sequence, List, Dict
5
- import operator
6
-
7
- import pandas as pd
8
- import os
9
-
10
- from IPython.display import Markdown
11
-
12
- from langchain_core.messages import BaseMessage, AIMessage
13
-
14
- from langgraph.prebuilt import create_react_agent, ToolNode
15
- from langgraph.prebuilt.chat_agent_executor import AgentState
16
- from langgraph.graph import START, END, StateGraph
17
-
18
- from ai_data_science_team.templates import BaseAgent
19
- from ai_data_science_team.utils.regex import format_agent_name
20
- from ai_data_science_team.tools.data_loader import (
21
- load_directory,
22
- load_file,
23
- list_directory_contents,
24
- list_directory_recursive,
25
- get_file_info,
26
- search_files_by_pattern,
27
- )
28
-
29
- AGENT_NAME = "data_loader_tools_agent"
30
-
31
- tools = [
32
- load_directory,
33
- load_file,
34
- list_directory_contents,
35
- list_directory_recursive,
36
- get_file_info,
37
- search_files_by_pattern,
38
- ]
39
-
40
-
41
-
42
- def make_data_loader_tools_agent(
43
- model: Any,
44
- directory: Optional[str] = os.getcwd(),
45
- ):
46
- """
47
- Creates a Data Loader Agent that can interact with data loading tools.
48
-
49
- Parameters:
50
- ----------
51
- model : langchain.llms.base.LLM
52
- The language model used to generate the tool calling agent.
53
- directory : str, optional
54
- The directory to search for files. Defaults to the current working directory.
55
-
56
- Returns:
57
- --------
58
- Data Loader Agent
59
- An agent that can interact with data loading tools.
60
- """
61
-
62
- class GraphState(AgentState):
63
- internal_messages: Annotated[Sequence[BaseMessage], operator.add]
64
- directory: str
65
- user_instructions: str
66
- data_artifacts: dict
67
-
68
- pass
69
-