ai-data-science-team 0.0.0.9015__tar.gz → 0.0.0.9016__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_data_science_team-0.0.0.9015/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9016}/PKG-INFO +62 -56
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/README.md +59 -54
- ai_data_science_team-0.0.0.9016/ai_data_science_team/_version.py +1 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/eda_tools_agent.py +46 -50
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/eda.py +121 -60
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016/ai_data_science_team.egg-info}/PKG-INFO +62 -56
- ai_data_science_team-0.0.0.9015/ai_data_science_team/_version.py +0 -1
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/MANIFEST.in +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_cleaning_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_loader_tools_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_visualization_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_wrangling_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/feature_engineering_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/sql_database_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/modeling_tools_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/h2o_ml_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/mlflow_tools_agent.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/pandas_data_analyst.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/sql_data_analyst.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/orchestration.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/parsers/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/parsers/parsers.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/templates/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/templates/agent_templates.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/data_loader.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/dataframe.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/h2o.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/mlflow.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/sql.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/html.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/logging.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/matplotlib.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/messages.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/plotly.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/regex.py +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/SOURCES.txt +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/requires.txt +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/top_level.txt +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/requirements.txt +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/setup.cfg +0 -0
- {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: ai-data-science-team
|
3
|
-
Version: 0.0.0.
|
3
|
+
Version: 0.0.0.9016
|
4
4
|
Summary: Build and run an AI-powered data science team.
|
5
5
|
Home-page: https://github.com/business-science/ai-data-science-team
|
6
6
|
Author: Matt Dancho
|
@@ -47,6 +47,7 @@ Dynamic: classifier
|
|
47
47
|
Dynamic: description
|
48
48
|
Dynamic: description-content-type
|
49
49
|
Dynamic: home-page
|
50
|
+
Dynamic: license-file
|
50
51
|
Dynamic: provides-extra
|
51
52
|
Dynamic: requires-dist
|
52
53
|
Dynamic: requires-python
|
@@ -97,9 +98,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
97
98
|
- [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
|
98
99
|
- [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
|
99
100
|
- [Data Science Agents](#data-science-agents)
|
101
|
+
- [🔥 NEW: Data Science Apps](#-new-data-science-apps)
|
100
102
|
- [NEW: Multi-Agents](#new-multi-agents)
|
101
|
-
- [Data Science Apps](#data-science-apps)
|
102
|
-
- [Apps Available Now](#apps-available-now)
|
103
103
|
- [🔥 Agentic Applications](#-agentic-applications)
|
104
104
|
- [Agents Available Now](#agents-available-now)
|
105
105
|
- [Standard Agents](#standard-agents)
|
@@ -110,11 +110,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
110
110
|
- [Disclaimer](#disclaimer)
|
111
111
|
- [Installation](#installation)
|
112
112
|
- [Usage](#usage)
|
113
|
-
- [Example
|
114
|
-
- [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
|
113
|
+
- [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
|
115
114
|
- [Contributing](#contributing)
|
116
115
|
- [License](#license)
|
117
116
|
- [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
|
117
|
+
- [⭐️ Star History](#️-star-history)
|
118
118
|
|
119
119
|
## Companies That Want A Custom AI Data Science Team (And AI Apps)
|
120
120
|
|
@@ -134,21 +134,24 @@ This project is a work in progress. New data science agents will be released soo
|
|
134
134
|
|
135
135
|

|
136
136
|
|
137
|
-
### NEW:
|
137
|
+
### 🔥 NEW: Data Science Apps
|
138
138
|
|
139
|
-
**🔥 Pandas Data Analyst
|
139
|
+
**🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
|
140
|
+
|
141
|
+

|
142
|
+
|
143
|
+
**🔥 SQL Database Agent:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
|
140
144
|
|
141
|
-
|
145
|
+
**🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
|
142
146
|
|
143
|
-
|
147
|
+
[See all available apps here](/apps)
|
144
148
|
|
145
|
-
|
149
|
+
### NEW: Multi-Agents
|
146
150
|
|
147
|
-
|
151
|
+
**🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
|
148
152
|
|
149
|
-
|
153
|
+

|
150
154
|
|
151
|
-
[See all available apps here](/apps)
|
152
155
|
|
153
156
|
#### 🔥 Agentic Applications
|
154
157
|
|
@@ -205,6 +208,14 @@ By using this software, you agree to use it solely for learning purposes.
|
|
205
208
|
|
206
209
|
## Installation
|
207
210
|
|
211
|
+
You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
|
212
|
+
|
213
|
+
``` bash
|
214
|
+
pip install ai-data-science-team
|
215
|
+
```
|
216
|
+
|
217
|
+
Or, if you want the latest version from GitHub:
|
218
|
+
|
208
219
|
``` bash
|
209
220
|
pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
|
210
221
|
```
|
@@ -213,55 +224,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
|
|
213
224
|
|
214
225
|
[See all examples here.](/examples)
|
215
226
|
|
216
|
-
### Example
|
227
|
+
### Example: H2O Machine Learning Agent
|
217
228
|
|
218
|
-
[See the full example here.](/examples/
|
229
|
+
[See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
|
219
230
|
|
220
231
|
``` python
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
232
|
+
# Import libraries
|
233
|
+
from langchain_openai import ChatOpenAI
|
234
|
+
import pandas as pd
|
235
|
+
import h2o
|
236
|
+
import os
|
237
|
+
from ai_data_science_team.ml_agents import H2OMLAgent
|
238
|
+
|
239
|
+
# Load the data
|
240
|
+
df = pd.read_csv("data/churn_data.csv")
|
241
|
+
df
|
242
|
+
|
243
|
+
# Initialize the language model
|
244
|
+
os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
|
245
|
+
llm = ChatOpenAI(model=MODEL)
|
246
|
+
llm
|
247
|
+
|
248
|
+
# Initialize the H2O ML Agent
|
249
|
+
ml_agent = H2OMLAgent(
|
250
|
+
model=llm,
|
251
|
+
log=True,
|
252
|
+
log_path="logs/",
|
253
|
+
model_directory="h2o_models/",
|
254
|
+
enable_mlflow=True, # Use this if you wish to log models to MLflow
|
228
255
|
)
|
229
|
-
|
230
|
-
|
231
|
-
``` bash
|
232
|
-
---FEATURE ENGINEERING AGENT----
|
233
|
-
* CREATE FEATURE ENGINEER CODE
|
234
|
-
* EXECUTING AGENT CODE
|
235
|
-
* EXPLAIN AGENT CODE
|
236
|
-
```
|
237
|
-
|
238
|
-
``` python
|
239
|
-
feature_engineering_agent.get_data_engineered()
|
240
|
-
```
|
241
|
-
|
242
|
-
### Example 2: Cleaning Data with the Data Cleaning Agent
|
243
|
-
|
244
|
-
[See the full example here.](/examples/data_cleaning_agent.ipynb)
|
245
|
-
|
246
|
-
``` python
|
247
|
-
data_cleaning_agent = DataCleaningAgent(model = llm)
|
256
|
+
ml_agent
|
248
257
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
258
|
+
# Run the agent
|
259
|
+
ml_agent.invoke_agent(
|
260
|
+
data_raw=df.drop(columns=["customerID"]),
|
261
|
+
user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
|
262
|
+
target_variable="Churn"
|
253
263
|
)
|
254
|
-
```
|
255
|
-
|
256
|
-
``` bash
|
257
|
-
---DATA CLEANING AGENT----
|
258
|
-
* CREATE DATA CLEANER CODE
|
259
|
-
* EXECUTING AGENT CODE
|
260
|
-
* EXPLAIN AGENT CODE
|
261
|
-
```
|
262
264
|
|
263
|
-
|
264
|
-
|
265
|
+
# Retrieve and display the leaderboard of models
|
266
|
+
ml_agent.get_leaderboard()
|
265
267
|
```
|
266
268
|
|
267
269
|
## Contributing
|
@@ -282,4 +284,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
|
|
282
284
|
|
283
285
|
I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
|
284
286
|
|
287
|
+
# ⭐️ Star History
|
285
288
|
|
289
|
+
[](https://star-history.com/#)
|
290
|
+
|
291
|
+
[**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
|
@@ -43,9 +43,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
43
43
|
- [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
|
44
44
|
- [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
|
45
45
|
- [Data Science Agents](#data-science-agents)
|
46
|
+
- [🔥 NEW: Data Science Apps](#-new-data-science-apps)
|
46
47
|
- [NEW: Multi-Agents](#new-multi-agents)
|
47
|
-
- [Data Science Apps](#data-science-apps)
|
48
|
-
- [Apps Available Now](#apps-available-now)
|
49
48
|
- [🔥 Agentic Applications](#-agentic-applications)
|
50
49
|
- [Agents Available Now](#agents-available-now)
|
51
50
|
- [Standard Agents](#standard-agents)
|
@@ -56,11 +55,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
56
55
|
- [Disclaimer](#disclaimer)
|
57
56
|
- [Installation](#installation)
|
58
57
|
- [Usage](#usage)
|
59
|
-
- [Example
|
60
|
-
- [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
|
58
|
+
- [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
|
61
59
|
- [Contributing](#contributing)
|
62
60
|
- [License](#license)
|
63
61
|
- [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
|
62
|
+
- [⭐️ Star History](#️-star-history)
|
64
63
|
|
65
64
|
## Companies That Want A Custom AI Data Science Team (And AI Apps)
|
66
65
|
|
@@ -80,21 +79,24 @@ This project is a work in progress. New data science agents will be released soo
|
|
80
79
|
|
81
80
|

|
82
81
|
|
83
|
-
### NEW:
|
82
|
+
### 🔥 NEW: Data Science Apps
|
84
83
|
|
85
|
-
**🔥 Pandas Data Analyst
|
84
|
+
**🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
|
85
|
+
|
86
|
+

|
87
|
+
|
88
|
+
**🔥 SQL Database Agent:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
|
86
89
|
|
87
|
-
|
90
|
+
**🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
|
88
91
|
|
89
|
-
|
92
|
+
[See all available apps here](/apps)
|
90
93
|
|
91
|
-
|
94
|
+
### NEW: Multi-Agents
|
92
95
|
|
93
|
-
|
96
|
+
**🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
|
94
97
|
|
95
|
-
|
98
|
+

|
96
99
|
|
97
|
-
[See all available apps here](/apps)
|
98
100
|
|
99
101
|
#### 🔥 Agentic Applications
|
100
102
|
|
@@ -151,6 +153,14 @@ By using this software, you agree to use it solely for learning purposes.
|
|
151
153
|
|
152
154
|
## Installation
|
153
155
|
|
156
|
+
You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
|
157
|
+
|
158
|
+
``` bash
|
159
|
+
pip install ai-data-science-team
|
160
|
+
```
|
161
|
+
|
162
|
+
Or, if you want the latest version from GitHub:
|
163
|
+
|
154
164
|
``` bash
|
155
165
|
pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
|
156
166
|
```
|
@@ -159,55 +169,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
|
|
159
169
|
|
160
170
|
[See all examples here.](/examples)
|
161
171
|
|
162
|
-
### Example
|
172
|
+
### Example: H2O Machine Learning Agent
|
163
173
|
|
164
|
-
[See the full example here.](/examples/
|
174
|
+
[See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
|
165
175
|
|
166
176
|
``` python
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
177
|
+
# Import libraries
|
178
|
+
from langchain_openai import ChatOpenAI
|
179
|
+
import pandas as pd
|
180
|
+
import h2o
|
181
|
+
import os
|
182
|
+
from ai_data_science_team.ml_agents import H2OMLAgent
|
183
|
+
|
184
|
+
# Load the data
|
185
|
+
df = pd.read_csv("data/churn_data.csv")
|
186
|
+
df
|
187
|
+
|
188
|
+
# Initialize the language model
|
189
|
+
os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
|
190
|
+
llm = ChatOpenAI(model=MODEL)
|
191
|
+
llm
|
192
|
+
|
193
|
+
# Initialize the H2O ML Agent
|
194
|
+
ml_agent = H2OMLAgent(
|
195
|
+
model=llm,
|
196
|
+
log=True,
|
197
|
+
log_path="logs/",
|
198
|
+
model_directory="h2o_models/",
|
199
|
+
enable_mlflow=True, # Use this if you wish to log models to MLflow
|
174
200
|
)
|
175
|
-
|
176
|
-
|
177
|
-
``` bash
|
178
|
-
---FEATURE ENGINEERING AGENT----
|
179
|
-
* CREATE FEATURE ENGINEER CODE
|
180
|
-
* EXECUTING AGENT CODE
|
181
|
-
* EXPLAIN AGENT CODE
|
182
|
-
```
|
183
|
-
|
184
|
-
``` python
|
185
|
-
feature_engineering_agent.get_data_engineered()
|
186
|
-
```
|
187
|
-
|
188
|
-
### Example 2: Cleaning Data with the Data Cleaning Agent
|
189
|
-
|
190
|
-
[See the full example here.](/examples/data_cleaning_agent.ipynb)
|
191
|
-
|
192
|
-
``` python
|
193
|
-
data_cleaning_agent = DataCleaningAgent(model = llm)
|
201
|
+
ml_agent
|
194
202
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
203
|
+
# Run the agent
|
204
|
+
ml_agent.invoke_agent(
|
205
|
+
data_raw=df.drop(columns=["customerID"]),
|
206
|
+
user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
|
207
|
+
target_variable="Churn"
|
199
208
|
)
|
200
|
-
```
|
201
|
-
|
202
|
-
``` bash
|
203
|
-
---DATA CLEANING AGENT----
|
204
|
-
* CREATE DATA CLEANER CODE
|
205
|
-
* EXECUTING AGENT CODE
|
206
|
-
* EXPLAIN AGENT CODE
|
207
|
-
```
|
208
209
|
|
209
|
-
|
210
|
-
|
210
|
+
# Retrieve and display the leaderboard of models
|
211
|
+
ml_agent.get_leaderboard()
|
211
212
|
```
|
212
213
|
|
213
214
|
## Contributing
|
@@ -228,4 +229,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
|
|
228
229
|
|
229
230
|
I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
|
230
231
|
|
232
|
+
# ⭐️ Star History
|
231
233
|
|
234
|
+
[](https://star-history.com/#)
|
235
|
+
|
236
|
+
[**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.0.0.9016"
|
@@ -1,5 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
1
|
from typing import Any, Optional, Annotated, Sequence, Dict
|
4
2
|
import operator
|
5
3
|
import pandas as pd
|
@@ -17,10 +15,11 @@ from ai_data_science_team.utils.regex import format_agent_name
|
|
17
15
|
|
18
16
|
from ai_data_science_team.tools.eda import (
|
19
17
|
explain_data,
|
20
|
-
describe_dataset,
|
21
|
-
visualize_missing,
|
22
|
-
|
18
|
+
describe_dataset,
|
19
|
+
visualize_missing,
|
20
|
+
generate_correlation_funnel,
|
23
21
|
generate_sweetviz_report,
|
22
|
+
generate_dtale_report,
|
24
23
|
)
|
25
24
|
from ai_data_science_team.utils.messages import get_tool_call_names
|
26
25
|
|
@@ -32,15 +31,17 @@ EDA_TOOLS = [
|
|
32
31
|
explain_data,
|
33
32
|
describe_dataset,
|
34
33
|
visualize_missing,
|
35
|
-
|
34
|
+
generate_correlation_funnel,
|
36
35
|
generate_sweetviz_report,
|
36
|
+
generate_dtale_report,
|
37
37
|
]
|
38
38
|
|
39
|
+
|
39
40
|
class EDAToolsAgent(BaseAgent):
|
40
41
|
"""
|
41
42
|
An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
|
42
43
|
missing data visualizations, correlation funnels, EDA reports, etc.
|
43
|
-
|
44
|
+
|
44
45
|
Parameters:
|
45
46
|
----------
|
46
47
|
model : langchain.llms.base.LLM
|
@@ -52,9 +53,9 @@ class EDAToolsAgent(BaseAgent):
|
|
52
53
|
checkpointer : Checkpointer, optional
|
53
54
|
The checkpointer for the agent.
|
54
55
|
"""
|
55
|
-
|
56
|
+
|
56
57
|
def __init__(
|
57
|
-
self,
|
58
|
+
self,
|
58
59
|
model: Any,
|
59
60
|
create_react_agent_kwargs: Optional[Dict] = {},
|
60
61
|
invoke_react_agent_kwargs: Optional[Dict] = {},
|
@@ -64,18 +65,18 @@ class EDAToolsAgent(BaseAgent):
|
|
64
65
|
"model": model,
|
65
66
|
"create_react_agent_kwargs": create_react_agent_kwargs,
|
66
67
|
"invoke_react_agent_kwargs": invoke_react_agent_kwargs,
|
67
|
-
"checkpointer": checkpointer
|
68
|
+
"checkpointer": checkpointer,
|
68
69
|
}
|
69
70
|
self._compiled_graph = self._make_compiled_graph()
|
70
71
|
self.response = None
|
71
|
-
|
72
|
+
|
72
73
|
def _make_compiled_graph(self):
|
73
74
|
"""
|
74
75
|
Creates the compiled state graph for the EDA agent.
|
75
76
|
"""
|
76
77
|
self.response = None
|
77
78
|
return make_eda_tools_agent(**self._params)
|
78
|
-
|
79
|
+
|
79
80
|
def update_params(self, **kwargs):
|
80
81
|
"""
|
81
82
|
Updates the agent's parameters and rebuilds the compiled graph.
|
@@ -83,16 +84,13 @@ class EDAToolsAgent(BaseAgent):
|
|
83
84
|
for k, v in kwargs.items():
|
84
85
|
self._params[k] = v
|
85
86
|
self._compiled_graph = self._make_compiled_graph()
|
86
|
-
|
87
|
+
|
87
88
|
async def ainvoke_agent(
|
88
|
-
self,
|
89
|
-
user_instructions: str = None,
|
90
|
-
data_raw: pd.DataFrame = None,
|
91
|
-
**kwargs
|
89
|
+
self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
|
92
90
|
):
|
93
91
|
"""
|
94
92
|
Asynchronously runs the agent with user instructions and data.
|
95
|
-
|
93
|
+
|
96
94
|
Parameters:
|
97
95
|
----------
|
98
96
|
user_instructions : str, optional
|
@@ -105,20 +103,17 @@ class EDAToolsAgent(BaseAgent):
|
|
105
103
|
"user_instructions": user_instructions,
|
106
104
|
"data_raw": data_raw.to_dict() if data_raw is not None else None,
|
107
105
|
},
|
108
|
-
**kwargs
|
106
|
+
**kwargs,
|
109
107
|
)
|
110
108
|
self.response = response
|
111
109
|
return None
|
112
|
-
|
110
|
+
|
113
111
|
def invoke_agent(
|
114
|
-
self,
|
115
|
-
user_instructions: str = None,
|
116
|
-
data_raw: pd.DataFrame = None,
|
117
|
-
**kwargs
|
112
|
+
self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
|
118
113
|
):
|
119
114
|
"""
|
120
115
|
Synchronously runs the agent with user instructions and data.
|
121
|
-
|
116
|
+
|
122
117
|
Parameters:
|
123
118
|
----------
|
124
119
|
user_instructions : str, optional
|
@@ -131,24 +126,26 @@ class EDAToolsAgent(BaseAgent):
|
|
131
126
|
"user_instructions": user_instructions,
|
132
127
|
"data_raw": data_raw.to_dict() if data_raw is not None else None,
|
133
128
|
},
|
134
|
-
**kwargs
|
129
|
+
**kwargs,
|
135
130
|
)
|
136
131
|
self.response = response
|
137
132
|
return None
|
138
|
-
|
133
|
+
|
139
134
|
def get_internal_messages(self, markdown: bool = False):
|
140
135
|
"""
|
141
136
|
Returns internal messages from the agent response.
|
142
137
|
"""
|
143
138
|
pretty_print = "\n\n".join(
|
144
|
-
[
|
145
|
-
|
139
|
+
[
|
140
|
+
f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
|
141
|
+
for msg in self.response["internal_messages"]
|
142
|
+
]
|
146
143
|
)
|
147
144
|
if markdown:
|
148
145
|
return Markdown(pretty_print)
|
149
146
|
else:
|
150
147
|
return self.response["internal_messages"]
|
151
|
-
|
148
|
+
|
152
149
|
def get_artifacts(self, as_dataframe: bool = False):
|
153
150
|
"""
|
154
151
|
Returns the EDA artifacts from the agent response.
|
@@ -157,7 +154,7 @@ class EDAToolsAgent(BaseAgent):
|
|
157
154
|
return pd.DataFrame(self.response["eda_artifacts"])
|
158
155
|
else:
|
159
156
|
return self.response["eda_artifacts"]
|
160
|
-
|
157
|
+
|
161
158
|
def get_ai_message(self, markdown: bool = False):
|
162
159
|
"""
|
163
160
|
Returns the AI message from the agent response.
|
@@ -166,13 +163,14 @@ class EDAToolsAgent(BaseAgent):
|
|
166
163
|
return Markdown(self.response["messages"][0].content)
|
167
164
|
else:
|
168
165
|
return self.response["messages"][0].content
|
169
|
-
|
166
|
+
|
170
167
|
def get_tool_calls(self):
|
171
168
|
"""
|
172
169
|
Returns the tool calls made by the agent.
|
173
170
|
"""
|
174
171
|
return self.response["tool_calls"]
|
175
172
|
|
173
|
+
|
176
174
|
def make_eda_tools_agent(
|
177
175
|
model: Any,
|
178
176
|
create_react_agent_kwargs: Optional[Dict] = {},
|
@@ -181,7 +179,7 @@ def make_eda_tools_agent(
|
|
181
179
|
):
|
182
180
|
"""
|
183
181
|
Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
|
184
|
-
|
182
|
+
|
185
183
|
Parameters:
|
186
184
|
----------
|
187
185
|
model : Any
|
@@ -192,13 +190,13 @@ def make_eda_tools_agent(
|
|
192
190
|
Additional kwargs for agent invocation.
|
193
191
|
checkpointer : Checkpointer, optional
|
194
192
|
The checkpointer for the agent.
|
195
|
-
|
193
|
+
|
196
194
|
Returns:
|
197
195
|
-------
|
198
196
|
app : langgraph.graph.CompiledStateGraph
|
199
197
|
The compiled state graph for the EDA agent.
|
200
198
|
"""
|
201
|
-
|
199
|
+
|
202
200
|
class GraphState(AgentState):
|
203
201
|
internal_messages: Annotated[Sequence[BaseMessage], operator.add]
|
204
202
|
user_instructions: str
|
@@ -209,11 +207,9 @@ def make_eda_tools_agent(
|
|
209
207
|
def exploratory_agent(state):
|
210
208
|
print(format_agent_name(AGENT_NAME))
|
211
209
|
print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")
|
212
|
-
|
213
|
-
tool_node = ToolNode(
|
214
|
-
|
215
|
-
)
|
216
|
-
|
210
|
+
|
211
|
+
tool_node = ToolNode(tools=EDA_TOOLS)
|
212
|
+
|
217
213
|
eda_agent = create_react_agent(
|
218
214
|
model,
|
219
215
|
tools=tool_node,
|
@@ -221,7 +217,7 @@ def make_eda_tools_agent(
|
|
221
217
|
**create_react_agent_kwargs,
|
222
218
|
checkpointer=checkpointer,
|
223
219
|
)
|
224
|
-
|
220
|
+
|
225
221
|
response = eda_agent.invoke(
|
226
222
|
{
|
227
223
|
"messages": [("user", state["user_instructions"])],
|
@@ -229,13 +225,13 @@ def make_eda_tools_agent(
|
|
229
225
|
},
|
230
226
|
invoke_react_agent_kwargs,
|
231
227
|
)
|
232
|
-
|
228
|
+
|
233
229
|
print(" * POST-PROCESSING EDA RESULTS")
|
234
|
-
|
235
|
-
internal_messages = response[
|
230
|
+
|
231
|
+
internal_messages = response["messages"]
|
236
232
|
if not internal_messages:
|
237
233
|
return {"internal_messages": [], "eda_artifacts": None}
|
238
|
-
|
234
|
+
|
239
235
|
last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)
|
240
236
|
last_tool_artifact = None
|
241
237
|
if len(internal_messages) > 1:
|
@@ -244,24 +240,24 @@ def make_eda_tools_agent(
|
|
244
240
|
last_tool_artifact = last_message.artifact
|
245
241
|
elif isinstance(last_message, dict) and "artifact" in last_message:
|
246
242
|
last_tool_artifact = last_message["artifact"]
|
247
|
-
|
243
|
+
|
248
244
|
tool_calls = get_tool_call_names(internal_messages)
|
249
|
-
|
245
|
+
|
250
246
|
return {
|
251
247
|
"messages": [last_ai_message],
|
252
248
|
"internal_messages": internal_messages,
|
253
249
|
"eda_artifacts": last_tool_artifact,
|
254
250
|
"tool_calls": tool_calls,
|
255
251
|
}
|
256
|
-
|
252
|
+
|
257
253
|
workflow = StateGraph(GraphState)
|
258
254
|
workflow.add_node("exploratory_agent", exploratory_agent)
|
259
255
|
workflow.add_edge(START, "exploratory_agent")
|
260
256
|
workflow.add_edge("exploratory_agent", END)
|
261
|
-
|
257
|
+
|
262
258
|
app = workflow.compile(
|
263
259
|
checkpointer=checkpointer,
|
264
260
|
name=AGENT_NAME,
|
265
261
|
)
|
266
|
-
|
262
|
+
|
267
263
|
return app
|