ai-data-science-team 0.0.0.9015__tar.gz → 0.0.0.9016__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {ai_data_science_team-0.0.0.9015/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9016}/PKG-INFO +62 -56
  2. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/README.md +59 -54
  3. ai_data_science_team-0.0.0.9016/ai_data_science_team/_version.py +1 -0
  4. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/eda_tools_agent.py +46 -50
  5. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/eda.py +121 -60
  6. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016/ai_data_science_team.egg-info}/PKG-INFO +62 -56
  7. ai_data_science_team-0.0.0.9015/ai_data_science_team/_version.py +0 -1
  8. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/LICENSE +0 -0
  9. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/MANIFEST.in +0 -0
  10. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/__init__.py +0 -0
  11. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/__init__.py +0 -0
  12. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_cleaning_agent.py +0 -0
  13. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_loader_tools_agent.py +0 -0
  14. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_visualization_agent.py +0 -0
  15. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_wrangling_agent.py +0 -0
  16. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/feature_engineering_agent.py +0 -0
  17. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/sql_database_agent.py +0 -0
  18. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/__init__.py +0 -0
  19. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/modeling_tools_agent.py +0 -0
  20. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/__init__.py +0 -0
  21. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/h2o_ml_agent.py +0 -0
  22. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  23. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/mlflow_tools_agent.py +0 -0
  24. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/__init__.py +0 -0
  25. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/pandas_data_analyst.py +0 -0
  26. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/sql_data_analyst.py +0 -0
  27. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
  28. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/orchestration.py +0 -0
  29. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/parsers/__init__.py +0 -0
  30. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/parsers/parsers.py +0 -0
  31. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/templates/__init__.py +0 -0
  32. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/templates/agent_templates.py +0 -0
  33. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/__init__.py +0 -0
  34. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/data_loader.py +0 -0
  35. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/dataframe.py +0 -0
  36. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/h2o.py +0 -0
  37. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/mlflow.py +0 -0
  38. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/sql.py +0 -0
  39. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/__init__.py +0 -0
  40. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/html.py +0 -0
  41. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/logging.py +0 -0
  42. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/matplotlib.py +0 -0
  43. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/messages.py +0 -0
  44. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/plotly.py +0 -0
  45. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/regex.py +0 -0
  46. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/SOURCES.txt +0 -0
  47. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
  48. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/requires.txt +0 -0
  49. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/top_level.txt +0 -0
  50. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/requirements.txt +0 -0
  51. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/setup.cfg +0 -0
  52. {ai_data_science_team-0.0.0.9015 → ai_data_science_team-0.0.0.9016}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9015
3
+ Version: 0.0.0.9016
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -47,6 +47,7 @@ Dynamic: classifier
47
47
  Dynamic: description
48
48
  Dynamic: description-content-type
49
49
  Dynamic: home-page
50
+ Dynamic: license-file
50
51
  Dynamic: provides-extra
51
52
  Dynamic: requires-dist
52
53
  Dynamic: requires-python
@@ -97,9 +98,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
97
98
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
98
99
  - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
99
100
  - [Data Science Agents](#data-science-agents)
101
+ - [🔥 NEW: Data Science Apps](#-new-data-science-apps)
100
102
  - [NEW: Multi-Agents](#new-multi-agents)
101
- - [Data Science Apps](#data-science-apps)
102
- - [Apps Available Now](#apps-available-now)
103
103
  - [🔥 Agentic Applications](#-agentic-applications)
104
104
  - [Agents Available Now](#agents-available-now)
105
105
  - [Standard Agents](#standard-agents)
@@ -110,11 +110,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
110
110
  - [Disclaimer](#disclaimer)
111
111
  - [Installation](#installation)
112
112
  - [Usage](#usage)
113
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
114
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
113
+ - [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
115
114
  - [Contributing](#contributing)
116
115
  - [License](#license)
117
116
  - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
117
+ - [⭐️ Star History](#️-star-history)
118
118
 
119
119
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
120
120
 
@@ -134,21 +134,24 @@ This project is a work in progress. New data science agents will be released soo
134
134
 
135
135
  ![AI Data Science Team](/img/ai_data_science_team.jpg)
136
136
 
137
- ### NEW: Multi-Agents
137
+ ### 🔥 NEW: Data Science Apps
138
138
 
139
- **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
139
+ **🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
140
+
141
+ ![Pandas Data Analyst App](/img/apps/ai_pandas_data_analyst_app.jpg)
142
+
143
+ **🔥 SQL Database Agent:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
140
144
 
141
- ![Business Intelligence SQL Agent](/img/multi_agent_pandas_data_analyst.jpg)
145
+ **🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
142
146
 
143
- ### Data Science Apps
147
+ [See all available apps here](/apps)
144
148
 
145
- This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
149
+ ### NEW: Multi-Agents
146
150
 
147
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
151
+ **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
148
152
 
149
- ### Apps Available Now
153
+ ![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
150
154
 
151
- [See all available apps here](/apps)
152
155
 
153
156
  #### 🔥 Agentic Applications
154
157
 
@@ -205,6 +208,14 @@ By using this software, you agree to use it solely for learning purposes.
205
208
 
206
209
  ## Installation
207
210
 
211
+ You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
212
+
213
+ ``` bash
214
+ pip install ai-data-science-team
215
+ ```
216
+
217
+ Or, if you want the latest version from GitHub:
218
+
208
219
  ``` bash
209
220
  pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
210
221
  ```
@@ -213,55 +224,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
213
224
 
214
225
  [See all examples here.](/examples)
215
226
 
216
- ### Example 1: Feature Engineering with the Feature Engineering Agent
227
+ ### Example: H2O Machine Learning Agent
217
228
 
218
- [See the full example here.](/examples/feature_engineering_agent.ipynb)
229
+ [See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
219
230
 
220
231
  ``` python
221
- feature_engineering_agent = FeatureEngineeringAgent(model = llm)
222
-
223
- feature_engineering_agent.invoke_agent(
224
- data_raw = df,
225
- user_instructions = "Make sure to scale and center numeric features",
226
- target_variable = "Churn",
227
- max_retries = 3,
232
+ # Import libraries
233
+ from langchain_openai import ChatOpenAI
234
+ import pandas as pd
235
+ import h2o
236
+ import os
237
+ from ai_data_science_team.ml_agents import H2OMLAgent
238
+
239
+ # Load the data
240
+ df = pd.read_csv("data/churn_data.csv")
241
+ df
242
+
243
+ # Initialize the language model
244
+ os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
245
+ llm = ChatOpenAI(model=MODEL)
246
+ llm
247
+
248
+ # Initialize the H2O ML Agent
249
+ ml_agent = H2OMLAgent(
250
+ model=llm,
251
+ log=True,
252
+ log_path="logs/",
253
+ model_directory="h2o_models/",
254
+ enable_mlflow=True, # Use this if you wish to log models to MLflow
228
255
  )
229
- ```
230
-
231
- ``` bash
232
- ---FEATURE ENGINEERING AGENT----
233
- * CREATE FEATURE ENGINEER CODE
234
- * EXECUTING AGENT CODE
235
- * EXPLAIN AGENT CODE
236
- ```
237
-
238
- ``` python
239
- feature_engineering_agent.get_data_engineered()
240
- ```
241
-
242
- ### Example 2: Cleaning Data with the Data Cleaning Agent
243
-
244
- [See the full example here.](/examples/data_cleaning_agent.ipynb)
245
-
246
- ``` python
247
- data_cleaning_agent = DataCleaningAgent(model = llm)
256
+ ml_agent
248
257
 
249
- response = data_cleaning_agent.invoke_agent(
250
- data_raw = df,
251
- user_instructions = "Don't remove outliers when cleaning the data.",
252
- max_retries = 3,
258
+ # Run the agent
259
+ ml_agent.invoke_agent(
260
+ data_raw=df.drop(columns=["customerID"]),
261
+ user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
262
+ target_variable="Churn"
253
263
  )
254
- ```
255
-
256
- ``` bash
257
- ---DATA CLEANING AGENT----
258
- * CREATE DATA CLEANER CODE
259
- * EXECUTING AGENT CODE
260
- * EXPLAIN AGENT CODE
261
- ```
262
264
 
263
- ``` python
264
- data_cleaning_agent.get_data_cleaned()
265
+ # Retrieve and display the leaderboard of models
266
+ ml_agent.get_leaderboard()
265
267
  ```
266
268
 
267
269
  ## Contributing
@@ -282,4 +284,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
282
284
 
283
285
  I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
284
286
 
287
+ # ⭐️ Star History
285
288
 
289
+ [![Star History Chart](https://api.star-history.com/svg?repos=business-science/ai-data-science-team&type=Date)](https://star-history.com/#business-science/ai-data-science-team&Date)
290
+
291
+ [**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
@@ -43,9 +43,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
43
43
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
44
44
  - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
45
45
  - [Data Science Agents](#data-science-agents)
46
+ - [🔥 NEW: Data Science Apps](#-new-data-science-apps)
46
47
  - [NEW: Multi-Agents](#new-multi-agents)
47
- - [Data Science Apps](#data-science-apps)
48
- - [Apps Available Now](#apps-available-now)
49
48
  - [🔥 Agentic Applications](#-agentic-applications)
50
49
  - [Agents Available Now](#agents-available-now)
51
50
  - [Standard Agents](#standard-agents)
@@ -56,11 +55,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
56
55
  - [Disclaimer](#disclaimer)
57
56
  - [Installation](#installation)
58
57
  - [Usage](#usage)
59
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
60
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
58
+ - [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
61
59
  - [Contributing](#contributing)
62
60
  - [License](#license)
63
61
  - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
62
+ - [⭐️ Star History](#️-star-history)
64
63
 
65
64
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
66
65
 
@@ -80,21 +79,24 @@ This project is a work in progress. New data science agents will be released soo
80
79
 
81
80
  ![AI Data Science Team](/img/ai_data_science_team.jpg)
82
81
 
83
- ### NEW: Multi-Agents
82
+ ### 🔥 NEW: Data Science Apps
84
83
 
85
- **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
84
+ **🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
85
+
86
+ ![Pandas Data Analyst App](/img/apps/ai_pandas_data_analyst_app.jpg)
87
+
88
+ **🔥 SQL Database Agent:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
86
89
 
87
- ![Business Intelligence SQL Agent](/img/multi_agent_pandas_data_analyst.jpg)
90
+ **🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
88
91
 
89
- ### Data Science Apps
92
+ [See all available apps here](/apps)
90
93
 
91
- This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
94
+ ### NEW: Multi-Agents
92
95
 
93
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
96
+ **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
94
97
 
95
- ### Apps Available Now
98
+ ![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
96
99
 
97
- [See all available apps here](/apps)
98
100
 
99
101
  #### 🔥 Agentic Applications
100
102
 
@@ -151,6 +153,14 @@ By using this software, you agree to use it solely for learning purposes.
151
153
 
152
154
  ## Installation
153
155
 
156
+ You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
157
+
158
+ ``` bash
159
+ pip install ai-data-science-team
160
+ ```
161
+
162
+ Or, if you want the latest version from GitHub:
163
+
154
164
  ``` bash
155
165
  pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
156
166
  ```
@@ -159,55 +169,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
159
169
 
160
170
  [See all examples here.](/examples)
161
171
 
162
- ### Example 1: Feature Engineering with the Feature Engineering Agent
172
+ ### Example: H2O Machine Learning Agent
163
173
 
164
- [See the full example here.](/examples/feature_engineering_agent.ipynb)
174
+ [See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
165
175
 
166
176
  ``` python
167
- feature_engineering_agent = FeatureEngineeringAgent(model = llm)
168
-
169
- feature_engineering_agent.invoke_agent(
170
- data_raw = df,
171
- user_instructions = "Make sure to scale and center numeric features",
172
- target_variable = "Churn",
173
- max_retries = 3,
177
+ # Import libraries
178
+ from langchain_openai import ChatOpenAI
179
+ import pandas as pd
180
+ import h2o
181
+ import os
182
+ from ai_data_science_team.ml_agents import H2OMLAgent
183
+
184
+ # Load the data
185
+ df = pd.read_csv("data/churn_data.csv")
186
+ df
187
+
188
+ # Initialize the language model
189
+ os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
190
+ llm = ChatOpenAI(model=MODEL)
191
+ llm
192
+
193
+ # Initialize the H2O ML Agent
194
+ ml_agent = H2OMLAgent(
195
+ model=llm,
196
+ log=True,
197
+ log_path="logs/",
198
+ model_directory="h2o_models/",
199
+ enable_mlflow=True, # Use this if you wish to log models to MLflow
174
200
  )
175
- ```
176
-
177
- ``` bash
178
- ---FEATURE ENGINEERING AGENT----
179
- * CREATE FEATURE ENGINEER CODE
180
- * EXECUTING AGENT CODE
181
- * EXPLAIN AGENT CODE
182
- ```
183
-
184
- ``` python
185
- feature_engineering_agent.get_data_engineered()
186
- ```
187
-
188
- ### Example 2: Cleaning Data with the Data Cleaning Agent
189
-
190
- [See the full example here.](/examples/data_cleaning_agent.ipynb)
191
-
192
- ``` python
193
- data_cleaning_agent = DataCleaningAgent(model = llm)
201
+ ml_agent
194
202
 
195
- response = data_cleaning_agent.invoke_agent(
196
- data_raw = df,
197
- user_instructions = "Don't remove outliers when cleaning the data.",
198
- max_retries = 3,
203
+ # Run the agent
204
+ ml_agent.invoke_agent(
205
+ data_raw=df.drop(columns=["customerID"]),
206
+ user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
207
+ target_variable="Churn"
199
208
  )
200
- ```
201
-
202
- ``` bash
203
- ---DATA CLEANING AGENT----
204
- * CREATE DATA CLEANER CODE
205
- * EXECUTING AGENT CODE
206
- * EXPLAIN AGENT CODE
207
- ```
208
209
 
209
- ``` python
210
- data_cleaning_agent.get_data_cleaned()
210
+ # Retrieve and display the leaderboard of models
211
+ ml_agent.get_leaderboard()
211
212
  ```
212
213
 
213
214
  ## Contributing
@@ -228,4 +229,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
228
229
 
229
230
  I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
230
231
 
232
+ # ⭐️ Star History
231
233
 
234
+ [![Star History Chart](https://api.star-history.com/svg?repos=business-science/ai-data-science-team&type=Date)](https://star-history.com/#business-science/ai-data-science-team&Date)
235
+
236
+ [**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
@@ -0,0 +1 @@
1
+ __version__ = "0.0.0.9016"
@@ -1,5 +1,3 @@
1
-
2
-
3
1
  from typing import Any, Optional, Annotated, Sequence, Dict
4
2
  import operator
5
3
  import pandas as pd
@@ -17,10 +15,11 @@ from ai_data_science_team.utils.regex import format_agent_name
17
15
 
18
16
  from ai_data_science_team.tools.eda import (
19
17
  explain_data,
20
- describe_dataset,
21
- visualize_missing,
22
- correlation_funnel,
18
+ describe_dataset,
19
+ visualize_missing,
20
+ generate_correlation_funnel,
23
21
  generate_sweetviz_report,
22
+ generate_dtale_report,
24
23
  )
25
24
  from ai_data_science_team.utils.messages import get_tool_call_names
26
25
 
@@ -32,15 +31,17 @@ EDA_TOOLS = [
32
31
  explain_data,
33
32
  describe_dataset,
34
33
  visualize_missing,
35
- correlation_funnel,
34
+ generate_correlation_funnel,
36
35
  generate_sweetviz_report,
36
+ generate_dtale_report,
37
37
  ]
38
38
 
39
+
39
40
  class EDAToolsAgent(BaseAgent):
40
41
  """
41
42
  An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
42
43
  missing data visualizations, correlation funnels, EDA reports, etc.
43
-
44
+
44
45
  Parameters:
45
46
  ----------
46
47
  model : langchain.llms.base.LLM
@@ -52,9 +53,9 @@ class EDAToolsAgent(BaseAgent):
52
53
  checkpointer : Checkpointer, optional
53
54
  The checkpointer for the agent.
54
55
  """
55
-
56
+
56
57
  def __init__(
57
- self,
58
+ self,
58
59
  model: Any,
59
60
  create_react_agent_kwargs: Optional[Dict] = {},
60
61
  invoke_react_agent_kwargs: Optional[Dict] = {},
@@ -64,18 +65,18 @@ class EDAToolsAgent(BaseAgent):
64
65
  "model": model,
65
66
  "create_react_agent_kwargs": create_react_agent_kwargs,
66
67
  "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
67
- "checkpointer": checkpointer
68
+ "checkpointer": checkpointer,
68
69
  }
69
70
  self._compiled_graph = self._make_compiled_graph()
70
71
  self.response = None
71
-
72
+
72
73
  def _make_compiled_graph(self):
73
74
  """
74
75
  Creates the compiled state graph for the EDA agent.
75
76
  """
76
77
  self.response = None
77
78
  return make_eda_tools_agent(**self._params)
78
-
79
+
79
80
  def update_params(self, **kwargs):
80
81
  """
81
82
  Updates the agent's parameters and rebuilds the compiled graph.
@@ -83,16 +84,13 @@ class EDAToolsAgent(BaseAgent):
83
84
  for k, v in kwargs.items():
84
85
  self._params[k] = v
85
86
  self._compiled_graph = self._make_compiled_graph()
86
-
87
+
87
88
  async def ainvoke_agent(
88
- self,
89
- user_instructions: str = None,
90
- data_raw: pd.DataFrame = None,
91
- **kwargs
89
+ self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
92
90
  ):
93
91
  """
94
92
  Asynchronously runs the agent with user instructions and data.
95
-
93
+
96
94
  Parameters:
97
95
  ----------
98
96
  user_instructions : str, optional
@@ -105,20 +103,17 @@ class EDAToolsAgent(BaseAgent):
105
103
  "user_instructions": user_instructions,
106
104
  "data_raw": data_raw.to_dict() if data_raw is not None else None,
107
105
  },
108
- **kwargs
106
+ **kwargs,
109
107
  )
110
108
  self.response = response
111
109
  return None
112
-
110
+
113
111
  def invoke_agent(
114
- self,
115
- user_instructions: str = None,
116
- data_raw: pd.DataFrame = None,
117
- **kwargs
112
+ self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
118
113
  ):
119
114
  """
120
115
  Synchronously runs the agent with user instructions and data.
121
-
116
+
122
117
  Parameters:
123
118
  ----------
124
119
  user_instructions : str, optional
@@ -131,24 +126,26 @@ class EDAToolsAgent(BaseAgent):
131
126
  "user_instructions": user_instructions,
132
127
  "data_raw": data_raw.to_dict() if data_raw is not None else None,
133
128
  },
134
- **kwargs
129
+ **kwargs,
135
130
  )
136
131
  self.response = response
137
132
  return None
138
-
133
+
139
134
  def get_internal_messages(self, markdown: bool = False):
140
135
  """
141
136
  Returns internal messages from the agent response.
142
137
  """
143
138
  pretty_print = "\n\n".join(
144
- [f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
145
- for msg in self.response["internal_messages"]]
139
+ [
140
+ f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
141
+ for msg in self.response["internal_messages"]
142
+ ]
146
143
  )
147
144
  if markdown:
148
145
  return Markdown(pretty_print)
149
146
  else:
150
147
  return self.response["internal_messages"]
151
-
148
+
152
149
  def get_artifacts(self, as_dataframe: bool = False):
153
150
  """
154
151
  Returns the EDA artifacts from the agent response.
@@ -157,7 +154,7 @@ class EDAToolsAgent(BaseAgent):
157
154
  return pd.DataFrame(self.response["eda_artifacts"])
158
155
  else:
159
156
  return self.response["eda_artifacts"]
160
-
157
+
161
158
  def get_ai_message(self, markdown: bool = False):
162
159
  """
163
160
  Returns the AI message from the agent response.
@@ -166,13 +163,14 @@ class EDAToolsAgent(BaseAgent):
166
163
  return Markdown(self.response["messages"][0].content)
167
164
  else:
168
165
  return self.response["messages"][0].content
169
-
166
+
170
167
  def get_tool_calls(self):
171
168
  """
172
169
  Returns the tool calls made by the agent.
173
170
  """
174
171
  return self.response["tool_calls"]
175
172
 
173
+
176
174
  def make_eda_tools_agent(
177
175
  model: Any,
178
176
  create_react_agent_kwargs: Optional[Dict] = {},
@@ -181,7 +179,7 @@ def make_eda_tools_agent(
181
179
  ):
182
180
  """
183
181
  Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
184
-
182
+
185
183
  Parameters:
186
184
  ----------
187
185
  model : Any
@@ -192,13 +190,13 @@ def make_eda_tools_agent(
192
190
  Additional kwargs for agent invocation.
193
191
  checkpointer : Checkpointer, optional
194
192
  The checkpointer for the agent.
195
-
193
+
196
194
  Returns:
197
195
  -------
198
196
  app : langgraph.graph.CompiledStateGraph
199
197
  The compiled state graph for the EDA agent.
200
198
  """
201
-
199
+
202
200
  class GraphState(AgentState):
203
201
  internal_messages: Annotated[Sequence[BaseMessage], operator.add]
204
202
  user_instructions: str
@@ -209,11 +207,9 @@ def make_eda_tools_agent(
209
207
  def exploratory_agent(state):
210
208
  print(format_agent_name(AGENT_NAME))
211
209
  print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")
212
-
213
- tool_node = ToolNode(
214
- tools=EDA_TOOLS
215
- )
216
-
210
+
211
+ tool_node = ToolNode(tools=EDA_TOOLS)
212
+
217
213
  eda_agent = create_react_agent(
218
214
  model,
219
215
  tools=tool_node,
@@ -221,7 +217,7 @@ def make_eda_tools_agent(
221
217
  **create_react_agent_kwargs,
222
218
  checkpointer=checkpointer,
223
219
  )
224
-
220
+
225
221
  response = eda_agent.invoke(
226
222
  {
227
223
  "messages": [("user", state["user_instructions"])],
@@ -229,13 +225,13 @@ def make_eda_tools_agent(
229
225
  },
230
226
  invoke_react_agent_kwargs,
231
227
  )
232
-
228
+
233
229
  print(" * POST-PROCESSING EDA RESULTS")
234
-
235
- internal_messages = response['messages']
230
+
231
+ internal_messages = response["messages"]
236
232
  if not internal_messages:
237
233
  return {"internal_messages": [], "eda_artifacts": None}
238
-
234
+
239
235
  last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)
240
236
  last_tool_artifact = None
241
237
  if len(internal_messages) > 1:
@@ -244,24 +240,24 @@ def make_eda_tools_agent(
244
240
  last_tool_artifact = last_message.artifact
245
241
  elif isinstance(last_message, dict) and "artifact" in last_message:
246
242
  last_tool_artifact = last_message["artifact"]
247
-
243
+
248
244
  tool_calls = get_tool_call_names(internal_messages)
249
-
245
+
250
246
  return {
251
247
  "messages": [last_ai_message],
252
248
  "internal_messages": internal_messages,
253
249
  "eda_artifacts": last_tool_artifact,
254
250
  "tool_calls": tool_calls,
255
251
  }
256
-
252
+
257
253
  workflow = StateGraph(GraphState)
258
254
  workflow.add_node("exploratory_agent", exploratory_agent)
259
255
  workflow.add_edge(START, "exploratory_agent")
260
256
  workflow.add_edge("exploratory_agent", END)
261
-
257
+
262
258
  app = workflow.compile(
263
259
  checkpointer=checkpointer,
264
260
  name=AGENT_NAME,
265
261
  )
266
-
262
+
267
263
  return app