ai-data-science-team 0.0.0.9014__tar.gz → 0.0.0.9016__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {ai_data_science_team-0.0.0.9014/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9016}/PKG-INFO +64 -57
  2. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/README.md +61 -55
  3. ai_data_science_team-0.0.0.9016/ai_data_science_team/_version.py +1 -0
  4. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_visualization_agent.py +172 -129
  5. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_wrangling_agent.py +1 -0
  6. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/eda_tools_agent.py +46 -50
  7. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/pandas_data_analyst.py +5 -5
  8. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/sql_data_analyst.py +7 -18
  9. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/eda.py +123 -60
  10. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016/ai_data_science_team.egg-info}/PKG-INFO +64 -57
  11. ai_data_science_team-0.0.0.9014/ai_data_science_team/_version.py +0 -1
  12. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/LICENSE +0 -0
  13. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/MANIFEST.in +0 -0
  14. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/__init__.py +0 -0
  15. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/__init__.py +0 -0
  16. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_cleaning_agent.py +0 -0
  17. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/data_loader_tools_agent.py +0 -0
  18. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/feature_engineering_agent.py +0 -0
  19. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/agents/sql_database_agent.py +0 -0
  20. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/__init__.py +0 -0
  21. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ds_agents/modeling_tools_agent.py +0 -0
  22. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/__init__.py +0 -0
  23. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/h2o_ml_agent.py +0 -0
  24. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  25. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/ml_agents/mlflow_tools_agent.py +0 -0
  26. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/__init__.py +0 -0
  27. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
  28. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/orchestration.py +0 -0
  29. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/parsers/__init__.py +0 -0
  30. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/parsers/parsers.py +0 -0
  31. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/templates/__init__.py +0 -0
  32. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/templates/agent_templates.py +0 -0
  33. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/__init__.py +0 -0
  34. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/data_loader.py +0 -0
  35. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/dataframe.py +0 -0
  36. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/h2o.py +0 -0
  37. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/mlflow.py +0 -0
  38. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/tools/sql.py +0 -0
  39. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/__init__.py +0 -0
  40. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/html.py +0 -0
  41. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/logging.py +0 -0
  42. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/matplotlib.py +0 -0
  43. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/messages.py +0 -0
  44. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/plotly.py +0 -0
  45. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team/utils/regex.py +0 -0
  46. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/SOURCES.txt +0 -0
  47. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
  48. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/requires.txt +0 -0
  49. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/ai_data_science_team.egg-info/top_level.txt +0 -0
  50. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/requirements.txt +0 -0
  51. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/setup.cfg +0 -0
  52. {ai_data_science_team-0.0.0.9014 → ai_data_science_team-0.0.0.9016}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9014
3
+ Version: 0.0.0.9016
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -47,6 +47,7 @@ Dynamic: classifier
47
47
  Dynamic: description
48
48
  Dynamic: description-content-type
49
49
  Dynamic: home-page
50
+ Dynamic: license-file
50
51
  Dynamic: provides-extra
51
52
  Dynamic: requires-dist
52
53
  Dynamic: requires-python
@@ -97,9 +98,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
97
98
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
98
99
  - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
99
100
  - [Data Science Agents](#data-science-agents)
101
+ - [🔥 NEW: Data Science Apps](#-new-data-science-apps)
100
102
  - [NEW: Multi-Agents](#new-multi-agents)
101
- - [Data Science Apps](#data-science-apps)
102
- - [Apps Available Now](#apps-available-now)
103
103
  - [🔥 Agentic Applications](#-agentic-applications)
104
104
  - [Agents Available Now](#agents-available-now)
105
105
  - [Standard Agents](#standard-agents)
@@ -110,11 +110,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
110
110
  - [Disclaimer](#disclaimer)
111
111
  - [Installation](#installation)
112
112
  - [Usage](#usage)
113
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
114
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
113
+ - [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
115
114
  - [Contributing](#contributing)
116
115
  - [License](#license)
117
116
  - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
117
+ - [⭐️ Star History](#️-star-history)
118
118
 
119
119
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
120
120
 
@@ -134,21 +134,24 @@ This project is a work in progress. New data science agents will be released soo
134
134
 
135
135
  ![AI Data Science Team](/img/ai_data_science_team.jpg)
136
136
 
137
- ### NEW: Multi-Agents
137
+ ### 🔥 NEW: Data Science Apps
138
138
 
139
- This is the internals of the SQL Data Analyst Agent that connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction, performs Joins, Aggregations, and other SQL Query operations. And it includes a Data Visualization Agent that creates visualizations to help you understand your data.:
139
+ **🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
140
140
 
141
- ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
141
+ ![Pandas Data Analyst App](/img/apps/ai_pandas_data_analyst_app.jpg)
142
142
 
143
- ### Data Science Apps
143
+ **🔥 SQL Database Agent:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
144
144
 
145
- This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
145
+ **🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
146
146
 
147
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
147
+ [See all available apps here](/apps)
148
148
 
149
- ### Apps Available Now
149
+ ### NEW: Multi-Agents
150
+
151
+ **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
152
+
153
+ ![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
150
154
 
151
- [See all available apps here](/apps)
152
155
 
153
156
  #### 🔥 Agentic Applications
154
157
 
@@ -182,7 +185,8 @@ This is a top secret project I'm working on. It's a multi-agent data science app
182
185
 
183
186
  #### Multi-Agents
184
187
 
185
- 1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
188
+ 1. **🔥🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/pandas_data_analyst.ipynb)
189
+ 2. **🔥🔥 SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
186
190
 
187
191
  ### Agents Coming Soon
188
192
 
@@ -204,6 +208,14 @@ By using this software, you agree to use it solely for learning purposes.
204
208
 
205
209
  ## Installation
206
210
 
211
+ You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
212
+
213
+ ``` bash
214
+ pip install ai-data-science-team
215
+ ```
216
+
217
+ Or, if you want the latest version from GitHub:
218
+
207
219
  ``` bash
208
220
  pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
209
221
  ```
@@ -212,55 +224,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
212
224
 
213
225
  [See all examples here.](/examples)
214
226
 
215
- ### Example 1: Feature Engineering with the Feature Engineering Agent
227
+ ### Example: H2O Machine Learning Agent
216
228
 
217
- [See the full example here.](/examples/feature_engineering_agent.ipynb)
229
+ [See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
218
230
 
219
231
  ``` python
220
- feature_engineering_agent = FeatureEngineeringAgent(model = llm)
221
-
222
- feature_engineering_agent.invoke_agent(
223
- data_raw = df,
224
- user_instructions = "Make sure to scale and center numeric features",
225
- target_variable = "Churn",
226
- max_retries = 3,
232
+ # Import libraries
233
+ from langchain_openai import ChatOpenAI
234
+ import pandas as pd
235
+ import h2o
236
+ import os
237
+ from ai_data_science_team.ml_agents import H2OMLAgent
238
+
239
+ # Load the data
240
+ df = pd.read_csv("data/churn_data.csv")
241
+ df
242
+
243
+ # Initialize the language model
244
+ os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
245
+ llm = ChatOpenAI(model=MODEL)
246
+ llm
247
+
248
+ # Initialize the H2O ML Agent
249
+ ml_agent = H2OMLAgent(
250
+ model=llm,
251
+ log=True,
252
+ log_path="logs/",
253
+ model_directory="h2o_models/",
254
+ enable_mlflow=True, # Use this if you wish to log models to MLflow
227
255
  )
228
- ```
229
-
230
- ``` bash
231
- ---FEATURE ENGINEERING AGENT----
232
- * CREATE FEATURE ENGINEER CODE
233
- * EXECUTING AGENT CODE
234
- * EXPLAIN AGENT CODE
235
- ```
236
-
237
- ``` python
238
- feature_engineering_agent.get_data_engineered()
239
- ```
240
-
241
- ### Example 2: Cleaning Data with the Data Cleaning Agent
242
-
243
- [See the full example here.](/examples/data_cleaning_agent.ipynb)
244
-
245
- ``` python
246
- data_cleaning_agent = DataCleaningAgent(model = llm)
256
+ ml_agent
247
257
 
248
- response = data_cleaning_agent.invoke_agent(
249
- data_raw = df,
250
- user_instructions = "Don't remove outliers when cleaning the data.",
251
- max_retries = 3,
258
+ # Run the agent
259
+ ml_agent.invoke_agent(
260
+ data_raw=df.drop(columns=["customerID"]),
261
+ user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
262
+ target_variable="Churn"
252
263
  )
253
- ```
254
264
 
255
- ``` bash
256
- ---DATA CLEANING AGENT----
257
- * CREATE DATA CLEANER CODE
258
- * EXECUTING AGENT CODE
259
- * EXPLAIN AGENT CODE
260
- ```
261
-
262
- ``` python
263
- data_cleaning_agent.get_data_cleaned()
265
+ # Retrieve and display the leaderboard of models
266
+ ml_agent.get_leaderboard()
264
267
  ```
265
268
 
266
269
  ## Contributing
@@ -281,4 +284,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
281
284
 
282
285
  I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
283
286
 
287
+ # ⭐️ Star History
288
+
289
+ [![Star History Chart](https://api.star-history.com/svg?repos=business-science/ai-data-science-team&type=Date)](https://star-history.com/#business-science/ai-data-science-team&Date)
284
290
 
291
+ [**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
@@ -43,9 +43,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
43
43
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
44
44
  - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
45
45
  - [Data Science Agents](#data-science-agents)
46
+ - [🔥 NEW: Data Science Apps](#-new-data-science-apps)
46
47
  - [NEW: Multi-Agents](#new-multi-agents)
47
- - [Data Science Apps](#data-science-apps)
48
- - [Apps Available Now](#apps-available-now)
49
48
  - [🔥 Agentic Applications](#-agentic-applications)
50
49
  - [Agents Available Now](#agents-available-now)
51
50
  - [Standard Agents](#standard-agents)
@@ -56,11 +55,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
56
55
  - [Disclaimer](#disclaimer)
57
56
  - [Installation](#installation)
58
57
  - [Usage](#usage)
59
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
60
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
58
+ - [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
61
59
  - [Contributing](#contributing)
62
60
  - [License](#license)
63
61
  - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
62
+ - [⭐️ Star History](#️-star-history)
64
63
 
65
64
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
66
65
 
@@ -80,21 +79,24 @@ This project is a work in progress. New data science agents will be released soo
80
79
 
81
80
  ![AI Data Science Team](/img/ai_data_science_team.jpg)
82
81
 
83
- ### NEW: Multi-Agents
82
+ ### 🔥 NEW: Data Science Apps
84
83
 
85
- This is the internals of the SQL Data Analyst Agent that connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction, performs Joins, Aggregations, and other SQL Query operations. And it includes a Data Visualization Agent that creates visualizations to help you understand your data.:
84
+ **🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
86
85
 
87
- ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
86
+ ![Pandas Data Analyst App](/img/apps/ai_pandas_data_analyst_app.jpg)
88
87
 
89
- ### Data Science Apps
88
+ **🔥 SQL Database Agent:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
90
89
 
91
- This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
90
+ **🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
92
91
 
93
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
92
+ [See all available apps here](/apps)
94
93
 
95
- ### Apps Available Now
94
+ ### NEW: Multi-Agents
95
+
96
+ **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
97
+
98
+ ![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
96
99
 
97
- [See all available apps here](/apps)
98
100
 
99
101
  #### 🔥 Agentic Applications
100
102
 
@@ -128,7 +130,8 @@ This is a top secret project I'm working on. It's a multi-agent data science app
128
130
 
129
131
  #### Multi-Agents
130
132
 
131
- 1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
133
+ 1. **🔥🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/pandas_data_analyst.ipynb)
134
+ 2. **🔥🔥 SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
132
135
 
133
136
  ### Agents Coming Soon
134
137
 
@@ -150,6 +153,14 @@ By using this software, you agree to use it solely for learning purposes.
150
153
 
151
154
  ## Installation
152
155
 
156
+ You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
157
+
158
+ ``` bash
159
+ pip install ai-data-science-team
160
+ ```
161
+
162
+ Or, if you want the latest version from GitHub:
163
+
153
164
  ``` bash
154
165
  pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
155
166
  ```
@@ -158,55 +169,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
158
169
 
159
170
  [See all examples here.](/examples)
160
171
 
161
- ### Example 1: Feature Engineering with the Feature Engineering Agent
172
+ ### Example: H2O Machine Learning Agent
162
173
 
163
- [See the full example here.](/examples/feature_engineering_agent.ipynb)
174
+ [See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
164
175
 
165
176
  ``` python
166
- feature_engineering_agent = FeatureEngineeringAgent(model = llm)
167
-
168
- feature_engineering_agent.invoke_agent(
169
- data_raw = df,
170
- user_instructions = "Make sure to scale and center numeric features",
171
- target_variable = "Churn",
172
- max_retries = 3,
177
+ # Import libraries
178
+ from langchain_openai import ChatOpenAI
179
+ import pandas as pd
180
+ import h2o
181
+ import os
182
+ from ai_data_science_team.ml_agents import H2OMLAgent
183
+
184
+ # Load the data
185
+ df = pd.read_csv("data/churn_data.csv")
186
+ df
187
+
188
+ # Initialize the language model
189
+ os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
190
+ llm = ChatOpenAI(model=MODEL)
191
+ llm
192
+
193
+ # Initialize the H2O ML Agent
194
+ ml_agent = H2OMLAgent(
195
+ model=llm,
196
+ log=True,
197
+ log_path="logs/",
198
+ model_directory="h2o_models/",
199
+ enable_mlflow=True, # Use this if you wish to log models to MLflow
173
200
  )
174
- ```
175
-
176
- ``` bash
177
- ---FEATURE ENGINEERING AGENT----
178
- * CREATE FEATURE ENGINEER CODE
179
- * EXECUTING AGENT CODE
180
- * EXPLAIN AGENT CODE
181
- ```
182
-
183
- ``` python
184
- feature_engineering_agent.get_data_engineered()
185
- ```
186
-
187
- ### Example 2: Cleaning Data with the Data Cleaning Agent
188
-
189
- [See the full example here.](/examples/data_cleaning_agent.ipynb)
190
-
191
- ``` python
192
- data_cleaning_agent = DataCleaningAgent(model = llm)
201
+ ml_agent
193
202
 
194
- response = data_cleaning_agent.invoke_agent(
195
- data_raw = df,
196
- user_instructions = "Don't remove outliers when cleaning the data.",
197
- max_retries = 3,
203
+ # Run the agent
204
+ ml_agent.invoke_agent(
205
+ data_raw=df.drop(columns=["customerID"]),
206
+ user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
207
+ target_variable="Churn"
198
208
  )
199
- ```
200
209
 
201
- ``` bash
202
- ---DATA CLEANING AGENT----
203
- * CREATE DATA CLEANER CODE
204
- * EXECUTING AGENT CODE
205
- * EXPLAIN AGENT CODE
206
- ```
207
-
208
- ``` python
209
- data_cleaning_agent.get_data_cleaned()
210
+ # Retrieve and display the leaderboard of models
211
+ ml_agent.get_leaderboard()
210
212
  ```
211
213
 
212
214
  ## Contributing
@@ -227,4 +229,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
227
229
 
228
230
  I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
229
231
 
232
+ # ⭐️ Star History
233
+
234
+ [![Star History Chart](https://api.star-history.com/svg?repos=business-science/ai-data-science-team&type=Date)](https://star-history.com/#business-science/ai-data-science-team&Date)
230
235
 
236
+ [**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
@@ -0,0 +1 @@
1
+ __version__ = "0.0.0.9016"