ai-data-science-team 0.0.0.9008__tar.gz → 0.0.0.9010__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. {ai_data_science_team-0.0.0.9008/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9010}/PKG-INFO +56 -24
  2. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/README.md +46 -23
  3. ai_data_science_team-0.0.0.9010/ai_data_science_team/_version.py +1 -0
  4. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/__init__.py +0 -1
  5. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/data_cleaning_agent.py +50 -39
  6. ai_data_science_team-0.0.0.9010/ai_data_science_team/agents/data_loader_tools_agent.py +69 -0
  7. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/data_visualization_agent.py +45 -50
  8. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/data_wrangling_agent.py +50 -49
  9. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/feature_engineering_agent.py +48 -67
  10. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/sql_database_agent.py +130 -76
  11. ai_data_science_team-0.0.0.9010/ai_data_science_team/ml_agents/__init__.py +2 -0
  12. ai_data_science_team-0.0.0.9010/ai_data_science_team/ml_agents/h2o_ml_agent.py +852 -0
  13. ai_data_science_team-0.0.0.9010/ai_data_science_team/ml_agents/mlflow_tools_agent.py +327 -0
  14. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/multiagents/sql_data_analyst.py +120 -9
  15. {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/parsers}/parsers.py +0 -1
  16. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/templates/__init__.py +1 -0
  17. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/templates/agent_templates.py +78 -7
  18. ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/data_loader.py +378 -0
  19. ai_data_science_team-0.0.0.9008/ai_data_science_team/tools/metadata.py → ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/dataframe.py +0 -91
  20. ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/h2o.py +643 -0
  21. ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/mlflow.py +961 -0
  22. ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/sql.py +126 -0
  23. ai_data_science_team-0.0.0.9010/ai_data_science_team/utils/__init__.py +0 -0
  24. {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/utils}/regex.py +59 -1
  25. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010/ai_data_science_team.egg-info}/PKG-INFO +56 -24
  26. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/SOURCES.txt +14 -5
  27. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/requires.txt +10 -0
  28. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/requirements.txt +2 -0
  29. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/setup.py +4 -0
  30. ai_data_science_team-0.0.0.9008/ai_data_science_team/_version.py +0 -1
  31. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/LICENSE +0 -0
  32. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/MANIFEST.in +0 -0
  33. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/__init__.py +0 -0
  34. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/multiagents/__init__.py +0 -0
  35. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
  36. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/orchestration.py +0 -0
  37. {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/parsers}/__init__.py +0 -0
  38. {ai_data_science_team-0.0.0.9008/ai_data_science_team/utils → ai_data_science_team-0.0.0.9010/ai_data_science_team/tools}/__init__.py +0 -0
  39. {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/utils}/logging.py +0 -0
  40. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/utils/plotly.py +0 -0
  41. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
  42. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/top_level.txt +0 -0
  43. {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9008
3
+ Version: 0.0.0.9010
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -21,17 +21,26 @@ Requires-Dist: langchain_experimental
21
21
  Requires-Dist: langgraph>=0.2.57
22
22
  Requires-Dist: openai
23
23
  Requires-Dist: pandas
24
+ Requires-Dist: sqlalchemy
24
25
  Requires-Dist: numpy
25
26
  Requires-Dist: plotly
26
27
  Requires-Dist: streamlit
27
28
  Requires-Dist: scikit-learn
28
29
  Requires-Dist: xgboost
30
+ Requires-Dist: psutil
31
+ Provides-Extra: machine-learning
32
+ Requires-Dist: h2o; extra == "machine-learning"
33
+ Requires-Dist: mlflow; extra == "machine-learning"
34
+ Provides-Extra: all
35
+ Requires-Dist: h2o; extra == "all"
36
+ Requires-Dist: mlflow; extra == "all"
29
37
  Dynamic: author
30
38
  Dynamic: author-email
31
39
  Dynamic: classifier
32
40
  Dynamic: description
33
41
  Dynamic: description-content-type
34
42
  Dynamic: home-page
43
+ Dynamic: provides-extra
35
44
  Dynamic: requires-dist
36
45
  Dynamic: requires-python
37
46
  Dynamic: summary
@@ -39,7 +48,7 @@ Dynamic: summary
39
48
  <div align="center">
40
49
  <a href="https://github.com/business-science/ai-data-science-team">
41
50
  <picture>
42
- <img src="/img/ai_data_science_team_logo.jpg" alt="AI Data Science Team" width="400">
51
+ <img src="/img/ai_data_science_team_logo_small.jpg" alt="AI Data Science Team" width="400">
43
52
  </picture>
44
53
  </a>
45
54
  </div>
@@ -47,13 +56,13 @@ Dynamic: summary
47
56
  <em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
48
57
  </div>
49
58
  <div align="center">
50
- <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg" alt="PyPI"></a>
51
- <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg" alt="versions"></a>
52
- <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?v" alt="license"></a>
59
+ <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
60
+ <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
61
+ <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
53
62
  </div>
54
63
 
55
64
 
56
- # Your AI Data Science Team (An Army Of Agents)
65
+ # Your AI Data Science Team (🪖 An Army Of Agents)
57
66
 
58
67
  **An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
59
68
 
@@ -74,14 +83,19 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
74
83
 
75
84
  ## Table of Contents
76
85
 
77
- - [Your AI Data Science Team (An Army Of Agents)](#your-ai-data-science-team-an-army-of-agents)
86
+ - [Your AI Data Science Team (🪖 An Army Of Agents)](#your-ai-data-science-team--an-army-of-agents)
78
87
  - [Table of Contents](#table-of-contents)
79
88
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
80
- - [Free How To Build AI Agents for Data Scientists Workshop](#free-how-to-build-ai-agents-for-data-scientists-workshop)
89
+ - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
81
90
  - [Data Science Agents](#data-science-agents)
82
- - [Coming Soon: Multi-Agents](#coming-soon-multi-agents)
83
- - [...And after that, the Multi-Agent Data Science Apps](#and-after-that-the-multi-agent-data-science-apps)
91
+ - [NEW: Multi-Agents](#new-multi-agents)
92
+ - [Data Science Apps](#data-science-apps)
93
+ - [Apps Available Now](#apps-available-now)
94
+ - [🔥 Agentic Applications](#-agentic-applications)
84
95
  - [Agents Available Now](#agents-available-now)
96
+ - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
97
+ - [Data Science Agents](#data-science-agents-1)
98
+ - [Multi-Agents](#multi-agents)
85
99
  - [Agents Coming Soon](#agents-coming-soon)
86
100
  - [Disclaimer](#disclaimer)
87
101
  - [Installation](#installation)
@@ -94,11 +108,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
94
108
 
95
109
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
96
110
 
97
- Want to have your own _customized_ enterprise-grade AI Data Science Team and domain-specifici AI-powered Apps?
111
+ Want to have your own _customized_ enterprise-grade AI Data Science Team and *domain-specific* AI-powered Apps?
98
112
 
99
113
  **Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
100
114
 
101
- ## Free How To Build AI Agents for Data Scientists Workshop
115
+ ## Generative AI for Data Scientists Workshop
102
116
 
103
117
  If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that performs Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
104
118
 
@@ -110,32 +124,50 @@ This project is a work in progress. New data science agents will be released soo
110
124
 
111
125
  ![Data Science Team](/img/ai_data_science_team.jpg)
112
126
 
113
- ### Coming Soon: Multi-Agents
127
+ ### NEW: Multi-Agents
114
128
 
115
- This is the internals of the Business Intelligence SQL Agent I'm working on:
129
+ These are the internals of the SQL Data Analyst Agent that connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction, performs Joins, Aggregations, and other SQL Query operations. And it includes a Data Visualization Agent that creates visualizations to help you understand your data:
116
130
 
117
131
  ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
118
132
 
119
- ### ...And after that, the Multi-Agent Data Science Apps
133
+ ### Data Science Apps
120
134
 
121
135
  This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
122
136
 
123
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
137
+ ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
138
+
139
+ ### Apps Available Now
140
+
141
+ [See all available apps here](/apps)
142
+
143
+ #### 🔥 Agentic Applications
144
+
145
+ 1. **SQL Database Agent App:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
124
146
 
125
147
  ### Agents Available Now
126
148
 
127
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
128
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations.
129
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
130
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
131
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
149
+ #### 🔥🔥 NEW! Machine Learning Agents
150
+
151
+ 1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
152
+ 2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
153
+
154
+ #### Data Science Agents
155
+
156
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
157
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
158
+ 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
159
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
160
+ 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
161
+
162
+ #### Multi-Agents
163
+
164
+ 1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
132
165
 
133
166
  ### Agents Coming Soon
134
167
 
135
168
  1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
136
- 2. **Machine Learning Agent:** Builds and logs the machine learning models.
137
- 3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
138
- 4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
169
+ 2. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
170
+ 3. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
139
171
 
140
172
  ## Disclaimer
141
173
 
@@ -1,7 +1,7 @@
1
1
  <div align="center">
2
2
  <a href="https://github.com/business-science/ai-data-science-team">
3
3
  <picture>
4
- <img src="/img/ai_data_science_team_logo.jpg" alt="AI Data Science Team" width="400">
4
+ <img src="/img/ai_data_science_team_logo_small.jpg" alt="AI Data Science Team" width="400">
5
5
  </picture>
6
6
  </a>
7
7
  </div>
@@ -9,13 +9,13 @@
9
9
  <em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
10
10
  </div>
11
11
  <div align="center">
12
- <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg" alt="PyPI"></a>
13
- <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg" alt="versions"></a>
14
- <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?v" alt="license"></a>
12
+ <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
13
+ <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
14
+ <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
15
15
  </div>
16
16
 
17
17
 
18
- # Your AI Data Science Team (An Army Of Agents)
18
+ # Your AI Data Science Team (🪖 An Army Of Agents)
19
19
 
20
20
  **An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
21
21
 
@@ -36,14 +36,19 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
36
36
 
37
37
  ## Table of Contents
38
38
 
39
- - [Your AI Data Science Team (An Army Of Agents)](#your-ai-data-science-team-an-army-of-agents)
39
+ - [Your AI Data Science Team (🪖 An Army Of Agents)](#your-ai-data-science-team--an-army-of-agents)
40
40
  - [Table of Contents](#table-of-contents)
41
41
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
42
- - [Free How To Build AI Agents for Data Scientists Workshop](#free-how-to-build-ai-agents-for-data-scientists-workshop)
42
+ - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
43
43
  - [Data Science Agents](#data-science-agents)
44
- - [Coming Soon: Multi-Agents](#coming-soon-multi-agents)
45
- - [...And after that, the Multi-Agent Data Science Apps](#and-after-that-the-multi-agent-data-science-apps)
44
+ - [NEW: Multi-Agents](#new-multi-agents)
45
+ - [Data Science Apps](#data-science-apps)
46
+ - [Apps Available Now](#apps-available-now)
47
+ - [🔥 Agentic Applications](#-agentic-applications)
46
48
  - [Agents Available Now](#agents-available-now)
49
+ - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
50
+ - [Data Science Agents](#data-science-agents-1)
51
+ - [Multi-Agents](#multi-agents)
47
52
  - [Agents Coming Soon](#agents-coming-soon)
48
53
  - [Disclaimer](#disclaimer)
49
54
  - [Installation](#installation)
@@ -56,11 +61,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
56
61
 
57
62
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
58
63
 
59
- Want to have your own _customized_ enterprise-grade AI Data Science Team and domain-specifici AI-powered Apps?
64
+ Want to have your own _customized_ enterprise-grade AI Data Science Team and *domain-specific* AI-powered Apps?
60
65
 
61
66
  **Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
62
67
 
63
- ## Free How To Build AI Agents for Data Scientists Workshop
68
+ ## Generative AI for Data Scientists Workshop
64
69
 
65
70
  If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that performs Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
66
71
 
@@ -72,32 +77,50 @@ This project is a work in progress. New data science agents will be released soo
72
77
 
73
78
  ![Data Science Team](/img/ai_data_science_team.jpg)
74
79
 
75
- ### Coming Soon: Multi-Agents
80
+ ### NEW: Multi-Agents
76
81
 
77
- This is the internals of the Business Intelligence SQL Agent I'm working on:
82
+ These are the internals of the SQL Data Analyst Agent that connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction, performs Joins, Aggregations, and other SQL Query operations. And it includes a Data Visualization Agent that creates visualizations to help you understand your data:
78
83
 
79
84
  ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
80
85
 
81
- ### ...And after that, the Multi-Agent Data Science Apps
86
+ ### Data Science Apps
82
87
 
83
88
  This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
84
89
 
85
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
90
+ ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
91
+
92
+ ### Apps Available Now
93
+
94
+ [See all available apps here](/apps)
95
+
96
+ #### 🔥 Agentic Applications
97
+
98
+ 1. **SQL Database Agent App:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
86
99
 
87
100
  ### Agents Available Now
88
101
 
89
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
90
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations.
91
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
92
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
93
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
102
+ #### 🔥🔥 NEW! Machine Learning Agents
103
+
104
+ 1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
105
+ 2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
106
+
107
+ #### Data Science Agents
108
+
109
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
110
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
111
+ 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
112
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
113
+ 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
114
+
115
+ #### Multi-Agents
116
+
117
+ 1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
94
118
 
95
119
  ### Agents Coming Soon
96
120
 
97
121
  1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
98
- 2. **Machine Learning Agent:** Builds and logs the machine learning models.
99
- 3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
100
- 4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
122
+ 2. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
123
+ 3. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
101
124
 
102
125
  ## Disclaimer
103
126
 
@@ -0,0 +1 @@
1
+ __version__ = "0.0.0.9010"
@@ -3,4 +3,3 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
-
@@ -14,7 +14,7 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
17
+ import json
18
18
  import pandas as pd
19
19
 
20
20
  from IPython.display import Markdown
@@ -23,21 +23,26 @@ from ai_data_science_team.templates import(
23
23
  node_func_execute_agent_code_on_data,
24
24
  node_func_human_review,
25
25
  node_func_fix_agent_code,
26
- node_func_explain_agent_code,
26
+ node_func_report_agent_outputs,
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name, format_recommended_steps
32
- from ai_data_science_team.tools.metadata import get_dataframe_summary
33
- from ai_data_science_team.tools.logging import log_ai_function
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
+ relocate_imports_inside_function,
33
+ add_comments_to_top,
34
+ format_agent_name,
35
+ format_recommended_steps,
36
+ get_generic_summary,
37
+ )
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
34
40
 
35
41
  # Setup
36
42
  AGENT_NAME = "data_cleaning_agent"
37
43
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
38
44
 
39
45
 
40
-
41
46
  # Class
42
47
  class DataCleaningAgent(BaseAgent):
43
48
  """
@@ -89,8 +94,8 @@ class DataCleaningAgent(BaseAgent):
89
94
  Cleans the provided dataset asynchronously based on user instructions.
90
95
  invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
91
96
  Cleans the provided dataset synchronously based on user instructions.
92
- explain_cleaning_steps()
93
- Returns an explanation of the cleaning steps performed by the agent.
97
+ get_workflow_summary()
98
+ Retrieves a summary of the agent's workflow.
94
99
  get_log_summary()
95
100
  Retrieves a summary of logged operations if logging is enabled.
96
101
  get_state_keys()
@@ -178,8 +183,7 @@ class DataCleaningAgent(BaseAgent):
178
183
  self.response=None
179
184
  return make_data_cleaning_agent(**self._params)
180
185
 
181
-
182
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
186
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
183
187
  """
184
188
  Asynchronously invokes the agent. The response is stored in the response attribute.
185
189
 
@@ -200,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
200
204
  --------
201
205
  None. The response is stored in the response attribute.
202
206
  """
203
- response = self._compiled_graph.ainvoke({
207
+ response = await self._compiled_graph.ainvoke({
204
208
  "user_instructions": user_instructions,
205
209
  "data_raw": data_raw.to_dict(),
206
210
  "max_retries": max_retries,
@@ -239,15 +243,16 @@ class DataCleaningAgent(BaseAgent):
239
243
  self.response = response
240
244
  return None
241
245
 
242
- def explain_cleaning_steps(self):
246
+ def get_workflow_summary(self, markdown=False):
243
247
  """
244
- Provides an explanation of the cleaning steps performed by the agent.
245
-
246
- Returns:
247
- str: Explanation of the cleaning steps.
248
+ Retrieves the agent's workflow summary, if logging is enabled.
248
249
  """
249
- messages = self.response.get("messages", [])
250
- return messages
250
+ if self.response and self.response.get("messages"):
251
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
252
+ if markdown:
253
+ return Markdown(summary)
254
+ else:
255
+ return summary
251
256
 
252
257
  def get_log_summary(self, markdown=False):
253
258
  """
@@ -255,7 +260,13 @@ class DataCleaningAgent(BaseAgent):
255
260
  """
256
261
  if self.response:
257
262
  if self.response.get('data_cleaner_function_path'):
258
- log_details = f"Log Path: {self.response.get('data_cleaner_function_path')}"
263
+ log_details = f"""
264
+ ## Data Cleaning Agent Log Summary:
265
+
266
+ Function Path: {self.response.get('data_cleaner_function_path')}
267
+
268
+ Function Name: {self.response.get('data_cleaner_function_name')}
269
+ """
259
270
  if markdown:
260
271
  return Markdown(log_details)
261
272
  else:
@@ -462,7 +473,7 @@ def make_data_cleaning_agent(
462
473
  Below are summaries of all datasets provided:
463
474
  {all_datasets_summary}
464
475
 
465
- Return the steps as a bullet point list (no code, just the steps).
476
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
466
477
 
467
478
  Avoid these:
468
479
  1. Do not include steps to save files.
@@ -633,32 +644,31 @@ def make_data_cleaning_agent(
633
644
  function_name=state.get("data_cleaner_function_name"),
634
645
  )
635
646
 
636
- def explain_data_cleaner_code(state: GraphState):
637
- return node_func_explain_agent_code(
647
+ # Final reporting node
648
+ def report_agent_outputs(state: GraphState):
649
+ return node_func_report_agent_outputs(
638
650
  state=state,
639
- code_snippet_key="data_cleaner_function",
651
+ keys_to_include=[
652
+ "recommended_steps",
653
+ "data_cleaner_function",
654
+ "data_cleaner_function_path",
655
+ "data_cleaner_function_name",
656
+ "data_cleaner_error",
657
+ ],
640
658
  result_key="messages",
641
- error_key="data_cleaner_error",
642
- llm=llm,
643
659
  role=AGENT_NAME,
644
- explanation_prompt_template="""
645
- Explain the data cleaning steps that the data cleaning agent performed in this function.
646
- Keep the summary succinct and to the point.\n\n# Data Cleaning Agent:\n\n{code}
647
- """,
648
- success_prefix="# Data Cleaning Agent:\n\n ",
649
- error_message="The Data Cleaning Agent encountered an error during data cleaning. Data could not be explained."
660
+ custom_title="Data Cleaning Agent Outputs"
650
661
  )
651
-
652
- # Define the graph
662
+
653
663
  node_functions = {
654
664
  "recommend_cleaning_steps": recommend_cleaning_steps,
655
665
  "human_review": human_review,
656
666
  "create_data_cleaner_code": create_data_cleaner_code,
657
667
  "execute_data_cleaner_code": execute_data_cleaner_code,
658
668
  "fix_data_cleaner_code": fix_data_cleaner_code,
659
- "explain_data_cleaner_code": explain_data_cleaner_code
669
+ "report_agent_outputs": report_agent_outputs,
660
670
  }
661
-
671
+
662
672
  app = create_coding_agent_graph(
663
673
  GraphState=GraphState,
664
674
  node_functions=node_functions,
@@ -666,16 +676,17 @@ def make_data_cleaning_agent(
666
676
  create_code_node_name="create_data_cleaner_code",
667
677
  execute_code_node_name="execute_data_cleaner_code",
668
678
  fix_code_node_name="fix_data_cleaner_code",
669
- explain_code_node_name="explain_data_cleaner_code",
679
+ explain_code_node_name="report_agent_outputs",
670
680
  error_key="data_cleaner_error",
671
- human_in_the_loop=human_in_the_loop, # or False
681
+ human_in_the_loop=human_in_the_loop,
672
682
  human_review_node_name="human_review",
673
683
  checkpointer=MemorySaver() if human_in_the_loop else None,
674
684
  bypass_recommended_steps=bypass_recommended_steps,
675
685
  bypass_explain_code=bypass_explain_code,
676
686
  )
677
-
687
+
678
688
  return app
689
+
679
690
 
680
691
 
681
692
 
@@ -0,0 +1,69 @@
1
+
2
+
3
+
4
+ from typing import Any, Optional, Annotated, Sequence, List, Dict
5
+ import operator
6
+
7
+ import pandas as pd
8
+ import os
9
+
10
+ from IPython.display import Markdown
11
+
12
+ from langchain_core.messages import BaseMessage, AIMessage
13
+
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+ from ai_data_science_team.tools.data_loader import (
21
+ load_directory,
22
+ load_file,
23
+ list_directory_contents,
24
+ list_directory_recursive,
25
+ get_file_info,
26
+ search_files_by_pattern,
27
+ )
28
+
29
+ AGENT_NAME = "data_loader_tools_agent"
30
+
31
+ tools = [
32
+ load_directory,
33
+ load_file,
34
+ list_directory_contents,
35
+ list_directory_recursive,
36
+ get_file_info,
37
+ search_files_by_pattern,
38
+ ]
39
+
40
+
41
+
42
+ def make_data_loader_tools_agent(
43
+ model: Any,
44
+ directory: Optional[str] = os.getcwd(),
45
+ ):
46
+ """
47
+ Creates a Data Loader Agent that can interact with data loading tools.
48
+
49
+ Parameters:
50
+ ----------
51
+ model : langchain.llms.base.LLM
52
+ The language model used to generate the tool calling agent.
53
+ directory : str, optional
54
+ The directory to search for files. Defaults to the current working directory.
55
+
56
+ Returns:
57
+ --------
58
+ Data Loader Agent
59
+ An agent that can interact with data loading tools.
60
+ """
61
+
62
+ class GraphState(AgentState):
63
+ internal_messages: Annotated[Sequence[BaseMessage], operator.add]
64
+ directory: str
65
+ user_instructions: str
66
+ data_artifacts: dict
67
+
68
+ pass
69
+