ai-data-science-team 0.0.0.9008__tar.gz → 0.0.0.9010__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {ai_data_science_team-0.0.0.9008/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9010}/PKG-INFO +56 -24
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/README.md +46 -23
- ai_data_science_team-0.0.0.9010/ai_data_science_team/_version.py +1 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/__init__.py +0 -1
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/data_cleaning_agent.py +50 -39
- ai_data_science_team-0.0.0.9010/ai_data_science_team/agents/data_loader_tools_agent.py +69 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/data_visualization_agent.py +45 -50
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/data_wrangling_agent.py +50 -49
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/feature_engineering_agent.py +48 -67
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/agents/sql_database_agent.py +130 -76
- ai_data_science_team-0.0.0.9010/ai_data_science_team/ml_agents/__init__.py +2 -0
- ai_data_science_team-0.0.0.9010/ai_data_science_team/ml_agents/h2o_ml_agent.py +852 -0
- ai_data_science_team-0.0.0.9010/ai_data_science_team/ml_agents/mlflow_tools_agent.py +327 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/multiagents/sql_data_analyst.py +120 -9
- {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/parsers}/parsers.py +0 -1
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/templates/__init__.py +1 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/templates/agent_templates.py +78 -7
- ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/data_loader.py +378 -0
- ai_data_science_team-0.0.0.9008/ai_data_science_team/tools/metadata.py → ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/dataframe.py +0 -91
- ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/h2o.py +643 -0
- ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/mlflow.py +961 -0
- ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/sql.py +126 -0
- ai_data_science_team-0.0.0.9010/ai_data_science_team/utils/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/utils}/regex.py +59 -1
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010/ai_data_science_team.egg-info}/PKG-INFO +56 -24
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/SOURCES.txt +14 -5
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/requires.txt +10 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/requirements.txt +2 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/setup.py +4 -0
- ai_data_science_team-0.0.0.9008/ai_data_science_team/_version.py +0 -1
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/MANIFEST.in +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/multiagents/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/orchestration.py +0 -0
- {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/parsers}/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9008/ai_data_science_team/utils → ai_data_science_team-0.0.0.9010/ai_data_science_team/tools}/__init__.py +0 -0
- {ai_data_science_team-0.0.0.9008/ai_data_science_team/tools → ai_data_science_team-0.0.0.9010/ai_data_science_team/utils}/logging.py +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team/utils/plotly.py +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/ai_data_science_team.egg-info/top_level.txt +0 -0
- {ai_data_science_team-0.0.0.9008 → ai_data_science_team-0.0.0.9010}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: ai-data-science-team
|
3
|
-
Version: 0.0.0.
|
3
|
+
Version: 0.0.0.9010
|
4
4
|
Summary: Build and run an AI-powered data science team.
|
5
5
|
Home-page: https://github.com/business-science/ai-data-science-team
|
6
6
|
Author: Matt Dancho
|
@@ -21,17 +21,26 @@ Requires-Dist: langchain_experimental
|
|
21
21
|
Requires-Dist: langgraph>=0.2.57
|
22
22
|
Requires-Dist: openai
|
23
23
|
Requires-Dist: pandas
|
24
|
+
Requires-Dist: sqlalchemy
|
24
25
|
Requires-Dist: numpy
|
25
26
|
Requires-Dist: plotly
|
26
27
|
Requires-Dist: streamlit
|
27
28
|
Requires-Dist: scikit-learn
|
28
29
|
Requires-Dist: xgboost
|
30
|
+
Requires-Dist: psutil
|
31
|
+
Provides-Extra: machine-learning
|
32
|
+
Requires-Dist: h2o; extra == "machine-learning"
|
33
|
+
Requires-Dist: mlflow; extra == "machine-learning"
|
34
|
+
Provides-Extra: all
|
35
|
+
Requires-Dist: h2o; extra == "all"
|
36
|
+
Requires-Dist: mlflow; extra == "all"
|
29
37
|
Dynamic: author
|
30
38
|
Dynamic: author-email
|
31
39
|
Dynamic: classifier
|
32
40
|
Dynamic: description
|
33
41
|
Dynamic: description-content-type
|
34
42
|
Dynamic: home-page
|
43
|
+
Dynamic: provides-extra
|
35
44
|
Dynamic: requires-dist
|
36
45
|
Dynamic: requires-python
|
37
46
|
Dynamic: summary
|
@@ -39,7 +48,7 @@ Dynamic: summary
|
|
39
48
|
<div align="center">
|
40
49
|
<a href="https://github.com/business-science/ai-data-science-team">
|
41
50
|
<picture>
|
42
|
-
<img src="/img/
|
51
|
+
<img src="/img/ai_data_science_team_logo_small.jpg" alt="AI Data Science Team" width="400">
|
43
52
|
</picture>
|
44
53
|
</a>
|
45
54
|
</div>
|
@@ -47,13 +56,13 @@ Dynamic: summary
|
|
47
56
|
<em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
|
48
57
|
</div>
|
49
58
|
<div align="center">
|
50
|
-
<a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg" alt="PyPI"></a>
|
51
|
-
<a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg" alt="versions"></a>
|
52
|
-
<a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?
|
59
|
+
<a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
|
60
|
+
<a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
|
61
|
+
<a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
|
53
62
|
</div>
|
54
63
|
|
55
64
|
|
56
|
-
# Your AI Data Science Team (An Army Of Agents)
|
65
|
+
# Your AI Data Science Team (🪖 An Army Of Agents)
|
57
66
|
|
58
67
|
**An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
|
59
68
|
|
@@ -74,14 +83,19 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
74
83
|
|
75
84
|
## Table of Contents
|
76
85
|
|
77
|
-
- [Your AI Data Science Team (An Army Of Agents)](#your-ai-data-science-team
|
86
|
+
- [Your AI Data Science Team (🪖 An Army Of Agents)](#your-ai-data-science-team--an-army-of-agents)
|
78
87
|
- [Table of Contents](#table-of-contents)
|
79
88
|
- [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
|
80
|
-
- [
|
89
|
+
- [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
|
81
90
|
- [Data Science Agents](#data-science-agents)
|
82
|
-
- [
|
83
|
-
- [
|
91
|
+
- [NEW: Multi-Agents](#new-multi-agents)
|
92
|
+
- [Data Science Apps](#data-science-apps)
|
93
|
+
- [Apps Available Now](#apps-available-now)
|
94
|
+
- [🔥 Agentic Applications](#-agentic-applications)
|
84
95
|
- [Agents Available Now](#agents-available-now)
|
96
|
+
- [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
|
97
|
+
- [Data Science Agents](#data-science-agents-1)
|
98
|
+
- [Multi-Agents](#multi-agents)
|
85
99
|
- [Agents Coming Soon](#agents-coming-soon)
|
86
100
|
- [Disclaimer](#disclaimer)
|
87
101
|
- [Installation](#installation)
|
@@ -94,11 +108,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
94
108
|
|
95
109
|
## Companies That Want A Custom AI Data Science Team (And AI Apps)
|
96
110
|
|
97
|
-
Want to have your own _customized_ enterprise-grade AI Data Science Team and domain-
|
111
|
+
Want to have your own _customized_ enterprise-grade AI Data Science Team and *domain-specific* AI-powered Apps?
|
98
112
|
|
99
113
|
**Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
|
100
114
|
|
101
|
-
##
|
115
|
+
## Generative AI for Data Scientists Workshop
|
102
116
|
|
103
117
|
If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that perform Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
|
104
118
|
|
@@ -110,32 +124,50 @@ This project is a work in progress. New data science agents will be released soo
|
|
110
124
|
|
111
125
|

|
112
126
|
|
113
|
-
###
|
127
|
+
### NEW: Multi-Agents
|
114
128
|
|
115
|
-
This is the internals of the
|
129
|
+
These are the internals of the SQL Data Analyst Agent, which connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction and performs Joins, Aggregations, and other SQL Query operations. It also includes a Data Visualization Agent that creates visualizations to help you understand your data:
|
116
130
|
|
117
131
|

|
118
132
|
|
119
|
-
###
|
133
|
+
### Data Science Apps
|
120
134
|
|
121
135
|
This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
|
122
136
|
|
123
|
-

|
137
|
+

|
138
|
+
|
139
|
+
### Apps Available Now
|
140
|
+
|
141
|
+
[See all available apps here](/apps)
|
142
|
+
|
143
|
+
#### 🔥 Agentic Applications
|
144
|
+
|
145
|
+
1. **SQL Database Agent App:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
|
124
146
|
|
125
147
|
### Agents Available Now
|
126
148
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
149
|
+
#### 🔥🔥 NEW! Machine Learning Agents
|
150
|
+
|
151
|
+
1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
|
152
|
+
2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
|
153
|
+
|
154
|
+
#### Data Science Agents
|
155
|
+
|
156
|
+
1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
|
157
|
+
2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
|
158
|
+
3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
|
159
|
+
4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
|
160
|
+
5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
|
161
|
+
|
162
|
+
#### Multi-Agents
|
163
|
+
|
164
|
+
1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
|
132
165
|
|
133
166
|
### Agents Coming Soon
|
134
167
|
|
135
168
|
1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
|
136
|
-
2. **
|
137
|
-
3. **
|
138
|
-
4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
|
169
|
+
2. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
|
170
|
+
3. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
|
139
171
|
|
140
172
|
## Disclaimer
|
141
173
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
<div align="center">
|
2
2
|
<a href="https://github.com/business-science/ai-data-science-team">
|
3
3
|
<picture>
|
4
|
-
<img src="/img/
|
4
|
+
<img src="/img/ai_data_science_team_logo_small.jpg" alt="AI Data Science Team" width="400">
|
5
5
|
</picture>
|
6
6
|
</a>
|
7
7
|
</div>
|
@@ -9,13 +9,13 @@
|
|
9
9
|
<em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
|
10
10
|
</div>
|
11
11
|
<div align="center">
|
12
|
-
<a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg" alt="PyPI"></a>
|
13
|
-
<a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg" alt="versions"></a>
|
14
|
-
<a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?
|
12
|
+
<a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
|
13
|
+
<a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
|
14
|
+
<a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
|
15
15
|
</div>
|
16
16
|
|
17
17
|
|
18
|
-
# Your AI Data Science Team (An Army Of Agents)
|
18
|
+
# Your AI Data Science Team (🪖 An Army Of Agents)
|
19
19
|
|
20
20
|
**An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
|
21
21
|
|
@@ -36,14 +36,19 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
36
36
|
|
37
37
|
## Table of Contents
|
38
38
|
|
39
|
-
- [Your AI Data Science Team (An Army Of Agents)](#your-ai-data-science-team
|
39
|
+
- [Your AI Data Science Team (🪖 An Army Of Agents)](#your-ai-data-science-team--an-army-of-agents)
|
40
40
|
- [Table of Contents](#table-of-contents)
|
41
41
|
- [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
|
42
|
-
- [
|
42
|
+
- [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
|
43
43
|
- [Data Science Agents](#data-science-agents)
|
44
|
-
- [
|
45
|
-
- [
|
44
|
+
- [NEW: Multi-Agents](#new-multi-agents)
|
45
|
+
- [Data Science Apps](#data-science-apps)
|
46
|
+
- [Apps Available Now](#apps-available-now)
|
47
|
+
- [🔥 Agentic Applications](#-agentic-applications)
|
46
48
|
- [Agents Available Now](#agents-available-now)
|
49
|
+
- [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
|
50
|
+
- [Data Science Agents](#data-science-agents-1)
|
51
|
+
- [Multi-Agents](#multi-agents)
|
47
52
|
- [Agents Coming Soon](#agents-coming-soon)
|
48
53
|
- [Disclaimer](#disclaimer)
|
49
54
|
- [Installation](#installation)
|
@@ -56,11 +61,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
|
|
56
61
|
|
57
62
|
## Companies That Want A Custom AI Data Science Team (And AI Apps)
|
58
63
|
|
59
|
-
Want to have your own _customized_ enterprise-grade AI Data Science Team and domain-
|
64
|
+
Want to have your own _customized_ enterprise-grade AI Data Science Team and *domain-specific* AI-powered Apps?
|
60
65
|
|
61
66
|
**Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
|
62
67
|
|
63
|
-
##
|
68
|
+
## Generative AI for Data Scientists Workshop
|
64
69
|
|
65
70
|
If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that perform Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
|
66
71
|
|
@@ -72,32 +77,50 @@ This project is a work in progress. New data science agents will be released soo
|
|
72
77
|
|
73
78
|

|
74
79
|
|
75
|
-
###
|
80
|
+
### NEW: Multi-Agents
|
76
81
|
|
77
|
-
This is the internals of the
|
82
|
+
These are the internals of the SQL Data Analyst Agent, which connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction and performs Joins, Aggregations, and other SQL Query operations. It also includes a Data Visualization Agent that creates visualizations to help you understand your data:
|
78
83
|
|
79
84
|

|
80
85
|
|
81
|
-
###
|
86
|
+
### Data Science Apps
|
82
87
|
|
83
88
|
This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
|
84
89
|
|
85
|
-

|
90
|
+

|
91
|
+
|
92
|
+
### Apps Available Now
|
93
|
+
|
94
|
+
[See all available apps here](/apps)
|
95
|
+
|
96
|
+
#### 🔥 Agentic Applications
|
97
|
+
|
98
|
+
1. **SQL Database Agent App:** Connects to any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
|
86
99
|
|
87
100
|
### Agents Available Now
|
88
101
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
102
|
+
#### 🔥🔥 NEW! Machine Learning Agents
|
103
|
+
|
104
|
+
1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
|
105
|
+
2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
|
106
|
+
|
107
|
+
#### Data Science Agents
|
108
|
+
|
109
|
+
1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
|
110
|
+
2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
|
111
|
+
3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
|
112
|
+
4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
|
113
|
+
5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
|
114
|
+
|
115
|
+
#### Multi-Agents
|
116
|
+
|
117
|
+
1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/multiagents/sql_data_analyst.ipynb)
|
94
118
|
|
95
119
|
### Agents Coming Soon
|
96
120
|
|
97
121
|
1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
|
98
|
-
2. **
|
99
|
-
3. **
|
100
|
-
4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
|
122
|
+
2. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
|
123
|
+
3. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
|
101
124
|
|
102
125
|
## Disclaimer
|
103
126
|
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.0.0.9010"
|
@@ -3,4 +3,3 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
|
|
3
3
|
from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
|
4
4
|
from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
|
5
5
|
from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
|
6
|
-
|
@@ -14,7 +14,7 @@ from langgraph.types import Command
|
|
14
14
|
from langgraph.checkpoint.memory import MemorySaver
|
15
15
|
|
16
16
|
import os
|
17
|
-
import
|
17
|
+
import json
|
18
18
|
import pandas as pd
|
19
19
|
|
20
20
|
from IPython.display import Markdown
|
@@ -23,21 +23,26 @@ from ai_data_science_team.templates import(
|
|
23
23
|
node_func_execute_agent_code_on_data,
|
24
24
|
node_func_human_review,
|
25
25
|
node_func_fix_agent_code,
|
26
|
-
|
26
|
+
node_func_report_agent_outputs,
|
27
27
|
create_coding_agent_graph,
|
28
28
|
BaseAgent,
|
29
29
|
)
|
30
|
-
from ai_data_science_team.
|
31
|
-
from ai_data_science_team.
|
32
|
-
|
33
|
-
|
30
|
+
from ai_data_science_team.parsers.parsers import PythonOutputParser
|
31
|
+
from ai_data_science_team.utils.regex import (
|
32
|
+
relocate_imports_inside_function,
|
33
|
+
add_comments_to_top,
|
34
|
+
format_agent_name,
|
35
|
+
format_recommended_steps,
|
36
|
+
get_generic_summary,
|
37
|
+
)
|
38
|
+
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
39
|
+
from ai_data_science_team.utils.logging import log_ai_function
|
34
40
|
|
35
41
|
# Setup
|
36
42
|
AGENT_NAME = "data_cleaning_agent"
|
37
43
|
LOG_PATH = os.path.join(os.getcwd(), "logs/")
|
38
44
|
|
39
45
|
|
40
|
-
|
41
46
|
# Class
|
42
47
|
class DataCleaningAgent(BaseAgent):
|
43
48
|
"""
|
@@ -89,8 +94,8 @@ class DataCleaningAgent(BaseAgent):
|
|
89
94
|
Cleans the provided dataset asynchronously based on user instructions.
|
90
95
|
invoke_agent(user_instructions: str, data_raw: pd.DataFrame, max_retries=3, retry_count=0)
|
91
96
|
Cleans the provided dataset synchronously based on user instructions.
|
92
|
-
|
93
|
-
|
97
|
+
get_workflow_summary()
|
98
|
+
Retrieves a summary of the agent's workflow.
|
94
99
|
get_log_summary()
|
95
100
|
Retrieves a summary of logged operations if logging is enabled.
|
96
101
|
get_state_keys()
|
@@ -178,8 +183,7 @@ class DataCleaningAgent(BaseAgent):
|
|
178
183
|
self.response=None
|
179
184
|
return make_data_cleaning_agent(**self._params)
|
180
185
|
|
181
|
-
|
182
|
-
def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
|
186
|
+
async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
|
183
187
|
"""
|
184
188
|
Asynchronously invokes the agent. The response is stored in the response attribute.
|
185
189
|
|
@@ -200,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
|
|
200
204
|
--------
|
201
205
|
None. The response is stored in the response attribute.
|
202
206
|
"""
|
203
|
-
response = self._compiled_graph.ainvoke({
|
207
|
+
response = await self._compiled_graph.ainvoke({
|
204
208
|
"user_instructions": user_instructions,
|
205
209
|
"data_raw": data_raw.to_dict(),
|
206
210
|
"max_retries": max_retries,
|
@@ -239,15 +243,16 @@ class DataCleaningAgent(BaseAgent):
|
|
239
243
|
self.response = response
|
240
244
|
return None
|
241
245
|
|
242
|
-
def
|
246
|
+
def get_workflow_summary(self, markdown=False):
|
243
247
|
"""
|
244
|
-
|
245
|
-
|
246
|
-
Returns:
|
247
|
-
str: Explanation of the cleaning steps.
|
248
|
+
Retrieves the agent's workflow summary, if logging is enabled.
|
248
249
|
"""
|
249
|
-
|
250
|
-
|
250
|
+
if self.response and self.response.get("messages"):
|
251
|
+
summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
|
252
|
+
if markdown:
|
253
|
+
return Markdown(summary)
|
254
|
+
else:
|
255
|
+
return summary
|
251
256
|
|
252
257
|
def get_log_summary(self, markdown=False):
|
253
258
|
"""
|
@@ -255,7 +260,13 @@ class DataCleaningAgent(BaseAgent):
|
|
255
260
|
"""
|
256
261
|
if self.response:
|
257
262
|
if self.response.get('data_cleaner_function_path'):
|
258
|
-
log_details = f"
|
263
|
+
log_details = f"""
|
264
|
+
## Data Cleaning Agent Log Summary:
|
265
|
+
|
266
|
+
Function Path: {self.response.get('data_cleaner_function_path')}
|
267
|
+
|
268
|
+
Function Name: {self.response.get('data_cleaner_function_name')}
|
269
|
+
"""
|
259
270
|
if markdown:
|
260
271
|
return Markdown(log_details)
|
261
272
|
else:
|
@@ -462,7 +473,7 @@ def make_data_cleaning_agent(
|
|
462
473
|
Below are summaries of all datasets provided:
|
463
474
|
{all_datasets_summary}
|
464
475
|
|
465
|
-
Return
|
476
|
+
Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
|
466
477
|
|
467
478
|
Avoid these:
|
468
479
|
1. Do not include steps to save files.
|
@@ -633,32 +644,31 @@ def make_data_cleaning_agent(
|
|
633
644
|
function_name=state.get("data_cleaner_function_name"),
|
634
645
|
)
|
635
646
|
|
636
|
-
|
637
|
-
|
647
|
+
# Final reporting node
|
648
|
+
def report_agent_outputs(state: GraphState):
|
649
|
+
return node_func_report_agent_outputs(
|
638
650
|
state=state,
|
639
|
-
|
651
|
+
keys_to_include=[
|
652
|
+
"recommended_steps",
|
653
|
+
"data_cleaner_function",
|
654
|
+
"data_cleaner_function_path",
|
655
|
+
"data_cleaner_function_name",
|
656
|
+
"data_cleaner_error",
|
657
|
+
],
|
640
658
|
result_key="messages",
|
641
|
-
error_key="data_cleaner_error",
|
642
|
-
llm=llm,
|
643
659
|
role=AGENT_NAME,
|
644
|
-
|
645
|
-
Explain the data cleaning steps that the data cleaning agent performed in this function.
|
646
|
-
Keep the summary succinct and to the point.\n\n# Data Cleaning Agent:\n\n{code}
|
647
|
-
""",
|
648
|
-
success_prefix="# Data Cleaning Agent:\n\n ",
|
649
|
-
error_message="The Data Cleaning Agent encountered an error during data cleaning. Data could not be explained."
|
660
|
+
custom_title="Data Cleaning Agent Outputs"
|
650
661
|
)
|
651
|
-
|
652
|
-
# Define the graph
|
662
|
+
|
653
663
|
node_functions = {
|
654
664
|
"recommend_cleaning_steps": recommend_cleaning_steps,
|
655
665
|
"human_review": human_review,
|
656
666
|
"create_data_cleaner_code": create_data_cleaner_code,
|
657
667
|
"execute_data_cleaner_code": execute_data_cleaner_code,
|
658
668
|
"fix_data_cleaner_code": fix_data_cleaner_code,
|
659
|
-
"
|
669
|
+
"report_agent_outputs": report_agent_outputs,
|
660
670
|
}
|
661
|
-
|
671
|
+
|
662
672
|
app = create_coding_agent_graph(
|
663
673
|
GraphState=GraphState,
|
664
674
|
node_functions=node_functions,
|
@@ -666,16 +676,17 @@ def make_data_cleaning_agent(
|
|
666
676
|
create_code_node_name="create_data_cleaner_code",
|
667
677
|
execute_code_node_name="execute_data_cleaner_code",
|
668
678
|
fix_code_node_name="fix_data_cleaner_code",
|
669
|
-
explain_code_node_name="
|
679
|
+
explain_code_node_name="report_agent_outputs",
|
670
680
|
error_key="data_cleaner_error",
|
671
|
-
human_in_the_loop=human_in_the_loop,
|
681
|
+
human_in_the_loop=human_in_the_loop,
|
672
682
|
human_review_node_name="human_review",
|
673
683
|
checkpointer=MemorySaver() if human_in_the_loop else None,
|
674
684
|
bypass_recommended_steps=bypass_recommended_steps,
|
675
685
|
bypass_explain_code=bypass_explain_code,
|
676
686
|
)
|
677
|
-
|
687
|
+
|
678
688
|
return app
|
689
|
+
|
679
690
|
|
680
691
|
|
681
692
|
|
@@ -0,0 +1,69 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
from typing import Any, Optional, Annotated, Sequence, List, Dict
|
5
|
+
import operator
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
import os
|
9
|
+
|
10
|
+
from IPython.display import Markdown
|
11
|
+
|
12
|
+
from langchain_core.messages import BaseMessage, AIMessage
|
13
|
+
|
14
|
+
from langgraph.prebuilt import create_react_agent, ToolNode
|
15
|
+
from langgraph.prebuilt.chat_agent_executor import AgentState
|
16
|
+
from langgraph.graph import START, END, StateGraph
|
17
|
+
|
18
|
+
from ai_data_science_team.templates import BaseAgent
|
19
|
+
from ai_data_science_team.utils.regex import format_agent_name
|
20
|
+
from ai_data_science_team.tools.data_loader import (
|
21
|
+
load_directory,
|
22
|
+
load_file,
|
23
|
+
list_directory_contents,
|
24
|
+
list_directory_recursive,
|
25
|
+
get_file_info,
|
26
|
+
search_files_by_pattern,
|
27
|
+
)
|
28
|
+
|
29
|
+
AGENT_NAME = "data_loader_tools_agent"
|
30
|
+
|
31
|
+
tools = [
|
32
|
+
load_directory,
|
33
|
+
load_file,
|
34
|
+
list_directory_contents,
|
35
|
+
list_directory_recursive,
|
36
|
+
get_file_info,
|
37
|
+
search_files_by_pattern,
|
38
|
+
]
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
def make_data_loader_tools_agent(
|
43
|
+
model: Any,
|
44
|
+
directory: Optional[str] = os.getcwd(),
|
45
|
+
):
|
46
|
+
"""
|
47
|
+
Creates a Data Loader Agent that can interact with data loading tools.
|
48
|
+
|
49
|
+
Parameters:
|
50
|
+
----------
|
51
|
+
model : langchain.llms.base.LLM
|
52
|
+
The language model used to generate the tool calling agent.
|
53
|
+
directory : str, optional
|
54
|
+
The directory to search for files. Defaults to the current working directory.
|
55
|
+
|
56
|
+
Returns:
|
57
|
+
--------
|
58
|
+
Data Loader Agent
|
59
|
+
An agent that can interact with data loading tools.
|
60
|
+
"""
|
61
|
+
|
62
|
+
class GraphState(AgentState):
|
63
|
+
internal_messages: Annotated[Sequence[BaseMessage], operator.add]
|
64
|
+
directory: str
|
65
|
+
user_instructions: str
|
66
|
+
data_artifacts: dict
|
67
|
+
|
68
|
+
pass
|
69
|
+
|