ai-data-science-team 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/__init__.py +4 -5
- ai_data_science_team/agents/data_cleaning_agent.py +268 -116
- ai_data_science_team/agents/data_visualization_agent.py +470 -41
- ai_data_science_team/agents/data_wrangling_agent.py +471 -31
- ai_data_science_team/agents/feature_engineering_agent.py +426 -41
- ai_data_science_team/agents/sql_database_agent.py +458 -58
- ai_data_science_team/ml_agents/__init__.py +1 -0
- ai_data_science_team/ml_agents/h2o_ml_agent.py +1032 -0
- ai_data_science_team/multiagents/__init__.py +1 -0
- ai_data_science_team/multiagents/sql_data_analyst.py +398 -0
- ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
- ai_data_science_team/templates/__init__.py +3 -1
- ai_data_science_team/templates/agent_templates.py +319 -43
- ai_data_science_team/tools/metadata.py +94 -62
- ai_data_science_team/tools/regex.py +86 -1
- ai_data_science_team/utils/__init__.py +0 -0
- ai_data_science_team/utils/plotly.py +24 -0
- ai_data_science_team-0.0.0.9009.dist-info/METADATA +245 -0
- ai_data_science_team-0.0.0.9009.dist-info/RECORD +28 -0
- ai_data_science_team-0.0.0.9007.dist-info/METADATA +0 -183
- ai_data_science_team-0.0.0.9007.dist-info/RECORD +0 -21
- {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL +0 -0
- {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/top_level.txt +0 -0
@@ -76,4 +76,89 @@ def format_agent_name(agent_name: str) -> str:
|
|
76
76
|
|
77
77
|
formatted_name = agent_name.strip().replace("_", " ").upper()
|
78
78
|
|
79
|
-
return f"---{formatted_name}----"
|
79
|
+
return f"---{formatted_name}----"
|
80
|
+
|
81
|
+
def format_recommended_steps(raw_text: str, heading: str = "# Recommended Steps:") -> str:
    """
    Normalize a block of recommended steps so it carries the heading exactly once.

    Surrounding whitespace is stripped from ``raw_text`` and the text is split
    on newlines. The first line whose stripped form equals ``heading`` is kept
    as written; any later line that matches is dropped. If no line matches,
    ``heading`` is placed in front of the remaining lines.

    Parameters
    ----------
    raw_text : str
        The text to normalize.
    heading : str, optional
        The heading line to de-duplicate or prepend.

    Returns
    -------
    str
        The normalized lines joined with newlines.
    """
    body = raw_text.strip()
    # After strip() the first line can only be blank when the whole text was
    # blank, so that is the single case where leading lines must be discarded.
    candidates = body.split("\n") if body else []

    kept = []
    heading_found = False
    for candidate in candidates:
        is_heading = candidate.strip() == heading
        if is_heading and heading_found:
            # A repeat of the heading: drop it.
            continue
        heading_found = heading_found or is_heading
        kept.append(candidate)

    if not heading_found:
        kept = [heading] + kept

    return "\n".join(kept)
|
107
|
+
|
108
|
+
def get_generic_summary(report_dict: dict, code_lang = "python") -> str:
    """
    Render a report dictionary of unknown structure as a text summary.

    The summary opens with a "# <title>" line taken from the 'report_title'
    key ("Untitled Report" when that key is absent). Every other key, in
    dictionary order, becomes a "## ..." section whose heading text is
    ``format_agent_name(key).upper()``. When the key contains 'code' or
    'function' (case-insensitive) the value is wrapped in a fenced code
    block tagged with ``code_lang``; otherwise ``str(value)`` is emitted as
    plain text.

    Parameters
    ----------
    report_dict : dict
        The dictionary holding the agent output or user report.
    code_lang : str, optional
        Language tag written after the opening code fence.

    Returns
    -------
    str
        A formatted summary string (sections joined with newlines).
    """
    code_markers = ("code", "function")

    title = report_dict.get("report_title", "Untitled Report")
    parts = [f"# {title}"]

    for field, content in report_dict.items():
        if field == "report_title":
            # Already rendered as the top-level title.
            continue

        lowered = field.lower()
        is_code = any(marker in lowered for marker in code_markers)

        # Both kinds of section share the same heading line.
        parts.append(f"\n## {format_agent_name(field).upper()}")
        if is_code:
            parts.append(f"```{code_lang}\n{content!s}\n```")
        else:
            parts.append(str(content))

    return "\n".join(parts)
|
152
|
+
|
153
|
+
def remove_consecutive_duplicates(messages):
    """
    Drop messages whose content repeats that of the message just before them.

    Only adjacent repeats are removed; a content value that reappears later,
    after a different message, is kept.

    Parameters
    ----------
    messages : iterable
        Objects exposing a ``content`` attribute, in order.

    Returns
    -------
    list
        The original message objects, with each run of equal ``content``
        collapsed to the first message of the run.
    """
    # A private sentinel marks "no previous message yet". Starting from None
    # would make a first message whose content is None compare equal to the
    # initial state and be dropped as if it were a duplicate.
    no_previous = object()
    previous_content = no_previous

    unique_messages = []
    for msg in messages:
        content = msg.content
        if previous_content is no_previous or content != previous_content:
            unique_messages.append(msg)
        previous_content = content  # Track the latest content seen

    return unique_messages
|
163
|
+
|
164
|
+
|
File without changes
|
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
import json
|
4
|
+
import plotly.io as pio
|
5
|
+
|
6
|
+
def plotly_from_dict(plotly_graph_dict: dict):
    """
    Convert a Plotly graph dictionary to a Plotly graph object.

    Parameters:
    -----------
    plotly_graph_dict: dict
        A Plotly graph dictionary. It is serialized with ``json.dumps``, so
        it must be JSON-serializable. ``None`` is accepted and passed through.

    Returns:
    --------
    plotly_graph: plotly.graph_objects.Figure or None
        The object built by ``plotly.io.from_json`` from the dictionary, or
        ``None`` when ``plotly_graph_dict`` is ``None``.
    """
    # Guard on the argument. The previous check compared the function object
    # itself (`plotly_from_dict`) to None, which is never true, so a None
    # input fell through to the JSON round-trip below.
    if plotly_graph_dict is None:
        return None

    # Round-trip through JSON text because pio.from_json takes a JSON string.
    return pio.from_json(json.dumps(plotly_graph_dict))
|
@@ -0,0 +1,245 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: ai-data-science-team
|
3
|
+
Version: 0.0.0.9009
|
4
|
+
Summary: Build and run an AI-powered data science team.
|
5
|
+
Home-page: https://github.com/business-science/ai-data-science-team
|
6
|
+
Author: Matt Dancho
|
7
|
+
Author-email: mdancho@business-science.io
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
13
|
+
Requires-Python: >=3.9
|
14
|
+
Description-Content-Type: text/markdown
|
15
|
+
License-File: LICENSE
|
16
|
+
Requires-Dist: openpyxl
|
17
|
+
Requires-Dist: langchain
|
18
|
+
Requires-Dist: langchain_community
|
19
|
+
Requires-Dist: langchain_openai
|
20
|
+
Requires-Dist: langchain_experimental
|
21
|
+
Requires-Dist: langgraph>=0.2.57
|
22
|
+
Requires-Dist: openai
|
23
|
+
Requires-Dist: pandas
|
24
|
+
Requires-Dist: sqlalchemy
|
25
|
+
Requires-Dist: numpy
|
26
|
+
Requires-Dist: plotly
|
27
|
+
Requires-Dist: streamlit
|
28
|
+
Requires-Dist: scikit-learn
|
29
|
+
Requires-Dist: xgboost
|
30
|
+
Provides-Extra: machine-learning-agent
|
31
|
+
Requires-Dist: h2o; extra == "machine-learning-agent"
|
32
|
+
Provides-Extra: all
|
33
|
+
Requires-Dist: h2o; extra == "all"
|
34
|
+
Dynamic: author
|
35
|
+
Dynamic: author-email
|
36
|
+
Dynamic: classifier
|
37
|
+
Dynamic: description
|
38
|
+
Dynamic: description-content-type
|
39
|
+
Dynamic: home-page
|
40
|
+
Dynamic: provides-extra
|
41
|
+
Dynamic: requires-dist
|
42
|
+
Dynamic: requires-python
|
43
|
+
Dynamic: summary
|
44
|
+
|
45
|
+
<div align="center">
|
46
|
+
<a href="https://github.com/business-science/ai-data-science-team">
|
47
|
+
<picture>
|
48
|
+
<img src="/img/ai_data_science_team_logo.jpg" alt="AI Data Science Team" width="400">
|
49
|
+
</picture>
|
50
|
+
</a>
|
51
|
+
</div>
|
52
|
+
<div align="center">
|
53
|
+
<em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
|
54
|
+
</div>
|
55
|
+
<div align="center">
|
56
|
+
<a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
|
57
|
+
<a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
|
58
|
+
<a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
|
59
|
+
</div>
|
60
|
+
|
61
|
+
|
62
|
+
# Your AI Data Science Team (🪖 An Army Of Agents)
|
63
|
+
|
64
|
+
**An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
|
65
|
+
|
66
|
+
[**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
|
67
|
+
|
68
|
+
*Beta - This Python library is under active development. There may be breaking changes that occur until release of 0.1.0.*
|
69
|
+
|
70
|
+
---
|
71
|
+
|
72
|
+
The AI Data Science Team of Copilots includes Agents that specialize in data cleaning, preparation, feature engineering, modeling (machine learning), and interpretation of various business problems like:
|
73
|
+
|
74
|
+
- Churn Modeling
|
75
|
+
- Employee Attrition
|
76
|
+
- Lead Scoring
|
77
|
+
- Insurance Risk
|
78
|
+
- Credit Card Risk
|
79
|
+
- And more
|
80
|
+
|
81
|
+
## Table of Contents
|
82
|
+
|
83
|
+
- [Your AI Data Science Team (🪖 An Army Of Agents)](#your-ai-data-science-team--an-army-of-agents)
|
84
|
+
- [Table of Contents](#table-of-contents)
|
85
|
+
- [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
|
86
|
+
- [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
|
87
|
+
- [Data Science Agents](#data-science-agents)
|
88
|
+
- [NEW: Multi-Agents](#new-multi-agents)
|
89
|
+
- [Coming Soon: Data Science Apps](#coming-soon-data-science-apps)
|
90
|
+
- [Agents Available Now](#agents-available-now)
|
91
|
+
- [Data Science Agents](#data-science-agents-1)
|
92
|
+
- [Multi-Agents](#multi-agents)
|
93
|
+
- [Agents Coming Soon](#agents-coming-soon)
|
94
|
+
- [Disclaimer](#disclaimer)
|
95
|
+
- [Installation](#installation)
|
96
|
+
- [Usage](#usage)
|
97
|
+
- [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
|
98
|
+
- [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
|
99
|
+
- [Contributing](#contributing)
|
100
|
+
- [License](#license)
|
101
|
+
- [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
|
102
|
+
|
103
|
+
## Companies That Want A Custom AI Data Science Team (And AI Apps)
|
104
|
+
|
105
|
+
Want to have your own _customized_ enterprise-grade AI Data Science Team and *domain-specific* AI-powered Apps?
|
106
|
+
|
107
|
+
**Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
|
108
|
+
|
109
|
+
## Generative AI for Data Scientists Workshop
|
110
|
+
|
111
|
+
If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that perform Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
|
112
|
+
|
113
|
+
[**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
|
114
|
+
|
115
|
+
## Data Science Agents
|
116
|
+
|
117
|
+
This project is a work in progress. New data science agents will be released soon.
|
118
|
+
|
119
|
+

|
120
|
+
|
121
|
+
### NEW: Multi-Agents
|
122
|
+
|
123
|
+
These are the internals of the SQL Data Analyst Agent, which connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction and performs Joins, Aggregations, and other SQL Query operations. It also includes a Data Visualization Agent that creates visualizations to help you understand your data:
|
124
|
+
|
125
|
+

|
126
|
+
|
127
|
+
### Coming Soon: Data Science Apps
|
128
|
+
|
129
|
+
This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
|
130
|
+
|
131
|
+

|
132
|
+
|
133
|
+
### Agents Available Now
|
134
|
+
|
135
|
+
#### Data Science Agents
|
136
|
+
|
137
|
+
1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
|
138
|
+
2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations.
|
139
|
+
3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
|
140
|
+
4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
|
141
|
+
5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
|
142
|
+
|
143
|
+
#### Multi-Agents
|
144
|
+
|
145
|
+
1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data.
|
146
|
+
|
147
|
+
### Agents Coming Soon
|
148
|
+
|
149
|
+
1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
|
150
|
+
2. **Machine Learning Agent:** Builds and logs the machine learning models.
|
151
|
+
3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
|
152
|
+
4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
|
153
|
+
|
154
|
+
## Disclaimer
|
155
|
+
|
156
|
+
**This project is for educational purposes only.**
|
157
|
+
|
158
|
+
- It is not intended to replace your company's data science team
|
159
|
+
- No warranties or guarantees provided
|
160
|
+
- Creator assumes no liability for financial loss
|
161
|
+
- Consult an experienced Generative AI Data Scientist for building your own custom AI Data Science Team
|
162
|
+
- If you want a custom enterprise-grade AI Data Science Team, [send inquiries here](https://www.business-science.io/contact.html).
|
163
|
+
|
164
|
+
By using this software, you agree to use it solely for learning purposes.
|
165
|
+
|
166
|
+
## Installation
|
167
|
+
|
168
|
+
``` bash
|
169
|
+
pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
|
170
|
+
```
|
171
|
+
|
172
|
+
## Usage
|
173
|
+
|
174
|
+
[See all examples here.](/examples)
|
175
|
+
|
176
|
+
### Example 1: Feature Engineering with the Feature Engineering Agent
|
177
|
+
|
178
|
+
[See the full example here.](/examples/feature_engineering_agent.ipynb)
|
179
|
+
|
180
|
+
``` python
|
181
|
+
feature_engineering_agent = FeatureEngineeringAgent(model = llm)
|
182
|
+
|
183
|
+
feature_engineering_agent.invoke_agent(
|
184
|
+
data_raw = df,
|
185
|
+
user_instructions = "Make sure to scale and center numeric features",
|
186
|
+
target_variable = "Churn",
|
187
|
+
max_retries = 3,
|
188
|
+
)
|
189
|
+
```
|
190
|
+
|
191
|
+
``` bash
|
192
|
+
---FEATURE ENGINEERING AGENT----
|
193
|
+
* CREATE FEATURE ENGINEER CODE
|
194
|
+
* EXECUTING AGENT CODE
|
195
|
+
* EXPLAIN AGENT CODE
|
196
|
+
```
|
197
|
+
|
198
|
+
``` python
|
199
|
+
feature_engineering_agent.get_data_engineered()
|
200
|
+
```
|
201
|
+
|
202
|
+
### Example 2: Cleaning Data with the Data Cleaning Agent
|
203
|
+
|
204
|
+
[See the full example here.](/examples/data_cleaning_agent.ipynb)
|
205
|
+
|
206
|
+
``` python
|
207
|
+
data_cleaning_agent = DataCleaningAgent(model = llm)
|
208
|
+
|
209
|
+
response = data_cleaning_agent.invoke_agent(
|
210
|
+
data_raw = df,
|
211
|
+
user_instructions = "Don't remove outliers when cleaning the data.",
|
212
|
+
max_retries = 3,
|
213
|
+
)
|
214
|
+
```
|
215
|
+
|
216
|
+
``` bash
|
217
|
+
---DATA CLEANING AGENT----
|
218
|
+
* CREATE DATA CLEANER CODE
|
219
|
+
* EXECUTING AGENT CODE
|
220
|
+
* EXPLAIN AGENT CODE
|
221
|
+
```
|
222
|
+
|
223
|
+
``` python
|
224
|
+
data_cleaning_agent.get_data_cleaned()
|
225
|
+
```
|
226
|
+
|
227
|
+
## Contributing
|
228
|
+
|
229
|
+
1. Fork the repository
|
230
|
+
2. Create a feature branch
|
231
|
+
3. Commit your changes
|
232
|
+
4. Push to the branch
|
233
|
+
5. Create a Pull Request
|
234
|
+
|
235
|
+
## License
|
236
|
+
|
237
|
+
This project is licensed under the MIT License. See LICENSE file for details.
|
238
|
+
|
239
|
+
# Want To Become A Full-Stack Generative AI Data Scientist?
|
240
|
+
|
241
|
+

|
242
|
+
|
243
|
+
I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
|
244
|
+
|
245
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
ai_data_science_team/_version.py,sha256=IEp7uHd_8RlLYvLFCpp_wJCutdvJI7cJ73IN0GzK3ts,26
|
3
|
+
ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
|
4
|
+
ai_data_science_team/agents/__init__.py,sha256=KSwxfciazWyaDG-xM93SadiIyT6X4d3uJLTdvHvVKq0,553
|
5
|
+
ai_data_science_team/agents/data_cleaning_agent.py,sha256=OWJ3tEA5cy2fo92bTmKS8CDA48ZRRqmWg2kH7cacjDM,27337
|
6
|
+
ai_data_science_team/agents/data_visualization_agent.py,sha256=pm7yln3GI91mOAjwDveenWwYXtJqh990oFvsoFhX3aA,28864
|
7
|
+
ai_data_science_team/agents/data_wrangling_agent.py,sha256=UUoejYBmVFdM4At_CKQjYUyFHkaloowdd6yAElfeV9Q,32332
|
8
|
+
ai_data_science_team/agents/feature_engineering_agent.py,sha256=bngc0COOYa8AolJwQrNuO1aDRgwBCp6LCN9_otIscWk,31011
|
9
|
+
ai_data_science_team/agents/sql_database_agent.py,sha256=M_7IBOu7ISZZEtDAC9KGQIE7FPaXSyQ5IdD8vu91_DM,31164
|
10
|
+
ai_data_science_team/ml_agents/__init__.py,sha256=fA5uX6dSVMAf2ApmBJXEArbnKNmsmuE0nbBsCeNAksk,86
|
11
|
+
ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=h6kz8ZPw7ApCdfrKBqggHfBnwBJ3kDSzLMwxMxz_2tM,55181
|
12
|
+
ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
|
13
|
+
ai_data_science_team/multiagents/sql_data_analyst.py,sha256=2gETU9O5t9R5Ut1kEW1T3H-6Sh8xDzDfQmFV3i5lMKs,14233
|
14
|
+
ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
|
15
|
+
ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP3cEFuf22-R5RM,330
|
16
|
+
ai_data_science_team/templates/agent_templates.py,sha256=pphuitXUVv21ljr_H-aof6Xq78KvDY0adF3K6lXGEz4,29107
|
17
|
+
ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
ai_data_science_team/tools/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
|
19
|
+
ai_data_science_team/tools/metadata.py,sha256=3lPxLEUr3I9AF6wIKx5en-GV6JVkpUHDSLQxKj1N5Gs,9313
|
20
|
+
ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
|
21
|
+
ai_data_science_team/tools/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
|
22
|
+
ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
+
ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
|
24
|
+
ai_data_science_team-0.0.0.9009.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
|
25
|
+
ai_data_science_team-0.0.0.9009.dist-info/METADATA,sha256=iMIyksmkPmuj9TI_oIa1lXGJYKVspxhWvflBJ1zlW0o,9875
|
26
|
+
ai_data_science_team-0.0.0.9009.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
27
|
+
ai_data_science_team-0.0.0.9009.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
|
28
|
+
ai_data_science_team-0.0.0.9009.dist-info/RECORD,,
|
@@ -1,183 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.2
|
2
|
-
Name: ai-data-science-team
|
3
|
-
Version: 0.0.0.9007
|
4
|
-
Summary: Build and run an AI-powered data science team.
|
5
|
-
Home-page: https://github.com/business-science/ai-data-science-team
|
6
|
-
Author: Matt Dancho
|
7
|
-
Author-email: mdancho@business-science.io
|
8
|
-
Requires-Python: >=3.9
|
9
|
-
Description-Content-Type: text/markdown
|
10
|
-
License-File: LICENSE
|
11
|
-
Requires-Dist: openpyxl
|
12
|
-
Requires-Dist: langchain
|
13
|
-
Requires-Dist: langchain_community
|
14
|
-
Requires-Dist: langchain_openai
|
15
|
-
Requires-Dist: langchain_experimental
|
16
|
-
Requires-Dist: langgraph>=0.2.57
|
17
|
-
Requires-Dist: openai
|
18
|
-
Requires-Dist: pandas
|
19
|
-
Requires-Dist: numpy
|
20
|
-
Requires-Dist: plotly
|
21
|
-
Requires-Dist: streamlit
|
22
|
-
Requires-Dist: scikit-learn
|
23
|
-
Requires-Dist: xgboost
|
24
|
-
Dynamic: author
|
25
|
-
Dynamic: author-email
|
26
|
-
Dynamic: description
|
27
|
-
Dynamic: description-content-type
|
28
|
-
Dynamic: home-page
|
29
|
-
Dynamic: requires-dist
|
30
|
-
Dynamic: requires-python
|
31
|
-
Dynamic: summary
|
32
|
-
|
33
|
-
# Your AI Data Science Team (An Army Of Copilots)
|
34
|
-
|
35
|
-
**An AI-powered data science team of copilots that uses agents to help you perform common data science tasks 10X faster**.
|
36
|
-
|
37
|
-
**Star ⭐ This GitHub (Takes 2 seconds and means a lot).**
|
38
|
-
|
39
|
-
*Beta - This Python library is under active development. There may be breaking changes that occur until release of 0.1.0.*
|
40
|
-
|
41
|
-
---
|
42
|
-
|
43
|
-
The AI Data Science Team of Copilots includes Agents that specialize data cleaning, preparation, feature engineering, modeling (machine learning), and interpretation of various business problems like:
|
44
|
-
|
45
|
-
- Churn Modeling
|
46
|
-
- Employee Attrition
|
47
|
-
- Lead Scoring
|
48
|
-
- Insurance Risk
|
49
|
-
- Credit Card Risk
|
50
|
-
- And more
|
51
|
-
|
52
|
-
## Table of Contents
|
53
|
-
|
54
|
-
- [Your AI Data Science Team (An Army Of Copilots)](#your-ai-data-science-team-an-army-of-copilots)
|
55
|
-
- [Table of Contents](#table-of-contents)
|
56
|
-
- [Companies That Want An AI Data Science Team Copilot](#companies-that-want-an-ai-data-science-team-copilot)
|
57
|
-
- [Free Generative AI For Data Scientists Workshop](#free-generative-ai-for-data-scientists-workshop)
|
58
|
-
- [Data Science Agents](#data-science-agents)
|
59
|
-
- [Coming Soon: Multi-Agents](#coming-soon-multi-agents)
|
60
|
-
- [Agents Available Now](#agents-available-now)
|
61
|
-
- [Agents Coming Soon](#agents-coming-soon)
|
62
|
-
- [Disclaimer](#disclaimer)
|
63
|
-
- [Installation](#installation)
|
64
|
-
- [Usage](#usage)
|
65
|
-
- [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
|
66
|
-
- [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
|
67
|
-
- [Contributing](#contributing)
|
68
|
-
- [License](#license)
|
69
|
-
|
70
|
-
## Companies That Want An AI Data Science Team Copilot
|
71
|
-
|
72
|
-
If you are interested in having your own custom enteprise-grade AI Data Science Team Copilot, send inquiries here: [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
|
73
|
-
|
74
|
-
## Free Generative AI For Data Scientists Workshop
|
75
|
-
|
76
|
-
If you want to learn how to build AI Agents for your company that performs Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, [register for my next Generative AI for Data Scientists workshop here.](https://learn.business-science.io/ai-register)
|
77
|
-
|
78
|
-
## Data Science Agents
|
79
|
-
|
80
|
-
This project is a work in progress. New data science agents will be released soon.
|
81
|
-
|
82
|
-

|
83
|
-
|
84
|
-
### Coming Soon: Multi-Agents
|
85
|
-
|
86
|
-
This is the internals of the Business Intelligence SQL Agent I'm working on:
|
87
|
-
|
88
|
-

|
89
|
-
|
90
|
-
### Agents Available Now
|
91
|
-
|
92
|
-
1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
|
93
|
-
2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations.
|
94
|
-
3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
|
95
|
-
4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
|
96
|
-
5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
|
97
|
-
|
98
|
-
### Agents Coming Soon
|
99
|
-
|
100
|
-
1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
|
101
|
-
2. **Machine Learning Agent:** Builds and logs the machine learning models.
|
102
|
-
3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
|
103
|
-
4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
|
104
|
-
|
105
|
-
## Disclaimer
|
106
|
-
|
107
|
-
**This project is for educational purposes only.**
|
108
|
-
|
109
|
-
- It is not intended to replace your company's data science team
|
110
|
-
- No warranties or guarantees provided
|
111
|
-
- Creator assumes no liability for financial loss
|
112
|
-
- Consult an experienced Generative AI Data Scientist for building your own custom AI Data Science Team
|
113
|
-
- If you want a custom enterprise-grade AI Data Science Team, [send inquiries here](https://www.business-science.io/contact.html).
|
114
|
-
|
115
|
-
By using this software, you agree to use it solely for learning purposes.
|
116
|
-
|
117
|
-
## Installation
|
118
|
-
|
119
|
-
``` bash
|
120
|
-
pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
|
121
|
-
```
|
122
|
-
|
123
|
-
## Usage
|
124
|
-
|
125
|
-
[See all examples here.](/examples)
|
126
|
-
|
127
|
-
### Example 1: Feature Engineering with the Feature Engineering Agent
|
128
|
-
|
129
|
-
[See the full example here.](/examples/feature_engineering_agent.ipynb)
|
130
|
-
|
131
|
-
``` python
|
132
|
-
feature_engineering_agent = make_feature_engineering_agent(model = llm)
|
133
|
-
|
134
|
-
response = feature_engineering_agent.invoke({
|
135
|
-
"user_instructions": "Make sure to scale and center numeric features",
|
136
|
-
"target_variable": "Churn",
|
137
|
-
"data_raw": df.to_dict(),
|
138
|
-
"max_retries":3,
|
139
|
-
"retry_count":0
|
140
|
-
})
|
141
|
-
```
|
142
|
-
|
143
|
-
``` bash
|
144
|
-
---FEATURE ENGINEERING AGENT----
|
145
|
-
* CREATE FEATURE ENGINEER CODE
|
146
|
-
* EXECUTING AGENT CODE
|
147
|
-
* EXPLAIN AGENT CODE
|
148
|
-
```
|
149
|
-
|
150
|
-
### Example 2: Cleaning Data with the Data Cleaning Agent
|
151
|
-
|
152
|
-
[See the full example here.](/examples/data_cleaning_agent.ipynb)
|
153
|
-
|
154
|
-
``` python
|
155
|
-
data_cleaning_agent = make_data_cleaning_agent(model = llm)
|
156
|
-
|
157
|
-
response = data_cleaning_agent.invoke({
|
158
|
-
"user_instructions": "Don't remove outliers when cleaning the data.",
|
159
|
-
"data_raw": df.to_dict(),
|
160
|
-
"max_retries":3,
|
161
|
-
"retry_count":0
|
162
|
-
})
|
163
|
-
```
|
164
|
-
|
165
|
-
``` bash
|
166
|
-
---DATA CLEANING AGENT----
|
167
|
-
* CREATE DATA CLEANER CODE
|
168
|
-
* EXECUTING AGENT CODE
|
169
|
-
* EXPLAIN AGENT CODE
|
170
|
-
```
|
171
|
-
|
172
|
-
## Contributing
|
173
|
-
|
174
|
-
1. Fork the repository
|
175
|
-
2. Create a feature branch
|
176
|
-
3. Commit your changes
|
177
|
-
4. Push to the branch
|
178
|
-
5. Create a Pull Request
|
179
|
-
|
180
|
-
## License
|
181
|
-
|
182
|
-
This project is licensed under the MIT License. See LICENSE file for details.
|
183
|
-
|
@@ -1,21 +0,0 @@
|
|
1
|
-
ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
ai_data_science_team/_version.py,sha256=VJYpfOaKsXjGzPOsT6kYyVW6T9bFBqxt6Ph3qF8t-A8,26
|
3
|
-
ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
|
4
|
-
ai_data_science_team/agents/__init__.py,sha256=rcF18rBsOuPJqJKvoffh6lwr4Nwm24MErM2u4H4Th9s,467
|
5
|
-
ai_data_science_team/agents/data_cleaning_agent.py,sha256=gixYY4wGehKK_ROgU7CVOzijghmVQGD4hyK9uKhc8Hw,20890
|
6
|
-
ai_data_science_team/agents/data_visualization_agent.py,sha256=wePFZbdB4kBah8m_iy6f4IDyjl6L6zBWzIgigJEXdk8,12933
|
7
|
-
ai_data_science_team/agents/data_wrangling_agent.py,sha256=5w1kytoWLE4p3hj0YHVuXcgCd304eNQac-Zrrgmnr2s,16735
|
8
|
-
ai_data_science_team/agents/feature_engineering_agent.py,sha256=UaaU3VkPhjOV0NbrYXedRb6eHOcOWWiGYhB_srrYWvg,17571
|
9
|
-
ai_data_science_team/agents/sql_database_agent.py,sha256=mRbEAPHP6NlwQac2_VL9RuyIfCCtrmXTrzu5RLzOoeU,16031
|
10
|
-
ai_data_science_team/templates/__init__.py,sha256=bNrKGmWXQG7GRczln_zVfUQLzxzp7hSwlLyNtLxleu4,278
|
11
|
-
ai_data_science_team/templates/agent_templates.py,sha256=xohVgEfxPcVukPLpPfV7mZ0cpFgp-oJVLZRWCv2V-WU,19948
|
12
|
-
ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
ai_data_science_team/tools/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
|
14
|
-
ai_data_science_team/tools/metadata.py,sha256=tbnca_tDp67oBA6qD29AKVooJG10VqGr4vwzj4rPUas,8348
|
15
|
-
ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
|
16
|
-
ai_data_science_team/tools/regex.py,sha256=vkfdvi9pDe582p-fh_7cB07Wb0dOR2CsiVq-wUO3mas,2491
|
17
|
-
ai_data_science_team-0.0.0.9007.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
|
18
|
-
ai_data_science_team-0.0.0.9007.dist-info/METADATA,sha256=KcMFR2V9_wbepdKsrlFdfc7UB7t-Hf7i75x67LPXw3Q,6783
|
19
|
-
ai_data_science_team-0.0.0.9007.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
20
|
-
ai_data_science_team-0.0.0.9007.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
|
21
|
-
ai_data_science_team-0.0.0.9007.dist-info/RECORD,,
|
{ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE
RENAMED
File without changes
|
{ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|