ai-data-science-team 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/__init__.py +5 -4
- ai_data_science_team/agents/data_cleaning_agent.py +371 -45
- ai_data_science_team/agents/data_visualization_agent.py +764 -0
- ai_data_science_team/agents/data_wrangling_agent.py +507 -23
- ai_data_science_team/agents/feature_engineering_agent.py +467 -34
- ai_data_science_team/agents/sql_database_agent.py +394 -30
- ai_data_science_team/multiagents/__init__.py +1 -0
- ai_data_science_team/multiagents/sql_data_analyst.py +286 -0
- ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
- ai_data_science_team/templates/__init__.py +9 -0
- ai_data_science_team/templates/agent_templates.py +247 -42
- ai_data_science_team/tools/metadata.py +110 -47
- ai_data_science_team/tools/regex.py +33 -0
- ai_data_science_team/utils/__init__.py +0 -0
- ai_data_science_team/utils/plotly.py +24 -0
- ai_data_science_team-0.0.0.9008.dist-info/METADATA +231 -0
- ai_data_science_team-0.0.0.9008.dist-info/RECORD +26 -0
- {ai_data_science_team-0.0.0.9006.dist-info → ai_data_science_team-0.0.0.9008.dist-info}/WHEEL +1 -1
- ai_data_science_team-0.0.0.9006.dist-info/METADATA +0 -165
- ai_data_science_team-0.0.0.9006.dist-info/RECORD +0 -20
- {ai_data_science_team-0.0.0.9006.dist-info → ai_data_science_team-0.0.0.9008.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9006.dist-info → ai_data_science_team-0.0.0.9008.dist-info}/top_level.txt +0 -0
ai_data_science_team/tools/metadata.py

@@ -4,7 +4,9 @@ import sqlalchemy as sql
 from typing import Union, List, Dict
 
 def get_dataframe_summary(
-    dataframes: Union[pd.DataFrame, List[pd.DataFrame], Dict[str, pd.DataFrame]]
+    dataframes: Union[pd.DataFrame, List[pd.DataFrame], Dict[str, pd.DataFrame]],
+    n_sample: int = 30,
+    skip_stats: bool = False,
 ) -> List[str]:
     """
     Generate a summary for one or more DataFrames. Accepts a single DataFrame, a list of DataFrames,
@@ -16,6 +18,10 @@ def get_dataframe_summary(
     - Single DataFrame: produce a single summary (returned within a one-element list).
     - List of DataFrames: produce a summary for each DataFrame, using index-based names.
     - Dictionary of DataFrames: produce a summary for each DataFrame, using dictionary keys as names.
+    n_sample : int, default 30
+        Number of rows to display in the "Data (first n_sample rows)" section.
+    skip_stats : bool, default False
+        If True, skip the descriptive statistics and DataFrame info sections.
 
     Example:
     --------
@@ -49,17 +55,17 @@ def get_dataframe_summary(
     # --- Dictionary Case ---
     if isinstance(dataframes, dict):
         for dataset_name, df in dataframes.items():
-            summaries.append(_summarize_dataframe(df, dataset_name))
+            summaries.append(_summarize_dataframe(df, dataset_name, n_sample, skip_stats))
 
     # --- Single DataFrame Case ---
     elif isinstance(dataframes, pd.DataFrame):
-        summaries.append(_summarize_dataframe(dataframes, "Single_Dataset"))
+        summaries.append(_summarize_dataframe(dataframes, "Single_Dataset", n_sample, skip_stats))
 
     # --- List of DataFrames Case ---
     elif isinstance(dataframes, list):
         for idx, df in enumerate(dataframes):
             dataset_name = f"Dataset_{idx}"
-            summaries.append(_summarize_dataframe(df, dataset_name))
+            summaries.append(_summarize_dataframe(df, dataset_name, n_sample, skip_stats))
 
     else:
         raise TypeError(
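For orientation, a minimal usage sketch of the updated `get_dataframe_summary` signature (the import path follows this package's layout; the sample frame is illustrative):

``` python
import pandas as pd
from ai_data_science_team.tools.metadata import get_dataframe_summary

df = pd.DataFrame({"id": [1, 2, 3], "value": [10.0, 20.0, 30.0]})

# Single DataFrame: returns a one-element list of summary strings.
print(get_dataframe_summary(df, n_sample=5)[0])

# Dictionary case: keys become dataset names; skip_stats=True drops the
# describe()/info() sections, which keeps the summary short.
summaries = get_dataframe_summary({"sales": df}, n_sample=5, skip_stats=True)
```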
@@ -69,7 +75,7 @@ def get_dataframe_summary(
     return summaries
 
 
-def _summarize_dataframe(df: pd.DataFrame, dataset_name: str) -> str:
+def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
     """Generate a summary string for a single DataFrame."""
     # 1. Convert dictionary-type cells to strings
     # This prevents unhashable dict errors during df.nunique().
@@ -91,77 +97,134 @@ def _summarize_dataframe(df: pd.DataFrame, dataset_name: str) -> str:
     unique_counts = df.nunique()  # Will no longer fail on unhashable dict
     unique_counts_summary = "\n".join([f"{col}: {count}" for col, count in unique_counts.items()])
 
-    summary_text = f"""
-    Dataset Name: {dataset_name}
-    ----------------------------
-    Shape: {df.shape[0]} rows x {df.shape[1]} columns
+    # 6. Generate the summary text
+    if not skip_stats:
+        summary_text = f"""
+        Dataset Name: {dataset_name}
+        ----------------------------
+        Shape: {df.shape[0]} rows x {df.shape[1]} columns
 
-    Column Data Types:
-    {column_types}
+        Column Data Types:
+        {column_types}
 
-    Missing Value Percentage:
-    {missing_summary}
+        Missing Value Percentage:
+        {missing_summary}
 
-    Unique Value Counts:
-    {unique_counts_summary}
+        Unique Value Counts:
+        {unique_counts_summary}
 
-    Data (first 30 rows):
-    {df.head(30).to_string()}
+        Data (first {n_sample} rows):
+        {df.head(n_sample).to_string()}
 
-    Data Description:
-    {df.describe().to_string()}
+        Data Description:
+        {df.describe().to_string()}
 
-    Data Info:
-    {info_text}
-    """
+        Data Info:
+        {info_text}
+        """
+    else:
+        summary_text = f"""
+        Dataset Name: {dataset_name}
+        ----------------------------
+        Shape: {df.shape[0]} rows x {df.shape[1]} columns
+
+        Column Data Types:
+        {column_types}
+
+        Data (first {n_sample} rows):
+        {df.head(n_sample).to_string()}
+        """
+
     return summary_text.strip()
 
 
-def get_database_metadata(connection: Union[sql.engine.base.Connection, sql.engine.base.Engine], n_values: int=10):
-    """
-    Collects metadata and sample data from a database.
 
-
-
-
+def get_database_metadata(connection: Union[sql.engine.base.Connection, sql.engine.base.Engine],
+                          n_samples: int = 10) -> str:
+    """
+    Collects metadata and sample data from a database, with safe identifier quoting and
+    basic dialect-aware row limiting. Prevents issues with spaces/reserved words in identifiers.
+
+    Parameters
+    ----------
+    connection : Union[sql.engine.base.Connection, sql.engine.base.Engine]
         An active SQLAlchemy connection or engine.
-
+    n_samples : int
         Number of sample values to retrieve for each column.
 
-    Returns
-
-    str
+    Returns
+    -------
+    str
+        A formatted string with database metadata, including some sample data from each column.
     """
+
     # If a connection is passed, use it; if an engine is passed, connect to it
     is_engine = isinstance(connection, sql.engine.base.Engine)
     conn = connection.connect() if is_engine else connection
-    output = []
 
+    output = []
     try:
-        #
+        # Grab the engine off the connection
        sql_engine = conn.engine
+        dialect_name = sql_engine.dialect.name.lower()
+
        output.append(f"Database Dialect: {sql_engine.dialect.name}")
        output.append(f"Driver: {sql_engine.driver}")
        output.append(f"Connection URL: {sql_engine.url}")
 
        # Inspect the database
        inspector = sql.inspect(sql_engine)
-
+        tables = inspector.get_table_names()
+        output.append(f"Tables: {tables}")
        output.append(f"Schemas: {inspector.get_schema_names()}")
-
-        #
-
+
+        # Helper to build a dialect-specific limit clause
+        def build_query(col_name_quoted: str, table_name_quoted: str, n: int) -> str:
+            """
+            Returns a SQL query string to select N rows from the given column/table
+            across different dialects (SQLite, MySQL, Postgres, MSSQL, Oracle, etc.)
+            """
+            if "sqlite" in dialect_name or "mysql" in dialect_name or "postgres" in dialect_name:
+                # Common dialects supporting LIMIT
+                return f"SELECT {col_name_quoted} FROM {table_name_quoted} LIMIT {n}"
+            elif "mssql" in dialect_name:
+                # Microsoft SQL Server syntax
+                return f"SELECT TOP {n} {col_name_quoted} FROM {table_name_quoted}"
+            elif "oracle" in dialect_name:
+                # Oracle syntax
+                return f"SELECT {col_name_quoted} FROM {table_name_quoted} WHERE ROWNUM <= {n}"
+            else:
+                # Fallback
+                return f"SELECT {col_name_quoted} FROM {table_name_quoted} LIMIT {n}"
+
+        # Prepare for quoting
+        preparer = inspector.bind.dialect.identifier_preparer
+
+        # For each table, get columns and sample data
+        for table_name in tables:
            output.append(f"\nTable: {table_name}")
+            # Properly quote the table name
+            table_name_quoted = preparer.quote_identifier(table_name)
+
            for column in inspector.get_columns(table_name):
-
-
-
-
-
+                col_name = column["name"]
+                col_type = column["type"]
+                output.append(f"  Column: {col_name} Type: {col_type}")
+
+                # Properly quote the column name
+                col_name_quoted = preparer.quote_identifier(col_name)
+
+                # Build a dialect-aware query with safe quoting
+                query = build_query(col_name_quoted, table_name_quoted, n_samples)
+
+                # Read a few sample values
+                df = pd.read_sql(sql.text(query), conn)
+                first_values = df[col_name].tolist()
+                output.append(f"  First {n_samples} Values: {first_values}")
+
    finally:
-        # Close connection if
+        # Close connection if created inside the function
        if is_engine:
            conn.close()
 
-    # Join all collected information into a single string
    return "\n".join(output)
ai_data_science_team/tools/regex.py

@@ -71,3 +71,36 @@ def add_comments_to_top(code_text, agent_name="data_wrangler"):
     # Join the header with newlines, then prepend to the existing code_text
     header_block = "\n".join(header_comments)
     return header_block + code_text
+
+def format_agent_name(agent_name: str) -> str:
+
+    formatted_name = agent_name.strip().replace("_", " ").upper()
+
+    return f"---{formatted_name}----"
+
+def format_recommended_steps(raw_text: str, heading: str = "# Recommended Steps:") -> str:
+    # Split text by newline and strip leading/trailing whitespace
+    lines = raw_text.strip().split('\n')
+
+    # Remove empty lines from the start
+    while lines and not lines[0].strip():
+        lines.pop(0)
+
+    seen_heading = False
+    new_lines = []
+
+    for line in lines:
+        # If this line *is exactly* the heading, check if we've seen it already
+        if line.strip() == heading:
+            if seen_heading:
+                # Skip duplicates
+                continue
+            else:
+                seen_heading = True
+        new_lines.append(line)
+
+    # If heading was never seen, prepend it
+    if not seen_heading:
+        new_lines.insert(0, heading)
+
+    return "\n".join(new_lines)
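A short usage sketch of the two new helpers (expected output shown as comments):

``` python
from ai_data_science_team.tools.regex import format_agent_name, format_recommended_steps

print(format_agent_name("data_cleaning_agent"))
# ---DATA CLEANING AGENT----

raw = """
# Recommended Steps:
# Recommended Steps:
1. Impute missing values with the median.
2. Convert date columns to datetime.
"""
print(format_recommended_steps(raw))
# The duplicate heading is dropped; if no heading were present, one would be prepended.
```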
ai_data_science_team/utils/__init__.py (file without changes)
ai_data_science_team/utils/plotly.py (new file)

@@ -0,0 +1,24 @@
+
+
+import json
+import plotly.io as pio
+
+def plotly_from_dict(plotly_graph_dict: dict):
+    """
+    Convert a Plotly graph dictionary to a Plotly graph object.
+
+    Parameters:
+    -----------
+    plotly_graph_dict: dict
+        A Plotly graph dictionary.
+
+    Returns:
+    --------
+    plotly_graph: plotly.graph_objs.graph_objs.Figure
+        A Plotly graph object.
+    """
+
+    if plotly_graph_dict is None:
+        return None
+
+    return pio.from_json(json.dumps(plotly_graph_dict))
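A round-trip sketch for `plotly_from_dict`, matching how the data visualization agent returns figures as JSON-serializable dictionaries (the bar chart is illustrative):

``` python
import plotly.graph_objects as go
from ai_data_science_team.utils.plotly import plotly_from_dict

fig = go.Figure(data=[go.Bar(x=["A", "B"], y=[1, 3])])

# Serialize to a plain dict, then rebuild the Figure from it.
restored = plotly_from_dict(fig.to_dict())
restored.show()
```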
ai_data_science_team-0.0.0.9008.dist-info/METADATA (new file)

@@ -0,0 +1,231 @@
+Metadata-Version: 2.2
+Name: ai-data-science-team
+Version: 0.0.0.9008
+Summary: Build and run an AI-powered data science team.
+Home-page: https://github.com/business-science/ai-data-science-team
+Author: Matt Dancho
+Author-email: mdancho@business-science.io
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openpyxl
+Requires-Dist: langchain
+Requires-Dist: langchain_community
+Requires-Dist: langchain_openai
+Requires-Dist: langchain_experimental
+Requires-Dist: langgraph>=0.2.57
+Requires-Dist: openai
+Requires-Dist: pandas
+Requires-Dist: numpy
+Requires-Dist: plotly
+Requires-Dist: streamlit
+Requires-Dist: scikit-learn
+Requires-Dist: xgboost
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+<div align="center">
+<a href="https://github.com/business-science/ai-data-science-team">
+<picture>
+<img src="/img/ai_data_science_team_logo.jpg" alt="AI Data Science Team" width="400">
+</picture>
+</a>
+</div>
+<div align="center">
+<em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
+</div>
+<div align="center">
+<a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg" alt="PyPI"></a>
+<a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg" alt="versions"></a>
+<a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?v" alt="license"></a>
+</div>
+
+
+# Your AI Data Science Team (An Army Of Agents)
+
+**An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
+
+[**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
+
+*Beta - This Python library is under active development. There may be breaking changes that occur until release of 0.1.0.*
+
+---
+
+The AI Data Science Team of Copilots includes Agents that specialize in data cleaning, preparation, feature engineering, modeling (machine learning), and interpretation of various business problems like:
+
+- Churn Modeling
+- Employee Attrition
+- Lead Scoring
+- Insurance Risk
+- Credit Card Risk
+- And more
+
+## Table of Contents
+
+- [Your AI Data Science Team (An Army Of Agents)](#your-ai-data-science-team-an-army-of-agents)
+  - [Table of Contents](#table-of-contents)
+  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
+  - [Free How To Build AI Agents for Data Scientists Workshop](#free-how-to-build-ai-agents-for-data-scientists-workshop)
+  - [Data Science Agents](#data-science-agents)
+    - [Coming Soon: Multi-Agents](#coming-soon-multi-agents)
+    - [...And after that, the Multi-Agent Data Science Apps](#and-after-that-the-multi-agent-data-science-apps)
+    - [Agents Available Now](#agents-available-now)
+    - [Agents Coming Soon](#agents-coming-soon)
+  - [Disclaimer](#disclaimer)
+  - [Installation](#installation)
+  - [Usage](#usage)
+    - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
+    - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
+  - [Contributing](#contributing)
+  - [License](#license)
+- [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
+
+## Companies That Want A Custom AI Data Science Team (And AI Apps)
+
+Want to have your own _customized_ enterprise-grade AI Data Science Team and domain-specific AI-powered Apps?
+
+**Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
+
+## Free How To Build AI Agents for Data Scientists Workshop
+
+If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that perform Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
+
+[**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
+
+## Data Science Agents
+
+This project is a work in progress. New data science agents will be released soon.
+
+![Data Science Team](/img/ai_data_science_team.jpg)
+
+### Coming Soon: Multi-Agents
+
+This is the internals of the Business Intelligence SQL Agent I'm working on:
+
+![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
+
+### ...And after that, the Multi-Agent Data Science Apps
+
+This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
+
+![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
+
+### Agents Available Now
+
+1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
+2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON-serializable plotly visualizations.
+3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
+4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
+5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
+
+### Agents Coming Soon
+
+1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
+2. **Machine Learning Agent:** Builds and logs the machine learning models.
+3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions, including which features were the most important to the model.
+4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
+
+## Disclaimer
+
+**This project is for educational purposes only.**
+
+- It is not intended to replace your company's data science team
+- No warranties or guarantees provided
+- Creator assumes no liability for financial loss
+- Consult an experienced Generative AI Data Scientist for building your own custom AI Data Science Team
+- If you want a custom enterprise-grade AI Data Science Team, [send inquiries here](https://www.business-science.io/contact.html).
+
+By using this software, you agree to use it solely for learning purposes.
+
+## Installation
+
+``` bash
+pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
+```
+
+## Usage
+
+[See all examples here.](/examples)
+
+### Example 1: Feature Engineering with the Feature Engineering Agent
+
+[See the full example here.](/examples/feature_engineering_agent.ipynb)
+
+``` python
+feature_engineering_agent = FeatureEngineeringAgent(model = llm)
+
+feature_engineering_agent.invoke_agent(
+    data_raw = df,
+    user_instructions = "Make sure to scale and center numeric features",
+    target_variable = "Churn",
+    max_retries = 3,
+)
+```
+
+``` bash
+---FEATURE ENGINEERING AGENT----
+    * CREATE FEATURE ENGINEER CODE
+    * EXECUTING AGENT CODE
+    * EXPLAIN AGENT CODE
+```
+
+``` python
+feature_engineering_agent.get_data_engineered()
+```
+
+### Example 2: Cleaning Data with the Data Cleaning Agent
+
+[See the full example here.](/examples/data_cleaning_agent.ipynb)
+
+``` python
+data_cleaning_agent = DataCleaningAgent(model = llm)
+
+response = data_cleaning_agent.invoke_agent(
+    data_raw = df,
+    user_instructions = "Don't remove outliers when cleaning the data.",
+    max_retries = 3,
+)
+```
+
+``` bash
+---DATA CLEANING AGENT----
+    * CREATE DATA CLEANER CODE
+    * EXECUTING AGENT CODE
+    * EXPLAIN AGENT CODE
+```
+
+``` python
+data_cleaning_agent.get_data_cleaned()
+```
+
+## Contributing
+
+1. Fork the repository
+2. Create a feature branch
+3. Commit your changes
+4. Push to the branch
+5. Create a Pull Request
+
+## License
+
+This project is licensed under the MIT License. See LICENSE file for details.
+
+# Want To Become A Full-Stack Generative AI Data Scientist?
+
+![Generative AI Data Scientist](/img/become_a_generative_ai_data_scientist.jpg)
+
+I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
+
+
ai_data_science_team-0.0.0.9008.dist-info/RECORD (new file)

@@ -0,0 +1,26 @@
+ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_data_science_team/_version.py,sha256=P58HXrtvcvSlic1oJw_w9WwHrQ3kBtvlqYwnMEbOL6g,26
+ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
+ai_data_science_team/agents/__init__.py,sha256=6qGE7p8X291aiw5CFwTNot00_LF3_1fboLbjVf_TlHo,554
+ai_data_science_team/agents/data_cleaning_agent.py,sha256=sMyyWvJ3NK6bEqdkttqRZU03pN6Q2gcR7d39eA0wj-w,27225
+ai_data_science_team/agents/data_visualization_agent.py,sha256=S0gvUepJBVveMyTFaU0xcNCuOgLLkuDCZbwTGpyjNNQ,29186
+ai_data_science_team/agents/data_wrangling_agent.py,sha256=s2w9ub92mHFl9oj3jUxlIfEq4Yg8uwGOcwyX3rIgAxk,32477
+ai_data_science_team/agents/feature_engineering_agent.py,sha256=nB5KBcPzrxtN82sWAXFVZgkezEBG2uscSxb12njLux0,31596
+ai_data_science_team/agents/sql_database_agent.py,sha256=GbqMh-ImoKaoDMtvv3IZOQT82WGewCubZKyDU4iYIG4,28796
+ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
+ai_data_science_team/multiagents/sql_data_analyst.py,sha256=cFAqCKnLKKJ0zKxmRWSZupbRrVZLI-ugxLAgasWhjVc,9974
+ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
+ai_data_science_team/templates/__init__.py,sha256=Dt3K5sdhEEQSc1hLasjXPkhmPn-JpPndSFc85ANIAyo,294
+ai_data_science_team/templates/agent_templates.py,sha256=mlsWxfmLRu9ocgR0l5UQxwki0rnoCoksRyx87WGvbeI,26804
+ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_data_science_team/tools/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
+ai_data_science_team/tools/metadata.py,sha256=tbnca_tDp67oBA6qD29AKVooJG10VqGr4vwzj4rPUas,8348
+ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
+ai_data_science_team/tools/regex.py,sha256=dDHzeGkHU0fGQ5qbfuOR9SXdypjeekvSUn1nQztXuvo,3296
+ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
+ai_data_science_team-0.0.0.9008.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ai_data_science_team-0.0.0.9008.dist-info/METADATA,sha256=MLWo_wXkAnJP0YcddIDpE3NDhSQViALw_Dai9l3WSS0,9014
+ai_data_science_team-0.0.0.9008.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ai_data_science_team-0.0.0.9008.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ai_data_science_team-0.0.0.9008.dist-info/RECORD,,