ai-data-science-team 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. ai_data_science_team/_version.py +1 -1
  2. ai_data_science_team/agents/__init__.py +4 -5
  3. ai_data_science_team/agents/data_cleaning_agent.py +268 -116
  4. ai_data_science_team/agents/data_visualization_agent.py +470 -41
  5. ai_data_science_team/agents/data_wrangling_agent.py +471 -31
  6. ai_data_science_team/agents/feature_engineering_agent.py +426 -41
  7. ai_data_science_team/agents/sql_database_agent.py +458 -58
  8. ai_data_science_team/ml_agents/__init__.py +1 -0
  9. ai_data_science_team/ml_agents/h2o_ml_agent.py +1032 -0
  10. ai_data_science_team/multiagents/__init__.py +1 -0
  11. ai_data_science_team/multiagents/sql_data_analyst.py +398 -0
  12. ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
  13. ai_data_science_team/templates/__init__.py +3 -1
  14. ai_data_science_team/templates/agent_templates.py +319 -43
  15. ai_data_science_team/tools/metadata.py +94 -62
  16. ai_data_science_team/tools/regex.py +86 -1
  17. ai_data_science_team/utils/__init__.py +0 -0
  18. ai_data_science_team/utils/plotly.py +24 -0
  19. ai_data_science_team-0.0.0.9009.dist-info/METADATA +245 -0
  20. ai_data_science_team-0.0.0.9009.dist-info/RECORD +28 -0
  21. ai_data_science_team-0.0.0.9007.dist-info/METADATA +0 -183
  22. ai_data_science_team-0.0.0.9007.dist-info/RECORD +0 -21
  23. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE +0 -0
  24. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL +0 -0
  25. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/top_level.txt +0 -0
@@ -76,4 +76,89 @@ def format_agent_name(agent_name: str) -> str:
76
76
 
77
77
  formatted_name = agent_name.strip().replace("_", " ").upper()
78
78
 
79
- return f"---{formatted_name}----"
79
+ return f"---{formatted_name}----"
80
+
81
def format_recommended_steps(raw_text: str, heading: str = "# Recommended Steps:") -> str:
    """
    Ensure a block of recommended steps carries its heading exactly once.

    The text is stripped, split on newlines, and any blank lines at the top
    are dropped. The first line whose stripped content equals ``heading`` is
    kept as written; every later occurrence is discarded. When no line
    matches, ``heading`` is placed in front of the remaining lines.

    Parameters
    ----------
    raw_text : str
        The raw steps text.
    heading : str, optional
        The heading line that should appear once in the output.

    Returns
    -------
    str
        The cleaned lines joined with newlines.
    """
    candidates = raw_text.strip().split('\n')

    # Advance past blank lines at the top instead of popping them one by one
    first_kept = 0
    while first_kept < len(candidates) and not candidates[first_kept].strip():
        first_kept += 1

    kept = []
    heading_found = False
    for current in candidates[first_kept:]:
        is_heading = current.strip() == heading
        if is_heading and heading_found:
            # A repeated heading adds nothing, so drop it
            continue
        if is_heading:
            heading_found = True
        kept.append(current)

    if heading_found:
        return "\n".join(kept)

    # No heading anywhere in the text: put one in front
    return "\n".join([heading] + kept)
107
+
108
def get_generic_summary(report_dict: dict, code_lang = "python") -> str:
    """
    Render a dictionary of unknown structure as a markdown-style summary.

    The value stored under 'report_title' (or "Untitled Report" when the key
    is absent) becomes the top-level heading. Every other key gets its own
    second-level heading built with ``format_agent_name``. If the lowercased
    key contains 'code' or 'function', the value is wrapped in a fenced code
    block tagged with ``code_lang``; otherwise the value is shown as text.

    Parameters
    ----------
    report_dict : dict
        The dictionary holding the agent output or user report.
    code_lang : str, optional
        Language tag placed on fenced code blocks.

    Returns
    -------
    str
        A formatted summary string.
    """
    sections = [f"# {report_dict.get('report_title', 'Untitled Report')}"]

    for name, content in report_dict.items():
        # The title already went out as the top-level heading
        if name == "report_title":
            continue

        lowered = name.lower()
        looks_like_code = "code" in lowered or "function" in lowered

        # The heading is the same for both kinds of entry; only the body differs
        sections.append(f"\n## {format_agent_name(name).upper()}")
        if looks_like_code:
            sections.append(f"```{code_lang}\n" + str(content) + "\n```")
        else:
            sections.append(str(content))

    return "\n".join(sections)
152
+
153
def remove_consecutive_duplicates(messages):
    """
    Collapse runs of adjacent messages that share the same ``content``.

    Only the first message of each run is kept. Messages with equal content
    that are separated by a different message are all retained. The
    comparison starts from ``None``, so a leading message whose content is
    ``None`` is not kept.

    Parameters
    ----------
    messages : iterable
        Objects exposing a ``content`` attribute.

    Returns
    -------
    list
        The retained message objects, in their original order.
    """
    deduped = []
    last_content = None

    for message in messages:
        current_content = message.content
        if current_content != last_content:
            deduped.append(message)
        # When the contents compare equal this rebinding changes nothing observable
        last_content = current_content

    return deduped
163
+
164
+
File without changes
@@ -0,0 +1,24 @@
1
+
2
+
3
+ import json
4
+ import plotly.io as pio
5
+
6
def plotly_from_dict(plotly_graph_dict: dict):
    """
    Convert a Plotly graph dictionary to a Plotly graph object.

    Parameters:
    -----------
    plotly_graph_dict: dict
        A Plotly graph dictionary. If None is passed, None is returned.

    Returns:
    --------
    plotly_graph: plotly.graph_objects.Figure or None
        A Plotly graph object, or None when no dictionary was supplied.
    """

    # Test the argument, not the function object: `plotly_from_dict is None`
    # is never true, so a missing graph used to fall through to the JSON
    # round trip below instead of returning None.
    if plotly_graph_dict is None:
        return None

    # Round-trip through JSON so plotly rebuilds the figure from its
    # serialized form.
    return pio.from_json(json.dumps(plotly_graph_dict))
@@ -0,0 +1,245 @@
1
+ Metadata-Version: 2.2
2
+ Name: ai-data-science-team
3
+ Version: 0.0.0.9009
4
+ Summary: Build and run an AI-powered data science team.
5
+ Home-page: https://github.com/business-science/ai-data-science-team
6
+ Author: Matt Dancho
7
+ Author-email: mdancho@business-science.io
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Python: >=3.9
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: openpyxl
17
+ Requires-Dist: langchain
18
+ Requires-Dist: langchain_community
19
+ Requires-Dist: langchain_openai
20
+ Requires-Dist: langchain_experimental
21
+ Requires-Dist: langgraph>=0.2.57
22
+ Requires-Dist: openai
23
+ Requires-Dist: pandas
24
+ Requires-Dist: sqlalchemy
25
+ Requires-Dist: numpy
26
+ Requires-Dist: plotly
27
+ Requires-Dist: streamlit
28
+ Requires-Dist: scikit-learn
29
+ Requires-Dist: xgboost
30
+ Provides-Extra: machine-learning-agent
31
+ Requires-Dist: h2o; extra == "machine-learning-agent"
32
+ Provides-Extra: all
33
+ Requires-Dist: h2o; extra == "all"
34
+ Dynamic: author
35
+ Dynamic: author-email
36
+ Dynamic: classifier
37
+ Dynamic: description
38
+ Dynamic: description-content-type
39
+ Dynamic: home-page
40
+ Dynamic: provides-extra
41
+ Dynamic: requires-dist
42
+ Dynamic: requires-python
43
+ Dynamic: summary
44
+
45
+ <div align="center">
46
+ <a href="https://github.com/business-science/ai-data-science-team">
47
+ <picture>
48
+ <img src="/img/ai_data_science_team_logo.jpg" alt="AI Data Science Team" width="400">
49
+ </picture>
50
+ </a>
51
+ </div>
52
+ <div align="center">
53
+ <em>An AI-powered data science team of agents to help you perform common data science tasks 10X faster</em>
54
+ </div>
55
+ <div align="center">
56
+ <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
57
+ <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
58
+ <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
59
+ </div>
60
+
61
+
62
+ # Your AI Data Science Team (🪖 An Army Of Agents)
63
+
64
+ **An AI-powered data science team of agents to help you perform common data science tasks 10X faster**.
65
+
66
+ [**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
67
+
68
+ *Beta - This Python library is under active development. There may be breaking changes that occur until release of 0.1.0.*
69
+
70
+ ---
71
+
72
+ The AI Data Science Team of Copilots includes Agents that specialize in data cleaning, preparation, feature engineering, modeling (machine learning), and interpretation of various business problems like:
73
+
74
+ - Churn Modeling
75
+ - Employee Attrition
76
+ - Lead Scoring
77
+ - Insurance Risk
78
+ - Credit Card Risk
79
+ - And more
80
+
81
+ ## Table of Contents
82
+
83
+ - [Your AI Data Science Team (🪖 An Army Of Agents)](#your-ai-data-science-team--an-army-of-agents)
84
+ - [Table of Contents](#table-of-contents)
85
+ - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
86
+ - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
87
+ - [Data Science Agents](#data-science-agents)
88
+ - [NEW: Multi-Agents](#new-multi-agents)
89
+ - [Coming Soon: Data Science Apps](#coming-soon-data-science-apps)
90
+ - [Agents Available Now](#agents-available-now)
91
+ - [Data Science Agents](#data-science-agents-1)
92
+ - [Multi-Agents](#multi-agents)
93
+ - [Agents Coming Soon](#agents-coming-soon)
94
+ - [Disclaimer](#disclaimer)
95
+ - [Installation](#installation)
96
+ - [Usage](#usage)
97
+ - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
98
+ - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
99
+ - [Contributing](#contributing)
100
+ - [License](#license)
101
+ - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
102
+
103
+ ## Companies That Want A Custom AI Data Science Team (And AI Apps)
104
+
105
+ Want to have your own _customized_ enterprise-grade AI Data Science Team and *domain-specific* AI-powered Apps?
106
+
107
+ **Send inquiries here:** [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
108
+
109
+ ## Generative AI for Data Scientists Workshop
110
+
111
+ If you're an aspiring data scientist who wants to learn how to build AI Agents and AI Apps for your company that perform Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, then I'd love to help you.
112
+
113
+ [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
114
+
115
+ ## Data Science Agents
116
+
117
+ This project is a work in progress. New data science agents will be released soon.
118
+
119
+ ![Data Science Team](/img/ai_data_science_team.jpg)
120
+
121
+ ### NEW: Multi-Agents
122
+
123
+ These are the internals of the SQL Data Analyst Agent, which connects to SQL databases to pull data into the data science environment. It creates pipelines to automate data extraction and performs Joins, Aggregations, and other SQL Query operations. It also includes a Data Visualization Agent that creates visualizations to help you understand your data:
124
+
125
+ ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
126
+
127
+ ### Coming Soon: Data Science Apps
128
+
129
+ This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
130
+
131
+ ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
132
+
133
+ ### Agents Available Now
134
+
135
+ #### Data Science Agents
136
+
137
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
138
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations.
139
+ 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
140
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
141
+ 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
142
+
143
+ #### Multi-Agents
144
+
145
+ 1. **SQL Data Analyst Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. Includes a Data Visualization Agent that creates visualizations to help you understand your data.
146
+
147
+ ### Agents Coming Soon
148
+
149
+ 1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
150
+ 2. **Machine Learning Agent:** Builds and logs the machine learning models.
151
+ 3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
152
+ 4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
153
+
154
+ ## Disclaimer
155
+
156
+ **This project is for educational purposes only.**
157
+
158
+ - It is not intended to replace your company's data science team
159
+ - No warranties or guarantees provided
160
+ - Creator assumes no liability for financial loss
161
+ - Consult an experienced Generative AI Data Scientist for building your own custom AI Data Science Team
162
+ - If you want a custom enterprise-grade AI Data Science Team, [send inquiries here](https://www.business-science.io/contact.html).
163
+
164
+ By using this software, you agree to use it solely for learning purposes.
165
+
166
+ ## Installation
167
+
168
+ ``` bash
169
+ pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
170
+ ```
171
+
172
+ ## Usage
173
+
174
+ [See all examples here.](/examples)
175
+
176
+ ### Example 1: Feature Engineering with the Feature Engineering Agent
177
+
178
+ [See the full example here.](/examples/feature_engineering_agent.ipynb)
179
+
180
+ ``` python
181
+ feature_engineering_agent = FeatureEngineeringAgent(model = llm)
182
+
183
+ feature_engineering_agent.invoke_agent(
184
+ data_raw = df,
185
+ user_instructions = "Make sure to scale and center numeric features",
186
+ target_variable = "Churn",
187
+ max_retries = 3,
188
+ )
189
+ ```
190
+
191
+ ``` bash
192
+ ---FEATURE ENGINEERING AGENT----
193
+ * CREATE FEATURE ENGINEER CODE
194
+ * EXECUTING AGENT CODE
195
+ * EXPLAIN AGENT CODE
196
+ ```
197
+
198
+ ``` python
199
+ feature_engineering_agent.get_data_engineered()
200
+ ```
201
+
202
+ ### Example 2: Cleaning Data with the Data Cleaning Agent
203
+
204
+ [See the full example here.](/examples/data_cleaning_agent.ipynb)
205
+
206
+ ``` python
207
+ data_cleaning_agent = DataCleaningAgent(model = llm)
208
+
209
+ response = data_cleaning_agent.invoke_agent(
210
+ data_raw = df,
211
+ user_instructions = "Don't remove outliers when cleaning the data.",
212
+ max_retries = 3,
213
+ )
214
+ ```
215
+
216
+ ``` bash
217
+ ---DATA CLEANING AGENT----
218
+ * CREATE DATA CLEANER CODE
219
+ * EXECUTING AGENT CODE
220
+ * EXPLAIN AGENT CODE
221
+ ```
222
+
223
+ ``` python
224
+ data_cleaning_agent.get_data_cleaned()
225
+ ```
226
+
227
+ ## Contributing
228
+
229
+ 1. Fork the repository
230
+ 2. Create a feature branch
231
+ 3. Commit your changes
232
+ 4. Push to the branch
233
+ 5. Create a Pull Request
234
+
235
+ ## License
236
+
237
+ This project is licensed under the MIT License. See LICENSE file for details.
238
+
239
+ # Want To Become A Full-Stack Generative AI Data Scientist?
240
+
241
+ ![Generative AI Data Scientist](/img/become_a_generative_ai_data_scientist.jpg)
242
+
243
+ I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
244
+
245
+
@@ -0,0 +1,28 @@
1
+ ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ ai_data_science_team/_version.py,sha256=IEp7uHd_8RlLYvLFCpp_wJCutdvJI7cJ73IN0GzK3ts,26
3
+ ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
+ ai_data_science_team/agents/__init__.py,sha256=KSwxfciazWyaDG-xM93SadiIyT6X4d3uJLTdvHvVKq0,553
5
+ ai_data_science_team/agents/data_cleaning_agent.py,sha256=OWJ3tEA5cy2fo92bTmKS8CDA48ZRRqmWg2kH7cacjDM,27337
6
+ ai_data_science_team/agents/data_visualization_agent.py,sha256=pm7yln3GI91mOAjwDveenWwYXtJqh990oFvsoFhX3aA,28864
7
+ ai_data_science_team/agents/data_wrangling_agent.py,sha256=UUoejYBmVFdM4At_CKQjYUyFHkaloowdd6yAElfeV9Q,32332
8
+ ai_data_science_team/agents/feature_engineering_agent.py,sha256=bngc0COOYa8AolJwQrNuO1aDRgwBCp6LCN9_otIscWk,31011
9
+ ai_data_science_team/agents/sql_database_agent.py,sha256=M_7IBOu7ISZZEtDAC9KGQIE7FPaXSyQ5IdD8vu91_DM,31164
10
+ ai_data_science_team/ml_agents/__init__.py,sha256=fA5uX6dSVMAf2ApmBJXEArbnKNmsmuE0nbBsCeNAksk,86
11
+ ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=h6kz8ZPw7ApCdfrKBqggHfBnwBJ3kDSzLMwxMxz_2tM,55181
12
+ ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
13
+ ai_data_science_team/multiagents/sql_data_analyst.py,sha256=2gETU9O5t9R5Ut1kEW1T3H-6Sh8xDzDfQmFV3i5lMKs,14233
14
+ ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
15
+ ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP3cEFuf22-R5RM,330
16
+ ai_data_science_team/templates/agent_templates.py,sha256=pphuitXUVv21ljr_H-aof6Xq78KvDY0adF3K6lXGEz4,29107
17
+ ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ ai_data_science_team/tools/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
19
+ ai_data_science_team/tools/metadata.py,sha256=3lPxLEUr3I9AF6wIKx5en-GV6JVkpUHDSLQxKj1N5Gs,9313
20
+ ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
21
+ ai_data_science_team/tools/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
22
+ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
24
+ ai_data_science_team-0.0.0.9009.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
25
+ ai_data_science_team-0.0.0.9009.dist-info/METADATA,sha256=iMIyksmkPmuj9TI_oIa1lXGJYKVspxhWvflBJ1zlW0o,9875
26
+ ai_data_science_team-0.0.0.9009.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
27
+ ai_data_science_team-0.0.0.9009.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
28
+ ai_data_science_team-0.0.0.9009.dist-info/RECORD,,
@@ -1,183 +0,0 @@
1
- Metadata-Version: 2.2
2
- Name: ai-data-science-team
3
- Version: 0.0.0.9007
4
- Summary: Build and run an AI-powered data science team.
5
- Home-page: https://github.com/business-science/ai-data-science-team
6
- Author: Matt Dancho
7
- Author-email: mdancho@business-science.io
8
- Requires-Python: >=3.9
9
- Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: openpyxl
12
- Requires-Dist: langchain
13
- Requires-Dist: langchain_community
14
- Requires-Dist: langchain_openai
15
- Requires-Dist: langchain_experimental
16
- Requires-Dist: langgraph>=0.2.57
17
- Requires-Dist: openai
18
- Requires-Dist: pandas
19
- Requires-Dist: numpy
20
- Requires-Dist: plotly
21
- Requires-Dist: streamlit
22
- Requires-Dist: scikit-learn
23
- Requires-Dist: xgboost
24
- Dynamic: author
25
- Dynamic: author-email
26
- Dynamic: description
27
- Dynamic: description-content-type
28
- Dynamic: home-page
29
- Dynamic: requires-dist
30
- Dynamic: requires-python
31
- Dynamic: summary
32
-
33
- # Your AI Data Science Team (An Army Of Copilots)
34
-
35
- **An AI-powered data science team of copilots that uses agents to help you perform common data science tasks 10X faster**.
36
-
37
- **Star ⭐ This GitHub (Takes 2 seconds and means a lot).**
38
-
39
- *Beta - This Python library is under active development. There may be breaking changes that occur until release of 0.1.0.*
40
-
41
- ---
42
-
43
- The AI Data Science Team of Copilots includes Agents that specialize data cleaning, preparation, feature engineering, modeling (machine learning), and interpretation of various business problems like:
44
-
45
- - Churn Modeling
46
- - Employee Attrition
47
- - Lead Scoring
48
- - Insurance Risk
49
- - Credit Card Risk
50
- - And more
51
-
52
- ## Table of Contents
53
-
54
- - [Your AI Data Science Team (An Army Of Copilots)](#your-ai-data-science-team-an-army-of-copilots)
55
- - [Table of Contents](#table-of-contents)
56
- - [Companies That Want An AI Data Science Team Copilot](#companies-that-want-an-ai-data-science-team-copilot)
57
- - [Free Generative AI For Data Scientists Workshop](#free-generative-ai-for-data-scientists-workshop)
58
- - [Data Science Agents](#data-science-agents)
59
- - [Coming Soon: Multi-Agents](#coming-soon-multi-agents)
60
- - [Agents Available Now](#agents-available-now)
61
- - [Agents Coming Soon](#agents-coming-soon)
62
- - [Disclaimer](#disclaimer)
63
- - [Installation](#installation)
64
- - [Usage](#usage)
65
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
66
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
67
- - [Contributing](#contributing)
68
- - [License](#license)
69
-
70
- ## Companies That Want An AI Data Science Team Copilot
71
-
72
- If you are interested in having your own custom enteprise-grade AI Data Science Team Copilot, send inquiries here: [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
73
-
74
- ## Free Generative AI For Data Scientists Workshop
75
-
76
- If you want to learn how to build AI Agents for your company that performs Data Science, Business Intelligence, Churn Modeling, Time Series Forecasting, and more, [register for my next Generative AI for Data Scientists workshop here.](https://learn.business-science.io/ai-register)
77
-
78
- ## Data Science Agents
79
-
80
- This project is a work in progress. New data science agents will be released soon.
81
-
82
- ![Data Science Team](/img/ai_data_science_team.jpg)
83
-
84
- ### Coming Soon: Multi-Agents
85
-
86
- This is the internals of the Business Intelligence SQL Agent I'm working on:
87
-
88
- ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
89
-
90
- ### Agents Available Now
91
-
92
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
93
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations.
94
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
95
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
96
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.
97
-
98
- ### Agents Coming Soon
99
-
100
- 1. **Data Analyst:** Analyzes data structure, creates exploratory visualizations, and performs correlation analysis to identify relationships.
101
- 2. **Machine Learning Agent:** Builds and logs the machine learning models.
102
- 3. **Interpretability Agent:** Performs Interpretable ML to explain why the model returned predictions including which features were the most important to the model.
103
- 4. **Supervisor:** Forms task list. Moderates sub-agents. Returns completed assignment.
104
-
105
- ## Disclaimer
106
-
107
- **This project is for educational purposes only.**
108
-
109
- - It is not intended to replace your company's data science team
110
- - No warranties or guarantees provided
111
- - Creator assumes no liability for financial loss
112
- - Consult an experienced Generative AI Data Scientist for building your own custom AI Data Science Team
113
- - If you want a custom enterprise-grade AI Data Science Team, [send inquiries here](https://www.business-science.io/contact.html).
114
-
115
- By using this software, you agree to use it solely for learning purposes.
116
-
117
- ## Installation
118
-
119
- ``` bash
120
- pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
121
- ```
122
-
123
- ## Usage
124
-
125
- [See all examples here.](/examples)
126
-
127
- ### Example 1: Feature Engineering with the Feature Engineering Agent
128
-
129
- [See the full example here.](/examples/feature_engineering_agent.ipynb)
130
-
131
- ``` python
132
- feature_engineering_agent = make_feature_engineering_agent(model = llm)
133
-
134
- response = feature_engineering_agent.invoke({
135
- "user_instructions": "Make sure to scale and center numeric features",
136
- "target_variable": "Churn",
137
- "data_raw": df.to_dict(),
138
- "max_retries":3,
139
- "retry_count":0
140
- })
141
- ```
142
-
143
- ``` bash
144
- ---FEATURE ENGINEERING AGENT----
145
- * CREATE FEATURE ENGINEER CODE
146
- * EXECUTING AGENT CODE
147
- * EXPLAIN AGENT CODE
148
- ```
149
-
150
- ### Example 2: Cleaning Data with the Data Cleaning Agent
151
-
152
- [See the full example here.](/examples/data_cleaning_agent.ipynb)
153
-
154
- ``` python
155
- data_cleaning_agent = make_data_cleaning_agent(model = llm)
156
-
157
- response = data_cleaning_agent.invoke({
158
- "user_instructions": "Don't remove outliers when cleaning the data.",
159
- "data_raw": df.to_dict(),
160
- "max_retries":3,
161
- "retry_count":0
162
- })
163
- ```
164
-
165
- ``` bash
166
- ---DATA CLEANING AGENT----
167
- * CREATE DATA CLEANER CODE
168
- * EXECUTING AGENT CODE
169
- * EXPLAIN AGENT CODE
170
- ```
171
-
172
- ## Contributing
173
-
174
- 1. Fork the repository
175
- 2. Create a feature branch
176
- 3. Commit your changes
177
- 4. Push to the branch
178
- 5. Create a Pull Request
179
-
180
- ## License
181
-
182
- This project is licensed under the MIT License. See LICENSE file for details.
183
-
@@ -1,21 +0,0 @@
1
- ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ai_data_science_team/_version.py,sha256=VJYpfOaKsXjGzPOsT6kYyVW6T9bFBqxt6Ph3qF8t-A8,26
3
- ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
- ai_data_science_team/agents/__init__.py,sha256=rcF18rBsOuPJqJKvoffh6lwr4Nwm24MErM2u4H4Th9s,467
5
- ai_data_science_team/agents/data_cleaning_agent.py,sha256=gixYY4wGehKK_ROgU7CVOzijghmVQGD4hyK9uKhc8Hw,20890
6
- ai_data_science_team/agents/data_visualization_agent.py,sha256=wePFZbdB4kBah8m_iy6f4IDyjl6L6zBWzIgigJEXdk8,12933
7
- ai_data_science_team/agents/data_wrangling_agent.py,sha256=5w1kytoWLE4p3hj0YHVuXcgCd304eNQac-Zrrgmnr2s,16735
8
- ai_data_science_team/agents/feature_engineering_agent.py,sha256=UaaU3VkPhjOV0NbrYXedRb6eHOcOWWiGYhB_srrYWvg,17571
9
- ai_data_science_team/agents/sql_database_agent.py,sha256=mRbEAPHP6NlwQac2_VL9RuyIfCCtrmXTrzu5RLzOoeU,16031
10
- ai_data_science_team/templates/__init__.py,sha256=bNrKGmWXQG7GRczln_zVfUQLzxzp7hSwlLyNtLxleu4,278
11
- ai_data_science_team/templates/agent_templates.py,sha256=xohVgEfxPcVukPLpPfV7mZ0cpFgp-oJVLZRWCv2V-WU,19948
12
- ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- ai_data_science_team/tools/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
14
- ai_data_science_team/tools/metadata.py,sha256=tbnca_tDp67oBA6qD29AKVooJG10VqGr4vwzj4rPUas,8348
15
- ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
16
- ai_data_science_team/tools/regex.py,sha256=vkfdvi9pDe582p-fh_7cB07Wb0dOR2CsiVq-wUO3mas,2491
17
- ai_data_science_team-0.0.0.9007.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
18
- ai_data_science_team-0.0.0.9007.dist-info/METADATA,sha256=KcMFR2V9_wbepdKsrlFdfc7UB7t-Hf7i75x67LPXw3Q,6783
19
- ai_data_science_team-0.0.0.9007.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
20
- ai_data_science_team-0.0.0.9007.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
21
- ai_data_science_team-0.0.0.9007.dist-info/RECORD,,