ai-data-science-team 0.0.0.9011__tar.gz → 0.0.0.9013__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. {ai_data_science_team-0.0.0.9011/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9013}/PKG-INFO +24 -6
  2. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/README.md +16 -5
  3. ai_data_science_team-0.0.0.9013/ai_data_science_team/_version.py +1 -0
  4. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/data_loader_tools_agent.py +11 -0
  5. ai_data_science_team-0.0.0.9013/ai_data_science_team/ds_agents/__init__.py +1 -0
  6. ai_data_science_team-0.0.0.9013/ai_data_science_team/ds_agents/eda_tools_agent.py +258 -0
  7. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/ml_agents/mlflow_tools_agent.py +10 -0
  8. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/tools/dataframe.py +6 -1
  9. ai_data_science_team-0.0.0.9013/ai_data_science_team/tools/eda.py +352 -0
  10. ai_data_science_team-0.0.0.9013/ai_data_science_team/utils/__init__.py +0 -0
  11. ai_data_science_team-0.0.0.9013/ai_data_science_team/utils/html.py +27 -0
  12. ai_data_science_team-0.0.0.9013/ai_data_science_team/utils/matplotlib.py +46 -0
  13. ai_data_science_team-0.0.0.9013/ai_data_science_team/utils/messages.py +27 -0
  14. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013/ai_data_science_team.egg-info}/PKG-INFO +24 -6
  15. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team.egg-info/SOURCES.txt +7 -0
  16. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team.egg-info/requires.txt +8 -0
  17. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/setup.py +2 -1
  18. ai_data_science_team-0.0.0.9011/ai_data_science_team/_version.py +0 -1
  19. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/LICENSE +0 -0
  20. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/MANIFEST.in +0 -0
  21. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/__init__.py +0 -0
  22. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/__init__.py +0 -0
  23. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/data_cleaning_agent.py +0 -0
  24. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/data_visualization_agent.py +0 -0
  25. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/data_wrangling_agent.py +0 -0
  26. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/feature_engineering_agent.py +0 -0
  27. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/agents/sql_database_agent.py +0 -0
  28. /ai_data_science_team-0.0.0.9011/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py → /ai_data_science_team-0.0.0.9013/ai_data_science_team/ds_agents/modeling_tools_agent.py +0 -0
  29. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/ml_agents/__init__.py +0 -0
  30. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/ml_agents/h2o_ml_agent.py +0 -0
  31. /ai_data_science_team-0.0.0.9011/ai_data_science_team/parsers/__init__.py → /ai_data_science_team-0.0.0.9013/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  32. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/multiagents/__init__.py +0 -0
  33. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/multiagents/sql_data_analyst.py +0 -0
  34. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
  35. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/orchestration.py +0 -0
  36. {ai_data_science_team-0.0.0.9011/ai_data_science_team/tools → ai_data_science_team-0.0.0.9013/ai_data_science_team/parsers}/__init__.py +0 -0
  37. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/parsers/parsers.py +0 -0
  38. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/templates/__init__.py +0 -0
  39. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/templates/agent_templates.py +0 -0
  40. {ai_data_science_team-0.0.0.9011/ai_data_science_team/utils → ai_data_science_team-0.0.0.9013/ai_data_science_team/tools}/__init__.py +0 -0
  41. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/tools/data_loader.py +0 -0
  42. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/tools/h2o.py +0 -0
  43. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/tools/mlflow.py +0 -0
  44. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/tools/sql.py +0 -0
  45. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/utils/logging.py +0 -0
  46. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/utils/plotly.py +0 -0
  47. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team/utils/regex.py +0 -0
  48. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
  49. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/ai_data_science_team.egg-info/top_level.txt +0 -0
  50. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/requirements.txt +0 -0
  51. {ai_data_science_team-0.0.0.9011 → ai_data_science_team-0.0.0.9013}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9011
3
+ Version: 0.0.0.9013
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -31,9 +31,16 @@ Requires-Dist: psutil
31
31
  Provides-Extra: machine-learning
32
32
  Requires-Dist: h2o; extra == "machine-learning"
33
33
  Requires-Dist: mlflow; extra == "machine-learning"
34
+ Provides-Extra: data-science
35
+ Requires-Dist: pytimetk; extra == "data-science"
36
+ Requires-Dist: missingno; extra == "data-science"
37
+ Requires-Dist: sweetviz; extra == "data-science"
34
38
  Provides-Extra: all
35
39
  Requires-Dist: h2o; extra == "all"
36
40
  Requires-Dist: mlflow; extra == "all"
41
+ Requires-Dist: pytimetk; extra == "all"
42
+ Requires-Dist: missingno; extra == "all"
43
+ Requires-Dist: sweetviz; extra == "all"
37
44
  Dynamic: author
38
45
  Dynamic: author-email
39
46
  Dynamic: classifier
@@ -59,6 +66,8 @@ Dynamic: summary
59
66
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
60
67
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
61
68
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
69
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
70
+
62
71
  </div>
63
72
 
64
73
 
@@ -93,8 +102,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
102
  - [Apps Available Now](#apps-available-now)
94
103
  - [🔥 Agentic Applications](#-agentic-applications)
95
104
  - [Agents Available Now](#agents-available-now)
96
- - [Agents](#agents)
105
+ - [Standard Agents](#standard-agents)
97
106
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
107
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
98
108
  - [Multi-Agents](#multi-agents)
99
109
  - [Agents Coming Soon](#agents-coming-soon)
100
110
  - [Disclaimer](#disclaimer)
@@ -122,7 +132,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
132
 
123
133
  This project is a work in progress. New data science agents will be released soon.
124
134
 
125
- ![AI Data Science Team](/img/ai_data_science_team_.jpg)
135
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
126
136
 
127
137
  ### NEW: Multi-Agents
128
138
 
@@ -142,18 +152,22 @@ This is a top secret project I'm working on. It's a multi-agent data science app
142
152
 
143
153
  #### 🔥 Agentic Applications
144
154
 
145
- 1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
155
+ 1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
156
+
157
+ ![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
158
+
159
+ 2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
146
160
 
147
161
  ### Agents Available Now
148
162
 
149
- #### Agents
163
+ #### Standard Agents
150
164
 
151
165
  1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
152
166
  2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
153
167
  3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
154
168
  4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
155
169
  5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
156
- 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
170
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
157
171
 
158
172
 
159
173
  #### 🔥🔥 NEW! Machine Learning Agents
@@ -161,6 +175,10 @@ This is a top secret project I'm working on. It's a multi-agent data science app
161
175
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
162
176
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
163
177
 
178
+ #### 🔥 NEW! Data Science Agents
179
+
180
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
181
+
164
182
 
165
183
  #### Multi-Agents
166
184
 
@@ -12,6 +12,8 @@
12
12
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
13
13
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
14
14
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
15
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
16
+
15
17
  </div>
16
18
 
17
19
 
@@ -46,8 +48,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
46
48
  - [Apps Available Now](#apps-available-now)
47
49
  - [🔥 Agentic Applications](#-agentic-applications)
48
50
  - [Agents Available Now](#agents-available-now)
49
- - [Agents](#agents)
51
+ - [Standard Agents](#standard-agents)
50
52
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
53
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
51
54
  - [Multi-Agents](#multi-agents)
52
55
  - [Agents Coming Soon](#agents-coming-soon)
53
56
  - [Disclaimer](#disclaimer)
@@ -75,7 +78,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
75
78
 
76
79
  This project is a work in progress. New data science agents will be released soon.
77
80
 
78
- ![AI Data Science Team](/img/ai_data_science_team_.jpg)
81
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
79
82
 
80
83
  ### NEW: Multi-Agents
81
84
 
@@ -95,18 +98,22 @@ This is a top secret project I'm working on. It's a multi-agent data science app
95
98
 
96
99
  #### 🔥 Agentic Applications
97
100
 
98
- 1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
101
+ 1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
102
+
103
+ ![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
104
+
105
+ 2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
99
106
 
100
107
  ### Agents Available Now
101
108
 
102
- #### Agents
109
+ #### Standard Agents
103
110
 
104
111
  1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
105
112
  2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
106
113
  3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
107
114
  4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
108
115
  5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
109
- 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
116
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
110
117
 
111
118
 
112
119
  #### 🔥🔥 NEW! Machine Learning Agents
@@ -114,6 +121,10 @@ This is a top secret project I'm working on. It's a multi-agent data science app
114
121
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
115
122
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
116
123
 
124
+ #### 🔥 NEW! Data Science Agents
125
+
126
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
127
+
117
128
 
118
129
  #### Multi-Agents
119
130
 
@@ -0,0 +1 @@
1
+ __version__ = "0.0.0.9013"
@@ -25,6 +25,7 @@ from ai_data_science_team.tools.data_loader import (
25
25
  get_file_info,
26
26
  search_files_by_pattern,
27
27
  )
28
+ from ai_data_science_team.utils.messages import get_tool_call_names
28
29
 
29
30
  AGENT_NAME = "data_loader_tools_agent"
30
31
 
@@ -174,6 +175,12 @@ class DataLoaderToolsAgent(BaseAgent):
174
175
  return Markdown(self.response["messages"][0].content)
175
176
  else:
176
177
  return self.response["messages"][0].content
178
+
179
+ def get_tool_calls(self):
180
+ """
181
+ Returns the tool calls made by the agent.
182
+ """
183
+ return self.response["tool_calls"]
177
184
 
178
185
 
179
186
 
@@ -204,6 +211,7 @@ def make_data_loader_tools_agent(
204
211
  internal_messages: Annotated[Sequence[BaseMessage], operator.add]
205
212
  user_instructions: str
206
213
  data_loader_artifacts: dict
214
+ tool_calls: List[str]
207
215
 
208
216
  def data_loader_agent(state):
209
217
 
@@ -253,10 +261,13 @@ def make_data_loader_tools_agent(
253
261
  elif isinstance(last_message, dict) and "artifact" in last_message:
254
262
  last_tool_artifact = last_message["artifact"]
255
263
 
264
+ tool_calls = get_tool_call_names(internal_messages)
265
+
256
266
  return {
257
267
  "messages": [last_ai_message],
258
268
  "internal_messages": internal_messages,
259
269
  "data_loader_artifacts": last_tool_artifact,
270
+ "tool_calls": tool_calls,
260
271
  }
261
272
 
262
273
  workflow = StateGraph(GraphState)
@@ -0,0 +1 @@
1
+ from ai_data_science_team.ds_agents.eda_tools_agent import EDAToolsAgent, make_eda_tools_agent
@@ -0,0 +1,258 @@
1
+
2
+
3
+ from typing import Any, Optional, Annotated, Sequence, List, Dict, Tuple
4
+ import operator
5
+ import pandas as pd
6
+ import os
7
+ from io import StringIO, BytesIO
8
+ import base64
9
+ import matplotlib.pyplot as plt
10
+
11
+ from IPython.display import Markdown
12
+
13
+ from langchain_core.messages import BaseMessage, AIMessage
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+
21
+ from ai_data_science_team.tools.eda import (
22
+ explain_data,
23
+ describe_dataset,
24
+ visualize_missing,
25
+ correlation_funnel,
26
+ generate_sweetviz_report,
27
+ )
28
+ from ai_data_science_team.utils.messages import get_tool_call_names
29
+
30
+
31
+ AGENT_NAME = "exploratory_data_analyst_agent"
32
+
33
+ # Updated tool list for EDA
34
+ EDA_TOOLS = [
35
+ explain_data,
36
+ describe_dataset,
37
+ visualize_missing,
38
+ correlation_funnel,
39
+ generate_sweetviz_report,
40
+ ]
41
+
42
+ class EDAToolsAgent(BaseAgent):
43
+ """
44
+ An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
45
+ missing data visualizations, correlation funnels, EDA reports, etc.
46
+
47
+ Parameters:
48
+ ----------
49
+ model : langchain.llms.base.LLM
50
+ The language model for generating the tool-calling agent.
51
+ create_react_agent_kwargs : dict
52
+ Additional kwargs for create_react_agent.
53
+ invoke_react_agent_kwargs : dict
54
+ Additional kwargs for agent invocation.
55
+ """
56
+
57
+ def __init__(
58
+ self,
59
+ model: Any,
60
+ create_react_agent_kwargs: Optional[Dict] = {},
61
+ invoke_react_agent_kwargs: Optional[Dict] = {},
62
+ ):
63
+ self._params = {
64
+ "model": model,
65
+ "create_react_agent_kwargs": create_react_agent_kwargs,
66
+ "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
67
+ }
68
+ self._compiled_graph = self._make_compiled_graph()
69
+ self.response = None
70
+
71
+ def _make_compiled_graph(self):
72
+ """
73
+ Creates the compiled state graph for the EDA agent.
74
+ """
75
+ self.response = None
76
+ return make_eda_tools_agent(**self._params)
77
+
78
+ def update_params(self, **kwargs):
79
+ """
80
+ Updates the agent's parameters and rebuilds the compiled graph.
81
+ """
82
+ for k, v in kwargs.items():
83
+ self._params[k] = v
84
+ self._compiled_graph = self._make_compiled_graph()
85
+
86
+ async def ainvoke_agent(
87
+ self,
88
+ user_instructions: str = None,
89
+ data_raw: pd.DataFrame = None,
90
+ **kwargs
91
+ ):
92
+ """
93
+ Asynchronously runs the agent with user instructions and data.
94
+
95
+ Parameters:
96
+ ----------
97
+ user_instructions : str, optional
98
+ The instructions for the agent.
99
+ data_raw : pd.DataFrame, optional
100
+ The input data as a DataFrame.
101
+ """
102
+ response = await self._compiled_graph.ainvoke(
103
+ {
104
+ "user_instructions": user_instructions,
105
+ "data_raw": data_raw.to_dict() if data_raw is not None else None,
106
+ },
107
+ **kwargs
108
+ )
109
+ self.response = response
110
+ return None
111
+
112
+ def invoke_agent(
113
+ self,
114
+ user_instructions: str = None,
115
+ data_raw: pd.DataFrame = None,
116
+ **kwargs
117
+ ):
118
+ """
119
+ Synchronously runs the agent with user instructions and data.
120
+
121
+ Parameters:
122
+ ----------
123
+ user_instructions : str, optional
124
+ The instructions for the agent.
125
+ data_raw : pd.DataFrame, optional
126
+ The input data as a DataFrame.
127
+ """
128
+ response = self._compiled_graph.invoke(
129
+ {
130
+ "user_instructions": user_instructions,
131
+ "data_raw": data_raw.to_dict() if data_raw is not None else None,
132
+ },
133
+ **kwargs
134
+ )
135
+ self.response = response
136
+ return None
137
+
138
+ def get_internal_messages(self, markdown: bool = False):
139
+ """
140
+ Returns internal messages from the agent response.
141
+ """
142
+ pretty_print = "\n\n".join(
143
+ [f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
144
+ for msg in self.response["internal_messages"]]
145
+ )
146
+ if markdown:
147
+ return Markdown(pretty_print)
148
+ else:
149
+ return self.response["internal_messages"]
150
+
151
+ def get_artifacts(self, as_dataframe: bool = False):
152
+ """
153
+ Returns the EDA artifacts from the agent response.
154
+ """
155
+ if as_dataframe:
156
+ return pd.DataFrame(self.response["eda_artifacts"])
157
+ else:
158
+ return self.response["eda_artifacts"]
159
+
160
+ def get_ai_message(self, markdown: bool = False):
161
+ """
162
+ Returns the AI message from the agent response.
163
+ """
164
+ if markdown:
165
+ return Markdown(self.response["messages"][0].content)
166
+ else:
167
+ return self.response["messages"][0].content
168
+
169
+ def get_tool_calls(self):
170
+ """
171
+ Returns the tool calls made by the agent.
172
+ """
173
+ return self.response["tool_calls"]
174
+
175
+ def make_eda_tools_agent(
176
+ model: Any,
177
+ create_react_agent_kwargs: Optional[Dict] = {},
178
+ invoke_react_agent_kwargs: Optional[Dict] = {},
179
+ ):
180
+ """
181
+ Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
182
+
183
+ Parameters:
184
+ ----------
185
+ model : Any
186
+ The language model used for tool-calling.
187
+ create_react_agent_kwargs : dict
188
+ Additional kwargs for create_react_agent.
189
+ invoke_react_agent_kwargs : dict
190
+ Additional kwargs for agent invocation.
191
+
192
+ Returns:
193
+ -------
194
+ app : langgraph.graph.CompiledStateGraph
195
+ The compiled state graph for the EDA agent.
196
+ """
197
+
198
+ class GraphState(AgentState):
199
+ internal_messages: Annotated[Sequence[BaseMessage], operator.add]
200
+ user_instructions: str
201
+ data_raw: dict
202
+ eda_artifacts: dict
203
+ tool_calls: list
204
+
205
+ def exploratory_agent(state):
206
+ print(format_agent_name(AGENT_NAME))
207
+ print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")
208
+
209
+ tool_node = ToolNode(
210
+ tools=EDA_TOOLS
211
+ )
212
+
213
+ eda_agent = create_react_agent(
214
+ model,
215
+ tools=tool_node,
216
+ state_schema=GraphState,
217
+ **create_react_agent_kwargs,
218
+ )
219
+
220
+ response = eda_agent.invoke(
221
+ {
222
+ "messages": [("user", state["user_instructions"])],
223
+ "data_raw": state["data_raw"],
224
+ },
225
+ invoke_react_agent_kwargs,
226
+ )
227
+
228
+ print(" * POST-PROCESSING EDA RESULTS")
229
+
230
+ internal_messages = response['messages']
231
+ if not internal_messages:
232
+ return {"internal_messages": [], "eda_artifacts": None}
233
+
234
+ last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)
235
+ last_tool_artifact = None
236
+ if len(internal_messages) > 1:
237
+ last_message = internal_messages[-2]
238
+ if hasattr(last_message, "artifact"):
239
+ last_tool_artifact = last_message.artifact
240
+ elif isinstance(last_message, dict) and "artifact" in last_message:
241
+ last_tool_artifact = last_message["artifact"]
242
+
243
+ tool_calls = get_tool_call_names(internal_messages)
244
+
245
+ return {
246
+ "messages": [last_ai_message],
247
+ "internal_messages": internal_messages,
248
+ "eda_artifacts": last_tool_artifact,
249
+ "tool_calls": tool_calls,
250
+ }
251
+
252
+ workflow = StateGraph(GraphState)
253
+ workflow.add_node("exploratory_agent", exploratory_agent)
254
+ workflow.add_edge(START, "exploratory_agent")
255
+ workflow.add_edge("exploratory_agent", END)
256
+
257
+ app = workflow.compile()
258
+ return app
@@ -27,6 +27,7 @@ from ai_data_science_team.tools.mlflow import (
27
27
  mlflow_search_registered_models,
28
28
  mlflow_get_model_version_details,
29
29
  )
30
+ from ai_data_science_team.utils.messages import get_tool_call_names
30
31
 
31
32
  AGENT_NAME = "mlflow_tools_agent"
32
33
 
@@ -228,6 +229,12 @@ class MLflowToolsAgent(BaseAgent):
228
229
  return Markdown(self.response["messages"][0].content)
229
230
  else:
230
231
  return self.response["messages"][0].content
232
+
233
+ def get_tool_calls(self):
234
+ """
235
+ Returns the tool calls made by the agent.
236
+ """
237
+ return self.response["tool_calls"]
231
238
 
232
239
 
233
240
 
@@ -330,10 +337,13 @@ def make_mlflow_tools_agent(
330
337
  elif isinstance(last_message, dict) and "artifact" in last_message:
331
338
  last_tool_artifact = last_message["artifact"]
332
339
 
340
+ tool_calls = get_tool_call_names(internal_messages)
341
+
333
342
  return {
334
343
  "messages": [last_ai_message],
335
344
  "internal_messages": internal_messages,
336
345
  "mlflow_artifacts": last_tool_artifact,
346
+ "tool_calls": tool_calls,
337
347
  }
338
348
 
339
349
 
@@ -74,7 +74,12 @@ def get_dataframe_summary(
74
74
  return summaries
75
75
 
76
76
 
77
- def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
77
+ def _summarize_dataframe(
78
+ df: pd.DataFrame,
79
+ dataset_name: str,
80
+ n_sample=30,
81
+ skip_stats=False
82
+ ) -> str:
78
83
  """Generate a summary string for a single DataFrame."""
79
84
  # 1. Convert dictionary-type cells to strings
80
85
  # This prevents unhashable dict errors during df.nunique().
@@ -0,0 +1,352 @@
1
+
2
+ from typing import Annotated, Dict, Tuple, Union
3
+
4
+ import os
5
+ import tempfile
6
+
7
+ from langchain.tools import tool
8
+
9
+ from langgraph.prebuilt import InjectedState
10
+
11
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
12
+
13
+
14
@tool(response_format='content')
def explain_data(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    n_sample: int = 30,
    skip_stats: bool = False,
):
    """
    Tool: explain_data
    Description:
        Produces an extensive, narrative summary of a DataFrame: shape, column
        dtypes, missing-value percentages, unique counts, sample rows, and
        (unless skipped) descriptive statistics / info output.

    Parameters:
        data_raw (dict): Raw data.
        n_sample (int, default=30): Number of rows to display.
        skip_stats (bool, default=False): If True, omit descriptive stats/info.

    LLM Guidance:
        Use when a detailed, human-readable explanation is needed—i.e., a full
        overview is preferred over a concise numerical summary.

    Returns:
        str: Detailed DataFrame summary.
    """
    print(" * Tool: explain_data")
    import pandas as pd

    # Delegate the heavy lifting to the shared summarizer helper.
    return get_dataframe_summary(
        pd.DataFrame(data_raw),
        n_sample=n_sample,
        skip_stats=skip_stats,
    )
43
+
44
@tool(response_format='content_and_artifact')
def describe_dataset(
    data_raw: Annotated[dict, InjectedState("data_raw")]
) -> Tuple[str, Dict]:
    """
    Tool: describe_dataset
    Description:
        Compute and return summary statistics for the dataset using pandas'
        describe() method. Provides both a textual summary and a structured
        artifact (a dictionary) for further processing.

    Parameters:
    -----------
    data_raw : dict
        The raw data in dictionary format.

    LLM Selection Guidance:
    ------------------------
    Use this tool when:
    - The request emphasizes numerical descriptive statistics (count, mean, std,
      min, quartiles, max).
    - The user needs a concise statistical snapshot rather than a detailed narrative.
    - Both a brief text explanation and a structured data artifact (for downstream
      tasks) are required.

    Returns:
    -------
    Tuple[str, Dict]:
        - content: A textual summary indicating that summary statistics have been computed.
        - artifact: A dictionary (derived from DataFrame.describe()) containing
          detailed statistical measures.
    """
    print(" * Tool: describe_dataset")
    import pandas as pd

    frame = pd.DataFrame(data_raw)
    # include='all' covers both numeric and non-numeric columns.
    stats = frame.describe(include='all')
    return (
        "Summary statistics computed using pandas describe().",
        {'describe_df': stats.to_dict()},
    )
79
+
80
+
81
@tool(response_format='content_and_artifact')
def visualize_missing(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    n_sample: int = None
) -> Tuple[str, Dict]:
    """
    Tool: visualize_missing
    Description:
        Missing value analysis using the missingno library. Generates a matrix
        plot, bar plot, and heatmap plot.

    Parameters:
    -----------
    data_raw : dict
        The raw data in dictionary format.
    n_sample : int, optional (default: None)
        The number of rows to sample from the dataset if it is large.
        Clamped to the row count so oversized requests don't raise.

    Returns:
    -------
    Tuple[str, Dict]:
        content: A message describing the generated plots.
        artifact: A dict with keys 'matrix_plot', 'bar_plot', and 'heatmap_plot'
            each containing the corresponding base64 encoded PNG image.
    """
    print(" * Tool: visualize_missing")

    try:
        import missingno as msno  # Ensure missingno is installed
    except ImportError:
        raise ImportError("Please install the 'missingno' package to use this tool. pip install missingno")

    import pandas as pd
    import base64
    from io import BytesIO
    import matplotlib.pyplot as plt

    # Create the DataFrame and sample if n_sample is provided.
    df = pd.DataFrame(data_raw)
    if n_sample is not None:
        # Clamp: pd.DataFrame.sample raises ValueError when n > len(df).
        df = df.sample(n=min(n_sample, len(df)), random_state=42)

    def _encode_plot(plot_func) -> str:
        """Render one missingno plot of df and return it as a base64 PNG string."""
        plt.figure(figsize=(8, 6))
        plot_func(df)
        plt.tight_layout()
        buf = BytesIO()
        plt.savefig(buf, format="png")
        plt.close()
        buf.seek(0)
        return base64.b64encode(buf.getvalue()).decode("utf-8")

    # One base64-encoded image per missingno visualization.
    encoded_plots = {
        "matrix_plot": _encode_plot(msno.matrix),
        "bar_plot": _encode_plot(msno.bar),
        "heatmap_plot": _encode_plot(msno.heatmap),
    }

    content = "Missing data visualizations (matrix, bar, and heatmap) have been generated."
    return content, encoded_plots
149
+
150
+
151
+
152
@tool(response_format='content_and_artifact')
def correlation_funnel(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    target: str,
    target_bin_index: Union[int, str] = -1,
    corr_method: str = "pearson",
    n_bins: int = 4,
    thresh_infreq: float = 0.01,
    name_infreq: str = "-OTHER",
) -> Tuple[str, Dict]:
    """
    Tool: correlation_funnel
    Description:
        Correlation analysis using the correlation funnel method. The tool
        binarizes the data and computes correlation versus a target column.

    Parameters:
    ----------
    target : str
        The base target column name (e.g., 'Member_Status'). The tool will look
        for columns that begin with this string followed by '__'
        (e.g., 'Member_Status__Gold', 'Member_Status__Platinum').
    target_bin_index : int or str, default -1
        If an integer, selects the target level by position from the matching columns.
        If a string (e.g., "Yes"), attempts to match to the suffix of a column name
        (i.e., 'target__Yes').
    corr_method : str
        The correlation method ('pearson', 'kendall', or 'spearman'). Default is 'pearson'.
    n_bins : int
        The number of bins to use for binarization. Default is 4.
    thresh_infreq : float
        The threshold for infrequent levels. Default is 0.01.
    name_infreq : str
        The name to use for infrequent levels. Default is '-OTHER'.

    Returns:
    -------
    Tuple[str, Dict]:
        content: Summary of the correlation computed.
        artifact: Dict with the correlation data, a base64 static plot (or an
            error dict), and a plotly figure dict (or an error dict).
    """
    print(" * Tool: correlation_funnel")
    # pytimetk registers the DataFrame.binarize()/correlate() accessors used below.
    try:
        import pytimetk as tk  # noqa: F401 -- imported for its pandas extensions
    except ImportError:
        raise ImportError("Please install the 'pytimetk' package to use this tool. pip install pytimetk")
    import pandas as pd
    import base64
    from io import BytesIO
    import matplotlib.pyplot as plt
    import json
    import plotly.graph_objects as go  # noqa: F401
    import plotly.io as pio

    # Convert the raw injected state into a DataFrame.
    df = pd.DataFrame(data_raw)

    # Binarize: one-hot encode categoricals and bin numeric columns.
    df_binarized = df.binarize(
        n_bins=n_bins,
        thresh_infreq=thresh_infreq,
        name_infreq=name_infreq,
        one_hot=True
    )

    # Resolve the full one-hot target column name (e.g. 'target__Yes').
    matching_columns = [col for col in df_binarized.columns if col.startswith(f"{target}__")]
    if not matching_columns:
        # No one-hot columns found: fall back to the provided target as-is.
        full_target = target
    elif isinstance(target_bin_index, str):
        # String index: match the suffix; default to the last level if absent.
        candidate = f"{target}__{target_bin_index}"
        full_target = candidate if candidate in matching_columns else matching_columns[-1]
    else:
        # Integer index: select positionally; default to the last level if out of range.
        try:
            full_target = matching_columns[target_bin_index]
        except IndexError:
            full_target = matching_columns[-1]

    # Compute correlation funnel using the full target column name.
    df_correlated = df_binarized.correlate(target=full_target, method=corr_method)

    # Static (plotnine) rendering -> base64 PNG; failures are captured, not raised.
    try:
        fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
        buf = BytesIO()
        fig.save(buf, format="png")
        plt.close()
        buf.seek(0)
        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
    except Exception as e:
        encoded = {"error": str(e)}

    # Interactive (plotly) rendering -> JSON-serializable dict.
    try:
        fig = df_correlated.plot_correlation_funnel(engine='plotly')
        fig_dict = json.loads(pio.to_json(fig))
    except Exception as e:
        fig_dict = {"error": str(e)}

    content = (f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
               f"Base target was '{target}' with target_bin_index '{target_bin_index}'.")
    artifact = {
        "correlation_data": df_correlated.to_dict(orient="list"),
        "plot_image": encoded,
        "plotly_figure": fig_dict,
    }
    return content, artifact
266
+
267
+
268
+
269
@tool(response_format='content_and_artifact')
def generate_sweetviz_report(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    target: str = None,
    report_name: str = "sweetviz_report.html",
    report_directory: str = None,
    open_browser: bool = False,
) -> Tuple[str, Dict]:
    """
    Tool: generate_sweetviz_report
    Description:
        Make an Exploratory Data Analysis (EDA) report using the Sweetviz library.

    Parameters:
    -----------
    data_raw : dict
        The raw data injected as a dictionary (converted from a DataFrame).
    target : str, optional
        The target feature to analyze. Default is None.
    report_name : str, optional
        The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
    report_directory : str, optional
        The directory where the report should be saved.
        If None, a temporary directory is created and used.
    open_browser : bool, optional
        Whether to open the report in a web browser. Default is False.

    Returns:
    --------
    Tuple[str, Dict]:
        content: A summary message describing the generated report.
        artifact: A dictionary with the report file path and optionally the report's HTML content.
    """
    print(" * Tool: generate_sweetviz_report")

    # Import sweetviz lazily so the tool fails with a helpful message if absent.
    try:
        import sweetviz as sv
    except ImportError:
        raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")

    import pandas as pd

    # Convert injected raw data to a DataFrame.
    df = pd.DataFrame(data_raw)

    # Resolve the output directory: user-supplied (created if missing) or a temp dir.
    if report_directory:
        os.makedirs(report_directory, exist_ok=True)
    else:
        report_directory = tempfile.mkdtemp()
        print(f" * Using temporary directory: {report_directory}")

    # Build the Sweetviz report and write it to disk.
    report = sv.analyze(df, target_feat=target)
    full_report_path = os.path.join(report_directory, report_name)
    report.show_html(
        filepath=full_report_path,
        open_browser=open_browser,
    )

    # Best-effort read of the HTML so callers can embed it from the artifact.
    try:
        with open(full_report_path, "r", encoding="utf-8") as f:
            html_content = f.read()
    except Exception:
        html_content = None

    content = (
        f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
        f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
    )
    artifact = {
        "report_file": os.path.abspath(full_report_path),
        "report_html": html_content,
    }
    return content, artifact
352
+
@@ -0,0 +1,27 @@
1
+
2
+
3
+ import webbrowser
4
+ import os
5
+
6
def open_html_file_in_browser(file_path: str):
    """
    Opens an HTML file in the default web browser.

    Parameters:
    -----------
    file_path : str
        The file path or URL of the HTML file to open.

    Returns:
    --------
    None
    """
    # Local files need a file:// URL; anything else is passed through untouched.
    target = (
        'file://' + os.path.abspath(file_path)
        if os.path.isfile(file_path)
        else file_path
    )
    webbrowser.open(target)
@@ -0,0 +1,46 @@
1
+ import base64
2
+ from io import BytesIO
3
+ import matplotlib.pyplot as plt
4
+ from PIL import Image
5
+
6
def matplotlib_from_base64(encoded: str, title: str = None, figsize: tuple = (8, 6)):
    """
    Convert a base64-encoded image to a matplotlib plot and display it.

    Parameters:
    -----------
    encoded : str
        The base64-encoded image string.
    title : str, optional
        A title for the plot. Default is None.
    figsize : tuple, optional
        Figure size (width, height) for the plot. Default is (8, 6).

    Returns:
    --------
    fig, ax : tuple
        The matplotlib figure and axes objects.
    """
    # Decode the base64 text to raw bytes and let Pillow read them from a buffer.
    image = Image.open(BytesIO(base64.b64decode(encoded)))

    # Render the decoded image on a fresh axes with the axis frame hidden.
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(image)
    ax.axis('off')

    if title:
        ax.set_title(title)

    plt.show()
    return fig, ax
@@ -0,0 +1,27 @@
1
+
2
+
3
+
4
def get_tool_call_names(messages):
    """
    Extract the tool call names from a list of LangChain messages.

    Parameters
    ----------
    messages : list
        A list of LangChain messages.

    Returns
    -------
    tool_calls : list
        The ``name`` of every message that carries a ``tool_call_id`` key
        (i.e. tool messages), in their original order. Messages that cannot
        be viewed as a mapping are silently skipped.
    """
    tool_calls = []
    for message in messages:
        try:
            # Tool messages expose a tool_call_id field; other message
            # types do not and are skipped.
            if "tool_call_id" in dict(message):
                tool_calls.append(message.name)
        except (TypeError, ValueError, AttributeError):
            # Not mapping-like (or has no .name) -- not a tool message.
            pass
    return tool_calls
27
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9011
3
+ Version: 0.0.0.9013
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -31,9 +31,16 @@ Requires-Dist: psutil
31
31
  Provides-Extra: machine-learning
32
32
  Requires-Dist: h2o; extra == "machine-learning"
33
33
  Requires-Dist: mlflow; extra == "machine-learning"
34
+ Provides-Extra: data-science
35
+ Requires-Dist: pytimetk; extra == "data-science"
36
+ Requires-Dist: missingno; extra == "data-science"
37
+ Requires-Dist: sweetviz; extra == "data-science"
34
38
  Provides-Extra: all
35
39
  Requires-Dist: h2o; extra == "all"
36
40
  Requires-Dist: mlflow; extra == "all"
41
+ Requires-Dist: pytimetk; extra == "all"
42
+ Requires-Dist: missingno; extra == "all"
43
+ Requires-Dist: sweetviz; extra == "all"
37
44
  Dynamic: author
38
45
  Dynamic: author-email
39
46
  Dynamic: classifier
@@ -59,6 +66,8 @@ Dynamic: summary
59
66
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
60
67
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
61
68
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
69
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
70
+
62
71
  </div>
63
72
 
64
73
 
@@ -93,8 +102,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
102
  - [Apps Available Now](#apps-available-now)
94
103
  - [🔥 Agentic Applications](#-agentic-applications)
95
104
  - [Agents Available Now](#agents-available-now)
96
- - [Agents](#agents)
105
+ - [Standard Agents](#standard-agents)
97
106
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
107
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
98
108
  - [Multi-Agents](#multi-agents)
99
109
  - [Agents Coming Soon](#agents-coming-soon)
100
110
  - [Disclaimer](#disclaimer)
@@ -122,7 +132,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
132
 
123
133
  This project is a work in progress. New data science agents will be released soon.
124
134
 
125
- ![AI Data Science Team](/img/ai_data_science_team_.jpg)
135
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
126
136
 
127
137
  ### NEW: Multi-Agents
128
138
 
@@ -142,18 +152,22 @@ This is a top secret project I'm working on. It's a multi-agent data science app
142
152
 
143
153
  #### 🔥 Agentic Applications
144
154
 
145
- 1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
155
+ 1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
156
+
157
+ ![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
158
+
159
+ 2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
146
160
 
147
161
  ### Agents Available Now
148
162
 
149
- #### Agents
163
+ #### Standard Agents
150
164
 
151
165
  1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
152
166
  2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
153
167
  3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
154
168
  4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
155
169
  5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
156
- 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
170
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
157
171
 
158
172
 
159
173
  #### 🔥🔥 NEW! Machine Learning Agents
@@ -161,6 +175,10 @@ This is a top secret project I'm working on. It's a multi-agent data science app
161
175
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
162
176
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
163
177
 
178
+ #### 🔥 NEW! Data Science Agents
179
+
180
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
181
+
164
182
 
165
183
  #### Multi-Agents
166
184
 
@@ -18,6 +18,9 @@ ai_data_science_team/agents/data_visualization_agent.py
18
18
  ai_data_science_team/agents/data_wrangling_agent.py
19
19
  ai_data_science_team/agents/feature_engineering_agent.py
20
20
  ai_data_science_team/agents/sql_database_agent.py
21
+ ai_data_science_team/ds_agents/__init__.py
22
+ ai_data_science_team/ds_agents/eda_tools_agent.py
23
+ ai_data_science_team/ds_agents/modeling_tools_agent.py
21
24
  ai_data_science_team/ml_agents/__init__.py
22
25
  ai_data_science_team/ml_agents/h2o_ml_agent.py
23
26
  ai_data_science_team/ml_agents/h2o_ml_tools_agent.py
@@ -32,10 +35,14 @@ ai_data_science_team/templates/agent_templates.py
32
35
  ai_data_science_team/tools/__init__.py
33
36
  ai_data_science_team/tools/data_loader.py
34
37
  ai_data_science_team/tools/dataframe.py
38
+ ai_data_science_team/tools/eda.py
35
39
  ai_data_science_team/tools/h2o.py
36
40
  ai_data_science_team/tools/mlflow.py
37
41
  ai_data_science_team/tools/sql.py
38
42
  ai_data_science_team/utils/__init__.py
43
+ ai_data_science_team/utils/html.py
39
44
  ai_data_science_team/utils/logging.py
45
+ ai_data_science_team/utils/matplotlib.py
46
+ ai_data_science_team/utils/messages.py
40
47
  ai_data_science_team/utils/plotly.py
41
48
  ai_data_science_team/utils/regex.py
@@ -17,6 +17,14 @@ psutil
17
17
  [all]
18
18
  h2o
19
19
  mlflow
20
+ pytimetk
21
+ missingno
22
+ sweetviz
23
+
24
+ [data_science]
25
+ pytimetk
26
+ missingno
27
+ sweetviz
20
28
 
21
29
  [machine_learning]
22
30
  h2o
@@ -27,7 +27,8 @@ setup(
27
27
  install_requires=parse_requirements("requirements.txt"),
28
28
  extras_require={
29
29
  "machine_learning": ["h2o", "mlflow"],
30
- "all": ["h2o", "mlflow"],
30
+ "data_science": ["pytimetk", "missingno", "sweetviz"],
31
+ "all": ["h2o", "mlflow", "pytimetk", "missingno","sweetviz"],
31
32
  },
32
33
  python_requires=">=3.9",
33
34
  classifiers=[
@@ -1 +0,0 @@
1
- __version__ = "0.0.0.9011"