ai-data-science-team 0.0.0.9010__tar.gz → 0.0.0.9012__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. {ai_data_science_team-0.0.0.9010/ai_data_science_team.egg-info → ai_data_science_team-0.0.0.9012}/PKG-INFO +26 -9
  2. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/README.md +18 -8
  3. ai_data_science_team-0.0.0.9012/ai_data_science_team/_version.py +1 -0
  4. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/agents/__init__.py +1 -0
  5. ai_data_science_team-0.0.0.9012/ai_data_science_team/agents/data_loader_tools_agent.py +272 -0
  6. ai_data_science_team-0.0.0.9012/ai_data_science_team/ds_agents/__init__.py +1 -0
  7. ai_data_science_team-0.0.0.9012/ai_data_science_team/ds_agents/eda_tools_agent.py +245 -0
  8. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/ml_agents/h2o_ml_agent.py +2 -1
  9. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/ml_agents/mlflow_tools_agent.py +32 -9
  10. ai_data_science_team-0.0.0.9012/ai_data_science_team/tools/__init__.py +0 -0
  11. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/tools/data_loader.py +95 -25
  12. ai_data_science_team-0.0.0.9012/ai_data_science_team/tools/eda.py +293 -0
  13. ai_data_science_team-0.0.0.9012/ai_data_science_team/utils/__init__.py +0 -0
  14. ai_data_science_team-0.0.0.9012/ai_data_science_team/utils/html.py +27 -0
  15. ai_data_science_team-0.0.0.9012/ai_data_science_team/utils/matplotlib.py +46 -0
  16. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012/ai_data_science_team.egg-info}/PKG-INFO +26 -9
  17. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team.egg-info/SOURCES.txt +7 -0
  18. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team.egg-info/requires.txt +8 -0
  19. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/setup.py +2 -1
  20. ai_data_science_team-0.0.0.9010/ai_data_science_team/_version.py +0 -1
  21. ai_data_science_team-0.0.0.9010/ai_data_science_team/agents/data_loader_tools_agent.py +0 -69
  22. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/LICENSE +0 -0
  23. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/MANIFEST.in +0 -0
  24. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/__init__.py +0 -0
  25. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/agents/data_cleaning_agent.py +0 -0
  26. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/agents/data_visualization_agent.py +0 -0
  27. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/agents/data_wrangling_agent.py +0 -0
  28. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/agents/feature_engineering_agent.py +0 -0
  29. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/agents/sql_database_agent.py +0 -0
  30. /ai_data_science_team-0.0.0.9010/ai_data_science_team/parsers/__init__.py → /ai_data_science_team-0.0.0.9012/ai_data_science_team/ds_agents/modeling_tools_agent.py +0 -0
  31. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/ml_agents/__init__.py +0 -0
  32. /ai_data_science_team-0.0.0.9010/ai_data_science_team/tools/__init__.py → /ai_data_science_team-0.0.0.9012/ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  33. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/multiagents/__init__.py +0 -0
  34. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/multiagents/sql_data_analyst.py +0 -0
  35. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/multiagents/supervised_data_analyst.py +0 -0
  36. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/orchestration.py +0 -0
  37. {ai_data_science_team-0.0.0.9010/ai_data_science_team/utils → ai_data_science_team-0.0.0.9012/ai_data_science_team/parsers}/__init__.py +0 -0
  38. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/parsers/parsers.py +0 -0
  39. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/templates/__init__.py +0 -0
  40. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/templates/agent_templates.py +0 -0
  41. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/tools/dataframe.py +0 -0
  42. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/tools/h2o.py +0 -0
  43. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/tools/mlflow.py +0 -0
  44. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/tools/sql.py +0 -0
  45. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/utils/logging.py +0 -0
  46. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/utils/plotly.py +0 -0
  47. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team/utils/regex.py +0 -0
  48. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team.egg-info/dependency_links.txt +0 -0
  49. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/ai_data_science_team.egg-info/top_level.txt +0 -0
  50. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/requirements.txt +0 -0
  51. {ai_data_science_team-0.0.0.9010 → ai_data_science_team-0.0.0.9012}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9010
3
+ Version: 0.0.0.9012
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -31,9 +31,16 @@ Requires-Dist: psutil
31
31
  Provides-Extra: machine-learning
32
32
  Requires-Dist: h2o; extra == "machine-learning"
33
33
  Requires-Dist: mlflow; extra == "machine-learning"
34
+ Provides-Extra: data-science
35
+ Requires-Dist: pytimetk; extra == "data-science"
36
+ Requires-Dist: missingno; extra == "data-science"
37
+ Requires-Dist: sweetviz; extra == "data-science"
34
38
  Provides-Extra: all
35
39
  Requires-Dist: h2o; extra == "all"
36
40
  Requires-Dist: mlflow; extra == "all"
41
+ Requires-Dist: pytimetk; extra == "all"
42
+ Requires-Dist: missingno; extra == "all"
43
+ Requires-Dist: sweetviz; extra == "all"
37
44
  Dynamic: author
38
45
  Dynamic: author-email
39
46
  Dynamic: classifier
@@ -59,6 +66,8 @@ Dynamic: summary
59
66
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
60
67
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
61
68
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
69
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
70
+
62
71
  </div>
63
72
 
64
73
 
@@ -93,8 +102,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
102
  - [Apps Available Now](#apps-available-now)
94
103
  - [🔥 Agentic Applications](#-agentic-applications)
95
104
  - [Agents Available Now](#agents-available-now)
105
+ - [Standard Agents](#standard-agents)
96
106
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
97
- - [Data Science Agents](#data-science-agents-1)
107
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
98
108
  - [Multi-Agents](#multi-agents)
99
109
  - [Agents Coming Soon](#agents-coming-soon)
100
110
  - [Disclaimer](#disclaimer)
@@ -122,7 +132,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
132
 
123
133
  This project is a work in progress. New data science agents will be released soon.
124
134
 
125
- ![Data Science Team](/img/ai_data_science_team.jpg)
135
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
126
136
 
127
137
  ### NEW: Multi-Agents
128
138
 
@@ -146,18 +156,25 @@ This is a top secret project I'm working on. It's a multi-agent data science app
146
156
 
147
157
  ### Agents Available Now
148
158
 
159
+ #### Standard Agents
160
+
161
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
162
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
163
+ 3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
164
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
165
+ 5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
166
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
167
+
168
+
149
169
  #### 🔥🔥 NEW! Machine Learning Agents
150
170
 
151
171
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
152
172
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
153
173
 
154
- #### Data Science Agents
174
+ #### 🔥 NEW! Data Science Agents
175
+
176
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
155
177
 
156
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
157
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
158
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
159
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
160
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
161
178
 
162
179
  #### Multi-Agents
163
180
 
@@ -12,6 +12,8 @@
12
12
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
13
13
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
14
14
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
15
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
16
+
15
17
  </div>
16
18
 
17
19
 
@@ -46,8 +48,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
46
48
  - [Apps Available Now](#apps-available-now)
47
49
  - [🔥 Agentic Applications](#-agentic-applications)
48
50
  - [Agents Available Now](#agents-available-now)
51
+ - [Standard Agents](#standard-agents)
49
52
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
50
- - [Data Science Agents](#data-science-agents-1)
53
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
51
54
  - [Multi-Agents](#multi-agents)
52
55
  - [Agents Coming Soon](#agents-coming-soon)
53
56
  - [Disclaimer](#disclaimer)
@@ -75,7 +78,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
75
78
 
76
79
  This project is a work in progress. New data science agents will be released soon.
77
80
 
78
- ![Data Science Team](/img/ai_data_science_team.jpg)
81
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
79
82
 
80
83
  ### NEW: Multi-Agents
81
84
 
@@ -99,18 +102,25 @@ This is a top secret project I'm working on. It's a multi-agent data science app
99
102
 
100
103
  ### Agents Available Now
101
104
 
105
+ #### Standard Agents
106
+
107
+ 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
108
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
109
+ 3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
110
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
111
+ 5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
112
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
113
+
114
+
102
115
  #### 🔥🔥 NEW! Machine Learning Agents
103
116
 
104
117
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
105
118
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
106
119
 
107
- #### Data Science Agents
120
+ #### 🔥 NEW! Data Science Agents
121
+
122
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
108
123
 
109
- 1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
110
- 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
111
- 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
112
- 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
113
- 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
114
124
 
115
125
  #### Multi-Agents
116
126
 
@@ -0,0 +1 @@
1
+ __version__ = "0.0.0.9012"
@@ -3,3 +3,4 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
+ from ai_data_science_team.agents.data_loader_tools_agent import make_data_loader_tools_agent, DataLoaderToolsAgent
@@ -0,0 +1,272 @@
1
+
2
+
3
+
4
+ from typing import Any, Optional, Annotated, Sequence, List, Dict
5
+ import operator
6
+
7
+ import pandas as pd
8
+ import os
9
+
10
+ from IPython.display import Markdown
11
+
12
+ from langchain_core.messages import BaseMessage, AIMessage
13
+
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+ from ai_data_science_team.tools.data_loader import (
21
+ load_directory,
22
+ load_file,
23
+ list_directory_contents,
24
+ list_directory_recursive,
25
+ get_file_info,
26
+ search_files_by_pattern,
27
+ )
28
+
29
# Identifier passed to format_agent_name() for the console banner and used as
# the role of the final AI message produced by the agent node.
AGENT_NAME = "data_loader_tools_agent"

# Data-loading and file-system tools wrapped in the ReAct agent's ToolNode.
tools = [
    load_directory, load_file,
    list_directory_contents, list_directory_recursive,
    get_file_info, search_files_by_pattern,
]
39
+
40
class DataLoaderToolsAgent(BaseAgent):
    """
    A Data Loader Agent that can interact with data loading tools and search for files in your file system.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model used to generate the tool calling agent.
    create_react_agent_kwargs : dict, optional
        Additional keyword arguments to pass to the create_react_agent function.
        Defaults to an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Additional arguments forwarded to the react agent's invoke call.
        Defaults to an empty dict.

    Methods:
    --------
    update_params(**kwargs)
        Updates the agent's parameters and rebuilds the compiled graph.
    ainvoke_agent(user_instructions: str=None, **kwargs)
        Runs the agent with the given user instructions asynchronously.
    invoke_agent(user_instructions: str=None, **kwargs)
        Runs the agent with the given user instructions.
    get_internal_messages(markdown: bool=False)
        Returns the internal messages from the agent's response.
    get_artifacts(as_dataframe: bool=False)
        Returns the data loader artifacts from the agent's response.
    get_ai_message(markdown: bool=False)
        Returns the AI message from the agent's response.

    Notes:
    ------
    The ``get_*`` accessors read ``self.response``, which is ``None`` until
    ``invoke_agent`` or ``ainvoke_agent`` has been run.
    """

    def __init__(
        self,
        model: Any,
        create_react_agent_kwargs: Optional[Dict] = None,
        invoke_react_agent_kwargs: Optional[Dict] = None,
    ):
        # ``None`` defaults avoid a single ``{}`` object being shared by every
        # instance; they are normalised to fresh dicts here because the graph
        # factory unpacks / forwards them.
        self._params = {
            "model": model,
            "create_react_agent_kwargs": (
                {} if create_react_agent_kwargs is None else create_react_agent_kwargs
            ),
            "invoke_react_agent_kwargs": (
                {} if invoke_react_agent_kwargs is None else invoke_react_agent_kwargs
            ),
        }
        self._compiled_graph = self._make_compiled_graph()
        self.response = None

    def _make_compiled_graph(self):
        """
        Creates the compiled graph for the agent.

        Also clears any previously stored response, since it was produced by
        the graph that is being replaced.
        """
        self.response = None
        return make_data_loader_tools_agent(**self._params)

    def update_params(self, **kwargs):
        """
        Updates the agent's parameters and rebuilds the compiled graph.

        Parameters:
        ----------
        kwargs : dict
            Parameter names and their new values; each is stored in the
            agent's parameter dict before the graph is rebuilt.
        """
        for k, v in kwargs.items():
            self._params[k] = v
        self._compiled_graph = self._make_compiled_graph()

    async def ainvoke_agent(
        self,
        user_instructions: str = None,
        **kwargs
    ):
        """
        Runs the agent with the given user instructions asynchronously.

        The result is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The user instructions to pass to the agent.
        kwargs : dict, optional
            Additional keyword arguments to pass to the agents ainvoke method.

        """
        response = await self._compiled_graph.ainvoke(
            {
                "user_instructions": user_instructions,
            },
            **kwargs
        )
        self.response = response
        return None

    def invoke_agent(
        self,
        user_instructions: str = None,
        **kwargs
    ):
        """
        Runs the agent with the given user instructions.

        The result is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The user instructions to pass to the agent.
        kwargs : dict, optional
            Additional keyword arguments to pass to the agents invoke method.

        """
        response = self._compiled_graph.invoke(
            {
                "user_instructions": user_instructions,
            },
            **kwargs
        )
        self.response = response
        return None

    def get_internal_messages(self, markdown: bool = False):
        """
        Returns the internal messages from the agent's response.

        Parameters:
        ----------
        markdown : bool, optional
            If True, the messages are rendered into a single IPython
            ``Markdown`` object; otherwise the raw message sequence is returned.
        """
        messages = self.response["internal_messages"]
        if not markdown:
            return messages
        # Only build the formatted text when it is actually requested.
        pretty_print = "\n\n".join(
            f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
            for msg in messages
        )
        return Markdown(pretty_print)

    def get_artifacts(self, as_dataframe: bool = False):
        """
        Returns the data loader artifacts from the agent's response.

        Parameters:
        ----------
        as_dataframe : bool, optional
            If True, the artifacts are wrapped in a ``pandas.DataFrame``.
        """
        artifacts = self.response["data_loader_artifacts"]
        if as_dataframe:
            return pd.DataFrame(artifacts)
        return artifacts

    def get_ai_message(self, markdown: bool = False):
        """
        Returns the AI message from the agent's response.

        Parameters:
        ----------
        markdown : bool, optional
            If True, the message content is wrapped in an IPython ``Markdown``
            object; otherwise the raw content is returned.
        """
        content = self.response["messages"][0].content
        if markdown:
            return Markdown(content)
        return content
177
+
178
+
179
+
180
def make_data_loader_tools_agent(
    model: Any,
    create_react_agent_kwargs: Optional[Dict] = None,
    invoke_react_agent_kwargs: Optional[Dict] = None,
):
    """
    Creates a Data Loader Agent that can interact with data loading tools.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model used to generate the tool calling agent.
    create_react_agent_kwargs : dict, optional
        Additional keyword arguments to pass to the create_react_agent function.
        Defaults to an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Forwarded as the second positional argument of the react agent's
        ``invoke`` call (not unpacked as keyword arguments). Defaults to an
        empty dict.

    Returns:
    --------
    app
        The compiled state graph: a single node that runs a ReAct
        tool-calling agent over the data loading tools.
    """
    # ``None`` defaults avoid sharing one mutable dict across calls.
    if create_react_agent_kwargs is None:
        create_react_agent_kwargs = {}
    if invoke_react_agent_kwargs is None:
        invoke_react_agent_kwargs = {}

    class GraphState(AgentState):
        # Full message history of the inner ReAct run.
        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
        # Instructions supplied by the caller.
        user_instructions: str
        # Artifact taken from the second-to-last message of the ReAct run.
        data_loader_artifacts: dict

    def data_loader_agent(state):
        """Graph node: run the ReAct agent and collect its final message and artifact."""

        print(format_agent_name(AGENT_NAME))
        print(" ")

        print(" * RUN REACT TOOL-CALLING AGENT")

        tool_node = ToolNode(
            tools=tools
        )

        # Named differently from the enclosing node function to avoid shadowing it.
        react_agent = create_react_agent(
            model,
            tools=tool_node,
            state_schema=GraphState,
            **create_react_agent_kwargs,
        )

        response = react_agent.invoke(
            {
                "messages": [("user", state["user_instructions"])],
            },
            invoke_react_agent_kwargs,
        )

        print(" * POST-PROCESS RESULTS")

        internal_messages = response['messages']

        # Nothing came back: report an empty result using the keys that
        # GraphState actually declares.
        if not internal_messages:
            return {
                "internal_messages": [],
                "data_loader_artifacts": None,
            }

        # Re-wrap the final message content as an AI message tagged with this agent's name.
        last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)

        # Look for an artifact on the second-to-last message (presumably the
        # last tool result preceding the final answer - confirm against LangGraph).
        last_tool_artifact = None
        if len(internal_messages) > 1:
            previous_message = internal_messages[-2]
            if hasattr(previous_message, "artifact"):
                last_tool_artifact = previous_message.artifact
            elif isinstance(previous_message, dict) and "artifact" in previous_message:
                last_tool_artifact = previous_message["artifact"]

        return {
            "messages": [last_ai_message],
            "internal_messages": internal_messages,
            "data_loader_artifacts": last_tool_artifact,
        }

    workflow = StateGraph(GraphState)

    workflow.add_node("data_loader_agent", data_loader_agent)

    workflow.add_edge(START, "data_loader_agent")
    workflow.add_edge("data_loader_agent", END)

    app = workflow.compile()

    return app
272
+
@@ -0,0 +1 @@
1
+ from ai_data_science_team.ds_agents.eda_tools_agent import EDAToolsAgent, make_eda_tools_agent
@@ -0,0 +1,245 @@
1
+
2
+
3
+ from typing import Any, Optional, Annotated, Sequence, List, Dict, Tuple
4
+ import operator
5
+ import pandas as pd
6
+ import os
7
+ from io import StringIO, BytesIO
8
+ import base64
9
+ import matplotlib.pyplot as plt
10
+
11
+ from IPython.display import Markdown
12
+
13
+ from langchain_core.messages import BaseMessage, AIMessage
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+
21
+ from ai_data_science_team.tools.eda import (
22
+ describe_dataset,
23
+ visualize_missing,
24
+ correlation_funnel,
25
+ generate_sweetviz_report,
26
+ )
27
+
28
+
29
# Identifier passed to format_agent_name() for the console banner and used as
# the role of the final AI message produced by the agent node.
AGENT_NAME = "exploratory_data_analyst_agent"

# EDA tools wrapped in the ReAct agent's ToolNode.
EDA_TOOLS = [
    describe_dataset, visualize_missing,
    correlation_funnel, generate_sweetviz_report,
]
38
+
39
class EDAToolsAgent(BaseAgent):
    """
    An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
    missing data visualizations, correlation funnels, EDA reports, etc.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model for generating the tool-calling agent.
    create_react_agent_kwargs : dict, optional
        Additional kwargs for create_react_agent. Defaults to an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Additional kwargs for agent invocation. Defaults to an empty dict.

    Methods:
    --------
    update_params(**kwargs)
        Updates the agent's parameters and rebuilds the compiled graph.
    ainvoke_agent(user_instructions: str=None, data_raw: pd.DataFrame=None, **kwargs)
        Asynchronously runs the agent with user instructions and data.
    invoke_agent(user_instructions: str=None, data_raw: pd.DataFrame=None, **kwargs)
        Synchronously runs the agent with user instructions and data.
    get_internal_messages(markdown: bool=False)
        Returns internal messages from the agent response.
    get_artifacts(as_dataframe: bool=False)
        Returns the EDA artifacts from the agent response.
    get_ai_message(markdown: bool=False)
        Returns the AI message from the agent response.

    Notes:
    ------
    The ``get_*`` accessors read ``self.response``, which is ``None`` until
    ``invoke_agent`` or ``ainvoke_agent`` has been run.
    """

    def __init__(
        self,
        model: Any,
        create_react_agent_kwargs: Optional[Dict] = None,
        invoke_react_agent_kwargs: Optional[Dict] = None,
    ):
        # ``None`` defaults avoid a single ``{}`` object being shared by every
        # instance; they are normalised to fresh dicts here because the graph
        # factory unpacks / forwards them.
        self._params = {
            "model": model,
            "create_react_agent_kwargs": (
                {} if create_react_agent_kwargs is None else create_react_agent_kwargs
            ),
            "invoke_react_agent_kwargs": (
                {} if invoke_react_agent_kwargs is None else invoke_react_agent_kwargs
            ),
        }
        self._compiled_graph = self._make_compiled_graph()
        self.response = None

    def _make_compiled_graph(self):
        """
        Creates the compiled state graph for the EDA agent.

        Also clears any previously stored response, since it was produced by
        the graph that is being replaced.
        """
        self.response = None
        return make_eda_tools_agent(**self._params)

    def update_params(self, **kwargs):
        """
        Updates the agent's parameters and rebuilds the compiled graph.

        Parameters:
        ----------
        kwargs : dict
            Parameter names and their new values; each is stored in the
            agent's parameter dict before the graph is rebuilt.
        """
        for k, v in kwargs.items():
            self._params[k] = v
        self._compiled_graph = self._make_compiled_graph()

    @staticmethod
    def _build_graph_input(user_instructions, data_raw):
        """Builds the graph input; the DataFrame is converted with ``to_dict()`` when given."""
        return {
            "user_instructions": user_instructions,
            "data_raw": data_raw.to_dict() if data_raw is not None else None,
        }

    async def ainvoke_agent(
        self,
        user_instructions: str = None,
        data_raw: pd.DataFrame = None,
        **kwargs
    ):
        """
        Asynchronously runs the agent with user instructions and data.

        The result is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The instructions for the agent.
        data_raw : pd.DataFrame, optional
            The input data as a DataFrame.
        kwargs : dict, optional
            Additional keyword arguments passed to the compiled graph's ainvoke method.
        """
        response = await self._compiled_graph.ainvoke(
            self._build_graph_input(user_instructions, data_raw),
            **kwargs
        )
        self.response = response
        return None

    def invoke_agent(
        self,
        user_instructions: str = None,
        data_raw: pd.DataFrame = None,
        **kwargs
    ):
        """
        Synchronously runs the agent with user instructions and data.

        The result is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The instructions for the agent.
        data_raw : pd.DataFrame, optional
            The input data as a DataFrame.
        kwargs : dict, optional
            Additional keyword arguments passed to the compiled graph's invoke method.
        """
        response = self._compiled_graph.invoke(
            self._build_graph_input(user_instructions, data_raw),
            **kwargs
        )
        self.response = response
        return None

    def get_internal_messages(self, markdown: bool = False):
        """
        Returns internal messages from the agent response.

        Parameters:
        ----------
        markdown : bool, optional
            If True, the messages are rendered into a single IPython
            ``Markdown`` object; otherwise the raw message sequence is returned.
        """
        messages = self.response["internal_messages"]
        if not markdown:
            return messages
        # Only build the formatted text when it is actually requested.
        pretty_print = "\n\n".join(
            f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
            for msg in messages
        )
        return Markdown(pretty_print)

    def get_artifacts(self, as_dataframe: bool = False):
        """
        Returns the EDA artifacts from the agent response.

        Parameters:
        ----------
        as_dataframe : bool, optional
            If True, the artifacts are wrapped in a ``pandas.DataFrame``.
        """
        artifacts = self.response["eda_artifacts"]
        if as_dataframe:
            return pd.DataFrame(artifacts)
        return artifacts

    def get_ai_message(self, markdown: bool = False):
        """
        Returns the AI message from the agent response.

        Parameters:
        ----------
        markdown : bool, optional
            If True, the message content is wrapped in an IPython ``Markdown``
            object; otherwise the raw content is returned.
        """
        content = self.response["messages"][0].content
        if markdown:
            return Markdown(content)
        return content
165
+
166
def make_eda_tools_agent(
    model: Any,
    create_react_agent_kwargs: Optional[Dict] = None,
    invoke_react_agent_kwargs: Optional[Dict] = None,
):
    """
    Creates an Exploratory Data Analyst Agent that can interact with EDA tools.

    Parameters:
    ----------
    model : Any
        The language model used for tool-calling.
    create_react_agent_kwargs : dict, optional
        Additional kwargs for create_react_agent. Defaults to an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Forwarded as the second positional argument of the react agent's
        ``invoke`` call (not unpacked as keyword arguments). Defaults to an
        empty dict.

    Returns:
    -------
    app : langgraph.graph.CompiledStateGraph
        The compiled state graph for the EDA agent.
    """
    # ``None`` defaults avoid sharing one mutable dict across calls.
    if create_react_agent_kwargs is None:
        create_react_agent_kwargs = {}
    if invoke_react_agent_kwargs is None:
        invoke_react_agent_kwargs = {}

    class GraphState(AgentState):
        # Full message history of the inner ReAct run.
        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
        # Instructions supplied by the caller.
        user_instructions: str
        # Input data as a dict (the agent class passes DataFrame.to_dict()).
        data_raw: dict
        # Artifact taken from the second-to-last message of the ReAct run.
        eda_artifacts: dict

    def exploratory_agent(state):
        """Graph node: run the ReAct agent over the EDA tools and collect its results."""
        print(format_agent_name(AGENT_NAME))
        print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")

        tool_node = ToolNode(
            tools=EDA_TOOLS
        )

        eda_agent = create_react_agent(
            model,
            tools=tool_node,
            state_schema=GraphState,
            **create_react_agent_kwargs,
        )

        # The raw data is placed in the inner agent's state alongside the user message.
        response = eda_agent.invoke(
            {
                "messages": [("user", state["user_instructions"])],
                "data_raw": state["data_raw"],
            },
            invoke_react_agent_kwargs,
        )

        print(" * POST-PROCESSING EDA RESULTS")

        internal_messages = response['messages']
        if not internal_messages:
            return {"internal_messages": [], "eda_artifacts": None}

        # Re-wrap the final message content as an AI message tagged with this agent's name.
        last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)

        # Look for an artifact on the second-to-last message (presumably the
        # last tool result preceding the final answer - confirm against LangGraph).
        last_tool_artifact = None
        if len(internal_messages) > 1:
            previous_message = internal_messages[-2]
            if hasattr(previous_message, "artifact"):
                last_tool_artifact = previous_message.artifact
            elif isinstance(previous_message, dict) and "artifact" in previous_message:
                last_tool_artifact = previous_message["artifact"]

        return {
            "messages": [last_ai_message],
            "internal_messages": internal_messages,
            "eda_artifacts": last_tool_artifact,
        }

    workflow = StateGraph(GraphState)
    workflow.add_node("exploratory_agent", exploratory_agent)
    workflow.add_edge(START, "exploratory_agent")
    workflow.add_edge("exploratory_agent", END)

    app = workflow.compile()
    return app