ai-data-science-team 0.0.0.9011__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1 +1 @@
1
- __version__ = "0.0.0.9011"
1
+ __version__ = "0.0.0.9012"
@@ -0,0 +1 @@
1
+ from ai_data_science_team.ds_agents.eda_tools_agent import EDAToolsAgent, make_eda_tools_agent
@@ -0,0 +1,245 @@
1
+
2
+
3
+ from typing import Any, Optional, Annotated, Sequence, List, Dict, Tuple
4
+ import operator
5
+ import pandas as pd
6
+ import os
7
+ from io import StringIO, BytesIO
8
+ import base64
9
+ import matplotlib.pyplot as plt
10
+
11
+ from IPython.display import Markdown
12
+
13
+ from langchain_core.messages import BaseMessage, AIMessage
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+
21
+ from ai_data_science_team.tools.eda import (
22
+ describe_dataset,
23
+ visualize_missing,
24
+ correlation_funnel,
25
+ generate_sweetviz_report,
26
+ )
27
+
28
+
29
+ # Name printed in the run banner and attached as the role of the final AI message.
+ AGENT_NAME = "exploratory_data_analyst_agent"
30
+
31
+ # Tools handed to the ToolNode that backs the ReAct EDA agent.
32
+ EDA_TOOLS = [
33
+ describe_dataset,
34
+ visualize_missing,
35
+ correlation_funnel,
36
+ generate_sweetviz_report,
37
+ ]
38
+
39
class EDAToolsAgent(BaseAgent):
    """
    An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
    missing data visualizations, correlation funnels, EDA reports, etc.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model for generating the tool-calling agent.
    create_react_agent_kwargs : dict, optional
        Additional kwargs for create_react_agent. ``None`` (the default) is
        treated as an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Additional kwargs for agent invocation. ``None`` (the default) is
        treated as an empty dict.

    Attributes:
    ----------
    response : dict or None
        The state returned by the most recent invocation, or None when the
        agent has not been run since it was (re)built.
    """

    # Parameters that must always reach make_eda_tools_agent as dicts.
    _KWARGS_PARAMS = ("create_react_agent_kwargs", "invoke_react_agent_kwargs")

    def __init__(
        self,
        model: Any,
        create_react_agent_kwargs: Optional[Dict] = None,
        invoke_react_agent_kwargs: Optional[Dict] = None,
    ):
        # Defaults are None rather than {} so that instances never share one
        # mutable default dict.
        self._params = {
            "model": model,
            "create_react_agent_kwargs": (
                {} if create_react_agent_kwargs is None else create_react_agent_kwargs
            ),
            "invoke_react_agent_kwargs": (
                {} if invoke_react_agent_kwargs is None else invoke_react_agent_kwargs
            ),
        }
        self._compiled_graph = self._make_compiled_graph()
        self.response = None

    def _make_compiled_graph(self):
        """
        Creates the compiled state graph for the EDA agent.

        Any previous response is discarded because it belongs to the old graph.
        """
        self.response = None
        params = dict(self._params)
        # update_params() may have stored None for a kwargs dict; the factory
        # unpacks these with ** and would fail on None.
        for key in self._KWARGS_PARAMS:
            if params.get(key) is None:
                params[key] = {}
        return make_eda_tools_agent(**params)

    def update_params(self, **kwargs):
        """
        Updates the agent's parameters and rebuilds the compiled graph.

        Parameters:
        ----------
        **kwargs
            Parameter names and their new values. They are forwarded to
            make_eda_tools_agent, so unknown names raise a TypeError there.
        """
        self._params.update(kwargs)
        self._compiled_graph = self._make_compiled_graph()

    async def ainvoke_agent(
        self,
        user_instructions: str = None,
        data_raw: pd.DataFrame = None,
        **kwargs
    ):
        """
        Asynchronously runs the agent with user instructions and data.

        The resulting state is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The instructions for the agent.
        data_raw : pd.DataFrame, optional
            The input data as a DataFrame. It is converted with ``to_dict()``
            before being placed in the graph state.
        **kwargs
            Forwarded to the compiled graph's ``ainvoke``.
        """
        self.response = await self._compiled_graph.ainvoke(
            {
                "user_instructions": user_instructions,
                "data_raw": data_raw.to_dict() if data_raw is not None else None,
            },
            **kwargs
        )
        return None

    def invoke_agent(
        self,
        user_instructions: str = None,
        data_raw: pd.DataFrame = None,
        **kwargs
    ):
        """
        Synchronously runs the agent with user instructions and data.

        The resulting state is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The instructions for the agent.
        data_raw : pd.DataFrame, optional
            The input data as a DataFrame. It is converted with ``to_dict()``
            before being placed in the graph state.
        **kwargs
            Forwarded to the compiled graph's ``invoke``.
        """
        self.response = self._compiled_graph.invoke(
            {
                "user_instructions": user_instructions,
                "data_raw": data_raw.to_dict() if data_raw is not None else None,
            },
            **kwargs
        )
        return None

    def get_internal_messages(self, markdown: bool = False):
        """
        Returns internal messages from the agent response.

        Parameters:
        ----------
        markdown : bool, optional
            When True, the messages are rendered into a single IPython
            Markdown object; otherwise the raw message list is returned.

        Returns:
        -------
        The messages, or None when the agent has not been invoked yet.
        """
        if self.response is None:
            return None
        messages = self.response["internal_messages"]
        if not markdown:
            return messages
        # Only build the formatted text when it is actually requested.
        pretty_print = "\n\n".join(
            f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
            for msg in messages
        )
        return Markdown(pretty_print)

    def get_artifacts(self, as_dataframe: bool = False):
        """
        Returns the EDA artifacts from the agent response.

        Parameters:
        ----------
        as_dataframe : bool, optional
            When True, the artifacts are wrapped in a pandas DataFrame.

        Returns:
        -------
        The artifacts, or None when the agent has not been invoked yet.
        """
        if self.response is None:
            return None
        artifacts = self.response["eda_artifacts"]
        if as_dataframe:
            return pd.DataFrame(artifacts)
        return artifacts

    def get_ai_message(self, markdown: bool = False):
        """
        Returns the AI message from the agent response.

        Parameters:
        ----------
        markdown : bool, optional
            When True, the content is wrapped in an IPython Markdown object.

        Returns:
        -------
        The content of the first entry in the response's "messages", or None
        when the agent has not been invoked yet.
        """
        if self.response is None:
            return None
        content = self.response["messages"][0].content
        if markdown:
            return Markdown(content)
        return content
165
+
166
def make_eda_tools_agent(
    model: Any,
    create_react_agent_kwargs: Optional[Dict] = None,
    invoke_react_agent_kwargs: Optional[Dict] = None,
):
    """
    Creates an Exploratory Data Analyst Agent that can interact with EDA tools.

    The returned graph has a single node. That node builds a ReAct
    tool-calling agent over EDA_TOOLS, runs it on the user instructions, and
    post-processes the resulting message list.

    Parameters:
    ----------
    model : Any
        The language model used for tool-calling.
    create_react_agent_kwargs : dict, optional
        Additional kwargs for create_react_agent. ``None`` (the default) is
        treated as an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Passed as the second positional argument to the ReAct agent's
        ``invoke``. ``None`` (the default) is treated as an empty dict.

    Returns:
    -------
    app : langgraph.graph.CompiledStateGraph
        The compiled state graph for the EDA agent.
    """
    # Defaults are None rather than {} to avoid a shared mutable default, and
    # so that an explicit None does not break the ** unpacking below.
    if create_react_agent_kwargs is None:
        create_react_agent_kwargs = {}
    if invoke_react_agent_kwargs is None:
        invoke_react_agent_kwargs = {}

    class GraphState(AgentState):
        # Accumulated with operator.add across node updates.
        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
        user_instructions: str
        data_raw: dict
        eda_artifacts: dict

    def exploratory_agent(state):
        print(format_agent_name(AGENT_NAME))
        print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")

        tool_node = ToolNode(
            tools=EDA_TOOLS
        )

        eda_agent = create_react_agent(
            model,
            tools=tool_node,
            state_schema=GraphState,
            **create_react_agent_kwargs,
        )

        # data_raw travels in the agent state; the tools declare it through
        # InjectedState("data_raw").
        response = eda_agent.invoke(
            {
                "messages": [("user", state["user_instructions"])],
                "data_raw": state["data_raw"],
            },
            invoke_react_agent_kwargs,
        )

        print(" * POST-PROCESSING EDA RESULTS")

        internal_messages = response['messages']
        if not internal_messages:
            return {"internal_messages": [], "eda_artifacts": None}

        # Re-wrap the final message so it is tagged with this agent's name.
        last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)

        # The message just before the final answer is inspected for an
        # artifact; anything earlier in the conversation is ignored.
        last_tool_artifact = None
        if len(internal_messages) > 1:
            last_message = internal_messages[-2]
            if hasattr(last_message, "artifact"):
                last_tool_artifact = last_message.artifact
            elif isinstance(last_message, dict) and "artifact" in last_message:
                last_tool_artifact = last_message["artifact"]

        return {
            "messages": [last_ai_message],
            "internal_messages": internal_messages,
            "eda_artifacts": last_tool_artifact,
        }

    workflow = StateGraph(GraphState)
    workflow.add_node("exploratory_agent", exploratory_agent)
    workflow.add_edge(START, "exploratory_agent")
    workflow.add_edge("exploratory_agent", END)

    app = workflow.compile()
    return app
File without changes
@@ -0,0 +1,293 @@
1
+
2
+ from typing import Annotated, Dict, Tuple, Union
3
+
4
+ import os
5
+
6
+ from langchain.tools import tool
7
+
8
+ from langgraph.prebuilt import InjectedState
9
+
10
+
11
@tool(response_format='content_and_artifact')
def describe_dataset(
    data_raw: Annotated[dict, InjectedState("data_raw")]
) -> Tuple[str, Dict]:
    """
    Tool: describe_dataset
    Description:
        Describe the dataset by computing summary
        statistics using the DataFrame's describe() method.

    Returns:
    -------
    Tuple[str, Dict]:
        content: A textual summary of the DataFrame's descriptive statistics.
        artifact: A dictionary (from DataFrame.describe()) for further inspection.
    """
    print(" * Tool: describe_dataset")
    import pandas as pd

    # include='all' asks describe() to cover every column, not only numeric ones.
    summary = pd.DataFrame(data_raw).describe(include='all')

    # The content is a fixed status message; the statistics themselves are
    # carried in the artifact.
    return "Summary statistics computed using pandas describe().", summary.to_dict()
34
+
35
+
36
@tool(response_format='content_and_artifact')
def visualize_missing(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    n_sample: int = None
) -> Tuple[str, Dict]:
    """
    Tool: visualize_missing
    Description:
        Missing value analysis using the missingno library. Generates a matrix plot, bar plot, and heatmap plot.

    Parameters:
    -----------
    data_raw : dict
        The raw data in dictionary format.
    n_sample : int, optional (default: None)
        The number of rows to sample from the dataset if it is large.

    Returns:
    -------
    Tuple[str, Dict]:
        content: A message describing the generated plots.
        artifact: A dict with keys 'matrix_plot', 'bar_plot', and 'heatmap_plot' each containing the
                  corresponding base64 encoded PNG image.
    """
    print(" * Tool: visualize_missing")

    try:
        import missingno as msno  # Ensure missingno is installed
    except ImportError as exc:
        raise ImportError(
            "Please install the 'missingno' package to use this tool. pip install missingno"
        ) from exc

    import pandas as pd
    import base64
    from io import BytesIO
    import matplotlib.pyplot as plt

    # Create the DataFrame and sample if n_sample is provided.
    df = pd.DataFrame(data_raw)
    if n_sample is not None:
        # DataFrame.sample raises ValueError when asked for more rows than
        # exist (sampling without replacement), so cap the request.
        df = df.sample(n=min(n_sample, len(df)), random_state=42)

    def _render_to_base64(plot_func) -> str:
        """Draw plot_func(df) and return the PNG as a base64 string."""
        figures_before = set(plt.get_fignums())
        buf = BytesIO()
        plt.figure(figsize=(8, 6))
        try:
            plot_func(df)
            plt.tight_layout()
            # Saves whichever figure is current after the plotting call.
            plt.savefig(buf, format="png")
        finally:
            # The plotting function may open a figure of its own, so close
            # every figure created here rather than only the current one.
            for fig_num in set(plt.get_fignums()) - figures_before:
                plt.close(fig_num)
        return base64.b64encode(buf.getvalue()).decode("utf-8")

    # One base64 encoded image per plot type, in a fixed key order.
    encoded_plots = {
        "matrix_plot": _render_to_base64(msno.matrix),
        "bar_plot": _render_to_base64(msno.bar),
        "heatmap_plot": _render_to_base64(msno.heatmap),
    }

    content = "Missing data visualizations (matrix, bar, and heatmap) have been generated."
    artifact = encoded_plots
    return content, artifact
104
+
105
+
106
+
107
def _resolve_target_column(columns, target, target_bin_index):
    """Pick the binarized column that stands for the requested target level."""
    matching_columns = [col for col in columns if col.startswith(f"{target}__")]
    if not matching_columns:
        # Nothing was expanded into "target__<level>" columns: use the
        # provided target as-is. No warning is emitted.
        return target

    if isinstance(target_bin_index, str):
        # A string names the level directly; fall back to the last matching
        # column when that level does not exist.
        candidate = f"{target}__{target_bin_index}"
        return candidate if candidate in matching_columns else matching_columns[-1]

    # An integer selects by position; out-of-range also falls back to the last.
    try:
        return matching_columns[target_bin_index]
    except IndexError:
        return matching_columns[-1]


@tool(response_format='content_and_artifact')
def correlation_funnel(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    target: str,
    target_bin_index: Union[int, str] = -1,
    corr_method: str = "pearson",
    n_bins: int = 4,
    thresh_infreq: float = 0.01,
    name_infreq: str = "-OTHER",
) -> Tuple[str, Dict]:
    """
    Tool: correlation_funnel
    Description:
        Correlation analysis using the correlation funnel method. The tool binarizes the data and computes correlation versus a target column.

    Parameters:
    ----------
    target : str
        The base target column name (e.g., 'Member_Status'). The tool will look for columns that begin
        with this string followed by '__' (e.g., 'Member_Status__Gold', 'Member_Status__Platinum').
    target_bin_index : int or str, default -1
        If an integer, selects the target level by position from the matching columns.
        If a string (e.g., "Yes"), attempts to match to the suffix of a column name
        (i.e., 'target__Yes').
    corr_method : str
        The correlation method ('pearson', 'kendall', or 'spearman'). Default is 'pearson'.
    n_bins : int
        The number of bins to use for binarization. Default is 4.
    thresh_infreq : float
        The threshold for infrequent levels. Default is 0.01.
    name_infreq : str
        The name to use for infrequent levels. Default is '-OTHER'.
    """
    print(" * Tool: correlation_funnel")
    try:
        # NOTE(review): imported for its side effect - presumably this is what
        # adds binarize()/correlate() to DataFrame; confirm against pytimetk.
        import pytimetk as tk
    except ImportError as exc:
        raise ImportError(
            "Please install the 'pytimetk' package to use this tool. pip install pytimetk"
        ) from exc
    import pandas as pd
    import base64
    from io import BytesIO
    import matplotlib.pyplot as plt
    import json
    import plotly.io as pio

    # Convert the raw injected state into a DataFrame.
    df = pd.DataFrame(data_raw)

    # Apply the binarization method.
    df_binarized = df.binarize(
        n_bins=n_bins,
        thresh_infreq=thresh_infreq,
        name_infreq=name_infreq,
        one_hot=True
    )

    # Determine the full target column name (e.g. "target__Yes").
    full_target = _resolve_target_column(df_binarized.columns, target, target_bin_index)

    # Compute correlation funnel using the full target column name.
    df_correlated = df_binarized.correlate(target=full_target, method=corr_method)

    # Attempt to generate a static plot. This is best effort: on failure the
    # error text is returned in place of the image.
    try:
        fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
        buf = BytesIO()
        fig.save(buf, format="png")
        plt.close()
        buf.seek(0)
        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
    except Exception as e:
        encoded = {"error": str(e)}

    # Attempt to generate a Plotly plot, round-tripped through JSON so the
    # artifact holds plain Python containers. Best effort as above.
    try:
        fig = df_correlated.plot_correlation_funnel(engine='plotly')
        fig_json = pio.to_json(fig)
        fig_dict = json.loads(fig_json)
    except Exception as e:
        fig_dict = {"error": str(e)}

    content = (f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
               f"Base target was '{target}' with target_bin_index '{target_bin_index}'.")
    artifact = {
        "correlation_data": df_correlated.to_dict(orient="list"),
        "plot_image": encoded,
        "plotly_figure": fig_dict,
    }
    return content, artifact
221
+
222
+
223
+
224
@tool(response_format='content_and_artifact')
def generate_sweetviz_report(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    target: str = None,
    report_name: str = "sweetviz_report.html",
    # NOTE: this default is evaluated once, when the module is imported, so it
    # reflects the working directory at import time.
    report_directory: str = os.path.join(os.getcwd(), "reports"),
    open_browser: bool = True,
) -> Tuple[str, Dict]:
    """
    Tool: generate_sweetviz_report
    Description:
        Make an Exploratory Data Analysis (EDA) report using the Sweetviz library.

    Parameters:
    -----------
    data_raw : dict
        The raw data injected as a dictionary (converted from a DataFrame).
    target : str, optional
        The target feature to analyze. Default is None.
    report_name : str, optional
        The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
    report_directory : str, optional
        The directory where the report should be saved. Defaults to a 'reports' directory in the current working directory.
    open_browser : bool, optional
        Whether to open the report in a web browser. Default is True.

    Returns:
    --------
    Tuple[str, Dict]:
        content: A summary message describing the generated report.
        artifact: A dictionary with the report file path and optionally the report's HTML content.
    """
    print(" * Tool: generate_sweetviz_report")
    try:
        import sweetviz as sv
    except ImportError as exc:
        raise ImportError(
            "Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz"
        ) from exc
    import pandas as pd

    # Convert injected raw data to a DataFrame.
    df = pd.DataFrame(data_raw)

    # Create the Sweetviz report.
    report = sv.analyze(df, target_feat=target)

    # Ensure the directory exists. exist_ok avoids the race between checking
    # for the directory and creating it.
    os.makedirs(report_directory, exist_ok=True)

    # Determine the full path for the report.
    full_report_path = os.path.join(report_directory, report_name)
    absolute_report_path = os.path.abspath(full_report_path)

    # Save the report, honouring the caller's open_browser choice instead of
    # always launching a browser.
    report.show_html(
        filepath=full_report_path,
        open_browser=open_browser,
    )

    # Reading the HTML back is best effort: the artifact carries None when the
    # file cannot be read or decoded.
    try:
        with open(full_report_path, "r", encoding="utf-8") as f:
            html_content = f.read()
    except (OSError, UnicodeError):
        html_content = None

    content = f"Sweetviz EDA report generated and saved as '{absolute_report_path}'."
    artifact = {
        "report_file": absolute_report_path,
        "report_html": html_content,
    }
    return content, artifact
@@ -0,0 +1,27 @@
1
+
2
+
3
+ import webbrowser
4
+ import os
5
+
6
def open_html_file_in_browser(file_path: str):
    """
    Opens an HTML file in the default web browser.

    Parameters:
    -----------
    file_path : str
        The file path or URL of the HTML file to open.

    Returns:
    --------
    None
    """
    from pathlib import Path

    if os.path.isfile(file_path):
        # Build the file URL with pathlib rather than by concatenating
        # 'file://': as_uri() percent-encodes special characters and produces
        # the correct form for Windows drive paths.
        file_url = Path(os.path.abspath(file_path)).as_uri()
    else:
        # If the file doesn't exist locally, assume it's a URL.
        file_url = file_path

    webbrowser.open(file_url)
@@ -0,0 +1,46 @@
1
+ import base64
2
+ from io import BytesIO
3
+ import matplotlib.pyplot as plt
4
+ from PIL import Image
5
+
6
def matplotlib_from_base64(encoded: str, title: str = None, figsize: tuple = (8, 6)):
    """
    Render a base64-encoded image on a matplotlib figure and show it.

    Parameters:
    -----------
    encoded : str
        The base64-encoded image string.
    title : str, optional
        A title for the plot. No title is set when this is None or empty.
    figsize : tuple, optional
        Figure size (width, height) for the plot. Default is (8, 6).

    Returns:
    --------
    fig, ax : tuple
        The matplotlib figure and axes objects.
    """
    # base64 text -> raw bytes -> in-memory buffer -> Pillow image.
    image = Image.open(BytesIO(base64.b64decode(encoded)))

    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(image)
    ax.axis('off')  # the axes carry no information for a picture

    if title:
        ax.set_title(title)

    plt.show()
    return fig, ax
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9011
3
+ Version: 0.0.0.9012
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -31,9 +31,16 @@ Requires-Dist: psutil
31
31
  Provides-Extra: machine-learning
32
32
  Requires-Dist: h2o; extra == "machine-learning"
33
33
  Requires-Dist: mlflow; extra == "machine-learning"
34
+ Provides-Extra: data-science
35
+ Requires-Dist: pytimetk; extra == "data-science"
36
+ Requires-Dist: missingno; extra == "data-science"
37
+ Requires-Dist: sweetviz; extra == "data-science"
34
38
  Provides-Extra: all
35
39
  Requires-Dist: h2o; extra == "all"
36
40
  Requires-Dist: mlflow; extra == "all"
41
+ Requires-Dist: pytimetk; extra == "all"
42
+ Requires-Dist: missingno; extra == "all"
43
+ Requires-Dist: sweetviz; extra == "all"
37
44
  Dynamic: author
38
45
  Dynamic: author-email
39
46
  Dynamic: classifier
@@ -59,6 +66,8 @@ Dynamic: summary
59
66
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
60
67
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
61
68
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
69
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
70
+
62
71
  </div>
63
72
 
64
73
 
@@ -93,8 +102,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
102
  - [Apps Available Now](#apps-available-now)
94
103
  - [🔥 Agentic Applications](#-agentic-applications)
95
104
  - [Agents Available Now](#agents-available-now)
96
- - [Agents](#agents)
105
+ - [Standard Agents](#standard-agents)
97
106
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
107
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
98
108
  - [Multi-Agents](#multi-agents)
99
109
  - [Agents Coming Soon](#agents-coming-soon)
100
110
  - [Disclaimer](#disclaimer)
@@ -122,7 +132,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
132
 
123
133
  This project is a work in progress. New data science agents will be released soon.
124
134
 
125
- ![AI Data Science Team](/img/ai_data_science_team_.jpg)
135
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
126
136
 
127
137
  ### NEW: Multi-Agents
128
138
 
@@ -146,14 +156,14 @@ This is a top secret project I'm working on. It's a multi-agent data science app
146
156
 
147
157
  ### Agents Available Now
148
158
 
149
- #### Agents
159
+ #### Standard Agents
150
160
 
151
161
  1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
152
162
  2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
153
163
  3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
154
164
  4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
155
165
  5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
156
- 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
166
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
157
167
 
158
168
 
159
169
  #### 🔥🔥 NEW! Machine Learning Agents
@@ -161,6 +171,10 @@ This is a top secret project I'm working on. It's a multi-agent data science app
161
171
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
162
172
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
163
173
 
174
+ #### 🔥 NEW! Data Science Agents
175
+
176
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
177
+
164
178
 
165
179
  #### Multi-Agents
166
180
 
@@ -1,5 +1,5 @@
1
1
  ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ai_data_science_team/_version.py,sha256=2kHKuNhTDtlOUMah-41rNdTSBWrq3Lr4KsZbtsfHvPE,26
2
+ ai_data_science_team/_version.py,sha256=BybGt-zGNDZsdJxDMV3xmjghiRF8jmwG3ov_dt_rM7E,26
3
3
  ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
4
  ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
5
5
  ai_data_science_team/agents/data_cleaning_agent.py,sha256=V5tJMwGJK0JwrF_H-7r3S0E8UkAY6ci4BGxqjhZiGBI,27352
@@ -8,6 +8,9 @@ ai_data_science_team/agents/data_visualization_agent.py,sha256=tJy9Ehnh9mvAu6H--
8
8
  ai_data_science_team/agents/data_wrangling_agent.py,sha256=LxzphH-TmrFG0GjejGOjulhPq4SsWFo5Y9tk4WEuN4M,32347
9
9
  ai_data_science_team/agents/feature_engineering_agent.py,sha256=KmPBkj7WUBz6LFUlDDfQHMi7ujXwsH5P9LWRS-F4tdM,31026
10
10
  ai_data_science_team/agents/sql_database_agent.py,sha256=1K2o3NiuKgGKdbMz_Tq9IeQ8xhXjpfGOxx9lArZh1yE,31173
11
+ ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
12
+ ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=y65lsBXhQNOGwWealEho6uFxGSTW7FNfvTUZnW8_XNY,7609
13
+ ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
14
  ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
12
15
  ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=DamR72agrTKfdcdhablmP2mpbj0CqtMonP-QU8p7o9w,33394
13
16
  ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,15 +25,18 @@ ai_data_science_team/templates/agent_templates.py,sha256=Lezp0ugtIP3m5WUOmjLwghN
22
25
  ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
26
  ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
24
27
  ai_data_science_team/tools/dataframe.py,sha256=qSflGDByqqCXv4TjuvOFvGPZmegzeOesb0Y4i4Y0gdQ,4551
28
+ ai_data_science_team/tools/eda.py,sha256=UGD6PC12RsB_UmStvR4TmSqv0noxjM4DkzY-kHjI0-E,10591
25
29
  ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
26
30
  ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
27
31
  ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
28
32
  ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
29
34
  ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
35
+ ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
30
36
  ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
31
37
  ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
32
- ai_data_science_team-0.0.0.9011.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
33
- ai_data_science_team-0.0.0.9011.dist-info/METADATA,sha256=LxSjuOR2ArtBi-jauFoWQx7TGakHg7TJ8leKQIi7fmk,11854
34
- ai_data_science_team-0.0.0.9011.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
35
- ai_data_science_team-0.0.0.9011.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
36
- ai_data_science_team-0.0.0.9011.dist-info/RECORD,,
38
+ ai_data_science_team-0.0.0.9012.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
39
+ ai_data_science_team-0.0.0.9012.dist-info/METADATA,sha256=geRCFLG3YO9uprp_CGKiqCTSThg06L2U6WxVqYKzyM8,12704
40
+ ai_data_science_team-0.0.0.9012.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
41
+ ai_data_science_team-0.0.0.9012.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
42
+ ai_data_science_team-0.0.0.9012.dist-info/RECORD,,