ai-data-science-team 0.0.0.9011__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "0.0.0.9011"
1
+ __version__ = "0.0.0.9012"
@@ -0,0 +1 @@
1
+ from ai_data_science_team.ds_agents.eda_tools_agent import EDAToolsAgent, make_eda_tools_agent
@@ -0,0 +1,245 @@
1
+
2
+
3
+ from typing import Any, Optional, Annotated, Sequence, List, Dict, Tuple
4
+ import operator
5
+ import pandas as pd
6
+ import os
7
+ from io import StringIO, BytesIO
8
+ import base64
9
+ import matplotlib.pyplot as plt
10
+
11
+ from IPython.display import Markdown
12
+
13
+ from langchain_core.messages import BaseMessage, AIMessage
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+
21
+ from ai_data_science_team.tools.eda import (
22
+ describe_dataset,
23
+ visualize_missing,
24
+ correlation_funnel,
25
+ generate_sweetviz_report,
26
+ )
27
+
28
+
29
+ AGENT_NAME = "exploratory_data_analyst_agent"
30
+
31
+ # Tools handed to the EDA tool-calling agent (all imported from ai_data_science_team.tools.eda).
32
+ EDA_TOOLS = [
33
+ describe_dataset,
34
+ visualize_missing,
35
+ correlation_funnel,
36
+ generate_sweetviz_report,
37
+ ]
38
+
39
class EDAToolsAgent(BaseAgent):
    """
    An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
    missing data visualizations, correlation funnels, EDA reports, etc.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model for generating the tool-calling agent.
    create_react_agent_kwargs : dict, optional
        Additional kwargs for create_react_agent. None is treated as an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Additional kwargs for agent invocation. None is treated as an empty dict.
    """

    def __init__(
        self,
        model: Any,
        create_react_agent_kwargs: Optional[Dict] = None,
        invoke_react_agent_kwargs: Optional[Dict] = None,
    ):
        # Use None as the default and create a fresh dict per instance, so
        # instances never share one mutable default object.
        if create_react_agent_kwargs is None:
            create_react_agent_kwargs = {}
        if invoke_react_agent_kwargs is None:
            invoke_react_agent_kwargs = {}

        self._params = {
            "model": model,
            "create_react_agent_kwargs": create_react_agent_kwargs,
            "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
        }
        self._compiled_graph = self._make_compiled_graph()
        self.response = None

    def _make_compiled_graph(self):
        """
        Creates the compiled state graph for the EDA agent.

        Also clears any stored response, since it belongs to the previous graph.
        """
        self.response = None
        return make_eda_tools_agent(**self._params)

    def update_params(self, **kwargs):
        """
        Updates the agent's parameters and rebuilds the compiled graph.
        """
        self._params.update(kwargs)
        self._compiled_graph = self._make_compiled_graph()

    async def ainvoke_agent(
        self,
        user_instructions: str = None,
        data_raw: pd.DataFrame = None,
        **kwargs
    ):
        """
        Asynchronously runs the agent with user instructions and data.

        The graph's final state is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The instructions for the agent.
        data_raw : pd.DataFrame, optional
            The input data as a DataFrame. It is converted with ``to_dict()``
            before being placed in the graph state.
        **kwargs
            Forwarded to the compiled graph's ``ainvoke``.
        """
        response = await self._compiled_graph.ainvoke(
            {
                "user_instructions": user_instructions,
                "data_raw": data_raw.to_dict() if data_raw is not None else None,
            },
            **kwargs
        )
        self.response = response
        return None

    def invoke_agent(
        self,
        user_instructions: str = None,
        data_raw: pd.DataFrame = None,
        **kwargs
    ):
        """
        Synchronously runs the agent with user instructions and data.

        The graph's final state is stored on ``self.response``; nothing is returned.

        Parameters:
        ----------
        user_instructions : str, optional
            The instructions for the agent.
        data_raw : pd.DataFrame, optional
            The input data as a DataFrame. It is converted with ``to_dict()``
            before being placed in the graph state.
        **kwargs
            Forwarded to the compiled graph's ``invoke``.
        """
        response = self._compiled_graph.invoke(
            {
                "user_instructions": user_instructions,
                "data_raw": data_raw.to_dict() if data_raw is not None else None,
            },
            **kwargs
        )
        self.response = response
        return None

    def get_internal_messages(self, markdown: bool = False):
        """
        Returns internal messages from the agent response.

        Returns None when the agent has not been invoked yet. With
        ``markdown=True`` the messages are rendered into a single Markdown object;
        otherwise the raw message list is returned.
        """
        if self.response is None:
            return None

        messages = self.response["internal_messages"]
        if not markdown:
            return messages

        # Only build the formatted text when it is actually requested.
        pretty_print = "\n\n".join(
            f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
            for msg in messages
        )
        return Markdown(pretty_print)

    def get_artifacts(self, as_dataframe: bool = False):
        """
        Returns the EDA artifacts from the agent response.

        Returns None when the agent has not been invoked yet. With
        ``as_dataframe=True`` the artifact is passed to ``pd.DataFrame``.
        """
        if self.response is None:
            return None

        artifacts = self.response["eda_artifacts"]
        if as_dataframe:
            return pd.DataFrame(artifacts)
        return artifacts

    def get_ai_message(self, markdown: bool = False):
        """
        Returns the AI message from the agent response.

        Returns None when the agent has not been invoked yet. With
        ``markdown=True`` the text is wrapped in a Markdown object.
        """
        if self.response is None:
            return None

        content = self.response["messages"][0].content
        if markdown:
            return Markdown(content)
        return content
165
+
166
def make_eda_tools_agent(
    model: Any,
    create_react_agent_kwargs: Optional[Dict] = None,
    invoke_react_agent_kwargs: Optional[Dict] = None,
):
    """
    Creates an Exploratory Data Analyst Agent that can interact with EDA tools.

    Parameters:
    ----------
    model : Any
        The language model used for tool-calling.
    create_react_agent_kwargs : dict, optional
        Additional kwargs for create_react_agent. None is treated as an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Additional kwargs for agent invocation. None is treated as an empty dict.

    Returns:
    -------
    app : langgraph.graph.CompiledStateGraph
        The compiled state graph for the EDA agent.
    """
    # The parameters are typed Optional, so accept None instead of failing later
    # on ``**None``; a fresh dict also avoids sharing a mutable default.
    if create_react_agent_kwargs is None:
        create_react_agent_kwargs = {}
    if invoke_react_agent_kwargs is None:
        invoke_react_agent_kwargs = {}

    class GraphState(AgentState):
        # Full message trace of the inner tool-calling run (accumulated with operator.add).
        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
        user_instructions: str
        data_raw: dict
        eda_artifacts: dict

    def exploratory_agent(state):
        """Run the tool-calling agent on the user's instructions and collect its output."""
        print(format_agent_name(AGENT_NAME))
        print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")

        tool_node = ToolNode(
            tools=EDA_TOOLS
        )

        eda_agent = create_react_agent(
            model,
            tools=tool_node,
            state_schema=GraphState,
            **create_react_agent_kwargs,
        )

        # NOTE(review): the invoke kwargs dict is passed positionally as the second
        # argument of ``invoke`` (not unpacked) - confirm this matches the keys
        # callers are expected to supply.
        response = eda_agent.invoke(
            {
                "messages": [("user", state["user_instructions"])],
                "data_raw": state["data_raw"],
            },
            invoke_react_agent_kwargs,
        )

        print(" * POST-PROCESSING EDA RESULTS")

        internal_messages = response['messages']
        if not internal_messages:
            return {"internal_messages": [], "eda_artifacts": None}

        # Re-wrap the final message so it is attributed to this agent.
        last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)

        # The artifact is read from the message just before the final one, which
        # may be either a message object or a plain dict.
        last_tool_artifact = None
        if len(internal_messages) > 1:
            last_message = internal_messages[-2]
            if hasattr(last_message, "artifact"):
                last_tool_artifact = last_message.artifact
            elif isinstance(last_message, dict) and "artifact" in last_message:
                last_tool_artifact = last_message["artifact"]

        return {
            "messages": [last_ai_message],
            "internal_messages": internal_messages,
            "eda_artifacts": last_tool_artifact,
        }

    # Single-node graph: START -> exploratory_agent -> END.
    workflow = StateGraph(GraphState)
    workflow.add_node("exploratory_agent", exploratory_agent)
    workflow.add_edge(START, "exploratory_agent")
    workflow.add_edge("exploratory_agent", END)

    app = workflow.compile()
    return app
File without changes
@@ -0,0 +1,293 @@
1
+
2
+ from typing import Annotated, Dict, Tuple, Union
3
+
4
+ import os
5
+
6
+ from langchain.tools import tool
7
+
8
+ from langgraph.prebuilt import InjectedState
9
+
10
+
11
@tool(response_format='content_and_artifact')
def describe_dataset(
    data_raw: Annotated[dict, InjectedState("data_raw")]
) -> Tuple[str, Dict]:
    """
    Tool: describe_dataset
    Description:
        Describe the dataset by computing summary
        statistics using the DataFrame's describe() method.

    Returns:
    -------
    Tuple[str, Dict]:
        content: A textual summary of the DataFrame's descriptive statistics.
        artifact: A dictionary (from DataFrame.describe()) for further inspection.
    """
    print(" * Tool: describe_dataset")
    import pandas as pd

    # Rebuild the frame from the injected dict and summarise every column,
    # numeric or not (include='all').
    summary = pd.DataFrame(data_raw).describe(include='all').to_dict()
    message = "Summary statistics computed using pandas describe()."
    return message, summary
34
+
35
+
36
@tool(response_format='content_and_artifact')
def visualize_missing(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    n_sample: int = None
) -> Tuple[str, Dict]:
    """
    Tool: visualize_missing
    Description:
        Missing value analysis using the missingno library. Generates a matrix plot, bar plot, and heatmap plot.

    Parameters:
    -----------
    data_raw : dict
        The raw data in dictionary format.
    n_sample : int, optional (default: None)
        The number of rows to sample from the dataset if it is large. When the
        dataset has no more than n_sample rows, all rows are used.

    Returns:
    -------
    Tuple[str, Dict]:
        content: A message describing the generated plots.
        artifact: A dict with keys 'matrix_plot', 'bar_plot', and 'heatmap_plot' each containing the
            corresponding base64 encoded PNG image.
    """
    print(" * Tool: visualize_missing")

    try:
        import missingno as msno  # Ensure missingno is installed
    except ImportError:
        raise ImportError("Please install the 'missingno' package to use this tool. pip install missingno")

    import pandas as pd
    import base64
    from io import BytesIO
    import matplotlib.pyplot as plt

    # Create the DataFrame and sample if n_sample is provided. Sampling more
    # rows than exist (without replacement) raises in pandas, so only sample
    # when the request is smaller than the data.
    df = pd.DataFrame(data_raw)
    if n_sample is not None and n_sample < len(df):
        df = df.sample(n=n_sample, random_state=42)

    def create_and_encode_plot(plot_func):
        """Draw one missingno plot and return it as a base64 encoded PNG string."""
        placeholder = plt.figure(figsize=(8, 6))
        try:
            # Call the missingno plotting function.
            plot_func(df)
            plt.tight_layout()
            buf = BytesIO()
            plt.savefig(buf, format="png")
        finally:
            # The plotting function may have opened its own figure instead of
            # drawing on the placeholder (NOTE(review): confirm for the installed
            # missingno version); close both so no figure is left open, even
            # when plotting fails.
            drawn = plt.gcf()
            plt.close(drawn)
            if placeholder is not drawn:
                plt.close(placeholder)
        return base64.b64encode(buf.getvalue()).decode("utf-8")

    # Artifact key -> missingno plotting function, in the order they are generated.
    plotters = {
        "matrix_plot": msno.matrix,
        "bar_plot": msno.bar,
        "heatmap_plot": msno.heatmap,
    }
    encoded_plots = {key: create_and_encode_plot(func) for key, func in plotters.items()}

    content = "Missing data visualizations (matrix, bar, and heatmap) have been generated."
    artifact = encoded_plots
    return content, artifact
104
+
105
+
106
+
107
@tool(response_format='content_and_artifact')
def correlation_funnel(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    target: str,
    target_bin_index: Union[int, str] = -1,
    corr_method: str = "pearson",
    n_bins: int = 4,
    thresh_infreq: float = 0.01,
    name_infreq: str = "-OTHER",
) -> Tuple[str, Dict]:
    """
    Tool: correlation_funnel
    Description:
        Correlation analysis using the correlation funnel method. The tool binarizes the data and computes correlation versus a target column.

    Parameters:
    ----------
    target : str
        The base target column name (e.g., 'Member_Status'). The tool will look for columns that begin
        with this string followed by '__' (e.g., 'Member_Status__Gold', 'Member_Status__Platinum').
    target_bin_index : int or str, default -1
        If an integer, selects the target level by position from the matching columns.
        If a string (e.g., "Yes"), attempts to match to the suffix of a column name
        (i.e., 'target__Yes').
        If the requested level cannot be found, the last matching column is used
        and the returned content says so.
    corr_method : str
        The correlation method ('pearson', 'kendall', or 'spearman'). Default is 'pearson'.
    n_bins : int
        The number of bins to use for binarization. Default is 4.
    thresh_infreq : float
        The threshold for infrequent levels. Default is 0.01.
    name_infreq : str
        The name to use for infrequent levels. Default is '-OTHER'.

    Returns:
    -------
    Tuple[str, Dict]:
        content: A message naming the correlation method and the target level used.
        artifact: A dict with keys 'correlation_data' (the correlation table as lists),
            'plot_image' (base64 encoded PNG, or a dict with an 'error' key if the static
            plot failed) and 'plotly_figure' (the Plotly figure as a dict, or a dict with
            an 'error' key if it failed).
    """
    print(" * Tool: correlation_funnel")
    try:
        # presumably this import is what makes binarize()/correlate() available
        # on DataFrames below - confirm against pytimetk.
        import pytimetk as tk
    except ImportError:
        raise ImportError("Please install the 'pytimetk' package to use this tool. pip install pytimetk")
    import pandas as pd
    import base64
    from io import BytesIO
    import matplotlib.pyplot as plt
    import json
    import plotly.graph_objects as go
    import plotly.io as pio

    # Convert the raw injected state into a DataFrame.
    df = pd.DataFrame(data_raw)

    # Apply the binarization method.
    df_binarized = df.binarize(
        n_bins=n_bins,
        thresh_infreq=thresh_infreq,
        name_infreq=name_infreq,
        one_hot=True
    )

    # Determine the full target column name from the columns that start with "<target>__".
    matching_columns = [col for col in df_binarized.columns if col.startswith(f"{target}__")]

    # Filled in only when the requested level could not be honoured, so the
    # caller is told about the substitution instead of it happening silently.
    fallback_note = ""
    if not matching_columns:
        # No binarized columns for this target: use the provided name as-is.
        full_target = target
    elif isinstance(target_bin_index, str):
        candidate = f"{target}__{target_bin_index}"
        if candidate in matching_columns:
            full_target = candidate
        else:
            # Unknown level name: default to the last matching column.
            full_target = matching_columns[-1]
            fallback_note = (f" Requested target level '{target_bin_index}' was not found; "
                             f"used '{full_target}' instead.")
    else:
        # target_bin_index is an integer position.
        try:
            full_target = matching_columns[target_bin_index]
        except IndexError:
            # Index out of bounds: use the last matching column.
            full_target = matching_columns[-1]
            fallback_note = (f" Requested target level '{target_bin_index}' was not found; "
                             f"used '{full_target}' instead.")

    # Compute correlation funnel using the full target column name.
    df_correlated = df_binarized.correlate(target=full_target, method=corr_method)

    # Attempt to generate a static plot. Best-effort: a failure is reported in
    # the artifact rather than aborting the tool.
    try:
        fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
        buf = BytesIO()
        fig.save(buf, format="png")
        plt.close()
        buf.seek(0)
        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
    except Exception as e:
        encoded = {"error": str(e)}

    # Attempt to generate a Plotly plot (also best-effort).
    try:
        fig = df_correlated.plot_correlation_funnel(engine='plotly')
        fig_json = pio.to_json(fig)
        fig_dict = json.loads(fig_json)
    except Exception as e:
        fig_dict = {"error": str(e)}

    content = (f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
               f"Base target was '{target}' with target_bin_index '{target_bin_index}'."
               f"{fallback_note}")
    artifact = {
        "correlation_data": df_correlated.to_dict(orient="list"),
        "plot_image": encoded,
        "plotly_figure": fig_dict,
    }
    return content, artifact
221
+
222
+
223
+
224
@tool(response_format='content_and_artifact')
def generate_sweetviz_report(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    target: str = None,
    report_name: str = "sweetviz_report.html",
    report_directory: str = os.path.join(os.getcwd(), "reports"),
    open_browser: bool = True,
) -> Tuple[str, Dict]:
    """
    Tool: generate_sweetviz_report
    Description:
        Make an Exploratory Data Analysis (EDA) report using the Sweetviz library.

    Parameters:
    -----------
    data_raw : dict
        The raw data injected as a dictionary (converted from a DataFrame).
    target : str, optional
        The target feature to analyze. Default is None.
    report_name : str, optional
        The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
    report_directory : str, optional
        The directory where the report should be saved. Defaults to a 'reports' directory in the
        working directory that was current when this module was imported.
    open_browser : bool, optional
        Whether to open the report in a web browser. Default is True.

    Returns:
    --------
    Tuple[str, Dict]:
        content: A summary message describing the generated report.
        artifact: A dictionary with the report file path ('report_file') and the report's HTML
            content ('report_html', None if the file could not be read back).
    """
    print(" * Tool: generate_sweetviz_report")
    try:
        import sweetviz as sv
    except ImportError:
        raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
    import pandas as pd

    # Convert injected raw data to a DataFrame.
    df = pd.DataFrame(data_raw)

    # Create the Sweetviz report.
    report = sv.analyze(df, target_feat=target)

    # Ensure the directory exists; exist_ok avoids a check-then-create race.
    os.makedirs(report_directory, exist_ok=True)

    # Determine the full path for the report.
    full_report_path = os.path.join(report_directory, report_name)

    # Save the report, honouring the caller's open_browser choice
    # (it was previously always opened).
    report.show_html(
        filepath=full_report_path,
        open_browser=open_browser,
    )

    # Read the HTML back so it can travel with the artifact. Best-effort: an
    # unreadable file yields None rather than failing the tool.
    try:
        with open(full_report_path, "r", encoding="utf-8") as f:
            html_content = f.read()
    except (OSError, UnicodeError):
        html_content = None

    absolute_report_path = os.path.abspath(full_report_path)
    content = f"Sweetviz EDA report generated and saved as '{absolute_report_path}'."
    artifact = {
        "report_file": absolute_report_path,
        "report_html": html_content,
    }
    return content, artifact
@@ -0,0 +1,27 @@
1
+
2
+
3
+ import webbrowser
4
+ import os
5
+
6
def open_html_file_in_browser(file_path: str):
    """
    Opens an HTML file in the default web browser.

    Parameters:
    -----------
    file_path : str
        The file path or URL of the HTML file to open.

    Returns:
    --------
    None
    """
    from pathlib import Path

    if os.path.isfile(file_path):
        # Build a proper file URL: as_uri() percent-encodes special characters
        # (e.g. spaces) and handles Windows drive letters, which plain
        # 'file://' + path concatenation does not.
        file_url = Path(os.path.abspath(file_path)).as_uri()
    else:
        # If the file doesn't exist locally, assume it's a URL.
        file_url = file_path

    webbrowser.open(file_url)
@@ -0,0 +1,46 @@
1
+ import base64
2
+ from io import BytesIO
3
+ import matplotlib.pyplot as plt
4
+ from PIL import Image
5
+
6
def matplotlib_from_base64(encoded: str, title: str = None, figsize: tuple = (8, 6)):
    """
    Decode a base64 image string, draw it on a new matplotlib figure and show it.

    Parameters:
    -----------
    encoded : str
        The base64-encoded image string.
    title : str, optional
        A title for the plot. Default is None (no title is set).
    figsize : tuple, optional
        Figure size (width, height) for the plot. Default is (8, 6).

    Returns:
    --------
    fig, ax : tuple
        The matplotlib figure and axes objects.
    """
    # base64 text -> raw bytes -> in-memory file -> Pillow image
    image = Image.open(BytesIO(base64.b64decode(encoded)))

    figure, axes = plt.subplots(figsize=figsize)
    axes.imshow(image)
    axes.axis('off')  # the image is the whole plot; ticks add nothing

    if title:
        axes.set_title(title)

    plt.show()

    return figure, axes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9011
3
+ Version: 0.0.0.9012
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -31,9 +31,16 @@ Requires-Dist: psutil
31
31
  Provides-Extra: machine-learning
32
32
  Requires-Dist: h2o; extra == "machine-learning"
33
33
  Requires-Dist: mlflow; extra == "machine-learning"
34
+ Provides-Extra: data-science
35
+ Requires-Dist: pytimetk; extra == "data-science"
36
+ Requires-Dist: missingno; extra == "data-science"
37
+ Requires-Dist: sweetviz; extra == "data-science"
34
38
  Provides-Extra: all
35
39
  Requires-Dist: h2o; extra == "all"
36
40
  Requires-Dist: mlflow; extra == "all"
41
+ Requires-Dist: pytimetk; extra == "all"
42
+ Requires-Dist: missingno; extra == "all"
43
+ Requires-Dist: sweetviz; extra == "all"
37
44
  Dynamic: author
38
45
  Dynamic: author-email
39
46
  Dynamic: classifier
@@ -59,6 +66,8 @@ Dynamic: summary
59
66
  <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
60
67
  <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
61
68
  <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
69
+ <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
70
+
62
71
  </div>
63
72
 
64
73
 
@@ -93,8 +102,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
93
102
  - [Apps Available Now](#apps-available-now)
94
103
  - [🔥 Agentic Applications](#-agentic-applications)
95
104
  - [Agents Available Now](#agents-available-now)
96
- - [Agents](#agents)
105
+ - [Standard Agents](#standard-agents)
97
106
  - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
107
+ - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
98
108
  - [Multi-Agents](#multi-agents)
99
109
  - [Agents Coming Soon](#agents-coming-soon)
100
110
  - [Disclaimer](#disclaimer)
@@ -122,7 +132,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
122
132
 
123
133
  This project is a work in progress. New data science agents will be released soon.
124
134
 
125
- ![AI Data Science Team](/img/ai_data_science_team_.jpg)
135
+ ![AI Data Science Team](/img/ai_data_science_team.jpg)
126
136
 
127
137
  ### NEW: Multi-Agents
128
138
 
@@ -146,14 +156,14 @@ This is a top secret project I'm working on. It's a multi-agent data science app
146
156
 
147
157
  ### Agents Available Now
148
158
 
149
- #### Agents
159
+ #### Standard Agents
150
160
 
151
161
  1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
152
162
  2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
153
163
  3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
154
164
  4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
155
165
  5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
156
- 6. **Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
166
+ 6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
157
167
 
158
168
 
159
169
  #### 🔥🔥 NEW! Machine Learning Agents
@@ -161,6 +171,10 @@ This is a top secret project I'm working on. It's a multi-agent data science app
161
171
  1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
162
172
  2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
163
173
 
174
+ #### 🔥 NEW! Data Science Agents
175
+
176
+ 1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
177
+
164
178
 
165
179
  #### Multi-Agents
166
180
 
@@ -1,5 +1,5 @@
1
1
  ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ai_data_science_team/_version.py,sha256=2kHKuNhTDtlOUMah-41rNdTSBWrq3Lr4KsZbtsfHvPE,26
2
+ ai_data_science_team/_version.py,sha256=BybGt-zGNDZsdJxDMV3xmjghiRF8jmwG3ov_dt_rM7E,26
3
3
  ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
4
  ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
5
5
  ai_data_science_team/agents/data_cleaning_agent.py,sha256=V5tJMwGJK0JwrF_H-7r3S0E8UkAY6ci4BGxqjhZiGBI,27352
@@ -8,6 +8,9 @@ ai_data_science_team/agents/data_visualization_agent.py,sha256=tJy9Ehnh9mvAu6H--
8
8
  ai_data_science_team/agents/data_wrangling_agent.py,sha256=LxzphH-TmrFG0GjejGOjulhPq4SsWFo5Y9tk4WEuN4M,32347
9
9
  ai_data_science_team/agents/feature_engineering_agent.py,sha256=KmPBkj7WUBz6LFUlDDfQHMi7ujXwsH5P9LWRS-F4tdM,31026
10
10
  ai_data_science_team/agents/sql_database_agent.py,sha256=1K2o3NiuKgGKdbMz_Tq9IeQ8xhXjpfGOxx9lArZh1yE,31173
11
+ ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
12
+ ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=y65lsBXhQNOGwWealEho6uFxGSTW7FNfvTUZnW8_XNY,7609
13
+ ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
14
  ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
12
15
  ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=DamR72agrTKfdcdhablmP2mpbj0CqtMonP-QU8p7o9w,33394
13
16
  ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,15 +25,18 @@ ai_data_science_team/templates/agent_templates.py,sha256=Lezp0ugtIP3m5WUOmjLwghN
22
25
  ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
26
  ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
24
27
  ai_data_science_team/tools/dataframe.py,sha256=qSflGDByqqCXv4TjuvOFvGPZmegzeOesb0Y4i4Y0gdQ,4551
28
+ ai_data_science_team/tools/eda.py,sha256=UGD6PC12RsB_UmStvR4TmSqv0noxjM4DkzY-kHjI0-E,10591
25
29
  ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
26
30
  ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
27
31
  ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
28
32
  ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
29
34
  ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
35
+ ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
30
36
  ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
31
37
  ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
32
- ai_data_science_team-0.0.0.9011.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
33
- ai_data_science_team-0.0.0.9011.dist-info/METADATA,sha256=LxSjuOR2ArtBi-jauFoWQx7TGakHg7TJ8leKQIi7fmk,11854
34
- ai_data_science_team-0.0.0.9011.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
35
- ai_data_science_team-0.0.0.9011.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
36
- ai_data_science_team-0.0.0.9011.dist-info/RECORD,,
38
+ ai_data_science_team-0.0.0.9012.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
39
+ ai_data_science_team-0.0.0.9012.dist-info/METADATA,sha256=geRCFLG3YO9uprp_CGKiqCTSThg06L2U6WxVqYKzyM8,12704
40
+ ai_data_science_team-0.0.0.9012.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
41
+ ai_data_science_team-0.0.0.9012.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
42
+ ai_data_science_team-0.0.0.9012.dist-info/RECORD,,