ai-data-science-team 0.0.0.9010__py3-none-any.whl → 0.0.0.9012__py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
@@ -1,41 +1,77 @@
 
 from langchain.tools import tool
+from langgraph.prebuilt import InjectedState
 
 import pandas as pd
+import os
 
-from typing import Tuple, List, Dict
+from typing import Tuple, List, Dict, Optional, Annotated
 
 
 @tool(response_format='content_and_artifact')
-def load_directory(dir_path: str) -> Tuple[str, Dict]:
+def load_directory(
+    directory_path: str = os.getcwd(),
+    file_type: Optional[str] = None
+) -> Tuple[str, Dict]:
     """
     Tool: load_directory
-    Description: Loads all recognized tabular files in a directory.
+    Description: Loads all recognized tabular files in a directory.
+        If file_type is specified (e.g., 'csv'), only files
+        with that extension are loaded.
 
     Parameters:
     ----------
-    dir_path : str
-        The path to the directory to load.
+    directory_path : str
+        The path to the directory to load. Defaults to the current working directory.
+
+    file_type : str, optional
+        The extension of the file type you want to load exclusively
+        (e.g., 'csv', 'xlsx', 'parquet'). If None or not provided,
+        attempts to load all recognized tabular files.
 
     Returns:
     -------
     Tuple[str, Dict]
         A tuple containing a message and a dictionary of data frames.
     """
-    print(" * Tool: load_directory")
+    print(f" * Tool: load_directory | {directory_path}")
+
     import os
     import pandas as pd
+
+    if directory_path is None:
+        return "No directory path provided.", {}
+
+    if not os.path.isdir(directory_path):
+        return f"Directory not found: {directory_path}", {}
+
     data_frames = {}
-    for filename in os.listdir(dir_path):
-        file_path = os.path.join(dir_path, filename)
+
+    for filename in os.listdir(directory_path):
+        file_path = os.path.join(directory_path, filename)
+
         # Skip directories
         if os.path.isdir(file_path):
             continue
+
+        # If file_type is specified, only process files that match.
+        if file_type:
+            # Make sure extension check is case-insensitive
+            if not filename.lower().endswith(f".{file_type.lower()}"):
+                continue
+
         try:
+            # Attempt to auto-detect and load the file
            data_frames[filename] = auto_load_file(file_path).to_dict()
         except Exception as e:
+            # If loading fails, record the error message
             data_frames[filename] = f"Error loading file: {e}"
-    return f"Returned the following data frames: {list(data_frames.keys())}", data_frames
+
+    return (
+        f"Returned the following data frames: {list(data_frames.keys())}",
+        data_frames
+    )
+
 
 @tool(response_format='content_and_artifact')
 def load_file(file_path: str) -> Tuple[str, Dict]:
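
A minimal usage sketch of the updated tool follows (the "./data" path is hypothetical; invoking a content_and_artifact tool with a plain args dict returns only the content string, so the underlying function is called via .func here to get the full tuple):

    # Hypothetical sketch: exercise load_directory with the new file_type filter.
    message, frames = load_directory.func(directory_path="./data", file_type="csv")
    print(message)  # e.g. "Returned the following data frames: ['sales.csv', ...]"
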
@@ -52,12 +88,15 @@ def load_file(file_path: str) -> Tuple[str, Dict]:
     Tuple[str, Dict]
         A tuple containing a message and a dictionary of the data frame.
     """
-    print(" * Tool: load_file")
+    print(f" * Tool: load_file | {file_path}")
     return f"Returned the following data frame from this file: {file_path}", auto_load_file(file_path).to_dict()
 
 
 @tool(response_format='content_and_artifact')
-def list_directory_contents(directory_path: str, show_hidden: bool = False) -> Tuple[List[str], List[Dict]]:
+def list_directory_contents(
+    directory_path: str = os.getcwd(),
+    show_hidden: bool = False
+) -> Tuple[List[str], List[Dict]]:
     """
     Tool: list_directory_contents
     Description: Lists all files and folders in the specified directory.
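
A similar sketch for the single-file loader (the path is illustrative only):

    # Hypothetical sketch: load one file and rebuild the DataFrame from the artifact dict.
    import pandas as pd
    message, frame_dict = load_file.func(file_path="./data/sales.csv")
    df = pd.DataFrame(frame_dict)
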
@@ -67,30 +106,51 @@ def list_directory_contents(directory_path: str, show_hidden: bool = False) -> T
     Returns:
     tuple:
         - content (list[str]): A list of filenames/folders (suitable for display)
-        - artifact (list[dict]): A list of dictionaries where each dict has keys like {"filename": <name>}.
-          This structure can be easily converted to a pandas DataFrame.
+        - artifact (list[dict]): A list of dictionaries where each dict includes
+          the keys {"filename": <name>, "type": <'file' or 'directory'>}.
+          This structure can be easily converted to a pandas DataFrame.
     """
-    print(" * Tool: list_directory_contents")
+    print(f" * Tool: list_directory_contents | {directory_path}")
     import os
-
+
+    if directory_path is None:
+        return "No directory path provided.", []
+
+    if not os.path.isdir(directory_path):
+        return f"Directory not found: {directory_path}", []
+
     items = []
     for item in os.listdir(directory_path):
         # If show_hidden is False, skip items starting with '.'
         if not show_hidden and item.startswith('.'):
             continue
         items.append(item)
+    items.reverse()
 
-    # content: just the raw list of filenames
-    content = items
-
-    # artifact: list of dicts (each row is {"filename": ...}), easily turned into a DataFrame
-    artifact = [{"filename": item} for item in items]
+    # content: just the raw list of item names (files/folders).
+    content = items.copy()
+
+    content.append(f"Total items: {len(items)}")
+    content.append(f"Directory: {directory_path}")
+
+    # artifact: list of dicts with both "filename" and "type" keys.
+    artifact = []
+    for item in items:
+        item_path = os.path.join(directory_path, item)
+        artifact.append({
+            "filename": item,
+            "type": "directory" if os.path.isdir(item_path) else "file"
+        })
 
     return content, artifact
 
 
+
 @tool(response_format='content_and_artifact')
-def list_directory_recursive(directory_path: str, show_hidden: bool = False) -> Tuple[str, List[Dict]]:
+def list_directory_recursive(
+    directory_path: str = os.getcwd(),
+    show_hidden: bool = False
+) -> Tuple[str, List[Dict]]:
     """
     Tool: list_directory_recursive
     Description:
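
Since the docstring earlier in this hunk promises DataFrame-friendly output, a quick sketch of round-tripping the new artifact shape (the directory argument is arbitrary):

    # Sketch: tabulate the listing; the new "type" key becomes a column.
    import pandas as pd
    content, artifact = list_directory_contents.func(directory_path=".")
    listing_df = pd.DataFrame(artifact)  # columns: "filename", "type"
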
@@ -111,13 +171,19 @@ def list_directory_recursive(directory_path: str, show_hidden: bool = False) ->
     Example:
         content, artifact = list_directory_recursive("/path/to/folder", show_hidden=False)
     """
-    print(" * Tool: list_directory_recursive")
+    print(f" * Tool: list_directory_recursive | {directory_path}")
 
     # We'll store two things as we recurse:
     # 1) lines for building the "tree" string
     # 2) records in a list of dicts for easy DataFrame creation
     import os
 
+    if directory_path is None:
+        return "No directory path provided.", []
+
+    if not os.path.isdir(directory_path):
+        return f"Directory not found: {directory_path}", []
+
     lines = []
     records = []
 
@@ -210,7 +276,7 @@ def get_file_info(file_path: str) -> Tuple[str, List[Dict]]:
     Example:
         content, artifact = get_file_info("/path/to/mydata.csv")
     """
-    print(" * Tool: get_file_info")
+    print(f" * Tool: get_file_info | {file_path}")
 
     # Ensure the file exists
     import os
@@ -244,7 +310,11 @@ def get_file_info(file_path: str) -> Tuple[str, List[Dict]]:
 
 
 @tool(response_format='content_and_artifact')
-def search_files_by_pattern(directory_path: str, pattern: str = "*.csv", recursive: bool = False) -> Tuple[str, List[Dict]]:
+def search_files_by_pattern(
+    directory_path: str = os.getcwd(),
+    pattern: str = "*.csv",
+    recursive: bool = False
+) -> Tuple[str, List[Dict]]:
     """
     Tool: search_files_by_pattern
     Description:
@@ -266,7 +336,7 @@ def search_files_by_pattern(directory_path: str, pattern: str = "*.csv", recursi
     Example:
         content, artifact = search_files_by_pattern("/path/to/folder", "*.csv", recursive=True)
     """
-    print(" * Tool: search_files_by_pattern")
+    print(f" * Tool: search_files_by_pattern | {directory_path}")
 
     import os
     import fnmatch
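
The body (truncated in this hunk) imports fnmatch, so patterns are presumably matched as shell-style globs; for reference:

    import fnmatch
    # Shell-style glob matching, as used for the tool's pattern argument.
    fnmatch.fnmatch("sales_2024.csv", "*.csv")  # True
    fnmatch.fnmatch("notes.txt", "*.csv")       # False
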
@@ -0,0 +1,293 @@
+
+from typing import Annotated, Dict, Tuple, Union
+
+import os
+
+from langchain.tools import tool
+
+from langgraph.prebuilt import InjectedState
+
+
+@tool(response_format='content_and_artifact')
+def describe_dataset(
+    data_raw: Annotated[dict, InjectedState("data_raw")]
+) -> Tuple[str, Dict]:
+    """
+    Tool: describe_dataset
+    Description:
+        Describe the dataset by computing summary
+        statistics using the DataFrame's describe() method.
+
+    Returns:
+    -------
+    Tuple[str, Dict]:
+        content: A textual summary of the DataFrame's descriptive statistics.
+        artifact: A dictionary (from DataFrame.describe()) for further inspection.
+    """
+    print(" * Tool: describe_dataset")
+    import pandas as pd
+    df = pd.DataFrame(data_raw)
+    description_df = df.describe(include='all')
+    content = "Summary statistics computed using pandas describe()."
+    artifact = description_df.to_dict()
+    return content, artifact
+
+
+@tool(response_format='content_and_artifact')
+def visualize_missing(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    n_sample: int = None
+) -> Tuple[str, Dict]:
+    """
+    Tool: visualize_missing
+    Description:
+        Missing value analysis using the missingno library. Generates a matrix plot, bar plot, and heatmap plot.
+
+    Parameters:
+    -----------
+    data_raw : dict
+        The raw data in dictionary format.
+    n_sample : int, optional (default: None)
+        The number of rows to sample from the dataset if it is large.
+
+    Returns:
+    -------
+    Tuple[str, Dict]:
+        content: A message describing the generated plots.
+        artifact: A dict with keys 'matrix_plot', 'bar_plot', and 'heatmap_plot', each containing the
+                  corresponding base64-encoded PNG image.
+    """
+    print(" * Tool: visualize_missing")
+
+    try:
+        import missingno as msno  # Ensure missingno is installed
+    except ImportError:
+        raise ImportError("Please install the 'missingno' package to use this tool. pip install missingno")
+
+    import pandas as pd
+    import base64
+    from io import BytesIO
+    import matplotlib.pyplot as plt
+
+    # Create the DataFrame and sample if n_sample is provided.
+    df = pd.DataFrame(data_raw)
+    if n_sample is not None:
+        df = df.sample(n=n_sample, random_state=42)
+
+    # Dictionary to store the base64 encoded images for each plot.
+    encoded_plots = {}
+
+    # Define a helper function to create a plot, save it, and encode it.
+    def create_and_encode_plot(plot_func, plot_name: str):
+        plt.figure(figsize=(8, 6))
+        # Call the missingno plotting function.
+        plot_func(df)
+        plt.tight_layout()
+        buf = BytesIO()
+        plt.savefig(buf, format="png")
+        plt.close()
+        buf.seek(0)
+        return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+    # Create and encode the matrix plot.
+    encoded_plots["matrix_plot"] = create_and_encode_plot(msno.matrix, "matrix")
+
+    # Create and encode the bar plot.
+    encoded_plots["bar_plot"] = create_and_encode_plot(msno.bar, "bar")
+
+    # Create and encode the heatmap plot.
+    encoded_plots["heatmap_plot"] = create_and_encode_plot(msno.heatmap, "heatmap")
+
+    content = "Missing data visualizations (matrix, bar, and heatmap) have been generated."
+    artifact = encoded_plots
+    return content, artifact
+
+
+@tool(response_format='content_and_artifact')
+def correlation_funnel(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    target: str,
+    target_bin_index: Union[int, str] = -1,
+    corr_method: str = "pearson",
+    n_bins: int = 4,
+    thresh_infreq: float = 0.01,
+    name_infreq: str = "-OTHER",
+) -> Tuple[str, Dict]:
+    """
+    Tool: correlation_funnel
+    Description:
+        Correlation analysis using the correlation funnel method. The tool binarizes the data and computes correlation versus a target column.
+
+    Parameters:
+    ----------
+    target : str
+        The base target column name (e.g., 'Member_Status'). The tool will look for columns that begin
+        with this string followed by '__' (e.g., 'Member_Status__Gold', 'Member_Status__Platinum').
+    target_bin_index : int or str, default -1
+        If an integer, selects the target level by position from the matching columns.
+        If a string (e.g., "Yes"), attempts to match to the suffix of a column name
+        (i.e., 'target__Yes').
+    corr_method : str
+        The correlation method ('pearson', 'kendall', or 'spearman'). Default is 'pearson'.
+    n_bins : int
+        The number of bins to use for binarization. Default is 4.
+    thresh_infreq : float
+        The threshold for infrequent levels. Default is 0.01.
+    name_infreq : str
+        The name to use for infrequent levels. Default is '-OTHER'.
+    """
+    print(" * Tool: correlation_funnel")
+    try:
+        import pytimetk as tk
+    except ImportError:
+        raise ImportError("Please install the 'pytimetk' package to use this tool. pip install pytimetk")
+    import pandas as pd
+    import base64
+    from io import BytesIO
+    import matplotlib.pyplot as plt
+    import json
+    import plotly.graph_objects as go
+    import plotly.io as pio
+    from typing import Union
+
+    # Convert the raw injected state into a DataFrame.
+    df = pd.DataFrame(data_raw)
+
+    # Apply the binarization method.
+    df_binarized = df.binarize(
+        n_bins=n_bins,
+        thresh_infreq=thresh_infreq,
+        name_infreq=name_infreq,
+        one_hot=True
+    )
+
+    # Determine the full target column name.
+    # Look for all columns that start with "{target}__".
+    matching_columns = [col for col in df_binarized.columns if col.startswith(f"{target}__")]
+    if not matching_columns:
+        # If no matching columns are found, warn and use the provided target as-is.
+        full_target = target
+    else:
+        # Determine the full target based on target_bin_index.
+        if isinstance(target_bin_index, str):
+            # Build the candidate column name
+            candidate = f"{target}__{target_bin_index}"
+            if candidate in matching_columns:
+                full_target = candidate
+            else:
+                # If no matching candidate is found, default to the last matching column.
+                full_target = matching_columns[-1]
+        else:
+            # target_bin_index is an integer.
+            try:
+                full_target = matching_columns[target_bin_index]
+            except IndexError:
+                # If the index is out of bounds, use the last matching column.
+                full_target = matching_columns[-1]
+
+    # Compute the correlation funnel using the full target column name.
+    df_correlated = df_binarized.correlate(target=full_target, method=corr_method)
+
+    # Attempt to generate a static plot.
+    try:
+        # Here we assume that the DataFrame has a plot_correlation_funnel method.
+        fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
+        buf = BytesIO()
+        # Use the appropriate save method for the figure object.
+        fig.save(buf, format="png")
+        plt.close()
+        buf.seek(0)
+        encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
+    except Exception as e:
+        encoded = {"error": str(e)}
+
+    # Attempt to generate a Plotly plot.
+    try:
+        fig = df_correlated.plot_correlation_funnel(engine='plotly')
+        fig_json = pio.to_json(fig)
+        fig_dict = json.loads(fig_json)
+    except Exception as e:
+        fig_dict = {"error": str(e)}
+
+    content = (f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
+               f"Base target was '{target}' with target_bin_index '{target_bin_index}'.")
+    artifact = {
+        "correlation_data": df_correlated.to_dict(orient="list"),
+        "plot_image": encoded,
+        "plotly_figure": fig_dict,
+    }
+    return content, artifact
+
+
+@tool(response_format='content_and_artifact')
+def generate_sweetviz_report(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    target: str = None,
+    report_name: str = "sweetviz_report.html",
+    report_directory: str = os.path.join(os.getcwd(), "reports"),
+    open_browser: bool = True,
+) -> Tuple[str, Dict]:
+    """
+    Tool: generate_sweetviz_report
+    Description:
+        Make an Exploratory Data Analysis (EDA) report using the Sweetviz library.
+
+    Parameters:
+    -----------
+    data_raw : dict
+        The raw data injected as a dictionary (converted from a DataFrame).
+    target : str, optional
+        The target feature to analyze. Default is None.
+    report_name : str, optional
+        The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
+    report_directory : str, optional
+        The directory where the report should be saved. Defaults to a 'reports' directory in the current working directory.
+    open_browser : bool, optional
+        Whether to open the report in a web browser. Default is True.
+
+    Returns:
+    --------
+    Tuple[str, Dict]:
+        content: A summary message describing the generated report.
+        artifact: A dictionary with the report file path and optionally the report's HTML content.
+    """
+    print(" * Tool: generate_sweetviz_report")
+    try:
+        import sweetviz as sv
+    except ImportError:
+        raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
+    import pandas as pd
+    # Convert injected raw data to a DataFrame.
+    df = pd.DataFrame(data_raw)
+
+    # Create the Sweetviz report.
+    report = sv.analyze(df, target_feat=target)
+
+    # Ensure the directory exists; default is os.getcwd()/reports
+    if not os.path.exists(report_directory):
+        os.makedirs(report_directory)
+
+    # Determine the full path for the report.
+    full_report_path = os.path.join(report_directory, report_name)
+
+    # Save the report to the specified HTML file, honoring the open_browser flag.
+    report.show_html(
+        filepath=full_report_path,
+        open_browser=open_browser,
+    )
+
+    # Optionally, read the HTML content (if desired to pass along in the artifact).
+    try:
+        with open(full_report_path, "r", encoding="utf-8") as f:
+            html_content = f.read()
+    except Exception:
+        html_content = None
+
+    content = f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'."
+    artifact = {
+        "report_file": os.path.abspath(full_report_path),
+        "report_html": html_content,
+    }
+    return content, artifact
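
Because data_raw is annotated with InjectedState("data_raw"), a LangGraph agent fills it from graph state rather than from the LLM's tool call. Outside a graph, the underlying function can be exercised directly; a minimal sketch, with a hypothetical CSV path:

    import pandas as pd
    df = pd.read_csv("data.csv")  # hypothetical input file
    content, artifact = describe_dataset.func(data_raw=df.to_dict())
    summary_df = pd.DataFrame(artifact)  # the describe() table, rebuilt
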
@@ -0,0 +1,27 @@
+
+
+import webbrowser
+import os
+
+def open_html_file_in_browser(file_path: str):
+    """
+    Opens an HTML file in the default web browser.
+
+    Parameters:
+    -----------
+    file_path : str
+        The file path or URL of the HTML file to open.
+
+    Returns:
+    --------
+    None
+    """
+    # Check if the file exists if a local path is provided.
+    if os.path.isfile(file_path):
+        # Convert the file path to a file:// URL.
+        file_url = 'file://' + os.path.abspath(file_path)
+    else:
+        # If the file doesn't exist locally, assume it's already a URL.
+        file_url = file_path
+
+    webbrowser.open(file_url)
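
A natural pairing with generate_sweetviz_report above (the path is hypothetical; the tool's artifact carries the actual report_file location):

    # Re-open a previously generated report without rebuilding it.
    open_html_file_in_browser("reports/sweetviz_report.html")
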
@@ -0,0 +1,46 @@
+import base64
+from io import BytesIO
+import matplotlib.pyplot as plt
+from PIL import Image
+
+def matplotlib_from_base64(encoded: str, title: str = None, figsize: tuple = (8, 6)):
+    """
+    Convert a base64-encoded image to a matplotlib plot and display it.
+
+    Parameters:
+    -----------
+    encoded : str
+        The base64-encoded image string.
+    title : str, optional
+        A title for the plot. Default is None.
+    figsize : tuple, optional
+        Figure size (width, height) for the plot. Default is (8, 6).
+
+    Returns:
+    --------
+    fig, ax : tuple
+        The matplotlib figure and axes objects.
+    """
+    # Decode the base64 string to bytes
+    img_data = base64.b64decode(encoded)
+
+    # Load the bytes data into a BytesIO buffer
+    buf = BytesIO(img_data)
+
+    # Open the image using Pillow
+    img = Image.open(buf)
+
+    # Create a matplotlib figure and axis
+    fig, ax = plt.subplots(figsize=figsize)
+
+    # Display the image
+    ax.imshow(img)
+    ax.axis('off')  # Hide the axis
+
+    if title:
+        ax.set_title(title)
+
+    # Show the plot
+    plt.show()
+
+    return fig, ax
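
This helper closes the loop with visualize_missing above; a sketch, assuming a pandas DataFrame df is already in memory:

    # Render one of the base64-encoded plots returned by visualize_missing.
    content, artifact = visualize_missing.func(data_raw=df.to_dict())
    fig, ax = matplotlib_from_base64(artifact["matrix_plot"], title="Missing data matrix")
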
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ai-data-science-team
-Version: 0.0.0.9010
+Version: 0.0.0.9012
 Summary: Build and run an AI-powered data science team.
 Home-page: https://github.com/business-science/ai-data-science-team
 Author: Matt Dancho
@@ -31,9 +31,16 @@ Requires-Dist: psutil
 Provides-Extra: machine-learning
 Requires-Dist: h2o; extra == "machine-learning"
 Requires-Dist: mlflow; extra == "machine-learning"
+Provides-Extra: data-science
+Requires-Dist: pytimetk; extra == "data-science"
+Requires-Dist: missingno; extra == "data-science"
+Requires-Dist: sweetviz; extra == "data-science"
 Provides-Extra: all
 Requires-Dist: h2o; extra == "all"
 Requires-Dist: mlflow; extra == "all"
+Requires-Dist: pytimetk; extra == "all"
+Requires-Dist: missingno; extra == "all"
+Requires-Dist: sweetviz; extra == "all"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
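
The new extra keeps the EDA dependencies opt-in; standard pip extras syntax pulls them in:

    pip install "ai-data-science-team[data-science]"   # pytimetk, missingno, sweetviz
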
@@ -59,6 +66,8 @@ Dynamic: summary
   <a href="https://pypi.python.org/pypi/ai-data-science-team"><img src="https://img.shields.io/pypi/v/ai-data-science-team.svg?style=for-the-badge" alt="PyPI"></a>
   <a href="https://github.com/business-science/ai-data-science-team"><img src="https://img.shields.io/pypi/pyversions/ai-data-science-team.svg?style=for-the-badge" alt="versions"></a>
   <a href="https://github.com/business-science/ai-data-science-team/blob/main/LICENSE"><img src="https://img.shields.io/github/license/business-science/ai-data-science-team.svg?style=for-the-badge" alt="license"></a>
+  <img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/business-science/ai-data-science-team?style=for-the-badge">
+
 </div>
 
 
@@ -93,8 +102,9 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
 - [Apps Available Now](#apps-available-now)
   - [🔥 Agentic Applications](#-agentic-applications)
 - [Agents Available Now](#agents-available-now)
+  - [Standard Agents](#standard-agents)
   - [🔥🔥 NEW! Machine Learning Agents](#-new-machine-learning-agents)
-  - [Data Science Agents](#data-science-agents-1)
+  - [🔥 NEW! Data Science Agents](#-new-data-science-agents)
   - [Multi-Agents](#multi-agents)
 - [Agents Coming Soon](#agents-coming-soon)
 - [Disclaimer](#disclaimer)
@@ -122,7 +132,7 @@ If you're an aspiring data scientist who wants to learn how to build AI Agents a
 
 This project is a work in progress. New data science agents will be released soon.
 
-![Data Science Team](/img/ai_data_science_team.jpg)
+![AI Data Science Team](/img/ai_data_science_team.jpg)
 
 ### NEW: Multi-Agents
 
@@ -146,18 +156,25 @@ This is a top secret project I'm working on. It's a multi-agent data science app
 
 ### Agents Available Now
 
+#### Standard Agents
+
+1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
+2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
+3. **🔥 Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
+4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
+5. **🔥 SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
+6. **🔥 Data Loader Tools Agent:** Loads data from various sources including CSV, Excel, Parquet, and Pickle files. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_loader_tools_agent.ipynb)
+
+
 #### 🔥🔥 NEW! Machine Learning Agents
 
 1. **🔥 H2O Machine Learning Agent:** Builds and logs 100's of high-performance machine learning models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
 2. **🔥 MLflow Tools Agent (MLOps):** This agent has 11+ tools for managing models, ML projects, and making production ML predictions with MLflow. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/mlflow_tools_agent.ipynb)
 
-#### Data Science Agents
+#### 🔥 NEW! Data Science Agents
+
+1. **🔥🔥 EDA Tools Agent:** Performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/ds_agents/eda_tools_agent.ipynb)
 
-1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_wrangling_agent.ipynb)
-2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON serializable plotly visualizations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_visualization_agent.ipynb)
-3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/data_cleaning_agent.ipynb)
-4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/feature_engineering_agent.ipynb)
-5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations. [See Example](https://github.com/business-science/ai-data-science-team/blob/master/examples/sql_database_agent.ipynb)
 
 #### Multi-Agents