ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9013__py3-none-any.whl

@@ -1 +1 @@
- __version__ = "0.0.0.9012"
+ __version__ = "0.0.0.9013"
@@ -25,6 +25,7 @@ from ai_data_science_team.tools.data_loader import (
  get_file_info,
  search_files_by_pattern,
  )
+ from ai_data_science_team.utils.messages import get_tool_call_names

  AGENT_NAME = "data_loader_tools_agent"

@@ -174,6 +175,12 @@ class DataLoaderToolsAgent(BaseAgent):
  return Markdown(self.response["messages"][0].content)
  else:
  return self.response["messages"][0].content
+
+ def get_tool_calls(self):
+ """
+ Returns the tool calls made by the agent.
+ """
+ return self.response["tool_calls"]

@@ -204,6 +211,7 @@ def make_data_loader_tools_agent(
  internal_messages: Annotated[Sequence[BaseMessage], operator.add]
  user_instructions: str
  data_loader_artifacts: dict
+ tool_calls: List[str]

  def data_loader_agent(state):

@@ -253,10 +261,13 @@ def make_data_loader_tools_agent(
  elif isinstance(last_message, dict) and "artifact" in last_message:
  last_tool_artifact = last_message["artifact"]

+ tool_calls = get_tool_call_names(internal_messages)
+
  return {
  "messages": [last_ai_message],
  "internal_messages": internal_messages,
  "data_loader_artifacts": last_tool_artifact,
+ "tool_calls": tool_calls,
  }

  workflow = StateGraph(GraphState)
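
The new tool_calls state key and get_tool_calls() accessor let callers see which tools the agent actually ran. A minimal usage sketch, not the package's documented API verbatim: it assumes DataLoaderToolsAgent is re-exported from ai_data_science_team.agents and exposes invoke_agent() and get_artifacts() in the style of the project README; only get_tool_calls() is taken directly from the diff above. The same pattern applies to the EDAToolsAgent and MLflowToolsAgent changes below.

    from langchain_openai import ChatOpenAI
    from ai_data_science_team.agents import DataLoaderToolsAgent  # assumed re-export path

    llm = ChatOpenAI(model="gpt-4o-mini")  # illustrative model choice

    agent = DataLoaderToolsAgent(model=llm)
    agent.invoke_agent(user_instructions="What data files are in the current directory?")  # assumed entry point

    print(agent.get_tool_calls())  # names collected by get_tool_call_names(), e.g. ['get_file_info']
    print(agent.get_artifacts())   # data_loader_artifacts from the final graph state (assumed accessor)
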
@@ -19,17 +19,20 @@ from ai_data_science_team.templates import BaseAgent
  from ai_data_science_team.utils.regex import format_agent_name

  from ai_data_science_team.tools.eda import (
+ explain_data,
  describe_dataset,
  visualize_missing,
  correlation_funnel,
  generate_sweetviz_report,
  )
+ from ai_data_science_team.utils.messages import get_tool_call_names


  AGENT_NAME = "exploratory_data_analyst_agent"

  # Updated tool list for EDA
  EDA_TOOLS = [
+ explain_data,
  describe_dataset,
  visualize_missing,
  correlation_funnel,
@@ -162,6 +165,12 @@ class EDAToolsAgent(BaseAgent):
  return Markdown(self.response["messages"][0].content)
  else:
  return self.response["messages"][0].content
+
+ def get_tool_calls(self):
+ """
+ Returns the tool calls made by the agent.
+ """
+ return self.response["tool_calls"]

  def make_eda_tools_agent(
  model: Any,
@@ -191,6 +200,7 @@ def make_eda_tools_agent(
  user_instructions: str
  data_raw: dict
  eda_artifacts: dict
+ tool_calls: list

  def exploratory_agent(state):
  print(format_agent_name(AGENT_NAME))
@@ -229,11 +239,14 @@ def make_eda_tools_agent(
  last_tool_artifact = last_message.artifact
  elif isinstance(last_message, dict) and "artifact" in last_message:
  last_tool_artifact = last_message["artifact"]
+
+ tool_calls = get_tool_call_names(internal_messages)

  return {
  "messages": [last_ai_message],
  "internal_messages": internal_messages,
  "eda_artifacts": last_tool_artifact,
+ "tool_calls": tool_calls,
  }

  workflow = StateGraph(GraphState)
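
Since EDA_TOOLS is a module-level list of LangChain tools (the wheel RECORD lists the module as ai_data_science_team/ds_agents/eda_tools_agent.py), the expanded tool set can be inspected directly; a minimal sketch, assuming the list is importable from that path:

    from ai_data_science_team.ds_agents.eda_tools_agent import EDA_TOOLS

    # Tool names now bound to the EDA agent, including the new explain_data tool.
    print([tool.name for tool in EDA_TOOLS])
    # Expected to include: explain_data, describe_dataset, visualize_missing,
    # correlation_funnel, generate_sweetviz_report
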
@@ -27,6 +27,7 @@ from ai_data_science_team.tools.mlflow import (
  mlflow_search_registered_models,
  mlflow_get_model_version_details,
  )
+ from ai_data_science_team.utils.messages import get_tool_call_names

  AGENT_NAME = "mlflow_tools_agent"

@@ -228,6 +229,12 @@ class MLflowToolsAgent(BaseAgent):
  return Markdown(self.response["messages"][0].content)
  else:
  return self.response["messages"][0].content
+
+ def get_tool_calls(self):
+ """
+ Returns the tool calls made by the agent.
+ """
+ return self.response["tool_calls"]

@@ -330,10 +337,13 @@ def make_mlflow_tools_agent(
  elif isinstance(last_message, dict) and "artifact" in last_message:
  last_tool_artifact = last_message["artifact"]

+ tool_calls = get_tool_call_names(internal_messages)
+
  return {
  "messages": [last_ai_message],
  "internal_messages": internal_messages,
  "mlflow_artifacts": last_tool_artifact,
+ "tool_calls": tool_calls,
  }

@@ -74,7 +74,12 @@ def get_dataframe_summary(
  return summaries


- def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
+ def _summarize_dataframe(
+ df: pd.DataFrame,
+ dataset_name: str,
+ n_sample=30,
+ skip_stats=False
+ ) -> str:
  """Generate a summary string for a single DataFrame."""
  # 1. Convert dictionary-type cells to strings
  # This prevents unhashable dict errors during df.nunique().
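
The reformatted _summarize_dataframe signature is behavior-neutral; get_dataframe_summary remains the public entry point and is what the new explain_data tool calls. A minimal sketch of a direct call, assuming only the call pattern shown in explain_data below (a DataFrame plus the n_sample and skip_stats keywords); the sample DataFrame is illustrative:

    import pandas as pd
    from ai_data_science_team.tools.dataframe import get_dataframe_summary

    df = pd.DataFrame({"id": [1, 2, 3], "value": [10.5, None, 7.2]})

    # Same call pattern used inside explain_data: a narrative, human-readable summary.
    summary = get_dataframe_summary(df, n_sample=5, skip_stats=False)
    print(summary[0] if isinstance(summary, list) else summary)
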
@@ -2,11 +2,44 @@
  from typing import Annotated, Dict, Tuple, Union

  import os
+ import tempfile

  from langchain.tools import tool

  from langgraph.prebuilt import InjectedState

+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
+
+
+ @tool(response_format='content')
+ def explain_data(
+ data_raw: Annotated[dict, InjectedState("data_raw")],
+ n_sample: int = 30,
+ skip_stats: bool = False,
+ ):
+ """
+ Tool: explain_data
+ Description:
+ Provides an extensive, narrative summary of a DataFrame including its shape, column types,
+ missing value percentages, unique counts, sample rows, and (if not skipped) descriptive stats/info.
+
+ Parameters:
+ data_raw (dict): Raw data.
+ n_sample (int, default=30): Number of rows to display.
+ skip_stats (bool, default=False): If True, omit descriptive stats/info.
+
+ LLM Guidance:
+ Use when a detailed, human-readable explanation is needed—i.e., a full overview is preferred over a concise numerical summary.
+
+ Returns:
+ str: Detailed DataFrame summary.
+ """
+ print(" * Tool: explain_data")
+ import pandas as pd
+
+ result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
+
+ return result

  @tool(response_format='content_and_artifact')
  def describe_dataset(
@@ -15,21 +48,33 @@ def describe_dataset(
  """
  Tool: describe_dataset
  Description:
- Describe the dataset by computing summary
- statistics using the DataFrame's describe() method.
-
+ Compute and return summary statistics for the dataset using pandas' describe() method.
+ The tool provides both a textual summary and a structured artifact (a dictionary) for further processing.
+
+ Parameters:
+ -----------
+ data_raw : dict
+ The raw data in dictionary format.
+
+ LLM Selection Guidance:
+ ------------------------
+ Use this tool when:
+ - The request emphasizes numerical descriptive statistics (e.g., count, mean, std, min, quartiles, max).
+ - The user needs a concise statistical snapshot rather than a detailed narrative.
+ - Both a brief text explanation and a structured data artifact (for downstream tasks) are required.
+
  Returns:
  -------
  Tuple[str, Dict]:
- content: A textual summary of the DataFrame's descriptive statistics.
- artifact: A dictionary (from DataFrame.describe()) for further inspection.
+ - content: A textual summary indicating that summary statistics have been computed.
+ - artifact: A dictionary (derived from DataFrame.describe()) containing detailed statistical measures.
  """
  print(" * Tool: describe_dataset")
  import pandas as pd
  df = pd.DataFrame(data_raw)
  description_df = df.describe(include='all')
  content = "Summary statistics computed using pandas describe()."
- artifact = description_df.to_dict()
+ artifact = {'describe_df': description_df.to_dict()}
  return content, artifact

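
Note the artifact shape change: describe_dataset now nests the describe() output under a 'describe_df' key instead of returning the raw to_dict(). Downstream code that consumed the old artifact must unwrap it; a minimal sketch that mirrors the tool's new logic outside the agent graph, with an illustrative DataFrame:

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "b"]})

    # What describe_dataset now stores in its artifact:
    artifact = {"describe_df": df.describe(include="all").to_dict()}

    # Rebuilding the statistics table from the artifact (e.g., pulled from eda_artifacts):
    describe_df = pd.DataFrame(artifact["describe_df"])
    print(describe_df)
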
@@ -226,8 +271,8 @@ def generate_sweetviz_report(
  data_raw: Annotated[dict, InjectedState("data_raw")],
  target: str = None,
  report_name: str = "sweetviz_report.html",
- report_directory: str = os.path.join(os.getcwd(), "reports"),
- open_browser: bool = True,
+ report_directory: str = None, # <-- Default to None
+ open_browser: bool = False,
  ) -> Tuple[str, Dict]:
  """
  Tool: generate_sweetviz_report
@@ -243,9 +288,10 @@ def generate_sweetviz_report(
  report_name : str, optional
  The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
  report_directory : str, optional
- The directory where the report should be saved. Defaults to a 'reports' directory in the current working directory.
+ The directory where the report should be saved.
+ If None, a temporary directory is created and used.
  open_browser : bool, optional
- Whether to open the report in a web browser. Default is True.
+ Whether to open the report in a web browser. Default is False.

  Returns:
  --------
@@ -254,28 +300,37 @@ def generate_sweetviz_report(
  artifact: A dictionary with the report file path and optionally the report's HTML content.
  """
  print(" * Tool: generate_sweetviz_report")
+
+ # Import sweetviz
  try:
  import sweetviz as sv
  except ImportError:
  raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
+
  import pandas as pd
+
  # Convert injected raw data to a DataFrame.
  df = pd.DataFrame(data_raw)

+ # If no directory is specified, use a temporary directory.
+ if not report_directory:
+ report_directory = tempfile.mkdtemp()
+ print(f" * Using temporary directory: {report_directory}")
+ else:
+ # Ensure user-specified directory exists.
+ if not os.path.exists(report_directory):
+ os.makedirs(report_directory)
+
  # Create the Sweetviz report.
  report = sv.analyze(df, target_feat=target)

- # Ensure the directory exists; default is os.getcwd()/reports
- if not os.path.exists(report_directory):
- os.makedirs(report_directory)
-
  # Determine the full path for the report.
  full_report_path = os.path.join(report_directory, report_name)

  # Save the report to the specified HTML file.
  report.show_html(
  filepath=full_report_path,
- open_browser=True,
+ open_browser=open_browser,
  )

  # Optionally, read the HTML content (if desired to pass along in the artifact).
@@ -285,9 +340,13 @@ def generate_sweetviz_report(
  except Exception:
  html_content = None

- content = f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'."
+ content = (
+ f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
+ f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
+ )
  artifact = {
  "report_file": os.path.abspath(full_report_path),
  "report_html": html_content,
  }
  return content, artifact
+
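
With report_directory defaulting to None and open_browser to False, the tool no longer writes into ./reports or pops a browser unless asked. A minimal standalone sketch of the same fallback behavior, assuming sweetviz is installed (pip install sweetviz); the sample DataFrame is illustrative:

    import os
    import tempfile
    import pandas as pd
    import sweetviz as sv

    df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [10, 20, 20, 40]})

    report_directory = None  # mirrors the new default
    if not report_directory:
        report_directory = tempfile.mkdtemp()  # temporary directory, matching the tool's fallback

    full_report_path = os.path.join(report_directory, "sweetviz_report.html")
    sv.analyze(df).show_html(filepath=full_report_path, open_browser=False)
    print(full_report_path)
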
@@ -0,0 +1,27 @@
+
+
+
+ def get_tool_call_names(messages):
+ """
+ Method to extract the tool call names from a list of LangChain messages.
+
+ Parameters:
+ ----------
+ messages : list
+ A list of LangChain messages.
+
+ Returns:
+ -------
+ tool_calls : list
+ A list of tool call names.
+
+ """
+ tool_calls = []
+ for message in messages:
+ try:
+ if "tool_call_id" in list(dict(message).keys()):
+ tool_calls.append(message.name)
+ except:
+ pass
+ return tool_calls
+
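
get_tool_call_names works by checking each message for a tool_call_id field (present on LangChain ToolMessage objects) and collecting that message's name. A minimal sketch of calling it directly:

    from langchain_core.messages import AIMessage, ToolMessage
    from ai_data_science_team.utils.messages import get_tool_call_names

    messages = [
        AIMessage(content="Running the describe_dataset tool..."),
        ToolMessage(content="Summary statistics computed.", name="describe_dataset", tool_call_id="call_1"),
    ]

    print(get_tool_call_names(messages))  # ['describe_dataset']
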
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ai-data-science-team
- Version: 0.0.0.9012
+ Version: 0.0.0.9013
  Summary: Build and run an AI-powered data science team.
  Home-page: https://github.com/business-science/ai-data-science-team
  Author: Matt Dancho
@@ -152,7 +152,11 @@ This is a top secret project I'm working on. It's a multi-agent data science app

  #### 🔥 Agentic Applications

- 1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
+ 1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
+
+ ![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
+
+ 2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)

  ### Agents Available Now

@@ -1,20 +1,20 @@
  ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ai_data_science_team/_version.py,sha256=BybGt-zGNDZsdJxDMV3xmjghiRF8jmwG3ov_dt_rM7E,26
+ ai_data_science_team/_version.py,sha256=8mQbNYWB914j3xlCMQYaR14g26vq-2SV31Xf8uer_L0,26
  ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
  ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
  ai_data_science_team/agents/data_cleaning_agent.py,sha256=V5tJMwGJK0JwrF_H-7r3S0E8UkAY6ci4BGxqjhZiGBI,27352
- ai_data_science_team/agents/data_loader_tools_agent.py,sha256=fnkOvmrXzvTTt1mnAyTlsF_7ZGrkp3P97YU_LgeffMg,8445
+ ai_data_science_team/agents/data_loader_tools_agent.py,sha256=23Uuqt-oaJfj3CFRKT7NErNkodXpraXl0HOWvXjMcJs,8802
  ai_data_science_team/agents/data_visualization_agent.py,sha256=tJy9Ehnh9mvAu6H--TXI8esSHmK1RW_L1RDAdn7Xek4,28821
  ai_data_science_team/agents/data_wrangling_agent.py,sha256=LxzphH-TmrFG0GjejGOjulhPq4SsWFo5Y9tk4WEuN4M,32347
  ai_data_science_team/agents/feature_engineering_agent.py,sha256=KmPBkj7WUBz6LFUlDDfQHMi7ujXwsH5P9LWRS-F4tdM,31026
  ai_data_science_team/agents/sql_database_agent.py,sha256=1K2o3NiuKgGKdbMz_Tq9IeQ8xhXjpfGOxx9lArZh1yE,31173
  ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
- ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=y65lsBXhQNOGwWealEho6uFxGSTW7FNfvTUZnW8_XNY,7609
+ ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=VJkqyQCNxoV0kvUTpUZh8SXTTZ0K1tUlg3jq6LDnpPQ,8009
  ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
  ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=DamR72agrTKfdcdhablmP2mpbj0CqtMonP-QU8p7o9w,33394
  ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=zbT0KIsmQp_sEyxzXRguhqx5913Q2yPYyKGU6TUWEM8,11067
+ ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=bRTT53_pHV0qAYl07iZcwUEYffGH_ZfJICdrLeOUPn4,11394
  ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
  ai_data_science_team/multiagents/sql_data_analyst.py,sha256=kmmED3gLf5STWWY6ZVJYd7_Pt8NMl6SHyBocuQzRDGk,14193
  ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
@@ -24,8 +24,8 @@ ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP
  ai_data_science_team/templates/agent_templates.py,sha256=Lezp0ugtIP3m5WUOmjLwghNnjjyQVQecysONeIHWwi0,29133
  ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
- ai_data_science_team/tools/dataframe.py,sha256=qSflGDByqqCXv4TjuvOFvGPZmegzeOesb0Y4i4Y0gdQ,4551
- ai_data_science_team/tools/eda.py,sha256=UGD6PC12RsB_UmStvR4TmSqv0noxjM4DkzY-kHjI0-E,10591
+ ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
+ ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
  ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
  ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
  ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -33,10 +33,11 @@ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
  ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
  ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
  ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
+ ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
  ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
  ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
- ai_data_science_team-0.0.0.9012.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
- ai_data_science_team-0.0.0.9012.dist-info/METADATA,sha256=geRCFLG3YO9uprp_CGKiqCTSThg06L2U6WxVqYKzyM8,12704
- ai_data_science_team-0.0.0.9012.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- ai_data_science_team-0.0.0.9012.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
- ai_data_science_team-0.0.0.9012.dist-info/RECORD,,
+ ai_data_science_team-0.0.0.9013.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ ai_data_science_team-0.0.0.9013.dist-info/METADATA,sha256=z18MmCwNdEgovskYmYmd4CS1I4WKTvh_mSnmzKOaHZs,13021
+ ai_data_science_team-0.0.0.9013.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ ai_data_science_team-0.0.0.9013.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ ai_data_science_team-0.0.0.9013.dist-info/RECORD,,