ai-data-science-team 0.0.0.9015__py3-none-any.whl → 0.0.0.9016__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = "0.0.0.9015"
1
+ __version__ = "0.0.0.9016"
@@ -1,5 +1,3 @@
1
-
2
-
3
1
  from typing import Any, Optional, Annotated, Sequence, Dict
4
2
  import operator
5
3
  import pandas as pd
@@ -17,10 +15,11 @@ from ai_data_science_team.utils.regex import format_agent_name
17
15
 
18
16
  from ai_data_science_team.tools.eda import (
19
17
  explain_data,
20
- describe_dataset,
21
- visualize_missing,
22
- correlation_funnel,
18
+ describe_dataset,
19
+ visualize_missing,
20
+ generate_correlation_funnel,
23
21
  generate_sweetviz_report,
22
+ generate_dtale_report,
24
23
  )
25
24
  from ai_data_science_team.utils.messages import get_tool_call_names
26
25
 
@@ -32,15 +31,17 @@ EDA_TOOLS = [
32
31
  explain_data,
33
32
  describe_dataset,
34
33
  visualize_missing,
35
- correlation_funnel,
34
+ generate_correlation_funnel,
36
35
  generate_sweetviz_report,
36
+ generate_dtale_report,
37
37
  ]
38
38
 
39
+
39
40
  class EDAToolsAgent(BaseAgent):
40
41
  """
41
42
  An Exploratory Data Analysis Tools Agent that interacts with EDA tools to generate summary statistics,
42
43
  missing data visualizations, correlation funnels, EDA reports, etc.
43
-
44
+
44
45
  Parameters:
45
46
  ----------
46
47
  model : langchain.llms.base.LLM
@@ -52,9 +53,9 @@ class EDAToolsAgent(BaseAgent):
52
53
  checkpointer : Checkpointer, optional
53
54
  The checkpointer for the agent.
54
55
  """
55
-
56
+
56
57
  def __init__(
57
- self,
58
+ self,
58
59
  model: Any,
59
60
  create_react_agent_kwargs: Optional[Dict] = {},
60
61
  invoke_react_agent_kwargs: Optional[Dict] = {},
@@ -64,18 +65,18 @@ class EDAToolsAgent(BaseAgent):
64
65
  "model": model,
65
66
  "create_react_agent_kwargs": create_react_agent_kwargs,
66
67
  "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
67
- "checkpointer": checkpointer
68
+ "checkpointer": checkpointer,
68
69
  }
69
70
  self._compiled_graph = self._make_compiled_graph()
70
71
  self.response = None
71
-
72
+
72
73
  def _make_compiled_graph(self):
73
74
  """
74
75
  Creates the compiled state graph for the EDA agent.
75
76
  """
76
77
  self.response = None
77
78
  return make_eda_tools_agent(**self._params)
78
-
79
+
79
80
  def update_params(self, **kwargs):
80
81
  """
81
82
  Updates the agent's parameters and rebuilds the compiled graph.
@@ -83,16 +84,13 @@ class EDAToolsAgent(BaseAgent):
83
84
  for k, v in kwargs.items():
84
85
  self._params[k] = v
85
86
  self._compiled_graph = self._make_compiled_graph()
86
-
87
+
87
88
  async def ainvoke_agent(
88
- self,
89
- user_instructions: str = None,
90
- data_raw: pd.DataFrame = None,
91
- **kwargs
89
+ self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
92
90
  ):
93
91
  """
94
92
  Asynchronously runs the agent with user instructions and data.
95
-
93
+
96
94
  Parameters:
97
95
  ----------
98
96
  user_instructions : str, optional
@@ -105,20 +103,17 @@ class EDAToolsAgent(BaseAgent):
105
103
  "user_instructions": user_instructions,
106
104
  "data_raw": data_raw.to_dict() if data_raw is not None else None,
107
105
  },
108
- **kwargs
106
+ **kwargs,
109
107
  )
110
108
  self.response = response
111
109
  return None
112
-
110
+
113
111
  def invoke_agent(
114
- self,
115
- user_instructions: str = None,
116
- data_raw: pd.DataFrame = None,
117
- **kwargs
112
+ self, user_instructions: str = None, data_raw: pd.DataFrame = None, **kwargs
118
113
  ):
119
114
  """
120
115
  Synchronously runs the agent with user instructions and data.
121
-
116
+
122
117
  Parameters:
123
118
  ----------
124
119
  user_instructions : str, optional
@@ -131,24 +126,26 @@ class EDAToolsAgent(BaseAgent):
131
126
  "user_instructions": user_instructions,
132
127
  "data_raw": data_raw.to_dict() if data_raw is not None else None,
133
128
  },
134
- **kwargs
129
+ **kwargs,
135
130
  )
136
131
  self.response = response
137
132
  return None
138
-
133
+
139
134
  def get_internal_messages(self, markdown: bool = False):
140
135
  """
141
136
  Returns internal messages from the agent response.
142
137
  """
143
138
  pretty_print = "\n\n".join(
144
- [f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
145
- for msg in self.response["internal_messages"]]
139
+ [
140
+ f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
141
+ for msg in self.response["internal_messages"]
142
+ ]
146
143
  )
147
144
  if markdown:
148
145
  return Markdown(pretty_print)
149
146
  else:
150
147
  return self.response["internal_messages"]
151
-
148
+
152
149
  def get_artifacts(self, as_dataframe: bool = False):
153
150
  """
154
151
  Returns the EDA artifacts from the agent response.
@@ -157,7 +154,7 @@ class EDAToolsAgent(BaseAgent):
157
154
  return pd.DataFrame(self.response["eda_artifacts"])
158
155
  else:
159
156
  return self.response["eda_artifacts"]
160
-
157
+
161
158
  def get_ai_message(self, markdown: bool = False):
162
159
  """
163
160
  Returns the AI message from the agent response.
@@ -166,13 +163,14 @@ class EDAToolsAgent(BaseAgent):
166
163
  return Markdown(self.response["messages"][0].content)
167
164
  else:
168
165
  return self.response["messages"][0].content
169
-
166
+
170
167
  def get_tool_calls(self):
171
168
  """
172
169
  Returns the tool calls made by the agent.
173
170
  """
174
171
  return self.response["tool_calls"]
175
172
 
173
+
176
174
  def make_eda_tools_agent(
177
175
  model: Any,
178
176
  create_react_agent_kwargs: Optional[Dict] = {},
@@ -181,7 +179,7 @@ def make_eda_tools_agent(
181
179
  ):
182
180
  """
183
181
  Creates an Exploratory Data Analyst Agent that can interact with EDA tools.
184
-
182
+
185
183
  Parameters:
186
184
  ----------
187
185
  model : Any
@@ -192,13 +190,13 @@ def make_eda_tools_agent(
192
190
  Additional kwargs for agent invocation.
193
191
  checkpointer : Checkpointer, optional
194
192
  The checkpointer for the agent.
195
-
193
+
196
194
  Returns:
197
195
  -------
198
196
  app : langgraph.graph.CompiledStateGraph
199
197
  The compiled state graph for the EDA agent.
200
198
  """
201
-
199
+
202
200
  class GraphState(AgentState):
203
201
  internal_messages: Annotated[Sequence[BaseMessage], operator.add]
204
202
  user_instructions: str
@@ -209,11 +207,9 @@ def make_eda_tools_agent(
209
207
  def exploratory_agent(state):
210
208
  print(format_agent_name(AGENT_NAME))
211
209
  print(" * RUN REACT TOOL-CALLING AGENT FOR EDA")
212
-
213
- tool_node = ToolNode(
214
- tools=EDA_TOOLS
215
- )
216
-
210
+
211
+ tool_node = ToolNode(tools=EDA_TOOLS)
212
+
217
213
  eda_agent = create_react_agent(
218
214
  model,
219
215
  tools=tool_node,
@@ -221,7 +217,7 @@ def make_eda_tools_agent(
221
217
  **create_react_agent_kwargs,
222
218
  checkpointer=checkpointer,
223
219
  )
224
-
220
+
225
221
  response = eda_agent.invoke(
226
222
  {
227
223
  "messages": [("user", state["user_instructions"])],
@@ -229,13 +225,13 @@ def make_eda_tools_agent(
229
225
  },
230
226
  invoke_react_agent_kwargs,
231
227
  )
232
-
228
+
233
229
  print(" * POST-PROCESSING EDA RESULTS")
234
-
235
- internal_messages = response['messages']
230
+
231
+ internal_messages = response["messages"]
236
232
  if not internal_messages:
237
233
  return {"internal_messages": [], "eda_artifacts": None}
238
-
234
+
239
235
  last_ai_message = AIMessage(internal_messages[-1].content, role=AGENT_NAME)
240
236
  last_tool_artifact = None
241
237
  if len(internal_messages) > 1:
@@ -244,24 +240,24 @@ def make_eda_tools_agent(
244
240
  last_tool_artifact = last_message.artifact
245
241
  elif isinstance(last_message, dict) and "artifact" in last_message:
246
242
  last_tool_artifact = last_message["artifact"]
247
-
243
+
248
244
  tool_calls = get_tool_call_names(internal_messages)
249
-
245
+
250
246
  return {
251
247
  "messages": [last_ai_message],
252
248
  "internal_messages": internal_messages,
253
249
  "eda_artifacts": last_tool_artifact,
254
250
  "tool_calls": tool_calls,
255
251
  }
256
-
252
+
257
253
  workflow = StateGraph(GraphState)
258
254
  workflow.add_node("exploratory_agent", exploratory_agent)
259
255
  workflow.add_edge(START, "exploratory_agent")
260
256
  workflow.add_edge("exploratory_agent", END)
261
-
257
+
262
258
  app = workflow.compile(
263
259
  checkpointer=checkpointer,
264
260
  name=AGENT_NAME,
265
261
  )
266
-
262
+
267
263
  return app
@@ -1,4 +1,3 @@
1
-
2
1
  from typing import Annotated, Dict, Tuple, Union
3
2
 
4
3
  import os
@@ -6,12 +5,12 @@ import tempfile
6
5
 
7
6
  from langchain.tools import tool
8
7
 
9
- from langgraph.prebuilt import InjectedState
8
+ from langgraph.prebuilt import InjectedState
10
9
 
11
10
  from ai_data_science_team.tools.dataframe import get_dataframe_summary
12
11
 
13
12
 
14
- @tool(response_format='content')
13
+ @tool(response_format="content")
15
14
  def explain_data(
16
15
  data_raw: Annotated[dict, InjectedState("data_raw")],
17
16
  n_sample: int = 30,
@@ -36,14 +35,17 @@ def explain_data(
36
35
  """
37
36
  print(" * Tool: explain_data")
38
37
  import pandas as pd
39
-
40
- result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
41
-
38
+
39
+ result = get_dataframe_summary(
40
+ pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats
41
+ )
42
+
42
43
  return result
43
44
 
44
- @tool(response_format='content_and_artifact')
45
+
46
+ @tool(response_format="content_and_artifact")
45
47
  def describe_dataset(
46
- data_raw: Annotated[dict, InjectedState("data_raw")]
48
+ data_raw: Annotated[dict, InjectedState("data_raw")],
47
49
  ) -> Tuple[str, Dict]:
48
50
  """
49
51
  Tool: describe_dataset
@@ -71,30 +73,30 @@ def describe_dataset(
71
73
  """
72
74
  print(" * Tool: describe_dataset")
73
75
  import pandas as pd
76
+
74
77
  df = pd.DataFrame(data_raw)
75
- description_df = df.describe(include='all')
78
+ description_df = df.describe(include="all")
76
79
  content = "Summary statistics computed using pandas describe()."
77
- artifact = {'describe_df': description_df.to_dict()}
80
+ artifact = {"describe_df": description_df.to_dict()}
78
81
  return content, artifact
79
82
 
80
83
 
81
- @tool(response_format='content_and_artifact')
84
+ @tool(response_format="content_and_artifact")
82
85
  def visualize_missing(
83
- data_raw: Annotated[dict, InjectedState("data_raw")],
84
- n_sample: int = None
86
+ data_raw: Annotated[dict, InjectedState("data_raw")], n_sample: int = None
85
87
  ) -> Tuple[str, Dict]:
86
88
  """
87
89
  Tool: visualize_missing
88
90
  Description:
89
91
  Missing value analysis using the missingno library. Generates a matrix plot, bar plot, and heatmap plot.
90
-
92
+
91
93
  Parameters:
92
94
  -----------
93
95
  data_raw : dict
94
96
  The raw data in dictionary format.
95
97
  n_sample : int, optional (default: None)
96
98
  The number of rows to sample from the dataset if it is large.
97
-
99
+
98
100
  Returns:
99
101
  -------
100
102
  Tuple[str, Dict]:
@@ -103,12 +105,14 @@ def visualize_missing(
103
105
  corresponding base64 encoded PNG image.
104
106
  """
105
107
  print(" * Tool: visualize_missing")
106
-
108
+
107
109
  try:
108
110
  import missingno as msno # Ensure missingno is installed
109
111
  except ImportError:
110
- raise ImportError("Please install the 'missingno' package to use this tool. pip install missingno")
111
-
112
+ raise ImportError(
113
+ "Please install the 'missingno' package to use this tool. pip install missingno"
114
+ )
115
+
112
116
  import pandas as pd
113
117
  import base64
114
118
  from io import BytesIO
@@ -136,21 +140,22 @@ def visualize_missing(
136
140
 
137
141
  # Create and encode the matrix plot.
138
142
  encoded_plots["matrix_plot"] = create_and_encode_plot(msno.matrix, "matrix")
139
-
143
+
140
144
  # Create and encode the bar plot.
141
145
  encoded_plots["bar_plot"] = create_and_encode_plot(msno.bar, "bar")
142
-
146
+
143
147
  # Create and encode the heatmap plot.
144
148
  encoded_plots["heatmap_plot"] = create_and_encode_plot(msno.heatmap, "heatmap")
145
149
 
146
- content = "Missing data visualizations (matrix, bar, and heatmap) have been generated."
150
+ content = (
151
+ "Missing data visualizations (matrix, bar, and heatmap) have been generated."
152
+ )
147
153
  artifact = encoded_plots
148
154
  return content, artifact
149
155
 
150
156
 
151
-
152
- @tool(response_format='content_and_artifact')
153
- def correlation_funnel(
157
+ @tool(response_format="content_and_artifact")
158
+ def generate_correlation_funnel(
154
159
  data_raw: Annotated[dict, InjectedState("data_raw")],
155
160
  target: str,
156
161
  target_bin_index: Union[int, str] = -1,
@@ -160,10 +165,10 @@ def correlation_funnel(
160
165
  name_infreq: str = "-OTHER",
161
166
  ) -> Tuple[str, Dict]:
162
167
  """
163
- Tool: correlation_funnel
168
+ Tool: generate_correlation_funnel
164
169
  Description:
165
170
  Correlation analysis using the correlation funnel method. The tool binarizes the data and computes correlation versus a target column.
166
-
171
+
167
172
  Parameters:
168
173
  ----------
169
174
  target : str
@@ -171,8 +176,8 @@ def correlation_funnel(
171
176
  with this string followed by '__' (e.g., 'Member_Status__Gold', 'Member_Status__Platinum').
172
177
  target_bin_index : int or str, default -1
173
178
  If an integer, selects the target level by position from the matching columns.
174
- If a string (e.g., "Yes"), attempts to match to the suffix of a column name
175
- (i.e., 'target__Yes').
179
+ If a string (e.g., "Yes"), attempts to match to the suffix of a column name
180
+ (i.e., 'target__Yes').
176
181
  corr_method : str
177
182
  The correlation method ('pearson', 'kendall', or 'spearman'). Default is 'pearson'.
178
183
  n_bins : int
@@ -182,34 +187,36 @@ def correlation_funnel(
182
187
  name_infreq : str
183
188
  The name to use for infrequent levels. Default is '-OTHER'.
184
189
  """
185
- print(" * Tool: correlation_funnel")
190
+ print(" * Tool: generate_correlation_funnel")
186
191
  try:
187
192
  import pytimetk as tk
188
193
  except ImportError:
189
- raise ImportError("Please install the 'pytimetk' package to use this tool. pip install pytimetk")
194
+ raise ImportError(
195
+ "Please install the 'pytimetk' package to use this tool. pip install pytimetk"
196
+ )
190
197
  import pandas as pd
191
198
  import base64
192
199
  from io import BytesIO
193
200
  import matplotlib.pyplot as plt
194
201
  import json
195
- import plotly.graph_objects as go
196
202
  import plotly.io as pio
197
- from typing import Union
198
203
 
199
204
  # Convert the raw injected state into a DataFrame.
200
205
  df = pd.DataFrame(data_raw)
201
-
206
+
202
207
  # Apply the binarization method.
203
208
  df_binarized = df.binarize(
204
- n_bins=n_bins,
205
- thresh_infreq=thresh_infreq,
206
- name_infreq=name_infreq,
207
- one_hot=True
209
+ n_bins=n_bins,
210
+ thresh_infreq=thresh_infreq,
211
+ name_infreq=name_infreq,
212
+ one_hot=True,
208
213
  )
209
-
214
+
210
215
  # Determine the full target column name.
211
216
  # Look for all columns that start with "target__"
212
- matching_columns = [col for col in df_binarized.columns if col.startswith(f"{target}__")]
217
+ matching_columns = [
218
+ col for col in df_binarized.columns if col.startswith(f"{target}__")
219
+ ]
213
220
  if not matching_columns:
214
221
  # If no matching columns are found, warn and use the provided target as-is.
215
222
  full_target = target
@@ -230,15 +237,15 @@ def correlation_funnel(
230
237
  except IndexError:
231
238
  # If index is out of bounds, use the last matching column.
232
239
  full_target = matching_columns[-1]
233
-
240
+
234
241
  # Compute correlation funnel using the full target column name.
235
242
  df_correlated = df_binarized.correlate(target=full_target, method=corr_method)
236
-
243
+
237
244
  # Attempt to generate a static plot.
238
245
  encoded = None
239
246
  try:
240
247
  # Here we assume that your DataFrame has a method plot_correlation_funnel.
241
- fig = df_correlated.plot_correlation_funnel(engine='plotnine', height=600)
248
+ fig = df_correlated.plot_correlation_funnel(engine="plotnine", height=600)
242
249
  buf = BytesIO()
243
250
  # Use the appropriate save method for your figure object.
244
251
  fig.save(buf, format="png")
@@ -247,18 +254,21 @@ def correlation_funnel(
247
254
  encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
248
255
  except Exception as e:
249
256
  encoded = {"error": str(e)}
250
-
257
+
251
258
  # Attempt to generate a Plotly plot.
252
259
  fig_dict = None
253
260
  try:
254
- fig = df_correlated.plot_correlation_funnel(engine='plotly')
261
+ fig = df_correlated.plot_correlation_funnel(engine="plotly", base_size=14)
262
+
255
263
  fig_json = pio.to_json(fig)
256
264
  fig_dict = json.loads(fig_json)
257
265
  except Exception as e:
258
266
  fig_dict = {"error": str(e)}
259
267
 
260
- content = (f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
261
- f"Base target was '{target}' with target_bin_index '{target_bin_index}'.")
268
+ content = (
269
+ f"Correlation funnel computed using method '{corr_method}' for target level '{full_target}'. "
270
+ f"Base target was '{target}' with target_bin_index '{target_bin_index}'."
271
+ )
262
272
  artifact = {
263
273
  "correlation_data": df_correlated.to_dict(orient="list"),
264
274
  "plot_image": encoded,
@@ -267,8 +277,7 @@ def correlation_funnel(
267
277
  return content, artifact
268
278
 
269
279
 
270
-
271
- @tool(response_format='content_and_artifact')
280
+ @tool(response_format="content_and_artifact")
272
281
  def generate_sweetviz_report(
273
282
  data_raw: Annotated[dict, InjectedState("data_raw")],
274
283
  target: str = None,
@@ -280,7 +289,7 @@ def generate_sweetviz_report(
280
289
  Tool: generate_sweetviz_report
281
290
  Description:
282
291
  Make an Exploratory Data Analysis (EDA) report using the Sweetviz library.
283
-
292
+
284
293
  Parameters:
285
294
  -----------
286
295
  data_raw : dict
@@ -290,11 +299,11 @@ def generate_sweetviz_report(
290
299
  report_name : str, optional
291
300
  The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
292
301
  report_directory : str, optional
293
- The directory where the report should be saved.
302
+ The directory where the report should be saved.
294
303
  If None, a temporary directory is created and used.
295
304
  open_browser : bool, optional
296
305
  Whether to open the report in a web browser. Default is False.
297
-
306
+
298
307
  Returns:
299
308
  --------
300
309
  Tuple[str, Dict]:
@@ -307,13 +316,15 @@ def generate_sweetviz_report(
307
316
  try:
308
317
  import sweetviz as sv
309
318
  except ImportError:
310
- raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
311
-
319
+ raise ImportError(
320
+ "Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz"
321
+ )
322
+
312
323
  import pandas as pd
313
-
324
+
314
325
  # Convert injected raw data to a DataFrame.
315
326
  df = pd.DataFrame(data_raw)
316
-
327
+
317
328
  # If no directory is specified, use a temporary directory.
318
329
  if not report_directory:
319
330
  report_directory = tempfile.mkdtemp()
@@ -322,26 +333,26 @@ def generate_sweetviz_report(
322
333
  # Ensure user-specified directory exists.
323
334
  if not os.path.exists(report_directory):
324
335
  os.makedirs(report_directory)
325
-
336
+
326
337
  # Create the Sweetviz report.
327
338
  report = sv.analyze(df, target_feat=target)
328
-
339
+
329
340
  # Determine the full path for the report.
330
341
  full_report_path = os.path.join(report_directory, report_name)
331
-
342
+
332
343
  # Save the report to the specified HTML file.
333
344
  report.show_html(
334
345
  filepath=full_report_path,
335
346
  open_browser=open_browser,
336
347
  )
337
-
348
+
338
349
  # Optionally, read the HTML content (if desired to pass along in the artifact).
339
350
  try:
340
351
  with open(full_report_path, "r", encoding="utf-8") as f:
341
352
  html_content = f.read()
342
353
  except Exception:
343
354
  html_content = None
344
-
355
+
345
356
  content = (
346
357
  f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
347
358
  f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
@@ -352,3 +363,53 @@ def generate_sweetviz_report(
352
363
  }
353
364
  return content, artifact
354
365
 
366
+
367
+ @tool(response_format="content_and_artifact")
368
+ def generate_dtale_report(
369
+ data_raw: Annotated[dict, InjectedState("data_raw")],
370
+ host: str = "localhost",
371
+ port: int = 40000,
372
+ open_browser: bool = False,
373
+ ) -> Tuple[str, Dict]:
374
+ """
375
+ Tool: generate_dtale_report
376
+ Description:
377
+ Creates an interactive data exploration report using the dtale library.
378
+
379
+ Parameters:
380
+ -----------
381
+ data_raw : dict
382
+ The raw data in dictionary format.
383
+ host : str, optional
384
+ The host IP address to serve the dtale app. Default is "localhost".
385
+ port : int, optional
386
+ The port number to serve the dtale app. Default is 40000.
387
+ open_browser : bool, optional
388
+ Whether to open the report in a web browser. Default is False.
389
+
390
+ Returns:
391
+ --------
392
+ Tuple[str, Dict]:
393
+ content: A summary message describing the dtale report.
394
+ artifact: A dictionary containing the URL of the dtale report.
395
+ """
396
+ print(" * Tool: generate_dtale_report")
397
+
398
+ try:
399
+ import dtale
400
+ except ImportError:
401
+ raise ImportError(
402
+ "Please install the 'dtale' package to use this tool. Run: pip install dtale"
403
+ )
404
+
405
+ import pandas as pd
406
+
407
+ df = pd.DataFrame(data_raw)
408
+
409
+ # Create the dtale report
410
+ d = dtale.show(df, host=host, port=port, open_browser=open_browser)
411
+
412
+ content = f"Dtale report generated and available at: {d.main_url()}"
413
+ artifact = {"dtale_url": d.main_url()}
414
+
415
+ return content, artifact
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9015
3
+ Version: 0.0.0.9016
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -47,6 +47,7 @@ Dynamic: classifier
47
47
  Dynamic: description
48
48
  Dynamic: description-content-type
49
49
  Dynamic: home-page
50
+ Dynamic: license-file
50
51
  Dynamic: provides-extra
51
52
  Dynamic: requires-dist
52
53
  Dynamic: requires-python
@@ -97,9 +98,8 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
97
98
  - [Companies That Want A Custom AI Data Science Team (And AI Apps)](#companies-that-want-a-custom-ai-data-science-team-and-ai-apps)
98
99
  - [Generative AI for Data Scientists Workshop](#generative-ai-for-data-scientists-workshop)
99
100
  - [Data Science Agents](#data-science-agents)
101
+ - [🔥 NEW: Data Science Apps](#-new-data-science-apps)
100
102
  - [NEW: Multi-Agents](#new-multi-agents)
101
- - [Data Science Apps](#data-science-apps)
102
- - [Apps Available Now](#apps-available-now)
103
103
  - [🔥 Agentic Applications](#-agentic-applications)
104
104
  - [Agents Available Now](#agents-available-now)
105
105
  - [Standard Agents](#standard-agents)
@@ -110,11 +110,11 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
110
110
  - [Disclaimer](#disclaimer)
111
111
  - [Installation](#installation)
112
112
  - [Usage](#usage)
113
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
114
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
113
+ - [Example: H2O Machine Learning Agent](#example-h2o-machine-learning-agent)
115
114
  - [Contributing](#contributing)
116
115
  - [License](#license)
117
116
  - [Want To Become A Full-Stack Generative AI Data Scientist?](#want-to-become-a-full-stack-generative-ai-data-scientist)
117
+ - [⭐️ Star History](#️-star-history)
118
118
 
119
119
  ## Companies That Want A Custom AI Data Science Team (And AI Apps)
120
120
 
@@ -134,21 +134,24 @@ This project is a work in progress. New data science agents will be released soo
134
134
 
135
135
  ![AI Data Science Team](/img/ai_data_science_team.jpg)
136
136
 
137
- ### NEW: Multi-Agents
137
+ ### 🔥 NEW: Data Science Apps
138
138
 
139
- **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
139
+ **🔥 Open Pandas AI Data Analyst:** Load an Excel or CSV file and ask it questions. Get data and charts back.
140
+
141
+ ![Pandas Data Analyst App](/img/apps/ai_pandas_data_analyst_app.jpg)
142
+
143
+ **🔥 SQL Database Agent:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table.
140
144
 
141
- ![Business Intelligence SQL Agent](/img/multi_agent_pandas_data_analyst.jpg)
145
+ **🔥 Exploratory Data Copilot:** An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more.
142
146
 
143
- ### Data Science Apps
147
+ [See all available apps here](/apps)
144
148
 
145
- This is a top secret project I'm working on. It's a multi-agent data science app that performs time series forecasting.
149
+ ### NEW: Multi-Agents
146
150
 
147
- ![Multi-Agent Data Science App](/img/ai_powered_apps.jpg)
151
+ **🔥 Pandas Data Analyst Agent:** Combines the ability to wrangle, transform, and analyze data with an optional data visualization agent that can create interactive plots.
148
152
 
149
- ### Apps Available Now
153
+ ![Pandas Data Analyst Agent](/img/multi_agent_pandas_data_analyst.jpg)
150
154
 
151
- [See all available apps here](/apps)
152
155
 
153
156
  #### 🔥 Agentic Applications
154
157
 
@@ -205,6 +208,14 @@ By using this software, you agree to use it solely for learning purposes.
205
208
 
206
209
  ## Installation
207
210
 
211
+ You can install via PyPI (note that this is a beta version and breaking changes may occur until 0.1.0):
212
+
213
+ ``` bash
214
+ pip install ai-data-science-team
215
+ ```
216
+
217
+ Or, if you want the latest version from GitHub:
218
+
208
219
  ``` bash
209
220
  pip install git+https://github.com/business-science/ai-data-science-team.git --upgrade
210
221
  ```
@@ -213,55 +224,46 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u
213
224
 
214
225
  [See all examples here.](/examples)
215
226
 
216
- ### Example 1: Feature Engineering with the Feature Engineering Agent
227
+ ### Example: H2O Machine Learning Agent
217
228
 
218
- [See the full example here.](/examples/feature_engineering_agent.ipynb)
229
+ [See the full example here.](https://github.com/business-science/ai-data-science-team/blob/master/examples/ml_agents/h2o_machine_learning_agent.ipynb)
219
230
 
220
231
  ``` python
221
- feature_engineering_agent = FeatureEngineeringAgent(model = llm)
222
-
223
- feature_engineering_agent.invoke_agent(
224
- data_raw = df,
225
- user_instructions = "Make sure to scale and center numeric features",
226
- target_variable = "Churn",
227
- max_retries = 3,
232
+ # Import libraries
233
+ from langchain_openai import ChatOpenAI
234
+ import pandas as pd
235
+ import h2o
236
+ import os
237
+ from ai_data_science_team.ml_agents import H2OMLAgent
238
+
239
+ # Load the data
240
+ df = pd.read_csv("data/churn_data.csv")
241
+ df
242
+
243
+ # Initialize the language model
244
+ os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
245
+ llm = ChatOpenAI(model=MODEL)
246
+ llm
247
+
248
+ # Initialize the H2O ML Agent
249
+ ml_agent = H2OMLAgent(
250
+ model=llm,
251
+ log=True,
252
+ log_path="logs/",
253
+ model_directory="h2o_models/",
254
+ enable_mlflow=True, # Use this if you wish to log models to MLflow
228
255
  )
229
- ```
230
-
231
- ``` bash
232
- ---FEATURE ENGINEERING AGENT----
233
- * CREATE FEATURE ENGINEER CODE
234
- * EXECUTING AGENT CODE
235
- * EXPLAIN AGENT CODE
236
- ```
237
-
238
- ``` python
239
- feature_engineering_agent.get_data_engineered()
240
- ```
241
-
242
- ### Example 2: Cleaning Data with the Data Cleaning Agent
243
-
244
- [See the full example here.](/examples/data_cleaning_agent.ipynb)
245
-
246
- ``` python
247
- data_cleaning_agent = DataCleaningAgent(model = llm)
256
+ ml_agent
248
257
 
249
- response = data_cleaning_agent.invoke_agent(
250
- data_raw = df,
251
- user_instructions = "Don't remove outliers when cleaning the data.",
252
- max_retries = 3,
258
+ # Run the agent
259
+ ml_agent.invoke_agent(
260
+ data_raw=df.drop(columns=["customerID"]),
261
+ user_instructions="Please do classification on 'Churn'. Use a max runtime of 30 seconds.",
262
+ target_variable="Churn"
253
263
  )
254
- ```
255
-
256
- ``` bash
257
- ---DATA CLEANING AGENT----
258
- * CREATE DATA CLEANER CODE
259
- * EXECUTING AGENT CODE
260
- * EXPLAIN AGENT CODE
261
- ```
262
264
 
263
- ``` python
264
- data_cleaning_agent.get_data_cleaned()
265
+ # Retrieve and display the leaderboard of models
266
+ ml_agent.get_leaderboard()
265
267
  ```
266
268
 
267
269
  ## Contributing
@@ -282,4 +284,8 @@ This project is licensed under the MIT License. See LICENSE file for details.
282
284
 
283
285
  I teach Generative AI Data Science to help you build AI-powered data science apps. [**Register for my next Generative AI for Data Scientists workshop here.**](https://learn.business-science.io/ai-register)
284
286
 
287
+ # ⭐️ Star History
285
288
 
289
+ [![Star History Chart](https://api.star-history.com/svg?repos=business-science/ai-data-science-team&type=Date)](https://star-history.com/#)
290
+
291
+ [**Please ⭐ us on GitHub (it takes 2 seconds and means a lot).**](https://github.com/business-science/ai-data-science-team)
@@ -1,5 +1,5 @@
1
1
  ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
2
- ai_data_science_team/_version.py,sha256=c-XrUvZG3E6SWR9NMQqLxISzMZJUpsnK0FlIEMHAOls,27
2
+ ai_data_science_team/_version.py,sha256=CuRBSRSns8bxBgkn7Hp4BqQhLmZGuLWdyc2Xq7zO6ww,27
3
3
  ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
4
  ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
5
5
  ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
@@ -9,7 +9,7 @@ ai_data_science_team/agents/data_wrangling_agent.py,sha256=jyBrEfLsgIqSF6xcmRgnk
9
9
  ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
10
10
  ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
11
11
  ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
12
- ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=x0kTwDo0BNbYzgA0YamMWdqRjx0upZgeXp9nF6C6_8E,8364
12
+ ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=RiwpAp2dIZyN1kRNk7WBUI5KsiP14dLuHm8fhOCsKCk,8228
13
13
  ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
15
15
  ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NXrNHb723NeF2rns0Y0,33934
@@ -26,7 +26,7 @@ ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J
26
26
  ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
28
28
  ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
29
- ai_data_science_team/tools/eda.py,sha256=orabE8qaYj5TC5n7CRS6rHOPkyBVxr488631AwkVKVg,12726
29
+ ai_data_science_team/tools/eda.py,sha256=ycE_VAgeDoJyZpt6jjprID-D3ocseYTdzlry-qiSc5w,14201
30
30
  ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
31
31
  ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
32
32
  ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -37,8 +37,8 @@ ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcU
37
37
  ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
38
38
  ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
39
39
  ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
40
- ai_data_science_team-0.0.0.9015.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
41
- ai_data_science_team-0.0.0.9015.dist-info/METADATA,sha256=tIcThz7trmAG6TZAnDHxy8ntBslXMKS5xSUbvaTygyQ,13164
42
- ai_data_science_team-0.0.0.9015.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
43
- ai_data_science_team-0.0.0.9015.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
44
- ai_data_science_team-0.0.0.9015.dist-info/RECORD,,
40
+ ai_data_science_team-0.0.0.9016.dist-info/licenses/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
41
+ ai_data_science_team-0.0.0.9016.dist-info/METADATA,sha256=Fxmv56STouZdBJurMyf98VgpATeLYajJlmIDtgsbPXg,13746
42
+ ai_data_science_team-0.0.0.9016.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
43
+ ai_data_science_team-0.0.0.9016.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
44
+ ai_data_science_team-0.0.0.9016.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5