ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9014__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. ai_data_science_team/__init__.py +22 -0
  2. ai_data_science_team/_version.py +1 -1
  3. ai_data_science_team/agents/data_cleaning_agent.py +17 -3
  4. ai_data_science_team/agents/data_loader_tools_agent.py +24 -1
  5. ai_data_science_team/agents/data_visualization_agent.py +17 -3
  6. ai_data_science_team/agents/data_wrangling_agent.py +30 -10
  7. ai_data_science_team/agents/feature_engineering_agent.py +17 -4
  8. ai_data_science_team/agents/sql_database_agent.py +15 -2
  9. ai_data_science_team/ds_agents/eda_tools_agent.py +28 -6
  10. ai_data_science_team/ml_agents/h2o_ml_agent.py +15 -3
  11. ai_data_science_team/ml_agents/mlflow_tools_agent.py +23 -1
  12. ai_data_science_team/multiagents/__init__.py +2 -1
  13. ai_data_science_team/multiagents/pandas_data_analyst.py +305 -0
  14. ai_data_science_team/multiagents/sql_data_analyst.py +119 -30
  15. ai_data_science_team/templates/agent_templates.py +41 -5
  16. ai_data_science_team/tools/dataframe.py +6 -1
  17. ai_data_science_team/tools/eda.py +75 -16
  18. ai_data_science_team/utils/messages.py +27 -0
  19. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA +7 -3
  20. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD +23 -21
  21. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/LICENSE +0 -0
  22. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/WHEEL +0 -0
  23. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,14 @@
1
1
 
2
2
  from langchain_core.messages import BaseMessage
3
- from langgraph.types import Checkpointer
3
+
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain_core.output_parsers import JsonOutputParser
4
6
 
5
7
  from langgraph.graph import START, END, StateGraph
6
8
  from langgraph.graph.state import CompiledStateGraph
7
- from langgraph.types import Command
9
+ from langgraph.types import Checkpointer
8
10
 
9
- from typing import TypedDict, Annotated, Sequence, Literal
11
+ from typing import TypedDict, Annotated, Sequence
10
12
  import operator
11
13
 
12
14
  from typing_extensions import TypedDict
@@ -20,6 +22,7 @@ from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
20
22
  from ai_data_science_team.utils.plotly import plotly_from_dict
21
23
  from ai_data_science_team.utils.regex import remove_consecutive_duplicates, get_generic_summary
22
24
 
25
+ AGENT_NAME = "sql_data_analyst"
23
26
 
24
27
  class SQLDataAnalyst(BaseAgent):
25
28
  """
@@ -33,6 +36,8 @@ class SQLDataAnalyst(BaseAgent):
33
36
  The SQL Database Agent.
34
37
  data_visualization_agent: DataVisualizationAgent
35
38
  The Data Visualization Agent.
39
+ checkpointer: Checkpointer (optional)
40
+ The checkpointer to save the state of the multi-agent system.
36
41
 
37
42
  Methods:
38
43
  --------
@@ -326,17 +331,17 @@ def make_sql_data_analyst(
326
331
  """
327
332
  Creates a multi-agent system that takes in a SQL query and returns a plot or table.
328
333
 
329
- - Agent 1: SQL Database Agent made with `make_sql_database_agent()`
330
- - Agent 2: Data Visualization Agent made with `make_data_visualization_agent()`
334
+ - Agent 1: SQL Database Agent made with `SQLDatabaseAgent()`
335
+ - Agent 2: Data Visualization Agent made with `DataVisualizationAgent()`
331
336
 
332
337
  Parameters:
333
338
  ----------
334
339
  model:
335
340
  The language model to be used for the agents.
336
341
  sql_database_agent: CompiledStateGraph
337
- The SQL Database Agent made with `make_sql_database_agent()`.
342
+ The SQL Database Agent made with `SQLDatabaseAgent()`.
338
343
  data_visualization_agent: CompiledStateGraph
339
- The Data Visualization Agent made with `make_data_visualization_agent()`.
344
+ The Data Visualization Agent made with `DataVisualizationAgent()`.
340
345
  checkpointer: Checkpointer (optional)
341
346
  The checkpointer to save the state of the multi-agent system.
342
347
  Default: None
@@ -348,10 +353,39 @@ def make_sql_data_analyst(
348
353
  """
349
354
 
350
355
  llm = model
356
+
357
+
358
+ routing_preprocessor_prompt = PromptTemplate(
359
+ template="""
360
+ You are an expert in routing decisions for a SQL Database Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to:
361
+
362
+ 1. Determine what the correct format for a Users Question should be for use with a SQL Database Agent based on the incoming user question. Anything related to database and data manipulation should be passed along.
363
+ 2. Determine whether or not a chart should be generated or a table should be returned based on the users question.
364
+ 3. If a chart is requested, determine the correct format of a Users Question should be used with a Data Visualization Agent. Anything related to plotting and visualization should be passed along.
365
+
366
+ Use the following criteria on how to route the the initial user question:
367
+
368
+ From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the SQL generator agent. This will be the 'user_instructions_sql_database'. If 'None' is found, return the original user question.
369
+
370
+ Next, determine if the user would like a data visualization ('chart') or a 'table' returned with the results of the Data Wrangling Agent. If unknown, not specified or 'None' is found, then select 'table'.
371
+
372
+ If a 'chart' is requested, return the 'user_instructions_data_visualization'. If 'None' is found, return None.
373
+
374
+ Return JSON with 'user_instructions_sql_database', 'user_instructions_data_visualization' and 'routing_preprocessor_decision'.
375
+
376
+ INITIAL_USER_QUESTION: {user_instructions}
377
+ """,
378
+ input_variables=["user_instructions"]
379
+ )
380
+
381
+ routing_preprocessor = routing_preprocessor_prompt | llm | JsonOutputParser()
351
382
 
352
383
  class PrimaryState(TypedDict):
353
384
  messages: Annotated[Sequence[BaseMessage], operator.add]
354
385
  user_instructions: str
386
+ user_instructions_sql_database: str
387
+ user_instructions_data_visualization: str
388
+ routing_preprocessor_decision: str
355
389
  sql_query_code: str
356
390
  sql_database_function: str
357
391
  data_sql: dict
@@ -359,39 +393,94 @@ def make_sql_data_analyst(
359
393
  plot_required: bool
360
394
  data_visualization_function: str
361
395
  plotly_graph: dict
396
+ plotly_error: str
362
397
  max_retries: int
363
398
  retry_count: int
364
399
 
365
- def route_to_visualization(state) -> Command[Literal["data_visualization_agent", "__end__"]]:
400
+ def preprocess_routing(state: PrimaryState):
401
+ print("---SQL DATA ANALYST---")
402
+ print("*************************")
403
+ print("---PREPROCESS ROUTER---")
404
+ question = state.get("user_instructions")
366
405
 
367
- response = llm.invoke(f"Respond in 1 word ('plot' or 'table'). Is the user requesting a plot? If unknown, select 'table'. \n\n User Instructions:\n{state.get('user_instructions')}")
406
+ # Chart Routing and SQL Prep
407
+ response = routing_preprocessor.invoke({"user_instructions": question})
368
408
 
369
- if response.content == 'plot':
370
- plot_required = True
371
- goto="data_visualization_agent"
372
- else:
373
- plot_required = False
374
- goto="__end__"
409
+ return {
410
+ "user_instructions_sql_database": response.get('user_instructions_sql_database'),
411
+ "user_instructions_data_visualization": response.get('user_instructions_data_visualization'),
412
+ "routing_preprocessor_decision": response.get('routing_preprocessor_decision'),
413
+ }
414
+
415
+ def router_chart_or_table(state: PrimaryState):
416
+ print("---ROUTER: CHART OR TABLE---")
417
+ return "chart" if state.get('routing_preprocessor_decision') == "chart" else "table"
418
+
419
+
420
+ def invoke_sql_database_agent(state: PrimaryState):
375
421
 
376
- return Command(
377
- update={
378
- 'data_raw': state.get("data_sql"),
379
- 'plot_required': plot_required,
380
- },
381
- goto=goto
382
- )
422
+ response = sql_database_agent.invoke({
423
+ "user_instructions": state.get("user_instructions_sql_database"),
424
+ "max_retries": state.get("max_retries"),
425
+ "retry_count": state.get("retry_count"),
426
+ })
383
427
 
384
- workflow = StateGraph(PrimaryState)
428
+ return {
429
+ "messages": response.get("messages"),
430
+ "data_sql": response.get("data_sql"),
431
+ "sql_query_code": response.get("sql_query_code"),
432
+ "sql_database_function": response.get("sql_database_function"),
433
+
434
+ }
435
+
436
+ def invoke_data_visualization_agent(state: PrimaryState):
437
+
438
+ response = data_visualization_agent.invoke({
439
+ "user_instructions": state.get("user_instructions_data_visualization"),
440
+ "data_raw": state.get("data_sql"),
441
+ "max_retries": state.get("max_retries"),
442
+ "retry_count": state.get("retry_count"),
443
+ })
444
+
445
+ return {
446
+ "messages": response.get("messages"),
447
+ "data_visualization_function": response.get("data_visualization_function"),
448
+ "plotly_graph": response.get("plotly_graph"),
449
+ "plotly_error": response.get("data_visualization_error"),
450
+ }
385
451
 
386
- workflow.add_node("sql_database_agent", sql_database_agent)
387
- workflow.add_node("route_to_visualization", route_to_visualization)
388
- workflow.add_node("data_visualization_agent", data_visualization_agent)
452
+ def route_printer(state: PrimaryState):
453
+ print("---ROUTE PRINTER---")
454
+ print(f" Route: {state.get('routing_preprocessor_decision')}")
455
+ print("---END---")
456
+ return {}
457
+
458
+ workflow = StateGraph(PrimaryState)
459
+
460
+ workflow.add_node("routing_preprocessor", preprocess_routing)
461
+ workflow.add_node("sql_database_agent", invoke_sql_database_agent)
462
+ workflow.add_node("data_visualization_agent", invoke_data_visualization_agent)
463
+ workflow.add_node("route_printer", route_printer)
389
464
 
390
- workflow.add_edge(START, "sql_database_agent")
391
- workflow.add_edge("sql_database_agent", "route_to_visualization")
392
- workflow.add_edge("data_visualization_agent", END)
465
+ workflow.add_edge(START, "routing_preprocessor")
466
+ workflow.add_edge("routing_preprocessor", "sql_database_agent")
467
+
468
+ workflow.add_conditional_edges(
469
+ "sql_database_agent",
470
+ router_chart_or_table,
471
+ {
472
+ "chart": "data_visualization_agent",
473
+ "table": "route_printer"
474
+ }
475
+ )
476
+
477
+ workflow.add_edge("data_visualization_agent", "route_printer")
478
+ workflow.add_edge("route_printer", END)
393
479
 
394
- app = workflow.compile(checkpointer=checkpointer)
480
+ app = workflow.compile(
481
+ checkpointer=checkpointer,
482
+ name=AGENT_NAME
483
+ )
395
484
 
396
485
  return app
397
486
 
@@ -40,6 +40,21 @@ class BaseAgent(CompiledStateGraph):
40
40
  self._params = params
41
41
  self._compiled_graph = self._make_compiled_graph()
42
42
  self.response = None
43
+ self.name = self._compiled_graph.name
44
+ self.checkpointer = self._compiled_graph.checkpointer
45
+ self.store = self._compiled_graph.store
46
+ self.output_channels = self._compiled_graph.output_channels
47
+ self.nodes = self._compiled_graph.nodes
48
+ self.stream_mode = self._compiled_graph.stream_mode
49
+ self.builder = self._compiled_graph.builder
50
+ self.channels = self._compiled_graph.channels
51
+ self.input_channels = self._compiled_graph.input_channels
52
+ self.input_schema = self._compiled_graph.input_schema
53
+ self.output_schema = self._compiled_graph.output_schema
54
+ self.debug = self._compiled_graph.debug
55
+ self.interrupt_after_nodes = self._compiled_graph.interrupt_after_nodes
56
+ self.interrupt_before_nodes = self._compiled_graph.interrupt_before_nodes
57
+ self.config = self._compiled_graph.config
43
58
 
44
59
  def _make_compiled_graph(self):
45
60
  """
@@ -197,6 +212,24 @@ class BaseAgent(CompiledStateGraph):
197
212
  """
198
213
  return self.get_output_jsonschema()['properties']
199
214
 
215
+ def get_state(self, config, *, subgraphs = False):
216
+ """
217
+ Returns the state of the agent.
218
+ """
219
+ return self._compiled_graph.get_state(config, subgraphs=subgraphs)
220
+
221
+ def get_state_history(self, config, *, filter = None, before = None, limit = None):
222
+ """
223
+ Returns the state history of the agent.
224
+ """
225
+ return self._compiled_graph.get_state_history(config, filter=filter, before=before, limit=limit)
226
+
227
+ def update_state(self, config, values, as_node = None):
228
+ """
229
+ Updates the state of the agent.
230
+ """
231
+ return self._compiled_graph.update_state(config, values, as_node)
232
+
200
233
  def get_response(self):
201
234
  """
202
235
  Returns the response generated by the agent.
@@ -237,6 +270,7 @@ def create_coding_agent_graph(
237
270
  checkpointer: Optional[Callable] = None,
238
271
  bypass_recommended_steps: bool = False,
239
272
  bypass_explain_code: bool = False,
273
+ agent_name: str = "coding_agent"
240
274
  ):
241
275
  """
242
276
  Creates a generic agent graph using the provided node functions and node names.
@@ -281,6 +315,8 @@ def create_coding_agent_graph(
281
315
  Whether to skip the recommended steps node.
282
316
  bypass_explain_code : bool, optional
283
317
  Whether to skip the final explain code node.
318
+ agent_name : str, optional
319
+ The name of the agent graph.
284
320
 
285
321
  Returns
286
322
  -------
@@ -366,10 +402,10 @@ def create_coding_agent_graph(
366
402
  workflow.add_edge(explain_code_node_name, END)
367
403
 
368
404
  # Finally, compile
369
- if human_in_the_loop:
370
- app = workflow.compile(checkpointer=checkpointer)
371
- else:
372
- app = workflow.compile()
405
+ app = workflow.compile(
406
+ checkpointer=checkpointer,
407
+ name=agent_name,
408
+ )
373
409
 
374
410
  return app
375
411
 
@@ -574,7 +610,7 @@ def node_func_execute_agent_from_sql_connection(
574
610
 
575
611
  # Retrieve SQLAlchemy connection and code snippet from the state
576
612
  is_engine = isinstance(connection, sql.engine.base.Engine)
577
- conn = connection.connect() if is_engine else connection
613
+ connection = connection.connect() if is_engine else connection
578
614
  agent_code = state.get(code_snippet_key)
579
615
 
580
616
  # Ensure the connection object is provided
@@ -74,7 +74,12 @@ def get_dataframe_summary(
74
74
  return summaries
75
75
 
76
76
 
77
- def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
77
+ def _summarize_dataframe(
78
+ df: pd.DataFrame,
79
+ dataset_name: str,
80
+ n_sample=30,
81
+ skip_stats=False
82
+ ) -> str:
78
83
  """Generate a summary string for a single DataFrame."""
79
84
  # 1. Convert dictionary-type cells to strings
80
85
  # This prevents unhashable dict errors during df.nunique().
@@ -2,11 +2,44 @@
2
2
  from typing import Annotated, Dict, Tuple, Union
3
3
 
4
4
  import os
5
+ import tempfile
5
6
 
6
7
  from langchain.tools import tool
7
8
 
8
9
  from langgraph.prebuilt import InjectedState
9
10
 
11
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
12
+
13
+
14
+ @tool(response_format='content')
15
+ def explain_data(
16
+ data_raw: Annotated[dict, InjectedState("data_raw")],
17
+ n_sample: int = 30,
18
+ skip_stats: bool = False,
19
+ ):
20
+ """
21
+ Tool: explain_data
22
+ Description:
23
+ Provides an extensive, narrative summary of a DataFrame including its shape, column types,
24
+ missing value percentages, unique counts, sample rows, and (if not skipped) descriptive stats/info.
25
+
26
+ Parameters:
27
+ data_raw (dict): Raw data.
28
+ n_sample (int, default=30): Number of rows to display.
29
+ skip_stats (bool, default=False): If True, omit descriptive stats/info.
30
+
31
+ LLM Guidance:
32
+ Use when a detailed, human-readable explanation is needed—i.e., a full overview is preferred over a concise numerical summary.
33
+
34
+ Returns:
35
+ str: Detailed DataFrame summary.
36
+ """
37
+ print(" * Tool: explain_data")
38
+ import pandas as pd
39
+
40
+ result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
41
+
42
+ return result
10
43
 
11
44
  @tool(response_format='content_and_artifact')
12
45
  def describe_dataset(
@@ -15,21 +48,33 @@ def describe_dataset(
15
48
  """
16
49
  Tool: describe_dataset
17
50
  Description:
18
- Describe the dataset by computing summary
19
- statistics using the DataFrame's describe() method.
20
-
51
+ Compute and return summary statistics for the dataset using pandas' describe() method.
52
+ The tool provides both a textual summary and a structured artifact (a dictionary) for further processing.
53
+
54
+ Parameters:
55
+ -----------
56
+ data_raw : dict
57
+ The raw data in dictionary format.
58
+
59
+ LLM Selection Guidance:
60
+ ------------------------
61
+ Use this tool when:
62
+ - The request emphasizes numerical descriptive statistics (e.g., count, mean, std, min, quartiles, max).
63
+ - The user needs a concise statistical snapshot rather than a detailed narrative.
64
+ - Both a brief text explanation and a structured data artifact (for downstream tasks) are required.
65
+
21
66
  Returns:
22
67
  -------
23
68
  Tuple[str, Dict]:
24
- content: A textual summary of the DataFrame's descriptive statistics.
25
- artifact: A dictionary (from DataFrame.describe()) for further inspection.
69
+ - content: A textual summary indicating that summary statistics have been computed.
70
+ - artifact: A dictionary (derived from DataFrame.describe()) containing detailed statistical measures.
26
71
  """
27
72
  print(" * Tool: describe_dataset")
28
73
  import pandas as pd
29
74
  df = pd.DataFrame(data_raw)
30
75
  description_df = df.describe(include='all')
31
76
  content = "Summary statistics computed using pandas describe()."
32
- artifact = description_df.to_dict()
77
+ artifact = {'describe_df': description_df.to_dict()}
33
78
  return content, artifact
34
79
 
35
80
 
@@ -226,8 +271,8 @@ def generate_sweetviz_report(
226
271
  data_raw: Annotated[dict, InjectedState("data_raw")],
227
272
  target: str = None,
228
273
  report_name: str = "sweetviz_report.html",
229
- report_directory: str = os.path.join(os.getcwd(), "reports"),
230
- open_browser: bool = True,
274
+ report_directory: str = None, # <-- Default to None
275
+ open_browser: bool = False,
231
276
  ) -> Tuple[str, Dict]:
232
277
  """
233
278
  Tool: generate_sweetviz_report
@@ -243,9 +288,10 @@ def generate_sweetviz_report(
243
288
  report_name : str, optional
244
289
  The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
245
290
  report_directory : str, optional
246
- The directory where the report should be saved. Defaults to a 'reports' directory in the current working directory.
291
+ The directory where the report should be saved.
292
+ If None, a temporary directory is created and used.
247
293
  open_browser : bool, optional
248
- Whether to open the report in a web browser. Default is True.
294
+ Whether to open the report in a web browser. Default is False.
249
295
 
250
296
  Returns:
251
297
  --------
@@ -254,28 +300,37 @@ def generate_sweetviz_report(
254
300
  artifact: A dictionary with the report file path and optionally the report's HTML content.
255
301
  """
256
302
  print(" * Tool: generate_sweetviz_report")
303
+
304
+ # Import sweetviz
257
305
  try:
258
306
  import sweetviz as sv
259
307
  except ImportError:
260
308
  raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
309
+
261
310
  import pandas as pd
311
+
262
312
  # Convert injected raw data to a DataFrame.
263
313
  df = pd.DataFrame(data_raw)
264
314
 
315
+ # If no directory is specified, use a temporary directory.
316
+ if not report_directory:
317
+ report_directory = tempfile.mkdtemp()
318
+ print(f" * Using temporary directory: {report_directory}")
319
+ else:
320
+ # Ensure user-specified directory exists.
321
+ if not os.path.exists(report_directory):
322
+ os.makedirs(report_directory)
323
+
265
324
  # Create the Sweetviz report.
266
325
  report = sv.analyze(df, target_feat=target)
267
326
 
268
- # Ensure the directory exists; default is os.getcwd()/reports
269
- if not os.path.exists(report_directory):
270
- os.makedirs(report_directory)
271
-
272
327
  # Determine the full path for the report.
273
328
  full_report_path = os.path.join(report_directory, report_name)
274
329
 
275
330
  # Save the report to the specified HTML file.
276
331
  report.show_html(
277
332
  filepath=full_report_path,
278
- open_browser=True,
333
+ open_browser=open_browser,
279
334
  )
280
335
 
281
336
  # Optionally, read the HTML content (if desired to pass along in the artifact).
@@ -285,9 +340,13 @@ def generate_sweetviz_report(
285
340
  except Exception:
286
341
  html_content = None
287
342
 
288
- content = f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'."
343
+ content = (
344
+ f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
345
+ f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
346
+ )
289
347
  artifact = {
290
348
  "report_file": os.path.abspath(full_report_path),
291
349
  "report_html": html_content,
292
350
  }
293
351
  return content, artifact
352
+
@@ -0,0 +1,27 @@
1
+
2
+
3
+
4
def get_tool_call_names(messages):
    """
    Extract the tool names from the tool-result messages in a message list.

    A message is treated as a tool result when its fields include a
    ``tool_call_id`` key. The fields are obtained with ``dict(message)``,
    so any object that ``dict()`` can turn into field/value pairs is
    inspected, as well as plain dictionaries. Anything else is skipped.

    Parameters:
    ----------
    messages : list
        A list of LangChain messages. ``None`` or an empty list yields an
        empty result.

    Returns:
    -------
    tool_calls : list
        The ``name`` of every tool-result message, in the order the messages
        appear. A message object whose ``name`` attribute is ``None``
        contributes ``None``.

    """
    tool_calls = []
    for message in messages or ():
        try:
            fields = dict(message)
        except (TypeError, ValueError):
            # Best effort: the entry cannot be viewed as field/value pairs
            # (e.g. a number or a bare string), so it cannot be a tool message.
            continue

        if "tool_call_id" not in fields:
            continue

        if hasattr(message, "name"):
            # Message objects expose the tool name as an attribute.
            tool_calls.append(message.name)
        elif "name" in fields:
            # Dict-form messages only carry the tool name as a key.
            tool_calls.append(fields["name"])
    return tool_calls
27
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ai-data-science-team
3
- Version: 0.0.0.9012
3
+ Version: 0.0.0.9014
4
4
  Summary: Build and run an AI-powered data science team.
5
5
  Home-page: https://github.com/business-science/ai-data-science-team
6
6
  Author: Matt Dancho
@@ -18,7 +18,7 @@ Requires-Dist: langchain
18
18
  Requires-Dist: langchain_community
19
19
  Requires-Dist: langchain_openai
20
20
  Requires-Dist: langchain_experimental
21
- Requires-Dist: langgraph>=0.2.57
21
+ Requires-Dist: langgraph>=0.2.74
22
22
  Requires-Dist: openai
23
23
  Requires-Dist: pandas
24
24
  Requires-Dist: sqlalchemy
@@ -152,7 +152,11 @@ This is a top secret project I'm working on. It's a multi-agent data science app
152
152
 
153
153
  #### 🔥 Agentic Applications
154
154
 
155
- 1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
155
+ 1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
156
+
157
+ ![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
158
+
159
+ 2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
156
160
 
157
161
  ### Agents Available Now
158
162
 
@@ -1,31 +1,32 @@
1
- ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ai_data_science_team/_version.py,sha256=BybGt-zGNDZsdJxDMV3xmjghiRF8jmwG3ov_dt_rM7E,26
1
+ ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
2
+ ai_data_science_team/_version.py,sha256=D4dUl-fYnimOU_VSzvrmJm30_IoaF_9m9dTLp8HE6rQ,26
3
3
  ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
4
  ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
5
- ai_data_science_team/agents/data_cleaning_agent.py,sha256=V5tJMwGJK0JwrF_H-7r3S0E8UkAY6ci4BGxqjhZiGBI,27352
6
- ai_data_science_team/agents/data_loader_tools_agent.py,sha256=fnkOvmrXzvTTt1mnAyTlsF_7ZGrkp3P97YU_LgeffMg,8445
7
- ai_data_science_team/agents/data_visualization_agent.py,sha256=tJy9Ehnh9mvAu6H--TXI8esSHmK1RW_L1RDAdn7Xek4,28821
8
- ai_data_science_team/agents/data_wrangling_agent.py,sha256=LxzphH-TmrFG0GjejGOjulhPq4SsWFo5Y9tk4WEuN4M,32347
9
- ai_data_science_team/agents/feature_engineering_agent.py,sha256=KmPBkj7WUBz6LFUlDDfQHMi7ujXwsH5P9LWRS-F4tdM,31026
10
- ai_data_science_team/agents/sql_database_agent.py,sha256=1K2o3NiuKgGKdbMz_Tq9IeQ8xhXjpfGOxx9lArZh1yE,31173
5
+ ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
6
+ ai_data_science_team/agents/data_loader_tools_agent.py,sha256=TFKzYqV6cvU-sMbfL-hg8-NgF_Hz3nysGFldvb5K3fM,9327
7
+ ai_data_science_team/agents/data_visualization_agent.py,sha256=eUSTzTOm5aLJ6Cqnk-hRuXeVbYyy0RIzN8_0LLy0P9o,29387
8
+ ai_data_science_team/agents/data_wrangling_agent.py,sha256=6tiDO1i-5s2Ju6_MsLoJMflUuRSf_1oTSsSKcLlgzEc,33376
9
+ ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
10
+ ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
11
11
  ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
12
- ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=y65lsBXhQNOGwWealEho6uFxGSTW7FNfvTUZnW8_XNY,7609
12
+ ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=x0kTwDo0BNbYzgA0YamMWdqRjx0upZgeXp9nF6C6_8E,8364
13
13
  ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
15
- ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=DamR72agrTKfdcdhablmP2mpbj0CqtMonP-QU8p7o9w,33394
15
+ ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NXrNHb723NeF2rns0Y0,33934
16
16
  ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=zbT0KIsmQp_sEyxzXRguhqx5913Q2yPYyKGU6TUWEM8,11067
18
- ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
19
- ai_data_science_team/multiagents/sql_data_analyst.py,sha256=kmmED3gLf5STWWY6ZVJYd7_Pt8NMl6SHyBocuQzRDGk,14193
17
+ ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=QImaZnS8hPdrU7GI6pZ0dUDO-LXx40MSA3XyMDppIh0,12003
18
+ ai_data_science_team/multiagents/__init__.py,sha256=5tpmZBQ_UT5SKDCS_NivZhN19HEStKIcstiqSXPXDl0,208
19
+ ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=O662v-75tLqHHrVNjncsPeR2FB4MWSWruJRIF-YO-fg,13581
20
+ ai_data_science_team/multiagents/sql_data_analyst.py,sha256=Fpue6WcX9x18kEH3kfEi8kkFoG9HhQ1AZiWw6Y6FXOo,18502
20
21
  ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
21
22
  ai_data_science_team/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
23
  ai_data_science_team/parsers/parsers.py,sha256=hIsMZXRHz9hqs8R1ebymKA7D6NxOf5UVMpDAr_gGhE8,2027
23
24
  ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP3cEFuf22-R5RM,330
24
- ai_data_science_team/templates/agent_templates.py,sha256=Lezp0ugtIP3m5WUOmjLwghNnjjyQVQecysONeIHWwi0,29133
25
+ ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J2GG91REJOKUUEY71Dszs,30767
25
26
  ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
27
  ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
27
- ai_data_science_team/tools/dataframe.py,sha256=qSflGDByqqCXv4TjuvOFvGPZmegzeOesb0Y4i4Y0gdQ,4551
28
- ai_data_science_team/tools/eda.py,sha256=UGD6PC12RsB_UmStvR4TmSqv0noxjM4DkzY-kHjI0-E,10591
28
+ ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
29
+ ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
29
30
  ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
30
31
  ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
31
32
  ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -33,10 +34,11 @@ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
33
34
  ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
34
35
  ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
35
36
  ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
37
+ ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
36
38
  ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
37
39
  ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
38
- ai_data_science_team-0.0.0.9012.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
39
- ai_data_science_team-0.0.0.9012.dist-info/METADATA,sha256=geRCFLG3YO9uprp_CGKiqCTSThg06L2U6WxVqYKzyM8,12704
40
- ai_data_science_team-0.0.0.9012.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
41
- ai_data_science_team-0.0.0.9012.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
42
- ai_data_science_team-0.0.0.9012.dist-info/RECORD,,
40
+ ai_data_science_team-0.0.0.9014.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
41
+ ai_data_science_team-0.0.0.9014.dist-info/METADATA,sha256=a35LbXyxf_XiP82m_P5HLFwPrmuzXkNWbyfzGge7dHA,13021
42
+ ai_data_science_team-0.0.0.9014.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
43
+ ai_data_science_team-0.0.0.9014.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
44
+ ai_data_science_team-0.0.0.9014.dist-info/RECORD,,