ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9014__py3-none-any.whl

Files changed (23)
  1. ai_data_science_team/__init__.py +22 -0
  2. ai_data_science_team/_version.py +1 -1
  3. ai_data_science_team/agents/data_cleaning_agent.py +17 -3
  4. ai_data_science_team/agents/data_loader_tools_agent.py +24 -1
  5. ai_data_science_team/agents/data_visualization_agent.py +17 -3
  6. ai_data_science_team/agents/data_wrangling_agent.py +30 -10
  7. ai_data_science_team/agents/feature_engineering_agent.py +17 -4
  8. ai_data_science_team/agents/sql_database_agent.py +15 -2
  9. ai_data_science_team/ds_agents/eda_tools_agent.py +28 -6
  10. ai_data_science_team/ml_agents/h2o_ml_agent.py +15 -3
  11. ai_data_science_team/ml_agents/mlflow_tools_agent.py +23 -1
  12. ai_data_science_team/multiagents/__init__.py +2 -1
  13. ai_data_science_team/multiagents/pandas_data_analyst.py +305 -0
  14. ai_data_science_team/multiagents/sql_data_analyst.py +119 -30
  15. ai_data_science_team/templates/agent_templates.py +41 -5
  16. ai_data_science_team/tools/dataframe.py +6 -1
  17. ai_data_science_team/tools/eda.py +75 -16
  18. ai_data_science_team/utils/messages.py +27 -0
  19. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA +7 -3
  20. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD +23 -21
  21. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/LICENSE +0 -0
  22. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/WHEEL +0 -0
  23. {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/top_level.txt +0 -0
ai_data_science_team/multiagents/sql_data_analyst.py

```diff
@@ -1,12 +1,14 @@
 
 from langchain_core.messages import BaseMessage
-from langgraph.types import Checkpointer
+
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
 
 from langgraph.graph import START, END, StateGraph
 from langgraph.graph.state import CompiledStateGraph
-from langgraph.types import Command
+from langgraph.types import Checkpointer
 
-from typing import TypedDict, Annotated, Sequence, Literal
+from typing import TypedDict, Annotated, Sequence
 import operator
 
 from typing_extensions import TypedDict
@@ -20,6 +22,7 @@ from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
 from ai_data_science_team.utils.plotly import plotly_from_dict
 from ai_data_science_team.utils.regex import remove_consecutive_duplicates, get_generic_summary
 
+AGENT_NAME = "sql_data_analyst"
 
 class SQLDataAnalyst(BaseAgent):
     """
@@ -33,6 +36,8 @@ class SQLDataAnalyst(BaseAgent):
         The SQL Database Agent.
     data_visualization_agent: DataVisualizationAgent
         The Data Visualization Agent.
+    checkpointer: Checkpointer (optional)
+        The checkpointer to save the state of the multi-agent system.
 
     Methods:
     --------
@@ -326,17 +331,17 @@ def make_sql_data_analyst(
     """
     Creates a multi-agent system that takes in a SQL query and returns a plot or table.
 
-    - Agent 1: SQL Database Agent made with `make_sql_database_agent()`
-    - Agent 2: Data Visualization Agent made with `make_data_visualization_agent()`
+    - Agent 1: SQL Database Agent made with `SQLDatabaseAgent()`
+    - Agent 2: Data Visualization Agent made with `DataVisualizationAgent()`
 
     Parameters:
     ----------
     model:
         The language model to be used for the agents.
     sql_database_agent: CompiledStateGraph
-        The SQL Database Agent made with `make_sql_database_agent()`.
+        The SQL Database Agent made with `SQLDatabaseAgent()`.
     data_visualization_agent: CompiledStateGraph
-        The Data Visualization Agent made with `make_data_visualization_agent()`.
+        The Data Visualization Agent made with `DataVisualizationAgent()`.
     checkpointer: Checkpointer (optional)
         The checkpointer to save the state of the multi-agent system.
         Default: None
@@ -348,10 +353,39 @@ def make_sql_data_analyst(
     """
 
     llm = model
+
+
+    routing_preprocessor_prompt = PromptTemplate(
+        template="""
+        You are an expert in routing decisions for a SQL Database Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to:
+
+        1. Determine what the correct format for a Users Question should be for use with a SQL Database Agent based on the incoming user question. Anything related to database and data manipulation should be passed along.
+        2. Determine whether or not a chart should be generated or a table should be returned based on the users question.
+        3. If a chart is requested, determine the correct format of a Users Question should be used with a Data Visualization Agent. Anything related to plotting and visualization should be passed along.
+
+        Use the following criteria on how to route the the initial user question:
+
+        From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the SQL generator agent. This will be the 'user_instructions_sql_database'. If 'None' is found, return the original user question.
+
+        Next, determine if the user would like a data visualization ('chart') or a 'table' returned with the results of the Data Wrangling Agent. If unknown, not specified or 'None' is found, then select 'table'.
+
+        If a 'chart' is requested, return the 'user_instructions_data_visualization'. If 'None' is found, return None.
+
+        Return JSON with 'user_instructions_sql_database', 'user_instructions_data_visualization' and 'routing_preprocessor_decision'.
+
+        INITIAL_USER_QUESTION: {user_instructions}
+        """,
+        input_variables=["user_instructions"]
+    )
+
+    routing_preprocessor = routing_preprocessor_prompt | llm | JsonOutputParser()
 
     class PrimaryState(TypedDict):
         messages: Annotated[Sequence[BaseMessage], operator.add]
         user_instructions: str
+        user_instructions_sql_database: str
+        user_instructions_data_visualization: str
+        routing_preprocessor_decision: str
         sql_query_code: str
         sql_database_function: str
         data_sql: dict
```
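The new routing step is a plain LCEL chain (`PromptTemplate | llm | JsonOutputParser`) rather than a raw `llm.invoke` call, so it can be exercised standalone. A minimal sketch of the same chain shape, assuming an OpenAI chat model and an illustrative question (neither is taken from the package, and the prompt is abbreviated):

```python
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI

# Assumption: any LangChain chat model works here; gpt-4o-mini is illustrative.
llm = ChatOpenAI(model="gpt-4o-mini")

# Same shape as routing_preprocessor above, with a shortened prompt.
prompt = PromptTemplate(
    template=(
        "Return JSON with keys 'user_instructions_sql_database', "
        "'user_instructions_data_visualization', and "
        "'routing_preprocessor_decision' ('chart' or 'table') "
        "for this question: {user_instructions}"
    ),
    input_variables=["user_instructions"],
)
routing_preprocessor = prompt | llm | JsonOutputParser()

# JsonOutputParser yields a dict, which preprocess_routing() (next hunk)
# unpacks into the three new PrimaryState keys.
result = routing_preprocessor.invoke(
    {"user_instructions": "Plot total sales by month from the orders table."}
)
print(result["routing_preprocessor_decision"])  # expected: 'chart'
```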
```diff
@@ -359,39 +393,94 @@ def make_sql_data_analyst(
         plot_required: bool
         data_visualization_function: str
         plotly_graph: dict
+        plotly_error: str
         max_retries: int
         retry_count: int
 
-    def route_to_visualization(state) -> Command[Literal["data_visualization_agent", "__end__"]]:
+    def preprocess_routing(state: PrimaryState):
+        print("---SQL DATA ANALYST---")
+        print("*************************")
+        print("---PREPROCESS ROUTER---")
+        question = state.get("user_instructions")
 
-        response = llm.invoke(f"Respond in 1 word ('plot' or 'table'). Is the user requesting a plot? If unknown, select 'table'. \n\n User Instructions:\n{state.get('user_instructions')}")
+        # Chart Routing and SQL Prep
+        response = routing_preprocessor.invoke({"user_instructions": question})
 
-        if response.content == 'plot':
-            plot_required = True
-            goto="data_visualization_agent"
-        else:
-            plot_required = False
-            goto="__end__"
+        return {
+            "user_instructions_sql_database": response.get('user_instructions_sql_database'),
+            "user_instructions_data_visualization": response.get('user_instructions_data_visualization'),
+            "routing_preprocessor_decision": response.get('routing_preprocessor_decision'),
+        }
+
+    def router_chart_or_table(state: PrimaryState):
+        print("---ROUTER: CHART OR TABLE---")
+        return "chart" if state.get('routing_preprocessor_decision') == "chart" else "table"
+
+
+    def invoke_sql_database_agent(state: PrimaryState):
 
-        return Command(
-            update={
-                'data_raw': state.get("data_sql"),
-                'plot_required': plot_required,
-            },
-            goto=goto
-        )
+        response = sql_database_agent.invoke({
+            "user_instructions": state.get("user_instructions_sql_database"),
+            "max_retries": state.get("max_retries"),
+            "retry_count": state.get("retry_count"),
+        })
 
-    workflow = StateGraph(PrimaryState)
+        return {
+            "messages": response.get("messages"),
+            "data_sql": response.get("data_sql"),
+            "sql_query_code": response.get("sql_query_code"),
+            "sql_database_function": response.get("sql_database_function"),
+
+        }
+
+    def invoke_data_visualization_agent(state: PrimaryState):
+
+        response = data_visualization_agent.invoke({
+            "user_instructions": state.get("user_instructions_data_visualization"),
+            "data_raw": state.get("data_sql"),
+            "max_retries": state.get("max_retries"),
+            "retry_count": state.get("retry_count"),
+        })
+
+        return {
+            "messages": response.get("messages"),
+            "data_visualization_function": response.get("data_visualization_function"),
+            "plotly_graph": response.get("plotly_graph"),
+            "plotly_error": response.get("data_visualization_error"),
+        }
 
-    workflow.add_node("sql_database_agent", sql_database_agent)
-    workflow.add_node("route_to_visualization", route_to_visualization)
-    workflow.add_node("data_visualization_agent", data_visualization_agent)
+    def route_printer(state: PrimaryState):
+        print("---ROUTE PRINTER---")
+        print(f"    Route: {state.get('routing_preprocessor_decision')}")
+        print("---END---")
+        return {}
+
+    workflow = StateGraph(PrimaryState)
+
+    workflow.add_node("routing_preprocessor", preprocess_routing)
+    workflow.add_node("sql_database_agent", invoke_sql_database_agent)
+    workflow.add_node("data_visualization_agent", invoke_data_visualization_agent)
+    workflow.add_node("route_printer", route_printer)
 
-    workflow.add_edge(START, "sql_database_agent")
-    workflow.add_edge("sql_database_agent", "route_to_visualization")
-    workflow.add_edge("data_visualization_agent", END)
+    workflow.add_edge(START, "routing_preprocessor")
+    workflow.add_edge("routing_preprocessor", "sql_database_agent")
+
+    workflow.add_conditional_edges(
+        "sql_database_agent",
+        router_chart_or_table,
+        {
+            "chart": "data_visualization_agent",
+            "table": "route_printer"
+        }
+    )
+
+    workflow.add_edge("data_visualization_agent", "route_printer")
+    workflow.add_edge("route_printer", END)
 
-    app = workflow.compile(checkpointer=checkpointer)
+    app = workflow.compile(
+        checkpointer=checkpointer,
+        name=AGENT_NAME
+    )
 
     return app
 
```
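With the conditional edge in place, the graph now runs routing_preprocessor → sql_database_agent → (data_visualization_agent | route_printer). A hedged end-to-end sketch: the constructor arguments follow the class docstring above, while the SQLite path, the `invoke_agent` entry point, and the `get_plotly_graph` helper mirror how the single-agent classes are used elsewhere in this package and are assumptions, not lines from this diff:

```python
import sqlalchemy as sql
from langchain_openai import ChatOpenAI
from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
from ai_data_science_team.multiagents import SQLDataAnalyst

llm = ChatOpenAI(model="gpt-4o-mini")
conn = sql.create_engine("sqlite:///northwind.db").connect()  # illustrative database

sql_data_analyst = SQLDataAnalyst(
    model=llm,
    sql_database_agent=SQLDatabaseAgent(model=llm, connection=conn),
    data_visualization_agent=DataVisualizationAgent(model=llm),
)

# The preprocessor strips the formatting request out of the question, routes
# 'chart' to the visualization agent, and AGENT_NAME tags the compiled graph.
sql_data_analyst.invoke_agent(
    user_instructions="Make a plot of monthly revenue by product category."
)
plot = sql_data_analyst.get_plotly_graph()  # assumption: accessor from the class API
```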
ai_data_science_team/templates/agent_templates.py

```diff
@@ -40,6 +40,21 @@ class BaseAgent(CompiledStateGraph):
         self._params = params
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
+        self.name = self._compiled_graph.name
+        self.checkpointer = self._compiled_graph.checkpointer
+        self.store = self._compiled_graph.store
+        self.output_channels = self._compiled_graph.output_channels
+        self.nodes = self._compiled_graph.nodes
+        self.stream_mode = self._compiled_graph.stream_mode
+        self.builder = self._compiled_graph.builder
+        self.channels = self._compiled_graph.channels
+        self.input_channels = self._compiled_graph.input_channels
+        self.input_schema = self._compiled_graph.input_schema
+        self.output_schema = self._compiled_graph.output_schema
+        self.debug = self._compiled_graph.debug
+        self.interrupt_after_nodes = self._compiled_graph.interrupt_after_nodes
+        self.interrupt_before_nodes = self._compiled_graph.interrupt_before_nodes
+        self.config = self._compiled_graph.config
 
     def _make_compiled_graph(self):
         """
@@ -197,6 +212,24 @@ class BaseAgent(CompiledStateGraph):
         """
         return self.get_output_jsonschema()['properties']
 
+    def get_state(self, config, *, subgraphs = False):
+        """
+        Returns the state of the agent.
+        """
+        return self._compiled_graph.get_state(config, subgraphs=subgraphs)
+
+    def get_state_history(self, config, *, filter = None, before = None, limit = None):
+        """
+        Returns the state history of the agent.
+        """
+        return self._compiled_graph.get_state_history(config, filter=filter, before=before, limit=limit)
+
+    def update_state(self, config, values, as_node = None):
+        """
+        Updates the state of the agent.
+        """
+        return self._compiled_graph.update_state(config, values, as_node)
+
     def get_response(self):
         """
         Returns the response generated by the agent.
```
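These mirrored attributes and pass-through methods let a `BaseAgent` subclass stand in for its compiled graph when a checkpointer is attached. A minimal sketch, assuming a subclass that accepts a `checkpointer` argument (the subclass choice and the instructions are illustrative, not from this diff):

```python
from langgraph.checkpoint.memory import MemorySaver
from langchain_openai import ChatOpenAI
from ai_data_science_team.agents import DataCleaningAgent  # illustrative subclass

agent = DataCleaningAgent(
    model=ChatOpenAI(model="gpt-4o-mini"),
    checkpointer=MemorySaver(),  # assumption: subclasses forward this to compile()
)
config = {"configurable": {"thread_id": "session-1"}}

# After an invocation under this config, the new pass-throughs read and edit
# the exact checkpoint the compiled graph wrote.
snapshot = agent.get_state(config)
history = list(agent.get_state_history(config))
agent.update_state(config, values={"user_instructions": "Also drop constant columns."})
```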
```diff
@@ -237,6 +270,7 @@ def create_coding_agent_graph(
     checkpointer: Optional[Callable] = None,
     bypass_recommended_steps: bool = False,
     bypass_explain_code: bool = False,
+    agent_name: str = "coding_agent"
 ):
     """
     Creates a generic agent graph using the provided node functions and node names.
@@ -281,6 +315,8 @@ def create_coding_agent_graph(
         Whether to skip the recommended steps node.
     bypass_explain_code : bool, optional
         Whether to skip the final explain code node.
+    name : str, optional
+        The name of the agent graph.
 
     Returns
     -------
@@ -366,10 +402,10 @@ def create_coding_agent_graph(
     workflow.add_edge(explain_code_node_name, END)
 
     # Finally, compile
-    if human_in_the_loop:
-        app = workflow.compile(checkpointer=checkpointer)
-    else:
-        app = workflow.compile()
+    app = workflow.compile(
+        checkpointer=checkpointer,
+        name=agent_name,
+    )
 
     return app
 
```
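Compiling once with `checkpointer=checkpointer` (which may simply be `None`) and a graph name replaces the old `human_in_the_loop` branch; `compile(name=...)` only exists in newer langgraph releases, which is presumably why METADATA below bumps the pin to `langgraph>=0.2.74`. The pattern in isolation, on a toy one-node graph:

```python
from typing import TypedDict
from langgraph.graph import START, END, StateGraph
from langgraph.checkpoint.memory import MemorySaver

class State(TypedDict):
    count: int

def bump(state: State) -> dict:
    return {"count": state["count"] + 1}

workflow = StateGraph(State)
workflow.add_node("bump", bump)
workflow.add_edge(START, "bump")
workflow.add_edge("bump", END)

# checkpointer=None behaves like compiling without one, so no if/else is needed.
app = workflow.compile(checkpointer=MemorySaver(), name="coding_agent")
print(app.name)  # coding_agent
```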
```diff
@@ -574,7 +610,7 @@ def node_func_execute_agent_from_sql_connection(
 
     # Retrieve SQLAlchemy connection and code snippet from the state
     is_engine = isinstance(connection, sql.engine.base.Engine)
-    conn = connection.connect() if is_engine else connection
+    connection = connection.connect() if is_engine else connection
     agent_code = state.get(code_snippet_key)
 
     # Ensure the connection object is provided
```
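The one-line fix rebinds the opened connection to the name the rest of the node actually uses; previously the `Connection` produced from an `Engine` was bound to `conn` and, as far as this hunk shows, never referenced again. The normalization idiom on its own:

```python
import sqlalchemy as sql

def normalize_connection(connection):
    """Accept an Engine or a live Connection; always return a Connection."""
    is_engine = isinstance(connection, sql.engine.base.Engine)
    # Rebinding one name keeps downstream code oblivious to what was passed in.
    return connection.connect() if is_engine else connection

engine = sql.create_engine("sqlite:///:memory:")
conn = normalize_connection(engine)
print(conn.execute(sql.text("SELECT 1")).scalar())  # 1
```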
ai_data_science_team/tools/dataframe.py

```diff
@@ -74,7 +74,12 @@ def get_dataframe_summary(
     return summaries
 
 
-def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
+def _summarize_dataframe(
+    df: pd.DataFrame,
+    dataset_name: str,
+    n_sample=30,
+    skip_stats=False
+) -> str:
     """Generate a summary string for a single DataFrame."""
     # 1. Convert dictionary-type cells to strings
     # This prevents unhashable dict errors during df.nunique().
```
ai_data_science_team/tools/eda.py

```diff
@@ -2,11 +2,44 @@
 from typing import Annotated, Dict, Tuple, Union
 
 import os
+import tempfile
 
 from langchain.tools import tool
 
 from langgraph.prebuilt import InjectedState
 
+from ai_data_science_team.tools.dataframe import get_dataframe_summary
+
+
+@tool(response_format='content')
+def explain_data(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    n_sample: int = 30,
+    skip_stats: bool = False,
+):
+    """
+    Tool: explain_data
+    Description:
+        Provides an extensive, narrative summary of a DataFrame including its shape, column types,
+        missing value percentages, unique counts, sample rows, and (if not skipped) descriptive stats/info.
+
+    Parameters:
+        data_raw (dict): Raw data.
+        n_sample (int, default=30): Number of rows to display.
+        skip_stats (bool, default=False): If True, omit descriptive stats/info.
+
+    LLM Guidance:
+        Use when a detailed, human-readable explanation is needed—i.e., a full overview is preferred over a concise numerical summary.
+
+    Returns:
+        str: Detailed DataFrame summary.
+    """
+    print(" * Tool: explain_data")
+    import pandas as pd
+
+    result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
+
+    return result
 
 @tool(response_format='content_and_artifact')
 def describe_dataset(
```
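Since `data_raw` is injected from graph state via `InjectedState`, the tool schema the LLM sees exposes only `n_sample` and `skip_stats`. Outside a graph, the wrapped function can still be exercised directly; a sketch with a toy frame (calling the tool's `.func` is a test-only shortcut, not how the EDA agent invokes it):

```python
import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "amount": [10.0, None, 7.5]})

# In the EDA agent, LangGraph supplies data_raw from state; here we pass it by hand.
summary = explain_data.func(data_raw=df.to_dict(), n_sample=3, skip_stats=False)
print(summary)
```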
```diff
@@ -15,21 +48,33 @@ def describe_dataset(
     """
     Tool: describe_dataset
     Description:
-        Describe the dataset by computing summary
-        statistics using the DataFrame's describe() method.
-
+        Compute and return summary statistics for the dataset using pandas' describe() method.
+        The tool provides both a textual summary and a structured artifact (a dictionary) for further processing.
+
+    Parameters:
+    -----------
+    data_raw : dict
+        The raw data in dictionary format.
+
+    LLM Selection Guidance:
+    ------------------------
+    Use this tool when:
+    - The request emphasizes numerical descriptive statistics (e.g., count, mean, std, min, quartiles, max).
+    - The user needs a concise statistical snapshot rather than a detailed narrative.
+    - Both a brief text explanation and a structured data artifact (for downstream tasks) are required.
+
     Returns:
     -------
     Tuple[str, Dict]:
-        content: A textual summary of the DataFrame's descriptive statistics.
-        artifact: A dictionary (from DataFrame.describe()) for further inspection.
+        - content: A textual summary indicating that summary statistics have been computed.
+        - artifact: A dictionary (derived from DataFrame.describe()) containing detailed statistical measures.
     """
     print(" * Tool: describe_dataset")
     import pandas as pd
     df = pd.DataFrame(data_raw)
     description_df = df.describe(include='all')
     content = "Summary statistics computed using pandas describe()."
-    artifact = description_df.to_dict()
+    artifact = {'describe_df': description_df.to_dict()}
     return content, artifact
 
 
```
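Note the breaking detail in the last change: the artifact is now nested under a `'describe_df'` key, so downstream consumers must unwrap it. A before/after sketch with assumed toy data:

```python
import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3, 4]})
description = df.describe(include="all").to_dict()

old_artifact = description                     # 0.0.0.9012 shape
new_artifact = {"describe_df": description}    # 0.0.0.9014 shape

print(old_artifact["x"]["mean"])                 # 2.5
print(new_artifact["describe_df"]["x"]["mean"])  # 2.5
```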
```diff
@@ -226,8 +271,8 @@ def generate_sweetviz_report(
     data_raw: Annotated[dict, InjectedState("data_raw")],
     target: str = None,
     report_name: str = "sweetviz_report.html",
-    report_directory: str = os.path.join(os.getcwd(), "reports"),
-    open_browser: bool = True,
+    report_directory: str = None,  # <-- Default to None
+    open_browser: bool = False,
 ) -> Tuple[str, Dict]:
     """
     Tool: generate_sweetviz_report
@@ -243,9 +288,10 @@ def generate_sweetviz_report(
     report_name : str, optional
         The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
     report_directory : str, optional
-        The directory where the report should be saved. Defaults to a 'reports' directory in the current working directory.
+        The directory where the report should be saved.
+        If None, a temporary directory is created and used.
     open_browser : bool, optional
-        Whether to open the report in a web browser. Default is True.
+        Whether to open the report in a web browser. Default is False.
 
     Returns:
     --------
@@ -254,28 +300,37 @@ def generate_sweetviz_report(
         artifact: A dictionary with the report file path and optionally the report's HTML content.
     """
     print(" * Tool: generate_sweetviz_report")
+
+    # Import sweetviz
     try:
         import sweetviz as sv
     except ImportError:
         raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
+
     import pandas as pd
+
     # Convert injected raw data to a DataFrame.
     df = pd.DataFrame(data_raw)
 
+    # If no directory is specified, use a temporary directory.
+    if not report_directory:
+        report_directory = tempfile.mkdtemp()
+        print(f" * Using temporary directory: {report_directory}")
+    else:
+        # Ensure user-specified directory exists.
+        if not os.path.exists(report_directory):
+            os.makedirs(report_directory)
+
     # Create the Sweetviz report.
     report = sv.analyze(df, target_feat=target)
 
-    # Ensure the directory exists; default is os.getcwd()/reports
-    if not os.path.exists(report_directory):
-        os.makedirs(report_directory)
-
     # Determine the full path for the report.
     full_report_path = os.path.join(report_directory, report_name)
 
     # Save the report to the specified HTML file.
     report.show_html(
         filepath=full_report_path,
-        open_browser=True,
+        open_browser=open_browser,
     )
 
     # Optionally, read the HTML content (if desired to pass along in the artifact).
@@ -285,9 +340,13 @@ def generate_sweetviz_report(
     except Exception:
         html_content = None
 
-    content = f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'."
+    content = (
+        f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
+        f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
+    )
     artifact = {
         "report_file": os.path.abspath(full_report_path),
         "report_html": html_content,
     }
     return content, artifact
+
```
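The new defaults (temporary directory, no browser launch) make the tool safe on headless servers. The directory fallback in isolation, independent of sweetviz:

```python
import os
import tempfile

def resolve_report_directory(report_directory=None):
    # Mirrors the hunk above: falsy -> fresh temp dir; explicit -> create if missing.
    if not report_directory:
        report_directory = tempfile.mkdtemp()
    elif not os.path.exists(report_directory):
        os.makedirs(report_directory)
    return report_directory

print(resolve_report_directory())           # e.g. /tmp/tmpab12cd34
print(resolve_report_directory("reports"))  # ./reports, created on demand
```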
ai_data_science_team/utils/messages.py (new file)

```diff
@@ -0,0 +1,27 @@
+
+
+
+def get_tool_call_names(messages):
+    """
+    Method to extract the tool call names from a list of LangChain messages.
+
+    Parameters:
+    ----------
+    messages : list
+        A list of LangChain messages.
+
+    Returns:
+    -------
+    tool_calls : list
+        A list of tool call names.
+
+    """
+    tool_calls = []
+    for message in messages:
+        try:
+            if "tool_call_id" in list(dict(message).keys()):
+                tool_calls.append(message.name)
+        except:
+            pass
+    return tool_calls
+
```
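The helper relies on `ToolMessage` being the only LangChain message type that carries a `tool_call_id` field; the broad `try`/`except` just skips anything that cannot be dict-coerced. A usage sketch with hand-built messages:

```python
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from ai_data_science_team.utils.messages import get_tool_call_names

messages = [
    HumanMessage(content="Describe the dataset."),
    AIMessage(content="", tool_calls=[
        {"name": "describe_dataset", "args": {}, "id": "call_1"},
    ]),
    ToolMessage(content="Summary statistics computed...",
                name="describe_dataset", tool_call_id="call_1"),
]

print(get_tool_call_names(messages))  # ['describe_dataset']
```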
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ai-data-science-team
-Version: 0.0.0.9012
+Version: 0.0.0.9014
 Summary: Build and run an AI-powered data science team.
 Home-page: https://github.com/business-science/ai-data-science-team
 Author: Matt Dancho
@@ -18,7 +18,7 @@ Requires-Dist: langchain
 Requires-Dist: langchain_community
 Requires-Dist: langchain_openai
 Requires-Dist: langchain_experimental
-Requires-Dist: langgraph>=0.2.57
+Requires-Dist: langgraph>=0.2.74
 Requires-Dist: openai
 Requires-Dist: pandas
 Requires-Dist: sqlalchemy
@@ -152,7 +152,11 @@ This is a top secret project I'm working on. It's a multi-agent data science app
 
 #### 🔥 Agentic Applications
 
-1. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
+1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
+
+![Exploratory Data Copilot](/img/apps/ai_exploratory_copilot.jpg)
+
+2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
 
 ### Agents Available Now
 
```
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD

```diff
@@ -1,31 +1,32 @@
-ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ai_data_science_team/_version.py,sha256=BybGt-zGNDZsdJxDMV3xmjghiRF8jmwG3ov_dt_rM7E,26
+ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
+ai_data_science_team/_version.py,sha256=D4dUl-fYnimOU_VSzvrmJm30_IoaF_9m9dTLp8HE6rQ,26
 ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
 ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
-ai_data_science_team/agents/data_cleaning_agent.py,sha256=V5tJMwGJK0JwrF_H-7r3S0E8UkAY6ci4BGxqjhZiGBI,27352
-ai_data_science_team/agents/data_loader_tools_agent.py,sha256=fnkOvmrXzvTTt1mnAyTlsF_7ZGrkp3P97YU_LgeffMg,8445
-ai_data_science_team/agents/data_visualization_agent.py,sha256=tJy9Ehnh9mvAu6H--TXI8esSHmK1RW_L1RDAdn7Xek4,28821
-ai_data_science_team/agents/data_wrangling_agent.py,sha256=LxzphH-TmrFG0GjejGOjulhPq4SsWFo5Y9tk4WEuN4M,32347
-ai_data_science_team/agents/feature_engineering_agent.py,sha256=KmPBkj7WUBz6LFUlDDfQHMi7ujXwsH5P9LWRS-F4tdM,31026
-ai_data_science_team/agents/sql_database_agent.py,sha256=1K2o3NiuKgGKdbMz_Tq9IeQ8xhXjpfGOxx9lArZh1yE,31173
+ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
+ai_data_science_team/agents/data_loader_tools_agent.py,sha256=TFKzYqV6cvU-sMbfL-hg8-NgF_Hz3nysGFldvb5K3fM,9327
+ai_data_science_team/agents/data_visualization_agent.py,sha256=eUSTzTOm5aLJ6Cqnk-hRuXeVbYyy0RIzN8_0LLy0P9o,29387
+ai_data_science_team/agents/data_wrangling_agent.py,sha256=6tiDO1i-5s2Ju6_MsLoJMflUuRSf_1oTSsSKcLlgzEc,33376
+ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
+ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
 ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
-ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=y65lsBXhQNOGwWealEho6uFxGSTW7FNfvTUZnW8_XNY,7609
+ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=x0kTwDo0BNbYzgA0YamMWdqRjx0upZgeXp9nF6C6_8E,8364
 ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
-ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=DamR72agrTKfdcdhablmP2mpbj0CqtMonP-QU8p7o9w,33394
+ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NXrNHb723NeF2rns0Y0,33934
 ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=zbT0KIsmQp_sEyxzXRguhqx5913Q2yPYyKGU6TUWEM8,11067
-ai_data_science_team/multiagents/__init__.py,sha256=aI4GztEwmkexZKT5XHcH3cAjO-xYUhncb3yfPJQDqTA,99
-ai_data_science_team/multiagents/sql_data_analyst.py,sha256=kmmED3gLf5STWWY6ZVJYd7_Pt8NMl6SHyBocuQzRDGk,14193
+ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=QImaZnS8hPdrU7GI6pZ0dUDO-LXx40MSA3XyMDppIh0,12003
+ai_data_science_team/multiagents/__init__.py,sha256=5tpmZBQ_UT5SKDCS_NivZhN19HEStKIcstiqSXPXDl0,208
+ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=O662v-75tLqHHrVNjncsPeR2FB4MWSWruJRIF-YO-fg,13581
+ai_data_science_team/multiagents/sql_data_analyst.py,sha256=Fpue6WcX9x18kEH3kfEi8kkFoG9HhQ1AZiWw6Y6FXOo,18502
 ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
 ai_data_science_team/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/parsers/parsers.py,sha256=hIsMZXRHz9hqs8R1ebymKA7D6NxOf5UVMpDAr_gGhE8,2027
 ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP3cEFuf22-R5RM,330
-ai_data_science_team/templates/agent_templates.py,sha256=Lezp0ugtIP3m5WUOmjLwghNnjjyQVQecysONeIHWwi0,29133
+ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J2GG91REJOKUUEY71Dszs,30767
 ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
-ai_data_science_team/tools/dataframe.py,sha256=qSflGDByqqCXv4TjuvOFvGPZmegzeOesb0Y4i4Y0gdQ,4551
-ai_data_science_team/tools/eda.py,sha256=UGD6PC12RsB_UmStvR4TmSqv0noxjM4DkzY-kHjI0-E,10591
+ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
+ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
 ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
 ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
 ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
@@ -33,10 +34,11 @@ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
 ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
 ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
+ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
 ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
 ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
-ai_data_science_team-0.0.0.9012.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
-ai_data_science_team-0.0.0.9012.dist-info/METADATA,sha256=geRCFLG3YO9uprp_CGKiqCTSThg06L2U6WxVqYKzyM8,12704
-ai_data_science_team-0.0.0.9012.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ai_data_science_team-0.0.0.9012.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
-ai_data_science_team-0.0.0.9012.dist-info/RECORD,,
+ai_data_science_team-0.0.0.9014.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ai_data_science_team-0.0.0.9014.dist-info/METADATA,sha256=a35LbXyxf_XiP82m_P5HLFwPrmuzXkNWbyfzGge7dHA,13021
+ai_data_science_team-0.0.0.9014.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ai_data_science_team-0.0.0.9014.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ai_data_science_team-0.0.0.9014.dist-info/RECORD,,
```