ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9014__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- ai_data_science_team/__init__.py +22 -0
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/data_cleaning_agent.py +17 -3
- ai_data_science_team/agents/data_loader_tools_agent.py +24 -1
- ai_data_science_team/agents/data_visualization_agent.py +17 -3
- ai_data_science_team/agents/data_wrangling_agent.py +30 -10
- ai_data_science_team/agents/feature_engineering_agent.py +17 -4
- ai_data_science_team/agents/sql_database_agent.py +15 -2
- ai_data_science_team/ds_agents/eda_tools_agent.py +28 -6
- ai_data_science_team/ml_agents/h2o_ml_agent.py +15 -3
- ai_data_science_team/ml_agents/mlflow_tools_agent.py +23 -1
- ai_data_science_team/multiagents/__init__.py +2 -1
- ai_data_science_team/multiagents/pandas_data_analyst.py +305 -0
- ai_data_science_team/multiagents/sql_data_analyst.py +119 -30
- ai_data_science_team/templates/agent_templates.py +41 -5
- ai_data_science_team/tools/dataframe.py +6 -1
- ai_data_science_team/tools/eda.py +75 -16
- ai_data_science_team/utils/messages.py +27 -0
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA +7 -3
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD +23 -21
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/WHEEL +0 -0
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/top_level.txt +0 -0
ai_data_science_team/multiagents/sql_data_analyst.py
CHANGED

@@ -1,12 +1,14 @@
 
 from langchain_core.messages import BaseMessage
-
+
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
 
 from langgraph.graph import START, END, StateGraph
 from langgraph.graph.state import CompiledStateGraph
-from langgraph.types import
+from langgraph.types import Checkpointer
 
-from typing import TypedDict, Annotated, Sequence
+from typing import TypedDict, Annotated, Sequence
 import operator
 
 from typing_extensions import TypedDict

@@ -20,6 +22,7 @@ from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
 from ai_data_science_team.utils.plotly import plotly_from_dict
 from ai_data_science_team.utils.regex import remove_consecutive_duplicates, get_generic_summary
 
+AGENT_NAME = "sql_data_analyst"
 
 class SQLDataAnalyst(BaseAgent):
     """

@@ -33,6 +36,8 @@ class SQLDataAnalyst(BaseAgent):
         The SQL Database Agent.
     data_visualization_agent: DataVisualizationAgent
         The Data Visualization Agent.
+    checkpointer: Checkpointer (optional)
+        The checkpointer to save the state of the multi-agent system.
 
     Methods:
     --------

@@ -326,17 +331,17 @@ def make_sql_data_analyst(
     """
     Creates a multi-agent system that takes in a SQL query and returns a plot or table.
 
-    - Agent 1: SQL Database Agent made with `
-    - Agent 2: Data Visualization Agent made with `
+    - Agent 1: SQL Database Agent made with `SQLDatabaseAgent()`
+    - Agent 2: Data Visualization Agent made with `DataVisualizationAgent()`
 
     Parameters:
     ----------
     model:
        The language model to be used for the agents.
     sql_database_agent: CompiledStateGraph
-        The SQL Database Agent made with `
+        The SQL Database Agent made with `SQLDatabaseAgent()`.
     data_visualization_agent: CompiledStateGraph
-        The Data Visualization Agent made with `
+        The Data Visualization Agent made with `DataVisualizationAgent()`.
     checkpointer: Checkpointer (optional)
         The checkpointer to save the state of the multi-agent system.
         Default: None

@@ -348,10 +353,39 @@ def make_sql_data_analyst(
     """
 
     llm = model
+
+
+    routing_preprocessor_prompt = PromptTemplate(
+        template="""
+        You are an expert in routing decisions for a SQL Database Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to:
+
+        1. Determine what the correct format for a Users Question should be for use with a SQL Database Agent based on the incoming user question. Anything related to database and data manipulation should be passed along.
+        2. Determine whether or not a chart should be generated or a table should be returned based on the users question.
+        3. If a chart is requested, determine the correct format of a Users Question should be used with a Data Visualization Agent. Anything related to plotting and visualization should be passed along.
+
+        Use the following criteria on how to route the the initial user question:
+
+        From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the SQL generator agent. This will be the 'user_instructions_sql_database'. If 'None' is found, return the original user question.
+
+        Next, determine if the user would like a data visualization ('chart') or a 'table' returned with the results of the Data Wrangling Agent. If unknown, not specified or 'None' is found, then select 'table'.
+
+        If a 'chart' is requested, return the 'user_instructions_data_visualization'. If 'None' is found, return None.
+
+        Return JSON with 'user_instructions_sql_database', 'user_instructions_data_visualization' and 'routing_preprocessor_decision'.
+
+        INITIAL_USER_QUESTION: {user_instructions}
+        """,
+        input_variables=["user_instructions"]
+    )
+
+    routing_preprocessor = routing_preprocessor_prompt | llm | JsonOutputParser()
 
     class PrimaryState(TypedDict):
         messages: Annotated[Sequence[BaseMessage], operator.add]
         user_instructions: str
+        user_instructions_sql_database: str
+        user_instructions_data_visualization: str
+        routing_preprocessor_decision: str
         sql_query_code: str
         sql_database_function: str
         data_sql: dict

@@ -359,39 +393,94 @@ def make_sql_data_analyst(
         plot_required: bool
         data_visualization_function: str
         plotly_graph: dict
+        plotly_error: str
         max_retries: int
         retry_count: int
 
-    def
+    def preprocess_routing(state: PrimaryState):
+        print("---SQL DATA ANALYST---")
+        print("*************************")
+        print("---PREPROCESS ROUTER---")
+        question = state.get("user_instructions")
 
-
+        # Chart Routing and SQL Prep
+        response = routing_preprocessor.invoke({"user_instructions": question})
 
-
-
-
-
-
+        return {
+            "user_instructions_sql_database": response.get('user_instructions_sql_database'),
+            "user_instructions_data_visualization": response.get('user_instructions_data_visualization'),
+            "routing_preprocessor_decision": response.get('routing_preprocessor_decision'),
+        }
+
+    def router_chart_or_table(state: PrimaryState):
+        print("---ROUTER: CHART OR TABLE---")
+        return "chart" if state.get('routing_preprocessor_decision') == "chart" else "table"
+
+
+    def invoke_sql_database_agent(state: PrimaryState):
 
-
-
-
-
-        goto=goto
-    )
+        response = sql_database_agent.invoke({
+            "user_instructions": state.get("user_instructions_sql_database"),
+            "max_retries": state.get("max_retries"),
+            "retry_count": state.get("retry_count"),
+        })
 
-
+        return {
+            "messages": response.get("messages"),
+            "data_sql": response.get("data_sql"),
+            "sql_query_code": response.get("sql_query_code"),
+            "sql_database_function": response.get("sql_database_function"),
+
+        }
+
+    def invoke_data_visualization_agent(state: PrimaryState):
+
+        response = data_visualization_agent.invoke({
+            "user_instructions": state.get("user_instructions_data_visualization"),
+            "data_raw": state.get("data_sql"),
+            "max_retries": state.get("max_retries"),
+            "retry_count": state.get("retry_count"),
+        })
+
+        return {
+            "messages": response.get("messages"),
+            "data_visualization_function": response.get("data_visualization_function"),
+            "plotly_graph": response.get("plotly_graph"),
+            "plotly_error": response.get("data_visualization_error"),
+        }
 
-
-
-
+    def route_printer(state: PrimaryState):
+        print("---ROUTE PRINTER---")
+        print(f" Route: {state.get('routing_preprocessor_decision')}")
+        print("---END---")
+        return {}
+
+    workflow = StateGraph(PrimaryState)
+
+    workflow.add_node("routing_preprocessor", preprocess_routing)
+    workflow.add_node("sql_database_agent", invoke_sql_database_agent)
+    workflow.add_node("data_visualization_agent", invoke_data_visualization_agent)
+    workflow.add_node("route_printer", route_printer)
 
-    workflow.add_edge(START, "
-    workflow.add_edge("
-
+    workflow.add_edge(START, "routing_preprocessor")
+    workflow.add_edge("routing_preprocessor", "sql_database_agent")
+
+    workflow.add_conditional_edges(
+        "sql_database_agent",
+        router_chart_or_table,
+        {
+            "chart": "data_visualization_agent",
+            "table": "route_printer"
+        }
+    )
+
+    workflow.add_edge("data_visualization_agent", "route_printer")
+    workflow.add_edge("route_printer", END)
 
-    app = workflow.compile(
+    app = workflow.compile(
+        checkpointer=checkpointer,
+        name=AGENT_NAME
+    )
 
     return app
 
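Taken together, these changes replace the old linear pipeline with a routed one: a preprocessor splits the user question into SQL and visualization instructions, then a conditional edge picks the `chart` or `table` path. A minimal usage sketch under stated assumptions (the connection URL, model name, and question are illustrative placeholders; the `invoke_agent`/`get_response` call shape follows the package's `BaseAgent` interface):

```python
import sqlalchemy as sql
from langchain_openai import ChatOpenAI

from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
from ai_data_science_team.multiagents import SQLDataAnalyst

llm = ChatOpenAI(model="gpt-4o-mini")                    # illustrative model choice
conn = sql.create_engine("sqlite:///demo.db").connect()  # placeholder database

sql_data_analyst = SQLDataAnalyst(
    model=llm,
    sql_database_agent=SQLDatabaseAgent(model=llm, connection=conn),
    data_visualization_agent=DataVisualizationAgent(model=llm),
)

# The routing preprocessor decides whether this run ends at the 'table'
# or 'chart' node; asking for a plot should route to the chart path.
sql_data_analyst.invoke_agent(
    user_instructions="Plot total sales by month from the orders table."
)
response = sql_data_analyst.get_response()
```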
ai_data_science_team/templates/agent_templates.py
CHANGED

@@ -40,6 +40,21 @@ class BaseAgent(CompiledStateGraph):
         self._params = params
         self._compiled_graph = self._make_compiled_graph()
         self.response = None
+        self.name = self._compiled_graph.name
+        self.checkpointer = self._compiled_graph.checkpointer
+        self.store = self._compiled_graph.store
+        self.output_channels = self._compiled_graph.output_channels
+        self.nodes = self._compiled_graph.nodes
+        self.stream_mode = self._compiled_graph.stream_mode
+        self.builder = self._compiled_graph.builder
+        self.channels = self._compiled_graph.channels
+        self.input_channels = self._compiled_graph.input_channels
+        self.input_schema = self._compiled_graph.input_schema
+        self.output_schema = self._compiled_graph.output_schema
+        self.debug = self._compiled_graph.debug
+        self.interrupt_after_nodes = self._compiled_graph.interrupt_after_nodes
+        self.interrupt_before_nodes = self._compiled_graph.interrupt_before_nodes
+        self.config = self._compiled_graph.config
 
     def _make_compiled_graph(self):
         """
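Why mirror all of these attributes? `BaseAgent` wraps a compiled graph rather than being one, so copying the compiled graph's public attributes lets the wrapper look like a `CompiledStateGraph` to LangGraph's introspection and to callers. A schematic sketch of the idea (illustrative, not the library's code):

```python
class GraphWrapper:
    """Illustrative stand-in for the BaseAgent pattern above."""

    def __init__(self, compiled_graph):
        self._compiled_graph = compiled_graph
        # Mirror the attributes LangGraph (and callers) expect to find
        # directly on a compiled graph.
        for attr in (
            "name", "checkpointer", "store", "output_channels", "nodes",
            "stream_mode", "builder", "channels", "input_channels",
            "input_schema", "output_schema", "debug",
            "interrupt_after_nodes", "interrupt_before_nodes", "config",
        ):
            setattr(self, attr, getattr(compiled_graph, attr))
```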
@@ -197,6 +212,24 @@ class BaseAgent(CompiledStateGraph):
         """
         return self.get_output_jsonschema()['properties']
 
+    def get_state(self, config, *, subgraphs = False):
+        """
+        Returns the state of the agent.
+        """
+        return self._compiled_graph.get_state(config, subgraphs=subgraphs)
+
+    def get_state_history(self, config, *, filter = None, before = None, limit = None):
+        """
+        Returns the state history of the agent.
+        """
+        return self._compiled_graph.get_state_history(config, filter=filter, before=before, limit=limit)
+
+    def update_state(self, config, values, as_node = None):
+        """
+        Updates the state of the agent.
+        """
+        return self._compiled_graph.update_state(config, values, as_node)
+
     def get_response(self):
         """
         Returns the response generated by the agent.
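These three methods simply forward to the underlying compiled graph, and they only return meaningful data when the graph was compiled with a checkpointer. A self-contained LangGraph sketch of the calls being delegated to (the toy graph and `thread_id` are illustrative, not from this package):

```python
from typing import TypedDict

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, END, StateGraph

class State(TypedDict):
    retry_count: int

def step(state: State) -> dict:
    return {"retry_count": state["retry_count"] + 1}

builder = StateGraph(State)
builder.add_node("step", step)
builder.add_edge(START, "step")
builder.add_edge("step", END)
app = builder.compile(checkpointer=MemorySaver())

config = {"configurable": {"thread_id": "session-1"}}
app.invoke({"retry_count": 0}, config)

print(app.get_state(config).values)              # latest checkpointed state
print(len(list(app.get_state_history(config))))  # number of saved checkpoints
app.update_state(config, {"retry_count": 0})     # patch one state value
```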
@@ -237,6 +270,7 @@ def create_coding_agent_graph(
     checkpointer: Optional[Callable] = None,
     bypass_recommended_steps: bool = False,
     bypass_explain_code: bool = False,
+    agent_name: str = "coding_agent"
 ):
     """
     Creates a generic agent graph using the provided node functions and node names.

@@ -281,6 +315,8 @@ def create_coding_agent_graph(
         Whether to skip the recommended steps node.
     bypass_explain_code : bool, optional
         Whether to skip the final explain code node.
+    name : str, optional
+        The name of the agent graph.
 
     Returns
     -------

@@ -366,10 +402,10 @@ def create_coding_agent_graph(
     workflow.add_edge(explain_code_node_name, END)
 
     # Finally, compile
-
-
-
-
+    app = workflow.compile(
+        checkpointer=checkpointer,
+        name=agent_name,
+    )
 
     return app
 
@@ -574,7 +610,7 @@ def node_func_execute_agent_from_sql_connection(
 
     # Retrieve SQLAlchemy connection and code snippet from the state
     is_engine = isinstance(connection, sql.engine.base.Engine)
-
+    connection = connection.connect() if is_engine else connection
     agent_code = state.get(code_snippet_key)
 
     # Ensure the connection object is provided
ai_data_science_team/tools/dataframe.py
CHANGED

@@ -74,7 +74,12 @@ def get_dataframe_summary(
     return summaries
 
 
-def _summarize_dataframe(
+def _summarize_dataframe(
+    df: pd.DataFrame,
+    dataset_name: str,
+    n_sample=30,
+    skip_stats=False
+) -> str:
     """Generate a summary string for a single DataFrame."""
     # 1. Convert dictionary-type cells to strings
     # This prevents unhashable dict errors during df.nunique().
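The change above only reflows the private helper's signature across lines; behavior is unchanged. For context, the public entry point it backs can be exercised directly (the sample data is illustrative):

```python
import pandas as pd

from ai_data_science_team.tools.dataframe import get_dataframe_summary

df = pd.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "b"]})

# Returns a list with one summary string per DataFrame passed in.
summaries = get_dataframe_summary(df, n_sample=3, skip_stats=False)
print(summaries[0])
```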
ai_data_science_team/tools/eda.py
CHANGED

@@ -2,11 +2,44 @@
 from typing import Annotated, Dict, Tuple, Union
 
 import os
+import tempfile
 
 from langchain.tools import tool
 
 from langgraph.prebuilt import InjectedState
 
+from ai_data_science_team.tools.dataframe import get_dataframe_summary
+
+
+@tool(response_format='content')
+def explain_data(
+    data_raw: Annotated[dict, InjectedState("data_raw")],
+    n_sample: int = 30,
+    skip_stats: bool = False,
+):
+    """
+    Tool: explain_data
+    Description:
+        Provides an extensive, narrative summary of a DataFrame including its shape, column types,
+        missing value percentages, unique counts, sample rows, and (if not skipped) descriptive stats/info.
+
+    Parameters:
+        data_raw (dict): Raw data.
+        n_sample (int, default=30): Number of rows to display.
+        skip_stats (bool, default=False): If True, omit descriptive stats/info.
+
+    LLM Guidance:
+        Use when a detailed, human-readable explanation is needed—i.e., a full overview is preferred over a concise numerical summary.
+
+    Returns:
+        str: Detailed DataFrame summary.
+    """
+    print(" * Tool: explain_data")
+    import pandas as pd
+
+    result = get_dataframe_summary(pd.DataFrame(data_raw), n_sample=n_sample, skip_stats=skip_stats)
+
+    return result
 
 @tool(response_format='content_and_artifact')
 def describe_dataset(
@@ -15,21 +48,33 @@ def describe_dataset(
     """
     Tool: describe_dataset
     Description:
-
-
-
+        Compute and return summary statistics for the dataset using pandas' describe() method.
+        The tool provides both a textual summary and a structured artifact (a dictionary) for further processing.
+
+    Parameters:
+    -----------
+    data_raw : dict
+        The raw data in dictionary format.
+
+    LLM Selection Guidance:
+    ------------------------
+    Use this tool when:
+    - The request emphasizes numerical descriptive statistics (e.g., count, mean, std, min, quartiles, max).
+    - The user needs a concise statistical snapshot rather than a detailed narrative.
+    - Both a brief text explanation and a structured data artifact (for downstream tasks) are required.
+
     Returns:
     -------
     Tuple[str, Dict]:
-        content: A textual summary
-        artifact: A dictionary (from DataFrame.describe())
+        - content: A textual summary indicating that summary statistics have been computed.
+        - artifact: A dictionary (derived from DataFrame.describe()) containing detailed statistical measures.
     """
     print(" * Tool: describe_dataset")
     import pandas as pd
     df = pd.DataFrame(data_raw)
     description_df = df.describe(include='all')
     content = "Summary statistics computed using pandas describe()."
-    artifact = description_df.to_dict()
+    artifact = {'describe_df': description_df.to_dict()}
     return content, artifact
 
 
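One behavioral note in this hunk: the artifact is now nested under a `'describe_df'` key, so consumers of the old flat `DataFrame.describe().to_dict()` artifact need one extra lookup. A small sketch of the before/after access pattern (the sample data is illustrative):

```python
import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3]})
description = df.describe(include="all").to_dict()

old_artifact = description                   # 0.0.0.9012 artifact shape
new_artifact = {"describe_df": description}  # 0.0.0.9014 artifact shape

# Downstream code now unwraps the nested key first.
describe_df = pd.DataFrame(new_artifact["describe_df"])
print(describe_df.loc["mean", "x"])  # -> 2.0
```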
@@ -226,8 +271,8 @@ def generate_sweetviz_report(
     data_raw: Annotated[dict, InjectedState("data_raw")],
     target: str = None,
     report_name: str = "sweetviz_report.html",
-    report_directory: str =
-    open_browser: bool =
+    report_directory: str = None,   # <-- Default to None
+    open_browser: bool = False,
 ) -> Tuple[str, Dict]:
     """
     Tool: generate_sweetviz_report

@@ -243,9 +288,10 @@ def generate_sweetviz_report(
     report_name : str, optional
         The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
     report_directory : str, optional
-        The directory where the report should be saved.
+        The directory where the report should be saved.
+        If None, a temporary directory is created and used.
     open_browser : bool, optional
-        Whether to open the report in a web browser. Default is
+        Whether to open the report in a web browser. Default is False.
 
     Returns:
     --------

@@ -254,28 +300,37 @@ def generate_sweetviz_report(
         artifact: A dictionary with the report file path and optionally the report's HTML content.
     """
     print(" * Tool: generate_sweetviz_report")
+
+    # Import sweetviz
     try:
         import sweetviz as sv
     except ImportError:
         raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
+
     import pandas as pd
+
     # Convert injected raw data to a DataFrame.
     df = pd.DataFrame(data_raw)
 
+    # If no directory is specified, use a temporary directory.
+    if not report_directory:
+        report_directory = tempfile.mkdtemp()
+        print(f" * Using temporary directory: {report_directory}")
+    else:
+        # Ensure user-specified directory exists.
+        if not os.path.exists(report_directory):
+            os.makedirs(report_directory)
+
     # Create the Sweetviz report.
     report = sv.analyze(df, target_feat=target)
 
-    # Ensure the directory exists; default is os.getcwd()/reports
-    if not os.path.exists(report_directory):
-        os.makedirs(report_directory)
-
     # Determine the full path for the report.
     full_report_path = os.path.join(report_directory, report_name)
 
     # Save the report to the specified HTML file.
     report.show_html(
         filepath=full_report_path,
-        open_browser=
+        open_browser=open_browser,
     )
 

@@ -285,9 +340,13 @@ def generate_sweetviz_report(
     except Exception:
         html_content = None
 
-    content =
+    content = (
+        f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
+        f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
+    )
     artifact = {
         "report_file": os.path.abspath(full_report_path),
         "report_html": html_content,
     }
     return content, artifact
+
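The net effect of these sweetviz changes: with no `report_directory`, the report lands in a fresh temporary directory instead of assuming a writable working directory, and the browser no longer opens unless explicitly requested. A standalone sketch of the new directory fallback in isolation (the helper name is hypothetical, not part of the package):

```python
import os
import tempfile

def resolve_report_directory(report_directory=None):
    """Hypothetical helper mirroring the fallback logic above."""
    if not report_directory:
        # No directory given: create a fresh temporary one.
        report_directory = tempfile.mkdtemp()
    elif not os.path.exists(report_directory):
        # Directory given but missing: create it.
        os.makedirs(report_directory)
    return report_directory

print(resolve_report_directory())           # e.g. /tmp/tmpa1b2c3d4
print(resolve_report_directory("reports"))  # ./reports, created if absent
```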
ai_data_science_team/utils/messages.py
ADDED

@@ -0,0 +1,27 @@
+
+
+
+def get_tool_call_names(messages):
+    """
+    Method to extract the tool call names from a list of LangChain messages.
+
+    Parameters:
+    ----------
+    messages : list
+        A list of LangChain messages.
+
+    Returns:
+    -------
+    tool_calls : list
+        A list of tool call names.
+
+    """
+    tool_calls = []
+    for message in messages:
+        try:
+            if "tool_call_id" in list(dict(message).keys()):
+                tool_calls.append(message.name)
+        except:
+            pass
+    return tool_calls
+
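A quick sketch of the new helper in use, built from stock `langchain_core` message types (the message contents are illustrative). `ToolMessage` carries a `tool_call_id` field, which is what the helper keys on:

```python
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

from ai_data_science_team.utils.messages import get_tool_call_names

messages = [
    HumanMessage(content="Describe the dataset."),
    AIMessage(
        content="",
        tool_calls=[{"name": "describe_dataset", "args": {}, "id": "call_1"}],
    ),
    ToolMessage(
        content="Summary statistics computed using pandas describe().",
        name="describe_dataset",
        tool_call_id="call_1",
    ),
]

print(get_tool_call_names(messages))  # -> ['describe_dataset']
```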
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ai-data-science-team
-Version: 0.0.0.9012
+Version: 0.0.0.9014
 Summary: Build and run an AI-powered data science team.
 Home-page: https://github.com/business-science/ai-data-science-team
 Author: Matt Dancho

@@ -18,7 +18,7 @@ Requires-Dist: langchain
 Requires-Dist: langchain_community
 Requires-Dist: langchain_openai
 Requires-Dist: langchain_experimental
-Requires-Dist: langgraph>=0.2.
+Requires-Dist: langgraph>=0.2.74
 Requires-Dist: openai
 Requires-Dist: pandas
 Requires-Dist: sqlalchemy

@@ -152,7 +152,11 @@ This is a top secret project I'm working on. It's a multi-agent data science app
 
 #### 🔥 Agentic Applications
 
-1. **
+1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
+
+![EDA Copilot App](/img/apps/ai_exploratory_copilot.jpg)
+
+2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
 
 ### Agents Available Now
 
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD
RENAMED

@@ -1,31 +1,32 @@
-ai_data_science_team/__init__.py,sha256=
-ai_data_science_team/_version.py,sha256=
+ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
+ai_data_science_team/_version.py,sha256=D4dUl-fYnimOU_VSzvrmJm30_IoaF_9m9dTLp8HE6rQ,26
 ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
 ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
-ai_data_science_team/agents/data_cleaning_agent.py,sha256=
-ai_data_science_team/agents/data_loader_tools_agent.py,sha256=
-ai_data_science_team/agents/data_visualization_agent.py,sha256=
-ai_data_science_team/agents/data_wrangling_agent.py,sha256=
-ai_data_science_team/agents/feature_engineering_agent.py,sha256=
-ai_data_science_team/agents/sql_database_agent.py,sha256=
+ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
+ai_data_science_team/agents/data_loader_tools_agent.py,sha256=TFKzYqV6cvU-sMbfL-hg8-NgF_Hz3nysGFldvb5K3fM,9327
+ai_data_science_team/agents/data_visualization_agent.py,sha256=eUSTzTOm5aLJ6Cqnk-hRuXeVbYyy0RIzN8_0LLy0P9o,29387
+ai_data_science_team/agents/data_wrangling_agent.py,sha256=6tiDO1i-5s2Ju6_MsLoJMflUuRSf_1oTSsSKcLlgzEc,33376
+ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
+ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
 ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
-ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=
+ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=x0kTwDo0BNbYzgA0YamMWdqRjx0upZgeXp9nF6C6_8E,8364
 ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
-ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=
+ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NXrNHb723NeF2rns0Y0,33934
 ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=
-ai_data_science_team/multiagents/__init__.py,sha256=
-ai_data_science_team/multiagents/
+ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=QImaZnS8hPdrU7GI6pZ0dUDO-LXx40MSA3XyMDppIh0,12003
+ai_data_science_team/multiagents/__init__.py,sha256=5tpmZBQ_UT5SKDCS_NivZhN19HEStKIcstiqSXPXDl0,208
+ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=O662v-75tLqHHrVNjncsPeR2FB4MWSWruJRIF-YO-fg,13581
+ai_data_science_team/multiagents/sql_data_analyst.py,sha256=Fpue6WcX9x18kEH3kfEi8kkFoG9HhQ1AZiWw6Y6FXOo,18502
 ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
 ai_data_science_team/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/parsers/parsers.py,sha256=hIsMZXRHz9hqs8R1ebymKA7D6NxOf5UVMpDAr_gGhE8,2027
 ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP3cEFuf22-R5RM,330
-ai_data_science_team/templates/agent_templates.py,sha256=
+ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J2GG91REJOKUUEY71Dszs,30767
 ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
-ai_data_science_team/tools/dataframe.py,sha256=
-ai_data_science_team/tools/eda.py,sha256=
+ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
+ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
 ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
 ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
 ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784

@@ -33,10 +34,11 @@ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
 ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
 ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
+ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
 ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
 ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
-ai_data_science_team-0.0.0.
-ai_data_science_team-0.0.0.
-ai_data_science_team-0.0.0.
-ai_data_science_team-0.0.0.
-ai_data_science_team-0.0.0.
+ai_data_science_team-0.0.0.9014.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ai_data_science_team-0.0.0.9014.dist-info/METADATA,sha256=a35LbXyxf_XiP82m_P5HLFwPrmuzXkNWbyfzGge7dHA,13021
+ai_data_science_team-0.0.0.9014.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ai_data_science_team-0.0.0.9014.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ai_data_science_team-0.0.0.9014.dist-info/RECORD,,
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/LICENSE
RENAMED
File without changes

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/WHEEL
RENAMED
File without changes

{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/top_level.txt
RENAMED
File without changes