ai-data-science-team 0.0.0.9012__py3-none-any.whl → 0.0.0.9014__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- ai_data_science_team/__init__.py +22 -0
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/data_cleaning_agent.py +17 -3
- ai_data_science_team/agents/data_loader_tools_agent.py +24 -1
- ai_data_science_team/agents/data_visualization_agent.py +17 -3
- ai_data_science_team/agents/data_wrangling_agent.py +30 -10
- ai_data_science_team/agents/feature_engineering_agent.py +17 -4
- ai_data_science_team/agents/sql_database_agent.py +15 -2
- ai_data_science_team/ds_agents/eda_tools_agent.py +28 -6
- ai_data_science_team/ml_agents/h2o_ml_agent.py +15 -3
- ai_data_science_team/ml_agents/mlflow_tools_agent.py +23 -1
- ai_data_science_team/multiagents/__init__.py +2 -1
- ai_data_science_team/multiagents/pandas_data_analyst.py +305 -0
- ai_data_science_team/multiagents/sql_data_analyst.py +119 -30
- ai_data_science_team/templates/agent_templates.py +41 -5
- ai_data_science_team/tools/dataframe.py +6 -1
- ai_data_science_team/tools/eda.py +75 -16
- ai_data_science_team/utils/messages.py +27 -0
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA +7 -3
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD +23 -21
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/WHEEL +0 -0
- {ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,14 @@
|
|
1
1
|
|
2
2
|
from langchain_core.messages import BaseMessage
|
3
|
-
|
3
|
+
|
4
|
+
from langchain.prompts import PromptTemplate
|
5
|
+
from langchain_core.output_parsers import JsonOutputParser
|
4
6
|
|
5
7
|
from langgraph.graph import START, END, StateGraph
|
6
8
|
from langgraph.graph.state import CompiledStateGraph
|
7
|
-
from langgraph.types import
|
9
|
+
from langgraph.types import Checkpointer
|
8
10
|
|
9
|
-
from typing import TypedDict, Annotated, Sequence
|
11
|
+
from typing import TypedDict, Annotated, Sequence
|
10
12
|
import operator
|
11
13
|
|
12
14
|
from typing_extensions import TypedDict
|
@@ -20,6 +22,7 @@ from ai_data_science_team.agents import SQLDatabaseAgent, DataVisualizationAgent
|
|
20
22
|
from ai_data_science_team.utils.plotly import plotly_from_dict
|
21
23
|
from ai_data_science_team.utils.regex import remove_consecutive_duplicates, get_generic_summary
|
22
24
|
|
25
|
+
AGENT_NAME = "sql_data_analyst"
|
23
26
|
|
24
27
|
class SQLDataAnalyst(BaseAgent):
|
25
28
|
"""
|
@@ -33,6 +36,8 @@ class SQLDataAnalyst(BaseAgent):
|
|
33
36
|
The SQL Database Agent.
|
34
37
|
data_visualization_agent: DataVisualizationAgent
|
35
38
|
The Data Visualization Agent.
|
39
|
+
checkpointer: Checkpointer (optional)
|
40
|
+
The checkpointer to save the state of the multi-agent system.
|
36
41
|
|
37
42
|
Methods:
|
38
43
|
--------
|
@@ -326,17 +331,17 @@ def make_sql_data_analyst(
|
|
326
331
|
"""
|
327
332
|
Creates a multi-agent system that takes in a SQL query and returns a plot or table.
|
328
333
|
|
329
|
-
- Agent 1: SQL Database Agent made with `
|
330
|
-
- Agent 2: Data Visualization Agent made with `
|
334
|
+
- Agent 1: SQL Database Agent made with `SQLDatabaseAgent()`
|
335
|
+
- Agent 2: Data Visualization Agent made with `DataVisualizationAgent()`
|
331
336
|
|
332
337
|
Parameters:
|
333
338
|
----------
|
334
339
|
model:
|
335
340
|
The language model to be used for the agents.
|
336
341
|
sql_database_agent: CompiledStateGraph
|
337
|
-
The SQL Database Agent made with `
|
342
|
+
The SQL Database Agent made with `SQLDatabaseAgent()`.
|
338
343
|
data_visualization_agent: CompiledStateGraph
|
339
|
-
The Data Visualization Agent made with `
|
344
|
+
The Data Visualization Agent made with `DataVisualizationAgent()`.
|
340
345
|
checkpointer: Checkpointer (optional)
|
341
346
|
The checkpointer to save the state of the multi-agent system.
|
342
347
|
Default: None
|
@@ -348,10 +353,39 @@ def make_sql_data_analyst(
|
|
348
353
|
"""
|
349
354
|
|
350
355
|
llm = model
|
356
|
+
|
357
|
+
|
358
|
+
routing_preprocessor_prompt = PromptTemplate(
|
359
|
+
template="""
|
360
|
+
You are an expert in routing decisions for a SQL Database Agent, a Charting Visualization Agent, and a Pandas Table Agent. Your job is to:
|
361
|
+
|
362
|
+
1. Determine what the correct format for a Users Question should be for use with a SQL Database Agent based on the incoming user question. Anything related to database and data manipulation should be passed along.
|
363
|
+
2. Determine whether or not a chart should be generated or a table should be returned based on the users question.
|
364
|
+
3. If a chart is requested, determine the correct format of a Users Question should be used with a Data Visualization Agent. Anything related to plotting and visualization should be passed along.
|
365
|
+
|
366
|
+
Use the following criteria on how to route the the initial user question:
|
367
|
+
|
368
|
+
From the incoming user question, remove any details about the format of the final response as either a Chart or Table and return only the important part of the incoming user question that is relevant for the SQL generator agent. This will be the 'user_instructions_sql_database'. If 'None' is found, return the original user question.
|
369
|
+
|
370
|
+
Next, determine if the user would like a data visualization ('chart') or a 'table' returned with the results of the Data Wrangling Agent. If unknown, not specified or 'None' is found, then select 'table'.
|
371
|
+
|
372
|
+
If a 'chart' is requested, return the 'user_instructions_data_visualization'. If 'None' is found, return None.
|
373
|
+
|
374
|
+
Return JSON with 'user_instructions_sql_database', 'user_instructions_data_visualization' and 'routing_preprocessor_decision'.
|
375
|
+
|
376
|
+
INITIAL_USER_QUESTION: {user_instructions}
|
377
|
+
""",
|
378
|
+
input_variables=["user_instructions"]
|
379
|
+
)
|
380
|
+
|
381
|
+
routing_preprocessor = routing_preprocessor_prompt | llm | JsonOutputParser()
|
351
382
|
|
352
383
|
class PrimaryState(TypedDict):
|
353
384
|
messages: Annotated[Sequence[BaseMessage], operator.add]
|
354
385
|
user_instructions: str
|
386
|
+
user_instructions_sql_database: str
|
387
|
+
user_instructions_data_visualization: str
|
388
|
+
routing_preprocessor_decision: str
|
355
389
|
sql_query_code: str
|
356
390
|
sql_database_function: str
|
357
391
|
data_sql: dict
|
@@ -359,39 +393,94 @@ def make_sql_data_analyst(
|
|
359
393
|
plot_required: bool
|
360
394
|
data_visualization_function: str
|
361
395
|
plotly_graph: dict
|
396
|
+
plotly_error: str
|
362
397
|
max_retries: int
|
363
398
|
retry_count: int
|
364
399
|
|
365
|
-
def
|
400
|
+
def preprocess_routing(state: PrimaryState):
    """
    Graph node that splits the incoming user question into routing fields.

    Reads ``user_instructions`` from the state, sends it through the
    ``routing_preprocessor`` chain, and returns a partial state update with:

    - ``user_instructions_sql_database``
    - ``user_instructions_data_visualization``
    - ``routing_preprocessor_decision``

    Each value is taken with ``.get`` from the chain's response, so a key the
    response does not contain is stored as ``None``.
    """
    print("---SQL DATA ANALYST---")
    print("*************************")
    print("---PREPROCESS ROUTER---")

    # Chart Routing and SQL Prep
    parsed = routing_preprocessor.invoke(
        {"user_instructions": state.get("user_instructions")}
    )

    routing_keys = (
        "user_instructions_sql_database",
        "user_instructions_data_visualization",
        "routing_preprocessor_decision",
    )
    return {key: parsed.get(key) for key in routing_keys}
|
414
|
+
|
415
|
+
def router_chart_or_table(state: PrimaryState):
    """
    Pick the branch label used by the conditional edge.

    Returns "chart" only when ``routing_preprocessor_decision`` is exactly
    "chart"; any other value (including a missing key) yields "table".
    """
    print("---ROUTER: CHART OR TABLE---")
    if state.get('routing_preprocessor_decision') == "chart":
        return "chart"
    return "table"
|
418
|
+
|
419
|
+
|
420
|
+
def invoke_sql_database_agent(state: PrimaryState):
    """
    Graph node that runs the SQL Database Agent.

    The agent receives the SQL-specific instructions produced by the routing
    step plus the retry settings from the state. Its ``messages``,
    ``data_sql``, ``sql_query_code`` and ``sql_database_function`` outputs are
    returned as a partial state update (``None`` for any key the agent's
    response does not contain).
    """
    agent_input = {
        "user_instructions": state.get("user_instructions_sql_database"),
        "max_retries": state.get("max_retries"),
        "retry_count": state.get("retry_count"),
    }
    result = sql_database_agent.invoke(agent_input)

    forwarded_keys = (
        "messages",
        "data_sql",
        "sql_query_code",
        "sql_database_function",
    )
    return {key: result.get(key) for key in forwarded_keys}
|
435
|
+
|
436
|
+
def invoke_data_visualization_agent(state: PrimaryState):
    """
    Graph node that runs the Data Visualization Agent.

    The agent is given the visualization-specific instructions, the SQL
    result (``data_sql``) as its ``data_raw`` input, and the retry settings.
    The returned partial state update carries the agent's messages, generated
    function and plotly graph; the agent's ``data_visualization_error`` is
    stored under the state key ``plotly_error``.
    """
    agent_input = {
        "user_instructions": state.get("user_instructions_data_visualization"),
        "data_raw": state.get("data_sql"),
        "max_retries": state.get("max_retries"),
        "retry_count": state.get("retry_count"),
    }
    result = data_visualization_agent.invoke(agent_input)

    # state key -> key in the agent response (only the error key is renamed)
    response_key_for = {
        "messages": "messages",
        "data_visualization_function": "data_visualization_function",
        "plotly_graph": "plotly_graph",
        "plotly_error": "data_visualization_error",
    }
    return {
        state_key: result.get(response_key)
        for state_key, response_key in response_key_for.items()
    }
|
385
451
|
|
386
|
-
|
387
|
-
|
388
|
-
|
452
|
+
def route_printer(state: PrimaryState):
    """
    Terminal graph node: print the chosen route and leave the state untouched.

    Returns an empty dict, i.e. no state keys are updated.
    """
    print("---ROUTE PRINTER---")
    decision = state.get('routing_preprocessor_decision')
    print(f" Route: {decision}")
    print("---END---")
    return {}
|
457
|
+
|
458
|
+
workflow = StateGraph(PrimaryState)
|
459
|
+
|
460
|
+
workflow.add_node("routing_preprocessor", preprocess_routing)
|
461
|
+
workflow.add_node("sql_database_agent", invoke_sql_database_agent)
|
462
|
+
workflow.add_node("data_visualization_agent", invoke_data_visualization_agent)
|
463
|
+
workflow.add_node("route_printer", route_printer)
|
389
464
|
|
390
|
-
workflow.add_edge(START, "
|
391
|
-
workflow.add_edge("
|
392
|
-
|
465
|
+
workflow.add_edge(START, "routing_preprocessor")
|
466
|
+
workflow.add_edge("routing_preprocessor", "sql_database_agent")
|
467
|
+
|
468
|
+
workflow.add_conditional_edges(
|
469
|
+
"sql_database_agent",
|
470
|
+
router_chart_or_table,
|
471
|
+
{
|
472
|
+
"chart": "data_visualization_agent",
|
473
|
+
"table": "route_printer"
|
474
|
+
}
|
475
|
+
)
|
476
|
+
|
477
|
+
workflow.add_edge("data_visualization_agent", "route_printer")
|
478
|
+
workflow.add_edge("route_printer", END)
|
393
479
|
|
394
|
-
app = workflow.compile(
|
480
|
+
app = workflow.compile(
|
481
|
+
checkpointer=checkpointer,
|
482
|
+
name=AGENT_NAME
|
483
|
+
)
|
395
484
|
|
396
485
|
return app
|
397
486
|
|
@@ -40,6 +40,21 @@ class BaseAgent(CompiledStateGraph):
|
|
40
40
|
self._params = params
|
41
41
|
self._compiled_graph = self._make_compiled_graph()
|
42
42
|
self.response = None
|
43
|
+
self.name = self._compiled_graph.name
|
44
|
+
self.checkpointer = self._compiled_graph.checkpointer
|
45
|
+
self.store = self._compiled_graph.store
|
46
|
+
self.output_channels = self._compiled_graph.output_channels
|
47
|
+
self.nodes = self._compiled_graph.nodes
|
48
|
+
self.stream_mode = self._compiled_graph.stream_mode
|
49
|
+
self.builder = self._compiled_graph.builder
|
50
|
+
self.channels = self._compiled_graph.channels
|
51
|
+
self.input_channels = self._compiled_graph.input_channels
|
52
|
+
self.input_schema = self._compiled_graph.input_schema
|
53
|
+
self.output_schema = self._compiled_graph.output_schema
|
54
|
+
self.debug = self._compiled_graph.debug
|
55
|
+
self.interrupt_after_nodes = self._compiled_graph.interrupt_after_nodes
|
56
|
+
self.interrupt_before_nodes = self._compiled_graph.interrupt_before_nodes
|
57
|
+
self.config = self._compiled_graph.config
|
43
58
|
|
44
59
|
def _make_compiled_graph(self):
|
45
60
|
"""
|
@@ -197,6 +212,24 @@ class BaseAgent(CompiledStateGraph):
|
|
197
212
|
"""
|
198
213
|
return self.get_output_jsonschema()['properties']
|
199
214
|
|
215
|
+
def get_state(self, config, *, subgraphs = False):
    """
    Returns the state of the agent.

    Delegates to the wrapped compiled graph's ``get_state`` with the same
    ``config`` and ``subgraphs`` arguments and returns its result unchanged.
    """
    graph = self._compiled_graph
    return graph.get_state(config, subgraphs=subgraphs)
|
220
|
+
|
221
|
+
def get_state_history(self, config, *, filter = None, before = None, limit = None):
    """
    Returns the state history of the agent.

    Delegates to the wrapped compiled graph's ``get_state_history``,
    forwarding ``filter``, ``before`` and ``limit`` as keyword arguments, and
    returns its result unchanged.
    """
    history_options = {"filter": filter, "before": before, "limit": limit}
    return self._compiled_graph.get_state_history(config, **history_options)
|
226
|
+
|
227
|
+
def update_state(self, config, values, as_node = None):
    """
    Updates the state of the agent.

    Delegates to the wrapped compiled graph's ``update_state``, passing
    ``config``, ``values`` and ``as_node`` positionally, and returns its
    result unchanged.
    """
    graph = self._compiled_graph
    return graph.update_state(config, values, as_node)
|
232
|
+
|
200
233
|
def get_response(self):
|
201
234
|
"""
|
202
235
|
Returns the response generated by the agent.
|
@@ -237,6 +270,7 @@ def create_coding_agent_graph(
|
|
237
270
|
checkpointer: Optional[Callable] = None,
|
238
271
|
bypass_recommended_steps: bool = False,
|
239
272
|
bypass_explain_code: bool = False,
|
273
|
+
agent_name: str = "coding_agent"
|
240
274
|
):
|
241
275
|
"""
|
242
276
|
Creates a generic agent graph using the provided node functions and node names.
|
@@ -281,6 +315,8 @@ def create_coding_agent_graph(
|
|
281
315
|
Whether to skip the recommended steps node.
|
282
316
|
bypass_explain_code : bool, optional
|
283
317
|
Whether to skip the final explain code node.
|
318
|
+
agent_name : str, optional
|
319
|
+
The name of the agent graph.
|
284
320
|
|
285
321
|
Returns
|
286
322
|
-------
|
@@ -366,10 +402,10 @@ def create_coding_agent_graph(
|
|
366
402
|
workflow.add_edge(explain_code_node_name, END)
|
367
403
|
|
368
404
|
# Finally, compile
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
405
|
+
app = workflow.compile(
|
406
|
+
checkpointer=checkpointer,
|
407
|
+
name=agent_name,
|
408
|
+
)
|
373
409
|
|
374
410
|
return app
|
375
411
|
|
@@ -574,7 +610,7 @@ def node_func_execute_agent_from_sql_connection(
|
|
574
610
|
|
575
611
|
# Retrieve SQLAlchemy connection and code snippet from the state
|
576
612
|
is_engine = isinstance(connection, sql.engine.base.Engine)
|
577
|
-
|
613
|
+
connection = connection.connect() if is_engine else connection
|
578
614
|
agent_code = state.get(code_snippet_key)
|
579
615
|
|
580
616
|
# Ensure the connection object is provided
|
@@ -74,7 +74,12 @@ def get_dataframe_summary(
|
|
74
74
|
return summaries
|
75
75
|
|
76
76
|
|
77
|
-
def _summarize_dataframe(
|
77
|
+
def _summarize_dataframe(
|
78
|
+
df: pd.DataFrame,
|
79
|
+
dataset_name: str,
|
80
|
+
n_sample=30,
|
81
|
+
skip_stats=False
|
82
|
+
) -> str:
|
78
83
|
"""Generate a summary string for a single DataFrame."""
|
79
84
|
# 1. Convert dictionary-type cells to strings
|
80
85
|
# This prevents unhashable dict errors during df.nunique().
|
@@ -2,11 +2,44 @@
|
|
2
2
|
from typing import Annotated, Dict, Tuple, Union
|
3
3
|
|
4
4
|
import os
|
5
|
+
import tempfile
|
5
6
|
|
6
7
|
from langchain.tools import tool
|
7
8
|
|
8
9
|
from langgraph.prebuilt import InjectedState
|
9
10
|
|
11
|
+
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
12
|
+
|
13
|
+
|
14
|
+
@tool(response_format='content')
def explain_data(
    data_raw: Annotated[dict, InjectedState("data_raw")],
    n_sample: int = 30,
    skip_stats: bool = False,
):
    """
    Tool: explain_data
    Description:
    Provides an extensive, narrative summary of a DataFrame including its shape, column types,
    missing value percentages, unique counts, sample rows, and (if not skipped) descriptive stats/info.

    Parameters:
    data_raw (dict): Raw data.
    n_sample (int, default=30): Number of rows to display.
    skip_stats (bool, default=False): If True, omit descriptive stats/info.

    LLM Guidance:
    Use when a detailed, human-readable explanation is needed—i.e., a full overview is preferred over a concise numerical summary.

    Returns:
    str: Detailed DataFrame summary.
    """
    # NOTE: the docstring above is left verbatim; it is presumably what the
    # @tool decorator exposes to the LLM as the tool description.
    print(" * Tool: explain_data")

    # pandas is imported lazily, only when the tool is actually invoked.
    import pandas as pd

    frame = pd.DataFrame(data_raw)

    # NOTE(review): the helper's return value is passed through unchanged —
    # confirm it is a str, as the docstring promises.
    return get_dataframe_summary(frame, n_sample=n_sample, skip_stats=skip_stats)
|
10
43
|
|
11
44
|
@tool(response_format='content_and_artifact')
|
12
45
|
def describe_dataset(
|
@@ -15,21 +48,33 @@ def describe_dataset(
|
|
15
48
|
"""
|
16
49
|
Tool: describe_dataset
|
17
50
|
Description:
|
18
|
-
|
19
|
-
|
20
|
-
|
51
|
+
Compute and return summary statistics for the dataset using pandas' describe() method.
|
52
|
+
The tool provides both a textual summary and a structured artifact (a dictionary) for further processing.
|
53
|
+
|
54
|
+
Parameters:
|
55
|
+
-----------
|
56
|
+
data_raw : dict
|
57
|
+
The raw data in dictionary format.
|
58
|
+
|
59
|
+
LLM Selection Guidance:
|
60
|
+
------------------------
|
61
|
+
Use this tool when:
|
62
|
+
- The request emphasizes numerical descriptive statistics (e.g., count, mean, std, min, quartiles, max).
|
63
|
+
- The user needs a concise statistical snapshot rather than a detailed narrative.
|
64
|
+
- Both a brief text explanation and a structured data artifact (for downstream tasks) are required.
|
65
|
+
|
21
66
|
Returns:
|
22
67
|
-------
|
23
68
|
Tuple[str, Dict]:
|
24
|
-
content: A textual summary
|
25
|
-
artifact: A dictionary (from DataFrame.describe())
|
69
|
+
- content: A textual summary indicating that summary statistics have been computed.
|
70
|
+
- artifact: A dictionary (derived from DataFrame.describe()) containing detailed statistical measures.
|
26
71
|
"""
|
27
72
|
print(" * Tool: describe_dataset")
|
28
73
|
import pandas as pd
|
29
74
|
df = pd.DataFrame(data_raw)
|
30
75
|
description_df = df.describe(include='all')
|
31
76
|
content = "Summary statistics computed using pandas describe()."
|
32
|
-
artifact = description_df.to_dict()
|
77
|
+
artifact = {'describe_df': description_df.to_dict()}
|
33
78
|
return content, artifact
|
34
79
|
|
35
80
|
|
@@ -226,8 +271,8 @@ def generate_sweetviz_report(
|
|
226
271
|
data_raw: Annotated[dict, InjectedState("data_raw")],
|
227
272
|
target: str = None,
|
228
273
|
report_name: str = "sweetviz_report.html",
|
229
|
-
report_directory: str =
|
230
|
-
open_browser: bool =
|
274
|
+
report_directory: str = None, # <-- Default to None
|
275
|
+
open_browser: bool = False,
|
231
276
|
) -> Tuple[str, Dict]:
|
232
277
|
"""
|
233
278
|
Tool: generate_sweetviz_report
|
@@ -243,9 +288,10 @@ def generate_sweetviz_report(
|
|
243
288
|
report_name : str, optional
|
244
289
|
The file name to save the Sweetviz HTML report. Default is "sweetviz_report.html".
|
245
290
|
report_directory : str, optional
|
246
|
-
The directory where the report should be saved.
|
291
|
+
The directory where the report should be saved.
|
292
|
+
If None, a temporary directory is created and used.
|
247
293
|
open_browser : bool, optional
|
248
|
-
Whether to open the report in a web browser. Default is
|
294
|
+
Whether to open the report in a web browser. Default is False.
|
249
295
|
|
250
296
|
Returns:
|
251
297
|
--------
|
@@ -254,28 +300,37 @@ def generate_sweetviz_report(
|
|
254
300
|
artifact: A dictionary with the report file path and optionally the report's HTML content.
|
255
301
|
"""
|
256
302
|
print(" * Tool: generate_sweetviz_report")
|
303
|
+
|
304
|
+
# Import sweetviz
|
257
305
|
try:
|
258
306
|
import sweetviz as sv
|
259
307
|
except ImportError:
|
260
308
|
raise ImportError("Please install the 'sweetviz' package to use this tool. Run: pip install sweetviz")
|
309
|
+
|
261
310
|
import pandas as pd
|
311
|
+
|
262
312
|
# Convert injected raw data to a DataFrame.
|
263
313
|
df = pd.DataFrame(data_raw)
|
264
314
|
|
315
|
+
# If no directory is specified, use a temporary directory.
|
316
|
+
if not report_directory:
|
317
|
+
report_directory = tempfile.mkdtemp()
|
318
|
+
print(f" * Using temporary directory: {report_directory}")
|
319
|
+
else:
|
320
|
+
# Ensure user-specified directory exists.
|
321
|
+
if not os.path.exists(report_directory):
|
322
|
+
os.makedirs(report_directory)
|
323
|
+
|
265
324
|
# Create the Sweetviz report.
|
266
325
|
report = sv.analyze(df, target_feat=target)
|
267
326
|
|
268
|
-
# Ensure the directory exists; default is os.getcwd()/reports
|
269
|
-
if not os.path.exists(report_directory):
|
270
|
-
os.makedirs(report_directory)
|
271
|
-
|
272
327
|
# Determine the full path for the report.
|
273
328
|
full_report_path = os.path.join(report_directory, report_name)
|
274
329
|
|
275
330
|
# Save the report to the specified HTML file.
|
276
331
|
report.show_html(
|
277
332
|
filepath=full_report_path,
|
278
|
-
open_browser=
|
333
|
+
open_browser=open_browser,
|
279
334
|
)
|
280
335
|
|
281
336
|
# Optionally, read the HTML content (if desired to pass along in the artifact).
|
@@ -285,9 +340,13 @@ def generate_sweetviz_report(
|
|
285
340
|
except Exception:
|
286
341
|
html_content = None
|
287
342
|
|
288
|
-
content =
|
343
|
+
content = (
|
344
|
+
f"Sweetviz EDA report generated and saved as '{os.path.abspath(full_report_path)}'. "
|
345
|
+
f"{'This was saved in a temporary directory.' if 'tmp' in report_directory else ''}"
|
346
|
+
)
|
289
347
|
artifact = {
|
290
348
|
"report_file": os.path.abspath(full_report_path),
|
291
349
|
"report_html": html_content,
|
292
350
|
}
|
293
351
|
return content, artifact
|
352
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
def get_tool_call_names(messages):
    """
    Method to extract the tool call names from a list of LangChain messages.

    A message counts as a tool call result when converting it with
    ``dict(message)`` yields a ``"tool_call_id"`` key; its ``name`` attribute
    is then collected. Messages that cannot be converted to a dict, or that
    lack a ``name`` attribute, are skipped (best-effort extraction).

    Parameters:
    ----------
    messages : list
        A list of LangChain messages.

    Returns:
    -------
    tool_calls : list
        A list of tool call names, in the order the messages appear.

    """
    tool_calls = []
    for message in messages:
        try:
            if "tool_call_id" in dict(message):
                tool_calls.append(message.name)
        except Exception:
            # Deliberately best-effort: ignore items that are not message-like.
            # `Exception` (not a bare `except`) so that KeyboardInterrupt and
            # SystemExit still propagate to the caller.
            continue
    return tool_calls
|
27
|
+
|
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: ai-data-science-team
|
3
|
-
Version: 0.0.0.
|
3
|
+
Version: 0.0.0.9014
|
4
4
|
Summary: Build and run an AI-powered data science team.
|
5
5
|
Home-page: https://github.com/business-science/ai-data-science-team
|
6
6
|
Author: Matt Dancho
|
@@ -18,7 +18,7 @@ Requires-Dist: langchain
|
|
18
18
|
Requires-Dist: langchain_community
|
19
19
|
Requires-Dist: langchain_openai
|
20
20
|
Requires-Dist: langchain_experimental
|
21
|
-
Requires-Dist: langgraph>=0.2.
|
21
|
+
Requires-Dist: langgraph>=0.2.74
|
22
22
|
Requires-Dist: openai
|
23
23
|
Requires-Dist: pandas
|
24
24
|
Requires-Dist: sqlalchemy
|
@@ -152,7 +152,11 @@ This is a top secret project I'm working on. It's a multi-agent data science app
|
|
152
152
|
|
153
153
|
#### 🔥 Agentic Applications
|
154
154
|
|
155
|
-
1. **
|
155
|
+
1. **NEW Exploratory Data Copilot**: An AI-powered data science app that performs automated exploratory data analysis (EDA) with EDA Reporting, Missing Data Analysis, Correlation Analysis, and more. [See Application](/apps/exploratory-copilot-app/)
|
156
|
+
|
157
|
+

|
158
|
+
|
159
|
+
2. **SQL Database Agent App:** Connects any SQL Database, generates SQL queries from natural language, and returns data as a downloadable table. [See Application](/apps/sql-database-agent-app/)
|
156
160
|
|
157
161
|
### Agents Available Now
|
158
162
|
|
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/RECORD
RENAMED
@@ -1,31 +1,32 @@
|
|
1
|
-
ai_data_science_team/__init__.py,sha256=
|
2
|
-
ai_data_science_team/_version.py,sha256=
|
1
|
+
ai_data_science_team/__init__.py,sha256=LmogkhGnxvvVe1ukJM6I6lXy4B7SuCr5eXZpwjyDMKQ,444
|
2
|
+
ai_data_science_team/_version.py,sha256=D4dUl-fYnimOU_VSzvrmJm30_IoaF_9m9dTLp8HE6rQ,26
|
3
3
|
ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
|
4
4
|
ai_data_science_team/agents/__init__.py,sha256=Gnotza9SKr_0IxuaX8k1nsZK48wXkkeZcGcrR1EqNks,668
|
5
|
-
ai_data_science_team/agents/data_cleaning_agent.py,sha256=
|
6
|
-
ai_data_science_team/agents/data_loader_tools_agent.py,sha256=
|
7
|
-
ai_data_science_team/agents/data_visualization_agent.py,sha256=
|
8
|
-
ai_data_science_team/agents/data_wrangling_agent.py,sha256=
|
9
|
-
ai_data_science_team/agents/feature_engineering_agent.py,sha256=
|
10
|
-
ai_data_science_team/agents/sql_database_agent.py,sha256=
|
5
|
+
ai_data_science_team/agents/data_cleaning_agent.py,sha256=aZLhnN2EBlY_hmAg_r73dwi1w5utSFNEgEs8aWl8Cho,27991
|
6
|
+
ai_data_science_team/agents/data_loader_tools_agent.py,sha256=TFKzYqV6cvU-sMbfL-hg8-NgF_Hz3nysGFldvb5K3fM,9327
|
7
|
+
ai_data_science_team/agents/data_visualization_agent.py,sha256=eUSTzTOm5aLJ6Cqnk-hRuXeVbYyy0RIzN8_0LLy0P9o,29387
|
8
|
+
ai_data_science_team/agents/data_wrangling_agent.py,sha256=6tiDO1i-5s2Ju6_MsLoJMflUuRSf_1oTSsSKcLlgzEc,33376
|
9
|
+
ai_data_science_team/agents/feature_engineering_agent.py,sha256=xZGDFnmM6wx4bi3e4c_dNOZzGcxBmX8k0iveL7dlA-k,31608
|
10
|
+
ai_data_science_team/agents/sql_database_agent.py,sha256=fln8unefn5Jd2exeyGs-9PljyLXAK60HI81tJACYeCY,31726
|
11
11
|
ai_data_science_team/ds_agents/__init__.py,sha256=dnuagUTebTDHhGXbCt-hZIilzXMSUwyHaEI7sOxhvoE,95
|
12
|
-
ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=
|
12
|
+
ai_data_science_team/ds_agents/eda_tools_agent.py,sha256=x0kTwDo0BNbYzgA0YamMWdqRjx0upZgeXp9nF6C6_8E,8364
|
13
13
|
ai_data_science_team/ds_agents/modeling_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
ai_data_science_team/ml_agents/__init__.py,sha256=qq3UlDCRV_z4FHQ1jj3YR6zPbA6kuCvYCisj_bHYfO4,190
|
15
|
-
ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=
|
15
|
+
ai_data_science_team/ml_agents/h2o_ml_agent.py,sha256=S0uayngaVwVUyA4zy05QYlq5NXrNHb723NeF2rns0Y0,33934
|
16
16
|
ai_data_science_team/ml_agents/h2o_ml_tools_agent.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=
|
18
|
-
ai_data_science_team/multiagents/__init__.py,sha256=
|
19
|
-
ai_data_science_team/multiagents/
|
17
|
+
ai_data_science_team/ml_agents/mlflow_tools_agent.py,sha256=QImaZnS8hPdrU7GI6pZ0dUDO-LXx40MSA3XyMDppIh0,12003
|
18
|
+
ai_data_science_team/multiagents/__init__.py,sha256=5tpmZBQ_UT5SKDCS_NivZhN19HEStKIcstiqSXPXDl0,208
|
19
|
+
ai_data_science_team/multiagents/pandas_data_analyst.py,sha256=O662v-75tLqHHrVNjncsPeR2FB4MWSWruJRIF-YO-fg,13581
|
20
|
+
ai_data_science_team/multiagents/sql_data_analyst.py,sha256=Fpue6WcX9x18kEH3kfEi8kkFoG9HhQ1AZiWw6Y6FXOo,18502
|
20
21
|
ai_data_science_team/multiagents/supervised_data_analyst.py,sha256=uduCYpicga-UCf9nPQktQggW96-HDlqvioYmEdWejtI,158
|
21
22
|
ai_data_science_team/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
23
|
ai_data_science_team/parsers/parsers.py,sha256=hIsMZXRHz9hqs8R1ebymKA7D6NxOf5UVMpDAr_gGhE8,2027
|
23
24
|
ai_data_science_team/templates/__init__.py,sha256=_IcyFUu_mM8dFtttz95h0csJZ-XWDP3cEFuf22-R5RM,330
|
24
|
-
ai_data_science_team/templates/agent_templates.py,sha256=
|
25
|
+
ai_data_science_team/templates/agent_templates.py,sha256=QHRNZVmIfeClEef2Fr2Wb9J2GG91REJOKUUEY71Dszs,30767
|
25
26
|
ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
27
|
ai_data_science_team/tools/data_loader.py,sha256=ITs_6UAJ0m9h68R9_LruiaJSElv9l7SxTQYryI7YZPY,14702
|
27
|
-
ai_data_science_team/tools/dataframe.py,sha256=
|
28
|
-
ai_data_science_team/tools/eda.py,sha256=
|
28
|
+
ai_data_science_team/tools/dataframe.py,sha256=cckplDWu9SsA_PRo89pYsyVCmBE0PoDIwMv6tuLunT4,4572
|
29
|
+
ai_data_science_team/tools/eda.py,sha256=KoryXso_5zOPDq7jwcUAMEXV-AIzpWb62zzbUHVtgtM,12687
|
29
30
|
ai_data_science_team/tools/h2o.py,sha256=gSK0f2FULfAfipFTTjDMUS6DjHwFFvvl4jxshr6QpS0,38997
|
30
31
|
ai_data_science_team/tools/mlflow.py,sha256=8NTkSOvbTk01GOmwFaMkLBRse80w9Kk7Ypi6Fv4kTII,29475
|
31
32
|
ai_data_science_team/tools/sql.py,sha256=vvz_CiOg6GqXo2_mlF4kq5IS6if79dpaizAgLR9sRyg,4784
|
@@ -33,10 +34,11 @@ ai_data_science_team/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
33
34
|
ai_data_science_team/utils/html.py,sha256=1MBcjNyATi3FPOyVdqf6-_QYCJmDVQWmVPIInUr50dk,628
|
34
35
|
ai_data_science_team/utils/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
|
35
36
|
ai_data_science_team/utils/matplotlib.py,sha256=d6DZfCXvZ5Kocxtsp92etIymKW2cRBcUG9GmCOMtgJo,1145
|
37
|
+
ai_data_science_team/utils/messages.py,sha256=feWIPGsv8ly9jpNnS97SoPsn1feaY1Km0VCbHTbRpI8,549
|
36
38
|
ai_data_science_team/utils/plotly.py,sha256=nST-NG0oizKVHhH6HsjHUpTUumq9bCccBdxjuaJWnVQ,504
|
37
39
|
ai_data_science_team/utils/regex.py,sha256=lwarbLqTA2VfNQSyqKCl-PBlH_0WH3zXZvYGBYGUiu4,5144
|
38
|
-
ai_data_science_team-0.0.0.
|
39
|
-
ai_data_science_team-0.0.0.
|
40
|
-
ai_data_science_team-0.0.0.
|
41
|
-
ai_data_science_team-0.0.0.
|
42
|
-
ai_data_science_team-0.0.0.
|
40
|
+
ai_data_science_team-0.0.0.9014.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
|
41
|
+
ai_data_science_team-0.0.0.9014.dist-info/METADATA,sha256=a35LbXyxf_XiP82m_P5HLFwPrmuzXkNWbyfzGge7dHA,13021
|
42
|
+
ai_data_science_team-0.0.0.9014.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
43
|
+
ai_data_science_team-0.0.0.9014.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
|
44
|
+
ai_data_science_team-0.0.0.9014.dist-info/RECORD,,
|
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/LICENSE
RENAMED
File without changes
|
{ai_data_science_team-0.0.0.9012.dist-info → ai_data_science_team-0.0.0.9014.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|