ai-data-science-team 0.0.0.9009__py3-none-any.whl → 0.0.0.9010__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/data_cleaning_agent.py +6 -6
- ai_data_science_team/agents/data_loader_tools_agent.py +69 -0
- ai_data_science_team/agents/data_visualization_agent.py +6 -7
- ai_data_science_team/agents/data_wrangling_agent.py +6 -6
- ai_data_science_team/agents/feature_engineering_agent.py +6 -6
- ai_data_science_team/agents/sql_database_agent.py +6 -6
- ai_data_science_team/ml_agents/__init__.py +1 -0
- ai_data_science_team/ml_agents/h2o_ml_agent.py +205 -385
- ai_data_science_team/ml_agents/mlflow_tools_agent.py +327 -0
- ai_data_science_team/multiagents/sql_data_analyst.py +3 -4
- ai_data_science_team/parsers/__init__.py +0 -0
- ai_data_science_team/{tools → parsers}/parsers.py +0 -1
- ai_data_science_team/templates/agent_templates.py +6 -6
- ai_data_science_team/tools/data_loader.py +378 -0
- ai_data_science_team/tools/dataframe.py +139 -0
- ai_data_science_team/tools/h2o.py +643 -0
- ai_data_science_team/tools/mlflow.py +961 -0
- ai_data_science_team/tools/{metadata.py → sql.py} +1 -137
- {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/METADATA +34 -16
- ai_data_science_team-0.0.0.9010.dist-info/RECORD +35 -0
- ai_data_science_team-0.0.0.9009.dist-info/RECORD +0 -28
- /ai_data_science_team/{tools → utils}/logging.py +0 -0
- /ai_data_science_team/{tools → utils}/regex.py +0 -0
- {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/WHEEL +0 -0
- {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/top_level.txt +0 -0
ai_data_science_team/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.0.0.9009"
|
1
|
+
__version__ = "0.0.0.9010"
|
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
|
|
27
27
|
create_coding_agent_graph,
|
28
28
|
BaseAgent,
|
29
29
|
)
|
30
|
-
from ai_data_science_team.tools.parsers import PythonOutputParser
|
31
|
-
from ai_data_science_team.tools.regex import (
|
30
|
+
from ai_data_science_team.parsers.parsers import PythonOutputParser
|
31
|
+
from ai_data_science_team.utils.regex import (
|
32
32
|
relocate_imports_inside_function,
|
33
33
|
add_comments_to_top,
|
34
34
|
format_agent_name,
|
35
35
|
format_recommended_steps,
|
36
36
|
get_generic_summary,
|
37
37
|
)
|
38
|
-
from ai_data_science_team.tools.metadata import get_dataframe_summary
|
39
|
-
from ai_data_science_team.tools.logging import log_ai_function
|
38
|
+
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
39
|
+
from ai_data_science_team.utils.logging import log_ai_function
|
40
40
|
|
41
41
|
# Setup
|
42
42
|
AGENT_NAME = "data_cleaning_agent"
|
@@ -183,7 +183,7 @@ class DataCleaningAgent(BaseAgent):
|
|
183
183
|
self.response=None
|
184
184
|
return make_data_cleaning_agent(**self._params)
|
185
185
|
|
186
|
-
def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
|
186
|
+
async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
|
187
187
|
"""
|
188
188
|
Asynchronously invokes the agent. The response is stored in the response attribute.
|
189
189
|
|
@@ -204,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
|
|
204
204
|
--------
|
205
205
|
None. The response is stored in the response attribute.
|
206
206
|
"""
|
207
|
-
response = self._compiled_graph.ainvoke({
|
207
|
+
response = await self._compiled_graph.ainvoke({
|
208
208
|
"user_instructions": user_instructions,
|
209
209
|
"data_raw": data_raw.to_dict(),
|
210
210
|
"max_retries": max_retries,
|
@@ -0,0 +1,69 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
from typing import Any, Optional, Annotated, Sequence, List, Dict
|
5
|
+
import operator
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
import os
|
9
|
+
|
10
|
+
from IPython.display import Markdown
|
11
|
+
|
12
|
+
from langchain_core.messages import BaseMessage, AIMessage
|
13
|
+
|
14
|
+
from langgraph.prebuilt import create_react_agent, ToolNode
|
15
|
+
from langgraph.prebuilt.chat_agent_executor import AgentState
|
16
|
+
from langgraph.graph import START, END, StateGraph
|
17
|
+
|
18
|
+
from ai_data_science_team.templates import BaseAgent
|
19
|
+
from ai_data_science_team.utils.regex import format_agent_name
|
20
|
+
from ai_data_science_team.tools.data_loader import (
|
21
|
+
load_directory,
|
22
|
+
load_file,
|
23
|
+
list_directory_contents,
|
24
|
+
list_directory_recursive,
|
25
|
+
get_file_info,
|
26
|
+
search_files_by_pattern,
|
27
|
+
)
|
28
|
+
|
29
|
+
AGENT_NAME = "data_loader_tools_agent"
|
30
|
+
|
31
|
+
tools = [
|
32
|
+
load_directory,
|
33
|
+
load_file,
|
34
|
+
list_directory_contents,
|
35
|
+
list_directory_recursive,
|
36
|
+
get_file_info,
|
37
|
+
search_files_by_pattern,
|
38
|
+
]
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
def make_data_loader_tools_agent(
|
43
|
+
model: Any,
|
44
|
+
directory: Optional[str] = os.getcwd(),
|
45
|
+
):
|
46
|
+
"""
|
47
|
+
Creates a Data Loader Agent that can interact with data loading tools.
|
48
|
+
|
49
|
+
Parameters:
|
50
|
+
----------
|
51
|
+
model : langchain.llms.base.LLM
|
52
|
+
The language model used to generate the tool calling agent.
|
53
|
+
directory : str, optional
|
54
|
+
The directory to search for files. Defaults to the current working directory.
|
55
|
+
|
56
|
+
Returns:
|
57
|
+
--------
|
58
|
+
Data Loader Agent
|
59
|
+
An agent that can interact with data loading tools.
|
60
|
+
"""
|
61
|
+
|
62
|
+
class GraphState(AgentState):
|
63
|
+
internal_messages: Annotated[Sequence[BaseMessage], operator.add]
|
64
|
+
directory: str
|
65
|
+
user_instructions: str
|
66
|
+
data_artifacts: dict
|
67
|
+
|
68
|
+
pass
|
69
|
+
|
@@ -10,7 +10,6 @@ from typing import TypedDict, Annotated, Sequence, Literal
|
|
10
10
|
import operator
|
11
11
|
|
12
12
|
from langchain.prompts import PromptTemplate
|
13
|
-
from langchain_core.output_parsers import StrOutputParser
|
14
13
|
from langchain_core.messages import BaseMessage
|
15
14
|
|
16
15
|
from langgraph.types import Command
|
@@ -30,16 +29,16 @@ from ai_data_science_team.templates import(
|
|
30
29
|
create_coding_agent_graph,
|
31
30
|
BaseAgent,
|
32
31
|
)
|
33
|
-
from ai_data_science_team.tools.parsers import PythonOutputParser
|
34
|
-
from ai_data_science_team.tools.regex import (
|
32
|
+
from ai_data_science_team.parsers.parsers import PythonOutputParser
|
33
|
+
from ai_data_science_team.utils.regex import (
|
35
34
|
relocate_imports_inside_function,
|
36
35
|
add_comments_to_top,
|
37
36
|
format_agent_name,
|
38
37
|
format_recommended_steps,
|
39
38
|
get_generic_summary,
|
40
39
|
)
|
41
|
-
from ai_data_science_team.tools.metadata import get_dataframe_summary
|
42
|
-
from ai_data_science_team.tools.logging import log_ai_function
|
40
|
+
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
41
|
+
from ai_data_science_team.utils.logging import log_ai_function
|
43
42
|
from ai_data_science_team.utils.plotly import plotly_from_dict
|
44
43
|
|
45
44
|
# Setup
|
@@ -197,7 +196,7 @@ class DataVisualizationAgent(BaseAgent):
|
|
197
196
|
# Rebuild the compiled graph
|
198
197
|
self._compiled_graph = self._make_compiled_graph()
|
199
198
|
|
200
|
-
def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
|
199
|
+
async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
|
201
200
|
"""
|
202
201
|
Asynchronously invokes the agent to generate a visualization.
|
203
202
|
The response is stored in the 'response' attribute.
|
@@ -219,7 +218,7 @@ class DataVisualizationAgent(BaseAgent):
|
|
219
218
|
-------
|
220
219
|
None
|
221
220
|
"""
|
222
|
-
response = self._compiled_graph.ainvoke({
|
221
|
+
response = await self._compiled_graph.ainvoke({
|
223
222
|
"user_instructions": user_instructions,
|
224
223
|
"data_raw": data_raw.to_dict(),
|
225
224
|
"max_retries": max_retries,
|
@@ -24,16 +24,16 @@ from ai_data_science_team.templates import(
|
|
24
24
|
create_coding_agent_graph,
|
25
25
|
BaseAgent,
|
26
26
|
)
|
27
|
-
from ai_data_science_team.tools.parsers import PythonOutputParser
|
28
|
-
from ai_data_science_team.tools.regex import (
|
27
|
+
from ai_data_science_team.parsers.parsers import PythonOutputParser
|
28
|
+
from ai_data_science_team.utils.regex import (
|
29
29
|
relocate_imports_inside_function,
|
30
30
|
add_comments_to_top,
|
31
31
|
format_agent_name,
|
32
32
|
format_recommended_steps,
|
33
33
|
get_generic_summary,
|
34
34
|
)
|
35
|
-
from ai_data_science_team.tools.metadata import get_dataframe_summary
|
36
|
-
from ai_data_science_team.tools.logging import log_ai_function
|
35
|
+
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
36
|
+
from ai_data_science_team.utils.logging import log_ai_function
|
37
37
|
|
38
38
|
# Setup Logging Path
|
39
39
|
AGENT_NAME = "data_wrangling_agent"
|
@@ -213,7 +213,7 @@ class DataWranglingAgent(BaseAgent):
|
|
213
213
|
self._params[k] = v
|
214
214
|
self._compiled_graph = self._make_compiled_graph()
|
215
215
|
|
216
|
-
def ainvoke_agent(
|
216
|
+
async def ainvoke_agent(
|
217
217
|
self,
|
218
218
|
data_raw: Union[pd.DataFrame, dict, list],
|
219
219
|
user_instructions: str=None,
|
@@ -245,7 +245,7 @@ class DataWranglingAgent(BaseAgent):
|
|
245
245
|
None
|
246
246
|
"""
|
247
247
|
data_input = self._convert_data_input(data_raw)
|
248
|
-
response = self._compiled_graph.ainvoke({
|
248
|
+
response = await self._compiled_graph.ainvoke({
|
249
249
|
"user_instructions": user_instructions,
|
250
250
|
"data_raw": data_input,
|
251
251
|
"max_retries": max_retries,
|
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
|
|
27
27
|
create_coding_agent_graph,
|
28
28
|
BaseAgent,
|
29
29
|
)
|
30
|
-
from ai_data_science_team.tools.parsers import PythonOutputParser
|
31
|
-
from ai_data_science_team.tools.regex import (
|
30
|
+
from ai_data_science_team.parsers.parsers import PythonOutputParser
|
31
|
+
from ai_data_science_team.utils.regex import (
|
32
32
|
relocate_imports_inside_function,
|
33
33
|
add_comments_to_top,
|
34
34
|
format_agent_name,
|
35
35
|
format_recommended_steps,
|
36
36
|
get_generic_summary,
|
37
37
|
)
|
38
|
-
from ai_data_science_team.tools.metadata import get_dataframe_summary
|
39
|
-
from ai_data_science_team.tools.logging import log_ai_function
|
38
|
+
from ai_data_science_team.tools.dataframe import get_dataframe_summary
|
39
|
+
from ai_data_science_team.utils.logging import log_ai_function
|
40
40
|
|
41
41
|
# Setup
|
42
42
|
AGENT_NAME = "feature_engineering_agent"
|
@@ -203,7 +203,7 @@ class FeatureEngineeringAgent(BaseAgent):
|
|
203
203
|
self._params[k] = v
|
204
204
|
self._compiled_graph = self._make_compiled_graph()
|
205
205
|
|
206
|
-
def ainvoke_agent(
|
206
|
+
async def ainvoke_agent(
|
207
207
|
self,
|
208
208
|
data_raw: pd.DataFrame,
|
209
209
|
user_instructions: str=None,
|
@@ -235,7 +235,7 @@ class FeatureEngineeringAgent(BaseAgent):
|
|
235
235
|
-------
|
236
236
|
None
|
237
237
|
"""
|
238
|
-
response = self._compiled_graph.ainvoke({
|
238
|
+
response = await self._compiled_graph.ainvoke({
|
239
239
|
"user_instructions": user_instructions,
|
240
240
|
"data_raw": data_raw.to_dict(),
|
241
241
|
"target_variable": target_variable,
|
@@ -25,15 +25,15 @@ from ai_data_science_team.templates import(
|
|
25
25
|
create_coding_agent_graph,
|
26
26
|
BaseAgent,
|
27
27
|
)
|
28
|
-
from ai_data_science_team.tools.parsers import SQLOutputParser
|
29
|
-
from ai_data_science_team.tools.regex import (
|
28
|
+
from ai_data_science_team.parsers.parsers import SQLOutputParser
|
29
|
+
from ai_data_science_team.utils.regex import (
|
30
30
|
add_comments_to_top,
|
31
31
|
format_agent_name,
|
32
32
|
format_recommended_steps,
|
33
33
|
get_generic_summary,
|
34
34
|
)
|
35
|
-
from ai_data_science_team.tools.metadata import get_database_metadata
|
36
|
-
from ai_data_science_team.tools.logging import log_ai_function
|
35
|
+
from ai_data_science_team.tools.sql import get_database_metadata
|
36
|
+
from ai_data_science_team.utils.logging import log_ai_function
|
37
37
|
|
38
38
|
# Setup
|
39
39
|
AGENT_NAME = "sql_database_agent"
|
@@ -193,7 +193,7 @@ class SQLDatabaseAgent(BaseAgent):
|
|
193
193
|
self._params[k] = v
|
194
194
|
self._compiled_graph = self._make_compiled_graph()
|
195
195
|
|
196
|
-
def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
|
196
|
+
async def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
|
197
197
|
"""
|
198
198
|
Asynchronously runs the SQL Database Agent based on user instructions.
|
199
199
|
|
@@ -212,7 +212,7 @@ class SQLDatabaseAgent(BaseAgent):
|
|
212
212
|
-------
|
213
213
|
None
|
214
214
|
"""
|
215
|
-
response = self._compiled_graph.ainvoke({
|
215
|
+
response = await self._compiled_graph.ainvoke({
|
216
216
|
"user_instructions": user_instructions,
|
217
217
|
"max_retries": max_retries,
|
218
218
|
"retry_count": retry_count
|