ai-data-science-team 0.0.0.9009__py3-none-any.whl → 0.0.0.9011__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. ai_data_science_team/_version.py +1 -1
  2. ai_data_science_team/agents/__init__.py +1 -0
  3. ai_data_science_team/agents/data_cleaning_agent.py +6 -6
  4. ai_data_science_team/agents/data_loader_tools_agent.py +272 -0
  5. ai_data_science_team/agents/data_visualization_agent.py +6 -7
  6. ai_data_science_team/agents/data_wrangling_agent.py +6 -6
  7. ai_data_science_team/agents/feature_engineering_agent.py +6 -6
  8. ai_data_science_team/agents/sql_database_agent.py +6 -6
  9. ai_data_science_team/ml_agents/__init__.py +1 -0
  10. ai_data_science_team/ml_agents/h2o_ml_agent.py +206 -385
  11. ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  12. ai_data_science_team/ml_agents/mlflow_tools_agent.py +350 -0
  13. ai_data_science_team/multiagents/sql_data_analyst.py +3 -4
  14. ai_data_science_team/parsers/__init__.py +0 -0
  15. ai_data_science_team/{tools → parsers}/parsers.py +0 -1
  16. ai_data_science_team/templates/agent_templates.py +6 -6
  17. ai_data_science_team/tools/data_loader.py +448 -0
  18. ai_data_science_team/tools/dataframe.py +139 -0
  19. ai_data_science_team/tools/h2o.py +643 -0
  20. ai_data_science_team/tools/mlflow.py +961 -0
  21. ai_data_science_team/tools/{metadata.py → sql.py} +1 -137
  22. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/METADATA +40 -19
  23. ai_data_science_team-0.0.0.9011.dist-info/RECORD +36 -0
  24. ai_data_science_team-0.0.0.9009.dist-info/RECORD +0 -28
  25. /ai_data_science_team/{tools → utils}/logging.py +0 -0
  26. /ai_data_science_team/{tools → utils}/regex.py +0 -0
  27. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/LICENSE +0 -0
  28. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/WHEEL +0 -0
  29. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- __version__ = "0.0.0.9009"
1
+ __version__ = "0.0.0.9011"
@@ -3,3 +3,4 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
+ from ai_data_science_team.agents.data_loader_tools_agent import make_data_loader_tools_agent, DataLoaderToolsAgent
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import (
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
32
  relocate_imports_inside_function,
33
33
  add_comments_to_top,
34
34
  format_agent_name,
35
35
  format_recommended_steps,
36
36
  get_generic_summary,
37
37
  )
38
- from ai_data_science_team.tools.metadata import get_dataframe_summary
39
- from ai_data_science_team.tools.logging import log_ai_function
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
40
40
 
41
41
  # Setup
42
42
  AGENT_NAME = "data_cleaning_agent"
@@ -183,7 +183,7 @@ class DataCleaningAgent(BaseAgent):
183
183
  self.response=None
184
184
  return make_data_cleaning_agent(**self._params)
185
185
 
186
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
186
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
187
187
  """
188
188
  Asynchronously invokes the agent. The response is stored in the response attribute.
189
189
 
@@ -204,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
204
204
  --------
205
205
  None. The response is stored in the response attribute.
206
206
  """
207
- response = self._compiled_graph.ainvoke({
207
+ response = await self._compiled_graph.ainvoke({
208
208
  "user_instructions": user_instructions,
209
209
  "data_raw": data_raw.to_dict(),
210
210
  "max_retries": max_retries,
@@ -0,0 +1,272 @@
1
+
2
+
3
+
4
+ from typing import Any, Optional, Annotated, Sequence, List, Dict
5
+ import operator
6
+
7
+ import pandas as pd
8
+ import os
9
+
10
+ from IPython.display import Markdown
11
+
12
+ from langchain_core.messages import BaseMessage, AIMessage
13
+
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+ from ai_data_science_team.tools.data_loader import (
21
+ load_directory,
22
+ load_file,
23
+ list_directory_contents,
24
+ list_directory_recursive,
25
+ get_file_info,
26
+ search_files_by_pattern,
27
+ )
28
+
29
# Identifier for this agent; it is passed to format_agent_name() for the
# banner printed when the graph node runs, and used as the `role` of the
# final AIMessage returned by the node.
AGENT_NAME = "data_loader_tools_agent"

# Tool callables imported from ai_data_science_team.tools.data_loader.
# make_data_loader_tools_agent() wraps this list in a ToolNode and hands it
# to create_react_agent().
tools = [
    load_directory,
    load_file,
    list_directory_contents,
    list_directory_recursive,
    get_file_info,
    search_files_by_pattern,
]
39
+
40
class DataLoaderToolsAgent(BaseAgent):
    """
    A Data Loader Agent that can interact with data loading tools and search for files in your file system.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model used to generate the tool calling agent.
    create_react_agent_kwargs : dict, optional
        Additional keyword arguments to pass to the create_react_agent function.
        Defaults to an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Options forwarded to the invoke call of the react agent.
        Defaults to an empty dict.

    Methods:
    --------
    update_params(**kwargs)
        Updates the agent's parameters and rebuilds the compiled graph.
    ainvoke_agent(user_instructions: str=None, **kwargs)
        Runs the agent with the given user instructions asynchronously.
    invoke_agent(user_instructions: str=None, **kwargs)
        Runs the agent with the given user instructions.
    get_internal_messages(markdown: bool=False)
        Returns the internal messages from the agent's response.
    get_artifacts(as_dataframe: bool=False)
        Returns the data loader artifacts from the agent's response.
    get_ai_message(markdown: bool=False)
        Returns the AI message from the agent's response.

    """

    def __init__(
        self,
        model: Any,
        create_react_agent_kwargs: Optional[Dict]=None,
        invoke_react_agent_kwargs: Optional[Dict]=None,
    ):
        # Defaults are None rather than {} so that instances never share one
        # mutable dict. They are normalised to dicts here because the params
        # are splatted straight into make_data_loader_tools_agent().
        self._params = {
            "model": model,
            "create_react_agent_kwargs": (
                {} if create_react_agent_kwargs is None else create_react_agent_kwargs
            ),
            "invoke_react_agent_kwargs": (
                {} if invoke_react_agent_kwargs is None else invoke_react_agent_kwargs
            ),
        }
        self._compiled_graph = self._make_compiled_graph()
        self.response = None

    def _make_compiled_graph(self):
        """
        Creates the compiled graph for the agent.

        Any previously stored response is discarded, since it was produced
        by the graph that is being replaced.
        """
        self.response = None
        return make_data_loader_tools_agent(**self._params)

    def update_params(self, **kwargs):
        """
        Updates the agent's parameters and rebuilds the compiled graph.

        Parameters:
        ----------
        kwargs : dict
            Parameter names and their new values. Each is written into the
            stored parameters before the graph is rebuilt.
        """
        for k, v in kwargs.items():
            self._params[k] = v
        self._compiled_graph = self._make_compiled_graph()

    async def ainvoke_agent(
        self,
        user_instructions: str=None,
        **kwargs
    ):
        """
        Asynchronously runs the agent with the given user instructions.
        The result is stored in the response attribute.

        Parameters:
        ----------
        user_instructions : str, optional
            The user instructions to pass to the agent.
        kwargs : dict, optional
            Additional keyword arguments to pass to the agents ainvoke method.

        Returns:
        --------
        None. The response is stored in the response attribute.
        """
        response = await self._compiled_graph.ainvoke(
            {
                "user_instructions": user_instructions,
            },
            **kwargs
        )
        self.response = response
        return None

    def invoke_agent(
        self,
        user_instructions: str=None,
        **kwargs
    ):
        """
        Runs the agent with the given user instructions.
        The result is stored in the response attribute.

        Parameters:
        ----------
        user_instructions : str, optional
            The user instructions to pass to the agent.
        kwargs : dict, optional
            Additional keyword arguments to pass to the agents invoke method.

        Returns:
        --------
        None. The response is stored in the response attribute.
        """
        response = self._compiled_graph.invoke(
            {
                "user_instructions": user_instructions,
            },
            **kwargs
        )
        self.response = response
        return None

    def get_internal_messages(self, markdown: bool=False):
        """
        Returns the internal messages from the agent's response.

        Parameters:
        ----------
        markdown : bool, optional
            If True, return an IPython Markdown object with one section per
            message (type, ID and content). Otherwise return the raw
            messages stored under "internal_messages".

        The response attribute is None until invoke_agent or ainvoke_agent
        has run.
        """
        messages = self.response["internal_messages"]
        if not markdown:
            return messages
        # Only build the formatted text when it is actually requested.
        pretty_print = "\n\n".join(
            f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}"
            for msg in messages
        )
        return Markdown(pretty_print)

    def get_artifacts(self, as_dataframe: bool=False):
        """
        Returns the data loader artifacts from the agent's response.

        Parameters:
        ----------
        as_dataframe : bool, optional
            If True, the artifacts are passed to pd.DataFrame before being
            returned. Otherwise they are returned as stored.
        """
        artifacts = self.response["data_loader_artifacts"]
        if as_dataframe:
            return pd.DataFrame(artifacts)
        return artifacts

    def get_ai_message(self, markdown: bool=False):
        """
        Returns the AI message from the agent's response.

        Parameters:
        ----------
        markdown : bool, optional
            If True, wrap the message content in an IPython Markdown object.
            Otherwise return the content as is.
        """
        content = self.response["messages"][0].content
        if markdown:
            return Markdown(content)
        return content
177
+
178
+
179
+
180
def make_data_loader_tools_agent(
    model: Any,
    create_react_agent_kwargs: Optional[Dict]=None,
    invoke_react_agent_kwargs: Optional[Dict]=None,
):
    """
    Creates a Data Loader Agent that can interact with data loading tools.

    Parameters:
    ----------
    model : langchain.llms.base.LLM
        The language model used to generate the tool calling agent.
    create_react_agent_kwargs : dict, optional
        Additional keyword arguments to pass to the create_react_agent function.
        Defaults to an empty dict.
    invoke_react_agent_kwargs : dict, optional
        Options for the invoke call of the react agent. The dict is passed
        as the second positional argument of invoke (not unpacked as
        keywords). Defaults to an empty dict.

    Returns:
    --------
    app : langchain.graphs.CompiledStateGraph
        An agent that can interact with data loading tools.
    """

    # Defaults are None rather than {} so that calls never share one mutable dict.
    if create_react_agent_kwargs is None:
        create_react_agent_kwargs = {}
    if invoke_react_agent_kwargs is None:
        invoke_react_agent_kwargs = {}

    class GraphState(AgentState):
        internal_messages: Annotated[Sequence[BaseMessage], operator.add]
        user_instructions: str
        data_loader_artifacts: dict

    def data_loader_agent(state):
        """Graph node: run the react tool-calling agent and collect its results."""

        print(format_agent_name(AGENT_NAME))
        print("    ")

        print("    * RUN REACT TOOL-CALLING AGENT")

        tool_node = ToolNode(
            tools=tools
        )

        # Named react_agent so it does not shadow this node function.
        react_agent = create_react_agent(
            model,
            tools=tool_node,
            state_schema=GraphState,
            **create_react_agent_kwargs,
        )

        response = react_agent.invoke(
            {
                "messages": [("user", state["user_instructions"])],
            },
            invoke_react_agent_kwargs,
        )

        print("    * POST-PROCESS RESULTS")

        internal_messages = response['messages']

        # Nothing came back: report an empty result under the keys declared
        # on GraphState (the artifact key was previously "mlflow_artifacts",
        # which is not a field of this state).
        if not internal_messages:
            return {
                "internal_messages": [],
                "data_loader_artifacts": None,
            }

        # Re-wrap the content of the final message as an AIMessage tagged with this agent's name
        last_ai_message = AIMessage(internal_messages[-1].content, role = AGENT_NAME)

        # The artifact, if any, is read from the second-to-last message,
        # which may be an object with an `artifact` attribute or a dict.
        last_tool_artifact = None
        if len(internal_messages) > 1:
            previous_message = internal_messages[-2]
            if hasattr(previous_message, "artifact"):
                last_tool_artifact = previous_message.artifact
            elif isinstance(previous_message, dict) and "artifact" in previous_message:
                last_tool_artifact = previous_message["artifact"]

        return {
            "messages": [last_ai_message],
            "internal_messages": internal_messages,
            "data_loader_artifacts": last_tool_artifact,
        }

    # Single-node graph: START -> data_loader_agent -> END
    workflow = StateGraph(GraphState)

    workflow.add_node("data_loader_agent", data_loader_agent)

    workflow.add_edge(START, "data_loader_agent")
    workflow.add_edge("data_loader_agent", END)

    app = workflow.compile()

    return app
272
+
@@ -10,7 +10,6 @@ from typing import TypedDict, Annotated, Sequence, Literal
10
10
  import operator
11
11
 
12
12
  from langchain.prompts import PromptTemplate
13
- from langchain_core.output_parsers import StrOutputParser
14
13
  from langchain_core.messages import BaseMessage
15
14
 
16
15
  from langgraph.types import Command
@@ -30,16 +29,16 @@ from ai_data_science_team.templates import(
30
29
  create_coding_agent_graph,
31
30
  BaseAgent,
32
31
  )
33
- from ai_data_science_team.tools.parsers import PythonOutputParser
34
- from ai_data_science_team.tools.regex import (
32
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
33
+ from ai_data_science_team.utils.regex import (
35
34
  relocate_imports_inside_function,
36
35
  add_comments_to_top,
37
36
  format_agent_name,
38
37
  format_recommended_steps,
39
38
  get_generic_summary,
40
39
  )
41
- from ai_data_science_team.tools.metadata import get_dataframe_summary
42
- from ai_data_science_team.tools.logging import log_ai_function
40
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
41
+ from ai_data_science_team.utils.logging import log_ai_function
43
42
  from ai_data_science_team.utils.plotly import plotly_from_dict
44
43
 
45
44
  # Setup
@@ -197,7 +196,7 @@ class DataVisualizationAgent(BaseAgent):
197
196
  # Rebuild the compiled graph
198
197
  self._compiled_graph = self._make_compiled_graph()
199
198
 
200
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
199
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
201
200
  """
202
201
  Asynchronously invokes the agent to generate a visualization.
203
202
  The response is stored in the 'response' attribute.
@@ -219,7 +218,7 @@ class DataVisualizationAgent(BaseAgent):
219
218
  -------
220
219
  None
221
220
  """
222
- response = self._compiled_graph.ainvoke({
221
+ response = await self._compiled_graph.ainvoke({
223
222
  "user_instructions": user_instructions,
224
223
  "data_raw": data_raw.to_dict(),
225
224
  "max_retries": max_retries,
@@ -24,16 +24,16 @@ from ai_data_science_team.templates import(
24
24
  create_coding_agent_graph,
25
25
  BaseAgent,
26
26
  )
27
- from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import (
27
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
28
+ from ai_data_science_team.utils.regex import (
29
29
  relocate_imports_inside_function,
30
30
  add_comments_to_top,
31
31
  format_agent_name,
32
32
  format_recommended_steps,
33
33
  get_generic_summary,
34
34
  )
35
- from ai_data_science_team.tools.metadata import get_dataframe_summary
36
- from ai_data_science_team.tools.logging import log_ai_function
35
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
36
+ from ai_data_science_team.utils.logging import log_ai_function
37
37
 
38
38
  # Setup Logging Path
39
39
  AGENT_NAME = "data_wrangling_agent"
@@ -213,7 +213,7 @@ class DataWranglingAgent(BaseAgent):
213
213
  self._params[k] = v
214
214
  self._compiled_graph = self._make_compiled_graph()
215
215
 
216
- def ainvoke_agent(
216
+ async def ainvoke_agent(
217
217
  self,
218
218
  data_raw: Union[pd.DataFrame, dict, list],
219
219
  user_instructions: str=None,
@@ -245,7 +245,7 @@ class DataWranglingAgent(BaseAgent):
245
245
  None
246
246
  """
247
247
  data_input = self._convert_data_input(data_raw)
248
- response = self._compiled_graph.ainvoke({
248
+ response = await self._compiled_graph.ainvoke({
249
249
  "user_instructions": user_instructions,
250
250
  "data_raw": data_input,
251
251
  "max_retries": max_retries,
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import (
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
32
  relocate_imports_inside_function,
33
33
  add_comments_to_top,
34
34
  format_agent_name,
35
35
  format_recommended_steps,
36
36
  get_generic_summary,
37
37
  )
38
- from ai_data_science_team.tools.metadata import get_dataframe_summary
39
- from ai_data_science_team.tools.logging import log_ai_function
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
40
40
 
41
41
  # Setup
42
42
  AGENT_NAME = "feature_engineering_agent"
@@ -203,7 +203,7 @@ class FeatureEngineeringAgent(BaseAgent):
203
203
  self._params[k] = v
204
204
  self._compiled_graph = self._make_compiled_graph()
205
205
 
206
- def ainvoke_agent(
206
+ async def ainvoke_agent(
207
207
  self,
208
208
  data_raw: pd.DataFrame,
209
209
  user_instructions: str=None,
@@ -235,7 +235,7 @@ class FeatureEngineeringAgent(BaseAgent):
235
235
  -------
236
236
  None
237
237
  """
238
- response = self._compiled_graph.ainvoke({
238
+ response = await self._compiled_graph.ainvoke({
239
239
  "user_instructions": user_instructions,
240
240
  "data_raw": data_raw.to_dict(),
241
241
  "target_variable": target_variable,
@@ -25,15 +25,15 @@ from ai_data_science_team.templates import(
25
25
  create_coding_agent_graph,
26
26
  BaseAgent,
27
27
  )
28
- from ai_data_science_team.tools.parsers import SQLOutputParser
29
- from ai_data_science_team.tools.regex import (
28
+ from ai_data_science_team.parsers.parsers import SQLOutputParser
29
+ from ai_data_science_team.utils.regex import (
30
30
  add_comments_to_top,
31
31
  format_agent_name,
32
32
  format_recommended_steps,
33
33
  get_generic_summary,
34
34
  )
35
- from ai_data_science_team.tools.metadata import get_database_metadata
36
- from ai_data_science_team.tools.logging import log_ai_function
35
+ from ai_data_science_team.tools.sql import get_database_metadata
36
+ from ai_data_science_team.utils.logging import log_ai_function
37
37
 
38
38
  # Setup
39
39
  AGENT_NAME = "sql_database_agent"
@@ -193,7 +193,7 @@ class SQLDatabaseAgent(BaseAgent):
193
193
  self._params[k] = v
194
194
  self._compiled_graph = self._make_compiled_graph()
195
195
 
196
- def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
196
+ async def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
197
197
  """
198
198
  Asynchronously runs the SQL Database Agent based on user instructions.
199
199
 
@@ -212,7 +212,7 @@ class SQLDatabaseAgent(BaseAgent):
212
212
  -------
213
213
  None
214
214
  """
215
- response = self._compiled_graph.ainvoke({
215
+ response = await self._compiled_graph.ainvoke({
216
216
  "user_instructions": user_instructions,
217
217
  "max_retries": max_retries,
218
218
  "retry_count": retry_count
@@ -1 +1,2 @@
1
1
  from ai_data_science_team.ml_agents.h2o_ml_agent import make_h2o_ml_agent, H2OMLAgent
2
+ from ai_data_science_team.ml_agents.mlflow_tools_agent import make_mlflow_tools_agent, MLflowToolsAgent