ai-data-science-team 0.0.0.9009__py3-none-any.whl → 0.0.0.9011__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. ai_data_science_team/_version.py +1 -1
  2. ai_data_science_team/agents/__init__.py +1 -0
  3. ai_data_science_team/agents/data_cleaning_agent.py +6 -6
  4. ai_data_science_team/agents/data_loader_tools_agent.py +272 -0
  5. ai_data_science_team/agents/data_visualization_agent.py +6 -7
  6. ai_data_science_team/agents/data_wrangling_agent.py +6 -6
  7. ai_data_science_team/agents/feature_engineering_agent.py +6 -6
  8. ai_data_science_team/agents/sql_database_agent.py +6 -6
  9. ai_data_science_team/ml_agents/__init__.py +1 -0
  10. ai_data_science_team/ml_agents/h2o_ml_agent.py +206 -385
  11. ai_data_science_team/ml_agents/h2o_ml_tools_agent.py +0 -0
  12. ai_data_science_team/ml_agents/mlflow_tools_agent.py +350 -0
  13. ai_data_science_team/multiagents/sql_data_analyst.py +3 -4
  14. ai_data_science_team/parsers/__init__.py +0 -0
  15. ai_data_science_team/{tools → parsers}/parsers.py +0 -1
  16. ai_data_science_team/templates/agent_templates.py +6 -6
  17. ai_data_science_team/tools/data_loader.py +448 -0
  18. ai_data_science_team/tools/dataframe.py +139 -0
  19. ai_data_science_team/tools/h2o.py +643 -0
  20. ai_data_science_team/tools/mlflow.py +961 -0
  21. ai_data_science_team/tools/{metadata.py → sql.py} +1 -137
  22. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/METADATA +40 -19
  23. ai_data_science_team-0.0.0.9011.dist-info/RECORD +36 -0
  24. ai_data_science_team-0.0.0.9009.dist-info/RECORD +0 -28
  25. /ai_data_science_team/{tools → utils}/logging.py +0 -0
  26. /ai_data_science_team/{tools → utils}/regex.py +0 -0
  27. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/LICENSE +0 -0
  28. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/WHEEL +0 -0
  29. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9011.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- __version__ = "0.0.0.9009"
1
+ __version__ = "0.0.0.9011"
@@ -3,3 +3,4 @@ from ai_data_science_team.agents.feature_engineering_agent import make_feature_e
3
3
  from ai_data_science_team.agents.data_wrangling_agent import make_data_wrangling_agent, DataWranglingAgent
4
4
  from ai_data_science_team.agents.sql_database_agent import make_sql_database_agent, SQLDatabaseAgent
5
5
  from ai_data_science_team.agents.data_visualization_agent import make_data_visualization_agent, DataVisualizationAgent
6
+ from ai_data_science_team.agents.data_loader_tools_agent import make_data_loader_tools_agent, DataLoaderToolsAgent
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import (
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
32
  relocate_imports_inside_function,
33
33
  add_comments_to_top,
34
34
  format_agent_name,
35
35
  format_recommended_steps,
36
36
  get_generic_summary,
37
37
  )
38
- from ai_data_science_team.tools.metadata import get_dataframe_summary
39
- from ai_data_science_team.tools.logging import log_ai_function
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
40
40
 
41
41
  # Setup
42
42
  AGENT_NAME = "data_cleaning_agent"
@@ -183,7 +183,7 @@ class DataCleaningAgent(BaseAgent):
183
183
  self.response=None
184
184
  return make_data_cleaning_agent(**self._params)
185
185
 
186
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
186
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
187
187
  """
188
188
  Asynchronously invokes the agent. The response is stored in the response attribute.
189
189
 
@@ -204,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
204
204
  --------
205
205
  None. The response is stored in the response attribute.
206
206
  """
207
- response = self._compiled_graph.ainvoke({
207
+ response = await self._compiled_graph.ainvoke({
208
208
  "user_instructions": user_instructions,
209
209
  "data_raw": data_raw.to_dict(),
210
210
  "max_retries": max_retries,
@@ -0,0 +1,272 @@
1
+
2
+
3
+
4
+ from typing import Any, Optional, Annotated, Sequence, List, Dict
5
+ import operator
6
+
7
+ import pandas as pd
8
+ import os
9
+
10
+ from IPython.display import Markdown
11
+
12
+ from langchain_core.messages import BaseMessage, AIMessage
13
+
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+ from ai_data_science_team.tools.data_loader import (
21
+ load_directory,
22
+ load_file,
23
+ list_directory_contents,
24
+ list_directory_recursive,
25
+ get_file_info,
26
+ search_files_by_pattern,
27
+ )
28
+
29
+ AGENT_NAME = "data_loader_tools_agent"
30
+
31
+ tools = [
32
+ load_directory,
33
+ load_file,
34
+ list_directory_contents,
35
+ list_directory_recursive,
36
+ get_file_info,
37
+ search_files_by_pattern,
38
+ ]
39
+
40
+ class DataLoaderToolsAgent(BaseAgent):
41
+ """
42
+ A Data Loader Agent that can interact with data loading tools and search for files in your file system.
43
+
44
+ Parameters:
45
+ ----------
46
+ model : langchain.llms.base.LLM
47
+ The language model used to generate the tool calling agent.
48
+ create_react_agent_kwargs : dict
49
+ Additional keyword arguments to pass to the create_react_agent function.
50
+ invoke_react_agent_kwargs : dict
51
+ Additional keyword arguments to pass to the invoke method of the react agent.
52
+
53
+ Methods:
54
+ --------
55
+ update_params(**kwargs)
56
+ Updates the agent's parameters and rebuilds the compiled graph.
57
+ ainvoke_agent(user_instructions: str=None, **kwargs)
58
+ Runs the agent with the given user instructions asynchronously.
59
+ invoke_agent(user_instructions: str=None, **kwargs)
60
+ Runs the agent with the given user instructions.
61
+ get_internal_messages(markdown: bool=False)
62
+ Returns the internal messages from the agent's response.
63
+ get_artifacts(as_dataframe: bool=False)
64
+ Returns the data loader artifacts from the agent's response.
65
+ get_ai_message(markdown: bool=False)
66
+ Returns the AI message from the agent's response.
67
+
68
+ """
69
+
70
+ def __init__(
71
+ self,
72
+ model: Any,
73
+ create_react_agent_kwargs: Optional[Dict]={},
74
+ invoke_react_agent_kwargs: Optional[Dict]={},
75
+ ):
76
+ self._params = {
77
+ "model": model,
78
+ "create_react_agent_kwargs": create_react_agent_kwargs,
79
+ "invoke_react_agent_kwargs": invoke_react_agent_kwargs,
80
+ }
81
+ self._compiled_graph = self._make_compiled_graph()
82
+ self.response = None
83
+
84
+ def _make_compiled_graph(self):
85
+ """
86
+ Creates the compiled graph for the agent.
87
+ """
88
+ self.response = None
89
+ return make_data_loader_tools_agent(**self._params)
90
+
91
+
92
+ def update_params(self, **kwargs):
93
+ """
94
+ Updates the agent's parameters and rebuilds the compiled graph.
95
+ """
96
+ for k, v in kwargs.items():
97
+ self._params[k] = v
98
+ self._compiled_graph = self._make_compiled_graph()
99
+
100
+ async def ainvoke_agent(
101
+ self,
102
+ user_instructions: str=None,
103
+ **kwargs
104
+ ):
105
+ """
106
+ Asynchronously runs the agent with the given user instructions.
107
+
108
+ Parameters:
109
+ ----------
110
+ user_instructions : str, optional
111
+ The user instructions to pass to the agent.
112
+ kwargs : dict, optional
113
+ Additional keyword arguments to pass to the agent's ainvoke method.
114
+
115
+ """
116
+ response = await self._compiled_graph.ainvoke(
117
+ {
118
+ "user_instructions": user_instructions,
119
+ },
120
+ **kwargs
121
+ )
122
+ self.response = response
123
+ return None
124
+
125
+ def invoke_agent(
126
+ self,
127
+ user_instructions: str=None,
128
+ **kwargs
129
+ ):
130
+ """
131
+ Runs the agent with the given user instructions.
132
+
133
+ Parameters:
134
+ ----------
135
+ user_instructions : str, optional
136
+ The user instructions to pass to the agent.
137
+ kwargs : dict, optional
138
+ Additional keyword arguments to pass to the agent's invoke method.
139
+
140
+ """
141
+ response = self._compiled_graph.invoke(
142
+ {
143
+ "user_instructions": user_instructions,
144
+ },
145
+ **kwargs
146
+ )
147
+ self.response = response
148
+ return None
149
+
150
+ def get_internal_messages(self, markdown: bool=False):
151
+ """
152
+ Returns the internal messages from the agent's response.
153
+ """
154
+ pretty_print = "\n\n".join([f"### {msg.type.upper()}\n\nID: {msg.id}\n\nContent:\n\n{msg.content}" for msg in self.response["internal_messages"]])
155
+ if markdown:
156
+ return Markdown(pretty_print)
157
+ else:
158
+ return self.response["internal_messages"]
159
+
160
+ def get_artifacts(self, as_dataframe: bool=False):
161
+ """
162
+ Returns the data loader artifacts from the agent's response.
163
+ """
164
+ if as_dataframe:
165
+ return pd.DataFrame(self.response["data_loader_artifacts"])
166
+ else:
167
+ return self.response["data_loader_artifacts"]
168
+
169
+ def get_ai_message(self, markdown: bool=False):
170
+ """
171
+ Returns the AI message from the agent's response.
172
+ """
173
+ if markdown:
174
+ return Markdown(self.response["messages"][0].content)
175
+ else:
176
+ return self.response["messages"][0].content
177
+
178
+
179
+
180
+ def make_data_loader_tools_agent(
181
+ model: Any,
182
+ create_react_agent_kwargs: Optional[Dict]={},
183
+ invoke_react_agent_kwargs: Optional[Dict]={},
184
+ ):
185
+ """
186
+ Creates a Data Loader Agent that can interact with data loading tools.
187
+
188
+ Parameters:
189
+ ----------
190
+ model : langchain.llms.base.LLM
191
+ The language model used to generate the tool calling agent.
192
+ create_react_agent_kwargs : dict
193
+ Additional keyword arguments to pass to the create_react_agent function.
194
+ invoke_react_agent_kwargs : dict
195
+ Additional keyword arguments to pass to the invoke method of the react agent.
196
+
197
+ Returns:
198
+ --------
199
+ app : langchain.graphs.CompiledStateGraph
200
+ An agent that can interact with data loading tools.
201
+ """
202
+
203
+ class GraphState(AgentState):
204
+ internal_messages: Annotated[Sequence[BaseMessage], operator.add]
205
+ user_instructions: str
206
+ data_loader_artifacts: dict
207
+
208
+ def data_loader_agent(state):
209
+
210
+ print(format_agent_name(AGENT_NAME))
211
+ print(" ")
212
+
213
+ print(" * RUN REACT TOOL-CALLING AGENT")
214
+
215
+ tool_node = ToolNode(
216
+ tools=tools
217
+ )
218
+
219
+ data_loader_agent = create_react_agent(
220
+ model,
221
+ tools=tool_node,
222
+ state_schema=GraphState,
223
+ **create_react_agent_kwargs,
224
+ )
225
+
226
+ response = data_loader_agent.invoke(
227
+ {
228
+ "messages": [("user", state["user_instructions"])],
229
+ },
230
+ invoke_react_agent_kwargs,
231
+ )
232
+
233
+ print(" * POST-PROCESS RESULTS")
234
+
235
+ internal_messages = response['messages']
236
+
237
+ # Return early if the agent produced no messages
238
+ if not internal_messages:
239
+ return {
240
+ "internal_messages": [],
241
+ "mlflow_artifacts": None,
242
+ }
243
+
244
+ # Get the last AI message
245
+ last_ai_message = AIMessage(internal_messages[-1].content, role = AGENT_NAME)
246
+
247
+ # Get the last tool artifact safely
248
+ last_tool_artifact = None
249
+ if len(internal_messages) > 1:
250
+ last_message = internal_messages[-2] # Get second-to-last message
251
+ if hasattr(last_message, "artifact"): # Check if it has an "artifact"
252
+ last_tool_artifact = last_message.artifact
253
+ elif isinstance(last_message, dict) and "artifact" in last_message:
254
+ last_tool_artifact = last_message["artifact"]
255
+
256
+ return {
257
+ "messages": [last_ai_message],
258
+ "internal_messages": internal_messages,
259
+ "data_loader_artifacts": last_tool_artifact,
260
+ }
261
+
262
+ workflow = StateGraph(GraphState)
263
+
264
+ workflow.add_node("data_loader_agent", data_loader_agent)
265
+
266
+ workflow.add_edge(START, "data_loader_agent")
267
+ workflow.add_edge("data_loader_agent", END)
268
+
269
+ app = workflow.compile()
270
+
271
+ return app
272
+
@@ -10,7 +10,6 @@ from typing import TypedDict, Annotated, Sequence, Literal
10
10
  import operator
11
11
 
12
12
  from langchain.prompts import PromptTemplate
13
- from langchain_core.output_parsers import StrOutputParser
14
13
  from langchain_core.messages import BaseMessage
15
14
 
16
15
  from langgraph.types import Command
@@ -30,16 +29,16 @@ from ai_data_science_team.templates import(
30
29
  create_coding_agent_graph,
31
30
  BaseAgent,
32
31
  )
33
- from ai_data_science_team.tools.parsers import PythonOutputParser
34
- from ai_data_science_team.tools.regex import (
32
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
33
+ from ai_data_science_team.utils.regex import (
35
34
  relocate_imports_inside_function,
36
35
  add_comments_to_top,
37
36
  format_agent_name,
38
37
  format_recommended_steps,
39
38
  get_generic_summary,
40
39
  )
41
- from ai_data_science_team.tools.metadata import get_dataframe_summary
42
- from ai_data_science_team.tools.logging import log_ai_function
40
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
41
+ from ai_data_science_team.utils.logging import log_ai_function
43
42
  from ai_data_science_team.utils.plotly import plotly_from_dict
44
43
 
45
44
  # Setup
@@ -197,7 +196,7 @@ class DataVisualizationAgent(BaseAgent):
197
196
  # Rebuild the compiled graph
198
197
  self._compiled_graph = self._make_compiled_graph()
199
198
 
200
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
199
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
201
200
  """
202
201
  Asynchronously invokes the agent to generate a visualization.
203
202
  The response is stored in the 'response' attribute.
@@ -219,7 +218,7 @@ class DataVisualizationAgent(BaseAgent):
219
218
  -------
220
219
  None
221
220
  """
222
- response = self._compiled_graph.ainvoke({
221
+ response = await self._compiled_graph.ainvoke({
223
222
  "user_instructions": user_instructions,
224
223
  "data_raw": data_raw.to_dict(),
225
224
  "max_retries": max_retries,
@@ -24,16 +24,16 @@ from ai_data_science_team.templates import(
24
24
  create_coding_agent_graph,
25
25
  BaseAgent,
26
26
  )
27
- from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import (
27
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
28
+ from ai_data_science_team.utils.regex import (
29
29
  relocate_imports_inside_function,
30
30
  add_comments_to_top,
31
31
  format_agent_name,
32
32
  format_recommended_steps,
33
33
  get_generic_summary,
34
34
  )
35
- from ai_data_science_team.tools.metadata import get_dataframe_summary
36
- from ai_data_science_team.tools.logging import log_ai_function
35
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
36
+ from ai_data_science_team.utils.logging import log_ai_function
37
37
 
38
38
  # Setup Logging Path
39
39
  AGENT_NAME = "data_wrangling_agent"
@@ -213,7 +213,7 @@ class DataWranglingAgent(BaseAgent):
213
213
  self._params[k] = v
214
214
  self._compiled_graph = self._make_compiled_graph()
215
215
 
216
- def ainvoke_agent(
216
+ async def ainvoke_agent(
217
217
  self,
218
218
  data_raw: Union[pd.DataFrame, dict, list],
219
219
  user_instructions: str=None,
@@ -245,7 +245,7 @@ class DataWranglingAgent(BaseAgent):
245
245
  None
246
246
  """
247
247
  data_input = self._convert_data_input(data_raw)
248
- response = self._compiled_graph.ainvoke({
248
+ response = await self._compiled_graph.ainvoke({
249
249
  "user_instructions": user_instructions,
250
250
  "data_raw": data_input,
251
251
  "max_retries": max_retries,
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import (
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
32
  relocate_imports_inside_function,
33
33
  add_comments_to_top,
34
34
  format_agent_name,
35
35
  format_recommended_steps,
36
36
  get_generic_summary,
37
37
  )
38
- from ai_data_science_team.tools.metadata import get_dataframe_summary
39
- from ai_data_science_team.tools.logging import log_ai_function
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
40
40
 
41
41
  # Setup
42
42
  AGENT_NAME = "feature_engineering_agent"
@@ -203,7 +203,7 @@ class FeatureEngineeringAgent(BaseAgent):
203
203
  self._params[k] = v
204
204
  self._compiled_graph = self._make_compiled_graph()
205
205
 
206
- def ainvoke_agent(
206
+ async def ainvoke_agent(
207
207
  self,
208
208
  data_raw: pd.DataFrame,
209
209
  user_instructions: str=None,
@@ -235,7 +235,7 @@ class FeatureEngineeringAgent(BaseAgent):
235
235
  -------
236
236
  None
237
237
  """
238
- response = self._compiled_graph.ainvoke({
238
+ response = await self._compiled_graph.ainvoke({
239
239
  "user_instructions": user_instructions,
240
240
  "data_raw": data_raw.to_dict(),
241
241
  "target_variable": target_variable,
@@ -25,15 +25,15 @@ from ai_data_science_team.templates import(
25
25
  create_coding_agent_graph,
26
26
  BaseAgent,
27
27
  )
28
- from ai_data_science_team.tools.parsers import SQLOutputParser
29
- from ai_data_science_team.tools.regex import (
28
+ from ai_data_science_team.parsers.parsers import SQLOutputParser
29
+ from ai_data_science_team.utils.regex import (
30
30
  add_comments_to_top,
31
31
  format_agent_name,
32
32
  format_recommended_steps,
33
33
  get_generic_summary,
34
34
  )
35
- from ai_data_science_team.tools.metadata import get_database_metadata
36
- from ai_data_science_team.tools.logging import log_ai_function
35
+ from ai_data_science_team.tools.sql import get_database_metadata
36
+ from ai_data_science_team.utils.logging import log_ai_function
37
37
 
38
38
  # Setup
39
39
  AGENT_NAME = "sql_database_agent"
@@ -193,7 +193,7 @@ class SQLDatabaseAgent(BaseAgent):
193
193
  self._params[k] = v
194
194
  self._compiled_graph = self._make_compiled_graph()
195
195
 
196
- def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
196
+ async def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
197
197
  """
198
198
  Asynchronously runs the SQL Database Agent based on user instructions.
199
199
 
@@ -212,7 +212,7 @@ class SQLDatabaseAgent(BaseAgent):
212
212
  -------
213
213
  None
214
214
  """
215
- response = self._compiled_graph.ainvoke({
215
+ response = await self._compiled_graph.ainvoke({
216
216
  "user_instructions": user_instructions,
217
217
  "max_retries": max_retries,
218
218
  "retry_count": retry_count
@@ -1 +1,2 @@
1
1
  from ai_data_science_team.ml_agents.h2o_ml_agent import make_h2o_ml_agent, H2OMLAgent
2
+ from ai_data_science_team.ml_agents.mlflow_tools_agent import make_mlflow_tools_agent, MLflowToolsAgent