ai-data-science-team 0.0.0.9009__py3-none-any.whl → 0.0.0.9010__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. ai_data_science_team/_version.py +1 -1
  2. ai_data_science_team/agents/data_cleaning_agent.py +6 -6
  3. ai_data_science_team/agents/data_loader_tools_agent.py +69 -0
  4. ai_data_science_team/agents/data_visualization_agent.py +6 -7
  5. ai_data_science_team/agents/data_wrangling_agent.py +6 -6
  6. ai_data_science_team/agents/feature_engineering_agent.py +6 -6
  7. ai_data_science_team/agents/sql_database_agent.py +6 -6
  8. ai_data_science_team/ml_agents/__init__.py +1 -0
  9. ai_data_science_team/ml_agents/h2o_ml_agent.py +205 -385
  10. ai_data_science_team/ml_agents/mlflow_tools_agent.py +327 -0
  11. ai_data_science_team/multiagents/sql_data_analyst.py +3 -4
  12. ai_data_science_team/parsers/__init__.py +0 -0
  13. ai_data_science_team/{tools → parsers}/parsers.py +0 -1
  14. ai_data_science_team/templates/agent_templates.py +6 -6
  15. ai_data_science_team/tools/data_loader.py +378 -0
  16. ai_data_science_team/tools/dataframe.py +139 -0
  17. ai_data_science_team/tools/h2o.py +643 -0
  18. ai_data_science_team/tools/mlflow.py +961 -0
  19. ai_data_science_team/tools/{metadata.py → sql.py} +1 -137
  20. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/METADATA +34 -16
  21. ai_data_science_team-0.0.0.9010.dist-info/RECORD +35 -0
  22. ai_data_science_team-0.0.0.9009.dist-info/RECORD +0 -28
  23. /ai_data_science_team/{tools → utils}/logging.py +0 -0
  24. /ai_data_science_team/{tools → utils}/regex.py +0 -0
  25. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/LICENSE +0 -0
  26. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/WHEEL +0 -0
  27. {ai_data_science_team-0.0.0.9009.dist-info → ai_data_science_team-0.0.0.9010.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- __version__ = "0.0.0.9009"
1
+ __version__ = "0.0.0.9010"
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import (
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
32
  relocate_imports_inside_function,
33
33
  add_comments_to_top,
34
34
  format_agent_name,
35
35
  format_recommended_steps,
36
36
  get_generic_summary,
37
37
  )
38
- from ai_data_science_team.tools.metadata import get_dataframe_summary
39
- from ai_data_science_team.tools.logging import log_ai_function
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
40
40
 
41
41
  # Setup
42
42
  AGENT_NAME = "data_cleaning_agent"
@@ -183,7 +183,7 @@ class DataCleaningAgent(BaseAgent):
183
183
  self.response=None
184
184
  return make_data_cleaning_agent(**self._params)
185
185
 
186
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
186
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
187
187
  """
188
188
  Asynchronously invokes the agent. The response is stored in the response attribute.
189
189
 
@@ -204,7 +204,7 @@ class DataCleaningAgent(BaseAgent):
204
204
  --------
205
205
  None. The response is stored in the response attribute.
206
206
  """
207
- response = self._compiled_graph.ainvoke({
207
+ response = await self._compiled_graph.ainvoke({
208
208
  "user_instructions": user_instructions,
209
209
  "data_raw": data_raw.to_dict(),
210
210
  "max_retries": max_retries,
@@ -0,0 +1,69 @@
1
+
2
+
3
+
4
+ from typing import Any, Optional, Annotated, Sequence, List, Dict
5
+ import operator
6
+
7
+ import pandas as pd
8
+ import os
9
+
10
+ from IPython.display import Markdown
11
+
12
+ from langchain_core.messages import BaseMessage, AIMessage
13
+
14
+ from langgraph.prebuilt import create_react_agent, ToolNode
15
+ from langgraph.prebuilt.chat_agent_executor import AgentState
16
+ from langgraph.graph import START, END, StateGraph
17
+
18
+ from ai_data_science_team.templates import BaseAgent
19
+ from ai_data_science_team.utils.regex import format_agent_name
20
+ from ai_data_science_team.tools.data_loader import (
21
+ load_directory,
22
+ load_file,
23
+ list_directory_contents,
24
+ list_directory_recursive,
25
+ get_file_info,
26
+ search_files_by_pattern,
27
+ )
28
+
29
+ AGENT_NAME = "data_loader_tools_agent"
30
+
31
+ tools = [
32
+ load_directory,
33
+ load_file,
34
+ list_directory_contents,
35
+ list_directory_recursive,
36
+ get_file_info,
37
+ search_files_by_pattern,
38
+ ]
39
+
40
+
41
+
42
+ def make_data_loader_tools_agent(
43
+ model: Any,
44
+ directory: Optional[str] = os.getcwd(),
45
+ ):
46
+ """
47
+ Creates a Data Loader Agent that can interact with data loading tools.
48
+
49
+ Parameters:
50
+ ----------
51
+ model : langchain.llms.base.LLM
52
+ The language model used to generate the tool calling agent.
53
+ directory : str, optional
54
+ The directory to search for files. Defaults to the current working directory.
55
+
56
+ Returns:
57
+ --------
58
+ Data Loader Agent
59
+ An agent that can interact with data loading tools.
60
+ """
61
+
62
+ class GraphState(AgentState):
63
+ internal_messages: Annotated[Sequence[BaseMessage], operator.add]
64
+ directory: str
65
+ user_instructions: str
66
+ data_artifacts: dict
67
+
68
+ pass
69
+
@@ -10,7 +10,6 @@ from typing import TypedDict, Annotated, Sequence, Literal
10
10
  import operator
11
11
 
12
12
  from langchain.prompts import PromptTemplate
13
- from langchain_core.output_parsers import StrOutputParser
14
13
  from langchain_core.messages import BaseMessage
15
14
 
16
15
  from langgraph.types import Command
@@ -30,16 +29,16 @@ from ai_data_science_team.templates import(
30
29
  create_coding_agent_graph,
31
30
  BaseAgent,
32
31
  )
33
- from ai_data_science_team.tools.parsers import PythonOutputParser
34
- from ai_data_science_team.tools.regex import (
32
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
33
+ from ai_data_science_team.utils.regex import (
35
34
  relocate_imports_inside_function,
36
35
  add_comments_to_top,
37
36
  format_agent_name,
38
37
  format_recommended_steps,
39
38
  get_generic_summary,
40
39
  )
41
- from ai_data_science_team.tools.metadata import get_dataframe_summary
42
- from ai_data_science_team.tools.logging import log_ai_function
40
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
41
+ from ai_data_science_team.utils.logging import log_ai_function
43
42
  from ai_data_science_team.utils.plotly import plotly_from_dict
44
43
 
45
44
  # Setup
@@ -197,7 +196,7 @@ class DataVisualizationAgent(BaseAgent):
197
196
  # Rebuild the compiled graph
198
197
  self._compiled_graph = self._make_compiled_graph()
199
198
 
200
- def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
199
+ async def ainvoke_agent(self, data_raw: pd.DataFrame, user_instructions: str=None, max_retries:int=3, retry_count:int=0, **kwargs):
201
200
  """
202
201
  Asynchronously invokes the agent to generate a visualization.
203
202
  The response is stored in the 'response' attribute.
@@ -219,7 +218,7 @@ class DataVisualizationAgent(BaseAgent):
219
218
  -------
220
219
  None
221
220
  """
222
- response = self._compiled_graph.ainvoke({
221
+ response = await self._compiled_graph.ainvoke({
223
222
  "user_instructions": user_instructions,
224
223
  "data_raw": data_raw.to_dict(),
225
224
  "max_retries": max_retries,
@@ -24,16 +24,16 @@ from ai_data_science_team.templates import(
24
24
  create_coding_agent_graph,
25
25
  BaseAgent,
26
26
  )
27
- from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import (
27
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
28
+ from ai_data_science_team.utils.regex import (
29
29
  relocate_imports_inside_function,
30
30
  add_comments_to_top,
31
31
  format_agent_name,
32
32
  format_recommended_steps,
33
33
  get_generic_summary,
34
34
  )
35
- from ai_data_science_team.tools.metadata import get_dataframe_summary
36
- from ai_data_science_team.tools.logging import log_ai_function
35
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
36
+ from ai_data_science_team.utils.logging import log_ai_function
37
37
 
38
38
  # Setup Logging Path
39
39
  AGENT_NAME = "data_wrangling_agent"
@@ -213,7 +213,7 @@ class DataWranglingAgent(BaseAgent):
213
213
  self._params[k] = v
214
214
  self._compiled_graph = self._make_compiled_graph()
215
215
 
216
- def ainvoke_agent(
216
+ async def ainvoke_agent(
217
217
  self,
218
218
  data_raw: Union[pd.DataFrame, dict, list],
219
219
  user_instructions: str=None,
@@ -245,7 +245,7 @@ class DataWranglingAgent(BaseAgent):
245
245
  None
246
246
  """
247
247
  data_input = self._convert_data_input(data_raw)
248
- response = self._compiled_graph.ainvoke({
248
+ response = await self._compiled_graph.ainvoke({
249
249
  "user_instructions": user_instructions,
250
250
  "data_raw": data_input,
251
251
  "max_retries": max_retries,
@@ -27,16 +27,16 @@ from ai_data_science_team.templates import(
27
27
  create_coding_agent_graph,
28
28
  BaseAgent,
29
29
  )
30
- from ai_data_science_team.tools.parsers import PythonOutputParser
31
- from ai_data_science_team.tools.regex import (
30
+ from ai_data_science_team.parsers.parsers import PythonOutputParser
31
+ from ai_data_science_team.utils.regex import (
32
32
  relocate_imports_inside_function,
33
33
  add_comments_to_top,
34
34
  format_agent_name,
35
35
  format_recommended_steps,
36
36
  get_generic_summary,
37
37
  )
38
- from ai_data_science_team.tools.metadata import get_dataframe_summary
39
- from ai_data_science_team.tools.logging import log_ai_function
38
+ from ai_data_science_team.tools.dataframe import get_dataframe_summary
39
+ from ai_data_science_team.utils.logging import log_ai_function
40
40
 
41
41
  # Setup
42
42
  AGENT_NAME = "feature_engineering_agent"
@@ -203,7 +203,7 @@ class FeatureEngineeringAgent(BaseAgent):
203
203
  self._params[k] = v
204
204
  self._compiled_graph = self._make_compiled_graph()
205
205
 
206
- def ainvoke_agent(
206
+ async def ainvoke_agent(
207
207
  self,
208
208
  data_raw: pd.DataFrame,
209
209
  user_instructions: str=None,
@@ -235,7 +235,7 @@ class FeatureEngineeringAgent(BaseAgent):
235
235
  -------
236
236
  None
237
237
  """
238
- response = self._compiled_graph.ainvoke({
238
+ response = await self._compiled_graph.ainvoke({
239
239
  "user_instructions": user_instructions,
240
240
  "data_raw": data_raw.to_dict(),
241
241
  "target_variable": target_variable,
@@ -25,15 +25,15 @@ from ai_data_science_team.templates import(
25
25
  create_coding_agent_graph,
26
26
  BaseAgent,
27
27
  )
28
- from ai_data_science_team.tools.parsers import SQLOutputParser
29
- from ai_data_science_team.tools.regex import (
28
+ from ai_data_science_team.parsers.parsers import SQLOutputParser
29
+ from ai_data_science_team.utils.regex import (
30
30
  add_comments_to_top,
31
31
  format_agent_name,
32
32
  format_recommended_steps,
33
33
  get_generic_summary,
34
34
  )
35
- from ai_data_science_team.tools.metadata import get_database_metadata
36
- from ai_data_science_team.tools.logging import log_ai_function
35
+ from ai_data_science_team.tools.sql import get_database_metadata
36
+ from ai_data_science_team.utils.logging import log_ai_function
37
37
 
38
38
  # Setup
39
39
  AGENT_NAME = "sql_database_agent"
@@ -193,7 +193,7 @@ class SQLDatabaseAgent(BaseAgent):
193
193
  self._params[k] = v
194
194
  self._compiled_graph = self._make_compiled_graph()
195
195
 
196
- def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
196
+ async def ainvoke_agent(self, user_instructions: str=None, max_retries=3, retry_count=0, **kwargs):
197
197
  """
198
198
  Asynchronously runs the SQL Database Agent based on user instructions.
199
199
 
@@ -212,7 +212,7 @@ class SQLDatabaseAgent(BaseAgent):
212
212
  -------
213
213
  None
214
214
  """
215
- response = self._compiled_graph.ainvoke({
215
+ response = await self._compiled_graph.ainvoke({
216
216
  "user_instructions": user_instructions,
217
217
  "max_retries": max_retries,
218
218
  "retry_count": retry_count
@@ -1 +1,2 @@
1
1
  from ai_data_science_team.ml_agents.h2o_ml_agent import make_h2o_ml_agent, H2OMLAgent
2
+ from ai_data_science_team.ml_agents.mlflow_tools_agent import make_mlflow_tools_agent, MLflowToolsAgent