ai-data-science-team 0.0.0.9005__py3-none-any.whl → 0.0.0.9007__py3-none-any.whl

ai_data_science_team/templates/agent_templates.py
@@ -3,6 +3,7 @@ from langgraph.graph import StateGraph, END
  from langgraph.types import interrupt, Command

  import pandas as pd
+ import sqlalchemy as sql

  from typing import Any, Callable, Dict, Type, Optional

@@ -22,7 +23,9 @@ def create_coding_agent_graph(
      retry_count_key: str = "retry_count",
      human_in_the_loop: bool = False,
      human_review_node_name: str = "human_review",
-     checkpointer: Optional[Callable] = None
+     checkpointer: Optional[Callable] = None,
+     bypass_recommended_steps: bool = False,
+     bypass_explain_code: bool = False,
  ):
      """
      Creates a generic agent graph using the provided node functions and node names.
@@ -63,7 +66,11 @@ def create_coding_agent_graph(
          The node name for human review if human_in_the_loop is True.
      checkpointer : callable, optional
          A checkpointer callable if desired.
-
+     bypass_recommended_steps : bool, optional
+         Whether to skip the recommended steps node.
+     bypass_explain_code : bool, optional
+         Whether to skip the final explain code node.
+
      Returns
      -------
      app : langchain.graphs.StateGraph
@@ -72,50 +79,76 @@ def create_coding_agent_graph(

      workflow = StateGraph(GraphState)

-     # Add the recommended steps node
-     workflow.add_node(recommended_steps_node_name, node_functions[recommended_steps_node_name])
+     # Conditionally add the recommended-steps node
+     if not bypass_recommended_steps:
+         workflow.add_node(recommended_steps_node_name, node_functions[recommended_steps_node_name])

-     # Optionally add the human review node
-     if human_in_the_loop:
-         workflow.add_node(human_review_node_name, node_functions[human_review_node_name])
-
-     # Add main nodes
+     # Always add create, execute, and fix nodes
      workflow.add_node(create_code_node_name, node_functions[create_code_node_name])
      workflow.add_node(execute_code_node_name, node_functions[execute_code_node_name])
      workflow.add_node(fix_code_node_name, node_functions[fix_code_node_name])
-     workflow.add_node(explain_code_node_name, node_functions[explain_code_node_name])
+
+     # Conditionally add the explanation node
+     if not bypass_explain_code:
+         workflow.add_node(explain_code_node_name, node_functions[explain_code_node_name])

      # Set the entry point
-     workflow.set_entry_point(recommended_steps_node_name)
+     entry_point = create_code_node_name if bypass_recommended_steps else recommended_steps_node_name
+     workflow.set_entry_point(entry_point)

-     # Add edges depending on human_in_the_loop
-     if human_in_the_loop:
-         workflow.add_edge(recommended_steps_node_name, human_review_node_name)
-     else:
-         workflow.add_edge(recommended_steps_node_name, create_code_node_name)
+     # Add edges for recommended steps
+     if not bypass_recommended_steps:
+         if human_in_the_loop:
+             workflow.add_edge(recommended_steps_node_name, human_review_node_name)
+         else:
+             workflow.add_edge(recommended_steps_node_name, create_code_node_name)
+     elif human_in_the_loop:
+         # Skip recommended steps but still include human review
+         workflow.add_edge(create_code_node_name, human_review_node_name)

-     # Connect create_code_node to execution node
+     # Create -> Execute
      workflow.add_edge(create_code_node_name, execute_code_node_name)

-     # Add conditional edges for error handling
-     workflow.add_conditional_edges(
-         execute_code_node_name,
-         lambda state: "fix_code" if (
-             state.get(error_key) is not None and
-             state.get(retry_count_key) is not None and
-             state.get(max_retries_key) is not None and
-             state.get(retry_count_key) < state.get(max_retries_key)
-         ) else "explain_code",
-         {"fix_code": fix_code_node_name, "explain_code": explain_code_node_name},
-     )
-
-     # From fix_code_node_name back to execution node
-     workflow.add_edge(fix_code_node_name, execute_code_node_name)
-
-     # explain_code_node_name leads to end
-     workflow.add_edge(explain_code_node_name, END)
-
-     # Compile workflow, optionally with checkpointer
+     # Define a helper to check if we have an error & can still retry
+     def error_and_can_retry(state):
+         return (
+             state.get(error_key) is not None
+             and state.get(retry_count_key) is not None
+             and state.get(max_retries_key) is not None
+             and state[retry_count_key] < state[max_retries_key]
+         )
+
+     # ---- Split into two branches for bypass_explain_code ----
+     if not bypass_explain_code:
+         # If we are NOT bypassing explain, the next node is fix_code if error,
+         # else explain_code. Then we wire explain_code -> END afterward.
+         workflow.add_conditional_edges(
+             execute_code_node_name,
+             lambda s: "fix_code" if error_and_can_retry(s) else "explain_code",
+             {
+                 "fix_code": fix_code_node_name,
+                 "explain_code": explain_code_node_name,
+             },
+         )
+         # Fix code -> Execute again
+         workflow.add_edge(fix_code_node_name, execute_code_node_name)
+         # explain_code -> END
+         workflow.add_edge(explain_code_node_name, END)
+     else:
+         # If we ARE bypassing explain_code, the next node is fix_code if error,
+         # else straight to END.
+         workflow.add_conditional_edges(
+             execute_code_node_name,
+             lambda s: "fix_code" if error_and_can_retry(s) else "END",
+             {
+                 "fix_code": fix_code_node_name,
+                 "END": END,
+             },
+         )
+         # Fix code -> Execute again
+         workflow.add_edge(fix_code_node_name, execute_code_node_name)
+
+     # Finally, compile
      if human_in_the_loop and checkpointer is not None:
          app = workflow.compile(checkpointer=checkpointer)
      else:
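
Taken together, the two new flags collapse the graph to its core create -> execute -> fix loop. Below is a minimal sketch of calling the updated factory with stub nodes; the `GraphState` schema, node names, and stub bodies are illustrative, not part of the package:

``` python
from typing import Optional, TypedDict

class GraphState(TypedDict):
    # Hypothetical state schema; real agents track much more.
    code: Optional[str]
    error: Optional[str]
    retry_count: int
    max_retries: int

def stub(name: str):
    # Stand-in for a real node that would call an LLM or execute code.
    def node(state: GraphState):
        print(f"ran {name}")
        return {}
    return node

names = ["recommend_steps", "create_code", "execute_code", "fix_code", "explain_code"]

app = create_coding_agent_graph(
    GraphState=GraphState,
    node_functions={n: stub(n) for n in names},
    recommended_steps_node_name="recommend_steps",
    create_code_node_name="create_code",
    execute_code_node_name="execute_code",
    fix_code_node_name="fix_code",
    explain_code_node_name="explain_code",
    error_key="error",
    max_retries_key="max_retries",
    retry_count_key="retry_count",
    bypass_recommended_steps=True,  # entry point becomes create_code
    bypass_explain_code=True,       # execute_code routes straight to END
)
app.invoke({"code": None, "error": None, "retry_count": 0, "max_retries": 3})
```

With both flags set and no error in the state, only `create_code` and `execute_code` run before the graph ends.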
@@ -124,6 +157,7 @@ def create_coding_agent_graph(
      return app


+
  def node_func_human_review(
      state: Any,
      prompt_text: str,
@@ -256,6 +290,88 @@ def node_func_execute_agent_code_on_data(
          # if state.get("retry_count") == 0:
          #     10/0

+         # Apply post-processing if provided
+         if post_processing is not None:
+             result = post_processing(result)
+         else:
+             if isinstance(result, pd.DataFrame):
+                 result = result.to_dict()
+
+     except Exception as e:
+         print(e)
+         agent_error = f"{error_message_prefix}{str(e)}"
+
+     # Return results
+     output = {result_key: result, error_key: agent_error}
+     return output
+
+ def node_func_execute_agent_from_sql_connection(
+     state: Any,
+     connection: Any,
+     code_snippet_key: str,
+     result_key: str,
+     error_key: str,
+     agent_function_name: str,
+     post_processing: Optional[Callable[[Any], Any]] = None,
+     error_message_prefix: str = "An error occurred during agent execution: "
+ ) -> Dict[str, Any]:
+     """
+     Execute generic agent code, defined in a code snippet retrieved from the state,
+     on a SQLAlchemy connection object and return the result.
+
+     Parameters
+     ----------
+     state : Any
+         A state object that supports a `get(key: str)` method to retrieve values.
+     connection : Any
+         The SQLAlchemy connection object to use for executing the agent function.
+     code_snippet_key : str
+         The key in the state used to retrieve the Python code snippet defining the agent function.
+     result_key : str
+         The key in the state used to store the result of the agent function.
+     error_key : str
+         The key in the state used to store the error message if any.
+     agent_function_name : str
+         The name of the function (e.g., 'sql_database_agent') expected to be defined in the code snippet.
+     post_processing : Callable[[Any], Any], optional
+         A function to postprocess the output of the agent function before returning it.
+     error_message_prefix : str, optional
+         A prefix or full message to use in the error output if an exception occurs.
+
+     Returns
+     -------
+     Dict[str, Any]
+         A dictionary containing the result and/or error messages. Keys are arbitrary,
+         but typically include something like "result" or "error".
+     """
+
+     print(" * EXECUTING AGENT CODE ON SQL CONNECTION")
+
+     # Ensure a connection object was provided before trying to use it
+     if connection is None:
+         raise ValueError("Connection object not found.")
+
+     # Retrieve the SQLAlchemy connection and code snippet from the state
+     is_engine = isinstance(connection, sql.engine.base.Engine)
+     conn = connection.connect() if is_engine else connection
+     agent_code = state.get(code_snippet_key)
+
+     # Execute the code snippet to define the agent function
+     local_vars = {}
+     global_vars = {}
+     exec(agent_code, global_vars, local_vars)
+
+     # Retrieve the agent function from the executed code
+     agent_function = local_vars.get(agent_function_name, None)
+     if agent_function is None or not callable(agent_function):
+         raise ValueError(f"Agent function '{agent_function_name}' not found or not callable in the provided code.")
+
+     # Execute the agent function
+     agent_error = None
+     result = None
+     try:
+         result = agent_function(conn)
+
          # Apply post-processing if provided
          if post_processing is not None:
              result = post_processing(result)
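
A minimal sketch of driving this new node against an in-memory SQLite engine; the state dict, key names, and the `sql_database_agent` body below are illustrative, not part of the package:

``` python
import sqlalchemy as sql

# Any object with a .get() works as state here; the agents use richer state.
# Note the import lives inside the function: the snippet runs under exec()
# with separate global/local dicts, which is why the library also ships a
# relocate_imports_inside_function helper.
state = {
    "sql_code": (
        "def sql_database_agent(connection):\n"
        "    import pandas as pd\n"
        "    return pd.read_sql('SELECT 1 AS x', connection)\n"
    )
}

engine = sql.create_engine("sqlite:///:memory:")
with engine.connect() as conn:
    out = node_func_execute_agent_from_sql_connection(
        state=state,
        connection=conn,
        code_snippet_key="sql_code",
        result_key="data_sql",
        error_key="sql_database_error",
        agent_function_name="sql_database_agent",
    )

print(out["data_sql"])  # {'x': {0: 1}} -- DataFrames are converted via .to_dict()
```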
@@ -267,6 +383,7 @@ def node_func_execute_agent_code_on_data(
      output = {result_key: result, error_key: agent_error}
      return output

+
  def node_func_fix_agent_code(
      state: Any,
      code_snippet_key: str,
@@ -326,7 +443,7 @@ def node_func_fix_agent_code(
      response = (llm | PythonOutputParser()).invoke(prompt)

      response = relocate_imports_inside_function(response)
-     response = add_comments_to_top(response, agent_name="data_wrangler")
+     response = add_comments_to_top(response, agent_name=agent_name)

      # Log the response if requested
      if log:
ai_data_science_team/tools/logging.py
@@ -58,4 +58,4 @@ def log_ai_function(response: str, file_name: str, log: bool = True, log_path: s
          return (file_path, file_name)

      else:
-         return None
+         return (None, None)
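
With this change, callers can unpack the return value unconditionally. A sketch with illustrative arguments:

``` python
# Both branches now return a 2-tuple, so unpacking no longer fails
# when logging is disabled.
file_path, file_name = log_ai_function(
    response="def data_cleaner(df):\n    return df\n",
    file_name="data_cleaner.py",
    log=False,  # previously this branch returned a bare None
)
assert (file_path, file_name) == (None, None)
```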
ai_data_science_team/tools/metadata.py (new file)
@@ -0,0 +1,230 @@
+ import io
+ import pandas as pd
+ import sqlalchemy as sql
+ from typing import Union, List, Dict
+
+ def get_dataframe_summary(
+     dataframes: Union[pd.DataFrame, List[pd.DataFrame], Dict[str, pd.DataFrame]],
+     n_sample: int = 30,
+     skip_stats: bool = False,
+ ) -> List[str]:
+     """
+     Generate a summary for one or more DataFrames. Accepts a single DataFrame, a list of DataFrames,
+     or a dictionary mapping names to DataFrames.
+
+     Parameters
+     ----------
+     dataframes : pandas.DataFrame or list of pandas.DataFrame or dict of (str -> pandas.DataFrame)
+         - Single DataFrame: produce a single summary (returned within a one-element list).
+         - List of DataFrames: produce a summary for each DataFrame, using index-based names.
+         - Dictionary of DataFrames: produce a summary for each DataFrame, using dictionary keys as names.
+     n_sample : int, default 30
+         Number of rows to display in the "Data (first 30 rows)" section.
+     skip_stats : bool, default False
+         If True, skip the descriptive statistics and DataFrame info sections.
+
+     Example
+     -------
+     ``` python
+     import pandas as pd
+     from sklearn.datasets import load_iris
+     data = load_iris(as_frame=True)
+     dataframes = {
+         "iris": data.frame,
+         "iris_target": data.target,
+     }
+     summaries = get_dataframe_summary(dataframes)
+     print(summaries[0])
+     ```
+
+     Returns
+     -------
+     list of str
+         A list of summaries, one for each provided DataFrame. Each summary includes:
+         - Shape of the DataFrame (rows, columns)
+         - Column data types
+         - Missing value percentage
+         - Unique value counts
+         - First 30 rows
+         - Descriptive statistics
+         - DataFrame info output
+     """
+
+     summaries = []
+
+     # --- Dictionary Case ---
+     if isinstance(dataframes, dict):
+         for dataset_name, df in dataframes.items():
+             summaries.append(_summarize_dataframe(df, dataset_name, n_sample, skip_stats))
+
+     # --- Single DataFrame Case ---
+     elif isinstance(dataframes, pd.DataFrame):
+         summaries.append(_summarize_dataframe(dataframes, "Single_Dataset", n_sample, skip_stats))
+
+     # --- List of DataFrames Case ---
+     elif isinstance(dataframes, list):
+         for idx, df in enumerate(dataframes):
+             dataset_name = f"Dataset_{idx}"
+             summaries.append(_summarize_dataframe(df, dataset_name, n_sample, skip_stats))
+
+     else:
+         raise TypeError(
+             "Input must be a single DataFrame, a list of DataFrames, or a dictionary of DataFrames."
+         )
+
+     return summaries
+
+
+ def _summarize_dataframe(df: pd.DataFrame, dataset_name: str, n_sample=30, skip_stats=False) -> str:
+     """Generate a summary string for a single DataFrame."""
+     # 1. Convert dictionary-type cells to strings
+     #    This prevents unhashable dict errors during df.nunique().
+     df = df.apply(lambda col: col.map(lambda x: str(x) if isinstance(x, dict) else x))
+
+     # 2. Capture df.info() output
+     buffer = io.StringIO()
+     df.info(buf=buffer)
+     info_text = buffer.getvalue()
+
+     # 3. Calculate missing value stats
+     missing_stats = (df.isna().sum() / len(df) * 100).sort_values(ascending=False)
+     missing_summary = "\n".join([f"{col}: {val:.2f}%" for col, val in missing_stats.items()])
+
+     # 4. Get column data types
+     column_types = "\n".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
+
+     # 5. Get unique value counts
+     unique_counts = df.nunique()  # Will no longer fail on unhashable dict
+     unique_counts_summary = "\n".join([f"{col}: {count}" for col, count in unique_counts.items()])
+
+     # 6. Generate the summary text
+     if not skip_stats:
+         summary_text = f"""
+         Dataset Name: {dataset_name}
+         ----------------------------
+         Shape: {df.shape[0]} rows x {df.shape[1]} columns
+
+         Column Data Types:
+         {column_types}
+
+         Missing Value Percentage:
+         {missing_summary}
+
+         Unique Value Counts:
+         {unique_counts_summary}
+
+         Data (first {n_sample} rows):
+         {df.head(n_sample).to_string()}
+
+         Data Description:
+         {df.describe().to_string()}
+
+         Data Info:
+         {info_text}
+         """
+     else:
+         summary_text = f"""
+         Dataset Name: {dataset_name}
+         ----------------------------
+         Shape: {df.shape[0]} rows x {df.shape[1]} columns
+
+         Column Data Types:
+         {column_types}
+
+         Data (first {n_sample} rows):
+         {df.head(n_sample).to_string()}
+         """
+
+     return summary_text.strip()
+
+
+
+ def get_database_metadata(connection: Union[sql.engine.base.Connection, sql.engine.base.Engine],
+                           n_samples: int = 10) -> str:
+     """
+     Collects metadata and sample data from a database, with safe identifier quoting and
+     basic dialect-aware row limiting. Prevents issues with spaces/reserved words in identifiers.
+
+     Parameters
+     ----------
+     connection : Union[sql.engine.base.Connection, sql.engine.base.Engine]
+         An active SQLAlchemy connection or engine.
+     n_samples : int
+         Number of sample values to retrieve for each column.
+
+     Returns
+     -------
+     str
+         A formatted string with database metadata, including some sample data from each column.
+     """
+
+     # If a connection is passed, use it; if an engine is passed, connect to it
+     is_engine = isinstance(connection, sql.engine.base.Engine)
+     conn = connection.connect() if is_engine else connection
+
+     output = []
+     try:
+         # Grab the engine off the connection
+         sql_engine = conn.engine
+         dialect_name = sql_engine.dialect.name.lower()
+
+         output.append(f"Database Dialect: {sql_engine.dialect.name}")
+         output.append(f"Driver: {sql_engine.driver}")
+         output.append(f"Connection URL: {sql_engine.url}")
+
+         # Inspect the database
+         inspector = sql.inspect(sql_engine)
+         tables = inspector.get_table_names()
+         output.append(f"Tables: {tables}")
+         output.append(f"Schemas: {inspector.get_schema_names()}")
+
+         # Helper to build a dialect-specific limit clause
+         def build_query(col_name_quoted: str, table_name_quoted: str, n: int) -> str:
+             """
+             Returns a SQL query string to select N rows from the given column/table
+             across different dialects (SQLite, MySQL, Postgres, MSSQL, Oracle, etc.)
+             """
+             if "sqlite" in dialect_name or "mysql" in dialect_name or "postgres" in dialect_name:
+                 # Common dialects supporting LIMIT
+                 return f"SELECT {col_name_quoted} FROM {table_name_quoted} LIMIT {n}"
+             elif "mssql" in dialect_name:
+                 # Microsoft SQL Server syntax
+                 return f"SELECT TOP {n} {col_name_quoted} FROM {table_name_quoted}"
+             elif "oracle" in dialect_name:
+                 # Oracle syntax
+                 return f"SELECT {col_name_quoted} FROM {table_name_quoted} WHERE ROWNUM <= {n}"
+             else:
+                 # Fallback
+                 return f"SELECT {col_name_quoted} FROM {table_name_quoted} LIMIT {n}"
+
+         # Prepare for quoting
+         preparer = inspector.bind.dialect.identifier_preparer
+
+         # For each table, get columns and sample data
+         for table_name in tables:
+             output.append(f"\nTable: {table_name}")
+             # Properly quote the table name
+             table_name_quoted = preparer.quote_identifier(table_name)
+
+             for column in inspector.get_columns(table_name):
+                 col_name = column["name"]
+                 col_type = column["type"]
+                 output.append(f"  Column: {col_name} Type: {col_type}")
+
+                 # Properly quote the column name
+                 col_name_quoted = preparer.quote_identifier(col_name)
+
+                 # Build a dialect-aware query with safe quoting
+                 query = build_query(col_name_quoted, table_name_quoted, n_samples)
+
+                 # Read a few sample values
+                 df = pd.read_sql(sql.text(query), conn)
+                 first_values = df[col_name].tolist()
+                 output.append(f"  First {n_samples} Values: {first_values}")
+
+     finally:
+         # Close connection if created inside the function
+         if is_engine:
+             conn.close()
+
+     return "\n".join(output)
ai_data_science_team/tools/regex.py
@@ -64,10 +64,16 @@ def add_comments_to_top(code_text, agent_name="data_wrangler"):
      header_comments = [
          "# Disclaimer: This function was generated by AI. Please review before using.",
          f"# Agent Name: {agent_name}",
-         f"# Time Created: {time_created}",
+         f"# Time Created: {time_created}\n",
          ""
      ]

      # Join the header with newlines, then prepend to the existing code_text
      header_block = "\n".join(header_comments)
      return header_block + code_text
+
+ def format_agent_name(agent_name: str) -> str:
+
+     formatted_name = agent_name.strip().replace("_", " ").upper()
+
+     return f"---{formatted_name}----"
ai_data_science_team-0.0.0.9007.dist-info/METADATA
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: ai-data-science-team
- Version: 0.0.0.9005
+ Version: 0.0.0.9007
  Summary: Build and run an AI-powered data science team.
  Home-page: https://github.com/business-science/ai-data-science-team
  Author: Matt Dancho
@@ -21,12 +21,22 @@ Requires-Dist: plotly
  Requires-Dist: streamlit
  Requires-Dist: scikit-learn
  Requires-Dist: xgboost
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  # Your AI Data Science Team (An Army Of Copilots)

  **An AI-powered data science team of copilots that uses agents to help you perform common data science tasks 10X faster**.

- Star ⭐ This GitHub (Takes 2 seconds and means a lot).
+ **Star ⭐ This GitHub (Takes 2 seconds and means a lot).**
+ 
+ *Beta - This Python library is under active development. Breaking changes may occur until the 0.1.0 release.*

  ---

@@ -39,6 +49,24 @@ The AI Data Science Team of Copilots includes Agents that specialize data cleani
  - Credit Card Risk
  - And more

+ ## Table of Contents
+ 
+ - [Your AI Data Science Team (An Army Of Copilots)](#your-ai-data-science-team-an-army-of-copilots)
+ - [Table of Contents](#table-of-contents)
+ - [Companies That Want An AI Data Science Team Copilot](#companies-that-want-an-ai-data-science-team-copilot)
+ - [Free Generative AI For Data Scientists Workshop](#free-generative-ai-for-data-scientists-workshop)
+ - [Data Science Agents](#data-science-agents)
+ - [Coming Soon: Multi-Agents](#coming-soon-multi-agents)
+ - [Agents Available Now](#agents-available-now)
+ - [Agents Coming Soon](#agents-coming-soon)
+ - [Disclaimer](#disclaimer)
+ - [Installation](#installation)
+ - [Usage](#usage)
+ - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
+ - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
+ - [Contributing](#contributing)
+ - [License](#license)
+ 
  ## Companies That Want An AI Data Science Team Copilot

  If you are interested in having your own custom enterprise-grade AI Data Science Team Copilot, send inquiries here: [https://www.business-science.io/contact.html](https://www.business-science.io/contact.html)
@@ -53,11 +81,19 @@ This project is a work in progress. New data science agents will be released soo

  ![Data Science Team](/img/ai_data_science_team.jpg)

+ ### Coming Soon: Multi-Agents
+ 
+ These are the internals of the Business Intelligence SQL Agent I'm working on:
+ 
+ ![Business Intelligence SQL Agent](/img/multi_agent_sql_data_visualization.jpg)
+ 
  ### Agents Available Now

  1. **Data Wrangling Agent:** Merges, Joins, Preps and Wrangles data into a format that is ready for data analysis.
- 2. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
- 3. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
+ 2. **Data Visualization Agent:** Creates visualizations to help you understand your data. Returns JSON-serializable Plotly visualizations.
+ 3. **Data Cleaning Agent:** Performs Data Preparation steps including handling missing values, outliers, and data type conversions.
+ 4. **Feature Engineering Agent:** Converts the prepared data into ML-ready data. Adds features to increase predictive accuracy of ML models.
+ 5. **SQL Database Agent:** Connects to SQL databases to pull data into the data science environment. Creates pipelines to automate data extraction. Performs Joins, Aggregations, and other SQL Query operations.

  ### Agents Coming Soon

@@ -78,23 +114,6 @@ This project is a work in progress. New data science agents will be released soo

  By using this software, you agree to use it solely for learning purposes.

- ## Table of Contents
- 
- - [Your AI Data Science Team (An Army Of Copilots)](#your-ai-data-science-team-an-army-of-copilots)
- - [Companies That Want An AI Data Science Team Copilot](#companies-that-want-an-ai-data-science-team-copilot)
- - [Free Generative AI For Data Scientists Workshop](#free-generative-ai-for-data-scientists-workshop)
- - [Data Science Agents](#data-science-agents)
- - [Agents Available Now](#agents-available-now)
- - [Agents Coming Soon](#agents-coming-soon)
- - [Disclaimer](#disclaimer)
- - [Table of Contents](#table-of-contents)
- - [Installation](#installation)
- - [Usage](#usage)
- - [Example 1: Feature Engineering with the Feature Engineering Agent](#example-1-feature-engineering-with-the-feature-engineering-agent)
- - [Example 2: Cleaning Data with the Data Cleaning Agent](#example-2-cleaning-data-with-the-data-cleaning-agent)
- - [Contributing](#contributing)
- - [License](#license)
- 
  ## Installation

  ``` bash
@@ -103,6 +122,8 @@ pip install git+https://github.com/business-science/ai-data-science-team.git --u

  ## Usage

+ [See all examples here.](/examples)
+ 
  ### Example 1: Feature Engineering with the Feature Engineering Agent

  [See the full example here.](/examples/feature_engineering_agent.ipynb)
ai_data_science_team-0.0.0.9007.dist-info/RECORD (new file)
@@ -0,0 +1,21 @@
+ ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ai_data_science_team/_version.py,sha256=VJYpfOaKsXjGzPOsT6kYyVW6T9bFBqxt6Ph3qF8t-A8,26
+ ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
+ ai_data_science_team/agents/__init__.py,sha256=rcF18rBsOuPJqJKvoffh6lwr4Nwm24MErM2u4H4Th9s,467
+ ai_data_science_team/agents/data_cleaning_agent.py,sha256=gixYY4wGehKK_ROgU7CVOzijghmVQGD4hyK9uKhc8Hw,20890
+ ai_data_science_team/agents/data_visualization_agent.py,sha256=wePFZbdB4kBah8m_iy6f4IDyjl6L6zBWzIgigJEXdk8,12933
+ ai_data_science_team/agents/data_wrangling_agent.py,sha256=5w1kytoWLE4p3hj0YHVuXcgCd304eNQac-Zrrgmnr2s,16735
+ ai_data_science_team/agents/feature_engineering_agent.py,sha256=UaaU3VkPhjOV0NbrYXedRb6eHOcOWWiGYhB_srrYWvg,17571
+ ai_data_science_team/agents/sql_database_agent.py,sha256=mRbEAPHP6NlwQac2_VL9RuyIfCCtrmXTrzu5RLzOoeU,16031
+ ai_data_science_team/templates/__init__.py,sha256=bNrKGmWXQG7GRczln_zVfUQLzxzp7hSwlLyNtLxleu4,278
+ ai_data_science_team/templates/agent_templates.py,sha256=xohVgEfxPcVukPLpPfV7mZ0cpFgp-oJVLZRWCv2V-WU,19948
+ ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ai_data_science_team/tools/logging.py,sha256=7wFOv6GGhXR_RPbh-8p0GyrS608XOnZtiaGK2IbDl_s,2081
+ ai_data_science_team/tools/metadata.py,sha256=tbnca_tDp67oBA6qD29AKVooJG10VqGr4vwzj4rPUas,8348
+ ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
+ ai_data_science_team/tools/regex.py,sha256=vkfdvi9pDe582p-fh_7cB07Wb0dOR2CsiVq-wUO3mas,2491
+ ai_data_science_team-0.0.0.9007.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
+ ai_data_science_team-0.0.0.9007.dist-info/METADATA,sha256=KcMFR2V9_wbepdKsrlFdfc7UB7t-Hf7i75x67LPXw3Q,6783
+ ai_data_science_team-0.0.0.9007.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ ai_data_science_team-0.0.0.9007.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
+ ai_data_science_team-0.0.0.9007.dist-info/RECORD,,
ai_data_science_team-0.0.0.9007.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.6.0)
+ Generator: setuptools (75.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
