ai-data-science-team 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. ai_data_science_team/_version.py +1 -1
  2. ai_data_science_team/agents/__init__.py +4 -5
  3. ai_data_science_team/agents/data_cleaning_agent.py +268 -116
  4. ai_data_science_team/agents/data_visualization_agent.py +470 -41
  5. ai_data_science_team/agents/data_wrangling_agent.py +471 -31
  6. ai_data_science_team/agents/feature_engineering_agent.py +426 -41
  7. ai_data_science_team/agents/sql_database_agent.py +458 -58
  8. ai_data_science_team/ml_agents/__init__.py +1 -0
  9. ai_data_science_team/ml_agents/h2o_ml_agent.py +1032 -0
  10. ai_data_science_team/multiagents/__init__.py +1 -0
  11. ai_data_science_team/multiagents/sql_data_analyst.py +398 -0
  12. ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
  13. ai_data_science_team/templates/__init__.py +3 -1
  14. ai_data_science_team/templates/agent_templates.py +319 -43
  15. ai_data_science_team/tools/metadata.py +94 -62
  16. ai_data_science_team/tools/regex.py +86 -1
  17. ai_data_science_team/utils/__init__.py +0 -0
  18. ai_data_science_team/utils/plotly.py +24 -0
  19. ai_data_science_team-0.0.0.9009.dist-info/METADATA +245 -0
  20. ai_data_science_team-0.0.0.9009.dist-info/RECORD +28 -0
  21. ai_data_science_team-0.0.0.9007.dist-info/METADATA +0 -183
  22. ai_data_science_team-0.0.0.9007.dist-info/RECORD +0 -21
  23. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE +0 -0
  24. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL +0 -0
  25. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/top_level.txt +0 -0
@@ -14,18 +14,27 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
17
+ import json
18
18
  import pandas as pd
19
19
 
20
+ from IPython.display import Markdown
21
+
20
22
  from ai_data_science_team.templates import(
21
23
  node_func_execute_agent_code_on_data,
22
24
  node_func_human_review,
23
25
  node_func_fix_agent_code,
24
- node_func_explain_agent_code,
25
- create_coding_agent_graph
26
+ node_func_report_agent_outputs,
27
+ create_coding_agent_graph,
28
+ BaseAgent,
26
29
  )
27
30
  from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name
31
+ from ai_data_science_team.tools.regex import (
32
+ relocate_imports_inside_function,
33
+ add_comments_to_top,
34
+ format_agent_name,
35
+ format_recommended_steps,
36
+ get_generic_summary,
37
+ )
29
38
  from ai_data_science_team.tools.metadata import get_dataframe_summary
30
39
  from ai_data_science_team.tools.logging import log_ai_function
31
40
 
@@ -33,6 +42,351 @@ from ai_data_science_team.tools.logging import log_ai_function
33
42
  AGENT_NAME = "feature_engineering_agent"
34
43
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
35
44
 
45
+ # Class
46
+
47
+ class FeatureEngineeringAgent(BaseAgent):
48
+ """
49
+ Creates a feature engineering agent that can process datasets based on user-defined instructions or
50
+ default feature engineering steps. The agent generates a Python function to engineer features, executes it,
51
+ and logs the process, including code and errors. It is designed to facilitate reproducible and
52
+ customizable feature engineering workflows.
53
+
54
+ The agent can perform the following default feature engineering steps unless instructed otherwise:
55
+ - Convert features to appropriate data types
56
+ - Remove features that have unique values for each row
57
+ - Remove constant features
58
+ - Encode high-cardinality categoricals (threshold <= 5% of dataset) as 'other'
59
+ - One-hot-encode categorical variables
60
+ - Convert booleans to integer (1/0)
61
+ - Create datetime-based features (if applicable)
62
+ - Handle target variable encoding if specified
63
+ - Any user-provided instructions to add, remove, or modify steps
64
+
65
+ Parameters
66
+ ----------
67
+ model : langchain.llms.base.LLM
68
+ The language model used to generate the feature engineering function.
69
+ n_samples : int, optional
70
+ Number of samples used when summarizing the dataset. Defaults to 30.
71
+ log : bool, optional
72
+ Whether to log the generated code and errors. Defaults to False.
73
+ log_path : str, optional
74
+ Directory path for storing log files. Defaults to None.
75
+ file_name : str, optional
76
+ Name of the file for saving the generated response. Defaults to "feature_engineer.py".
77
+ function_name : str, optional
78
+ Name of the generated feature engineering function. Defaults to "feature_engineer".
79
+ overwrite : bool, optional
80
+ Whether to overwrite the log file if it exists. If False, a unique file name is created. Defaults to True.
81
+ human_in_the_loop : bool, optional
82
+ Enables user review of feature engineering instructions. Defaults to False.
83
+ bypass_recommended_steps : bool, optional
84
+ If True, skips the default recommended steps. Defaults to False.
85
+ bypass_explain_code : bool, optional
86
+ If True, skips the step that provides code explanations. Defaults to False.
87
+
88
+ Methods
89
+ -------
90
+ update_params(**kwargs)
91
+ Updates the agent's parameters and rebuilds the compiled state graph.
92
+ ainvoke_agent(
93
+ data_raw: pd.DataFrame,
94
+ user_instructions: str = None,
95
+ target_variable: str = None,
96
+ max_retries=3,
97
+ retry_count=0
98
+ )
99
+ Engineers features from the provided dataset asynchronously based on user instructions.
100
+ invoke_agent(
101
+ data_raw: pd.DataFrame,
102
+ user_instructions: str = None,
103
+ target_variable: str = None,
104
+ max_retries=3,
105
+ retry_count=0
106
+ )
107
+ Engineers features from the provided dataset synchronously based on user instructions.
108
+ get_workflow_summary()
109
+ Retrieves a summary of the agent's workflow.
110
+ get_log_summary()
111
+ Retrieves a summary of logged operations if logging is enabled.
112
+ get_data_engineered()
113
+ Retrieves the feature-engineered dataset as a pandas DataFrame.
114
+ get_data_raw()
115
+ Retrieves the raw dataset as a pandas DataFrame.
116
+ get_feature_engineer_function()
117
+ Retrieves the generated Python function used for feature engineering.
118
+ get_recommended_feature_engineering_steps()
119
+ Retrieves the agent's recommended feature engineering steps.
120
+ get_response()
121
+ Returns the response from the agent as a dictionary.
122
+ show()
123
+ Displays the agent's mermaid diagram.
124
+
125
+ Examples
126
+ --------
127
+ ```python
128
+ import pandas as pd
129
+ from langchain_openai import ChatOpenAI
130
+ from ai_data_science_team.agents import FeatureEngineeringAgent
131
+
132
+ llm = ChatOpenAI(model="gpt-4o-mini")
133
+
134
+ feature_agent = FeatureEngineeringAgent(
135
+ model=llm,
136
+ n_samples=30,
137
+ log=True,
138
+ log_path="logs",
139
+ human_in_the_loop=True
140
+ )
141
+
142
+ df = pd.read_csv("https://raw.githubusercontent.com/business-science/ai-data-science-team/refs/heads/master/data/churn_data.csv")
143
+
144
+ feature_agent.invoke_agent(
145
+ user_instructions="Also encode the 'PaymentMethod' column with one-hot encoding.",
146
+ data_raw=df,
147
+ target_variable="Churn",
148
+ max_retries=3,
149
+ retry_count=0
150
+ )
151
+
152
+ engineered_data = feature_agent.get_data_engineered()
153
+ response = feature_agent.get_response()
154
+ ```
155
+
156
+ Returns
157
+ -------
158
+ FeatureEngineeringAgent : langchain.graphs.CompiledStateGraph
159
+ A feature engineering agent implemented as a compiled state graph.
160
+ """
161
+
162
+ def __init__(
163
+ self,
164
+ model,
165
+ n_samples=30,
166
+ log=False,
167
+ log_path=None,
168
+ file_name="feature_engineer.py",
169
+ function_name="feature_engineer",
170
+ overwrite=True,
171
+ human_in_the_loop=False,
172
+ bypass_recommended_steps=False,
173
+ bypass_explain_code=False
174
+ ):
175
+ self._params = {
176
+ "model": model,
177
+ "n_samples": n_samples,
178
+ "log": log,
179
+ "log_path": log_path,
180
+ "file_name": file_name,
181
+ "function_name": function_name,
182
+ "overwrite": overwrite,
183
+ "human_in_the_loop": human_in_the_loop,
184
+ "bypass_recommended_steps": bypass_recommended_steps,
185
+ "bypass_explain_code": bypass_explain_code
186
+ }
187
+ self._compiled_graph = self._make_compiled_graph()
188
+ self.response = None
189
+
190
+ def _make_compiled_graph(self):
191
+ """
192
+ Create the compiled graph for the feature engineering agent.
193
+ Running this method will reset the response to None.
194
+ """
195
+ self.response = None
196
+ return make_feature_engineering_agent(**self._params)
197
+
198
+ def update_params(self, **kwargs):
199
+ """
200
+ Updates the agent's parameters and rebuilds the compiled graph.
201
+ """
202
+ for k, v in kwargs.items():
203
+ self._params[k] = v
204
+ self._compiled_graph = self._make_compiled_graph()
205
+
206
+ def ainvoke_agent(
207
+ self,
208
+ data_raw: pd.DataFrame,
209
+ user_instructions: str=None,
210
+ target_variable: str = None,
211
+ max_retries=3,
212
+ retry_count=0,
213
+ **kwargs
214
+ ):
215
+ """
216
+ Asynchronously engineers features for the provided dataset.
217
+ The response is stored in the 'response' attribute.
218
+
219
+ Parameters
220
+ ----------
221
+ data_raw : pd.DataFrame
222
+ The raw dataset to be processed.
223
+ user_instructions : str, optional
224
+ Instructions for feature engineering.
225
+ target_variable : str, optional
226
+ The name of the target variable (if any).
227
+ max_retries : int
228
+ Maximum retry attempts.
229
+ retry_count : int
230
+ Current retry attempt count.
231
+ **kwargs
232
+ Additional keyword arguments to pass to ainvoke().
233
+
234
+ Returns
235
+ -------
236
+ None
237
+ """
238
+ response = self._compiled_graph.ainvoke({
239
+ "user_instructions": user_instructions,
240
+ "data_raw": data_raw.to_dict(),
241
+ "target_variable": target_variable,
242
+ "max_retries": max_retries,
243
+ "retry_count": retry_count
244
+ }, **kwargs)
245
+ self.response = response
246
+ return None
247
+
248
+ def invoke_agent(
249
+ self,
250
+ data_raw: pd.DataFrame,
251
+ user_instructions: str=None,
252
+ target_variable: str = None,
253
+ max_retries=3,
254
+ retry_count=0,
255
+ **kwargs
256
+ ):
257
+ """
258
+ Synchronously engineers features for the provided dataset.
259
+ The response is stored in the 'response' attribute.
260
+
261
+ Parameters
262
+ ----------
263
+ data_raw : pd.DataFrame
264
+ The raw dataset to be processed.
265
+ user_instructions : str, optional
266
+ Instructions for feature engineering agent.
267
+ target_variable : str, optional
268
+ The name of the target variable (if any).
269
+ max_retries : int
270
+ Maximum retry attempts.
271
+ retry_count : int
272
+ Current retry attempt count.
273
+ **kwargs
274
+ Additional keyword arguments to pass to invoke().
275
+
276
+ Returns
277
+ -------
278
+ None
279
+ """
280
+ response = self._compiled_graph.invoke({
281
+ "user_instructions": user_instructions,
282
+ "data_raw": data_raw.to_dict(),
283
+ "target_variable": target_variable,
284
+ "max_retries": max_retries,
285
+ "retry_count": retry_count
286
+ }, **kwargs)
287
+ self.response = response
288
+ return None
289
+
290
+ def get_workflow_summary(self, markdown=False):
291
+ """
292
+ Retrieves the agent's workflow summary, if a response containing messages is available.
293
+ """
294
+ if self.response and self.response.get("messages"):
295
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
296
+ if markdown:
297
+ return Markdown(summary)
298
+ else:
299
+ return summary
300
+
301
+ def get_log_summary(self, markdown=False):
302
+ """
303
+ Returns a summary of the logged function path and function name, if the response contains a function path.
304
+ """
305
+ if self.response:
306
+ if self.response.get('feature_engineer_function_path'):
307
+ log_details = f"""
308
+ ## Featuring Engineering Agent Log Summary:
309
+
310
+ Function Path: {self.response.get('feature_engineer_function_path')}
311
+
312
+ Function Name: {self.response.get('feature_engineer_function_name')}
313
+ """
314
+ if markdown:
315
+ return Markdown(log_details)
316
+ else:
317
+ return log_details
318
+
319
+ def get_data_engineered(self):
320
+ """
321
+ Retrieves the engineered data stored after running invoke/ainvoke.
322
+
323
+ Returns
324
+ -------
325
+ pd.DataFrame or None
326
+ The engineered dataset as a pandas DataFrame.
327
+ """
328
+ if self.response and "data_engineered" in self.response:
329
+ return pd.DataFrame(self.response["data_engineered"])
330
+ return None
331
+
332
+ def get_data_raw(self):
333
+ """
334
+ Retrieves the raw data.
335
+
336
+ Returns
337
+ -------
338
+ pd.DataFrame or None
339
+ The raw dataset as a pandas DataFrame if available.
340
+ """
341
+ if self.response and "data_raw" in self.response:
342
+ return pd.DataFrame(self.response["data_raw"])
343
+ return None
344
+
345
+ def get_feature_engineer_function(self, markdown=False):
346
+ """
347
+ Retrieves the feature engineering function generated by the agent.
348
+
349
+ Parameters
350
+ ----------
351
+ markdown : bool, optional
352
+ If True, returns the function in Markdown code block format.
353
+
354
+ Returns
355
+ -------
356
+ str or None
357
+ The Python function code, or None if unavailable.
358
+ """
359
+ if self.response and "feature_engineer_function" in self.response:
360
+ code = self.response["feature_engineer_function"]
361
+ if markdown:
362
+ return Markdown(f"```python\n{code}\n```")
363
+ return code
364
+ return None
365
+
366
+ def get_recommended_feature_engineering_steps(self, markdown=False):
367
+ """
368
+ Retrieves the agent's recommended feature engineering steps.
369
+
370
+ Parameters
371
+ ----------
372
+ markdown : bool, optional
373
+ If True, returns the steps in Markdown format.
374
+
375
+ Returns
376
+ -------
377
+ str or None
378
+ The recommended steps, or None if not available.
379
+ """
380
+ if self.response and "recommended_steps" in self.response:
381
+ steps = self.response["recommended_steps"]
382
+ if markdown:
383
+ return Markdown(steps)
384
+ return steps
385
+ return None
386
+
387
+
388
+
389
+
36
390
  # * Feature Engineering Agent
37
391
 
38
392
  def make_feature_engineering_agent(
@@ -41,6 +395,7 @@ def make_feature_engineering_agent(
41
395
  log=False,
42
396
  log_path=None,
43
397
  file_name="feature_engineer.py",
398
+ function_name="feature_engineer",
44
399
  overwrite = True,
45
400
  human_in_the_loop=False,
46
401
  bypass_recommended_steps=False,
@@ -82,6 +437,8 @@ def make_feature_engineering_agent(
82
437
  The path to the directory where the log files should be stored. Defaults to "logs/".
83
438
  file_name : str, optional
84
439
  The name of the file to save the log to. Defaults to "feature_engineer.py".
440
+ function_name : str, optional
441
+ The name of the function that will be generated. Defaults to "feature_engineer".
85
442
  overwrite : bool, optional
86
443
  Whether or not to overwrite the log file if it already exists. If False, a unique file name will be created.
87
444
  Defaults to True.
@@ -122,6 +479,11 @@ def make_feature_engineering_agent(
122
479
  The feature engineering agent as a state graph.
123
480
  """
124
481
  llm = model
482
+
483
+ # Human in the loop requires recommended steps
484
+ if bypass_recommended_steps and human_in_the_loop:
485
+ bypass_recommended_steps = False
486
+ print("Bypass recommended steps set to False to enable human in the loop.")
125
487
 
126
488
  # Setup Log Directory
127
489
  if log:
@@ -141,6 +503,7 @@ def make_feature_engineering_agent(
141
503
  all_datasets_summary: str
142
504
  feature_engineer_function: str
143
505
  feature_engineer_function_path: str
506
+ feature_engineer_file_name: str
144
507
  feature_engineer_function_name: str
145
508
  feature_engineer_error: str
146
509
  max_retries: int
@@ -194,7 +557,7 @@ def make_feature_engineering_agent(
194
557
  Below are summaries of all datasets provided:
195
558
  {all_datasets_summary}
196
559
 
197
- Return the steps as a numbered list (no code, just the steps).
560
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
198
561
 
199
562
  Avoid these:
200
563
  1. Do not include steps to save files.
@@ -218,19 +581,36 @@ def make_feature_engineering_agent(
218
581
  })
219
582
 
220
583
  return {
221
- "recommended_steps": "\n\n# Recommended Feature Engineering Steps:\n" + recommended_steps.content.strip(),
584
+ "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended Feature Engineering Steps:"),
222
585
  "all_datasets_summary": all_datasets_summary_str
223
586
  }
224
587
 
225
- def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "create_feature_engineering_code"]]:
226
- return node_func_human_review(
227
- state=state,
228
- prompt_text="Is the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}",
229
- yes_goto="create_feature_engineering_code",
230
- no_goto="recommend_feature_engineering_steps",
231
- user_instructions_key="user_instructions",
232
- recommended_steps_key="recommended_steps"
233
- )
588
+ # Human Review
589
+
590
+ prompt_text_human_review = "Are the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
591
+
592
+ if not bypass_explain_code:
593
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "explain_feature_engineering_code"]]:
594
+ return node_func_human_review(
595
+ state=state,
596
+ prompt_text=prompt_text_human_review,
597
+ yes_goto= 'explain_feature_engineering_code',
598
+ no_goto="recommend_feature_engineering_steps",
599
+ user_instructions_key="user_instructions",
600
+ recommended_steps_key="recommended_steps",
601
+ code_snippet_key="feature_engineer_function",
602
+ )
603
+ else:
604
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "__end__"]]:
605
+ return node_func_human_review(
606
+ state=state,
607
+ prompt_text=prompt_text_human_review,
608
+ yes_goto= '__end__',
609
+ no_goto="recommend_feature_engineering_steps",
610
+ user_instructions_key="user_instructions",
611
+ recommended_steps_key="recommended_steps",
612
+ code_snippet_key="feature_engineer_function",
613
+ )
234
614
 
235
615
  def create_feature_engineering_code(state: GraphState):
236
616
  if bypass_recommended_steps:
@@ -250,8 +630,7 @@ def make_feature_engineering_agent(
250
630
 
251
631
  feature_engineering_prompt = PromptTemplate(
252
632
  template="""
253
-
254
- You are a Feature Engineering Agent. Your job is to create a feature_engineer() function that can be run on the data provided using the following recommended steps.
633
+ You are a Feature Engineering Agent. Your job is to create a {function_name}() function that can be run on the data provided using the following recommended steps.
255
634
 
256
635
  Recommended Steps:
257
636
  {recommended_steps}
@@ -265,11 +644,11 @@ def make_feature_engineering_agent(
265
644
 
266
645
  You can use Pandas, Numpy, and Scikit Learn libraries to feature engineer the data.
267
646
 
268
- Return Python code in ```python``` format with a single function definition, feature_engineer(data_raw), including all imports inside the function.
647
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), including all imports inside the function.
269
648
 
270
649
  Return code to provide the feature engineering function:
271
650
 
272
- def feature_engineer(data_raw):
651
+ def {function_name}(data_raw):
273
652
  import pandas as pd
274
653
  import numpy as np
275
654
  ...
@@ -292,7 +671,7 @@ def make_feature_engineering_agent(
292
671
 
293
672
 
294
673
  """,
295
- input_variables=["recommeded_steps", "target_variable", "all_datasets_summary"]
674
+ input_variables=["recommeded_steps", "target_variable", "all_datasets_summary", "function_name"]
296
675
  )
297
676
 
298
677
  feature_engineering_agent = feature_engineering_prompt | llm | PythonOutputParser()
@@ -301,6 +680,7 @@ def make_feature_engineering_agent(
301
680
  "recommended_steps": state.get("recommended_steps"),
302
681
  "target_variable": state.get("target_variable"),
303
682
  "all_datasets_summary": all_datasets_summary_str,
683
+ "function_name": function_name
304
684
  })
305
685
 
306
686
  response = relocate_imports_inside_function(response)
@@ -318,12 +698,11 @@ def make_feature_engineering_agent(
318
698
  return {
319
699
  "feature_engineer_function": response,
320
700
  "feature_engineer_function_path": file_path,
321
- "feature_engineer_function_name": file_name_2,
701
+ "feature_engineer_file_name": file_name_2,
702
+ "feature_engineer_function_name": function_name,
322
703
  "all_datasets_summary": all_datasets_summary_str
323
704
  }
324
705
 
325
-
326
-
327
706
  def execute_feature_engineering_code(state):
328
707
  return node_func_execute_agent_code_on_data(
329
708
  state=state,
@@ -331,7 +710,7 @@ def make_feature_engineering_agent(
331
710
  result_key="data_engineered",
332
711
  error_key="feature_engineer_error",
333
712
  code_snippet_key="feature_engineer_function",
334
- agent_function_name="feature_engineer",
713
+ agent_function_name=state.get("feature_engineer_function_name"),
335
714
  pre_processing=lambda data: pd.DataFrame.from_dict(data),
336
715
  post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
337
716
  error_message_prefix="An error occurred during feature engineering: "
@@ -339,11 +718,13 @@ def make_feature_engineering_agent(
339
718
 
340
719
  def fix_feature_engineering_code(state: GraphState):
341
720
  feature_engineer_prompt = """
342
- You are a Feature Engineering Agent. Your job is to fix the feature_engineer() function that currently contains errors.
721
+ You are a Feature Engineering Agent. Your job is to fix the {function_name}() function that currently contains errors.
722
+
723
+ Provide only the corrected function definition for {function_name}().
343
724
 
344
- Provide only the corrected function definition.
725
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), that includes all imports inside the function.
345
726
 
346
- Broken code:
727
+ This is the broken code (please fix):
347
728
  {code_snippet}
348
729
 
349
730
  Last Known Error:
@@ -359,23 +740,25 @@ def make_feature_engineering_agent(
359
740
  agent_name=AGENT_NAME,
360
741
  log=log,
361
742
  file_path=state.get("feature_engineer_function_path"),
743
+ function_name=state.get("feature_engineer_function_name"),
362
744
  )
363
745
 
364
- def explain_feature_engineering_code(state: GraphState):
365
- return node_func_explain_agent_code(
746
+ # Final reporting node
747
+ def report_agent_outputs(state: GraphState):
748
+ return node_func_report_agent_outputs(
366
749
  state=state,
367
- code_snippet_key="feature_engineer_function",
750
+ keys_to_include=[
751
+ "recommended_steps",
752
+ "feature_engineer_function",
753
+ "feature_engineer_function_path",
754
+ "feature_engineer_function_name",
755
+ "feature_engineer_error",
756
+ ],
368
757
  result_key="messages",
369
- error_key="feature_engineer_error",
370
- llm=llm,
371
758
  role=AGENT_NAME,
372
- explanation_prompt_template="""
373
- Explain the feature engineering steps performed by this function. Keep the explanation clear and concise.\n\n# Feature Engineering Agent:\n\n{code}
374
- """,
375
- success_prefix="# Feature Engineering Agent:\n\n ",
376
- error_message="The Feature Engineering Agent encountered an error during feature engineering. Data could not be explained."
759
+ custom_title="Feature Engineering Agent Outputs"
377
760
  )
378
-
761
+
379
762
  # Create the graph
380
763
  node_functions = {
381
764
  "recommend_feature_engineering_steps": recommend_feature_engineering_steps,
@@ -383,7 +766,7 @@ def make_feature_engineering_agent(
383
766
  "create_feature_engineering_code": create_feature_engineering_code,
384
767
  "execute_feature_engineering_code": execute_feature_engineering_code,
385
768
  "fix_feature_engineering_code": fix_feature_engineering_code,
386
- "explain_feature_engineering_code": explain_feature_engineering_code
769
+ "report_agent_outputs": report_agent_outputs,
387
770
  }
388
771
 
389
772
  app = create_coding_agent_graph(
@@ -393,11 +776,13 @@ def make_feature_engineering_agent(
393
776
  create_code_node_name="create_feature_engineering_code",
394
777
  execute_code_node_name="execute_feature_engineering_code",
395
778
  fix_code_node_name="fix_feature_engineering_code",
396
- explain_code_node_name="explain_feature_engineering_code",
779
+ explain_code_node_name="report_agent_outputs",
397
780
  error_key="feature_engineer_error",
781
+ max_retries_key = "max_retries",
782
+ retry_count_key = "retry_count",
398
783
  human_in_the_loop=human_in_the_loop,
399
784
  human_review_node_name="human_review",
400
- checkpointer=MemorySaver() if human_in_the_loop else None,
785
+ checkpointer=MemorySaver(),
401
786
  bypass_recommended_steps=bypass_recommended_steps,
402
787
  bypass_explain_code=bypass_explain_code,
403
788
  )