ai-data-science-team 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,18 +14,25 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
18
17
  import pandas as pd
19
18
 
20
- from ai_data_science_team.templates.agent_templates import(
19
+ from IPython.display import Markdown
20
+
21
+ from ai_data_science_team.templates import(
21
22
  node_func_execute_agent_code_on_data,
22
23
  node_func_human_review,
23
24
  node_func_fix_agent_code,
24
25
  node_func_explain_agent_code,
25
- create_coding_agent_graph
26
+ create_coding_agent_graph,
27
+ BaseAgent,
26
28
  )
27
29
  from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top
30
+ from ai_data_science_team.tools.regex import (
31
+ relocate_imports_inside_function,
32
+ add_comments_to_top,
33
+ format_agent_name,
34
+ format_recommended_steps
35
+ )
29
36
  from ai_data_science_team.tools.metadata import get_dataframe_summary
30
37
  from ai_data_science_team.tools.logging import log_ai_function
31
38
 
@@ -33,9 +40,386 @@ from ai_data_science_team.tools.logging import log_ai_function
33
40
  AGENT_NAME = "feature_engineering_agent"
34
41
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
35
42
 
43
+ # Class
44
+
45
+ class FeatureEngineeringAgent(BaseAgent):
46
+ """
47
+ Creates a feature engineering agent that can process datasets based on user-defined instructions or
48
+ default feature engineering steps. The agent generates a Python function to engineer features, executes it,
49
+ and logs the process, including code and errors. It is designed to facilitate reproducible and
50
+ customizable feature engineering workflows.
51
+
52
+ The agent can perform the following default feature engineering steps unless instructed otherwise:
53
+ - Convert features to appropriate data types
54
+ - Remove features that have unique values for each row
55
+ - Remove constant features
56
+ - Encode high-cardinality categoricals (threshold <= 5% of dataset) as 'other'
57
+ - One-hot-encode categorical variables
58
+ - Convert booleans to integer (1/0)
59
+ - Create datetime-based features (if applicable)
60
+ - Handle target variable encoding if specified
61
+ - Any user-provided instructions to add, remove, or modify steps
62
+
63
+ Parameters
64
+ ----------
65
+ model : langchain.llms.base.LLM
66
+ The language model used to generate the feature engineering function.
67
+ n_samples : int, optional
68
+ Number of samples used when summarizing the dataset. Defaults to 30.
69
+ log : bool, optional
70
+ Whether to log the generated code and errors. Defaults to False.
71
+ log_path : str, optional
72
+ Directory path for storing log files. Defaults to None.
73
+ file_name : str, optional
74
+ Name of the file for saving the generated response. Defaults to "feature_engineer.py".
75
+ function_name : str, optional
76
+ Name of the generated feature engineering function. Defaults to "feature_engineer".
77
+ overwrite : bool, optional
78
+ Whether to overwrite the log file if it exists. If False, a unique file name is created. Defaults to True.
79
+ human_in_the_loop : bool, optional
80
+ Enables user review of feature engineering instructions. Defaults to False.
81
+ bypass_recommended_steps : bool, optional
82
+ If True, skips the default recommended steps. Defaults to False.
83
+ bypass_explain_code : bool, optional
84
+ If True, skips the step that provides code explanations. Defaults to False.
85
+
86
+ Methods
87
+ -------
88
+ update_params(**kwargs)
89
+ Updates the agent's parameters and rebuilds the compiled state graph.
90
+ ainvoke_agent(
91
+ data_raw: pd.DataFrame,
91
+ user_instructions: str = None,
93
+ target_variable: str = None,
94
+ max_retries=3,
95
+ retry_count=0
96
+ )
97
+ Engineers features from the provided dataset asynchronously based on user instructions.
98
+ invoke_agent(
99
+ data_raw: pd.DataFrame,
99
+ user_instructions: str = None,
101
+ target_variable: str = None,
102
+ max_retries=3,
103
+ retry_count=0
104
+ )
105
+ Engineers features from the provided dataset synchronously based on user instructions.
106
+ explain_feature_engineering_steps()
107
+ Returns an explanation of the feature engineering steps performed by the agent.
108
+ get_log_summary()
109
+ Retrieves a summary of logged operations if logging is enabled.
110
+ get_data_engineered()
111
+ Retrieves the feature-engineered dataset as a pandas DataFrame.
112
+ get_data_raw()
113
+ Retrieves the raw dataset as a pandas DataFrame.
114
+ get_feature_engineer_function()
115
+ Retrieves the generated Python function used for feature engineering.
116
+ get_recommended_feature_engineering_steps()
117
+ Retrieves the agent's recommended feature engineering steps.
118
+ get_response()
119
+ Returns the response from the agent as a dictionary.
120
+ show()
121
+ Displays the agent's mermaid diagram.
122
+
123
+ Examples
124
+ --------
125
+ ```python
126
+ import pandas as pd
127
+ from langchain_openai import ChatOpenAI
128
+ from ai_data_science_team.agents import FeatureEngineeringAgent
129
+
130
+ llm = ChatOpenAI(model="gpt-4o-mini")
131
+
132
+ feature_agent = FeatureEngineeringAgent(
133
+ model=llm,
134
+ n_samples=30,
135
+ log=True,
136
+ log_path="logs",
137
+ human_in_the_loop=True
138
+ )
139
+
140
+ df = pd.read_csv("https://raw.githubusercontent.com/business-science/ai-data-science-team/refs/heads/master/data/churn_data.csv")
141
+
142
+ feature_agent.invoke_agent(
143
+ user_instructions="Also encode the 'PaymentMethod' column with one-hot encoding.",
144
+ data_raw=df,
145
+ target_variable="Churn",
146
+ max_retries=3,
147
+ retry_count=0
148
+ )
149
+
150
+ engineered_data = feature_agent.get_data_engineered()
151
+ response = feature_agent.get_response()
152
+ ```
153
+
154
+ Returns
155
+ -------
156
+ FeatureEngineeringAgent : langchain.graphs.CompiledStateGraph
157
+ A feature engineering agent implemented as a compiled state graph.
158
+ """
159
+
160
+ def __init__(
161
+ self,
162
+ model,
163
+ n_samples=30,
164
+ log=False,
165
+ log_path=None,
166
+ file_name="feature_engineer.py",
167
+ function_name="feature_engineer",
168
+ overwrite=True,
169
+ human_in_the_loop=False,
170
+ bypass_recommended_steps=False,
171
+ bypass_explain_code=False
172
+ ):
173
+ self._params = {
174
+ "model": model,
175
+ "n_samples": n_samples,
176
+ "log": log,
177
+ "log_path": log_path,
178
+ "file_name": file_name,
179
+ "function_name": function_name,
180
+ "overwrite": overwrite,
181
+ "human_in_the_loop": human_in_the_loop,
182
+ "bypass_recommended_steps": bypass_recommended_steps,
183
+ "bypass_explain_code": bypass_explain_code
184
+ }
185
+ self._compiled_graph = self._make_compiled_graph()
186
+ self.response = None
187
+
188
+ def _make_compiled_graph(self):
189
+ """
190
+ Create the compiled graph for the feature engineering agent.
191
+ Running this method will reset the response to None.
192
+ """
193
+ self.response = None
194
+ return make_feature_engineering_agent(**self._params)
195
+
196
+ def update_params(self, **kwargs):
197
+ """
198
+ Updates the agent's parameters and rebuilds the compiled graph.
199
+ """
200
+ for k, v in kwargs.items():
201
+ self._params[k] = v
202
+ self._compiled_graph = self._make_compiled_graph()
203
+
204
+ def ainvoke_agent(
205
+ self,
206
+ data_raw: pd.DataFrame,
207
+ user_instructions: str=None,
208
+ target_variable: str = None,
209
+ max_retries=3,
210
+ retry_count=0,
211
+ **kwargs
212
+ ):
213
+ """
214
+ Asynchronously engineers features for the provided dataset.
215
+ The response is stored in the 'response' attribute.
216
+
217
+ Parameters
218
+ ----------
219
+ data_raw : pd.DataFrame
220
+ The raw dataset to be processed.
221
+ user_instructions : str, optional
222
+ Instructions for feature engineering.
223
+ target_variable : str, optional
224
+ The name of the target variable (if any).
225
+ max_retries : int
226
+ Maximum retry attempts.
227
+ retry_count : int
228
+ Current retry attempt count.
229
+ **kwargs
230
+ Additional keyword arguments to pass to ainvoke().
231
+
232
+ Returns
233
+ -------
234
+ None
235
+ """
236
+ response = self._compiled_graph.ainvoke({
237
+ "user_instructions": user_instructions,
238
+ "data_raw": data_raw.to_dict(),
239
+ "target_variable": target_variable,
240
+ "max_retries": max_retries,
241
+ "retry_count": retry_count
242
+ }, **kwargs)
243
+ self.response = response
244
+ return None
245
+
246
+ def invoke_agent(
247
+ self,
248
+ data_raw: pd.DataFrame,
249
+ user_instructions: str=None,
250
+ target_variable: str = None,
251
+ max_retries=3,
252
+ retry_count=0,
253
+ **kwargs
254
+ ):
255
+ """
256
+ Synchronously engineers features for the provided dataset.
257
+ The response is stored in the 'response' attribute.
258
+
259
+ Parameters
260
+ ----------
261
+ data_raw : pd.DataFrame
262
+ The raw dataset to be processed.
263
+ user_instructions : str, optional
264
+ Instructions for feature engineering agent.
265
+ target_variable : str, optional
266
+ The name of the target variable (if any).
267
+ max_retries : int
268
+ Maximum retry attempts.
269
+ retry_count : int
270
+ Current retry attempt count.
271
+ **kwargs
272
+ Additional keyword arguments to pass to invoke().
273
+
274
+ Returns
275
+ -------
276
+ None
277
+ """
278
+ response = self._compiled_graph.invoke({
279
+ "user_instructions": user_instructions,
280
+ "data_raw": data_raw.to_dict(),
281
+ "target_variable": target_variable,
282
+ "max_retries": max_retries,
283
+ "retry_count": retry_count
284
+ }, **kwargs)
285
+ self.response = response
286
+ return None
287
+
288
+ def explain_feature_engineering_steps(self):
289
+ """
290
+ Provides an explanation of the feature engineering steps performed by the agent.
291
+
292
+ Returns
293
+ -------
294
+ str or list
295
+ Explanation of the feature engineering steps.
296
+ """
297
+ if self.response:
298
+ return self.response.get("messages", [])
299
+ return []
300
+
301
+ def get_log_summary(self, markdown=False):
302
+ """
303
+ Returns a summary containing the path of the logged function file, if one was recorded.
304
+
305
+ Parameters
306
+ ----------
307
+ markdown : bool, optional
308
+ If True, returns Markdown-formatted output.
309
+
310
+ Returns
311
+ -------
312
+ str or None
313
+ Summary of logs, or None if not available.
314
+ """
315
+ if self.response and self.response.get("feature_engineer_function_path"):
316
+ log_details = f"Log Path: {self.response.get('feature_engineer_function_path')}"
317
+ if markdown:
318
+ return Markdown(log_details)
319
+ else:
320
+ return log_details
321
+ return None
322
+
323
+ def get_data_engineered(self):
324
+ """
325
+ Retrieves the engineered data stored after running invoke/ainvoke.
326
+
327
+ Returns
328
+ -------
329
+ pd.DataFrame or None
330
+ The engineered dataset as a pandas DataFrame.
331
+ """
332
+ if self.response and "data_engineered" in self.response:
333
+ return pd.DataFrame(self.response["data_engineered"])
334
+ return None
335
+
336
+ def get_data_raw(self):
337
+ """
338
+ Retrieves the raw data.
339
+
340
+ Returns
341
+ -------
342
+ pd.DataFrame or None
343
+ The raw dataset as a pandas DataFrame if available.
344
+ """
345
+ if self.response and "data_raw" in self.response:
346
+ return pd.DataFrame(self.response["data_raw"])
347
+ return None
348
+
349
+ def get_feature_engineer_function(self, markdown=False):
350
+ """
351
+ Retrieves the feature engineering function generated by the agent.
352
+
353
+ Parameters
354
+ ----------
355
+ markdown : bool, optional
356
+ If True, returns the function in Markdown code block format.
357
+
358
+ Returns
359
+ -------
360
+ str or None
361
+ The Python function code, or None if unavailable.
362
+ """
363
+ if self.response and "feature_engineer_function" in self.response:
364
+ code = self.response["feature_engineer_function"]
365
+ if markdown:
366
+ return Markdown(f"```python\n{code}\n```")
367
+ return code
368
+ return None
369
+
370
+ def get_recommended_feature_engineering_steps(self, markdown=False):
371
+ """
372
+ Retrieves the agent's recommended feature engineering steps.
373
+
374
+ Parameters
375
+ ----------
376
+ markdown : bool, optional
377
+ If True, returns the steps in Markdown format.
378
+
379
+ Returns
380
+ -------
381
+ str or None
382
+ The recommended steps, or None if not available.
383
+ """
384
+ if self.response and "recommended_steps" in self.response:
385
+ steps = self.response["recommended_steps"]
386
+ if markdown:
387
+ return Markdown(steps)
388
+ return steps
389
+ return None
390
+
391
+ def get_response(self):
392
+ """
393
+ Returns the agent's full response dictionary.
394
+
395
+ Returns
396
+ -------
397
+ dict or None
398
+ The response dictionary if available, otherwise None.
399
+ """
400
+ return self.response
401
+
402
+ def show(self):
403
+ """
404
+ Displays the agent's mermaid diagram for visual inspection of the compiled graph.
405
+ """
406
+ return self._compiled_graph.show()
407
+
408
+
36
409
  # * Feature Engineering Agent
37
410
 
38
- def make_feature_engineering_agent(model, log=False, log_path=None, overwrite = True, human_in_the_loop=False, bypass_recommended_steps=False, bypass_explain_code=False):
411
+ def make_feature_engineering_agent(
412
+ model,
413
+ n_samples=30,
414
+ log=False,
415
+ log_path=None,
416
+ file_name="feature_engineer.py",
417
+ function_name="feature_engineer",
418
+ overwrite = True,
419
+ human_in_the_loop=False,
420
+ bypass_recommended_steps=False,
421
+ bypass_explain_code=False,
422
+ ):
39
423
  """
40
424
  Creates a feature engineering agent that can be run on a dataset. The agent applies various feature engineering
41
425
  techniques, such as encoding categorical variables, scaling numeric variables, creating interaction terms,
@@ -61,11 +445,19 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
61
445
  ----------
62
446
  model : langchain.llms.base.LLM
63
447
  The language model to use to generate code.
448
+ n_samples : int, optional
449
+ The number of data samples to use for generating the feature engineering code. Defaults to 30.
450
+ If you get an error due to maximum tokens, try reducing this number.
451
+ > "This model's maximum context length is 128000 tokens. However, your messages resulted in 333858 tokens. Please reduce the length of the messages."
64
452
  log : bool, optional
65
453
  Whether or not to log the code generated and any errors that occur.
66
454
  Defaults to False.
67
455
  log_path : str, optional
68
456
  The path to the directory where the log files should be stored. Defaults to "logs/".
457
+ file_name : str, optional
458
+ The name of the file to save the log to. Defaults to "feature_engineer.py".
459
+ function_name : str, optional
460
+ The name of the function that will be generated. Defaults to "feature_engineer".
69
461
  overwrite : bool, optional
70
462
  Whether or not to overwrite the log file if it already exists. If False, a unique file name will be created.
71
463
  Defaults to True.
@@ -102,10 +494,15 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
102
494
 
103
495
  Returns
104
496
  -------
105
- app : langchain.graphs.StateGraph
497
+ app : langchain.graphs.CompiledStateGraph
106
498
  The feature engineering agent as a state graph.
107
499
  """
108
500
  llm = model
501
+
502
+ # Human in the loop requires recommended steps
503
+ if bypass_recommended_steps and human_in_the_loop:
504
+ bypass_recommended_steps = False
505
+ print("Bypass recommended steps set to False to enable human in the loop.")
109
506
 
110
507
  # Setup Log Directory
111
508
  if log:
@@ -125,6 +522,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
125
522
  all_datasets_summary: str
126
523
  feature_engineer_function: str
127
524
  feature_engineer_function_path: str
525
+ feature_engineer_file_name: str
128
526
  feature_engineer_function_name: str
129
527
  feature_engineer_error: str
130
528
  max_retries: int
@@ -135,7 +533,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
135
533
  Recommend a series of feature engineering steps based on the input data.
136
534
  These recommended steps will be appended to the user_instructions.
137
535
  """
138
- print("---FEATURE ENGINEERING AGENT----")
536
+ print(format_agent_name(AGENT_NAME))
139
537
  print(" * RECOMMEND FEATURE ENGINEERING STEPS")
140
538
 
141
539
  # Prompt to get recommended steps from the LLM
@@ -182,6 +580,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
182
580
 
183
581
  Avoid these:
184
582
  1. Do not include steps to save files.
583
+ 2. Do not include unrelated user instructions that are not related to the feature engineering.
185
584
  """,
186
585
  input_variables=["user_instructions", "recommended_steps", "all_datasets_summary"]
187
586
  )
@@ -189,7 +588,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
189
588
  data_raw = state.get("data_raw")
190
589
  df = pd.DataFrame.from_dict(data_raw)
191
590
 
192
- all_datasets_summary = get_dataframe_summary([df])
591
+ all_datasets_summary = get_dataframe_summary([df], n_sample=n_samples)
193
592
 
194
593
  all_datasets_summary_str = "\n\n".join(all_datasets_summary)
195
594
 
@@ -201,29 +600,57 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
201
600
  })
202
601
 
203
602
  return {
204
- "recommended_steps": "\n\n# Recommended Feature Engineering Steps:\n" + recommended_steps.content.strip(),
603
+ "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended Feature Engineering Steps:"),
205
604
  "all_datasets_summary": all_datasets_summary_str
206
605
  }
207
606
 
208
- def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "create_feature_engineering_code"]]:
209
- return node_func_human_review(
210
- state=state,
211
- prompt_text="Is the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}",
212
- yes_goto="create_feature_engineering_code",
213
- no_goto="recommend_feature_engineering_steps",
214
- user_instructions_key="user_instructions",
215
- recommended_steps_key="recommended_steps"
216
- )
607
+ # Human Review
608
+
609
+ prompt_text_human_review = "Are the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
610
+
611
+ if not bypass_explain_code:
612
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "explain_feature_engineering_code"]]:
613
+ return node_func_human_review(
614
+ state=state,
615
+ prompt_text=prompt_text_human_review,
616
+ yes_goto= 'explain_feature_engineering_code',
617
+ no_goto="recommend_feature_engineering_steps",
618
+ user_instructions_key="user_instructions",
619
+ recommended_steps_key="recommended_steps",
620
+ code_snippet_key="feature_engineer_function",
621
+ )
622
+ else:
623
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "__end__"]]:
624
+ return node_func_human_review(
625
+ state=state,
626
+ prompt_text=prompt_text_human_review,
627
+ yes_goto= '__end__',
628
+ no_goto="recommend_feature_engineering_steps",
629
+ user_instructions_key="user_instructions",
630
+ recommended_steps_key="recommended_steps",
631
+ code_snippet_key="feature_engineer_function",
632
+ )
217
633
 
218
634
  def create_feature_engineering_code(state: GraphState):
219
635
  if bypass_recommended_steps:
220
- print("---FEATURE ENGINEERING AGENT----")
636
+ print(format_agent_name(AGENT_NAME))
637
+
638
+ data_raw = state.get("data_raw")
639
+ df = pd.DataFrame.from_dict(data_raw)
640
+
641
+ all_datasets_summary = get_dataframe_summary([df], n_sample=n_samples)
642
+
643
+ all_datasets_summary_str = "\n\n".join(all_datasets_summary)
644
+
645
+ else:
646
+ all_datasets_summary_str = state.get("all_datasets_summary")
647
+
221
648
  print(" * CREATE FEATURE ENGINEERING CODE")
222
649
 
223
650
  feature_engineering_prompt = PromptTemplate(
224
651
  template="""
225
652
 
226
- You are a Feature Engineering Agent. Your job is to create a feature_engineer() function that can be run on the data provided using the following recommended steps.
653
+ You are a Feature Engineering Agent. Your job is to create a {function_name}() function that can be run on the data provided using the following recommended steps.
227
654
 
228
655
  Recommended Steps:
229
656
  {recommended_steps}
@@ -237,11 +664,11 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
237
664
 
238
665
  You can use Pandas, Numpy, and Scikit Learn libraries to feature engineer the data.
239
666
 
240
- Return Python code in ```python``` format with a single function definition, feature_engineer(data_raw), including all imports inside the function.
667
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), including all imports inside the function.
241
668
 
242
669
  Return code to provide the feature engineering function:
243
670
 
244
- def feature_engineer(data_raw):
671
+ def {function_name}(data_raw):
245
672
  import pandas as pd
246
673
  import numpy as np
247
674
  ...
@@ -264,7 +691,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
264
691
 
265
692
 
266
693
  """,
267
- input_variables=["recommeded_steps", "target_variable", "all_datasets_summary"]
694
+ input_variables=["recommeded_steps", "target_variable", "all_datasets_summary", "function_name"]
268
695
  )
269
696
 
270
697
  feature_engineering_agent = feature_engineering_prompt | llm | PythonOutputParser()
@@ -272,16 +699,17 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
272
699
  response = feature_engineering_agent.invoke({
273
700
  "recommended_steps": state.get("recommended_steps"),
274
701
  "target_variable": state.get("target_variable"),
275
- "all_datasets_summary": state.get("all_datasets_summary"),
702
+ "all_datasets_summary": all_datasets_summary_str,
703
+ "function_name": function_name
276
704
  })
277
705
 
278
706
  response = relocate_imports_inside_function(response)
279
707
  response = add_comments_to_top(response, agent_name=AGENT_NAME)
280
708
 
281
709
  # For logging: store the code generated
282
- file_path, file_name = log_ai_function(
710
+ file_path, file_name_2 = log_ai_function(
283
711
  response=response,
284
- file_name="feature_engineer.py",
712
+ file_name=file_name,
285
713
  log=log,
286
714
  log_path=log_path,
287
715
  overwrite=overwrite
@@ -290,11 +718,11 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
290
718
  return {
291
719
  "feature_engineer_function": response,
292
720
  "feature_engineer_function_path": file_path,
293
- "feature_engineer_function_name": file_name
721
+ "feature_engineer_file_name": file_name_2,
722
+ "feature_engineer_function_name": function_name,
723
+ "all_datasets_summary": all_datasets_summary_str
294
724
  }
295
725
 
296
-
297
-
298
726
  def execute_feature_engineering_code(state):
299
727
  return node_func_execute_agent_code_on_data(
300
728
  state=state,
@@ -302,7 +730,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
302
730
  result_key="data_engineered",
303
731
  error_key="feature_engineer_error",
304
732
  code_snippet_key="feature_engineer_function",
305
- agent_function_name="feature_engineer",
733
+ agent_function_name=state.get("feature_engineer_function_name"),
306
734
  pre_processing=lambda data: pd.DataFrame.from_dict(data),
307
735
  post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
308
736
  error_message_prefix="An error occurred during feature engineering: "
@@ -310,11 +738,13 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
310
738
 
311
739
  def fix_feature_engineering_code(state: GraphState):
312
740
  feature_engineer_prompt = """
313
- You are a Feature Engineering Agent. Your job is to fix the feature_engineer() function that currently contains errors.
741
+ You are a Feature Engineering Agent. Your job is to fix the {function_name}() function that currently contains errors.
742
+
743
+ Provide only the corrected function definition for {function_name}().
314
744
 
315
- Provide only the corrected function definition.
745
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), that includes all imports inside the function.
316
746
 
317
- Broken code:
747
+ This is the broken code (please fix):
318
748
  {code_snippet}
319
749
 
320
750
  Last Known Error:
@@ -330,6 +760,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
330
760
  agent_name=AGENT_NAME,
331
761
  log=log,
332
762
  file_path=state.get("feature_engineer_function_path"),
763
+ function_name=state.get("feature_engineer_function_name"),
333
764
  )
334
765
 
335
766
  def explain_feature_engineering_code(state: GraphState):
@@ -366,9 +797,11 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
366
797
  fix_code_node_name="fix_feature_engineering_code",
367
798
  explain_code_node_name="explain_feature_engineering_code",
368
799
  error_key="feature_engineer_error",
800
+ max_retries_key = "max_retries",
801
+ retry_count_key = "retry_count",
369
802
  human_in_the_loop=human_in_the_loop,
370
803
  human_review_node_name="human_review",
371
- checkpointer=MemorySaver() if human_in_the_loop else None,
804
+ checkpointer=MemorySaver(),
372
805
  bypass_recommended_steps=bypass_recommended_steps,
373
806
  bypass_explain_code=bypass_explain_code,
374
807
  )