ai-data-science-team 0.0.0.9006__py3-none-any.whl → 0.0.0.9008__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,18 +14,25 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
18
17
  import pandas as pd
19
18
 
20
- from ai_data_science_team.templates.agent_templates import(
19
+ from IPython.display import Markdown
20
+
21
+ from ai_data_science_team.templates import(
21
22
  node_func_execute_agent_code_on_data,
22
23
  node_func_human_review,
23
24
  node_func_fix_agent_code,
24
25
  node_func_explain_agent_code,
25
- create_coding_agent_graph
26
+ create_coding_agent_graph,
27
+ BaseAgent,
26
28
  )
27
29
  from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top
30
+ from ai_data_science_team.tools.regex import (
31
+ relocate_imports_inside_function,
32
+ add_comments_to_top,
33
+ format_agent_name,
34
+ format_recommended_steps
35
+ )
29
36
  from ai_data_science_team.tools.metadata import get_dataframe_summary
30
37
  from ai_data_science_team.tools.logging import log_ai_function
31
38
 
@@ -33,9 +40,386 @@ from ai_data_science_team.tools.logging import log_ai_function
33
40
  AGENT_NAME = "feature_engineering_agent"
34
41
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
35
42
 
43
+ # Class
44
+
45
+ class FeatureEngineeringAgent(BaseAgent):
46
+ """
47
+ Creates a feature engineering agent that can process datasets based on user-defined instructions or
48
+ default feature engineering steps. The agent generates a Python function to engineer features, executes it,
49
+ and logs the process, including code and errors. It is designed to facilitate reproducible and
50
+ customizable feature engineering workflows.
51
+
52
+ The agent can perform the following default feature engineering steps unless instructed otherwise:
53
+ - Convert features to appropriate data types
54
+ - Remove features that have unique values for each row
55
+ - Remove constant features
56
+ - Encode high-cardinality categoricals (threshold <= 5% of dataset) as 'other'
57
+ - One-hot-encode categorical variables
58
+ - Convert booleans to integer (1/0)
59
+ - Create datetime-based features (if applicable)
60
+ - Handle target variable encoding if specified
61
+ - Any user-provided instructions to add, remove, or modify steps
62
+
63
+ Parameters
64
+ ----------
65
+ model : langchain.llms.base.LLM
66
+ The language model used to generate the feature engineering function.
67
+ n_samples : int, optional
68
+ Number of samples used when summarizing the dataset. Defaults to 30.
69
+ log : bool, optional
70
+ Whether to log the generated code and errors. Defaults to False.
71
+ log_path : str, optional
72
+ Directory path for storing log files. Defaults to None.
73
+ file_name : str, optional
74
+ Name of the file for saving the generated response. Defaults to "feature_engineer.py".
75
+ function_name : str, optional
76
+ Name of the generated feature engineering function. Defaults to "feature_engineer".
77
+ overwrite : bool, optional
78
+ Whether to overwrite the log file if it exists. If False, a unique file name is created. Defaults to True.
79
+ human_in_the_loop : bool, optional
80
+ Enables user review of feature engineering instructions. Defaults to False.
81
+ bypass_recommended_steps : bool, optional
82
+ If True, skips the default recommended steps. Defaults to False.
83
+ bypass_explain_code : bool, optional
84
+ If True, skips the step that provides code explanations. Defaults to False.
85
+
86
+ Methods
87
+ -------
88
+ update_params(**kwargs)
89
+ Updates the agent's parameters and rebuilds the compiled state graph.
90
+ ainvoke_agent(
91
+ data_raw: pd.DataFrame,
92
+ user_instructions: str = None,
93
+ target_variable: str = None,
94
+ max_retries=3,
95
+ retry_count=0
96
+ )
97
+ Engineers features from the provided dataset asynchronously based on user instructions.
98
+ invoke_agent(
99
+ data_raw: pd.DataFrame,
100
+ user_instructions: str = None,
101
+ target_variable: str = None,
102
+ max_retries=3,
103
+ retry_count=0
104
+ )
105
+ Engineers features from the provided dataset synchronously based on user instructions.
106
+ explain_feature_engineering_steps()
107
+ Returns an explanation of the feature engineering steps performed by the agent.
108
+ get_log_summary()
109
+ Retrieves a summary of logged operations if logging is enabled.
110
+ get_data_engineered()
111
+ Retrieves the feature-engineered dataset as a pandas DataFrame.
112
+ get_data_raw()
113
+ Retrieves the raw dataset as a pandas DataFrame.
114
+ get_feature_engineer_function()
115
+ Retrieves the generated Python function used for feature engineering.
116
+ get_recommended_feature_engineering_steps()
117
+ Retrieves the agent's recommended feature engineering steps.
118
+ get_response()
119
+ Returns the response from the agent as a dictionary.
120
+ show()
121
+ Displays the agent's mermaid diagram.
122
+
123
+ Examples
124
+ --------
125
+ ```python
126
+ import pandas as pd
127
+ from langchain_openai import ChatOpenAI
128
+ from ai_data_science_team.agents import FeatureEngineeringAgent
129
+
130
+ llm = ChatOpenAI(model="gpt-4o-mini")
131
+
132
+ feature_agent = FeatureEngineeringAgent(
133
+ model=llm,
134
+ n_samples=30,
135
+ log=True,
136
+ log_path="logs",
137
+ human_in_the_loop=True
138
+ )
139
+
140
+ df = pd.read_csv("https://raw.githubusercontent.com/business-science/ai-data-science-team/refs/heads/master/data/churn_data.csv")
141
+
142
+ feature_agent.invoke_agent(
143
+ user_instructions="Also encode the 'PaymentMethod' column with one-hot encoding.",
144
+ data_raw=df,
145
+ target_variable="Churn",
146
+ max_retries=3,
147
+ retry_count=0
148
+ )
149
+
150
+ engineered_data = feature_agent.get_data_engineered()
151
+ response = feature_agent.get_response()
152
+ ```
153
+
154
+ Returns
155
+ -------
156
+ FeatureEngineeringAgent : langchain.graphs.CompiledStateGraph
157
+ A feature engineering agent implemented as a compiled state graph.
158
+ """
159
+
160
+ def __init__(
161
+ self,
162
+ model,
163
+ n_samples=30,
164
+ log=False,
165
+ log_path=None,
166
+ file_name="feature_engineer.py",
167
+ function_name="feature_engineer",
168
+ overwrite=True,
169
+ human_in_the_loop=False,
170
+ bypass_recommended_steps=False,
171
+ bypass_explain_code=False
172
+ ):
173
+ self._params = {
174
+ "model": model,
175
+ "n_samples": n_samples,
176
+ "log": log,
177
+ "log_path": log_path,
178
+ "file_name": file_name,
179
+ "function_name": function_name,
180
+ "overwrite": overwrite,
181
+ "human_in_the_loop": human_in_the_loop,
182
+ "bypass_recommended_steps": bypass_recommended_steps,
183
+ "bypass_explain_code": bypass_explain_code
184
+ }
185
+ self._compiled_graph = self._make_compiled_graph()
186
+ self.response = None
187
+
188
+ def _make_compiled_graph(self):
189
+ """
190
+ Create the compiled graph for the feature engineering agent.
191
+ Running this method will reset the response to None.
192
+ """
193
+ self.response = None
194
+ return make_feature_engineering_agent(**self._params)
195
+
196
+ def update_params(self, **kwargs):
197
+ """
198
+ Updates the agent's parameters and rebuilds the compiled graph.
199
+ """
200
+ for k, v in kwargs.items():
201
+ self._params[k] = v
202
+ self._compiled_graph = self._make_compiled_graph()
203
+
204
+ def ainvoke_agent(
205
+ self,
206
+ data_raw: pd.DataFrame,
207
+ user_instructions: str=None,
208
+ target_variable: str = None,
209
+ max_retries=3,
210
+ retry_count=0,
211
+ **kwargs
212
+ ):
213
+ """
214
+ Asynchronously engineers features for the provided dataset.
215
+ The response is stored in the 'response' attribute.
216
+
217
+ Parameters
218
+ ----------
219
+ data_raw : pd.DataFrame
220
+ The raw dataset to be processed.
221
+ user_instructions : str, optional
222
+ Instructions for feature engineering.
223
+ target_variable : str, optional
224
+ The name of the target variable (if any).
225
+ max_retries : int
226
+ Maximum retry attempts.
227
+ retry_count : int
228
+ Current retry attempt count.
229
+ **kwargs
230
+ Additional keyword arguments to pass to ainvoke().
231
+
232
+ Returns
233
+ -------
234
+ None
235
+ """
236
+ response = self._compiled_graph.ainvoke({
237
+ "user_instructions": user_instructions,
238
+ "data_raw": data_raw.to_dict(),
239
+ "target_variable": target_variable,
240
+ "max_retries": max_retries,
241
+ "retry_count": retry_count
242
+ }, **kwargs)
243
+ self.response = response
244
+ return None
245
+
246
+ def invoke_agent(
247
+ self,
248
+ data_raw: pd.DataFrame,
249
+ user_instructions: str=None,
250
+ target_variable: str = None,
251
+ max_retries=3,
252
+ retry_count=0,
253
+ **kwargs
254
+ ):
255
+ """
256
+ Synchronously engineers features for the provided dataset.
257
+ The response is stored in the 'response' attribute.
258
+
259
+ Parameters
260
+ ----------
261
+ data_raw : pd.DataFrame
262
+ The raw dataset to be processed.
263
+ user_instructions : str, optional
264
+ Instructions for feature engineering agent.
265
+ target_variable : str, optional
266
+ The name of the target variable (if any).
267
+ max_retries : int
268
+ Maximum retry attempts.
269
+ retry_count : int
270
+ Current retry attempt count.
271
+ **kwargs
272
+ Additional keyword arguments to pass to invoke().
273
+
274
+ Returns
275
+ -------
276
+ None
277
+ """
278
+ response = self._compiled_graph.invoke({
279
+ "user_instructions": user_instructions,
280
+ "data_raw": data_raw.to_dict(),
281
+ "target_variable": target_variable,
282
+ "max_retries": max_retries,
283
+ "retry_count": retry_count
284
+ }, **kwargs)
285
+ self.response = response
286
+ return None
287
+
288
+ def explain_feature_engineering_steps(self):
289
+ """
290
+ Provides an explanation of the feature engineering steps performed by the agent.
291
+
292
+ Returns
293
+ -------
294
+ str or list
295
+ Explanation of the feature engineering steps.
296
+ """
297
+ if self.response:
298
+ return self.response.get("messages", [])
299
+ return []
300
+
301
+ def get_log_summary(self, markdown=False):
302
+ """
303
+ Returns a short log summary (the path of the logged function file), if one is available.
304
+
305
+ Parameters
306
+ ----------
307
+ markdown : bool, optional
308
+ If True, returns Markdown-formatted output.
309
+
310
+ Returns
311
+ -------
312
+ str or None
313
+ Summary of logs, or None if not available.
314
+ """
315
+ if self.response and self.response.get("feature_engineer_function_path"):
316
+ log_details = f"Log Path: {self.response.get('feature_engineer_function_path')}"
317
+ if markdown:
318
+ return Markdown(log_details)
319
+ else:
320
+ return log_details
321
+ return None
322
+
323
+ def get_data_engineered(self):
324
+ """
325
+ Retrieves the engineered data stored after running invoke/ainvoke.
326
+
327
+ Returns
328
+ -------
329
+ pd.DataFrame or None
330
+ The engineered dataset as a pandas DataFrame.
331
+ """
332
+ if self.response and "data_engineered" in self.response:
333
+ return pd.DataFrame(self.response["data_engineered"])
334
+ return None
335
+
336
+ def get_data_raw(self):
337
+ """
338
+ Retrieves the raw data.
339
+
340
+ Returns
341
+ -------
342
+ pd.DataFrame or None
343
+ The raw dataset as a pandas DataFrame if available.
344
+ """
345
+ if self.response and "data_raw" in self.response:
346
+ return pd.DataFrame(self.response["data_raw"])
347
+ return None
348
+
349
+ def get_feature_engineer_function(self, markdown=False):
350
+ """
351
+ Retrieves the feature engineering function generated by the agent.
352
+
353
+ Parameters
354
+ ----------
355
+ markdown : bool, optional
356
+ If True, returns the function in Markdown code block format.
357
+
358
+ Returns
359
+ -------
360
+ str or None
361
+ The Python function code, or None if unavailable.
362
+ """
363
+ if self.response and "feature_engineer_function" in self.response:
364
+ code = self.response["feature_engineer_function"]
365
+ if markdown:
366
+ return Markdown(f"```python\n{code}\n```")
367
+ return code
368
+ return None
369
+
370
+ def get_recommended_feature_engineering_steps(self, markdown=False):
371
+ """
372
+ Retrieves the agent's recommended feature engineering steps.
373
+
374
+ Parameters
375
+ ----------
376
+ markdown : bool, optional
377
+ If True, returns the steps in Markdown format.
378
+
379
+ Returns
380
+ -------
381
+ str or None
382
+ The recommended steps, or None if not available.
383
+ """
384
+ if self.response and "recommended_steps" in self.response:
385
+ steps = self.response["recommended_steps"]
386
+ if markdown:
387
+ return Markdown(steps)
388
+ return steps
389
+ return None
390
+
391
+ def get_response(self):
392
+ """
393
+ Returns the agent's full response dictionary.
394
+
395
+ Returns
396
+ -------
397
+ dict or None
398
+ The response dictionary if available, otherwise None.
399
+ """
400
+ return self.response
401
+
402
+ def show(self):
403
+ """
404
+ Displays the agent's mermaid diagram for visual inspection of the compiled graph.
405
+ """
406
+ return self._compiled_graph.show()
407
+
408
+
36
409
  # * Feature Engineering Agent
37
410
 
38
- def make_feature_engineering_agent(model, log=False, log_path=None, overwrite = True, human_in_the_loop=False, bypass_recommended_steps=False, bypass_explain_code=False):
411
+ def make_feature_engineering_agent(
412
+ model,
413
+ n_samples=30,
414
+ log=False,
415
+ log_path=None,
416
+ file_name="feature_engineer.py",
417
+ function_name="feature_engineer",
418
+ overwrite = True,
419
+ human_in_the_loop=False,
420
+ bypass_recommended_steps=False,
421
+ bypass_explain_code=False,
422
+ ):
39
423
  """
40
424
  Creates a feature engineering agent that can be run on a dataset. The agent applies various feature engineering
41
425
  techniques, such as encoding categorical variables, scaling numeric variables, creating interaction terms,
@@ -61,11 +445,19 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
61
445
  ----------
62
446
  model : langchain.llms.base.LLM
63
447
  The language model to use to generate code.
448
+ n_samples : int, optional
449
+ The number of data samples to use for generating the feature engineering code. Defaults to 30.
450
+ If you get an error due to maximum tokens, try reducing this number.
451
+ > "This model's maximum context length is 128000 tokens. However, your messages resulted in 333858 tokens. Please reduce the length of the messages."
64
452
  log : bool, optional
65
453
  Whether or not to log the code generated and any errors that occur.
66
454
  Defaults to False.
67
455
  log_path : str, optional
68
456
  The path to the directory where the log files should be stored. Defaults to "logs/".
457
+ file_name : str, optional
458
+ The name of the file to save the log to. Defaults to "feature_engineer.py".
459
+ function_name : str, optional
460
+ The name of the function that will be generated. Defaults to "feature_engineer".
69
461
  overwrite : bool, optional
70
462
  Whether or not to overwrite the log file if it already exists. If False, a unique file name will be created.
71
463
  Defaults to True.
@@ -102,10 +494,15 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
102
494
 
103
495
  Returns
104
496
  -------
105
- app : langchain.graphs.StateGraph
497
+ app : langchain.graphs.CompiledStateGraph
106
498
  The feature engineering agent as a state graph.
107
499
  """
108
500
  llm = model
501
+
502
+ # Human in the loop requires recommended steps
503
+ if bypass_recommended_steps and human_in_the_loop:
504
+ bypass_recommended_steps = False
505
+ print("Bypass recommended steps set to False to enable human in the loop.")
109
506
 
110
507
  # Setup Log Directory
111
508
  if log:
@@ -125,6 +522,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
125
522
  all_datasets_summary: str
126
523
  feature_engineer_function: str
127
524
  feature_engineer_function_path: str
525
+ feature_engineer_file_name: str
128
526
  feature_engineer_function_name: str
129
527
  feature_engineer_error: str
130
528
  max_retries: int
@@ -135,7 +533,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
135
533
  Recommend a series of feature engineering steps based on the input data.
136
534
  These recommended steps will be appended to the user_instructions.
137
535
  """
138
- print("---FEATURE ENGINEERING AGENT----")
536
+ print(format_agent_name(AGENT_NAME))
139
537
  print(" * RECOMMEND FEATURE ENGINEERING STEPS")
140
538
 
141
539
  # Prompt to get recommended steps from the LLM
@@ -182,6 +580,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
182
580
 
183
581
  Avoid these:
184
582
  1. Do not include steps to save files.
583
+ 2. Do not include unrelated user instructions that are not related to the feature engineering.
185
584
  """,
186
585
  input_variables=["user_instructions", "recommended_steps", "all_datasets_summary"]
187
586
  )
@@ -189,7 +588,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
189
588
  data_raw = state.get("data_raw")
190
589
  df = pd.DataFrame.from_dict(data_raw)
191
590
 
192
- all_datasets_summary = get_dataframe_summary([df])
591
+ all_datasets_summary = get_dataframe_summary([df], n_sample=n_samples)
193
592
 
194
593
  all_datasets_summary_str = "\n\n".join(all_datasets_summary)
195
594
 
@@ -201,29 +600,57 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
201
600
  })
202
601
 
203
602
  return {
204
- "recommended_steps": "\n\n# Recommended Feature Engineering Steps:\n" + recommended_steps.content.strip(),
603
+ "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended Feature Engineering Steps:"),
205
604
  "all_datasets_summary": all_datasets_summary_str
206
605
  }
207
606
 
208
- def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "create_feature_engineering_code"]]:
209
- return node_func_human_review(
210
- state=state,
211
- prompt_text="Is the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}",
212
- yes_goto="create_feature_engineering_code",
213
- no_goto="recommend_feature_engineering_steps",
214
- user_instructions_key="user_instructions",
215
- recommended_steps_key="recommended_steps"
216
- )
607
+ # Human Review
608
+
609
+ prompt_text_human_review = "Are the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
610
+
611
+ if not bypass_explain_code:
612
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "explain_feature_engineering_code"]]:
613
+ return node_func_human_review(
614
+ state=state,
615
+ prompt_text=prompt_text_human_review,
616
+ yes_goto= 'explain_feature_engineering_code',
617
+ no_goto="recommend_feature_engineering_steps",
618
+ user_instructions_key="user_instructions",
619
+ recommended_steps_key="recommended_steps",
620
+ code_snippet_key="feature_engineer_function",
621
+ )
622
+ else:
623
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "__end__"]]:
624
+ return node_func_human_review(
625
+ state=state,
626
+ prompt_text=prompt_text_human_review,
627
+ yes_goto= '__end__',
628
+ no_goto="recommend_feature_engineering_steps",
629
+ user_instructions_key="user_instructions",
630
+ recommended_steps_key="recommended_steps",
631
+ code_snippet_key="feature_engineer_function",
632
+ )
217
633
 
218
634
  def create_feature_engineering_code(state: GraphState):
219
635
  if bypass_recommended_steps:
220
- print("---FEATURE ENGINEERING AGENT----")
636
+ print(format_agent_name(AGENT_NAME))
637
+
638
+ data_raw = state.get("data_raw")
639
+ df = pd.DataFrame.from_dict(data_raw)
640
+
641
+ all_datasets_summary = get_dataframe_summary([df], n_sample=n_samples)
642
+
643
+ all_datasets_summary_str = "\n\n".join(all_datasets_summary)
644
+
645
+ else:
646
+ all_datasets_summary_str = state.get("all_datasets_summary")
647
+
221
648
  print(" * CREATE FEATURE ENGINEERING CODE")
222
649
 
223
650
  feature_engineering_prompt = PromptTemplate(
224
651
  template="""
225
652
 
226
- You are a Feature Engineering Agent. Your job is to create a feature_engineer() function that can be run on the data provided using the following recommended steps.
653
+ You are a Feature Engineering Agent. Your job is to create a {function_name}() function that can be run on the data provided using the following recommended steps.
227
654
 
228
655
  Recommended Steps:
229
656
  {recommended_steps}
@@ -237,11 +664,11 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
237
664
 
238
665
  You can use Pandas, Numpy, and Scikit Learn libraries to feature engineer the data.
239
666
 
240
- Return Python code in ```python``` format with a single function definition, feature_engineer(data_raw), including all imports inside the function.
667
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), including all imports inside the function.
241
668
 
242
669
  Return code to provide the feature engineering function:
243
670
 
244
- def feature_engineer(data_raw):
671
+ def {function_name}(data_raw):
245
672
  import pandas as pd
246
673
  import numpy as np
247
674
  ...
@@ -264,7 +691,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
264
691
 
265
692
 
266
693
  """,
267
- input_variables=["recommeded_steps", "target_variable", "all_datasets_summary"]
694
+ input_variables=["recommeded_steps", "target_variable", "all_datasets_summary", "function_name"]
268
695
  )
269
696
 
270
697
  feature_engineering_agent = feature_engineering_prompt | llm | PythonOutputParser()
@@ -272,16 +699,17 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
272
699
  response = feature_engineering_agent.invoke({
273
700
  "recommended_steps": state.get("recommended_steps"),
274
701
  "target_variable": state.get("target_variable"),
275
- "all_datasets_summary": state.get("all_datasets_summary"),
702
+ "all_datasets_summary": all_datasets_summary_str,
703
+ "function_name": function_name
276
704
  })
277
705
 
278
706
  response = relocate_imports_inside_function(response)
279
707
  response = add_comments_to_top(response, agent_name=AGENT_NAME)
280
708
 
281
709
  # For logging: store the code generated
282
- file_path, file_name = log_ai_function(
710
+ file_path, file_name_2 = log_ai_function(
283
711
  response=response,
284
- file_name="feature_engineer.py",
712
+ file_name=file_name,
285
713
  log=log,
286
714
  log_path=log_path,
287
715
  overwrite=overwrite
@@ -290,11 +718,11 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
290
718
  return {
291
719
  "feature_engineer_function": response,
292
720
  "feature_engineer_function_path": file_path,
293
- "feature_engineer_function_name": file_name
721
+ "feature_engineer_file_name": file_name_2,
722
+ "feature_engineer_function_name": function_name,
723
+ "all_datasets_summary": all_datasets_summary_str
294
724
  }
295
725
 
296
-
297
-
298
726
  def execute_feature_engineering_code(state):
299
727
  return node_func_execute_agent_code_on_data(
300
728
  state=state,
@@ -302,7 +730,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
302
730
  result_key="data_engineered",
303
731
  error_key="feature_engineer_error",
304
732
  code_snippet_key="feature_engineer_function",
305
- agent_function_name="feature_engineer",
733
+ agent_function_name=state.get("feature_engineer_function_name"),
306
734
  pre_processing=lambda data: pd.DataFrame.from_dict(data),
307
735
  post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
308
736
  error_message_prefix="An error occurred during feature engineering: "
@@ -310,11 +738,13 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
310
738
 
311
739
  def fix_feature_engineering_code(state: GraphState):
312
740
  feature_engineer_prompt = """
313
- You are a Feature Engineering Agent. Your job is to fix the feature_engineer() function that currently contains errors.
741
+ You are a Feature Engineering Agent. Your job is to fix the {function_name}() function that currently contains errors.
742
+
743
+ Provide only the corrected function definition for {function_name}().
314
744
 
315
- Provide only the corrected function definition.
745
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), that includes all imports inside the function.
316
746
 
317
- Broken code:
747
+ This is the broken code (please fix):
318
748
  {code_snippet}
319
749
 
320
750
  Last Known Error:
@@ -330,6 +760,7 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
330
760
  agent_name=AGENT_NAME,
331
761
  log=log,
332
762
  file_path=state.get("feature_engineer_function_path"),
763
+ function_name=state.get("feature_engineer_function_name"),
333
764
  )
334
765
 
335
766
  def explain_feature_engineering_code(state: GraphState):
@@ -366,9 +797,11 @@ def make_feature_engineering_agent(model, log=False, log_path=None, overwrite =
366
797
  fix_code_node_name="fix_feature_engineering_code",
367
798
  explain_code_node_name="explain_feature_engineering_code",
368
799
  error_key="feature_engineer_error",
800
+ max_retries_key = "max_retries",
801
+ retry_count_key = "retry_count",
369
802
  human_in_the_loop=human_in_the_loop,
370
803
  human_review_node_name="human_review",
371
- checkpointer=MemorySaver() if human_in_the_loop else None,
804
+ checkpointer=MemorySaver(),
372
805
  bypass_recommended_steps=bypass_recommended_steps,
373
806
  bypass_explain_code=bypass_explain_code,
374
807
  )