ai-data-science-team 0.0.0.9007__py3-none-any.whl → 0.0.0.9009__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (25) hide show
  1. ai_data_science_team/_version.py +1 -1
  2. ai_data_science_team/agents/__init__.py +4 -5
  3. ai_data_science_team/agents/data_cleaning_agent.py +268 -116
  4. ai_data_science_team/agents/data_visualization_agent.py +470 -41
  5. ai_data_science_team/agents/data_wrangling_agent.py +471 -31
  6. ai_data_science_team/agents/feature_engineering_agent.py +426 -41
  7. ai_data_science_team/agents/sql_database_agent.py +458 -58
  8. ai_data_science_team/ml_agents/__init__.py +1 -0
  9. ai_data_science_team/ml_agents/h2o_ml_agent.py +1032 -0
  10. ai_data_science_team/multiagents/__init__.py +1 -0
  11. ai_data_science_team/multiagents/sql_data_analyst.py +398 -0
  12. ai_data_science_team/multiagents/supervised_data_analyst.py +2 -0
  13. ai_data_science_team/templates/__init__.py +3 -1
  14. ai_data_science_team/templates/agent_templates.py +319 -43
  15. ai_data_science_team/tools/metadata.py +94 -62
  16. ai_data_science_team/tools/regex.py +86 -1
  17. ai_data_science_team/utils/__init__.py +0 -0
  18. ai_data_science_team/utils/plotly.py +24 -0
  19. ai_data_science_team-0.0.0.9009.dist-info/METADATA +245 -0
  20. ai_data_science_team-0.0.0.9009.dist-info/RECORD +28 -0
  21. ai_data_science_team-0.0.0.9007.dist-info/METADATA +0 -183
  22. ai_data_science_team-0.0.0.9007.dist-info/RECORD +0 -21
  23. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/LICENSE +0 -0
  24. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/WHEEL +0 -0
  25. {ai_data_science_team-0.0.0.9007.dist-info → ai_data_science_team-0.0.0.9009.dist-info}/top_level.txt +0 -0
@@ -14,18 +14,27 @@ from langgraph.types import Command
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
 
16
16
  import os
17
- import io
17
+ import json
18
18
  import pandas as pd
19
19
 
20
+ from IPython.display import Markdown
21
+
20
22
  from ai_data_science_team.templates import(
21
23
  node_func_execute_agent_code_on_data,
22
24
  node_func_human_review,
23
25
  node_func_fix_agent_code,
24
- node_func_explain_agent_code,
25
- create_coding_agent_graph
26
+ node_func_report_agent_outputs,
27
+ create_coding_agent_graph,
28
+ BaseAgent,
26
29
  )
27
30
  from ai_data_science_team.tools.parsers import PythonOutputParser
28
- from ai_data_science_team.tools.regex import relocate_imports_inside_function, add_comments_to_top, format_agent_name
31
+ from ai_data_science_team.tools.regex import (
32
+ relocate_imports_inside_function,
33
+ add_comments_to_top,
34
+ format_agent_name,
35
+ format_recommended_steps,
36
+ get_generic_summary,
37
+ )
29
38
  from ai_data_science_team.tools.metadata import get_dataframe_summary
30
39
  from ai_data_science_team.tools.logging import log_ai_function
31
40
 
@@ -33,6 +42,351 @@ from ai_data_science_team.tools.logging import log_ai_function
33
42
  AGENT_NAME = "feature_engineering_agent"
34
43
  LOG_PATH = os.path.join(os.getcwd(), "logs/")
35
44
 
45
+ # Class
46
+
47
+ class FeatureEngineeringAgent(BaseAgent):
48
+ """
49
+ Creates a feature engineering agent that can process datasets based on user-defined instructions or
50
+ default feature engineering steps. The agent generates a Python function to engineer features, executes it,
51
+ and logs the process, including code and errors. It is designed to facilitate reproducible and
52
+ customizable feature engineering workflows.
53
+
54
+ The agent can perform the following default feature engineering steps unless instructed otherwise:
55
+ - Convert features to appropriate data types
56
+ - Remove features that have unique values for each row
57
+ - Remove constant features
58
+ - Encode high-cardinality categoricals (threshold <= 5% of dataset) as 'other'
59
+ - One-hot-encode categorical variables
60
+ - Convert booleans to integer (1/0)
61
+ - Create datetime-based features (if applicable)
62
+ - Handle target variable encoding if specified
63
+ - Any user-provided instructions to add, remove, or modify steps
64
+
65
+ Parameters
66
+ ----------
67
+ model : langchain.llms.base.LLM
68
+ The language model used to generate the feature engineering function.
69
+ n_samples : int, optional
70
+ Number of samples used when summarizing the dataset. Defaults to 30.
71
+ log : bool, optional
72
+ Whether to log the generated code and errors. Defaults to False.
73
+ log_path : str, optional
74
+ Directory path for storing log files. Defaults to None.
75
+ file_name : str, optional
76
+ Name of the file for saving the generated response. Defaults to "feature_engineer.py".
77
+ function_name : str, optional
78
+ Name of the generated feature engineering function. Defaults to "feature_engineer".
79
+ overwrite : bool, optional
80
+ Whether to overwrite the log file if it exists. If False, a unique file name is created. Defaults to True.
81
+ human_in_the_loop : bool, optional
82
+ Enables user review of feature engineering instructions. Defaults to False.
83
+ bypass_recommended_steps : bool, optional
84
+ If True, skips the default recommended steps. Defaults to False.
85
+ bypass_explain_code : bool, optional
86
+ If True, skips the step that provides code explanations. Defaults to False.
87
+
88
+ Methods
89
+ -------
90
+ update_params(**kwargs)
91
+ Updates the agent's parameters and rebuilds the compiled state graph.
92
+ ainvoke_agent(
93
+ data_raw: pd.DataFrame,
94
+ user_instructions: str = None,
95
+ target_variable: str = None,
96
+ max_retries=3,
97
+ retry_count=0
98
+ )
99
+ Engineers features from the provided dataset asynchronously based on user instructions.
100
+ invoke_agent(
101
+ data_raw: pd.DataFrame,
102
+ user_instructions: str = None,
103
+ target_variable: str = None,
104
+ max_retries=3,
105
+ retry_count=0
106
+ )
107
+ Engineers features from the provided dataset synchronously based on user instructions.
108
+ get_workflow_summary()
109
+ Retrieves a summary of the agent's workflow.
110
+ get_log_summary()
111
+ Retrieves a summary of logged operations if logging is enabled.
112
+ get_data_engineered()
113
+ Retrieves the feature-engineered dataset as a pandas DataFrame.
114
+ get_data_raw()
115
+ Retrieves the raw dataset as a pandas DataFrame.
116
+ get_feature_engineer_function()
117
+ Retrieves the generated Python function used for feature engineering.
118
+ get_recommended_feature_engineering_steps()
119
+ Retrieves the agent's recommended feature engineering steps.
120
+ get_response()
121
+ Returns the response from the agent as a dictionary.
122
+ show()
123
+ Displays the agent's mermaid diagram.
124
+
125
+ Examples
126
+ --------
127
+ ```python
128
+ import pandas as pd
129
+ from langchain_openai import ChatOpenAI
130
+ from ai_data_science_team.agents import FeatureEngineeringAgent
131
+
132
+ llm = ChatOpenAI(model="gpt-4o-mini")
133
+
134
+ feature_agent = FeatureEngineeringAgent(
135
+ model=llm,
136
+ n_samples=30,
137
+ log=True,
138
+ log_path="logs",
139
+ human_in_the_loop=True
140
+ )
141
+
142
+ df = pd.read_csv("https://raw.githubusercontent.com/business-science/ai-data-science-team/refs/heads/master/data/churn_data.csv")
143
+
144
+ feature_agent.invoke_agent(
145
+ user_instructions="Also encode the 'PaymentMethod' column with one-hot encoding.",
146
+ data_raw=df,
147
+ target_variable="Churn",
148
+ max_retries=3,
149
+ retry_count=0
150
+ )
151
+
152
+ engineered_data = feature_agent.get_data_engineered()
153
+ response = feature_agent.get_response()
154
+ ```
155
+
156
+ Returns
157
+ -------
158
+ FeatureEngineeringAgent : langchain.graphs.CompiledStateGraph
159
+ A feature engineering agent implemented as a compiled state graph.
160
+ """
161
+
162
+ def __init__(
163
+ self,
164
+ model,
165
+ n_samples=30,
166
+ log=False,
167
+ log_path=None,
168
+ file_name="feature_engineer.py",
169
+ function_name="feature_engineer",
170
+ overwrite=True,
171
+ human_in_the_loop=False,
172
+ bypass_recommended_steps=False,
173
+ bypass_explain_code=False
174
+ ):
175
+ self._params = {
176
+ "model": model,
177
+ "n_samples": n_samples,
178
+ "log": log,
179
+ "log_path": log_path,
180
+ "file_name": file_name,
181
+ "function_name": function_name,
182
+ "overwrite": overwrite,
183
+ "human_in_the_loop": human_in_the_loop,
184
+ "bypass_recommended_steps": bypass_recommended_steps,
185
+ "bypass_explain_code": bypass_explain_code
186
+ }
187
+ self._compiled_graph = self._make_compiled_graph()
188
+ self.response = None
189
+
190
+ def _make_compiled_graph(self):
191
+ """
192
+ Create the compiled graph for the feature engineering agent.
193
+ Running this method will reset the response to None.
194
+ """
195
+ self.response = None
196
+ return make_feature_engineering_agent(**self._params)
197
+
198
+ def update_params(self, **kwargs):
199
+ """
200
+ Updates the agent's parameters and rebuilds the compiled graph.
201
+ """
202
+ for k, v in kwargs.items():
203
+ self._params[k] = v
204
+ self._compiled_graph = self._make_compiled_graph()
205
+
206
+ def ainvoke_agent(
207
+ self,
208
+ data_raw: pd.DataFrame,
209
+ user_instructions: str=None,
210
+ target_variable: str = None,
211
+ max_retries=3,
212
+ retry_count=0,
213
+ **kwargs
214
+ ):
215
+ """
216
+ Asynchronously engineers features for the provided dataset.
217
+ The response is stored in the 'response' attribute.
218
+
219
+ Parameters
220
+ ----------
221
+ data_raw : pd.DataFrame
222
+ The raw dataset to be processed.
223
+ user_instructions : str, optional
224
+ Instructions for feature engineering.
225
+ target_variable : str, optional
226
+ The name of the target variable (if any).
227
+ max_retries : int
228
+ Maximum retry attempts.
229
+ retry_count : int
230
+ Current retry attempt count.
231
+ **kwargs
232
+ Additional keyword arguments to pass to ainvoke().
233
+
234
+ Returns
235
+ -------
236
+ None
237
+ """
238
+ response = self._compiled_graph.ainvoke({
239
+ "user_instructions": user_instructions,
240
+ "data_raw": data_raw.to_dict(),
241
+ "target_variable": target_variable,
242
+ "max_retries": max_retries,
243
+ "retry_count": retry_count
244
+ }, **kwargs)
245
+ self.response = response
246
+ return None
247
+
248
+ def invoke_agent(
249
+ self,
250
+ data_raw: pd.DataFrame,
251
+ user_instructions: str=None,
252
+ target_variable: str = None,
253
+ max_retries=3,
254
+ retry_count=0,
255
+ **kwargs
256
+ ):
257
+ """
258
+ Synchronously engineers features for the provided dataset.
259
+ The response is stored in the 'response' attribute.
260
+
261
+ Parameters
262
+ ----------
263
+ data_raw : pd.DataFrame
264
+ The raw dataset to be processed.
265
+ user_instructions : str, optional
266
+ Instructions for feature engineering agent.
267
+ target_variable : str, optional
268
+ The name of the target variable (if any).
269
+ max_retries : int
270
+ Maximum retry attempts.
271
+ retry_count : int
272
+ Current retry attempt count.
273
+ **kwargs
274
+ Additional keyword arguments to pass to invoke().
275
+
276
+ Returns
277
+ -------
278
+ None
279
+ """
280
+ response = self._compiled_graph.invoke({
281
+ "user_instructions": user_instructions,
282
+ "data_raw": data_raw.to_dict(),
283
+ "target_variable": target_variable,
284
+ "max_retries": max_retries,
285
+ "retry_count": retry_count
286
+ }, **kwargs)
287
+ self.response = response
288
+ return None
289
+
290
+ def get_workflow_summary(self, markdown=False):
291
+ """
292
+ Retrieves the agent's workflow summary, built from the last message of the stored response (available after the agent has been invoked).
293
+ """
294
+ if self.response and self.response.get("messages"):
295
+ summary = get_generic_summary(json.loads(self.response.get("messages")[-1].content))
296
+ if markdown:
297
+ return Markdown(summary)
298
+ else:
299
+ return summary
300
+
301
+ def get_log_summary(self, markdown=False):
302
+ """
303
+ Returns a summary of the logged function path and function name, if they are present in the response.
304
+ """
305
+ if self.response:
306
+ if self.response.get('feature_engineer_function_path'):
307
+ log_details = f"""
308
+ ## Featuring Engineering Agent Log Summary:
309
+
310
+ Function Path: {self.response.get('feature_engineer_function_path')}
311
+
312
+ Function Name: {self.response.get('feature_engineer_function_name')}
313
+ """
314
+ if markdown:
315
+ return Markdown(log_details)
316
+ else:
317
+ return log_details
318
+
319
+ def get_data_engineered(self):
320
+ """
321
+ Retrieves the engineered data stored after running invoke/ainvoke.
322
+
323
+ Returns
324
+ -------
325
+ pd.DataFrame or None
326
+ The engineered dataset as a pandas DataFrame.
327
+ """
328
+ if self.response and "data_engineered" in self.response:
329
+ return pd.DataFrame(self.response["data_engineered"])
330
+ return None
331
+
332
+ def get_data_raw(self):
333
+ """
334
+ Retrieves the raw data.
335
+
336
+ Returns
337
+ -------
338
+ pd.DataFrame or None
339
+ The raw dataset as a pandas DataFrame if available.
340
+ """
341
+ if self.response and "data_raw" in self.response:
342
+ return pd.DataFrame(self.response["data_raw"])
343
+ return None
344
+
345
+ def get_feature_engineer_function(self, markdown=False):
346
+ """
347
+ Retrieves the feature engineering function generated by the agent.
348
+
349
+ Parameters
350
+ ----------
351
+ markdown : bool, optional
352
+ If True, returns the function in Markdown code block format.
353
+
354
+ Returns
355
+ -------
356
+ str or None
357
+ The Python function code, or None if unavailable.
358
+ """
359
+ if self.response and "feature_engineer_function" in self.response:
360
+ code = self.response["feature_engineer_function"]
361
+ if markdown:
362
+ return Markdown(f"```python\n{code}\n```")
363
+ return code
364
+ return None
365
+
366
+ def get_recommended_feature_engineering_steps(self, markdown=False):
367
+ """
368
+ Retrieves the agent's recommended feature engineering steps.
369
+
370
+ Parameters
371
+ ----------
372
+ markdown : bool, optional
373
+ If True, returns the steps in Markdown format.
374
+
375
+ Returns
376
+ -------
377
+ str or None
378
+ The recommended steps, or None if not available.
379
+ """
380
+ if self.response and "recommended_steps" in self.response:
381
+ steps = self.response["recommended_steps"]
382
+ if markdown:
383
+ return Markdown(steps)
384
+ return steps
385
+ return None
386
+
387
+
388
+
389
+
36
390
  # * Feature Engineering Agent
37
391
 
38
392
  def make_feature_engineering_agent(
@@ -41,6 +395,7 @@ def make_feature_engineering_agent(
41
395
  log=False,
42
396
  log_path=None,
43
397
  file_name="feature_engineer.py",
398
+ function_name="feature_engineer",
44
399
  overwrite = True,
45
400
  human_in_the_loop=False,
46
401
  bypass_recommended_steps=False,
@@ -82,6 +437,8 @@ def make_feature_engineering_agent(
82
437
  The path to the directory where the log files should be stored. Defaults to "logs/".
83
438
  file_name : str, optional
84
439
  The name of the file to save the log to. Defaults to "feature_engineer.py".
440
+ function_name : str, optional
441
+ The name of the function that will be generated. Defaults to "feature_engineer".
85
442
  overwrite : bool, optional
86
443
  Whether or not to overwrite the log file if it already exists. If False, a unique file name will be created.
87
444
  Defaults to True.
@@ -122,6 +479,11 @@ def make_feature_engineering_agent(
122
479
  The feature engineering agent as a state graph.
123
480
  """
124
481
  llm = model
482
+
483
+ # Human in the loop requires recommended steps
484
+ if bypass_recommended_steps and human_in_the_loop:
485
+ bypass_recommended_steps = False
486
+ print("Bypass recommended steps set to False to enable human in the loop.")
125
487
 
126
488
  # Setup Log Directory
127
489
  if log:
@@ -141,6 +503,7 @@ def make_feature_engineering_agent(
141
503
  all_datasets_summary: str
142
504
  feature_engineer_function: str
143
505
  feature_engineer_function_path: str
506
+ feature_engineer_file_name: str
144
507
  feature_engineer_function_name: str
145
508
  feature_engineer_error: str
146
509
  max_retries: int
@@ -194,7 +557,7 @@ def make_feature_engineering_agent(
194
557
  Below are summaries of all datasets provided:
195
558
  {all_datasets_summary}
196
559
 
197
- Return the steps as a numbered list (no code, just the steps).
560
+ Return steps as a numbered list. You can return short code snippets to demonstrate actions. But do not return a fully coded solution. The code will be generated separately by a Coding Agent.
198
561
 
199
562
  Avoid these:
200
563
  1. Do not include steps to save files.
@@ -218,19 +581,36 @@ def make_feature_engineering_agent(
218
581
  })
219
582
 
220
583
  return {
221
- "recommended_steps": "\n\n# Recommended Feature Engineering Steps:\n" + recommended_steps.content.strip(),
584
+ "recommended_steps": format_recommended_steps(recommended_steps.content.strip(), heading="# Recommended Feature Engineering Steps:"),
222
585
  "all_datasets_summary": all_datasets_summary_str
223
586
  }
224
587
 
225
- def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "create_feature_engineering_code"]]:
226
- return node_func_human_review(
227
- state=state,
228
- prompt_text="Is the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}",
229
- yes_goto="create_feature_engineering_code",
230
- no_goto="recommend_feature_engineering_steps",
231
- user_instructions_key="user_instructions",
232
- recommended_steps_key="recommended_steps"
233
- )
588
+ # Human Review
589
+
590
+ prompt_text_human_review = "Are the following feature engineering instructions correct? (Answer 'yes' or provide modifications)\n{steps}"
591
+
592
+ if not bypass_explain_code:
593
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "explain_feature_engineering_code"]]:
594
+ return node_func_human_review(
595
+ state=state,
596
+ prompt_text=prompt_text_human_review,
597
+ yes_goto= 'explain_feature_engineering_code',
598
+ no_goto="recommend_feature_engineering_steps",
599
+ user_instructions_key="user_instructions",
600
+ recommended_steps_key="recommended_steps",
601
+ code_snippet_key="feature_engineer_function",
602
+ )
603
+ else:
604
+ def human_review(state: GraphState) -> Command[Literal["recommend_feature_engineering_steps", "__end__"]]:
605
+ return node_func_human_review(
606
+ state=state,
607
+ prompt_text=prompt_text_human_review,
608
+ yes_goto= '__end__',
609
+ no_goto="recommend_feature_engineering_steps",
610
+ user_instructions_key="user_instructions",
611
+ recommended_steps_key="recommended_steps",
612
+ code_snippet_key="feature_engineer_function",
613
+ )
234
614
 
235
615
  def create_feature_engineering_code(state: GraphState):
236
616
  if bypass_recommended_steps:
@@ -250,8 +630,7 @@ def make_feature_engineering_agent(
250
630
 
251
631
  feature_engineering_prompt = PromptTemplate(
252
632
  template="""
253
-
254
- You are a Feature Engineering Agent. Your job is to create a feature_engineer() function that can be run on the data provided using the following recommended steps.
633
+ You are a Feature Engineering Agent. Your job is to create a {function_name}() function that can be run on the data provided using the following recommended steps.
255
634
 
256
635
  Recommended Steps:
257
636
  {recommended_steps}
@@ -265,11 +644,11 @@ def make_feature_engineering_agent(
265
644
 
266
645
  You can use Pandas, Numpy, and Scikit Learn libraries to feature engineer the data.
267
646
 
268
- Return Python code in ```python``` format with a single function definition, feature_engineer(data_raw), including all imports inside the function.
647
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), including all imports inside the function.
269
648
 
270
649
  Return code to provide the feature engineering function:
271
650
 
272
- def feature_engineer(data_raw):
651
+ def {function_name}(data_raw):
273
652
  import pandas as pd
274
653
  import numpy as np
275
654
  ...
@@ -292,7 +671,7 @@ def make_feature_engineering_agent(
292
671
 
293
672
 
294
673
  """,
295
- input_variables=["recommeded_steps", "target_variable", "all_datasets_summary"]
674
+ input_variables=["recommeded_steps", "target_variable", "all_datasets_summary", "function_name"]
296
675
  )
297
676
 
298
677
  feature_engineering_agent = feature_engineering_prompt | llm | PythonOutputParser()
@@ -301,6 +680,7 @@ def make_feature_engineering_agent(
301
680
  "recommended_steps": state.get("recommended_steps"),
302
681
  "target_variable": state.get("target_variable"),
303
682
  "all_datasets_summary": all_datasets_summary_str,
683
+ "function_name": function_name
304
684
  })
305
685
 
306
686
  response = relocate_imports_inside_function(response)
@@ -318,12 +698,11 @@ def make_feature_engineering_agent(
318
698
  return {
319
699
  "feature_engineer_function": response,
320
700
  "feature_engineer_function_path": file_path,
321
- "feature_engineer_function_name": file_name_2,
701
+ "feature_engineer_file_name": file_name_2,
702
+ "feature_engineer_function_name": function_name,
322
703
  "all_datasets_summary": all_datasets_summary_str
323
704
  }
324
705
 
325
-
326
-
327
706
  def execute_feature_engineering_code(state):
328
707
  return node_func_execute_agent_code_on_data(
329
708
  state=state,
@@ -331,7 +710,7 @@ def make_feature_engineering_agent(
331
710
  result_key="data_engineered",
332
711
  error_key="feature_engineer_error",
333
712
  code_snippet_key="feature_engineer_function",
334
- agent_function_name="feature_engineer",
713
+ agent_function_name=state.get("feature_engineer_function_name"),
335
714
  pre_processing=lambda data: pd.DataFrame.from_dict(data),
336
715
  post_processing=lambda df: df.to_dict() if isinstance(df, pd.DataFrame) else df,
337
716
  error_message_prefix="An error occurred during feature engineering: "
@@ -339,11 +718,13 @@ def make_feature_engineering_agent(
339
718
 
340
719
  def fix_feature_engineering_code(state: GraphState):
341
720
  feature_engineer_prompt = """
342
- You are a Feature Engineering Agent. Your job is to fix the feature_engineer() function that currently contains errors.
721
+ You are a Feature Engineering Agent. Your job is to fix the {function_name}() function that currently contains errors.
722
+
723
+ Provide only the corrected function definition for {function_name}().
343
724
 
344
- Provide only the corrected function definition.
725
+ Return Python code in ```python``` format with a single function definition, {function_name}(data_raw), that includes all imports inside the function.
345
726
 
346
- Broken code:
727
+ This is the broken code (please fix):
347
728
  {code_snippet}
348
729
 
349
730
  Last Known Error:
@@ -359,23 +740,25 @@ def make_feature_engineering_agent(
359
740
  agent_name=AGENT_NAME,
360
741
  log=log,
361
742
  file_path=state.get("feature_engineer_function_path"),
743
+ function_name=state.get("feature_engineer_function_name"),
362
744
  )
363
745
 
364
- def explain_feature_engineering_code(state: GraphState):
365
- return node_func_explain_agent_code(
746
+ # Final reporting node
747
+ def report_agent_outputs(state: GraphState):
748
+ return node_func_report_agent_outputs(
366
749
  state=state,
367
- code_snippet_key="feature_engineer_function",
750
+ keys_to_include=[
751
+ "recommended_steps",
752
+ "feature_engineer_function",
753
+ "feature_engineer_function_path",
754
+ "feature_engineer_function_name",
755
+ "feature_engineer_error",
756
+ ],
368
757
  result_key="messages",
369
- error_key="feature_engineer_error",
370
- llm=llm,
371
758
  role=AGENT_NAME,
372
- explanation_prompt_template="""
373
- Explain the feature engineering steps performed by this function. Keep the explanation clear and concise.\n\n# Feature Engineering Agent:\n\n{code}
374
- """,
375
- success_prefix="# Feature Engineering Agent:\n\n ",
376
- error_message="The Feature Engineering Agent encountered an error during feature engineering. Data could not be explained."
759
+ custom_title="Feature Engineering Agent Outputs"
377
760
  )
378
-
761
+
379
762
  # Create the graph
380
763
  node_functions = {
381
764
  "recommend_feature_engineering_steps": recommend_feature_engineering_steps,
@@ -383,7 +766,7 @@ def make_feature_engineering_agent(
383
766
  "create_feature_engineering_code": create_feature_engineering_code,
384
767
  "execute_feature_engineering_code": execute_feature_engineering_code,
385
768
  "fix_feature_engineering_code": fix_feature_engineering_code,
386
- "explain_feature_engineering_code": explain_feature_engineering_code
769
+ "report_agent_outputs": report_agent_outputs,
387
770
  }
388
771
 
389
772
  app = create_coding_agent_graph(
@@ -393,11 +776,13 @@ def make_feature_engineering_agent(
393
776
  create_code_node_name="create_feature_engineering_code",
394
777
  execute_code_node_name="execute_feature_engineering_code",
395
778
  fix_code_node_name="fix_feature_engineering_code",
396
- explain_code_node_name="explain_feature_engineering_code",
779
+ explain_code_node_name="report_agent_outputs",
397
780
  error_key="feature_engineer_error",
781
+ max_retries_key = "max_retries",
782
+ retry_count_key = "retry_count",
398
783
  human_in_the_loop=human_in_the_loop,
399
784
  human_review_node_name="human_review",
400
- checkpointer=MemorySaver() if human_in_the_loop else None,
785
+ checkpointer=MemorySaver(),
401
786
  bypass_recommended_steps=bypass_recommended_steps,
402
787
  bypass_explain_code=bypass_explain_code,
403
788
  )