dslighting 1.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsat/__init__.py +3 -0
- dsat/benchmark/__init__.py +1 -0
- dsat/benchmark/benchmark.py +168 -0
- dsat/benchmark/datasci.py +291 -0
- dsat/benchmark/mle.py +777 -0
- dsat/benchmark/sciencebench.py +304 -0
- dsat/common/__init__.py +0 -0
- dsat/common/constants.py +11 -0
- dsat/common/exceptions.py +48 -0
- dsat/common/typing.py +19 -0
- dsat/config.py +79 -0
- dsat/models/__init__.py +3 -0
- dsat/models/candidates.py +16 -0
- dsat/models/formats.py +52 -0
- dsat/models/task.py +64 -0
- dsat/operators/__init__.py +0 -0
- dsat/operators/aflow_ops.py +90 -0
- dsat/operators/autokaggle_ops.py +170 -0
- dsat/operators/automind_ops.py +38 -0
- dsat/operators/base.py +22 -0
- dsat/operators/code.py +45 -0
- dsat/operators/dsagent_ops.py +123 -0
- dsat/operators/llm_basic.py +84 -0
- dsat/prompts/__init__.py +0 -0
- dsat/prompts/aflow_prompt.py +76 -0
- dsat/prompts/aide_prompt.py +52 -0
- dsat/prompts/autokaggle_prompt.py +290 -0
- dsat/prompts/automind_prompt.py +29 -0
- dsat/prompts/common.py +51 -0
- dsat/prompts/data_interpreter_prompt.py +82 -0
- dsat/prompts/dsagent_prompt.py +88 -0
- dsat/runner.py +554 -0
- dsat/services/__init__.py +0 -0
- dsat/services/data_analyzer.py +387 -0
- dsat/services/llm.py +486 -0
- dsat/services/llm_single.py +421 -0
- dsat/services/sandbox.py +386 -0
- dsat/services/states/__init__.py +0 -0
- dsat/services/states/autokaggle_state.py +43 -0
- dsat/services/states/base.py +14 -0
- dsat/services/states/dsa_log.py +13 -0
- dsat/services/states/experience.py +237 -0
- dsat/services/states/journal.py +153 -0
- dsat/services/states/operator_library.py +290 -0
- dsat/services/vdb.py +76 -0
- dsat/services/workspace.py +178 -0
- dsat/tasks/__init__.py +3 -0
- dsat/tasks/handlers.py +376 -0
- dsat/templates/open_ended/grade_template.py +107 -0
- dsat/tools/__init__.py +4 -0
- dsat/utils/__init__.py +0 -0
- dsat/utils/context.py +172 -0
- dsat/utils/dynamic_import.py +71 -0
- dsat/utils/parsing.py +33 -0
- dsat/workflows/__init__.py +12 -0
- dsat/workflows/base.py +53 -0
- dsat/workflows/factory.py +439 -0
- dsat/workflows/manual/__init__.py +0 -0
- dsat/workflows/manual/autokaggle_workflow.py +148 -0
- dsat/workflows/manual/data_interpreter_workflow.py +153 -0
- dsat/workflows/manual/deepanalyze_workflow.py +484 -0
- dsat/workflows/manual/dsagent_workflow.py +76 -0
- dsat/workflows/search/__init__.py +0 -0
- dsat/workflows/search/aflow_workflow.py +344 -0
- dsat/workflows/search/aide_workflow.py +283 -0
- dsat/workflows/search/automind_workflow.py +237 -0
- dsat/workflows/templates/__init__.py +0 -0
- dsat/workflows/templates/basic_kaggle_loop.py +71 -0
- dslighting/__init__.py +170 -0
- dslighting/core/__init__.py +13 -0
- dslighting/core/agent.py +646 -0
- dslighting/core/config_builder.py +318 -0
- dslighting/core/data_loader.py +422 -0
- dslighting/core/task_detector.py +422 -0
- dslighting/utils/__init__.py +19 -0
- dslighting/utils/defaults.py +151 -0
- dslighting-1.3.9.dist-info/METADATA +554 -0
- dslighting-1.3.9.dist-info/RECORD +80 -0
- dslighting-1.3.9.dist-info/WHEEL +5 -0
- dslighting-1.3.9.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from typing import Dict
|
|
2
|
+
|
|
3
|
+
from dsat.utils.context import MAX_HISTORY_CHARS, MAX_OUTPUT_CHARS, truncate_output
|
|
4
|
+
from dsat.prompts.common import _dict_to_str, _get_common_guidelines, create_draft_prompt
|
|
5
|
+
|
|
6
|
+
def create_improve_prompt(task_context: Dict, memory_summary: str, previous_code: str, previous_analysis: str, previous_plan: str = "", previous_output: str = "") -> str:
    """Build the system prompt asking the model to improve a working solution.

    Each free-form input is truncated to MAX_OUTPUT_CHARS before being
    embedded, so the rendered prompt stays within context limits.
    Returns the prompt rendered to plain text via ``_dict_to_str``.
    """
    code_snippet = truncate_output(previous_code, MAX_OUTPUT_CHARS)
    analysis_snippet = truncate_output(previous_analysis, MAX_OUTPUT_CHARS)
    plan_snippet = truncate_output(previous_plan, MAX_OUTPUT_CHARS)
    output_snippet = truncate_output(previous_output, MAX_OUTPUT_CHARS)

    # Show the last working attempt so the model iterates rather than restarts.
    previous_solution = {
        "Plan": plan_snippet,
        "Analysis": analysis_snippet,
        "Execution Output (Summarized)": output_snippet,
        "Code": f"```python\n{code_snippet}\n```",
    }

    instructions = {
        "Goal": "Propose a single, atomic improvement and implement the complete, updated code. Ensure the improved code still adheres to the CRITICAL I/O REQUIREMENTS.",
        "Improvement Guideline": "Focus on one specific change to the approach, algorithm, data processing pipeline, or parameters to better meet the task requirements.",
    }
    instructions.update(_get_common_guidelines())

    sections = {
        "Role": "You are an expert AI Developer tasked with improving a previous solution.",
        "Task Goal and Data Overview": task_context.get("goal_and_data", "N/A"),
        "CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)": task_context.get("io_instructions", "N/A"),
        "Memory of Past Attempts": memory_summary,
        "Previous Successful Solution": previous_solution,
        "Instructions": instructions,
    }
    return _dict_to_str(sections)
|
|
30
|
+
|
|
31
|
+
def create_debug_prompt(task_context: Dict, buggy_code: str, error_history: str, previous_plan: str = "", memory_summary: str = "") -> str:
    """Build the system prompt asking the model to debug a failed solution.

    The buggy code, plan, and error history are truncated before embedding
    (history uses the larger MAX_HISTORY_CHARS budget).  Returns the prompt
    rendered to plain text via ``_dict_to_str``.
    """
    trimmed_code = truncate_output(buggy_code, MAX_OUTPUT_CHARS)
    trimmed_errors = truncate_output(error_history, MAX_HISTORY_CHARS)
    trimmed_plan = truncate_output(previous_plan, MAX_OUTPUT_CHARS)

    # The most recent attempt is shown separately from the aggregate history.
    last_attempt = {
        "Plan": trimmed_plan,
        "Code": f"```python\n{trimmed_code}\n```",
    }

    instructions = {
        "Goal": "Analyze the full history of failures for this task. Identify the root cause, propose a fix, and implement the complete, corrected code.",
        "Debugging Guideline": "Your plan should state the root cause and the fix. The new code must be a complete, runnable script.",
    }
    instructions.update(_get_common_guidelines())

    sections = {
        "Role": "You are an expert Python programmer debugging a data science script.",
        "Task Goal and Data Overview": task_context.get("goal_and_data", "N/A"),
        "CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)": task_context.get("io_instructions", "N/A"),
        "Memory of Past Attempts": memory_summary,
        "Most Recent Failed Attempt": last_attempt,
        "History of Failures (Oldest to Newest)": trimmed_errors,
        "Instructions": instructions,
    }
    return _dict_to_str(sections)
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
from typing import Dict, Any, List
|
|
2
|
+
import json
|
|
3
|
+
from dsat.services.states.autokaggle_state import TaskContract, AutoKaggleState, PhaseMemory
|
|
4
|
+
from dsat.models.formats import StepPlan
|
|
5
|
+
from dsat.utils.context import MAX_OUTPUT_CHARS, truncate_output
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_deconstructor_prompt(description: str, schema: Dict) -> str:
    """Render the prompt that deconstructs a task description into a Task Contract.

    The JSON schema is serialized up front so the template below stays free
    of nested calls.  The model is instructed to reply with a single JSON
    object conforming to that schema.
    """
    schema_json = json.dumps(schema, indent=2)
    return f"""
# TASK
You are an expert AI system designer. Your job is to analyze the user's request, the provided Comprehensive Data Exploration Report, AND the CRITICAL I/O REQUIREMENTS, and deconstruct it into a structured JSON Task Contract according to the schema.

# USER REQUEST, DATA REPORT, AND I/O REQUIREMENTS
"{description}"

# INSTRUCTIONS
1. Analyze the 'COMPREHENSIVE DATA EXPLORATION REPORT (Ground Truth)'.
2. Reconcile the findings in the data report with the 'USER DESCRIPTION'.
3. Populate the 'input_files' field accurately.
4. **CRITICAL: Determine the output files.** You MUST use the filename specified in the 'CRITICAL I/O REQUIREMENTS' section for the 'output_files' field. This requirement overrides any other filename mentioned elsewhere.
5. Extract the overall goal, type, outputs, and metrics from the user request, informed by the insights in the data report.

# RESPONSE JSON SCHEMA (Task Contract)
{schema_json}

# RESPONSE
Provide ONLY the JSON object that conforms to the schema.
"""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_phase_planner_prompt(contract: TaskContract) -> str:
    """Render the prompt asking the model to split the project into high-level phases.

    The full contract is embedded as JSON, and `contract.task_type` is called
    out explicitly so the phase list adapts to the task rather than assuming
    a standard ML pipeline.  The model must reply with a JSON object holding
    a single "phases" list of strings.
    """
    return f"""
# TASK
You are an expert project manager. Based on the provided Task Contract, break down the project into a sequence of high-level, logical phases.

# TASK CONTRACT
{contract.model_dump_json(indent=2)}

# INSTRUCTIONS
- The phases should be a logical progression from start to finish.
- Each phase should represent a distinct stage of work.
- The workflow must adapt dynamically based on the specific task type ('{contract.task_type}') and the `task_goal`. Do not assume a standard ML pipeline (e.g., do not include "Build Model" unless the task explicitly requires it).
- Do not be overly specific. These are high-level phases.

# RESPONSE FORMAT
Provide ONLY a JSON object with a single key "phases" containing a list of strings.
Example: {{"phases": ["Phase 1 Goal", "Phase 2 Goal", "Phase 3 Goal"]}}
"""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _summarize_phase_history_and_artifacts(state: AutoKaggleState) -> str:
    """Summarize completed phases and the cumulative artifact list for prompts.

    Reads ``state.phase_history`` (per-phase goal, final report, and output
    artifacts) and ``state.global_artifacts`` (all files produced so far),
    and returns a two-section text block embedded by the planner/developer
    prompts.
    """
    # NOTE(review): the "\\n" separators below emit a literal backslash-n
    # into the summary text rather than a real newline; other prompt files
    # in this package use "\n". Confirm which was intended before changing.
    history_summary_parts = []
    if not state.phase_history:
        history_summary = "This is the first phase. No previous phases to report."
    else:
        for ph in state.phase_history:
            # One "- ./<file>" bullet per artifact produced by this phase.
            artifacts_list = "\\n".join([f"- ./{key}" for key in ph.output_artifacts.keys()])
            history_summary_parts.append(
                f"## Report for Phase: {ph.phase_goal}\\n"
                f"{ph.final_report}\\n"
                f"**Generated Artifacts:**\\n{artifacts_list if artifacts_list else 'None'}"
            )
        history_summary = "\\n\\n".join(history_summary_parts)

    # NEW: Add a clear summary of all available artifacts
    available_artifacts_str = "\\n".join([f"- {fname}" for fname in state.global_artifacts.keys()])
    if not available_artifacts_str:
        available_artifacts_str = "None"

    return f"""
# PREVIOUSLY COMPLETED PHASES SUMMARY
{history_summary}

# CUMULATIVE ARTIFACTS AVAILABLE IN CWD
```
{available_artifacts_str}
```
"""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_step_planner_prompt(state: AutoKaggleState, phase_goal: str) -> str:
    """Render the prompt asking for a detailed step plan for the current phase.

    Combines the task description, contract, I/O requirements, and the phase
    history/artifact summary, then instructs the model to reply with a JSON
    object matching the ``StepPlan`` schema (plan steps plus explicit
    input/output artifact filenames).
    """
    history_summary = _summarize_phase_history_and_artifacts(state)

    return f"""
# TASK
You are a meticulous planner. Your task is to create a detailed, step-by-step plan for the developer to execute the CURRENT PHASE. You must also specify ALL input artifacts to be used and ALL output artifacts to be generated.

# COMPREHENSIVE DATA REPORT AND OVERALL GOAL
{state.full_task_description}

# OVERALL TASK CONTRACT
{state.contract.model_dump_json(indent=2)}

# CRITICAL I/O REQUIREMENTS
{state.io_instructions}

{history_summary}

# CURRENT PHASE GOAL
"{phase_goal}"

# INSTRUCTIONS
1. **Analyze Available Artifacts**: Review the list of available artifacts. Your plan MUST use these as inputs where appropriate (e.g., use 'train_preprocessed.csv' for model training).
2. **Create Plan**: Create a detailed, numbered list of actions for the developer. Be specific.
3. **Specify Inputs**: In the `input_artifacts` field of your JSON response, list the EXACT filenames of all artifacts your plan requires.
4. **Specify Outputs**: In the `output_files` field, list the EXACT filenames of all new artifacts the plan will create (e.g., models, reports, processed data). It is CRITICAL to include files for saving state, like models (`.pkl`, `.joblib`) or scalers.

# RESPONSE JSON SCHEMA
Provide ONLY a JSON object that conforms to the following schema.
```json
{json.dumps(StepPlan.model_json_schema(), indent=2)}
```
"""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_developer_prompt(state: AutoKaggleState, phase_goal: str, plan: str, attempt_history: List) -> str:
    """Render the prompt asking the developer agent for a complete phase script.

    When ``attempt_history`` is non-empty, every prior failed attempt (code,
    validation result, execution error, reviewer suggestion) is folded into
    an error-context section, most recent first, so the model can avoid
    repeating mistakes.  Each piece is truncated to MAX_OUTPUT_CHARS.
    """
    history_summary = _summarize_phase_history_and_artifacts(state)

    error_context = ""
    if attempt_history:
        history_str_parts = []
        # Iterate over all previous attempts to build a comprehensive history
        for attempt in reversed(attempt_history): # Show most recent failure first
            safe_validation = truncate_output(json.dumps(attempt.validation_result), MAX_OUTPUT_CHARS)
            safe_error = truncate_output(attempt.execution_error or "No runtime error.", MAX_OUTPUT_CHARS)
            safe_code = truncate_output(attempt.code or "", MAX_OUTPUT_CHARS)
            history_str_parts.append(f"""
---
### FAILED ATTEMPT {attempt.attempt_number + 1}
#### Reviewer's Suggestion: "{attempt.review_suggestion}"
#### Validation Failure: {safe_validation}
#### Execution Error:
```
{safe_error}
```
#### Previous Code for this Attempt:
```python
{safe_code}
```
---
""")

        error_context = f"""
# PREVIOUS ATTEMPTS FAILED
Your previous attempts to complete this phase failed. Analyze the full history below (most recent first) to write a corrected version and avoid repeating mistakes.
{''.join(history_str_parts)}
"""

    return f"""
# TASK
You are an expert developer. Your task is to write a complete Python script to execute the provided plan for the current phase.

# COMPREHENSIVE DATA REPORT AND OVERALL GOAL
{state.full_task_description}

# OVERALL TASK CONTRACT
{state.contract.model_dump_json(indent=2)}

# CRITICAL I/O REQUIREMENTS
{state.io_instructions}

{history_summary}

# CURRENT PHASE GOAL
"{phase_goal}"

# DETAILED PLAN FOR THIS PHASE
{plan}

{error_context}

# INSTRUCTIONS
- Your script MUST load any necessary input artifacts listed in the "CUMULATIVE ARTIFACTS AVAILABLE" section using their exact filenames.
- Your script MUST generate and save all output files specified in the plan. Use libraries like `joblib` or `pickle` to save Python objects like models or scalers.
- All file operations must be relative to the current working directory.

# RESPONSE FORMAT
Provide ONLY the complete, runnable Python code in a single code block. Do not add explanations.
"""
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def get_validator_prompt(contract: TaskContract, filename: str, content_snippet: str) -> str:
    """Render the QA prompt that checks a generated file against the contract.

    Fix: the ``filename`` parameter was previously unused — the template
    showed a literal placeholder instead of the file's name, so the QA agent
    never knew which output it was validating.  It is now interpolated in
    both places.  The model must reply with {"passed": bool, "reason": str}.
    """
    return f"""
# TASK

You are an automated QA agent. Validate if the generated file conforms to the Task Contract.

# TASK CONTRACT

{contract.model_dump_json(indent=2)}

# GENERATED FILE: {filename}

# FILE CONTENT (first 20 lines):

{content_snippet}

# VALIDATION

Based on the contract, does this file meet the requirements for '{filename}'? Check format, content, data types, and any other constraints mentioned in the contract's description for this file.

# RESPONSE FORMAT

Respond with a single JSON object: {{"passed": <true_or_false>, "reason": "A detailed explanation."}}
"""
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_reviewer_prompt(phase_goal: str, dev_result: Dict, plan: str = "") -> str:
    """Render the prompt for the reviewer agent scoring a developer attempt.

    ``dev_result`` is expected to carry 'code', 'error', 'validation_result',
    and 'status' keys; all text is truncated to MAX_OUTPUT_CHARS before
    embedding.  The model must reply with {"score": 1-5, "suggestion": str}.
    """
    safe_code = truncate_output(dev_result.get('code', '# N/A'), MAX_OUTPUT_CHARS)
    safe_error = truncate_output(dev_result.get('error') or "None", MAX_OUTPUT_CHARS)
    safe_validation = truncate_output(json.dumps(dev_result.get('validation_result')), MAX_OUTPUT_CHARS)
    # Use .get() for 'status' like every other dev_result access above, so a
    # missing key renders as "Failed" instead of raising KeyError mid-prompt.
    return f"""
# TASK

You are a meticulous reviewer. Assess the developer's work for the given phase. Provide a score (1-5) and a constructive suggestion for improvement if the score is below 4.

# CURRENT PHASE GOAL

"{phase_goal}"

# PLAN BEING EXECUTED

{plan}

# DEVELOPER'S WORK

- **Code:**
```python
{safe_code}
```

- **Code Execution Status:** {"Success" if dev_result.get('status') else "Failed"}
- **Execution Error:** {safe_error}
- **Validation Result:** {safe_validation}

# INSTRUCTIONS

- If the code failed to execute, the score must be low (1 or 2). Your suggestion should focus on fixing the error.
- If the phase is an early one like "Explore Data" or "Prepare Data", DO NOT penalize the developer for not creating the final output file specified in the contract. The validation check for that file might fail, which is acceptable at this stage. Focus on whether the code successfully achieved the phase's specific goal.
- If the code executed and passed relevant validations for this phase, evaluate the approach. Is it a good way to achieve the phase goal? Score 3-5.
- A score of 5 requires a truly excellent and efficient implementation.

# RESPONSE FORMAT

Provide ONLY a JSON object: {{"score": <1_to_5>, "suggestion": "Your suggestion here."}}
"""
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def get_summarizer_prompt(state: AutoKaggleState, phase_memory: PhaseMemory) -> str:
    """Render the prompt asking for a Markdown report of a completed phase.

    Builds a per-attempt score/suggestion list from ``phase_memory.attempts``
    and embeds the last attempt's code as the final successful solution
    (falls back to a placeholder string when no attempts exist).
    """
    # NOTE(review): the "\\n" sequences below emit a literal backslash-n into
    # the attempt summary rather than a real newline; other prompt files in
    # this package use "\n". Confirm which was intended before changing.
    attempt_summary = ""
    for attempt in phase_memory.attempts:
        attempt_summary += f"\\n### Attempt {attempt.attempt_number + 1} (Score: {attempt.review_score})\\n- Suggestion: {attempt.review_suggestion}\\n"

    return f"""
# TASK

You are a technical writer. Summarize the work done in the completed phase into a concise report.

# OVERALL TASK CONTRACT

{state.contract.model_dump_json(indent=2)}

# CURRENT PHASE GOAL

"{phase_memory.phase_goal}"

# ATTEMPT HISTORY FOR THIS PHASE

{attempt_summary}

# FINAL SUCCESSFUL CODE

```python
{phase_memory.attempts[-1].code if phase_memory.attempts else 'No code available'}
```

# INSTRUCTIONS

Synthesize all information into a report covering:

1. The main objective of this phase.
2. The final approach taken to achieve it.
3. Key findings or results from the code execution.
4. How this phase's outcome contributes to the overall project goal.

# RESPONSE FORMAT

Provide the summary as a comprehensive report in Markdown.
"""
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import Dict
|
|
2
|
+
from dsat.prompts.common import create_draft_prompt
|
|
3
|
+
|
|
4
|
+
def create_stepwise_code_prompt(goal: str, plan: str, history: str, current_step: str, io_instructions: str) -> str:
    """Build the prompt requesting code for exactly one step of a larger plan.

    The output is a series of '#'-headed sections separated by blank lines,
    ending with the instruction to implement only the current step.
    """
    sections = [
        "You are executing a complex data science plan step-by-step.",
        f"# OVERALL GOAL\n{goal}",
        f"# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)\n{io_instructions}",
        f"# OVERALL PLAN\n{plan}",
        f"# PREVIOUSLY EXECUTED STEPS (CODE & OUTPUT)\n{history}",
        f"# CURRENT STEP TO IMPLEMENT\n{current_step}",
        "Your task is to write the Python code ONLY for the CURRENT STEP. The code will be executed in a Jupyter-like environment, so you can assume variables from previous steps are in memory. Ensure your code adheres to the I/O requirements.",
    ]
    return "\n\n".join(sections)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def create_stepwise_debug_prompt(goal: str, plan: str, history: str, current_step: str, failed_code: str, error_history: str, io_instructions: str) -> str:
    """Build the prompt requesting a corrected version of one failed plan step.

    Includes the failed step's instruction, its most recent code, and the
    accumulated error history so the model can fix the root cause instead of
    retrying blindly.  Sections are '#'-headed and blank-line separated.
    """
    sections = [
        "You are debugging a complex data science plan being executed step-by-step.",
        f"# OVERALL GOAL\n{goal}",
        f"# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)\n{io_instructions}",
        f"# OVERALL PLAN\n{plan}",
        f"# PREVIOUSLY EXECUTED STEPS (CODE & OUTPUT)\n{history}",
        f"# FAILED STEP INSTRUCTION\n{current_step}",
        f"# MOST RECENT FAILED CODE\n```python\n{failed_code}\n```",
        f"# HISTORY OF FAILED ATTEMPTS FOR THIS STEP\n{error_history}",
        "Your task is to analyze the full history of errors for this step and provide the corrected Python code ONLY for the FAILED STEP. Assume variables from previous steps are still in memory. Ensure your corrected code adheres to the I/O requirements.",
    ]
    return "\n\n".join(sections)
|
dsat/prompts/common.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
def _dict_to_str(d: Dict, indent=0) -> str:
|
|
4
|
+
"""Helper to format dictionaries into a readable string for prompts."""
|
|
5
|
+
lines = []
|
|
6
|
+
for k, v in d.items():
|
|
7
|
+
prefix = ' ' * (indent * 2)
|
|
8
|
+
if isinstance(v, dict):
|
|
9
|
+
lines.append(f"{prefix}{k}:")
|
|
10
|
+
lines.append(_dict_to_str(v, indent + 1))
|
|
11
|
+
elif isinstance(v, list):
|
|
12
|
+
lines.append(f"{prefix}{k}:")
|
|
13
|
+
for item in v:
|
|
14
|
+
lines.append(' ' * ((indent + 1) * 2) + f"- {item}")
|
|
15
|
+
else:
|
|
16
|
+
lines.append(f"{prefix}{k}: {v}")
|
|
17
|
+
return "\n".join(lines)
|
|
18
|
+
|
|
19
|
+
def _get_common_guidelines() -> Dict:
|
|
20
|
+
"""Returns a dictionary of common instructions for all prompts."""
|
|
21
|
+
return {
|
|
22
|
+
"Response Format": (
|
|
23
|
+
"Your response MUST start with a brief natural language plan (3-5 sentences), "
|
|
24
|
+
"followed by a single, complete Python code block wrapped in ```python ... ```. "
|
|
25
|
+
"Do not include any other text or headings."
|
|
26
|
+
),
|
|
27
|
+
"Implementation Guidelines": [
|
|
28
|
+
"The code must be a self-contained, single-file Python script.",
|
|
29
|
+
"If the task involves modeling or optimization, print key metrics (e.g., validation scores) to standard output. Otherwise, ensure the output clearly presents the findings or results.",
|
|
30
|
+
"Follow the CRITICAL I/O REQUIREMENTS provided in the task description precisely for all file operations.",
|
|
31
|
+
"Do not use interactive elements like `input()` or `matplotlib.pyplot.show()`."
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
def create_draft_prompt(task_context: Dict, memory_summary: str, retrieved_knowledge: Optional[str] = None) -> str:
    """Build the system prompt asking the model for an initial solution draft.

    When *retrieved_knowledge* is truthy it is embedded verbatim and the goal
    line is extended to ask the model to use it; otherwise a fixed
    placeholder sentence is shown instead.
    """
    if retrieved_knowledge:
        goal_suffix = ", incorporating insights from the retrieved knowledge if applicable."
        knowledge_section = retrieved_knowledge
    else:
        goal_suffix = "."
        knowledge_section = "No relevant knowledge was retrieved for this task."

    instructions = {
        "Goal": "Propose a simple but effective plan and implement it in Python" + goal_suffix,
        "Design Guideline": "Your first solution should be straightforward and robust. Focus on correctly addressing the core requirements based on the data report AND the CRITICAL I/O REQUIREMENTS.",
    }
    instructions.update(_get_common_guidelines())

    sections = {
        "Role": "You are an expert Data Scientist and AI Engineer.",
        "Task Goal and Data Overview": task_context.get("goal_and_data", "N/A"),
        "CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)": task_context.get("io_instructions", "N/A"),
        "Memory of Past Attempts": memory_summary,
        "Retrieved Knowledge": knowledge_section,
        "Instructions": instructions,
    }
    return _dict_to_str(sections)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# System message for the Planner: asks for a JSON task list whose final task
# must produce the required output file.
PLAN_SYSTEM_MESSAGE = """
You are a master planner AI. Break down a complex user request into a sequence of simple, actionable steps for a data scientist.
Your plan MUST conclude with a final step that generates the required output file as specified in the user request's I/O requirements.
Output a JSON list of tasks, where each task has "task_id", "instruction", and "dependent_task_ids".
Provide ONLY the JSON list in a single code block.
"""

# User message paired with PLAN_SYSTEM_MESSAGE; fills in {user_request}.
PLAN_PROMPT = """
# User Request
{user_request}

# Your Plan (JSON format)
"""

# Per-task code-generation prompt; fills in {user_requirement},
# {io_instructions}, {plan_status}, {current_task}, and {history}.
GENERATE_CODE_PROMPT = """
# Overall Goal and Data Report
{user_requirement}

# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)
{io_instructions}

# Plan Status
{plan_status}

# Current Task
{current_task}

# History (Previous Code and Outputs)
{history}

# Instruction
Write Python code for the **Current Task**. Ensure the code strictly follows the CRITICAL I/O REQUIREMENTS. Generate visualizations if required.
Provide ONLY the Python code in a single code block.
"""

# Debug prompt used when a task's code fails; adds {failed_code} and
# {error_output} on top of the code-generation context.
REFLECT_AND_DEBUG_PROMPT = """
# Overall Goal and Data Report
{user_requirement}

# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)
{io_instructions}

# History (Previous Code and Outputs)
{history}

# Current Task
{current_task}

# Failed Code
```python
{failed_code}
```

# Error Output
```
{error_output}
```

# Instruction
Your previous code failed. Analyze the error in the context of the execution history (previous steps) and rewrite the full, corrected Python code for the **Current Task**. Ensure the corrected code strictly follows the CRITICAL I/O REQUIREMENTS.
Provide ONLY the corrected Python code in a single code block.
"""

# Final prompt: asks for the code that writes the required output file
# ({output_filename}) using variables left in the notebook session.
FINALIZE_OUTPUT_PROMPT = """
# Overall Goal and Data Report
{user_requirement}

# Execution History
The following tasks have been successfully executed, and their results and variables are available in the current notebook session:
{history}

# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)
{io_instructions}

# Final Instruction
Your final and most important task is to generate the required output file.
Based on all the previous steps and the CRITICAL I/O REQUIREMENTS, write the Python code that creates the final output file named **'{output_filename}'** in the correct format.

The file MUST be saved in the current working directory.
Provide ONLY the Python code for this final step in a single code block.
"""
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Planner prompt: given the research problem, I/O requirements, progress log,
# and a retrieved past-experience {case}, asks for the next action plan.
PLAN_PROMPT_TEMPLATE = """
You are a helpful AI expert assistant, responsible for decision making on the action plans.

# Task Objective and Data Report
{research_problem}

# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)
{io_instructions}

# Current Progress Log
{running_log}

# Relevant Past Experience Case
{case}

Based on all this information, provide a short, precise but detailed instruction summary on the action plan for the next step. Ensure the plan considers the CRITICAL I/O REQUIREMENTS.
"""

# Programmer prompt: edits the current script {code} according to the
# planner's {plan} instruction and returns the full edited code.
PROGRAMMER_PROMPT_TEMPLATE = """
You are a helpful AI-oriented programming expert.

# Overall Task Objective and Data Report
{research_problem}

# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)
{io_instructions}

# Current Progress Log (history of actions)
{running_log}

Given this python script:
```python
{code}
```

Now please edit this script according to the following instructions:

```instruction
{plan}
```

Provide the **full** code after the edit, ensuring it aligns with the overall goal, the CRITICAL I/O REQUIREMENTS, and lessons learned from the progress log.
"""

# Debugger prompt: same context as the programmer prompt plus the failing
# {error_log}; asks for the full corrected script.
DEBUGGER_PROMPT_TEMPLATE = """
You are a helpful AI-oriented programming expert.

# Overall Task Objective and Data Report
{research_problem}

# CRITICAL I/O REQUIREMENTS (MUST BE FOLLOWED)
{io_instructions}

# Current Progress Log (history of actions)
{running_log}

The instruction for modification was:

```instruction
{plan}
```

This is the current python code:

```python
{code}
```

However, there are some bugs in this version. Here is the execution log:

```log
{error_log}
```

Please revise the script to fix these bugs and provide the **full** corrected code, ensuring it aligns with the overall goal, the CRITICAL I/O REQUIREMENTS, and lessons learned from the progress log.
"""

# Logger prompt: summarizes the last action ({plan}, {execution_log}, {diff})
# into [Action Summary]/[Action Result] entries appended to {running_log}.
LOGGER_PROMPT_TEMPLATE = """
Given the instructions, execution log, and code difference of the last action:
[Instructions]: {plan}
[Execution Log]: {execution_log}
[Code Difference]: {diff}
[Progress Log]: {running_log}

Summarize the progress of the last step and append it to the progress log in this format:
[Action Summary]: Summarize what action was taken in the last step.
[Action Result]: Summarize the outcome of the action and whether it successfully achieved the objective defined in the [Instructions].
"""
|