pdd-cli 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of pdd-cli might be problematic.
- pdd/__init__.py +7 -1
- pdd/bug_main.py +21 -3
- pdd/bug_to_unit_test.py +16 -5
- pdd/change.py +2 -1
- pdd/change_main.py +407 -189
- pdd/cli.py +853 -301
- pdd/code_generator.py +2 -1
- pdd/conflicts_in_prompts.py +2 -1
- pdd/construct_paths.py +377 -222
- pdd/context_generator.py +2 -1
- pdd/continue_generation.py +3 -2
- pdd/crash_main.py +55 -20
- pdd/data/llm_model.csv +8 -8
- pdd/detect_change.py +2 -1
- pdd/fix_code_loop.py +465 -160
- pdd/fix_code_module_errors.py +7 -4
- pdd/fix_error_loop.py +9 -9
- pdd/fix_errors_from_unit_tests.py +207 -365
- pdd/fix_main.py +31 -4
- pdd/fix_verification_errors.py +285 -0
- pdd/fix_verification_errors_loop.py +975 -0
- pdd/fix_verification_main.py +412 -0
- pdd/generate_output_paths.py +427 -183
- pdd/generate_test.py +3 -2
- pdd/increase_tests.py +2 -2
- pdd/llm_invoke.py +18 -8
- pdd/pdd_completion.zsh +38 -1
- pdd/preprocess.py +3 -3
- pdd/process_csv_change.py +466 -154
- pdd/prompts/extract_prompt_split_LLM.prompt +7 -4
- pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
- pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
- pdd/prompts/find_verification_errors_LLM.prompt +25 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
- pdd/prompts/fix_verification_errors_LLM.prompt +20 -0
- pdd/prompts/generate_test_LLM.prompt +9 -3
- pdd/prompts/split_LLM.prompt +3 -3
- pdd/prompts/update_prompt_LLM.prompt +3 -3
- pdd/split.py +13 -12
- pdd/split_main.py +22 -13
- pdd/trace_main.py +7 -0
- pdd/xml_tagger.py +2 -1
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/METADATA +4 -4
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/RECORD +49 -44
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/WHEEL +1 -1
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,975 @@
import os
import shutil
import subprocess
import datetime
import time
from pathlib import Path
from typing import Dict, Tuple, Any, Optional
from xml.sax.saxutils import escape

from rich.console import Console

# Use relative import assuming fix_verification_errors is in the same package
try:
    # Attempt relative import for package context
    from .fix_verification_errors import fix_verification_errors
except ImportError:
    # Fallback for direct script execution (e.g., testing)
    # This assumes 'pdd' package structure exists relative to the script
    try:
        from pdd.fix_verification_errors import fix_verification_errors
    except ImportError:
        raise ImportError(
            "Could not import 'fix_verification_errors'. "
            "Ensure it's available via relative import or in the 'pdd' package."
        )

# Initialize Rich Console for pretty printing
console = Console()

def _run_program(
    program_path: Path,
    args: Optional[list[str]] = None,
    timeout: int = 60
) -> Tuple[int, str]:
    """
    Runs a Python program using subprocess, capturing combined stdout and stderr.

    Args:
        program_path: Path to the Python program to run.
        args: Optional list of command-line arguments for the program.
        timeout: Timeout in seconds for the subprocess.

    Returns:
        A tuple containing the return code (int) and the combined output (str).
        Returns (-1, error_message) if the program is not found or other execution error occurs.
    """
    if not program_path.is_file():
        return -1, f"Error: Program file not found at {program_path}"

    command = ["python", str(program_path)]
    if args:
        command.extend(args)

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False,  # Don't raise exception for non-zero exit codes
        )
        combined_output = result.stdout + result.stderr
        return result.returncode, combined_output
    except FileNotFoundError:
        return -1, f"Error: Python interpreter not found or '{program_path}' not found."
    except subprocess.TimeoutExpired:
        return -1, f"Error: Program execution timed out after {timeout} seconds."
    except Exception as e:
        return -1, f"Error: An unexpected error occurred while running the program: {e}"

def _write_log_entry(log_file_path: Path, xml_content: str):
    """Appends XML content to the log file."""
    try:
        with open(log_file_path, "a", encoding="utf-8") as f:
            f.write(xml_content + "\n")
    except IOError as e:
        console.print(f"[bold red]Error writing to log file {log_file_path}: {e}[/bold red]")

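# Illustrative note (not part of the release): _run_program reports launch
# failures, timeouts, and unexpected errors as exit code -1 with the message in
# the output slot, so callers can treat them like any other run, e.g.:
#     rc, out = _run_program(Path("script.py"), args=["--flag"], timeout=30)
#     if rc != 0:
#         console.print(out)
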
def fix_verification_errors_loop(
    program_file: str,
    code_file: str,
    prompt: str,
    verification_program: str,
    strength: float,
    temperature: float,
    max_attempts: int,
    budget: float,
    verification_log_file: str = "verification.log",
    verbose: bool = False,
    program_args: Optional[list[str]] = None,
) -> Dict[str, Any]:
    """
    Attempts to fix errors in a code file based on program execution output
    against the prompt's intent, iterating multiple times with secondary verification.

    Args:
        program_file: Path to the Python program exercising the code.
        code_file: Path to the code file being tested/verified.
        prompt: The prompt defining the intended behavior.
        verification_program: Path to a secondary program to verify code changes.
        strength: LLM model strength (0.0 to 1.0).
        temperature: LLM temperature (0.0 to 1.0).
        max_attempts: Maximum number of fix attempts.
        budget: Maximum allowed cost in USD.
        verification_log_file: Path for detailed XML logging (default: "verification.log").
        verbose: Enable verbose logging (default: False).
        program_args: Optional list of command-line arguments for the program_file.

    Returns:
        A dictionary containing:
            'success': bool - Whether the code was successfully fixed.
            'final_program': str - Contents of the final program file.
            'final_code': str - Contents of the final code file.
            'total_attempts': int - Number of fix attempts made (loop iterations started).
            'total_cost': float - Total cost of LLM calls.
            'model_name': str | None - Name of the LLM model used.
            'statistics': dict - Detailed statistics about the process.
    """
    program_path = Path(program_file).resolve()
    code_path = Path(code_file).resolve()
    verification_program_path = Path(verification_program).resolve()
    log_path = Path(verification_log_file).resolve()

    # --- Validate Inputs ---
    if not program_path.is_file():
        console.print(f"[bold red]Error: Program file not found: {program_path}[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
    if not code_path.is_file():
        console.print(f"[bold red]Error: Code file not found: {code_path}[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
    if not verification_program_path.is_file():
        console.print(f"[bold red]Error: Verification program not found: {verification_program_path}[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
    if not 0.0 <= strength <= 1.0:
        console.print(f"[bold red]Error: Strength must be between 0.0 and 1.0.[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
    if not 0.0 <= temperature <= 1.0:
        console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
    # Prompt requires positive max_attempts
    if max_attempts <= 0:
        console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
    if budget < 0:
        console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}

    # Step 1: Remove existing verification log file
    try:
        if log_path.exists():
            os.remove(log_path)
            if verbose:
                console.print(f"Removed existing log file: {log_path}")
    except OSError as e:
        console.print(f"[bold red]Error removing log file {log_path}: {e}[/bold red]")
        # Continue execution, but logging might fail

    # Step 2: Initialize variables
    attempts = 0  # Counter for loop iterations started
    total_cost = 0.0
    model_name: Optional[str] = None
    overall_success = False
    best_iteration = {
        'attempt': -1,  # 0 represents initial state
        'program_backup': None,
        'code_backup': None,
        'issues': float('inf')
    }
    stats = {
        'initial_issues': -1,
        'final_issues': -1,
        'best_iteration_num': -1,
        'best_iteration_issues': float('inf'),
        'improvement_issues': 0,
        'improvement_percent': 0.0,
        'status_message': 'Initialization',
    }
    initial_program_content = ""
    initial_code_content = ""
    program_contents = ""  # Keep track of current contents
    code_contents = ""  # Keep track of current contents

    # --- Step 3: Determine Initial State ---
    if verbose:
        console.print("[bold cyan]Step 3: Determining Initial State...[/bold cyan]")

    try:
        initial_program_content = program_path.read_text(encoding="utf-8")
        initial_code_content = code_path.read_text(encoding="utf-8")
        program_contents = initial_program_content  # Initialize current contents
        code_contents = initial_code_content  # Initialize current contents
    except IOError as e:
        console.print(f"[bold red]Error reading initial program/code files: {e}[/bold red]")
        stats['status_message'] = f'Error reading initial files: {e}'  # Add status message
        return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": stats}

    # 3a: Run initial program with args
    initial_return_code, initial_output = _run_program(program_path, args=program_args)
    if verbose:
        console.print(f"Initial program run exit code: {initial_return_code}")
        console.print(f"Initial program output:\n{initial_output}")

    # 3b: Log initial state
    timestamp = datetime.datetime.now().isoformat()
    initial_log_entry = f'<InitialState timestamp="{timestamp}">\n'
    initial_log_entry += f' <ProgramFile>{escape(str(program_path))}</ProgramFile>\n'
    initial_log_entry += f' <CodeFile>{escape(str(code_path))}</CodeFile>\n'
    initial_log_entry += f' <ExitCode>{initial_return_code}</ExitCode>\n'
    initial_log_entry += f' <Output>{escape(initial_output)}</Output>\n'
    initial_log_entry += '</InitialState>'
    _write_log_entry(log_path, initial_log_entry)

    # 3d: Call fix_verification_errors for initial assessment
    try:
        if verbose:
            console.print("Running initial assessment with fix_verification_errors...")
        # Use actual strength/temp for realistic initial assessment
        initial_fix_result = fix_verification_errors(
            program=initial_program_content,
            prompt=prompt,
            code=initial_code_content,
            output=initial_output,
            strength=strength,
            temperature=temperature,
            verbose=verbose
        )
        # 3e: Add cost
        initial_cost = initial_fix_result.get('total_cost', 0.0)
        total_cost += initial_cost
        model_name = initial_fix_result.get('model_name')  # Capture model name early
        if verbose:
            console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")

        # 3f: Extract initial issues
        initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
        stats['initial_issues'] = initial_issues_count
        if verbose:
            console.print(f"Initial verification issues found: {initial_issues_count}")
            if initial_fix_result.get('explanation'):
                console.print("Initial assessment explanation:")
                console.print(initial_fix_result['explanation'])

        # FIX: Add check for initial assessment error *before* checking success/budget
        # Check if the fixer function returned its specific error state (None explanation/model)
        if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
            error_msg = "Error: Fixer returned invalid/error state during initial assessment"
            console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
            stats['status_message'] = error_msg
            stats['final_issues'] = -1  # Indicate unknown/error state
            # Write final action log for error on initial check
            final_log_entry = "<FinalActions>\n"
            final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
            final_log_entry += "</FinalActions>"
            _write_log_entry(log_path, final_log_entry)
            # Return failure state
            return {
                "success": False,
                "final_program": initial_program_content,
                "final_code": initial_code_content,
                "total_attempts": 0,
                "total_cost": total_cost,  # May be non-zero if error occurred after some cost
                "model_name": model_name,  # May have been set before error
                "statistics": stats,
            }

        # 3g: Initialize best iteration tracker
        # Store original paths as the 'backup' for iteration 0
        best_iteration = {
            'attempt': 0,  # Use 0 for initial state
            'program_backup': str(program_path),  # Path to original
            'code_backup': str(code_path),  # Path to original
            'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
        }
        stats['best_iteration_num'] = 0
        stats['best_iteration_issues'] = best_iteration['issues']

        # 3h: Check for immediate success or budget exceeded
        if initial_issues_count == 0:
            console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
            overall_success = True
            stats['final_issues'] = 0
            stats['status_message'] = 'Success on initial check'
            stats['improvement_issues'] = 0
            stats['improvement_percent'] = 100.0  # Reached target of 0 issues

            # Write final action log for successful initial check
            final_log_entry = "<FinalActions>\n"
            final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
            final_log_entry += "</FinalActions>"
            _write_log_entry(log_path, final_log_entry)

            # Step 7 (early exit): Print stats
            console.print("\n[bold]--- Final Statistics ---[/bold]")
            console.print(f"Initial Issues: {stats['initial_issues']}")
            console.print(f"Final Issues: {stats['final_issues']}")
            console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
            console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
            console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
            console.print(f"Overall Status: {stats['status_message']}")
            console.print(f"Total Attempts Made: {attempts}")  # attempts is 0 here
            console.print(f"Total Cost: ${total_cost:.6f}")
            console.print(f"Model Used: {model_name or 'N/A'}")
            # Step 8 (early exit): Return
            return {
                "success": overall_success,
                "final_program": initial_program_content,
                "final_code": initial_code_content,
                "total_attempts": attempts,  # attempts is 0
                "total_cost": total_cost,
                "model_name": model_name,
                "statistics": stats,
            }
        elif total_cost >= budget:
            console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
            stats['status_message'] = 'Budget exceeded on initial check'
            stats['final_issues'] = stats['initial_issues']  # Final issues same as initial

            # Write final action log for budget exceeded on initial check
            final_log_entry = "<FinalActions>\n"
            final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
            final_log_entry += "</FinalActions>"
            _write_log_entry(log_path, final_log_entry)

            # No changes made, return initial state
            return {
                "success": False,
                "final_program": initial_program_content,
                "final_code": initial_code_content,
                "total_attempts": 0,
                "total_cost": total_cost,
                "model_name": model_name,
                "statistics": stats,
            }

    except Exception as e:
        console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
        stats['status_message'] = f'Error during initial assessment: {e}'
        # Cannot proceed without initial assessment
        return {"success": False, "final_program": initial_program_content, "final_code": initial_code_content, "total_attempts": 0, "total_cost": total_cost, "model_name": model_name, "statistics": stats}

    # --- Step 4: Enter the Fixing Loop ---
    if verbose:
        console.print("\n[bold cyan]Step 4: Starting Fixing Loop...[/bold cyan]")

    # Loop while attempts < max_attempts and budget not exceeded
    # Note: The loop condition checks attempts *before* incrementing for the current iteration
    while attempts < max_attempts:
        current_attempt = attempts + 1  # 1-based for reporting
        timestamp = datetime.datetime.now().isoformat()
        iteration_log_xml = f'<Iteration attempt="{current_attempt}" timestamp="{timestamp}">\n'

        # 4a: Print attempt number and increment counter for attempts *started*
        console.print(f"\n[bold]Attempt {current_attempt}/{max_attempts} (Cost: ${total_cost:.4f}/{budget:.4f})[/bold]")
        attempts += 1  # Increment attempts counter here for iterations started

        # Check budget *before* running expensive operations in the loop
        if total_cost >= budget:
            console.print(f"[bold yellow]Budget ${budget:.4f} already met or exceeded before starting attempt {current_attempt}. Stopping.[/bold yellow]")
            # No iteration log entry needed as the iteration didn't run
            stats['status_message'] = 'Budget Exceeded'
            attempts -= 1  # Decrement as this attempt didn't actually run
            break

        # 4b: Run the program file with args
        if verbose:
            console.print(f"Running program: {program_path} with args: {program_args}")
        return_code, program_output = _run_program(program_path, args=program_args)
        iteration_log_xml += f' <ProgramExecution>\n'
        iteration_log_xml += f' <ExitCode>{return_code}</ExitCode>\n'
        iteration_log_xml += f' <OutputBeforeFix>{escape(program_output)}</OutputBeforeFix>\n'
        iteration_log_xml += f' </ProgramExecution>\n'
        if verbose:
            console.print(f"Program exit code: {return_code}")
            # console.print(f"Program output:\n{program_output}")  # Can be long

        # 4c: Read current contents (already stored in program_contents/code_contents)
        # Re-read could be added here if external modification is possible, but generally not needed
        # try:
        #     program_contents = program_path.read_text(encoding="utf-8")
        #     code_contents = code_path.read_text(encoding="utf-8")
        # except IOError as e: ...

        # 4d: Create backups
        program_backup_path = program_path.with_stem(f"{program_path.stem}_iteration_{current_attempt}").with_suffix(program_path.suffix)
        code_backup_path = code_path.with_stem(f"{code_path.stem}_iteration_{current_attempt}").with_suffix(code_path.suffix)
        try:
            # Copy from the *current* state before this iteration's fix
            program_path.write_text(program_contents, encoding="utf-8")  # Ensure file matches memory state
            code_path.write_text(code_contents, encoding="utf-8")  # Ensure file matches memory state
            shutil.copy2(program_path, program_backup_path)
            shutil.copy2(code_path, code_backup_path)
            if verbose:
                console.print(f"Created backups: {program_backup_path}, {code_backup_path}")
            iteration_log_xml += f' <Backups>\n'
            iteration_log_xml += f' <Program>{escape(str(program_backup_path))}</Program>\n'
            iteration_log_xml += f' <Code>{escape(str(code_backup_path))}</Code>\n'
            iteration_log_xml += f' </Backups>\n'
        except OSError as e:
            console.print(f"[bold red]Error creating backup files during attempt {current_attempt}: {e}[/bold red]")
            iteration_log_xml += f' <Status>Error Creating Backups</Status>\n</Iteration>'
            _write_log_entry(log_path, iteration_log_xml)
            stats['status_message'] = f'Error creating backups on attempt {current_attempt}'
            break  # Don't proceed without backups

        # 4e: Call fix_verification_errors
        iteration_log_xml += f' <InputsToFixer>\n'
        iteration_log_xml += f' <Program>{escape(program_contents)}</Program>\n'
        iteration_log_xml += f' <Code>{escape(code_contents)}</Code>\n'
        iteration_log_xml += f' <Prompt>{escape(prompt)}</Prompt>\n'
        iteration_log_xml += f' <ProgramOutput>{escape(program_output)}</ProgramOutput>\n'
        iteration_log_xml += f' </InputsToFixer>\n'

        fix_result = {}
        try:
            if verbose:
                console.print("Calling fix_verification_errors...")
            fix_result = fix_verification_errors(
                program=program_contents,
                prompt=prompt,
                code=code_contents,
                output=program_output,
                strength=strength,
                temperature=temperature,
                verbose=verbose  # Pass verbose flag down
            )

            # 4f: Add cost
            attempt_cost = fix_result.get('total_cost', 0.0)
            total_cost += attempt_cost
            model_name = fix_result.get('model_name', model_name)  # Update if available
            current_issues_count = fix_result.get('verification_issues_count', -1)

            if verbose:
                console.print(f"Fixer cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
                console.print(f"Fixer issues found: {current_issues_count}")
                if fix_result.get('explanation'):
                    console.print("Fixer explanation:")
                    console.print(fix_result['explanation'])

            # 4g: Log fixer result
            iteration_log_xml += f' <FixerResult '
            iteration_log_xml += f'total_cost="{attempt_cost:.6f}" '
            iteration_log_xml += f'model_name="{escape(model_name or "N/A")}" '
            iteration_log_xml += f'verification_issues_count="{current_issues_count}">\n'
            iteration_log_xml += f' <Explanation>{escape(str(fix_result.get("explanation", "N/A")))}</Explanation>\n'
            iteration_log_xml += f' <FixedProgram>{escape(fix_result.get("fixed_program", ""))}</FixedProgram>\n'
            iteration_log_xml += f' <FixedCode>{escape(fix_result.get("fixed_code", ""))}</FixedCode>\n'
            iteration_log_xml += f' </FixerResult>\n'

        except Exception as e:
            console.print(f"[bold red]Error calling fix_verification_errors on attempt {current_attempt}: {e}[/bold red]")
            iteration_log_xml += f' <Status>Error in Fixer Call: {escape(str(e))}</Status>\n</Iteration>'
            _write_log_entry(log_path, iteration_log_xml)
            stats['status_message'] = f'Error in fixer call on attempt {current_attempt}'
            # Continue to next attempt if possible, don't break immediately
            continue

        # FIX: Add check for fixer returning error state (e.g., None explanation/model or specific issue count)
        # We use -1 as the signal for an internal error from fix_verification_errors
        if current_issues_count == -1:
            error_msg = "Error: Fixer returned invalid/error state"
            console.print(f"[bold red]{error_msg} on attempt {current_attempt}. Stopping.[/bold red]")
            iteration_log_xml += f' <Status>{escape(error_msg)}</Status>\n</Iteration>'
            _write_log_entry(log_path, iteration_log_xml)
            stats['status_message'] = error_msg
            overall_success = False  # Ensure success is false
            break  # Exit loop due to fixer error

        # 4h: Check budget *after* fixer call cost is added
        if total_cost >= budget:
            console.print(f"[bold yellow]Budget ${budget:.4f} exceeded after attempt {current_attempt} (Cost: ${total_cost:.4f}). Stopping.[/bold yellow]")
            iteration_log_xml += f' <Status>Budget Exceeded</Status>\n</Iteration>'
            _write_log_entry(log_path, iteration_log_xml)
            stats['status_message'] = 'Budget Exceeded'
            # Update best iteration if this costly attempt was still the best so far
            if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                if verbose:
                    console.print(f"[green]New best iteration found (before budget break): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
                best_iteration = {
                    'attempt': current_attempt,
                    'program_backup': str(program_backup_path),
                    'code_backup': str(code_backup_path),
                    'issues': current_issues_count
                }
                stats['best_iteration_num'] = current_attempt
                stats['best_iteration_issues'] = current_issues_count
            break  # Exit loop due to budget

        # FIX: Moved calculation of update flags earlier
        # 4j: Check if changes were suggested
        fixed_program = fix_result.get('fixed_program', program_contents)
        fixed_code = fix_result.get('fixed_code', code_contents)
        program_updated = fixed_program != program_contents
        code_updated = fixed_code != code_contents

        # 4k, 4l: Log fix attempt
        iteration_log_xml += f' <FixAttempted program_updated="{program_updated}" code_updated="{code_updated}"/>\n'

        # FIX: Restructured logic for success check and secondary verification
        secondary_verification_passed = True  # Assume pass unless changes made and verification fails
        changes_applied_this_iteration = False

        # Run secondary verification ONLY if code was updated
        if code_updated:
            if verbose:
                console.print("Code change suggested, running secondary verification...")
            try:
                # Temporarily write the proposed code change
                code_path.write_text(fixed_code, encoding="utf-8")

                # Run verification program
                verify_ret_code, verify_output = _run_program(verification_program_path)

                # Determine pass/fail (simple: exit code 0 = pass)
                secondary_verification_passed = (verify_ret_code == 0)

                if verbose:
                    console.print(f"Secondary verification exit code: {verify_ret_code}")
                    console.print(f"Secondary verification passed: {secondary_verification_passed}")
                    # console.print(f"Secondary verification output:\n{verify_output}")

                passed_str = str(secondary_verification_passed).lower()
                iteration_log_xml += f' <SecondaryVerification passed="{passed_str}">\n'
                iteration_log_xml += f' <ExitCode>{verify_ret_code}</ExitCode>\n'
                iteration_log_xml += f' <Output>{escape(verify_output)}</Output>\n'
                iteration_log_xml += f' </SecondaryVerification>\n'

                if not secondary_verification_passed:
                    console.print("[yellow]Secondary verification failed. Restoring code file.[/yellow]")
                    code_path.write_text(code_contents, encoding="utf-8")  # Restore from memory state before this attempt

            except IOError as e:
                console.print(f"[bold red]Error during secondary verification I/O: {e}[/bold red]")
                iteration_log_xml += f' <Status>Error during secondary verification I/O: {escape(str(e))}</Status>\n'
                secondary_verification_passed = False  # Treat I/O error as failure
                try:
                    code_path.write_text(code_contents, encoding="utf-8")
                except IOError:
                    console.print(f"[bold red]Failed to restore code file after I/O error.[/bold red]")

        # Now, decide outcome based on issue count and verification status
        if secondary_verification_passed:
            # Update best iteration if current attempt is better
            if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                if verbose:
                    console.print(f"[green]New best iteration found: Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
                best_iteration = {
                    'attempt': current_attempt,
                    'program_backup': str(program_backup_path),
                    'code_backup': str(code_backup_path),
                    'issues': current_issues_count
                }
                stats['best_iteration_num'] = current_attempt
                stats['best_iteration_issues'] = current_issues_count

            # Apply changes (code was potentially already written for verification)
            try:
                if program_updated:
                    if verbose: console.print("Applying program changes...")
                    program_path.write_text(fixed_program, encoding="utf-8")
                    program_contents = fixed_program  # Update memory state
                    iteration_log_xml += f' <Action>Applied program changes.</Action>\n'
                    changes_applied_this_iteration = True
                if code_updated:
                    # Code already written if verification ran; update memory state
                    code_contents = fixed_code
                    iteration_log_xml += f' <Action>Kept modified code (passed secondary verification).</Action>\n'
                    changes_applied_this_iteration = True

                if changes_applied_this_iteration:
                    # FIX: Revert status to match original tests where applicable
                    iteration_log_xml += f' <Status>Changes Applied (Secondary Verification Passed or Not Needed)</Status>\n'
                else:
                    # This case happens if verification passed but neither program nor code changed
                    iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n'

                # Check for SUCCESS condition HERE
                if current_issues_count == 0:
                    console.print(f"[bold green]Success! 0 verification issues found after attempt {current_attempt} and secondary verification passed.[/bold green]")
                    overall_success = True
                    stats['final_issues'] = 0
                    stats['status_message'] = f'Success on attempt {current_attempt}'
                    iteration_log_xml += '</Iteration>'
                    _write_log_entry(log_path, iteration_log_xml)
                    break  # Exit loop on verified success

            except IOError as e:
                console.print(f"[bold red]Error writing applied changes: {e}[/bold red]")
                iteration_log_xml += f' <Action>Error writing applied changes: {escape(str(e))}</Action>\n'
                iteration_log_xml += f' <Status>Error Applying Changes</Status>\n'
                # Continue loop if possible

        else:  # Secondary verification failed
            iteration_log_xml += f' <Action>Changes Discarded Due To Secondary Verification Failure</Action>\n'
            iteration_log_xml += f' <Status>Changes Discarded</Status>\n'
            # Memory state (program_contents, code_contents) remains unchanged from start of iteration

        # Check if loop should terminate due to no changes suggested when issues > 0
        # FIX: Adjust condition - break if secondary verification PASSED but resulted in NO effective changes
        # AND issues still remain. This avoids breaking early if verification FAILED (handled above).
        if secondary_verification_passed and not changes_applied_this_iteration and current_issues_count > 0:
            # FIX: Adjust status message for clarity
            console.print(f"[yellow]No effective changes suggested by the fixer on attempt {current_attempt} despite issues remaining ({current_issues_count}). Stopping.[/yellow]")
            iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n'  # Reuse status
            # FIX: Ensure status message matches test expectation when breaking here
            stats['status_message'] = f'No effective changes suggested on attempt {current_attempt}'
            # Update best iteration if this attempt was still the best so far
            if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                if verbose:
                    console.print(f"[green]New best iteration found (despite no effective changes): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
                best_iteration = {
                    'attempt': current_attempt,
                    'program_backup': str(program_backup_path),
                    'code_backup': str(code_backup_path),
                    'issues': current_issues_count
                }
                stats['best_iteration_num'] = current_attempt
                stats['best_iteration_issues'] = current_issues_count

            overall_success = False  # Ensure success is False
            iteration_log_xml += '</Iteration>'
            _write_log_entry(log_path, iteration_log_xml)
            break  # Exit loop

        # Append iteration log (if not already done on success break or no-change break)
        iteration_log_xml += '</Iteration>'
        _write_log_entry(log_path, iteration_log_xml)

        # Small delay to avoid hitting rate limits if applicable
        time.sleep(0.5)

    # --- End of Loop ---

    # --- Step 5: Determine Final State ---
    if verbose:
        console.print("\n[bold cyan]Step 5: Determining Final State...[/bold cyan]")

    final_log_entry = "<FinalActions>\n"

    if not overall_success:
        # Determine reason for loop exit if not already set by break conditions
        # FIX: Ensure status message isn't overwritten if already set by break condition
        exit_reason_determined = stats['status_message'] not in ['Initialization', '']
        if not exit_reason_determined:
            if attempts == max_attempts:
                console.print(f"[bold yellow]Maximum attempts ({max_attempts}) reached.[/bold yellow]")
                stats['status_message'] = f'Max attempts ({max_attempts}) reached'
                final_log_entry += f' <Action>Max attempts ({max_attempts}) reached.</Action>\n'
            else:
                # Loop likely exited due to an unexpected break or condition not setting status
                stats['status_message'] = 'Loop finished without success for unknown reason'
                final_log_entry += f' <Action>Loop finished without reaching success state ({escape(stats["status_message"])}).</Action>\n'
        elif stats['status_message'] == 'Budget Exceeded':
            final_log_entry += f' <Action>Loop stopped due to budget.</Action>\n'
        elif stats['status_message'].startswith('No changes suggested') or stats['status_message'].startswith('No effective changes'):
            final_log_entry += f' <Action>Loop stopped as no changes were suggested.</Action>\n'
        elif stats['status_message'].startswith('Error'):
            final_log_entry += f' <Action>Loop stopped due to error: {escape(stats["status_message"])}</Action>\n'
        # else: status already set by a break condition inside loop

        # 5b: Restore best iteration if one exists and is better than initial
        # Check if best_iteration recorded is actually better than initial state
        # And ensure it's not the initial state itself (attempt > 0)
        initial_issues_val = stats['initial_issues'] if stats['initial_issues'] != -1 else float('inf')
        if best_iteration['attempt'] > 0 and best_iteration['issues'] < initial_issues_val:
            console.print(f"[yellow]Restoring state from best iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})[/yellow]")
            final_log_entry += f' <Action>Restored Best Iteration {best_iteration["attempt"]} (Issues: {best_iteration["issues"]})</Action>\n'
            stats['status_message'] += f' - Restored best iteration {best_iteration["attempt"]}'
            try:
                best_program_path = Path(best_iteration['program_backup'])
                best_code_path = Path(best_iteration['code_backup'])
                if best_program_path.is_file() and best_code_path.is_file():
                    # Read content from backup before copying to handle potential race conditions if needed
                    restored_program_content = best_program_path.read_text(encoding='utf-8')
                    restored_code_content = best_code_path.read_text(encoding='utf-8')
                    program_path.write_text(restored_program_content, encoding='utf-8')
                    code_path.write_text(restored_code_content, encoding='utf-8')
                    program_contents = restored_program_content  # Update memory state
                    code_contents = restored_code_content  # Update memory state
                    if verbose:
                        console.print(f"Restored {program_path} from {best_program_path}")
                        console.print(f"Restored {code_path} from {best_code_path}")
                    # Final issues count is the best achieved count
                    stats['final_issues'] = best_iteration['issues']
                else:
                    console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
                    final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
                    stats['status_message'] += ' - Error restoring best iteration (files missing)'
                    # Keep the last state, final issues remain unknown or last attempted
                    stats['final_issues'] = -1  # Indicate uncertainty

            except (OSError, IOError) as e:
                console.print(f"[bold red]Error restoring files from best iteration {best_iteration['attempt']}: {e}[/bold red]")
                final_log_entry += f' <Error>Error restoring files from best iteration {best_iteration["attempt"]}: {escape(str(e))}</Error>\n'
                stats['status_message'] += f' - Error restoring best iteration: {e}'
                stats['final_issues'] = -1  # Indicate uncertainty

        # If no improvement was made or recorded (best is still initial state or worse)
        elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
            console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
            final_log_entry += f' <Action>No improvement found or recorded; restoring original state.</Action>\n'
            stats['final_issues'] = stats['initial_issues']  # Final issues are same as initial
            # Add restoration info to status message if not already implied
            if 'keeping original state' not in stats['status_message']:
                stats['status_message'] += ' - keeping original state'
            # Ensure original files are restored if they were modified in a failed attempt
            try:
                # Only write if current memory state differs from initial
                if program_contents != initial_program_content:
                    program_path.write_text(initial_program_content, encoding='utf-8')
                    program_contents = initial_program_content
                if code_contents != initial_code_content:
                    code_path.write_text(initial_code_content, encoding='utf-8')
                    code_contents = initial_code_content
            except IOError as e:
                console.print(f"[bold red]Error restoring initial files: {e}[/bold red]")
                final_log_entry += f' <Error>Error restoring initial files: {escape(str(e))}</Error>\n'
                stats['status_message'] += f' - Error restoring initial files: {e}'
                stats['final_issues'] = -1  # State uncertain
        # Set final issues if not set by restoration logic (e.g., error during restore)
        if stats['final_issues'] == -1 and stats['initial_issues'] != -1:
            stats['final_issues'] = stats['initial_issues']  # Default to initial if unsure

    else:  # overall_success is True
        final_log_entry += f' <Action>Process finished successfully.</Action>\n'
        stats['final_issues'] = 0  # Success means 0 issues

    final_log_entry += "</FinalActions>"
    _write_log_entry(log_path, final_log_entry)

    # --- Step 6: Read Final Contents ---
    # Use the in-memory contents which should reflect the final state after potential restoration
    if verbose:
        console.print("\n[bold cyan]Step 6: Using Final In-Memory File Contents...[/bold cyan]")
    final_program_content = program_contents
    final_code_content = code_contents
    # Optionally re-read from disk for verification, but memory should be source of truth
    # try:
    #     final_program_content_disk = program_path.read_text(encoding="utf-8")
    #     final_code_content_disk = code_path.read_text(encoding="utf-8")
    #     if final_program_content != final_program_content_disk or final_code_content != final_code_content_disk:
    #         console.print("[bold red]Warning: Final file content on disk differs from expected state![/bold red]")
    #         # Decide whether to trust disk or memory
    # except IOError as e:
    #     console.print(f"[bold red]Error reading final program/code files for verification: {e}[/bold red]")
    #     stats['status_message'] += ' - Error reading final files for verification'

    # --- Step 7: Calculate and Print Summary Statistics ---
    if verbose:
        console.print("\n[bold cyan]Step 7: Calculating Final Statistics...[/bold cyan]")

    initial_known = stats['initial_issues'] != -1
    final_known = stats['final_issues'] != -1

    if initial_known and final_known:
        if stats['initial_issues'] > 0:
            if stats['final_issues'] == 0:  # Successful fix
                stats['improvement_issues'] = stats['initial_issues']
                stats['improvement_percent'] = 100.0
            elif stats['final_issues'] < stats['initial_issues']:  # Partial improvement
                stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
                # % improvement towards reaching 0
                stats['improvement_percent'] = (stats['improvement_issues'] / stats['initial_issues']) * 100.0
            else:  # No improvement or regression
                stats['improvement_issues'] = 0  # Can be negative if regression occurred
                stats['improvement_percent'] = 0.0  # Or negative? Let's cap at 0.
                if stats['final_issues'] > stats['initial_issues']:
                    stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']  # Negative value
                    # Percentage calculation might be misleading here, stick to 0% improvement towards goal.
        elif stats['initial_issues'] == 0:  # Started perfect
            stats['improvement_issues'] = 0
            stats['improvement_percent'] = 100.0  # Already at target
            if stats['final_issues'] > 0:  # Regression occurred during loop?
                stats['improvement_issues'] = -stats['final_issues']
                stats['improvement_percent'] = 0.0  # No longer at target
                overall_success = False  # Ensure success is false if regression happened after initial success
                if 'Success on initial check' in stats['status_message']:  # Update status if loop ran after initial success
                    stats['status_message'] = f'Regression occurred after initial success - Final Issues: {stats["final_issues"]}'
        # else: initial_issues < 0 (should not happen if known)
        #     stats['improvement_issues'] = 'N/A'
        #     stats['improvement_percent'] = 'N/A'
    else:  # Initial or final state unknown
        stats['improvement_issues'] = 'N/A'
        stats['improvement_percent'] = 'N/A'
        if final_known and stats['final_issues'] == 0:
            overall_success = True  # Assume success if final is 0, even if initial unknown
        else:
            overall_success = False  # Cannot guarantee success if initial/final unknown

    console.print("\n[bold]--- Final Statistics ---[/bold]")
    console.print(f"Initial Issues: {stats['initial_issues'] if initial_known else 'Unknown'}")
    console.print(f"Final Issues: {stats['final_issues'] if final_known else 'Unknown'}")
    best_iter_num_str = stats['best_iteration_num'] if stats['best_iteration_num'] != -1 else 'N/A'
    best_iter_iss_str = stats['best_iteration_issues'] if stats['best_iteration_issues'] != float('inf') else 'N/A'
    console.print(f"Best Iteration Found: {best_iter_num_str} (Issues: {best_iter_iss_str})")
    console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
    improvement_percent_str = f"{stats['improvement_percent']:.2f}%" if isinstance(stats['improvement_percent'], float) else stats['improvement_percent']
    console.print(f"Improvement (Percent Towards 0 Issues): {improvement_percent_str}")
    console.print(f"Overall Status: {stats['status_message']}")
    console.print(f"Total Attempts Made: {attempts}")  # Now reflects loop iterations started
    console.print(f"Total Cost: ${total_cost:.6f}")
    console.print(f"Model Used: {model_name or 'N/A'}")

    # --- Step 8: Return Results ---
    # Ensure final success status matches reality (e.g., if regression occurred)
    if final_known and stats['final_issues'] != 0:
        overall_success = False

    return {
        "success": overall_success,
        "final_program": final_program_content,
        "final_code": final_code_content,
        "total_attempts": attempts,  # Return the number of loop iterations started
        "total_cost": total_cost,
        "model_name": model_name,
        "statistics": stats,
    }

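# Illustrative sketch (not part of the release) of the XML fragments the loop
# appends to the verification log, reconstructed from the strings built above:
#     <InitialState timestamp="...">...</InitialState>
#     <Iteration attempt="1" timestamp="...">
#       <ProgramExecution>...</ProgramExecution>
#       <Backups>...</Backups>
#       <InputsToFixer>...</InputsToFixer>
#       <FixerResult total_cost="..." model_name="..." verification_issues_count="...">...</FixerResult>
#       <FixAttempted program_updated="True" code_updated="True"/>
#       <SecondaryVerification passed="true">...</SecondaryVerification>
#       <Action>...</Action>
#       <Status>...</Status>
#     </Iteration>
#     <FinalActions>...</FinalActions>
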
# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
if __name__ == "__main__":
    # Create dummy files for demonstration
    # In a real scenario, these files would exist and contain actual code/programs.
    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
    temp_dir = Path("./temp_fix_verification_loop")
    temp_dir.mkdir(exist_ok=True)

    program_file = temp_dir / "my_program.py"
    code_file = temp_dir / "my_code_module.py"
    verification_program_file = temp_dir / "verify_syntax.py"

    program_file.write_text("""
import my_code_module
import sys
# Simulate using the module and checking output
val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
result = my_code_module.process(val)
expected = val * 2
print(f"Input: {val}")
print(f"Result: {result}")
print(f"Expected: {expected}")
if result == expected:
    print("VERIFICATION_SUCCESS")
else:
    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
""", encoding="utf-8")

    # Initial code with a bug
    code_file.write_text("""
# my_code_module.py
def process(x):
    # Bug: should be x * 2
    return x + 2
""", encoding="utf-8")

    # Simple verification program (e.g., syntax check)
    verification_program_file.write_text("""
import sys
import py_compile
import os
# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
print(f"Checking syntax of: {code_to_check}")
try:
    py_compile.compile(code_to_check, doraise=True)
    print("Syntax OK.")
    sys.exit(0)  # Success
except py_compile.PyCompileError as e:
    print(f"Syntax Error: {e}")
    sys.exit(1)  # Failure
except Exception as e:
    print(f"Verification Error: {e}")
    sys.exit(1)  # Failure
""", encoding="utf-8")
    # Set environment variable for the verification script
    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())

    # --- Mock fix_verification_errors ---
    # This is crucial for testing without actual LLM calls / costs
    # In a real test suite, use unittest.mock
    _original_fix_verification_errors = fix_verification_errors
    _call_count = 0

    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
        global _call_count
        _call_count += 1
        cost = 0.001 * _call_count  # Simulate increasing cost
        model = "mock_model_v1"
        explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
        issues_count = 1  # Assume 1 issue initially

        fixed_program = program  # Assume program doesn't need fixing
        fixed_code = code

        # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
        if "VERIFICATION_FAILURE" in output and _call_count >= 2:
            explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
            fixed_code = """
# my_code_module.py
def process(x):
    # Fixed: should be x * 2
    return x * 2
"""
            issues_count = 0  # Fixed!
        elif "VERIFICATION_SUCCESS" in output:
            explanation = ["Output indicates VERIFICATION_SUCCESS."]
            issues_count = 0  # Already correct

        return {
            'explanation': explanation,
            'fixed_program': fixed_program,
            'fixed_code': fixed_code,
            'total_cost': cost,
            'model_name': model,
            'verification_issues_count': issues_count,
        }

    # Replace the real function with the mock
    # In package context, you might need to patch differently
    # For this script execution:
    # Note: This direct replacement might not work if the function is imported
    # using `from .fix_verification_errors import fix_verification_errors`.
    # A proper mock framework (`unittest.mock.patch`) is better.
    # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
    # This is fragile. A better approach involves dependency injection or mocking frameworks.
    # HACK: Re-assigning the imported name in the global scope of this script
    globals()['fix_verification_errors'] = mock_fix_verification_errors

    console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")

    # Example program_args: Pass input value 10 and another arg
    # Note: The example program only uses the first arg sys.argv[1]
    example_args = ["10", "another_arg"]

    results = fix_verification_errors_loop(
        program_file=str(program_file),
        code_file=str(code_file),
        prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
        verification_program=str(verification_program_file),
        strength=0.5,
        temperature=0.1,
        max_attempts=3,
        budget=0.10,  # Set a budget
        verification_log_file=str(temp_dir / "test_verification.log"),
        verbose=True,
        program_args=example_args
    )

    console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
    console.print(f"Success: {results['success']}")
    console.print(f"Total Attempts: {results['total_attempts']}")
    console.print(f"Total Cost: ${results['total_cost']:.6f}")
    console.print(f"Model Name: {results['model_name']}")
    # console.print(f"Final Program:\n{results['final_program']}")  # Can be long
    console.print(f"Final Code:\n{results['final_code']}")
    console.print(f"Statistics:\n{results['statistics']}")

    # Restore original function if needed elsewhere
    globals()['fix_verification_errors'] = _original_fix_verification_errors

    # Clean up dummy files
    # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
    # shutil.rmtree(temp_dir)
    console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
    console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")