pdd-cli 0.0.24__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +7 -1
- pdd/bug_main.py +5 -1
- pdd/bug_to_unit_test.py +16 -5
- pdd/change.py +2 -1
- pdd/change_main.py +407 -189
- pdd/cli.py +853 -301
- pdd/code_generator.py +2 -1
- pdd/conflicts_in_prompts.py +2 -1
- pdd/construct_paths.py +377 -222
- pdd/context_generator.py +2 -1
- pdd/continue_generation.py +3 -2
- pdd/crash_main.py +55 -20
- pdd/detect_change.py +2 -1
- pdd/fix_code_loop.py +465 -160
- pdd/fix_code_module_errors.py +7 -4
- pdd/fix_error_loop.py +9 -9
- pdd/fix_errors_from_unit_tests.py +207 -365
- pdd/fix_main.py +31 -4
- pdd/fix_verification_errors.py +60 -34
- pdd/fix_verification_errors_loop.py +842 -768
- pdd/fix_verification_main.py +412 -0
- pdd/generate_output_paths.py +427 -189
- pdd/generate_test.py +3 -2
- pdd/increase_tests.py +2 -2
- pdd/llm_invoke.py +14 -3
- pdd/preprocess.py +3 -3
- pdd/process_csv_change.py +466 -154
- pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
- pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
- pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
- pdd/prompts/generate_test_LLM.prompt +9 -3
- pdd/prompts/update_prompt_LLM.prompt +3 -3
- pdd/split.py +6 -5
- pdd/split_main.py +13 -4
- pdd/trace_main.py +7 -0
- pdd/xml_tagger.py +2 -1
- {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/METADATA +4 -4
- {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/RECORD +43 -42
- {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/WHEEL +1 -1
- {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/top_level.txt +0 -0
|
@@ -1,161 +1,80 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
Module for iteratively fixing code verification errors using LLMs.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
1
|
import os
|
|
7
|
-
import subprocess
|
|
8
2
|
import shutil
|
|
9
|
-
import
|
|
3
|
+
import subprocess
|
|
10
4
|
import datetime
|
|
11
|
-
import
|
|
12
|
-
from
|
|
13
|
-
import
|
|
14
|
-
from
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Tuple, Any, Optional
|
|
8
|
+
from xml.sax.saxutils import escape
|
|
15
9
|
|
|
16
|
-
# Use Rich for pretty console output
|
|
17
10
|
from rich.console import Console
|
|
18
|
-
from rich.panel import Panel
|
|
19
|
-
from rich.syntax import Syntax
|
|
20
|
-
from rich.text import Text
|
|
21
11
|
|
|
22
|
-
#
|
|
23
|
-
# Attempt relative import for package structure
|
|
12
|
+
# Use relative import assuming fix_verification_errors is in the same package
|
|
24
13
|
try:
|
|
14
|
+
# Attempt relative import for package context
|
|
25
15
|
from .fix_verification_errors import fix_verification_errors
|
|
26
|
-
from .utils import ensure_dir_exists # Assuming a utility function exists
|
|
27
16
|
except ImportError:
|
|
28
|
-
# Fallback for
|
|
29
|
-
# This
|
|
30
|
-
print("Warning: Could not perform relative import. Falling back.")
|
|
31
|
-
# If fix_verification_errors is in the same directory or PYTHONPATH:
|
|
17
|
+
# Fallback for direct script execution (e.g., testing)
|
|
18
|
+
# This assumes 'pdd' package structure exists relative to the script
|
|
32
19
|
try:
|
|
33
|
-
from fix_verification_errors import fix_verification_errors
|
|
34
|
-
except ImportError
|
|
20
|
+
from pdd.fix_verification_errors import fix_verification_errors
|
|
21
|
+
except ImportError:
|
|
35
22
|
raise ImportError(
|
|
36
23
|
"Could not import 'fix_verification_errors'. "
|
|
37
|
-
"Ensure it's
|
|
38
|
-
)
|
|
39
|
-
# Define a dummy ensure_dir_exists if not available
|
|
40
|
-
def ensure_dir_exists(file_path: str):
|
|
41
|
-
"""Ensure the directory for the given file path exists."""
|
|
42
|
-
directory = os.path.dirname(file_path)
|
|
43
|
-
if directory and not os.path.exists(directory):
|
|
44
|
-
os.makedirs(directory)
|
|
45
|
-
|
|
46
|
-
# Initialize Rich Console
|
|
47
|
-
console = Console()
|
|
24
|
+
"Ensure it's available via relative import or in the 'pdd' package."
|
|
25
|
+
)
|
|
48
26
|
|
|
49
|
-
#
|
|
27
|
+
# Initialize Rich Console for pretty printing
|
|
28
|
+
console = Console()
|
|
50
29
|
|
|
51
|
-
def
|
|
30
|
+
def _run_program(
|
|
31
|
+
program_path: Path,
|
|
32
|
+
args: Optional[list[str]] = None,
|
|
33
|
+
timeout: int = 60
|
|
34
|
+
) -> Tuple[int, str]:
|
|
52
35
|
"""
|
|
53
|
-
Runs a subprocess
|
|
36
|
+
Runs a Python program using subprocess, capturing combined stdout and stderr.
|
|
54
37
|
|
|
55
38
|
Args:
|
|
56
|
-
|
|
57
|
-
|
|
39
|
+
program_path: Path to the Python program to run.
|
|
40
|
+
args: Optional list of command-line arguments for the program.
|
|
41
|
+
timeout: Timeout in seconds for the subprocess.
|
|
58
42
|
|
|
59
43
|
Returns:
|
|
60
|
-
A tuple containing
|
|
61
|
-
-
|
|
62
|
-
- output (str): The combined stdout and stderr of the command.
|
|
63
|
-
- return_code (int): The exit code of the command.
|
|
44
|
+
A tuple containing the return code (int) and the combined output (str).
|
|
45
|
+
Returns (-1, error_message) if the program is not found or another execution error occurs.
|
|
64
46
|
"""
|
|
47
|
+
if not program_path.is_file():
|
|
48
|
+
return -1, f"Error: Program file not found at {program_path}"
|
|
49
|
+
|
|
50
|
+
command = ["python", str(program_path)]
|
|
51
|
+
if args:
|
|
52
|
+
command.extend(args)
|
|
53
|
+
|
|
65
54
|
try:
|
|
66
|
-
|
|
55
|
+
result = subprocess.run(
|
|
67
56
|
command,
|
|
68
57
|
capture_output=True,
|
|
69
58
|
text=True,
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
encoding='utf-8',
|
|
73
|
-
errors='replace' # Handle potential encoding errors
|
|
59
|
+
timeout=timeout,
|
|
60
|
+
check=False, # Don't raise exception for non-zero exit codes
|
|
74
61
|
)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
return success, output.strip(), process.returncode
|
|
78
|
-
except FileNotFoundError:
|
|
79
|
-
error_msg = f"Error: Command not found: '{command[0]}'. Please ensure it's installed and in PATH."
|
|
80
|
-
console.print(f"[bold red]{error_msg}[/bold red]")
|
|
81
|
-
return False, error_msg, -1 # Use -1 to indicate execution failure
|
|
82
|
-
except Exception as e:
|
|
83
|
-
error_msg = f"Error running subprocess {' '.join(command)}: {e}"
|
|
84
|
-
console.print(f"[bold red]{error_msg}[/bold red]")
|
|
85
|
-
return False, error_msg, -1
|
|
86
|
-
|
|
87
|
-
def _read_file(file_path: str) -> Optional[str]:
|
|
88
|
-
"""Reads the content of a file."""
|
|
89
|
-
try:
|
|
90
|
-
with open(file_path, 'r', encoding='utf-8') as f:
|
|
91
|
-
return f.read()
|
|
62
|
+
combined_output = result.stdout + result.stderr
|
|
63
|
+
return result.returncode, combined_output
|
|
92
64
|
except FileNotFoundError:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
console.print(f"[bold red]Error reading file {file_path}: {e}[/bold red]")
|
|
97
|
-
return None
|
|
98
|
-
|
|
99
|
-
def _write_file(file_path: str, content: str) -> bool:
|
|
100
|
-
"""Writes content to a file."""
|
|
101
|
-
try:
|
|
102
|
-
ensure_dir_exists(file_path)
|
|
103
|
-
with open(file_path, 'w', encoding='utf-8') as f:
|
|
104
|
-
f.write(content)
|
|
105
|
-
return True
|
|
65
|
+
return -1, f"Error: Python interpreter not found or '{program_path}' not found."
|
|
66
|
+
except subprocess.TimeoutExpired:
|
|
67
|
+
return -1, f"Error: Program execution timed out after {timeout} seconds."
|
|
106
68
|
except Exception as e:
|
|
107
|
-
|
|
108
|
-
return False
|
|
109
|
-
|
|
110
|
-
def _create_backup(file_path: str, iteration: int) -> Optional[str]:
|
|
111
|
-
"""Creates a backup copy of a file."""
|
|
112
|
-
if not os.path.exists(file_path):
|
|
113
|
-
console.print(f"[yellow]Warning: Cannot backup non-existent file: {file_path}[/yellow]")
|
|
114
|
-
return None
|
|
115
|
-
try:
|
|
116
|
-
base, ext = os.path.splitext(file_path)
|
|
117
|
-
backup_path = f"{base}_iteration_{iteration}{ext}"
|
|
118
|
-
shutil.copy2(file_path, backup_path) # copy2 preserves metadata
|
|
119
|
-
return backup_path
|
|
120
|
-
except Exception as e:
|
|
121
|
-
console.print(f"[bold red]Error creating backup for {file_path}: {e}[/bold red]")
|
|
122
|
-
return None
|
|
123
|
-
|
|
124
|
-
def _restore_backup(backup_path: str, original_path: str) -> bool:
|
|
125
|
-
"""Restores a file from its backup."""
|
|
126
|
-
if not backup_path or not os.path.exists(backup_path):
|
|
127
|
-
console.print(f"[bold red]Error: Backup file not found: {backup_path}[/bold red]")
|
|
128
|
-
return False
|
|
129
|
-
try:
|
|
130
|
-
shutil.copy2(backup_path, original_path)
|
|
131
|
-
return True
|
|
132
|
-
except Exception as e:
|
|
133
|
-
console.print(f"[bold red]Error restoring {original_path} from {backup_path}: {e}[/bold red]")
|
|
134
|
-
return False
|
|
69
|
+
return -1, f"Error: An unexpected error occurred while running the program: {e}"
|
|
135
70
|
|
|
136
|
-
def
|
|
137
|
-
"""Appends
|
|
71
|
+
def _write_log_entry(log_file_path: Path, xml_content: str):
|
|
72
|
+
"""Appends XML content to the log file."""
|
|
138
73
|
try:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
reparsed = minidom.parseString(rough_string)
|
|
144
|
-
pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')
|
|
145
|
-
|
|
146
|
-
with open(log_file, 'wb') as f: # Write bytes for encoded XML
|
|
147
|
-
f.write(pretty_xml)
|
|
148
|
-
except Exception as e:
|
|
149
|
-
console.print(f"[bold red]Error writing to XML log file {log_file}: {e}[/bold red]")
|
|
150
|
-
|
|
151
|
-
def _create_cdata_element(parent: ET.Element, tag_name: str, content: Optional[str]):
|
|
152
|
-
"""Creates an XML element with CDATA content."""
|
|
153
|
-
element = ET.SubElement(parent, tag_name)
|
|
154
|
-
# Use a placeholder if content is None or empty to ensure valid XML structure
|
|
155
|
-
element.text = ET.CDATA(content if content is not None else "")
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
# --- Main Function ---
|
|
74
|
+
with open(log_file_path, "a", encoding="utf-8") as f:
|
|
75
|
+
f.write(xml_content + "\n")
|
|
76
|
+
except IOError as e:
|
|
77
|
+
console.print(f"[bold red]Error writing to log file {log_file_path}: {e}[/bold red]")
|
|
159
78
|
|
|
160
79
|
def fix_verification_errors_loop(
|
|
161
80
|
program_file: str,
|
|
@@ -166,736 +85,891 @@ def fix_verification_errors_loop(
|
|
|
166
85
|
temperature: float,
|
|
167
86
|
max_attempts: int,
|
|
168
87
|
budget: float,
|
|
169
|
-
verification_log_file: str = "
|
|
170
|
-
verbose: bool = False
|
|
88
|
+
verification_log_file: str = "verification.log",
|
|
89
|
+
verbose: bool = False,
|
|
90
|
+
program_args: Optional[list[str]] = None,
|
|
171
91
|
) -> Dict[str, Any]:
|
|
172
92
|
"""
|
|
173
|
-
Attempts to fix errors in a code file
|
|
93
|
+
Attempts to fix errors in a code file based on program execution output
|
|
94
|
+
against the prompt's intent, iterating multiple times with secondary verification.
|
|
174
95
|
|
|
175
96
|
Args:
|
|
176
|
-
program_file: Path to the Python program
|
|
97
|
+
program_file: Path to the Python program exercising the code.
|
|
177
98
|
code_file: Path to the code file being tested/verified.
|
|
178
|
-
prompt: The prompt
|
|
179
|
-
verification_program: Path to a secondary
|
|
180
|
-
strength: LLM strength
|
|
181
|
-
temperature: LLM temperature
|
|
99
|
+
prompt: The prompt defining the intended behavior.
|
|
100
|
+
verification_program: Path to a secondary program to verify code changes.
|
|
101
|
+
strength: LLM model strength (0.0 to 1.0).
|
|
102
|
+
temperature: LLM temperature (0.0 to 1.0).
|
|
182
103
|
max_attempts: Maximum number of fix attempts.
|
|
183
|
-
budget: Maximum allowed cost
|
|
184
|
-
verification_log_file: Path for detailed XML logging.
|
|
185
|
-
verbose: Enable
|
|
104
|
+
budget: Maximum allowed cost in USD.
|
|
105
|
+
verification_log_file: Path for detailed XML logging (default: "verification.log").
|
|
106
|
+
verbose: Enable verbose logging (default: False).
|
|
107
|
+
program_args: Optional list of command-line arguments for the program_file.
|
|
186
108
|
|
|
187
109
|
Returns:
|
|
188
110
|
A dictionary containing:
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
111
|
+
'success': bool - Whether the code was successfully fixed.
|
|
112
|
+
'final_program': str - Contents of the final program file.
|
|
113
|
+
'final_code': str - Contents of the final code file.
|
|
114
|
+
'total_attempts': int - Number of fix attempts made (loop iterations started).
|
|
115
|
+
'total_cost': float - Total cost of LLM calls.
|
|
116
|
+
'model_name': str | None - Name of the LLM model used.
|
|
117
|
+
'statistics': dict - Detailed statistics about the process.
|
|
196
118
|
"""
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
119
|
+
program_path = Path(program_file).resolve()
|
|
120
|
+
code_path = Path(code_file).resolve()
|
|
121
|
+
verification_program_path = Path(verification_program).resolve()
|
|
122
|
+
log_path = Path(verification_log_file).resolve()
|
|
123
|
+
|
|
124
|
+
# --- Validate Inputs ---
|
|
125
|
+
if not program_path.is_file():
|
|
126
|
+
console.print(f"[bold red]Error: Program file not found: {program_path}[/bold red]")
|
|
127
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
128
|
+
if not code_path.is_file():
|
|
129
|
+
console.print(f"[bold red]Error: Code file not found: {code_path}[/bold red]")
|
|
130
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
131
|
+
if not verification_program_path.is_file():
|
|
132
|
+
console.print(f"[bold red]Error: Verification program not found: {verification_program_path}[/bold red]")
|
|
133
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
134
|
+
if not 0.0 <= strength <= 1.0:
|
|
135
|
+
console.print(f"[bold red]Error: Strength must be between 0.0 and 1.0.[/bold red]")
|
|
136
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
137
|
+
if not 0.0 <= temperature <= 1.0:
|
|
138
|
+
console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
|
|
139
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
140
|
+
# Prompt requires positive max_attempts
|
|
141
|
+
if max_attempts <= 0:
|
|
142
|
+
console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
|
|
143
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
144
|
+
if budget < 0:
|
|
145
|
+
console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
|
|
146
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Step 1: Remove existing verification log file
|
|
150
|
+
try:
|
|
151
|
+
if log_path.exists():
|
|
152
|
+
os.remove(log_path)
|
|
203
153
|
if verbose:
|
|
204
|
-
console.print(f"Removed existing log file: {
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
log_root = ET.Element("VerificationLog")
|
|
209
|
-
log_root.set("startTime", datetime.datetime.now().isoformat())
|
|
154
|
+
console.print(f"Removed existing log file: {log_path}")
|
|
155
|
+
except OSError as e:
|
|
156
|
+
console.print(f"[bold red]Error removing log file {log_path}: {e}[/bold red]")
|
|
157
|
+
# Continue execution, but logging might fail
|
|
210
158
|
|
|
211
|
-
#
|
|
212
|
-
attempts = 0
|
|
159
|
+
# Step 2: Initialize variables
|
|
160
|
+
attempts = 0 # Counter for loop iterations started
|
|
213
161
|
total_cost = 0.0
|
|
214
162
|
model_name: Optional[str] = None
|
|
215
163
|
overall_success = False
|
|
216
|
-
last_fix_result: Optional[Dict[str, Any]] = None # Store the result of the last fix attempt
|
|
217
|
-
|
|
218
|
-
# Best iteration tracker: Stores the state with the minimum verified issues
|
|
219
164
|
best_iteration = {
|
|
220
|
-
'attempt': -1, #
|
|
221
|
-
'
|
|
222
|
-
'
|
|
223
|
-
'
|
|
224
|
-
'model_name': None,
|
|
165
|
+
'attempt': -1, # -1 = nothing assessed yet; 0 represents initial state
|
|
166
|
+
'program_backup': None,
|
|
167
|
+
'code_backup': None,
|
|
168
|
+
'issues': float('inf')
|
|
225
169
|
}
|
|
226
|
-
|
|
227
|
-
# Statistics tracker
|
|
228
170
|
stats = {
|
|
229
|
-
'initial_issues': -1,
|
|
171
|
+
'initial_issues': -1,
|
|
230
172
|
'final_issues': -1,
|
|
231
|
-
'
|
|
173
|
+
'best_iteration_num': -1,
|
|
232
174
|
'best_iteration_issues': float('inf'),
|
|
233
175
|
'improvement_issues': 0,
|
|
234
|
-
'
|
|
235
|
-
'
|
|
176
|
+
'improvement_percent': 0.0,
|
|
177
|
+
'status_message': 'Initialization',
|
|
236
178
|
}
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
stats['exit_reason'] = "Input Error: Program file not found"
|
|
242
|
-
return {
|
|
243
|
-
'success': False, 'final_program': "", 'final_code': "",
|
|
244
|
-
'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
|
|
245
|
-
'statistics': stats
|
|
246
|
-
}
|
|
247
|
-
if not os.path.isfile(code_file):
|
|
248
|
-
console.print(f"[bold red]Error: Code file not found: {code_file}[/bold red]")
|
|
249
|
-
stats['exit_reason'] = "Input Error: Code file not found"
|
|
250
|
-
return {
|
|
251
|
-
'success': False, 'final_program': "", 'final_code': "",
|
|
252
|
-
'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
|
|
253
|
-
'statistics': stats
|
|
254
|
-
}
|
|
255
|
-
if not os.path.isfile(verification_program):
|
|
256
|
-
console.print(f"[bold red]Error: Secondary verification program not found: {verification_program}[/bold red]")
|
|
257
|
-
stats['exit_reason'] = "Input Error: Verification program not found"
|
|
258
|
-
return {
|
|
259
|
-
'success': False, 'final_program': "", 'final_code': "",
|
|
260
|
-
'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
|
|
261
|
-
'statistics': stats
|
|
262
|
-
}
|
|
179
|
+
initial_program_content = ""
|
|
180
|
+
initial_code_content = ""
|
|
181
|
+
program_contents = "" # Keep track of current contents
|
|
182
|
+
code_contents = "" # Keep track of current contents
|
|
263
183
|
|
|
264
184
|
# --- Step 3: Determine Initial State ---
|
|
265
185
|
if verbose:
|
|
266
|
-
console.print("
|
|
186
|
+
console.print("[bold cyan]Step 3: Determining Initial State...[/bold cyan]")
|
|
267
187
|
|
|
268
|
-
|
|
269
|
-
|
|
188
|
+
try:
|
|
189
|
+
initial_program_content = program_path.read_text(encoding="utf-8")
|
|
190
|
+
initial_code_content = code_path.read_text(encoding="utf-8")
|
|
191
|
+
program_contents = initial_program_content # Initialize current contents
|
|
192
|
+
code_contents = initial_code_content # Initialize current contents
|
|
193
|
+
except IOError as e:
|
|
194
|
+
console.print(f"[bold red]Error reading initial program/code files: {e}[/bold red]")
|
|
195
|
+
stats['status_message'] = f'Error reading initial files: {e}' # Add status message
|
|
196
|
+
return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": stats}
|
|
197
|
+
|
|
198
|
+
# 3a: Run initial program with args
|
|
199
|
+
initial_return_code, initial_output = _run_program(program_path, args=program_args)
|
|
270
200
|
if verbose:
|
|
271
|
-
console.print(f"Initial program
|
|
272
|
-
console.print("
|
|
273
|
-
console.print(f"[grey37]{initial_output or '[No Output]'}[/grey37]")
|
|
201
|
+
console.print(f"Initial program run exit code: {initial_return_code}")
|
|
202
|
+
console.print(f"Initial program output:\n{initial_output}")
|
|
274
203
|
|
|
275
204
|
# 3b: Log initial state
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
if initial_program_contents is None or initial_code_contents is None:
|
|
285
|
-
stats['exit_reason'] = "File Read Error: Could not read initial program or code file."
|
|
286
|
-
return {
|
|
287
|
-
'success': False, 'final_program': initial_program_contents or "", 'final_code': initial_code_contents or "",
|
|
288
|
-
'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
|
|
289
|
-
'statistics': stats
|
|
290
|
-
}
|
|
205
|
+
timestamp = datetime.datetime.now().isoformat()
|
|
206
|
+
initial_log_entry = f'<InitialState timestamp="{timestamp}">\n'
|
|
207
|
+
initial_log_entry += f' <ProgramFile>{escape(str(program_path))}</ProgramFile>\n'
|
|
208
|
+
initial_log_entry += f' <CodeFile>{escape(str(code_path))}</CodeFile>\n'
|
|
209
|
+
initial_log_entry += f' <ExitCode>{initial_return_code}</ExitCode>\n'
|
|
210
|
+
initial_log_entry += f' <Output>{escape(initial_output)}</Output>\n'
|
|
211
|
+
initial_log_entry += '</InitialState>'
|
|
212
|
+
_write_log_entry(log_path, initial_log_entry)
|
|
291
213
|
|
|
292
214
|
# 3d: Call fix_verification_errors for initial assessment
|
|
293
|
-
if verbose:
|
|
294
|
-
console.print("Running initial assessment with 'fix_verification_errors'...")
|
|
295
215
|
try:
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
# Log the error
|
|
315
|
-
error_log = ET.Element("Error")
|
|
316
|
-
error_log.set("timestamp", datetime.datetime.now().isoformat())
|
|
317
|
-
error_log.set("phase", "InitialAssessment")
|
|
318
|
-
_create_cdata_element(error_log, "ErrorMessage", str(e))
|
|
319
|
-
_append_log_entry(verification_log_file, log_root, error_log)
|
|
320
|
-
return {
|
|
321
|
-
'success': False, 'final_program': initial_program_contents, 'final_code': initial_code_contents,
|
|
322
|
-
'total_attempts': 0, 'total_cost': total_cost, 'model_name': model_name,
|
|
323
|
-
'statistics': stats
|
|
324
|
-
}
|
|
216
|
+
if verbose:
|
|
217
|
+
console.print("Running initial assessment with fix_verification_errors...")
|
|
218
|
+
# Use actual strength/temp for realistic initial assessment
|
|
219
|
+
initial_fix_result = fix_verification_errors(
|
|
220
|
+
program=initial_program_content,
|
|
221
|
+
prompt=prompt,
|
|
222
|
+
code=initial_code_content,
|
|
223
|
+
output=initial_output,
|
|
224
|
+
strength=strength,
|
|
225
|
+
temperature=temperature,
|
|
226
|
+
verbose=verbose
|
|
227
|
+
)
|
|
228
|
+
# 3e: Add cost
|
|
229
|
+
initial_cost = initial_fix_result.get('total_cost', 0.0)
|
|
230
|
+
total_cost += initial_cost
|
|
231
|
+
model_name = initial_fix_result.get('model_name') # Capture model name early
|
|
232
|
+
if verbose:
|
|
233
|
+
console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
|
|
325
234
|
|
|
235
|
+
# 3f: Extract initial issues
|
|
236
|
+
initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
|
|
237
|
+
stats['initial_issues'] = initial_issues_count
|
|
238
|
+
if verbose:
|
|
239
|
+
console.print(f"Initial verification issues found: {initial_issues_count}")
|
|
240
|
+
if initial_fix_result.get('explanation'):
|
|
241
|
+
console.print("Initial assessment explanation:")
|
|
242
|
+
console.print(initial_fix_result['explanation'])
|
|
243
|
+
|
|
244
|
+
# FIX: Add check for initial assessment error *before* checking success/budget
|
|
245
|
+
# Check if the fixer function returned its specific error state (None explanation/model)
|
|
246
|
+
if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
|
|
247
|
+
error_msg = "Error: Fixer returned invalid/error state during initial assessment"
|
|
248
|
+
console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
|
|
249
|
+
stats['status_message'] = error_msg
|
|
250
|
+
stats['final_issues'] = -1 # Indicate unknown/error state
|
|
251
|
+
# Write final action log for error on initial check
|
|
252
|
+
final_log_entry = "<FinalActions>\n"
|
|
253
|
+
final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
|
|
254
|
+
final_log_entry += "</FinalActions>"
|
|
255
|
+
_write_log_entry(log_path, final_log_entry)
|
|
256
|
+
# Return failure state
|
|
257
|
+
return {
|
|
258
|
+
"success": False,
|
|
259
|
+
"final_program": initial_program_content,
|
|
260
|
+
"final_code": initial_code_content,
|
|
261
|
+
"total_attempts": 0,
|
|
262
|
+
"total_cost": total_cost, # May be non-zero if error occurred after some cost
|
|
263
|
+
"model_name": model_name, # May have been set before error
|
|
264
|
+
"statistics": stats,
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
# 3g: Initialize best iteration tracker
|
|
268
|
+
# Store original paths as the 'backup' for iteration 0
|
|
269
|
+
best_iteration = {
|
|
270
|
+
'attempt': 0, # Use 0 for initial state
|
|
271
|
+
'program_backup': str(program_path), # Path to original
|
|
272
|
+
'code_backup': str(code_path), # Path to original
|
|
273
|
+
'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
|
|
274
|
+
}
|
|
275
|
+
stats['best_iteration_num'] = 0
|
|
276
|
+
stats['best_iteration_issues'] = best_iteration['issues']
|
|
277
|
+
|
|
278
|
+
# 3h: Check for immediate success or budget exceeded
|
|
279
|
+
if initial_issues_count == 0:
|
|
280
|
+
console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
|
|
281
|
+
overall_success = True
|
|
282
|
+
stats['final_issues'] = 0
|
|
283
|
+
stats['status_message'] = 'Success on initial check'
|
|
284
|
+
stats['improvement_issues'] = 0
|
|
285
|
+
stats['improvement_percent'] = 100.0 # Reached target of 0 issues
|
|
286
|
+
|
|
287
|
+
# Write final action log for successful initial check
|
|
288
|
+
final_log_entry = "<FinalActions>\n"
|
|
289
|
+
final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
|
|
290
|
+
final_log_entry += "</FinalActions>"
|
|
291
|
+
_write_log_entry(log_path, final_log_entry)
|
|
292
|
+
|
|
293
|
+
# Step 7 (early exit): Print stats
|
|
294
|
+
console.print("\n[bold]--- Final Statistics ---[/bold]")
|
|
295
|
+
console.print(f"Initial Issues: {stats['initial_issues']}")
|
|
296
|
+
console.print(f"Final Issues: {stats['final_issues']}")
|
|
297
|
+
console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
|
|
298
|
+
console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
|
|
299
|
+
console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
|
|
300
|
+
console.print(f"Overall Status: {stats['status_message']}")
|
|
301
|
+
console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
|
|
302
|
+
console.print(f"Total Cost: ${total_cost:.6f}")
|
|
303
|
+
console.print(f"Model Used: {model_name or 'N/A'}")
|
|
304
|
+
# Step 8 (early exit): Return
|
|
305
|
+
return {
|
|
306
|
+
"success": overall_success,
|
|
307
|
+
"final_program": initial_program_content,
|
|
308
|
+
"final_code": initial_code_content,
|
|
309
|
+
"total_attempts": attempts, # attempts is 0
|
|
310
|
+
"total_cost": total_cost,
|
|
311
|
+
"model_name": model_name,
|
|
312
|
+
"statistics": stats,
|
|
313
|
+
}
|
|
314
|
+
elif total_cost >= budget:
|
|
315
|
+
console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
|
|
316
|
+
stats['status_message'] = 'Budget exceeded on initial check'
|
|
317
|
+
stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
|
|
318
|
+
|
|
319
|
+
# Write final action log for budget exceeded on initial check
|
|
320
|
+
final_log_entry = "<FinalActions>\n"
|
|
321
|
+
final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
|
|
322
|
+
final_log_entry += "</FinalActions>"
|
|
323
|
+
_write_log_entry(log_path, final_log_entry)
|
|
324
|
+
|
|
325
|
+
# No changes made, return initial state
|
|
326
|
+
return {
|
|
327
|
+
"success": False,
|
|
328
|
+
"final_program": initial_program_content,
|
|
329
|
+
"final_code": initial_code_content,
|
|
330
|
+
"total_attempts": 0,
|
|
331
|
+
"total_cost": total_cost,
|
|
332
|
+
"model_name": model_name,
|
|
333
|
+
"statistics": stats,
|
|
334
|
+
}
|
|
326
335
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
336
|
+
except Exception as e:
|
|
337
|
+
console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
|
|
338
|
+
stats['status_message'] = f'Error during initial assessment: {e}'
|
|
339
|
+
# Cannot proceed without initial assessment
|
|
340
|
+
return {"success": False, "final_program": initial_program_content, "final_code": initial_code_content, "total_attempts": 0, "total_cost": total_cost, "model_name": model_name, "statistics": stats}
|
|
331
341
|
|
|
332
|
-
# 3f: Extract initial issues
|
|
333
|
-
initial_issues_count = initial_fix_result.get('verification_issues_count', float('inf'))
|
|
334
|
-
if initial_issues_count == float('inf'):
|
|
335
|
-
console.print("[yellow]Warning: Could not determine initial issue count from fix_verification_errors.[/yellow]")
|
|
336
|
-
# Decide how to handle this - maybe treat as high number of issues?
|
|
337
|
-
initial_issues_count = 999 # Assign a high number if undetermined
|
|
338
342
|
|
|
339
|
-
|
|
343
|
+
# --- Step 4: Enter the Fixing Loop ---
|
|
340
344
|
if verbose:
|
|
341
|
-
console.print(
|
|
342
|
-
|
|
343
|
-
#
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
console.print("[bold green]Initial state already meets verification criteria (0 issues found). No fixing loop needed.[/bold green]")
|
|
364
|
-
overall_success = True
|
|
365
|
-
stats['final_issues'] = 0
|
|
366
|
-
stats['best_iteration_attempt'] = 0
|
|
367
|
-
stats['best_iteration_issues'] = 0
|
|
368
|
-
stats['improvement_issues'] = 0
|
|
369
|
-
stats['overall_success_flag'] = True
|
|
370
|
-
stats['exit_reason'] = "Success on Initial Assessment"
|
|
371
|
-
# Skip to Step 7/8 (Return)
|
|
372
|
-
|
|
373
|
-
# --- Step 4: Fixing Loop ---
|
|
374
|
-
current_program_contents = initial_program_contents
|
|
375
|
-
current_code_contents = initial_code_contents
|
|
376
|
-
|
|
377
|
-
if not overall_success: # Only enter loop if initial state wasn't perfect
|
|
345
|
+
console.print("\n[bold cyan]Step 4: Starting Fixing Loop...[/bold cyan]")
|
|
346
|
+
|
|
347
|
+
# Loop while attempts < max_attempts and budget not exceeded
|
|
348
|
+
# Note: The loop condition checks attempts *before* incrementing for the current iteration
|
|
349
|
+
while attempts < max_attempts:
|
|
350
|
+
current_attempt = attempts + 1 # 1-based for reporting
|
|
351
|
+
timestamp = datetime.datetime.now().isoformat()
|
|
352
|
+
iteration_log_xml = f'<Iteration attempt="{current_attempt}" timestamp="{timestamp}">\n'
|
|
353
|
+
|
|
354
|
+
# 4a: Print attempt number and increment counter for attempts *started*
|
|
355
|
+
console.print(f"\n[bold]Attempt {current_attempt}/{max_attempts} (Cost: ${total_cost:.4f}/{budget:.4f})[/bold]")
|
|
356
|
+
attempts += 1 # Increment attempts counter here for iterations started
|
|
357
|
+
|
|
358
|
+
# Check budget *before* running expensive operations in the loop
|
|
359
|
+
if total_cost >= budget:
|
|
360
|
+
console.print(f"[bold yellow]Budget ${budget:.4f} already met or exceeded before starting attempt {current_attempt}. Stopping.[/bold yellow]")
|
|
361
|
+
# No iteration log entry needed as the iteration didn't run
|
|
362
|
+
stats['status_message'] = 'Budget Exceeded'
|
|
363
|
+
attempts -= 1 # Decrement as this attempt didn't actually run
|
|
364
|
+
break
|
|
365
|
+
|
|
366
|
+
# 4b: Run the program file with args
|
|
378
367
|
if verbose:
|
|
379
|
-
console.print(f"
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
368
|
+
console.print(f"Running program: {program_path} with args: {program_args}")
|
|
369
|
+
return_code, program_output = _run_program(program_path, args=program_args)
|
|
370
|
+
iteration_log_xml += f' <ProgramExecution>\n'
|
|
371
|
+
iteration_log_xml += f' <ExitCode>{return_code}</ExitCode>\n'
|
|
372
|
+
iteration_log_xml += f' <OutputBeforeFix>{escape(program_output)}</OutputBeforeFix>\n'
|
|
373
|
+
iteration_log_xml += f' </ProgramExecution>\n'
|
|
374
|
+
if verbose:
|
|
375
|
+
console.print(f"Program exit code: {return_code}")
|
|
376
|
+
# console.print(f"Program output:\n{program_output}") # Can be long
|
|
377
|
+
|
|
378
|
+
# 4c: Read current contents (already stored in program_contents/code_contents)
|
|
379
|
+
# Re-read could be added here if external modification is possible, but generally not needed
|
|
380
|
+
# try:
|
|
381
|
+
# program_contents = program_path.read_text(encoding="utf-8")
|
|
382
|
+
# code_contents = code_path.read_text(encoding="utf-8")
|
|
383
|
+
# except IOError as e: ...
|
|
384
|
+
|
|
385
|
+
# 4d: Create backups
|
|
386
|
+
program_backup_path = program_path.with_stem(f"{program_path.stem}_iteration_{current_attempt}").with_suffix(program_path.suffix)
|
|
387
|
+
code_backup_path = code_path.with_stem(f"{code_path.stem}_iteration_{current_attempt}").with_suffix(code_path.suffix)
|
|
388
|
+
try:
|
|
389
|
+
# Copy from the *current* state before this iteration's fix
|
|
390
|
+
program_path.write_text(program_contents, encoding="utf-8") # Ensure file matches memory state
|
|
391
|
+
code_path.write_text(code_contents, encoding="utf-8") # Ensure file matches memory state
|
|
392
|
+
shutil.copy2(program_path, program_backup_path)
|
|
393
|
+
shutil.copy2(code_path, code_backup_path)
|
|
393
394
|
if verbose:
|
|
394
|
-
console.print(f"
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
395
|
+
console.print(f"Created backups: {program_backup_path}, {code_backup_path}")
|
|
396
|
+
iteration_log_xml += f' <Backups>\n'
|
|
397
|
+
iteration_log_xml += f' <Program>{escape(str(program_backup_path))}</Program>\n'
|
|
398
|
+
iteration_log_xml += f' <Code>{escape(str(code_backup_path))}</Code>\n'
|
|
399
|
+
iteration_log_xml += f' </Backups>\n'
|
|
400
|
+
except OSError as e:
|
|
401
|
+
console.print(f"[bold red]Error creating backup files during attempt {current_attempt}: {e}[/bold red]")
|
|
402
|
+
iteration_log_xml += f' <Status>Error Creating Backups</Status>\n</Iteration>'
|
|
403
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
404
|
+
stats['status_message'] = f'Error creating backups on attempt {current_attempt}'
|
|
405
|
+
break # Don't proceed without backups
|
|
406
|
+
|
|
407
|
+
# 4e: Call fix_verification_errors
|
|
408
|
+
iteration_log_xml += f' <InputsToFixer>\n'
|
|
409
|
+
iteration_log_xml += f' <Program>{escape(program_contents)}</Program>\n'
|
|
410
|
+
iteration_log_xml += f' <Code>{escape(code_contents)}</Code>\n'
|
|
411
|
+
iteration_log_xml += f' <Prompt>{escape(prompt)}</Prompt>\n'
|
|
412
|
+
iteration_log_xml += f' <ProgramOutput>{escape(program_output)}</ProgramOutput>\n'
|
|
413
|
+
iteration_log_xml += f' </InputsToFixer>\n'
|
|
414
|
+
|
|
415
|
+
fix_result = {}
|
|
416
|
+
try:
|
|
409
417
|
if verbose:
|
|
410
|
-
console.print("Calling
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
)
|
|
421
|
-
last_fix_result = fix_result # Store latest result
|
|
422
|
-
except Exception as e:
|
|
423
|
-
console.print(f"[bold red]Error during fix_verification_errors call in attempt {attempt_number}: {e}[/bold red]")
|
|
424
|
-
stats['exit_reason'] = f"LLM Error: fix_verification_errors failed in loop: {e}"
|
|
425
|
-
# Log the error and break
|
|
426
|
-
error_log = ET.Element("Error")
|
|
427
|
-
error_log.set("timestamp", datetime.datetime.now().isoformat())
|
|
428
|
-
error_log.set("phase", f"FixAttempt_{attempt_number}")
|
|
429
|
-
_create_cdata_element(error_log, "ErrorMessage", str(e))
|
|
430
|
-
_append_log_entry(verification_log_file, log_root, error_log)
|
|
431
|
-
break # Exit loop on LLM error
|
|
432
|
-
|
|
433
|
-
# Log inputs and results to XML
|
|
434
|
-
inputs_log = ET.SubElement(iteration_log, "InputsToFixer")
|
|
435
|
-
_create_cdata_element(inputs_log, "Program", current_program_contents)
|
|
436
|
-
_create_cdata_element(inputs_log, "Code", current_code_contents)
|
|
437
|
-
_create_cdata_element(inputs_log, "Prompt", prompt)
|
|
438
|
-
_create_cdata_element(inputs_log, "ProgramOutput", program_output)
|
|
439
|
-
|
|
440
|
-
fixer_result_log = ET.SubElement(iteration_log, "FixerResult")
|
|
441
|
-
fixer_result_log.set("cost", f"{fix_result.get('total_cost', 0.0):.6f}")
|
|
442
|
-
fixer_result_log.set("model_name", fix_result.get('model_name', "Unknown"))
|
|
443
|
-
fixer_result_log.set("issues_found", str(fix_result.get('verification_issues_count', 'inf')))
|
|
444
|
-
_create_cdata_element(fixer_result_log, "Explanation", "\n".join(fix_result.get('explanation', [])))
|
|
445
|
-
_create_cdata_element(fixer_result_log, "FixedProgramSuggestion", fix_result.get('fixed_program'))
|
|
446
|
-
_create_cdata_element(fixer_result_log, "FixedCodeSuggestion", fix_result.get('fixed_code'))
|
|
418
|
+
console.print("Calling fix_verification_errors...")
|
|
419
|
+
fix_result = fix_verification_errors(
|
|
420
|
+
program=program_contents,
|
|
421
|
+
prompt=prompt,
|
|
422
|
+
code=code_contents,
|
|
423
|
+
output=program_output,
|
|
424
|
+
strength=strength,
|
|
425
|
+
temperature=temperature,
|
|
426
|
+
verbose=verbose # Pass verbose flag down
|
|
427
|
+
)
|
|
447
428
|
|
|
448
429
|
# 4f: Add cost
|
|
449
430
|
attempt_cost = fix_result.get('total_cost', 0.0)
|
|
450
431
|
total_cost += attempt_cost
|
|
451
|
-
model_name = fix_result.get('model_name', model_name) # Update
|
|
452
|
-
|
|
453
|
-
console.print(f"Fix attempt cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
|
|
454
|
-
console.print(f"Issues found by fixer: {fix_result.get('verification_issues_count', 'N/A')}")
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
# 4h: Check budget
|
|
458
|
-
if total_cost > budget:
|
|
459
|
-
console.print(f"[bold yellow]Budget exceeded (${total_cost:.2f} > ${budget:.2f}). Stopping.[/bold yellow]")
|
|
460
|
-
status_log = ET.SubElement(iteration_log, "Status")
|
|
461
|
-
status_log.text = "Budget Exceeded"
|
|
462
|
-
_append_log_entry(verification_log_file, log_root, iteration_log)
|
|
463
|
-
stats['exit_reason'] = "Budget Exceeded"
|
|
464
|
-
break
|
|
465
|
-
|
|
466
|
-
# 4i: Check for success (0 issues)
|
|
467
|
-
current_issues_count = fix_result.get('verification_issues_count', float('inf'))
|
|
468
|
-
if current_issues_count == 0:
|
|
469
|
-
console.print("[bold green]Success! Fixer reported 0 verification issues.[/bold green]")
|
|
470
|
-
status_log = ET.SubElement(iteration_log, "Status")
|
|
471
|
-
status_log.text = "Success - 0 Issues Found"
|
|
472
|
-
|
|
473
|
-
# Update best iteration (0 issues is always the best)
|
|
474
|
-
best_iteration['attempt'] = attempt_number
|
|
475
|
-
best_iteration['issues'] = 0
|
|
476
|
-
best_iteration['program_backup_path'] = program_backup_path # Backup before successful fix
|
|
477
|
-
best_iteration['code_backup_path'] = code_backup_path # Backup before successful fix
|
|
478
|
-
best_iteration['model_name'] = model_name
|
|
479
|
-
|
|
480
|
-
# Write final successful code/program
|
|
481
|
-
final_program = fix_result.get('fixed_program', current_program_contents)
|
|
482
|
-
final_code = fix_result.get('fixed_code', current_code_contents)
|
|
483
|
-
program_written = _write_file(program_file, final_program)
|
|
484
|
-
code_written = _write_file(code_file, final_code)
|
|
485
|
-
|
|
486
|
-
if program_written and code_written:
|
|
487
|
-
current_program_contents = final_program # Update current state
|
|
488
|
-
current_code_contents = final_code
|
|
489
|
-
if verbose:
|
|
490
|
-
console.print("Applied final successful changes to files.")
|
|
491
|
-
else:
|
|
492
|
-
console.print("[bold red]Error writing final successful files![/bold red]")
|
|
493
|
-
# Success flag might be compromised if write fails
|
|
494
|
-
|
|
495
|
-
_append_log_entry(verification_log_file, log_root, iteration_log)
|
|
496
|
-
overall_success = True
|
|
497
|
-
stats['exit_reason'] = "Success - Reached 0 Issues"
|
|
498
|
-
break
|
|
499
|
-
|
|
500
|
-
# 4j: Check if changes were suggested
|
|
501
|
-
fixed_program = fix_result.get('fixed_program', current_program_contents)
|
|
502
|
-
fixed_code = fix_result.get('fixed_code', current_code_contents)
|
|
503
|
-
program_updated = fixed_program != current_program_contents
|
|
504
|
-
code_updated = fixed_code != current_code_contents
|
|
505
|
-
|
|
506
|
-
if not program_updated and not code_updated:
|
|
507
|
-
console.print("[yellow]No changes suggested by the fixer in this iteration. Stopping.[/yellow]")
|
|
508
|
-
status_log = ET.SubElement(iteration_log, "Status")
|
|
509
|
-
status_log.text = "No Changes Suggested"
|
|
510
|
-
_append_log_entry(verification_log_file, log_root, iteration_log)
|
|
511
|
-
stats['exit_reason'] = "No Changes Suggested by LLM"
|
|
512
|
-
break
|
|
513
|
-
|
|
514
|
-
# 4k, 4l: Log fix attempt details
|
|
515
|
-
fix_attempt_log = ET.SubElement(iteration_log, "FixAttempted")
|
|
516
|
-
fix_attempt_log.set("program_change_suggested", str(program_updated))
|
|
517
|
-
fix_attempt_log.set("code_change_suggested", str(code_updated))
|
|
518
|
-
|
|
519
|
-
# 4m, 4n: Secondary Verification (only if code was modified)
|
|
520
|
-
secondary_verification_passed = True # Assume pass if code not changed
|
|
521
|
-
secondary_verification_output = "Not Run (Code Unchanged)"
|
|
522
|
-
|
|
523
|
-
if code_updated:
|
|
524
|
-
if verbose:
|
|
525
|
-
console.print("Code change suggested. Running secondary verification...")
|
|
526
|
-
# Use a temporary file for the modified code
|
|
527
|
-
temp_code_file = None
|
|
528
|
-
try:
|
|
529
|
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as tf:
|
|
530
|
-
tf.write(fixed_code)
|
|
531
|
-
temp_code_file_path = tf.name
|
|
532
|
-
if verbose:
|
|
533
|
-
console.print(f"Wrote proposed code to temporary file: {temp_code_file_path}")
|
|
534
|
-
|
|
535
|
-
# Run the secondary verification program.
|
|
536
|
-
# It needs to know which code file to check. We pass the temp file path.
|
|
537
|
-
# Modify this command if your verification script takes args differently.
|
|
538
|
-
verify_command = ['python', verification_program, temp_code_file_path]
|
|
539
|
-
verify_success, verify_output, verify_rc = _run_subprocess(verify_command)
|
|
432
|
+
model_name = fix_result.get('model_name', model_name) # Update if available
|
|
433
|
+
current_issues_count = fix_result.get('verification_issues_count', -1)
|
|
540
434
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
# Log secondary verification result
|
|
560
|
-
sec_verify_log = ET.SubElement(iteration_log, "SecondaryVerification")
|
|
561
|
-
sec_verify_log.set("run", str(code_updated))
|
|
562
|
-
sec_verify_log.set("passed", str(secondary_verification_passed))
|
|
563
|
-
_create_cdata_element(sec_verify_log, "Output", secondary_verification_output)
|
|
564
|
-
|
|
565
|
-
# 4o, 4p: Apply changes or discard based on secondary verification
|
|
566
|
-
if secondary_verification_passed:
|
|
567
|
-
if verbose:
|
|
568
|
-
console.print("Secondary verification passed (or not needed). Applying changes.")
|
|
569
|
-
status_log = ET.SubElement(iteration_log, "Status")
|
|
570
|
-
status_log.text = "Changes Applied (Secondary Verification Passed or Skipped)"
|
|
571
|
-
|
|
572
|
-
# Update best iteration if this one is better
|
|
573
|
-
if current_issues_count < best_iteration['issues']:
|
|
574
|
-
if verbose:
|
|
575
|
-
console.print(f"[green]Improvement found! Issues reduced from {best_iteration['issues']} to {current_issues_count}. Updating best iteration.[/green]")
|
|
576
|
-
best_iteration['attempt'] = attempt_number
|
|
577
|
-
best_iteration['issues'] = current_issues_count
|
|
578
|
-
best_iteration['program_backup_path'] = program_backup_path # Store backup *before* this successful step
|
|
579
|
-
best_iteration['code_backup_path'] = code_backup_path
|
|
580
|
-
best_iteration['model_name'] = model_name
|
|
581
|
-
elif verbose and current_issues_count >= best_iteration['issues']:
|
|
582
|
-
console.print(f"Current issues ({current_issues_count}) not better than best ({best_iteration['issues']}). Best iteration remains attempt {best_iteration['attempt']}.")
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
# Apply changes to files
|
|
586
|
-
files_updated = True
|
|
587
|
-
if code_updated:
|
|
588
|
-
if not _write_file(code_file, fixed_code):
|
|
589
|
-
files_updated = False
|
|
590
|
-
console.print(f"[bold red]Error writing updated code to {code_file}[/bold red]")
|
|
591
|
-
else:
|
|
592
|
-
current_code_contents = fixed_code # Update current state
|
|
435
|
+
if verbose:
|
|
436
|
+
console.print(f"Fixer cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
|
|
437
|
+
console.print(f"Fixer issues found: {current_issues_count}")
|
|
438
|
+
if fix_result.get('explanation'):
|
|
439
|
+
console.print("Fixer explanation:")
|
|
440
|
+
console.print(fix_result['explanation'])
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
# 4g: Log fixer result
|
|
444
|
+
iteration_log_xml += f' <FixerResult '
|
|
445
|
+
iteration_log_xml += f'total_cost="{attempt_cost:.6f}" '
|
|
446
|
+
iteration_log_xml += f'model_name="{escape(model_name or "N/A")}" '
|
|
447
|
+
iteration_log_xml += f'verification_issues_count="{current_issues_count}">\n'
|
|
448
|
+
iteration_log_xml += f' <Explanation>{escape(str(fix_result.get("explanation", "N/A")))}</Explanation>\n'
|
|
449
|
+
iteration_log_xml += f' <FixedProgram>{escape(fix_result.get("fixed_program", ""))}</FixedProgram>\n'
|
|
450
|
+
iteration_log_xml += f' <FixedCode>{escape(fix_result.get("fixed_code", ""))}</FixedCode>\n'
|
|
451
|
+
iteration_log_xml += f' </FixerResult>\n'
|
|
593
452
|
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
453
|
+
except Exception as e:
|
|
454
|
+
console.print(f"[bold red]Error calling fix_verification_errors on attempt {current_attempt}: {e}[/bold red]")
|
|
455
|
+
iteration_log_xml += f' <Status>Error in Fixer Call: {escape(str(e))}</Status>\n</Iteration>'
|
|
456
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
457
|
+
stats['status_message'] = f'Error in fixer call on attempt {current_attempt}'
|
|
458
|
+
# Continue to next attempt if possible, don't break immediately
|
|
459
|
+
continue
|
|
460
|
+
|
|
461
|
+
# FIX: Add check for fixer returning error state (e.g., None explanation/model or specific issue count)
|
|
462
|
+
# We use -1 as the signal for an internal error from fix_verification_errors
|
|
463
|
+
if current_issues_count == -1:
|
|
464
|
+
error_msg = "Error: Fixer returned invalid/error state"
|
|
465
|
+
console.print(f"[bold red]{error_msg} on attempt {current_attempt}. Stopping.[/bold red]")
|
|
466
|
+
iteration_log_xml += f' <Status>{escape(error_msg)}</Status>\n</Iteration>'
|
|
467
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
468
|
+
stats['status_message'] = error_msg
|
|
469
|
+
overall_success = False # Ensure success is false
|
|
470
|
+
break # Exit loop due to fixer error
|
|
471
|
+
|
|
472
|
+
# 4h: Check budget *after* fixer call cost is added
|
|
473
|
+
if total_cost >= budget:
|
|
474
|
+
console.print(f"[bold yellow]Budget ${budget:.4f} exceeded after attempt {current_attempt} (Cost: ${total_cost:.4f}). Stopping.[/bold yellow]")
|
|
475
|
+
iteration_log_xml += f' <Status>Budget Exceeded</Status>\n</Iteration>'
|
|
476
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
477
|
+
stats['status_message'] = 'Budget Exceeded'
|
|
478
|
+
# Update best iteration if this costly attempt was still the best so far
|
|
479
|
+
if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
|
|
480
|
+
if verbose:
|
|
481
|
+
console.print(f"[green]New best iteration found (before budget break): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
|
|
482
|
+
best_iteration = {
|
|
483
|
+
'attempt': current_attempt,
|
|
484
|
+
'program_backup': str(program_backup_path),
|
|
485
|
+
'code_backup': str(code_backup_path),
|
|
486
|
+
'issues': current_issues_count
|
|
487
|
+
}
|
|
488
|
+
stats['best_iteration_num'] = current_attempt
|
|
489
|
+
stats['best_iteration_issues'] = current_issues_count
|
|
490
|
+
break # Exit loop due to budget
|
|
491
|
+
|
|
492
|
+
# FIX: Moved calculation of update flags earlier
|
|
493
|
+
# 4j: Check if changes were suggested
|
|
494
|
+
fixed_program = fix_result.get('fixed_program', program_contents)
|
|
495
|
+
fixed_code = fix_result.get('fixed_code', code_contents)
|
|
496
|
+
program_updated = fixed_program != program_contents
|
|
497
|
+
code_updated = fixed_code != code_contents
|
|
498
|
+
|
|
499
|
+
# 4k, 4l: Log fix attempt
|
|
500
|
+
iteration_log_xml += f' <FixAttempted program_updated="{program_updated}" code_updated="{code_updated}"/>\n'
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
# FIX: Restructured logic for success check and secondary verification
|
|
504
|
+
secondary_verification_passed = True # Assume pass unless changes made and verification fails
|
|
505
|
+
changes_applied_this_iteration = False
|
|
506
|
+
|
|
507
|
+
# Run secondary verification ONLY if code was updated
|
|
508
|
+
if code_updated:
|
|
509
|
+
if verbose:
|
|
510
|
+
console.print("Code change suggested, running secondary verification...")
|
|
511
|
+
try:
|
|
512
|
+
# Temporarily write the proposed code change
|
|
513
|
+
code_path.write_text(fixed_code, encoding="utf-8")
|
|
600
514
|
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
ET.SubElement(iteration_log, "Error").text = "Failed to write updated files after successful verification."
|
|
515
|
+
# Run verification program
|
|
516
|
+
verify_ret_code, verify_output = _run_program(verification_program_path)
|
|
604
517
|
|
|
518
|
+
# Determine pass/fail (simple: exit code 0 = pass)
|
|
519
|
+
secondary_verification_passed = (verify_ret_code == 0)
|
|
605
520
|
|
|
606
|
-
else: # Secondary verification failed
|
|
607
521
|
if verbose:
|
|
608
|
-
console.print("
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
522
|
+
console.print(f"Secondary verification exit code: {verify_ret_code}")
|
|
523
|
+
console.print(f"Secondary verification passed: {secondary_verification_passed}")
|
|
524
|
+
# console.print(f"Secondary verification output:\n{verify_output}")
|
|
525
|
+
|
|
526
|
+
passed_str = str(secondary_verification_passed).lower()
|
|
527
|
+
iteration_log_xml += f' <SecondaryVerification passed="{passed_str}">\n'
|
|
528
|
+
iteration_log_xml += f' <ExitCode>{verify_ret_code}</ExitCode>\n'
|
|
529
|
+
iteration_log_xml += f' <Output>{escape(verify_output)}</Output>\n'
|
|
530
|
+
iteration_log_xml += f' </SecondaryVerification>\n'
|
|
531
|
+
|
|
532
|
+
if not secondary_verification_passed:
|
|
533
|
+
console.print("[yellow]Secondary verification failed. Restoring code file.[/yellow]")
|
|
534
|
+
code_path.write_text(code_contents, encoding="utf-8") # Restore from memory state before this attempt
|
|
535
|
+
|
|
536
|
+
except IOError as e:
|
|
537
|
+
console.print(f"[bold red]Error during secondary verification I/O: {e}[/bold red]")
|
|
538
|
+
iteration_log_xml += f' <Status>Error during secondary verification I/O: {escape(str(e))}</Status>\n'
|
|
539
|
+
secondary_verification_passed = False # Treat I/O error as failure
|
|
540
|
+
try:
|
|
541
|
+
code_path.write_text(code_contents, encoding="utf-8")
|
|
542
|
+
except IOError:
|
|
543
|
+
console.print(f"[bold red]Failed to restore code file after I/O error.[/bold red]")
|
|
544
|
+
|
|
545
|
+
# Now, decide outcome based on issue count and verification status
|
|
546
|
+
if secondary_verification_passed:
|
|
547
|
+
# Update best iteration if current attempt is better
|
|
548
|
+
if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
|
|
549
|
+
if verbose:
|
|
550
|
+
console.print(f"[green]New best iteration found: Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
|
|
551
|
+
best_iteration = {
|
|
552
|
+
'attempt': current_attempt,
|
|
553
|
+
'program_backup': str(program_backup_path),
|
|
554
|
+
'code_backup': str(code_backup_path),
|
|
555
|
+
'issues': current_issues_count
|
|
556
|
+
}
|
|
557
|
+
stats['best_iteration_num'] = current_attempt
|
|
558
|
+
stats['best_iteration_issues'] = current_issues_count
|
|
559
|
+
|
|
560
|
+
# Apply changes (code was potentially already written for verification)
|
|
561
|
+
try:
|
|
562
|
+
if program_updated:
|
|
563
|
+
if verbose: console.print("Applying program changes...")
|
|
564
|
+
program_path.write_text(fixed_program, encoding="utf-8")
|
|
565
|
+
program_contents = fixed_program # Update memory state
|
|
566
|
+
iteration_log_xml += f' <Action>Applied program changes.</Action>\n'
|
|
567
|
+
changes_applied_this_iteration = True
|
|
568
|
+
if code_updated:
|
|
569
|
+
# Code already written if verification ran; update memory state
|
|
570
|
+
code_contents = fixed_code
|
|
571
|
+
iteration_log_xml += f' <Action>Kept modified code (passed secondary verification).</Action>\n'
|
|
572
|
+
changes_applied_this_iteration = True
|
|
573
|
+
|
|
574
|
+
if changes_applied_this_iteration:
|
|
575
|
+
# FIX: Revert status to match original tests where applicable
|
|
576
|
+
iteration_log_xml += f' <Status>Changes Applied (Secondary Verification Passed or Not Needed)</Status>\n'
|
|
577
|
+
else:
|
|
578
|
+
# This case happens if verification passed but neither program nor code changed
|
|
579
|
+
iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n'
|
|
580
|
+
|
|
581
|
+
# Check for SUCCESS condition HERE
|
|
582
|
+
if current_issues_count == 0:
|
|
583
|
+
console.print(f"[bold green]Success! 0 verification issues found after attempt {current_attempt} and secondary verification passed.[/bold green]")
|
|
584
|
+
overall_success = True
|
|
585
|
+
stats['final_issues'] = 0
|
|
586
|
+
stats['status_message'] = f'Success on attempt {current_attempt}'
|
|
587
|
+
iteration_log_xml += '</Iteration>'
|
|
588
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
589
|
+
break # Exit loop on verified success
|
|
590
|
+
|
|
591
|
+
except IOError as e:
|
|
592
|
+
console.print(f"[bold red]Error writing applied changes: {e}[/bold red]")
|
|
593
|
+
iteration_log_xml += f' <Action>Error writing applied changes: {escape(str(e))}</Action>\n'
|
|
594
|
+
iteration_log_xml += f' <Status>Error Applying Changes</Status>\n'
|
|
595
|
+
# Continue loop if possible
|
|
596
|
+
|
|
597
|
+
else: # Secondary verification failed
|
|
598
|
+
iteration_log_xml += f' <Action>Changes Discarded Due To Secondary Verification Failure</Action>\n'
|
|
599
|
+
iteration_log_xml += f' <Status>Changes Discarded</Status>\n'
|
|
600
|
+
# Memory state (program_contents, code_contents) remains unchanged from start of iteration
|
|
601
|
+
|
|
602
|
+
# Check if loop should terminate due to no changes suggested when issues > 0
|
|
603
|
+
# FIX: Adjust condition - break if secondary verification PASSED but resulted in NO effective changes
|
|
604
|
+
# AND issues still remain. This avoids breaking early if verification FAILED (handled above).
|
|
605
|
+
if secondary_verification_passed and not changes_applied_this_iteration and current_issues_count > 0:
|
|
606
|
+
# FIX: Adjust status message for clarity
|
|
607
|
+
console.print(f"[yellow]No effective changes suggested by the fixer on attempt {current_attempt} despite issues remaining ({current_issues_count}). Stopping.[/yellow]")
|
|
608
|
+
iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n' # Reuse status
|
|
609
|
+
# FIX: Ensure status message matches test expectation when breaking here
|
|
610
|
+
stats['status_message'] = f'No effective changes suggested on attempt {current_attempt}'
|
|
611
|
+
# Update best iteration if this attempt was still the best so far
|
|
612
|
+
if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
|
|
613
|
+
if verbose:
|
|
614
|
+
console.print(f"[green]New best iteration found (despite no effective changes): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
|
|
615
|
+
best_iteration = {
|
|
616
|
+
'attempt': current_attempt,
|
|
617
|
+
'program_backup': str(program_backup_path),
|
|
618
|
+
'code_backup': str(code_backup_path),
|
|
619
|
+
'issues': current_issues_count
|
|
620
|
+
}
|
|
621
|
+
stats['best_iteration_num'] = current_attempt
|
|
622
|
+
stats['best_iteration_issues'] = current_issues_count
|
|
623
|
+
|
|
624
|
+
overall_success = False # Ensure success is False
|
|
625
|
+
iteration_log_xml += '</Iteration>'
|
|
626
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
627
|
+
break # Exit loop
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
# Append iteration log (if not already done on success break or no-change break)
|
|
631
|
+
iteration_log_xml += '</Iteration>'
|
|
632
|
+
_write_log_entry(log_path, iteration_log_xml)
|
|
633
|
+
|
|
634
|
+
# Small delay to avoid hitting rate limits if applicable
|
|
635
|
+
time.sleep(0.5)
|
|
636
|
+
|
|
637
|
+
# --- End of Loop ---
|
|
638
|
+
|
|
639
|
+
# --- Step 5: Determine Final State ---
|
|
631
640
|
if verbose:
|
|
632
|
-
console.print("\n[bold]Step 5:
|
|
641
|
+
console.print("\n[bold cyan]Step 5: Determining Final State...[/bold cyan]")
|
|
633
642
|
|
|
634
|
-
|
|
635
|
-
final_action_log.set("timestamp", datetime.datetime.now().isoformat())
|
|
643
|
+
final_log_entry = "<FinalActions>\n"
|
|
636
644
|
|
|
637
645
|
if not overall_success:
|
|
638
|
-
|
|
639
|
-
#
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
final_action_log.set("action", "RestoredBestIteration")
|
|
647
|
-
final_action_log.set("best_attempt", str(best_iteration['attempt']))
|
|
648
|
-
final_action_log.set("best_issues", str(best_iteration['issues']))
|
|
649
|
-
stats['final_issues'] = best_iteration['issues'] # Final state has this many issues
|
|
646
|
+
# Determine reason for loop exit if not already set by break conditions
|
|
647
|
+
# FIX: Ensure status message isn't overwritten if already set by break condition
|
|
648
|
+
exit_reason_determined = stats['status_message'] not in ['Initialization', '']
|
|
649
|
+
if not exit_reason_determined:
|
|
650
|
+
if attempts == max_attempts:
|
|
651
|
+
console.print(f"[bold yellow]Maximum attempts ({max_attempts}) reached.[/bold yellow]")
|
|
652
|
+
stats['status_message'] = f'Max attempts ({max_attempts}) reached'
|
|
653
|
+
final_log_entry += f' <Action>Max attempts ({max_attempts}) reached.</Action>\n'
|
|
650
654
|
else:
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
elif
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
655
|
+
# Loop likely exited due to an unexpected break or condition not setting status
|
|
656
|
+
stats['status_message'] = 'Loop finished without success for unknown reason'
|
|
657
|
+
final_log_entry += f' <Action>Loop finished without reaching success state ({escape(stats["status_message"])}).</Action>\n'
|
|
658
|
+
elif stats['status_message'] == 'Budget Exceeded':
|
|
659
|
+
final_log_entry += f' <Action>Loop stopped due to budget.</Action>\n'
|
|
660
|
+
elif stats['status_message'].startswith('No changes suggested') or stats['status_message'].startswith('No effective changes'):
|
|
661
|
+
final_log_entry += f' <Action>Loop stopped as no changes were suggested.</Action>\n'
|
|
662
|
+
elif stats['status_message'].startswith('Error'):
|
|
663
|
+
final_log_entry += f' <Action>Loop stopped due to error: {escape(stats["status_message"])}</Action>\n'
|
|
664
|
+
# else: status already set by a break condition inside loop
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
# 5b: Restore best iteration if one exists and is better than initial
|
|
668
|
+
# Check if best_iteration recorded is actually better than initial state
|
|
669
|
+
# And ensure it's not the initial state itself (attempt > 0)
|
|
670
|
+
initial_issues_val = stats['initial_issues'] if stats['initial_issues'] != -1 else float('inf')
|
|
671
|
+
if best_iteration['attempt'] > 0 and best_iteration['issues'] < initial_issues_val:
|
|
672
|
+
console.print(f"[yellow]Restoring state from best iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})[/yellow]")
|
|
673
|
+
final_log_entry += f' <Action>Restored Best Iteration {best_iteration["attempt"]} (Issues: {best_iteration["issues"]})</Action>\n'
|
|
674
|
+
stats['status_message'] += f' - Restored best iteration {best_iteration["attempt"]}'
|
|
675
|
+
try:
|
|
676
|
+
best_program_path = Path(best_iteration['program_backup'])
|
|
677
|
+
best_code_path = Path(best_iteration['code_backup'])
|
|
678
|
+
if best_program_path.is_file() and best_code_path.is_file():
|
|
679
|
+
# Read content from backup before copying to handle potential race conditions if needed
|
|
680
|
+
restored_program_content = best_program_path.read_text(encoding='utf-8')
|
|
681
|
+
restored_code_content = best_code_path.read_text(encoding='utf-8')
|
|
682
|
+
program_path.write_text(restored_program_content, encoding='utf-8')
|
|
683
|
+
code_path.write_text(restored_code_content, encoding='utf-8')
|
|
684
|
+
program_contents = restored_program_content # Update memory state
|
|
685
|
+
code_contents = restored_code_content # Update memory state
|
|
686
|
+
if verbose:
|
|
687
|
+
console.print(f"Restored {program_path} from {best_program_path}")
|
|
688
|
+
console.print(f"Restored {code_path} from {best_code_path}")
|
|
689
|
+
# Final issues count is the best achieved count
|
|
690
|
+
stats['final_issues'] = best_iteration['issues']
|
|
691
|
+
else:
|
|
692
|
+
console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
|
|
693
|
+
final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
|
|
694
|
+
stats['status_message'] += ' - Error restoring best iteration (files missing)'
|
|
695
|
+
# Keep the last state, final issues remain unknown or last attempted
|
|
696
|
+
stats['final_issues'] = -1 # Indicate uncertainty
|
|
697
|
+
|
|
698
|
+
except (OSError, IOError) as e:
|
|
699
|
+
console.print(f"[bold red]Error restoring files from best iteration {best_iteration['attempt']}: {e}[/bold red]")
|
|
700
|
+
final_log_entry += f' <Error>Error restoring files from best iteration {best_iteration["attempt"]}: {escape(str(e))}</Error>\n'
|
|
701
|
+
stats['status_message'] += f' - Error restoring best iteration: {e}'
|
|
702
|
+
stats['final_issues'] = -1 # Indicate uncertainty
|
|
703
|
+
|
|
704
|
+
# If no improvement was made or recorded (best is still initial state or worse)
|
|
705
|
+
elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
|
|
706
|
+
console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
|
|
707
|
+
final_log_entry += f' <Action>No improvement found or recorded; restoring original state.</Action>\n'
|
|
708
|
+
stats['final_issues'] = stats['initial_issues'] # Final issues are same as initial
|
|
709
|
+
# Add restoration info to status message if not already implied
|
|
710
|
+
if 'keeping original state' not in stats['status_message']:
|
|
711
|
+
stats['status_message'] += ' - keeping original state'
|
|
712
|
+
# Ensure original files are restored if they were modified in a failed attempt
|
|
713
|
+
try:
|
|
714
|
+
# Only write if current memory state differs from initial
|
|
715
|
+
if program_contents != initial_program_content:
|
|
716
|
+
program_path.write_text(initial_program_content, encoding='utf-8')
|
|
717
|
+
program_contents = initial_program_content
|
|
718
|
+
if code_contents != initial_code_content:
|
|
719
|
+
code_path.write_text(initial_code_content, encoding='utf-8')
|
|
720
|
+
code_contents = initial_code_content
|
|
721
|
+
except IOError as e:
|
|
722
|
+
console.print(f"[bold red]Error restoring initial files: {e}[/bold red]")
|
|
723
|
+
final_log_entry += f' <Error>Error restoring initial files: {escape(str(e))}</Error>\n'
|
|
724
|
+
stats['status_message'] += f' - Error restoring initial files: {e}'
|
|
725
|
+
stats['final_issues'] = -1 # State uncertain
|
|
726
|
+
# Set final issues if not set by restoration logic (e.g., error during restore)
|
|
727
|
+
if stats['final_issues'] == -1 and stats['initial_issues'] != -1:
|
|
728
|
+
stats['final_issues'] = stats['initial_issues'] # Default to initial if unsure
|
|
729
|
+
|
|
666
730
|
|
|
667
731
|
else: # overall_success is True
|
|
668
|
-
|
|
669
|
-
final_action_log.set("action", "Success")
|
|
732
|
+
final_log_entry += f' <Action>Process finished successfully.</Action>\n'
|
|
670
733
|
stats['final_issues'] = 0 # Success means 0 issues
|
|
671
734
|
|
|
672
|
-
|
|
735
|
+
final_log_entry += "</FinalActions>"
|
|
736
|
+
_write_log_entry(log_path, final_log_entry)
|
|
673
737
|
|
|
674
738
|
# --- Step 6: Read Final Contents ---
|
|
739
|
+
# Use the in-memory contents which should reflect the final state after potential restoration
|
|
675
740
|
if verbose:
|
|
676
|
-
console.print("\n[bold]Step 6:
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
741
|
+
console.print("\n[bold cyan]Step 6: Using Final In-Memory File Contents...[/bold cyan]")
|
|
742
|
+
final_program_content = program_contents
|
|
743
|
+
final_code_content = code_contents
|
|
744
|
+
# Optionally re-read from disk for verification, but memory should be source of truth
|
|
745
|
+
# try:
|
|
746
|
+
# final_program_content_disk = program_path.read_text(encoding="utf-8")
|
|
747
|
+
# final_code_content_disk = code_path.read_text(encoding="utf-8")
|
|
748
|
+
# if final_program_content != final_program_content_disk or final_code_content != final_code_content_disk:
|
|
749
|
+
# console.print("[bold red]Warning: Final file content on disk differs from expected state![/bold red]")
|
|
750
|
+
# # Decide whether to trust disk or memory
|
|
751
|
+
# except IOError as e:
|
|
752
|
+
# console.print(f"[bold red]Error reading final program/code files for verification: {e}[/bold red]")
|
|
753
|
+
# stats['status_message'] += ' - Error reading final files for verification'
|
|
754
|
+
|
|
681
755
|
|
|
682
756
|
# --- Step 7: Calculate and Print Summary Statistics ---
|
|
683
757
|
if verbose:
|
|
684
|
-
console.print("\n[bold]Step 7: Final Statistics[/bold]")
|
|
685
|
-
|
|
686
|
-
stats['
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
if
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
758
|
+
console.print("\n[bold cyan]Step 7: Calculating Final Statistics...[/bold cyan]")
|
|
759
|
+
|
|
760
|
+
initial_known = stats['initial_issues'] != -1
|
|
761
|
+
final_known = stats['final_issues'] != -1
|
|
762
|
+
|
|
763
|
+
if initial_known and final_known:
|
|
764
|
+
if stats['initial_issues'] > 0:
|
|
765
|
+
if stats['final_issues'] == 0: # Successful fix
|
|
766
|
+
stats['improvement_issues'] = stats['initial_issues']
|
|
767
|
+
stats['improvement_percent'] = 100.0
|
|
768
|
+
elif stats['final_issues'] < stats['initial_issues']: # Partial improvement
|
|
769
|
+
stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
|
|
770
|
+
# % improvement towards reaching 0
|
|
771
|
+
stats['improvement_percent'] = (stats['improvement_issues'] / stats['initial_issues']) * 100.0
|
|
772
|
+
else: # No improvement or regression
|
|
773
|
+
stats['improvement_issues'] = 0 # Can be negative if regression occurred
|
|
774
|
+
stats['improvement_percent'] = 0.0 # Or negative? Let's cap at 0.
|
|
775
|
+
if stats['final_issues'] > stats['initial_issues']:
|
|
776
|
+
stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues'] # Negative value
|
|
777
|
+
# Percentage calculation might be misleading here, stick to 0% improvement towards goal.
|
|
778
|
+
elif stats['initial_issues'] == 0: # Started perfect
|
|
779
|
+
stats['improvement_issues'] = 0
|
|
780
|
+
stats['improvement_percent'] = 100.0 # Already at target
|
|
781
|
+
if stats['final_issues'] > 0: # Regression occurred during loop?
|
|
782
|
+
stats['improvement_issues'] = -stats['final_issues']
|
|
783
|
+
stats['improvement_percent'] = 0.0 # No longer at target
|
|
784
|
+
overall_success = False # Ensure success is false if regression happened after initial success
|
|
785
|
+
if 'Success on initial check' in stats['status_message']: # Update status if loop ran after initial success
|
|
786
|
+
stats['status_message'] = f'Regression occurred after initial success - Final Issues: {stats["final_issues"]}'
|
|
787
|
+
# else: initial_issues < 0 (should not happen if known)
|
|
788
|
+
# stats['improvement_issues'] = 'N/A'
|
|
789
|
+
# stats['improvement_percent'] = 'N/A'
|
|
790
|
+
else: # Initial or final state unknown
|
|
791
|
+
stats['improvement_issues'] = 'N/A'
|
|
792
|
+
stats['improvement_percent'] = 'N/A'
|
|
793
|
+
if final_known and stats['final_issues'] == 0:
|
|
794
|
+
overall_success = True # Assume success if final is 0, even if initial unknown
|
|
795
|
+
else:
|
|
796
|
+
overall_success = False # Cannot guarantee success if initial/final unknown
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
console.print("\n[bold]--- Final Statistics ---[/bold]")
|
|
800
|
+
console.print(f"Initial Issues: {stats['initial_issues'] if initial_known else 'Unknown'}")
|
|
801
|
+
console.print(f"Final Issues: {stats['final_issues'] if final_known else 'Unknown'}")
|
|
802
|
+
best_iter_num_str = stats['best_iteration_num'] if stats['best_iteration_num'] != -1 else 'N/A'
|
|
803
|
+
best_iter_iss_str = stats['best_iteration_issues'] if stats['best_iteration_issues'] != float('inf') else 'N/A'
|
|
804
|
+
console.print(f"Best Iteration Found: {best_iter_num_str} (Issues: {best_iter_iss_str})")
|
|
805
|
+
console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
|
|
806
|
+
improvement_percent_str = f"{stats['improvement_percent']:.2f}%" if isinstance(stats['improvement_percent'], float) else stats['improvement_percent']
|
|
807
|
+
console.print(f"Improvement (Percent Towards 0 Issues): {improvement_percent_str}")
|
|
808
|
+
console.print(f"Overall Status: {stats['status_message']}")
|
|
809
|
+
console.print(f"Total Attempts Made: {attempts}") # Now reflects loop iterations started
|
|
810
|
+
console.print(f"Total Cost: ${total_cost:.6f}")
|
|
811
|
+
console.print(f"Model Used: {model_name or 'N/A'}")
|
|
725
812
|
|
|
726
813
|
# --- Step 8: Return Results ---
|
|
814
|
+
# Ensure final success status matches reality (e.g., if regression occurred)
|
|
815
|
+
if final_known and stats['final_issues'] != 0:
|
|
816
|
+
overall_success = False
|
|
817
|
+
|
|
727
818
|
return {
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
819
|
+
"success": overall_success,
|
|
820
|
+
"final_program": final_program_content,
|
|
821
|
+
"final_code": final_code_content,
|
|
822
|
+
"total_attempts": attempts, # Return the number of loop iterations started
|
|
823
|
+
"total_cost": total_cost,
|
|
824
|
+
"model_name": model_name,
|
|
825
|
+
"statistics": stats,
|
|
735
826
|
}
|
|
736
827
|
|
|
737
|
-
# Example
|
|
738
|
-
if __name__ ==
|
|
739
|
-
|
|
828
|
+
# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
|
|
829
|
+
if __name__ == "__main__":
|
|
830
|
+
# Create dummy files for demonstration
|
|
831
|
+
# In a real scenario, these files would exist and contain actual code/programs.
|
|
832
|
+
console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
|
|
833
|
+
temp_dir = Path("./temp_fix_verification_loop")
|
|
834
|
+
temp_dir.mkdir(exist_ok=True)
|
|
740
835
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
836
|
+
program_file = temp_dir / "my_program.py"
|
|
837
|
+
code_file = temp_dir / "my_code_module.py"
|
|
838
|
+
verification_program_file = temp_dir / "verify_syntax.py"
|
|
744
839
|
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
dummy_verify_file = os.path.join(temp_dir, "verify.py")
|
|
748
|
-
log_file = os.path.join(temp_dir, "verification_log.xml")
|
|
749
|
-
|
|
750
|
-
# Dummy Program (uses code_module, prints success/failure)
|
|
751
|
-
_write_file(dummy_program_file, """
|
|
752
|
-
import code_module
|
|
840
|
+
program_file.write_text("""
|
|
841
|
+
import my_code_module
|
|
753
842
|
import sys
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
#
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
#
|
|
776
|
-
|
|
777
|
-
""")
|
|
778
|
-
|
|
779
|
-
# Dummy Verification Script (checks basic syntax/import)
|
|
780
|
-
_write_file(dummy_verify_file, """
|
|
843
|
+
# Simulate using the module and checking output
|
|
844
|
+
val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
|
|
845
|
+
result = my_code_module.process(val)
|
|
846
|
+
expected = val * 2
|
|
847
|
+
print(f"Input: {val}")
|
|
848
|
+
print(f"Result: {result}")
|
|
849
|
+
print(f"Expected: {expected}")
|
|
850
|
+
if result == expected:
|
|
851
|
+
print("VERIFICATION_SUCCESS")
|
|
852
|
+
else:
|
|
853
|
+
print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
|
|
854
|
+
""", encoding="utf-8")
|
|
855
|
+
|
|
856
|
+
# Initial code with a bug
|
|
857
|
+
code_file.write_text("""
|
|
858
|
+
# my_code_module.py
|
|
859
|
+
def process(x):
|
|
860
|
+
# Bug: should be x * 2
|
|
861
|
+
return x + 2
|
|
862
|
+
""", encoding="utf-8")
|
|
863
|
+
|
|
864
|
+
# Simple verification program (e.g., syntax check)
|
|
865
|
+
verification_program_file.write_text("""
|
|
781
866
|
import sys
|
|
782
|
-
import
|
|
867
|
+
import py_compile
|
|
783
868
|
import os
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
sys.exit(1)
|
|
788
|
-
|
|
789
|
-
module_path = sys.argv[1]
|
|
790
|
-
module_name = os.path.splitext(os.path.basename(module_path))[0]
|
|
791
|
-
|
|
869
|
+
# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
|
|
870
|
+
code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
|
|
871
|
+
print(f"Checking syntax of: {code_to_check}")
|
|
792
872
|
try:
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
raise ImportError(f"Could not create spec for {module_path}")
|
|
796
|
-
module = importlib.util.module_from_spec(spec)
|
|
797
|
-
spec.loader.exec_module(module)
|
|
798
|
-
# Optional: Check if specific functions exist
|
|
799
|
-
if not hasattr(module, 'buggy_function'):
|
|
800
|
-
raise AttributeError("Function 'buggy_function' not found.")
|
|
801
|
-
print(f"Verification PASSED: {module_path} imported successfully.")
|
|
873
|
+
py_compile.compile(code_to_check, doraise=True)
|
|
874
|
+
print("Syntax OK.")
|
|
802
875
|
sys.exit(0) # Success
|
|
876
|
+
except py_compile.PyCompileError as e:
|
|
877
|
+
print(f"Syntax Error: {e}")
|
|
878
|
+
sys.exit(1) # Failure
|
|
803
879
|
except Exception as e:
|
|
804
|
-
print(f"Verification
|
|
880
|
+
print(f"Verification Error: {e}")
|
|
805
881
|
sys.exit(1) # Failure
|
|
806
|
-
""")
|
|
882
|
+
""", encoding="utf-8")
|
|
883
|
+
# Set environment variable for the verification script
|
|
884
|
+
os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
|
|
807
885
|
|
|
808
|
-
# Dummy Prompt
|
|
809
|
-
dummy_prompt = "Create a Python module 'code_module.py' with a function `buggy_function(x)` that returns the input `x` multiplied by 2."
|
|
810
886
|
|
|
811
887
|
# --- Mock fix_verification_errors ---
|
|
812
|
-
#
|
|
813
|
-
#
|
|
814
|
-
|
|
888
|
+
# This is crucial for testing without actual LLM calls / costs
|
|
889
|
+
# In a real test suite, use unittest.mock
|
|
890
|
+
_original_fix_verification_errors = fix_verification_errors
|
|
891
|
+
# Number of times the mock has been invoked. It scales the simulated cost and
# decides when the mock starts returning a corrected code module.
_call_count = 0


def mock_fix_verification_errors(
    program, prompt, code, output, strength, temperature, verbose
):
    """Stand-in for ``fix_verification_errors`` that never calls an LLM.

    The mock inspects ``output`` for the markers printed by the demo program:

    * ``VERIFICATION_FAILURE`` seen on the second or any later invocation:
      return a corrected ``my_code_module`` source and report 0 issues.
    * ``VERIFICATION_SUCCESS``: return the inputs unchanged and report 0 issues.
    * anything else (including a failure marker on the very first invocation):
      return the inputs unchanged and report 1 issue.

    Args:
        program: Source text of the driver program; always returned unchanged.
        prompt: Unused by the mock.
        code: Source text of the code module under verification.
        output: Captured output of the program run. ``None`` is treated as an
            empty string so a missing capture does not raise ``TypeError``.
        strength: Unused by the mock.
        temperature: Unused by the mock.
        verbose: Unused by the mock.

    Returns:
        dict with the keys ``explanation`` (list of str), ``fixed_program``,
        ``fixed_code``, ``total_cost`` (``0.001 * number of calls so far``),
        ``model_name`` and ``verification_issues_count``.
    """
    global _call_count
    _call_count += 1

    # A run that produced no captured output carries no marker at all.
    output = output or ""

    cost = 0.001 * _call_count  # Simulate increasing cost
    model = "mock_model_v1"
    explanation = [
        "Detected deviation: Output shows 'Result: 7', 'Expected: 10'.",
        "Issue seems to be in the `process` function calculation.",
    ]
    issues_count = 1  # Default: one issue, nothing fixed

    fixed_program = program  # The driver program is never modified
    fixed_code = code

    # The first invocation only reports the issue (presumably the loop's
    # initial assessment); from the second invocation onward a failure marker
    # triggers the simulated fix.
    if "VERIFICATION_FAILURE" in output and _call_count >= 2:
        explanation = [
            "Identified incorrect addition `x + 2`.",
            "Corrected to multiplication `x * 2` based on prompt intent and output mismatch.",
        ]
        fixed_code = """
# my_code_module.py
def process(x):
    # Fixed: should be x * 2
    return x * 2
"""
        issues_count = 0  # Fixed!
    elif "VERIFICATION_SUCCESS" in output:
        explanation = ["Output indicates VERIFICATION_SUCCESS."]
        issues_count = 0  # Already correct

    return {
        'explanation': explanation,
        'fixed_program': fixed_program,
        'fixed_code': fixed_code,
        'total_cost': cost,
        'model_name': model,
        'verification_issues_count': issues_count,
    }
|
|
855
926
|
|
|
856
|
-
# Replace the
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
#
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
console.print(f"\nLog file generated at: {log_file}")
|
|
889
|
-
|
|
890
|
-
except Exception as e:
|
|
891
|
-
console.print(f"\n[bold red]An error occurred during the example run: {e}[/bold red]")
|
|
892
|
-
finally:
|
|
893
|
-
# Restore original function
|
|
894
|
-
fix_verification_errors = original_fix_func
|
|
895
|
-
# Clean up dummy files
|
|
896
|
-
try:
|
|
897
|
-
shutil.rmtree(temp_dir)
|
|
898
|
-
console.print(f"Cleaned up temporary directory: {temp_dir}")
|
|
899
|
-
except Exception as e:
|
|
900
|
-
console.print(f"[bold red]Error cleaning up temp directory {temp_dir}: {e}[/bold red]")
|
|
927
|
+
# Replace the real function with the mock
|
|
928
|
+
# In package context, you might need to patch differently
|
|
929
|
+
# For this script execution:
|
|
930
|
+
# Note: This direct replacement might not work if the function is imported
|
|
931
|
+
# using `from .fix_verification_errors import fix_verification_errors`.
|
|
932
|
+
# A proper mock framework (`unittest.mock.patch`) is better.
|
|
933
|
+
# Let's assume for this example run, we can modify the global scope *before* the loop calls it.
|
|
934
|
+
# This is fragile. A better approach involves dependency injection or mocking frameworks.
|
|
935
|
+
# HACK: Re-assigning the imported name in the global scope of this script
|
|
936
|
+
globals()['fix_verification_errors'] = mock_fix_verification_errors
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
|
|
940
|
+
|
|
941
|
+
# Example program_args: pass input value 10 plus one extra placeholder argument
|
|
942
|
+
# Note: The example program only uses the first arg sys.argv[1]
|
|
943
|
+
example_args = ["10", "another_arg"]
|
|
944
|
+
|
|
945
|
+
results = fix_verification_errors_loop(
|
|
946
|
+
program_file=str(program_file),
|
|
947
|
+
code_file=str(code_file),
|
|
948
|
+
prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
|
|
949
|
+
verification_program=str(verification_program_file),
|
|
950
|
+
strength=0.5,
|
|
951
|
+
temperature=0.1,
|
|
952
|
+
max_attempts=3,
|
|
953
|
+
budget=0.10, # Set a budget
|
|
954
|
+
verification_log_file=str(temp_dir / "test_verification.log"),
|
|
955
|
+
verbose=True,
|
|
956
|
+
program_args=example_args
|
|
957
|
+
)
|
|
901
958
|
|
|
959
|
+
console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
|
|
960
|
+
console.print(f"Success: {results['success']}")
|
|
961
|
+
console.print(f"Total Attempts: {results['total_attempts']}")
|
|
962
|
+
console.print(f"Total Cost: ${results['total_cost']:.6f}")
|
|
963
|
+
console.print(f"Model Name: {results['model_name']}")
|
|
964
|
+
# console.print(f"Final Program:\n{results['final_program']}") # Can be long
|
|
965
|
+
console.print(f"Final Code:\n{results['final_code']}")
|
|
966
|
+
console.print(f"Statistics:\n{results['statistics']}")
|
|
967
|
+
|
|
968
|
+
# Restore original function if needed elsewhere
|
|
969
|
+
globals()['fix_verification_errors'] = _original_fix_verification_errors
|
|
970
|
+
|
|
971
|
+
# Clean up dummy files
|
|
972
|
+
# console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
|
|
973
|
+
# shutil.rmtree(temp_dir)
|
|
974
|
+
console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
|
|
975
|
+
console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")
|