pdd-cli 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of pdd-cli has been flagged as potentially problematic.
- pdd/__init__.py +1 -1
- pdd/bug_main.py +16 -2
- pdd/data/llm_model.csv +8 -8
- pdd/fix_verification_errors.py +259 -0
- pdd/fix_verification_errors_loop.py +901 -0
- pdd/generate_output_paths.py +6 -0
- pdd/llm_invoke.py +4 -5
- pdd/pdd_completion.zsh +38 -1
- pdd/prompts/extract_prompt_split_LLM.prompt +7 -4
- pdd/prompts/find_verification_errors_LLM.prompt +25 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +20 -0
- pdd/prompts/split_LLM.prompt +3 -3
- pdd/split.py +9 -9
- pdd/split_main.py +11 -11
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/METADATA +3 -3
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/RECORD +20 -16
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/top_level.txt +0 -0
pdd/fix_verification_errors_loop.py (new file)
@@ -0,0 +1,901 @@
# -*- coding: utf-8 -*-
"""
Module for iteratively fixing code verification errors using LLMs.
"""

import os
import subprocess
import shutil
import time
import datetime
import xml.etree.ElementTree as ET
from xml.dom import minidom
import tempfile
from typing import Dict, Any, Tuple, Optional

# Use Rich for pretty console output
from rich.console import Console
from rich.panel import Panel
from rich.syntax import Syntax
from rich.text import Text

# --- Internal Module Imports ---
# Attempt relative import for package structure
try:
    from .fix_verification_errors import fix_verification_errors
    from .utils import ensure_dir_exists  # Assuming a utility function exists
except ImportError:
    # Fallback for standalone execution or different structure
    # This might indicate a setup issue if running as part of the package
    print("Warning: Could not perform relative import. Falling back.")
    # If fix_verification_errors is in the same directory or PYTHONPATH:
    try:
        from fix_verification_errors import fix_verification_errors
    except ImportError as e:
        raise ImportError(
            "Could not import 'fix_verification_errors'. "
            "Ensure it's in the correct path or package structure."
        ) from e
    # Define a dummy ensure_dir_exists if not available
    def ensure_dir_exists(file_path: str):
        """Ensure the directory for the given file path exists."""
        directory = os.path.dirname(file_path)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)

# Initialize Rich Console
console = Console()

# --- Helper Functions ---
def _run_subprocess(command: list[str], cwd: Optional[str] = None) -> Tuple[bool, str, int]:
    """
    Runs a subprocess command and captures its output.

    Args:
        command: A list of strings representing the command and its arguments.
        cwd: The working directory to run the command in.

    Returns:
        A tuple containing:
        - success (bool): True if the command exited with code 0, False otherwise.
        - output (str): The combined stdout and stderr of the command.
        - return_code (int): The exit code of the command.
    """
    try:
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,  # Don't raise exception on non-zero exit
            cwd=cwd,
            encoding='utf-8',
            errors='replace'  # Handle potential encoding errors
        )
        output = process.stdout + process.stderr
        success = process.returncode == 0
        return success, output.strip(), process.returncode
    except FileNotFoundError:
        error_msg = f"Error: Command not found: '{command[0]}'. Please ensure it's installed and in PATH."
        console.print(f"[bold red]{error_msg}[/bold red]")
        return False, error_msg, -1  # Use -1 to indicate execution failure
    except Exception as e:
        error_msg = f"Error running subprocess {' '.join(command)}: {e}"
        console.print(f"[bold red]{error_msg}[/bold red]")
        return False, error_msg, -1
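# A minimal usage sketch of the helper above (hypothetical; not part of the
# packaged file). The function is defined but never called, so module behavior
# is unchanged; the asserted values follow from the contract documented above
# and assume a 'python' executable on PATH.
def _demo_run_subprocess_contract():
    ok, out, rc = _run_subprocess(['python', '-c', 'print("hi")'])
    # A zero exit code maps to ok == True; stdout and stderr are combined and stripped.
    assert ok and rc == 0 and out == "hi"
    ok, out, rc = _run_subprocess(['no-such-binary-xyz'])
    # A missing executable is reported rather than raised; rc == -1 marks "could not run".
    assert not ok and rc == -1
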
def _read_file(file_path: str) -> Optional[str]:
    """Reads the content of a file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        console.print(f"[bold red]Error: File not found: {file_path}[/bold red]")
        return None
    except Exception as e:
        console.print(f"[bold red]Error reading file {file_path}: {e}[/bold red]")
        return None

def _write_file(file_path: str, content: str) -> bool:
    """Writes content to a file."""
    try:
        ensure_dir_exists(file_path)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        return True
    except Exception as e:
        console.print(f"[bold red]Error writing file {file_path}: {e}[/bold red]")
        return False

def _create_backup(file_path: str, iteration: int) -> Optional[str]:
    """Creates a backup copy of a file."""
    if not os.path.exists(file_path):
        console.print(f"[yellow]Warning: Cannot backup non-existent file: {file_path}[/yellow]")
        return None
    try:
        base, ext = os.path.splitext(file_path)
        backup_path = f"{base}_iteration_{iteration}{ext}"
        shutil.copy2(file_path, backup_path)  # copy2 preserves metadata
        return backup_path
    except Exception as e:
        console.print(f"[bold red]Error creating backup for {file_path}: {e}[/bold red]")
        return None

def _restore_backup(backup_path: str, original_path: str) -> bool:
    """Restores a file from its backup."""
    if not backup_path or not os.path.exists(backup_path):
        console.print(f"[bold red]Error: Backup file not found: {backup_path}[/bold red]")
        return False
    try:
        shutil.copy2(backup_path, original_path)
        return True
    except Exception as e:
        console.print(f"[bold red]Error restoring {original_path} from {backup_path}: {e}[/bold red]")
        return False
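# Illustrative sketch (hypothetical helper; not in the packaged file) of the
# backup naming used by _create_backup/_restore_backup above: a sibling file
# named "name_iteration_N.ext" next to the original.
def _demo_backup_naming(tmp_dir: str):
    path = os.path.join(tmp_dir, "code_module.py")
    _write_file(path, "x = 1\n")
    backup = _create_backup(path, 2)
    # e.g. ".../code_module_iteration_2.py"
    assert backup is not None and backup.endswith("_iteration_2.py")
    _write_file(path, "x = 2\n")
    assert _restore_backup(backup, path) and _read_file(path) == "x = 1\n"
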
def _append_log_entry(log_file: str, root_element: ET.Element, entry_element: ET.Element):
    """Appends an XML element to the log file."""
    try:
        ensure_dir_exists(log_file)
        root_element.append(entry_element)
        # Use minidom for pretty printing XML
        rough_string = ET.tostring(root_element, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')

        with open(log_file, 'wb') as f:  # Write bytes for encoded XML
            f.write(pretty_xml)
    except Exception as e:
        console.print(f"[bold red]Error writing to XML log file {log_file}: {e}[/bold red]")
def _create_cdata_element(parent: ET.Element, tag_name: str, content: Optional[str]):
    """Creates an XML element carrying raw text content."""
    element = ET.SubElement(parent, tag_name)
    # The stdlib ElementTree has no CDATA support (there is no ET.CDATA);
    # assigning plain text works because special characters are escaped on
    # serialization. Use an empty string if content is None to keep the XML
    # structure valid.
    element.text = content if content is not None else ""

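# Sketch of how a consumer might skim the XML log these helpers produce
# (hypothetical helper; not part of the packaged file). Element names match
# those created in fix_verification_errors_loop below.
def _demo_summarize_log(log_path: str) -> list[str]:
    """Return the Status text of each <Iteration> entry in a verification log."""
    root = ET.parse(log_path).getroot()  # the <VerificationLog> element
    return [
        iteration.findtext("Status", default="(no status)")
        for iteration in root.findall("Iteration")
    ]
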
# --- Main Function ---

def fix_verification_errors_loop(
    program_file: str,
    code_file: str,
    prompt: str,
    verification_program: str,
    strength: float,
    temperature: float,
    max_attempts: int,
    budget: float,
    verification_log_file: str = "verification_log.xml",
    verbose: bool = False
) -> Dict[str, Any]:
    """
    Attempts to fix errors in a code file iteratively based on program execution.

    Args:
        program_file: Path to the Python program file that exercises the code_file.
        code_file: Path to the code file being tested/verified.
        prompt: The prompt that generated the code under test.
        verification_program: Path to a secondary Python program for basic verification.
        strength: LLM strength parameter (0.0 to 1.0).
        temperature: LLM temperature parameter (>= 0.0).
        max_attempts: Maximum number of fix attempts.
        budget: Maximum allowed cost for LLM calls.
        verification_log_file: Path for detailed XML logging.
        verbose: Enable detailed console logging.

    Returns:
        A dictionary containing:
        - 'success': bool - True if the code was successfully fixed.
        - 'final_program': str - Contents of the final program file.
        - 'final_code': str - Contents of the final code file.
        - 'total_attempts': int - Number of fix attempts made.
        - 'total_cost': float - Total cost incurred.
        - 'model_name': str | None - Name of the LLM model used (last successful call).
        - 'statistics': dict - Detailed statistics about the process.
    """
console.print(Panel(f"Starting Verification Fix Loop for [cyan]{code_file}[/cyan]", title="[bold blue]Process Start[/bold blue]", expand=False))
|
|
198
|
+
|
|
199
|
+
# --- Step 1: Initialize Log File ---
|
|
200
|
+
if os.path.exists(verification_log_file):
|
|
201
|
+
try:
|
|
202
|
+
os.remove(verification_log_file)
|
|
203
|
+
if verbose:
|
|
204
|
+
console.print(f"Removed existing log file: {verification_log_file}")
|
|
205
|
+
except OSError as e:
|
|
206
|
+
console.print(f"[bold red]Error removing existing log file {verification_log_file}: {e}[/bold red]")
|
|
207
|
+
# Continue execution, but logging might be appended or fail later
|
|
208
|
+
log_root = ET.Element("VerificationLog")
|
|
209
|
+
log_root.set("startTime", datetime.datetime.now().isoformat())
|
|
210
|
+
|
|
211
|
+
# --- Step 2: Initialize Variables ---
|
|
212
|
+
attempts = 0
|
|
213
|
+
total_cost = 0.0
|
|
214
|
+
model_name: Optional[str] = None
|
|
215
|
+
overall_success = False
|
|
216
|
+
last_fix_result: Optional[Dict[str, Any]] = None # Store the result of the last fix attempt
|
|
217
|
+
|
|
218
|
+
# Best iteration tracker: Stores the state with the minimum verified issues
|
|
219
|
+
best_iteration = {
|
|
220
|
+
'attempt': -1, # -1 means initial state, 0+ for loop iterations
|
|
221
|
+
'issues': float('inf'),
|
|
222
|
+
'program_backup_path': None,
|
|
223
|
+
'code_backup_path': None,
|
|
224
|
+
'model_name': None,
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
# Statistics tracker
|
|
228
|
+
stats = {
|
|
229
|
+
'initial_issues': -1, # -1 indicates not yet determined
|
|
230
|
+
'final_issues': -1,
|
|
231
|
+
'best_iteration_attempt': -1,
|
|
232
|
+
'best_iteration_issues': float('inf'),
|
|
233
|
+
'improvement_issues': 0,
|
|
234
|
+
'overall_success_flag': False,
|
|
235
|
+
'exit_reason': "Unknown",
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
    # --- Input Validation ---
    if not os.path.isfile(program_file):
        console.print(f"[bold red]Error: Program file not found: {program_file}[/bold red]")
        stats['exit_reason'] = "Input Error: Program file not found"
        return {
            'success': False, 'final_program': "", 'final_code': "",
            'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
            'statistics': stats
        }
    if not os.path.isfile(code_file):
        console.print(f"[bold red]Error: Code file not found: {code_file}[/bold red]")
        stats['exit_reason'] = "Input Error: Code file not found"
        return {
            'success': False, 'final_program': "", 'final_code': "",
            'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
            'statistics': stats
        }
    if not os.path.isfile(verification_program):
        console.print(f"[bold red]Error: Secondary verification program not found: {verification_program}[/bold red]")
        stats['exit_reason'] = "Input Error: Verification program not found"
        return {
            'success': False, 'final_program': "", 'final_code': "",
            'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
            'statistics': stats
        }
    # --- Step 3: Determine Initial State ---
    if verbose:
        console.print("\n[bold]Step 3: Determining Initial State[/bold]")

    # 3a: Run initial program
    initial_run_success, initial_output, _ = _run_subprocess(['python', program_file])
    if verbose:
        console.print(f"Initial program execution {'succeeded' if initial_run_success else 'failed'}.")
        console.print("[dim]Initial Output:[/dim]")
        console.print(f"[grey37]{initial_output or '[No Output]'}[/grey37]")

    # 3b: Log initial state
    initial_state_log = ET.Element("InitialState")
    initial_state_log.set("timestamp", datetime.datetime.now().isoformat())
    _create_cdata_element(initial_state_log, "InitialProgramOutput", initial_output)
    _append_log_entry(verification_log_file, log_root, initial_state_log)

    # 3c: Read initial contents
    initial_program_contents = _read_file(program_file)
    initial_code_contents = _read_file(code_file)
    if initial_program_contents is None or initial_code_contents is None:
        stats['exit_reason'] = "File Read Error: Could not read initial program or code file."
        return {
            'success': False, 'final_program': initial_program_contents or "", 'final_code': initial_code_contents or "",
            'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
            'statistics': stats
        }

    # 3d: Call fix_verification_errors for initial assessment
    if verbose:
        console.print("Running initial assessment with 'fix_verification_errors'...")
    try:
        # Use provided strength/temp for consistency, but check budget
        if budget <= 0:
            console.print("[bold yellow]Warning: Initial budget is zero or negative. Skipping initial assessment.[/bold yellow]")
            initial_fix_result = {'total_cost': 0.0, 'verification_issues_count': float('inf'), 'model_name': None, 'explanation': ['Skipped due to budget']}  # Mock result
        else:
            initial_fix_result = fix_verification_errors(
                program=initial_program_contents,
                prompt=prompt,
                code=initial_code_contents,
                output=initial_output,
                strength=strength,  # Use actual strength/temp for initial check
                temperature=temperature,
                verbose=verbose  # Pass verbose flag down
            )
        last_fix_result = initial_fix_result  # Store for potential later use
    except Exception as e:
        console.print(f"[bold red]Error during initial call to fix_verification_errors: {e}[/bold red]")
        stats['exit_reason'] = f"LLM Error: Initial fix_verification_errors call failed: {e}"
        # Log the error
        error_log = ET.Element("Error")
        error_log.set("timestamp", datetime.datetime.now().isoformat())
        error_log.set("phase", "InitialAssessment")
        _create_cdata_element(error_log, "ErrorMessage", str(e))
        _append_log_entry(verification_log_file, log_root, error_log)
        return {
            'success': False, 'final_program': initial_program_contents, 'final_code': initial_code_contents,
            'total_attempts': 0, 'total_cost': total_cost, 'model_name': model_name,
            'statistics': stats
        }

    # 3e: Add cost
    initial_cost = initial_fix_result.get('total_cost', 0.0)
    total_cost += initial_cost
    model_name = initial_fix_result.get('model_name', model_name)  # Update model name

    # 3f: Extract initial issues
    initial_issues_count = initial_fix_result.get('verification_issues_count', float('inf'))
    if initial_issues_count == float('inf'):
        console.print("[yellow]Warning: Could not determine initial issue count from fix_verification_errors.[/yellow]")
        # Decide how to handle this - maybe treat as high number of issues?
        initial_issues_count = 999  # Assign a high number if undetermined

    stats['initial_issues'] = initial_issues_count
    if verbose:
        console.print(f"Initial assessment complete. Issues found: {initial_issues_count}, Cost: ${initial_cost:.6f}")

    # 3g: Initialize best iteration with initial state
    best_iteration['attempt'] = 0  # Representing the initial state before the loop
    best_iteration['issues'] = initial_issues_count
    best_iteration['program_backup_path'] = program_file  # Original file path
    best_iteration['code_backup_path'] = code_file  # Original file path
    best_iteration['model_name'] = model_name

    # Log initial assessment details
    initial_assessment_log = ET.Element("InitialAssessment")
    initial_assessment_log.set("timestamp", datetime.datetime.now().isoformat())
    initial_assessment_log.set("issues_found", str(initial_issues_count))
    initial_assessment_log.set("cost", f"{initial_cost:.6f}")
    if model_name:
        initial_assessment_log.set("model_name", model_name)
    _create_cdata_element(initial_assessment_log, "Explanation", "\n".join(initial_fix_result.get('explanation', [])))
    _append_log_entry(verification_log_file, log_root, initial_assessment_log)

    # 3h: Check if already successful
    if initial_issues_count == 0:
        console.print("[bold green]Initial state already meets verification criteria (0 issues found). No fixing loop needed.[/bold green]")
        overall_success = True
        stats['final_issues'] = 0
        stats['best_iteration_attempt'] = 0
        stats['best_iteration_issues'] = 0
        stats['improvement_issues'] = 0
        stats['overall_success_flag'] = True
        stats['exit_reason'] = "Success on Initial Assessment"
        # Skip to Step 7/8 (Return)
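    # Shape of the result dict this loop consumes from fix_verification_errors,
    # inferred from the calls in this file (a sketch; the authoritative contract
    # is defined by fix_verification_errors itself):
    #
    #     {
    #         'explanation': list[str],            # human-readable reasoning
    #         'fixed_program': str,                # possibly-unchanged program text
    #         'fixed_code': str,                   # possibly-unchanged code text
    #         'total_cost': float,                 # cost of the LLM call(s)
    #         'model_name': str | None,            # model that produced the fix
    #         'verification_issues_count': int,    # 0 means verified clean
    #     }
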
    # --- Step 4: Fixing Loop ---
    current_program_contents = initial_program_contents
    current_code_contents = initial_code_contents

    if not overall_success:  # Only enter loop if initial state wasn't perfect
        if verbose:
            console.print(f"\n[bold]Step 4: Starting Fixing Loop (Max Attempts: {max_attempts}, Budget: ${budget:.2f})[/bold]")

        while attempts < max_attempts and total_cost < budget:
            attempt_number = attempts + 1
            if verbose:
                console.print(f"\n--- Attempt {attempt_number}/{max_attempts} --- Cost so far: ${total_cost:.6f}")

            # 4a: Log attempt start (done within iteration log)
            iteration_log = ET.Element("Iteration")
            iteration_log.set("attempt", str(attempt_number))
            iteration_log.set("timestamp", datetime.datetime.now().isoformat())

            # 4b: Run the program file
            run_success, program_output, _ = _run_subprocess(['python', program_file])
            if verbose:
                console.print(f"Program execution {'succeeded' if run_success else 'failed'}.")
                # console.print("[dim]Current Output:[/dim]")
                # console.print(f"[grey37]{program_output or '[No Output]'}[/grey37]")  # Can be very long

            _create_cdata_element(iteration_log, "ProgramOutputBeforeFix", program_output)

            # 4c: Read current contents (already stored in current_*)

            # 4d: Create backups
            program_backup_path = _create_backup(program_file, attempt_number)
            code_backup_path = _create_backup(code_file, attempt_number)
            if program_backup_path: iteration_log.set("program_backup", program_backup_path)
            if code_backup_path: iteration_log.set("code_backup", code_backup_path)

            # 4e: Call fix_verification_errors
            if verbose:
                console.print("Calling 'fix_verification_errors' to suggest fixes...")
            try:
                fix_result = fix_verification_errors(
                    program=current_program_contents,
                    prompt=prompt,
                    code=current_code_contents,
                    output=program_output,
                    strength=strength,
                    temperature=temperature,
                    verbose=verbose  # Pass verbose flag down
                )
                last_fix_result = fix_result  # Store latest result
            except Exception as e:
                console.print(f"[bold red]Error during fix_verification_errors call in attempt {attempt_number}: {e}[/bold red]")
                stats['exit_reason'] = f"LLM Error: fix_verification_errors failed in loop: {e}"
                # Log the error and break
                error_log = ET.Element("Error")
                error_log.set("timestamp", datetime.datetime.now().isoformat())
                error_log.set("phase", f"FixAttempt_{attempt_number}")
                _create_cdata_element(error_log, "ErrorMessage", str(e))
                _append_log_entry(verification_log_file, log_root, error_log)
                break  # Exit loop on LLM error

            # Log inputs and results to XML
            inputs_log = ET.SubElement(iteration_log, "InputsToFixer")
            _create_cdata_element(inputs_log, "Program", current_program_contents)
            _create_cdata_element(inputs_log, "Code", current_code_contents)
            _create_cdata_element(inputs_log, "Prompt", prompt)
            _create_cdata_element(inputs_log, "ProgramOutput", program_output)

            fixer_result_log = ET.SubElement(iteration_log, "FixerResult")
            fixer_result_log.set("cost", f"{fix_result.get('total_cost', 0.0):.6f}")
            # 'or' also covers an explicit None value, which Element.set would not accept
            fixer_result_log.set("model_name", fix_result.get('model_name') or "Unknown")
            fixer_result_log.set("issues_found", str(fix_result.get('verification_issues_count', 'inf')))
            _create_cdata_element(fixer_result_log, "Explanation", "\n".join(fix_result.get('explanation', [])))
            _create_cdata_element(fixer_result_log, "FixedProgramSuggestion", fix_result.get('fixed_program'))
            _create_cdata_element(fixer_result_log, "FixedCodeSuggestion", fix_result.get('fixed_code'))

            # 4f: Add cost
            attempt_cost = fix_result.get('total_cost', 0.0)
            total_cost += attempt_cost
            model_name = fix_result.get('model_name', model_name)  # Update model name if available
            if verbose:
                console.print(f"Fix attempt cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
                console.print(f"Issues found by fixer: {fix_result.get('verification_issues_count', 'N/A')}")

            # 4h: Check budget
            if total_cost > budget:
                console.print(f"[bold yellow]Budget exceeded (${total_cost:.2f} > ${budget:.2f}). Stopping.[/bold yellow]")
                status_log = ET.SubElement(iteration_log, "Status")
                status_log.text = "Budget Exceeded"
                _append_log_entry(verification_log_file, log_root, iteration_log)
                stats['exit_reason'] = "Budget Exceeded"
                break
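            # Worked example of the two budget guards (illustrative numbers):
            # with budget=0.50 and each attempt costing $0.20, attempts run at
            # cumulative cost 0.20 and 0.40; a third call pushing total_cost to
            # 0.60 > budget trips the check above, and the while-condition
            # (total_cost < budget) would block any further pass regardless.
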
            # 4i: Check for success (0 issues)
            current_issues_count = fix_result.get('verification_issues_count', float('inf'))
            if current_issues_count == 0:
                console.print("[bold green]Success! Fixer reported 0 verification issues.[/bold green]")
                status_log = ET.SubElement(iteration_log, "Status")
                status_log.text = "Success - 0 Issues Found"

                # Update best iteration (0 issues is always the best)
                best_iteration['attempt'] = attempt_number
                best_iteration['issues'] = 0
                best_iteration['program_backup_path'] = program_backup_path  # Backup before successful fix
                best_iteration['code_backup_path'] = code_backup_path  # Backup before successful fix
                best_iteration['model_name'] = model_name

                # Write final successful code/program
                final_program = fix_result.get('fixed_program', current_program_contents)
                final_code = fix_result.get('fixed_code', current_code_contents)
                program_written = _write_file(program_file, final_program)
                code_written = _write_file(code_file, final_code)

                if program_written and code_written:
                    current_program_contents = final_program  # Update current state
                    current_code_contents = final_code
                    if verbose:
                        console.print("Applied final successful changes to files.")
                else:
                    console.print("[bold red]Error writing final successful files![/bold red]")
                    # Success flag might be compromised if write fails

                _append_log_entry(verification_log_file, log_root, iteration_log)
                overall_success = True
                stats['exit_reason'] = "Success - Reached 0 Issues"
                break

            # 4j: Check if changes were suggested
            fixed_program = fix_result.get('fixed_program', current_program_contents)
            fixed_code = fix_result.get('fixed_code', current_code_contents)
            program_updated = fixed_program != current_program_contents
            code_updated = fixed_code != current_code_contents

            if not program_updated and not code_updated:
                console.print("[yellow]No changes suggested by the fixer in this iteration. Stopping.[/yellow]")
                status_log = ET.SubElement(iteration_log, "Status")
                status_log.text = "No Changes Suggested"
                _append_log_entry(verification_log_file, log_root, iteration_log)
                stats['exit_reason'] = "No Changes Suggested by LLM"
                break

            # 4k, 4l: Log fix attempt details
            fix_attempt_log = ET.SubElement(iteration_log, "FixAttempted")
            fix_attempt_log.set("program_change_suggested", str(program_updated))
            fix_attempt_log.set("code_change_suggested", str(code_updated))

            # 4m, 4n: Secondary Verification (only if code was modified)
            secondary_verification_passed = True  # Assume pass if code not changed
            secondary_verification_output = "Not Run (Code Unchanged)"
            if code_updated:
                if verbose:
                    console.print("Code change suggested. Running secondary verification...")
                # Use a temporary file for the modified code.
                # Initialize the path up front so the finally block below cannot
                # hit an unbound name if NamedTemporaryFile itself fails.
                temp_code_file_path = None
                try:
                    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as tf:
                        tf.write(fixed_code)
                        temp_code_file_path = tf.name
                    if verbose:
                        console.print(f"Wrote proposed code to temporary file: {temp_code_file_path}")

                    # Run the secondary verification program.
                    # It needs to know which code file to check. We pass the temp file path.
                    # Modify this command if your verification script takes args differently.
                    verify_command = ['python', verification_program, temp_code_file_path]
                    verify_success, verify_output, verify_rc = _run_subprocess(verify_command)

                    secondary_verification_passed = verify_success
                    secondary_verification_output = verify_output
                    if verbose:
                        console.print(f"Secondary verification {'PASSED' if verify_success else 'FAILED'} (Exit Code: {verify_rc}).")
                        # console.print(f"[dim]Verification Output:[/dim]\n[grey37]{verify_output or '[No Output]'}[/grey37]")

                except Exception as e:
                    console.print(f"[bold red]Error during secondary verification: {e}[/bold red]")
                    secondary_verification_passed = False
                    secondary_verification_output = f"Error during verification: {e}"
                finally:
                    # Clean up the temporary file
                    if temp_code_file_path and os.path.exists(temp_code_file_path):
                        try:
                            os.remove(temp_code_file_path)
                        except OSError as e:
                            console.print(f"[yellow]Warning: Could not remove temp file {temp_code_file_path}: {e}[/yellow]")

            # Log secondary verification result
            sec_verify_log = ET.SubElement(iteration_log, "SecondaryVerification")
            sec_verify_log.set("run", str(code_updated))
            sec_verify_log.set("passed", str(secondary_verification_passed))
            _create_cdata_element(sec_verify_log, "Output", secondary_verification_output)
            # 4o, 4p: Apply changes or discard based on secondary verification
            if secondary_verification_passed:
                if verbose:
                    console.print("Secondary verification passed (or not needed). Applying changes.")
                status_log = ET.SubElement(iteration_log, "Status")
                status_log.text = "Changes Applied (Secondary Verification Passed or Skipped)"

                # Update best iteration if this one is better
                if current_issues_count < best_iteration['issues']:
                    if verbose:
                        console.print(f"[green]Improvement found! Issues reduced from {best_iteration['issues']} to {current_issues_count}. Updating best iteration.[/green]")
                    best_iteration['attempt'] = attempt_number
                    best_iteration['issues'] = current_issues_count
                    best_iteration['program_backup_path'] = program_backup_path  # Store backup *before* this successful step
                    best_iteration['code_backup_path'] = code_backup_path
                    best_iteration['model_name'] = model_name
                elif verbose and current_issues_count >= best_iteration['issues']:
                    console.print(f"Current issues ({current_issues_count}) not better than best ({best_iteration['issues']}). Best iteration remains attempt {best_iteration['attempt']}.")

                # Apply changes to files
                files_updated = True
                if code_updated:
                    if not _write_file(code_file, fixed_code):
                        files_updated = False
                        console.print(f"[bold red]Error writing updated code to {code_file}[/bold red]")
                    else:
                        current_code_contents = fixed_code  # Update current state

                if program_updated:
                    if not _write_file(program_file, fixed_program):
                        files_updated = False
                        console.print(f"[bold red]Error writing updated program to {program_file}[/bold red]")
                    else:
                        current_program_contents = fixed_program  # Update current state

                if not files_updated:
                    # If writing failed, we might be in an inconsistent state. Log it.
                    ET.SubElement(iteration_log, "Error").text = "Failed to write updated files after successful verification."

            else:  # Secondary verification failed
                if verbose:
                    console.print("[bold red]Secondary verification failed. Discarding suggested changes for this iteration.[/bold red]")
                status_log = ET.SubElement(iteration_log, "Status")
                status_log.text = "Changes Discarded (Secondary Verification Failed)"
                # Do not update files, do not update best_iteration

            # 4q: Append log entry for the iteration
            _append_log_entry(verification_log_file, log_root, iteration_log)

            # 4r: Increment attempt counter
            attempts += 1

            # Check if max attempts reached
            if attempts >= max_attempts:
                console.print(f"[yellow]Maximum attempts ({max_attempts}) reached. Stopping.[/yellow]")
                stats['exit_reason'] = "Max Attempts Reached"
                # Add status to log if loop didn't break for other reasons already
                if iteration_log.find("Status") is None:
                    status_log = ET.SubElement(iteration_log, "Status")
                    status_log.text = "Max Attempts Reached"
                # The entry was already appended at 4q; remove it first so the
                # rewrite below does not duplicate it in the log file.
                if iteration_log in list(log_root):
                    log_root.remove(iteration_log)
                _append_log_entry(verification_log_file, log_root, iteration_log)  # Ensure last log is written
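    # Note on the restoration below (follows from how best_iteration is filled
    # in: the stored paths are backups taken *before* a fix was applied): when
    # no attempt reaches 0 issues, rolling back to the best iteration restores
    # the files as they stood going *into* that attempt, not its output.
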
    # --- Step 5: Post-Loop Processing ---
    if verbose:
        console.print("\n[bold]Step 5: Post-Loop Processing[/bold]")

    final_action_log = ET.Element("FinalAction")
    final_action_log.set("timestamp", datetime.datetime.now().isoformat())

    if not overall_success:
        console.print("[yellow]Fixing loop finished without reaching 0 issues.[/yellow]")
        # Check if a 'best' iteration (better than initial and passed secondary verification) was found
        if best_iteration['attempt'] > 0 and best_iteration['issues'] < stats['initial_issues']:
            console.print(f"Restoring state from best recorded iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})")
            restored_program = _restore_backup(best_iteration['program_backup_path'], program_file)
            restored_code = _restore_backup(best_iteration['code_backup_path'], code_file)
            if restored_program and restored_code:
                console.print("[green]Successfully restored files from the best iteration.[/green]")
                final_action_log.set("action", "RestoredBestIteration")
                final_action_log.set("best_attempt", str(best_iteration['attempt']))
                final_action_log.set("best_issues", str(best_iteration['issues']))
                stats['final_issues'] = best_iteration['issues']  # Final state has this many issues
            else:
                console.print("[bold red]Error restoring files from the best iteration! Final files might be from the last attempt.[/bold red]")
                final_action_log.set("action", "RestorationFailed")
                # Final issues remain from the last attempt before loop exit, or initial if no changes applied
                stats['final_issues'] = last_fix_result.get('verification_issues_count', stats['initial_issues']) if last_fix_result else stats['initial_issues']

        elif best_iteration['attempt'] == 0:  # Best was the initial state
            console.print("No improvement found compared to the initial state. Keeping original files.")
            # No restoration needed, files should be in original state unless write failed earlier
            final_action_log.set("action", "NoImprovementFound")
            stats['final_issues'] = stats['initial_issues']
        else:  # No iteration ever passed secondary verification or improved
            console.print("No verified improvement was found. Final files are from the last attempted state before loop exit.")
            final_action_log.set("action", "NoVerifiedImprovement")
            # Final issues remain from the last attempt before loop exit
            stats['final_issues'] = last_fix_result.get('verification_issues_count', stats['initial_issues']) if last_fix_result else stats['initial_issues']

    else:  # overall_success is True
        console.print("[bold green]Process finished successfully![/bold green]")
        final_action_log.set("action", "Success")
        stats['final_issues'] = 0  # Success means 0 issues

    _append_log_entry(verification_log_file, log_root, final_action_log)
    # --- Step 6: Read Final Contents ---
    if verbose:
        console.print("\n[bold]Step 6: Reading Final File Contents[/bold]")
    final_program_contents = _read_file(program_file)
    final_code_contents = _read_file(code_file)
    if final_program_contents is None: final_program_contents = "Error reading final program file."
    if final_code_contents is None: final_code_contents = "Error reading final code file."

    # --- Step 7: Calculate and Print Summary Statistics ---
    if verbose:
        console.print("\n[bold]Step 7: Final Statistics[/bold]")

    stats['overall_success_flag'] = overall_success
    stats['best_iteration_attempt'] = best_iteration['attempt'] if best_iteration['attempt'] >= 0 else 'N/A'
    stats['best_iteration_issues'] = best_iteration['issues'] if best_iteration['issues'] != float('inf') else 'N/A'
    if stats['initial_issues'] != float('inf') and stats['final_issues'] != float('inf') and stats['initial_issues'] >= 0 and stats['final_issues'] >= 0:
        stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
    else:
        stats['improvement_issues'] = 'N/A'  # Cannot calculate if initial/final unknown

    summary_text = Text.assemble(
        ("Initial Issues: ", "bold"), str(stats['initial_issues']), "\n",
        ("Final Issues: ", "bold"), str(stats['final_issues']), "\n",
        ("Improvement (Issues Reduced): ", "bold"), str(stats['improvement_issues']), "\n",
        ("Best Iteration Attempt: ", "bold"), str(stats['best_iteration_attempt']), "\n",
        ("Best Iteration Issues: ", "bold"), str(stats['best_iteration_issues']), "\n",
        ("Total Attempts Made: ", "bold"), str(attempts), "\n",
        ("Total LLM Cost: ", "bold"), f"${total_cost:.6f}", "\n",
        ("Model Used (Last/Best): ", "bold"), str(best_iteration.get('model_name') or model_name or 'N/A'), "\n",
        ("Exit Reason: ", "bold"), stats['exit_reason'], "\n",
        ("Overall Success: ", "bold"), (str(overall_success), "bold green" if overall_success else "bold red")
    )
    console.print(Panel(summary_text, title="[bold blue]Verification Fix Loop Summary[/bold blue]", expand=False))

    # Finalize XML log
    log_root.set("endTime", datetime.datetime.now().isoformat())
    log_root.set("totalAttempts", str(attempts))
    log_root.set("totalCost", f"{total_cost:.6f}")
    log_root.set("overallSuccess", str(overall_success))
    # Re-write the log one last time with final attributes and pretty print
    try:
        rough_string = ET.tostring(log_root, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')
        with open(verification_log_file, 'wb') as f:
            f.write(pretty_xml)
        if verbose:
            console.print(f"Final XML log written to: {verification_log_file}")
    except Exception as e:
        console.print(f"[bold red]Error writing final XML log file {verification_log_file}: {e}[/bold red]")
    # --- Step 8: Return Results ---
    return {
        'success': overall_success,
        'final_program': final_program_contents,
        'final_code': final_code_contents,
        'total_attempts': attempts,
        'total_cost': total_cost,
        'model_name': best_iteration.get('model_name') or model_name,  # Prefer model from best iter, fallback to last used
        'statistics': stats,
    }
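# Minimal caller sketch (hypothetical paths and budget; the richer, runnable
# demo with a mocked LLM call follows below):
#
#     result = fix_verification_errors_loop(
#         program_file="program.py", code_file="code_module.py",
#         prompt="original generation prompt", verification_program="verify.py",
#         strength=0.5, temperature=0.0, max_attempts=3, budget=1.0,
#     )
#     if result['success']:
#         print(f"Fixed in {result['total_attempts']} attempt(s), "
#               f"cost ${result['total_cost']:.4f}")
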
# Example Usage (Illustrative - requires setting up files and dependencies)
if __name__ == '__main__':
    console.print(Panel("[bold yellow]Running Example Usage[/bold yellow]\nThis is illustrative and requires setting up dummy files and potentially the 'fix_verification_errors' function/package.", title="Example"))

    # --- Create Dummy Files for Demonstration ---
    temp_dir = tempfile.mkdtemp()
    console.print(f"Created temporary directory: {temp_dir}")

    dummy_program_file = os.path.join(temp_dir, "program.py")
    dummy_code_file = os.path.join(temp_dir, "code_module.py")
    dummy_verify_file = os.path.join(temp_dir, "verify.py")
    log_file = os.path.join(temp_dir, "verification_log.xml")

    # Dummy Program (uses code_module, prints success/failure)
    _write_file(dummy_program_file, """
import code_module
import sys
try:
    result = code_module.buggy_function(5)
    expected = 10
    print(f"Input: 5")
    print(f"Expected: {expected}")
    print(f"Actual: {result}")
    if result == expected:
        print("VERIFICATION_SUCCESS")
        sys.exit(0)
    else:
        print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
        sys.exit(1)
except Exception as e:
    print(f"VERIFICATION_ERROR: {e}")
    sys.exit(2)
""")

    # Dummy Code (initially buggy)
    _write_file(dummy_code_file, """
# Code module with a bug
def buggy_function(x):
    # Intended to return x * 2, but has a bug
    return x + 1  # Bug! Should be x * 2
""")

    # Dummy Verification Script (checks basic syntax/import)
    _write_file(dummy_verify_file, """
import sys
import importlib.util
import os

if len(sys.argv) < 2:
    print("Usage: python verify.py <path_to_code_module.py>")
    sys.exit(1)

module_path = sys.argv[1]
module_name = os.path.splitext(os.path.basename(module_path))[0]

try:
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not create spec for {module_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    # Optional: Check if specific functions exist
    if not hasattr(module, 'buggy_function'):
        raise AttributeError("Function 'buggy_function' not found.")
    print(f"Verification PASSED: {module_path} imported successfully.")
    sys.exit(0)  # Success
except Exception as e:
    print(f"Verification FAILED: {e}")
    sys.exit(1)  # Failure
""")

    # Dummy Prompt
    dummy_prompt = "Create a Python module 'code_module.py' with a function `buggy_function(x)` that returns the input `x` multiplied by 2."

    # --- Mock fix_verification_errors ---
    # In a real scenario, this would be the actual LLM call function
    # For this example, we simulate its behavior based on attempts
    _fix_call_count = 0
    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
        global _fix_call_count
        _fix_call_count += 1
        cost = 0.01 + (strength * 0.02)  # Simulate cost based on strength
        model = f"mock-model-s{strength:.1f}"
        issues = 1  # Default to 1 issue initially
        fixed_code = code  # Default to no change
        explanation = ["Initial analysis: Function seems incorrect."]

        if "VERIFICATION_FAILURE" in output or "VERIFICATION_ERROR" in output:
            issues = 1
            if _fix_call_count <= 2:  # Simulate fixing on the first or second try
                # Simulate a fix
                fixed_code = """
# Code module - Attempting fix
def buggy_function(x):
    # Intended to return x * 2
    return x * 2  # Corrected code
"""
                explanation = ["Identified incorrect arithmetic operation. Changed '+' to '*'."]
                issues = 0  # Simulate 0 issues after fix
                if verbose: print("[Mock Fixer] Suggesting corrected code.")
            else:
                explanation = ["Analysis: Still incorrect, unable to determine fix."]
                issues = 1  # Simulate failure to fix after 2 tries
                if verbose: print("[Mock Fixer] Failed to find fix this time.")
        elif "VERIFICATION_SUCCESS" in output:
            issues = 0
            explanation = ["Code appears correct based on output."]
            if verbose: print("[Mock Fixer] Code seems correct.")

        return {
            'explanation': explanation,
            'fixed_program': program,  # Assume program doesn't change in mock
            'fixed_code': fixed_code,
            'total_cost': cost,
            'model_name': model,
            'verification_issues_count': issues,
        }

    # Replace the actual function with the mock for this example run
    original_fix_func = fix_verification_errors
    fix_verification_errors = mock_fix_verification_errors

    # --- Run the Loop ---
    try:
        results = fix_verification_errors_loop(
            program_file=dummy_program_file,
            code_file=dummy_code_file,
            prompt=dummy_prompt,
            verification_program=dummy_verify_file,
            strength=0.5,
            temperature=0.1,
            max_attempts=3,
            budget=0.50,  # $0.50 budget
            verification_log_file=log_file,
            verbose=True
        )

        console.print("\n[bold magenta]--- Final Results ---[/bold magenta]")
        console.print(f"Success: {results['success']}")
        console.print(f"Total Attempts: {results['total_attempts']}")
        console.print(f"Total Cost: ${results['total_cost']:.6f}")
        console.print(f"Model Name: {results['model_name']}")

        console.print("\nFinal Code Content:")
        console.print(Syntax(results['final_code'], "python", theme="default", line_numbers=True))

        console.print("\nStatistics:")
        import json
        console.print(json.dumps(results['statistics'], indent=2))

        console.print(f"\nLog file generated at: {log_file}")

    except Exception as e:
        console.print(f"\n[bold red]An error occurred during the example run: {e}[/bold red]")
    finally:
        # Restore original function
        fix_verification_errors = original_fix_func
        # Clean up dummy files
        try:
            shutil.rmtree(temp_dir)
            console.print(f"Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            console.print(f"[bold red]Error cleaning up temp directory {temp_dir}: {e}[/bold red]")