pdd-cli 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,901 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Module for iteratively fixing code verification errors using LLMs.
4
+ """
5
+
6
+ import os
7
+ import subprocess
8
+ import shutil
9
+ import time
10
+ import datetime
11
+ import xml.etree.ElementTree as ET
12
+ from xml.dom import minidom
13
+ import tempfile
14
+ from typing import Dict, Any, Tuple, Optional
15
+
16
+ # Use Rich for pretty console output
17
+ from rich.console import Console
18
+ from rich.panel import Panel
19
+ from rich.syntax import Syntax
20
+ from rich.text import Text
21
+
22
+ # --- Internal Module Imports ---
23
+ # Attempt relative import for package structure
24
+ try:
25
+ from .fix_verification_errors import fix_verification_errors
26
+ from .utils import ensure_dir_exists # Assuming a utility function exists
27
+ except ImportError:
28
+ # Fallback for standalone execution or different structure
29
+ # This might indicate a setup issue if running as part of the package
30
+ print("Warning: Could not perform relative import. Falling back.")
31
+ # If fix_verification_errors is in the same directory or PYTHONPATH:
32
+ try:
33
+ from fix_verification_errors import fix_verification_errors
34
+ except ImportError as e:
35
+ raise ImportError(
36
+ "Could not import 'fix_verification_errors'. "
37
+ "Ensure it's in the correct path or package structure."
38
+ ) from e
39
+ # Define a dummy ensure_dir_exists if not available
40
+ def ensure_dir_exists(file_path: str):
41
+ """Ensure the directory for the given file path exists."""
42
+ directory = os.path.dirname(file_path)
43
+ if directory and not os.path.exists(directory):
44
+ os.makedirs(directory, exist_ok=True)
45
+
46
+ # Initialize Rich Console
47
+ console = Console()
48
+
49
+ # --- Helper Functions ---
50
+
51
+ def _run_subprocess(command: list[str], cwd: Optional[str] = None) -> Tuple[bool, str, int]:
52
+ """
53
+ Runs a subprocess command and captures its output.
54
+
55
+ Args:
56
+ command: A list of strings representing the command and its arguments.
57
+ cwd: The working directory to run the command in.
58
+
59
+ Returns:
60
+ A tuple containing:
61
+ - success (bool): True if the command exited with code 0, False otherwise.
62
+ - output (str): The combined stdout and stderr of the command.
63
+ - return_code (int): The exit code of the command.
64
+ """
65
+ try:
66
+ process = subprocess.run(
67
+ command,
68
+ capture_output=True,
69
+ text=True,
70
+ check=False, # Don't raise exception on non-zero exit
71
+ cwd=cwd,
72
+ encoding='utf-8',
73
+ errors='replace' # Handle potential encoding errors
74
+ )
75
+ output = process.stdout + process.stderr
76
+ success = process.returncode == 0
77
+ return success, output.strip(), process.returncode
78
+ except FileNotFoundError:
79
+ error_msg = f"Error: Command not found: '{command[0]}'. Please ensure it's installed and in PATH."
80
+ console.print(f"[bold red]{error_msg}[/bold red]")
81
+ return False, error_msg, -1 # Use -1 to indicate execution failure
82
+ except Exception as e:
83
+ error_msg = f"Error running subprocess {' '.join(command)}: {e}"
84
+ console.print(f"[bold red]{error_msg}[/bold red]")
85
+ return False, error_msg, -1
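+ # Illustrative usage (hypothetical invocation): ok, out, rc = _run_subprocess(['python', '--version'])
+ # would return ok=True and rc=0 on a clean exit, with out holding the combined stdout/stderr text.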
86
+
87
+ def _read_file(file_path: str) -> Optional[str]:
88
+ """Reads the content of a file."""
89
+ try:
90
+ with open(file_path, 'r', encoding='utf-8') as f:
91
+ return f.read()
92
+ except FileNotFoundError:
93
+ console.print(f"[bold red]Error: File not found: {file_path}[/bold red]")
94
+ return None
95
+ except Exception as e:
96
+ console.print(f"[bold red]Error reading file {file_path}: {e}[/bold red]")
97
+ return None
98
+
99
+ def _write_file(file_path: str, content: str) -> bool:
100
+ """Writes content to a file."""
101
+ try:
102
+ ensure_dir_exists(file_path)
103
+ with open(file_path, 'w', encoding='utf-8') as f:
104
+ f.write(content)
105
+ return True
106
+ except Exception as e:
107
+ console.print(f"[bold red]Error writing file {file_path}: {e}[/bold red]")
108
+ return False
109
+
110
+ def _create_backup(file_path: str, iteration: int) -> Optional[str]:
111
+ """Creates a backup copy of a file."""
112
+ if not os.path.exists(file_path):
113
+ console.print(f"[yellow]Warning: Cannot backup non-existent file: {file_path}[/yellow]")
114
+ return None
115
+ try:
116
+ base, ext = os.path.splitext(file_path)
117
+ backup_path = f"{base}_iteration_{iteration}{ext}"
118
+ shutil.copy2(file_path, backup_path) # copy2 preserves metadata
119
+ return backup_path
120
+ except Exception as e:
121
+ console.print(f"[bold red]Error creating backup for {file_path}: {e}[/bold red]")
122
+ return None
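+ # Backups are written next to the original file, e.g. backing up 'code_module.py' for
+ # attempt 2 produces 'code_module_iteration_2.py'.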
123
+
124
+ def _restore_backup(backup_path: str, original_path: str) -> bool:
125
+ """Restores a file from its backup."""
126
+ if not backup_path or not os.path.exists(backup_path):
127
+ console.print(f"[bold red]Error: Backup file not found: {backup_path}[/bold red]")
128
+ return False
129
+ try:
130
+ shutil.copy2(backup_path, original_path)
131
+ return True
132
+ except Exception as e:
133
+ console.print(f"[bold red]Error restoring {original_path} from {backup_path}: {e}[/bold red]")
134
+ return False
135
+
136
+ def _append_log_entry(log_file: str, root_element: ET.Element, entry_element: ET.Element):
137
+ """Appends an XML element to the log file."""
138
+ try:
139
+ ensure_dir_exists(log_file)
140
+ root_element.append(entry_element)
141
+ # Use minidom for pretty printing XML
142
+ rough_string = ET.tostring(root_element, 'utf-8')
143
+ reparsed = minidom.parseString(rough_string)
144
+ pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')
145
+
146
+ with open(log_file, 'wb') as f: # Write bytes for encoded XML
147
+ f.write(pretty_xml)
148
+ except Exception as e:
149
+ console.print(f"[bold red]Error writing to XML log file {log_file}: {e}[/bold red]")
150
+
151
+ def _create_cdata_element(parent: ET.Element, tag_name: str, content: Optional[str]):
152
+ """Creates an XML element with CDATA content."""
153
+ element = ET.SubElement(parent, tag_name)
154
+ # xml.etree.ElementTree has no CDATA API; plain text is escaped automatically on serialization. Use "" when content is None to keep the structure valid.
155
+ element.text = content if content is not None else ""
156
+
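+ # Taken together, the helpers above write a log shaped roughly like this (element names are
+ # those used by the calls further down; attribute values are illustrative):
+ # <VerificationLog startTime="...">
+ #     <InitialState>...</InitialState>
+ #     <InitialAssessment issues_found="2" cost="0.012345">...</InitialAssessment>
+ #     <Iteration attempt="1">
+ #         <InputsToFixer>...</InputsToFixer>
+ #         <FixerResult issues_found="1">...</FixerResult>
+ #         <SecondaryVerification passed="True">...</SecondaryVerification>
+ #         <Status>Changes Applied (Secondary Verification Passed or Skipped)</Status>
+ #     </Iteration>
+ #     <FinalAction action="Success"/>
+ # </VerificationLog>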
157
+
158
+ # --- Main Function ---
159
+
160
+ def fix_verification_errors_loop(
161
+ program_file: str,
162
+ code_file: str,
163
+ prompt: str,
164
+ verification_program: str,
165
+ strength: float,
166
+ temperature: float,
167
+ max_attempts: int,
168
+ budget: float,
169
+ verification_log_file: str = "verification_log.xml",
170
+ verbose: bool = False
171
+ ) -> Dict[str, Any]:
172
+ """
173
+ Attempts to fix errors in a code file iteratively based on program execution.
174
+
175
+ Args:
176
+ program_file: Path to the Python program file that exercises the code_file.
177
+ code_file: Path to the code file being tested/verified.
178
+ prompt: The prompt that generated the code under test.
179
+ verification_program: Path to a secondary Python program for basic verification.
180
+ strength: LLM strength parameter (0.0 to 1.0).
181
+ temperature: LLM temperature parameter (>= 0.0).
182
+ max_attempts: Maximum number of fix attempts.
183
+ budget: Maximum allowed cost for LLM calls.
184
+ verification_log_file: Path for detailed XML logging.
185
+ verbose: Enable detailed console logging.
186
+
187
+ Returns:
188
+ A dictionary containing:
189
+ - 'success': bool - True if the code was successfully fixed.
190
+ - 'final_program': str - Contents of the final program file.
191
+ - 'final_code': str - Contents of the final code file.
192
+ - 'total_attempts': int - Number of fix attempts made.
193
+ - 'total_cost': float - Total cost incurred.
194
+ - 'model_name': str | None - Name of the LLM model used (last successful call).
195
+ - 'statistics': dict - Detailed statistics about the process.
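+ 
+ Example (a minimal sketch; file paths and parameter values below are hypothetical):
+ results = fix_verification_errors_loop(
+ program_file="tests/run_example.py", code_file="src/example_module.py",
+ prompt="<prompt that generated the code>", verification_program="tests/verify_example.py",
+ strength=0.7, temperature=0.0, max_attempts=5, budget=1.0)
+ if results['success']: print(results['final_code'])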
196
+ """
197
+ console.print(Panel(f"Starting Verification Fix Loop for [cyan]{code_file}[/cyan]", title="[bold blue]Process Start[/bold blue]", expand=False))
198
+
199
+ # --- Step 1: Initialize Log File ---
200
+ if os.path.exists(verification_log_file):
201
+ try:
202
+ os.remove(verification_log_file)
203
+ if verbose:
204
+ console.print(f"Removed existing log file: {verification_log_file}")
205
+ except OSError as e:
206
+ console.print(f"[bold red]Error removing existing log file {verification_log_file}: {e}[/bold red]")
207
+ # Continue execution, but logging might be appended or fail later
208
+ log_root = ET.Element("VerificationLog")
209
+ log_root.set("startTime", datetime.datetime.now().isoformat())
210
+
211
+ # --- Step 2: Initialize Variables ---
212
+ attempts = 0
213
+ total_cost = 0.0
214
+ model_name: Optional[str] = None
215
+ overall_success = False
216
+ last_fix_result: Optional[Dict[str, Any]] = None # Store the result of the last fix attempt
217
+
218
+ # Best iteration tracker: Stores the state with the minimum verified issues
219
+ best_iteration = {
220
+ 'attempt': -1, # -1 means initial state, 0+ for loop iterations
221
+ 'issues': float('inf'),
222
+ 'program_backup_path': None,
223
+ 'code_backup_path': None,
224
+ 'model_name': None,
225
+ }
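+ # Note on semantics: the backups taken at the start of attempt N snapshot the files whose
+ # issue count the fixer reports during attempt N, so restoring the 'best' backups later
+ # recovers the state with the fewest reported issues.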
226
+
227
+ # Statistics tracker
228
+ stats = {
229
+ 'initial_issues': -1, # -1 indicates not yet determined
230
+ 'final_issues': -1,
231
+ 'best_iteration_attempt': -1,
232
+ 'best_iteration_issues': float('inf'),
233
+ 'improvement_issues': 0,
234
+ 'overall_success_flag': False,
235
+ 'exit_reason': "Unknown",
236
+ }
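+ # Illustrative final shape (values hypothetical): {'initial_issues': 3, 'final_issues': 0,
+ # 'best_iteration_attempt': 2, 'best_iteration_issues': 0, 'improvement_issues': 3,
+ # 'overall_success_flag': True, 'exit_reason': 'Success - Reached 0 Issues'}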
237
+
238
+ # --- Input Validation ---
239
+ if not os.path.isfile(program_file):
240
+ console.print(f"[bold red]Error: Program file not found: {program_file}[/bold red]")
241
+ stats['exit_reason'] = "Input Error: Program file not found"
242
+ return {
243
+ 'success': False, 'final_program': "", 'final_code': "",
244
+ 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
245
+ 'statistics': stats
246
+ }
247
+ if not os.path.isfile(code_file):
248
+ console.print(f"[bold red]Error: Code file not found: {code_file}[/bold red]")
249
+ stats['exit_reason'] = "Input Error: Code file not found"
250
+ return {
251
+ 'success': False, 'final_program': "", 'final_code': "",
252
+ 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
253
+ 'statistics': stats
254
+ }
255
+ if not os.path.isfile(verification_program):
256
+ console.print(f"[bold red]Error: Secondary verification program not found: {verification_program}[/bold red]")
257
+ stats['exit_reason'] = "Input Error: Verification program not found"
258
+ return {
259
+ 'success': False, 'final_program': "", 'final_code': "",
260
+ 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
261
+ 'statistics': stats
262
+ }
263
+
264
+ # --- Step 3: Determine Initial State ---
265
+ if verbose:
266
+ console.print("\n[bold]Step 3: Determining Initial State[/bold]")
267
+
268
+ # 3a: Run initial program
269
+ initial_run_success, initial_output, _ = _run_subprocess(['python', program_file])
270
+ if verbose:
271
+ console.print(f"Initial program execution {'succeeded' if initial_run_success else 'failed'}.")
272
+ console.print("[dim]Initial Output:[/dim]")
273
+ console.print(f"[grey37]{initial_output or '[No Output]'}[/grey37]")
274
+
275
+ # 3b: Log initial state
276
+ initial_state_log = ET.Element("InitialState")
277
+ initial_state_log.set("timestamp", datetime.datetime.now().isoformat())
278
+ _create_cdata_element(initial_state_log, "InitialProgramOutput", initial_output)
279
+ _append_log_entry(verification_log_file, log_root, initial_state_log)
280
+
281
+ # 3c: Read initial contents
282
+ initial_program_contents = _read_file(program_file)
283
+ initial_code_contents = _read_file(code_file)
284
+ if initial_program_contents is None or initial_code_contents is None:
285
+ stats['exit_reason'] = "File Read Error: Could not read initial program or code file."
286
+ return {
287
+ 'success': False, 'final_program': initial_program_contents or "", 'final_code': initial_code_contents or "",
288
+ 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
289
+ 'statistics': stats
290
+ }
291
+
292
+ # 3d: Call fix_verification_errors for initial assessment
293
+ if verbose:
294
+ console.print("Running initial assessment with 'fix_verification_errors'...")
295
+ try:
296
+ # Use provided strength/temp for consistency, but check budget
297
+ if budget <= 0:
298
+ console.print("[bold yellow]Warning: Initial budget is zero or negative. Skipping initial assessment.[/bold yellow]")
299
+ initial_fix_result = {'total_cost': 0.0, 'verification_issues_count': float('inf'), 'model_name': None, 'explanation': ['Skipped due to budget']} # Mock result
300
+ else:
301
+ initial_fix_result = fix_verification_errors(
302
+ program=initial_program_contents,
303
+ prompt=prompt,
304
+ code=initial_code_contents,
305
+ output=initial_output,
306
+ strength=strength, # Use actual strength/temp for initial check
307
+ temperature=temperature,
308
+ verbose=verbose # Pass verbose flag down
309
+ )
310
+ last_fix_result = initial_fix_result # Store for potential later use
311
+ except Exception as e:
312
+ console.print(f"[bold red]Error during initial call to fix_verification_errors: {e}[/bold red]")
313
+ stats['exit_reason'] = f"LLM Error: Initial fix_verification_errors call failed: {e}"
314
+ # Log the error
315
+ error_log = ET.Element("Error")
316
+ error_log.set("timestamp", datetime.datetime.now().isoformat())
317
+ error_log.set("phase", "InitialAssessment")
318
+ _create_cdata_element(error_log, "ErrorMessage", str(e))
319
+ _append_log_entry(verification_log_file, log_root, error_log)
320
+ return {
321
+ 'success': False, 'final_program': initial_program_contents, 'final_code': initial_code_contents,
322
+ 'total_attempts': 0, 'total_cost': total_cost, 'model_name': model_name,
323
+ 'statistics': stats
324
+ }
325
+
326
+
327
+ # 3e: Add cost
328
+ initial_cost = initial_fix_result.get('total_cost', 0.0)
329
+ total_cost += initial_cost
330
+ model_name = initial_fix_result.get('model_name', model_name) # Update model name
331
+
332
+ # 3f: Extract initial issues
333
+ initial_issues_count = initial_fix_result.get('verification_issues_count', float('inf'))
334
+ if initial_issues_count == float('inf'):
335
+ console.print("[yellow]Warning: Could not determine initial issue count from fix_verification_errors.[/yellow]")
336
+ # Treat an undetermined count as a high number of issues so the loop still attempts fixes.
337
+ initial_issues_count = 999 # Assign a high number if undetermined
338
+
339
+ stats['initial_issues'] = initial_issues_count
340
+ if verbose:
341
+ console.print(f"Initial assessment complete. Issues found: {initial_issues_count}, Cost: ${initial_cost:.6f}")
342
+
343
+ # 3g: Initialize best iteration with initial state
344
+ best_iteration['attempt'] = 0 # Representing the initial state before loop
345
+ best_iteration['issues'] = initial_issues_count
346
+ best_iteration['program_backup_path'] = program_file # Original file path
347
+ best_iteration['code_backup_path'] = code_file # Original file path
348
+ best_iteration['model_name'] = model_name
349
+
350
+ # Log initial assessment details
351
+ initial_assessment_log = ET.Element("InitialAssessment")
352
+ initial_assessment_log.set("timestamp", datetime.datetime.now().isoformat())
353
+ initial_assessment_log.set("issues_found", str(initial_issues_count))
354
+ initial_assessment_log.set("cost", f"{initial_cost:.6f}")
355
+ if model_name:
356
+ initial_assessment_log.set("model_name", model_name)
357
+ _create_cdata_element(initial_assessment_log, "Explanation", "\n".join(initial_fix_result.get('explanation', [])))
358
+ _append_log_entry(verification_log_file, log_root, initial_assessment_log)
359
+
360
+
361
+ # 3h: Check if already successful
362
+ if initial_issues_count == 0:
363
+ console.print("[bold green]Initial state already meets verification criteria (0 issues found). No fixing loop needed.[/bold green]")
364
+ overall_success = True
365
+ stats['final_issues'] = 0
366
+ stats['best_iteration_attempt'] = 0
367
+ stats['best_iteration_issues'] = 0
368
+ stats['improvement_issues'] = 0
369
+ stats['overall_success_flag'] = True
370
+ stats['exit_reason'] = "Success on Initial Assessment"
371
+ # The fixing loop below is skipped via the 'overall_success' guard; execution continues with Steps 5-8.
372
+
373
+ # --- Step 4: Fixing Loop ---
374
+ current_program_contents = initial_program_contents
375
+ current_code_contents = initial_code_contents
376
+
377
+ if not overall_success: # Only enter loop if initial state wasn't perfect
378
+ if verbose:
379
+ console.print(f"\n[bold]Step 4: Starting Fixing Loop (Max Attempts: {max_attempts}, Budget: ${budget:.2f})[/bold]")
380
+
381
+ while attempts < max_attempts and total_cost < budget:
382
+ attempt_number = attempts + 1
383
+ if verbose:
384
+ console.print(f"\n--- Attempt {attempt_number}/{max_attempts} --- Cost so far: ${total_cost:.6f}")
385
+
386
+ # 4a: Log attempt start (done within iteration log)
387
+ iteration_log = ET.Element("Iteration")
388
+ iteration_log.set("attempt", str(attempt_number))
389
+ iteration_log.set("timestamp", datetime.datetime.now().isoformat())
390
+
391
+ # 4b: Run the program file
392
+ run_success, program_output, _ = _run_subprocess(['python', program_file])
393
+ if verbose:
394
+ console.print(f"Program execution {'succeeded' if run_success else 'failed'}.")
395
+ # console.print("[dim]Current Output:[/dim]")
396
+ # console.print(f"[grey37]{program_output or '[No Output]'}[/grey37]") # Can be very long
397
+
398
+ _create_cdata_element(iteration_log, "ProgramOutputBeforeFix", program_output)
399
+
400
+ # 4c: Read current contents (already stored in current_*)
401
+
402
+ # 4d: Create backups
403
+ program_backup_path = _create_backup(program_file, attempt_number)
404
+ code_backup_path = _create_backup(code_file, attempt_number)
405
+ if program_backup_path: iteration_log.set("program_backup", program_backup_path)
406
+ if code_backup_path: iteration_log.set("code_backup", code_backup_path)
407
+
408
+ # 4e: Call fix_verification_errors
409
+ if verbose:
410
+ console.print("Calling 'fix_verification_errors' to suggest fixes...")
411
+ try:
412
+ fix_result = fix_verification_errors(
413
+ program=current_program_contents,
414
+ prompt=prompt,
415
+ code=current_code_contents,
416
+ output=program_output,
417
+ strength=strength,
418
+ temperature=temperature,
419
+ verbose=verbose # Pass verbose flag down
420
+ )
421
+ last_fix_result = fix_result # Store latest result
422
+ except Exception as e:
423
+ console.print(f"[bold red]Error during fix_verification_errors call in attempt {attempt_number}: {e}[/bold red]")
424
+ stats['exit_reason'] = f"LLM Error: fix_verification_errors failed in loop: {e}"
425
+ # Log the error and break
426
+ error_log = ET.Element("Error")
427
+ error_log.set("timestamp", datetime.datetime.now().isoformat())
428
+ error_log.set("phase", f"FixAttempt_{attempt_number}")
429
+ _create_cdata_element(error_log, "ErrorMessage", str(e))
430
+ _append_log_entry(verification_log_file, log_root, error_log)
431
+ break # Exit loop on LLM error
432
+
433
+ # Log inputs and results to XML
434
+ inputs_log = ET.SubElement(iteration_log, "InputsToFixer")
435
+ _create_cdata_element(inputs_log, "Program", current_program_contents)
436
+ _create_cdata_element(inputs_log, "Code", current_code_contents)
437
+ _create_cdata_element(inputs_log, "Prompt", prompt)
438
+ _create_cdata_element(inputs_log, "ProgramOutput", program_output)
439
+
440
+ fixer_result_log = ET.SubElement(iteration_log, "FixerResult")
441
+ fixer_result_log.set("cost", f"{fix_result.get('total_cost', 0.0):.6f}")
442
+ fixer_result_log.set("model_name", fix_result.get('model_name', "Unknown"))
443
+ fixer_result_log.set("issues_found", str(fix_result.get('verification_issues_count', 'inf')))
444
+ _create_cdata_element(fixer_result_log, "Explanation", "\n".join(fix_result.get('explanation', [])))
445
+ _create_cdata_element(fixer_result_log, "FixedProgramSuggestion", fix_result.get('fixed_program'))
446
+ _create_cdata_element(fixer_result_log, "FixedCodeSuggestion", fix_result.get('fixed_code'))
447
+
448
+ # 4f: Add cost
449
+ attempt_cost = fix_result.get('total_cost', 0.0)
450
+ total_cost += attempt_cost
451
+ model_name = fix_result.get('model_name', model_name) # Update model name if available
452
+ if verbose:
453
+ console.print(f"Fix attempt cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
454
+ console.print(f"Issues found by fixer: {fix_result.get('verification_issues_count', 'N/A')}")
455
+
456
+
457
+ # 4h: Check budget
458
+ if total_cost > budget:
459
+ console.print(f"[bold yellow]Budget exceeded (${total_cost:.2f} > ${budget:.2f}). Stopping.[/bold yellow]")
460
+ status_log = ET.SubElement(iteration_log, "Status")
461
+ status_log.text = "Budget Exceeded"
462
+ _append_log_entry(verification_log_file, log_root, iteration_log)
463
+ stats['exit_reason'] = "Budget Exceeded"
464
+ break
465
+
466
+ # 4i: Check for success (0 issues)
467
+ current_issues_count = fix_result.get('verification_issues_count', float('inf'))
468
+ if current_issues_count == 0:
469
+ console.print("[bold green]Success! Fixer reported 0 verification issues.[/bold green]")
470
+ status_log = ET.SubElement(iteration_log, "Status")
471
+ status_log.text = "Success - 0 Issues Found"
472
+
473
+ # Update best iteration (0 issues is always the best)
474
+ best_iteration['attempt'] = attempt_number
475
+ best_iteration['issues'] = 0
476
+ best_iteration['program_backup_path'] = program_backup_path # Backup before successful fix
477
+ best_iteration['code_backup_path'] = code_backup_path # Backup before successful fix
478
+ best_iteration['model_name'] = model_name
479
+
480
+ # Write final successful code/program
481
+ final_program = fix_result.get('fixed_program', current_program_contents)
482
+ final_code = fix_result.get('fixed_code', current_code_contents)
483
+ program_written = _write_file(program_file, final_program)
484
+ code_written = _write_file(code_file, final_code)
485
+
486
+ if program_written and code_written:
487
+ current_program_contents = final_program # Update current state
488
+ current_code_contents = final_code
489
+ if verbose:
490
+ console.print("Applied final successful changes to files.")
491
+ else:
492
+ console.print("[bold red]Error writing final successful files![/bold red]")
493
+ # Success flag might be compromised if write fails
494
+
495
+ _append_log_entry(verification_log_file, log_root, iteration_log)
496
+ overall_success = True
497
+ stats['exit_reason'] = "Success - Reached 0 Issues"
498
+ break
499
+
500
+ # 4j: Check if changes were suggested
501
+ fixed_program = fix_result.get('fixed_program', current_program_contents)
502
+ fixed_code = fix_result.get('fixed_code', current_code_contents)
503
+ program_updated = fixed_program != current_program_contents
504
+ code_updated = fixed_code != current_code_contents
505
+
506
+ if not program_updated and not code_updated:
507
+ console.print("[yellow]No changes suggested by the fixer in this iteration. Stopping.[/yellow]")
508
+ status_log = ET.SubElement(iteration_log, "Status")
509
+ status_log.text = "No Changes Suggested"
510
+ _append_log_entry(verification_log_file, log_root, iteration_log)
511
+ stats['exit_reason'] = "No Changes Suggested by LLM"
512
+ break
513
+
514
+ # 4k, 4l: Log fix attempt details
515
+ fix_attempt_log = ET.SubElement(iteration_log, "FixAttempted")
516
+ fix_attempt_log.set("program_change_suggested", str(program_updated))
517
+ fix_attempt_log.set("code_change_suggested", str(code_updated))
518
+
519
+ # 4m, 4n: Secondary Verification (only if code was modified)
520
+ secondary_verification_passed = True # Assume pass if code not changed
521
+ secondary_verification_output = "Not Run (Code Unchanged)"
522
+
523
+ if code_updated:
524
+ if verbose:
525
+ console.print("Code change suggested. Running secondary verification...")
526
+ # Use a temporary file for the modified code
527
+ temp_code_file_path = None  # Initialized so the finally block can check it safely
528
+ try:
529
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as tf:
530
+ tf.write(fixed_code)
531
+ temp_code_file_path = tf.name
532
+ if verbose:
533
+ console.print(f"Wrote proposed code to temporary file: {temp_code_file_path}")
534
+
535
+ # Run the secondary verification program.
536
+ # It needs to know which code file to check. We pass the temp file path.
537
+ # Modify this command if your verification script takes args differently.
538
+ verify_command = ['python', verification_program, temp_code_file_path]
539
+ verify_success, verify_output, verify_rc = _run_subprocess(verify_command)
540
+
541
+ secondary_verification_passed = verify_success
542
+ secondary_verification_output = verify_output
543
+ if verbose:
544
+ console.print(f"Secondary verification {'PASSED' if verify_success else 'FAILED'} (Exit Code: {verify_rc}).")
545
+ # console.print(f"[dim]Verification Output:[/dim]\n[grey37]{verify_output or '[No Output]'}[/grey37]")
546
+
547
+ except Exception as e:
548
+ console.print(f"[bold red]Error during secondary verification: {e}[/bold red]")
549
+ secondary_verification_passed = False
550
+ secondary_verification_output = f"Error during verification: {e}"
551
+ finally:
552
+ # Clean up the temporary file
553
+ if temp_code_file_path and os.path.exists(temp_code_file_path):
554
+ try:
555
+ os.remove(temp_code_file_path)
556
+ except OSError as e:
557
+ console.print(f"[yellow]Warning: Could not remove temp file {temp_code_file_path}: {e}[/yellow]")
558
+
559
+ # Log secondary verification result
560
+ sec_verify_log = ET.SubElement(iteration_log, "SecondaryVerification")
561
+ sec_verify_log.set("run", str(code_updated))
562
+ sec_verify_log.set("passed", str(secondary_verification_passed))
563
+ _create_cdata_element(sec_verify_log, "Output", secondary_verification_output)
564
+
565
+ # 4o, 4p: Apply changes or discard based on secondary verification
566
+ if secondary_verification_passed:
567
+ if verbose:
568
+ console.print("Secondary verification passed (or not needed). Applying changes.")
569
+ status_log = ET.SubElement(iteration_log, "Status")
570
+ status_log.text = "Changes Applied (Secondary Verification Passed or Skipped)"
571
+
572
+ # Update best iteration if this one is better
573
+ if current_issues_count < best_iteration['issues']:
574
+ if verbose:
575
+ console.print(f"[green]Improvement found! Issues reduced from {best_iteration['issues']} to {current_issues_count}. Updating best iteration.[/green]")
576
+ best_iteration['attempt'] = attempt_number
577
+ best_iteration['issues'] = current_issues_count
578
+ best_iteration['program_backup_path'] = program_backup_path # Store backup *before* this successful step
579
+ best_iteration['code_backup_path'] = code_backup_path
580
+ best_iteration['model_name'] = model_name
581
+ elif verbose and current_issues_count >= best_iteration['issues']:
582
+ console.print(f"Current issues ({current_issues_count}) not better than best ({best_iteration['issues']}). Best iteration remains attempt {best_iteration['attempt']}.")
583
+
584
+
585
+ # Apply changes to files
586
+ files_updated = True
587
+ if code_updated:
588
+ if not _write_file(code_file, fixed_code):
589
+ files_updated = False
590
+ console.print(f"[bold red]Error writing updated code to {code_file}[/bold red]")
591
+ else:
592
+ current_code_contents = fixed_code # Update current state
593
+
594
+ if program_updated:
595
+ if not _write_file(program_file, fixed_program):
596
+ files_updated = False
597
+ console.print(f"[bold red]Error writing updated program to {program_file}[/bold red]")
598
+ else:
599
+ current_program_contents = fixed_program # Update current state
600
+
601
+ if not files_updated:
602
+ # If writing failed, we might be in an inconsistent state. Log it.
603
+ ET.SubElement(iteration_log, "Error").text = "Failed to write updated files after successful verification."
604
+
605
+
606
+ else: # Secondary verification failed
607
+ if verbose:
608
+ console.print("[bold red]Secondary verification failed. Discarding suggested changes for this iteration.[/bold red]")
609
+ status_log = ET.SubElement(iteration_log, "Status")
610
+ status_log.text = "Changes Discarded (Secondary Verification Failed)"
611
+ # Do not update files, do not update best_iteration
612
+
613
+ # 4q: Append log entry for the iteration
614
+ _append_log_entry(verification_log_file, log_root, iteration_log)
615
+
616
+ # 4r: Increment attempt counter
617
+ attempts += 1
618
+
619
+ # Check if max attempts reached
620
+ if attempts >= max_attempts:
621
+ console.print(f"[yellow]Maximum attempts ({max_attempts}) reached. Stopping.[/yellow]")
622
+ stats['exit_reason'] = "Max Attempts Reached"
623
+ # Add status to log if loop didn't break for other reasons already
624
+ if iteration_log.find("Status") is None:
625
+ status_log = ET.SubElement(iteration_log, "Status")
626
+ status_log.text = "Max Attempts Reached"
627
+ # iteration_log was already appended to log_root in step 4q; re-appending would duplicate the entry. The added Status element is persisted by the final log write in Step 7.
628
+
629
+
630
+ # --- Step 5: Post-Loop Processing ---
631
+ if verbose:
632
+ console.print("\n[bold]Step 5: Post-Loop Processing[/bold]")
633
+
634
+ final_action_log = ET.Element("FinalAction")
635
+ final_action_log.set("timestamp", datetime.datetime.now().isoformat())
636
+
637
+ if not overall_success:
638
+ console.print("[yellow]Fixing loop finished without reaching 0 issues.[/yellow]")
639
+ # Check if a 'best' iteration (better than initial and passed secondary verification) was found
640
+ if best_iteration['attempt'] > 0 and best_iteration['issues'] < stats['initial_issues']:
641
+ console.print(f"Restoring state from best recorded iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})")
642
+ restored_program = _restore_backup(best_iteration['program_backup_path'], program_file)
643
+ restored_code = _restore_backup(best_iteration['code_backup_path'], code_file)
644
+ if restored_program and restored_code:
645
+ console.print("[green]Successfully restored files from the best iteration.[/green]")
646
+ final_action_log.set("action", "RestoredBestIteration")
647
+ final_action_log.set("best_attempt", str(best_iteration['attempt']))
648
+ final_action_log.set("best_issues", str(best_iteration['issues']))
649
+ stats['final_issues'] = best_iteration['issues'] # Final state has this many issues
650
+ else:
651
+ console.print("[bold red]Error restoring files from the best iteration! Final files might be from the last attempt.[/bold red]")
652
+ final_action_log.set("action", "RestorationFailed")
653
+ # Final issues remain from the last attempt before loop exit, or initial if no changes applied
654
+ stats['final_issues'] = last_fix_result.get('verification_issues_count', stats['initial_issues']) if last_fix_result else stats['initial_issues']
655
+
656
+ elif best_iteration['attempt'] == 0: # Best was the initial state
657
+ console.print("No improvement found compared to the initial state. Keeping original files.")
658
+ # No restoration needed, files should be in original state unless write failed earlier
659
+ final_action_log.set("action", "NoImprovementFound")
660
+ stats['final_issues'] = stats['initial_issues']
661
+ else: # No iteration ever passed secondary verification or improved
662
+ console.print("No verified improvement was found. Final files are from the last attempted state before loop exit.")
663
+ final_action_log.set("action", "NoVerifiedImprovement")
664
+ # Final issues remain from the last attempt before loop exit
665
+ stats['final_issues'] = last_fix_result.get('verification_issues_count', stats['initial_issues']) if last_fix_result else stats['initial_issues']
666
+
667
+ else: # overall_success is True
668
+ console.print("[bold green]Process finished successfully![/bold green]")
669
+ final_action_log.set("action", "Success")
670
+ stats['final_issues'] = 0 # Success means 0 issues
671
+
672
+ _append_log_entry(verification_log_file, log_root, final_action_log)
673
+
674
+ # --- Step 6: Read Final Contents ---
675
+ if verbose:
676
+ console.print("\n[bold]Step 6: Reading Final File Contents[/bold]")
677
+ final_program_contents = _read_file(program_file)
678
+ final_code_contents = _read_file(code_file)
679
+ if final_program_contents is None: final_program_contents = "Error reading final program file."
680
+ if final_code_contents is None: final_code_contents = "Error reading final code file."
681
+
682
+ # --- Step 7: Calculate and Print Summary Statistics ---
683
+ if verbose:
684
+ console.print("\n[bold]Step 7: Final Statistics[/bold]")
685
+
686
+ stats['overall_success_flag'] = overall_success
687
+ stats['best_iteration_attempt'] = best_iteration['attempt'] if best_iteration['attempt'] >= 0 else 'N/A'
688
+ stats['best_iteration_issues'] = best_iteration['issues'] if best_iteration['issues'] != float('inf') else 'N/A'
689
+ if stats['initial_issues'] != float('inf') and stats['final_issues'] != float('inf') and stats['initial_issues'] >= 0 and stats['final_issues'] >= 0:
690
+ stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
691
+ else:
692
+ stats['improvement_issues'] = 'N/A' # Cannot calculate if initial/final unknown
693
+
694
+ summary_text = Text.assemble(
695
+ ("Initial Issues: ", "bold"), str(stats['initial_issues']), "\n",
696
+ ("Final Issues: ", "bold"), str(stats['final_issues']), "\n",
697
+ ("Improvement (Issues Reduced): ", "bold"), str(stats['improvement_issues']), "\n",
698
+ ("Best Iteration Attempt: ", "bold"), str(stats['best_iteration_attempt']), "\n",
699
+ ("Best Iteration Issues: ", "bold"), str(stats['best_iteration_issues']), "\n",
700
+ ("Total Attempts Made: ", "bold"), str(attempts), "\n",
701
+ ("Total LLM Cost: ", "bold"), f"${total_cost:.6f}", "\n",
702
+ ("Model Used (Last/Best): ", "bold"), str(best_iteration.get('model_name') or model_name or 'N/A'), "\n",
703
+ ("Exit Reason: ", "bold"), stats['exit_reason'], "\n",
704
+ ("Overall Success: ", "bold"), (str(overall_success), "bold green" if overall_success else "bold red")
705
+ )
706
+ console.print(Panel(summary_text, title="[bold blue]Verification Fix Loop Summary[/bold blue]", expand=False))
707
+
708
+ # Finalize XML log
709
+ log_root.set("endTime", datetime.datetime.now().isoformat())
710
+ log_root.set("totalAttempts", str(attempts))
711
+ log_root.set("totalCost", f"{total_cost:.6f}")
712
+ log_root.set("overallSuccess", str(overall_success))
713
+ # Re-write the log one last time with final attributes and pretty print
714
+ try:
715
+ rough_string = ET.tostring(log_root, 'utf-8')
716
+ reparsed = minidom.parseString(rough_string)
717
+ pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')
718
+ with open(verification_log_file, 'wb') as f:
719
+ f.write(pretty_xml)
720
+ if verbose:
721
+ console.print(f"Final XML log written to: {verification_log_file}")
722
+ except Exception as e:
723
+ console.print(f"[bold red]Error writing final XML log file {verification_log_file}: {e}[/bold red]")
724
+
725
+
726
+ # --- Step 8: Return Results ---
727
+ return {
728
+ 'success': overall_success,
729
+ 'final_program': final_program_contents,
730
+ 'final_code': final_code_contents,
731
+ 'total_attempts': attempts,
732
+ 'total_cost': total_cost,
733
+ 'model_name': best_iteration.get('model_name') or model_name, # Prefer model from best iter, fallback to last used
734
+ 'statistics': stats,
735
+ }
736
+
737
+ # Example Usage (Illustrative - requires setting up files and dependencies)
738
+ if __name__ == '__main__':
739
+ console.print(Panel("[bold yellow]Running Example Usage[/bold yellow]\nThis is illustrative and requires setting up dummy files and potentially the 'fix_verification_errors' function/package.", title="Example"))
740
+
741
+ # --- Create Dummy Files for Demonstration ---
742
+ temp_dir = tempfile.mkdtemp()
743
+ console.print(f"Created temporary directory: {temp_dir}")
744
+
745
+ dummy_program_file = os.path.join(temp_dir, "program.py")
746
+ dummy_code_file = os.path.join(temp_dir, "code_module.py")
747
+ dummy_verify_file = os.path.join(temp_dir, "verify.py")
748
+ log_file = os.path.join(temp_dir, "verification_log.xml")
749
+
750
+ # Dummy Program (uses code_module, prints success/failure)
751
+ _write_file(dummy_program_file, """
752
+ import code_module
753
+ import sys
754
+ try:
755
+ result = code_module.buggy_function(5)
756
+ expected = 10
757
+ print(f"Input: 5")
758
+ print(f"Expected: {expected}")
759
+ print(f"Actual: {result}")
760
+ if result == expected:
761
+ print("VERIFICATION_SUCCESS")
762
+ sys.exit(0)
763
+ else:
764
+ print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
765
+ sys.exit(1)
766
+ except Exception as e:
767
+ print(f"VERIFICATION_ERROR: {e}")
768
+ sys.exit(2)
769
+ """)
770
+
771
+ # Dummy Code (initially buggy)
772
+ _write_file(dummy_code_file, """
773
+ # Code module with a bug
774
+ def buggy_function(x):
775
+ # Intended to return x * 2, but has a bug
776
+ return x + 1 # Bug! Should be x * 2
777
+ """)
778
+
779
+ # Dummy Verification Script (checks basic syntax/import)
780
+ _write_file(dummy_verify_file, """
781
+ import sys
782
+ import importlib.util
783
+ import os
784
+
785
+ if len(sys.argv) < 2:
786
+ print("Usage: python verify.py <path_to_code_module.py>")
787
+ sys.exit(1)
788
+
789
+ module_path = sys.argv[1]
790
+ module_name = os.path.splitext(os.path.basename(module_path))[0]
791
+
792
+ try:
793
+ spec = importlib.util.spec_from_file_location(module_name, module_path)
794
+ if spec is None or spec.loader is None:
795
+ raise ImportError(f"Could not create spec for {module_path}")
796
+ module = importlib.util.module_from_spec(spec)
797
+ spec.loader.exec_module(module)
798
+ # Optional: Check if specific functions exist
799
+ if not hasattr(module, 'buggy_function'):
800
+ raise AttributeError("Function 'buggy_function' not found.")
801
+ print(f"Verification PASSED: {module_path} imported successfully.")
802
+ sys.exit(0) # Success
803
+ except Exception as e:
804
+ print(f"Verification FAILED: {e}")
805
+ sys.exit(1) # Failure
806
+ """)
807
+
808
+ # Dummy Prompt
809
+ dummy_prompt = "Create a Python module 'code_module.py' with a function `buggy_function(x)` that returns the input `x` multiplied by 2."
810
+
811
+ # --- Mock fix_verification_errors ---
812
+ # In a real scenario, this would be the actual LLM call function
813
+ # For this example, we simulate its behavior based on attempts
814
+ _fix_call_count = 0
815
+ def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
816
+ global _fix_call_count
817
+ _fix_call_count += 1
818
+ cost = 0.01 + (strength * 0.02) # Simulate cost based on strength
819
+ model = f"mock-model-s{strength:.1f}"
820
+ issues = 1 # Default to 1 issue initially
821
+ fixed_code = code # Default to no change
822
+ explanation = ["Initial analysis: Function seems incorrect."]
823
+
824
+ if "VERIFICATION_FAILURE" in output or "VERIFICATION_ERROR" in output:
825
+ issues = 1
826
+ if _fix_call_count <= 2: # Simulate fixing on the first or second try
827
+ # Simulate a fix
828
+ fixed_code = """
829
+ # Code module - Attempting fix
830
+ def buggy_function(x):
831
+ # Intended to return x * 2
832
+ return x * 2 # Corrected code
833
+ """
834
+ explanation = ["Identified incorrect arithmetic operation. Changed '+' to '*'."]
835
+ issues = 0 # Simulate 0 issues after fix
836
+ if verbose: print("[Mock Fixer] Suggesting corrected code.")
837
+ else:
838
+ explanation = ["Analysis: Still incorrect, unable to determine fix."]
839
+ issues = 1 # Simulate failure to fix after 2 tries
840
+ if verbose: print("[Mock Fixer] Failed to find fix this time.")
841
+ elif "VERIFICATION_SUCCESS" in output:
842
+ issues = 0
843
+ explanation = ["Code appears correct based on output."]
844
+ if verbose: print("[Mock Fixer] Code seems correct.")
845
+
846
+
847
+ return {
848
+ 'explanation': explanation,
849
+ 'fixed_program': program, # Assume program doesn't change in mock
850
+ 'fixed_code': fixed_code,
851
+ 'total_cost': cost,
852
+ 'model_name': model,
853
+ 'verification_issues_count': issues,
854
+ }
855
+
856
+ # Replace the actual function with the mock for this example run
857
+ original_fix_func = fix_verification_errors
858
+ fix_verification_errors = mock_fix_verification_errors
859
+
860
+ # --- Run the Loop ---
861
+ try:
862
+ results = fix_verification_errors_loop(
863
+ program_file=dummy_program_file,
864
+ code_file=dummy_code_file,
865
+ prompt=dummy_prompt,
866
+ verification_program=dummy_verify_file,
867
+ strength=0.5,
868
+ temperature=0.1,
869
+ max_attempts=3,
870
+ budget=0.50, # $0.50 budget
871
+ verification_log_file=log_file,
872
+ verbose=True
873
+ )
874
+
875
+ console.print("\n[bold magenta]--- Final Results ---[/bold magenta]")
876
+ console.print(f"Success: {results['success']}")
877
+ console.print(f"Total Attempts: {results['total_attempts']}")
878
+ console.print(f"Total Cost: ${results['total_cost']:.6f}")
879
+ console.print(f"Model Name: {results['model_name']}")
880
+
881
+ console.print("\nFinal Code Content:")
882
+ console.print(Syntax(results['final_code'], "python", theme="default", line_numbers=True))
883
+
884
+ console.print("\nStatistics:")
885
+ import json
886
+ console.print(json.dumps(results['statistics'], indent=2))
887
+
888
+ console.print(f"\nLog file generated at: {log_file}")
889
+
890
+ except Exception as e:
891
+ console.print(f"\n[bold red]An error occurred during the example run: {e}[/bold red]")
892
+ finally:
893
+ # Restore original function
894
+ fix_verification_errors = original_fix_func
895
+ # Clean up dummy files
896
+ try:
897
+ shutil.rmtree(temp_dir)
898
+ console.print(f"Cleaned up temporary directory: {temp_dir}")
899
+ except Exception as e:
900
+ console.print(f"[bold red]Error cleaning up temp directory {temp_dir}: {e}[/bold red]")
901
+