pdd-cli 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic; see the registry's advisory page for more details.

Files changed (49)
  1. pdd/__init__.py +7 -1
  2. pdd/bug_main.py +21 -3
  3. pdd/bug_to_unit_test.py +16 -5
  4. pdd/change.py +2 -1
  5. pdd/change_main.py +407 -189
  6. pdd/cli.py +853 -301
  7. pdd/code_generator.py +2 -1
  8. pdd/conflicts_in_prompts.py +2 -1
  9. pdd/construct_paths.py +377 -222
  10. pdd/context_generator.py +2 -1
  11. pdd/continue_generation.py +3 -2
  12. pdd/crash_main.py +55 -20
  13. pdd/data/llm_model.csv +8 -8
  14. pdd/detect_change.py +2 -1
  15. pdd/fix_code_loop.py +465 -160
  16. pdd/fix_code_module_errors.py +7 -4
  17. pdd/fix_error_loop.py +9 -9
  18. pdd/fix_errors_from_unit_tests.py +207 -365
  19. pdd/fix_main.py +31 -4
  20. pdd/fix_verification_errors.py +285 -0
  21. pdd/fix_verification_errors_loop.py +975 -0
  22. pdd/fix_verification_main.py +412 -0
  23. pdd/generate_output_paths.py +427 -183
  24. pdd/generate_test.py +3 -2
  25. pdd/increase_tests.py +2 -2
  26. pdd/llm_invoke.py +18 -8
  27. pdd/pdd_completion.zsh +38 -1
  28. pdd/preprocess.py +3 -3
  29. pdd/process_csv_change.py +466 -154
  30. pdd/prompts/extract_prompt_split_LLM.prompt +7 -4
  31. pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
  32. pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
  33. pdd/prompts/find_verification_errors_LLM.prompt +25 -0
  34. pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
  35. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
  36. pdd/prompts/fix_verification_errors_LLM.prompt +20 -0
  37. pdd/prompts/generate_test_LLM.prompt +9 -3
  38. pdd/prompts/split_LLM.prompt +3 -3
  39. pdd/prompts/update_prompt_LLM.prompt +3 -3
  40. pdd/split.py +13 -12
  41. pdd/split_main.py +22 -13
  42. pdd/trace_main.py +7 -0
  43. pdd/xml_tagger.py +2 -1
  44. {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/METADATA +4 -4
  45. {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/RECORD +49 -44
  46. {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/WHEEL +1 -1
  47. {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/entry_points.txt +0 -0
  48. {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/licenses/LICENSE +0 -0
  49. {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,975 @@
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import datetime
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Dict, Tuple, Any, Optional
8
+ from xml.sax.saxutils import escape
9
+
10
+ from rich.console import Console
11
+
12
+ # Use relative import assuming fix_verification_errors is in the same package
13
+ try:
14
+ # Attempt relative import for package context
15
+ from .fix_verification_errors import fix_verification_errors
16
+ except ImportError:
17
+ # Fallback for direct script execution (e.g., testing)
18
+ # This assumes 'pdd' package structure exists relative to the script
19
+ try:
20
+ from pdd.fix_verification_errors import fix_verification_errors
21
+ except ImportError:
22
+ raise ImportError(
23
+ "Could not import 'fix_verification_errors'. "
24
+ "Ensure it's available via relative import or in the 'pdd' package."
25
+ )
26
+
27
+ # Initialize Rich Console for pretty printing
28
+ console = Console()
29
+
30
+ def _run_program(
31
+ program_path: Path,
32
+ args: Optional[list[str]] = None,
33
+ timeout: int = 60
34
+ ) -> Tuple[int, str]:
35
+ """
36
+ Runs a Python program using subprocess, capturing combined stdout and stderr.
37
+
38
+ Args:
39
+ program_path: Path to the Python program to run.
40
+ args: Optional list of command-line arguments for the program.
41
+ timeout: Timeout in seconds for the subprocess.
42
+
43
+ Returns:
44
+ A tuple containing the return code (int) and the combined output (str).
45
+ Returns (-1, error_message) if the program is not found or other execution error occurs.
46
+ """
47
+ if not program_path.is_file():
48
+ return -1, f"Error: Program file not found at {program_path}"
49
+
50
+ command = ["python", str(program_path)]
51
+ if args:
52
+ command.extend(args)
53
+
54
+ try:
55
+ result = subprocess.run(
56
+ command,
57
+ capture_output=True,
58
+ text=True,
59
+ timeout=timeout,
60
+ check=False, # Don't raise exception for non-zero exit codes
61
+ )
62
+ combined_output = result.stdout + result.stderr
63
+ return result.returncode, combined_output
64
+ except FileNotFoundError:
65
+ return -1, f"Error: Python interpreter not found or '{program_path}' not found."
66
+ except subprocess.TimeoutExpired:
67
+ return -1, f"Error: Program execution timed out after {timeout} seconds."
68
+ except Exception as e:
69
+ return -1, f"Error: An unexpected error occurred while running the program: {e}"
70
+
71
+ def _write_log_entry(log_file_path: Path, xml_content: str):
72
+ """Appends XML content to the log file."""
73
+ try:
74
+ with open(log_file_path, "a", encoding="utf-8") as f:
75
+ f.write(xml_content + "\n")
76
+ except IOError as e:
77
+ console.print(f"[bold red]Error writing to log file {log_file_path}: {e}[/bold red]")
78
+
79
+ def fix_verification_errors_loop(
80
+ program_file: str,
81
+ code_file: str,
82
+ prompt: str,
83
+ verification_program: str,
84
+ strength: float,
85
+ temperature: float,
86
+ max_attempts: int,
87
+ budget: float,
88
+ verification_log_file: str = "verification.log",
89
+ verbose: bool = False,
90
+ program_args: Optional[list[str]] = None,
91
+ ) -> Dict[str, Any]:
92
+ """
93
+ Attempts to fix errors in a code file based on program execution output
94
+ against the prompt's intent, iterating multiple times with secondary verification.
95
+
96
+ Args:
97
+ program_file: Path to the Python program exercising the code.
98
+ code_file: Path to the code file being tested/verified.
99
+ prompt: The prompt defining the intended behavior.
100
+ verification_program: Path to a secondary program to verify code changes.
101
+ strength: LLM model strength (0.0 to 1.0).
102
+ temperature: LLM temperature (0.0 to 1.0).
103
+ max_attempts: Maximum number of fix attempts.
104
+ budget: Maximum allowed cost in USD.
105
+ verification_log_file: Path for detailed XML logging (default: "verification.log").
106
+ verbose: Enable verbose logging (default: False).
107
+ program_args: Optional list of command-line arguments for the program_file.
108
+
109
+ Returns:
110
+ A dictionary containing:
111
+ 'success': bool - Whether the code was successfully fixed.
112
+ 'final_program': str - Contents of the final program file.
113
+ 'final_code': str - Contents of the final code file.
114
+ 'total_attempts': int - Number of fix attempts made (loop iterations started).
115
+ 'total_cost': float - Total cost of LLM calls.
116
+ 'model_name': str | None - Name of the LLM model used.
117
+ 'statistics': dict - Detailed statistics about the process.
118
+ """
119
+ program_path = Path(program_file).resolve()
120
+ code_path = Path(code_file).resolve()
121
+ verification_program_path = Path(verification_program).resolve()
122
+ log_path = Path(verification_log_file).resolve()
123
+
124
+ # --- Validate Inputs ---
125
+ if not program_path.is_file():
126
+ console.print(f"[bold red]Error: Program file not found: {program_path}[/bold red]")
127
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
128
+ if not code_path.is_file():
129
+ console.print(f"[bold red]Error: Code file not found: {code_path}[/bold red]")
130
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
131
+ if not verification_program_path.is_file():
132
+ console.print(f"[bold red]Error: Verification program not found: {verification_program_path}[/bold red]")
133
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
134
+ if not 0.0 <= strength <= 1.0:
135
+ console.print(f"[bold red]Error: Strength must be between 0.0 and 1.0.[/bold red]")
136
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
137
+ if not 0.0 <= temperature <= 1.0:
138
+ console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
139
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
140
+ # Prompt requires positive max_attempts
141
+ if max_attempts <= 0:
142
+ console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
143
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
144
+ if budget < 0:
145
+ console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
146
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
147
+
148
+
149
+ # Step 1: Remove existing verification log file
150
+ try:
151
+ if log_path.exists():
152
+ os.remove(log_path)
153
+ if verbose:
154
+ console.print(f"Removed existing log file: {log_path}")
155
+ except OSError as e:
156
+ console.print(f"[bold red]Error removing log file {log_path}: {e}[/bold red]")
157
+ # Continue execution, but logging might fail
158
+
159
+ # Step 2: Initialize variables
160
+ attempts = 0 # Counter for loop iterations started
161
+ total_cost = 0.0
162
+ model_name: Optional[str] = None
163
+ overall_success = False
164
+ best_iteration = {
165
+ 'attempt': -1, # 0 represents initial state
166
+ 'program_backup': None,
167
+ 'code_backup': None,
168
+ 'issues': float('inf')
169
+ }
170
+ stats = {
171
+ 'initial_issues': -1,
172
+ 'final_issues': -1,
173
+ 'best_iteration_num': -1,
174
+ 'best_iteration_issues': float('inf'),
175
+ 'improvement_issues': 0,
176
+ 'improvement_percent': 0.0,
177
+ 'status_message': 'Initialization',
178
+ }
179
+ initial_program_content = ""
180
+ initial_code_content = ""
181
+ program_contents = "" # Keep track of current contents
182
+ code_contents = "" # Keep track of current contents
183
+
184
+ # --- Step 3: Determine Initial State ---
185
+ if verbose:
186
+ console.print("[bold cyan]Step 3: Determining Initial State...[/bold cyan]")
187
+
188
+ try:
189
+ initial_program_content = program_path.read_text(encoding="utf-8")
190
+ initial_code_content = code_path.read_text(encoding="utf-8")
191
+ program_contents = initial_program_content # Initialize current contents
192
+ code_contents = initial_code_content # Initialize current contents
193
+ except IOError as e:
194
+ console.print(f"[bold red]Error reading initial program/code files: {e}[/bold red]")
195
+ stats['status_message'] = f'Error reading initial files: {e}' # Add status message
196
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": stats}
197
+
198
+ # 3a: Run initial program with args
199
+ initial_return_code, initial_output = _run_program(program_path, args=program_args)
200
+ if verbose:
201
+ console.print(f"Initial program run exit code: {initial_return_code}")
202
+ console.print(f"Initial program output:\n{initial_output}")
203
+
204
+ # 3b: Log initial state
205
+ timestamp = datetime.datetime.now().isoformat()
206
+ initial_log_entry = f'<InitialState timestamp="{timestamp}">\n'
207
+ initial_log_entry += f' <ProgramFile>{escape(str(program_path))}</ProgramFile>\n'
208
+ initial_log_entry += f' <CodeFile>{escape(str(code_path))}</CodeFile>\n'
209
+ initial_log_entry += f' <ExitCode>{initial_return_code}</ExitCode>\n'
210
+ initial_log_entry += f' <Output>{escape(initial_output)}</Output>\n'
211
+ initial_log_entry += '</InitialState>'
212
+ _write_log_entry(log_path, initial_log_entry)
213
+
214
+ # 3d: Call fix_verification_errors for initial assessment
215
+ try:
216
+ if verbose:
217
+ console.print("Running initial assessment with fix_verification_errors...")
218
+ # Use actual strength/temp for realistic initial assessment
219
+ initial_fix_result = fix_verification_errors(
220
+ program=initial_program_content,
221
+ prompt=prompt,
222
+ code=initial_code_content,
223
+ output=initial_output,
224
+ strength=strength,
225
+ temperature=temperature,
226
+ verbose=verbose
227
+ )
228
+ # 3e: Add cost
229
+ initial_cost = initial_fix_result.get('total_cost', 0.0)
230
+ total_cost += initial_cost
231
+ model_name = initial_fix_result.get('model_name') # Capture model name early
232
+ if verbose:
233
+ console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
234
+
235
+ # 3f: Extract initial issues
236
+ initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
237
+ stats['initial_issues'] = initial_issues_count
238
+ if verbose:
239
+ console.print(f"Initial verification issues found: {initial_issues_count}")
240
+ if initial_fix_result.get('explanation'):
241
+ console.print("Initial assessment explanation:")
242
+ console.print(initial_fix_result['explanation'])
243
+
244
+ # FIX: Add check for initial assessment error *before* checking success/budget
245
+ # Check if the fixer function returned its specific error state (None explanation/model)
246
+ if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
247
+ error_msg = "Error: Fixer returned invalid/error state during initial assessment"
248
+ console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
249
+ stats['status_message'] = error_msg
250
+ stats['final_issues'] = -1 # Indicate unknown/error state
251
+ # Write final action log for error on initial check
252
+ final_log_entry = "<FinalActions>\n"
253
+ final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
254
+ final_log_entry += "</FinalActions>"
255
+ _write_log_entry(log_path, final_log_entry)
256
+ # Return failure state
257
+ return {
258
+ "success": False,
259
+ "final_program": initial_program_content,
260
+ "final_code": initial_code_content,
261
+ "total_attempts": 0,
262
+ "total_cost": total_cost, # May be non-zero if error occurred after some cost
263
+ "model_name": model_name, # May have been set before error
264
+ "statistics": stats,
265
+ }
266
+
267
+ # 3g: Initialize best iteration tracker
268
+ # Store original paths as the 'backup' for iteration 0
269
+ best_iteration = {
270
+ 'attempt': 0, # Use 0 for initial state
271
+ 'program_backup': str(program_path), # Path to original
272
+ 'code_backup': str(code_path), # Path to original
273
+ 'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
274
+ }
275
+ stats['best_iteration_num'] = 0
276
+ stats['best_iteration_issues'] = best_iteration['issues']
277
+
278
+ # 3h: Check for immediate success or budget exceeded
279
+ if initial_issues_count == 0:
280
+ console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
281
+ overall_success = True
282
+ stats['final_issues'] = 0
283
+ stats['status_message'] = 'Success on initial check'
284
+ stats['improvement_issues'] = 0
285
+ stats['improvement_percent'] = 100.0 # Reached target of 0 issues
286
+
287
+ # Write final action log for successful initial check
288
+ final_log_entry = "<FinalActions>\n"
289
+ final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
290
+ final_log_entry += "</FinalActions>"
291
+ _write_log_entry(log_path, final_log_entry)
292
+
293
+ # Step 7 (early exit): Print stats
294
+ console.print("\n[bold]--- Final Statistics ---[/bold]")
295
+ console.print(f"Initial Issues: {stats['initial_issues']}")
296
+ console.print(f"Final Issues: {stats['final_issues']}")
297
+ console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
298
+ console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
299
+ console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
300
+ console.print(f"Overall Status: {stats['status_message']}")
301
+ console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
302
+ console.print(f"Total Cost: ${total_cost:.6f}")
303
+ console.print(f"Model Used: {model_name or 'N/A'}")
304
+ # Step 8 (early exit): Return
305
+ return {
306
+ "success": overall_success,
307
+ "final_program": initial_program_content,
308
+ "final_code": initial_code_content,
309
+ "total_attempts": attempts, # attempts is 0
310
+ "total_cost": total_cost,
311
+ "model_name": model_name,
312
+ "statistics": stats,
313
+ }
314
+ elif total_cost >= budget:
315
+ console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
316
+ stats['status_message'] = 'Budget exceeded on initial check'
317
+ stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
318
+
319
+ # Write final action log for budget exceeded on initial check
320
+ final_log_entry = "<FinalActions>\n"
321
+ final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
322
+ final_log_entry += "</FinalActions>"
323
+ _write_log_entry(log_path, final_log_entry)
324
+
325
+ # No changes made, return initial state
326
+ return {
327
+ "success": False,
328
+ "final_program": initial_program_content,
329
+ "final_code": initial_code_content,
330
+ "total_attempts": 0,
331
+ "total_cost": total_cost,
332
+ "model_name": model_name,
333
+ "statistics": stats,
334
+ }
335
+
336
+ except Exception as e:
337
+ console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
338
+ stats['status_message'] = f'Error during initial assessment: {e}'
339
+ # Cannot proceed without initial assessment
340
+ return {"success": False, "final_program": initial_program_content, "final_code": initial_code_content, "total_attempts": 0, "total_cost": total_cost, "model_name": model_name, "statistics": stats}
341
+
342
+
343
+ # --- Step 4: Enter the Fixing Loop ---
344
+ if verbose:
345
+ console.print("\n[bold cyan]Step 4: Starting Fixing Loop...[/bold cyan]")
346
+
347
+ # Loop while attempts < max_attempts and budget not exceeded
348
+ # Note: The loop condition checks attempts *before* incrementing for the current iteration
349
+ while attempts < max_attempts:
350
+ current_attempt = attempts + 1 # 1-based for reporting
351
+ timestamp = datetime.datetime.now().isoformat()
352
+ iteration_log_xml = f'<Iteration attempt="{current_attempt}" timestamp="{timestamp}">\n'
353
+
354
+ # 4a: Print attempt number and increment counter for attempts *started*
355
+ console.print(f"\n[bold]Attempt {current_attempt}/{max_attempts} (Cost: ${total_cost:.4f}/{budget:.4f})[/bold]")
356
+ attempts += 1 # Increment attempts counter here for iterations started
357
+
358
+ # Check budget *before* running expensive operations in the loop
359
+ if total_cost >= budget:
360
+ console.print(f"[bold yellow]Budget ${budget:.4f} already met or exceeded before starting attempt {current_attempt}. Stopping.[/bold yellow]")
361
+ # No iteration log entry needed as the iteration didn't run
362
+ stats['status_message'] = 'Budget Exceeded'
363
+ attempts -= 1 # Decrement as this attempt didn't actually run
364
+ break
365
+
366
+ # 4b: Run the program file with args
367
+ if verbose:
368
+ console.print(f"Running program: {program_path} with args: {program_args}")
369
+ return_code, program_output = _run_program(program_path, args=program_args)
370
+ iteration_log_xml += f' <ProgramExecution>\n'
371
+ iteration_log_xml += f' <ExitCode>{return_code}</ExitCode>\n'
372
+ iteration_log_xml += f' <OutputBeforeFix>{escape(program_output)}</OutputBeforeFix>\n'
373
+ iteration_log_xml += f' </ProgramExecution>\n'
374
+ if verbose:
375
+ console.print(f"Program exit code: {return_code}")
376
+ # console.print(f"Program output:\n{program_output}") # Can be long
377
+
378
+ # 4c: Read current contents (already stored in program_contents/code_contents)
379
+ # Re-read could be added here if external modification is possible, but generally not needed
380
+ # try:
381
+ # program_contents = program_path.read_text(encoding="utf-8")
382
+ # code_contents = code_path.read_text(encoding="utf-8")
383
+ # except IOError as e: ...
384
+
385
+ # 4d: Create backups
386
+ program_backup_path = program_path.with_stem(f"{program_path.stem}_iteration_{current_attempt}").with_suffix(program_path.suffix)
387
+ code_backup_path = code_path.with_stem(f"{code_path.stem}_iteration_{current_attempt}").with_suffix(code_path.suffix)
388
+ try:
389
+ # Copy from the *current* state before this iteration's fix
390
+ program_path.write_text(program_contents, encoding="utf-8") # Ensure file matches memory state
391
+ code_path.write_text(code_contents, encoding="utf-8") # Ensure file matches memory state
392
+ shutil.copy2(program_path, program_backup_path)
393
+ shutil.copy2(code_path, code_backup_path)
394
+ if verbose:
395
+ console.print(f"Created backups: {program_backup_path}, {code_backup_path}")
396
+ iteration_log_xml += f' <Backups>\n'
397
+ iteration_log_xml += f' <Program>{escape(str(program_backup_path))}</Program>\n'
398
+ iteration_log_xml += f' <Code>{escape(str(code_backup_path))}</Code>\n'
399
+ iteration_log_xml += f' </Backups>\n'
400
+ except OSError as e:
401
+ console.print(f"[bold red]Error creating backup files during attempt {current_attempt}: {e}[/bold red]")
402
+ iteration_log_xml += f' <Status>Error Creating Backups</Status>\n</Iteration>'
403
+ _write_log_entry(log_path, iteration_log_xml)
404
+ stats['status_message'] = f'Error creating backups on attempt {current_attempt}'
405
+ break # Don't proceed without backups
406
+
407
+ # 4e: Call fix_verification_errors
408
+ iteration_log_xml += f' <InputsToFixer>\n'
409
+ iteration_log_xml += f' <Program>{escape(program_contents)}</Program>\n'
410
+ iteration_log_xml += f' <Code>{escape(code_contents)}</Code>\n'
411
+ iteration_log_xml += f' <Prompt>{escape(prompt)}</Prompt>\n'
412
+ iteration_log_xml += f' <ProgramOutput>{escape(program_output)}</ProgramOutput>\n'
413
+ iteration_log_xml += f' </InputsToFixer>\n'
414
+
415
+ fix_result = {}
416
+ try:
417
+ if verbose:
418
+ console.print("Calling fix_verification_errors...")
419
+ fix_result = fix_verification_errors(
420
+ program=program_contents,
421
+ prompt=prompt,
422
+ code=code_contents,
423
+ output=program_output,
424
+ strength=strength,
425
+ temperature=temperature,
426
+ verbose=verbose # Pass verbose flag down
427
+ )
428
+
429
+ # 4f: Add cost
430
+ attempt_cost = fix_result.get('total_cost', 0.0)
431
+ total_cost += attempt_cost
432
+ model_name = fix_result.get('model_name', model_name) # Update if available
433
+ current_issues_count = fix_result.get('verification_issues_count', -1)
434
+
435
+ if verbose:
436
+ console.print(f"Fixer cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
437
+ console.print(f"Fixer issues found: {current_issues_count}")
438
+ if fix_result.get('explanation'):
439
+ console.print("Fixer explanation:")
440
+ console.print(fix_result['explanation'])
441
+
442
+
443
+ # 4g: Log fixer result
444
+ iteration_log_xml += f' <FixerResult '
445
+ iteration_log_xml += f'total_cost="{attempt_cost:.6f}" '
446
+ iteration_log_xml += f'model_name="{escape(model_name or "N/A")}" '
447
+ iteration_log_xml += f'verification_issues_count="{current_issues_count}">\n'
448
+ iteration_log_xml += f' <Explanation>{escape(str(fix_result.get("explanation", "N/A")))}</Explanation>\n'
449
+ iteration_log_xml += f' <FixedProgram>{escape(fix_result.get("fixed_program", ""))}</FixedProgram>\n'
450
+ iteration_log_xml += f' <FixedCode>{escape(fix_result.get("fixed_code", ""))}</FixedCode>\n'
451
+ iteration_log_xml += f' </FixerResult>\n'
452
+
453
+ except Exception as e:
454
+ console.print(f"[bold red]Error calling fix_verification_errors on attempt {current_attempt}: {e}[/bold red]")
455
+ iteration_log_xml += f' <Status>Error in Fixer Call: {escape(str(e))}</Status>\n</Iteration>'
456
+ _write_log_entry(log_path, iteration_log_xml)
457
+ stats['status_message'] = f'Error in fixer call on attempt {current_attempt}'
458
+ # Continue to next attempt if possible, don't break immediately
459
+ continue
460
+
461
+ # FIX: Add check for fixer returning error state (e.g., None explanation/model or specific issue count)
462
+ # We use -1 as the signal for an internal error from fix_verification_errors
463
+ if current_issues_count == -1:
464
+ error_msg = "Error: Fixer returned invalid/error state"
465
+ console.print(f"[bold red]{error_msg} on attempt {current_attempt}. Stopping.[/bold red]")
466
+ iteration_log_xml += f' <Status>{escape(error_msg)}</Status>\n</Iteration>'
467
+ _write_log_entry(log_path, iteration_log_xml)
468
+ stats['status_message'] = error_msg
469
+ overall_success = False # Ensure success is false
470
+ break # Exit loop due to fixer error
471
+
472
+ # 4h: Check budget *after* fixer call cost is added
473
+ if total_cost >= budget:
474
+ console.print(f"[bold yellow]Budget ${budget:.4f} exceeded after attempt {current_attempt} (Cost: ${total_cost:.4f}). Stopping.[/bold yellow]")
475
+ iteration_log_xml += f' <Status>Budget Exceeded</Status>\n</Iteration>'
476
+ _write_log_entry(log_path, iteration_log_xml)
477
+ stats['status_message'] = 'Budget Exceeded'
478
+ # Update best iteration if this costly attempt was still the best so far
479
+ if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
480
+ if verbose:
481
+ console.print(f"[green]New best iteration found (before budget break): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
482
+ best_iteration = {
483
+ 'attempt': current_attempt,
484
+ 'program_backup': str(program_backup_path),
485
+ 'code_backup': str(code_backup_path),
486
+ 'issues': current_issues_count
487
+ }
488
+ stats['best_iteration_num'] = current_attempt
489
+ stats['best_iteration_issues'] = current_issues_count
490
+ break # Exit loop due to budget
491
+
492
+ # FIX: Moved calculation of update flags earlier
493
+ # 4j: Check if changes were suggested
494
+ fixed_program = fix_result.get('fixed_program', program_contents)
495
+ fixed_code = fix_result.get('fixed_code', code_contents)
496
+ program_updated = fixed_program != program_contents
497
+ code_updated = fixed_code != code_contents
498
+
499
+ # 4k, 4l: Log fix attempt
500
+ iteration_log_xml += f' <FixAttempted program_updated="{program_updated}" code_updated="{code_updated}"/>\n'
501
+
502
+
503
+ # FIX: Restructured logic for success check and secondary verification
504
+ secondary_verification_passed = True # Assume pass unless changes made and verification fails
505
+ changes_applied_this_iteration = False
506
+
507
+ # Run secondary verification ONLY if code was updated
508
+ if code_updated:
509
+ if verbose:
510
+ console.print("Code change suggested, running secondary verification...")
511
+ try:
512
+ # Temporarily write the proposed code change
513
+ code_path.write_text(fixed_code, encoding="utf-8")
514
+
515
+ # Run verification program
516
+ verify_ret_code, verify_output = _run_program(verification_program_path)
517
+
518
+ # Determine pass/fail (simple: exit code 0 = pass)
519
+ secondary_verification_passed = (verify_ret_code == 0)
520
+
521
+ if verbose:
522
+ console.print(f"Secondary verification exit code: {verify_ret_code}")
523
+ console.print(f"Secondary verification passed: {secondary_verification_passed}")
524
+ # console.print(f"Secondary verification output:\n{verify_output}")
525
+
526
+ passed_str = str(secondary_verification_passed).lower()
527
+ iteration_log_xml += f' <SecondaryVerification passed="{passed_str}">\n'
528
+ iteration_log_xml += f' <ExitCode>{verify_ret_code}</ExitCode>\n'
529
+ iteration_log_xml += f' <Output>{escape(verify_output)}</Output>\n'
530
+ iteration_log_xml += f' </SecondaryVerification>\n'
531
+
532
+ if not secondary_verification_passed:
533
+ console.print("[yellow]Secondary verification failed. Restoring code file.[/yellow]")
534
+ code_path.write_text(code_contents, encoding="utf-8") # Restore from memory state before this attempt
535
+
536
+ except IOError as e:
537
+ console.print(f"[bold red]Error during secondary verification I/O: {e}[/bold red]")
538
+ iteration_log_xml += f' <Status>Error during secondary verification I/O: {escape(str(e))}</Status>\n'
539
+ secondary_verification_passed = False # Treat I/O error as failure
540
+ try:
541
+ code_path.write_text(code_contents, encoding="utf-8")
542
+ except IOError:
543
+ console.print(f"[bold red]Failed to restore code file after I/O error.[/bold red]")
544
+
545
+ # Now, decide outcome based on issue count and verification status
546
+ if secondary_verification_passed:
547
+ # Update best iteration if current attempt is better
548
+ if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
549
+ if verbose:
550
+ console.print(f"[green]New best iteration found: Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
551
+ best_iteration = {
552
+ 'attempt': current_attempt,
553
+ 'program_backup': str(program_backup_path),
554
+ 'code_backup': str(code_backup_path),
555
+ 'issues': current_issues_count
556
+ }
557
+ stats['best_iteration_num'] = current_attempt
558
+ stats['best_iteration_issues'] = current_issues_count
559
+
560
+ # Apply changes (code was potentially already written for verification)
561
+ try:
562
+ if program_updated:
563
+ if verbose: console.print("Applying program changes...")
564
+ program_path.write_text(fixed_program, encoding="utf-8")
565
+ program_contents = fixed_program # Update memory state
566
+ iteration_log_xml += f' <Action>Applied program changes.</Action>\n'
567
+ changes_applied_this_iteration = True
568
+ if code_updated:
569
+ # Code already written if verification ran; update memory state
570
+ code_contents = fixed_code
571
+ iteration_log_xml += f' <Action>Kept modified code (passed secondary verification).</Action>\n'
572
+ changes_applied_this_iteration = True
573
+
574
+ if changes_applied_this_iteration:
575
+ # FIX: Revert status to match original tests where applicable
576
+ iteration_log_xml += f' <Status>Changes Applied (Secondary Verification Passed or Not Needed)</Status>\n'
577
+ else:
578
+ # This case happens if verification passed but neither program nor code changed
579
+ iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n'
580
+
581
+ # Check for SUCCESS condition HERE
582
+ if current_issues_count == 0:
583
+ console.print(f"[bold green]Success! 0 verification issues found after attempt {current_attempt} and secondary verification passed.[/bold green]")
584
+ overall_success = True
585
+ stats['final_issues'] = 0
586
+ stats['status_message'] = f'Success on attempt {current_attempt}'
587
+ iteration_log_xml += '</Iteration>'
588
+ _write_log_entry(log_path, iteration_log_xml)
589
+ break # Exit loop on verified success
590
+
591
+ except IOError as e:
592
+ console.print(f"[bold red]Error writing applied changes: {e}[/bold red]")
593
+ iteration_log_xml += f' <Action>Error writing applied changes: {escape(str(e))}</Action>\n'
594
+ iteration_log_xml += f' <Status>Error Applying Changes</Status>\n'
595
+ # Continue loop if possible
596
+
597
+ else: # Secondary verification failed
598
+ iteration_log_xml += f' <Action>Changes Discarded Due To Secondary Verification Failure</Action>\n'
599
+ iteration_log_xml += f' <Status>Changes Discarded</Status>\n'
600
+ # Memory state (program_contents, code_contents) remains unchanged from start of iteration
601
+
602
+ # Check if loop should terminate due to no changes suggested when issues > 0
603
+ # FIX: Adjust condition - break if secondary verification PASSED but resulted in NO effective changes
604
+ # AND issues still remain. This avoids breaking early if verification FAILED (handled above).
605
+ if secondary_verification_passed and not changes_applied_this_iteration and current_issues_count > 0:
606
+ # FIX: Adjust status message for clarity
607
+ console.print(f"[yellow]No effective changes suggested by the fixer on attempt {current_attempt} despite issues remaining ({current_issues_count}). Stopping.[/yellow]")
608
+ iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n' # Reuse status
609
+ # FIX: Ensure status message matches test expectation when breaking here
610
+ stats['status_message'] = f'No effective changes suggested on attempt {current_attempt}'
611
+ # Update best iteration if this attempt was still the best so far
612
+ if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
613
+ if verbose:
614
+ console.print(f"[green]New best iteration found (despite no effective changes): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
615
+ best_iteration = {
616
+ 'attempt': current_attempt,
617
+ 'program_backup': str(program_backup_path),
618
+ 'code_backup': str(code_backup_path),
619
+ 'issues': current_issues_count
620
+ }
621
+ stats['best_iteration_num'] = current_attempt
622
+ stats['best_iteration_issues'] = current_issues_count
623
+
624
+ overall_success = False # Ensure success is False
625
+ iteration_log_xml += '</Iteration>'
626
+ _write_log_entry(log_path, iteration_log_xml)
627
+ break # Exit loop
628
+
629
+
630
+ # Append iteration log (if not already done on success break or no-change break)
631
+ iteration_log_xml += '</Iteration>'
632
+ _write_log_entry(log_path, iteration_log_xml)
633
+
634
+ # Small delay to avoid hitting rate limits if applicable
635
+ time.sleep(0.5)
636
+
637
+ # --- End of Loop ---
638
+
639
+ # --- Step 5: Determine Final State ---
640
+ if verbose:
641
+ console.print("\n[bold cyan]Step 5: Determining Final State...[/bold cyan]")
642
+
643
+ final_log_entry = "<FinalActions>\n"
644
+
645
+ if not overall_success:
646
+ # Determine reason for loop exit if not already set by break conditions
647
+ # FIX: Ensure status message isn't overwritten if already set by break condition
648
+ exit_reason_determined = stats['status_message'] not in ['Initialization', '']
649
+ if not exit_reason_determined:
650
+ if attempts == max_attempts:
651
+ console.print(f"[bold yellow]Maximum attempts ({max_attempts}) reached.[/bold yellow]")
652
+ stats['status_message'] = f'Max attempts ({max_attempts}) reached'
653
+ final_log_entry += f' <Action>Max attempts ({max_attempts}) reached.</Action>\n'
654
+ else:
655
+ # Loop likely exited due to an unexpected break or condition not setting status
656
+ stats['status_message'] = 'Loop finished without success for unknown reason'
657
+ final_log_entry += f' <Action>Loop finished without reaching success state ({escape(stats["status_message"])}).</Action>\n'
658
+ elif stats['status_message'] == 'Budget Exceeded':
659
+ final_log_entry += f' <Action>Loop stopped due to budget.</Action>\n'
660
+ elif stats['status_message'].startswith('No changes suggested') or stats['status_message'].startswith('No effective changes'):
661
+ final_log_entry += f' <Action>Loop stopped as no changes were suggested.</Action>\n'
662
+ elif stats['status_message'].startswith('Error'):
663
+ final_log_entry += f' <Action>Loop stopped due to error: {escape(stats["status_message"])}</Action>\n'
664
+ # else: status already set by a break condition inside loop
665
+
666
+
667
+ # 5b: Restore best iteration if one exists and is better than initial
668
+ # Check if best_iteration recorded is actually better than initial state
669
+ # And ensure it's not the initial state itself (attempt > 0)
670
+ initial_issues_val = stats['initial_issues'] if stats['initial_issues'] != -1 else float('inf')
671
+ if best_iteration['attempt'] > 0 and best_iteration['issues'] < initial_issues_val:
672
+ console.print(f"[yellow]Restoring state from best iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})[/yellow]")
673
+ final_log_entry += f' <Action>Restored Best Iteration {best_iteration["attempt"]} (Issues: {best_iteration["issues"]})</Action>\n'
674
+ stats['status_message'] += f' - Restored best iteration {best_iteration["attempt"]}'
675
+ try:
676
+ best_program_path = Path(best_iteration['program_backup'])
677
+ best_code_path = Path(best_iteration['code_backup'])
678
+ if best_program_path.is_file() and best_code_path.is_file():
679
+ # Read content from backup before copying to handle potential race conditions if needed
680
+ restored_program_content = best_program_path.read_text(encoding='utf-8')
681
+ restored_code_content = best_code_path.read_text(encoding='utf-8')
682
+ program_path.write_text(restored_program_content, encoding='utf-8')
683
+ code_path.write_text(restored_code_content, encoding='utf-8')
684
+ program_contents = restored_program_content # Update memory state
685
+ code_contents = restored_code_content # Update memory state
686
+ if verbose:
687
+ console.print(f"Restored {program_path} from {best_program_path}")
688
+ console.print(f"Restored {code_path} from {best_code_path}")
689
+ # Final issues count is the best achieved count
690
+ stats['final_issues'] = best_iteration['issues']
691
+ else:
692
+ console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
693
+ final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
694
+ stats['status_message'] += ' - Error restoring best iteration (files missing)'
695
+ # Keep the last state, final issues remain unknown or last attempted
696
+ stats['final_issues'] = -1 # Indicate uncertainty
697
+
698
+ except (OSError, IOError) as e:
699
+ console.print(f"[bold red]Error restoring files from best iteration {best_iteration['attempt']}: {e}[/bold red]")
700
+ final_log_entry += f' <Error>Error restoring files from best iteration {best_iteration["attempt"]}: {escape(str(e))}</Error>\n'
701
+ stats['status_message'] += f' - Error restoring best iteration: {e}'
702
+ stats['final_issues'] = -1 # Indicate uncertainty
703
+
704
+ # If no improvement was made or recorded (best is still initial state or worse)
705
+ elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
706
+ console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
707
+ final_log_entry += f' <Action>No improvement found or recorded; restoring original state.</Action>\n'
708
+ stats['final_issues'] = stats['initial_issues'] # Final issues are same as initial
709
+ # Add restoration info to status message if not already implied
710
+ if 'keeping original state' not in stats['status_message']:
711
+ stats['status_message'] += ' - keeping original state'
712
+ # Ensure original files are restored if they were modified in a failed attempt
713
+ try:
714
+ # Only write if current memory state differs from initial
715
+ if program_contents != initial_program_content:
716
+ program_path.write_text(initial_program_content, encoding='utf-8')
717
+ program_contents = initial_program_content
718
+ if code_contents != initial_code_content:
719
+ code_path.write_text(initial_code_content, encoding='utf-8')
720
+ code_contents = initial_code_content
721
+ except IOError as e:
722
+ console.print(f"[bold red]Error restoring initial files: {e}[/bold red]")
723
+ final_log_entry += f' <Error>Error restoring initial files: {escape(str(e))}</Error>\n'
724
+ stats['status_message'] += f' - Error restoring initial files: {e}'
725
+ stats['final_issues'] = -1 # State uncertain
726
+ # Set final issues if not set by restoration logic (e.g., error during restore)
727
+ if stats['final_issues'] == -1 and stats['initial_issues'] != -1:
728
+ stats['final_issues'] = stats['initial_issues'] # Default to initial if unsure
729
+
730
+
731
+ else: # overall_success is True
732
+ final_log_entry += f' <Action>Process finished successfully.</Action>\n'
733
+ stats['final_issues'] = 0 # Success means 0 issues
734
+
735
+ final_log_entry += "</FinalActions>"
736
+ _write_log_entry(log_path, final_log_entry)
737
+
738
+ # --- Step 6: Read Final Contents ---
739
+ # Use the in-memory contents which should reflect the final state after potential restoration
740
+ if verbose:
741
+ console.print("\n[bold cyan]Step 6: Using Final In-Memory File Contents...[/bold cyan]")
742
+ final_program_content = program_contents
743
+ final_code_content = code_contents
744
+ # Optionally re-read from disk for verification, but memory should be source of truth
745
+ # try:
746
+ # final_program_content_disk = program_path.read_text(encoding="utf-8")
747
+ # final_code_content_disk = code_path.read_text(encoding="utf-8")
748
+ # if final_program_content != final_program_content_disk or final_code_content != final_code_content_disk:
749
+ # console.print("[bold red]Warning: Final file content on disk differs from expected state![/bold red]")
750
+ # # Decide whether to trust disk or memory
751
+ # except IOError as e:
752
+ # console.print(f"[bold red]Error reading final program/code files for verification: {e}[/bold red]")
753
+ # stats['status_message'] += ' - Error reading final files for verification'
754
+
755
+
756
+ # --- Step 7: Calculate and Print Summary Statistics ---
757
+ if verbose:
758
+ console.print("\n[bold cyan]Step 7: Calculating Final Statistics...[/bold cyan]")
759
+
760
+ initial_known = stats['initial_issues'] != -1
761
+ final_known = stats['final_issues'] != -1
762
+
763
+ if initial_known and final_known:
764
+ if stats['initial_issues'] > 0:
765
+ if stats['final_issues'] == 0: # Successful fix
766
+ stats['improvement_issues'] = stats['initial_issues']
767
+ stats['improvement_percent'] = 100.0
768
+ elif stats['final_issues'] < stats['initial_issues']: # Partial improvement
769
+ stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
770
+ # % improvement towards reaching 0
771
+ stats['improvement_percent'] = (stats['improvement_issues'] / stats['initial_issues']) * 100.0
772
+ else: # No improvement or regression
773
+ stats['improvement_issues'] = 0 # Can be negative if regression occurred
774
+ stats['improvement_percent'] = 0.0 # Or negative? Let's cap at 0.
775
+ if stats['final_issues'] > stats['initial_issues']:
776
+ stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues'] # Negative value
777
+ # Percentage calculation might be misleading here, stick to 0% improvement towards goal.
778
+ elif stats['initial_issues'] == 0: # Started perfect
779
+ stats['improvement_issues'] = 0
780
+ stats['improvement_percent'] = 100.0 # Already at target
781
+ if stats['final_issues'] > 0: # Regression occurred during loop?
782
+ stats['improvement_issues'] = -stats['final_issues']
783
+ stats['improvement_percent'] = 0.0 # No longer at target
784
+ overall_success = False # Ensure success is false if regression happened after initial success
785
+ if 'Success on initial check' in stats['status_message']: # Update status if loop ran after initial success
786
+ stats['status_message'] = f'Regression occurred after initial success - Final Issues: {stats["final_issues"]}'
787
+ # else: initial_issues < 0 (should not happen if known)
788
+ # stats['improvement_issues'] = 'N/A'
789
+ # stats['improvement_percent'] = 'N/A'
790
+ else: # Initial or final state unknown
791
+ stats['improvement_issues'] = 'N/A'
792
+ stats['improvement_percent'] = 'N/A'
793
+ if final_known and stats['final_issues'] == 0:
794
+ overall_success = True # Assume success if final is 0, even if initial unknown
795
+ else:
796
+ overall_success = False # Cannot guarantee success if initial/final unknown
797
+
798
+
799
+ console.print("\n[bold]--- Final Statistics ---[/bold]")
800
+ console.print(f"Initial Issues: {stats['initial_issues'] if initial_known else 'Unknown'}")
801
+ console.print(f"Final Issues: {stats['final_issues'] if final_known else 'Unknown'}")
802
+ best_iter_num_str = stats['best_iteration_num'] if stats['best_iteration_num'] != -1 else 'N/A'
803
+ best_iter_iss_str = stats['best_iteration_issues'] if stats['best_iteration_issues'] != float('inf') else 'N/A'
804
+ console.print(f"Best Iteration Found: {best_iter_num_str} (Issues: {best_iter_iss_str})")
805
+ console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
806
+ improvement_percent_str = f"{stats['improvement_percent']:.2f}%" if isinstance(stats['improvement_percent'], float) else stats['improvement_percent']
807
+ console.print(f"Improvement (Percent Towards 0 Issues): {improvement_percent_str}")
808
+ console.print(f"Overall Status: {stats['status_message']}")
809
+ console.print(f"Total Attempts Made: {attempts}") # Now reflects loop iterations started
810
+ console.print(f"Total Cost: ${total_cost:.6f}")
811
+ console.print(f"Model Used: {model_name or 'N/A'}")
812
+
813
+ # --- Step 8: Return Results ---
814
+ # Ensure final success status matches reality (e.g., if regression occurred)
815
+ if final_known and stats['final_issues'] != 0:
816
+ overall_success = False
817
+
818
+ return {
819
+ "success": overall_success,
820
+ "final_program": final_program_content,
821
+ "final_code": final_code_content,
822
+ "total_attempts": attempts, # Return the number of loop iterations started
823
+ "total_cost": total_cost,
824
+ "model_name": model_name,
825
+ "statistics": stats,
826
+ }
827
+
828
+ # Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
829
if __name__ == "__main__":
    # Demo driver setup: build a disposable sandbox containing a small
    # program, a deliberately buggy code module, and a syntax-check
    # verification script for the loop to exercise.
    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
    temp_dir = Path("./temp_fix_verification_loop")
    temp_dir.mkdir(exist_ok=True)

    program_file = temp_dir / "my_program.py"
    code_file = temp_dir / "my_code_module.py"
    verification_program_file = temp_dir / "verify_syntax.py"

    # Program under test: calls my_code_module.process() and prints a
    # VERIFICATION_SUCCESS / VERIFICATION_FAILURE marker for the loop to parse.
    demo_program_source = """
import my_code_module
import sys
# Simulate using the module and checking output
val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
result = my_code_module.process(val)
expected = val * 2
print(f"Input: {val}")
print(f"Result: {result}")
print(f"Expected: {expected}")
if result == expected:
    print("VERIFICATION_SUCCESS")
else:
    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
"""
    program_file.write_text(demo_program_source, encoding="utf-8")

    # Initial code module, seeded with a bug (adds 2 instead of multiplying).
    demo_buggy_module = """
# my_code_module.py
def process(x):
    # Bug: should be x * 2
    return x + 2
"""
    code_file.write_text(demo_buggy_module, encoding="utf-8")

    # Secondary verification program: a plain syntax check of the module,
    # exiting 0 on success and 1 on any failure.
    demo_verifier_source = """
import sys
import py_compile
import os
# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
print(f"Checking syntax of: {code_to_check}")
try:
    py_compile.compile(code_to_check, doraise=True)
    print("Syntax OK.")
    sys.exit(0)  # Success
except py_compile.PyCompileError as e:
    print(f"Syntax Error: {e}")
    sys.exit(1)  # Failure
except Exception as e:
    print(f"Verification Error: {e}")
    sys.exit(1)  # Failure
"""
    verification_program_file.write_text(demo_verifier_source, encoding="utf-8")

    # Tell the verification script which file to compile.
    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())

    # --- Mock fix_verification_errors ---
    # Keep a handle on the real implementation so it can be restored after the
    # demo run; the mock defined below avoids actual LLM calls and costs.
    # In a real test suite, use unittest.mock instead of this manual swap.
    _original_fix_verification_errors = fix_verification_errors
    _call_count = 0
893
def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
    """Stand-in for ``fix_verification_errors`` used by the demo run.

    Mimics the real fixer's return contract without any LLM calls: it scans
    the captured *output* for the demo's VERIFICATION_SUCCESS /
    VERIFICATION_FAILURE markers and, from the second invocation onward,
    returns a corrected version of the code module.

    Args:
        program: Source of the driver program (returned unchanged).
        prompt: Original generation prompt (unused by the mock).
        code: Current code-module source; returned as-is unless "fixed".
        output: Captured stdout of the verification run, scanned for markers.
        strength: Accepted for signature compatibility; ignored.
        temperature: Accepted for signature compatibility; ignored.
        verbose: Accepted for signature compatibility; ignored.

    Returns:
        dict with keys 'explanation', 'fixed_program', 'fixed_code',
        'total_cost', 'model_name' and 'verification_issues_count'.
    """
    global _call_count
    # FIX: the original did `_call_count += 1` unconditionally, which raises
    # NameError when the module-level counter has not been initialised yet
    # (e.g. when the mock is exercised in isolation). Initialise defensively.
    try:
        _call_count += 1
    except NameError:
        _call_count = 1

    cost = 0.001 * _call_count  # Simulate a cost that grows per invocation
    model = "mock_model_v1"
    explanation = [
        "Detected deviation: Output shows 'Result: 7', 'Expected: 10'.",
        "Issue seems to be in the `process` function calculation.",
    ]
    issues_count = 1  # Assume one issue until the output proves otherwise

    fixed_program = program  # The driver program never needs fixing in the demo
    fixed_code = code

    # Simulate fixing the code on the first *real* attempt (call 2+, since
    # call 1 corresponds to the initial assessment).
    if "VERIFICATION_FAILURE" in output and _call_count >= 2:
        explanation = [
            "Identified incorrect addition `x + 2`.",
            "Corrected to multiplication `x * 2` based on prompt intent and output mismatch.",
        ]
        fixed_code = """
# my_code_module.py
def process(x):
    # Fixed: should be x * 2
    return x * 2
"""
        issues_count = 0  # Fixed!
    elif "VERIFICATION_SUCCESS" in output:
        explanation = ["Output indicates VERIFICATION_SUCCESS."]
        issues_count = 0  # Already correct

    return {
        'explanation': explanation,
        'fixed_program': fixed_program,
        'fixed_code': fixed_code,
        'total_cost': cost,
        'model_name': model,
        'verification_issues_count': issues_count,
    }
926
+
927
+ # Replace the real function with the mock
928
+ # In package context, you might need to patch differently
929
+ # For this script execution:
930
+ # Note: This direct replacement might not work if the function is imported
931
+ # using `from .fix_verification_errors import fix_verification_errors`.
932
+ # A proper mock framework (`unittest.mock.patch`) is better.
933
+ # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
934
+ # This is fragile. A better approach involves dependency injection or mocking frameworks.
935
+ # HACK: Re-assigning the imported name in the global scope of this script
936
+ globals()['fix_verification_errors'] = mock_fix_verification_errors
937
+
938
+
939
+ console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
940
+
941
+ # Example program_args: Pass input value 10 and another arg 5
942
+ # Note: The example program only uses the first arg sys.argv[1]
943
+ example_args = ["10", "another_arg"]
944
+
945
+ results = fix_verification_errors_loop(
946
+ program_file=str(program_file),
947
+ code_file=str(code_file),
948
+ prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
949
+ verification_program=str(verification_program_file),
950
+ strength=0.5,
951
+ temperature=0.1,
952
+ max_attempts=3,
953
+ budget=0.10, # Set a budget
954
+ verification_log_file=str(temp_dir / "test_verification.log"),
955
+ verbose=True,
956
+ program_args=example_args
957
+ )
958
+
959
+ console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
960
+ console.print(f"Success: {results['success']}")
961
+ console.print(f"Total Attempts: {results['total_attempts']}")
962
+ console.print(f"Total Cost: ${results['total_cost']:.6f}")
963
+ console.print(f"Model Name: {results['model_name']}")
964
+ # console.print(f"Final Program:\n{results['final_program']}") # Can be long
965
+ console.print(f"Final Code:\n{results['final_code']}")
966
+ console.print(f"Statistics:\n{results['statistics']}")
967
+
968
+ # Restore original function if needed elsewhere
969
+ globals()['fix_verification_errors'] = _original_fix_verification_errors
970
+
971
+ # Clean up dummy files
972
+ # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
973
+ # shutil.rmtree(temp_dir)
974
+ console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
975
+ console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")