pdd-cli 0.0.24__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic. Click here for more details.

Files changed (43)
  1. pdd/__init__.py +7 -1
  2. pdd/bug_main.py +5 -1
  3. pdd/bug_to_unit_test.py +16 -5
  4. pdd/change.py +2 -1
  5. pdd/change_main.py +407 -189
  6. pdd/cli.py +853 -301
  7. pdd/code_generator.py +2 -1
  8. pdd/conflicts_in_prompts.py +2 -1
  9. pdd/construct_paths.py +377 -222
  10. pdd/context_generator.py +2 -1
  11. pdd/continue_generation.py +3 -2
  12. pdd/crash_main.py +55 -20
  13. pdd/detect_change.py +2 -1
  14. pdd/fix_code_loop.py +465 -160
  15. pdd/fix_code_module_errors.py +7 -4
  16. pdd/fix_error_loop.py +9 -9
  17. pdd/fix_errors_from_unit_tests.py +207 -365
  18. pdd/fix_main.py +31 -4
  19. pdd/fix_verification_errors.py +60 -34
  20. pdd/fix_verification_errors_loop.py +842 -768
  21. pdd/fix_verification_main.py +412 -0
  22. pdd/generate_output_paths.py +427 -189
  23. pdd/generate_test.py +3 -2
  24. pdd/increase_tests.py +2 -2
  25. pdd/llm_invoke.py +14 -3
  26. pdd/preprocess.py +3 -3
  27. pdd/process_csv_change.py +466 -154
  28. pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
  29. pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
  30. pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
  31. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
  32. pdd/prompts/generate_test_LLM.prompt +9 -3
  33. pdd/prompts/update_prompt_LLM.prompt +3 -3
  34. pdd/split.py +6 -5
  35. pdd/split_main.py +13 -4
  36. pdd/trace_main.py +7 -0
  37. pdd/xml_tagger.py +2 -1
  38. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/METADATA +4 -4
  39. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/RECORD +43 -42
  40. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/WHEEL +1 -1
  41. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/entry_points.txt +0 -0
  42. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/licenses/LICENSE +0 -0
  43. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.25.dist-info}/top_level.txt +0 -0
@@ -1,161 +1,80 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Module for iteratively fixing code verification errors using LLMs.
4
- """
5
-
6
1
  import os
7
- import subprocess
8
2
  import shutil
9
- import time
3
+ import subprocess
10
4
  import datetime
11
- import xml.etree.ElementTree as ET
12
- from xml.dom import minidom
13
- import tempfile
14
- from typing import Dict, Any, Tuple, Optional
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Dict, Tuple, Any, Optional
8
+ from xml.sax.saxutils import escape
15
9
 
16
- # Use Rich for pretty console output
17
10
  from rich.console import Console
18
- from rich.panel import Panel
19
- from rich.syntax import Syntax
20
- from rich.text import Text
21
11
 
22
- # --- Internal Module Imports ---
23
- # Attempt relative import for package structure
12
+ # Use relative import assuming fix_verification_errors is in the same package
24
13
  try:
14
+ # Attempt relative import for package context
25
15
  from .fix_verification_errors import fix_verification_errors
26
- from .utils import ensure_dir_exists # Assuming a utility function exists
27
16
  except ImportError:
28
- # Fallback for standalone execution or different structure
29
- # This might indicate a setup issue if running as part of the package
30
- print("Warning: Could not perform relative import. Falling back.")
31
- # If fix_verification_errors is in the same directory or PYTHONPATH:
17
+ # Fallback for direct script execution (e.g., testing)
18
+ # This assumes 'pdd' package structure exists relative to the script
32
19
  try:
33
- from fix_verification_errors import fix_verification_errors
34
- except ImportError as e:
20
+ from pdd.fix_verification_errors import fix_verification_errors
21
+ except ImportError:
35
22
  raise ImportError(
36
23
  "Could not import 'fix_verification_errors'. "
37
- "Ensure it's in the correct path or package structure."
38
- ) from e
39
- # Define a dummy ensure_dir_exists if not available
40
- def ensure_dir_exists(file_path: str):
41
- """Ensure the directory for the given file path exists."""
42
- directory = os.path.dirname(file_path)
43
- if directory and not os.path.exists(directory):
44
- os.makedirs(directory)
45
-
46
- # Initialize Rich Console
47
- console = Console()
24
+ "Ensure it's available via relative import or in the 'pdd' package."
25
+ )
48
26
 
49
- # --- Helper Functions ---
27
+ # Initialize Rich Console for pretty printing
28
+ console = Console()
50
29
 
51
- def _run_subprocess(command: list[str], cwd: Optional[str] = None) -> Tuple[bool, str, int]:
30
+ def _run_program(
31
+ program_path: Path,
32
+ args: Optional[list[str]] = None,
33
+ timeout: int = 60
34
+ ) -> Tuple[int, str]:
52
35
  """
53
- Runs a subprocess command and captures its output.
36
+ Runs a Python program using subprocess, capturing combined stdout and stderr.
54
37
 
55
38
  Args:
56
- command: A list of strings representing the command and its arguments.
57
- cwd: The working directory to run the command in.
39
+ program_path: Path to the Python program to run.
40
+ args: Optional list of command-line arguments for the program.
41
+ timeout: Timeout in seconds for the subprocess.
58
42
 
59
43
  Returns:
60
- A tuple containing:
61
- - success (bool): True if the command exited with code 0, False otherwise.
62
- - output (str): The combined stdout and stderr of the command.
63
- - return_code (int): The exit code of the command.
44
+ A tuple containing the return code (int) and the combined output (str).
45
+ Returns (-1, error_message) if the program is not found or other execution error occurs.
64
46
  """
47
+ if not program_path.is_file():
48
+ return -1, f"Error: Program file not found at {program_path}"
49
+
50
+ command = ["python", str(program_path)]
51
+ if args:
52
+ command.extend(args)
53
+
65
54
  try:
66
- process = subprocess.run(
55
+ result = subprocess.run(
67
56
  command,
68
57
  capture_output=True,
69
58
  text=True,
70
- check=False, # Don't raise exception on non-zero exit
71
- cwd=cwd,
72
- encoding='utf-8',
73
- errors='replace' # Handle potential encoding errors
59
+ timeout=timeout,
60
+ check=False, # Don't raise exception for non-zero exit codes
74
61
  )
75
- output = process.stdout + process.stderr
76
- success = process.returncode == 0
77
- return success, output.strip(), process.returncode
78
- except FileNotFoundError:
79
- error_msg = f"Error: Command not found: '{command[0]}'. Please ensure it's installed and in PATH."
80
- console.print(f"[bold red]{error_msg}[/bold red]")
81
- return False, error_msg, -1 # Use -1 to indicate execution failure
82
- except Exception as e:
83
- error_msg = f"Error running subprocess {' '.join(command)}: {e}"
84
- console.print(f"[bold red]{error_msg}[/bold red]")
85
- return False, error_msg, -1
86
-
87
- def _read_file(file_path: str) -> Optional[str]:
88
- """Reads the content of a file."""
89
- try:
90
- with open(file_path, 'r', encoding='utf-8') as f:
91
- return f.read()
62
+ combined_output = result.stdout + result.stderr
63
+ return result.returncode, combined_output
92
64
  except FileNotFoundError:
93
- console.print(f"[bold red]Error: File not found: {file_path}[/bold red]")
94
- return None
95
- except Exception as e:
96
- console.print(f"[bold red]Error reading file {file_path}: {e}[/bold red]")
97
- return None
98
-
99
- def _write_file(file_path: str, content: str) -> bool:
100
- """Writes content to a file."""
101
- try:
102
- ensure_dir_exists(file_path)
103
- with open(file_path, 'w', encoding='utf-8') as f:
104
- f.write(content)
105
- return True
65
+ return -1, f"Error: Python interpreter not found or '{program_path}' not found."
66
+ except subprocess.TimeoutExpired:
67
+ return -1, f"Error: Program execution timed out after {timeout} seconds."
106
68
  except Exception as e:
107
- console.print(f"[bold red]Error writing file {file_path}: {e}[/bold red]")
108
- return False
109
-
110
- def _create_backup(file_path: str, iteration: int) -> Optional[str]:
111
- """Creates a backup copy of a file."""
112
- if not os.path.exists(file_path):
113
- console.print(f"[yellow]Warning: Cannot backup non-existent file: {file_path}[/yellow]")
114
- return None
115
- try:
116
- base, ext = os.path.splitext(file_path)
117
- backup_path = f"{base}_iteration_{iteration}{ext}"
118
- shutil.copy2(file_path, backup_path) # copy2 preserves metadata
119
- return backup_path
120
- except Exception as e:
121
- console.print(f"[bold red]Error creating backup for {file_path}: {e}[/bold red]")
122
- return None
123
-
124
- def _restore_backup(backup_path: str, original_path: str) -> bool:
125
- """Restores a file from its backup."""
126
- if not backup_path or not os.path.exists(backup_path):
127
- console.print(f"[bold red]Error: Backup file not found: {backup_path}[/bold red]")
128
- return False
129
- try:
130
- shutil.copy2(backup_path, original_path)
131
- return True
132
- except Exception as e:
133
- console.print(f"[bold red]Error restoring {original_path} from {backup_path}: {e}[/bold red]")
134
- return False
69
+ return -1, f"Error: An unexpected error occurred while running the program: {e}"
135
70
 
136
- def _append_log_entry(log_file: str, root_element: ET.Element, entry_element: ET.Element):
137
- """Appends an XML element to the log file."""
71
+ def _write_log_entry(log_file_path: Path, xml_content: str):
72
+ """Appends XML content to the log file."""
138
73
  try:
139
- ensure_dir_exists(log_file)
140
- root_element.append(entry_element)
141
- # Use minidom for pretty printing XML
142
- rough_string = ET.tostring(root_element, 'utf-8')
143
- reparsed = minidom.parseString(rough_string)
144
- pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')
145
-
146
- with open(log_file, 'wb') as f: # Write bytes for encoded XML
147
- f.write(pretty_xml)
148
- except Exception as e:
149
- console.print(f"[bold red]Error writing to XML log file {log_file}: {e}[/bold red]")
150
-
151
- def _create_cdata_element(parent: ET.Element, tag_name: str, content: Optional[str]):
152
- """Creates an XML element with CDATA content."""
153
- element = ET.SubElement(parent, tag_name)
154
- # Use a placeholder if content is None or empty to ensure valid XML structure
155
- element.text = ET.CDATA(content if content is not None else "")
156
-
157
-
158
- # --- Main Function ---
74
+ with open(log_file_path, "a", encoding="utf-8") as f:
75
+ f.write(xml_content + "\n")
76
+ except IOError as e:
77
+ console.print(f"[bold red]Error writing to log file {log_file_path}: {e}[/bold red]")
159
78
 
160
79
  def fix_verification_errors_loop(
161
80
  program_file: str,
@@ -166,736 +85,891 @@ def fix_verification_errors_loop(
166
85
  temperature: float,
167
86
  max_attempts: int,
168
87
  budget: float,
169
- verification_log_file: str = "verification_log.xml",
170
- verbose: bool = False
88
+ verification_log_file: str = "verification.log",
89
+ verbose: bool = False,
90
+ program_args: Optional[list[str]] = None,
171
91
  ) -> Dict[str, Any]:
172
92
  """
173
- Attempts to fix errors in a code file iteratively based on program execution.
93
+ Attempts to fix errors in a code file based on program execution output
94
+ against the prompt's intent, iterating multiple times with secondary verification.
174
95
 
175
96
  Args:
176
- program_file: Path to the Python program file that exercises the code_file.
97
+ program_file: Path to the Python program exercising the code.
177
98
  code_file: Path to the code file being tested/verified.
178
- prompt: The prompt that generated the code under test.
179
- verification_program: Path to a secondary Python program for basic verification.
180
- strength: LLM strength parameter (0.0 to 1.0).
181
- temperature: LLM temperature parameter (>= 0.0).
99
+ prompt: The prompt defining the intended behavior.
100
+ verification_program: Path to a secondary program to verify code changes.
101
+ strength: LLM model strength (0.0 to 1.0).
102
+ temperature: LLM temperature (0.0 to 1.0).
182
103
  max_attempts: Maximum number of fix attempts.
183
- budget: Maximum allowed cost for LLM calls.
184
- verification_log_file: Path for detailed XML logging.
185
- verbose: Enable detailed console logging.
104
+ budget: Maximum allowed cost in USD.
105
+ verification_log_file: Path for detailed XML logging (default: "verification.log").
106
+ verbose: Enable verbose logging (default: False).
107
+ program_args: Optional list of command-line arguments for the program_file.
186
108
 
187
109
  Returns:
188
110
  A dictionary containing:
189
- - 'success': bool - True if the code was successfully fixed.
190
- - 'final_program': str - Contents of the final program file.
191
- - 'final_code': str - Contents of the final code file.
192
- - 'total_attempts': int - Number of fix attempts made.
193
- - 'total_cost': float - Total cost incurred.
194
- - 'model_name': str | None - Name of the LLM model used (last successful call).
195
- - 'statistics': dict - Detailed statistics about the process.
111
+ 'success': bool - Whether the code was successfully fixed.
112
+ 'final_program': str - Contents of the final program file.
113
+ 'final_code': str - Contents of the final code file.
114
+ 'total_attempts': int - Number of fix attempts made (loop iterations started).
115
+ 'total_cost': float - Total cost of LLM calls.
116
+ 'model_name': str | None - Name of the LLM model used.
117
+ 'statistics': dict - Detailed statistics about the process.
196
118
  """
197
- console.print(Panel(f"Starting Verification Fix Loop for [cyan]{code_file}[/cyan]", title="[bold blue]Process Start[/bold blue]", expand=False))
198
-
199
- # --- Step 1: Initialize Log File ---
200
- if os.path.exists(verification_log_file):
201
- try:
202
- os.remove(verification_log_file)
119
+ program_path = Path(program_file).resolve()
120
+ code_path = Path(code_file).resolve()
121
+ verification_program_path = Path(verification_program).resolve()
122
+ log_path = Path(verification_log_file).resolve()
123
+
124
+ # --- Validate Inputs ---
125
+ if not program_path.is_file():
126
+ console.print(f"[bold red]Error: Program file not found: {program_path}[/bold red]")
127
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
128
+ if not code_path.is_file():
129
+ console.print(f"[bold red]Error: Code file not found: {code_path}[/bold red]")
130
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
131
+ if not verification_program_path.is_file():
132
+ console.print(f"[bold red]Error: Verification program not found: {verification_program_path}[/bold red]")
133
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
134
+ if not 0.0 <= strength <= 1.0:
135
+ console.print(f"[bold red]Error: Strength must be between 0.0 and 1.0.[/bold red]")
136
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
137
+ if not 0.0 <= temperature <= 1.0:
138
+ console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
139
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
140
+ # Prompt requires positive max_attempts
141
+ if max_attempts <= 0:
142
+ console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
143
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
144
+ if budget < 0:
145
+ console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
146
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
147
+
148
+
149
+ # Step 1: Remove existing verification log file
150
+ try:
151
+ if log_path.exists():
152
+ os.remove(log_path)
203
153
  if verbose:
204
- console.print(f"Removed existing log file: {verification_log_file}")
205
- except OSError as e:
206
- console.print(f"[bold red]Error removing existing log file {verification_log_file}: {e}[/bold red]")
207
- # Continue execution, but logging might be appended or fail later
208
- log_root = ET.Element("VerificationLog")
209
- log_root.set("startTime", datetime.datetime.now().isoformat())
154
+ console.print(f"Removed existing log file: {log_path}")
155
+ except OSError as e:
156
+ console.print(f"[bold red]Error removing log file {log_path}: {e}[/bold red]")
157
+ # Continue execution, but logging might fail
210
158
 
211
- # --- Step 2: Initialize Variables ---
212
- attempts = 0
159
+ # Step 2: Initialize variables
160
+ attempts = 0 # Counter for loop iterations started
213
161
  total_cost = 0.0
214
162
  model_name: Optional[str] = None
215
163
  overall_success = False
216
- last_fix_result: Optional[Dict[str, Any]] = None # Store the result of the last fix attempt
217
-
218
- # Best iteration tracker: Stores the state with the minimum verified issues
219
164
  best_iteration = {
220
- 'attempt': -1, # -1 means initial state, 0+ for loop iterations
221
- 'issues': float('inf'),
222
- 'program_backup_path': None,
223
- 'code_backup_path': None,
224
- 'model_name': None,
165
+ 'attempt': -1, # 0 represents initial state
166
+ 'program_backup': None,
167
+ 'code_backup': None,
168
+ 'issues': float('inf')
225
169
  }
226
-
227
- # Statistics tracker
228
170
  stats = {
229
- 'initial_issues': -1, # -1 indicates not yet determined
171
+ 'initial_issues': -1,
230
172
  'final_issues': -1,
231
- 'best_iteration_attempt': -1,
173
+ 'best_iteration_num': -1,
232
174
  'best_iteration_issues': float('inf'),
233
175
  'improvement_issues': 0,
234
- 'overall_success_flag': False,
235
- 'exit_reason': "Unknown",
176
+ 'improvement_percent': 0.0,
177
+ 'status_message': 'Initialization',
236
178
  }
237
-
238
- # --- Input Validation ---
239
- if not os.path.isfile(program_file):
240
- console.print(f"[bold red]Error: Program file not found: {program_file}[/bold red]")
241
- stats['exit_reason'] = "Input Error: Program file not found"
242
- return {
243
- 'success': False, 'final_program': "", 'final_code': "",
244
- 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
245
- 'statistics': stats
246
- }
247
- if not os.path.isfile(code_file):
248
- console.print(f"[bold red]Error: Code file not found: {code_file}[/bold red]")
249
- stats['exit_reason'] = "Input Error: Code file not found"
250
- return {
251
- 'success': False, 'final_program': "", 'final_code': "",
252
- 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
253
- 'statistics': stats
254
- }
255
- if not os.path.isfile(verification_program):
256
- console.print(f"[bold red]Error: Secondary verification program not found: {verification_program}[/bold red]")
257
- stats['exit_reason'] = "Input Error: Verification program not found"
258
- return {
259
- 'success': False, 'final_program': "", 'final_code': "",
260
- 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
261
- 'statistics': stats
262
- }
179
+ initial_program_content = ""
180
+ initial_code_content = ""
181
+ program_contents = "" # Keep track of current contents
182
+ code_contents = "" # Keep track of current contents
263
183
 
264
184
  # --- Step 3: Determine Initial State ---
265
185
  if verbose:
266
- console.print("\n[bold]Step 3: Determining Initial State[/bold]")
186
+ console.print("[bold cyan]Step 3: Determining Initial State...[/bold cyan]")
267
187
 
268
- # 3a: Run initial program
269
- initial_run_success, initial_output, _ = _run_subprocess(['python', program_file])
188
+ try:
189
+ initial_program_content = program_path.read_text(encoding="utf-8")
190
+ initial_code_content = code_path.read_text(encoding="utf-8")
191
+ program_contents = initial_program_content # Initialize current contents
192
+ code_contents = initial_code_content # Initialize current contents
193
+ except IOError as e:
194
+ console.print(f"[bold red]Error reading initial program/code files: {e}[/bold red]")
195
+ stats['status_message'] = f'Error reading initial files: {e}' # Add status message
196
+ return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": stats}
197
+
198
+ # 3a: Run initial program with args
199
+ initial_return_code, initial_output = _run_program(program_path, args=program_args)
270
200
  if verbose:
271
- console.print(f"Initial program execution {'succeeded' if initial_run_success else 'failed'}.")
272
- console.print("[dim]Initial Output:[/dim]")
273
- console.print(f"[grey37]{initial_output or '[No Output]'}[/grey37]")
201
+ console.print(f"Initial program run exit code: {initial_return_code}")
202
+ console.print(f"Initial program output:\n{initial_output}")
274
203
 
275
204
  # 3b: Log initial state
276
- initial_state_log = ET.Element("InitialState")
277
- initial_state_log.set("timestamp", datetime.datetime.now().isoformat())
278
- _create_cdata_element(initial_state_log, "InitialProgramOutput", initial_output)
279
- _append_log_entry(verification_log_file, log_root, initial_state_log)
280
-
281
- # 3c: Read initial contents
282
- initial_program_contents = _read_file(program_file)
283
- initial_code_contents = _read_file(code_file)
284
- if initial_program_contents is None or initial_code_contents is None:
285
- stats['exit_reason'] = "File Read Error: Could not read initial program or code file."
286
- return {
287
- 'success': False, 'final_program': initial_program_contents or "", 'final_code': initial_code_contents or "",
288
- 'total_attempts': 0, 'total_cost': 0.0, 'model_name': None,
289
- 'statistics': stats
290
- }
205
+ timestamp = datetime.datetime.now().isoformat()
206
+ initial_log_entry = f'<InitialState timestamp="{timestamp}">\n'
207
+ initial_log_entry += f' <ProgramFile>{escape(str(program_path))}</ProgramFile>\n'
208
+ initial_log_entry += f' <CodeFile>{escape(str(code_path))}</CodeFile>\n'
209
+ initial_log_entry += f' <ExitCode>{initial_return_code}</ExitCode>\n'
210
+ initial_log_entry += f' <Output>{escape(initial_output)}</Output>\n'
211
+ initial_log_entry += '</InitialState>'
212
+ _write_log_entry(log_path, initial_log_entry)
291
213
 
292
214
  # 3d: Call fix_verification_errors for initial assessment
293
- if verbose:
294
- console.print("Running initial assessment with 'fix_verification_errors'...")
295
215
  try:
296
- # Use provided strength/temp for consistency, but check budget
297
- if budget <= 0:
298
- console.print("[bold yellow]Warning: Initial budget is zero or negative. Skipping initial assessment.[/bold yellow]")
299
- initial_fix_result = {'total_cost': 0.0, 'verification_issues_count': float('inf'), 'model_name': None, 'explanation': ['Skipped due to budget']} # Mock result
300
- else:
301
- initial_fix_result = fix_verification_errors(
302
- program=initial_program_contents,
303
- prompt=prompt,
304
- code=initial_code_contents,
305
- output=initial_output,
306
- strength=strength, # Use actual strength/temp for initial check
307
- temperature=temperature,
308
- verbose=verbose # Pass verbose flag down
309
- )
310
- last_fix_result = initial_fix_result # Store for potential later use
311
- except Exception as e:
312
- console.print(f"[bold red]Error during initial call to fix_verification_errors: {e}[/bold red]")
313
- stats['exit_reason'] = f"LLM Error: Initial fix_verification_errors call failed: {e}"
314
- # Log the error
315
- error_log = ET.Element("Error")
316
- error_log.set("timestamp", datetime.datetime.now().isoformat())
317
- error_log.set("phase", "InitialAssessment")
318
- _create_cdata_element(error_log, "ErrorMessage", str(e))
319
- _append_log_entry(verification_log_file, log_root, error_log)
320
- return {
321
- 'success': False, 'final_program': initial_program_contents, 'final_code': initial_code_contents,
322
- 'total_attempts': 0, 'total_cost': total_cost, 'model_name': model_name,
323
- 'statistics': stats
324
- }
216
+ if verbose:
217
+ console.print("Running initial assessment with fix_verification_errors...")
218
+ # Use actual strength/temp for realistic initial assessment
219
+ initial_fix_result = fix_verification_errors(
220
+ program=initial_program_content,
221
+ prompt=prompt,
222
+ code=initial_code_content,
223
+ output=initial_output,
224
+ strength=strength,
225
+ temperature=temperature,
226
+ verbose=verbose
227
+ )
228
+ # 3e: Add cost
229
+ initial_cost = initial_fix_result.get('total_cost', 0.0)
230
+ total_cost += initial_cost
231
+ model_name = initial_fix_result.get('model_name') # Capture model name early
232
+ if verbose:
233
+ console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
325
234
 
235
+ # 3f: Extract initial issues
236
+ initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
237
+ stats['initial_issues'] = initial_issues_count
238
+ if verbose:
239
+ console.print(f"Initial verification issues found: {initial_issues_count}")
240
+ if initial_fix_result.get('explanation'):
241
+ console.print("Initial assessment explanation:")
242
+ console.print(initial_fix_result['explanation'])
243
+
244
+ # FIX: Add check for initial assessment error *before* checking success/budget
245
+ # Check if the fixer function returned its specific error state (None explanation/model)
246
+ if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
247
+ error_msg = "Error: Fixer returned invalid/error state during initial assessment"
248
+ console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
249
+ stats['status_message'] = error_msg
250
+ stats['final_issues'] = -1 # Indicate unknown/error state
251
+ # Write final action log for error on initial check
252
+ final_log_entry = "<FinalActions>\n"
253
+ final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
254
+ final_log_entry += "</FinalActions>"
255
+ _write_log_entry(log_path, final_log_entry)
256
+ # Return failure state
257
+ return {
258
+ "success": False,
259
+ "final_program": initial_program_content,
260
+ "final_code": initial_code_content,
261
+ "total_attempts": 0,
262
+ "total_cost": total_cost, # May be non-zero if error occurred after some cost
263
+ "model_name": model_name, # May have been set before error
264
+ "statistics": stats,
265
+ }
266
+
267
+ # 3g: Initialize best iteration tracker
268
+ # Store original paths as the 'backup' for iteration 0
269
+ best_iteration = {
270
+ 'attempt': 0, # Use 0 for initial state
271
+ 'program_backup': str(program_path), # Path to original
272
+ 'code_backup': str(code_path), # Path to original
273
+ 'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
274
+ }
275
+ stats['best_iteration_num'] = 0
276
+ stats['best_iteration_issues'] = best_iteration['issues']
277
+
278
+ # 3h: Check for immediate success or budget exceeded
279
+ if initial_issues_count == 0:
280
+ console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
281
+ overall_success = True
282
+ stats['final_issues'] = 0
283
+ stats['status_message'] = 'Success on initial check'
284
+ stats['improvement_issues'] = 0
285
+ stats['improvement_percent'] = 100.0 # Reached target of 0 issues
286
+
287
+ # Write final action log for successful initial check
288
+ final_log_entry = "<FinalActions>\n"
289
+ final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
290
+ final_log_entry += "</FinalActions>"
291
+ _write_log_entry(log_path, final_log_entry)
292
+
293
+ # Step 7 (early exit): Print stats
294
+ console.print("\n[bold]--- Final Statistics ---[/bold]")
295
+ console.print(f"Initial Issues: {stats['initial_issues']}")
296
+ console.print(f"Final Issues: {stats['final_issues']}")
297
+ console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
298
+ console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
299
+ console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
300
+ console.print(f"Overall Status: {stats['status_message']}")
301
+ console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
302
+ console.print(f"Total Cost: ${total_cost:.6f}")
303
+ console.print(f"Model Used: {model_name or 'N/A'}")
304
+ # Step 8 (early exit): Return
305
+ return {
306
+ "success": overall_success,
307
+ "final_program": initial_program_content,
308
+ "final_code": initial_code_content,
309
+ "total_attempts": attempts, # attempts is 0
310
+ "total_cost": total_cost,
311
+ "model_name": model_name,
312
+ "statistics": stats,
313
+ }
314
+ elif total_cost >= budget:
315
+ console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
316
+ stats['status_message'] = 'Budget exceeded on initial check'
317
+ stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
318
+
319
+ # Write final action log for budget exceeded on initial check
320
+ final_log_entry = "<FinalActions>\n"
321
+ final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
322
+ final_log_entry += "</FinalActions>"
323
+ _write_log_entry(log_path, final_log_entry)
324
+
325
+ # No changes made, return initial state
326
+ return {
327
+ "success": False,
328
+ "final_program": initial_program_content,
329
+ "final_code": initial_code_content,
330
+ "total_attempts": 0,
331
+ "total_cost": total_cost,
332
+ "model_name": model_name,
333
+ "statistics": stats,
334
+ }
326
335
 
327
- # 3e: Add cost
328
- initial_cost = initial_fix_result.get('total_cost', 0.0)
329
- total_cost += initial_cost
330
- model_name = initial_fix_result.get('model_name', model_name) # Update model name
336
+ except Exception as e:
337
+ console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
338
+ stats['status_message'] = f'Error during initial assessment: {e}'
339
+ # Cannot proceed without initial assessment
340
+ return {"success": False, "final_program": initial_program_content, "final_code": initial_code_content, "total_attempts": 0, "total_cost": total_cost, "model_name": model_name, "statistics": stats}
331
341
 
332
- # 3f: Extract initial issues
333
- initial_issues_count = initial_fix_result.get('verification_issues_count', float('inf'))
334
- if initial_issues_count == float('inf'):
335
- console.print("[yellow]Warning: Could not determine initial issue count from fix_verification_errors.[/yellow]")
336
- # Decide how to handle this - maybe treat as high number of issues?
337
- initial_issues_count = 999 # Assign a high number if undetermined
338
342
 
339
- stats['initial_issues'] = initial_issues_count
343
+ # --- Step 4: Enter the Fixing Loop ---
340
344
  if verbose:
341
- console.print(f"Initial assessment complete. Issues found: {initial_issues_count}, Cost: ${initial_cost:.6f}")
342
-
343
- # 3g: Initialize best iteration with initial state
344
- best_iteration['attempt'] = 0 # Representing the initial state before loop
345
- best_iteration['issues'] = initial_issues_count
346
- best_iteration['program_backup_path'] = program_file # Original file path
347
- best_iteration['code_backup_path'] = code_file # Original file path
348
- best_iteration['model_name'] = model_name
349
-
350
- # Log initial assessment details
351
- initial_assessment_log = ET.Element("InitialAssessment")
352
- initial_assessment_log.set("timestamp", datetime.datetime.now().isoformat())
353
- initial_assessment_log.set("issues_found", str(initial_issues_count))
354
- initial_assessment_log.set("cost", f"{initial_cost:.6f}")
355
- if model_name:
356
- initial_assessment_log.set("model_name", model_name)
357
- _create_cdata_element(initial_assessment_log, "Explanation", "\n".join(initial_fix_result.get('explanation', [])))
358
- _append_log_entry(verification_log_file, log_root, initial_assessment_log)
359
-
360
-
361
- # 3h: Check if already successful
362
- if initial_issues_count == 0:
363
- console.print("[bold green]Initial state already meets verification criteria (0 issues found). No fixing loop needed.[/bold green]")
364
- overall_success = True
365
- stats['final_issues'] = 0
366
- stats['best_iteration_attempt'] = 0
367
- stats['best_iteration_issues'] = 0
368
- stats['improvement_issues'] = 0
369
- stats['overall_success_flag'] = True
370
- stats['exit_reason'] = "Success on Initial Assessment"
371
- # Skip to Step 7/8 (Return)
372
-
373
- # --- Step 4: Fixing Loop ---
374
- current_program_contents = initial_program_contents
375
- current_code_contents = initial_code_contents
376
-
377
- if not overall_success: # Only enter loop if initial state wasn't perfect
345
+ console.print("\n[bold cyan]Step 4: Starting Fixing Loop...[/bold cyan]")
346
+
347
+ # Loop while attempts < max_attempts and budget not exceeded
348
+ # Note: The loop condition checks attempts *before* incrementing for the current iteration
349
+ while attempts < max_attempts:
350
+ current_attempt = attempts + 1 # 1-based for reporting
351
+ timestamp = datetime.datetime.now().isoformat()
352
+ iteration_log_xml = f'<Iteration attempt="{current_attempt}" timestamp="{timestamp}">\n'
353
+
354
+ # 4a: Print attempt number and increment counter for attempts *started*
355
+ console.print(f"\n[bold]Attempt {current_attempt}/{max_attempts} (Cost: ${total_cost:.4f}/{budget:.4f})[/bold]")
356
+ attempts += 1 # Increment attempts counter here for iterations started
357
+
358
+ # Check budget *before* running expensive operations in the loop
359
+ if total_cost >= budget:
360
+ console.print(f"[bold yellow]Budget ${budget:.4f} already met or exceeded before starting attempt {current_attempt}. Stopping.[/bold yellow]")
361
+ # No iteration log entry needed as the iteration didn't run
362
+ stats['status_message'] = 'Budget Exceeded'
363
+ attempts -= 1 # Decrement as this attempt didn't actually run
364
+ break
365
+
366
+ # 4b: Run the program file with args
378
367
  if verbose:
379
- console.print(f"\n[bold]Step 4: Starting Fixing Loop (Max Attempts: {max_attempts}, Budget: ${budget:.2f})[/bold]")
380
-
381
- while attempts < max_attempts and total_cost < budget:
382
- attempt_number = attempts + 1
383
- if verbose:
384
- console.print(f"\n--- Attempt {attempt_number}/{max_attempts} --- Cost so far: ${total_cost:.6f}")
385
-
386
- # 4a: Log attempt start (done within iteration log)
387
- iteration_log = ET.Element("Iteration")
388
- iteration_log.set("attempt", str(attempt_number))
389
- iteration_log.set("timestamp", datetime.datetime.now().isoformat())
390
-
391
- # 4b: Run the program file
392
- run_success, program_output, _ = _run_subprocess(['python', program_file])
368
+ console.print(f"Running program: {program_path} with args: {program_args}")
369
+ return_code, program_output = _run_program(program_path, args=program_args)
370
+ iteration_log_xml += f' <ProgramExecution>\n'
371
+ iteration_log_xml += f' <ExitCode>{return_code}</ExitCode>\n'
372
+ iteration_log_xml += f' <OutputBeforeFix>{escape(program_output)}</OutputBeforeFix>\n'
373
+ iteration_log_xml += f' </ProgramExecution>\n'
374
+ if verbose:
375
+ console.print(f"Program exit code: {return_code}")
376
+ # console.print(f"Program output:\n{program_output}") # Can be long
377
+
378
+ # 4c: Read current contents (already stored in program_contents/code_contents)
379
+ # Re-read could be added here if external modification is possible, but generally not needed
380
+ # try:
381
+ # program_contents = program_path.read_text(encoding="utf-8")
382
+ # code_contents = code_path.read_text(encoding="utf-8")
383
+ # except IOError as e: ...
384
+
385
+ # 4d: Create backups
386
+ program_backup_path = program_path.with_stem(f"{program_path.stem}_iteration_{current_attempt}").with_suffix(program_path.suffix)
387
+ code_backup_path = code_path.with_stem(f"{code_path.stem}_iteration_{current_attempt}").with_suffix(code_path.suffix)
388
+ try:
389
+ # Copy from the *current* state before this iteration's fix
390
+ program_path.write_text(program_contents, encoding="utf-8") # Ensure file matches memory state
391
+ code_path.write_text(code_contents, encoding="utf-8") # Ensure file matches memory state
392
+ shutil.copy2(program_path, program_backup_path)
393
+ shutil.copy2(code_path, code_backup_path)
393
394
  if verbose:
394
- console.print(f"Program execution {'succeeded' if run_success else 'failed'}.")
395
- # console.print("[dim]Current Output:[/dim]")
396
- # console.print(f"[grey37]{program_output or '[No Output]'}[/grey37]") # Can be very long
397
-
398
- _create_cdata_element(iteration_log, "ProgramOutputBeforeFix", program_output)
399
-
400
- # 4c: Read current contents (already stored in current_*)
401
-
402
- # 4d: Create backups
403
- program_backup_path = _create_backup(program_file, attempt_number)
404
- code_backup_path = _create_backup(code_file, attempt_number)
405
- if program_backup_path: iteration_log.set("program_backup", program_backup_path)
406
- if code_backup_path: iteration_log.set("code_backup", code_backup_path)
407
-
408
- # 4e: Call fix_verification_errors
395
+ console.print(f"Created backups: {program_backup_path}, {code_backup_path}")
396
+ iteration_log_xml += f' <Backups>\n'
397
+ iteration_log_xml += f' <Program>{escape(str(program_backup_path))}</Program>\n'
398
+ iteration_log_xml += f' <Code>{escape(str(code_backup_path))}</Code>\n'
399
+ iteration_log_xml += f' </Backups>\n'
400
+ except OSError as e:
401
+ console.print(f"[bold red]Error creating backup files during attempt {current_attempt}: {e}[/bold red]")
402
+ iteration_log_xml += f' <Status>Error Creating Backups</Status>\n</Iteration>'
403
+ _write_log_entry(log_path, iteration_log_xml)
404
+ stats['status_message'] = f'Error creating backups on attempt {current_attempt}'
405
+ break # Don't proceed without backups
406
+
407
+ # 4e: Call fix_verification_errors
408
+ iteration_log_xml += f' <InputsToFixer>\n'
409
+ iteration_log_xml += f' <Program>{escape(program_contents)}</Program>\n'
410
+ iteration_log_xml += f' <Code>{escape(code_contents)}</Code>\n'
411
+ iteration_log_xml += f' <Prompt>{escape(prompt)}</Prompt>\n'
412
+ iteration_log_xml += f' <ProgramOutput>{escape(program_output)}</ProgramOutput>\n'
413
+ iteration_log_xml += f' </InputsToFixer>\n'
414
+
415
+ fix_result = {}
416
+ try:
409
417
  if verbose:
410
- console.print("Calling 'fix_verification_errors' to suggest fixes...")
411
- try:
412
- fix_result = fix_verification_errors(
413
- program=current_program_contents,
414
- prompt=prompt,
415
- code=current_code_contents,
416
- output=program_output,
417
- strength=strength,
418
- temperature=temperature,
419
- verbose=verbose # Pass verbose flag down
420
- )
421
- last_fix_result = fix_result # Store latest result
422
- except Exception as e:
423
- console.print(f"[bold red]Error during fix_verification_errors call in attempt {attempt_number}: {e}[/bold red]")
424
- stats['exit_reason'] = f"LLM Error: fix_verification_errors failed in loop: {e}"
425
- # Log the error and break
426
- error_log = ET.Element("Error")
427
- error_log.set("timestamp", datetime.datetime.now().isoformat())
428
- error_log.set("phase", f"FixAttempt_{attempt_number}")
429
- _create_cdata_element(error_log, "ErrorMessage", str(e))
430
- _append_log_entry(verification_log_file, log_root, error_log)
431
- break # Exit loop on LLM error
432
-
433
- # Log inputs and results to XML
434
- inputs_log = ET.SubElement(iteration_log, "InputsToFixer")
435
- _create_cdata_element(inputs_log, "Program", current_program_contents)
436
- _create_cdata_element(inputs_log, "Code", current_code_contents)
437
- _create_cdata_element(inputs_log, "Prompt", prompt)
438
- _create_cdata_element(inputs_log, "ProgramOutput", program_output)
439
-
440
- fixer_result_log = ET.SubElement(iteration_log, "FixerResult")
441
- fixer_result_log.set("cost", f"{fix_result.get('total_cost', 0.0):.6f}")
442
- fixer_result_log.set("model_name", fix_result.get('model_name', "Unknown"))
443
- fixer_result_log.set("issues_found", str(fix_result.get('verification_issues_count', 'inf')))
444
- _create_cdata_element(fixer_result_log, "Explanation", "\n".join(fix_result.get('explanation', [])))
445
- _create_cdata_element(fixer_result_log, "FixedProgramSuggestion", fix_result.get('fixed_program'))
446
- _create_cdata_element(fixer_result_log, "FixedCodeSuggestion", fix_result.get('fixed_code'))
418
+ console.print("Calling fix_verification_errors...")
419
+ fix_result = fix_verification_errors(
420
+ program=program_contents,
421
+ prompt=prompt,
422
+ code=code_contents,
423
+ output=program_output,
424
+ strength=strength,
425
+ temperature=temperature,
426
+ verbose=verbose # Pass verbose flag down
427
+ )
447
428
 
448
429
  # 4f: Add cost
449
430
  attempt_cost = fix_result.get('total_cost', 0.0)
450
431
  total_cost += attempt_cost
451
- model_name = fix_result.get('model_name', model_name) # Update model name if available
452
- if verbose:
453
- console.print(f"Fix attempt cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
454
- console.print(f"Issues found by fixer: {fix_result.get('verification_issues_count', 'N/A')}")
455
-
456
-
457
- # 4h: Check budget
458
- if total_cost > budget:
459
- console.print(f"[bold yellow]Budget exceeded (${total_cost:.2f} > ${budget:.2f}). Stopping.[/bold yellow]")
460
- status_log = ET.SubElement(iteration_log, "Status")
461
- status_log.text = "Budget Exceeded"
462
- _append_log_entry(verification_log_file, log_root, iteration_log)
463
- stats['exit_reason'] = "Budget Exceeded"
464
- break
465
-
466
- # 4i: Check for success (0 issues)
467
- current_issues_count = fix_result.get('verification_issues_count', float('inf'))
468
- if current_issues_count == 0:
469
- console.print("[bold green]Success! Fixer reported 0 verification issues.[/bold green]")
470
- status_log = ET.SubElement(iteration_log, "Status")
471
- status_log.text = "Success - 0 Issues Found"
472
-
473
- # Update best iteration (0 issues is always the best)
474
- best_iteration['attempt'] = attempt_number
475
- best_iteration['issues'] = 0
476
- best_iteration['program_backup_path'] = program_backup_path # Backup before successful fix
477
- best_iteration['code_backup_path'] = code_backup_path # Backup before successful fix
478
- best_iteration['model_name'] = model_name
479
-
480
- # Write final successful code/program
481
- final_program = fix_result.get('fixed_program', current_program_contents)
482
- final_code = fix_result.get('fixed_code', current_code_contents)
483
- program_written = _write_file(program_file, final_program)
484
- code_written = _write_file(code_file, final_code)
485
-
486
- if program_written and code_written:
487
- current_program_contents = final_program # Update current state
488
- current_code_contents = final_code
489
- if verbose:
490
- console.print("Applied final successful changes to files.")
491
- else:
492
- console.print("[bold red]Error writing final successful files![/bold red]")
493
- # Success flag might be compromised if write fails
494
-
495
- _append_log_entry(verification_log_file, log_root, iteration_log)
496
- overall_success = True
497
- stats['exit_reason'] = "Success - Reached 0 Issues"
498
- break
499
-
500
- # 4j: Check if changes were suggested
501
- fixed_program = fix_result.get('fixed_program', current_program_contents)
502
- fixed_code = fix_result.get('fixed_code', current_code_contents)
503
- program_updated = fixed_program != current_program_contents
504
- code_updated = fixed_code != current_code_contents
505
-
506
- if not program_updated and not code_updated:
507
- console.print("[yellow]No changes suggested by the fixer in this iteration. Stopping.[/yellow]")
508
- status_log = ET.SubElement(iteration_log, "Status")
509
- status_log.text = "No Changes Suggested"
510
- _append_log_entry(verification_log_file, log_root, iteration_log)
511
- stats['exit_reason'] = "No Changes Suggested by LLM"
512
- break
513
-
514
- # 4k, 4l: Log fix attempt details
515
- fix_attempt_log = ET.SubElement(iteration_log, "FixAttempted")
516
- fix_attempt_log.set("program_change_suggested", str(program_updated))
517
- fix_attempt_log.set("code_change_suggested", str(code_updated))
518
-
519
- # 4m, 4n: Secondary Verification (only if code was modified)
520
- secondary_verification_passed = True # Assume pass if code not changed
521
- secondary_verification_output = "Not Run (Code Unchanged)"
522
-
523
- if code_updated:
524
- if verbose:
525
- console.print("Code change suggested. Running secondary verification...")
526
- # Use a temporary file for the modified code
527
- temp_code_file = None
528
- try:
529
- with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as tf:
530
- tf.write(fixed_code)
531
- temp_code_file_path = tf.name
532
- if verbose:
533
- console.print(f"Wrote proposed code to temporary file: {temp_code_file_path}")
534
-
535
- # Run the secondary verification program.
536
- # It needs to know which code file to check. We pass the temp file path.
537
- # Modify this command if your verification script takes args differently.
538
- verify_command = ['python', verification_program, temp_code_file_path]
539
- verify_success, verify_output, verify_rc = _run_subprocess(verify_command)
432
+ model_name = fix_result.get('model_name', model_name) # Update if available
433
+ current_issues_count = fix_result.get('verification_issues_count', -1)
540
434
 
541
- secondary_verification_passed = verify_success
542
- secondary_verification_output = verify_output
543
- if verbose:
544
- console.print(f"Secondary verification {'PASSED' if verify_success else 'FAILED'} (Exit Code: {verify_rc}).")
545
- # console.print(f"[dim]Verification Output:[/dim]\n[grey37]{verify_output or '[No Output]'}[/grey37]")
546
-
547
- except Exception as e:
548
- console.print(f"[bold red]Error during secondary verification: {e}[/bold red]")
549
- secondary_verification_passed = False
550
- secondary_verification_output = f"Error during verification: {e}"
551
- finally:
552
- # Clean up the temporary file
553
- if temp_code_file_path and os.path.exists(temp_code_file_path):
554
- try:
555
- os.remove(temp_code_file_path)
556
- except OSError as e:
557
- console.print(f"[yellow]Warning: Could not remove temp file {temp_code_file_path}: {e}[/yellow]")
558
-
559
- # Log secondary verification result
560
- sec_verify_log = ET.SubElement(iteration_log, "SecondaryVerification")
561
- sec_verify_log.set("run", str(code_updated))
562
- sec_verify_log.set("passed", str(secondary_verification_passed))
563
- _create_cdata_element(sec_verify_log, "Output", secondary_verification_output)
564
-
565
- # 4o, 4p: Apply changes or discard based on secondary verification
566
- if secondary_verification_passed:
567
- if verbose:
568
- console.print("Secondary verification passed (or not needed). Applying changes.")
569
- status_log = ET.SubElement(iteration_log, "Status")
570
- status_log.text = "Changes Applied (Secondary Verification Passed or Skipped)"
571
-
572
- # Update best iteration if this one is better
573
- if current_issues_count < best_iteration['issues']:
574
- if verbose:
575
- console.print(f"[green]Improvement found! Issues reduced from {best_iteration['issues']} to {current_issues_count}. Updating best iteration.[/green]")
576
- best_iteration['attempt'] = attempt_number
577
- best_iteration['issues'] = current_issues_count
578
- best_iteration['program_backup_path'] = program_backup_path # Store backup *before* this successful step
579
- best_iteration['code_backup_path'] = code_backup_path
580
- best_iteration['model_name'] = model_name
581
- elif verbose and current_issues_count >= best_iteration['issues']:
582
- console.print(f"Current issues ({current_issues_count}) not better than best ({best_iteration['issues']}). Best iteration remains attempt {best_iteration['attempt']}.")
583
-
584
-
585
- # Apply changes to files
586
- files_updated = True
587
- if code_updated:
588
- if not _write_file(code_file, fixed_code):
589
- files_updated = False
590
- console.print(f"[bold red]Error writing updated code to {code_file}[/bold red]")
591
- else:
592
- current_code_contents = fixed_code # Update current state
435
+ if verbose:
436
+ console.print(f"Fixer cost: ${attempt_cost:.6f}, Total cost: ${total_cost:.6f}")
437
+ console.print(f"Fixer issues found: {current_issues_count}")
438
+ if fix_result.get('explanation'):
439
+ console.print("Fixer explanation:")
440
+ console.print(fix_result['explanation'])
441
+
442
+
443
+ # 4g: Log fixer result
444
+ iteration_log_xml += f' <FixerResult '
445
+ iteration_log_xml += f'total_cost="{attempt_cost:.6f}" '
446
+ iteration_log_xml += f'model_name="{escape(model_name or "N/A")}" '
447
+ iteration_log_xml += f'verification_issues_count="{current_issues_count}">\n'
448
+ iteration_log_xml += f' <Explanation>{escape(str(fix_result.get("explanation", "N/A")))}</Explanation>\n'
449
+ iteration_log_xml += f' <FixedProgram>{escape(fix_result.get("fixed_program", ""))}</FixedProgram>\n'
450
+ iteration_log_xml += f' <FixedCode>{escape(fix_result.get("fixed_code", ""))}</FixedCode>\n'
451
+ iteration_log_xml += f' </FixerResult>\n'
593
452
 
594
- if program_updated:
595
- if not _write_file(program_file, fixed_program):
596
- files_updated = False
597
- console.print(f"[bold red]Error writing updated program to {program_file}[/bold red]")
598
- else:
599
- current_program_contents = fixed_program # Update current state
453
+ except Exception as e:
454
+ console.print(f"[bold red]Error calling fix_verification_errors on attempt {current_attempt}: {e}[/bold red]")
455
+ iteration_log_xml += f' <Status>Error in Fixer Call: {escape(str(e))}</Status>\n</Iteration>'
456
+ _write_log_entry(log_path, iteration_log_xml)
457
+ stats['status_message'] = f'Error in fixer call on attempt {current_attempt}'
458
+ # Continue to next attempt if possible, don't break immediately
459
+ continue
460
+
461
+ # FIX: Add check for fixer returning error state (e.g., None explanation/model or specific issue count)
462
+ # We use -1 as the signal for an internal error from fix_verification_errors
463
+ if current_issues_count == -1:
464
+ error_msg = "Error: Fixer returned invalid/error state"
465
+ console.print(f"[bold red]{error_msg} on attempt {current_attempt}. Stopping.[/bold red]")
466
+ iteration_log_xml += f' <Status>{escape(error_msg)}</Status>\n</Iteration>'
467
+ _write_log_entry(log_path, iteration_log_xml)
468
+ stats['status_message'] = error_msg
469
+ overall_success = False # Ensure success is false
470
+ break # Exit loop due to fixer error
471
+
472
+ # 4h: Check budget *after* fixer call cost is added
473
+ if total_cost >= budget:
474
+ console.print(f"[bold yellow]Budget ${budget:.4f} exceeded after attempt {current_attempt} (Cost: ${total_cost:.4f}). Stopping.[/bold yellow]")
475
+ iteration_log_xml += f' <Status>Budget Exceeded</Status>\n</Iteration>'
476
+ _write_log_entry(log_path, iteration_log_xml)
477
+ stats['status_message'] = 'Budget Exceeded'
478
+ # Update best iteration if this costly attempt was still the best so far
479
+ if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
480
+ if verbose:
481
+ console.print(f"[green]New best iteration found (before budget break): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
482
+ best_iteration = {
483
+ 'attempt': current_attempt,
484
+ 'program_backup': str(program_backup_path),
485
+ 'code_backup': str(code_backup_path),
486
+ 'issues': current_issues_count
487
+ }
488
+ stats['best_iteration_num'] = current_attempt
489
+ stats['best_iteration_issues'] = current_issues_count
490
+ break # Exit loop due to budget
491
+
492
+ # FIX: Moved calculation of update flags earlier
493
+ # 4j: Check if changes were suggested
494
+ fixed_program = fix_result.get('fixed_program', program_contents)
495
+ fixed_code = fix_result.get('fixed_code', code_contents)
496
+ program_updated = fixed_program != program_contents
497
+ code_updated = fixed_code != code_contents
498
+
499
+ # 4k, 4l: Log fix attempt
500
+ iteration_log_xml += f' <FixAttempted program_updated="{program_updated}" code_updated="{code_updated}"/>\n'
501
+
502
+
503
+ # FIX: Restructured logic for success check and secondary verification
504
+ secondary_verification_passed = True # Assume pass unless changes made and verification fails
505
+ changes_applied_this_iteration = False
506
+
507
+ # Run secondary verification ONLY if code was updated
508
+ if code_updated:
509
+ if verbose:
510
+ console.print("Code change suggested, running secondary verification...")
511
+ try:
512
+ # Temporarily write the proposed code change
513
+ code_path.write_text(fixed_code, encoding="utf-8")
600
514
 
601
- if not files_updated:
602
- # If writing failed, we might be in an inconsistent state. Log it.
603
- ET.SubElement(iteration_log, "Error").text = "Failed to write updated files after successful verification."
515
+ # Run verification program
516
+ verify_ret_code, verify_output = _run_program(verification_program_path)
604
517
 
518
+ # Determine pass/fail (simple: exit code 0 = pass)
519
+ secondary_verification_passed = (verify_ret_code == 0)
605
520
 
606
- else: # Secondary verification failed
607
521
  if verbose:
608
- console.print("[bold red]Secondary verification failed. Discarding suggested changes for this iteration.[/bold red]")
609
- status_log = ET.SubElement(iteration_log, "Status")
610
- status_log.text = "Changes Discarded (Secondary Verification Failed)"
611
- # Do not update files, do not update best_iteration
612
-
613
- # 4q: Append log entry for the iteration
614
- _append_log_entry(verification_log_file, log_root, iteration_log)
615
-
616
- # 4r: Increment attempt counter
617
- attempts += 1
618
-
619
- # Check if max attempts reached
620
- if attempts >= max_attempts:
621
- console.print(f"[yellow]Maximum attempts ({max_attempts}) reached. Stopping.[/yellow]")
622
- stats['exit_reason'] = "Max Attempts Reached"
623
- # Add status to log if loop didn't break for other reasons already
624
- if iteration_log.find("Status") is None:
625
- status_log = ET.SubElement(iteration_log, "Status")
626
- status_log.text = "Max Attempts Reached"
627
- _append_log_entry(verification_log_file, log_root, iteration_log) # Ensure last log is written
628
-
629
-
630
- # --- Step 5: Post-Loop Processing ---
522
+ console.print(f"Secondary verification exit code: {verify_ret_code}")
523
+ console.print(f"Secondary verification passed: {secondary_verification_passed}")
524
+ # console.print(f"Secondary verification output:\n{verify_output}")
525
+
526
+ passed_str = str(secondary_verification_passed).lower()
527
+ iteration_log_xml += f' <SecondaryVerification passed="{passed_str}">\n'
528
+ iteration_log_xml += f' <ExitCode>{verify_ret_code}</ExitCode>\n'
529
+ iteration_log_xml += f' <Output>{escape(verify_output)}</Output>\n'
530
+ iteration_log_xml += f' </SecondaryVerification>\n'
531
+
532
+ if not secondary_verification_passed:
533
+ console.print("[yellow]Secondary verification failed. Restoring code file.[/yellow]")
534
+ code_path.write_text(code_contents, encoding="utf-8") # Restore from memory state before this attempt
535
+
536
+ except IOError as e:
537
+ console.print(f"[bold red]Error during secondary verification I/O: {e}[/bold red]")
538
+ iteration_log_xml += f' <Status>Error during secondary verification I/O: {escape(str(e))}</Status>\n'
539
+ secondary_verification_passed = False # Treat I/O error as failure
540
+ try:
541
+ code_path.write_text(code_contents, encoding="utf-8")
542
+ except IOError:
543
+ console.print(f"[bold red]Failed to restore code file after I/O error.[/bold red]")
544
+
545
+ # Now, decide outcome based on issue count and verification status
546
+ if secondary_verification_passed:
547
+ # Update best iteration if current attempt is better
548
+ if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
549
+ if verbose:
550
+ console.print(f"[green]New best iteration found: Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
551
+ best_iteration = {
552
+ 'attempt': current_attempt,
553
+ 'program_backup': str(program_backup_path),
554
+ 'code_backup': str(code_backup_path),
555
+ 'issues': current_issues_count
556
+ }
557
+ stats['best_iteration_num'] = current_attempt
558
+ stats['best_iteration_issues'] = current_issues_count
559
+
560
+ # Apply changes (code was potentially already written for verification)
561
+ try:
562
+ if program_updated:
563
+ if verbose: console.print("Applying program changes...")
564
+ program_path.write_text(fixed_program, encoding="utf-8")
565
+ program_contents = fixed_program # Update memory state
566
+ iteration_log_xml += f' <Action>Applied program changes.</Action>\n'
567
+ changes_applied_this_iteration = True
568
+ if code_updated:
569
+ # Code already written if verification ran; update memory state
570
+ code_contents = fixed_code
571
+ iteration_log_xml += f' <Action>Kept modified code (passed secondary verification).</Action>\n'
572
+ changes_applied_this_iteration = True
573
+
574
+ if changes_applied_this_iteration:
575
+ # FIX: Revert status to match original tests where applicable
576
+ iteration_log_xml += f' <Status>Changes Applied (Secondary Verification Passed or Not Needed)</Status>\n'
577
+ else:
578
+ # This case happens if verification passed but neither program nor code changed
579
+ iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n'
580
+
581
+ # Check for SUCCESS condition HERE
582
+ if current_issues_count == 0:
583
+ console.print(f"[bold green]Success! 0 verification issues found after attempt {current_attempt} and secondary verification passed.[/bold green]")
584
+ overall_success = True
585
+ stats['final_issues'] = 0
586
+ stats['status_message'] = f'Success on attempt {current_attempt}'
587
+ iteration_log_xml += '</Iteration>'
588
+ _write_log_entry(log_path, iteration_log_xml)
589
+ break # Exit loop on verified success
590
+
591
+ except IOError as e:
592
+ console.print(f"[bold red]Error writing applied changes: {e}[/bold red]")
593
+ iteration_log_xml += f' <Action>Error writing applied changes: {escape(str(e))}</Action>\n'
594
+ iteration_log_xml += f' <Status>Error Applying Changes</Status>\n'
595
+ # Continue loop if possible
596
+
597
+ else: # Secondary verification failed
598
+ iteration_log_xml += f' <Action>Changes Discarded Due To Secondary Verification Failure</Action>\n'
599
+ iteration_log_xml += f' <Status>Changes Discarded</Status>\n'
600
+ # Memory state (program_contents, code_contents) remains unchanged from start of iteration
601
+
602
+ # Check if loop should terminate due to no changes suggested when issues > 0
603
+ # FIX: Adjust condition - break if secondary verification PASSED but resulted in NO effective changes
604
+ # AND issues still remain. This avoids breaking early if verification FAILED (handled above).
605
+ if secondary_verification_passed and not changes_applied_this_iteration and current_issues_count > 0:
606
+ # FIX: Adjust status message for clarity
607
+ console.print(f"[yellow]No effective changes suggested by the fixer on attempt {current_attempt} despite issues remaining ({current_issues_count}). Stopping.[/yellow]")
608
+ iteration_log_xml += f' <Status>No Effective Changes Suggested (Identical Code)</Status>\n' # Reuse status
609
+ # FIX: Ensure status message matches test expectation when breaking here
610
+ stats['status_message'] = f'No effective changes suggested on attempt {current_attempt}'
611
+ # Update best iteration if this attempt was still the best so far
612
+ if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
613
+ if verbose:
614
+ console.print(f"[green]New best iteration found (despite no effective changes): Attempt {current_attempt} (Issues: {current_issues_count})[/green]")
615
+ best_iteration = {
616
+ 'attempt': current_attempt,
617
+ 'program_backup': str(program_backup_path),
618
+ 'code_backup': str(code_backup_path),
619
+ 'issues': current_issues_count
620
+ }
621
+ stats['best_iteration_num'] = current_attempt
622
+ stats['best_iteration_issues'] = current_issues_count
623
+
624
+ overall_success = False # Ensure success is False
625
+ iteration_log_xml += '</Iteration>'
626
+ _write_log_entry(log_path, iteration_log_xml)
627
+ break # Exit loop
628
+
629
+
630
+ # Append iteration log (if not already done on success break or no-change break)
631
+ iteration_log_xml += '</Iteration>'
632
+ _write_log_entry(log_path, iteration_log_xml)
633
+
634
+ # Small delay to avoid hitting rate limits if applicable
635
+ time.sleep(0.5)
636
+
637
+ # --- End of Loop ---
638
+
639
+ # --- Step 5: Determine Final State ---
631
640
  if verbose:
632
- console.print("\n[bold]Step 5: Post-Loop Processing[/bold]")
641
+ console.print("\n[bold cyan]Step 5: Determining Final State...[/bold cyan]")
633
642
 
634
- final_action_log = ET.Element("FinalAction")
635
- final_action_log.set("timestamp", datetime.datetime.now().isoformat())
643
+ final_log_entry = "<FinalActions>\n"
636
644
 
637
645
  if not overall_success:
638
- console.print("[yellow]Fixing loop finished without reaching 0 issues.[/yellow]")
639
- # Check if a 'best' iteration (better than initial and passed secondary verification) was found
640
- if best_iteration['attempt'] > 0 and best_iteration['issues'] < stats['initial_issues']:
641
- console.print(f"Restoring state from best recorded iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})")
642
- restored_program = _restore_backup(best_iteration['program_backup_path'], program_file)
643
- restored_code = _restore_backup(best_iteration['code_backup_path'], code_file)
644
- if restored_program and restored_code:
645
- console.print("[green]Successfully restored files from the best iteration.[/green]")
646
- final_action_log.set("action", "RestoredBestIteration")
647
- final_action_log.set("best_attempt", str(best_iteration['attempt']))
648
- final_action_log.set("best_issues", str(best_iteration['issues']))
649
- stats['final_issues'] = best_iteration['issues'] # Final state has this many issues
646
+ # Determine reason for loop exit if not already set by break conditions
647
+ # FIX: Ensure status message isn't overwritten if already set by break condition
648
+ exit_reason_determined = stats['status_message'] not in ['Initialization', '']
649
+ if not exit_reason_determined:
650
+ if attempts == max_attempts:
651
+ console.print(f"[bold yellow]Maximum attempts ({max_attempts}) reached.[/bold yellow]")
652
+ stats['status_message'] = f'Max attempts ({max_attempts}) reached'
653
+ final_log_entry += f' <Action>Max attempts ({max_attempts}) reached.</Action>\n'
650
654
  else:
651
- console.print("[bold red]Error restoring files from the best iteration! Final files might be from the last attempt.[/bold red]")
652
- final_action_log.set("action", "RestorationFailed")
653
- # Final issues remain from the last attempt before loop exit, or initial if no changes applied
654
- stats['final_issues'] = last_fix_result.get('verification_issues_count', stats['initial_issues']) if last_fix_result else stats['initial_issues']
655
-
656
- elif best_iteration['attempt'] == 0: # Best was the initial state
657
- console.print("No improvement found compared to the initial state. Keeping original files.")
658
- # No restoration needed, files should be in original state unless write failed earlier
659
- final_action_log.set("action", "NoImprovementFound")
660
- stats['final_issues'] = stats['initial_issues']
661
- else: # No iteration ever passed secondary verification or improved
662
- console.print("No verified improvement was found. Final files are from the last attempted state before loop exit.")
663
- final_action_log.set("action", "NoVerifiedImprovement")
664
- # Final issues remain from the last attempt before loop exit
665
- stats['final_issues'] = last_fix_result.get('verification_issues_count', stats['initial_issues']) if last_fix_result else stats['initial_issues']
655
+ # Loop likely exited due to an unexpected break or condition not setting status
656
+ stats['status_message'] = 'Loop finished without success for unknown reason'
657
+ final_log_entry += f' <Action>Loop finished without reaching success state ({escape(stats["status_message"])}).</Action>\n'
658
+ elif stats['status_message'] == 'Budget Exceeded':
659
+ final_log_entry += f' <Action>Loop stopped due to budget.</Action>\n'
660
+ elif stats['status_message'].startswith('No changes suggested') or stats['status_message'].startswith('No effective changes'):
661
+ final_log_entry += f' <Action>Loop stopped as no changes were suggested.</Action>\n'
662
+ elif stats['status_message'].startswith('Error'):
663
+ final_log_entry += f' <Action>Loop stopped due to error: {escape(stats["status_message"])}</Action>\n'
664
+ # else: status already set by a break condition inside loop
665
+
666
+
667
+ # 5b: Restore best iteration if one exists and is better than initial
668
+ # Check if best_iteration recorded is actually better than initial state
669
+ # And ensure it's not the initial state itself (attempt > 0)
670
+ initial_issues_val = stats['initial_issues'] if stats['initial_issues'] != -1 else float('inf')
671
+ if best_iteration['attempt'] > 0 and best_iteration['issues'] < initial_issues_val:
672
+ console.print(f"[yellow]Restoring state from best iteration: Attempt {best_iteration['attempt']} (Issues: {best_iteration['issues']})[/yellow]")
673
+ final_log_entry += f' <Action>Restored Best Iteration {best_iteration["attempt"]} (Issues: {best_iteration["issues"]})</Action>\n'
674
+ stats['status_message'] += f' - Restored best iteration {best_iteration["attempt"]}'
675
+ try:
676
+ best_program_path = Path(best_iteration['program_backup'])
677
+ best_code_path = Path(best_iteration['code_backup'])
678
+ if best_program_path.is_file() and best_code_path.is_file():
679
+ # Read content from backup before copying to handle potential race conditions if needed
680
+ restored_program_content = best_program_path.read_text(encoding='utf-8')
681
+ restored_code_content = best_code_path.read_text(encoding='utf-8')
682
+ program_path.write_text(restored_program_content, encoding='utf-8')
683
+ code_path.write_text(restored_code_content, encoding='utf-8')
684
+ program_contents = restored_program_content # Update memory state
685
+ code_contents = restored_code_content # Update memory state
686
+ if verbose:
687
+ console.print(f"Restored {program_path} from {best_program_path}")
688
+ console.print(f"Restored {code_path} from {best_code_path}")
689
+ # Final issues count is the best achieved count
690
+ stats['final_issues'] = best_iteration['issues']
691
+ else:
692
+ console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
693
+ final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
694
+ stats['status_message'] += ' - Error restoring best iteration (files missing)'
695
+ # Keep the last state, final issues remain unknown or last attempted
696
+ stats['final_issues'] = -1 # Indicate uncertainty
697
+
698
+ except (OSError, IOError) as e:
699
+ console.print(f"[bold red]Error restoring files from best iteration {best_iteration['attempt']}: {e}[/bold red]")
700
+ final_log_entry += f' <Error>Error restoring files from best iteration {best_iteration["attempt"]}: {escape(str(e))}</Error>\n'
701
+ stats['status_message'] += f' - Error restoring best iteration: {e}'
702
+ stats['final_issues'] = -1 # Indicate uncertainty
703
+
704
+ # If no improvement was made or recorded (best is still initial state or worse)
705
+ elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
706
+ console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
707
+ final_log_entry += f' <Action>No improvement found or recorded; restoring original state.</Action>\n'
708
+ stats['final_issues'] = stats['initial_issues'] # Final issues are same as initial
709
+ # Add restoration info to status message if not already implied
710
+ if 'keeping original state' not in stats['status_message']:
711
+ stats['status_message'] += ' - keeping original state'
712
+ # Ensure original files are restored if they were modified in a failed attempt
713
+ try:
714
+ # Only write if current memory state differs from initial
715
+ if program_contents != initial_program_content:
716
+ program_path.write_text(initial_program_content, encoding='utf-8')
717
+ program_contents = initial_program_content
718
+ if code_contents != initial_code_content:
719
+ code_path.write_text(initial_code_content, encoding='utf-8')
720
+ code_contents = initial_code_content
721
+ except IOError as e:
722
+ console.print(f"[bold red]Error restoring initial files: {e}[/bold red]")
723
+ final_log_entry += f' <Error>Error restoring initial files: {escape(str(e))}</Error>\n'
724
+ stats['status_message'] += f' - Error restoring initial files: {e}'
725
+ stats['final_issues'] = -1 # State uncertain
726
+ # Set final issues if not set by restoration logic (e.g., error during restore)
727
+ if stats['final_issues'] == -1 and stats['initial_issues'] != -1:
728
+ stats['final_issues'] = stats['initial_issues'] # Default to initial if unsure
729
+
666
730
 
667
731
  else: # overall_success is True
668
- console.print("[bold green]Process finished successfully![/bold green]")
669
- final_action_log.set("action", "Success")
732
+ final_log_entry += f' <Action>Process finished successfully.</Action>\n'
670
733
  stats['final_issues'] = 0 # Success means 0 issues
671
734
 
672
- _append_log_entry(verification_log_file, log_root, final_action_log)
735
+ final_log_entry += "</FinalActions>"
736
+ _write_log_entry(log_path, final_log_entry)
673
737
 
674
738
  # --- Step 6: Read Final Contents ---
739
+ # Use the in-memory contents which should reflect the final state after potential restoration
675
740
  if verbose:
676
- console.print("\n[bold]Step 6: Reading Final File Contents[/bold]")
677
- final_program_contents = _read_file(program_file)
678
- final_code_contents = _read_file(code_file)
679
- if final_program_contents is None: final_program_contents = "Error reading final program file."
680
- if final_code_contents is None: final_code_contents = "Error reading final code file."
741
+ console.print("\n[bold cyan]Step 6: Using Final In-Memory File Contents...[/bold cyan]")
742
+ final_program_content = program_contents
743
+ final_code_content = code_contents
744
+ # Optionally re-read from disk for verification, but memory should be source of truth
745
+ # try:
746
+ # final_program_content_disk = program_path.read_text(encoding="utf-8")
747
+ # final_code_content_disk = code_path.read_text(encoding="utf-8")
748
+ # if final_program_content != final_program_content_disk or final_code_content != final_code_content_disk:
749
+ # console.print("[bold red]Warning: Final file content on disk differs from expected state![/bold red]")
750
+ # # Decide whether to trust disk or memory
751
+ # except IOError as e:
752
+ # console.print(f"[bold red]Error reading final program/code files for verification: {e}[/bold red]")
753
+ # stats['status_message'] += ' - Error reading final files for verification'
754
+
681
755
 
682
756
  # --- Step 7: Calculate and Print Summary Statistics ---
683
757
  if verbose:
684
- console.print("\n[bold]Step 7: Final Statistics[/bold]")
685
-
686
- stats['overall_success_flag'] = overall_success
687
- stats['best_iteration_attempt'] = best_iteration['attempt'] if best_iteration['attempt'] >= 0 else 'N/A'
688
- stats['best_iteration_issues'] = best_iteration['issues'] if best_iteration['issues'] != float('inf') else 'N/A'
689
- if stats['initial_issues'] != float('inf') and stats['final_issues'] != float('inf') and stats['initial_issues'] >= 0 and stats['final_issues'] >= 0:
690
- stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
691
- else:
692
- stats['improvement_issues'] = 'N/A' # Cannot calculate if initial/final unknown
693
-
694
- summary_text = Text.assemble(
695
- ("Initial Issues: ", "bold"), str(stats['initial_issues']), "\n",
696
- ("Final Issues: ", "bold"), str(stats['final_issues']), "\n",
697
- ("Improvement (Issues Reduced): ", "bold"), str(stats['improvement_issues']), "\n",
698
- ("Best Iteration Attempt: ", "bold"), str(stats['best_iteration_attempt']), "\n",
699
- ("Best Iteration Issues: ", "bold"), str(stats['best_iteration_issues']), "\n",
700
- ("Total Attempts Made: ", "bold"), str(attempts), "\n",
701
- ("Total LLM Cost: ", "bold"), f"${total_cost:.6f}", "\n",
702
- ("Model Used (Last/Best): ", "bold"), str(best_iteration.get('model_name') or model_name or 'N/A'), "\n",
703
- ("Exit Reason: ", "bold"), stats['exit_reason'], "\n",
704
- ("Overall Success: ", "bold"), (str(overall_success), "bold green" if overall_success else "bold red")
705
- )
706
- console.print(Panel(summary_text, title="[bold blue]Verification Fix Loop Summary[/bold blue]", expand=False))
707
-
708
- # Finalize XML log
709
- log_root.set("endTime", datetime.datetime.now().isoformat())
710
- log_root.set("totalAttempts", str(attempts))
711
- log_root.set("totalCost", f"{total_cost:.6f}")
712
- log_root.set("overallSuccess", str(overall_success))
713
- # Re-write the log one last time with final attributes and pretty print
714
- try:
715
- rough_string = ET.tostring(log_root, 'utf-8')
716
- reparsed = minidom.parseString(rough_string)
717
- pretty_xml = reparsed.toprettyxml(indent=" ", encoding='utf-8')
718
- with open(verification_log_file, 'wb') as f:
719
- f.write(pretty_xml)
720
- if verbose:
721
- console.print(f"Final XML log written to: {verification_log_file}")
722
- except Exception as e:
723
- console.print(f"[bold red]Error writing final XML log file {verification_log_file}: {e}[/bold red]")
724
-
758
+ console.print("\n[bold cyan]Step 7: Calculating Final Statistics...[/bold cyan]")
759
+
760
+ initial_known = stats['initial_issues'] != -1
761
+ final_known = stats['final_issues'] != -1
762
+
763
+ if initial_known and final_known:
764
+ if stats['initial_issues'] > 0:
765
+ if stats['final_issues'] == 0: # Successful fix
766
+ stats['improvement_issues'] = stats['initial_issues']
767
+ stats['improvement_percent'] = 100.0
768
+ elif stats['final_issues'] < stats['initial_issues']: # Partial improvement
769
+ stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues']
770
+ # % improvement towards reaching 0
771
+ stats['improvement_percent'] = (stats['improvement_issues'] / stats['initial_issues']) * 100.0
772
+ else: # No improvement or regression
773
+ stats['improvement_issues'] = 0 # Can be negative if regression occurred
774
+ stats['improvement_percent'] = 0.0 # Or negative? Let's cap at 0.
775
+ if stats['final_issues'] > stats['initial_issues']:
776
+ stats['improvement_issues'] = stats['initial_issues'] - stats['final_issues'] # Negative value
777
+ # Percentage calculation might be misleading here, stick to 0% improvement towards goal.
778
+ elif stats['initial_issues'] == 0: # Started perfect
779
+ stats['improvement_issues'] = 0
780
+ stats['improvement_percent'] = 100.0 # Already at target
781
+ if stats['final_issues'] > 0: # Regression occurred during loop?
782
+ stats['improvement_issues'] = -stats['final_issues']
783
+ stats['improvement_percent'] = 0.0 # No longer at target
784
+ overall_success = False # Ensure success is false if regression happened after initial success
785
+ if 'Success on initial check' in stats['status_message']: # Update status if loop ran after initial success
786
+ stats['status_message'] = f'Regression occurred after initial success - Final Issues: {stats["final_issues"]}'
787
+ # else: initial_issues < 0 (should not happen if known)
788
+ # stats['improvement_issues'] = 'N/A'
789
+ # stats['improvement_percent'] = 'N/A'
790
+ else: # Initial or final state unknown
791
+ stats['improvement_issues'] = 'N/A'
792
+ stats['improvement_percent'] = 'N/A'
793
+ if final_known and stats['final_issues'] == 0:
794
+ overall_success = True # Assume success if final is 0, even if initial unknown
795
+ else:
796
+ overall_success = False # Cannot guarantee success if initial/final unknown
797
+
798
+
799
+ console.print("\n[bold]--- Final Statistics ---[/bold]")
800
+ console.print(f"Initial Issues: {stats['initial_issues'] if initial_known else 'Unknown'}")
801
+ console.print(f"Final Issues: {stats['final_issues'] if final_known else 'Unknown'}")
802
+ best_iter_num_str = stats['best_iteration_num'] if stats['best_iteration_num'] != -1 else 'N/A'
803
+ best_iter_iss_str = stats['best_iteration_issues'] if stats['best_iteration_issues'] != float('inf') else 'N/A'
804
+ console.print(f"Best Iteration Found: {best_iter_num_str} (Issues: {best_iter_iss_str})")
805
+ console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
806
+ improvement_percent_str = f"{stats['improvement_percent']:.2f}%" if isinstance(stats['improvement_percent'], float) else stats['improvement_percent']
807
+ console.print(f"Improvement (Percent Towards 0 Issues): {improvement_percent_str}")
808
+ console.print(f"Overall Status: {stats['status_message']}")
809
+ console.print(f"Total Attempts Made: {attempts}") # Now reflects loop iterations started
810
+ console.print(f"Total Cost: ${total_cost:.6f}")
811
+ console.print(f"Model Used: {model_name or 'N/A'}")
725
812
 
726
813
  # --- Step 8: Return Results ---
814
+ # Ensure final success status matches reality (e.g., if regression occurred)
815
+ if final_known and stats['final_issues'] != 0:
816
+ overall_success = False
817
+
727
818
  return {
728
- 'success': overall_success,
729
- 'final_program': final_program_contents,
730
- 'final_code': final_code_contents,
731
- 'total_attempts': attempts,
732
- 'total_cost': total_cost,
733
- 'model_name': best_iteration.get('model_name') or model_name, # Prefer model from best iter, fallback to last used
734
- 'statistics': stats,
819
+ "success": overall_success,
820
+ "final_program": final_program_content,
821
+ "final_code": final_code_content,
822
+ "total_attempts": attempts, # Return the number of loop iterations started
823
+ "total_cost": total_cost,
824
+ "model_name": model_name,
825
+ "statistics": stats,
735
826
  }
736
827
 
737
- # Example Usage (Illustrative - requires setting up files and dependencies)
738
- if __name__ == '__main__':
739
- console.print(Panel("[bold yellow]Running Example Usage[/bold yellow]\nThis is illustrative and requires setting up dummy files and potentially the 'fix_verification_errors' function/package.", title="Example"))
828
+ # Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
829
+ if __name__ == "__main__":
830
+ # Create dummy files for demonstration
831
+ # In a real scenario, these files would exist and contain actual code/programs.
832
+ console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
833
+ temp_dir = Path("./temp_fix_verification_loop")
834
+ temp_dir.mkdir(exist_ok=True)
740
835
 
741
- # --- Create Dummy Files for Demonstration ---
742
- temp_dir = tempfile.mkdtemp()
743
- console.print(f"Created temporary directory: {temp_dir}")
836
+ program_file = temp_dir / "my_program.py"
837
+ code_file = temp_dir / "my_code_module.py"
838
+ verification_program_file = temp_dir / "verify_syntax.py"
744
839
 
745
- dummy_program_file = os.path.join(temp_dir, "program.py")
746
- dummy_code_file = os.path.join(temp_dir, "code_module.py")
747
- dummy_verify_file = os.path.join(temp_dir, "verify.py")
748
- log_file = os.path.join(temp_dir, "verification_log.xml")
749
-
750
- # Dummy Program (uses code_module, prints success/failure)
751
- _write_file(dummy_program_file, """
752
- import code_module
840
+ program_file.write_text("""
841
+ import my_code_module
753
842
  import sys
754
- try:
755
- result = code_module.buggy_function(5)
756
- expected = 10
757
- print(f"Input: 5")
758
- print(f"Expected: {expected}")
759
- print(f"Actual: {result}")
760
- if result == expected:
761
- print("VERIFICATION_SUCCESS")
762
- sys.exit(0)
763
- else:
764
- print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
765
- sys.exit(1)
766
- except Exception as e:
767
- print(f"VERIFICATION_ERROR: {e}")
768
- sys.exit(2)
769
- """)
770
-
771
- # Dummy Code (initially buggy)
772
- _write_file(dummy_code_file, """
773
- # Code module with a bug
774
- def buggy_function(x):
775
- # Intended to return x * 2, but has a bug
776
- return x + 1 # Bug! Should be x * 2
777
- """)
778
-
779
- # Dummy Verification Script (checks basic syntax/import)
780
- _write_file(dummy_verify_file, """
843
+ # Simulate using the module and checking output
844
+ val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
845
+ result = my_code_module.process(val)
846
+ expected = val * 2
847
+ print(f"Input: {val}")
848
+ print(f"Result: {result}")
849
+ print(f"Expected: {expected}")
850
+ if result == expected:
851
+ print("VERIFICATION_SUCCESS")
852
+ else:
853
+ print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
854
+ """, encoding="utf-8")
855
+
856
+ # Initial code with a bug
857
+ code_file.write_text("""
858
+ # my_code_module.py
859
+ def process(x):
860
+ # Bug: should be x * 2
861
+ return x + 2
862
+ """, encoding="utf-8")
863
+
864
+ # Simple verification program (e.g., syntax check)
865
+ verification_program_file.write_text("""
781
866
  import sys
782
- import importlib.util
867
+ import py_compile
783
868
  import os
784
-
785
- if len(sys.argv) < 2:
786
- print("Usage: python verify.py <path_to_code_module.py>")
787
- sys.exit(1)
788
-
789
- module_path = sys.argv[1]
790
- module_name = os.path.splitext(os.path.basename(module_path))[0]
791
-
869
+ # Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
870
+ code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
871
+ print(f"Checking syntax of: {code_to_check}")
792
872
  try:
793
- spec = importlib.util.spec_from_file_location(module_name, module_path)
794
- if spec is None or spec.loader is None:
795
- raise ImportError(f"Could not create spec for {module_path}")
796
- module = importlib.util.module_from_spec(spec)
797
- spec.loader.exec_module(module)
798
- # Optional: Check if specific functions exist
799
- if not hasattr(module, 'buggy_function'):
800
- raise AttributeError("Function 'buggy_function' not found.")
801
- print(f"Verification PASSED: {module_path} imported successfully.")
873
+ py_compile.compile(code_to_check, doraise=True)
874
+ print("Syntax OK.")
802
875
  sys.exit(0) # Success
876
+ except py_compile.PyCompileError as e:
877
+ print(f"Syntax Error: {e}")
878
+ sys.exit(1) # Failure
803
879
  except Exception as e:
804
- print(f"Verification FAILED: {e}")
880
+ print(f"Verification Error: {e}")
805
881
  sys.exit(1) # Failure
806
- """)
882
+ """, encoding="utf-8")
883
+ # Set environment variable for the verification script
884
+ os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
807
885
 
808
- # Dummy Prompt
809
- dummy_prompt = "Create a Python module 'code_module.py' with a function `buggy_function(x)` that returns the input `x` multiplied by 2."
810
886
 
811
887
  # --- Mock fix_verification_errors ---
812
- # In a real scenario, this would be the actual LLM call function
813
- # For this example, we simulate its behavior based on attempts
814
- _fix_call_count = 0
888
+ # This is crucial for testing without actual LLM calls / costs
889
+ # In a real test suite, use unittest.mock
890
+ _original_fix_verification_errors = fix_verification_errors
891
+ _call_count = 0
892
+
815
893
  def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
816
- global _fix_call_count
817
- _fix_call_count += 1
818
- cost = 0.01 + (strength * 0.02) # Simulate cost based on strength
819
- model = f"mock-model-s{strength:.1f}"
820
- issues = 1 # Default to 1 issue initially
821
- fixed_code = code # Default to no change
822
- explanation = ["Initial analysis: Function seems incorrect."]
823
-
824
- if "VERIFICATION_FAILURE" in output or "VERIFICATION_ERROR" in output:
825
- issues = 1
826
- if _fix_call_count <= 2: # Simulate fixing on the first or second try
827
- # Simulate a fix
828
- fixed_code = """
829
- # Code module - Attempting fix
830
- def buggy_function(x):
831
- # Intended to return x * 2
832
- return x * 2 # Corrected code
894
+ global _call_count
895
+ _call_count += 1
896
+ cost = 0.001 * _call_count # Simulate increasing cost
897
+ model = "mock_model_v1"
898
+ explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
899
+ issues_count = 1 # Assume 1 issue initially
900
+
901
+ fixed_program = program # Assume program doesn't need fixing
902
+ fixed_code = code
903
+
904
+ # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
905
+ if "VERIFICATION_FAILURE" in output and _call_count >= 2:
906
+ explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
907
+ fixed_code = """
908
+ # my_code_module.py
909
+ def process(x):
910
+ # Fixed: should be x * 2
911
+ return x * 2
833
912
  """
834
- explanation = ["Identified incorrect arithmetic operation. Changed '+' to '*'."]
835
- issues = 0 # Simulate 0 issues after fix
836
- if verbose: print("[Mock Fixer] Suggesting corrected code.")
837
- else:
838
- explanation = ["Analysis: Still incorrect, unable to determine fix."]
839
- issues = 1 # Simulate failure to fix after 2 tries
840
- if verbose: print("[Mock Fixer] Failed to find fix this time.")
913
+ issues_count = 0 # Fixed!
841
914
  elif "VERIFICATION_SUCCESS" in output:
842
- issues = 0
843
- explanation = ["Code appears correct based on output."]
844
- if verbose: print("[Mock Fixer] Code seems correct.")
845
-
915
+ explanation = ["Output indicates VERIFICATION_SUCCESS."]
916
+ issues_count = 0 # Already correct
846
917
 
847
918
  return {
848
919
  'explanation': explanation,
849
- 'fixed_program': program, # Assume program doesn't change in mock
920
+ 'fixed_program': fixed_program,
850
921
  'fixed_code': fixed_code,
851
922
  'total_cost': cost,
852
923
  'model_name': model,
853
- 'verification_issues_count': issues,
924
+ 'verification_issues_count': issues_count,
854
925
  }
855
926
 
856
- # Replace the actual function with the mock for this example run
857
- original_fix_func = fix_verification_errors
858
- fix_verification_errors = mock_fix_verification_errors
859
-
860
- # --- Run the Loop ---
861
- try:
862
- results = fix_verification_errors_loop(
863
- program_file=dummy_program_file,
864
- code_file=dummy_code_file,
865
- prompt=dummy_prompt,
866
- verification_program=dummy_verify_file,
867
- strength=0.5,
868
- temperature=0.1,
869
- max_attempts=3,
870
- budget=0.50, # $0.50 budget
871
- verification_log_file=log_file,
872
- verbose=True
873
- )
874
-
875
- console.print("\n[bold magenta]--- Final Results ---[/bold magenta]")
876
- console.print(f"Success: {results['success']}")
877
- console.print(f"Total Attempts: {results['total_attempts']}")
878
- console.print(f"Total Cost: ${results['total_cost']:.6f}")
879
- console.print(f"Model Name: {results['model_name']}")
880
-
881
- console.print("\nFinal Code Content:")
882
- console.print(Syntax(results['final_code'], "python", theme="default", line_numbers=True))
883
-
884
- console.print("\nStatistics:")
885
- import json
886
- console.print(json.dumps(results['statistics'], indent=2))
887
-
888
- console.print(f"\nLog file generated at: {log_file}")
889
-
890
- except Exception as e:
891
- console.print(f"\n[bold red]An error occurred during the example run: {e}[/bold red]")
892
- finally:
893
- # Restore original function
894
- fix_verification_errors = original_fix_func
895
- # Clean up dummy files
896
- try:
897
- shutil.rmtree(temp_dir)
898
- console.print(f"Cleaned up temporary directory: {temp_dir}")
899
- except Exception as e:
900
- console.print(f"[bold red]Error cleaning up temp directory {temp_dir}: {e}[/bold red]")
927
+ # Replace the real function with the mock
928
+ # In package context, you might need to patch differently
929
+ # For this script execution:
930
+ # Note: This direct replacement might not work if the function is imported
931
+ # using `from .fix_verification_errors import fix_verification_errors`.
932
+ # A proper mock framework (`unittest.mock.patch`) is better.
933
+ # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
934
+ # This is fragile. A better approach involves dependency injection or mocking frameworks.
935
+ # HACK: Re-assigning the imported name in the global scope of this script
936
+ globals()['fix_verification_errors'] = mock_fix_verification_errors
937
+
938
+
939
+ console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
940
+
941
+ # Example program_args: Pass input value 10 and another arg 5
942
+ # Note: The example program only uses the first arg sys.argv[1]
943
+ example_args = ["10", "another_arg"]
944
+
945
+ results = fix_verification_errors_loop(
946
+ program_file=str(program_file),
947
+ code_file=str(code_file),
948
+ prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
949
+ verification_program=str(verification_program_file),
950
+ strength=0.5,
951
+ temperature=0.1,
952
+ max_attempts=3,
953
+ budget=0.10, # Set a budget
954
+ verification_log_file=str(temp_dir / "test_verification.log"),
955
+ verbose=True,
956
+ program_args=example_args
957
+ )
901
958
 
959
+ console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
960
+ console.print(f"Success: {results['success']}")
961
+ console.print(f"Total Attempts: {results['total_attempts']}")
962
+ console.print(f"Total Cost: ${results['total_cost']:.6f}")
963
+ console.print(f"Model Name: {results['model_name']}")
964
+ # console.print(f"Final Program:\n{results['final_program']}") # Can be long
965
+ console.print(f"Final Code:\n{results['final_code']}")
966
+ console.print(f"Statistics:\n{results['statistics']}")
967
+
968
+ # Restore original function if needed elsewhere
969
+ globals()['fix_verification_errors'] = _original_fix_verification_errors
970
+
971
+ # Clean up dummy files
972
+ # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
973
+ # shutil.rmtree(temp_dir)
974
+ console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
975
+ console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")