pdd-cli 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +7 -1
- pdd/bug_main.py +21 -3
- pdd/bug_to_unit_test.py +16 -5
- pdd/change.py +2 -1
- pdd/change_main.py +407 -189
- pdd/cli.py +853 -301
- pdd/code_generator.py +2 -1
- pdd/conflicts_in_prompts.py +2 -1
- pdd/construct_paths.py +377 -222
- pdd/context_generator.py +2 -1
- pdd/continue_generation.py +3 -2
- pdd/crash_main.py +55 -20
- pdd/data/llm_model.csv +8 -8
- pdd/detect_change.py +2 -1
- pdd/fix_code_loop.py +465 -160
- pdd/fix_code_module_errors.py +7 -4
- pdd/fix_error_loop.py +9 -9
- pdd/fix_errors_from_unit_tests.py +207 -365
- pdd/fix_main.py +31 -4
- pdd/fix_verification_errors.py +285 -0
- pdd/fix_verification_errors_loop.py +975 -0
- pdd/fix_verification_main.py +412 -0
- pdd/generate_output_paths.py +427 -183
- pdd/generate_test.py +3 -2
- pdd/increase_tests.py +2 -2
- pdd/llm_invoke.py +18 -8
- pdd/pdd_completion.zsh +38 -1
- pdd/preprocess.py +3 -3
- pdd/process_csv_change.py +466 -154
- pdd/prompts/extract_prompt_split_LLM.prompt +7 -4
- pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
- pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
- pdd/prompts/find_verification_errors_LLM.prompt +25 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
- pdd/prompts/fix_verification_errors_LLM.prompt +20 -0
- pdd/prompts/generate_test_LLM.prompt +9 -3
- pdd/prompts/split_LLM.prompt +3 -3
- pdd/prompts/update_prompt_LLM.prompt +3 -3
- pdd/split.py +13 -12
- pdd/split_main.py +22 -13
- pdd/trace_main.py +7 -0
- pdd/xml_tagger.py +2 -1
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/METADATA +4 -4
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/RECORD +49 -44
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/WHEEL +1 -1
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.25.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
import click
|
|
5
|
+
from typing import Optional, Tuple, List, Dict, Any
|
|
6
|
+
|
|
7
|
+
# Use Rich for pretty printing to the console
|
|
8
|
+
from rich import print as rich_print
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.syntax import Syntax
|
|
11
|
+
|
|
12
|
+
# Internal imports using relative paths
|
|
13
|
+
from .construct_paths import construct_paths
|
|
14
|
+
from .fix_verification_errors import fix_verification_errors
|
|
15
|
+
from .fix_verification_errors_loop import fix_verification_errors_loop
|
|
16
|
+
# Import DEFAULT_STRENGTH from the main package
|
|
17
|
+
from . import DEFAULT_STRENGTH
|
|
18
|
+
|
|
19
|
+
# Default values from the README
DEFAULT_MAX_ATTEMPTS = 3  # max fix iterations for the --loop mode
DEFAULT_BUDGET = 5.0  # max LLM spend in USD for the --loop mode
DEFAULT_TEMPERATURE = 0.0  # LLM sampling temperature (0.0 = deterministic)
|
|
23
|
+
|
|
24
|
+
def run_program(program_path: str, args: Optional[List[str]] = None) -> Tuple[bool, str, str]:
    """
    Execute a program file and capture its output.

    Args:
        program_path (str): The path to the executable program file.
        args (Optional[List[str]]): Optional list of command-line arguments for
            the program. Defaults to None (no extra arguments).

    Returns:
        Tuple[bool, str, str]: A tuple containing:
            - bool: True if the program executed successfully (exit code 0), False otherwise.
            - str: The captured standard output.
            - str: The captured standard error.
    """
    # Fix: avoid the mutable-default-argument pitfall (args=[] is shared across
    # calls); normalize to a fresh list each invocation instead.
    args = [] if args is None else list(args)
    try:
        # Determine the interpreter based on file extension (basic example)
        # A more robust solution might use the 'language' from construct_paths
        interpreter: List[str] = []
        if program_path.endswith(".py"):
            interpreter = [sys.executable]  # Use the current Python interpreter
        elif program_path.endswith(".js"):
            interpreter = ["node"]
        elif program_path.endswith(".sh"):
            interpreter = ["bash"]
        # Add other languages as needed

        command = interpreter + [program_path] + args
        rich_print(f"[dim]Running command:[/dim] {' '.join(command)}")

        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,  # Don't raise exception on non-zero exit code
            timeout=60  # Add a timeout to prevent hangs
        )

        success = process.returncode == 0
        stdout = process.stdout
        stderr = process.stderr

        if not success:
            rich_print(f"[yellow]Warning:[/yellow] Program '{os.path.basename(program_path)}' exited with code {process.returncode}.")
            if stderr:
                rich_print("[yellow]Stderr:[/yellow]")
                rich_print(Panel(stderr, border_style="yellow"))

        return success, stdout, stderr

    except FileNotFoundError:
        rich_print(f"[bold red]Error:[/bold red] Program file not found: '{program_path}'")
        return False, "", f"Program file not found: {program_path}"
    except subprocess.TimeoutExpired:
        rich_print(f"[bold red]Error:[/bold red] Program execution timed out: '{program_path}'")
        return False, "", f"Program execution timed out: {program_path}"
    except Exception as e:
        rich_print(f"[bold red]Error:[/bold red] Failed to run program '{program_path}': {e}")
        return False, "", f"Failed to run program: {e}"
|
|
83
|
+
|
|
84
|
+
def fix_verification_main(
    ctx: click.Context,
    prompt_file: str,
    code_file: str,
    program_file: str,
    output_results: Optional[str],
    output_code: Optional[str],
    loop: bool,
    verification_program: Optional[str],  # Only used if loop=True
    max_attempts: int = DEFAULT_MAX_ATTEMPTS,
    budget: float = DEFAULT_BUDGET,
) -> Tuple[bool, str, str, int, float, str]:
    """
    CLI wrapper for the 'verify' command. Verifies code correctness by running
    a program, using an LLM to judge its output against the prompt's intent,
    and potentially fixing the code iteratively.

    Args:
        ctx (click.Context): The Click context object. Global flags (strength,
            temperature, force, quiet, verbose) are read from ``ctx.obj``.
        prompt_file (str): Path to the prompt file.
        code_file (str): Path to the code file to verify/fix.
        program_file (str): Path to the program to run for verification.
        output_results (Optional[str]): Path to save verification results log.
        output_code (Optional[str]): Path to save the verified code file.
        loop (bool): If True, perform iterative verification and fixing.
        verification_program (Optional[str]): Path to a verification program (required if loop=True).
        max_attempts (int): Max attempts for the loop.
        budget (float): Max cost budget for the loop.

    Returns:
        Tuple[bool, str, str, int, float, str]:
            - success_status (bool): True if verification passed (or was fixed).
            - final_program (str): Content of the program file (potentially modified if loop=True).
            - final_code (str): Content of the code file after verification/fixing.
            - attempts (int): Number of attempts made.
            - total_cost (float): Total cost incurred.
            - model_name (str): Name of the LLM used.

    Raises:
        click.UsageError: If ``loop`` is True but ``verification_program`` is None.

    Side effects:
        Prints progress to the console, may call ``sys.exit(1)`` on fatal path
        errors, and writes the verified code file and results log to disk.
    """
    # Extract global options from context
    # params = ctx.params # We need obj for global flags
    strength: float = ctx.obj.get('strength', DEFAULT_STRENGTH)  # Get globals from obj
    temperature: float = ctx.obj.get('temperature', DEFAULT_TEMPERATURE)
    force: bool = ctx.obj.get('force', False)  # <<< FIX: Get force from ctx.obj
    quiet: bool = ctx.obj.get('quiet', False)
    verbose: bool = ctx.obj.get('verbose', False)

    # --- Input Validation ---
    if loop and not verification_program:
        raise click.UsageError("The '--loop' option requires '--verification-program' to be specified.")

    if not quiet:
        rich_print(Panel(f"Starting Verification Process for [cyan]{os.path.basename(code_file)}[/cyan]", title="PDD Verify", border_style="blue"))
        rich_print(f" Prompt: [green]{prompt_file}[/green]")
        rich_print(f" Code: [green]{code_file}[/green]")
        rich_print(f" Program: [green]{program_file}[/green]")
        if loop:
            rich_print(f" Mode: [yellow]Iterative Loop[/yellow]")
            rich_print(f" Verification Program: [green]{verification_program}[/green]")
            rich_print(f" Max Attempts: {max_attempts}")
            rich_print(f" Budget: ${budget:.2f}")
        else:
            rich_print(f" Mode: [yellow]Single Pass[/yellow]")
            rich_print(f" Strength: {strength}, Temperature: {temperature}")

    # ------------------- File-path handling -------------------
    input_file_paths: Dict[str, str] = {
        "prompt_file": prompt_file,
        "code_file": code_file,
        "program_file": program_file,
    }
    # verification_program is only needed as an *input file path* for the loop function
    if loop and verification_program:
        # Add verification_program path for construct_paths if loop is enabled
        # Although construct_paths doesn't read it, including it ensures consistency
        # and allows potential future use cases.
        input_file_paths["verification_program"] = verification_program

    command_options: Dict[str, Optional[str]] = {
        "output_results": output_results,
        "output_code": output_code,
    }

    # Initial default values (in case we need the manual fallback)
    input_strings: Dict[str, str] = {}
    output_code_path: Optional[str] = output_code
    output_results_path: Optional[str] = output_results
    language: str = ""

    try:
        # First try the official helper.
        input_strings, output_file_paths, language = construct_paths(
            input_file_paths=input_file_paths,
            force=force,
            quiet=quiet,
            command="verify",
            command_options=command_options,
        )
        output_code_path = output_file_paths.get("output_code")
        output_results_path = output_file_paths.get("output_results")

        if verbose:
            rich_print("[dim]Resolved output paths via construct_paths.[/dim]")

    except Exception as e:
        # If the helper does not understand the "verify" command fall back.
        # NOTE(review): this fallback keys on the helper's error *message* text,
        # which is brittle if construct_paths rewords it — confirm against
        # construct_paths' actual error wording.
        if "invalid command" in str(e).lower():
            if verbose:
                rich_print(
                    "[yellow]construct_paths does not recognise "
                    "'verify'. Falling back to manual path handling.[/yellow]"
                )
            try:
                # Manually read the three mandatory files
                with open(prompt_file, "r") as f:
                    input_strings["prompt_file"] = f.read()
                with open(code_file, "r") as f:
                    input_strings["code_file"] = f.read()
                with open(program_file, "r") as f:
                    input_strings["program_file"] = f.read()
            except FileNotFoundError as fe:
                rich_print(f"[bold red]Error:[/bold red] {fe}")
                sys.exit(1)

            # Pick or build output paths
            if output_code_path is None:
                base, ext = os.path.splitext(code_file)
                output_code_path = f"{base}_verified{ext}"
            if output_results_path is None:
                base, _ = os.path.splitext(program_file)
                output_results_path = f"{base}_verify_results.log"

            # Best‑effort language guess
            if program_file.endswith(".py"):
                language = "python"
            elif program_file.endswith(".js"):
                language = "javascript"
            elif program_file.endswith(".sh"):
                language = "bash"

        else:
            # Some other error – re‑raise / abort
            rich_print(f"[bold red]Error:[/bold red] Failed during path construction: {e}")
            if verbose:
                import traceback
                rich_print(Panel(traceback.format_exc(), title="Traceback", border_style="red"))
            sys.exit(1)

    # --- Core Logic ---
    success: bool = False
    final_program: str = input_strings.get("program_file", "")  # Initialize with input content
    final_code: str = input_strings.get("code_file", "")  # Initialize with input content
    attempts: int = 0
    total_cost: float = 0.0
    model_name: str = "N/A"
    results_log_content: str = ""  # To store content for the results file

    try:
        if loop:
            if not quiet:
                rich_print("\n[bold blue]Running Iterative Verification (fix_verification_errors_loop)...[/bold blue]")

            # fix_verification_errors_loop handles file I/O internally
            # Pass file paths directly
            # TODO: How are program_args passed? Assuming empty list for now. Needs CLI option.
            program_args: List[str] = []

            loop_results = fix_verification_errors_loop(
                program_file=program_file,
                code_file=code_file,
                prompt=input_strings["prompt_file"],  # Prompt content is needed
                verification_program=verification_program,  # Path is needed
                strength=strength,
                temperature=temperature,
                max_attempts=max_attempts,
                budget=budget,
                verification_log_file=output_results_path,  # Pass path for internal logging
                verbose=verbose,
                program_args=program_args
            )
            # NOTE(review): result keys ('success', 'final_program', 'final_code',
            # 'total_attempts', 'total_cost', 'model_name') assumed per the loop
            # function's contract — confirm against fix_verification_errors_loop.
            success = loop_results['success']
            final_program = loop_results['final_program']  # Loop function returns final content
            final_code = loop_results['final_code']  # Loop function returns final content
            attempts = loop_results['total_attempts']
            total_cost = loop_results['total_cost']
            model_name = loop_results['model_name']
            # The loop function writes its own detailed log, so we don't need to build one here.
            # We just need to ensure the path was passed correctly.

        else:  # Single pass verification
            if not quiet:
                rich_print("\n[bold blue]Running Single Pass Verification (fix_verification_errors)...[/bold blue]")
            attempts = 1  # Single pass is one attempt

            # 1. Run the program file to get its output
            if not quiet:
                rich_print(f"Executing program: [cyan]{program_file}[/cyan]")
            run_success, program_stdout, program_stderr = run_program(program_file)
            # Combine stdout and (if any) stderr into one blob for the LLM.
            program_output = program_stdout + ("\n--- STDERR ---\n" + program_stderr if program_stderr else "")

            if verbose:
                rich_print("[dim]--- Program Output ---[/dim]")
                rich_print(Panel(program_output if program_output else "[No Output]", border_style="dim"))
                rich_print("[dim]--- End Program Output ---[/dim]")

            # Check if program ran successfully before calling LLM (optional, but good practice)
            # if not run_success:
            #     rich_print("[yellow]Warning:[/yellow] Program execution failed. LLM verification might be less effective.")

            # 2. Call fix_verification_errors with content and program output
            if not quiet:
                rich_print("Calling LLM to verify program output against prompt...")

            fix_results = fix_verification_errors(
                program=input_strings["program_file"],
                prompt=input_strings["prompt_file"],
                code=input_strings["code_file"],
                output=program_output,  # Pass the captured output
                strength=strength,
                temperature=temperature,
                verbose=verbose
            )

            # Determine success: If no issues were found OR if fixes were applied
            # The definition of 'success' here means the *final* state is verified.
            issues_found = fix_results['verification_issues_count'] > 0
            code_updated = fix_results['fixed_code'] != input_strings["code_file"]
            program_updated = fix_results['fixed_program'] != input_strings["program_file"]

            if not issues_found:
                success = True
                if not quiet:
                    rich_print("[green]Verification Passed:[/green] LLM found no discrepancies.")
            elif code_updated or program_updated:
                # If issues were found AND fixes were made, assume success for this single pass.
                # A more robust check might re-run the program with fixed code, but that's the loop's job.
                success = True
                if not quiet:
                    rich_print("[yellow]Verification Issues Found:[/yellow] LLM proposed fixes.")
            else:
                success = False  # Issues found, but no fixes proposed
                if not quiet:
                    rich_print("[red]Verification Failed:[/red] LLM found discrepancies but proposed no fixes.")

            final_program = fix_results['fixed_program']
            final_code = fix_results['fixed_code']
            total_cost = fix_results['total_cost']
            model_name = fix_results['model_name']

            # Build results log content for single pass
            results_log_content = f"PDD Verify Results (Single Pass)\n"
            # NOTE(review): this "Timestamp" is the prompt file's mtime (a float
            # epoch), not the current wall-clock time — confirm that is intended.
            results_log_content += f"Timestamp: {os.path.getmtime(prompt_file)}\n"  # Use prompt timestamp as reference
            results_log_content += f"Prompt File: {prompt_file}\n"
            results_log_content += f"Code File: {code_file}\n"
            results_log_content += f"Program File: {program_file}\n"
            results_log_content += f"Success: {success}\n"
            results_log_content += f"Issues Found Count: {fix_results['verification_issues_count']}\n"
            results_log_content += f"Code Updated: {code_updated}\n"
            results_log_content += f"Program Updated: {program_updated}\n"
            results_log_content += f"Model Used: {model_name}\n"
            results_log_content += f"Total Cost: ${total_cost:.6f}\n"
            results_log_content += "\n--- LLM Explanation ---\n"
            results_log_content += "\n".join(fix_results.get('explanation', ['N/A']))
            results_log_content += "\n\n--- Program Output Used for Verification ---\n"
            results_log_content += program_output

    except Exception as e:
        success = False
        rich_print(f"[bold red]Error during verification process:[/bold red] {e}")
        # Optionally log the full traceback if verbose
        if verbose:
            import traceback
            rich_print(Panel(traceback.format_exc(), title="Traceback", border_style="red"))
        # Attempt to return partial results if possible
        return success, final_program, final_code, attempts, total_cost, model_name

    # --- Output File Writing ---
    saved_code_path: Optional[str] = None
    saved_results_path: Optional[str] = None

    # Only persist the code file when verification succeeded.
    if success and output_code_path:
        try:
            with open(output_code_path, "w") as f:
                f.write(final_code)
            saved_code_path = output_code_path
            if not quiet:
                rich_print(f"Successfully verified code saved to: [green]{output_code_path}[/green]")
        except IOError as e:
            rich_print(f"[bold red]Error:[/bold red] Failed to write verified code file '{output_code_path}': {e}")

    # Write results log (only for single pass, loop writes its own)
    if not loop and output_results_path:
        try:
            with open(output_results_path, "w") as f:
                f.write(results_log_content)
            saved_results_path = output_results_path
            if not quiet:
                rich_print(f"Verification results log saved to: [green]{output_results_path}[/green]")
        except IOError as e:
            rich_print(f"[bold red]Error:[/bold red] Failed to write results log file '{output_results_path}': {e}")
    elif loop and output_results_path:
        # For loop, just confirm the path where the loop function *should* have saved the log
        saved_results_path = output_results_path
        if not quiet:
            rich_print(f"Verification results log (from loop) should be at: [green]{output_results_path}[/green]")

    # --- Final User Feedback ---
    if not quiet:
        rich_print("\n" + "="*40)
        title = "[bold green]Verification Complete[/bold green]" if success else "[bold red]Verification Failed[/bold red]"
        summary_panel = Panel(
            f"Status: {'[green]Success[/green]' if success else '[red]Failure[/red]'}\n"
            f"Attempts: {attempts}\n"
            f"Total Cost: ${total_cost:.6f}\n"
            f"Model Used: {model_name}\n"
            f"Verified Code Saved: {saved_code_path or 'N/A'}\n"
            f"Results Log Saved: {saved_results_path or 'N/A'}",
            title=title,
            border_style="green" if success else "red"
        )
        rich_print(summary_panel)

    if verbose and not success and not loop:
        rich_print("[bold yellow]Final Code (after failed single pass):[/bold yellow]")
        rich_print(Syntax(final_code, language or "python", theme="default", line_numbers=True))

    return success, final_program, final_code, attempts, total_cost, model_name
|