pdd-cli 0.0.24__py3-none-any.whl → 0.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic. Click here for more details.

Files changed (49) hide show
  1. pdd/__init__.py +14 -1
  2. pdd/bug_main.py +5 -1
  3. pdd/bug_to_unit_test.py +16 -5
  4. pdd/change.py +2 -1
  5. pdd/change_main.py +407 -189
  6. pdd/cli.py +853 -301
  7. pdd/code_generator.py +2 -1
  8. pdd/conflicts_in_prompts.py +2 -1
  9. pdd/construct_paths.py +377 -222
  10. pdd/context_generator.py +2 -1
  11. pdd/continue_generation.py +5 -2
  12. pdd/crash_main.py +55 -20
  13. pdd/data/llm_model.csv +18 -17
  14. pdd/detect_change.py +2 -1
  15. pdd/fix_code_loop.py +465 -160
  16. pdd/fix_code_module_errors.py +7 -4
  17. pdd/fix_error_loop.py +9 -9
  18. pdd/fix_errors_from_unit_tests.py +207 -365
  19. pdd/fix_main.py +32 -4
  20. pdd/fix_verification_errors.py +148 -77
  21. pdd/fix_verification_errors_loop.py +842 -768
  22. pdd/fix_verification_main.py +412 -0
  23. pdd/generate_output_paths.py +427 -189
  24. pdd/generate_test.py +3 -2
  25. pdd/increase_tests.py +2 -2
  26. pdd/llm_invoke.py +1167 -343
  27. pdd/preprocess.py +3 -3
  28. pdd/process_csv_change.py +466 -154
  29. pdd/prompts/bug_to_unit_test_LLM.prompt +11 -11
  30. pdd/prompts/extract_prompt_update_LLM.prompt +11 -5
  31. pdd/prompts/extract_unit_code_fix_LLM.prompt +2 -2
  32. pdd/prompts/find_verification_errors_LLM.prompt +11 -9
  33. pdd/prompts/fix_code_module_errors_LLM.prompt +29 -0
  34. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +5 -5
  35. pdd/prompts/fix_verification_errors_LLM.prompt +8 -1
  36. pdd/prompts/generate_test_LLM.prompt +9 -3
  37. pdd/prompts/trim_results_start_LLM.prompt +1 -1
  38. pdd/prompts/update_prompt_LLM.prompt +3 -3
  39. pdd/split.py +6 -5
  40. pdd/split_main.py +13 -4
  41. pdd/trace_main.py +7 -0
  42. pdd/update_model_costs.py +446 -0
  43. pdd/xml_tagger.py +2 -1
  44. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/METADATA +8 -16
  45. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/RECORD +49 -47
  46. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/WHEEL +1 -1
  47. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/entry_points.txt +0 -0
  48. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/licenses/LICENSE +0 -0
  49. {pdd_cli-0.0.24.dist-info → pdd_cli-0.0.26.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,412 @@
1
+ import sys
2
+ import os
3
+ import subprocess
4
+ import click
5
+ from typing import Optional, Tuple, List, Dict, Any
6
+
7
+ # Use Rich for pretty printing to the console
8
+ from rich import print as rich_print
9
+ from rich.panel import Panel
10
+ from rich.syntax import Syntax
11
+
12
+ # Internal imports using relative paths
13
+ from .construct_paths import construct_paths
14
+ from .fix_verification_errors import fix_verification_errors
15
+ from .fix_verification_errors_loop import fix_verification_errors_loop
16
+ # Import DEFAULT_STRENGTH from the main package
17
+ from . import DEFAULT_STRENGTH
18
+
19
+ # Default values from the README
20
+ DEFAULT_MAX_ATTEMPTS = 3
21
+ DEFAULT_BUDGET = 5.0
22
+ DEFAULT_TEMPERATURE = 0.0
23
+
24
def run_program(
    program_path: str,
    args: Optional[List[str]] = None,
    timeout: Optional[float] = 60,
) -> Tuple[bool, str, str]:
    """
    Execute a program file and capture its output.

    The interpreter is chosen from the file extension: ``.py`` runs under the
    current Python interpreter, ``.js`` under ``node`` and ``.sh`` under
    ``bash``. Any other extension is executed directly.

    Args:
        program_path (str): The path to the program file to run.
        args (Optional[List[str]]): Extra command-line arguments for the
            program. ``None`` (the default) means no extra arguments.
        timeout (Optional[float]): Seconds to wait before giving up on the
            program. ``None`` disables the timeout. Defaults to 60.

    Returns:
        Tuple[bool, str, str]: A tuple containing:
            - bool: True if the program exited with code 0, False otherwise
              (including when it could not be started or timed out).
            - str: The captured standard output ("" when the run failed to start).
            - str: The captured standard error, or a short description of why
              the program could not be run.
    """
    # Local import: escape() neutralises Rich console markup in arbitrary text
    # (program output, paths, exception messages). Without it a stray "[/x]"
    # in stderr raises MarkupError and a stray "[i]" silently vanishes.
    from rich.markup import escape

    interpreter: List[str] = []
    try:
        # Copy the caller's arguments so the command list never aliases them.
        extra_args: List[str] = list(args) if args else []

        # Determine the interpreter based on file extension (basic example).
        # A more robust solution might use the 'language' from construct_paths.
        if program_path.endswith(".py"):
            interpreter = [sys.executable]  # Use the current Python interpreter
        elif program_path.endswith(".js"):
            interpreter = ["node"]
        elif program_path.endswith(".sh"):
            interpreter = ["bash"]
        # Add other languages as needed

        command = interpreter + [program_path] + extra_args
        rich_print(f"[dim]Running command:[/dim] {escape(' '.join(command))}")

        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,  # Don't raise exception on non-zero exit code
            timeout=timeout,  # Prevent hangs
        )

        success = process.returncode == 0
        stdout = process.stdout
        stderr = process.stderr

        if not success:
            rich_print(
                f"[yellow]Warning:[/yellow] Program "
                f"'{escape(os.path.basename(program_path))}' exited with code {process.returncode}."
            )
            if stderr:
                rich_print("[yellow]Stderr:[/yellow]")
                rich_print(Panel(escape(stderr), border_style="yellow"))

        return success, stdout, stderr

    except FileNotFoundError:
        # subprocess raises FileNotFoundError for the executable it tries to
        # launch. When an interpreter was selected, that executable is the
        # interpreter, not the program file, so report it as such.
        if interpreter:
            rich_print(
                f"[bold red]Error:[/bold red] Interpreter '{escape(interpreter[0])}' "
                f"not found; cannot run '{escape(program_path)}'"
            )
            return False, "", f"Interpreter '{interpreter[0]}' not found; cannot run: {program_path}"
        rich_print(f"[bold red]Error:[/bold red] Program file not found: '{escape(program_path)}'")
        return False, "", f"Program file not found: {program_path}"
    except subprocess.TimeoutExpired:
        rich_print(f"[bold red]Error:[/bold red] Program execution timed out: '{escape(program_path)}'")
        return False, "", f"Program execution timed out: {program_path}"
    except Exception as e:
        # Best-effort boundary: any other failure is reported, not raised.
        rich_print(
            f"[bold red]Error:[/bold red] Failed to run program "
            f"'{escape(str(program_path))}': {escape(str(e))}"
        )
        return False, "", f"Failed to run program: {e}"
83
+
84
def fix_verification_main(
    ctx: click.Context,
    prompt_file: str,
    code_file: str,
    program_file: str,
    output_results: Optional[str],
    output_code: Optional[str],
    loop: bool,
    verification_program: Optional[str],  # Only used if loop=True
    max_attempts: int = DEFAULT_MAX_ATTEMPTS,
    budget: float = DEFAULT_BUDGET,
) -> Tuple[bool, str, str, int, float, str]:
    """
    CLI wrapper for the 'verify' command. Verifies code correctness by running
    a program, using an LLM to judge its output against the prompt's intent,
    and potentially fixing the code iteratively.

    In single-pass mode the program is executed once via ``run_program`` and
    its output is handed to ``fix_verification_errors``. In loop mode all
    work is delegated to ``fix_verification_errors_loop``, which receives the
    file paths and the results-log path.

    Args:
        ctx (click.Context): The Click context object. Global flags
            ('strength', 'temperature', 'force', 'quiet', 'verbose') are read
            from ``ctx.obj``; a missing ``ctx.obj`` falls back to defaults.
        prompt_file (str): Path to the prompt file.
        code_file (str): Path to the code file to verify/fix.
        program_file (str): Path to the program to run for verification.
        output_results (Optional[str]): Path to save verification results log.
        output_code (Optional[str]): Path to save the verified code file.
        loop (bool): If True, perform iterative verification and fixing.
        verification_program (Optional[str]): Path to a verification program (required if loop=True).
        max_attempts (int): Max attempts for the loop.
        budget (float): Max cost budget for the loop.

    Returns:
        Tuple[bool, str, str, int, float, str]:
            - success_status (bool): True if verification passed (or was fixed).
            - final_program (str): Content of the program file (potentially modified if loop=True).
            - final_code (str): Content of the code file after verification/fixing.
            - attempts (int): Number of attempts made.
            - total_cost (float): Total cost incurred.
            - model_name (str): Name of the LLM used.

    Raises:
        click.UsageError: If ``loop`` is True but no ``verification_program`` is given.
        SystemExit: If path construction fails, or the manual fallback cannot
            read one of the input files.
    """
    import traceback

    # escape() neutralises Rich console markup in arbitrary text (program
    # output, tracebacks, exception messages) so it is printed verbatim
    # instead of raising MarkupError or being swallowed as a style tag.
    from rich.markup import escape

    # Extract global options from the context object. ctx.obj is None unless a
    # parent command populated it, so fall back to an empty mapping.
    global_opts = ctx.obj or {}
    strength: float = global_opts.get('strength', DEFAULT_STRENGTH)
    temperature: float = global_opts.get('temperature', DEFAULT_TEMPERATURE)
    force: bool = global_opts.get('force', False)
    quiet: bool = global_opts.get('quiet', False)
    verbose: bool = global_opts.get('verbose', False)

    # --- Input Validation ---
    if loop and not verification_program:
        raise click.UsageError("The '--loop' option requires '--verification-program' to be specified.")

    if not quiet:
        rich_print(Panel(f"Starting Verification Process for [cyan]{os.path.basename(code_file)}[/cyan]", title="PDD Verify", border_style="blue"))
        rich_print(f" Prompt: [green]{prompt_file}[/green]")
        rich_print(f" Code: [green]{code_file}[/green]")
        rich_print(f" Program: [green]{program_file}[/green]")
        if loop:
            rich_print(" Mode: [yellow]Iterative Loop[/yellow]")
            rich_print(f" Verification Program: [green]{verification_program}[/green]")
            rich_print(f" Max Attempts: {max_attempts}")
            rich_print(f" Budget: ${budget:.2f}")
        else:
            rich_print(" Mode: [yellow]Single Pass[/yellow]")
        rich_print(f" Strength: {strength}, Temperature: {temperature}")

    # ------------------- File-path handling -------------------
    input_file_paths: Dict[str, str] = {
        "prompt_file": prompt_file,
        "code_file": code_file,
        "program_file": program_file,
    }
    # verification_program is only passed along as an input path in loop mode.
    if loop and verification_program:
        input_file_paths["verification_program"] = verification_program

    command_options: Dict[str, Optional[str]] = {
        "output_results": output_results,
        "output_code": output_code,
    }

    # Initial default values (in case we need the manual fallback)
    input_strings: Dict[str, str] = {}
    output_code_path: Optional[str] = output_code
    output_results_path: Optional[str] = output_results
    language: str = ""

    try:
        # First try the official helper.
        input_strings, output_file_paths, language = construct_paths(
            input_file_paths=input_file_paths,
            force=force,
            quiet=quiet,
            command="verify",
            command_options=command_options,
        )
        output_code_path = output_file_paths.get("output_code")
        output_results_path = output_file_paths.get("output_results")

        if verbose:
            rich_print("[dim]Resolved output paths via construct_paths.[/dim]")

    except Exception as e:
        # Only an "invalid command" failure triggers the manual fallback;
        # every other error aborts the command.
        if "invalid command" not in str(e).lower():
            rich_print(f"[bold red]Error:[/bold red] Failed during path construction: {escape(str(e))}")
            if verbose:
                rich_print(Panel(escape(traceback.format_exc()), title="Traceback", border_style="red"))
            sys.exit(1)

        if verbose:
            rich_print(
                "[yellow]construct_paths does not recognise "
                "'verify'. Falling back to manual path handling.[/yellow]"
            )
        try:
            # Manually read the three mandatory files
            for key, path in (
                ("prompt_file", prompt_file),
                ("code_file", code_file),
                ("program_file", program_file),
            ):
                with open(path, "r") as f:
                    input_strings[key] = f.read()
        except (OSError, UnicodeError) as fe:
            # OSError covers missing files as well as permission/directory
            # errors; UnicodeError covers undecodable content. All of them
            # get the same clean error exit instead of a raw traceback.
            rich_print(f"[bold red]Error:[/bold red] {escape(str(fe))}")
            sys.exit(1)

        # Pick or build output paths
        if output_code_path is None:
            base, ext = os.path.splitext(code_file)
            output_code_path = f"{base}_verified{ext}"
        if output_results_path is None:
            base, _ = os.path.splitext(program_file)
            output_results_path = f"{base}_verify_results.log"

        # Best-effort language guess from the program file's extension
        if program_file.endswith(".py"):
            language = "python"
        elif program_file.endswith(".js"):
            language = "javascript"
        elif program_file.endswith(".sh"):
            language = "bash"

    # --- Core Logic ---
    success: bool = False
    final_program: str = input_strings.get("program_file", "")  # Initialize with input content
    final_code: str = input_strings.get("code_file", "")  # Initialize with input content
    attempts: int = 0
    total_cost: float = 0.0
    model_name: str = "N/A"
    results_log_content: str = ""  # Content for the single-pass results file

    try:
        if loop:
            if not quiet:
                rich_print("\n[bold blue]Running Iterative Verification (fix_verification_errors_loop)...[/bold blue]")

            # The loop function is given file paths (plus the prompt content)
            # and the results-log path.
            # TODO: How are program_args passed? Assuming empty list for now. Needs CLI option.
            program_args: List[str] = []

            loop_results = fix_verification_errors_loop(
                program_file=program_file,
                code_file=code_file,
                prompt=input_strings["prompt_file"],  # Prompt content is needed
                verification_program=verification_program,  # Path is needed
                strength=strength,
                temperature=temperature,
                max_attempts=max_attempts,
                budget=budget,
                verification_log_file=output_results_path,  # Pass path for internal logging
                verbose=verbose,
                program_args=program_args,
            )
            success = loop_results['success']
            final_program = loop_results['final_program']
            final_code = loop_results['final_code']
            attempts = loop_results['total_attempts']
            total_cost = loop_results['total_cost']
            model_name = loop_results['model_name']
            # No log content is built here for loop mode; the log path was
            # handed to the loop function above.

        else:  # Single pass verification
            if not quiet:
                rich_print("\n[bold blue]Running Single Pass Verification (fix_verification_errors)...[/bold blue]")
            attempts = 1  # Single pass is one attempt

            # 1. Run the program file to get its output. The exit status is
            #    deliberately not checked: the captured output is sent to the
            #    LLM regardless of whether the program succeeded.
            if not quiet:
                rich_print(f"Executing program: [cyan]{program_file}[/cyan]")
            _run_ok, program_stdout, program_stderr = run_program(program_file)
            program_output = program_stdout + ("\n--- STDERR ---\n" + program_stderr if program_stderr else "")

            if verbose:
                rich_print("[dim]--- Program Output ---[/dim]")
                rich_print(Panel(escape(program_output) if program_output else "[No Output]", border_style="dim"))
                rich_print("[dim]--- End Program Output ---[/dim]")

            # 2. Call fix_verification_errors with content and program output
            if not quiet:
                rich_print("Calling LLM to verify program output against prompt...")

            fix_results = fix_verification_errors(
                program=input_strings["program_file"],
                prompt=input_strings["prompt_file"],
                code=input_strings["code_file"],
                output=program_output,  # Pass the captured output
                strength=strength,
                temperature=temperature,
                verbose=verbose,
            )

            # Success means either no issues were found, or issues were found
            # and the LLM changed the code and/or the program in response.
            issues_found = fix_results['verification_issues_count'] > 0
            code_updated = fix_results['fixed_code'] != input_strings["code_file"]
            program_updated = fix_results['fixed_program'] != input_strings["program_file"]

            if not issues_found:
                success = True
                if not quiet:
                    rich_print("[green]Verification Passed:[/green] LLM found no discrepancies.")
            elif code_updated or program_updated:
                # Fixes are assumed good for a single pass; re-running the
                # program against the fixed code is the loop mode's job.
                success = True
                if not quiet:
                    rich_print("[yellow]Verification Issues Found:[/yellow] LLM proposed fixes.")
            else:
                success = False  # Issues found, but no fixes proposed
                if not quiet:
                    rich_print("[red]Verification Failed:[/red] LLM found discrepancies but proposed no fixes.")

            final_program = fix_results['fixed_program']
            final_code = fix_results['fixed_code']
            total_cost = fix_results['total_cost']
            model_name = fix_results['model_name']

            # Normalise the explanation: a missing/None value becomes "N/A",
            # a plain string is used as-is, and any other iterable is joined
            # line by line.
            explanation = fix_results.get('explanation', ['N/A'])
            if explanation is None:
                explanation_text = "N/A"
            elif isinstance(explanation, str):
                explanation_text = explanation
            else:
                explanation_text = "\n".join(str(item) for item in explanation)

            # Build results log content for single pass. The "Timestamp" is
            # the prompt file's modification time as returned by
            # os.path.getmtime (seconds since the epoch), used as a reference.
            results_log_content = "".join([
                "PDD Verify Results (Single Pass)\n",
                f"Timestamp: {os.path.getmtime(prompt_file)}\n",
                f"Prompt File: {prompt_file}\n",
                f"Code File: {code_file}\n",
                f"Program File: {program_file}\n",
                f"Success: {success}\n",
                f"Issues Found Count: {fix_results['verification_issues_count']}\n",
                f"Code Updated: {code_updated}\n",
                f"Program Updated: {program_updated}\n",
                f"Model Used: {model_name}\n",
                f"Total Cost: ${total_cost:.6f}\n",
                "\n--- LLM Explanation ---\n",
                explanation_text,
                "\n\n--- Program Output Used for Verification ---\n",
                program_output,
            ])

    except Exception as e:
        # Top-level boundary for the verification step: report the failure
        # and return whatever partial results were gathered so far.
        success = False
        rich_print(f"[bold red]Error during verification process:[/bold red] {escape(str(e))}")
        if verbose:
            rich_print(Panel(escape(traceback.format_exc()), title="Traceback", border_style="red"))
        return success, final_program, final_code, attempts, total_cost, model_name

    # --- Output File Writing ---
    saved_code_path: Optional[str] = None
    saved_results_path: Optional[str] = None

    if success and output_code_path:
        try:
            with open(output_code_path, "w") as f:
                f.write(final_code)
            saved_code_path = output_code_path
            if not quiet:
                rich_print(f"Successfully verified code saved to: [green]{output_code_path}[/green]")
        except (OSError, UnicodeError) as e:
            # UnicodeError: the content may not be encodable in the platform's
            # default encoding; report it rather than losing the return value.
            rich_print(f"[bold red]Error:[/bold red] Failed to write verified code file '{output_code_path}': {escape(str(e))}")

    # Write results log (only for single pass; in loop mode the log path was
    # passed to the loop function instead).
    if not loop and output_results_path:
        try:
            with open(output_results_path, "w") as f:
                f.write(results_log_content)
            saved_results_path = output_results_path
            if not quiet:
                rich_print(f"Verification results log saved to: [green]{output_results_path}[/green]")
        except (OSError, UnicodeError) as e:
            rich_print(f"[bold red]Error:[/bold red] Failed to write results log file '{output_results_path}': {escape(str(e))}")
    elif loop and output_results_path:
        # For loop, just confirm the path where the loop function *should* have saved the log
        saved_results_path = output_results_path
        if not quiet:
            rich_print(f"Verification results log (from loop) should be at: [green]{output_results_path}[/green]")

    # --- Final User Feedback ---
    if not quiet:
        rich_print("\n" + "=" * 40)
        title = "[bold green]Verification Complete[/bold green]" if success else "[bold red]Verification Failed[/bold red]"
        summary_panel = Panel(
            f"Status: {'[green]Success[/green]' if success else '[red]Failure[/red]'}\n"
            f"Attempts: {attempts}\n"
            f"Total Cost: ${total_cost:.6f}\n"
            f"Model Used: {model_name}\n"
            f"Verified Code Saved: {saved_code_path or 'N/A'}\n"
            f"Results Log Saved: {saved_results_path or 'N/A'}",
            title=title,
            border_style="green" if success else "red",
        )
        rich_print(summary_panel)

        if verbose and not success and not loop:
            rich_print("[bold yellow]Final Code (after failed single pass):[/bold yellow]")
            rich_print(Syntax(final_code, language or "python", theme="default", line_numbers=True))

    return success, final_program, final_code, attempts, total_cost, model_name