pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
@@ -27,6 +27,54 @@ except ImportError:
 
 from . import DEFAULT_TIME # Import DEFAULT_TIME
 from .python_env_detector import detect_host_python_executable
+from .get_language import get_language
+from .agentic_langtest import default_verify_cmd_for
+from .agentic_verify import run_agentic_verify
+
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_verify result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_verify(*, prompt_file, code_file, program_file, verification_log_file, verbose=False, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_verify and normalize its return.
+
+    Note: cwd parameter is accepted for compatibility but not passed to run_agentic_verify
+    as it determines the working directory from prompt_file.parent internally.
+    """
+    if not prompt_file:
+        return False, "Agentic verify requires a valid prompt file.", 0.0, "agentic-cli", []
+
+    try:
+        res = run_agentic_verify(
+            prompt_file=Path(prompt_file),
+            code_file=Path(code_file),
+            program_file=Path(program_file),
+            verification_log_file=Path(verification_log_file),
+            verbose=verbose,
+            quiet=not verbose,
+            # Note: cwd is not passed - run_agentic_verify uses prompt_file.parent as project root
+        )
+        return _normalize_agentic_result(res)
+    except Exception as e:
+        return False, f"Agentic verify failed: {e}", 0.0, "agentic-cli", []
 
 # Initialize Rich Console for pretty printing
 console = Console()
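
For reference, a minimal usage sketch of the normalization helper added above. This is not part of the wheel; the import path assumes the helper lives in pdd/fix_verification_errors_loop.py (the file being diffed), and _normalize_agentic_result is a private function, imported here only for illustration.

# Hypothetical usage sketch; the expected tuples follow directly from the branches above.
from pdd.fix_verification_errors_loop import _normalize_agentic_result  # assumed location

# 5-tuple: passed through; falsy model/changed_files get defaults
assert _normalize_agentic_result((True, "ok", 0.02, None, None)) == (True, "ok", 0.02, "agentic-cli", [])
# 4-tuple: changed_files defaults to []
assert _normalize_agentic_result((True, "ok", 0.02, "gpt-x")) == (True, "ok", 0.02, "gpt-x", [])
# 3-tuple: model defaults to "agentic-cli"
assert _normalize_agentic_result((False, "boom", 0.01)) == (False, "boom", 0.01, "agentic-cli", [])
# 2-tuple: cost defaults to 0.0
assert _normalize_agentic_result((True, "ok")) == (True, "ok", 0.0, "agentic-cli", [])
# Anything else collapses to the fallback error shape
assert _normalize_agentic_result(None) == (False, "Invalid agentic result shape", 0.0, "agentic-cli", [])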
@@ -56,7 +104,7 @@ def _run_program(
         command.extend(args)
 
     try:
-        # Run from staging root directory instead of examples/ directory
+        # Run from staging root directory instead of examples/
         # This allows imports from both pdd/ and examples/ subdirectories
         staging_root = program_path.parent.parent # Go up from examples/ to staging root
 
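
For intuition, a small sketch of the staging-root resolution described in the comment above; the directory layout is hypothetical.

# Hypothetical layout: running the subprocess from the staging root makes both
# pdd/ and examples/ importable as top-level packages.
#
#   staging/                  <- program_path.parent.parent (used as cwd)
#   |-- pdd/
#   `-- examples/
#       `-- run_example.py    <- program_path
from pathlib import Path

program_path = Path("staging/examples/run_example.py").resolve()
staging_root = program_path.parent.parent
print(staging_root)  # .../staging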
@@ -95,6 +143,7 @@ def fix_verification_errors_loop(
     program_file: str,
     code_file: str,
     prompt: str,
+    prompt_file: str,
     verification_program: str,
     strength: float,
     temperature: float,
@@ -105,7 +154,8 @@ def fix_verification_errors_loop(
     output_program_path: Optional[str] = None,
     verbose: bool = False,
     program_args: Optional[list[str]] = None,
-    llm_time: float = DEFAULT_TIME # Add time parameter
+    llm_time: float = DEFAULT_TIME, # Add time parameter
+    agentic_fallback: bool = True,
 ) -> Dict[str, Any]:
     """
     Attempts to fix errors in a code file based on program execution output
@@ -115,6 +165,7 @@ def fix_verification_errors_loop(
         program_file: Path to the Python program exercising the code.
         code_file: Path to the code file being tested/verified.
         prompt: The prompt defining the intended behavior.
+        prompt_file: Path to the prompt file.
         verification_program: Path to a secondary program to verify code changes.
         strength: LLM model strength (0.0 to 1.0).
         temperature: LLM temperature (0.0 to 1.0).
@@ -126,6 +177,7 @@ def fix_verification_errors_loop(
         verbose: Enable verbose logging (default: False).
         program_args: Optional list of command-line arguments for the program_file.
         llm_time: Time parameter for fix_verification_errors calls (default: DEFAULT_TIME).
+        agentic_fallback: Enable agentic fallback if the primary fix mechanism fails.
 
     Returns:
         A dictionary containing:
@@ -137,6 +189,61 @@ def fix_verification_errors_loop(
             'model_name': str | None - Name of the LLM model used.
             'statistics': dict - Detailed statistics about the process.
     """
+    is_python = str(code_file).lower().endswith(".py")
+    if not is_python:
+        # For non-Python files, run the verification program to get an initial error state
+        console.print(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+        lang = get_language(os.path.splitext(code_file)[1])
+        verify_cmd = default_verify_cmd_for(lang, verification_program)
+        if not verify_cmd:
+            raise ValueError(f"No default verification command for language: {lang}")
+
+        verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True)
+        pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+        console.print("[cyan]Non-Python target detected. Triggering agentic fallback...[/cyan]")
+        verification_log_path = Path(verification_log_file)
+        verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(verification_log_path, "w") as f:
+            f.write(pytest_output)
+
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+        success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        if not success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f" • {f}")
+        final_program = ""
+        final_code = ""
+        try:
+            with open(verification_program, "r") as f:
+                final_program = f.read()
+        except Exception:
+            pass
+        try:
+            with open(code_file, "r") as f:
+                final_code = f.read()
+        except Exception:
+            pass
+        return {
+            "success": success,
+            "final_program": final_program,
+            "final_code": final_code,
+            "total_attempts": 1,
+            "total_cost": agent_cost,
+            "model_name": agent_model,
+            "statistics": {},
+        }
+
     program_path = Path(program_file).resolve()
     code_path = Path(code_file).resolve()
     verification_program_path = Path(verification_program).resolve()
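
A quick illustration of the dispatch above (file names are hypothetical): any target that does not end in .py skips the LLM loop and goes straight to the agentic fallback.

# Hypothetical targets; mirrors the is_python check at the top of the function.
for code_file in ("src/my_module.py", "src/lib.rs", "app/Main.kt"):
    is_python = str(code_file).lower().endswith(".py")
    print(f"{code_file} -> {'LLM fix loop' if is_python else 'agentic fallback'}")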
@@ -158,9 +265,9 @@ def fix_verification_errors_loop(
     if not 0.0 <= temperature <= 1.0:
         console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
-    # Prompt requires positive max_attempts
-    if max_attempts <= 0:
-        console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
+    # max_attempts must be non-negative (0 is valid - skips LLM loop, goes straight to agentic mode)
+    if max_attempts < 0:
+        console.print(f"[bold red]Error: Max attempts must be non-negative.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
     if budget < 0:
         console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
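
A minimal calling sketch under the new validation rule; the paths are placeholders, not from the wheel. max_attempts=0 is now accepted and skips the LLM loop entirely, relying on the agentic fallback instead.

# Hypothetical invocation; keyword names match the updated signature in this diff.
from pdd.fix_verification_errors_loop import fix_verification_errors_loop

result = fix_verification_errors_loop(
    program_file="examples/run_module.py",          # placeholder path
    code_file="src/my_module.py",                   # placeholder path
    prompt="Implement process(x) returning x * 2.",
    prompt_file="prompts/my_module_python.prompt",  # parameter added in this diff
    verification_program="examples/verify_module.py",
    strength=0.5,
    temperature=0.0,
    max_attempts=0,          # now valid: 0 skips the LLM loop
    budget=0.25,
    verification_log_file="logs/verify.log",
    agentic_fallback=True,   # parameter added in this diff (defaults to True)
)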
@@ -182,6 +289,7 @@ def fix_verification_errors_loop(
     total_cost = 0.0
     model_name: Optional[str] = None
     overall_success = False
+    any_verification_passed = False # Track if ANY iteration passed secondary verification
     best_iteration = {
         'attempt': -1, # 0 represents initial state
         'program_backup': None,
@@ -232,128 +340,155 @@ def fix_verification_errors_loop(
     initial_log_entry += '</InitialState>'
     _write_log_entry(log_path, initial_log_entry)
 
+    # 3c: Check if skipping LLM assessment (max_attempts=0 means skip to agentic fallback)
+    skip_llm = (max_attempts == 0)
+
     # 3d: Call fix_verification_errors for initial assessment
     try:
-        if verbose:
-            console.print("Running initial assessment with fix_verification_errors...")
-        # Use actual strength/temp for realistic initial assessment
-        initial_fix_result = fix_verification_errors(
-            program=initial_program_content,
-            prompt=prompt,
-            code=initial_code_content,
-            output=initial_output,
-            strength=strength,
-            temperature=temperature,
-            verbose=verbose,
-            time=llm_time # Pass time
-        )
-        # 3e: Add cost
-        initial_cost = initial_fix_result.get('total_cost', 0.0)
-        total_cost += initial_cost
-        model_name = initial_fix_result.get('model_name') # Capture model name early
-        if verbose:
-            console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
-
-        # 3f: Extract initial issues
-        initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
-        stats['initial_issues'] = initial_issues_count
-        if verbose:
-            console.print(f"Initial verification issues found: {initial_issues_count}")
-            if initial_fix_result.get('explanation'):
-                console.print("Initial assessment explanation:")
-                console.print(initial_fix_result['explanation'])
-
-        # FIX: Add check for initial assessment error *before* checking success/budget
-        # Check if the fixer function returned its specific error state (None explanation/model)
-        if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
-            error_msg = "Error: Fixer returned invalid/error state during initial assessment"
-            console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
-            stats['status_message'] = error_msg
-            stats['final_issues'] = -1 # Indicate unknown/error state
-            # Write final action log for error on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-            # Return failure state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost, # May be non-zero if error occurred after some cost
-                "model_name": model_name, # May have been set before error
-                "statistics": stats,
-            }
-
-        # 3g: Initialize best iteration tracker
-        # Store original paths as the 'backup' for iteration 0
-        best_iteration = {
-            'attempt': 0, # Use 0 for initial state
-            'program_backup': str(program_path), # Path to original
-            'code_backup': str(code_path), # Path to original
-            'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
-        }
-        stats['best_iteration_num'] = 0
-        stats['best_iteration_issues'] = best_iteration['issues']
-
-        # 3h: Check for immediate success or budget exceeded
-        if initial_issues_count == 0:
-            console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
-            overall_success = True
-            stats['final_issues'] = 0
-            stats['status_message'] = 'Success on initial check'
-            stats['improvement_issues'] = 0
-            stats['improvement_percent'] = 100.0 # Reached target of 0 issues
-
-            # Write final action log for successful initial check
+        if skip_llm:
+            # Skip initial LLM assessment when max_attempts=0
+            console.print("[bold cyan]max_attempts=0: Skipping LLM assessment, proceeding to agentic fallback.[/bold cyan]")
+            # Set up state for skipping the LLM loop
+            stats['initial_issues'] = -1 # Unknown since we skipped assessment
+            stats['final_issues'] = -1
+            stats['best_iteration_num'] = -1
+            stats['best_iteration_issues'] = float('inf')
+            stats['status_message'] = 'Skipped LLM (max_attempts=0)'
+            stats['improvement_issues'] = 'N/A'
+            stats['improvement_percent'] = 'N/A'
+            overall_success = False # Trigger agentic fallback
+            final_program_content = initial_program_content
+            final_code_content = initial_code_content
+            # Write log entry for skipped LLM
             final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+            final_log_entry += f' <Action>Skipped LLM assessment and loop (max_attempts=0), proceeding to agentic fallback.</Action>\n'
             final_log_entry += "</FinalActions>"
             _write_log_entry(log_path, final_log_entry)
+            # Skip to final stats (the while loop below will also be skipped since 0 < 0 is False)
+            initial_issues_count = -1 # Sentinel: unknown/not applicable when LLM assessment is skipped; kept numeric for downstream comparisons
+        else:
+            if verbose:
+                console.print("Running initial assessment with fix_verification_errors...")
+            # Use actual strength/temp for realistic initial assessment
+            initial_fix_result = fix_verification_errors(
+                program=initial_program_content,
+                prompt=prompt,
+                code=initial_code_content,
+                output=initial_output,
+                strength=strength,
+                temperature=temperature,
+                verbose=verbose,
+                time=llm_time # Pass time
+            )
+            # 3e: Add cost
+            initial_cost = initial_fix_result.get('total_cost', 0.0)
+            total_cost += initial_cost
+            model_name = initial_fix_result.get('model_name') # Capture model name early
+            if verbose:
+                console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
 
-            # Step 7 (early exit): Print stats
-            console.print("\n[bold]--- Final Statistics ---[/bold]")
-            console.print(f"Initial Issues: {stats['initial_issues']}")
-            console.print(f"Final Issues: {stats['final_issues']}")
-            console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
-            console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
-            console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
-            console.print(f"Overall Status: {stats['status_message']}")
-            console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
-            console.print(f"Total Cost: ${total_cost:.6f}")
-            console.print(f"Model Used: {model_name or 'N/A'}")
-            # Step 8 (early exit): Return
-            return {
-                "success": overall_success,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": attempts, # attempts is 0
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
+            # 3f: Extract initial issues
+            initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
+            stats['initial_issues'] = initial_issues_count
+            if verbose:
+                console.print(f"Initial verification issues found: {initial_issues_count}")
+                if initial_fix_result.get('explanation'):
+                    console.print("Initial assessment explanation:")
+                    console.print(initial_fix_result['explanation'])
+
+        # The following checks only apply when we ran the LLM assessment (not skipped)
+        if not skip_llm:
+            # FIX: Add check for initial assessment error *before* checking success/budget
+            # Check if the fixer function returned its specific error state (None explanation/model)
+            if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
+                error_msg = "Error: Fixer returned invalid/error state during initial assessment"
+                console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
+                stats['status_message'] = error_msg
+                stats['final_issues'] = -1 # Indicate unknown/error state
+                # Write final action log for error on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+                # Return failure state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost, # May be non-zero if error occurred after some cost
+                    "model_name": model_name, # May have been set before error
+                    "statistics": stats,
+                }
+
+            # 3g: Initialize best iteration tracker
+            # Store original paths as the 'backup' for iteration 0
+            best_iteration = {
+                'attempt': 0, # Use 0 for initial state
+                'program_backup': str(program_path), # Path to original
+                'code_backup': str(code_path), # Path to original
+                'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
             }
-        elif total_cost >= budget:
-            console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
-            stats['status_message'] = 'Budget exceeded on initial check'
-            stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
-
-            # Write final action log for budget exceeded on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-
-            # No changes made, return initial state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
-            }
+            stats['best_iteration_num'] = 0
+            stats['best_iteration_issues'] = best_iteration['issues']
+
+            # 3h: Check for immediate success or budget exceeded
+            if initial_issues_count == 0:
+                console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
+                overall_success = True
+                stats['final_issues'] = 0
+                stats['status_message'] = 'Success on initial check'
+                stats['improvement_issues'] = 0
+                stats['improvement_percent'] = 100.0 # Reached target of 0 issues
+
+                # Write final action log for successful initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # Step 7 (early exit): Print stats
+                console.print("\n[bold]--- Final Statistics ---[/bold]")
+                console.print(f"Initial Issues: {stats['initial_issues']}")
+                console.print(f"Final Issues: {stats['final_issues']}")
+                console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
+                console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
+                console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
+                console.print(f"Overall Status: {stats['status_message']}")
+                console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
+                console.print(f"Total Cost: ${total_cost:.6f}")
+                console.print(f"Model Used: {model_name or 'N/A'}")
+                # Step 8 (early exit): Return
+                return {
+                    "success": overall_success,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": attempts, # attempts is 0
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
+            elif total_cost >= budget:
+                console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
+                stats['status_message'] = 'Budget exceeded on initial check'
+                stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
+
+                # Write final action log for budget exceeded on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # No changes made, return initial state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
 
     except Exception as e:
         console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
@@ -593,6 +728,9 @@ def fix_verification_errors_loop(
 
         # Now, decide outcome based on issue count and verification status
         if secondary_verification_passed:
+            # Only track as "verification passed" if code was actually changed and verified
+            if code_updated:
+                any_verification_passed = True # Track that at least one verification passed
            # Update best iteration if current attempt is better
            if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                if verbose:
@@ -735,8 +873,14 @@ def fix_verification_errors_loop(
                 if verbose:
                     console.print(f"Restored {program_path} from {best_program_path}")
                     console.print(f"Restored {code_path} from {best_code_path}")
-                # Final issues count is the best achieved count
-                stats['final_issues'] = best_iteration['issues']
+                # Only mark as success if verification actually passed
+                # (best_iteration is only updated when secondary verification passes,
+                # but we double-check with any_verification_passed for safety)
+                if any_verification_passed:
+                    stats['final_issues'] = 0
+                    overall_success = True
+                else:
+                    stats['final_issues'] = best_iteration['issues']
             else:
                 console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
                 final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
@@ -750,6 +894,15 @@ def fix_verification_errors_loop(
             stats['status_message'] += f' - Error restoring best iteration: {e}'
             stats['final_issues'] = -1 # Indicate uncertainty
 
+    # If verification passed (even if issue count didn't decrease), consider it success
+    elif any_verification_passed:
+        console.print("[green]Verification passed. Keeping current state.[/green]")
+        final_log_entry += f' <Action>Verification passed; keeping current state.</Action>\n'
+        # Verification passed = code works, so final issues is effectively 0
+        stats['final_issues'] = 0
+        stats['status_message'] = 'Success - verification passed'
+        overall_success = True
+
     # If no improvement was made or recorded (best is still initial state or worse)
     elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
         console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
@@ -864,6 +1017,36 @@ def fix_verification_errors_loop(
     if final_known and stats['final_issues'] != 0:
         overall_success = False
 
+    if not overall_success and agentic_fallback:
+        console.print(f"[bold yellow]Initiating agentic fallback (prompt_file={prompt_file!r})...[/bold yellow]")
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f" • {f}")
+        if agent_success:
+            console.print("[bold green]Agentic fallback successful.[/bold green]")
+            overall_success = True
+            model_name = agent_model or model_name
+            try:
+                final_code_content = Path(code_file).read_text(encoding="utf-8")
+                final_program_content = Path(program_file).read_text(encoding="utf-8")
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+        else:
+            console.print("[bold red]Agentic fallback failed.[/bold red]")
+
     return {
         "success": overall_success,
         "final_program": final_program_content,
@@ -872,153 +1055,4 @@ def fix_verification_errors_loop(
         "total_cost": total_cost,
         "model_name": model_name,
         "statistics": stats,
-    }
-
-# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
-if __name__ == "__main__":
-    # Create dummy files for demonstration
-    # In a real scenario, these files would exist and contain actual code/programs.
-    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
-    temp_dir = Path("./temp_fix_verification_loop")
-    temp_dir.mkdir(exist_ok=True)
-
-    program_file = temp_dir / "my_program.py"
-    code_file = temp_dir / "my_code_module.py"
-    verification_program_file = temp_dir / "verify_syntax.py"
-
-    program_file.write_text("""
-import my_code_module
-import sys
-# Simulate using the module and checking output
-val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
-result = my_code_module.process(val)
-expected = val * 2
-print(f"Input: {val}")
-print(f"Result: {result}")
-print(f"Expected: {expected}")
-if result == expected:
-    print("VERIFICATION_SUCCESS")
-else:
-    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
-""", encoding="utf-8")
-
-    # Initial code with a bug
-    code_file.write_text("""
-# my_code_module.py
-def process(x):
-    # Bug: should be x * 2
-    return x + 2
-""", encoding="utf-8")
-
-    # Simple verification program (e.g., syntax check)
-    verification_program_file.write_text("""
-import sys
-import py_compile
-import os
-# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
-code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
-print(f"Checking syntax of: {code_to_check}")
-try:
-    py_compile.compile(code_to_check, doraise=True)
-    print("Syntax OK.")
-    sys.exit(0) # Success
-except py_compile.PyCompileError as e:
-    print(f"Syntax Error: {e}")
-    sys.exit(1) # Failure
-except Exception as e:
-    print(f"Verification Error: {e}")
-    sys.exit(1) # Failure
-""", encoding="utf-8")
-    # Set environment variable for the verification script
-    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
-
-
-    # --- Mock fix_verification_errors ---
-    # This is crucial for testing without actual LLM calls / costs
-    # In a real test suite, use unittest.mock
-    _original_fix_verification_errors = fix_verification_errors
-    _call_count = 0
-
-    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
-        global _call_count
-        _call_count += 1
-        cost = 0.001 * _call_count # Simulate increasing cost
-        model = "mock_model_v1"
-        explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
-        issues_count = 1 # Assume 1 issue initially
-
-        fixed_program = program # Assume program doesn't need fixing
-        fixed_code = code
-
-        # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
-        if "VERIFICATION_FAILURE" in output and _call_count >= 2:
-            explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
-            fixed_code = """
-# my_code_module.py
-def process(x):
-    # Fixed: should be x * 2
-    return x * 2
-"""
-            issues_count = 0 # Fixed!
-        elif "VERIFICATION_SUCCESS" in output:
-            explanation = ["Output indicates VERIFICATION_SUCCESS."]
-            issues_count = 0 # Already correct
-
-        return {
-            'explanation': explanation,
-            'fixed_program': fixed_program,
-            'fixed_code': fixed_code,
-            'total_cost': cost,
-            'model_name': model,
-            'verification_issues_count': issues_count,
-        }
-
-    # Replace the real function with the mock
-    # In package context, you might need to patch differently
-    # For this script execution:
-    # Note: This direct replacement might not work if the function is imported
-    # using `from .fix_verification_errors import fix_verification_errors`.
-    # A proper mock framework (`unittest.mock.patch`) is better.
-    # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
-    # This is fragile. A better approach involves dependency injection or mocking frameworks.
-    # HACK: Re-assigning the imported name in the global scope of this script
-    globals()['fix_verification_errors'] = mock_fix_verification_errors
-
-
-    console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
-
-    # Example program_args: Pass input value 10 and another arg 5
-    # Note: The example program only uses the first arg sys.argv[1]
-    example_args = ["10", "another_arg"]
-
-    results = fix_verification_errors_loop(
-        program_file=str(program_file),
-        code_file=str(code_file),
-        prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
-        verification_program=str(verification_program_file),
-        strength=0.5,
-        temperature=0.1,
-        max_attempts=3,
-        budget=0.10, # Set a budget
-        verification_log_file=str(temp_dir / "test_verification.log"),
-        verbose=True,
-        program_args=example_args
-    )
-
-    console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
-    console.print(f"Success: {results['success']}")
-    console.print(f"Total Attempts: {results['total_attempts']}")
-    console.print(f"Total Cost: ${results['total_cost']:.6f}")
-    console.print(f"Model Name: {results['model_name']}")
-    # console.print(f"Final Program:\n{results['final_program']}") # Can be long
-    console.print(f"Final Code:\n{results['final_code']}")
-    console.print(f"Statistics:\n{results['statistics']}")
-
-    # Restore original function if needed elsewhere
-    globals()['fix_verification_errors'] = _original_fix_verification_errors
-
-    # Clean up dummy files
-    # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
-    # shutil.rmtree(temp_dir)
-    console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
-    console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")
+    }
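
Finally, a short consumption sketch for the dictionary this function returns; the caller is hypothetical, but the keys are exactly those in the return statement above.

# Hypothetical caller; `result` is the dict returned by the invocation sketched earlier.
if result["success"]:
    print(f"Fixed by {result['model_name']} in {result['total_attempts']} attempt(s); "
          f"total cost ${result['total_cost']:.6f}")
else:
    print("Fix failed:", result["statistics"].get("status_message", "unknown status"))
print(result["final_code"])  # final contents of the code file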