pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +80 -19
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +281 -81
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -62
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +331 -77
  43. pdd/fix_error_loop.py +209 -60
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +319 -272
  48. pdd/fix_verification_main.py +57 -17
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +48 -9
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/increase_tests.py +7 -0
  56. pdd/incremental_code_generator.py +2 -2
  57. pdd/insert_includes.py +11 -3
  58. pdd/llm_invoke.py +1278 -110
  59. pdd/load_prompt_template.py +36 -10
  60. pdd/pdd_completion.fish +25 -2
  61. pdd/pdd_completion.sh +30 -4
  62. pdd/pdd_completion.zsh +79 -4
  63. pdd/postprocess.py +10 -3
  64. pdd/preprocess.py +228 -15
  65. pdd/preprocess_main.py +8 -5
  66. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  67. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  68. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  69. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  70. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  71. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  72. pdd/prompts/auto_include_LLM.prompt +98 -101
  73. pdd/prompts/change_LLM.prompt +1 -3
  74. pdd/prompts/detect_change_LLM.prompt +562 -3
  75. pdd/prompts/example_generator_LLM.prompt +22 -1
  76. pdd/prompts/extract_code_LLM.prompt +5 -1
  77. pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
  78. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  79. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  80. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  81. pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
  82. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
  83. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  84. pdd/prompts/generate_test_LLM.prompt +21 -6
  85. pdd/prompts/increase_tests_LLM.prompt +1 -2
  86. pdd/prompts/insert_includes_LLM.prompt +1181 -6
  87. pdd/prompts/split_LLM.prompt +1 -62
  88. pdd/prompts/trace_LLM.prompt +25 -22
  89. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  90. pdd/prompts/update_prompt_LLM.prompt +22 -1
  91. pdd/prompts/xml_convertor_LLM.prompt +3246 -7
  92. pdd/pytest_output.py +188 -21
  93. pdd/python_env_detector.py +151 -0
  94. pdd/render_mermaid.py +236 -0
  95. pdd/setup_tool.py +648 -0
  96. pdd/simple_math.py +2 -0
  97. pdd/split_main.py +3 -2
  98. pdd/summarize_directory.py +56 -7
  99. pdd/sync_determine_operation.py +918 -186
  100. pdd/sync_main.py +82 -32
  101. pdd/sync_orchestration.py +1456 -453
  102. pdd/sync_tui.py +848 -0
  103. pdd/template_registry.py +264 -0
  104. pdd/templates/architecture/architecture_json.prompt +242 -0
  105. pdd/templates/generic/generate_prompt.prompt +174 -0
  106. pdd/trace.py +168 -12
  107. pdd/trace_main.py +4 -3
  108. pdd/track_cost.py +151 -61
  109. pdd/unfinished_prompt.py +49 -3
  110. pdd/update_main.py +549 -67
  111. pdd/update_model_costs.py +2 -2
  112. pdd/update_prompt.py +19 -4
  113. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
  114. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  115. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  116. pdd_cli-0.0.42.dist-info/RECORD +0 -115
  117. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  118. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  119. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_verification_errors_loop.py
@@ -2,6 +2,7 @@ import os
 import shutil
 import subprocess
 import datetime
+import sys
 from pathlib import Path
 from typing import Dict, Tuple, Any, Optional
 from xml.sax.saxutils import escape
@@ -25,6 +26,55 @@ except ImportError:
     )
 
 from . import DEFAULT_TIME # Import DEFAULT_TIME
+from .python_env_detector import detect_host_python_executable
+from .get_language import get_language
+from .agentic_langtest import default_verify_cmd_for
+from .agentic_verify import run_agentic_verify
+
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_verify result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_verify(*, prompt_file, code_file, program_file, verification_log_file, verbose=False, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_verify and normalize its return.
+
+    Note: cwd parameter is accepted for compatibility but not passed to run_agentic_verify
+    as it determines the working directory from prompt_file.parent internally.
+    """
+    if not prompt_file:
+        return False, "Agentic verify requires a valid prompt file.", 0.0, "agentic-cli", []
+
+    try:
+        res = run_agentic_verify(
+            prompt_file=Path(prompt_file),
+            code_file=Path(code_file),
+            program_file=Path(program_file),
+            verification_log_file=Path(verification_log_file),
+            verbose=verbose,
+            quiet=not verbose,
+            # Note: cwd is not passed - run_agentic_verify uses prompt_file.parent as project root
+        )
+        return _normalize_agentic_result(res)
+    except Exception as e:
+        return False, f"Agentic verify failed: {e}", 0.0, "agentic-cli", []
 
 # Initialize Rich Console for pretty printing
 console = Console()
@@ -49,19 +99,30 @@ def _run_program(
     if not program_path.is_file():
         return -1, f"Error: Program file not found at {program_path}"
 
-    command = ["python", str(program_path)]
+    command = [detect_host_python_executable(), str(program_path)]
     if args:
         command.extend(args)
 
     try:
+        # Run from staging root directory instead of examples/
+        # This allows imports from both pdd/ and examples/ subdirectories
+        staging_root = program_path.parent.parent # Go up from examples/ to staging root
+
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=False, # Don't raise exception for non-zero exit codes
+           env=os.environ.copy(), # Pass current environment variables
+           cwd=staging_root # Set working directory to staging root
        )
        combined_output = result.stdout + result.stderr
+
+       # Check for syntax errors
+       if result.returncode != 0 and "SyntaxError" in result.stderr:
+           return result.returncode, f"SYNTAX_ERROR: {combined_output}"
+
        return result.returncode, combined_output
     except FileNotFoundError:
         return -1, f"Error: Python interpreter not found or '{program_path}' not found."
@@ -82,6 +143,7 @@ def fix_verification_errors_loop(
     program_file: str,
     code_file: str,
     prompt: str,
+    prompt_file: str,
     verification_program: str,
     strength: float,
     temperature: float,
@@ -92,7 +154,8 @@
     output_program_path: Optional[str] = None,
     verbose: bool = False,
     program_args: Optional[list[str]] = None,
-    llm_time: float = DEFAULT_TIME # Add time parameter
+    llm_time: float = DEFAULT_TIME, # Add time parameter
+    agentic_fallback: bool = True,
 ) -> Dict[str, Any]:
     """
     Attempts to fix errors in a code file based on program execution output
@@ -102,6 +165,7 @@
         program_file: Path to the Python program exercising the code.
         code_file: Path to the code file being tested/verified.
         prompt: The prompt defining the intended behavior.
+        prompt_file: Path to the prompt file.
         verification_program: Path to a secondary program to verify code changes.
         strength: LLM model strength (0.0 to 1.0).
         temperature: LLM temperature (0.0 to 1.0).
@@ -113,6 +177,7 @@
         verbose: Enable verbose logging (default: False).
         program_args: Optional list of command-line arguments for the program_file.
         llm_time: Time parameter for fix_verification_errors calls (default: DEFAULT_TIME).
+        agentic_fallback: Enable agentic fallback if the primary fix mechanism fails.
 
     Returns:
         A dictionary containing:
@@ -124,6 +189,61 @@
             'model_name': str | None - Name of the LLM model used.
             'statistics': dict - Detailed statistics about the process.
     """
+    is_python = str(code_file).lower().endswith(".py")
+    if not is_python:
+        # For non-Python files, run the verification program to get an initial error state
+        console.print(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+        lang = get_language(os.path.splitext(code_file)[1])
+        verify_cmd = default_verify_cmd_for(lang, verification_program)
+        if not verify_cmd:
+            raise ValueError(f"No default verification command for language: {lang}")
+
+        verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True)
+        pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+        console.print("[cyan]Non-Python target detected. Triggering agentic fallback...[/cyan]")
+        verification_log_path = Path(verification_log_file)
+        verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(verification_log_path, "w") as f:
+            f.write(pytest_output)
+
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+        success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        if not success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f" • {f}")
+        final_program = ""
+        final_code = ""
+        try:
+            with open(verification_program, "r") as f:
+                final_program = f.read()
+        except Exception:
+            pass
+        try:
+            with open(code_file, "r") as f:
+                final_code = f.read()
+        except Exception:
+            pass
+        return {
+            "success": success,
+            "final_program": final_program,
+            "final_code": final_code,
+            "total_attempts": 1,
+            "total_cost": agent_cost,
+            "model_name": agent_model,
+            "statistics": {},
+        }
+
     program_path = Path(program_file).resolve()
     code_path = Path(code_file).resolve()
     verification_program_path = Path(verification_program).resolve()
@@ -145,9 +265,9 @@
     if not 0.0 <= temperature <= 1.0:
         console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
-    # Prompt requires positive max_attempts
-    if max_attempts <= 0:
-        console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
+    # max_attempts must be non-negative (0 is valid - skips LLM loop, goes straight to agentic mode)
+    if max_attempts < 0:
+        console.print(f"[bold red]Error: Max attempts must be non-negative.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
     if budget < 0:
         console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
@@ -169,6 +289,7 @@
     total_cost = 0.0
     model_name: Optional[str] = None
     overall_success = False
+    any_verification_passed = False # Track if ANY iteration passed secondary verification
     best_iteration = {
         'attempt': -1, # 0 represents initial state
         'program_backup': None,
@@ -219,128 +340,155 @@
     initial_log_entry += '</InitialState>'
     _write_log_entry(log_path, initial_log_entry)
 
+    # 3c: Check if skipping LLM assessment (max_attempts=0 means skip to agentic fallback)
+    skip_llm = (max_attempts == 0)
+
     # 3d: Call fix_verification_errors for initial assessment
     try:
-        if verbose:
-            console.print("Running initial assessment with fix_verification_errors...")
-        # Use actual strength/temp for realistic initial assessment
-        initial_fix_result = fix_verification_errors(
-            program=initial_program_content,
-            prompt=prompt,
-            code=initial_code_content,
-            output=initial_output,
-            strength=strength,
-            temperature=temperature,
-            verbose=verbose,
-            time=llm_time # Pass time
-        )
-        # 3e: Add cost
-        initial_cost = initial_fix_result.get('total_cost', 0.0)
-        total_cost += initial_cost
-        model_name = initial_fix_result.get('model_name') # Capture model name early
-        if verbose:
-            console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
-
-        # 3f: Extract initial issues
-        initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
-        stats['initial_issues'] = initial_issues_count
-        if verbose:
-            console.print(f"Initial verification issues found: {initial_issues_count}")
-            if initial_fix_result.get('explanation'):
-                console.print("Initial assessment explanation:")
-                console.print(initial_fix_result['explanation'])
-
-        # FIX: Add check for initial assessment error *before* checking success/budget
-        # Check if the fixer function returned its specific error state (None explanation/model)
-        if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
-            error_msg = "Error: Fixer returned invalid/error state during initial assessment"
-            console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
-            stats['status_message'] = error_msg
-            stats['final_issues'] = -1 # Indicate unknown/error state
-            # Write final action log for error on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-            # Return failure state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost, # May be non-zero if error occurred after some cost
-                "model_name": model_name, # May have been set before error
-                "statistics": stats,
-            }
-
-        # 3g: Initialize best iteration tracker
-        # Store original paths as the 'backup' for iteration 0
-        best_iteration = {
-            'attempt': 0, # Use 0 for initial state
-            'program_backup': str(program_path), # Path to original
-            'code_backup': str(code_path), # Path to original
-            'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
-        }
-        stats['best_iteration_num'] = 0
-        stats['best_iteration_issues'] = best_iteration['issues']
-
-        # 3h: Check for immediate success or budget exceeded
-        if initial_issues_count == 0:
-            console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
-            overall_success = True
-            stats['final_issues'] = 0
-            stats['status_message'] = 'Success on initial check'
-            stats['improvement_issues'] = 0
-            stats['improvement_percent'] = 100.0 # Reached target of 0 issues
-
-            # Write final action log for successful initial check
+        if skip_llm:
+            # Skip initial LLM assessment when max_attempts=0
+            console.print("[bold cyan]max_attempts=0: Skipping LLM assessment, proceeding to agentic fallback.[/bold cyan]")
+            # Set up state for skipping the LLM loop
+            stats['initial_issues'] = -1 # Unknown since we skipped assessment
+            stats['final_issues'] = -1
+            stats['best_iteration_num'] = -1
+            stats['best_iteration_issues'] = float('inf')
+            stats['status_message'] = 'Skipped LLM (max_attempts=0)'
+            stats['improvement_issues'] = 'N/A'
+            stats['improvement_percent'] = 'N/A'
+            overall_success = False # Trigger agentic fallback
+            final_program_content = initial_program_content
+            final_code_content = initial_code_content
+            # Write log entry for skipped LLM
            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+           final_log_entry += f' <Action>Skipped LLM assessment and loop (max_attempts=0), proceeding to agentic fallback.</Action>\n'
            final_log_entry += "</FinalActions>"
            _write_log_entry(log_path, final_log_entry)
+           # Skip to final stats (the while loop below will also be skipped since 0 < 0 is False)
+           initial_issues_count = -1 # Sentinel: unknown/not applicable when LLM assessment is skipped; kept numeric for downstream comparisons
+        else:
+            if verbose:
+                console.print("Running initial assessment with fix_verification_errors...")
+            # Use actual strength/temp for realistic initial assessment
+            initial_fix_result = fix_verification_errors(
+                program=initial_program_content,
+                prompt=prompt,
+                code=initial_code_content,
+                output=initial_output,
+                strength=strength,
+                temperature=temperature,
+                verbose=verbose,
+                time=llm_time # Pass time
+            )
+            # 3e: Add cost
+            initial_cost = initial_fix_result.get('total_cost', 0.0)
+            total_cost += initial_cost
+            model_name = initial_fix_result.get('model_name') # Capture model name early
+            if verbose:
+                console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
 
-            # Step 7 (early exit): Print stats
-            console.print("\n[bold]--- Final Statistics ---[/bold]")
-            console.print(f"Initial Issues: {stats['initial_issues']}")
-            console.print(f"Final Issues: {stats['final_issues']}")
-            console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
-            console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
-            console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
-            console.print(f"Overall Status: {stats['status_message']}")
-            console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
-            console.print(f"Total Cost: ${total_cost:.6f}")
-            console.print(f"Model Used: {model_name or 'N/A'}")
-            # Step 8 (early exit): Return
-            return {
-                "success": overall_success,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": attempts, # attempts is 0
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
+            # 3f: Extract initial issues
+            initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
+            stats['initial_issues'] = initial_issues_count
+            if verbose:
+                console.print(f"Initial verification issues found: {initial_issues_count}")
+                if initial_fix_result.get('explanation'):
+                    console.print("Initial assessment explanation:")
+                    console.print(initial_fix_result['explanation'])
+
+        # The following checks only apply when we ran the LLM assessment (not skipped)
+        if not skip_llm:
+            # FIX: Add check for initial assessment error *before* checking success/budget
+            # Check if the fixer function returned its specific error state (None explanation/model)
+            if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
+                error_msg = "Error: Fixer returned invalid/error state during initial assessment"
+                console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
+                stats['status_message'] = error_msg
+                stats['final_issues'] = -1 # Indicate unknown/error state
+                # Write final action log for error on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+                # Return failure state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost, # May be non-zero if error occurred after some cost
+                    "model_name": model_name, # May have been set before error
+                    "statistics": stats,
+                }
+
+            # 3g: Initialize best iteration tracker
+            # Store original paths as the 'backup' for iteration 0
+            best_iteration = {
+                'attempt': 0, # Use 0 for initial state
+                'program_backup': str(program_path), # Path to original
+                'code_backup': str(code_path), # Path to original
+                'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
            }
-        elif total_cost >= budget:
-            console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
-            stats['status_message'] = 'Budget exceeded on initial check'
-            stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
-
-            # Write final action log for budget exceeded on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-
-            # No changes made, return initial state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
-            }
+            stats['best_iteration_num'] = 0
+            stats['best_iteration_issues'] = best_iteration['issues']
+
+            # 3h: Check for immediate success or budget exceeded
+            if initial_issues_count == 0:
+                console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
+                overall_success = True
+                stats['final_issues'] = 0
+                stats['status_message'] = 'Success on initial check'
+                stats['improvement_issues'] = 0
+                stats['improvement_percent'] = 100.0 # Reached target of 0 issues
+
+                # Write final action log for successful initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # Step 7 (early exit): Print stats
+                console.print("\n[bold]--- Final Statistics ---[/bold]")
+                console.print(f"Initial Issues: {stats['initial_issues']}")
+                console.print(f"Final Issues: {stats['final_issues']}")
+                console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
+                console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
+                console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
+                console.print(f"Overall Status: {stats['status_message']}")
+                console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
+                console.print(f"Total Cost: ${total_cost:.6f}")
+                console.print(f"Model Used: {model_name or 'N/A'}")
+                # Step 8 (early exit): Return
+                return {
+                    "success": overall_success,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": attempts, # attempts is 0
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
+            elif total_cost >= budget:
+                console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
+                stats['status_message'] = 'Budget exceeded on initial check'
+                stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
+
+                # Write final action log for budget exceeded on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # No changes made, return initial state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
 
     except Exception as e:
         console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
@@ -580,6 +728,9 @@ def fix_verification_errors_loop(
 
         # Now, decide outcome based on issue count and verification status
         if secondary_verification_passed:
+            # Only track as "verification passed" if code was actually changed and verified
+            if code_updated:
+                any_verification_passed = True # Track that at least one verification passed
            # Update best iteration if current attempt is better
            if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                if verbose:
@@ -722,8 +873,14 @@
                 if verbose:
                     console.print(f"Restored {program_path} from {best_program_path}")
                     console.print(f"Restored {code_path} from {best_code_path}")
-                # Final issues count is the best achieved count
-                stats['final_issues'] = best_iteration['issues']
+                # Only mark as success if verification actually passed
+                # (best_iteration is only updated when secondary verification passes,
+                # but we double-check with any_verification_passed for safety)
+                if any_verification_passed:
+                    stats['final_issues'] = 0
+                    overall_success = True
+                else:
+                    stats['final_issues'] = best_iteration['issues']
             else:
                 console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
                 final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
@@ -737,6 +894,15 @@
             stats['status_message'] += f' - Error restoring best iteration: {e}'
             stats['final_issues'] = -1 # Indicate uncertainty
 
+    # If verification passed (even if issue count didn't decrease), consider it success
+    elif any_verification_passed:
+        console.print("[green]Verification passed. Keeping current state.[/green]")
+        final_log_entry += f' <Action>Verification passed; keeping current state.</Action>\n'
+        # Verification passed = code works, so final issues is effectively 0
+        stats['final_issues'] = 0
+        stats['status_message'] = 'Success - verification passed'
+        overall_success = True
+
     # If no improvement was made or recorded (best is still initial state or worse)
     elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
         console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
@@ -851,6 +1017,36 @@
     if final_known and stats['final_issues'] != 0:
         overall_success = False
 
+    if not overall_success and agentic_fallback:
+        console.print(f"[bold yellow]Initiating agentic fallback (prompt_file={prompt_file!r})...[/bold yellow]")
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f" • {f}")
+        if agent_success:
+            console.print("[bold green]Agentic fallback successful.[/bold green]")
+            overall_success = True
+            model_name = agent_model or model_name
+            try:
+                final_code_content = Path(code_file).read_text(encoding="utf-8")
+                final_program_content = Path(program_file).read_text(encoding="utf-8")
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+        else:
+            console.print("[bold red]Agentic fallback failed.[/bold red]")
+
     return {
         "success": overall_success,
         "final_program": final_program_content,
@@ -859,153 +1055,4 @@
         "total_cost": total_cost,
         "model_name": model_name,
         "statistics": stats,
-    }
-
-# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
-if __name__ == "__main__":
-    # Create dummy files for demonstration
-    # In a real scenario, these files would exist and contain actual code/programs.
-    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
-    temp_dir = Path("./temp_fix_verification_loop")
-    temp_dir.mkdir(exist_ok=True)
-
-    program_file = temp_dir / "my_program.py"
-    code_file = temp_dir / "my_code_module.py"
-    verification_program_file = temp_dir / "verify_syntax.py"
-
-    program_file.write_text("""
-import my_code_module
-import sys
-# Simulate using the module and checking output
-val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
-result = my_code_module.process(val)
-expected = val * 2
-print(f"Input: {val}")
-print(f"Result: {result}")
-print(f"Expected: {expected}")
-if result == expected:
-    print("VERIFICATION_SUCCESS")
-else:
-    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
-""", encoding="utf-8")
-
-    # Initial code with a bug
-    code_file.write_text("""
-# my_code_module.py
-def process(x):
-    # Bug: should be x * 2
-    return x + 2
-""", encoding="utf-8")
-
-    # Simple verification program (e.g., syntax check)
-    verification_program_file.write_text("""
-import sys
-import py_compile
-import os
-# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
-code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
-print(f"Checking syntax of: {code_to_check}")
-try:
-    py_compile.compile(code_to_check, doraise=True)
-    print("Syntax OK.")
-    sys.exit(0) # Success
-except py_compile.PyCompileError as e:
-    print(f"Syntax Error: {e}")
-    sys.exit(1) # Failure
-except Exception as e:
-    print(f"Verification Error: {e}")
-    sys.exit(1) # Failure
-""", encoding="utf-8")
-    # Set environment variable for the verification script
-    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
-
-
-    # --- Mock fix_verification_errors ---
-    # This is crucial for testing without actual LLM calls / costs
-    # In a real test suite, use unittest.mock
-    _original_fix_verification_errors = fix_verification_errors
-    _call_count = 0
-
-    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
-        global _call_count
-        _call_count += 1
-        cost = 0.001 * _call_count # Simulate increasing cost
-        model = "mock_model_v1"
-        explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
-        issues_count = 1 # Assume 1 issue initially
-
-        fixed_program = program # Assume program doesn't need fixing
-        fixed_code = code
-
-        # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
-        if "VERIFICATION_FAILURE" in output and _call_count >= 2:
-            explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
-            fixed_code = """
-# my_code_module.py
-def process(x):
-    # Fixed: should be x * 2
-    return x * 2
-"""
-            issues_count = 0 # Fixed!
-        elif "VERIFICATION_SUCCESS" in output:
-            explanation = ["Output indicates VERIFICATION_SUCCESS."]
-            issues_count = 0 # Already correct
-
-        return {
-            'explanation': explanation,
-            'fixed_program': fixed_program,
-            'fixed_code': fixed_code,
-            'total_cost': cost,
-            'model_name': model,
-            'verification_issues_count': issues_count,
-        }
-
-    # Replace the real function with the mock
-    # In package context, you might need to patch differently
-    # For this script execution:
-    # Note: This direct replacement might not work if the function is imported
-    # using `from .fix_verification_errors import fix_verification_errors`.
-    # A proper mock framework (`unittest.mock.patch`) is better.
-    # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
-    # This is fragile. A better approach involves dependency injection or mocking frameworks.
-    # HACK: Re-assigning the imported name in the global scope of this script
-    globals()['fix_verification_errors'] = mock_fix_verification_errors
-
-
-    console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
-
-    # Example program_args: Pass input value 10 and another arg 5
-    # Note: The example program only uses the first arg sys.argv[1]
-    example_args = ["10", "another_arg"]
-
-    results = fix_verification_errors_loop(
-        program_file=str(program_file),
-        code_file=str(code_file),
-        prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
-        verification_program=str(verification_program_file),
-        strength=0.5,
-        temperature=0.1,
-        max_attempts=3,
-        budget=0.10, # Set a budget
-        verification_log_file=str(temp_dir / "test_verification.log"),
-        verbose=True,
-        program_args=example_args
-    )
-
-    console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
-    console.print(f"Success: {results['success']}")
-    console.print(f"Total Attempts: {results['total_attempts']}")
-    console.print(f"Total Cost: ${results['total_cost']:.6f}")
-    console.print(f"Model Name: {results['model_name']}")
-    # console.print(f"Final Program:\n{results['final_program']}") # Can be long
-    console.print(f"Final Code:\n{results['final_code']}")
-    console.print(f"Statistics:\n{results['statistics']}")
-
-    # Restore original function if needed elsewhere
-    globals()['fix_verification_errors'] = _original_fix_verification_errors
-
-    # Clean up dummy files
-    # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
-    # shutil.rmtree(temp_dir)
-    console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
-    console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")
+    }
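
For orientation only, a minimal sketch of calling the updated function with the two arguments this diff adds (prompt_file and agentic_fallback). It is not part of the package: the file paths, prompt text, and numeric values are hypothetical, and only the keyword names, defaults, and result keys visible in the diff above are assumed.

# Hypothetical usage sketch based on the 0.0.90 signature shown above.
from pdd.fix_verification_errors_loop import fix_verification_errors_loop

result = fix_verification_errors_loop(
    program_file="examples/run_module.py",            # hypothetical path
    code_file="src/my_module.py",                      # hypothetical path
    prompt="Implement process(x) returning x * 2.",    # hypothetical prompt text
    prompt_file="prompts/my_module_python.prompt",     # new in 0.0.90; also locates the agent's working directory
    verification_program="examples/verify_module.py",  # hypothetical path
    strength=0.5,
    temperature=0.0,
    max_attempts=3,          # per the diff, 0 skips the LLM loop and goes straight to the agentic fallback
    budget=1.0,
    verification_log_file="verification.log",
    verbose=True,
    agentic_fallback=True,   # new in 0.0.90; runs agentic verify when the LLM loop does not succeed
)
print(result["success"], result["total_attempts"], result["total_cost"], result["model_name"])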