pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
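The bulk of the diff below is pdd/fix_verification_errors_loop.py, which gains a required prompt_file argument and two new keyword flags. A minimal sketch of a call against the new signature, assuming the parameters shown in the diff; every path and the prompt string are hypothetical placeholders:

```python
# Hypothetical invocation of the updated loop; parameter names are taken from
# the diff below, but all paths and the prompt text are made-up examples.
from pdd.fix_verification_errors_loop import fix_verification_errors_loop

result = fix_verification_errors_loop(
    program_file="examples/run_module.py",          # hypothetical path
    code_file="src/my_module.py",                   # hypothetical path
    prompt="Implement process(x) that returns x * 2.",
    prompt_file="prompts/my_module_python.prompt",  # new required argument
    verification_program="tests/verify_module.py",  # hypothetical path
    strength=0.5,
    temperature=0.0,
    max_attempts=3,         # 0 now skips the LLM loop and goes straight to agentic mode
    budget=0.25,
    verification_log_file=".pdd/verify.log",
    agentic_fallback=True,  # new flag: agentic retry when the LLM loop fails
    use_cloud=False,        # new flag: route fix calls to the cloud verifyCode endpoint
)
print(result["success"], result["total_attempts"], f"${result['total_cost']:.4f}")
```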
@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import subprocess
@@ -8,6 +9,8 @@ from typing import Dict, Tuple, Any, Optional
 from xml.sax.saxutils import escape
 import time
 
+import requests
+
 from rich.console import Console
 
 # Use relative import assuming fix_verification_errors is in the same package
@@ -27,6 +30,126 @@ except ImportError:
 
 from . import DEFAULT_TIME # Import DEFAULT_TIME
 from .python_env_detector import detect_host_python_executable
+from .get_language import get_language
+from .agentic_langtest import default_verify_cmd_for
+from .agentic_verify import run_agentic_verify
+
+# Cloud configuration
+try:
+    from .core.cloud import CloudConfig
+    CLOUD_AVAILABLE = True
+except ImportError:
+    CLOUD_AVAILABLE = False
+    CloudConfig = None
+
+# Cloud request timeout for verify fix
+CLOUD_REQUEST_TIMEOUT = 400 # seconds
+
+
+def cloud_verify_fix(
+    program: str,
+    prompt: str,
+    code: str,
+    output: str,
+    strength: float,
+    temperature: float,
+    time_param: float,
+    verbose: bool,
+    language: str = "python",
+) -> Dict[str, Any]:
+    """
+    Call cloud verifyCode endpoint for LLM verification fix.
+
+    Returns:
+        Dict with keys: fixed_code, fixed_program, explanation, verification_issues_count, total_cost, model_name
+    """
+    if not CLOUD_AVAILABLE or CloudConfig is None:
+        raise RuntimeError("Cloud configuration not available")
+
+    jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+    if not jwt_token:
+        raise RuntimeError("Cloud authentication failed - no JWT token")
+
+    payload = {
+        "programContent": program,
+        "promptContent": prompt,
+        "codeContent": code,
+        "outputContent": output,
+        "language": language,
+        "strength": strength,
+        "temperature": temperature,
+        "time": time_param if time_param is not None else 0.25,
+        "verbose": verbose,
+    }
+
+    headers = {
+        "Authorization": f"Bearer {jwt_token}",
+        "Content-Type": "application/json"
+    }
+    cloud_url = CloudConfig.get_endpoint_url("verifyCode")
+
+    response = requests.post(
+        cloud_url,
+        json=payload,
+        headers=headers,
+        timeout=CLOUD_REQUEST_TIMEOUT
+    )
+    response.raise_for_status()
+
+    response_data = response.json()
+    return {
+        "fixed_code": response_data.get("fixedCode", code),
+        "fixed_program": response_data.get("fixedProgram", program),
+        "explanation": response_data.get("explanation", ""),
+        "verification_issues_count": response_data.get("issuesCount", 0),
+        "total_cost": float(response_data.get("totalCost", 0.0)),
+        "model_name": response_data.get("modelName", "cloud_model"),
+    }
+
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_verify result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_verify(*, prompt_file, code_file, program_file, verification_log_file, verbose=False, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_verify and normalize its return.
+
+    Note: cwd parameter is accepted for compatibility but not passed to run_agentic_verify
+    as it determines the working directory from prompt_file.parent internally.
+    """
+    if not prompt_file:
+        return False, "Agentic verify requires a valid prompt file.", 0.0, "agentic-cli", []
+
+    try:
+        res = run_agentic_verify(
+            prompt_file=Path(prompt_file),
+            code_file=Path(code_file),
+            program_file=Path(program_file),
+            verification_log_file=Path(verification_log_file),
+            verbose=verbose,
+            quiet=not verbose,
+            # Note: cwd is not passed - run_agentic_verify uses prompt_file.parent as project root
+        )
+        return _normalize_agentic_result(res)
+    except Exception as e:
+        return False, f"Agentic verify failed: {e}", 0.0, "agentic-cli", []
 
 # Initialize Rich Console for pretty printing
 console = Console()
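The `_normalize_agentic_result` helper added above collapses several legacy return shapes into one 5-tuple. A standalone sketch of that behavior, reimplemented here so it runs outside the package (the real helper is in the hunk above):

```python
# Self-contained illustration of the tuple normalization performed by the new
# _normalize_agentic_result helper; `normalize` is a local stand-in name.
def normalize(result):
    if isinstance(result, tuple):
        if len(result) == 5:
            ok, msg, cost, model, changed = result
            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed or [])
        if len(result) == 4:
            ok, msg, cost, model = result
            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
        if len(result) == 3:
            ok, msg, cost = result
            return bool(ok), str(msg), float(cost), "agentic-cli", []
        if len(result) == 2:
            ok, msg = result
            return bool(ok), str(msg), 0.0, "agentic-cli", []
    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []

# Older monkeypatched shapes all collapse to the 5-tuple:
assert normalize((True, "ok")) == (True, "ok", 0.0, "agentic-cli", [])
assert normalize((True, "ok", 0.02, None)) == (True, "ok", 0.02, "agentic-cli", [])
```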
@@ -56,7 +179,7 @@ def _run_program(
     command.extend(args)
 
     try:
-        # Run from staging root directory instead of examples/ directory
+        # Run from staging root directory instead of examples/
         # This allows imports from both pdd/ and examples/ subdirectories
         staging_root = program_path.parent.parent # Go up from examples/ to staging root
 
@@ -95,6 +218,7 @@ def fix_verification_errors_loop(
     program_file: str,
     code_file: str,
     prompt: str,
+    prompt_file: str,
     verification_program: str,
     strength: float,
     temperature: float,
@@ -105,16 +229,24 @@ def fix_verification_errors_loop(
     output_program_path: Optional[str] = None,
     verbose: bool = False,
     program_args: Optional[list[str]] = None,
-    llm_time: float = DEFAULT_TIME # Add time parameter
+    llm_time: float = DEFAULT_TIME, # Add time parameter
+    agentic_fallback: bool = True,
+    use_cloud: bool = False,
 ) -> Dict[str, Any]:
     """
     Attempts to fix errors in a code file based on program execution output
     against the prompt's intent, iterating multiple times with secondary verification.
 
+    Hybrid Cloud Support:
+        When use_cloud=True, the LLM fix calls are routed to the cloud verifyCode endpoint
+        while local program execution stays local. This allows the loop to pass local
+        verification results to the cloud for analysis and fixes.
+
     Args:
         program_file: Path to the Python program exercising the code.
         code_file: Path to the code file being tested/verified.
         prompt: The prompt defining the intended behavior.
+        prompt_file: Path to the prompt file.
         verification_program: Path to a secondary program to verify code changes.
         strength: LLM model strength (0.0 to 1.0).
         temperature: LLM temperature (0.0 to 1.0).
@@ -126,6 +258,8 @@ def fix_verification_errors_loop(
         verbose: Enable verbose logging (default: False).
         program_args: Optional list of command-line arguments for the program_file.
         llm_time: Time parameter for fix_verification_errors calls (default: DEFAULT_TIME).
+        agentic_fallback: Enable agentic fallback if the primary fix mechanism fails.
+        use_cloud: If True, use cloud LLM for fix calls while keeping verification execution local.
 
     Returns:
         A dictionary containing:
@@ -137,6 +271,108 @@ def fix_verification_errors_loop(
         'model_name': str | None - Name of the LLM model used.
         'statistics': dict - Detailed statistics about the process.
     """
+    is_python = str(code_file).lower().endswith(".py")
+    if not is_python:
+        # For non-Python files, run the verification program to get an initial error state
+        console.print(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+        lang = get_language(os.path.splitext(code_file)[1])
+        verify_cmd = default_verify_cmd_for(lang, verification_program)
+        if not verify_cmd:
+            # No verify command available (e.g., Java without maven/gradle).
+            # Trigger agentic fallback directly.
+            console.print(f"[cyan]No verification command for {lang}. Triggering agentic fallback directly...[/cyan]")
+            verification_log_path = Path(verification_log_file)
+            verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+            # Create minimal error log if it doesn't exist
+            if not verification_log_path.exists() or verification_log_path.stat().st_size == 0:
+                with open(verification_log_path, "w") as f:
+                    f.write(f"No verification command available for language: {lang}\n")
+                    f.write("Agentic fix will attempt to resolve the issue.\n")
+
+            agent_cwd = Path(prompt_file).parent if prompt_file else None
+            console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+            success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+                prompt_file=prompt_file,
+                code_file=code_file,
+                program_file=verification_program,
+                verification_log_file=verification_log_file,
+                verbose=verbose,
+                cwd=agent_cwd,
+            )
+            if not success:
+                console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+            if agent_changed_files:
+                console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                for f in agent_changed_files:
+                    console.print(f"  • {f}")
+            final_program = ""
+            final_code = ""
+            try:
+                with open(verification_program, "r") as f:
+                    final_program = f.read()
+            except Exception:
+                pass
+            try:
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception:
+                pass
+            return {
+                "success": success,
+                "final_program": final_program,
+                "final_code": final_code,
+                "total_attempts": 1,
+                "total_cost": agent_cost,
+                "model_name": agent_model,
+                "statistics": {},
+            }
+
+        verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True)
+        pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+        console.print("[cyan]Non-Python target detected. Triggering agentic fallback...[/cyan]")
+        verification_log_path = Path(verification_log_file)
+        verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(verification_log_path, "w") as f:
+            f.write(pytest_output)
+
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+        success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        if not success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f"  • {f}")
+        final_program = ""
+        final_code = ""
+        try:
+            with open(verification_program, "r") as f:
+                final_program = f.read()
+        except Exception:
+            pass
+        try:
+            with open(code_file, "r") as f:
+                final_code = f.read()
+        except Exception:
+            pass
+        return {
+            "success": success,
+            "final_program": final_program,
+            "final_code": final_code,
+            "total_attempts": 1,
+            "total_cost": agent_cost,
+            "model_name": agent_model,
+            "statistics": {},
+        }
+
     program_path = Path(program_file).resolve()
     code_path = Path(code_file).resolve()
     verification_program_path = Path(verification_program).resolve()
@@ -158,9 +394,9 @@ def fix_verification_errors_loop(
     if not 0.0 <= temperature <= 1.0:
         console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
-    # Prompt requires positive max_attempts
-    if max_attempts <= 0:
-        console.print(f"[bold red]Error: Max attempts must be positive.[/bold red]")
+    # max_attempts must be non-negative (0 is valid - skips LLM loop, goes straight to agentic mode)
+    if max_attempts < 0:
+        console.print(f"[bold red]Error: Max attempts must be non-negative.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
     if budget < 0:
         console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
@@ -182,6 +418,7 @@ def fix_verification_errors_loop(
     total_cost = 0.0
     model_name: Optional[str] = None
     overall_success = False
+    any_verification_passed = False # Track if ANY iteration passed secondary verification
     best_iteration = {
         'attempt': -1, # 0 represents initial state
         'program_backup': None,
@@ -202,6 +439,11 @@ def fix_verification_errors_loop(
     program_contents = "" # Keep track of current contents
     code_contents = "" # Keep track of current contents
 
+    # Create backup directory in .pdd/backups/ to avoid polluting code/test directories
+    backup_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    backup_dir = Path.cwd() / '.pdd' / 'backups' / code_path.stem / backup_timestamp
+    backup_dir.mkdir(parents=True, exist_ok=True)
+
     # --- Step 3: Determine Initial State ---
     if verbose:
         console.print("[bold cyan]Step 3: Determining Initial State...[/bold cyan]")
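The backup directory added above keeps per-iteration copies out of the source tree. A small sketch of the resulting layout, using a hypothetical module stem and timestamp (attempt-numbered filenames follow the later @@ -404,9 +718,9 @@ hunk):

```python
# Illustration of the backup layout created in the hunk above; the stem and
# timestamp values are hypothetical examples, not output from a real run.
from pathlib import Path

backup_dir = Path.cwd() / '.pdd' / 'backups' / 'my_code_module' / '20250101_120000'
program_backup = backup_dir / 'program_1.py'  # backup before attempt 1
code_backup = backup_dir / 'code_1.py'
print(program_backup)  # <cwd>/.pdd/backups/my_code_module/20250101_120000/program_1.py
```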
@@ -216,6 +458,21 @@
         stats['status_message'] = f'Error reading initial files: {e}' # Add status message
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": stats}
 
+    # 3a-pre: Validate code file is not empty (prevents infinite loops with empty content)
+    if not initial_code_content or len(initial_code_content.strip()) == 0:
+        error_msg = f"Code file is empty or contains only whitespace: {code_path}"
+        console.print(f"[bold red]Error: {error_msg}[/bold red]")
+        stats['status_message'] = f'Error: Code file is empty - cannot verify'
+        return {
+            "success": False,
+            "final_program": initial_program_content,
+            "final_code": "",
+            "total_attempts": 0,
+            "total_cost": 0.0,
+            "model_name": None,
+            "statistics": stats
+        }
+
     # 3a: Run initial program with args
     initial_return_code, initial_output = _run_program(program_path, args=program_args)
     if verbose:
@@ -232,128 +489,185 @@
     initial_log_entry += '</InitialState>'
     _write_log_entry(log_path, initial_log_entry)
 
+    # 3c: Check if skipping LLM assessment (max_attempts=0 means skip to agentic fallback)
+    skip_llm = (max_attempts == 0)
+
     # 3d: Call fix_verification_errors for initial assessment
     try:
-        if verbose:
-            console.print("Running initial assessment with fix_verification_errors...")
-        # Use actual strength/temp for realistic initial assessment
-        initial_fix_result = fix_verification_errors(
-            program=initial_program_content,
-            prompt=prompt,
-            code=initial_code_content,
-            output=initial_output,
-            strength=strength,
-            temperature=temperature,
-            verbose=verbose,
-            time=llm_time # Pass time
-        )
-        # 3e: Add cost
-        initial_cost = initial_fix_result.get('total_cost', 0.0)
-        total_cost += initial_cost
-        model_name = initial_fix_result.get('model_name') # Capture model name early
-        if verbose:
-            console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
-
-        # 3f: Extract initial issues
-        initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
-        stats['initial_issues'] = initial_issues_count
-        if verbose:
-            console.print(f"Initial verification issues found: {initial_issues_count}")
-            if initial_fix_result.get('explanation'):
-                console.print("Initial assessment explanation:")
-                console.print(initial_fix_result['explanation'])
-
-        # FIX: Add check for initial assessment error *before* checking success/budget
-        # Check if the fixer function returned its specific error state (None explanation/model)
-        if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
-            error_msg = "Error: Fixer returned invalid/error state during initial assessment"
-            console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
-            stats['status_message'] = error_msg
-            stats['final_issues'] = -1 # Indicate unknown/error state
-            # Write final action log for error on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-            # Return failure state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost, # May be non-zero if error occurred after some cost
-                "model_name": model_name, # May have been set before error
-                "statistics": stats,
-            }
-
-        # 3g: Initialize best iteration tracker
-        # Store original paths as the 'backup' for iteration 0
-        best_iteration = {
-            'attempt': 0, # Use 0 for initial state
-            'program_backup': str(program_path), # Path to original
-            'code_backup': str(code_path), # Path to original
-            'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
-        }
-        stats['best_iteration_num'] = 0
-        stats['best_iteration_issues'] = best_iteration['issues']
-
-        # 3h: Check for immediate success or budget exceeded
-        if initial_issues_count == 0:
-            console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
-            overall_success = True
-            stats['final_issues'] = 0
-            stats['status_message'] = 'Success on initial check'
-            stats['improvement_issues'] = 0
-            stats['improvement_percent'] = 100.0 # Reached target of 0 issues
-
-            # Write final action log for successful initial check
+        if skip_llm:
+            # Skip initial LLM assessment when max_attempts=0
+            console.print("[bold cyan]max_attempts=0: Skipping LLM assessment, proceeding to agentic fallback.[/bold cyan]")
+            # Set up state for skipping the LLM loop
+            stats['initial_issues'] = -1 # Unknown since we skipped assessment
+            stats['final_issues'] = -1
+            stats['best_iteration_num'] = -1
+            stats['best_iteration_issues'] = float('inf')
+            stats['status_message'] = 'Skipped LLM (max_attempts=0)'
+            stats['improvement_issues'] = 'N/A'
+            stats['improvement_percent'] = 'N/A'
+            overall_success = False # Trigger agentic fallback
+            final_program_content = initial_program_content
+            final_code_content = initial_code_content
+            # Write log entry for skipped LLM
             final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+            final_log_entry += f' <Action>Skipped LLM assessment and loop (max_attempts=0), proceeding to agentic fallback.</Action>\n'
             final_log_entry += "</FinalActions>"
             _write_log_entry(log_path, final_log_entry)
+            # Skip to final stats (the while loop below will also be skipped since 0 < 0 is False)
+            initial_issues_count = -1 # Sentinel: unknown/not applicable when LLM assessment is skipped; kept numeric for downstream comparisons
+        else:
+            if verbose:
+                console.print("Running initial assessment with fix_verification_errors...")
+            # Use actual strength/temp for realistic initial assessment
+            # Use cloud or local based on use_cloud parameter
+            if use_cloud:
+                try:
+                    initial_fix_result = cloud_verify_fix(
+                        program=initial_program_content,
+                        prompt=prompt,
+                        code=initial_code_content,
+                        output=initial_output,
+                        strength=strength,
+                        temperature=temperature,
+                        time_param=llm_time,
+                        verbose=verbose,
+                        language="python" if is_python else get_language(os.path.splitext(code_file)[1]),
+                    )
+                    if verbose:
+                        console.print(f"[cyan]Cloud verify fix completed.[/cyan]")
+                except (requests.exceptions.RequestException, RuntimeError) as cloud_err:
+                    # Cloud failed - fall back to local
+                    console.print(f"[yellow]Cloud verify fix failed: {cloud_err}. Falling back to local.[/yellow]")
+                    initial_fix_result = fix_verification_errors(
+                        program=initial_program_content,
+                        prompt=prompt,
+                        code=initial_code_content,
+                        output=initial_output,
+                        strength=strength,
+                        temperature=temperature,
+                        verbose=verbose,
+                        time=llm_time
+                    )
+            else:
+                initial_fix_result = fix_verification_errors(
+                    program=initial_program_content,
+                    prompt=prompt,
+                    code=initial_code_content,
+                    output=initial_output,
+                    strength=strength,
+                    temperature=temperature,
+                    verbose=verbose,
+                    time=llm_time # Pass time
+                )
+            # 3e: Add cost
+            initial_cost = initial_fix_result.get('total_cost', 0.0)
+            total_cost += initial_cost
+            model_name = initial_fix_result.get('model_name') # Capture model name early
+            if verbose:
+                console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
 
-        # Step 7 (early exit): Print stats
-        console.print("\n[bold]--- Final Statistics ---[/bold]")
-        console.print(f"Initial Issues: {stats['initial_issues']}")
-        console.print(f"Final Issues: {stats['final_issues']}")
-        console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
-        console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
-        console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
-        console.print(f"Overall Status: {stats['status_message']}")
-        console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
-        console.print(f"Total Cost: ${total_cost:.6f}")
-        console.print(f"Model Used: {model_name or 'N/A'}")
-        # Step 8 (early exit): Return
-        return {
-            "success": overall_success,
-            "final_program": initial_program_content,
-            "final_code": initial_code_content,
-            "total_attempts": attempts, # attempts is 0
-            "total_cost": total_cost,
-            "model_name": model_name,
-            "statistics": stats,
+            # 3f: Extract initial issues
+            initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
+            stats['initial_issues'] = initial_issues_count
+            if verbose:
+                console.print(f"Initial verification issues found: {initial_issues_count}")
+                if initial_fix_result.get('explanation'):
+                    console.print("Initial assessment explanation:")
+                    console.print(initial_fix_result['explanation'])
+
+        # The following checks only apply when we ran the LLM assessment (not skipped)
+        if not skip_llm:
+            # FIX: Add check for initial assessment error *before* checking success/budget
+            # Check if the fixer function returned its specific error state (None explanation/model)
+            if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
+                error_msg = "Error: Fixer returned invalid/error state during initial assessment"
+                console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
+                stats['status_message'] = error_msg
+                stats['final_issues'] = -1 # Indicate unknown/error state
+                # Write final action log for error on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+                # Return failure state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost, # May be non-zero if error occurred after some cost
+                    "model_name": model_name, # May have been set before error
+                    "statistics": stats,
+                }
+
+            # 3g: Initialize best iteration tracker
+            # Store original paths as the 'backup' for iteration 0
+            best_iteration = {
+                'attempt': 0, # Use 0 for initial state
+                'program_backup': str(program_path), # Path to original
+                'code_backup': str(code_path), # Path to original
+                'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
             }
-        elif total_cost >= budget:
-            console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
-            stats['status_message'] = 'Budget exceeded on initial check'
-            stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
-
-            # Write final action log for budget exceeded on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-
-            # No changes made, return initial state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
-            }
+            stats['best_iteration_num'] = 0
+            stats['best_iteration_issues'] = best_iteration['issues']
+
+            # 3h: Check for immediate success or budget exceeded
+            if initial_issues_count == 0:
+                console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
+                overall_success = True
+                stats['final_issues'] = 0
+                stats['status_message'] = 'Success on initial check'
+                stats['improvement_issues'] = 0
+                stats['improvement_percent'] = 100.0 # Reached target of 0 issues
+
+                # Write final action log for successful initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # Step 7 (early exit): Print stats
+                console.print("\n[bold]--- Final Statistics ---[/bold]")
+                console.print(f"Initial Issues: {stats['initial_issues']}")
+                console.print(f"Final Issues: {stats['final_issues']}")
+                console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
+                console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
+                console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
+                console.print(f"Overall Status: {stats['status_message']}")
+                console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
+                console.print(f"Total Cost: ${total_cost:.6f}")
+                console.print(f"Model Used: {model_name or 'N/A'}")
+                # Step 8 (early exit): Return
+                return {
+                    "success": overall_success,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": attempts, # attempts is 0
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
+            elif total_cost >= budget:
+                console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
+                stats['status_message'] = 'Budget exceeded on initial check'
+                stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
+
+                # Write final action log for budget exceeded on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # No changes made, return initial state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
 
     except Exception as e:
         console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
@@ -404,9 +718,9 @@ def fix_verification_errors_loop(
         # code_contents = code_path.read_text(encoding="utf-8")
         # except IOError as e: ...
 
-        # 4d: Create backups
-        program_backup_path = program_path.with_stem(f"{program_path.stem}_iteration_{current_attempt}").with_suffix(program_path.suffix)
-        code_backup_path = code_path.with_stem(f"{code_path.stem}_iteration_{current_attempt}").with_suffix(code_path.suffix)
+        # 4d: Create backups in .pdd/backups/ (backup_dir already created above)
+        program_backup_path = backup_dir / f"program_{current_attempt}{program_path.suffix}"
+        code_backup_path = backup_dir / f"code_{current_attempt}{code_path.suffix}"
         try:
            # Copy from the *current* state before this iteration's fix
            program_path.write_text(program_contents, encoding="utf-8") # Ensure file matches memory state
@@ -426,7 +740,7 @@
             stats['status_message'] = f'Error creating backups on attempt {current_attempt}'
             break # Don't proceed without backups
 
-        # 4e: Call fix_verification_errors
+        # 4e: Call fix_verification_errors (cloud or local based on use_cloud parameter)
         iteration_log_xml += f' <InputsToFixer>\n'
         iteration_log_xml += f' <Program>{escape(program_contents)}</Program>\n'
         iteration_log_xml += f' <Code>{escape(code_contents)}</Code>\n'
@@ -438,16 +752,46 @@
         try:
             if verbose:
                 console.print("Calling fix_verification_errors...")
-            fix_result = fix_verification_errors(
-                program=program_contents,
-                prompt=prompt,
-                code=code_contents,
-                output=program_output,
-                strength=strength,
-                temperature=temperature,
-                verbose=verbose,
-                time=llm_time # Pass time
-            )
+            # Use cloud or local based on use_cloud parameter
+            if use_cloud:
+                try:
+                    fix_result = cloud_verify_fix(
+                        program=program_contents,
+                        prompt=prompt,
+                        code=code_contents,
+                        output=program_output,
+                        strength=strength,
+                        temperature=temperature,
+                        time_param=llm_time,
+                        verbose=verbose,
+                        language="python" if is_python else get_language(os.path.splitext(code_file)[1]),
+                    )
+                    if verbose:
+                        console.print(f"[cyan]Cloud verify fix completed.[/cyan]")
+                except (requests.exceptions.RequestException, RuntimeError) as cloud_err:
+                    # Cloud failed - fall back to local
+                    console.print(f"[yellow]Cloud verify fix failed: {cloud_err}. Falling back to local.[/yellow]")
+                    fix_result = fix_verification_errors(
+                        program=program_contents,
+                        prompt=prompt,
+                        code=code_contents,
+                        output=program_output,
+                        strength=strength,
+                        temperature=temperature,
+                        verbose=verbose,
+                        time=llm_time
+                    )
+            else:
+                fix_result = fix_verification_errors(
+                    program=program_contents,
+                    prompt=prompt,
+                    code=code_contents,
+                    output=program_output,
+                    strength=strength,
+                    temperature=temperature,
+                    verbose=verbose,
+                    time=llm_time # Pass time
+                )
 
             # 4f: Add cost
             attempt_cost = fix_result.get('total_cost', 0.0)
@@ -593,6 +937,9 @@
 
         # Now, decide outcome based on issue count and verification status
         if secondary_verification_passed:
+            # Only track as "verification passed" if code was actually changed and verified
+            if code_updated:
+                any_verification_passed = True # Track that at least one verification passed
             # Update best iteration if current attempt is better
             if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                 if verbose:
@@ -735,8 +1082,14 @@
             if verbose:
                 console.print(f"Restored {program_path} from {best_program_path}")
                 console.print(f"Restored {code_path} from {best_code_path}")
-            # Final issues count is the best achieved count
-            stats['final_issues'] = best_iteration['issues']
+            # Only mark as success if verification actually passed
+            # (best_iteration is only updated when secondary verification passes,
+            # but we double-check with any_verification_passed for safety)
+            if any_verification_passed:
+                stats['final_issues'] = 0
+                overall_success = True
+            else:
+                stats['final_issues'] = best_iteration['issues']
         else:
             console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
             final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
@@ -750,6 +1103,15 @@
             stats['status_message'] += f' - Error restoring best iteration: {e}'
             stats['final_issues'] = -1 # Indicate uncertainty
 
+    # If verification passed (even if issue count didn't decrease), consider it success
+    elif any_verification_passed:
+        console.print("[green]Verification passed. Keeping current state.[/green]")
+        final_log_entry += f' <Action>Verification passed; keeping current state.</Action>\n'
+        # Verification passed = code works, so final issues is effectively 0
+        stats['final_issues'] = 0
+        stats['status_message'] = 'Success - verification passed'
+        overall_success = True
+
     # If no improvement was made or recorded (best is still initial state or worse)
     elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
         console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
@@ -864,6 +1226,36 @@
     if final_known and stats['final_issues'] != 0:
         overall_success = False
 
+    if not overall_success and agentic_fallback:
+        console.print(f"[bold yellow]Initiating agentic fallback (prompt_file={prompt_file!r})...[/bold yellow]")
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f"  • {f}")
+        if agent_success:
+            console.print("[bold green]Agentic fallback successful.[/bold green]")
+            overall_success = True
+            model_name = agent_model or model_name
+            try:
+                final_code_content = Path(code_file).read_text(encoding="utf-8")
+                final_program_content = Path(program_file).read_text(encoding="utf-8")
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+        else:
+            console.print("[bold red]Agentic fallback failed.[/bold red]")
+
     return {
         "success": overall_success,
         "final_program": final_program_content,
@@ -872,153 +1264,4 @@
         "total_cost": total_cost,
         "model_name": model_name,
         "statistics": stats,
-    }
-
-# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
-if __name__ == "__main__":
-    # Create dummy files for demonstration
-    # In a real scenario, these files would exist and contain actual code/programs.
-    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
-    temp_dir = Path("./temp_fix_verification_loop")
-    temp_dir.mkdir(exist_ok=True)
-
-    program_file = temp_dir / "my_program.py"
-    code_file = temp_dir / "my_code_module.py"
-    verification_program_file = temp_dir / "verify_syntax.py"
-
-    program_file.write_text("""
-import my_code_module
-import sys
-# Simulate using the module and checking output
-val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
-result = my_code_module.process(val)
-expected = val * 2
-print(f"Input: {val}")
-print(f"Result: {result}")
-print(f"Expected: {expected}")
-if result == expected:
-    print("VERIFICATION_SUCCESS")
-else:
-    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
-""", encoding="utf-8")
-
-    # Initial code with a bug
-    code_file.write_text("""
-# my_code_module.py
-def process(x):
-    # Bug: should be x * 2
-    return x + 2
-""", encoding="utf-8")
-
-    # Simple verification program (e.g., syntax check)
-    verification_program_file.write_text("""
-import sys
-import py_compile
-import os
-# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
-code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
-print(f"Checking syntax of: {code_to_check}")
-try:
-    py_compile.compile(code_to_check, doraise=True)
-    print("Syntax OK.")
-    sys.exit(0) # Success
-except py_compile.PyCompileError as e:
-    print(f"Syntax Error: {e}")
-    sys.exit(1) # Failure
-except Exception as e:
-    print(f"Verification Error: {e}")
-    sys.exit(1) # Failure
-""", encoding="utf-8")
-    # Set environment variable for the verification script
-    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
-
-
-    # --- Mock fix_verification_errors ---
-    # This is crucial for testing without actual LLM calls / costs
-    # In a real test suite, use unittest.mock
-    _original_fix_verification_errors = fix_verification_errors
-    _call_count = 0
-
-    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
-        global _call_count
-        _call_count += 1
-        cost = 0.001 * _call_count # Simulate increasing cost
-        model = "mock_model_v1"
-        explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
-        issues_count = 1 # Assume 1 issue initially
-
-        fixed_program = program # Assume program doesn't need fixing
-        fixed_code = code
-
-        # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
-        if "VERIFICATION_FAILURE" in output and _call_count >= 2:
-            explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
-            fixed_code = """
-# my_code_module.py
-def process(x):
-    # Fixed: should be x * 2
-    return x * 2
-"""
-            issues_count = 0 # Fixed!
-        elif "VERIFICATION_SUCCESS" in output:
-            explanation = ["Output indicates VERIFICATION_SUCCESS."]
-            issues_count = 0 # Already correct
-
-        return {
-            'explanation': explanation,
-            'fixed_program': fixed_program,
-            'fixed_code': fixed_code,
-            'total_cost': cost,
-            'model_name': model,
-            'verification_issues_count': issues_count,
-        }
-
-    # Replace the real function with the mock
-    # In package context, you might need to patch differently
-    # For this script execution:
-    # Note: This direct replacement might not work if the function is imported
-    # using `from .fix_verification_errors import fix_verification_errors`.
-    # A proper mock framework (`unittest.mock.patch`) is better.
-    # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
-    # This is fragile. A better approach involves dependency injection or mocking frameworks.
-    # HACK: Re-assigning the imported name in the global scope of this script
-    globals()['fix_verification_errors'] = mock_fix_verification_errors
-
-
-    console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
-
-    # Example program_args: Pass input value 10 and another arg 5
-    # Note: The example program only uses the first arg sys.argv[1]
-    example_args = ["10", "another_arg"]
-
-    results = fix_verification_errors_loop(
-        program_file=str(program_file),
-        code_file=str(code_file),
-        prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
-        verification_program=str(verification_program_file),
-        strength=0.5,
-        temperature=0.1,
-        max_attempts=3,
-        budget=0.10, # Set a budget
-        verification_log_file=str(temp_dir / "test_verification.log"),
-        verbose=True,
-        program_args=example_args
-    )
-
-    console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
-    console.print(f"Success: {results['success']}")
-    console.print(f"Total Attempts: {results['total_attempts']}")
-    console.print(f"Total Cost: ${results['total_cost']:.6f}")
-    console.print(f"Model Name: {results['model_name']}")
-    # console.print(f"Final Program:\n{results['final_program']}") # Can be long
-    console.print(f"Final Code:\n{results['final_code']}")
-    console.print(f"Statistics:\n{results['statistics']}")
-
-    # Restore original function if needed elsewhere
-    globals()['fix_verification_errors'] = _original_fix_verification_errors
-
-    # Clean up dummy files
-    # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
-    # shutil.rmtree(temp_dir)
-    console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
-    console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")
+    }
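Across both the initial assessment and the per-attempt fix calls, the loop now uses the same cloud-first, local-fallback dispatch. A condensed sketch of that pattern; `dispatch_fix` is a hypothetical name for illustration, since the released code inlines this logic at both call sites rather than exposing a helper:

```python
# Condensed form of the dispatch used twice in the diff above; the dispatcher
# function itself is hypothetical, not part of the pdd package.
import requests

def dispatch_fix(use_cloud, cloud_call, local_call):
    """Try the cloud verifyCode endpoint first; on a request or runtime error,
    fall back to the local fix_verification_errors path."""
    if use_cloud:
        try:
            return cloud_call()
        except (requests.exceptions.RequestException, RuntimeError) as err:
            print(f"Cloud verify fix failed: {err}. Falling back to local.")
    return local_call()

# Example: cloud disabled, so the local path runs directly.
print(dispatch_fix(False, lambda: "cloud result", lambda: "local result"))
```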