pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py CHANGED
@@ -5,14 +5,25 @@ import subprocess
5
5
  import shutil
6
6
  import json
7
7
  from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Tuple, Optional
8
10
 
11
+ import requests
9
12
  from rich import print as rprint
10
13
  from rich.console import Console
14
+ from rich.panel import Panel
11
15
 
12
16
  # Relative import from an internal module.
17
+ from .get_language import get_language
13
18
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
14
- from . import DEFAULT_TIME # Import DEFAULT_TIME
19
+ from . import DEFAULT_TIME # Import DEFAULT_TIME
15
20
  from .python_env_detector import detect_host_python_executable
21
+ from .agentic_fix import run_agentic_fix
22
+ from .agentic_langtest import default_verify_cmd_for
23
+ from .core.cloud import CloudConfig
24
+
25
+ # Cloud request timeout for LLM calls
26
+ CLOUD_FIX_TIMEOUT = 400 # seconds
16
27
 
17
28
  console = Console()
18
29
 
@@ -20,44 +31,190 @@ def escape_brackets(text: str) -> str:
20
31
  """Escape square brackets so Rich doesn't misinterpret them."""
21
32
  return text.replace("[", "\\[").replace("]", "\\]")
22
33
 
23
- def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
34
+
35
+ def cloud_fix_errors(
36
+ unit_test: str,
37
+ code: str,
38
+ prompt: str,
39
+ error: str,
40
+ error_file: str,
41
+ strength: float,
42
+ temperature: float,
43
+ verbose: bool = False,
44
+ time: float = DEFAULT_TIME,
45
+ code_file_ext: str = ".py"
46
+ ) -> Tuple[bool, bool, str, str, str, float, str]:
24
47
  """
25
- Run pytest on the specified test file using subprocess.
26
- Returns a tuple: (failures, errors, warnings, logs)
48
+ Call the cloud fixCode endpoint to fix errors in code and unit tests.
49
+
50
+ This function has the same interface as fix_errors_from_unit_tests to allow
51
+ seamless switching between local and cloud execution in the fix loop.
52
+
53
+ Args:
54
+ unit_test: Unit test code string
55
+ code: Source code string
56
+ prompt: Prompt that generated the code
57
+ error: Error messages/logs from test failures
58
+ error_file: Path to write error analysis (not used in cloud, but kept for interface compatibility)
59
+ strength: Model strength parameter [0,1]
60
+ temperature: Model temperature parameter [0,1]
61
+ verbose: Enable verbose logging
62
+ time: Time budget for thinking effort
63
+ code_file_ext: File extension to determine language (e.g., ".py", ".java")
64
+
65
+ Returns:
66
+ Tuple of:
67
+ - update_unit_test: Whether unit test was updated
68
+ - update_code: Whether code was updated
69
+ - fixed_unit_test: Fixed unit test code
70
+ - fixed_code: Fixed source code
71
+ - analysis: Analysis/explanation of fixes
72
+ - total_cost: Cost of the operation
73
+ - model_name: Name of model used
74
+
75
+ Raises:
76
+ RuntimeError: When cloud execution fails with non-recoverable error
27
77
  """
78
+ jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
79
+
80
+ if not jwt_token:
81
+ raise RuntimeError("Cloud authentication failed - no JWT token available")
82
+
83
+ # Build cloud payload
84
+ payload = {
85
+ "unitTest": unit_test,
86
+ "code": code,
87
+ "prompt": prompt,
88
+ "errors": error,
89
+ "language": get_language(code_file_ext),
90
+ "strength": strength,
91
+ "temperature": temperature,
92
+ "time": time if time is not None else 0.25,
93
+ "verbose": verbose,
94
+ }
95
+
96
+ headers = {
97
+ "Authorization": f"Bearer {jwt_token}",
98
+ "Content-Type": "application/json"
99
+ }
100
+ cloud_url = CloudConfig.get_endpoint_url("fixCode")
101
+
102
+ if verbose:
103
+ console.print(Panel(f"Calling cloud fix at {cloud_url}", title="[blue]Cloud LLM[/blue]", expand=False))
104
+
28
105
  try:
29
- # Include "--json-only" to ensure only valid JSON is printed.
30
- # Use environment-aware Python executable for pytest execution
31
- python_executable = detect_host_python_executable()
32
- cmd = [python_executable, "-m", "pdd.pytest_output", "--json-only", test_file]
33
- result = subprocess.run(cmd, capture_output=True, text=True)
34
-
35
- # Parse the JSON output from stdout
36
- try:
37
- output = json.loads(result.stdout)
38
- test_results = output.get('test_results', [{}])[0]
39
-
40
- # Check pytest's return code first
41
- return_code = test_results.get('return_code', 1)
42
-
43
- failures = test_results.get('failures', 0)
44
- errors = test_results.get('errors', 0)
45
- warnings = test_results.get('warnings', 0)
106
+ response = requests.post(
107
+ cloud_url,
108
+ json=payload,
109
+ headers=headers,
110
+ timeout=CLOUD_FIX_TIMEOUT
111
+ )
112
+ response.raise_for_status()
46
113
 
47
- if return_code == 2:
48
- errors += 1
49
-
50
- # Combine stdout and stderr from the test results
51
- logs = test_results.get('standard_output', '') + '\n' + test_results.get('standard_error', '')
52
-
53
- return failures, errors, warnings, logs
54
-
55
- except json.JSONDecodeError:
56
- # If JSON parsing fails, return the raw output
57
- return 1, 1, 0, f"Failed to parse pytest output:\n{result.stdout}\n{result.stderr}"
58
-
59
- except Exception as e:
60
- return 1, 1, 0, f"Error running pytest: {str(e)}"
114
+ response_data = response.json()
115
+ fixed_unit_test = response_data.get("fixedUnitTest", "")
116
+ fixed_code = response_data.get("fixedCode", "")
117
+ analysis = response_data.get("analysis", "")
118
+ total_cost = float(response_data.get("totalCost", 0.0))
119
+ model_name = response_data.get("modelName", "cloud_model")
120
+ update_unit_test = response_data.get("updateUnitTest", False)
121
+ update_code = response_data.get("updateCode", False)
122
+
123
+ if verbose:
124
+ console.print(f"[cyan]Cloud fix completed. Model: {model_name}, Cost: ${total_cost:.6f}[/cyan]")
125
+
126
+ return update_unit_test, update_code, fixed_unit_test, fixed_code, analysis, total_cost, model_name
127
+
128
+ except requests.exceptions.Timeout:
129
+ raise RuntimeError(f"Cloud fix timed out after {CLOUD_FIX_TIMEOUT}s")
130
+
131
+ except requests.exceptions.HTTPError as e:
132
+ status_code = e.response.status_code if e.response else 0
133
+ err_content = e.response.text[:200] if e.response else "No response content"
134
+
135
+ # Non-recoverable errors
136
+ if status_code == 402:
137
+ try:
138
+ error_data = e.response.json()
139
+ current_balance = error_data.get("currentBalance", "unknown")
140
+ estimated_cost = error_data.get("estimatedCost", "unknown")
141
+ raise RuntimeError(f"Insufficient credits. Balance: {current_balance}, estimated cost: {estimated_cost}")
142
+ except json.JSONDecodeError:
143
+ raise RuntimeError(f"Insufficient credits: {err_content}")
144
+ elif status_code == 401:
145
+ raise RuntimeError(f"Authentication failed: {err_content}")
146
+ elif status_code == 403:
147
+ raise RuntimeError(f"Access denied: {err_content}")
148
+ elif status_code == 400:
149
+ raise RuntimeError(f"Invalid request: {err_content}")
150
+ else:
151
+ # 5xx or other errors - raise for caller to handle
152
+ raise RuntimeError(f"Cloud HTTP error ({status_code}): {err_content}")
153
+
154
+ except requests.exceptions.RequestException as e:
155
+ raise RuntimeError(f"Cloud network error: {e}")
156
+
157
+ except json.JSONDecodeError:
158
+ raise RuntimeError("Cloud returned invalid JSON response")
159
+
160
+
161
+ # ---------- Normalize any agentic return shape to a 4-tuple ----------
162
+ def _normalize_agentic_result(result):
163
+ """
164
+ Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
165
+ Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
166
+ """
167
+ if isinstance(result, tuple):
168
+ if len(result) == 5:
169
+ ok, msg, cost, model, changed_files = result
170
+ return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
171
+ if len(result) == 4:
172
+ ok, msg, cost, model = result
173
+ return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
174
+ if len(result) == 3:
175
+ ok, msg, cost = result
176
+ return bool(ok), str(msg), float(cost), "agentic-cli", []
177
+ if len(result) == 2:
178
+ ok, msg = result
179
+ return bool(ok), str(msg), 0.0, "agentic-cli", []
180
+ # Fallback (shouldn't happen)
181
+ return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
182
+
183
+ def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
184
+ """
185
+ Call (possibly monkeypatched) run_agentic_fix and normalize its return.
186
+ """
187
+ res = run_agentic_fix(
188
+ prompt_file=prompt_file,
189
+ code_file=code_file,
190
+ unit_test_file=unit_test_file,
191
+ error_log_file=error_log_file,
192
+ cwd=cwd,
193
+ )
194
+ return _normalize_agentic_result(res)
195
+ # ---------------------------------------------------------------------
196
+
197
+
198
+ def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
199
+ """
200
+ Run pytest on the specified test file using the subprocess-based runner.
201
+ Returns a tuple: (failures, errors, warnings, logs)
202
+ """
203
+ from .pytest_output import run_pytest_and_capture_output
204
+ # Use the subprocess-based runner to avoid module caching issues
205
+ output_data = run_pytest_and_capture_output(test_file)
206
+
207
+ # Extract results
208
+ results = output_data.get("test_results", [{}])[0]
209
+
210
+ failures = results.get("failures", 0)
211
+ errors = results.get("errors", 0)
212
+ warnings = results.get("warnings", 0)
213
+
214
+ # Combine stdout/stderr for the log
215
+ logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
216
+
217
+ return failures, errors, warnings, logs
61
218
 
62
219
  def format_log_for_output(log_structure):
63
220
  """
@@ -77,6 +234,8 @@ def format_log_for_output(log_structure):
77
234
  # Fix attempt with XML tags
78
235
  if iteration.get("fix_attempt"):
79
236
  formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
237
+ if iteration.get("model_name"):
238
+ formatted_text += f"Model: {iteration['model_name']}\n"
80
239
  formatted_text += f"{iteration['fix_attempt']}\n"
81
240
  formatted_text += "</fix_attempt>\n\n"
82
241
 
@@ -101,6 +260,7 @@ def format_log_for_output(log_structure):
101
260
 
102
261
  def fix_error_loop(unit_test_file: str,
103
262
  code_file: str,
263
+ prompt_file: str,
104
264
  prompt: str,
105
265
  verification_program: str,
106
266
  strength: float,
@@ -109,15 +269,22 @@ def fix_error_loop(unit_test_file: str,
109
269
  budget: float,
110
270
  error_log_file: str = "error_log.txt",
111
271
  verbose: bool = False,
112
- time: float = DEFAULT_TIME):
272
+ time: float = DEFAULT_TIME,
273
+ agentic_fallback: bool = True,
274
+ use_cloud: bool = False):
113
275
  """
114
- Attempt to fix errors in a unit test and corresponding code using repeated iterations,
115
- counting only the number of times we actually call the LLM fix function.
276
+ Attempt to fix errors in a unit test and corresponding code using repeated iterations,
277
+ counting only the number of times we actually call the LLM fix function.
116
278
  The tests are re-run in the same iteration after a fix to see if we've succeeded,
117
279
  so that 'attempts' matches the number of fix attempts (not the total test runs).
118
280
 
119
281
  This updated version uses structured logging to avoid redundant entries.
120
282
 
283
+ Hybrid Cloud Support:
284
+ When use_cloud=True, the LLM fix calls are routed to the cloud fixCode endpoint
285
+ while local test execution (pytest, verification programs) stays local. This allows
286
+ the loop to pass local test results to the cloud for analysis and fixes.
287
+
121
288
  Inputs:
122
289
  unit_test_file: Path to the file containing unit tests.
123
290
  code_file: Path to the file containing the code under test.
@@ -130,7 +297,8 @@ def fix_error_loop(unit_test_file: str,
130
297
  error_log_file: Path to file to log errors (default: "error_log.txt").
131
298
  verbose: Enable verbose logging (default: False).
132
299
  time: Time parameter for the fix_errors_from_unit_tests call.
133
-
300
+ agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
301
+ use_cloud: If True, use cloud LLM for fix calls while keeping test execution local.
134
302
  Outputs:
135
303
  success: Boolean indicating if the overall process succeeded.
136
304
  final_unit_test: String contents of the final unit test file.
@@ -185,9 +353,63 @@ def fix_error_loop(unit_test_file: str,
185
353
 
186
354
  # We do up to max_attempts fix attempts or until budget is exceeded
187
355
  iteration = 0
356
+ # Determine if target is Python (moved before try block for use in exception handler)
357
+ is_python = str(code_file).lower().endswith(".py")
188
358
  # Run an initial test to determine starting state
189
359
  try:
190
- initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
360
+ if is_python:
361
+ initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
362
+ else:
363
+ # For non-Python files, run the verification program to get an initial error state
364
+ rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
365
+ lang = get_language(os.path.splitext(code_file)[1])
366
+ verify_cmd = default_verify_cmd_for(lang, unit_test_file)
367
+ if not verify_cmd:
368
+ # No verify command available (e.g., Java without maven/gradle).
369
+ # Trigger agentic fallback directly.
370
+ rprint(f"[cyan]No verification command for {lang}. Triggering agentic fallback directly...[/cyan]")
371
+ error_log_path = Path(error_log_file)
372
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
373
+ if not error_log_path.exists() or error_log_path.stat().st_size == 0:
374
+ with open(error_log_path, "w") as f:
375
+ f.write(f"No verification command available for language: {lang}\n")
376
+ f.write("Agentic fix will attempt to resolve the issue.\n")
377
+
378
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
379
+ success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
380
+ prompt_file=prompt_file,
381
+ code_file=code_file,
382
+ unit_test_file=unit_test_file,
383
+ error_log_file=error_log_file,
384
+ cwd=None, # Use project root (cwd), not prompt file's parent
385
+ )
386
+ if not success:
387
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
388
+ if agent_changed_files:
389
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
390
+ for f in agent_changed_files:
391
+ rprint(f" • {f}")
392
+ final_unit_test = ""
393
+ final_code = ""
394
+ try:
395
+ with open(unit_test_file, "r") as f:
396
+ final_unit_test = f.read()
397
+ except Exception:
398
+ pass
399
+ try:
400
+ with open(code_file, "r") as f:
401
+ final_code = f.read()
402
+ except Exception:
403
+ pass
404
+ return success, final_unit_test, final_code, 1, agent_cost, agent_model
405
+
406
+ verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
407
+ pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
408
+ if verify_result.returncode == 0:
409
+ initial_fails, initial_errors, initial_warnings = 0, 0, 0
410
+ else:
411
+ initial_fails, initial_errors, initial_warnings = 1, 0, 0 # Treat any failure as one "fail"
412
+
191
413
  # Store initial state for statistics
192
414
  stats = {
193
415
  "initial_fails": initial_fails,
@@ -200,14 +422,98 @@ def fix_error_loop(unit_test_file: str,
200
422
  "iterations_info": []
201
423
  }
202
424
  except Exception as e:
203
- rprint(f"[red]Error running initial pytest:[/red] {e}")
204
- return False, "", "", fix_attempts, total_cost, model_name
425
+ rprint(f"[red]Error running initial test/verification:[/red] {e}")
426
+ # Instead of returning early, trigger agentic fallback if enabled (Issue #266)
427
+ if agentic_fallback:
428
+ rprint("[cyan]Initial test failed with exception. Triggering agentic fallback...[/cyan]")
429
+ error_log_path = Path(error_log_file)
430
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
431
+ with open(error_log_path, "w") as f:
432
+ f.write(f"Initial test/verification failed with exception:\n{e}\n")
433
+
434
+ success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
435
+ prompt_file=prompt_file,
436
+ code_file=code_file,
437
+ unit_test_file=unit_test_file,
438
+ error_log_file=error_log_file,
439
+ cwd=None,
440
+ )
441
+ if not success:
442
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
443
+ if agent_changed_files:
444
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
445
+ for f in agent_changed_files:
446
+ rprint(f" • {f}")
447
+ final_unit_test = ""
448
+ final_code = ""
449
+ try:
450
+ with open(unit_test_file, "r") as f:
451
+ final_unit_test = f.read()
452
+ except Exception:
453
+ pass
454
+ try:
455
+ with open(code_file, "r") as f:
456
+ final_code = f.read()
457
+ except Exception:
458
+ pass
459
+ return success, final_unit_test, final_code, 1, agent_cost, agent_model
460
+ else:
461
+ # Agentic fallback disabled, return failure
462
+ return False, "", "", fix_attempts, total_cost, model_name
463
+
464
+ # If target is not a Python file, trigger agentic fallback if tests fail
465
+ if not is_python:
466
+ if initial_fails > 0 or initial_errors > 0:
467
+ rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
468
+ error_log_path = Path(error_log_file)
469
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
470
+ with open(error_log_path, "w") as f:
471
+ f.write(pytest_output)
472
+
473
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
474
+ success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
475
+ prompt_file=prompt_file,
476
+ code_file=code_file,
477
+ unit_test_file=unit_test_file,
478
+ error_log_file=error_log_file,
479
+ cwd=None, # Use project root (cwd), not prompt file's parent
480
+ )
481
+ if not success:
482
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
483
+ if agent_changed_files:
484
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
485
+ for f in agent_changed_files:
486
+ rprint(f" • {f}")
487
+ final_unit_test = ""
488
+ final_code = ""
489
+ try:
490
+ with open(unit_test_file, "r") as f:
491
+ final_unit_test = f.read()
492
+ except Exception:
493
+ pass
494
+ try:
495
+ with open(code_file, "r") as f:
496
+ final_code = f.read()
497
+ except Exception:
498
+ pass
499
+ return success, final_unit_test, final_code, 1, agent_cost, agent_model
500
+ else:
501
+ # Non-python tests passed, so we are successful.
502
+ rprint("[green]Non-Python tests passed. No fix needed.[/green]")
503
+ try:
504
+ with open(unit_test_file, "r") as f:
505
+ final_unit_test = f.read()
506
+ with open(code_file, "r") as f:
507
+ final_code = f.read()
508
+ except Exception as e:
509
+ rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
510
+ return True, final_unit_test, final_code, 0, 0.0, "N/A"
205
511
 
206
512
  fails, errors, warnings = initial_fails, initial_errors, initial_warnings
207
513
 
208
514
  # Determine success state immediately
209
515
  success = (fails == 0 and errors == 0 and warnings == 0)
210
-
516
+
211
517
  # Track if tests were initially passing
212
518
  initially_passing = success
213
519
 
@@ -244,13 +550,23 @@ def fix_error_loop(unit_test_file: str,
244
550
 
245
551
  # Update structured log
246
552
  log_structure["iterations"][-1]["post_test_output"] = pytest_output
247
-
553
+
248
554
  # Write formatted log to file
249
- with open(error_log_file, "w") as elog:
555
+ error_log_path = Path(error_log_file)
556
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
557
+ with open(error_log_path, "w") as elog:
250
558
  elog.write(format_log_for_output(log_structure))
251
559
 
252
560
  # Set success to True (already determined)
253
- # No need to read the files - keep empty strings for passing cases
561
+ # Read the actual fixed files to return the successful state
562
+ try:
563
+ with open(unit_test_file, "r") as f:
564
+ final_unit_test = f.read()
565
+ with open(code_file, "r") as f:
566
+ final_code = f.read()
567
+ except Exception as e:
568
+ rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
569
+ # Keep empty strings as fallback
254
570
  break
255
571
 
256
572
  iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -280,17 +596,18 @@ def fix_error_loop(unit_test_file: str,
280
596
  break
281
597
 
282
598
  # We only attempt to fix if test is failing or has warnings:
283
- # Let's create backups:
284
- unit_test_dir, unit_test_name = os.path.split(unit_test_file)
285
- code_dir, code_name = os.path.split(code_file)
286
- unit_test_backup = os.path.join(
287
- unit_test_dir,
288
- f"{os.path.splitext(unit_test_name)[0]}_{iteration}_{errors}_{fails}_{warnings}_{timestamp}.py"
289
- )
290
- code_backup = os.path.join(
291
- code_dir,
292
- f"{os.path.splitext(code_name)[0]}_{iteration}_{errors}_{fails}_{warnings}_{timestamp}.py"
293
- )
599
+ # Let's create backups in .pdd/backups/ to avoid polluting code/test directories
600
+ code_name = os.path.basename(code_file)
601
+ code_basename = os.path.splitext(code_name)[0]
602
+ unit_test_name = os.path.basename(unit_test_file)
603
+ unit_test_ext = os.path.splitext(unit_test_name)[1]
604
+ code_ext = os.path.splitext(code_name)[1]
605
+
606
+ backup_dir = Path.cwd() / '.pdd' / 'backups' / code_basename / timestamp
607
+ backup_dir.mkdir(parents=True, exist_ok=True)
608
+
609
+ unit_test_backup = str(backup_dir / f"test_{iteration}_{errors}_{fails}_{warnings}{unit_test_ext}")
610
+ code_backup = str(backup_dir / f"code_{iteration}_{errors}_{fails}_{warnings}{code_ext}")
294
611
  try:
295
612
  shutil.copy(unit_test_file, unit_test_backup)
296
613
  shutil.copy(code_file, code_backup)
@@ -299,7 +616,8 @@ def fix_error_loop(unit_test_file: str,
299
616
  rprint(f"[green]Created backup for code file:[/green] {code_backup}")
300
617
  except Exception as e:
301
618
  rprint(f"[red]Error creating backup files:[/red] {e}")
302
- return False, "", "", fix_attempts, total_cost, model_name
619
+ success = False
620
+ break # Exit loop but continue to agentic fallback (Issue #266)
303
621
 
304
622
  # Update best iteration if needed:
305
623
  if (errors < best_iteration_info["errors"] or
@@ -322,29 +640,67 @@ def fix_error_loop(unit_test_file: str,
322
640
  code_contents = f.read()
323
641
  except Exception as e:
324
642
  rprint(f"[red]Error reading input files:[/red] {e}")
325
- return False, "", "", fix_attempts, total_cost, model_name
643
+ success = False
644
+ break # Exit loop but continue to agentic fallback (Issue #266)
326
645
 
327
- # Call fix:
646
+ # Call fix (cloud or local based on use_cloud parameter):
328
647
  try:
329
- # Format the log for the LLM
648
+ # Format the log for the LLM - includes local test results
330
649
  formatted_log = format_log_for_output(log_structure)
331
-
332
- updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
333
- unit_test_contents,
334
- code_contents,
335
- prompt,
336
- formatted_log, # Use formatted log instead of reading the file
337
- error_log_file,
338
- strength,
339
- temperature,
340
- verbose=verbose,
341
- time=time # Pass time parameter
342
- )
343
-
650
+
651
+ if use_cloud:
652
+ # Use cloud LLM for fix - local test results passed via formatted_log
653
+ try:
654
+ updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = cloud_fix_errors(
655
+ unit_test=unit_test_contents,
656
+ code=code_contents,
657
+ prompt=prompt,
658
+ error=formatted_log, # Pass local test results to cloud
659
+ error_file=error_log_file,
660
+ strength=strength,
661
+ temperature=temperature,
662
+ verbose=verbose,
663
+ time=time,
664
+ code_file_ext=os.path.splitext(code_file)[1]
665
+ )
666
+ except RuntimeError as cloud_err:
667
+ # Cloud failed - fall back to local if it's a recoverable error
668
+ if "Insufficient credits" in str(cloud_err) or "Authentication failed" in str(cloud_err) or "Access denied" in str(cloud_err):
669
+ # Non-recoverable errors - stop the loop
670
+ rprint(f"[red]Cloud fix error (non-recoverable):[/red] {cloud_err}")
671
+ break
672
+ # Recoverable errors - fall back to local
673
+ rprint(f"[yellow]Cloud fix failed, falling back to local:[/yellow] {cloud_err}")
674
+ updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
675
+ unit_test_contents,
676
+ code_contents,
677
+ prompt,
678
+ formatted_log,
679
+ error_log_file,
680
+ strength,
681
+ temperature,
682
+ verbose=verbose,
683
+ time=time
684
+ )
685
+ else:
686
+ # Use local LLM for fix
687
+ updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
688
+ unit_test_contents,
689
+ code_contents,
690
+ prompt,
691
+ formatted_log, # Use formatted log instead of reading the file
692
+ error_log_file,
693
+ strength,
694
+ temperature,
695
+ verbose=verbose,
696
+ time=time # Pass time parameter
697
+ )
698
+
344
699
  # Update the fix attempt in the structured log
345
700
  log_structure["iterations"][-1]["fix_attempt"] = analysis
701
+ log_structure["iterations"][-1]["model_name"] = model_name
346
702
  except Exception as e:
347
- rprint(f"[red]Error during fix_errors_from_unit_tests call:[/red] {e}")
703
+ rprint(f"[red]Error during fix call:[/red] {e}")
348
704
  break
349
705
 
350
706
  fix_attempts += 1 # We used one fix attempt
@@ -384,7 +740,7 @@ def fix_error_loop(unit_test_file: str,
384
740
  # Run the verification:
385
741
  try:
386
742
  verify_cmd = [detect_host_python_executable(), verification_program]
387
- verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
743
+ verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
388
744
  # Safely handle None for stdout or stderr:
389
745
  verify_stdout = verify_result.stdout or ""
390
746
  verify_stderr = verify_result.stderr or ""
@@ -414,9 +770,11 @@ def fix_error_loop(unit_test_file: str,
414
770
 
415
771
  # Update post-test output in structured log
416
772
  log_structure["iterations"][-1]["post_test_output"] = pytest_output
417
-
773
+
418
774
  # Write updated structured log to file after each iteration
419
- with open(error_log_file, "w") as elog:
775
+ error_log_path = Path(error_log_file)
776
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
777
+ with open(error_log_path, "w") as elog:
420
778
  elog.write(format_log_for_output(log_structure))
421
779
 
422
780
  # Update iteration stats with post-fix results
@@ -438,7 +796,8 @@ def fix_error_loop(unit_test_file: str,
438
796
  stats["final_warnings"] = warnings
439
797
  except Exception as e:
440
798
  rprint(f"[red]Error running pytest for next iteration:[/red] {e}")
441
- return False, "", "", fix_attempts, total_cost, model_name
799
+ success = False
800
+ break # Exit loop but continue to agentic fallback (Issue #266)
442
801
 
443
802
  # Possibly restore best iteration if the final run is not as good:
444
803
  if best_iteration_info["attempt"] is not None and not success:
@@ -480,8 +839,8 @@ def fix_error_loop(unit_test_file: str,
480
839
  else:
481
840
  stats["best_iteration"] = "final"
482
841
 
483
- # Read final file contents, but only if tests weren't initially passing
484
- # For initially passing tests, keep empty strings as required by the test
842
+ # Read final file contents for non-initially-passing tests
843
+ # (Initially passing tests already have their files read in the loop above)
485
844
  try:
486
845
  if not initially_passing:
487
846
  with open(unit_test_file, "r") as f:
@@ -492,11 +851,6 @@ def fix_error_loop(unit_test_file: str,
492
851
  rprint(f"[red]Error reading final files:[/red] {e}")
493
852
  final_unit_test, final_code = "", ""
494
853
 
495
- # Check if we broke out early because tests already passed
496
- if stats["best_iteration"] == 0 and fix_attempts == 0:
497
- # Still return at least 1 attempt to acknowledge the work done
498
- fix_attempts = 1
499
-
500
854
  # Print summary statistics
501
855
  rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
502
856
  rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
@@ -506,17 +860,62 @@ def fix_error_loop(unit_test_file: str,
506
860
 
507
861
  # Calculate improvements
508
862
  stats["improvement"] = {
509
- "fails_reduced": initial_fails - stats["final_fails"],
510
- "errors_reduced": initial_errors - stats["final_errors"],
511
- "warnings_reduced": initial_warnings - stats["final_warnings"],
512
- "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
513
- (1 - (stats["final_fails"] + stats["final_errors"] + stats["final_warnings"]) /
863
+ "fails_reduced": initial_fails - stats['final_fails'],
864
+ "errors_reduced": initial_errors - stats['final_errors'],
865
+ "warnings_reduced": initial_warnings - stats['final_warnings'],
866
+ "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
867
+ (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
514
868
  (initial_fails + initial_errors + initial_warnings)) * 100
515
869
  }
516
870
 
517
871
  rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
518
872
  rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")
519
873
 
874
+ # Agentic fallback: if the fix loop failed and budget remains, run the agent-based fix and add its cost/model to the totals
875
+ if not success and agentic_fallback and total_cost < budget:
876
+ # Ensure error_log_file exists before calling agentic fix
877
+ # Write the current log structure if it hasn't been written yet
878
+ try:
879
+ if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
880
+ error_log_path = Path(error_log_file)
881
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
882
+ with open(error_log_path, "w") as elog:
883
+ if log_structure["iterations"]:
884
+ elog.write(format_log_for_output(log_structure))
885
+ else:
886
+ # No iterations ran, write initial state info
887
+ elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
888
+ if 'pytest_output' in locals():
889
+ elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
890
+ except Exception as e:
891
+ rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
892
+
893
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
894
+ agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
895
+ prompt_file=prompt_file,
896
+ code_file=code_file,
897
+ unit_test_file=unit_test_file,
898
+ error_log_file=error_log_file,
899
+ cwd=None, # Use project root (cwd), not prompt file's parent
900
+ )
901
+ total_cost += agent_cost
902
+ if not agent_success:
903
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
904
+ if agent_changed_files:
905
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
906
+ for f in agent_changed_files:
907
+ rprint(f" • {f}")
908
+ if agent_success:
909
+ model_name = agent_model or model_name
910
+ try:
911
+ with open(unit_test_file, "r") as f:
912
+ final_unit_test = f.read()
913
+ with open(code_file, "r") as f:
914
+ final_code = f.read()
915
+ except Exception as e:
916
+ rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
917
+ success = True
918
+
520
919
  return success, final_unit_test, final_code, fix_attempts, total_cost, model_name
521
920
 
522
921
  # If this module is run directly for testing purposes:
@@ -551,4 +950,4 @@ if __name__ == "__main__":
551
950
  rprint(f"Attempts: {attempts}")
552
951
  rprint(f"Total cost: ${total_cost:.6f}")
553
952
  rprint(f"Model used: {model_name}")
554
- rprint(f"Final unit test contents:\n{final_unit_test}")
953
+ rprint(f"Final unit test contents:\n{final_unit_test}")