pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/fix_errors_from_unit_tests.py CHANGED
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
     Fix errors in unit tests using LLM models and log the process.
 
     Args:
-        unit_test (str): The unit test code
+        unit_test (str): The unit test code, potentially multiple files concatenated
+            with <file name="filename.py">...</file> tags.
         code (str): The code under test
         prompt (str): The prompt that generated the code
         error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
     except Exception as e:
         error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
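Note: both failure paths above previously returned an empty string in the last slot of the 7-tuple (the model_name position), which made an LLM failure indistinguishable from a blank model name. They now return an "Error: ..." marker there. A minimal caller-side sketch (the variable names are illustrative, not pdd's):

    # Shape matches the returns above: (update_unit_test, update_code,
    # fixed_unit_test, fixed_code, analysis, total_cost, model_name)
    result = (False, False, "", "", "", 0.0, "Error: ValidationError - 1 validation error")
    *_, total_cost, model_name = result
    if model_name.startswith("Error:"):
        # The call failed; model_name carries the error tag, not a model name.
        print(f"fix failed ({model_name}); cost so far ${total_cost:.4f}")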
pdd/fix_main.py CHANGED
@@ -1,8 +1,11 @@
 import sys
 from typing import Tuple, Optional
+import json
 import click
 from rich import print as rprint
 from rich.markup import MarkupError, escape
+from rich.console import Console
+from rich.panel import Panel
 
 import requests
 import asyncio
@@ -13,13 +16,27 @@ from .preprocess import preprocess
 
 from .construct_paths import construct_paths
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from .fix_error_loop import fix_error_loop
+from .fix_error_loop import fix_error_loop, run_pytest_on_file
 from .get_jwt_token import get_jwt_token
 from .get_language import get_language
+from .core.cloud import CloudConfig
 
 # Import DEFAULT_STRENGTH from the package
 from . import DEFAULT_STRENGTH
 
+# Cloud request timeout
+CLOUD_REQUEST_TIMEOUT = 400  # seconds
+
+console = Console()
+
+
+def _env_flag_enabled(name: str) -> bool:
+    """Return True when an env var is set to a truthy value."""
+    value = os.environ.get(name)
+    if value is None:
+        return False
+    return str(value).strip().lower() in {"1", "true", "yes", "on"}
+
 def fix_main(
     ctx: click.Context,
     prompt_file: str,
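The new _env_flag_enabled helper accepts only "1", "true", "yes", and "on" (case-insensitive, whitespace-trimmed) as truthy, so "0", "false", an empty string, or an unset variable all leave cloud-only mode off. A standalone check of the same logic:

    import os

    def _env_flag_enabled(name: str) -> bool:
        value = os.environ.get(name)
        if value is None:
            return False
        return str(value).strip().lower() in {"1", "true", "yes", "on"}

    os.environ["PDD_CLOUD_ONLY"] = " TRUE "
    assert _env_flag_enabled("PDD_CLOUD_ONLY")       # trimmed and lowercased
    os.environ["PDD_CLOUD_ONLY"] = "0"
    assert not _env_flag_enabled("PDD_CLOUD_ONLY")   # "0" is not in the truthy set
    assert not _env_flag_enabled("PDD_UNSET_FLAG")   # missing variables are falsy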
@@ -33,7 +50,10 @@ def fix_main(
     verification_program: Optional[str],
     max_attempts: int,
     budget: float,
-    auto_submit: bool
+    auto_submit: bool,
+    agentic_fallback: bool = True,
+    strength: Optional[float] = None,
+    temperature: Optional[float] = None,
 ) -> Tuple[bool, str, str, int, float, str]:
     """
     Main function to fix errors in code and unit tests.
@@ -52,7 +72,7 @@ def fix_main(
         max_attempts: Maximum number of fix attempts
         budget: Maximum cost allowed for fixing
         auto_submit: Whether to auto-submit example if tests pass
-
+        agentic_fallback: Whether the cli agent fallback is triggered
     Returns:
         Tuple containing:
         - Success status (bool)
@@ -69,13 +89,13 @@ def fix_main(
     # Initialize analysis_results to None to prevent reference errors
     analysis_results = None
 
+    # Input validation - let these propagate to caller for proper exit code
+    if not loop:
+        error_path = Path(error_file)
+        if not error_path.exists():
+            raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
     try:
-        # Verify error file exists if not in loop mode
-        if not loop:
-            error_path = Path(error_file)
-            if not error_path.exists():
-                raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
         # Construct file paths
         input_file_paths = {
             "prompt_file": prompt_file,
@@ -97,20 +117,200 @@ def fix_main(
             quiet=ctx.obj.get('quiet', False),
             command="fix",
             command_options=command_options,
-            create_error_file=loop  # Only create error file if in loop mode
+            create_error_file=loop,  # Only create error file if in loop mode
+            context_override=ctx.obj.get('context'),
+            confirm_callback=ctx.obj.get('confirm_callback')
         )
 
-        # Get parameters from context
-        strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
-        temperature = ctx.obj.get('temperature', 0)
+        # Get parameters from context (prefer passed parameters over ctx.obj)
+        strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+        temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
         verbose = ctx.obj.get('verbose', False)
         time = ctx.obj.get('time')  # Get time from context
 
+        # Determine cloud vs local execution preference
+        is_local_execution_preferred = ctx.obj.get('local', False)
+        cloud_only = _env_flag_enabled("PDD_CLOUD_ONLY") or _env_flag_enabled("PDD_NO_LOCAL_FALLBACK")
+        current_execution_is_local = is_local_execution_preferred and not cloud_only
+
+        # Cloud execution is only supported for single-pass mode (not loop mode)
+        # because loop mode requires running tests and verification programs locally
+        cloud_execution_attempted = False
+        cloud_execution_succeeded = False
+
+        if not loop and not current_execution_is_local:
+            if verbose:
+                console.print(Panel("Attempting cloud fix execution...", title="[blue]Mode[/blue]", expand=False))
+
+            jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+
+            if not jwt_token:
+                if cloud_only:
+                    console.print("[red]Cloud authentication failed.[/red]")
+                    raise click.UsageError("Cloud authentication failed")
+                console.print("[yellow]Cloud authentication failed. Falling back to local execution.[/yellow]")
+                current_execution_is_local = True
+
+            if jwt_token and not current_execution_is_local:
+                cloud_execution_attempted = True
+                # Build cloud payload
+                payload = {
+                    "unitTest": input_strings["unit_test_file"],
+                    "code": input_strings["code_file"],
+                    "prompt": input_strings["prompt_file"],
+                    "errors": input_strings.get("error_file", ""),
+                    "language": get_language(os.path.splitext(code_file)[1]),
+                    "strength": strength,
+                    "temperature": temperature,
+                    "time": time if time is not None else 0.25,
+                    "verbose": verbose,
+                }
+
+                headers = {
+                    "Authorization": f"Bearer {jwt_token}",
+                    "Content-Type": "application/json"
+                }
+                cloud_url = CloudConfig.get_endpoint_url("fixCode")
+
+                try:
+                    response = requests.post(
+                        cloud_url,
+                        json=payload,
+                        headers=headers,
+                        timeout=CLOUD_REQUEST_TIMEOUT
+                    )
+                    response.raise_for_status()
+
+                    response_data = response.json()
+                    fixed_unit_test = response_data.get("fixedUnitTest", "")
+                    fixed_code = response_data.get("fixedCode", "")
+                    analysis_results = response_data.get("analysis", "")
+                    total_cost = float(response_data.get("totalCost", 0.0))
+                    model_name = response_data.get("modelName", "cloud_model")
+                    success = response_data.get("success", False)
+                    update_unit_test = response_data.get("updateUnitTest", False)
+                    update_code = response_data.get("updateCode", False)
+
+                    if not (fixed_unit_test or fixed_code):
+                        if cloud_only:
+                            console.print("[red]Cloud execution returned no fixed code.[/red]")
+                            raise click.UsageError("Cloud execution returned no fixed code")
+                        console.print("[yellow]Cloud execution returned no fixed code. Falling back to local.[/yellow]")
+                        current_execution_is_local = True
+                    else:
+                        cloud_execution_succeeded = True
+                        attempts = 1
+
+                        # Validate the fix by running tests (same as local)
+                        if update_unit_test or update_code:
+                            import tempfile
+                            import shutil as shutil_module
+
+                            test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                            temp_test_file = os.path.join(test_dir, "test_temp.py")
+                            temp_code_file = os.path.join(test_dir, "code_temp.py")
+
+                            try:
+                                test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                                code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                                with open(temp_test_file, 'w') as f:
+                                    f.write(test_content)
+                                with open(temp_code_file, 'w') as f:
+                                    f.write(code_content)
+
+                                fails, errors_count, warnings, test_output = run_pytest_on_file(temp_test_file)
+                                success = (fails == 0 and errors_count == 0)
+
+                                if verbose:
+                                    rprint(f"[cyan]Fix validation: {fails} failures, {errors_count} errors, {warnings} warnings[/cyan]")
+                                    if not success:
+                                        rprint("[yellow]Fix suggested by cloud did not pass tests[/yellow]")
+                            finally:
+                                try:
+                                    shutil_module.rmtree(test_dir)
+                                except Exception:
+                                    pass
+                        else:
+                            success = False
+
+                        if verbose:
+                            console.print(Panel(
+                                f"Cloud fix completed. Model: {model_name}, Cost: ${total_cost:.6f}",
+                                title="[green]Cloud Success[/green]",
+                                expand=False
+                            ))
+
+                except requests.exceptions.Timeout:
+                    if cloud_only:
+                        console.print(f"[red]Cloud execution timed out ({CLOUD_REQUEST_TIMEOUT}s).[/red]")
+                        raise click.UsageError("Cloud execution timed out")
+                    console.print(f"[yellow]Cloud execution timed out ({CLOUD_REQUEST_TIMEOUT}s). Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+                except requests.exceptions.HTTPError as e:
+                    status_code = e.response.status_code if e.response else 0
+                    err_content = e.response.text[:200] if e.response else "No response content"
+
+                    # Non-recoverable errors: do NOT fall back to local
+                    if status_code == 402:  # Insufficient credits
+                        try:
+                            error_data = e.response.json()
+                            current_balance = error_data.get("currentBalance", "unknown")
+                            estimated_cost = error_data.get("estimatedCost", "unknown")
+                            console.print(f"[red]Insufficient credits. Current balance: {current_balance}, estimated cost: {estimated_cost}[/red]")
+                        except Exception:
+                            console.print(f"[red]Insufficient credits: {err_content}[/red]")
+                        raise click.UsageError("Insufficient credits for cloud fix")
+                    elif status_code == 401:  # Authentication error
+                        console.print(f"[red]Authentication failed: {err_content}[/red]")
+                        raise click.UsageError("Cloud authentication failed")
+                    elif status_code == 403:  # Authorization error (not approved)
+                        console.print(f"[red]Access denied: {err_content}[/red]")
+                        raise click.UsageError("Access denied - user not approved")
+                    elif status_code == 400:  # Validation error
+                        console.print(f"[red]Invalid request: {err_content}[/red]")
+                        raise click.UsageError(f"Invalid request: {err_content}")
+                    else:
+                        # Recoverable errors (5xx, unexpected errors): fall back to local
+                        if cloud_only:
+                            console.print(f"[red]Cloud HTTP error ({status_code}): {err_content}[/red]")
+                            raise click.UsageError(f"Cloud HTTP error ({status_code}): {err_content}")
+                        console.print(f"[yellow]Cloud HTTP error ({status_code}): {err_content}. Falling back to local.[/yellow]")
+                        current_execution_is_local = True
+
+                except requests.exceptions.RequestException as e:
+                    if cloud_only:
+                        console.print(f"[red]Cloud network error: {e}[/red]")
+                        raise click.UsageError(f"Cloud network error: {e}")
+                    console.print(f"[yellow]Cloud network error: {e}. Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+                except json.JSONDecodeError:
+                    if cloud_only:
+                        console.print("[red]Cloud returned invalid JSON.[/red]")
+                        raise click.UsageError("Cloud returned invalid JSON")
+                    console.print("[yellow]Cloud returned invalid JSON. Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+        # Local execution path (for loop mode or when cloud failed/skipped)
         if loop:
-            # Use fix_error_loop for iterative fixing
+            # Determine if loop should use cloud for LLM calls (hybrid mode)
+            # Local test execution stays local, but LLM fix calls can go to cloud
+            use_cloud_for_loop = not is_local_execution_preferred and not cloud_only
+
+            # If cloud_only is set but we're in loop mode, we still use hybrid approach
+            if cloud_only and not is_local_execution_preferred:
+                use_cloud_for_loop = True
+
+            if verbose:
+                mode_desc = "hybrid (local tests + cloud LLM)" if use_cloud_for_loop else "local"
+                console.print(Panel(f"Performing {mode_desc} fix loop...", title="[blue]Mode[/blue]", expand=False))
+
             success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
                 unit_test_file=unit_test_file,
                 code_file=code_file,
+                prompt_file=prompt_file,
                 prompt=input_strings["prompt_file"],
                 verification_program=verification_program,
                 strength=strength,
@@ -119,10 +319,14 @@ def fix_main(
                 max_attempts=max_attempts,
                 budget=budget,
                 error_log_file=output_file_paths.get("output_results"),
-                verbose=verbose
+                verbose=verbose,
+                agentic_fallback=agentic_fallback,
+                use_cloud=use_cloud_for_loop
             )
-        else:
-            # Use fix_errors_from_unit_tests for single-pass fixing
+        elif not cloud_execution_succeeded:
+            # Use fix_errors_from_unit_tests for single-pass fixing (local fallback)
+            if verbose:
+                console.print(Panel("Performing local fix...", title="[blue]Mode[/blue]", expand=False))
             update_unit_test, update_code, fixed_unit_test, fixed_code, analysis_results, total_cost, model_name = fix_errors_from_unit_tests(
                 unit_test=input_strings["unit_test_file"],
                 code=input_strings["code_file"],
@@ -134,16 +338,62 @@ def fix_main(
                 time=time,  # Pass time to fix_errors_from_unit_tests
                 verbose=verbose
             )
-            success = update_unit_test or update_code
             attempts = 1
 
+            # Issue #158 fix: Validate the fix by running tests instead of
+            # trusting the LLM's suggestion flags (update_unit_test/update_code)
+            if update_unit_test or update_code:
+                # Write fixed files to temp location first, then run tests
+                import tempfile
+                import os as os_module
+
+                # Create temp files for testing
+                test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+                temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+                try:
+                    # Write the fixed content (or original if not changed)
+                    test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                    code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                    with open(temp_test_file, 'w') as f:
+                        f.write(test_content)
+                    with open(temp_code_file, 'w') as f:
+                        f.write(code_content)
+
+                    # Run pytest on the fixed test file to validate
+                    fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+                    # Success only if tests pass (no failures or errors)
+                    success = (fails == 0 and errors == 0)
+
+                    if verbose:
+                        rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+                        if not success:
+                            rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+                finally:
+                    # Cleanup temp files
+                    import shutil
+                    try:
+                        shutil.rmtree(test_dir)
+                    except Exception:
+                        pass
+            else:
+                # No changes suggested by LLM
+                success = False
+
         # Save fixed files
         if fixed_unit_test:
-            with open(output_file_paths["output_test"], 'w') as f:
+            output_test_path = Path(output_file_paths["output_test"])
+            output_test_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_test_path, 'w') as f:
                 f.write(fixed_unit_test)
 
         if fixed_code:
-            with open(output_file_paths["output_code"], 'w') as f:
+            output_code_path = Path(output_file_paths["output_code"])
+            output_code_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_code_path, 'w') as f:
                 f.write(fixed_code)
 
         # Provide user feedback
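Both the cloud and local branches now validate a candidate fix the same way: write the proposed test and code into a throwaway directory, run pytest there, and count the fix as successful only when nothing fails or errors. A condensed sketch of that pattern; run_pytest_on_file is pdd's helper, and the plain pytest subprocess below is an assumed approximation of its pass/fail signal:

    import os, shutil, subprocess, sys, tempfile

    def validate_fix(test_src: str, code_src: str) -> bool:
        test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
        try:
            with open(os.path.join(test_dir, "code_temp.py"), "w") as f:
                f.write(code_src)
            test_file = os.path.join(test_dir, "test_temp.py")
            with open(test_file, "w") as f:
                f.write(test_src)
            # pytest exits 0 only when no test fails or errors
            result = subprocess.run(
                [sys.executable, "-m", "pytest", test_file],
                capture_output=True, text=True,
            )
            return result.returncode == 0
        finally:
            shutil.rmtree(test_dir, ignore_errors=True)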
@@ -286,6 +536,12 @@ def fix_main(
 
         return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name
 
+    except click.Abort:
+        # User cancelled - re-raise to stop the sync loop
+        raise
+    except click.UsageError:
+        # Re-raise UsageError for proper CLI handling (e.g., cloud auth failures, insufficient credits)
+        raise
     except Exception as e:
         if not ctx.obj.get('quiet', False):
             # Safely handle and print MarkupError
@@ -296,4 +552,5 @@ def fix_main(
             # Print other errors normally, escaping the error string
             from rich.markup import escape  # Ensure escape is imported
             rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
-        sys.exit(1)
+        # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+        return False, "", "", 0, 0.0, f"Error: {e}"
pdd/fix_verification_errors.py CHANGED
@@ -1,4 +1,3 @@
-import re
 from typing import Dict, Any, Optional
 from rich import print as rprint
 from rich.markdown import Markdown
@@ -145,6 +144,7 @@ def fix_verification_errors(
     verification_result_obj = verification_response.get('result')
 
     if isinstance(verification_result_obj, VerificationOutput):
+        # llm_invoke handles all parsing when output_pydantic is specified
        verification_issues_count = verification_result_obj.issues_count
        verification_details = verification_result_obj.details
        if verbose:
@@ -162,66 +162,18 @@ def fix_verification_errors(
                 if verbose:
                     rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
             else:
-                rprint(f"[yellow]Warning:[/yellow] <issues_count> is {verification_issues_count}, but <details> field is empty or missing. Treating as no actionable issues found.")
+                rprint(f"[yellow]Warning:[/yellow] issues_count is {verification_issues_count}, but details field is empty or missing. Treating as no actionable issues found.")
                 verification_issues_count = 0
         else:
             if verbose:
                 rprint("\n[green]No issues found during verification based on structured output.[/green]")
-    elif isinstance(verification_result_obj, str):
-        try:
-            issues_match = re.search(r'<issues_count>(\d+)</issues_count>', verification_result_obj)
-            if issues_match:
-                parsed_issues_count = int(issues_match.group(1))
-                details_match = re.search(r'<details>(.*?)</details>', verification_result_obj, re.DOTALL)
-                parsed_verification_details = details_match.group(1).strip() if (details_match and details_match.group(1)) else None
-
-
-                if parsed_issues_count > 0:
-                    if parsed_verification_details:  # Check if details exist and are not empty
-                        issues_found = True
-                        verification_issues_count = parsed_issues_count
-                        verification_details = parsed_verification_details
-                        if verbose:
-                            rprint(f"\n[yellow]Found {verification_issues_count} potential issues in string response. Proceeding to fix step.[/yellow]")
-                    else:
-                        rprint(f"[yellow]Warning:[/yellow] <issues_count> is {parsed_issues_count} in string response, but <details> field is empty or missing. Treating as no actionable issues found.")
-                        verification_issues_count = 0
-                        issues_found = False
-                else:  # parsed_issues_count == 0
-                    verification_issues_count = 0
-                    issues_found = False
-                    if verbose:
-                        rprint("\n[green]No issues found in string verification based on <issues_count> being 0.[/green]")
-            else:  # issues_match is None (tag not found or content not digits)
-                rprint("[bold red]Error:[/bold red] Could not find or parse integer value from <issues_count> tag in string response.")
-                return {
-                    "explanation": None,
-                    "fixed_program": program,
-                    "fixed_code": code,
-                    "total_cost": total_cost,
-                    "model_name": model_name,
-                    "verification_issues_count": 0,
-                }
-        except ValueError:  # Should not be hit if regex is \d+, but as a safeguard
-            rprint("[bold red]Error:[/bold red] Invalid non-integer value in <issues_count> tag in string response.")
-            return {
-                "explanation": None,
-                "fixed_program": program,
-                "fixed_code": code,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "verification_issues_count": 0,
-            }
-    else:  # Not VerificationOutput and not a successfully parsed string
-        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output (e.g., parsing failed).")
-        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput} or str")
+    else:
+        # llm_invoke should always return VerificationOutput when output_pydantic is specified
+        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output.")
+        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput}")
         rprint(f"  [dim]Received type:[/dim] {type(verification_result_obj)}")
         content_str = str(verification_result_obj)
         rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
-        raw_text = verification_response.get('result_text')
-        if raw_text:
-            raw_text_str = str(raw_text)
-            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
         return {
             "explanation": None,
             "fixed_program": program,
@@ -262,63 +214,23 @@ def fix_verification_errors(
         rprint(f"  [dim]Cost:[/dim] ${fix_response.get('cost', 0.0):.6f}")
 
     fix_result_obj = fix_response.get('result')
-    parsed_fix_successfully = False
 
     if isinstance(fix_result_obj, FixerOutput):
+        # llm_invoke handles all parsing and unescaping via _unescape_code_newlines
         fixed_program = fix_result_obj.fixed_program
         fixed_code = fix_result_obj.fixed_code
         fix_explanation = fix_result_obj.explanation
-
-        # Unescape literal \n strings to actual newlines
-        if fixed_program:
-            fixed_program = fixed_program.replace('\\n', '\n')
-        if fixed_code:
-            fixed_code = fixed_code.replace('\\n', '\n')
-
-        parsed_fix_successfully = True
+
         if verbose:
             rprint("[green]Successfully parsed structured output for fix.[/green]")
             rprint(Markdown(f"**Explanation from LLM:**\n{fix_explanation}"))
-    elif isinstance(fix_result_obj, str):
-        program_match = re.search(r'<fixed_program>(.*?)</fixed_program>', fix_result_obj, re.DOTALL)
-        code_match = re.search(r'<fixed_code>(.*?)</fixed_code>', fix_result_obj, re.DOTALL)
-        explanation_match = re.search(r'<explanation>(.*?)</explanation>', fix_result_obj, re.DOTALL)
-
-        if program_match or code_match or explanation_match:  # If any tag is found, attempt to parse
-            fixed_program_candidate = program_match.group(1).strip() if (program_match and program_match.group(1)) else None
-            fixed_code_candidate = code_match.group(1).strip() if (code_match and code_match.group(1)) else None
-            fix_explanation_candidate = explanation_match.group(1).strip() if (explanation_match and explanation_match.group(1)) else None
-
-            # Unescape literal \n strings to actual newlines
-            if fixed_program_candidate:
-                fixed_program_candidate = fixed_program_candidate.replace('\\n', '\n')
-            if fixed_code_candidate:
-                fixed_code_candidate = fixed_code_candidate.replace('\\n', '\n')
-
-            fixed_program = fixed_program_candidate if fixed_program_candidate else program
-            fixed_code = fixed_code_candidate if fixed_code_candidate else code
-            fix_explanation = fix_explanation_candidate if fix_explanation_candidate else "[Fix explanation not provided by LLM]"
-            parsed_fix_successfully = True
-
-            if verbose:
-                if not program_match or not fixed_program_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_program> tag in fix result string. Using original program.")
-                if not code_match or not fixed_code_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_code> tag in fix result string. Using original code module.")
-                if not explanation_match or not fix_explanation_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <explanation> tag in fix result string. Using default explanation.")
-        # else: string, but no relevant tags. Will fall to parsed_fix_successfully = False below
-
-    if not parsed_fix_successfully:
-        rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output (e.g., parsing failed).")
-        rprint(f"  [dim]Expected type:[/dim] {FixerOutput} or str (with XML tags)")
+    else:
+        # llm_invoke should always return FixerOutput when output_pydantic is specified
+        rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output.")
+        rprint(f"  [dim]Expected type:[/dim] {FixerOutput}")
         rprint(f"  [dim]Received type:[/dim] {type(fix_result_obj)}")
         content_str = str(fix_result_obj)
         rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
-        raw_text = fix_response.get('result_text')
-        if raw_text:
-            raw_text_str = str(raw_text)
-            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
         fix_explanation = "[Error: Failed to parse structured output from LLM for fix explanation]"
         # fixed_program and fixed_code remain original (already initialized)