pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +80 -19
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +281 -81
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -62
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +331 -77
  43. pdd/fix_error_loop.py +209 -60
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +319 -272
  48. pdd/fix_verification_main.py +57 -17
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +48 -9
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/increase_tests.py +7 -0
  56. pdd/incremental_code_generator.py +2 -2
  57. pdd/insert_includes.py +11 -3
  58. pdd/llm_invoke.py +1278 -110
  59. pdd/load_prompt_template.py +36 -10
  60. pdd/pdd_completion.fish +25 -2
  61. pdd/pdd_completion.sh +30 -4
  62. pdd/pdd_completion.zsh +79 -4
  63. pdd/postprocess.py +10 -3
  64. pdd/preprocess.py +228 -15
  65. pdd/preprocess_main.py +8 -5
  66. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  67. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  68. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  69. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  70. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  71. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  72. pdd/prompts/auto_include_LLM.prompt +98 -101
  73. pdd/prompts/change_LLM.prompt +1 -3
  74. pdd/prompts/detect_change_LLM.prompt +562 -3
  75. pdd/prompts/example_generator_LLM.prompt +22 -1
  76. pdd/prompts/extract_code_LLM.prompt +5 -1
  77. pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
  78. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  79. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  80. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  81. pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
  82. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
  83. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  84. pdd/prompts/generate_test_LLM.prompt +21 -6
  85. pdd/prompts/increase_tests_LLM.prompt +1 -2
  86. pdd/prompts/insert_includes_LLM.prompt +1181 -6
  87. pdd/prompts/split_LLM.prompt +1 -62
  88. pdd/prompts/trace_LLM.prompt +25 -22
  89. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  90. pdd/prompts/update_prompt_LLM.prompt +22 -1
  91. pdd/prompts/xml_convertor_LLM.prompt +3246 -7
  92. pdd/pytest_output.py +188 -21
  93. pdd/python_env_detector.py +151 -0
  94. pdd/render_mermaid.py +236 -0
  95. pdd/setup_tool.py +648 -0
  96. pdd/simple_math.py +2 -0
  97. pdd/split_main.py +3 -2
  98. pdd/summarize_directory.py +56 -7
  99. pdd/sync_determine_operation.py +918 -186
  100. pdd/sync_main.py +82 -32
  101. pdd/sync_orchestration.py +1456 -453
  102. pdd/sync_tui.py +848 -0
  103. pdd/template_registry.py +264 -0
  104. pdd/templates/architecture/architecture_json.prompt +242 -0
  105. pdd/templates/generic/generate_prompt.prompt +174 -0
  106. pdd/trace.py +168 -12
  107. pdd/trace_main.py +4 -3
  108. pdd/track_cost.py +151 -61
  109. pdd/unfinished_prompt.py +49 -3
  110. pdd/update_main.py +549 -67
  111. pdd/update_model_costs.py +2 -2
  112. pdd/update_prompt.py +19 -4
  113. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
  114. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  115. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  116. pdd_cli-0.0.42.dist-info/RECORD +0 -115
  117. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  118. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  119. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py CHANGED
@@ -5,13 +5,19 @@ import subprocess
  import shutil
  import json
  from datetime import datetime
+ from pathlib import Path

  from rich import print as rprint
  from rich.console import Console

  # Relative import from an internal module.
+ from .get_language import get_language
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
- from . import DEFAULT_TIME # Import DEFAULT_TIME
+ from . import DEFAULT_TIME # Import DEFAULT_TIME
+ from .python_env_detector import detect_host_python_executable
+ from .agentic_fix import run_agentic_fix
+ from .agentic_langtest import default_verify_cmd_for
+

  console = Console()

@@ -19,42 +25,63 @@ def escape_brackets(text: str) -> str:
  """Escape square brackets so Rich doesn't misinterpret them."""
  return text.replace("[", "\\[").replace("]", "\\]")

+ # ---------- Normalize any agentic return shape to a 4-tuple ----------
+ def _normalize_agentic_result(result):
+ """
+ Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+ Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+ """
+ if isinstance(result, tuple):
+ if len(result) == 5:
+ ok, msg, cost, model, changed_files = result
+ return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+ if len(result) == 4:
+ ok, msg, cost, model = result
+ return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+ if len(result) == 3:
+ ok, msg, cost = result
+ return bool(ok), str(msg), float(cost), "agentic-cli", []
+ if len(result) == 2:
+ ok, msg = result
+ return bool(ok), str(msg), 0.0, "agentic-cli", []
+ # Fallback (shouldn't happen)
+ return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+ def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
+ """
+ Call (possibly monkeypatched) run_agentic_fix and normalize its return.
+ """
+ res = run_agentic_fix(
+ prompt_file=prompt_file,
+ code_file=code_file,
+ unit_test_file=unit_test_file,
+ error_log_file=error_log_file,
+ cwd=cwd,
+ )
+ return _normalize_agentic_result(res)
+ # ---------------------------------------------------------------------
+
+
  def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
  """
- Run pytest on the specified test file using subprocess.
+ Run pytest on the specified test file using the subprocess-based runner.
  Returns a tuple: (failures, errors, warnings, logs)
  """
- try:
- # Include "--json-only" to ensure only valid JSON is printed.
- cmd = [sys.executable, "-m", "pdd.pytest_output", "--json-only", test_file]
- result = subprocess.run(cmd, capture_output=True, text=True)
-
- # Parse the JSON output from stdout
- try:
- output = json.loads(result.stdout)
- test_results = output.get('test_results', [{}])[0]
-
- # Check pytest's return code first
- return_code = test_results.get('return_code', 1)
-
- failures = test_results.get('failures', 0)
- errors = test_results.get('errors', 0)
- warnings = test_results.get('warnings', 0)
-
- if return_code == 2:
- errors += 1
-
- # Combine stdout and stderr from the test results
- logs = test_results.get('standard_output', '') + '\n' + test_results.get('standard_error', '')
-
- return failures, errors, warnings, logs
-
- except json.JSONDecodeError:
- # If JSON parsing fails, return the raw output
- return 1, 1, 0, f"Failed to parse pytest output:\n{result.stdout}\n{result.stderr}"
-
- except Exception as e:
- return 1, 1, 0, f"Error running pytest: {str(e)}"
+ from .pytest_output import run_pytest_and_capture_output
+ # Use the subprocess-based runner to avoid module caching issues
+ output_data = run_pytest_and_capture_output(test_file)
+
+ # Extract results
+ results = output_data.get("test_results", [{}])[0]
+
+ failures = results.get("failures", 0)
+ errors = results.get("errors", 0)
+ warnings = results.get("warnings", 0)
+
+ # Combine stdout/stderr for the log
+ logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
+
+ return failures, errors, warnings, logs

  def format_log_for_output(log_structure):
  """
@@ -74,6 +101,8 @@ def format_log_for_output(log_structure):
  # Fix attempt with XML tags
  if iteration.get("fix_attempt"):
  formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
+ if iteration.get("model_name"):
+ formatted_text += f"Model: {iteration['model_name']}\n"
  formatted_text += f"{iteration['fix_attempt']}\n"
  formatted_text += "</fix_attempt>\n\n"

@@ -98,6 +127,7 @@ def format_log_for_output(log_structure):

  def fix_error_loop(unit_test_file: str,
  code_file: str,
+ prompt_file: str,
  prompt: str,
  verification_program: str,
  strength: float,
@@ -106,7 +136,8 @@ def fix_error_loop(unit_test_file: str,
  budget: float,
  error_log_file: str = "error_log.txt",
  verbose: bool = False,
- time: float = DEFAULT_TIME):
+ time: float = DEFAULT_TIME,
+ agentic_fallback: bool = True):
  """
  Attempt to fix errors in a unit test and corresponding code using repeated iterations,
  counting only the number of times we actually call the LLM fix function.
@@ -127,7 +158,7 @@ def fix_error_loop(unit_test_file: str,
  error_log_file: Path to file to log errors (default: "error_log.txt").
  verbose: Enable verbose logging (default: False).
  time: Time parameter for the fix_errors_from_unit_tests call.
-
+ agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
  Outputs:
  success: Boolean indicating if the overall process succeeded.
  final_unit_test: String contents of the final unit test file.
@@ -184,7 +215,24 @@
  iteration = 0
  # Run an initial test to determine starting state
  try:
- initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+ is_python = str(code_file).lower().endswith(".py")
+ if is_python:
+ initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+ else:
+ # For non-Python files, run the verification program to get an initial error state
+ rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+ lang = get_language(os.path.splitext(code_file)[1])
+ verify_cmd = default_verify_cmd_for(lang, unit_test_file)
+ if not verify_cmd:
+ raise ValueError(f"No default verification command for language: {lang}")
+
+ verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
+ pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+ if verify_result.returncode == 0:
+ initial_fails, initial_errors, initial_warnings = 0, 0, 0
+ else:
+ initial_fails, initial_errors, initial_warnings = 1, 0, 0 # Treat any failure as one "fail"
+
  # Store initial state for statistics
  stats = {
  "initial_fails": initial_fails,
@@ -197,14 +245,62 @@
  "iterations_info": []
  }
  except Exception as e:
- rprint(f"[red]Error running initial pytest:[/red] {e}")
+ rprint(f"[red]Error running initial test/verification:[/red] {e}")
  return False, "", "", fix_attempts, total_cost, model_name

+ # If target is not a Python file, trigger agentic fallback if tests fail
+ if not is_python:
+ if initial_fails > 0 or initial_errors > 0:
+ rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as f:
+ f.write(pytest_output)
+
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+ success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+ prompt_file=prompt_file,
+ code_file=code_file,
+ unit_test_file=unit_test_file,
+ error_log_file=error_log_file,
+ cwd=Path(prompt_file).parent if prompt_file else None,
+ )
+ if not success:
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+ if agent_changed_files:
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+ for f in agent_changed_files:
+ rprint(f" • {f}")
+ final_unit_test = ""
+ final_code = ""
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ except Exception:
+ pass
+ try:
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception:
+ pass
+ return success, final_unit_test, final_code, 1, agent_cost, agent_model
+ else:
+ # Non-python tests passed, so we are successful.
+ rprint("[green]Non-Python tests passed. No fix needed.[/green]")
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
+ return True, final_unit_test, final_code, 0, 0.0, "N/A"
+
  fails, errors, warnings = initial_fails, initial_errors, initial_warnings

  # Determine success state immediately
  success = (fails == 0 and errors == 0 and warnings == 0)
-
+
  # Track if tests were initially passing
  initially_passing = success

@@ -241,13 +337,23 @@

  # Update structured log
  log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
  # Write formatted log to file
- with open(error_log_file, "w") as elog:
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as elog:
  elog.write(format_log_for_output(log_structure))

  # Set success to True (already determined)
- # No need to read the files - keep empty strings for passing cases
+ # Read the actual fixed files to return the successful state
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
+ # Keep empty strings as fallback
  break

  iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -325,7 +431,7 @@
  try:
  # Format the log for the LLM
  formatted_log = format_log_for_output(log_structure)
-
+
  updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
  unit_test_contents,
  code_contents,
@@ -335,11 +441,12 @@
  strength,
  temperature,
  verbose=verbose,
- time=time # Pass time parameter
+ time=time # Pass time parameter
  )

  # Update the fix attempt in the structured log
  log_structure["iterations"][-1]["fix_attempt"] = analysis
+ log_structure["iterations"][-1]["model_name"] = model_name
  except Exception as e:
  rprint(f"[red]Error during fix_errors_from_unit_tests call:[/red] {e}")
  break
@@ -380,8 +487,8 @@

  # Run the verification:
  try:
- verify_cmd = [sys.executable, verification_program]
- verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
+ verify_cmd = [detect_host_python_executable(), verification_program]
+ verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
  # Safely handle None for stdout or stderr:
  verify_stdout = verify_result.stdout or ""
  verify_stderr = verify_result.stderr or ""
@@ -411,9 +518,11 @@

  # Update post-test output in structured log
  log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
  # Write updated structured log to file after each iteration
- with open(error_log_file, "w") as elog:
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as elog:
  elog.write(format_log_for_output(log_structure))

  # Update iteration stats with post-fix results
@@ -477,8 +586,8 @@
  else:
  stats["best_iteration"] = "final"

- # Read final file contents, but only if tests weren't initially passing
- # For initially passing tests, keep empty strings as required by the test
+ # Read final file contents for non-initially-passing tests
+ # (Initially passing tests have files read at lines 344-348)
  try:
  if not initially_passing:
  with open(unit_test_file, "r") as f:
@@ -489,11 +598,6 @@
  rprint(f"[red]Error reading final files:[/red] {e}")
  final_unit_test, final_code = "", ""

- # Check if we broke out early because tests already passed
- if stats["best_iteration"] == 0 and fix_attempts == 0:
- # Still return at least 1 attempt to acknowledge the work done
- fix_attempts = 1
-
  # Print summary statistics
  rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
  rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
@@ -503,17 +607,62 @@

  # Calculate improvements
  stats["improvement"] = {
- "fails_reduced": initial_fails - stats["final_fails"],
- "errors_reduced": initial_errors - stats["final_errors"],
- "warnings_reduced": initial_warnings - stats["final_warnings"],
- "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
- (1 - (stats["final_fails"] + stats["final_errors"] + stats["final_warnings"]) /
+ "fails_reduced": initial_fails - stats['final_fails'],
+ "errors_reduced": initial_errors - stats['final_errors'],
+ "warnings_reduced": initial_warnings - stats['final_warnings'],
+ "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
+ (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
  (initial_fails + initial_errors + initial_warnings)) * 100
  }

  rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
  rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")

+ # Agentic fallback at end adds cost & model (normalized)
+ if not success and agentic_fallback and total_cost < budget:
+ # Ensure error_log_file exists before calling agentic fix
+ # Write the current log structure if it hasn't been written yet
+ try:
+ if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as elog:
+ if log_structure["iterations"]:
+ elog.write(format_log_for_output(log_structure))
+ else:
+ # No iterations ran, write initial state info
+ elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
+ if 'pytest_output' in locals():
+ elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
+
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+ agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+ prompt_file=prompt_file,
+ code_file=code_file,
+ unit_test_file=unit_test_file,
+ error_log_file=error_log_file,
+ cwd=Path(prompt_file).parent if prompt_file else None,
+ )
+ total_cost += agent_cost
+ if not agent_success:
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+ if agent_changed_files:
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+ for f in agent_changed_files:
+ rprint(f" • {f}")
+ if agent_success:
+ model_name = agent_model or model_name
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+ success = True
+
  return success, final_unit_test, final_code, fix_attempts, total_cost, model_name

  # If this module is run directly for testing purposes:
@@ -548,4 +697,4 @@ if __name__ == "__main__":
  rprint(f"Attempts: {attempts}")
  rprint(f"Total cost: ${total_cost:.6f}")
  rprint(f"Model used: {model_name}")
- rprint(f"Final unit test contents:\n{final_unit_test}")
+ rprint(f"Final unit test contents:\n{final_unit_test}")
pdd/fix_errors_from_unit_tests.py CHANGED
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
  Fix errors in unit tests using LLM models and log the process.

  Args:
- unit_test (str): The unit test code
+ unit_test (str): The unit test code, potentially multiple files concatenated
+ with <file name="filename.py">...</file> tags.
  code (str): The code under test
  prompt (str): The prompt that generated the code
  error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
  if verbose:
  console.print(f"[bold red]{error_msg}[/bold red]")
  write_to_error_file(error_file, error_msg)
- return False, False, "", "", "", 0.0, ""
+ return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
  except Exception as e:
  error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
  if verbose:
  console.print(f"[bold red]{error_msg}[/bold red]")
  write_to_error_file(error_file, error_msg)
- return False, False, "", "", "", 0.0, ""
+ return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
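
The updated docstring describes a multi-file convention for the unit_test argument. A small sketch of how a caller might build such a string (the helper name and file paths are hypothetical):

    from pathlib import Path

    def concat_test_files(paths):
        # Wrap each test file in the <file name="...">...</file> tags the docstring describes.
        parts = []
        for p in map(Path, paths):
            parts.append(f'<file name="{p.name}">\n{p.read_text()}\n</file>')
        return "\n".join(parts)

    unit_test_blob = concat_test_files(["tests/test_api.py", "tests/test_cli.py"])
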
pdd/fix_main.py CHANGED
@@ -13,7 +13,7 @@ from .preprocess import preprocess

  from .construct_paths import construct_paths
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
- from .fix_error_loop import fix_error_loop
+ from .fix_error_loop import fix_error_loop, run_pytest_on_file
  from .get_jwt_token import get_jwt_token
  from .get_language import get_language

@@ -33,7 +33,10 @@ def fix_main(
  verification_program: Optional[str],
  max_attempts: int,
  budget: float,
- auto_submit: bool
+ auto_submit: bool,
+ agentic_fallback: bool = True,
+ strength: Optional[float] = None,
+ temperature: Optional[float] = None,
  ) -> Tuple[bool, str, str, int, float, str]:
  """
  Main function to fix errors in code and unit tests.
@@ -52,7 +55,7 @@ def fix_main(
  max_attempts: Maximum number of fix attempts
  budget: Maximum cost allowed for fixing
  auto_submit: Whether to auto-submit example if tests pass
-
+ agentic_fallback: Whether the cli agent fallback is triggered
  Returns:
  Tuple containing:
  - Success status (bool)
@@ -69,13 +72,13 @@ def fix_main(
  # Initialize analysis_results to None to prevent reference errors
  analysis_results = None

+ # Input validation - let these propagate to caller for proper exit code
+ if not loop:
+ error_path = Path(error_file)
+ if not error_path.exists():
+ raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
  try:
- # Verify error file exists if not in loop mode
- if not loop:
- error_path = Path(error_file)
- if not error_path.exists():
- raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
  # Construct file paths
  input_file_paths = {
  "prompt_file": prompt_file,
@@ -97,12 +100,14 @@ def fix_main(
  quiet=ctx.obj.get('quiet', False),
  command="fix",
  command_options=command_options,
- create_error_file=loop # Only create error file if in loop mode
+ create_error_file=loop, # Only create error file if in loop mode
+ context_override=ctx.obj.get('context'),
+ confirm_callback=ctx.obj.get('confirm_callback')
  )

- # Get parameters from context
- strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
- temperature = ctx.obj.get('temperature', 0)
+ # Get parameters from context (prefer passed parameters over ctx.obj)
+ strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+ temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
  verbose = ctx.obj.get('verbose', False)
  time = ctx.obj.get('time') # Get time from context

@@ -111,6 +116,7 @@ def fix_main(
  success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
  unit_test_file=unit_test_file,
  code_file=code_file,
+ prompt_file=prompt_file,
  prompt=input_strings["prompt_file"],
  verification_program=verification_program,
  strength=strength,
@@ -119,7 +125,8 @@ def fix_main(
  max_attempts=max_attempts,
  budget=budget,
  error_log_file=output_file_paths.get("output_results"),
- verbose=verbose
+ verbose=verbose,
+ agentic_fallback=agentic_fallback
  )
  else:
  # Use fix_errors_from_unit_tests for single-pass fixing
@@ -134,16 +141,62 @@ def fix_main(
  time=time, # Pass time to fix_errors_from_unit_tests
  verbose=verbose
  )
- success = update_unit_test or update_code
  attempts = 1

+ # Issue #158 fix: Validate the fix by running tests instead of
+ # trusting the LLM's suggestion flags (update_unit_test/update_code)
+ if update_unit_test or update_code:
+ # Write fixed files to temp location first, then run tests
+ import tempfile
+ import os as os_module
+
+ # Create temp files for testing
+ test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+ temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+ temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+ try:
+ # Write the fixed content (or original if not changed)
+ test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+ code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+ with open(temp_test_file, 'w') as f:
+ f.write(test_content)
+ with open(temp_code_file, 'w') as f:
+ f.write(code_content)
+
+ # Run pytest on the fixed test file to validate
+ fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+ # Success only if tests pass (no failures or errors)
+ success = (fails == 0 and errors == 0)
+
+ if verbose:
+ rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+ if not success:
+ rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+ finally:
+ # Cleanup temp files
+ import shutil
+ try:
+ shutil.rmtree(test_dir)
+ except Exception:
+ pass
+ else:
+ # No changes suggested by LLM
+ success = False
+
  # Save fixed files
  if fixed_unit_test:
- with open(output_file_paths["output_test"], 'w') as f:
+ output_test_path = Path(output_file_paths["output_test"])
+ output_test_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(output_test_path, 'w') as f:
  f.write(fixed_unit_test)

  if fixed_code:
- with open(output_file_paths["output_code"], 'w') as f:
+ output_code_path = Path(output_file_paths["output_code"])
+ output_code_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(output_code_path, 'w') as f:
  f.write(fixed_code)

  # Provide user feedback
@@ -286,6 +339,9 @@ def fix_main(

  return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name

+ except click.Abort:
+ # User cancelled - re-raise to stop the sync loop
+ raise
  except Exception as e:
  if not ctx.obj.get('quiet', False):
  # Safely handle and print MarkupError
@@ -296,4 +352,5 @@ def fix_main(
  # Print other errors normally, escaping the error string
  from rich.markup import escape # Ensure escape is imported
  rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
- sys.exit(1)
+ # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+ return False, "", "", 0, 0.0, f"Error: {e}"