pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py CHANGED
@@ -5,14 +5,19 @@ import subprocess
  import shutil
  import json
  from datetime import datetime
+ from pathlib import Path

  from rich import print as rprint
  from rich.console import Console

  # Relative import from an internal module.
+ from .get_language import get_language
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
- from . import DEFAULT_TIME # Import DEFAULT_TIME
+ from . import DEFAULT_TIME  # Import DEFAULT_TIME
  from .python_env_detector import detect_host_python_executable
+ from .agentic_fix import run_agentic_fix
+ from .agentic_langtest import default_verify_cmd_for
+

  console = Console()

@@ -20,44 +25,63 @@ def escape_brackets(text: str) -> str:
      """Escape square brackets so Rich doesn't misinterpret them."""
      return text.replace("[", "\\[").replace("]", "\\]")

+ # ---------- Normalize any agentic return shape to a 4-tuple ----------
+ def _normalize_agentic_result(result):
+     """
+     Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+     Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+     """
+     if isinstance(result, tuple):
+         if len(result) == 5:
+             ok, msg, cost, model, changed_files = result
+             return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+         if len(result) == 4:
+             ok, msg, cost, model = result
+             return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+         if len(result) == 3:
+             ok, msg, cost = result
+             return bool(ok), str(msg), float(cost), "agentic-cli", []
+         if len(result) == 2:
+             ok, msg = result
+             return bool(ok), str(msg), 0.0, "agentic-cli", []
+     # Fallback (shouldn't happen)
+     return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+ def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
+     """
+     Call (possibly monkeypatched) run_agentic_fix and normalize its return.
+     """
+     res = run_agentic_fix(
+         prompt_file=prompt_file,
+         code_file=code_file,
+         unit_test_file=unit_test_file,
+         error_log_file=error_log_file,
+         cwd=cwd,
+     )
+     return _normalize_agentic_result(res)
+ # ---------------------------------------------------------------------
+
+
  def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
      """
-     Run pytest on the specified test file using subprocess.
+     Run pytest on the specified test file using the subprocess-based runner.
      Returns a tuple: (failures, errors, warnings, logs)
      """
-     try:
-         # Include "--json-only" to ensure only valid JSON is printed.
-         # Use environment-aware Python executable for pytest execution
-         python_executable = detect_host_python_executable()
-         cmd = [python_executable, "-m", "pdd.pytest_output", "--json-only", test_file]
-         result = subprocess.run(cmd, capture_output=True, text=True)
-
-         # Parse the JSON output from stdout
-         try:
-             output = json.loads(result.stdout)
-             test_results = output.get('test_results', [{}])[0]
-
-             # Check pytest's return code first
-             return_code = test_results.get('return_code', 1)
-
-             failures = test_results.get('failures', 0)
-             errors = test_results.get('errors', 0)
-             warnings = test_results.get('warnings', 0)
-
-             if return_code == 2:
-                 errors += 1
-
-             # Combine stdout and stderr from the test results
-             logs = test_results.get('standard_output', '') + '\n' + test_results.get('standard_error', '')
-
-             return failures, errors, warnings, logs
-
-         except json.JSONDecodeError:
-             # If JSON parsing fails, return the raw output
-             return 1, 1, 0, f"Failed to parse pytest output:\n{result.stdout}\n{result.stderr}"
-
-     except Exception as e:
-         return 1, 1, 0, f"Error running pytest: {str(e)}"
+     from .pytest_output import run_pytest_and_capture_output
+     # Use the subprocess-based runner to avoid module caching issues
+     output_data = run_pytest_and_capture_output(test_file)
+
+     # Extract results
+     results = output_data.get("test_results", [{}])[0]
+
+     failures = results.get("failures", 0)
+     errors = results.get("errors", 0)
+     warnings = results.get("warnings", 0)
+
+     # Combine stdout/stderr for the log
+     logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
+
+     return failures, errors, warnings, logs

  def format_log_for_output(log_structure):
      """
@@ -77,6 +101,8 @@ def format_log_for_output(log_structure):
          # Fix attempt with XML tags
          if iteration.get("fix_attempt"):
              formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
+             if iteration.get("model_name"):
+                 formatted_text += f"Model: {iteration['model_name']}\n"
              formatted_text += f"{iteration['fix_attempt']}\n"
              formatted_text += "</fix_attempt>\n\n"

@@ -101,6 +127,7 @@

  def fix_error_loop(unit_test_file: str,
                     code_file: str,
+                    prompt_file: str,
                     prompt: str,
                     verification_program: str,
                     strength: float,
@@ -109,7 +136,8 @@ def fix_error_loop(unit_test_file: str,
                     budget: float,
                     error_log_file: str = "error_log.txt",
                     verbose: bool = False,
-                    time: float = DEFAULT_TIME):
+                    time: float = DEFAULT_TIME,
+                    agentic_fallback: bool = True):
      """
      Attempt to fix errors in a unit test and corresponding code using repeated iterations,
      counting only the number of times we actually call the LLM fix function.
@@ -130,7 +158,7 @@ def fix_error_loop(unit_test_file: str,
          error_log_file: Path to file to log errors (default: "error_log.txt").
          verbose: Enable verbose logging (default: False).
          time: Time parameter for the fix_errors_from_unit_tests call.
-
+         agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
      Outputs:
          success: Boolean indicating if the overall process succeeded.
          final_unit_test: String contents of the final unit test file.
@@ -187,7 +215,24 @@
      iteration = 0
      # Run an initial test to determine starting state
      try:
-         initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+         is_python = str(code_file).lower().endswith(".py")
+         if is_python:
+             initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+         else:
+             # For non-Python files, run the verification program to get an initial error state
+             rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+             lang = get_language(os.path.splitext(code_file)[1])
+             verify_cmd = default_verify_cmd_for(lang, unit_test_file)
+             if not verify_cmd:
+                 raise ValueError(f"No default verification command for language: {lang}")
+
+             verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
+             pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+             if verify_result.returncode == 0:
+                 initial_fails, initial_errors, initial_warnings = 0, 0, 0
+             else:
+                 initial_fails, initial_errors, initial_warnings = 1, 0, 0  # Treat any failure as one "fail"
+
          # Store initial state for statistics
          stats = {
              "initial_fails": initial_fails,
@@ -200,14 +245,62 @@
              "iterations_info": []
          }
      except Exception as e:
-         rprint(f"[red]Error running initial pytest:[/red] {e}")
+         rprint(f"[red]Error running initial test/verification:[/red] {e}")
          return False, "", "", fix_attempts, total_cost, model_name

+     # If target is not a Python file, trigger agentic fallback if tests fail
+     if not is_python:
+         if initial_fails > 0 or initial_errors > 0:
+             rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
+             error_log_path = Path(error_log_file)
+             error_log_path.parent.mkdir(parents=True, exist_ok=True)
+             with open(error_log_path, "w") as f:
+                 f.write(pytest_output)
+
+             rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+             success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+                 prompt_file=prompt_file,
+                 code_file=code_file,
+                 unit_test_file=unit_test_file,
+                 error_log_file=error_log_file,
+                 cwd=Path(prompt_file).parent if prompt_file else None,
+             )
+             if not success:
+                 rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+             if agent_changed_files:
+                 rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                 for f in agent_changed_files:
+                     rprint(f" • {f}")
+             final_unit_test = ""
+             final_code = ""
+             try:
+                 with open(unit_test_file, "r") as f:
+                     final_unit_test = f.read()
+             except Exception:
+                 pass
+             try:
+                 with open(code_file, "r") as f:
+                     final_code = f.read()
+             except Exception:
+                 pass
+             return success, final_unit_test, final_code, 1, agent_cost, agent_model
+         else:
+             # Non-python tests passed, so we are successful.
+             rprint("[green]Non-Python tests passed. No fix needed.[/green]")
+             try:
+                 with open(unit_test_file, "r") as f:
+                     final_unit_test = f.read()
+                 with open(code_file, "r") as f:
+                     final_code = f.read()
+             except Exception as e:
+                 rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
+             return True, final_unit_test, final_code, 0, 0.0, "N/A"
+
      fails, errors, warnings = initial_fails, initial_errors, initial_warnings

      # Determine success state immediately
      success = (fails == 0 and errors == 0 and warnings == 0)
-
+
      # Track if tests were initially passing
      initially_passing = success

@@ -244,13 +337,23 @@

              # Update structured log
              log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
              # Write formatted log to file
-             with open(error_log_file, "w") as elog:
+             error_log_path = Path(error_log_file)
+             error_log_path.parent.mkdir(parents=True, exist_ok=True)
+             with open(error_log_path, "w") as elog:
                  elog.write(format_log_for_output(log_structure))

              # Set success to True (already determined)
-             # No need to read the files - keep empty strings for passing cases
+             # Read the actual fixed files to return the successful state
+             try:
+                 with open(unit_test_file, "r") as f:
+                     final_unit_test = f.read()
+                 with open(code_file, "r") as f:
+                     final_code = f.read()
+             except Exception as e:
+                 rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
+                 # Keep empty strings as fallback
              break

          iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -328,7 +431,7 @@
          try:
              # Format the log for the LLM
              formatted_log = format_log_for_output(log_structure)
-
+
              updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
                  unit_test_contents,
                  code_contents,
@@ -338,11 +441,12 @@
                  strength,
                  temperature,
                  verbose=verbose,
-                 time=time # Pass time parameter
+                 time=time  # Pass time parameter
              )

              # Update the fix attempt in the structured log
              log_structure["iterations"][-1]["fix_attempt"] = analysis
+             log_structure["iterations"][-1]["model_name"] = model_name
          except Exception as e:
              rprint(f"[red]Error during fix_errors_from_unit_tests call:[/red] {e}")
              break
@@ -384,7 +488,7 @@
          # Run the verification:
          try:
              verify_cmd = [detect_host_python_executable(), verification_program]
-             verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
+             verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
              # Safely handle None for stdout or stderr:
              verify_stdout = verify_result.stdout or ""
              verify_stderr = verify_result.stderr or ""
@@ -414,9 +518,11 @@

          # Update post-test output in structured log
          log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
          # Write updated structured log to file after each iteration
-         with open(error_log_file, "w") as elog:
+         error_log_path = Path(error_log_file)
+         error_log_path.parent.mkdir(parents=True, exist_ok=True)
+         with open(error_log_path, "w") as elog:
              elog.write(format_log_for_output(log_structure))

          # Update iteration stats with post-fix results
@@ -480,8 +586,8 @@
      else:
          stats["best_iteration"] = "final"

-     # Read final file contents, but only if tests weren't initially passing
-     # For initially passing tests, keep empty strings as required by the test
+     # Read final file contents for non-initially-passing tests
+     # (Initially passing tests have files read at lines 344-348)
      try:
          if not initially_passing:
              with open(unit_test_file, "r") as f:
@@ -492,11 +598,6 @@
          rprint(f"[red]Error reading final files:[/red] {e}")
          final_unit_test, final_code = "", ""

-     # Check if we broke out early because tests already passed
-     if stats["best_iteration"] == 0 and fix_attempts == 0:
-         # Still return at least 1 attempt to acknowledge the work done
-         fix_attempts = 1
-
      # Print summary statistics
      rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
      rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
@@ -506,17 +607,62 @@

      # Calculate improvements
      stats["improvement"] = {
-         "fails_reduced": initial_fails - stats["final_fails"],
-         "errors_reduced": initial_errors - stats["final_errors"],
-         "warnings_reduced": initial_warnings - stats["final_warnings"],
-         "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
-             (1 - (stats["final_fails"] + stats["final_errors"] + stats["final_warnings"]) /
+         "fails_reduced": initial_fails - stats['final_fails'],
+         "errors_reduced": initial_errors - stats['final_errors'],
+         "warnings_reduced": initial_warnings - stats['final_warnings'],
+         "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
+             (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
              (initial_fails + initial_errors + initial_warnings)) * 100
      }

      rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
      rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")

+     # Agentic fallback at end adds cost & model (normalized)
+     if not success and agentic_fallback and total_cost < budget:
+         # Ensure error_log_file exists before calling agentic fix
+         # Write the current log structure if it hasn't been written yet
+         try:
+             if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
+                 error_log_path = Path(error_log_file)
+                 error_log_path.parent.mkdir(parents=True, exist_ok=True)
+                 with open(error_log_path, "w") as elog:
+                     if log_structure["iterations"]:
+                         elog.write(format_log_for_output(log_structure))
+                     else:
+                         # No iterations ran, write initial state info
+                         elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
+                         if 'pytest_output' in locals():
+                             elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
+         except Exception as e:
+             rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
+
+         rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+         agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+             prompt_file=prompt_file,
+             code_file=code_file,
+             unit_test_file=unit_test_file,
+             error_log_file=error_log_file,
+             cwd=Path(prompt_file).parent if prompt_file else None,
+         )
+         total_cost += agent_cost
+         if not agent_success:
+             rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+         if agent_changed_files:
+             rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+             for f in agent_changed_files:
+                 rprint(f" • {f}")
+         if agent_success:
+             model_name = agent_model or model_name
+             try:
+                 with open(unit_test_file, "r") as f:
+                     final_unit_test = f.read()
+                 with open(code_file, "r") as f:
+                     final_code = f.read()
+             except Exception as e:
+                 rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+             success = True
+
      return success, final_unit_test, final_code, fix_attempts, total_cost, model_name

  # If this module is run directly for testing purposes:
@@ -551,4 +697,4 @@ if __name__ == "__main__":
      rprint(f"Attempts: {attempts}")
      rprint(f"Total cost: ${total_cost:.6f}")
      rprint(f"Model used: {model_name}")
-     rprint(f"Final unit test contents:\n{final_unit_test}")
+     rprint(f"Final unit test contents:\n{final_unit_test}")
pdd/fix_errors_from_unit_tests.py CHANGED
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
      Fix errors in unit tests using LLM models and log the process.

      Args:
-         unit_test (str): The unit test code
+         unit_test (str): The unit test code, potentially multiple files concatenated
+             with <file name="filename.py">...</file> tags.
          code (str): The code under test
          prompt (str): The prompt that generated the code
          error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
          if verbose:
              console.print(f"[bold red]{error_msg}[/bold red]")
          write_to_error_file(error_file, error_msg)
-         return False, False, "", "", "", 0.0, ""
+         return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
      except Exception as e:
          error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
          if verbose:
              console.print(f"[bold red]{error_msg}[/bold red]")
          write_to_error_file(error_file, error_msg)
-         return False, False, "", "", "", 0.0, ""
+         return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
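
The error paths above now put a descriptive marker in the final (model name) slot instead of an empty string. A small, illustrative sketch of how a caller can tell the two apart; the variable names are assumptions, only the 7-tuple order comes from the call shown in fix_error_loop above:

    # (update_unit_test, update_code, fixed_unit_test, fixed_code, analysis, cost, model_name)
    update_test, update_code, fixed_test, fixed_code, analysis, cost, model_name = result
    if model_name.startswith("Error:"):
        ...  # the call failed: cost is 0.0 and the fixed_* fields are empty strings
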
pdd/fix_main.py CHANGED
@@ -13,7 +13,7 @@ from .preprocess import preprocess

  from .construct_paths import construct_paths
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
- from .fix_error_loop import fix_error_loop
+ from .fix_error_loop import fix_error_loop, run_pytest_on_file
  from .get_jwt_token import get_jwt_token
  from .get_language import get_language

@@ -33,7 +33,10 @@ def fix_main(
      verification_program: Optional[str],
      max_attempts: int,
      budget: float,
-     auto_submit: bool
+     auto_submit: bool,
+     agentic_fallback: bool = True,
+     strength: Optional[float] = None,
+     temperature: Optional[float] = None,
  ) -> Tuple[bool, str, str, int, float, str]:
      """
      Main function to fix errors in code and unit tests.
@@ -52,7 +55,7 @@
          max_attempts: Maximum number of fix attempts
          budget: Maximum cost allowed for fixing
          auto_submit: Whether to auto-submit example if tests pass
-
+         agentic_fallback: Whether the cli agent fallback is triggered
      Returns:
          Tuple containing:
          - Success status (bool)
@@ -69,13 +72,13 @@
      # Initialize analysis_results to None to prevent reference errors
      analysis_results = None

+     # Input validation - let these propagate to caller for proper exit code
+     if not loop:
+         error_path = Path(error_file)
+         if not error_path.exists():
+             raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
      try:
-         # Verify error file exists if not in loop mode
-         if not loop:
-             error_path = Path(error_file)
-             if not error_path.exists():
-                 raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
          # Construct file paths
          input_file_paths = {
              "prompt_file": prompt_file,
@@ -97,12 +100,14 @@
              quiet=ctx.obj.get('quiet', False),
              command="fix",
              command_options=command_options,
-             create_error_file=loop # Only create error file if in loop mode
+             create_error_file=loop, # Only create error file if in loop mode
+             context_override=ctx.obj.get('context'),
+             confirm_callback=ctx.obj.get('confirm_callback')
          )

-         # Get parameters from context
-         strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
-         temperature = ctx.obj.get('temperature', 0)
+         # Get parameters from context (prefer passed parameters over ctx.obj)
+         strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+         temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
          verbose = ctx.obj.get('verbose', False)
          time = ctx.obj.get('time') # Get time from context

@@ -111,6 +116,7 @@
              success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
                  unit_test_file=unit_test_file,
                  code_file=code_file,
+                 prompt_file=prompt_file,
                  prompt=input_strings["prompt_file"],
                  verification_program=verification_program,
                  strength=strength,
@@ -119,7 +125,8 @@
                  max_attempts=max_attempts,
                  budget=budget,
                  error_log_file=output_file_paths.get("output_results"),
-                 verbose=verbose
+                 verbose=verbose,
+                 agentic_fallback=agentic_fallback
              )
          else:
              # Use fix_errors_from_unit_tests for single-pass fixing
@@ -134,16 +141,62 @@
                  time=time, # Pass time to fix_errors_from_unit_tests
                  verbose=verbose
              )
-             success = update_unit_test or update_code
              attempts = 1

+             # Issue #158 fix: Validate the fix by running tests instead of
+             # trusting the LLM's suggestion flags (update_unit_test/update_code)
+             if update_unit_test or update_code:
+                 # Write fixed files to temp location first, then run tests
+                 import tempfile
+                 import os as os_module
+
+                 # Create temp files for testing
+                 test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                 temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+                 temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+                 try:
+                     # Write the fixed content (or original if not changed)
+                     test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                     code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                     with open(temp_test_file, 'w') as f:
+                         f.write(test_content)
+                     with open(temp_code_file, 'w') as f:
+                         f.write(code_content)
+
+                     # Run pytest on the fixed test file to validate
+                     fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+                     # Success only if tests pass (no failures or errors)
+                     success = (fails == 0 and errors == 0)
+
+                     if verbose:
+                         rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+                         if not success:
+                             rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+                 finally:
+                     # Cleanup temp files
+                     import shutil
+                     try:
+                         shutil.rmtree(test_dir)
+                     except Exception:
+                         pass
+             else:
+                 # No changes suggested by LLM
+                 success = False
+
          # Save fixed files
          if fixed_unit_test:
-             with open(output_file_paths["output_test"], 'w') as f:
+             output_test_path = Path(output_file_paths["output_test"])
+             output_test_path.parent.mkdir(parents=True, exist_ok=True)
+             with open(output_test_path, 'w') as f:
                  f.write(fixed_unit_test)

          if fixed_code:
-             with open(output_file_paths["output_code"], 'w') as f:
+             output_code_path = Path(output_file_paths["output_code"])
+             output_code_path.parent.mkdir(parents=True, exist_ok=True)
+             with open(output_code_path, 'w') as f:
                  f.write(fixed_code)

          # Provide user feedback
@@ -286,6 +339,9 @@

          return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name

+     except click.Abort:
+         # User cancelled - re-raise to stop the sync loop
+         raise
      except Exception as e:
          if not ctx.obj.get('quiet', False):
              # Safely handle and print MarkupError
@@ -296,4 +352,5 @@
              # Print other errors normally, escaping the error string
              from rich.markup import escape # Ensure escape is imported
              rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
-         sys.exit(1)
+ # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
356
+ return False, "", "", 0, 0.0, f"Error: {e}"