pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
@@ -69,7 +69,8 @@ class Fingerprint:
     prompt_hash: Optional[str]
     code_hash: Optional[str]
     example_hash: Optional[str]
-    test_hash: Optional[str]
+    test_hash: Optional[str]  # Keep for backward compat (primary test file)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
@@ -80,6 +81,8 @@ class RunReport:
     tests_passed: int
     tests_failed: int
     coverage: float
+    test_hash: Optional[str] = None  # Hash of test file when tests were run (for staleness detection)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
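Note on the two hunks above: the new test_files field stores one content hash per discovered test file, so staleness can be detected even when a module's tests are split across several files. A minimal sketch of the mapping this produces; the hash helper is an illustrative stand-in for the module's own calculate_sha256:

    import hashlib
    from pathlib import Path

    def sha256_of(path: Path) -> str:
        # Hash the file contents, as a content-hash helper would
        return hashlib.sha256(path.read_bytes()).hexdigest()

    test_files = {
        p.name: sha256_of(p)
        for p in sorted(Path("tests").glob("test_foo*.py"))
    }
    # e.g. {"test_foo.py": "3a7d...", "test_foo_cli.py": "91c0..."}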
@@ -209,51 +212,131 @@ def get_extension(language: str) -> str:
     return extensions.get(language.lower(), language.lower())
 
 
-def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
+def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts", context_override: Optional[str] = None) -> Dict[str, Path]:
     """Returns a dictionary mapping file types to their expected Path objects."""
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"get_pdd_file_paths called: basename={basename}, language={language}, prompts_dir={prompts_dir}")
+
     try:
         # Use construct_paths to get configuration-aware paths
         prompt_filename = f"{basename}_{language}.prompt"
         prompt_path = str(Path(prompts_dir) / prompt_filename)
+        logger.info(f"Checking prompt_path={prompt_path}, exists={Path(prompt_path).exists()}")
 
-        # Check if prompt file exists - if not, we can't proceed with construct_paths
+        # Check if prompt file exists - if not, we still need configuration-aware paths
         if not Path(prompt_path).exists():
-            # Fall back to default path construction if prompt doesn't exist
+            # Use construct_paths with minimal inputs to get configuration-aware paths
+            # even when prompt doesn't exist
            extension = get_extension(language)
-            return {
-                'prompt': Path(prompt_path),
-                'code': Path(f"{basename}.{extension}"),
-                'example': Path(f"{basename}_example.{extension}"),
-                'test': Path(f"test_{basename}.{extension}")
-            }
+            try:
+                # Call construct_paths with empty input_file_paths to get configured output paths
+                resolved_config, _, output_paths, _ = construct_paths(
+                    input_file_paths={},  # Empty dict since files don't exist yet
+                    force=True,
+                    quiet=True,
+                    command="sync",
+                    command_options={"basename": basename, "language": language},
+                    context_override=context_override
+                )
+
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"resolved_config: {resolved_config}")
+                logger.info(f"output_paths: {output_paths}")
+
+                # Extract directory configuration from resolved_config
+                # Note: construct_paths sets tests_dir, examples_dir, code_dir keys
+                test_dir = resolved_config.get('tests_dir', 'tests/')
+                example_dir = resolved_config.get('examples_dir', 'examples/')
+                code_dir = resolved_config.get('code_dir', './')
+
+                logger.info(f"Extracted dirs - test: {test_dir}, example: {example_dir}, code: {code_dir}")
+
+                # Ensure directories end with /
+                if test_dir and not test_dir.endswith('/'):
+                    test_dir = test_dir + '/'
+                if example_dir and not example_dir.endswith('/'):
+                    example_dir = example_dir + '/'
+                if code_dir and not code_dir.endswith('/'):
+                    code_dir = code_dir + '/'
+
+                # Construct the full paths
+                test_path = f"{test_dir}test_{basename}.{extension}"
+                example_path = f"{example_dir}{basename}_example.{extension}"
+                code_path = f"{code_dir}{basename}.{extension}"
+
+                logger.debug(f"Final paths: test={test_path}, example={example_path}, code={code_path}")
+
+                # Convert to Path objects
+                test_path = Path(test_path)
+                example_path = Path(example_path)
+                code_path = Path(code_path)
+
+                # Bug #156: Find all matching test files
+                test_dir_path = test_path.parent
+                test_stem = f"test_{basename}"
+                if test_dir_path.exists():
+                    matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
+                else:
+                    matching_test_files = [test_path] if test_path.exists() else []
+
+                result = {
+                    'prompt': Path(prompt_path),
+                    'code': code_path,
+                    'example': example_path,
+                    'test': test_path,
+                    'test_files': matching_test_files or [test_path]  # Bug #156
+                }
+                logger.debug(f"get_pdd_file_paths returning (prompt missing): test={test_path}")
+                return result
+            except Exception as e:
+                # If construct_paths fails, fall back to current directory paths
+                # This maintains backward compatibility
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.debug(f"construct_paths failed for non-existent prompt, using defaults: {e}")
+                fallback_test_path = Path(f"test_{basename}.{extension}")
+                # Bug #156: Find matching test files even in fallback
+                if Path('.').exists():
+                    fallback_matching = sorted(Path('.').glob(f"test_{basename}*.{extension}"))
+                else:
+                    fallback_matching = [fallback_test_path] if fallback_test_path.exists() else []
+                return {
+                    'prompt': Path(prompt_path),
+                    'code': Path(f"{basename}.{extension}"),
+                    'example': Path(f"{basename}_example.{extension}"),
+                    'test': fallback_test_path,
+                    'test_files': fallback_matching or [fallback_test_path]  # Bug #156
+                }
 
         input_file_paths = {
             "prompt_file": prompt_path
         }
 
-        # Only call construct_paths if the prompt file exists
+        # Call construct_paths to get configuration-aware paths
         resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
             input_file_paths=input_file_paths,
             force=True,  # Use force=True to avoid interactive prompts during sync
             quiet=True,
-            command="generate",
-            command_options={}
+            command="sync",  # Use sync command to get more tolerant path handling
+            command_options={"basename": basename, "language": language},
+            context_override=context_override
         )
 
-        # Extract paths from config as specified in the spec
-        # The spec shows: return { 'prompt': Path(config['prompt_file']), ... }
-        # But we need to map the output_file_paths keys to our expected structure
-
-        # For generate command, construct_paths returns these in output_file_paths:
-        # - 'output' or 'code_file' for the generated code
-        # For other commands, we need to construct the full set of paths
-
-        # Get the code file path from output_file_paths
-        code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
+        # For sync command, output_file_paths contains the configured paths
+        # Extract the code path from output_file_paths
+        code_path = output_file_paths.get('generate_output_path', '')
+        if not code_path:
+            # Try other possible keys
+            code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
         if not code_path:
-            # Fallback to constructing from basename
+            # Fallback to constructing from basename with configuration
             extension = get_extension(language)
-            code_path = f"{basename}.{extension}"
+            code_dir = resolved_config.get('generate_output_path', './')
+            if code_dir and not code_dir.endswith('/'):
+                code_dir = code_dir + '/'
+            code_path = f"{code_dir}{basename}.{extension}"
 
         # Get configured paths for example and test files using construct_paths
         # Note: construct_paths requires files to exist, so we need to handle the case
@@ -269,18 +352,27 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
 
         try:
             # Get example path using example command
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            # Get test path using test command
-            _, _, test_output_paths, _ = construct_paths(
-                input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="test", command_options={}
-            )
-            test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+
+            # Get test path using test command - handle case where test file doesn't exist yet
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except FileNotFoundError:
+                # Test file doesn't exist yet - create default path
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         finally:
             # Clean up temporary file if we created it
@@ -298,17 +390,26 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
         # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
         try:
             # Get configured directories by using construct_paths with just the prompt file
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            _, _, test_output_paths, _ = construct_paths(
-                input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="test", command_options={}
-            )
-            test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except Exception:
+                # If test path construction fails, use default naming
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         except Exception:
             # Final fallback to deriving from code path if all else fails
@@ -319,21 +420,47 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
             example_path = code_dir / f"{code_stem}_example{code_ext}"
             test_path = code_dir / f"test_{code_stem}{code_ext}"
 
+        # Ensure all paths are Path objects
+        if isinstance(code_path, str):
+            code_path = Path(code_path)
+
+        # Keep paths as they are (absolute or relative as returned by construct_paths)
+        # This ensures consistency with how construct_paths expects them
+
+        # Bug #156: Find all matching test files
+        test_dir = test_path.parent
+        test_stem = f"test_{basename}"
+        extension = get_extension(language)
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
+
         return {
             'prompt': Path(prompt_path),
-            'code': Path(code_path),
+            'code': code_path,
             'example': example_path,
-            'test': test_path
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
     except Exception as e:
         # Fallback to simple naming if construct_paths fails
         extension = get_extension(language)
+        test_path = Path(f"test_{basename}.{extension}")
+        # Bug #156: Try to find matching test files even in fallback
+        test_dir = Path('.')
+        test_stem = f"test_{basename}"
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
        return {
             'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
             'code': Path(f"{basename}.{extension}"),
             'example': Path(f"{basename}_example.{extension}"),
-            'test': Path(f"test_{basename}.{extension}")
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
 
@@ -372,7 +499,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             prompt_hash=data.get('prompt_hash'),
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
-            test_hash=data.get('test_hash')
+            test_hash=data.get('test_hash'),
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -396,19 +524,29 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
             exit_code=data['exit_code'],
             tests_passed=data['tests_passed'],
             tests_failed=data['tests_failed'],
-            coverage=data['coverage']
+            coverage=data['coverage'],
+            test_hash=data.get('test_hash'),  # Optional for backward compatibility
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
 
 
-def calculate_current_hashes(paths: Dict[str, Path]) -> Dict[str, Optional[str]]:
+def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
     """Computes the hashes for all current files on disk."""
     # Return hash keys that match what the fingerprint expects
-    return {
-        f"{file_type}_hash": calculate_sha256(file_path)
-        for file_type, file_path in paths.items()
-    }
+    hashes = {}
+    for file_type, file_path in paths.items():
+        if file_type == 'test_files':
+            # Bug #156: Calculate hashes for all test files
+            hashes['test_files'] = {
+                f.name: calculate_sha256(f)
+                for f in file_path
+                if isinstance(f, Path) and f.exists()
+            }
+        elif isinstance(file_path, Path):
+            hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
+    return hashes
 
 
 def get_git_diff(file_path: Path) -> str:
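Note: after this change calculate_current_hashes returns a mixed mapping: flat <type>_hash entries for single files plus a nested dict under 'test_files'. A hedged sketch of the resulting shape (paths and hash values are illustrative):

    from pathlib import Path

    paths = {
        'prompt': Path('prompts/calculator_python.prompt'),
        'code': Path('calculator.py'),
        'test': Path('tests/test_calculator.py'),
        'test_files': [Path('tests/test_calculator.py'),
                       Path('tests/test_calculator_cli.py')],
    }
    # calculate_current_hashes(paths) would then yield something like:
    # {'prompt_hash': '9f2b...', 'code_hash': '51ad...', 'test_hash': 'c3e1...',
    #  'test_files': {'test_calculator.py': 'c3e1...',
    #                 'test_calculator_cli.py': '77b4...'}}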
@@ -438,6 +576,7 @@ def estimate_operation_cost(operation: str, language: str = "python") -> float:
         'crash': 0.40,
         'verify': 0.35,
         'test': 0.60,
+        'test_extend': 0.60,  # Same cost as test - generates additional tests
         'fix': 0.45,
         'update': 0.25,
         'analyze_conflict': 0.20,
@@ -591,24 +730,103 @@ def _handle_missing_expected_files(
     )
 
 
-def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False) -> bool:
+def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False,
+                          basename: str = None, language: str = None) -> bool:
     """
     Check if workflow is complete considering skip flags.
-
+
     Args:
         paths: Dict mapping file types to their expected Path objects
         skip_tests: If True, test files are not required for completion
         skip_verify: If True, verification operations are not required
-
+        basename: Module basename (required for run_report check)
+        language: Module language (required for run_report check)
+
     Returns:
-        True if all required files exist for the current workflow configuration
+        True if all required files exist AND have been validated (run_report exists)
     """
     required_files = ['code', 'example']
-
+
     if not skip_tests:
         required_files.append('test')
-
-    return all(paths[f].exists() for f in required_files)
+
+    # Check all required files exist
+    if not all(paths[f].exists() for f in required_files):
+        return False
+
+    # Also check that run_report exists and code works (exit_code == 0)
+    # Without this, newly generated code would incorrectly be marked as "complete"
+    if basename and language:
+        run_report = read_run_report(basename, language)
+        if not run_report or run_report.exit_code != 0:
+            return False
+
+        # Check that run_report corresponds to current test files (staleness detection)
+        # If any test file changed since run_report was created, we can't trust the results
+        if not skip_tests:
+            # Bug #156: Check ALL test files, not just the primary one
+            if 'test_files' in paths and run_report.test_files:
+                # New multi-file comparison
+                current_test_hashes = {
+                    f.name: calculate_sha256(f)
+                    for f in paths['test_files']
+                    if f.exists()
+                }
+                stored_test_hashes = run_report.test_files
+
+                # Check if any test file changed or new ones added/removed
+                if set(current_test_hashes.keys()) != set(stored_test_hashes.keys()):
+                    return False  # Test files added or removed
+
+                for fname, current_hash in current_test_hashes.items():
+                    if stored_test_hashes.get(fname) != current_hash:
+                        return False  # Test file content changed
+            elif 'test' in paths and paths['test'].exists():
+                # Backward compat: single file check
+                current_test_hash = calculate_sha256(paths['test'])
+                if run_report.test_hash and current_test_hash != run_report.test_hash:
+                    # run_report was created for a different version of the test file
+                    return False
+                if not run_report.test_hash:
+                    # Legacy run_report without test_hash - check fingerprint timestamp as fallback
+                    fingerprint = read_fingerprint(basename, language)
+                    if fingerprint:
+                        # If fingerprint is newer than run_report, run_report might be stale
+                        from datetime import datetime
+                        try:
+                            fp_time = datetime.fromisoformat(fingerprint.timestamp.replace('Z', '+00:00'))
+                            rr_time = datetime.fromisoformat(run_report.timestamp.replace('Z', '+00:00'))
+                            if fp_time > rr_time:
+                                return False  # run_report predates fingerprint, might be stale
+                        except (ValueError, AttributeError):
+                            pass  # If timestamps can't be parsed, skip this check
+
+    # Check verify has been done (unless skip_verify)
+    # Without this, workflow would be "complete" after crash even though verify hasn't run
+    # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+    if not skip_verify:
+        fingerprint = read_fingerprint(basename, language)
+        if fingerprint:
+            # If command starts with 'skip:', the operation was skipped, not completed
+            if fingerprint.command.startswith('skip:'):
+                return False
+            if fingerprint.command not in ['verify', 'test', 'fix', 'update']:
+                return False
+
+    # CRITICAL FIX: Check tests have been run (unless skip_tests)
+    # Without this, workflow would be "complete" after verify even though tests haven't run
+    # This prevents false positive success when skip_verify=True but tests are still required
+    # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+    if not skip_tests:
+        fp = read_fingerprint(basename, language)
+        if fp:
+            # If command starts with 'skip:', the operation was skipped, not completed
+            if fp.command.startswith('skip:'):
+                return False
+            if fp.command not in ['test', 'fix', 'update']:
+                return False
+
+    return True
 
 
 def check_for_dependencies(prompt_content: str) -> bool:
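Note: the multi-file staleness check above reduces to comparing two name-to-hash mappings; any added, removed, or modified test file invalidates the stored run report. The core comparison, as a minimal standalone sketch:

    def run_report_is_fresh(current: dict, stored: dict) -> bool:
        # Same file set, same content hash for every file
        if set(current) != set(stored):
            return False  # test files were added or removed
        return all(stored[name] == h for name, h in current.items())

    # run_report_is_fresh({"test_a.py": "x1"}, {"test_a.py": "x1"})  -> True
    # run_report_is_fresh({"test_a.py": "x2"}, {"test_a.py": "x1"})  -> False

Plain dict equality would give the same answer; the two-step form mirrors the code above, which keeps the added/removed and content-changed cases distinguishable.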
@@ -693,7 +911,7 @@ def _check_example_success_history(basename: str, language: str) -> bool:
     return False
 
 
-def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Core decision-making function for sync operations with skip flag awareness.
 
@@ -713,14 +931,14 @@ def sync_determine_operation(basename: str, language: str, target_coverage: floa
 
     if log_mode:
         # Skip locking for read-only analysis
-        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
     else:
         # Normal exclusive locking for actual operations
         with SyncLock(basename, language) as lock:
-            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
 
 
-def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Perform the sync state analysis without locking concerns.
 
@@ -749,26 +967,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
     # Read fingerprint early since we need it for crash verification
     fingerprint = read_fingerprint(basename, language)
-
+
+    # Check if auto-deps just completed - ALWAYS regenerate code after auto-deps
+    # This must be checked early, before any run_report processing, because:
+    # 1. Old run_report (if exists) is stale and should be ignored
+    # 2. auto-deps updates dependencies but doesn't regenerate code
+    if fingerprint and fingerprint.command == 'auto-deps':
+        paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+        return SyncDecision(
+            operation='generate',
+            reason='Auto-deps completed - regenerate code with updated prompt',
+            confidence=0.90,
+            estimated_cost=estimate_operation_cost('generate'),
+            details={
+                'decision_type': 'heuristic',
+                'previous_command': 'auto-deps',
+                'code_exists': paths['code'].exists() if paths.get('code') else False,
+                'regenerate_after_autodeps': True
+            }
+        )
+
     run_report = read_run_report(basename, language)
-    if run_report:
-        # Check test failures first (higher priority than exit code)
-        if run_report.tests_failed > 0:
-            return SyncDecision(
-                operation='fix',
-                reason=f'Test failures detected: {run_report.tests_failed} failed tests',
-                confidence=0.90,
-                estimated_cost=estimate_operation_cost('fix'),
-                details={
-                    'decision_type': 'heuristic',
-                    'tests_failed': run_report.tests_failed,
-                    'exit_code': run_report.exit_code,
-                    'coverage': run_report.coverage
-                }
-            )
-
-        # Check if we just completed a crash operation and need verification
+    # Only process runtime signals (crash/fix/test) if we have a fingerprint
+    # Without a fingerprint, run_report is stale/orphaned and should be ignored
+    if run_report and fingerprint:
+        # Check for prompt changes FIRST - prompt changes take priority over runtime signals
+        # If the user modified the prompt, we need to regenerate regardless of runtime state
+        if fingerprint:
+            paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            current_prompt_hash = calculate_sha256(paths['prompt'])
+            if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
+                prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
+                has_deps = check_for_dependencies(prompt_content)
+                return SyncDecision(
+                    operation='auto-deps' if has_deps else 'generate',
+                    reason='Prompt changed - regenerating (takes priority over runtime signals)',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_changed': True,
+                        'previous_command': fingerprint.command,
+                        'runtime_state_ignored': True
+                    }
+                )
+
+        # Check if we just completed a crash operation and need verification FIRST
+        # This takes priority over test failures because we need to verify the crash fix worked
+        # BUT only proceed to verify if exit_code == 0 (crash fix succeeded)
         if fingerprint and fingerprint.command == 'crash' and not skip_verify:
+            if run_report.exit_code != 0:
+                # Crash fix didn't work - need to re-run crash
+                return SyncDecision(
+                    operation='crash',
+                    reason=f'Previous crash operation failed (exit_code={run_report.exit_code}) - retry crash fix',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'previous_command': 'crash',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_retry'
+                    }
+                )
             return SyncDecision(
                 operation='verify',
                 reason='Previous crash operation completed - verify example runs correctly',
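Note: taken together, the reordered checks give the heuristic a fixed precedence: auto-deps completion first, then prompt changes, then the crash/verify handshake, and only then test failures. A condensed, illustrative sketch of that ordering (not the package's API; the objects are simplified stand-ins):

    def next_operation(fingerprint, run_report, prompt_changed: bool) -> str:
        # Simplified precedence of the heuristics in _perform_sync_analysis
        if fingerprint and fingerprint.command == 'auto-deps':
            return 'generate'        # always regenerate after auto-deps
        if run_report and fingerprint:
            if prompt_changed:
                return 'generate'    # the prompt is the source of truth
            if fingerprint.command == 'crash':
                return 'verify' if run_report.exit_code == 0 else 'crash'
            if run_report.tests_failed > 0:
                return 'fix'
        return 'continue-analysis'   # fall through to the file-state checks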
@@ -782,6 +1043,41 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 }
             )
 
+        # Check test failures (after crash verification check)
+        if run_report.tests_failed > 0:
+            # First check if the test file actually exists
+            pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            test_file = pdd_files.get('test')
+
+            # Only suggest 'fix' if test file exists
+            if test_file and test_file.exists():
+                return SyncDecision(
+                    operation='fix',
+                    reason=f'Test failures detected: {run_report.tests_failed} failed tests',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'tests_failed': run_report.tests_failed,
+                        'exit_code': run_report.exit_code,
+                        'coverage': run_report.coverage
+                    }
+                )
+            # If test file doesn't exist but we have test failures in run report,
+            # we need to generate the test first
+            else:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test failures reported but test file missing - need to generate tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'run_report_shows_failures': True,
+                        'test_file_exists': False
+                    }
+                )
+
         # Then check for runtime crashes (only if no test failures)
         if run_report.exit_code != 0:
             # Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
@@ -833,6 +1129,23 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'skip_tests': True
                 }
             )
+        elif run_report.tests_failed == 0 and run_report.tests_passed > 0:
+            # Tests pass but coverage is below target
+            # Return 'test_extend' to signal we need to ADD more tests, not regenerate
+            return SyncDecision(
+                operation='test_extend',
+                reason=f'Tests pass ({run_report.tests_passed} passed) but coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% - extending tests',
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'current_coverage': run_report.coverage,
+                    'target_coverage': target_coverage,
+                    'tests_passed': run_report.tests_passed,
+                    'tests_failed': run_report.tests_failed,
+                    'extend_tests': True
+                }
+            )
         else:
             return SyncDecision(
                 operation='test',
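Note: the new test_extend branch only fires when the suite is green but coverage is below target; failing tests still route to 'fix', and a missing or empty suite to 'test'. For example, tests_passed=12, tests_failed=0, coverage=71.0 against target_coverage=90.0 yields test_extend. A condensed sketch of the branch (illustrative only; the earlier branches of the real chain are elided):

    def coverage_decision(tests_passed: int, tests_failed: int,
                          coverage: float, target: float) -> str:
        if tests_failed > 0:
            return 'fix'           # handled earlier in the real decision tree
        if coverage >= target:
            return 'nothing'       # coverage goal already met
        if tests_passed > 0:
            return 'test_extend'   # add tests rather than regenerate them
        return 'test'

    # coverage_decision(12, 0, 71.0, 90.0) -> 'test_extend'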
@@ -849,7 +1162,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             )
 
     # 2. Analyze File State
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
     current_hashes = calculate_current_hashes(paths)
 
     # 3. Implement the Decision Tree
@@ -926,7 +1239,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
     if not changes:
         # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
-        if _is_workflow_complete(paths, skip_tests, skip_verify):
+        if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
             return SyncDecision(
                 operation='nothing',
                 reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
@@ -939,7 +1252,63 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'workflow_complete': True
                 }
             )
-
+
+        # Handle incomplete workflow when all files exist (including test)
+        # This addresses the blind spot where crash/verify/test logic only runs when test is missing
+        if (paths['code'].exists() and paths['example'].exists() and paths['test'].exists()):
+            run_report = read_run_report(basename, language)
+
+            # BUG 4 & 1: No run_report OR crash detected (exit_code != 0)
+            if not run_report or run_report.exit_code != 0:
+                return SyncDecision(
+                    operation='crash',
+                    reason='All files exist but needs validation' +
+                           (' - no run_report' if not run_report else f' - exit_code={run_report.exit_code}'),
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'run_report_missing': not run_report,
+                        'exit_code': None if not run_report else run_report.exit_code,
+                        'workflow_stage': 'post_regeneration_validation'
+                    }
+                )
+
+            # BUG 2: Verify not run yet (run_report exists, exit_code=0, but command != verify/test)
+            if fingerprint and fingerprint.command not in ['verify', 'test', 'fix', 'update'] and not skip_verify:
+                return SyncDecision(
+                    operation='verify',
+                    reason='All files exist but verification not completed',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'last_command': fingerprint.command,
+                        'workflow_stage': 'verification_pending'
+                    }
+                )
+
+            # Stale run_report detected: _is_workflow_complete returned False but all other conditions passed
+            # This happens when run_report.test_hash doesn't match current test file, or
+            # when fingerprint timestamp > run_report timestamp (legacy detection)
+            # Need to re-run tests to get accurate results
+            if run_report and run_report.exit_code == 0:
+                return SyncDecision(
+                    operation='test',
+                    reason='Run report is stale - need to re-run tests to verify current state',
+                    confidence=0.9,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'run_report_stale': True,
+                        'run_report_test_hash': run_report.test_hash,
+                        'workflow_stage': 'revalidation'
+                    }
+                )
+
         # Progress workflow considering skip flags
         if paths['code'].exists() and not paths['example'].exists():
             return SyncDecision(
@@ -960,8 +1329,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
         # Check if example has been crash-tested and verified before allowing test generation
         run_report = read_run_report(basename, language)
-        if not run_report:
+        if not run_report and not skip_verify:
             # No run report exists - need to test the example first
+            # But if skip_verify is True, skip crash/verify and go to test generation
             return SyncDecision(
                 operation='crash',
                 reason='Example exists but needs runtime testing before test generation',
@@ -975,8 +1345,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'workflow_stage': 'crash_validation'
                 }
             )
-        elif run_report.exit_code != 0:
+        elif run_report and run_report.exit_code != 0 and not skip_verify:
             # Example crashed - fix it before proceeding
+            # But if skip_verify is True, skip crash fix and proceed
             return SyncDecision(
                 operation='crash',
                 reason='Example crashes - fix runtime errors before test generation',
@@ -1023,6 +1394,22 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     # Some files are missing but no changes detected
     if not paths['code'].exists():
         if paths['prompt'].exists():
+            # CRITICAL FIX: Check if auto-deps was just completed to prevent infinite loop
+            if fingerprint and fingerprint.command == 'auto-deps':
+                return SyncDecision(
+                    operation='generate',
+                    reason='Auto-deps completed, now generate missing code file',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_path': str(paths['prompt']),
+                        'code_exists': False,
+                        'auto_deps_completed': True,
+                        'previous_command': fingerprint.command
+                    }
+                )
+
             prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
             if check_for_dependencies(prompt_content):
                 return SyncDecision(
@@ -1124,18 +1511,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             )
 
     else:
-        # Complex Changes (Multiple Files Modified / Conflicts)
-        return SyncDecision(
-            operation='analyze_conflict',
-            reason='Multiple files changed - requires conflict analysis',
-            confidence=0.70,
-            estimated_cost=estimate_operation_cost('analyze_conflict'),
-            details={
-                'decision_type': 'heuristic',
-                'changed_files': changes,
-                'num_changes': len(changes)
-            }
-        )
+        # Complex Changes (Multiple Files Modified)
+        # CRITICAL: Only treat as conflict if prompt changed along with derived artifacts
+        # If only derived artifacts changed (code, example, test), this is NOT a conflict
+        # per PDD doctrine - all are derived from the unchanged prompt
+
+        if 'prompt' in changes:
+            # True conflict: prompt (source of truth) changed along with derived artifacts
+            return SyncDecision(
+                operation='analyze_conflict',
+                reason='Prompt and derived files changed - requires conflict analysis',
+                confidence=0.70,
+                estimated_cost=estimate_operation_cost('analyze_conflict'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_files': changes,
+                    'num_changes': len(changes),
+                    'prompt_changed': True
+                }
+            )
+        else:
+            # Only derived artifacts changed - prompt (source of truth) is unchanged
+            # Continue workflow from where it was interrupted
+
+            # If code changed, need to re-verify
+            if 'code' in changes:
+                return SyncDecision(
+                    operation='verify',
+                    reason='Derived files changed (prompt unchanged) - verify code works',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'num_changes': len(changes),
+                        'prompt_changed': False,
+                        'workflow_stage': 'continue_after_interruption'
+                    }
+                )
+            # If only example/test changed
+            elif 'example' in changes:
+                return SyncDecision(
+                    operation='verify',
+                    reason='Example changed (prompt unchanged) - verify example runs',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'prompt_changed': False
+                    }
+                )
+            elif 'test' in changes:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test changed (prompt unchanged) - run tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'prompt_changed': False
+                    }
+                )
 
     # Fallback - should not reach here normally
     return SyncDecision(
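Note: the rewritten branch encodes a small dispatch: a changed prompt is the only true conflict, while changes confined to derived artifacts just resume the workflow. Summarized as a sketch (operation names only; the real code returns full SyncDecision objects and falls through to a fallback decision when nothing matches):

    def classify_changes(changes: list) -> str:
        # Prompt is the source of truth, so only a prompt change is a conflict
        if 'prompt' in changes:
            return 'analyze_conflict'
        if 'code' in changes:
            return 'verify'    # re-verify regenerated or hand-edited code
        if 'example' in changes:
            return 'verify'    # re-run the example
        if 'test' in changes:
            return 'test'      # re-run the changed tests
        return 'fallback'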
@@ -1152,7 +1590,14 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     )
 
 
-def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerprint, changed_files: List[str], prompts_dir: str = "prompts") -> SyncDecision:
+def analyze_conflict_with_llm(
+    basename: str,
+    language: str,
+    fingerprint: Fingerprint,
+    changed_files: List[str],
+    prompts_dir: str = "prompts",
+    context_override: Optional[str] = None,
+) -> SyncDecision:
     """
     Resolve complex sync conflicts using an LLM.
 
@@ -1185,7 +1630,7 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         )
 
     # 2. Gather file paths and diffs
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
 
     # Generate diffs for changed files
     diffs = {}