pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@ import sys
11
11
  import json
12
12
  import hashlib
13
13
  import subprocess
14
+ import fnmatch
14
15
  from pathlib import Path
15
16
  from dataclasses import dataclass, field
16
17
  from typing import Dict, List, Optional, Any
@@ -31,10 +32,17 @@ except ImportError:
31
32
  HAS_MSVCRT = False
32
33
 
33
34
  # Import PDD internal modules
34
- from pdd.construct_paths import construct_paths
35
+ from pdd.construct_paths import (
36
+ _detect_context,
37
+ _find_pddrc_file,
38
+ _get_relative_basename,
39
+ _load_pddrc_config,
40
+ construct_paths,
41
+ )
35
42
  from pdd.load_prompt_template import load_prompt_template
36
43
  from pdd.llm_invoke import llm_invoke
37
44
  from pdd.get_language import get_language
45
+ from pdd.template_expander import expand_template
38
46
 
39
47
  # Constants - Use functions for dynamic path resolution
40
48
  def get_pdd_dir():
@@ -55,11 +63,41 @@ META_DIR = get_meta_dir()
55
63
  LOCKS_DIR = get_locks_dir()
56
64
 
57
65
  # Export constants for other modules
58
- __all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
66
+ __all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
59
67
  'sync_determine_operation', 'analyze_conflict_with_llm', 'read_run_report', 'get_pdd_file_paths',
60
68
  '_check_example_success_history']
61
69
 
62
70
 
71
+ def _safe_basename(basename: str) -> str:
72
+ """Sanitize basename for use in metadata filenames.
73
+
74
+ Replaces '/' with '_' to prevent path interpretation when the basename
75
+ contains subdirectory components (e.g., 'core/cloud' -> 'core_cloud').
76
+ """
77
+ return basename.replace('/', '_')
78
+
79
+
80
+ def _extract_name_part(basename: str) -> tuple:
81
+ """Extract directory and name parts from a subdirectory basename.
82
+
83
+ For subdirectory basenames like 'core/cloud', separates the directory
84
+ prefix from the actual name so that filename patterns can be applied
85
+ correctly.
86
+
87
+ Args:
88
+ basename: The full basename, possibly containing subdirectory path.
89
+
90
+ Returns:
91
+ Tuple of (dir_prefix, name_part):
92
+ - 'core/cloud' -> ('core/', 'cloud')
93
+ - 'calculator' -> ('', 'calculator')
94
+ """
95
+ if '/' in basename:
96
+ dir_part, name_part = basename.rsplit('/', 1)
97
+ return dir_part + '/', name_part
98
+ return '', basename
99
+
100
+
63
101
  @dataclass
64
102
  class Fingerprint:
65
103
  """Represents the last known good state of a PDD unit."""
@@ -69,7 +107,8 @@ class Fingerprint:
69
107
  prompt_hash: Optional[str]
70
108
  code_hash: Optional[str]
71
109
  example_hash: Optional[str]
72
- test_hash: Optional[str]
110
+ test_hash: Optional[str] # Keep for backward compat (primary test file)
111
+ test_files: Optional[Dict[str, str]] = None # Bug #156: {"test_foo.py": "hash1", ...}
73
112
 
74
113
 
75
114
  @dataclass
@@ -80,6 +119,8 @@ class RunReport:
80
119
  tests_passed: int
81
120
  tests_failed: int
82
121
  coverage: float
122
+ test_hash: Optional[str] = None # Hash of test file when tests were run (for staleness detection)
123
+ test_files: Optional[Dict[str, str]] = None # Bug #156: {"test_foo.py": "hash1", ...}
83
124
 
84
125
 
85
126
  @dataclass
@@ -99,7 +140,7 @@ class SyncLock:
99
140
  def __init__(self, basename: str, language: str):
100
141
  self.basename = basename
101
142
  self.language = language
102
- self.lock_file = get_locks_dir() / f"{basename}_{language}.lock"
143
+ self.lock_file = get_locks_dir() / f"{_safe_basename(basename)}_{language}.lock"
103
144
  self.fd = None
104
145
  self.current_pid = os.getpid()
105
146
 
@@ -209,51 +250,363 @@ def get_extension(language: str) -> str:
209
250
  return extensions.get(language.lower(), language.lower())
210
251
 
211
252
 
212
- def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
253
+ def _resolve_prompts_root(prompts_dir: str) -> Path:
254
+ """Resolve prompts root relative to the .pddrc location when available."""
255
+ prompts_root = Path(prompts_dir)
256
+ pddrc_path = _find_pddrc_file()
257
+ if pddrc_path and not prompts_root.is_absolute():
258
+ prompts_root = pddrc_path.parent / prompts_root
259
+
260
+ parts = prompts_root.parts
261
+ if "prompts" in parts:
262
+ prompt_index = parts.index("prompts")
263
+ prompts_root = Path(*parts[: prompt_index + 1])
264
+
265
+ return prompts_root
266
+
267
+
268
+ def _relative_basename_for_context(basename: str, context_name: Optional[str]) -> str:
269
+ """Strip context-specific prefixes from basename when possible."""
270
+ if not context_name:
271
+ return basename
272
+
273
+ pddrc_path = _find_pddrc_file()
274
+ if not pddrc_path:
275
+ return basename
276
+
277
+ try:
278
+ config = _load_pddrc_config(pddrc_path)
279
+ except ValueError:
280
+ return basename
281
+
282
+ contexts = config.get("contexts", {})
283
+ context_config = contexts.get(context_name, {})
284
+ defaults = context_config.get("defaults", {})
285
+
286
+ matches = []
287
+
288
+ prompts_dir = defaults.get("prompts_dir", "")
289
+ if prompts_dir:
290
+ normalized = prompts_dir.rstrip("/")
291
+ prefix = normalized
292
+ if normalized == "prompts":
293
+ prefix = ""
294
+ elif normalized.startswith("prompts/"):
295
+ prefix = normalized[len("prompts/"):]
296
+
297
+ if prefix and (basename == prefix or basename.startswith(prefix + "/")):
298
+ relative = basename[len(prefix) + 1 :] if basename != prefix else basename.split("/")[-1]
299
+ matches.append((len(prefix), relative))
300
+
301
+ for pattern in context_config.get("paths", []):
302
+ pattern_base = pattern.rstrip("/**").rstrip("/*")
303
+ if fnmatch.fnmatch(basename, pattern) or \
304
+ basename.startswith(pattern_base + "/") or \
305
+ basename == pattern_base:
306
+ relative = _get_relative_basename(basename, pattern)
307
+ matches.append((len(pattern_base), relative))
308
+
309
+ if not matches:
310
+ return basename
311
+
312
+ matches.sort(key=lambda item: item[0], reverse=True)
313
+ return matches[0][1]
314
+
315
+
316
+ def _generate_paths_from_templates(
317
+ basename: str,
318
+ language: str,
319
+ extension: str,
320
+ outputs_config: Dict[str, Any],
321
+ prompt_path: str
322
+ ) -> Dict[str, Path]:
323
+ """
324
+ Generate file paths from template configuration.
325
+
326
+ This function is used by Issue #237 to support extensible output path patterns
327
+ for different project layouts (Next.js, Vue, Python backend, etc.).
328
+
329
+ Args:
330
+ basename: The relative basename (e.g., 'marketplace/AssetCard' or 'credit_helpers')
331
+ language: The full language name (e.g., 'python', 'typescript')
332
+ extension: The file extension (e.g., 'py', 'tsx')
333
+ outputs_config: The 'outputs' section from .pddrc context config
334
+ prompt_path: The prompt file path to use as fallback
335
+
336
+ Returns:
337
+ Dictionary mapping file types ('prompt', 'code', 'test', etc.) to Path objects
338
+ """
339
+ import logging
340
+ logger = logging.getLogger(__name__)
341
+
342
+ # Extract name parts for template context
343
+ parts = basename.split('/')
344
+ name = parts[-1] if parts else basename
345
+ category = '/'.join(parts[:-1]) if len(parts) > 1 else ''
346
+
347
+ # Build dir_prefix (for legacy template compatibility)
348
+ dir_prefix = '/'.join(parts[:-1]) + '/' if len(parts) > 1 else ''
349
+
350
+ # Build template context
351
+ template_context = {
352
+ 'name': name,
353
+ 'category': category,
354
+ 'dir_prefix': dir_prefix,
355
+ 'ext': extension,
356
+ 'language': language,
357
+ }
358
+
359
+ logger.debug(f"Template context: {template_context}")
360
+
361
+ result = {}
362
+
363
+ # Expand templates for each output type
364
+ for output_type, config in outputs_config.items():
365
+ if isinstance(config, dict) and 'path' in config:
366
+ template = config['path']
367
+ expanded = expand_template(template, template_context)
368
+ result[output_type] = Path(expanded)
369
+ logger.debug(f"Template {output_type}: {template} -> {expanded}")
370
+
371
+ # Ensure prompt is always present (fallback to provided prompt_path)
372
+ if 'prompt' not in result:
373
+ result['prompt'] = Path(prompt_path)
374
+
375
+ # Handle test_files for Bug #156 compatibility
376
+ if 'test' in result:
377
+ test_path = result['test']
378
+ test_dir_path = test_path.parent
379
+ test_stem = f"test_{name}"
380
+ if test_dir_path.exists():
381
+ matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
382
+ else:
383
+ matching_test_files = [test_path] if test_path.exists() else []
384
+ result['test_files'] = matching_test_files or [test_path]
385
+
386
+ return result
387
+
388
+
389
+ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts", context_override: Optional[str] = None) -> Dict[str, Path]:
213
390
  """Returns a dictionary mapping file types to their expected Path objects."""
391
+ import logging
392
+ logger = logging.getLogger(__name__)
393
+ logger.info(f"get_pdd_file_paths called: basename={basename}, language={language}, prompts_dir={prompts_dir}")
394
+
214
395
  try:
215
396
  # Use construct_paths to get configuration-aware paths
397
+ prompts_root = _resolve_prompts_root(prompts_dir)
216
398
  prompt_filename = f"{basename}_{language}.prompt"
217
- prompt_path = str(Path(prompts_dir) / prompt_filename)
399
+ prompt_path = str(prompts_root / prompt_filename)
400
+ pddrc_path = _find_pddrc_file()
401
+ if pddrc_path:
402
+ try:
403
+ config = _load_pddrc_config(pddrc_path)
404
+ context_name = context_override or _detect_context(Path.cwd(), config, None)
405
+ context_config = config.get('contexts', {}).get(context_name or '', {})
406
+ prompts_dir_config = context_config.get('defaults', {}).get('prompts_dir', '')
407
+ if prompts_dir_config:
408
+ normalized = prompts_dir_config.rstrip('/')
409
+ prefix = normalized
410
+ if normalized == 'prompts':
411
+ prefix = ''
412
+ elif normalized.startswith('prompts/'):
413
+ prefix = normalized[len('prompts/'):]
414
+ if prefix and not (basename == prefix or basename.startswith(prefix + '/')):
415
+ prompt_path = str(prompts_root / prefix / prompt_filename)
416
+ except ValueError:
417
+ pass
418
+ logger.info(f"Checking prompt_path={prompt_path}, exists={Path(prompt_path).exists()}")
218
419
 
219
- # Check if prompt file exists - if not, we can't proceed with construct_paths
420
+ # Check if prompt file exists - if not, we still need configuration-aware paths
220
421
  if not Path(prompt_path).exists():
221
- # Fall back to default path construction if prompt doesn't exist
422
+ # Use construct_paths with minimal inputs to get configuration-aware paths
423
+ # even when prompt doesn't exist
222
424
  extension = get_extension(language)
223
- return {
224
- 'prompt': Path(prompt_path),
225
- 'code': Path(f"{basename}.{extension}"),
226
- 'example': Path(f"{basename}_example.{extension}"),
227
- 'test': Path(f"test_{basename}.{extension}")
228
- }
425
+ try:
426
+ # Call construct_paths with empty input_file_paths to get configured output paths
427
+ resolved_config, _, output_paths, _ = construct_paths(
428
+ input_file_paths={}, # Empty dict since files don't exist yet
429
+ force=True,
430
+ quiet=True,
431
+ command="sync",
432
+ command_options={"basename": basename, "language": language},
433
+ context_override=context_override,
434
+ path_resolution_mode="cwd"
435
+ )
436
+
437
+ import logging
438
+ logger = logging.getLogger(__name__)
439
+ logger.info(f"resolved_config: {resolved_config}")
440
+ logger.info(f"output_paths: {output_paths}")
441
+
442
+ # Issue #237: Check for 'outputs' config for template-based path generation
443
+ outputs_config = resolved_config.get('outputs')
444
+ if outputs_config:
445
+ logger.info(f"Using template-based paths from outputs config")
446
+ context_name = context_override or resolved_config.get('_matched_context')
447
+ basename_for_templates = _relative_basename_for_context(basename, context_name)
448
+ result = _generate_paths_from_templates(
449
+ basename=basename_for_templates,
450
+ language=language,
451
+ extension=extension,
452
+ outputs_config=outputs_config,
453
+ prompt_path=prompt_path
454
+ )
455
+ logger.debug(f"get_pdd_file_paths returning (template-based): {result}")
456
+ return result
457
+
458
+ # Legacy path construction (backwards compatibility)
459
+ # Extract directory configuration from resolved_config
460
+ # Note: construct_paths sets tests_dir, examples_dir, code_dir keys
461
+ test_dir = resolved_config.get('tests_dir', 'tests/')
462
+ example_dir = resolved_config.get('examples_dir', 'examples/')
463
+ code_dir = resolved_config.get('code_dir', './')
464
+
465
+ logger.info(f"Extracted dirs - test: {test_dir}, example: {example_dir}, code: {code_dir}")
466
+
467
+ # Ensure directories end with /
468
+ if test_dir and not test_dir.endswith('/'):
469
+ test_dir = test_dir + '/'
470
+ if example_dir and not example_dir.endswith('/'):
471
+ example_dir = example_dir + '/'
472
+ if code_dir and not code_dir.endswith('/'):
473
+ code_dir = code_dir + '/'
474
+
475
+ # Extract directory and name parts for subdirectory basename support
476
+ dir_prefix, name_part = _extract_name_part(basename)
477
+
478
+ # Get explicit config paths (these are the SOURCE OF TRUTH when configured)
479
+ # These should be used directly, NOT combined with dir_prefix
480
+ generate_output_path = resolved_config.get('generate_output_path', '')
481
+ example_output_path = resolved_config.get('example_output_path', '')
482
+ test_output_path = resolved_config.get('test_output_path', '')
483
+
484
+ # Construct paths: use explicit config paths directly when configured,
485
+ # otherwise fall back to old behavior with dir_prefix for backwards compat
486
+
487
+ # Code path
488
+ if generate_output_path and generate_output_path.endswith('/'):
489
+ # Explicit complete directory - use directly with just filename
490
+ code_path = f"{generate_output_path}{name_part}.{extension}"
491
+ else:
492
+ # Old behavior - use code_dir + dir_prefix
493
+ code_path = f"{code_dir}{dir_prefix}{name_part}.{extension}"
494
+
495
+ # Example path
496
+ if example_output_path and example_output_path.endswith('/'):
497
+ # Explicit complete directory - use directly with just filename
498
+ example_path = f"{example_output_path}{name_part}_example.{extension}"
499
+ else:
500
+ # Old behavior - use example_dir + dir_prefix
501
+ example_path = f"{example_dir}{dir_prefix}{name_part}_example.{extension}"
502
+
503
+ # Test path
504
+ if test_output_path and test_output_path.endswith('/'):
505
+ # Explicit complete directory - use directly with just filename
506
+ test_path = f"{test_output_path}test_{name_part}.{extension}"
507
+ else:
508
+ # Old behavior - use test_dir + dir_prefix
509
+ test_path = f"{test_dir}{dir_prefix}test_{name_part}.{extension}"
510
+
511
+ logger.debug(f"Final paths: test={test_path}, example={example_path}, code={code_path}")
512
+
513
+ # Convert to Path objects
514
+ test_path = Path(test_path)
515
+ example_path = Path(example_path)
516
+ code_path = Path(code_path)
517
+
518
+ # Bug #156: Find all matching test files
519
+ test_dir_path = test_path.parent
520
+ test_stem = f"test_{name_part}"
521
+ if test_dir_path.exists():
522
+ matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
523
+ else:
524
+ matching_test_files = [test_path] if test_path.exists() else []
525
+
526
+ result = {
527
+ 'prompt': Path(prompt_path),
528
+ 'code': code_path,
529
+ 'example': example_path,
530
+ 'test': test_path,
531
+ 'test_files': matching_test_files or [test_path] # Bug #156
532
+ }
533
+ logger.debug(f"get_pdd_file_paths returning (prompt missing): test={test_path}")
534
+ return result
535
+ except Exception as e:
536
+ # If construct_paths fails, fall back to current directory paths
537
+ # This maintains backward compatibility
538
+ import logging
539
+ logger = logging.getLogger(__name__)
540
+ logger.debug(f"construct_paths failed for non-existent prompt, using defaults: {e}")
541
+ dir_prefix, name_part = _extract_name_part(basename)
542
+ fallback_test_path = Path(f"{dir_prefix}test_{name_part}.{extension}")
543
+ # Bug #156: Find matching test files even in fallback
544
+ if Path('.').exists():
545
+ fallback_matching = sorted(Path('.').glob(f"{dir_prefix}test_{name_part}*.{extension}"))
546
+ else:
547
+ fallback_matching = [fallback_test_path] if fallback_test_path.exists() else []
548
+ return {
549
+ 'prompt': Path(prompt_path),
550
+ 'code': Path(f"{dir_prefix}{name_part}.{extension}"),
551
+ 'example': Path(f"{dir_prefix}{name_part}_example.{extension}"),
552
+ 'test': fallback_test_path,
553
+ 'test_files': fallback_matching or [fallback_test_path] # Bug #156
554
+ }
229
555
 
230
556
  input_file_paths = {
231
557
  "prompt_file": prompt_path
232
558
  }
233
559
 
234
- # Only call construct_paths if the prompt file exists
560
+ # Call construct_paths to get configuration-aware paths
235
561
  resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
236
562
  input_file_paths=input_file_paths,
237
563
  force=True, # Use force=True to avoid interactive prompts during sync
238
564
  quiet=True,
239
- command="generate",
240
- command_options={}
565
+ command="sync", # Use sync command to get more tolerant path handling
566
+ command_options={"basename": basename, "language": language},
567
+ context_override=context_override,
568
+ path_resolution_mode="cwd"
241
569
  )
242
-
243
- # Extract paths from config as specified in the spec
244
- # The spec shows: return { 'prompt': Path(config['prompt_file']), ... }
245
- # But we need to map the output_file_paths keys to our expected structure
246
-
247
- # For generate command, construct_paths returns these in output_file_paths:
248
- # - 'output' or 'code_file' for the generated code
249
- # For other commands, we need to construct the full set of paths
250
-
251
- # Get the code file path from output_file_paths
252
- code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
570
+
571
+ # Issue #237: Check for 'outputs' config for template-based path generation
572
+ # This must be checked even when prompt EXISTS (not just when it doesn't exist)
573
+ outputs_config = resolved_config.get('outputs')
574
+ if outputs_config:
575
+ extension = get_extension(language)
576
+ logger.info(f"Using template-based paths from outputs config (prompt exists)")
577
+ context_name = context_override or resolved_config.get('_matched_context')
578
+ basename_for_templates = _relative_basename_for_context(basename, context_name)
579
+ result = _generate_paths_from_templates(
580
+ basename=basename_for_templates,
581
+ language=language,
582
+ extension=extension,
583
+ outputs_config=outputs_config,
584
+ prompt_path=prompt_path
585
+ )
586
+ logger.debug(f"get_pdd_file_paths returning (template-based, prompt exists): {result}")
587
+ return result
588
+
589
+ # For sync command, output_file_paths contains the configured paths
590
+ # Extract the code path from output_file_paths
591
+ code_path = output_file_paths.get('generate_output_path', '')
592
+ if not code_path:
593
+ # Try other possible keys
594
+ code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
253
595
  if not code_path:
254
- # Fallback to constructing from basename
596
+ # Fallback to constructing from basename with configuration
255
597
  extension = get_extension(language)
256
- code_path = f"{basename}.{extension}"
598
+ generate_output_path = resolved_config.get('generate_output_path', '')
599
+ dir_prefix, name_part = _extract_name_part(basename)
600
+
601
+ # Use explicit config path directly when configured (ending with /)
602
+ if generate_output_path and generate_output_path.endswith('/'):
603
+ code_path = f"{generate_output_path}{name_part}.{extension}"
604
+ else:
605
+ # Old behavior - use path + dir_prefix
606
+ code_dir = generate_output_path or './'
607
+ if not code_dir.endswith('/'):
608
+ code_dir = code_dir + '/'
609
+ code_path = f"{code_dir}{dir_prefix}{name_part}.{extension}"
257
610
 
258
611
  # Get configured paths for example and test files using construct_paths
259
612
  # Note: construct_paths requires files to exist, so we need to handle the case
@@ -269,18 +622,32 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
269
622
 
270
623
  try:
271
624
  # Get example path using example command
625
+ # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
626
+ # Pass basename in command_options to preserve subdirectory structure
272
627
  _, _, example_output_paths, _ = construct_paths(
273
628
  input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
274
- force=True, quiet=True, command="example", command_options={}
275
- )
276
- example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
277
-
278
- # Get test path using test command
279
- _, _, test_output_paths, _ = construct_paths(
280
- input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
281
- force=True, quiet=True, command="test", command_options={}
629
+ force=True, quiet=True, command="example",
630
+ command_options={"basename": basename},
631
+ context_override=context_override,
632
+ path_resolution_mode="cwd"
282
633
  )
283
- test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
634
+ dir_prefix, name_part = _extract_name_part(basename)
635
+ example_path = Path(example_output_paths.get('output', f"{dir_prefix}{name_part}_example.{get_extension(language)}"))
636
+
637
+ # Get test path using test command - handle case where test file doesn't exist yet
638
+ # Pass basename in command_options to preserve subdirectory structure
639
+ try:
640
+ _, _, test_output_paths, _ = construct_paths(
641
+ input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
642
+ force=True, quiet=True, command="test",
643
+ command_options={"basename": basename},
644
+ context_override=context_override,
645
+ path_resolution_mode="cwd"
646
+ )
647
+ test_path = Path(test_output_paths.get('output', f"{dir_prefix}test_{name_part}.{get_extension(language)}"))
648
+ except FileNotFoundError:
649
+ # Test file doesn't exist yet - create default path
650
+ test_path = Path(f"{dir_prefix}test_{name_part}.{get_extension(language)}")
284
651
 
285
652
  finally:
286
653
  # Clean up temporary file if we created it
@@ -298,17 +665,30 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
298
665
  # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
299
666
  try:
300
667
  # Get configured directories by using construct_paths with just the prompt file
668
+ # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
669
+ # Pass basename in command_options to preserve subdirectory structure
301
670
  _, _, example_output_paths, _ = construct_paths(
302
671
  input_file_paths={"prompt_file": prompt_path},
303
- force=True, quiet=True, command="example", command_options={}
672
+ force=True, quiet=True, command="example",
673
+ command_options={"basename": basename},
674
+ context_override=context_override,
675
+ path_resolution_mode="cwd"
304
676
  )
305
- example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
306
-
307
- _, _, test_output_paths, _ = construct_paths(
308
- input_file_paths={"prompt_file": prompt_path},
309
- force=True, quiet=True, command="test", command_options={}
310
- )
311
- test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
677
+ dir_prefix, name_part = _extract_name_part(basename)
678
+ example_path = Path(example_output_paths.get('output', f"{dir_prefix}{name_part}_example.{get_extension(language)}"))
679
+
680
+ try:
681
+ _, _, test_output_paths, _ = construct_paths(
682
+ input_file_paths={"prompt_file": prompt_path},
683
+ force=True, quiet=True, command="test",
684
+ command_options={"basename": basename},
685
+ context_override=context_override,
686
+ path_resolution_mode="cwd"
687
+ )
688
+ test_path = Path(test_output_paths.get('output', f"{dir_prefix}test_{name_part}.{get_extension(language)}"))
689
+ except Exception:
690
+ # If test path construction fails, use default naming
691
+ test_path = Path(f"{dir_prefix}test_{name_part}.{get_extension(language)}")
312
692
 
313
693
  except Exception:
314
694
  # Final fallback to deriving from code path if all else fails
@@ -319,21 +699,50 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
319
699
  example_path = code_dir / f"{code_stem}_example{code_ext}"
320
700
  test_path = code_dir / f"test_{code_stem}{code_ext}"
321
701
 
702
+ # Ensure all paths are Path objects
703
+ if isinstance(code_path, str):
704
+ code_path = Path(code_path)
705
+
706
+ # Keep paths as they are (absolute or relative as returned by construct_paths)
707
+ # This ensures consistency with how construct_paths expects them
708
+
709
+ # Bug #156: Find all matching test files
710
+ test_dir = test_path.parent
711
+ _, name_part_for_glob = _extract_name_part(basename)
712
+ test_stem = f"test_{name_part_for_glob}"
713
+ extension = get_extension(language)
714
+ if test_dir.exists():
715
+ matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
716
+ else:
717
+ matching_test_files = [test_path] if test_path.exists() else []
718
+
322
719
  return {
323
720
  'prompt': Path(prompt_path),
324
- 'code': Path(code_path),
721
+ 'code': code_path,
325
722
  'example': example_path,
326
- 'test': test_path
723
+ 'test': test_path,
724
+ 'test_files': matching_test_files or [test_path] # Bug #156: All matching test files
327
725
  }
328
-
726
+
329
727
  except Exception as e:
330
728
  # Fallback to simple naming if construct_paths fails
331
729
  extension = get_extension(language)
730
+ dir_prefix, name_part = _extract_name_part(basename)
731
+ test_path = Path(f"{dir_prefix}test_{name_part}.{extension}")
732
+ # Bug #156: Try to find matching test files even in fallback
733
+ test_dir = Path('.')
734
+ test_stem = f"{dir_prefix}test_{name_part}"
735
+ if test_dir.exists():
736
+ matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
737
+ else:
738
+ matching_test_files = [test_path] if test_path.exists() else []
739
+ prompts_root = _resolve_prompts_root(prompts_dir)
332
740
  return {
333
- 'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
334
- 'code': Path(f"{basename}.{extension}"),
335
- 'example': Path(f"{basename}_example.{extension}"),
336
- 'test': Path(f"test_{basename}.{extension}")
741
+ 'prompt': prompts_root / f"{basename}_{language}.prompt",
742
+ 'code': Path(f"{dir_prefix}{name_part}.{extension}"),
743
+ 'example': Path(f"{dir_prefix}{name_part}_example.{extension}"),
744
+ 'test': test_path,
745
+ 'test_files': matching_test_files or [test_path] # Bug #156: All matching test files
337
746
  }
338
747
 
339
748
 
@@ -356,7 +765,7 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
356
765
  """Reads and validates the JSON fingerprint file."""
357
766
  meta_dir = get_meta_dir()
358
767
  meta_dir.mkdir(parents=True, exist_ok=True)
359
- fingerprint_file = meta_dir / f"{basename}_{language}.json"
768
+ fingerprint_file = meta_dir / f"{_safe_basename(basename)}_{language}.json"
360
769
 
361
770
  if not fingerprint_file.exists():
362
771
  return None
@@ -372,7 +781,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
372
781
  prompt_hash=data.get('prompt_hash'),
373
782
  code_hash=data.get('code_hash'),
374
783
  example_hash=data.get('example_hash'),
375
- test_hash=data.get('test_hash')
784
+ test_hash=data.get('test_hash'),
785
+ test_files=data.get('test_files') # Bug #156
376
786
  )
377
787
  except (json.JSONDecodeError, KeyError, IOError):
378
788
  return None
@@ -382,7 +792,7 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
382
792
  """Reads and validates the JSON run report file."""
383
793
  meta_dir = get_meta_dir()
384
794
  meta_dir.mkdir(parents=True, exist_ok=True)
385
- run_report_file = meta_dir / f"{basename}_{language}_run.json"
795
+ run_report_file = meta_dir / f"{_safe_basename(basename)}_{language}_run.json"
386
796
 
387
797
  if not run_report_file.exists():
388
798
  return None
@@ -396,19 +806,29 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
396
806
  exit_code=data['exit_code'],
397
807
  tests_passed=data['tests_passed'],
398
808
  tests_failed=data['tests_failed'],
399
- coverage=data['coverage']
809
+ coverage=data['coverage'],
810
+ test_hash=data.get('test_hash'), # Optional for backward compatibility
811
+ test_files=data.get('test_files') # Bug #156
400
812
  )
401
813
  except (json.JSONDecodeError, KeyError, IOError):
402
814
  return None
403
815
 
404
816
 
405
- def calculate_current_hashes(paths: Dict[str, Path]) -> Dict[str, Optional[str]]:
817
+ def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
406
818
  """Computes the hashes for all current files on disk."""
407
819
  # Return hash keys that match what the fingerprint expects
408
- return {
409
- f"{file_type}_hash": calculate_sha256(file_path)
410
- for file_type, file_path in paths.items()
411
- }
820
+ hashes = {}
821
+ for file_type, file_path in paths.items():
822
+ if file_type == 'test_files':
823
+ # Bug #156: Calculate hashes for all test files
824
+ hashes['test_files'] = {
825
+ f.name: calculate_sha256(f)
826
+ for f in file_path
827
+ if isinstance(f, Path) and f.exists()
828
+ }
829
+ elif isinstance(file_path, Path):
830
+ hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
831
+ return hashes
412
832
 
413
833
 
414
834
  def get_git_diff(file_path: Path) -> str:
@@ -438,6 +858,7 @@ def estimate_operation_cost(operation: str, language: str = "python") -> float:
438
858
  'crash': 0.40,
439
859
  'verify': 0.35,
440
860
  'test': 0.60,
861
+ 'test_extend': 0.60, # Same cost as test - generates additional tests
441
862
  'fix': 0.45,
442
863
  'update': 0.25,
443
864
  'analyze_conflict': 0.20,
@@ -591,24 +1012,103 @@ def _handle_missing_expected_files(
591
1012
  )
592
1013
 
593
1014
 
594
- def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False) -> bool:
1015
+ def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False,
1016
+ basename: str = None, language: str = None) -> bool:
595
1017
  """
596
1018
  Check if workflow is complete considering skip flags.
597
-
1019
+
598
1020
  Args:
599
1021
  paths: Dict mapping file types to their expected Path objects
600
1022
  skip_tests: If True, test files are not required for completion
601
1023
  skip_verify: If True, verification operations are not required
602
-
1024
+ basename: Module basename (required for run_report check)
1025
+ language: Module language (required for run_report check)
1026
+
603
1027
  Returns:
604
- True if all required files exist for the current workflow configuration
1028
+ True if all required files exist AND have been validated (run_report exists)
605
1029
  """
606
1030
  required_files = ['code', 'example']
607
-
1031
+
608
1032
  if not skip_tests:
609
1033
  required_files.append('test')
610
-
611
- return all(paths[f].exists() for f in required_files)
1034
+
1035
+ # Check all required files exist
1036
+ if not all(paths[f].exists() for f in required_files):
1037
+ return False
1038
+
1039
+ # Also check that run_report exists and code works (exit_code == 0)
1040
+ # Without this, newly generated code would incorrectly be marked as "complete"
1041
+ if basename and language:
1042
+ run_report = read_run_report(basename, language)
1043
+ if not run_report or run_report.exit_code != 0:
1044
+ return False
1045
+
1046
+ # Check that run_report corresponds to current test files (staleness detection)
1047
+ # If any test file changed since run_report was created, we can't trust the results
1048
+ if not skip_tests:
1049
+ # Bug #156: Check ALL test files, not just the primary one
1050
+ if 'test_files' in paths and run_report.test_files:
1051
+ # New multi-file comparison
1052
+ current_test_hashes = {
1053
+ f.name: calculate_sha256(f)
1054
+ for f in paths['test_files']
1055
+ if f.exists()
1056
+ }
1057
+ stored_test_hashes = run_report.test_files
1058
+
1059
+ # Check if any test file changed or new ones added/removed
1060
+ if set(current_test_hashes.keys()) != set(stored_test_hashes.keys()):
1061
+ return False # Test files added or removed
1062
+
1063
+ for fname, current_hash in current_test_hashes.items():
1064
+ if stored_test_hashes.get(fname) != current_hash:
1065
+ return False # Test file content changed
1066
+ elif 'test' in paths and paths['test'].exists():
1067
+ # Backward compat: single file check
1068
+ current_test_hash = calculate_sha256(paths['test'])
1069
+ if run_report.test_hash and current_test_hash != run_report.test_hash:
1070
+ # run_report was created for a different version of the test file
1071
+ return False
1072
+ if not run_report.test_hash:
1073
+ # Legacy run_report without test_hash - check fingerprint timestamp as fallback
1074
+ fingerprint = read_fingerprint(basename, language)
1075
+ if fingerprint:
1076
+ # If fingerprint is newer than run_report, run_report might be stale
1077
+ from datetime import datetime
1078
+ try:
1079
+ fp_time = datetime.fromisoformat(fingerprint.timestamp.replace('Z', '+00:00'))
1080
+ rr_time = datetime.fromisoformat(run_report.timestamp.replace('Z', '+00:00'))
1081
+ if fp_time > rr_time:
1082
+ return False # run_report predates fingerprint, might be stale
1083
+ except (ValueError, AttributeError):
1084
+ pass # If timestamps can't be parsed, skip this check
1085
+
1086
+ # Check verify has been done (unless skip_verify)
1087
+ # Without this, workflow would be "complete" after crash even though verify hasn't run
1088
+ # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
1089
+ if not skip_verify:
1090
+ fingerprint = read_fingerprint(basename, language)
1091
+ if fingerprint:
1092
+ # If command starts with 'skip:', the operation was skipped, not completed
1093
+ if fingerprint.command.startswith('skip:'):
1094
+ return False
1095
+ if fingerprint.command not in ['verify', 'test', 'fix', 'update']:
1096
+ return False
1097
+
1098
+ # CRITICAL FIX: Check tests have been run (unless skip_tests)
1099
+ # Without this, workflow would be "complete" after verify even though tests haven't run
1100
+ # This prevents false positive success when skip_verify=True but tests are still required
1101
+ # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
1102
+ if not skip_tests:
1103
+ fp = read_fingerprint(basename, language)
1104
+ if fp:
1105
+ # If command starts with 'skip:', the operation was skipped, not completed
1106
+ if fp.command.startswith('skip:'):
1107
+ return False
1108
+ if fp.command not in ['test', 'fix', 'update']:
1109
+ return False
1110
+
1111
+ return True
612
1112
 
613
1113
 
614
1114
  def check_for_dependencies(prompt_content: str) -> bool:
@@ -670,7 +1170,7 @@ def _check_example_success_history(basename: str, language: str) -> bool:
670
1170
 
671
1171
  # Strategy 2b: Look for historical run reports with exit_code == 0
672
1172
  # Check all run report files in the meta directory that match the pattern
673
- run_report_pattern = f"{basename}_{language}_run"
1173
+ run_report_pattern = f"{_safe_basename(basename)}_{language}_run"
674
1174
  for file in meta_dir.glob(f"{run_report_pattern}*.json"):
675
1175
  try:
676
1176
  with open(file, 'r') as f:
@@ -693,7 +1193,7 @@ def _check_example_success_history(basename: str, language: str) -> bool:
693
1193
  return False
694
1194
 
695
1195
 
696
- def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
1196
+ def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
697
1197
  """
698
1198
  Core decision-making function for sync operations with skip flag awareness.
699
1199
 
@@ -713,14 +1213,14 @@ def sync_determine_operation(basename: str, language: str, target_coverage: floa
713
1213
 
714
1214
  if log_mode:
715
1215
  # Skip locking for read-only analysis
716
- return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
1216
+ return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
717
1217
  else:
718
1218
  # Normal exclusive locking for actual operations
719
1219
  with SyncLock(basename, language) as lock:
720
- return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
1220
+ return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
721
1221
 
722
1222
 
723
- def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
1223
+ def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
724
1224
  """
725
1225
  Perform the sync state analysis without locking concerns.
726
1226
 
@@ -749,26 +1249,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
749
1249
 
750
1250
  # Read fingerprint early since we need it for crash verification
751
1251
  fingerprint = read_fingerprint(basename, language)
752
-
1252
+
1253
+ # Check if auto-deps just completed - ALWAYS regenerate code after auto-deps
1254
+ # This must be checked early, before any run_report processing, because:
1255
+ # 1. Old run_report (if exists) is stale and should be ignored
1256
+ # 2. auto-deps updates dependencies but doesn't regenerate code
1257
+ if fingerprint and fingerprint.command == 'auto-deps':
1258
+ paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
1259
+ return SyncDecision(
1260
+ operation='generate',
1261
+ reason='Auto-deps completed - regenerate code with updated prompt',
1262
+ confidence=0.90,
1263
+ estimated_cost=estimate_operation_cost('generate'),
1264
+ details={
1265
+ 'decision_type': 'heuristic',
1266
+ 'previous_command': 'auto-deps',
1267
+ 'code_exists': paths['code'].exists() if paths.get('code') else False,
1268
+ 'regenerate_after_autodeps': True
1269
+ }
1270
+ )
1271
+
753
1272
  run_report = read_run_report(basename, language)
754
- if run_report:
755
- # Check test failures first (higher priority than exit code)
756
- if run_report.tests_failed > 0:
757
- return SyncDecision(
758
- operation='fix',
759
- reason=f'Test failures detected: {run_report.tests_failed} failed tests',
760
- confidence=0.90,
761
- estimated_cost=estimate_operation_cost('fix'),
762
- details={
763
- 'decision_type': 'heuristic',
764
- 'tests_failed': run_report.tests_failed,
765
- 'exit_code': run_report.exit_code,
766
- 'coverage': run_report.coverage
767
- }
768
- )
769
-
770
- # Check if we just completed a crash operation and need verification
1273
+ # Only process runtime signals (crash/fix/test) if we have a fingerprint
1274
+ # Without a fingerprint, run_report is stale/orphaned and should be ignored
1275
+ if run_report and fingerprint:
1276
+ # Check for prompt changes FIRST - prompt changes take priority over runtime signals
1277
+ # If the user modified the prompt, we need to regenerate regardless of runtime state
1278
+ if fingerprint:
1279
+ paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
1280
+ current_prompt_hash = calculate_sha256(paths['prompt'])
1281
+ if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
1282
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
1283
+ has_deps = check_for_dependencies(prompt_content)
1284
+ return SyncDecision(
1285
+ operation='auto-deps' if has_deps else 'generate',
1286
+ reason='Prompt changed - regenerating (takes priority over runtime signals)',
1287
+ confidence=0.95,
1288
+ estimated_cost=estimate_operation_cost('generate'),
1289
+ details={
1290
+ 'decision_type': 'heuristic',
1291
+ 'prompt_changed': True,
1292
+ 'previous_command': fingerprint.command,
1293
+ 'runtime_state_ignored': True
1294
+ }
1295
+ )
1296
+
1297
+ # Check if we just completed a crash operation and need verification FIRST
1298
+ # This takes priority over test failures because we need to verify the crash fix worked
1299
+ # BUT only proceed to verify if exit_code == 0 (crash fix succeeded)
771
1300
  if fingerprint and fingerprint.command == 'crash' and not skip_verify:
1301
+ if run_report.exit_code != 0:
1302
+ # Crash fix didn't work - need to re-run crash
1303
+ return SyncDecision(
1304
+ operation='crash',
1305
+ reason=f'Previous crash operation failed (exit_code={run_report.exit_code}) - retry crash fix',
1306
+ confidence=0.90,
1307
+ estimated_cost=estimate_operation_cost('crash'),
1308
+ details={
1309
+ 'decision_type': 'heuristic',
1310
+ 'previous_command': 'crash',
1311
+ 'exit_code': run_report.exit_code,
1312
+ 'workflow_stage': 'crash_retry'
1313
+ }
1314
+ )
772
1315
  return SyncDecision(
773
1316
  operation='verify',
774
1317
  reason='Previous crash operation completed - verify example runs correctly',
@@ -782,6 +1325,41 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
782
1325
  }
783
1326
  )
784
1327
 
1328
+ # Check test failures (after crash verification check)
1329
+ if run_report.tests_failed > 0:
1330
+ # First check if the test file actually exists
1331
+ pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
1332
+ test_file = pdd_files.get('test')
1333
+
1334
+ # Only suggest 'fix' if test file exists
1335
+ if test_file and test_file.exists():
1336
+ return SyncDecision(
1337
+ operation='fix',
1338
+ reason=f'Test failures detected: {run_report.tests_failed} failed tests',
1339
+ confidence=0.90,
1340
+ estimated_cost=estimate_operation_cost('fix'),
1341
+ details={
1342
+ 'decision_type': 'heuristic',
1343
+ 'tests_failed': run_report.tests_failed,
1344
+ 'exit_code': run_report.exit_code,
1345
+ 'coverage': run_report.coverage
1346
+ }
1347
+ )
1348
+ # If test file doesn't exist but we have test failures in run report,
1349
+ # we need to generate the test first
1350
+ else:
1351
+ return SyncDecision(
1352
+ operation='test',
1353
+ reason='Test failures reported but test file missing - need to generate tests',
1354
+ confidence=0.85,
1355
+ estimated_cost=estimate_operation_cost('test'),
1356
+ details={
1357
+ 'decision_type': 'heuristic',
1358
+ 'run_report_shows_failures': True,
1359
+ 'test_file_exists': False
1360
+ }
1361
+ )
1362
+
785
1363
  # Then check for runtime crashes (only if no test failures)
786
1364
  if run_report.exit_code != 0:
787
1365
  # Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
@@ -833,6 +1411,23 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
833
1411
  'skip_tests': True
834
1412
  }
835
1413
  )
1414
+ elif run_report.tests_failed == 0 and run_report.tests_passed > 0:
1415
+ # Tests pass but coverage is below target
1416
+ # Return 'test_extend' to signal we need to ADD more tests, not regenerate
1417
+ return SyncDecision(
1418
+ operation='test_extend',
1419
+ reason=f'Tests pass ({run_report.tests_passed} passed) but coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% - extending tests',
1420
+ confidence=0.85,
1421
+ estimated_cost=estimate_operation_cost('test'),
1422
+ details={
1423
+ 'decision_type': 'heuristic',
1424
+ 'current_coverage': run_report.coverage,
1425
+ 'target_coverage': target_coverage,
1426
+ 'tests_passed': run_report.tests_passed,
1427
+ 'tests_failed': run_report.tests_failed,
1428
+ 'extend_tests': True
1429
+ }
1430
+ )
836
1431
  else:
837
1432
  return SyncDecision(
838
1433
  operation='test',
@@ -849,7 +1444,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
849
1444
  )
850
1445
 
851
1446
  # 2. Analyze File State
852
- paths = get_pdd_file_paths(basename, language, prompts_dir)
1447
+ paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
853
1448
  current_hashes = calculate_current_hashes(paths)
854
1449
 
855
1450
  # 3. Implement the Decision Tree
@@ -926,7 +1521,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
926
1521
 
927
1522
  if not changes:
928
1523
  # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
929
- if _is_workflow_complete(paths, skip_tests, skip_verify):
1524
+ if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
930
1525
  return SyncDecision(
931
1526
  operation='nothing',
932
1527
  reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
@@ -939,7 +1534,63 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
939
1534
  'workflow_complete': True
940
1535
  }
941
1536
  )
942
-
1537
+
1538
+ # Handle incomplete workflow when all files exist (including test)
1539
+ # This addresses the blind spot where crash/verify/test logic only runs when test is missing
1540
+ if (paths['code'].exists() and paths['example'].exists() and paths['test'].exists()):
1541
+ run_report = read_run_report(basename, language)
1542
+
1543
+ # BUG 4 & 1: No run_report OR crash detected (exit_code != 0)
1544
+ if not run_report or run_report.exit_code != 0:
1545
+ return SyncDecision(
1546
+ operation='crash',
1547
+ reason='All files exist but needs validation' +
1548
+ (' - no run_report' if not run_report else f' - exit_code={run_report.exit_code}'),
1549
+ confidence=0.85,
1550
+ estimated_cost=estimate_operation_cost('crash'),
1551
+ details={
1552
+ 'decision_type': 'heuristic',
1553
+ 'all_files_exist': True,
1554
+ 'run_report_missing': not run_report,
1555
+ 'exit_code': None if not run_report else run_report.exit_code,
1556
+ 'workflow_stage': 'post_regeneration_validation'
1557
+ }
1558
+ )
1559
+
1560
+ # BUG 2: Verify not run yet (run_report exists, exit_code=0, but command != verify/test)
1561
+ if fingerprint and fingerprint.command not in ['verify', 'test', 'fix', 'update'] and not skip_verify:
1562
+ return SyncDecision(
1563
+ operation='verify',
1564
+ reason='All files exist but verification not completed',
1565
+ confidence=0.85,
1566
+ estimated_cost=estimate_operation_cost('verify'),
1567
+ details={
1568
+ 'decision_type': 'heuristic',
1569
+ 'all_files_exist': True,
1570
+ 'last_command': fingerprint.command,
1571
+ 'workflow_stage': 'verification_pending'
1572
+ }
1573
+ )
1574
+
1575
+ # Stale run_report detected: _is_workflow_complete returned False but all other conditions passed
1576
+ # This happens when run_report.test_hash doesn't match current test file, or
1577
+ # when fingerprint timestamp > run_report timestamp (legacy detection)
1578
+ # Need to re-run tests to get accurate results
1579
+ if run_report and run_report.exit_code == 0:
1580
+ return SyncDecision(
1581
+ operation='test',
1582
+ reason='Run report is stale - need to re-run tests to verify current state',
1583
+ confidence=0.9,
1584
+ estimated_cost=estimate_operation_cost('test'),
1585
+ details={
1586
+ 'decision_type': 'heuristic',
1587
+ 'all_files_exist': True,
1588
+ 'run_report_stale': True,
1589
+ 'run_report_test_hash': run_report.test_hash,
1590
+ 'workflow_stage': 'revalidation'
1591
+ }
1592
+ )
1593
+
943
1594
  # Progress workflow considering skip flags
944
1595
  if paths['code'].exists() and not paths['example'].exists():
945
1596
  return SyncDecision(
@@ -960,8 +1611,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
960
1611
 
961
1612
  # Check if example has been crash-tested and verified before allowing test generation
962
1613
  run_report = read_run_report(basename, language)
963
- if not run_report:
1614
+ if not run_report and not skip_verify:
964
1615
  # No run report exists - need to test the example first
1616
+ # But if skip_verify is True, skip crash/verify and go to test generation
965
1617
  return SyncDecision(
966
1618
  operation='crash',
967
1619
  reason='Example exists but needs runtime testing before test generation',
@@ -975,8 +1627,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
975
1627
  'workflow_stage': 'crash_validation'
976
1628
  }
977
1629
  )
978
- elif run_report.exit_code != 0:
1630
+ elif run_report and run_report.exit_code != 0 and not skip_verify:
979
1631
  # Example crashed - fix it before proceeding
1632
+ # But if skip_verify is True, skip crash fix and proceed
980
1633
  return SyncDecision(
981
1634
  operation='crash',
982
1635
  reason='Example crashes - fix runtime errors before test generation',
@@ -1023,6 +1676,22 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
1023
1676
  # Some files are missing but no changes detected
1024
1677
  if not paths['code'].exists():
1025
1678
  if paths['prompt'].exists():
1679
+ # CRITICAL FIX: Check if auto-deps was just completed to prevent infinite loop
1680
+ if fingerprint and fingerprint.command == 'auto-deps':
1681
+ return SyncDecision(
1682
+ operation='generate',
1683
+ reason='Auto-deps completed, now generate missing code file',
1684
+ confidence=0.90,
1685
+ estimated_cost=estimate_operation_cost('generate'),
1686
+ details={
1687
+ 'decision_type': 'heuristic',
1688
+ 'prompt_path': str(paths['prompt']),
1689
+ 'code_exists': False,
1690
+ 'auto_deps_completed': True,
1691
+ 'previous_command': fingerprint.command
1692
+ }
1693
+ )
1694
+
1026
1695
  prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
1027
1696
  if check_for_dependencies(prompt_content):
1028
1697
  return SyncDecision(
@@ -1124,18 +1793,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
1124
1793
  )
1125
1794
 
1126
1795
  else:
1127
- # Complex Changes (Multiple Files Modified / Conflicts)
1128
- return SyncDecision(
1129
- operation='analyze_conflict',
1130
- reason='Multiple files changed - requires conflict analysis',
1131
- confidence=0.70,
1132
- estimated_cost=estimate_operation_cost('analyze_conflict'),
1133
- details={
1134
- 'decision_type': 'heuristic',
1135
- 'changed_files': changes,
1136
- 'num_changes': len(changes)
1137
- }
1138
- )
1796
+ # Complex Changes (Multiple Files Modified)
1797
+ # CRITICAL: Only treat as conflict if prompt changed along with derived artifacts
1798
+ # If only derived artifacts changed (code, example, test), this is NOT a conflict
1799
+ # per PDD doctrine - all are derived from the unchanged prompt
1800
+
1801
+ if 'prompt' in changes:
1802
+ # True conflict: prompt (source of truth) changed along with derived artifacts
1803
+ return SyncDecision(
1804
+ operation='analyze_conflict',
1805
+ reason='Prompt and derived files changed - requires conflict analysis',
1806
+ confidence=0.70,
1807
+ estimated_cost=estimate_operation_cost('analyze_conflict'),
1808
+ details={
1809
+ 'decision_type': 'heuristic',
1810
+ 'changed_files': changes,
1811
+ 'num_changes': len(changes),
1812
+ 'prompt_changed': True
1813
+ }
1814
+ )
1815
+ else:
1816
+ # Only derived artifacts changed - prompt (source of truth) is unchanged
1817
+ # Continue workflow from where it was interrupted
1818
+
1819
+ # If code changed, need to re-verify
1820
+ if 'code' in changes:
1821
+ return SyncDecision(
1822
+ operation='verify',
1823
+ reason='Derived files changed (prompt unchanged) - verify code works',
1824
+ confidence=0.85,
1825
+ estimated_cost=estimate_operation_cost('verify'),
1826
+ details={
1827
+ 'decision_type': 'heuristic',
1828
+ 'changed_files': changes,
1829
+ 'num_changes': len(changes),
1830
+ 'prompt_changed': False,
1831
+ 'workflow_stage': 'continue_after_interruption'
1832
+ }
1833
+ )
1834
+ # If only example/test changed
1835
+ elif 'example' in changes:
1836
+ return SyncDecision(
1837
+ operation='verify',
1838
+ reason='Example changed (prompt unchanged) - verify example runs',
1839
+ confidence=0.85,
1840
+ estimated_cost=estimate_operation_cost('verify'),
1841
+ details={
1842
+ 'decision_type': 'heuristic',
1843
+ 'changed_files': changes,
1844
+ 'prompt_changed': False
1845
+ }
1846
+ )
1847
+ elif 'test' in changes:
1848
+ return SyncDecision(
1849
+ operation='test',
1850
+ reason='Test changed (prompt unchanged) - run tests',
1851
+ confidence=0.85,
1852
+ estimated_cost=estimate_operation_cost('test'),
1853
+ details={
1854
+ 'decision_type': 'heuristic',
1855
+ 'changed_files': changes,
1856
+ 'prompt_changed': False
1857
+ }
1858
+ )
1139
1859
 
1140
1860
  # Fallback - should not reach here normally
1141
1861
  return SyncDecision(
@@ -1152,7 +1872,14 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
1152
1872
  )
1153
1873
 
1154
1874
 
1155
- def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerprint, changed_files: List[str], prompts_dir: str = "prompts") -> SyncDecision:
1875
+ def analyze_conflict_with_llm(
1876
+ basename: str,
1877
+ language: str,
1878
+ fingerprint: Fingerprint,
1879
+ changed_files: List[str],
1880
+ prompts_dir: str = "prompts",
1881
+ context_override: Optional[str] = None,
1882
+ ) -> SyncDecision:
1156
1883
  """
1157
1884
  Resolve complex sync conflicts using an LLM.
1158
1885
 
@@ -1185,7 +1912,7 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
1185
1912
  )
1186
1913
 
1187
1914
  # 2. Gather file paths and diffs
1188
- paths = get_pdd_file_paths(basename, language, prompts_dir)
1915
+ paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
1189
1916
 
1190
1917
  # Generate diffs for changed files
1191
1918
  diffs = {}