pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +80 -19
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +281 -81
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -62
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +331 -77
- pdd/fix_error_loop.py +209 -60
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +319 -272
- pdd/fix_verification_main.py +57 -17
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +48 -9
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/increase_tests.py +7 -0
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1278 -110
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +98 -101
- pdd/prompts/change_LLM.prompt +1 -3
- pdd/prompts/detect_change_LLM.prompt +562 -3
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -2
- pdd/prompts/insert_includes_LLM.prompt +1181 -6
- pdd/prompts/split_LLM.prompt +1 -62
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/prompts/xml_convertor_LLM.prompt +3246 -7
- pdd/pytest_output.py +188 -21
- pdd/python_env_detector.py +151 -0
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +56 -7
- pdd/sync_determine_operation.py +918 -186
- pdd/sync_main.py +82 -32
- pdd/sync_orchestration.py +1456 -453
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.42.dist-info/RECORD +0 -115
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_determine_operation.py
CHANGED

@@ -56,7 +56,8 @@ LOCKS_DIR = get_locks_dir()
 
 # Export constants for other modules
 __all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
-           'sync_determine_operation', 'analyze_conflict_with_llm'
+           'sync_determine_operation', 'analyze_conflict_with_llm', 'read_run_report', 'get_pdd_file_paths',
+           '_check_example_success_history']
 
 
 @dataclass
@@ -68,7 +69,8 @@ class Fingerprint:
     prompt_hash: Optional[str]
     code_hash: Optional[str]
     example_hash: Optional[str]
-    test_hash: Optional[str]
+    test_hash: Optional[str]  # Keep for backward compat (primary test file)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
@@ -79,17 +81,19 @@ class RunReport:
     tests_passed: int
     tests_failed: int
     coverage: float
+    test_hash: Optional[str] = None  # Hash of test file when tests were run (for staleness detection)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
 class SyncDecision:
     """Represents a decision about what PDD operation to run next."""
-    operation: str  # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'nothing'
-    reason: str
-
-    estimated_cost: float = 0.0
-
-    prerequisites: List[str] =
+    operation: str  # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'nothing', 'all_synced', 'error', 'fail_and_request_manual_merge'
+    reason: str  # A human-readable explanation for the decision
+    confidence: float = 1.0  # Confidence level in the decision, 0.0 to 1.0, default 1.0 for deterministic decisions
+    estimated_cost: float = 0.0  # Estimated cost for the operation in dollars, default 0.0
+    details: Optional[Dict[str, Any]] = None  # Extra context for logging and debugging, default None
+    prerequisites: Optional[List[str]] = None  # List of operations that should be completed first, default None
 
 
 class SyncLock:
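The SyncDecision dataclass now carries a confidence score and a free-form details dict alongside the operation and reason. A minimal sketch of how a caller might gate execution on the new fields, assuming the field definitions above (the should_auto_run helper and its 0.8 threshold are illustrative, not part of the package):

    from dataclasses import dataclass
    from typing import Any, Dict, List, Optional

    @dataclass
    class SyncDecision:
        operation: str
        reason: str
        confidence: float = 1.0
        estimated_cost: float = 0.0
        details: Optional[Dict[str, Any]] = None
        prerequisites: Optional[List[str]] = None

    def should_auto_run(decision: SyncDecision, min_confidence: float = 0.8) -> bool:
        # Only run automatically when the heuristic is confident enough.
        return decision.operation != 'nothing' and decision.confidence >= min_confidence

    decision = SyncDecision(
        operation='generate',
        reason='Prompt changed - regenerate code',
        confidence=0.90,
        estimated_cost=0.50,
        details={'decision_type': 'heuristic', 'prompt_changed': True},
    )
    assert should_auto_run(decision)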
@@ -208,51 +212,131 @@ def get_extension(language: str) -> str:
     return extensions.get(language.lower(), language.lower())
 
 
-def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
+def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts", context_override: Optional[str] = None) -> Dict[str, Path]:
     """Returns a dictionary mapping file types to their expected Path objects."""
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"get_pdd_file_paths called: basename={basename}, language={language}, prompts_dir={prompts_dir}")
+
     try:
         # Use construct_paths to get configuration-aware paths
         prompt_filename = f"{basename}_{language}.prompt"
         prompt_path = str(Path(prompts_dir) / prompt_filename)
+        logger.info(f"Checking prompt_path={prompt_path}, exists={Path(prompt_path).exists()}")
 
-        # Check if prompt file exists - if not, we
+        # Check if prompt file exists - if not, we still need configuration-aware paths
         if not Path(prompt_path).exists():
-            #
+            # Use construct_paths with minimal inputs to get configuration-aware paths
+            # even when prompt doesn't exist
            extension = get_extension(language)
-
-
-
-
-
-
+            try:
+                # Call construct_paths with empty input_file_paths to get configured output paths
+                resolved_config, _, output_paths, _ = construct_paths(
+                    input_file_paths={},  # Empty dict since files don't exist yet
+                    force=True,
+                    quiet=True,
+                    command="sync",
+                    command_options={"basename": basename, "language": language},
+                    context_override=context_override
+                )
+
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"resolved_config: {resolved_config}")
+                logger.info(f"output_paths: {output_paths}")
+
+                # Extract directory configuration from resolved_config
+                # Note: construct_paths sets tests_dir, examples_dir, code_dir keys
+                test_dir = resolved_config.get('tests_dir', 'tests/')
+                example_dir = resolved_config.get('examples_dir', 'examples/')
+                code_dir = resolved_config.get('code_dir', './')
+
+                logger.info(f"Extracted dirs - test: {test_dir}, example: {example_dir}, code: {code_dir}")
+
+                # Ensure directories end with /
+                if test_dir and not test_dir.endswith('/'):
+                    test_dir = test_dir + '/'
+                if example_dir and not example_dir.endswith('/'):
+                    example_dir = example_dir + '/'
+                if code_dir and not code_dir.endswith('/'):
+                    code_dir = code_dir + '/'
+
+                # Construct the full paths
+                test_path = f"{test_dir}test_{basename}.{extension}"
+                example_path = f"{example_dir}{basename}_example.{extension}"
+                code_path = f"{code_dir}{basename}.{extension}"
+
+                logger.debug(f"Final paths: test={test_path}, example={example_path}, code={code_path}")
+
+                # Convert to Path objects
+                test_path = Path(test_path)
+                example_path = Path(example_path)
+                code_path = Path(code_path)
+
+                # Bug #156: Find all matching test files
+                test_dir_path = test_path.parent
+                test_stem = f"test_{basename}"
+                if test_dir_path.exists():
+                    matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
+                else:
+                    matching_test_files = [test_path] if test_path.exists() else []
+
+                result = {
+                    'prompt': Path(prompt_path),
+                    'code': code_path,
+                    'example': example_path,
+                    'test': test_path,
+                    'test_files': matching_test_files or [test_path]  # Bug #156
+                }
+                logger.debug(f"get_pdd_file_paths returning (prompt missing): test={test_path}")
+                return result
+            except Exception as e:
+                # If construct_paths fails, fall back to current directory paths
+                # This maintains backward compatibility
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.debug(f"construct_paths failed for non-existent prompt, using defaults: {e}")
+                fallback_test_path = Path(f"test_{basename}.{extension}")
+                # Bug #156: Find matching test files even in fallback
+                if Path('.').exists():
+                    fallback_matching = sorted(Path('.').glob(f"test_{basename}*.{extension}"))
+                else:
+                    fallback_matching = [fallback_test_path] if fallback_test_path.exists() else []
+                return {
+                    'prompt': Path(prompt_path),
+                    'code': Path(f"{basename}.{extension}"),
+                    'example': Path(f"{basename}_example.{extension}"),
+                    'test': fallback_test_path,
+                    'test_files': fallback_matching or [fallback_test_path]  # Bug #156
+                }
 
         input_file_paths = {
             "prompt_file": prompt_path
         }
 
-        #
+        # Call construct_paths to get configuration-aware paths
         resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
             input_file_paths=input_file_paths,
             force=True,  # Use force=True to avoid interactive prompts during sync
             quiet=True,
-            command="
-            command_options={}
+            command="sync",  # Use sync command to get more tolerant path handling
+            command_options={"basename": basename, "language": language},
+            context_override=context_override
         )
 
-        #
-        #
-
-
-
-
-        # For other commands, we need to construct the full set of paths
-
-        # Get the code file path from output_file_paths
-        code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
+        # For sync command, output_file_paths contains the configured paths
+        # Extract the code path from output_file_paths
+        code_path = output_file_paths.get('generate_output_path', '')
+        if not code_path:
+            # Try other possible keys
+            code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
         if not code_path:
-            # Fallback to constructing from basename
+            # Fallback to constructing from basename with configuration
             extension = get_extension(language)
-
+            code_dir = resolved_config.get('generate_output_path', './')
+            if code_dir and not code_dir.endswith('/'):
+                code_dir = code_dir + '/'
+            code_path = f"{code_dir}{basename}.{extension}"
 
         # Get configured paths for example and test files using construct_paths
         # Note: construct_paths requires files to exist, so we need to handle the case
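Bug #156 changes test discovery from a single hard-coded test_<basename>.<ext> file to every file matching the test_<basename>* prefix, as seen in the glob calls above. A self-contained sketch of that pattern (directory and module names are hypothetical):

    from pathlib import Path

    def find_test_files(tests_dir: Path, basename: str, extension: str) -> list:
        # Matches test_calculator.py, test_calculator_edge_cases.py, etc.;
        # sorted() keeps the ordering stable across runs.
        return sorted(tests_dir.glob(f"test_{basename}*.{extension}"))

    tests = find_test_files(Path("tests"), "calculator", "py")
    print(tests)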
@@ -268,18 +352,27 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
 
         try:
             # Get example path using example command
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            # Get test path using test command
-
-
-
-
+
+            # Get test path using test command - handle case where test file doesn't exist yet
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except FileNotFoundError:
+                # Test file doesn't exist yet - create default path
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         finally:
             # Clean up temporary file if we created it
@@ -297,17 +390,26 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
         # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
         try:
             # Get configured directories by using construct_paths with just the prompt file
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-
-
-
-
-
+
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except Exception:
+                # If test path construction fails, use default naming
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         except Exception:
             # Final fallback to deriving from code path if all else fails
@@ -318,21 +420,47 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
             example_path = code_dir / f"{code_stem}_example{code_ext}"
             test_path = code_dir / f"test_{code_stem}{code_ext}"
 
+        # Ensure all paths are Path objects
+        if isinstance(code_path, str):
+            code_path = Path(code_path)
+
+        # Keep paths as they are (absolute or relative as returned by construct_paths)
+        # This ensures consistency with how construct_paths expects them
+
+        # Bug #156: Find all matching test files
+        test_dir = test_path.parent
+        test_stem = f"test_{basename}"
+        extension = get_extension(language)
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
+
         return {
             'prompt': Path(prompt_path),
-            'code':
+            'code': code_path,
             'example': example_path,
-            'test': test_path
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
     except Exception as e:
         # Fallback to simple naming if construct_paths fails
         extension = get_extension(language)
+        test_path = Path(f"test_{basename}.{extension}")
+        # Bug #156: Try to find matching test files even in fallback
+        test_dir = Path('.')
+        test_stem = f"test_{basename}"
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
         return {
             'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
             'code': Path(f"{basename}.{extension}"),
             'example': Path(f"{basename}_example.{extension}"),
-            'test':
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
 
@@ -371,7 +499,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             prompt_hash=data.get('prompt_hash'),
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
-            test_hash=data.get('test_hash')
+            test_hash=data.get('test_hash'),
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -395,19 +524,29 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
             exit_code=data['exit_code'],
             tests_passed=data['tests_passed'],
             tests_failed=data['tests_failed'],
-            coverage=data['coverage']
+            coverage=data['coverage'],
+            test_hash=data.get('test_hash'),  # Optional for backward compatibility
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
 
 
-def calculate_current_hashes(paths: Dict[str,
+def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
     """Computes the hashes for all current files on disk."""
     # Return hash keys that match what the fingerprint expects
-
-
-
-
+    hashes = {}
+    for file_type, file_path in paths.items():
+        if file_type == 'test_files':
+            # Bug #156: Calculate hashes for all test files
+            hashes['test_files'] = {
+                f.name: calculate_sha256(f)
+                for f in file_path
+                if isinstance(f, Path) and f.exists()
+            }
+        elif isinstance(file_path, Path):
+            hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
+    return hashes
 
 
 def get_git_diff(file_path: Path) -> str:
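calculate_current_hashes relies on a calculate_sha256 helper defined elsewhere in the module. A minimal stand-in using hashlib, for reference (the chunked read is an implementation choice, not taken from this diff):

    import hashlib
    from pathlib import Path

    def calculate_sha256(file_path: Path) -> str:
        # Hash in 64 KiB chunks so large files are never fully loaded into memory.
        digest = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                digest.update(chunk)
        return digest.hexdigest()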
@@ -428,6 +567,27 @@ def get_git_diff(file_path: Path) -> str:
         return ""
 
 
+def estimate_operation_cost(operation: str, language: str = "python") -> float:
+    """Returns estimated cost in dollars for each operation based on typical LLM usage."""
+    cost_map = {
+        'auto-deps': 0.10,
+        'generate': 0.50,
+        'example': 0.30,
+        'crash': 0.40,
+        'verify': 0.35,
+        'test': 0.60,
+        'test_extend': 0.60,  # Same cost as test - generates additional tests
+        'fix': 0.45,
+        'update': 0.25,
+        'analyze_conflict': 0.20,
+        'nothing': 0.0,
+        'all_synced': 0.0,
+        'error': 0.0,
+        'fail_and_request_manual_merge': 0.0
+    }
+    return cost_map.get(operation, 0.0)
+
+
 def validate_expected_files(fingerprint: Optional[Fingerprint], paths: Dict[str, Path]) -> Dict[str, bool]:
     """
     Validate that files expected to exist based on fingerprint actually exist.
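The per-operation estimates give sync a way to stop before a budget is exhausted. A sketch of that kind of check, assuming the cost table above (the running-total helper is illustrative, not the package's budget logic):

    def plan_within_budget(planned_ops, budget, cost_of):
        # Keep operations while the cumulative estimate stays within budget.
        total, affordable = 0.0, []
        for op in planned_ops:
            total += cost_of(op)
            if total > budget:
                break
            affordable.append(op)
        return affordable

    cost_map = {'generate': 0.50, 'example': 0.30, 'test': 0.60}
    print(plan_within_budget(['generate', 'example', 'test'], 1.0,
                             lambda op: cost_map.get(op, 0.0)))
    # ['generate', 'example']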
@@ -492,17 +652,27 @@ def _handle_missing_expected_files(
             return SyncDecision(
                 operation='auto-deps',
                 reason='Code file missing, prompt has dependencies - regenerate from auto-deps',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('auto-deps'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'prompt_path': str(paths['prompt']),
+                    'has_dependencies': True
+                }
             )
         else:
             return SyncDecision(
                 operation='generate',
                 reason='Code file missing - regenerate from prompt',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('generate'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'prompt_path': str(paths['prompt']),
+                    'has_dependencies': False
+                }
             )
 
     elif 'example' in missing_files and paths['code'].exists():
@@ -510,9 +680,13 @@ def _handle_missing_expected_files(
         return SyncDecision(
             operation='example',
             reason='Example file missing - regenerate example',
-
-            estimated_cost=
-
+            confidence=1.0,
+            estimated_cost=estimate_operation_cost('example'),
+            details={
+                'decision_type': 'heuristic',
+                'missing_files': missing_files,
+                'code_path': str(paths['code'])
+            }
         )
 
     elif 'test' in missing_files and paths['code'].exists() and paths['example'].exists():
@@ -522,47 +696,137 @@ def _handle_missing_expected_files(
             return SyncDecision(
                 operation='nothing',
                 reason='Test file missing but --skip-tests specified - workflow complete',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('nothing'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'skip_tests': True
+                }
             )
         else:
             return SyncDecision(
                 operation='test',
                 reason='Test file missing - regenerate tests',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'code_path': str(paths['code'])
+                }
             )
 
     # Fallback - regenerate everything
     return SyncDecision(
         operation='generate',
         reason='Multiple files missing - regenerate from prompt',
-
-        estimated_cost=
-
+        confidence=1.0,
+        estimated_cost=estimate_operation_cost('generate'),
+        details={
+            'decision_type': 'heuristic',
+            'missing_files': missing_files
+        }
     )
 
 
-def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False
+def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False,
+                          basename: str = None, language: str = None) -> bool:
     """
     Check if workflow is complete considering skip flags.
-
+
     Args:
         paths: Dict mapping file types to their expected Path objects
         skip_tests: If True, test files are not required for completion
         skip_verify: If True, verification operations are not required
-
+        basename: Module basename (required for run_report check)
+        language: Module language (required for run_report check)
+
     Returns:
-        True if all required files exist
+        True if all required files exist AND have been validated (run_report exists)
     """
     required_files = ['code', 'example']
-
+
     if not skip_tests:
         required_files.append('test')
-
-
+
+    # Check all required files exist
+    if not all(paths[f].exists() for f in required_files):
+        return False
+
+    # Also check that run_report exists and code works (exit_code == 0)
+    # Without this, newly generated code would incorrectly be marked as "complete"
+    if basename and language:
+        run_report = read_run_report(basename, language)
+        if not run_report or run_report.exit_code != 0:
+            return False
+
+        # Check that run_report corresponds to current test files (staleness detection)
+        # If any test file changed since run_report was created, we can't trust the results
+        if not skip_tests:
+            # Bug #156: Check ALL test files, not just the primary one
+            if 'test_files' in paths and run_report.test_files:
+                # New multi-file comparison
+                current_test_hashes = {
+                    f.name: calculate_sha256(f)
+                    for f in paths['test_files']
+                    if f.exists()
+                }
+                stored_test_hashes = run_report.test_files
+
+                # Check if any test file changed or new ones added/removed
+                if set(current_test_hashes.keys()) != set(stored_test_hashes.keys()):
+                    return False  # Test files added or removed
+
+                for fname, current_hash in current_test_hashes.items():
+                    if stored_test_hashes.get(fname) != current_hash:
+                        return False  # Test file content changed
+            elif 'test' in paths and paths['test'].exists():
+                # Backward compat: single file check
+                current_test_hash = calculate_sha256(paths['test'])
+                if run_report.test_hash and current_test_hash != run_report.test_hash:
+                    # run_report was created for a different version of the test file
+                    return False
+                if not run_report.test_hash:
+                    # Legacy run_report without test_hash - check fingerprint timestamp as fallback
+                    fingerprint = read_fingerprint(basename, language)
+                    if fingerprint:
+                        # If fingerprint is newer than run_report, run_report might be stale
+                        from datetime import datetime
+                        try:
+                            fp_time = datetime.fromisoformat(fingerprint.timestamp.replace('Z', '+00:00'))
+                            rr_time = datetime.fromisoformat(run_report.timestamp.replace('Z', '+00:00'))
+                            if fp_time > rr_time:
+                                return False  # run_report predates fingerprint, might be stale
+                        except (ValueError, AttributeError):
+                            pass  # If timestamps can't be parsed, skip this check
+
+        # Check verify has been done (unless skip_verify)
+        # Without this, workflow would be "complete" after crash even though verify hasn't run
+        # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+        if not skip_verify:
+            fingerprint = read_fingerprint(basename, language)
+            if fingerprint:
+                # If command starts with 'skip:', the operation was skipped, not completed
+                if fingerprint.command.startswith('skip:'):
+                    return False
+                if fingerprint.command not in ['verify', 'test', 'fix', 'update']:
+                    return False
+
+        # CRITICAL FIX: Check tests have been run (unless skip_tests)
+        # Without this, workflow would be "complete" after verify even though tests haven't run
+        # This prevents false positive success when skip_verify=True but tests are still required
+        # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+        if not skip_tests:
+            fp = read_fingerprint(basename, language)
+            if fp:
+                # If command starts with 'skip:', the operation was skipped, not completed
+                if fp.command.startswith('skip:'):
+                    return False
+                if fp.command not in ['test', 'fix', 'update']:
+                    return False
+
+    return True
 
 
 def check_for_dependencies(prompt_content: str) -> bool:
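The staleness detection compares the per-file hash map stored in the run report against freshly computed hashes; any added, removed, or modified test file invalidates the report. The core comparison, reduced to a self-contained sketch:

    def test_files_unchanged(stored, current):
        # Stale if the set of test files differs or any individual hash changed.
        if set(stored) != set(current):
            return False
        return all(stored[name] == current[name] for name in current)

    stored = {"test_foo.py": "aaa", "test_foo_extra.py": "bbb"}
    assert test_files_unchanged(stored, dict(stored))
    assert not test_files_unchanged(stored, {"test_foo.py": "aaa"})            # file removed
    assert not test_files_unchanged(stored, {**stored, "test_foo.py": "ccc"})  # content changed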
@@ -594,7 +858,60 @@ def check_for_dependencies(prompt_content: str) -> bool:
     return has_xml_deps or has_explicit_deps
 
 
-def
+def _check_example_success_history(basename: str, language: str) -> bool:
+    """
+    Check if the example has run successfully before by examining historical fingerprints and run reports.
+
+    Args:
+        basename: The base name for the PDD unit
+        language: The programming language
+
+    Returns:
+        True if the example has run successfully before, False otherwise
+    """
+    meta_dir = get_meta_dir()
+
+    # Strategy 1: Check if there's a fingerprint with 'verify' command (indicates successful example run)
+    # Cache fingerprint and run report to avoid redundant I/O operations
+    fingerprint = read_fingerprint(basename, language)
+    current_run_report = read_run_report(basename, language)
+
+    # Strategy 1: Check if there's a fingerprint with 'verify' command (indicates successful example run)
+    if fingerprint and fingerprint.command == 'verify':
+        return True
+
+    # Strategy 2: Check current run report for successful runs (exit_code == 0)
+    # Note: We check the current run report for successful history since it's updated
+    # This allows for a simple check of recent success
+    if current_run_report and current_run_report.exit_code == 0:
+        return True
+
+    # Strategy 2b: Look for historical run reports with exit_code == 0
+    # Check all run report files in the meta directory that match the pattern
+    run_report_pattern = f"{basename}_{language}_run"
+    for file in meta_dir.glob(f"{run_report_pattern}*.json"):
+        try:
+            with open(file, 'r') as f:
+                data = json.load(f)
+
+            # If we find any historical run with exit_code == 0, the example has run successfully
+            if data.get('exit_code') == 0:
+                return True
+        except (json.JSONDecodeError, KeyError, IOError):
+            continue
+
+    # Strategy 3: Check if fingerprint has example_hash and was created after successful operations
+    # Commands that indicate example was working: 'example', 'verify', 'test', 'fix'
+    if fingerprint and fingerprint.example_hash:
+        successful_commands = {'example', 'verify', 'test', 'fix'}
+        if fingerprint.command in successful_commands:
+            # If the fingerprint was created after these commands, the example likely worked
+            return True
+
+    return False
+
+
+def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Core decision-making function for sync operations with skip flag awareness.
 
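_check_example_success_history treats any historical run report with exit_code == 0 as evidence that the example once worked. The scan over the meta directory, as a self-contained sketch (file naming follows the pattern in the diff; the directory itself is hypothetical):

    import json
    from pathlib import Path

    def has_successful_run(meta_dir: Path, basename: str, language: str) -> bool:
        # Any historical run report with exit_code == 0 counts as past success.
        for report in meta_dir.glob(f"{basename}_{language}_run*.json"):
            try:
                if json.loads(report.read_text()).get('exit_code') == 0:
                    return True
            except (json.JSONDecodeError, OSError):
                continue
        return False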
@@ -614,14 +931,14 @@ def sync_determine_operation(basename: str, language: str, target_coverage: floa
 
     if log_mode:
         # Skip locking for read-only analysis
-        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
     else:
         # Normal exclusive locking for actual operations
         with SyncLock(basename, language) as lock:
-            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
 
 
-def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Perform the sync state analysis without locking concerns.
 
@@ -650,37 +967,149 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
     # Read fingerprint early since we need it for crash verification
     fingerprint = read_fingerprint(basename, language)
-
+
+    # Check if auto-deps just completed - ALWAYS regenerate code after auto-deps
+    # This must be checked early, before any run_report processing, because:
+    # 1. Old run_report (if exists) is stale and should be ignored
+    # 2. auto-deps updates dependencies but doesn't regenerate code
+    if fingerprint and fingerprint.command == 'auto-deps':
+        paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+        return SyncDecision(
+            operation='generate',
+            reason='Auto-deps completed - regenerate code with updated prompt',
+            confidence=0.90,
+            estimated_cost=estimate_operation_cost('generate'),
+            details={
+                'decision_type': 'heuristic',
+                'previous_command': 'auto-deps',
+                'code_exists': paths['code'].exists() if paths.get('code') else False,
+                'regenerate_after_autodeps': True
+            }
+        )
+
     run_report = read_run_report(basename, language)
-    if
-
-
+    # Only process runtime signals (crash/fix/test) if we have a fingerprint
+    # Without a fingerprint, run_report is stale/orphaned and should be ignored
+    if run_report and fingerprint:
+        # Check for prompt changes FIRST - prompt changes take priority over runtime signals
+        # If the user modified the prompt, we need to regenerate regardless of runtime state
+        if fingerprint:
+            paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            current_prompt_hash = calculate_sha256(paths['prompt'])
+            if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
+                prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
+                has_deps = check_for_dependencies(prompt_content)
+                return SyncDecision(
+                    operation='auto-deps' if has_deps else 'generate',
+                    reason='Prompt changed - regenerating (takes priority over runtime signals)',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_changed': True,
+                        'previous_command': fingerprint.command,
+                        'runtime_state_ignored': True
+                    }
+                )
+
+        # Check if we just completed a crash operation and need verification FIRST
+        # This takes priority over test failures because we need to verify the crash fix worked
+        # BUT only proceed to verify if exit_code == 0 (crash fix succeeded)
+        if fingerprint and fingerprint.command == 'crash' and not skip_verify:
+            if run_report.exit_code != 0:
+                # Crash fix didn't work - need to re-run crash
+                return SyncDecision(
+                    operation='crash',
+                    reason=f'Previous crash operation failed (exit_code={run_report.exit_code}) - retry crash fix',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'previous_command': 'crash',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_retry'
+                    }
+                )
             return SyncDecision(
-                operation='
-                reason=
-
-                estimated_cost=
-
+                operation='verify',
+                reason='Previous crash operation completed - verify example runs correctly',
+                confidence=0.90,
+                estimated_cost=estimate_operation_cost('verify'),
+                details={
+                    'decision_type': 'heuristic',
+                    'previous_command': 'crash',
+                    'current_exit_code': run_report.exit_code,
+                    'fingerprint_command': fingerprint.command
+                }
             )
 
+        # Check test failures (after crash verification check)
+        if run_report.tests_failed > 0:
+            # First check if the test file actually exists
+            pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            test_file = pdd_files.get('test')
+
+            # Only suggest 'fix' if test file exists
+            if test_file and test_file.exists():
+                return SyncDecision(
+                    operation='fix',
+                    reason=f'Test failures detected: {run_report.tests_failed} failed tests',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'tests_failed': run_report.tests_failed,
+                        'exit_code': run_report.exit_code,
+                        'coverage': run_report.coverage
+                    }
+                )
+            # If test file doesn't exist but we have test failures in run report,
+            # we need to generate the test first
+            else:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test failures reported but test file missing - need to generate tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'run_report_shows_failures': True,
+                        'test_file_exists': False
+                    }
+                )
+
         # Then check for runtime crashes (only if no test failures)
         if run_report.exit_code != 0:
-            #
-
+            # Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
+            has_example_run_successfully = _check_example_success_history(basename, language)
+
+            if has_example_run_successfully:
                 return SyncDecision(
-                    operation='
-                    reason='
-
-                    estimated_cost=
-
+                    operation='fix',
+                    reason='Runtime error detected but example has run successfully before - prefer fix over crash',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'timestamp': run_report.timestamp,
+                        'example_success_history': True,
+                        'decision_rationale': 'prefer_fix_over_crash'
+                    }
                 )
             else:
                 return SyncDecision(
                     operation='crash',
-                    reason='Runtime error detected in last run',
-
-                    estimated_cost=
-
+                    reason='Runtime error detected in last run - no successful example history',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'timestamp': run_report.timestamp,
+                        'example_success_history': False,
+                        'decision_rationale': 'crash_without_history'
+                    }
                 )
 
         if run_report.coverage < target_coverage:
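The hunk above encodes an explicit precedence: a prompt edit outranks every runtime signal, a just-finished crash fix is verified (or retried) before anything else, failing tests outrank a generic crash, and a crash with a history of success is routed to fix rather than crash. That ordering can be read as a first-match rule table; a condensed sketch (the state keys and rule set are simplified from the diff, not the package's actual control flow):

    RULES = [
        ('generate', lambda s: s['prompt_changed']),                                # prompt edits win
        ('crash',    lambda s: s['last_command'] == 'crash' and s['exit_code'] != 0),
        ('verify',   lambda s: s['last_command'] == 'crash'),                       # crash fixed - verify it
        ('fix',      lambda s: s['tests_failed'] > 0),
        ('crash',    lambda s: s['exit_code'] != 0),
        ('test',     lambda s: s['coverage'] < s['target_coverage']),
    ]

    def first_operation(state):
        # First matching rule wins; list order encodes priority.
        for op, predicate in RULES:
            if predicate(state):
                return op
        return None

    state = {'prompt_changed': False, 'last_command': 'crash', 'exit_code': 0,
             'tests_failed': 0, 'coverage': 95.0, 'target_coverage': 90.0}
    print(first_operation(state))  # verify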
@@ -690,21 +1119,50 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 return SyncDecision(
                     operation='all_synced',
                     reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% but tests skipped',
-
-                    estimated_cost=
-
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('all_synced'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'current_coverage': run_report.coverage,
+                        'target_coverage': target_coverage,
+                        'tests_skipped': True,
+                        'skip_tests': True
+                    }
+                )
+            elif run_report.tests_failed == 0 and run_report.tests_passed > 0:
+                # Tests pass but coverage is below target
+                # Return 'test_extend' to signal we need to ADD more tests, not regenerate
+                return SyncDecision(
+                    operation='test_extend',
+                    reason=f'Tests pass ({run_report.tests_passed} passed) but coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% - extending tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'current_coverage': run_report.coverage,
+                        'target_coverage': target_coverage,
+                        'tests_passed': run_report.tests_passed,
+                        'tests_failed': run_report.tests_failed,
+                        'extend_tests': True
+                    }
                 )
             else:
                 return SyncDecision(
                     operation='test',
                     reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}%',
-
-                    estimated_cost=
-
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'current_coverage': run_report.coverage,
+                        'target_coverage': target_coverage,
+                        'tests_passed': run_report.tests_passed,
+                        'tests_failed': run_report.tests_failed
+                    }
                 )
 
     # 2. Analyze File State
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
     current_hashes = calculate_current_hashes(paths)
 
     # 3. Implement the Decision Tree
@@ -716,25 +1174,39 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             return SyncDecision(
                 operation='auto-deps',
                 reason='New prompt with dependencies detected',
-
-                estimated_cost=
-
+                confidence=0.80,
+                estimated_cost=estimate_operation_cost('auto-deps'),
+                details={
+                    'decision_type': 'heuristic',
+                    'prompt_path': str(paths['prompt']),
+                    'fingerprint_found': False,
+                    'has_dependencies': True
+                }
             )
         else:
             return SyncDecision(
                 operation='generate',
                 reason='New prompt ready for code generation',
-
-                estimated_cost=
-
+                confidence=0.90,
+                estimated_cost=estimate_operation_cost('generate'),
+                details={
+                    'decision_type': 'heuristic',
+                    'prompt_path': str(paths['prompt']),
+                    'fingerprint_found': False,
+                    'has_dependencies': False
+                }
             )
     else:
         return SyncDecision(
             operation='nothing',
             reason='No prompt file and no history - nothing to do',
-
-            estimated_cost=
-
+            confidence=1.0,
+            estimated_cost=estimate_operation_cost('nothing'),
+            details={
+                'decision_type': 'heuristic',
+                'prompt_exists': False,
+                'fingerprint_found': False
+            }
         )
 
     # CRITICAL FIX: Validate expected files exist before hash comparison
@@ -767,54 +1239,203 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
767
1239
|
|
|
768
1240
|
if not changes:
|
|
769
1241
|
# No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
|
|
770
|
-
if _is_workflow_complete(paths, skip_tests, skip_verify):
|
|
1242
|
+
if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
|
|
771
1243
|
return SyncDecision(
|
|
772
1244
|
operation='nothing',
|
|
773
1245
|
reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
|
|
774
|
-
|
|
775
|
-
estimated_cost=
|
|
776
|
-
|
|
1246
|
+
confidence=1.0,
|
|
1247
|
+
estimated_cost=estimate_operation_cost('nothing'),
|
|
1248
|
+
details={
|
|
1249
|
+
'decision_type': 'heuristic',
|
|
1250
|
+
'skip_tests': skip_tests,
|
|
1251
|
+
'skip_verify': skip_verify,
|
|
1252
|
+
'workflow_complete': True
|
|
1253
|
+
}
|
|
777
1254
|
)
|
|
778
|
-
|
|
1255
|
+
|
|
1256
|
+
# Handle incomplete workflow when all files exist (including test)
|
|
1257
|
+
# This addresses the blind spot where crash/verify/test logic only runs when test is missing
|
|
1258
|
+
if (paths['code'].exists() and paths['example'].exists() and paths['test'].exists()):
|
|
1259
|
+
run_report = read_run_report(basename, language)
|
|
1260
|
+
|
|
1261
|
+
# BUG 4 & 1: No run_report OR crash detected (exit_code != 0)
|
|
1262
|
+
if not run_report or run_report.exit_code != 0:
|
|
1263
|
+
return SyncDecision(
|
|
1264
|
+
operation='crash',
|
|
1265
|
+
reason='All files exist but needs validation' +
|
|
1266
|
+
(' - no run_report' if not run_report else f' - exit_code={run_report.exit_code}'),
|
|
1267
|
+
confidence=0.85,
|
|
1268
|
+
estimated_cost=estimate_operation_cost('crash'),
|
|
1269
|
+
details={
|
|
1270
|
+
'decision_type': 'heuristic',
|
|
1271
|
+
'all_files_exist': True,
|
|
1272
|
+
'run_report_missing': not run_report,
|
|
1273
|
+
'exit_code': None if not run_report else run_report.exit_code,
|
|
1274
|
+
'workflow_stage': 'post_regeneration_validation'
|
|
1275
|
+
}
|
|
1276
|
+
)
|
|
1277
|
+
|
|
1278
|
+
# BUG 2: Verify not run yet (run_report exists, exit_code=0, but command != verify/test)
|
|
1279
|
+
if fingerprint and fingerprint.command not in ['verify', 'test', 'fix', 'update'] and not skip_verify:
|
|
1280
|
+
return SyncDecision(
|
|
1281
|
+
operation='verify',
|
|
1282
|
+
reason='All files exist but verification not completed',
|
|
1283
|
+
confidence=0.85,
|
|
1284
|
+
estimated_cost=estimate_operation_cost('verify'),
|
|
1285
|
+
details={
|
|
1286
|
+
'decision_type': 'heuristic',
|
|
1287
|
+
'all_files_exist': True,
|
|
1288
|
+
'last_command': fingerprint.command,
|
|
1289
|
+
'workflow_stage': 'verification_pending'
|
|
1290
|
+
}
|
|
1291
|
+
)
|
|
1292
|
+
|
|
1293
|
+
# Stale run_report detected: _is_workflow_complete returned False but all other conditions passed
|
|
1294
|
+
# This happens when run_report.test_hash doesn't match current test file, or
|
|
1295
|
+
# when fingerprint timestamp > run_report timestamp (legacy detection)
|
|
1296
|
+
# Need to re-run tests to get accurate results
|
|
1297
|
+
if run_report and run_report.exit_code == 0:
|
|
1298
|
+
return SyncDecision(
|
|
1299
|
+
operation='test',
|
|
1300
|
+
reason='Run report is stale - need to re-run tests to verify current state',
|
|
1301
|
+
confidence=0.9,
|
|
1302
|
+
estimated_cost=estimate_operation_cost('test'),
|
|
1303
|
+
details={
|
|
1304
|
+
'decision_type': 'heuristic',
|
|
1305
|
+
'all_files_exist': True,
|
|
1306
|
+
'run_report_stale': True,
|
|
1307
|
+
'run_report_test_hash': run_report.test_hash,
|
|
1308
|
+
'workflow_stage': 'revalidation'
|
|
1309
|
+
}
|
|
1310
|
+
)
|
|
1311
|
+
|
|
779
1312
|
# Progress workflow considering skip flags
|
|
780
1313
|
if paths['code'].exists() and not paths['example'].exists():
|
|
781
1314
|
return SyncDecision(
|
|
782
1315
|
                 operation='example',
                 reason='Code exists but example missing - progress workflow',
-
-                estimated_cost=
-
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('example'),
+                details={
+                    'decision_type': 'heuristic',
+                    'code_path': str(paths['code']),
+                    'code_exists': True,
+                    'example_exists': False
+                }
             )
 
         if (paths['code'].exists() and paths['example'].exists() and
                 not skip_tests and not paths['test'].exists()):
-
-
-
-
-
-
-
+
+            # Check if example has been crash-tested and verified before allowing test generation
+            run_report = read_run_report(basename, language)
+            if not run_report and not skip_verify:
+                # No run report exists - need to test the example first
+                # But if skip_verify is True, skip crash/verify and go to test generation
+                return SyncDecision(
+                    operation='crash',
+                    reason='Example exists but needs runtime testing before test generation',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'code_path': str(paths['code']),
+                        'example_path': str(paths['example']),
+                        'no_run_report': True,
+                        'workflow_stage': 'crash_validation'
+                    }
+                )
+            elif run_report and run_report.exit_code != 0 and not skip_verify:
+                # Example crashed - fix it before proceeding
+                # But if skip_verify is True, skip crash fix and proceed
+                return SyncDecision(
+                    operation='crash',
+                    reason='Example crashes - fix runtime errors before test generation',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_fix'
+                    }
+                )
+            elif fingerprint and fingerprint.command != 'verify' and not skip_verify:
+                # Example runs but hasn't been verified yet
+                return SyncDecision(
+                    operation='verify',
+                    reason='Example runs but needs verification before test generation',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'last_command': fingerprint.command,
+                        'workflow_stage': 'verify_validation'
+                    }
+                )
+            else:
+                # Example runs and is verified (or verify is skipped) - now safe to generate tests
+                return SyncDecision(
+                    operation='test',
+                    reason='Example validated - ready for test generation',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'code_path': str(paths['code']),
+                        'example_path': str(paths['example']),
+                        'code_exists': True,
+                        'example_exists': True,
+                        'test_exists': False,
+                        'workflow_stage': 'test_generation'
+                    }
+                )
 
         # Some files are missing but no changes detected
         if not paths['code'].exists():
             if paths['prompt'].exists():
+                # CRITICAL FIX: Check if auto-deps was just completed to prevent infinite loop
+                if fingerprint and fingerprint.command == 'auto-deps':
+                    return SyncDecision(
+                        operation='generate',
+                        reason='Auto-deps completed, now generate missing code file',
+                        confidence=0.90,
+                        estimated_cost=estimate_operation_cost('generate'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'prompt_path': str(paths['prompt']),
+                            'code_exists': False,
+                            'auto_deps_completed': True,
+                            'previous_command': fingerprint.command
+                        }
+                    )
+
                 prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
                 if check_for_dependencies(prompt_content):
                     return SyncDecision(
                         operation='auto-deps',
                         reason='Missing code file, prompt has dependencies',
-
-                        estimated_cost=
-
+                        confidence=0.80,
+                        estimated_cost=estimate_operation_cost('auto-deps'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'prompt_path': str(paths['prompt']),
+                            'code_exists': False,
+                            'has_dependencies': True
+                        }
                     )
                 else:
                     return SyncDecision(
                         operation='generate',
                         reason='Missing code file - generate from prompt',
-
-                        estimated_cost=
-
+                        confidence=0.90,
+                        estimated_cost=estimate_operation_cost('generate'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'prompt_path': str(paths['prompt']),
+                            'code_exists': False,
+                            'has_dependencies': False
+                        }
                     )
 
     elif len(changes) == 1:
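Every branch in these hunks returns a SyncDecision priced via estimate_operation_cost; neither definition is visible in this part of the diff. The following is a minimal sketch of the shapes the hunks assume: the field set mirrors the keyword arguments used above, while the cost table is purely hypothetical.

    from dataclasses import dataclass, field
    from typing import Any, Dict, List

    @dataclass
    class SyncDecision:
        operation: str                      # e.g. 'generate', 'crash', 'verify', 'test'
        reason: str                         # human-readable justification
        confidence: float = 0.0             # heuristic or LLM confidence in the choice
        estimated_cost: float = 0.0         # rough cost of running the operation
        details: Dict[str, Any] = field(default_factory=dict)
        prerequisites: List[str] = field(default_factory=list)

    def estimate_operation_cost(operation: str) -> float:
        # Hypothetical price table; the shipped implementation may differ.
        table = {'generate': 0.10, 'auto-deps': 0.05, 'crash': 0.08, 'verify': 0.08,
                 'test': 0.12, 'update': 0.06, 'analyze_conflict': 0.15,
                 'fail_and_request_manual_merge': 0.0, 'nothing': 0.0}
        return table.get(operation, 0.0)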
@@ -827,67 +1448,156 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 return SyncDecision(
                     operation='auto-deps',
                     reason='Prompt changed and dependencies need updating',
-
-                    estimated_cost=
-
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('auto-deps'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_file': 'prompt',
+                        'has_dependencies': True,
+                        'prompt_changed': True
+                    }
                 )
             else:
                 return SyncDecision(
                     operation='generate',
                     reason='Prompt changed - regenerate code',
-
-                    estimated_cost=
-
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_file': 'prompt',
+                        'has_dependencies': False,
+                        'prompt_changed': True
+                    }
                 )
 
         elif change == 'code':
             return SyncDecision(
                 operation='update',
                 reason='Code changed - update prompt to reflect changes',
-
-                estimated_cost=
-
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('update'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_file': 'code',
+                    'code_changed': True
+                }
             )
 
         elif change == 'test':
             return SyncDecision(
                 operation='test',
                 reason='Test changed - run new tests',
-
-                estimated_cost=
-
+                confidence=0.80,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_file': 'test',
+                    'test_changed': True
+                }
             )
 
         elif change == 'example':
             return SyncDecision(
                 operation='verify',
                 reason='Example changed - verify new example',
-
-                estimated_cost=
-
+                confidence=0.80,
+                estimated_cost=estimate_operation_cost('verify'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_file': 'example',
+                    'example_changed': True
+                }
             )
 
         else:
-            # Complex Changes (Multiple Files Modified
-
-
-
-
-
-
-
+            # Complex Changes (Multiple Files Modified)
+            # CRITICAL: Only treat as conflict if prompt changed along with derived artifacts
+            # If only derived artifacts changed (code, example, test), this is NOT a conflict
+            # per PDD doctrine - all are derived from the unchanged prompt
+
+            if 'prompt' in changes:
+                # True conflict: prompt (source of truth) changed along with derived artifacts
+                return SyncDecision(
+                    operation='analyze_conflict',
+                    reason='Prompt and derived files changed - requires conflict analysis',
+                    confidence=0.70,
+                    estimated_cost=estimate_operation_cost('analyze_conflict'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'num_changes': len(changes),
+                        'prompt_changed': True
+                    }
+                )
+            else:
+                # Only derived artifacts changed - prompt (source of truth) is unchanged
+                # Continue workflow from where it was interrupted
+
+                # If code changed, need to re-verify
+                if 'code' in changes:
+                    return SyncDecision(
+                        operation='verify',
+                        reason='Derived files changed (prompt unchanged) - verify code works',
+                        confidence=0.85,
+                        estimated_cost=estimate_operation_cost('verify'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'changed_files': changes,
+                            'num_changes': len(changes),
+                            'prompt_changed': False,
+                            'workflow_stage': 'continue_after_interruption'
+                        }
+                    )
+                # If only example/test changed
+                elif 'example' in changes:
+                    return SyncDecision(
+                        operation='verify',
+                        reason='Example changed (prompt unchanged) - verify example runs',
+                        confidence=0.85,
+                        estimated_cost=estimate_operation_cost('verify'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'changed_files': changes,
+                            'prompt_changed': False
+                        }
+                    )
+                elif 'test' in changes:
+                    return SyncDecision(
+                        operation='test',
+                        reason='Test changed (prompt unchanged) - run tests',
+                        confidence=0.85,
+                        estimated_cost=estimate_operation_cost('test'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'changed_files': changes,
+                            'prompt_changed': False
+                        }
+                    )
 
     # Fallback - should not reach here normally
     return SyncDecision(
         operation='nothing',
         reason='No clear operation determined',
-
-        estimated_cost=
-
+        confidence=0.50,
+        estimated_cost=estimate_operation_cost('nothing'),
+        details={
+            'decision_type': 'heuristic',
+            'fingerprint_exists': fingerprint is not None,
+            'changes': changes,
+            'fallback': True
+        }
     )
 
 
-def analyze_conflict_with_llm(
+def analyze_conflict_with_llm(
+    basename: str,
+    language: str,
+    fingerprint: Fingerprint,
+    changed_files: List[str],
+    prompts_dir: str = "prompts",
+    context_override: Optional[str] = None,
+) -> SyncDecision:
     """
     Resolve complex sync conflicts using an LLM.
 
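The hunk above replaces the old single-line signature with a keyword-friendly one and threads context_override through to path resolution. A hypothetical call site, assuming a Fingerprint already loaded from the sync metadata (all argument values here are illustrative):

    # Illustrative call; the basename and changed_files values are made up.
    decision = analyze_conflict_with_llm(
        basename="calculator",
        language="python",
        fingerprint=fingerprint,            # last recorded sync state
        changed_files=["prompt", "code"],   # source of truth and artifact both changed
        prompts_dir="prompts",
        context_override=None,
    )
    if decision.operation == 'fail_and_request_manual_merge':
        print(decision.reason)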
@@ -910,13 +1620,17 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason='LLM analysis template not found - manual merge required',
-
-            estimated_cost=
-
+            confidence=0.0,
+            estimated_cost=estimate_operation_cost('fail_and_request_manual_merge'),
+            details={
+                'decision_type': 'llm',
+                'error': 'Template not available',
+                'changed_files': changed_files
+            }
         )
 
     # 2. Gather file paths and diffs
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
 
     # Generate diffs for changed files
     diffs = {}
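The loop that populates diffs falls outside the hunks shown here. A plausible shape, using only the standard library; the per-file content attributes on the fingerprint are assumptions, not the shipped code:

    import difflib

    # Assumed: diff each changed file against the content recorded in the fingerprint.
    for name in changed_files:
        path = paths.get(name)
        if path is None or not path.exists():
            continue
        current = path.read_text(encoding='utf-8', errors='ignore').splitlines(keepends=True)
        recorded = getattr(fingerprint, f'{name}_content', '').splitlines(keepends=True)
        diffs[name] = ''.join(difflib.unified_diff(
            recorded, current,
            fromfile=f'{name} (fingerprint)', tofile=f'{name} (current)'))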
@@ -974,9 +1688,14 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason=f'LLM confidence too low ({confidence:.2f}) - manual merge required',
-
+            confidence=confidence,
             estimated_cost=response.get('cost', 0.0),
-
+            details={
+                'decision_type': 'llm',
+                'llm_response': llm_result,
+                'changed_files': changed_files,
+                'confidence_threshold': 0.75
+            }
         )
 
     # Extract operation and details
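The confidence_threshold of 0.75 recorded above implies a gate of roughly this shape just before the early return; only the threshold value is attested by the hunk, the rest is an assumption:

    # Assumed gating logic ahead of the low-confidence branch above.
    confidence = float(llm_result.get('confidence', 0.0))
    if confidence < 0.75:
        # falls into the fail_and_request_manual_merge decision shown above
        ...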
@@ -988,14 +1707,15 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation=operation,
             reason=f"LLM analysis: {reason}",
+            confidence=confidence,
+            estimated_cost=response.get('cost', 0.0),
             details={
+                'decision_type': 'llm',
                 'llm_response': llm_result,
                 'changed_files': changed_files,
                 'merge_strategy': merge_strategy,
                 'follow_up_operations': follow_up_operations
             },
-            estimated_cost=response.get('cost', 0.0),
-            confidence=confidence,
             prerequisites=follow_up_operations
         )
 
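The success return above consumes operation, reason, merge_strategy, and follow_up_operations from the parsed LLM reply. A sketch of the extraction this implies, assuming llm_result is a dict already parsed from the model's JSON (key names match the details dict above; the defaults are assumptions):

    operation = llm_result['operation']
    reason = llm_result.get('reason', '')
    merge_strategy = llm_result.get('merge_strategy')
    follow_up_operations = llm_result.get('follow_up_operations', [])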
@@ -1004,9 +1724,15 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason=f'Invalid LLM response: {e} - manual merge required',
-
+            confidence=0.0,
             estimated_cost=response.get('cost', 0.0),
-
+            details={
+                'decision_type': 'llm',
+                'error': str(e),
+                'raw_response': response.get('result', ''),
+                'changed_files': changed_files,
+                'llm_error': True
+            }
         )
 
     except Exception as e:
@@ -1014,22 +1740,28 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason=f'Error during LLM analysis: {e} - manual merge required',
-
-            estimated_cost=
-
+            confidence=0.0,
+            estimated_cost=estimate_operation_cost('fail_and_request_manual_merge'),
+            details={
+                'decision_type': 'llm',
+                'error': str(e),
+                'changed_files': changed_files,
+                'llm_error': True
+            }
         )
 
 
 if __name__ == "__main__":
     # Example usage
-    if len(sys.argv)
-        print("Usage: python sync_determine_operation.py <basename> <language>")
+    if len(sys.argv) < 3 or len(sys.argv) > 4:
+        print("Usage: python sync_determine_operation.py <basename> <language> [target_coverage]")
         sys.exit(1)
 
     basename = sys.argv[1]
     language = sys.argv[2]
+    target_coverage = float(sys.argv[3]) if len(sys.argv) == 4 else 90.0
 
-    decision = sync_determine_operation(basename, language, target_coverage
+    decision = sync_determine_operation(basename, language, target_coverage)
 
     print(f"Operation: {decision.operation}")
     print(f"Reason: {decision.reason}")