pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
@@ -69,7 +69,8 @@ class Fingerprint:
     prompt_hash: Optional[str]
     code_hash: Optional[str]
     example_hash: Optional[str]
-    test_hash: Optional[str]
+    test_hash: Optional[str]  # Keep for backward compat (primary test file)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
@@ -80,6 +81,8 @@ class RunReport:
     tests_passed: int
     tests_failed: int
     coverage: float
+    test_hash: Optional[str] = None  # Hash of test file when tests were run (for staleness detection)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
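Note on the two hunks above: the new test_files field stores one content hash per discovered test file, so staleness can be detected even when a module's tests are split across several files. A minimal sketch of the mapping this produces; the hash helper is an illustrative stand-in for the module's own calculate_sha256:

    import hashlib
    from pathlib import Path

    def sha256_of(path: Path) -> str:
        # Hash the file contents, as a content-hash helper would
        return hashlib.sha256(path.read_bytes()).hexdigest()

    test_files = {
        p.name: sha256_of(p)
        for p in sorted(Path("tests").glob("test_foo*.py"))
    }
    # e.g. {"test_foo.py": "3a7d...", "test_foo_cli.py": "91c0..."}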
@@ -209,51 +212,131 @@ def get_extension(language: str) -> str:
     return extensions.get(language.lower(), language.lower())
 
 
-def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
+def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts", context_override: Optional[str] = None) -> Dict[str, Path]:
     """Returns a dictionary mapping file types to their expected Path objects."""
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"get_pdd_file_paths called: basename={basename}, language={language}, prompts_dir={prompts_dir}")
+
     try:
         # Use construct_paths to get configuration-aware paths
         prompt_filename = f"{basename}_{language}.prompt"
         prompt_path = str(Path(prompts_dir) / prompt_filename)
+        logger.info(f"Checking prompt_path={prompt_path}, exists={Path(prompt_path).exists()}")
 
-        # Check if prompt file exists - if not, we can't proceed with construct_paths
+        # Check if prompt file exists - if not, we still need configuration-aware paths
         if not Path(prompt_path).exists():
-            # Fall back to default path construction if prompt doesn't exist
+            # Use construct_paths with minimal inputs to get configuration-aware paths
+            # even when prompt doesn't exist
            extension = get_extension(language)
-            return {
-                'prompt': Path(prompt_path),
-                'code': Path(f"{basename}.{extension}"),
-                'example': Path(f"{basename}_example.{extension}"),
-                'test': Path(f"test_{basename}.{extension}")
-            }
+            try:
+                # Call construct_paths with empty input_file_paths to get configured output paths
+                resolved_config, _, output_paths, _ = construct_paths(
+                    input_file_paths={},  # Empty dict since files don't exist yet
+                    force=True,
+                    quiet=True,
+                    command="sync",
+                    command_options={"basename": basename, "language": language},
+                    context_override=context_override
+                )
+
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"resolved_config: {resolved_config}")
+                logger.info(f"output_paths: {output_paths}")
+
+                # Extract directory configuration from resolved_config
+                # Note: construct_paths sets tests_dir, examples_dir, code_dir keys
+                test_dir = resolved_config.get('tests_dir', 'tests/')
+                example_dir = resolved_config.get('examples_dir', 'examples/')
+                code_dir = resolved_config.get('code_dir', './')
+
+                logger.info(f"Extracted dirs - test: {test_dir}, example: {example_dir}, code: {code_dir}")
+
+                # Ensure directories end with /
+                if test_dir and not test_dir.endswith('/'):
+                    test_dir = test_dir + '/'
+                if example_dir and not example_dir.endswith('/'):
+                    example_dir = example_dir + '/'
+                if code_dir and not code_dir.endswith('/'):
+                    code_dir = code_dir + '/'
+
+                # Construct the full paths
+                test_path = f"{test_dir}test_{basename}.{extension}"
+                example_path = f"{example_dir}{basename}_example.{extension}"
+                code_path = f"{code_dir}{basename}.{extension}"
+
+                logger.debug(f"Final paths: test={test_path}, example={example_path}, code={code_path}")
+
+                # Convert to Path objects
+                test_path = Path(test_path)
+                example_path = Path(example_path)
+                code_path = Path(code_path)
+
+                # Bug #156: Find all matching test files
+                test_dir_path = test_path.parent
+                test_stem = f"test_{basename}"
+                if test_dir_path.exists():
+                    matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
+                else:
+                    matching_test_files = [test_path] if test_path.exists() else []
+
+                result = {
+                    'prompt': Path(prompt_path),
+                    'code': code_path,
+                    'example': example_path,
+                    'test': test_path,
+                    'test_files': matching_test_files or [test_path]  # Bug #156
+                }
+                logger.debug(f"get_pdd_file_paths returning (prompt missing): test={test_path}")
+                return result
+            except Exception as e:
+                # If construct_paths fails, fall back to current directory paths
+                # This maintains backward compatibility
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.debug(f"construct_paths failed for non-existent prompt, using defaults: {e}")
+                fallback_test_path = Path(f"test_{basename}.{extension}")
+                # Bug #156: Find matching test files even in fallback
+                if Path('.').exists():
+                    fallback_matching = sorted(Path('.').glob(f"test_{basename}*.{extension}"))
+                else:
+                    fallback_matching = [fallback_test_path] if fallback_test_path.exists() else []
+                return {
+                    'prompt': Path(prompt_path),
+                    'code': Path(f"{basename}.{extension}"),
+                    'example': Path(f"{basename}_example.{extension}"),
+                    'test': fallback_test_path,
+                    'test_files': fallback_matching or [fallback_test_path]  # Bug #156
+                }
 
         input_file_paths = {
             "prompt_file": prompt_path
         }
 
-        # Only call construct_paths if the prompt file exists
+        # Call construct_paths to get configuration-aware paths
         resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
             input_file_paths=input_file_paths,
             force=True,  # Use force=True to avoid interactive prompts during sync
             quiet=True,
-            command="generate",
-            command_options={}
+            command="sync",  # Use sync command to get more tolerant path handling
+            command_options={"basename": basename, "language": language},
+            context_override=context_override
         )
 
-        # Extract paths from config as specified in the spec
-        # The spec shows: return { 'prompt': Path(config['prompt_file']), ... }
-        # But we need to map the output_file_paths keys to our expected structure
-
-        # For generate command, construct_paths returns these in output_file_paths:
-        # - 'output' or 'code_file' for the generated code
-        # For other commands, we need to construct the full set of paths
-
-        # Get the code file path from output_file_paths
-        code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
+        # For sync command, output_file_paths contains the configured paths
+        # Extract the code path from output_file_paths
+        code_path = output_file_paths.get('generate_output_path', '')
+        if not code_path:
+            # Try other possible keys
+            code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
         if not code_path:
-            # Fallback to constructing from basename
+            # Fallback to constructing from basename with configuration
             extension = get_extension(language)
-            code_path = f"{basename}.{extension}"
+            code_dir = resolved_config.get('generate_output_path', './')
+            if code_dir and not code_dir.endswith('/'):
+                code_dir = code_dir + '/'
+            code_path = f"{code_dir}{basename}.{extension}"
 
         # Get configured paths for example and test files using construct_paths
         # Note: construct_paths requires files to exist, so we need to handle the case
@@ -269,18 +352,27 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
 
         try:
             # Get example path using example command
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            # Get test path using test command
-            _, _, test_output_paths, _ = construct_paths(
-                input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="test", command_options={}
-            )
-            test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+
+            # Get test path using test command - handle case where test file doesn't exist yet
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except FileNotFoundError:
+                # Test file doesn't exist yet - create default path
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         finally:
             # Clean up temporary file if we created it
@@ -298,17 +390,26 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
         # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
         try:
             # Get configured directories by using construct_paths with just the prompt file
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            _, _, test_output_paths, _ = construct_paths(
-                input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="test", command_options={}
-            )
-            test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except Exception:
+                # If test path construction fails, use default naming
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         except Exception:
             # Final fallback to deriving from code path if all else fails
@@ -319,21 +420,47 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
             example_path = code_dir / f"{code_stem}_example{code_ext}"
             test_path = code_dir / f"test_{code_stem}{code_ext}"
 
+        # Ensure all paths are Path objects
+        if isinstance(code_path, str):
+            code_path = Path(code_path)
+
+        # Keep paths as they are (absolute or relative as returned by construct_paths)
+        # This ensures consistency with how construct_paths expects them
+
+        # Bug #156: Find all matching test files
+        test_dir = test_path.parent
+        test_stem = f"test_{basename}"
+        extension = get_extension(language)
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
+
         return {
             'prompt': Path(prompt_path),
-            'code': Path(code_path),
+            'code': code_path,
             'example': example_path,
-            'test': test_path
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
     except Exception as e:
         # Fallback to simple naming if construct_paths fails
         extension = get_extension(language)
+        test_path = Path(f"test_{basename}.{extension}")
+        # Bug #156: Try to find matching test files even in fallback
+        test_dir = Path('.')
+        test_stem = f"test_{basename}"
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
        return {
             'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
             'code': Path(f"{basename}.{extension}"),
             'example': Path(f"{basename}_example.{extension}"),
-            'test': Path(f"test_{basename}.{extension}")
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
 
@@ -372,7 +499,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             prompt_hash=data.get('prompt_hash'),
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
-            test_hash=data.get('test_hash')
+            test_hash=data.get('test_hash'),
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -396,19 +524,29 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
             exit_code=data['exit_code'],
             tests_passed=data['tests_passed'],
             tests_failed=data['tests_failed'],
-            coverage=data['coverage']
+            coverage=data['coverage'],
+            test_hash=data.get('test_hash'),  # Optional for backward compatibility
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
 
 
-def calculate_current_hashes(paths: Dict[str, Path]) -> Dict[str, Optional[str]]:
+def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
     """Computes the hashes for all current files on disk."""
     # Return hash keys that match what the fingerprint expects
-    return {
-        f"{file_type}_hash": calculate_sha256(file_path)
-        for file_type, file_path in paths.items()
-    }
+    hashes = {}
+    for file_type, file_path in paths.items():
+        if file_type == 'test_files':
+            # Bug #156: Calculate hashes for all test files
+            hashes['test_files'] = {
+                f.name: calculate_sha256(f)
+                for f in file_path
+                if isinstance(f, Path) and f.exists()
+            }
+        elif isinstance(file_path, Path):
+            hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
+    return hashes
 
 
 def get_git_diff(file_path: Path) -> str:
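Note: after this change calculate_current_hashes returns a mixed mapping: flat <type>_hash entries for single files plus a nested dict under 'test_files'. A hedged sketch of the resulting shape (paths and hash values are illustrative):

    from pathlib import Path

    paths = {
        'prompt': Path('prompts/calculator_python.prompt'),
        'code': Path('calculator.py'),
        'test': Path('tests/test_calculator.py'),
        'test_files': [Path('tests/test_calculator.py'),
                       Path('tests/test_calculator_cli.py')],
    }
    # calculate_current_hashes(paths) would then yield something like:
    # {'prompt_hash': '9f2b...', 'code_hash': '51ad...', 'test_hash': 'c3e1...',
    #  'test_files': {'test_calculator.py': 'c3e1...',
    #                 'test_calculator_cli.py': '77b4...'}}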
@@ -438,6 +576,7 @@ def estimate_operation_cost(operation: str, language: str = "python") -> float:
         'crash': 0.40,
         'verify': 0.35,
         'test': 0.60,
+        'test_extend': 0.60,  # Same cost as test - generates additional tests
         'fix': 0.45,
         'update': 0.25,
         'analyze_conflict': 0.20,
@@ -591,24 +730,103 @@ def _handle_missing_expected_files(
     )
 
 
-def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False) -> bool:
+def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False,
+                          basename: str = None, language: str = None) -> bool:
     """
     Check if workflow is complete considering skip flags.
-
+
     Args:
         paths: Dict mapping file types to their expected Path objects
         skip_tests: If True, test files are not required for completion
         skip_verify: If True, verification operations are not required
-
+        basename: Module basename (required for run_report check)
+        language: Module language (required for run_report check)
+
     Returns:
-        True if all required files exist for the current workflow configuration
+        True if all required files exist AND have been validated (run_report exists)
     """
     required_files = ['code', 'example']
-
+
     if not skip_tests:
         required_files.append('test')
-
-    return all(paths[f].exists() for f in required_files)
+
+    # Check all required files exist
+    if not all(paths[f].exists() for f in required_files):
+        return False
+
+    # Also check that run_report exists and code works (exit_code == 0)
+    # Without this, newly generated code would incorrectly be marked as "complete"
+    if basename and language:
+        run_report = read_run_report(basename, language)
+        if not run_report or run_report.exit_code != 0:
+            return False
+
+        # Check that run_report corresponds to current test files (staleness detection)
+        # If any test file changed since run_report was created, we can't trust the results
+        if not skip_tests:
+            # Bug #156: Check ALL test files, not just the primary one
+            if 'test_files' in paths and run_report.test_files:
+                # New multi-file comparison
+                current_test_hashes = {
+                    f.name: calculate_sha256(f)
+                    for f in paths['test_files']
+                    if f.exists()
+                }
+                stored_test_hashes = run_report.test_files
+
+                # Check if any test file changed or new ones added/removed
+                if set(current_test_hashes.keys()) != set(stored_test_hashes.keys()):
+                    return False  # Test files added or removed
+
+                for fname, current_hash in current_test_hashes.items():
+                    if stored_test_hashes.get(fname) != current_hash:
+                        return False  # Test file content changed
+            elif 'test' in paths and paths['test'].exists():
+                # Backward compat: single file check
+                current_test_hash = calculate_sha256(paths['test'])
+                if run_report.test_hash and current_test_hash != run_report.test_hash:
+                    # run_report was created for a different version of the test file
+                    return False
+                if not run_report.test_hash:
+                    # Legacy run_report without test_hash - check fingerprint timestamp as fallback
+                    fingerprint = read_fingerprint(basename, language)
+                    if fingerprint:
+                        # If fingerprint is newer than run_report, run_report might be stale
+                        from datetime import datetime
+                        try:
+                            fp_time = datetime.fromisoformat(fingerprint.timestamp.replace('Z', '+00:00'))
+                            rr_time = datetime.fromisoformat(run_report.timestamp.replace('Z', '+00:00'))
+                            if fp_time > rr_time:
+                                return False  # run_report predates fingerprint, might be stale
+                        except (ValueError, AttributeError):
+                            pass  # If timestamps can't be parsed, skip this check
+
+    # Check verify has been done (unless skip_verify)
+    # Without this, workflow would be "complete" after crash even though verify hasn't run
+    # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+    if not skip_verify:
+        fingerprint = read_fingerprint(basename, language)
+        if fingerprint:
+            # If command starts with 'skip:', the operation was skipped, not completed
+            if fingerprint.command.startswith('skip:'):
+                return False
+            if fingerprint.command not in ['verify', 'test', 'fix', 'update']:
+                return False
+
+    # CRITICAL FIX: Check tests have been run (unless skip_tests)
+    # Without this, workflow would be "complete" after verify even though tests haven't run
+    # This prevents false positive success when skip_verify=True but tests are still required
+    # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+    if not skip_tests:
+        fp = read_fingerprint(basename, language)
+        if fp:
+            # If command starts with 'skip:', the operation was skipped, not completed
+            if fp.command.startswith('skip:'):
+                return False
+            if fp.command not in ['test', 'fix', 'update']:
+                return False
+
+    return True
 
 
 def check_for_dependencies(prompt_content: str) -> bool:
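Note: the multi-file staleness check above reduces to comparing two name-to-hash mappings; any added, removed, or modified test file invalidates the stored run report. The core comparison, as a minimal standalone sketch:

    def run_report_is_fresh(current: dict, stored: dict) -> bool:
        # Same file set, same content hash for every file
        if set(current) != set(stored):
            return False  # test files were added or removed
        return all(stored[name] == h for name, h in current.items())

    # run_report_is_fresh({"test_a.py": "x1"}, {"test_a.py": "x1"})  -> True
    # run_report_is_fresh({"test_a.py": "x2"}, {"test_a.py": "x1"})  -> False

Plain dict equality would give the same answer; the two-step form mirrors the code above, which keeps the added/removed and content-changed cases distinguishable.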
@@ -693,7 +911,7 @@ def _check_example_success_history(basename: str, language: str) -> bool:
     return False
 
 
-def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Core decision-making function for sync operations with skip flag awareness.
 
@@ -713,14 +931,14 @@ def sync_determine_operation(basename: str, language: str, target_coverage: floa
 
     if log_mode:
         # Skip locking for read-only analysis
-        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
     else:
         # Normal exclusive locking for actual operations
         with SyncLock(basename, language) as lock:
-            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
 
 
-def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Perform the sync state analysis without locking concerns.
 
@@ -749,26 +967,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
     # Read fingerprint early since we need it for crash verification
     fingerprint = read_fingerprint(basename, language)
-
+
+    # Check if auto-deps just completed - ALWAYS regenerate code after auto-deps
+    # This must be checked early, before any run_report processing, because:
+    # 1. Old run_report (if exists) is stale and should be ignored
+    # 2. auto-deps updates dependencies but doesn't regenerate code
+    if fingerprint and fingerprint.command == 'auto-deps':
+        paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+        return SyncDecision(
+            operation='generate',
+            reason='Auto-deps completed - regenerate code with updated prompt',
+            confidence=0.90,
+            estimated_cost=estimate_operation_cost('generate'),
+            details={
+                'decision_type': 'heuristic',
+                'previous_command': 'auto-deps',
+                'code_exists': paths['code'].exists() if paths.get('code') else False,
+                'regenerate_after_autodeps': True
+            }
+        )
+
     run_report = read_run_report(basename, language)
-    if run_report:
-        # Check test failures first (higher priority than exit code)
-        if run_report.tests_failed > 0:
-            return SyncDecision(
-                operation='fix',
-                reason=f'Test failures detected: {run_report.tests_failed} failed tests',
-                confidence=0.90,
-                estimated_cost=estimate_operation_cost('fix'),
-                details={
-                    'decision_type': 'heuristic',
-                    'tests_failed': run_report.tests_failed,
-                    'exit_code': run_report.exit_code,
-                    'coverage': run_report.coverage
-                }
-            )
-
-        # Check if we just completed a crash operation and need verification
+    # Only process runtime signals (crash/fix/test) if we have a fingerprint
+    # Without a fingerprint, run_report is stale/orphaned and should be ignored
+    if run_report and fingerprint:
+        # Check for prompt changes FIRST - prompt changes take priority over runtime signals
+        # If the user modified the prompt, we need to regenerate regardless of runtime state
+        if fingerprint:
+            paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            current_prompt_hash = calculate_sha256(paths['prompt'])
+            if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
+                prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
+                has_deps = check_for_dependencies(prompt_content)
+                return SyncDecision(
+                    operation='auto-deps' if has_deps else 'generate',
+                    reason='Prompt changed - regenerating (takes priority over runtime signals)',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_changed': True,
+                        'previous_command': fingerprint.command,
+                        'runtime_state_ignored': True
+                    }
+                )
+
+        # Check if we just completed a crash operation and need verification FIRST
+        # This takes priority over test failures because we need to verify the crash fix worked
+        # BUT only proceed to verify if exit_code == 0 (crash fix succeeded)
         if fingerprint and fingerprint.command == 'crash' and not skip_verify:
+            if run_report.exit_code != 0:
+                # Crash fix didn't work - need to re-run crash
+                return SyncDecision(
+                    operation='crash',
+                    reason=f'Previous crash operation failed (exit_code={run_report.exit_code}) - retry crash fix',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'previous_command': 'crash',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_retry'
+                    }
+                )
             return SyncDecision(
                 operation='verify',
                 reason='Previous crash operation completed - verify example runs correctly',
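Note: taken together, the reordered checks give the heuristic a fixed precedence: auto-deps completion first, then prompt changes, then the crash/verify handshake, and only then test failures. A condensed, illustrative sketch of that ordering (not the package's API; the objects are simplified stand-ins):

    def next_operation(fingerprint, run_report, prompt_changed: bool) -> str:
        # Simplified precedence of the heuristics in _perform_sync_analysis
        if fingerprint and fingerprint.command == 'auto-deps':
            return 'generate'        # always regenerate after auto-deps
        if run_report and fingerprint:
            if prompt_changed:
                return 'generate'    # the prompt is the source of truth
            if fingerprint.command == 'crash':
                return 'verify' if run_report.exit_code == 0 else 'crash'
            if run_report.tests_failed > 0:
                return 'fix'
        return 'continue-analysis'   # fall through to the file-state checks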
@@ -782,6 +1043,41 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 }
             )
 
+        # Check test failures (after crash verification check)
+        if run_report.tests_failed > 0:
+            # First check if the test file actually exists
+            pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            test_file = pdd_files.get('test')
+
+            # Only suggest 'fix' if test file exists
+            if test_file and test_file.exists():
+                return SyncDecision(
+                    operation='fix',
+                    reason=f'Test failures detected: {run_report.tests_failed} failed tests',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'tests_failed': run_report.tests_failed,
+                        'exit_code': run_report.exit_code,
+                        'coverage': run_report.coverage
+                    }
+                )
+            # If test file doesn't exist but we have test failures in run report,
+            # we need to generate the test first
+            else:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test failures reported but test file missing - need to generate tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'run_report_shows_failures': True,
+                        'test_file_exists': False
+                    }
+                )
+
         # Then check for runtime crashes (only if no test failures)
         if run_report.exit_code != 0:
             # Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
@@ -833,6 +1129,23 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'skip_tests': True
                 }
             )
+        elif run_report.tests_failed == 0 and run_report.tests_passed > 0:
+            # Tests pass but coverage is below target
+            # Return 'test_extend' to signal we need to ADD more tests, not regenerate
+            return SyncDecision(
+                operation='test_extend',
+                reason=f'Tests pass ({run_report.tests_passed} passed) but coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% - extending tests',
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'current_coverage': run_report.coverage,
+                    'target_coverage': target_coverage,
+                    'tests_passed': run_report.tests_passed,
+                    'tests_failed': run_report.tests_failed,
+                    'extend_tests': True
+                }
+            )
         else:
             return SyncDecision(
                 operation='test',
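Note: the new test_extend branch only fires when the suite is green but coverage is below target; failing tests still route to 'fix', and a missing or empty suite to 'test'. For example, tests_passed=12, tests_failed=0, coverage=71.0 against target_coverage=90.0 yields test_extend. A condensed sketch of the branch (illustrative only; the earlier branches of the real chain are elided):

    def coverage_decision(tests_passed: int, tests_failed: int,
                          coverage: float, target: float) -> str:
        if tests_failed > 0:
            return 'fix'           # handled earlier in the real decision tree
        if coverage >= target:
            return 'nothing'       # coverage goal already met
        if tests_passed > 0:
            return 'test_extend'   # add tests rather than regenerate them
        return 'test'

    # coverage_decision(12, 0, 71.0, 90.0) -> 'test_extend'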
@@ -849,7 +1162,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             )
 
     # 2. Analyze File State
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
     current_hashes = calculate_current_hashes(paths)
 
     # 3. Implement the Decision Tree
@@ -926,7 +1239,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
     if not changes:
         # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
-        if _is_workflow_complete(paths, skip_tests, skip_verify):
+        if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
             return SyncDecision(
                 operation='nothing',
                 reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
@@ -939,7 +1252,63 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'workflow_complete': True
                 }
             )
-
+
+        # Handle incomplete workflow when all files exist (including test)
+        # This addresses the blind spot where crash/verify/test logic only runs when test is missing
+        if (paths['code'].exists() and paths['example'].exists() and paths['test'].exists()):
+            run_report = read_run_report(basename, language)
+
+            # BUG 4 & 1: No run_report OR crash detected (exit_code != 0)
+            if not run_report or run_report.exit_code != 0:
+                return SyncDecision(
+                    operation='crash',
+                    reason='All files exist but needs validation' +
+                           (' - no run_report' if not run_report else f' - exit_code={run_report.exit_code}'),
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'run_report_missing': not run_report,
+                        'exit_code': None if not run_report else run_report.exit_code,
+                        'workflow_stage': 'post_regeneration_validation'
+                    }
+                )
+
+            # BUG 2: Verify not run yet (run_report exists, exit_code=0, but command != verify/test)
+            if fingerprint and fingerprint.command not in ['verify', 'test', 'fix', 'update'] and not skip_verify:
+                return SyncDecision(
+                    operation='verify',
+                    reason='All files exist but verification not completed',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'last_command': fingerprint.command,
+                        'workflow_stage': 'verification_pending'
+                    }
+                )
+
+            # Stale run_report detected: _is_workflow_complete returned False but all other conditions passed
+            # This happens when run_report.test_hash doesn't match current test file, or
+            # when fingerprint timestamp > run_report timestamp (legacy detection)
+            # Need to re-run tests to get accurate results
+            if run_report and run_report.exit_code == 0:
+                return SyncDecision(
+                    operation='test',
+                    reason='Run report is stale - need to re-run tests to verify current state',
+                    confidence=0.9,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'run_report_stale': True,
+                        'run_report_test_hash': run_report.test_hash,
+                        'workflow_stage': 'revalidation'
+                    }
+                )
+
         # Progress workflow considering skip flags
         if paths['code'].exists() and not paths['example'].exists():
             return SyncDecision(
@@ -960,8 +1329,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
         # Check if example has been crash-tested and verified before allowing test generation
         run_report = read_run_report(basename, language)
-        if not run_report:
+        if not run_report and not skip_verify:
             # No run report exists - need to test the example first
+            # But if skip_verify is True, skip crash/verify and go to test generation
             return SyncDecision(
                 operation='crash',
                 reason='Example exists but needs runtime testing before test generation',
@@ -975,8 +1345,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'workflow_stage': 'crash_validation'
                 }
             )
-        elif run_report.exit_code != 0:
+        elif run_report and run_report.exit_code != 0 and not skip_verify:
             # Example crashed - fix it before proceeding
+            # But if skip_verify is True, skip crash fix and proceed
             return SyncDecision(
                 operation='crash',
                 reason='Example crashes - fix runtime errors before test generation',
@@ -1023,6 +1394,22 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     # Some files are missing but no changes detected
     if not paths['code'].exists():
         if paths['prompt'].exists():
+            # CRITICAL FIX: Check if auto-deps was just completed to prevent infinite loop
+            if fingerprint and fingerprint.command == 'auto-deps':
+                return SyncDecision(
+                    operation='generate',
+                    reason='Auto-deps completed, now generate missing code file',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_path': str(paths['prompt']),
+                        'code_exists': False,
+                        'auto_deps_completed': True,
+                        'previous_command': fingerprint.command
+                    }
+                )
+
             prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
             if check_for_dependencies(prompt_content):
                 return SyncDecision(
@@ -1124,18 +1511,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             )
 
     else:
-        # Complex Changes (Multiple Files Modified / Conflicts)
-        return SyncDecision(
-            operation='analyze_conflict',
-            reason='Multiple files changed - requires conflict analysis',
-            confidence=0.70,
-            estimated_cost=estimate_operation_cost('analyze_conflict'),
-            details={
-                'decision_type': 'heuristic',
-                'changed_files': changes,
-                'num_changes': len(changes)
-            }
-        )
+        # Complex Changes (Multiple Files Modified)
+        # CRITICAL: Only treat as conflict if prompt changed along with derived artifacts
+        # If only derived artifacts changed (code, example, test), this is NOT a conflict
+        # per PDD doctrine - all are derived from the unchanged prompt
+
+        if 'prompt' in changes:
+            # True conflict: prompt (source of truth) changed along with derived artifacts
+            return SyncDecision(
+                operation='analyze_conflict',
+                reason='Prompt and derived files changed - requires conflict analysis',
+                confidence=0.70,
+                estimated_cost=estimate_operation_cost('analyze_conflict'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_files': changes,
+                    'num_changes': len(changes),
+                    'prompt_changed': True
+                }
+            )
+        else:
+            # Only derived artifacts changed - prompt (source of truth) is unchanged
+            # Continue workflow from where it was interrupted
+
+            # If code changed, need to re-verify
+            if 'code' in changes:
+                return SyncDecision(
+                    operation='verify',
+                    reason='Derived files changed (prompt unchanged) - verify code works',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'num_changes': len(changes),
+                        'prompt_changed': False,
+                        'workflow_stage': 'continue_after_interruption'
+                    }
+                )
+            # If only example/test changed
+            elif 'example' in changes:
+                return SyncDecision(
+                    operation='verify',
+                    reason='Example changed (prompt unchanged) - verify example runs',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'prompt_changed': False
+                    }
+                )
+            elif 'test' in changes:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test changed (prompt unchanged) - run tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'prompt_changed': False
+                    }
+                )
 
     # Fallback - should not reach here normally
     return SyncDecision(
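Note: the rewritten branch encodes a small dispatch: a changed prompt is the only true conflict, while changes confined to derived artifacts just resume the workflow. Summarized as a sketch (operation names only; the real code returns full SyncDecision objects and falls through to a fallback decision when nothing matches):

    def classify_changes(changes: list) -> str:
        # Prompt is the source of truth, so only a prompt change is a conflict
        if 'prompt' in changes:
            return 'analyze_conflict'
        if 'code' in changes:
            return 'verify'    # re-verify regenerated or hand-edited code
        if 'example' in changes:
            return 'verify'    # re-run the example
        if 'test' in changes:
            return 'test'      # re-run the changed tests
        return 'fallback'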
@@ -1152,7 +1590,14 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     )
 
 
-def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerprint, changed_files: List[str], prompts_dir: str = "prompts") -> SyncDecision:
+def analyze_conflict_with_llm(
+    basename: str,
+    language: str,
+    fingerprint: Fingerprint,
+    changed_files: List[str],
+    prompts_dir: str = "prompts",
+    context_override: Optional[str] = None,
+) -> SyncDecision:
     """
     Resolve complex sync conflicts using an LLM.
 
@@ -1185,7 +1630,7 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         )
 
     # 2. Gather file paths and diffs
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
 
     # Generate diffs for changed files
     diffs = {}