pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py CHANGED
@@ -12,21 +12,32 @@ import subprocess
  import re
  import os
  from pathlib import Path
- from typing import Dict, Any, Optional, List
- from dataclasses import asdict
+ from typing import Dict, Any, Optional, List, Callable
+ from dataclasses import asdict, dataclass, field
+ import tempfile
+ import sys

  import click
+ import logging
+
+ # --- Constants ---
+ MAX_CONSECUTIVE_TESTS = 3 # Allow up to 3 consecutive test attempts
+ MAX_TEST_EXTEND_ATTEMPTS = 2 # Allow up to 2 attempts to extend tests for coverage
+ MAX_CONSECUTIVE_CRASHES = 3 # Allow up to 3 consecutive crash attempts (Bug #157 fix)

  # --- Real PDD Component Imports ---
- from .sync_animation import sync_animation
+ from .sync_tui import SyncApp
  from .sync_determine_operation import (
  sync_determine_operation,
  get_pdd_file_paths,
  RunReport,
+ SyncDecision,
  PDD_DIR,
  META_DIR,
  SyncLock,
  read_run_report,
+ calculate_sha256,
+ calculate_current_hashes,
  )
  from .auto_deps_main import auto_deps_main
  from .code_generator_main import code_generator_main
@@ -37,6 +48,104 @@ from .cmd_test_main import cmd_test_main
  from .fix_main import fix_main
  from .update_main import update_main
  from .python_env_detector import detect_host_python_executable
+ from .get_run_command import get_run_command_for_file
+ from .pytest_output import extract_failing_files_from_output
+ from . import DEFAULT_STRENGTH
+
+
+ # --- Atomic State Update (Issue #159 Fix) ---
+
+ @dataclass
+ class PendingStateUpdate:
+ """Holds pending state updates for atomic commit."""
+ run_report: Optional[Dict[str, Any]] = None
+ fingerprint: Optional[Dict[str, Any]] = None
+ run_report_path: Optional[Path] = None
+ fingerprint_path: Optional[Path] = None
+
+
+ class AtomicStateUpdate:
+ """
+ Context manager for atomic state updates.
+
+ Ensures run_report and fingerprint are both written or neither is written.
+ This fixes Issue #159 where non-atomic writes caused state desynchronization.
+
+ Usage:
+ with AtomicStateUpdate(basename, language) as state:
+ state.set_run_report(report_dict, report_path)
+ state.set_fingerprint(fingerprint_dict, fp_path)
+ # On successful exit, both files are written atomically
+ # On exception, neither file is written (rollback)
+ """
+
+ def __init__(self, basename: str, language: str):
+ self.basename = basename
+ self.language = language
+ self.pending = PendingStateUpdate()
+ self._temp_files: List[str] = []
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if exc_type is None:
+ self._commit()
+ else:
+ self._rollback()
+ return False # Don't suppress exceptions
+
+ def set_run_report(self, report: Dict[str, Any], path: Path):
+ """Buffer a run report for atomic write."""
+ self.pending.run_report = report
+ self.pending.run_report_path = path
+
+ def set_fingerprint(self, fingerprint: Dict[str, Any], path: Path):
+ """Buffer a fingerprint for atomic write."""
+ self.pending.fingerprint = fingerprint
+ self.pending.fingerprint_path = path
+
+ def _atomic_write(self, data: Dict[str, Any], target_path: Path) -> None:
+ """Write data to file atomically using temp file + rename pattern."""
+ target_path.parent.mkdir(parents=True, exist_ok=True)
+
+ # Write to temp file in same directory (required for atomic rename)
+ fd, temp_path = tempfile.mkstemp(
+ dir=target_path.parent,
+ prefix=f".{target_path.stem}_",
+ suffix=".tmp"
+ )
+ self._temp_files.append(temp_path)
+
+ try:
+ with os.fdopen(fd, 'w') as f:
+ json.dump(data, f, indent=2, default=str)
+
+ # Atomic rename - guaranteed atomic on POSIX systems
+ os.replace(temp_path, target_path)
+ self._temp_files.remove(temp_path) # Successfully moved, stop tracking
+ except Exception:
+ # Leave temp file for rollback to clean up
+ raise
+
+ def _commit(self):
+ """Commit all pending state updates atomically."""
+ # Write fingerprint first (checkpoint), then run_report
+ if self.pending.fingerprint and self.pending.fingerprint_path:
+ self._atomic_write(self.pending.fingerprint, self.pending.fingerprint_path)
+ if self.pending.run_report and self.pending.run_report_path:
+ self._atomic_write(self.pending.run_report, self.pending.run_report_path)
+
+ def _rollback(self):
+ """Clean up any temp files without committing changes."""
+ for temp_path in self._temp_files:
+ try:
+ if os.path.exists(temp_path):
+ os.unlink(temp_path)
+ except OSError:
+ pass # Best effort cleanup
+ self._temp_files.clear()
+

  # --- Mock Helper Functions ---

@@ -98,20 +207,44 @@ def log_sync_event(basename: str, language: str, event: str, details: Dict[str,
  }
  append_sync_log(basename, language, entry)

- def save_run_report(report: Dict[str, Any], basename: str, language: str):
- """Save a run report to the metadata directory."""
+ def save_run_report(report: Dict[str, Any], basename: str, language: str,
+ atomic_state: Optional['AtomicStateUpdate'] = None):
+ """Save a run report to the metadata directory.
+
+ Args:
+ report: The run report dictionary to save.
+ basename: The module basename.
+ language: The programming language.
+ atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+ """
  report_file = META_DIR / f"{basename}_{language}_run.json"
- META_DIR.mkdir(parents=True, exist_ok=True)
- with open(report_file, 'w') as f:
- json.dump(report, f, indent=2, default=str)
+ if atomic_state:
+ # Buffer for atomic write
+ atomic_state.set_run_report(report, report_file)
+ else:
+ # Legacy direct write
+ META_DIR.mkdir(parents=True, exist_ok=True)
+ with open(report_file, 'w') as f:
+ json.dump(report, f, indent=2, default=str)
+
+ def _save_operation_fingerprint(basename: str, language: str, operation: str,
+ paths: Dict[str, Path], cost: float, model: str,
+ atomic_state: Optional['AtomicStateUpdate'] = None):
+ """Save fingerprint state after successful operation.

- def _save_operation_fingerprint(basename: str, language: str, operation: str,
- paths: Dict[str, Path], cost: float, model: str):
- """Save fingerprint state after successful operation."""
+ Args:
+ basename: The module basename.
+ language: The programming language.
+ operation: The operation that was performed.
+ paths: Dictionary of PDD file paths.
+ cost: The cost of the operation.
+ model: The model used.
+ atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+ """
  from datetime import datetime, timezone
  from .sync_determine_operation import calculate_current_hashes, Fingerprint
  from . import __version__
-
+
  current_hashes = calculate_current_hashes(paths)
  fingerprint = Fingerprint(
  pdd_version=__version__,
@@ -120,103 +253,544 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
120
253
  prompt_hash=current_hashes.get('prompt_hash'),
121
254
  code_hash=current_hashes.get('code_hash'),
122
255
  example_hash=current_hashes.get('example_hash'),
123
- test_hash=current_hashes.get('test_hash')
256
+ test_hash=current_hashes.get('test_hash'),
257
+ test_files=current_hashes.get('test_files'), # Bug #156
124
258
  )
125
-
126
- META_DIR.mkdir(parents=True, exist_ok=True)
259
+
127
260
  fingerprint_file = META_DIR / f"{basename}_{language}.json"
128
- with open(fingerprint_file, 'w') as f:
129
- json.dump(asdict(fingerprint), f, indent=2, default=str)
261
+ if atomic_state:
262
+ # Buffer for atomic write
263
+ atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
264
+ else:
265
+ # Legacy direct write
266
+ META_DIR.mkdir(parents=True, exist_ok=True)
267
+ with open(fingerprint_file, 'w') as f:
268
+ json.dump(asdict(fingerprint), f, indent=2, default=str)
130
269
 
131
- # SyncLock class now imported from sync_determine_operation module
270
+ def _python_cov_target_for_code_file(code_file: Path) -> str:
271
+ """Return a `pytest-cov` `--cov` target for a Python code file.
272
+
273
+ - If the file is inside a Python package (directories with `__init__.py`),
274
+ returns a dotted module path (e.g., `pdd.sync_orchestration`).
275
+ - Otherwise falls back to the filename stem (e.g., `admin_get_users`).
276
+ """
277
+ if code_file.suffix != ".py":
278
+ return code_file.stem
279
+
280
+ package_dir: Optional[Path] = None
281
+ current = code_file.parent
282
+ while (current / "__init__.py").exists():
283
+ package_dir = current
284
+ parent = current.parent
285
+ if parent == current:
286
+ break
287
+ current = parent
288
+
289
+ if package_dir:
290
+ relative_module = code_file.relative_to(package_dir.parent).with_suffix("")
291
+ return str(relative_module).replace(os.sep, ".")
292
+
293
+ return code_file.stem
294
+
295
+
296
+ def _python_cov_target_for_test_and_code(test_file: Path, code_file: Path, fallback: str) -> str:
297
+ """Choose the best `--cov` target based on how tests import the code.
298
+
299
+ In some repos, tests add a directory to `sys.path` and import modules by their
300
+ filename stem (e.g., `from admin_get_users import ...`) even when the code
301
+ also lives under a package (e.g., `backend.functions.admin_get_users`).
302
+
303
+ Heuristic:
304
+ - Prefer the code file stem when the test file imports it directly.
305
+ - Otherwise, prefer the dotted module path derived from the package layout.
306
+ - Fall back to the provided fallback (usually the basename).
307
+ """
308
+
309
+ def _imports_module(source: str, module: str) -> bool:
310
+ escaped = re.escape(module)
311
+ return bool(
312
+ re.search(rf"^\s*import\s+{escaped}\b", source, re.MULTILINE)
313
+ or re.search(rf"^\s*from\s+{escaped}\b", source, re.MULTILINE)
314
+ )
315
+
316
+ stem = code_file.stem
317
+ dotted = _python_cov_target_for_code_file(code_file)
132
318
 
133
- def _execute_tests_and_create_run_report(test_file: Path, basename: str, language: str, target_coverage: float = 90.0) -> RunReport:
134
- """Execute tests and create a RunReport with actual results."""
135
- timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
136
-
137
319
  try:
138
- # Execute pytest with coverage reporting on the specific module
139
- # Extract module name from test file (e.g., test_factorial.py -> factorial)
140
- module_name = test_file.name.replace('test_', '').replace('.py', '')
141
-
142
- # Use the module import path rather than file path for coverage
143
- # Use environment-aware Python executable for pytest execution
144
- python_executable = detect_host_python_executable()
145
-
146
- # Determine coverage target based on module location
147
- if base_package:
148
- cov_target = f'{base_package}.{module_name}'
149
- else:
150
- # Dynamically discover package structure based on test file location
151
- relative_path = test_file.parent.relative_to(Path.cwd())
152
- package_path = str(relative_path).replace(os.sep, '.')
153
- cov_target = f'{package_path}.{module_name}' if package_path else module_name
154
-
155
- result = subprocess.run([
156
- python_executable, '-m', 'pytest',
157
- str(test_file),
158
- '-v',
159
- '--tb=short',
160
- f'--cov={cov_target}',
161
- '--cov-report=term-missing'
162
- ], capture_output=True, text=True, timeout=300)
163
-
164
- exit_code = result.returncode
165
- stdout = result.stdout
166
- stderr = result.stderr
167
-
168
- # Parse test results from pytest output
169
- tests_passed = 0
170
- tests_failed = 0
171
- coverage = 0.0
172
-
173
- # Parse passed/failed tests
174
- if 'passed' in stdout:
175
- passed_match = re.search(r'(\d+) passed', stdout)
320
+ test_source = test_file.read_text(encoding="utf-8", errors="ignore")
321
+ except Exception:
322
+ test_source = ""
323
+
324
+ if stem and _imports_module(test_source, stem):
325
+ return stem
326
+
327
+ if dotted and dotted != stem:
328
+ if _imports_module(test_source, dotted):
329
+ return dotted
330
+
331
+ if "." in dotted:
332
+ parent = dotted.rsplit(".", 1)[0]
333
+ # e.g. `from backend.functions import admin_get_users`
334
+ if re.search(
335
+ rf"^\s*from\s+{re.escape(parent)}\s+import\s+.*\b{re.escape(stem)}\b",
336
+ test_source,
337
+ re.MULTILINE,
338
+ ):
339
+ return dotted
340
+ # e.g. `import backend.functions.admin_get_users`
341
+ if re.search(
342
+ rf"^\s*import\s+{re.escape(parent)}\.{re.escape(stem)}\b",
343
+ test_source,
344
+ re.MULTILINE,
345
+ ):
346
+ return dotted
347
+
348
+ return dotted
349
+
350
+ return stem or fallback
351
+
352
+
353
+ def _parse_test_output(output: str, language: str) -> tuple[int, int, float]:
354
+ """
355
+ Parse test output to extract passed/failed/coverage.
356
+
357
+ Args:
358
+ output: Combined stdout/stderr from test runner
359
+ language: Language name (e.g., 'python', 'typescript', 'go')
360
+
361
+ Returns:
362
+ (tests_passed, tests_failed, coverage)
363
+ """
364
+ tests_passed = 0
365
+ tests_failed = 0
366
+ coverage = 0.0
367
+
368
+ lang = language.lower()
369
+
370
+ # Python (pytest)
371
+ if lang == 'python':
372
+ if 'passed' in output:
373
+ passed_match = re.search(r'(\d+) passed', output)
176
374
  if passed_match:
177
375
  tests_passed = int(passed_match.group(1))
178
-
179
- if 'failed' in stdout:
180
- failed_match = re.search(r'(\d+) failed', stdout)
376
+ if 'failed' in output:
377
+ failed_match = re.search(r'(\d+) failed', output)
181
378
  if failed_match:
182
379
  tests_failed = int(failed_match.group(1))
183
-
184
- # Parse coverage percentage - try multiple patterns
185
- coverage_match = re.search(r'TOTAL.*?(\d+)%', stdout)
380
+ if 'error' in output:
381
+ error_match = re.search(r'(\d+) error', output)
382
+ if error_match:
383
+ tests_failed += int(error_match.group(1))
384
+ coverage_match = re.search(r'TOTAL.*?(\d+)%', output)
186
385
  if not coverage_match:
187
- # Try alternative patterns for coverage output
188
- coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
386
+ coverage_match = re.search(r'(\d+)%\s*$', output, re.MULTILINE)
189
387
  if not coverage_match:
190
- # Try pattern with decimal
191
- coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
192
-
388
+ coverage_match = re.search(r'(\d+(?:\.\d+)?)%', output)
193
389
  if coverage_match:
194
390
  coverage = float(coverage_match.group(1))
195
-
196
- # Create and save run report
391
+
392
+ # Jest/Vitest (JavaScript/TypeScript)
393
+ elif lang in ('javascript', 'typescript', 'typescriptreact'):
394
+ # "Tests: X passed, Y failed" or "Tests: X passed, Y failed, Z total"
395
+ match = re.search(r'Tests:\s*(\d+)\s+passed', output)
396
+ if match:
397
+ tests_passed = int(match.group(1))
398
+ match = re.search(r'Tests:.*?(\d+)\s+failed', output)
399
+ if match:
400
+ tests_failed = int(match.group(1))
401
+
402
+ # Alternative Mocha-style: "X passing, Y failing"
403
+ if tests_passed == 0:
404
+ pass_match = re.search(r'(\d+)\s+pass(?:ing)?', output, re.I)
405
+ if pass_match:
406
+ tests_passed = int(pass_match.group(1))
407
+ if tests_failed == 0:
408
+ fail_match = re.search(r'(\d+)\s+fail(?:ing)?', output, re.I)
409
+ if fail_match:
410
+ tests_failed = int(fail_match.group(1))
411
+
412
+ # Coverage: "All files | XX.XX |"
413
+ cov_match = re.search(r'All files[^|]*\|\s*(\d+\.?\d*)', output)
414
+ if cov_match:
415
+ coverage = float(cov_match.group(1))
416
+
417
+ # Go
418
+ elif lang == 'go':
419
+ # Count PASS and FAIL occurrences for individual tests
420
+ tests_passed = len(re.findall(r'--- PASS:', output))
421
+ tests_failed = len(re.findall(r'--- FAIL:', output))
422
+
423
+ # Fallback: check for overall PASS/FAIL
424
+ if tests_passed == 0 and 'PASS' in output and 'FAIL' not in output:
425
+ tests_passed = 1
426
+ if tests_failed == 0 and 'FAIL' in output:
427
+ tests_failed = 1
428
+
429
+ # coverage: XX.X% of statements
430
+ cov_match = re.search(r'coverage:\s*(\d+\.?\d*)%', output)
431
+ if cov_match:
432
+ coverage = float(cov_match.group(1))
433
+
434
+ # Rust (cargo test)
435
+ elif lang == 'rust':
436
+ # "test result: ok. X passed; Y failed;"
437
+ match = re.search(r'(\d+)\s+passed', output)
438
+ if match:
439
+ tests_passed = int(match.group(1))
440
+ match = re.search(r'(\d+)\s+failed', output)
441
+ if match:
442
+ tests_failed = int(match.group(1))
443
+
444
+ # Fallback: try generic patterns
445
+ else:
446
+ pass_match = re.search(r'(\d+)\s+(?:tests?\s+)?pass(?:ed)?', output, re.I)
447
+ fail_match = re.search(r'(\d+)\s+(?:tests?\s+)?fail(?:ed)?', output, re.I)
448
+ if pass_match:
449
+ tests_passed = int(pass_match.group(1))
450
+ if fail_match:
451
+ tests_failed = int(fail_match.group(1))
452
+
453
+ return tests_passed, tests_failed, coverage
454
+
455
+
456
+ def _detect_example_errors(output: str) -> tuple[bool, str]:
457
+ """
458
+ Detect if example output contains error indicators.
459
+
460
+ Only detects true crashes/errors:
461
+ - Python tracebacks (catches ALL unhandled exceptions)
462
+ - ERROR level log messages
463
+
464
+ Intentionally does NOT detect:
465
+ - HTTP status codes (examples may test error responses)
466
+ - Individual exception type names (causes false positives, redundant with traceback)
467
+
468
+ Returns:
469
+ (has_errors, error_summary)
470
+ """
471
+ error_patterns = [
472
+ (r'Traceback \(most recent call last\):', 'Python traceback'),
473
+ (r' - ERROR - ', 'Error log message'), # Python logging format
474
+ ]
475
+
476
+ errors_found = []
477
+ for pattern, description in error_patterns:
478
+ if re.search(pattern, output, re.MULTILINE):
479
+ errors_found.append(description)
480
+
481
+ if errors_found:
482
+ return True, '; '.join(errors_found)
483
+ return False, ''
484
+
485
+
486
+ def _try_auto_fix_import_error(
487
+ error_output: str,
488
+ code_file: Path,
489
+ example_file: Path,
490
+ ) -> tuple[bool, str]:
491
+ """
492
+ Try to automatically fix common import errors before calling expensive agentic fix.
493
+
494
+ Returns:
495
+ (fixed, message): Whether a fix was attempted and what was done.
496
+ """
497
+ import re
498
+
499
+ # Check for ModuleNotFoundError or ImportError
500
+ module_not_found = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", error_output)
501
+ import_error = re.search(r"ImportError: cannot import name ['\"]([^'\"]+)['\"]", error_output)
502
+
503
+ if not module_not_found and not import_error:
504
+ return False, "No import error detected"
505
+
506
+ if module_not_found:
507
+ missing_module = module_not_found.group(1)
508
+ # Split by . to get the top-level package
509
+ top_level_package = missing_module.split('.')[0]
510
+
511
+ # Check if this is the module we're trying to import (local module)
512
+ code_module_name = code_file.stem # e.g., "data_validator" from "data_validator.py"
513
+
514
+ if top_level_package == code_module_name:
515
+ # It's trying to import our own generated code - fix the example's sys.path
516
+ # Read the example and fix the path manipulation
517
+ try:
518
+ example_content = example_file.read_text(encoding='utf-8')
519
+ code_dir = str(code_file.parent.resolve())
520
+
521
+ # Look for existing sys.path manipulation
522
+ if 'sys.path' in example_content:
523
+ # Try to fix the existing path manipulation
524
+ # Common pattern: module_path = os.path.abspath(os.path.join(...))
525
+ # Replace with correct path
526
+ fixed_content = re.sub(
527
+ r"module_path\s*=\s*os\.path\.abspath\([^)]+\)",
528
+ f"module_path = '{code_dir}'",
529
+ example_content
530
+ )
531
+ if fixed_content != example_content:
532
+ example_file.write_text(fixed_content, encoding='utf-8')
533
+ return True, f"Fixed sys.path to point to {code_dir}"
534
+
535
+ # If no existing sys.path, add one at the start after imports
536
+ lines = example_content.split('\n')
537
+ insert_pos = 0
538
+ for i, line in enumerate(lines):
539
+ if line.startswith('import ') or line.startswith('from '):
540
+ if 'sys' in line or 'os' in line:
541
+ insert_pos = i + 1
542
+ continue
543
+ if line.strip() and not line.startswith('#') and not line.startswith('import') and not line.startswith('from'):
544
+ insert_pos = i
545
+ break
546
+
547
+ path_fix = f"\n# Auto-added by pdd to fix import\nimport sys\nsys.path.insert(0, '{code_dir}')\n"
548
+ lines.insert(insert_pos, path_fix)
549
+ example_file.write_text('\n'.join(lines), encoding='utf-8')
550
+ return True, f"Added sys.path.insert(0, '{code_dir}') to example"
551
+
552
+ except Exception as e:
553
+ return False, f"Failed to fix import path: {e}"
554
+
555
+ else:
556
+ # It's an external package - try pip install
557
+ try:
558
+ result = subprocess.run(
559
+ [sys.executable, '-m', 'pip', 'install', top_level_package],
560
+ capture_output=True,
561
+ text=True,
562
+ timeout=120
563
+ )
564
+ if result.returncode == 0:
565
+ return True, f"Installed missing package: {top_level_package}"
566
+ else:
567
+ return False, f"Failed to install {top_level_package}: {result.stderr}"
568
+ except Exception as e:
569
+ return False, f"Failed to run pip install: {e}"
570
+
571
+ return False, "Import error detected but no auto-fix available"
572
+
573
+
574
+ def _run_example_with_error_detection(
575
+ cmd_parts: list[str],
576
+ env: dict,
577
+ cwd: str,
578
+ timeout: int = 60
579
+ ) -> tuple[int, str, str]:
580
+ """
581
+ Run example file, detecting errors from output.
582
+
583
+ For server-style examples that block, this runs until timeout
584
+ then analyzes output for errors. No errors = success.
585
+
586
+ Returns:
587
+ (returncode, stdout, stderr)
588
+ - returncode: 0 if no errors detected, positive if errors found or process failed
589
+ """
590
+ import threading
591
+
592
+ proc = subprocess.Popen(
593
+ cmd_parts,
594
+ stdout=subprocess.PIPE,
595
+ stderr=subprocess.PIPE,
596
+ stdin=subprocess.DEVNULL,
597
+ env=env,
598
+ cwd=cwd,
599
+ start_new_session=True,
600
+ )
601
+
602
+ stdout_chunks = []
603
+ stderr_chunks = []
604
+
605
+ def read_pipe(pipe, chunks):
606
+ try:
607
+ for line in iter(pipe.readline, b''):
608
+ chunks.append(line)
609
+ except Exception:
610
+ pass
611
+
612
+ t_out = threading.Thread(target=read_pipe, args=(proc.stdout, stdout_chunks), daemon=True)
613
+ t_err = threading.Thread(target=read_pipe, args=(proc.stderr, stderr_chunks), daemon=True)
614
+ t_out.start()
615
+ t_err.start()
616
+
617
+ # Wait for process or timeout
618
+ try:
619
+ proc.wait(timeout=timeout)
620
+ except subprocess.TimeoutExpired:
621
+ proc.terminate()
622
+ try:
623
+ proc.wait(timeout=5)
624
+ except subprocess.TimeoutExpired:
625
+ proc.kill()
626
+ proc.wait()
627
+
628
+ t_out.join(timeout=2)
629
+ t_err.join(timeout=2)
630
+
631
+ stdout = b''.join(stdout_chunks).decode('utf-8', errors='replace')
632
+ stderr = b''.join(stderr_chunks).decode('utf-8', errors='replace')
633
+ combined = stdout + '\n' + stderr
634
+
635
+ # Check for errors in output
636
+ has_errors, error_summary = _detect_example_errors(combined)
637
+
638
+ # Determine result:
639
+ # - Errors in output → failure
640
+ # - Positive exit code (process failed normally, e.g., sys.exit(1)) → failure
641
+ # - Negative exit code (killed by signal, e.g., -9 for SIGKILL) → check output
642
+ # - Zero exit code → success
643
+ #
644
+ # IMPORTANT: When we kill the process after timeout, returncode is negative
645
+ # (the signal number). This is NOT a failure if output has no errors.
646
+ if has_errors:
647
+ return 1, stdout, stderr # Errors detected in output
648
+ elif proc.returncode is not None and proc.returncode > 0:
649
+ return proc.returncode, stdout, stderr # Process exited with error
650
+ else:
651
+ # Success cases:
652
+ # - returncode == 0 (clean exit)
653
+ # - returncode < 0 (killed by signal, but no errors in output)
654
+ # - returncode is None (shouldn't happen after wait, but safe fallback)
655
+ return 0, stdout, stderr
656
+
657
+
658
+ def _execute_tests_and_create_run_report(
659
+ test_file: Path,
660
+ basename: str,
661
+ language: str,
662
+ target_coverage: float = 90.0,
663
+ *,
664
+ code_file: Optional[Path] = None,
665
+ atomic_state: Optional['AtomicStateUpdate'] = None,
666
+ test_files: Optional[List[Path]] = None, # Bug #156: Support multiple test files
667
+ ) -> RunReport:
668
+ """Execute tests and create a RunReport with actual results.
669
+
670
+ Now supports multiple languages by using get_test_command_for_file()
671
+ to determine the appropriate test runner.
672
+
673
+ Args:
674
+ test_file: Primary test file (for backward compat)
675
+ test_files: Optional list of all test files to run (Bug #156)
676
+ """
677
+ from .get_test_command import get_test_command_for_file
678
+
679
+ timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
680
+
681
+ # Bug #156: Use test_files if provided, otherwise just the single test_file
682
+ all_test_files = test_files if test_files else [test_file]
683
+
684
+ # Calculate test file hash for staleness detection (primary file for backward compat)
685
+ test_hash = calculate_sha256(test_file) if test_file.exists() else None
686
+
687
+ # Bug #156: Calculate hashes for ALL test files
688
+ test_file_hashes = {
689
+ f.name: calculate_sha256(f)
690
+ for f in all_test_files
691
+ if f.exists()
692
+ } if all_test_files else None
693
+
694
+ # Use clean env without TUI-specific vars
695
+ clean_env = os.environ.copy()
696
+ for var in ['FORCE_COLOR', 'COLUMNS']:
697
+ clean_env.pop(var, None)
698
+
699
+ try:
700
+ lang_lower = language.lower()
701
+
702
+ # Python: use existing pytest logic with coverage
703
+ if lang_lower == "python":
704
+ module_name = test_file.name.replace('test_', '').replace('.py', '')
705
+ python_executable = detect_host_python_executable()
706
+
707
+ cov_target = None
708
+ if code_file is not None:
709
+ cov_target = _python_cov_target_for_test_and_code(test_file, code_file, basename or module_name)
710
+ else:
711
+ cov_target = basename or module_name
712
+
713
+ if not cov_target:
714
+ cov_target = basename or module_name
715
+
716
+ # Bug #156: Run pytest on ALL test files
717
+ pytest_args = [
718
+ python_executable, '-m', 'pytest',
719
+ ] + [str(f) for f in all_test_files] + [
720
+ '-v',
721
+ '--tb=short',
722
+ f'--cov={cov_target}',
723
+ '--cov-report=term-missing'
724
+ ]
725
+ result = subprocess.run(
726
+ pytest_args,
727
+ capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
728
+ )
729
+
730
+ exit_code = result.returncode
731
+ stdout = result.stdout + (result.stderr or '')
732
+ tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
733
+
734
+ else:
735
+ # Non-Python: use language-appropriate test command
736
+ test_cmd = get_test_command_for_file(str(test_file), language)
737
+
738
+ if test_cmd is None:
739
+ # No test command available - return report indicating this
740
+ report = RunReport(
741
+ timestamp=timestamp,
742
+ exit_code=127, # Command not found
743
+ tests_passed=0,
744
+ tests_failed=0,
745
+ coverage=0.0,
746
+ test_hash=test_hash,
747
+ test_files=test_file_hashes, # Bug #156
748
+ )
749
+ save_run_report(asdict(report), basename, language, atomic_state)
750
+ return report
751
+
752
+ # Run the test command
753
+ result = subprocess.run(
754
+ test_cmd,
755
+ shell=True,
756
+ capture_output=True,
757
+ text=True,
758
+ timeout=300,
759
+ env=clean_env,
760
+ cwd=str(test_file.parent),
761
+ stdin=subprocess.DEVNULL,
762
+ start_new_session=True
763
+ )
764
+
765
+ exit_code = result.returncode
766
+ stdout = (result.stdout or '') + '\n' + (result.stderr or '')
767
+
768
+ # Parse results based on language
769
+ tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
770
+
197
771
  report = RunReport(
198
772
  timestamp=timestamp,
199
773
  exit_code=exit_code,
200
774
  tests_passed=tests_passed,
201
775
  tests_failed=tests_failed,
202
- coverage=coverage
776
+ coverage=coverage,
777
+ test_hash=test_hash,
778
+ test_files=test_file_hashes, # Bug #156
203
779
  )
204
-
780
+
205
781
  except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception) as e:
206
- # If test execution fails, create a report indicating failure
207
782
  report = RunReport(
208
783
  timestamp=timestamp,
209
784
  exit_code=1,
210
785
  tests_passed=0,
211
786
  tests_failed=1,
212
- coverage=0.0
787
+ coverage=0.0,
788
+ test_hash=test_hash,
789
+ test_files=test_file_hashes, # Bug #156
213
790
  )
214
-
215
- # Save the run report
216
- save_run_report(asdict(report), basename, language)
217
- return report
218
791
 
219
- # --- Helper for Click Context ---
792
+ save_run_report(asdict(report), basename, language, atomic_state)
793
+ return report
220
794
 
221
795
  def _create_mock_context(**kwargs) -> click.Context:
222
796
  """Creates a mock Click context object to pass parameters to command functions."""
@@ -242,7 +816,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  for entry in log_entries:
  timestamp = entry.get('timestamp', 'N/A')

- # Handle special event entries
  if 'event' in entry:
  event = entry.get('event', 'N/A')
  print(f"[{timestamp[:19]}] EVENT: {event}")
@@ -251,7 +824,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  print(f" Details: {details_str}")
  continue

- # Handle operation entries
  operation = entry.get('operation', 'N/A')
  reason = entry.get('reason', 'N/A')
  success = entry.get('success')
@@ -260,7 +832,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  duration = entry.get('duration')

  if verbose:
- # Verbose format
  print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
  decision_type = entry.get('decision_type', 'N/A')
  confidence = entry.get('confidence', 'N/A')
@@ -276,14 +847,12 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  print(f" Estimated Cost: ${estimated_cost:.2f}")

  if 'details' in entry and entry['details']:
- # Show details without budget_remaining to avoid clutter
  details_copy = entry['details'].copy()
  details_copy.pop('budget_remaining', None)
  if details_copy:
  details_str = json.dumps(details_copy, indent=2)
  print(f" Details: {details_str}")
  else:
- # Normal format: [timestamp] operation | reason | status cost | duration
  status_icon = "✓" if success else "✗" if success is False else "?"

  cost_info = ""
@@ -318,45 +887,60 @@ def sync_orchestration(
  budget: float = 10.0,
  skip_verify: bool = False,
  skip_tests: bool = False,
- log: bool = False,
+ dry_run: bool = False,
  force: bool = False,
- strength: float = 0.5,
+ strength: float = DEFAULT_STRENGTH,
  temperature: float = 0.0,
- time_param: float = 0.25, # Renamed to avoid conflict with `time` module
+ time_param: float = 0.25,
  verbose: bool = False,
  quiet: bool = False,
  output_cost: Optional[str] = None,
  review_examples: bool = False,
  local: bool = False,
  context_config: Optional[Dict[str, str]] = None,
+ context_override: Optional[str] = None,
+ confirm_callback: Optional[Callable[[str, str], bool]] = None,
  ) -> Dict[str, Any]:
  """
  Orchestrates the complete PDD sync workflow with parallel animation.
-
- If log=True, displays the sync log instead of running sync operations.
- The verbose flag controls the detail level of the log output.
-
- Returns a dictionary summarizing the outcome of the sync process.
  """
- if log:
+ # Import get_extension at function scope
+ from .sync_determine_operation import get_extension
+
+ if dry_run:
  return _display_sync_log(basename, language, verbose)

  # --- Initialize State and Paths ---
  try:
- pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
+ pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+ except FileNotFoundError as e:
+ if "test_config.py" in str(e) or "tests/test_" in str(e):
+ pdd_files = {
+ 'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
+ 'code': Path(f"src/{basename}.{get_extension(language)}"),
+ 'example': Path(f"context/{basename}_example.{get_extension(language)}"),
+ 'test': Path(f"tests/test_{basename}.{get_extension(language)}")
+ }
+ if not quiet:
+ print(f"Note: Test file missing, continuing with sync workflow to generate it")
+ else:
+ print(f"Error constructing paths: {e}")
+ return {
+ "success": False,
+ "error": f"Failed to construct paths: {str(e)}",
+ "operations_completed": [],
+ "errors": [f"Path construction failed: {str(e)}"]
+ }
  except Exception as e:
- # Log the error and return early with failure status
  print(f"Error constructing paths: {e}")
  return {
  "success": False,
- "total_cost": 0.0,
- "model_name": "",
  "error": f"Failed to construct paths: {str(e)}",
  "operations_completed": [],
  "errors": [f"Path construction failed: {str(e)}"]
  }

- # Shared state for animation thread
+ # Shared state for animation (passed to App)
  current_function_name_ref = ["initializing"]
  stop_event = threading.Event()
  current_cost_ref = [0.0]
@@ -364,696 +948,695 @@ def sync_orchestration(
364
948
  code_path_ref = [str(pdd_files.get('code', 'N/A'))]
365
949
  example_path_ref = [str(pdd_files.get('example', 'N/A'))]
366
950
  tests_path_ref = [str(pdd_files.get('test', 'N/A'))]
367
- prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref = \
368
- ["blue"], ["blue"], ["blue"], ["blue"]
369
-
370
- # Orchestration state
371
- operations_completed: List[str] = []
372
- skipped_operations: List[str] = []
373
- errors: List[str] = []
374
- start_time = time.time()
375
- animation_thread = None
376
-
377
- # Track operation history for cycle detection
378
- operation_history: List[str] = []
379
- MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
951
+ prompt_box_color_ref = ["blue"]
952
+ code_box_color_ref = ["blue"]
953
+ example_box_color_ref = ["blue"]
954
+ tests_box_color_ref = ["blue"]
380
955
 
381
- try:
382
- with SyncLock(basename, language):
383
- # Log lock acquisition
384
- log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
385
-
386
- # --- Start Animation Thread ---
387
- animation_thread = threading.Thread(
388
- target=sync_animation,
389
- args=(
390
- current_function_name_ref, stop_event, basename, current_cost_ref, budget,
391
- prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref,
392
- prompt_path_ref, code_path_ref, example_path_ref, tests_path_ref
393
- ),
394
- daemon=True
395
- )
396
- animation_thread.start()
397
-
398
- # --- Main Workflow Loop ---
399
- while True:
400
- budget_remaining = budget - current_cost_ref[0]
401
- if current_cost_ref[0] >= budget:
402
- errors.append(f"Budget of ${budget:.2f} exceeded.")
403
- log_sync_event(basename, language, "budget_exceeded", {
404
- "total_cost": current_cost_ref[0],
405
- "budget": budget
406
- })
407
- break
408
-
409
- # Log budget warning when running low
410
- if budget_remaining < budget * 0.2 and budget_remaining > 0:
411
- log_sync_event(basename, language, "budget_warning", {
412
- "remaining": budget_remaining,
413
- "percentage": (budget_remaining / budget) * 100
414
- })
415
-
416
- decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
417
- operation = decision.operation
418
-
419
- # Create log entry with decision info
420
- log_entry = create_sync_log_entry(decision, budget_remaining)
421
-
422
- # Track operation history
423
- operation_history.append(operation)
424
-
425
- # Detect crash-verify cycles
426
- if len(operation_history) >= 4:
427
- # Check for repeating crash-verify pattern
428
- recent_ops = operation_history[-4:]
429
- if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
430
- recent_ops == ['verify', 'crash', 'verify', 'crash']):
431
- # Count how many times this cycle has occurred
432
- cycle_count = 0
433
- for i in range(0, len(operation_history) - 1, 2):
434
- if i + 1 < len(operation_history):
435
- if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
436
- (operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
437
- cycle_count += 1
438
-
439
- if cycle_count >= MAX_CYCLE_REPEATS:
440
- errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
441
- errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
442
- log_sync_event(basename, language, "cycle_detected", {
443
- "cycle_type": "crash-verify",
444
- "cycle_count": cycle_count,
445
- "operation_history": operation_history[-10:] # Last 10 operations
446
- })
447
- break
956
+ # Mutable container for the app reference (set after app creation)
957
+ # This allows the worker to access app.request_confirmation()
958
+ app_ref: List[Optional['SyncApp']] = [None]
448
959
 
449
- # Detect consecutive fix operations (infinite fix loop protection)
450
- if operation == 'fix':
451
- # Count consecutive fix operations
452
- consecutive_fixes = 0
453
- for i in range(len(operation_history) - 1, -1, -1):
454
- if operation_history[i] == 'fix':
455
- consecutive_fixes += 1
456
- else:
457
- break
458
-
459
- MAX_CONSECUTIVE_FIXES = 5 # Allow up to 5 consecutive fix attempts
460
- if consecutive_fixes >= MAX_CONSECUTIVE_FIXES:
461
- errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
462
- errors.append("The test failures may not be resolvable by automated fixes in this environment.")
463
- log_sync_event(basename, language, "cycle_detected", {
464
- "cycle_type": "consecutive-fix",
465
- "consecutive_count": consecutive_fixes,
466
- "operation_history": operation_history[-10:] # Last 10 operations
960
+ # Progress callback ref for TUI ProgressBar updates during auto-deps
961
+ progress_callback_ref: List[Optional[Callable[[int, int], None]]] = [None]
962
+
963
+ # Track if user has already confirmed overwrite (to avoid asking multiple times)
964
+ user_confirmed_overwrite: List[bool] = [False]
965
+
966
+ def get_confirm_callback() -> Optional[Callable[[str, str], bool]]:
967
+ """Get the confirmation callback from the app if available.
968
+
969
+ Once user confirms, we remember it so subsequent operations don't ask again.
970
+ """
971
+ if user_confirmed_overwrite[0]:
972
+ # User already confirmed, return a callback that always returns True
973
+ return lambda msg, title: True
974
+
975
+ if app_ref[0] is not None:
976
+ def confirming_callback(msg: str, title: str) -> bool:
977
+ result = app_ref[0].request_confirmation(msg, title)
978
+ if result:
979
+ user_confirmed_overwrite[0] = True
980
+ return result
981
+ return confirming_callback
982
+ return confirm_callback # Fall back to provided callback
983
+
984
+ def sync_worker_logic():
985
+ """
986
+ The main loop of sync logic, run in a worker thread by Textual App.
987
+ """
988
+ operations_completed: List[str] = []
989
+ skipped_operations: List[str] = []
990
+ errors: List[str] = []
991
+ start_time = time.time()
992
+ last_model_name: str = ""
993
+ operation_history: List[str] = []
994
+ MAX_CYCLE_REPEATS = 2
995
+
996
+ # Helper function to print inside worker (goes to RichLog via redirection)
997
+ # print() will work if sys.stdout is redirected.
998
+
999
+ try:
1000
+ with SyncLock(basename, language):
1001
+ log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
1002
+
1003
+ while True:
1004
+ budget_remaining = budget - current_cost_ref[0]
1005
+ if current_cost_ref[0] >= budget:
1006
+ errors.append(f"Budget of ${budget:.2f} exceeded.")
1007
+ log_sync_event(basename, language, "budget_exceeded", {
1008
+ "total_cost": current_cost_ref[0],
1009
+ "budget": budget
467
1010
  })
468
1011
  break
469
1012
 
470
- if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
471
- current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
472
-
473
- # Log these final operations
474
- success = operation in ['all_synced', 'nothing']
475
- error_msg = None
476
- if operation == 'fail_and_request_manual_merge':
477
- errors.append(f"Manual merge required: {decision.reason}")
478
- error_msg = f"Manual merge required: {decision.reason}"
479
- elif operation == 'error':
480
- errors.append(f"Error determining operation: {decision.reason}")
481
- error_msg = f"Error determining operation: {decision.reason}"
482
- elif operation == 'analyze_conflict':
483
- errors.append(f"Conflict detected: {decision.reason}")
484
- error_msg = f"Conflict detected: {decision.reason}"
485
-
486
- # Update log entry for final operation
487
- update_sync_log_entry(log_entry, {
488
- 'success': success,
489
- 'cost': 0.0,
490
- 'model': 'none',
491
- 'error': error_msg
492
- }, 0.0)
493
- append_sync_log(basename, language, log_entry)
494
-
495
- break
496
-
497
- # Handle skips
498
- if operation == 'verify' and (skip_verify or skip_tests):
499
- # Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
500
- skipped_operations.append('verify')
501
- skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
502
-
503
- # Update log entry for skipped operation
504
- update_sync_log_entry(log_entry, {
505
- 'success': True,
506
- 'cost': 0.0,
507
- 'model': 'skipped',
508
- 'error': None
509
- }, 0.0)
510
- log_entry['details']['skip_reason'] = skip_reason
511
- append_sync_log(basename, language, log_entry)
1013
+ if budget_remaining < budget * 0.2 and budget_remaining > 0:
1014
+ log_sync_event(basename, language, "budget_warning", {
1015
+ "remaining": budget_remaining,
1016
+ "percentage": (budget_remaining / budget) * 100
1017
+ })
1018
+
1019
+ decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
1020
+ operation = decision.operation
512
1021
 
513
- report_data = RunReport(
514
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
515
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
516
- )
517
- save_run_report(asdict(report_data), basename, language)
518
- _save_operation_fingerprint(basename, language, 'verify', pdd_files, 0.0, skip_reason)
519
- continue
520
- if operation == 'test' and skip_tests:
521
- skipped_operations.append('test')
1022
+ log_entry = create_sync_log_entry(decision, budget_remaining)
1023
+ operation_history.append(operation)
522
1024
 
523
- # Update log entry for skipped operation
524
- update_sync_log_entry(log_entry, {
525
- 'success': True,
526
- 'cost': 0.0,
527
- 'model': 'skipped',
528
- 'error': None
529
- }, 0.0)
530
- log_entry['details']['skip_reason'] = 'skip_tests'
531
- append_sync_log(basename, language, log_entry)
1025
+ # Cycle detection logic
1026
+ if len(operation_history) >= 3:
1027
+ recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
1028
+ if len(recent_auto_deps) >= 2:
1029
+ errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
1030
+ log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
1031
+ operation = 'generate'
1032
+ decision.operation = 'generate' # Update decision too
1033
+
1034
+ # Bug #4 fix: Detect crash-verify cycle pattern
1035
+ # The pattern [crash, verify, crash, verify] or [verify, crash, verify, crash]
1036
+ # represents 2 iterations of the alternating cycle, so break immediately
1037
+ if len(operation_history) >= 4:
1038
+ recent_ops = operation_history[-4:]
1039
+ if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
1040
+ recent_ops == ['verify', 'crash', 'verify', 'crash']):
1041
+ # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1042
+ errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1043
+ log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
1044
+ break
1045
+
1046
+ # Bug #4 fix: Detect test-fix cycle pattern
1047
+ # The pattern [test, fix, test, fix] or [fix, test, fix, test]
1048
+ # represents 2 iterations of the alternating cycle, so break immediately
1049
+ if len(operation_history) >= 4:
1050
+ recent_ops = operation_history[-4:]
1051
+ if (recent_ops == ['test', 'fix', 'test', 'fix'] or
1052
+ recent_ops == ['fix', 'test', 'fix', 'test']):
1053
+ # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1054
+ errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1055
+ log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
1056
+ break
1057
+
1058
+ if operation == 'fix':
1059
+ consecutive_fixes = 0
1060
+ for i in range(len(operation_history) - 1, -1, -1):
1061
+ if operation_history[i] == 'fix':
1062
+ consecutive_fixes += 1
1063
+ else:
1064
+ break
1065
+ if consecutive_fixes >= 5:
1066
+ errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
1067
+ break
1068
+
1069
+ if operation == 'test':
1070
+ consecutive_tests = 0
1071
+ for i in range(len(operation_history) - 1, -1, -1):
1072
+ if operation_history[i] == 'test':
1073
+ consecutive_tests += 1
1074
+ else:
1075
+ break
1076
+ if consecutive_tests >= MAX_CONSECUTIVE_TESTS:
1077
+ errors.append(f"Detected {consecutive_tests} consecutive test operations. Breaking infinite test loop.")
1078
+ break
1079
+
1080
+ # Bug #157 fix: Prevent infinite crash retry loops
1081
+ if operation == 'crash':
1082
+ consecutive_crashes = 0
1083
+ for i in range(len(operation_history) - 1, -1, -1):
1084
+ if operation_history[i] == 'crash':
1085
+ consecutive_crashes += 1
1086
+ else:
1087
+ break
1088
+ if consecutive_crashes >= MAX_CONSECUTIVE_CRASHES:
1089
+ errors.append(f"Detected {consecutive_crashes} consecutive crash operations. Breaking infinite crash loop.")
1090
+ break
1091
+
1092
+ if operation == 'test_extend':
1093
+ # Count test_extend attempts to prevent infinite loop
1094
+ extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
1095
+ if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
1096
+ # Accept current coverage after max attempts
1097
+ log_sync_event(basename, language, "test_extend_limit", {
1098
+ "attempts": extend_attempts,
1099
+ "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
1100
+ "reason": "Accepting current coverage after max extend attempts"
1101
+ })
1102
+ success = True
1103
+ break
1104
+
1105
+ if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
1106
+ current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
1107
+ success = operation in ['all_synced', 'nothing']
1108
+ error_msg = None
1109
+ if operation == 'fail_and_request_manual_merge':
1110
+ errors.append(f"Manual merge required: {decision.reason}")
1111
+ error_msg = decision.reason
1112
+ elif operation == 'error':
1113
+ errors.append(f"Error determining operation: {decision.reason}")
1114
+ error_msg = decision.reason
1115
+ elif operation == 'analyze_conflict':
1116
+ errors.append(f"Conflict detected: {decision.reason}")
1117
+ error_msg = decision.reason
1118
+
1119
+ update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
1120
+ append_sync_log(basename, language, log_entry)
1121
+ break
532
1122
 
533
- report_data = RunReport(
534
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
535
- exit_code=0, tests_passed=0, tests_failed=0, coverage=1.0
1123
+ # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
1124
+ # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
1125
+ if operation == 'verify' and (skip_verify or skip_tests):
1126
+ skipped_operations.append('verify')
1127
+ update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1128
+ append_sync_log(basename, language, log_entry)
1129
+ # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1130
+ _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
1131
+ continue
1132
+ if operation == 'test' and skip_tests:
1133
+ skipped_operations.append('test')
1134
+ update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1135
+ append_sync_log(basename, language, log_entry)
1136
+ # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1137
+ _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
1138
+ continue
1139
+ if operation == 'crash' and (skip_tests or skip_verify):
1140
+ skipped_operations.append('crash')
1141
+ update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1142
+ append_sync_log(basename, language, log_entry)
1143
+ # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1144
+ _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
1145
+ # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
1146
+ # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
1147
+ current_hashes = calculate_current_hashes(pdd_files)
1148
+ synthetic_report = RunReport(
1149
+ timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
1150
+ exit_code=0, # Assume success since we're skipping validation
1151
+ tests_passed=0,
1152
+ tests_failed=0,
1153
+ coverage=0.0,
1154
+ test_hash=current_hashes.get('test_hash')
1155
+ )
1156
+ save_run_report(asdict(synthetic_report), basename, language)
1157
+ continue
1158
+
1159
+ current_function_name_ref[0] = operation
1160
+ ctx = _create_mock_context(
1161
+ force=force, strength=strength, temperature=temperature, time=time_param,
1162
+ verbose=verbose, quiet=quiet, output_cost=output_cost,
1163
+ review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
1164
+ max_attempts=max_attempts, target_coverage=target_coverage,
1165
+ confirm_callback=get_confirm_callback(),
1166
+ context=context_override
536
1167
  )
537
- save_run_report(asdict(report_data), basename, language)
538
- _save_operation_fingerprint(basename, language, 'test', pdd_files, 0.0, 'skipped')
539
- continue
540
- if operation == 'crash' and skip_tests:
541
- # Skip crash operations when tests are skipped since crash fixes usually require test execution
542
- skipped_operations.append('crash')
543
1168
 
544
- # Update log entry for skipped operation
545
- update_sync_log_entry(log_entry, {
546
- 'success': True,
547
- 'cost': 0.0,
548
- 'model': 'skipped',
549
- 'error': None
550
- }, 0.0)
551
- log_entry['details']['skip_reason'] = 'skip_tests'
552
- append_sync_log(basename, language, log_entry)
553
-
554
- # Create a dummy run report indicating crash was skipped
555
- report_data = RunReport(
556
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
557
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
558
- )
559
- save_run_report(asdict(report_data), basename, language)
560
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped')
561
- continue
562
-
563
- current_function_name_ref[0] = operation
564
- ctx = _create_mock_context(
565
- force=force, strength=strength, temperature=temperature, time=time_param,
566
- verbose=verbose, quiet=quiet, output_cost=output_cost,
567
- review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
568
- max_attempts=max_attempts, target_coverage=target_coverage
569
- )
570
-
571
- result = {}
572
- success = False
573
- start_time = time.time() # Track execution time
574
-
575
- # --- Execute Operation ---
576
- try:
577
- if operation == 'auto-deps':
578
- # Save the modified prompt to a temporary location
579
- temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
580
-
581
- # Read original prompt content to compare later
582
- original_content = pdd_files['prompt'].read_text(encoding='utf-8')
583
-
584
- result = auto_deps_main(
585
- ctx,
586
- prompt_file=str(pdd_files['prompt']),
587
- directory_path=examples_dir,
588
- auto_deps_csv_path="project_dependencies.csv",
589
- output=temp_output,
590
- force_scan=False # Don't force scan every time
591
- )
592
-
593
- # Only move the temp file back if content actually changed
594
- if Path(temp_output).exists():
595
- import shutil
596
- new_content = Path(temp_output).read_text(encoding='utf-8')
597
- if new_content != original_content:
598
- shutil.move(temp_output, str(pdd_files['prompt']))
599
- else:
600
- # No changes needed, remove temp file
601
- Path(temp_output).unlink()
602
- # Mark as successful with no changes
603
- result = (new_content, 0.0, 'no-changes')
604
- elif operation == 'generate':
605
- result = code_generator_main(
606
- ctx,
607
- prompt_file=str(pdd_files['prompt']),
608
- output=str(pdd_files['code']),
609
- original_prompt_file_path=None,
610
- force_incremental_flag=False
611
- )
612
- elif operation == 'example':
613
- print(f"DEBUG SYNC: pdd_files['example'] = {pdd_files['example']}")
614
- print(f"DEBUG SYNC: str(pdd_files['example']) = {str(pdd_files['example'])}")
615
- result = context_generator_main(
616
- ctx,
617
- prompt_file=str(pdd_files['prompt']),
618
- code_file=str(pdd_files['code']),
619
- output=str(pdd_files['example'])
620
- )
621
- elif operation == 'crash':
622
- # Validate required files exist before attempting crash operation
623
- required_files = [pdd_files['code'], pdd_files['example']]
624
- missing_files = [f for f in required_files if not f.exists()]
625
-
626
- if missing_files:
627
- # Skip crash operation if required files are missing
628
- print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
629
- skipped_operations.append('crash')
630
-
631
- # Update log entry for skipped operation
632
- update_sync_log_entry(log_entry, {
633
- 'success': True,
634
- 'cost': 0.0,
635
- 'model': 'skipped',
636
- 'error': None
637
- }, 0.0)
638
- log_entry['details']['skip_reason'] = 'missing_files'
639
- log_entry['details']['missing_files'] = [f.name for f in missing_files]
640
- append_sync_log(basename, language, log_entry)
1169
+ result = {}
1170
+ success = False
1171
+ op_start_time = time.time()
1172
+
1173
+ # Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
1174
+ with AtomicStateUpdate(basename, language) as atomic_state:
1175
+
1176
+ # --- Execute Operation ---
1177
+ try:
1178
+ if operation == 'auto-deps':
1179
+ temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
1180
+ original_content = pdd_files['prompt'].read_text(encoding='utf-8')
1181
+ result = auto_deps_main(
1182
+ ctx,
1183
+ prompt_file=str(pdd_files['prompt']),
1184
+ directory_path=examples_dir,
1185
+ auto_deps_csv_path="project_dependencies.csv",
1186
+ output=temp_output,
1187
+ force_scan=False,
1188
+ progress_callback=progress_callback_ref[0]
1189
+ )
1190
+ if Path(temp_output).exists():
1191
+ import shutil
1192
+ new_content = Path(temp_output).read_text(encoding='utf-8')
1193
+ if new_content != original_content:
1194
+ shutil.move(temp_output, str(pdd_files['prompt']))
1195
+ else:
1196
+ Path(temp_output).unlink()
1197
+ result = (new_content, 0.0, 'no-changes')
1198
+ elif operation == 'generate':
1199
+ result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt']), output=str(pdd_files['code']), original_prompt_file_path=None, force_incremental_flag=False)
1200
+ # Clear stale run_report so crash/verify is required for newly generated code
1201
+ run_report_file = META_DIR / f"{basename}_{language}_run.json"
1202
+ run_report_file.unlink(missing_ok=True)
1203
+ elif operation == 'example':
1204
+ result = context_generator_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['example']))
1205
+ elif operation == 'crash':
1206
+ required_files = [pdd_files['code'], pdd_files['example']]
1207
+ missing_files = [f for f in required_files if not f.exists()]
1208
+ if missing_files:
1209
+ skipped_operations.append('crash')
1210
+ continue
641
1211
 
642
- # Create a dummy run report indicating crash was skipped due to missing files
643
- report_data = RunReport(
644
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
645
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
646
- )
647
- save_run_report(asdict(report_data), basename, language)
648
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
649
- continue
650
- else:
651
- # Check if we have a run report indicating failures that need crash fixing
652
- current_run_report = read_run_report(basename, language)
653
- crash_log_content = ""
1212
+ # Crash handling logic (simplified copy from original)
1213
+ current_run_report = read_run_report(basename, language)
1214
+ crash_log_content = ""
654
1215
 
655
- # If we have a run report with exit_code != 0, that indicates a crash that needs fixing
656
- if current_run_report and current_run_report.exit_code != 0:
657
- # We have a crash to fix based on the run report
658
- crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n\n"
659
-
660
- # Try to run the example program to get additional error details
661
- try:
662
- example_result = subprocess.run(
663
- ['python', str(pdd_files['example'])],
664
- capture_output=True,
665
- text=True,
666
- timeout=60,
667
- env=os.environ.copy(),
668
- cwd=str(pdd_files['example'].parent)
1216
+ # Check for crash condition (either run report says so, or we check manually)
1217
+ has_crash = False
1218
+ if current_run_report and current_run_report.exit_code != 0:
1219
+ has_crash = True
1220
+ crash_log_content = f"Test execution failed exit code: {current_run_report.exit_code}\n"
1221
+ else:
1222
+ # Manual check - run the example to see if it crashes
1223
+ env = os.environ.copy()
1224
+ src_dir = Path.cwd() / 'src'
1225
+ env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
1226
+ # Remove TUI-specific env vars that might contaminate subprocess
1227
+ for var in ['FORCE_COLOR', 'COLUMNS']:
1228
+ env.pop(var, None)
1229
+ # Get language-appropriate run command from language_format.csv
1230
+ example_path = str(pdd_files['example'])
1231
+ run_cmd = get_run_command_for_file(example_path)
1232
+ if run_cmd:
1233
+ # Use the language-specific interpreter (e.g., node for .js)
1234
+ cmd_parts = run_cmd.split()
1235
+ else:
1236
+ # Fallback to Python if no run command found
1237
+ cmd_parts = ['python', example_path]
1238
+ # Use error-detection runner that handles server-style examples
1239
+ returncode, stdout, stderr = _run_example_with_error_detection(
1240
+ cmd_parts,
1241
+ env=env,
1242
+ cwd=str(pdd_files['example'].parent),
1243
+ timeout=60
669
1244
  )
670
-
671
- if example_result.returncode != 0:
672
- crash_log_content += f"Example program also failed with exit code: {example_result.returncode}\n\n"
673
- if example_result.stdout:
674
- crash_log_content += f"STDOUT:\n{example_result.stdout}\n\n"
675
- if example_result.stderr:
676
- crash_log_content += f"STDERR:\n{example_result.stderr}\n"
677
-
678
- # Check for syntax errors specifically
679
- if "SyntaxError" in example_result.stderr:
680
- crash_log_content = f"SYNTAX ERROR DETECTED:\n\n{crash_log_content}"
1245
+
1246
+ class ExampleResult:
1247
+ def __init__(self, rc, out, err):
1248
+ self.returncode = rc
1249
+ self.stdout = out
1250
+ self.stderr = err
1251
+
1252
+ ex_res = ExampleResult(returncode, stdout, stderr)
1253
+ if ex_res.returncode != 0:
1254
+ has_crash = True
1255
+ crash_log_content = f"Example failed exit code: {ex_res.returncode}\nSTDOUT:\n{ex_res.stdout}\nSTDERR:\n{ex_res.stderr}\n"
1256
+ if "SyntaxError" in ex_res.stderr:
1257
+ crash_log_content = "SYNTAX ERROR DETECTED:\n" + crash_log_content
681
1258
  else:
682
- crash_log_content += "Example program runs successfully, but tests are failing.\n"
683
- crash_log_content += "This may indicate issues with test execution or test file syntax.\n"
684
-
685
- except subprocess.TimeoutExpired:
686
- crash_log_content += "Example program execution timed out after 60 seconds\n"
687
- crash_log_content += "This may indicate an infinite loop or the program is waiting for input.\n"
688
- except Exception as e:
689
- crash_log_content += f"Error running example program: {str(e)}\n"
690
- crash_log_content += f"Program path: {pdd_files['example']}\n"
691
- else:
692
- # No crash detected, skip crash operation
693
- print("No crash detected in run report, skipping crash fix")
694
- skipped_operations.append('crash')
695
-
696
- # Update log entry for skipped operation
697
- update_sync_log_entry(log_entry, {
698
- 'success': True,
699
- 'cost': 0.0,
700
- 'model': 'skipped',
701
- 'error': None
702
- }, time.time() - start_time)
703
- log_entry['details']['skip_reason'] = 'no_crash'
704
- append_sync_log(basename, language, log_entry)
705
-
706
- report_data = RunReport(
707
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
708
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
709
- )
710
- save_run_report(asdict(report_data), basename, language)
711
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
712
- continue
713
-
714
- # Write actual error content or fallback
715
- if not crash_log_content:
716
- crash_log_content = "Unknown crash error - program failed but no error output captured"
717
-
718
- Path("crash.log").write_text(crash_log_content)
719
-
720
- try:
721
- result = crash_main(
722
- ctx,
723
- prompt_file=str(pdd_files['prompt']),
724
- code_file=str(pdd_files['code']),
725
- program_file=str(pdd_files['example']),
726
- error_file="crash.log",
727
- output=str(pdd_files['code']),
728
- output_program=str(pdd_files['example']),
729
- loop=True,
730
- max_attempts=max_attempts,
731
- budget=budget - current_cost_ref[0]
732
- )
733
- except (RuntimeError, Exception) as e:
734
- error_str = str(e)
735
- if ("LLM returned None" in error_str or
736
- "LLM failed to analyze errors" in error_str):
737
- # Skip crash operation for LLM failures
738
- print(f"Skipping crash operation due to LLM error: {e}")
739
- skipped_operations.append('crash')
740
-
741
- # Update log entry for skipped operation
742
- update_sync_log_entry(log_entry, {
743
- 'success': False,
744
- 'cost': 0.0,
745
- 'model': 'skipped',
746
- 'error': f"LLM error: {str(e)}"
747
- }, time.time() - start_time)
748
- log_entry['details']['skip_reason'] = 'llm_error'
749
- append_sync_log(basename, language, log_entry)
1259
+ # No crash - save run report with exit_code=0 so sync_determine_operation
1260
+ # knows the example was tested and passed (prevents infinite loop)
1261
+ # Include test_hash for staleness detection
1262
+ test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1263
+ report = RunReport(
1264
+ datetime.datetime.now(datetime.timezone.utc).isoformat(),
1265
+ exit_code=0,
1266
+ tests_passed=1,
1267
+ tests_failed=0,
1268
+ coverage=0.0,
1269
+ test_hash=test_hash
1270
+ )
1271
+ save_run_report(asdict(report), basename, language)
1272
+ skipped_operations.append('crash')
1273
+ continue
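The run report saved just above is the signal that sync_determine_operation reads on the next pass: exit_code=0 records that the example actually ran, and test_hash lets a later pass notice that the test file changed after the report was written. A rough sketch of that staleness check, assuming a simplified RunReport limited to the fields used in this hunk:

    import hashlib
    from dataclasses import dataclass
    from pathlib import Path
    from typing import Optional

    @dataclass
    class RunReport:
        timestamp: str
        exit_code: int
        tests_passed: int
        tests_failed: int
        coverage: float
        test_hash: Optional[str] = None

    def sha256_of(path: Path) -> Optional[str]:
        return hashlib.sha256(path.read_bytes()).hexdigest() if path.exists() else None

    def report_is_stale(report: RunReport, test_file: Path) -> bool:
        # If the test file's current hash differs from the hash recorded when
        # the report was written, the report no longer describes these tests.
        return report.test_hash != sha256_of(test_file)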
750
1274
 
751
- report_data = RunReport(
752
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
753
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
1275
+ if has_crash:
1276
+ # Try auto-fix for common import errors before expensive agentic call
1277
+ auto_fixed, auto_fix_msg = _try_auto_fix_import_error(
1278
+ crash_log_content,
1279
+ pdd_files['code'],
1280
+ pdd_files['example']
754
1281
  )
755
- save_run_report(asdict(report_data), basename, language)
756
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_llm_error')
1282
+ if auto_fixed:
1283
+ log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
1284
+ # Retry running the example after auto-fix
1285
+ retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
1286
+ cmd_parts,
1287
+ env=env,
1288
+ cwd=str(pdd_files['example'].parent),
1289
+ timeout=60
1290
+ )
1291
+ if retry_returncode == 0:
1292
+ # Auto-fix worked! Save run report and continue
1293
+ log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
1294
+ test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1295
+ report = RunReport(
1296
+ datetime.datetime.now(datetime.timezone.utc).isoformat(),
1297
+ exit_code=0,
1298
+ tests_passed=1,
1299
+ tests_failed=0,
1300
+ coverage=0.0,
1301
+ test_hash=test_hash
1302
+ )
1303
+ save_run_report(asdict(report), basename, language)
1304
+ result = (True, 0.0, 'auto-fix')
1305
+ success = True
1306
+ actual_cost = 0.0
1307
+ model_name = 'auto-fix'
1308
+ # Update crash_log_content for logging
1309
+ crash_log_content = f"Auto-fixed: {auto_fix_msg}"
1310
+ continue # Skip crash_main, move to next operation
1311
+ else:
1312
+ # Auto-fix didn't fully work, update error log and proceed
1313
+ crash_log_content = f"Auto-fix attempted ({auto_fix_msg}) but still failing:\nRETRY STDOUT:\n{retry_stdout}\nRETRY STDERR:\n{retry_stderr}\n"
1314
+
1315
+ Path("crash.log").write_text(crash_log_content)
1316
+ try:
1317
+ result = crash_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), error_file="crash.log", output=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1318
+ except Exception as e:
1319
+ print(f"Crash fix failed: {e}")
1320
+ skipped_operations.append('crash')
1321
+ continue
1322
+
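The _try_auto_fix_import_error call earlier in this branch acts as a cheap gate: if the crash log looks like a plain import failure, a mechanical retry is attempted before paying for an agentic fix. That helper's implementation is not shown in this hunk, so the sketch below only illustrates the kind of stderr pattern-matching such a gate could perform; the function name, signature, and return shape are assumptions.

    import re
    from typing import Optional, Tuple

    IMPORT_ERROR = re.compile(
        r"(ModuleNotFoundError|ImportError): No module named ['\"]?([\w\.]+)"
    )

    def classify_import_error(stderr_text: str) -> Tuple[bool, Optional[str]]:
        # Returns (looks_like_import_error, missing_module_name). A caller can
        # use this to attempt a trivial remedy and re-run the example before
        # escalating to an LLM-based crash fix.
        match = IMPORT_ERROR.search(stderr_text)
        if match:
            return True, match.group(2)
        return False, None

    # classify_import_error("ModuleNotFoundError: No module named 'calculator'")
    # -> (True, 'calculator')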
1323
+ elif operation == 'verify':
1324
+ if not pdd_files['example'].exists():
1325
+ skipped_operations.append('verify')
757
1326
  continue
758
- else:
759
- # Re-raise other exceptions
760
- raise
761
- elif operation == 'verify':
762
- result = fix_verification_main(
763
- ctx,
764
- prompt_file=str(pdd_files['prompt']),
765
- code_file=str(pdd_files['code']),
766
- program_file=str(pdd_files['example']),
767
- output_results=f"{basename}_verify_results.log",
768
- output_code=str(pdd_files['code']),
769
- output_program=str(pdd_files['example']),
770
- loop=True,
771
- verification_program=str(pdd_files['example']),
772
- max_attempts=max_attempts,
773
- budget=budget - current_cost_ref[0]
774
- )
775
- elif operation == 'test':
776
- # First, generate the test file
777
- result = cmd_test_main(
778
- ctx,
779
- prompt_file=str(pdd_files['prompt']),
780
- code_file=str(pdd_files['code']),
781
- output=str(pdd_files['test']),
782
- language=language,
783
- coverage_report=None,
784
- existing_tests=None,
785
- target_coverage=target_coverage,
786
- merge=False
787
- )
788
-
789
- # After successful test generation, execute the tests and create run report
790
- # This enables the next sync iteration to detect test failures and trigger fix
791
- if isinstance(result, dict) and result.get('success', False):
792
- try:
793
- test_file = pdd_files['test']
794
- if test_file.exists():
1327
+ result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1328
+ elif operation == 'test':
1329
+ pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1330
+ # Use merge=True when test file exists to preserve fixes and append new tests
1331
+ # instead of regenerating from scratch (which would overwrite fixes)
1332
+ test_file_exists = pdd_files['test'].exists()
1333
+ result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=[str(pdd_files['test'])] if test_file_exists else None, target_coverage=target_coverage, merge=test_file_exists, strength=strength, temperature=temperature)
1334
+ if pdd_files['test'].exists():
795
1335
  _execute_tests_and_create_run_report(
796
- test_file, basename, language, target_coverage
1336
+ pdd_files['test'],
1337
+ basename,
1338
+ language,
1339
+ target_coverage,
1340
+ code_file=pdd_files.get("code"),
1341
+ atomic_state=atomic_state,
1342
+ test_files=pdd_files.get('test_files'), # Bug #156
1343
+ )
1344
+ elif operation == 'test_extend':
1345
+ # Extend existing tests to improve coverage
1346
+ # Uses existing_tests and merge=True to add more test cases
1347
+ pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1348
+ if pdd_files['test'].exists():
1349
+ existing_test_path = str(pdd_files['test'])
1350
+ result = cmd_test_main(
1351
+ ctx,
1352
+ prompt_file=str(pdd_files['prompt']),
1353
+ code_file=str(pdd_files['code']),
1354
+ output=str(pdd_files['test']),
1355
+ language=language,
1356
+ coverage_report=None,
1357
+ existing_tests=[existing_test_path],
1358
+ target_coverage=target_coverage,
1359
+ merge=True,
1360
+ strength=strength,
1361
+ temperature=temperature
797
1362
  )
798
- except Exception as e:
799
- # Don't fail the entire operation if test execution fails
800
- # Just log it - the test file generation was successful
801
- print(f"Warning: Test execution failed: {e}")
802
- elif isinstance(result, tuple) and len(result) >= 3:
803
- # Handle tuple return format - assume success and execute tests
804
- try:
805
- test_file = pdd_files['test']
806
- if test_file.exists():
807
1363
  _execute_tests_and_create_run_report(
808
- test_file, basename, language, target_coverage
1364
+ pdd_files['test'],
1365
+ basename,
1366
+ language,
1367
+ target_coverage,
1368
+ code_file=pdd_files.get("code"),
1369
+ atomic_state=atomic_state,
1370
+ test_files=pdd_files.get('test_files'), # Bug #156
809
1371
  )
810
- except Exception as e:
811
- print(f"Warning: Test execution failed: {e}")
812
- elif operation == 'fix':
813
- # Create error file with actual test failure information
814
- error_file_path = Path("fix_errors.log")
815
-
816
- # Try to get actual test failure details from latest run
817
- try:
818
- run_report = read_run_report(basename, language)
819
- if run_report and run_report.tests_failed > 0:
820
- # Run the tests again to capture actual error output
821
- # Use environment-aware Python executable for pytest execution
822
- python_executable = detect_host_python_executable()
823
- test_result = subprocess.run([
824
- python_executable, '-m', 'pytest',
825
- str(pdd_files['test']),
826
- '-v', '--tb=short'
827
- ], capture_output=True, text=True, timeout=300)
828
-
829
- error_content = f"Test failures detected ({run_report.tests_failed} failed tests):\n\n"
830
- error_content += "STDOUT:\n" + test_result.stdout + "\n\n"
831
- error_content += "STDERR:\n" + test_result.stderr
1372
+ else:
1373
+ # No existing test file, fall back to regular test generation
1374
+ result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=None, target_coverage=target_coverage, merge=False, strength=strength, temperature=temperature)
1375
+ if pdd_files['test'].exists():
1376
+ _execute_tests_and_create_run_report(
1377
+ pdd_files['test'],
1378
+ basename,
1379
+ language,
1380
+ target_coverage,
1381
+ code_file=pdd_files.get("code"),
1382
+ atomic_state=atomic_state,
1383
+ test_files=pdd_files.get('test_files'), # Bug #156
1384
+ )
1385
+ elif operation == 'fix':
1386
+ error_file_path = Path("fix_errors.log")
1387
+ # Capture errors using language-appropriate test command
1388
+ try:
1389
+ from .get_test_command import get_test_command_for_file
1390
+ test_cmd = get_test_command_for_file(str(pdd_files['test']), language)
1391
+
1392
+ # Use clean env without TUI-specific vars
1393
+ clean_env = os.environ.copy()
1394
+ for var in ['FORCE_COLOR', 'COLUMNS']:
1395
+ clean_env.pop(var, None)
1396
+
1397
+ if test_cmd:
1398
+ # Run language-appropriate test command
1399
+ if language.lower() == 'python':
1400
+ # Use pytest directly for Python
1401
+ python_executable = detect_host_python_executable()
1402
+ # Bug #156: Run pytest on ALL matching test files
1403
+ test_files = pdd_files.get('test_files', [pdd_files['test']])
1404
+ pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
1405
+ test_result = subprocess.run(
1406
+ pytest_args,
1407
+ capture_output=True, text=True, timeout=300,
1408
+ stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True,
1409
+ cwd=str(pdd_files['test'].parent)
1410
+ )
1411
+ else:
1412
+ # Use shell command for non-Python
1413
+ test_result = subprocess.run(
1414
+ test_cmd,
1415
+ shell=True,
1416
+ capture_output=True, text=True, timeout=300,
1417
+ stdin=subprocess.DEVNULL, env=clean_env,
1418
+ cwd=str(pdd_files['test'].parent),
1419
+ start_new_session=True
1420
+ )
1421
+ error_content = f"Test output:\n{test_result.stdout}\n{test_result.stderr}"
1422
+ else:
1423
+ # No test command available - trigger agentic fallback with context
1424
+ error_content = f"No test command available for {language}. Please run tests manually and provide error output."
1425
+ except Exception as e:
1426
+ error_content = f"Test execution error: {e}"
1427
+ error_file_path.write_text(error_content)
1428
+
1429
+ # Bug #156 fix: Parse pytest output to find actual failing files
1430
+ # and pass the correct file to fix_main
1431
+ failing_files = extract_failing_files_from_output(error_content)
1432
+ unit_test_file_for_fix = str(pdd_files['test']) # Default to tracked file
1433
+
1434
+ if failing_files:
1435
+ # Try to resolve the failing file paths
1436
+ test_dir = pdd_files['test'].parent
1437
+ tracked_file_name = pdd_files['test'].name
1438
+
1439
+ # Check if the tracked file is among the failures
1440
+ tracked_in_failures = any(
1441
+ Path(ff).name == tracked_file_name for ff in failing_files
1442
+ )
1443
+
1444
+ if not tracked_in_failures:
1445
+ # Failures are in a different file - use the first failing file
1446
+ for ff in failing_files:
1447
+ # Try to resolve the path relative to test directory
1448
+ ff_path = Path(ff)
1449
+ if ff_path.is_absolute() and ff_path.exists():
1450
+ unit_test_file_for_fix = str(ff_path)
1451
+ break
1452
+ else:
1453
+ # Try to find it in the test directory
1454
+ candidate = test_dir / ff_path.name
1455
+ if candidate.exists():
1456
+ unit_test_file_for_fix = str(candidate)
1457
+ break
1458
+ # Also try the path as-is relative to cwd
1459
+ if ff_path.exists():
1460
+ unit_test_file_for_fix = str(ff_path.resolve())
1461
+ break
1462
+
1463
+ result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
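The Bug #156 handling above depends on extract_failing_files_from_output pulling file paths out of the captured test output, so that fix_main is pointed at the file that actually failed rather than only at the tracked test file. The shipped function is defined elsewhere in the module; the sketch below shows one plausible way to parse pytest's "FAILED path::test" summary lines, with an illustrative regex and de-duplication that are not taken from the real implementation.

    import re
    from typing import List

    FAILED_LINE = re.compile(r"^FAILED\s+([^\s:]+)::", re.MULTILINE)

    def failing_files_from_pytest_output(output: str) -> List[str]:
        # pytest's short summary prints lines like
        #   FAILED tests/test_calculator.py::test_add - AssertionError
        # so collecting the path before '::' yields the failing files in order,
        # without duplicates.
        seen: List[str] = []
        for path in FAILED_LINE.findall(output):
            if path not in seen:
                seen.append(path)
        return seen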
1464
+ elif operation == 'update':
1465
+ result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
832
1466
  else:
833
- error_content = "Simulated test failures"
1467
+ errors.append(f"Unknown operation {operation}")
1468
+ result = {'success': False}
1469
+
1470
+ # Result parsing
1471
+ if isinstance(result, dict):
1472
+ success = result.get('success', False)
1473
+ current_cost_ref[0] += result.get('cost', 0.0)
1474
+ elif isinstance(result, tuple) and len(result) >= 3:
1475
+ if operation == 'test': success = pdd_files['test'].exists()
1476
+ else: success = bool(result[0])
1477
+ cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
1478
+ current_cost_ref[0] += cost
1479
+ else:
1480
+ success = result is not None
1481
+
834
1482
  except Exception as e:
835
- error_content = f"Could not capture test failures: {e}\nUsing simulated test failures"
836
-
837
- error_file_path.write_text(error_content)
838
-
839
- result = fix_main(
840
- ctx,
841
- prompt_file=str(pdd_files['prompt']),
842
- code_file=str(pdd_files['code']),
843
- unit_test_file=str(pdd_files['test']),
844
- error_file=str(error_file_path),
845
- output_test=str(pdd_files['test']),
846
- output_code=str(pdd_files['code']),
847
- output_results=f"{basename}_fix_results.log",
848
- loop=True,
849
- verification_program=str(pdd_files['example']),
850
- max_attempts=max_attempts,
851
- budget=budget - current_cost_ref[0],
852
- auto_submit=True
853
- )
854
- elif operation == 'update':
855
- result = update_main(
856
- ctx,
857
- input_prompt_file=str(pdd_files['prompt']),
858
- modified_code_file=str(pdd_files['code']),
859
- input_code_file=None,
860
- output=str(pdd_files['prompt']),
861
- git=True
862
- )
863
- else:
864
- errors.append(f"Unknown operation '{operation}' requested.")
865
- result = {'success': False, 'cost': 0.0}
1483
+ errors.append(f"Exception during '{operation}': {e}")
1484
+ success = False
866
1485
 
867
- # Handle different return formats from command functions
868
- if isinstance(result, dict):
869
- # Dictionary return (e.g., from some commands)
870
- success = result.get('success', False)
871
- current_cost_ref[0] += result.get('cost', 0.0)
872
- elif isinstance(result, tuple) and len(result) >= 3:
873
- # Tuple return (e.g., from code_generator_main, context_generator_main)
874
- # For tuples, success is determined by no exceptions and valid return content
875
- # Check if the first element (generated content) is None, which indicates failure
876
- success = result[0] is not None
877
- # Extract cost from tuple (usually second-to-last element)
878
- cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
879
- current_cost_ref[0] += cost
880
- else:
881
- # Unknown return format
882
- success = result is not None
883
- current_cost_ref[0] += 0.0
884
-
885
- except Exception as e:
886
- errors.append(f"Exception during '{operation}': {e}")
887
- success = False
1486
+ # Log update
1487
+ duration = time.time() - op_start_time
1488
+ actual_cost = 0.0
1489
+ model_name = "unknown"
1490
+ if success:
1491
+ if isinstance(result, dict):
1492
+ actual_cost = result.get('cost', 0.0)
1493
+ model_name = result.get('model', 'unknown')
1494
+ elif isinstance(result, tuple) and len(result) >= 3:
1495
+ actual_cost = result[-2] if len(result) >= 2 else 0.0
1496
+ model_name = result[-1] if len(result) >= 1 else 'unknown'
1497
+ last_model_name = str(model_name)
1498
+ operations_completed.append(operation)
1499
+ _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
888
1500
 
889
- # Calculate execution duration
890
- duration = time.time() - start_time
1501
+ update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
1502
+ append_sync_log(basename, language, log_entry)
891
1503
 
892
- # Extract cost and model from result for logging
893
- actual_cost = 0.0
894
- model_name = "unknown"
895
- error_message = None
896
-
897
- if success:
898
- if isinstance(result, dict):
899
- actual_cost = result.get('cost', 0.0)
900
- model_name = result.get('model', 'unknown')
901
- elif isinstance(result, tuple) and len(result) >= 3:
902
- actual_cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
903
- model_name = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else 'unknown'
904
- else:
905
- error_message = errors[-1] if errors else "Operation failed"
906
-
907
- # Update and save log entry with execution results
908
- update_sync_log_entry(log_entry, {
909
- 'success': success,
910
- 'cost': actual_cost,
911
- 'model': model_name,
912
- 'error': error_message
913
- }, duration)
914
- append_sync_log(basename, language, log_entry)
915
-
916
- if success:
917
- operations_completed.append(operation)
918
- # Extract cost and model from result based on format
919
- if isinstance(result, dict):
920
- cost = result.get('cost', 0.0)
921
- model = result.get('model', '')
922
- elif isinstance(result, tuple) and len(result) >= 3:
923
- cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
924
- model = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else ''
925
- else:
926
- cost = 0.0
927
- model = ''
928
- _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
929
-
930
- # After successful crash operation, re-run the example to generate fresh run report
931
- if operation == 'crash':
932
- try:
933
- example_file = pdd_files['example']
934
- if example_file.exists():
935
- # Run the example program to check if crash is actually fixed
936
- try:
937
- example_result = subprocess.run(
938
- ['python', str(example_file)],
939
- capture_output=True,
940
- text=True,
941
- timeout=60,
942
- env=os.environ.copy(),
943
- cwd=str(example_file.parent)
944
- )
945
-
946
- # Create fresh run report based on actual execution
947
- report_data = RunReport(
948
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
949
- exit_code=example_result.returncode,
950
- tests_passed=1 if example_result.returncode == 0 else 0,
951
- tests_failed=0 if example_result.returncode == 0 else 1,
952
- coverage=100.0 if example_result.returncode == 0 else 0.0
953
- )
954
- save_run_report(asdict(report_data), basename, language)
955
- print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
956
-
957
- except subprocess.TimeoutExpired:
958
- # Example timed out - still considered a failure
959
- report_data = RunReport(
960
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
961
- exit_code=124, # Standard timeout exit code
962
- tests_passed=0, tests_failed=1, coverage=0.0
963
- )
964
- save_run_report(asdict(report_data), basename, language)
965
- print("Example timed out after crash fix - created failure run report")
966
-
967
- except Exception as e:
968
- # Don't fail the entire operation if example re-execution fails
969
- print(f"Warning: Post-crash example re-execution failed: {e}")
1504
+ # Post-operation checks (simplified)
1505
+ if success and operation == 'crash':
1506
+ # Re-run example to verify crash fix worked
1507
+ try:
1508
+ # Use clean env without TUI-specific vars
1509
+ clean_env = os.environ.copy()
1510
+ for var in ['FORCE_COLOR', 'COLUMNS']:
1511
+ clean_env.pop(var, None)
1512
+ # Get language-appropriate run command
1513
+ example_path = str(pdd_files['example'])
1514
+ run_cmd = get_run_command_for_file(example_path)
1515
+ if run_cmd:
1516
+ cmd_parts = run_cmd.split()
1517
+ else:
1518
+ cmd_parts = ['python', example_path]
1519
+ # Use error-detection runner that handles server-style examples
1520
+ returncode, stdout, stderr = _run_example_with_error_detection(
1521
+ cmd_parts,
1522
+ env=clean_env,
1523
+ cwd=str(pdd_files['example'].parent),
1524
+ timeout=60
1525
+ )
1526
+ # Include test_hash for staleness detection
1527
+ test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1528
+ report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
1529
+ save_run_report(asdict(report), basename, language)
1530
+ except Exception as e:
1531
+ # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
1532
+ error_msg = f"Post-crash verification failed: {e}"
1533
+ errors.append(error_msg)
1534
+ log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})
970
1535
 
971
- # After successful fix operation, execute tests to update run report
972
- if operation == 'fix':
973
- try:
974
- test_file = pdd_files['test']
975
- if test_file.exists():
1536
+ if success and operation == 'fix':
1537
+ # Re-run tests to update run_report after successful fix
1538
+ # This prevents infinite loop by updating the state machine
1539
+ if pdd_files['test'].exists():
976
1540
  _execute_tests_and_create_run_report(
977
- test_file, basename, language, target_coverage
1541
+ pdd_files['test'],
1542
+ basename,
1543
+ language,
1544
+ target_coverage,
1545
+ code_file=pdd_files.get("code"),
1546
+ atomic_state=atomic_state,
1547
+ test_files=pdd_files.get('test_files'), # Bug #156
978
1548
  )
979
- except Exception as e:
980
- # Don't fail the entire operation if test execution fails
981
- print(f"Warning: Post-fix test execution failed: {e}")
982
- else:
983
- errors.append(f"Operation '{operation}' failed.")
984
- break
1549
+
1550
+ if not success:
1551
+ errors.append(f"Operation '{operation}' failed.")
1552
+ break
985
1553
 
986
- except TimeoutError:
987
- errors.append(f"Could not acquire lock for '{basename}'. Another sync process may be running.")
988
- except Exception as e:
989
- errors.append(f"An unexpected error occurred in the orchestrator: {e}")
990
- finally:
991
- # Log lock release
992
- try:
993
- log_sync_event(basename, language, "lock_released", {
994
- "pid": os.getpid(),
995
- "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
996
- "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
997
- })
998
- except Exception:
999
- pass # Don't fail if logging fails
1554
+ except BaseException as e:
1555
+ errors.append(f"An unexpected error occurred in the orchestrator: {type(e).__name__}: {e}")
1556
+ # Log the full traceback for debugging
1557
+ import traceback
1558
+ traceback.print_exc()
1559
+ finally:
1560
+ try:
1561
+ log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
1562
+ except: pass
1000
1563
 
1001
- if stop_event:
1002
- stop_event.set()
1003
- if animation_thread and animation_thread.is_alive():
1004
- animation_thread.join(timeout=5)
1564
+ # Return result dict
1565
+ return {
1566
+ 'success': not errors,
1567
+ 'operations_completed': operations_completed,
1568
+ 'skipped_operations': skipped_operations,
1569
+ 'total_cost': current_cost_ref[0],
1570
+ 'total_time': time.time() - start_time,
1571
+ 'final_state': {p: {'exists': f.exists(), 'path': str(f)} for p, f in pdd_files.items() if p != 'test_files'},
1572
+ 'errors': errors,
1573
+ 'error': "; ".join(errors) if errors else None, # Add this line
1574
+ 'model_name': last_model_name,
1575
+ }
1576
+
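The dictionary returned above is the contract between the worker and the caller of sync_orchestration: success simply means no errors were recorded, error collapses the error list into a single string, and model_name reports the last model used. A short usage sketch of how a caller might consume it; the key names come from the hunk above, while the summary formatting is illustrative.

    def summarize_sync_result(result: dict) -> str:
        # Works with the keys produced above; missing keys fall back to
        # defaults so the helper also tolerates the early-return error dicts.
        status = "ok" if result.get("success") else "failed"
        ops = ", ".join(result.get("operations_completed", [])) or "none"
        cost = result.get("total_cost", 0.0)
        line = f"sync {status}: operations=[{ops}] cost=${cost:.4f}"
        if result.get("error"):
            line += f" error={result['error']}"
        return line

    # summarize_sync_result({"success": True,
    #                        "operations_completed": ["generate"],
    #                        "total_cost": 0.02})
    # -> 'sync ok: operations=[generate] cost=$0.0200'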
1577
+ # Instantiate and run Textual App
1578
+ app = SyncApp(
1579
+ basename=basename,
1580
+ budget=budget,
1581
+ worker_func=sync_worker_logic,
1582
+ function_name_ref=current_function_name_ref,
1583
+ cost_ref=current_cost_ref,
1584
+ prompt_path_ref=prompt_path_ref,
1585
+ code_path_ref=code_path_ref,
1586
+ example_path_ref=example_path_ref,
1587
+ tests_path_ref=tests_path_ref,
1588
+ prompt_color_ref=prompt_box_color_ref,
1589
+ code_color_ref=code_box_color_ref,
1590
+ example_color_ref=example_box_color_ref,
1591
+ tests_color_ref=tests_box_color_ref,
1592
+ stop_event=stop_event,
1593
+ progress_callback_ref=progress_callback_ref
1594
+ )
1595
+
1596
+ # Store app reference so worker can access request_confirmation
1597
+ app_ref[0] = app
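The *_ref arguments passed to SyncApp above, and app_ref[0] = app itself, use single-element lists as mutable cells: the worker thread writes the latest status, cost, and paths into index 0, and the Textual UI reads them back, while storing the app in app_ref lets the worker reach the app's confirmation prompt. A small sketch of the pattern outside of Textual, using hypothetical names:

    import threading
    import time

    def worker(status_ref: list, cost_ref: list) -> None:
        # The worker only ever assigns to index 0; readers always see the most
        # recently written value, since simple list item assignment is
        # effectively atomic under CPython's GIL.
        for step in ("generate", "example", "test"):
            status_ref[0] = step
            cost_ref[0] += 0.01
            time.sleep(0.01)

    status_ref, cost_ref = ["idle"], [0.0]
    t = threading.Thread(target=worker, args=(status_ref, cost_ref))
    t.start()
    t.join()
    print(status_ref[0], round(cost_ref[0], 2))  # -> test 0.03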
1598
+
1599
+ result = app.run()
1600
+
1601
+ # Show exit animation if not quiet
1602
+ if not quiet:
1603
+ from .sync_tui import show_exit_animation
1604
+ show_exit_animation()
1605
+
1606
+ # Check for worker exception that might have caused a crash
1607
+ if app.worker_exception:
1608
+ print(f"\n[Error] Worker thread crashed with exception: {app.worker_exception}", file=sys.stderr)
1005
1609
 
1006
- total_time = time.time() - start_time
1007
- final_state = {
1008
- p_name: {'exists': p_path.exists(), 'path': str(p_path)}
1009
- for p_name, p_path in pdd_files.items()
1010
- }
1610
+ if hasattr(app, 'captured_logs') and app.captured_logs:
1611
+ print("\n[Captured Logs (last 20 lines)]", file=sys.stderr)
1612
+ for line in app.captured_logs[-20:]: # Print last 20 lines
1613
+ print(f" {line}", file=sys.stderr)
1614
+
1615
+ import traceback
1616
+ # Use trace module to print the stored exception's traceback if available
1617
+ if hasattr(app.worker_exception, '__traceback__'):
1618
+ traceback.print_exception(type(app.worker_exception), app.worker_exception, app.worker_exception.__traceback__, file=sys.stderr)
1619
+
1620
+ if result is None:
1621
+ return {
1622
+ "success": False,
1623
+ "total_cost": current_cost_ref[0],
1624
+ "model_name": "",
1625
+ "error": "Sync process interrupted or returned no result.",
1626
+ "operations_completed": [],
1627
+ "errors": ["App exited without result"]
1628
+ }
1011
1629
 
1012
- return {
1013
- 'success': not errors,
1014
- 'operations_completed': operations_completed,
1015
- 'skipped_operations': skipped_operations,
1016
- 'total_cost': current_cost_ref[0],
1017
- 'total_time': total_time,
1018
- 'final_state': final_state,
1019
- 'errors': errors,
1020
- }
1630
+ return result
1021
1631
 
1022
1632
  if __name__ == '__main__':
1023
- # Example usage of the sync_orchestration module.
1024
- # This simulates running `pdd sync my_calculator` from the command line.
1025
-
1026
- print("--- Running Basic Sync Orchestration Example ---")
1027
-
1028
- # Setup a dummy project structure
1633
+ # Example usage
1029
1634
  Path("./prompts").mkdir(exist_ok=True)
1030
1635
  Path("./src").mkdir(exist_ok=True)
1031
1636
  Path("./examples").mkdir(exist_ok=True)
1032
1637
  Path("./tests").mkdir(exist_ok=True)
1033
1638
  Path("./prompts/my_calculator_python.prompt").write_text("Create a calculator.")
1034
-
1035
- # Ensure PDD meta directory exists for logs and locks
1036
1639
  PDD_DIR.mkdir(exist_ok=True)
1037
1640
  META_DIR.mkdir(exist_ok=True)
1038
-
1039
- result = sync_orchestration(
1040
- basename="my_calculator",
1041
- language="python",
1042
- quiet=True # Suppress mock command output for cleaner example run
1043
- )
1044
-
1045
- print("\n--- Sync Orchestration Finished ---")
1641
+ result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
1046
1642
  print(json.dumps(result, indent=2))
1047
-
1048
- if result['success']:
1049
- print("\n✅ Sync completed successfully.")
1050
- else:
1051
- print(f"\n❌ Sync failed. Errors: {result['errors']}")
1052
-
1053
- print("\n--- Running Sync Log Example ---")
1054
- # This will now show the log from the run we just completed.
1055
- log_result = sync_orchestration(
1056
- basename="my_calculator",
1057
- language="python",
1058
- log=True
1059
- )