PyPI - pdd-cli - Versions diffs - 0.0.43__py3-none-any.whl → 0.0.44__py3-none-any.whl - Mend

pdd-cli 0.0.43__py3-none-any.whl → 0.0.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pdd-cli might be problematic. Click here for more details.

Files changed (26)

pdd/__init__.py +1 -1
pdd/cli.py +2 -2
pdd/cmd_test_main.py +9 -0
pdd/data/language_format.csv +1 -0
pdd/data/llm_model.csv +2 -2
pdd/fix_code_loop.py +2 -2
pdd/fix_error_loop.py +5 -2
pdd/fix_verification_errors_loop.py +14 -1
pdd/fix_verification_main.py +29 -8
pdd/get_jwt_token.py +39 -7
pdd/increase_tests.py +7 -0
pdd/llm_invoke.py +9 -7
pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
pdd/prompts/fix_code_module_errors_LLM.prompt +13 -3
pdd/pytest_output.py +72 -20
pdd/python_env_detector.py +151 -0
pdd/summarize_directory.py +7 -1
pdd/sync_determine_operation.py +396 -109
pdd/sync_main.py +1 -1
pdd/sync_orchestration.py +448 -28
{pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/METADATA +4 -4
{pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/RECORD +26 -25
{pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/WHEEL +0 -0
{pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/entry_points.txt +0 -0
{pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/licenses/LICENSE +0 -0
{pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/top_level.txt +0 -0

pdd/sync_orchestration.py CHANGED Viewed

@@ -10,6 +10,7 @@ import json
 import datetime
 import subprocess
 import re
+import os
 from pathlib import Path
 from typing import Dict, Any, Optional, List
 from dataclasses import asdict
@@ -25,6 +26,7 @@ from .sync_determine_operation import (
     PDD_DIR,
     META_DIR,
     SyncLock,
+    read_run_report,
 )
 from .auto_deps_main import auto_deps_main
 from .code_generator_main import code_generator_main
@@ -34,6 +36,7 @@ from .fix_verification_main import fix_verification_main
 from .cmd_test_main import cmd_test_main
 from .fix_main import fix_main
 from .update_main import update_main
+from .python_env_detector import detect_host_python_executable
 # --- Mock Helper Functions ---
@@ -48,6 +51,53 @@ def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
     except Exception:
         return []
+def create_sync_log_entry(decision, budget_remaining: float) -> Dict[str, Any]:
+    """Create initial log entry from decision with all fields (actual results set to None initially)."""
+    return {
+        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "operation": decision.operation,
+        "reason": decision.reason,
+        "decision_type": decision.details.get("decision_type", "heuristic") if decision.details else "heuristic",
+        "confidence": decision.confidence,
+        "estimated_cost": decision.estimated_cost,
+        "actual_cost": None,
+        "success": None,
+        "model": None,
+        "duration": None,
+        "error": None,
+        "details": {
+            **(decision.details if decision.details else {}),
+            "budget_remaining": budget_remaining
+        }
+    }
+def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duration: float) -> Dict[str, Any]:
+    """Update log entry with execution results (actual_cost, success, model, duration, error)."""
+    entry.update({
+        "actual_cost": result.get("cost", 0.0),
+        "success": result.get("success", False),
+        "model": result.get("model", "unknown"),
+        "duration": duration,
+        "error": result.get("error") if not result.get("success") else None
+    })
+    return entry
+def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
+    """Append completed log entry to the sync log file."""
+    log_file = META_DIR / f"{basename}_{language}_sync.log"
+    META_DIR.mkdir(parents=True, exist_ok=True)
+    with open(log_file, 'a') as f:
+        f.write(json.dumps(entry) + '\n')
+def log_sync_event(basename: str, language: str, event: str, details: Dict[str, Any] = None):
+    """Log a special sync event (lock_acquired, budget_warning, etc.)."""
+    entry = {
+        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "event": event,
+        "details": details or {}
+    }
+    append_sync_log(basename, language, entry)
 def save_run_report(report: Dict[str, Any], basename: str, language: str):
     """Save a run report to the metadata directory."""
     report_file = META_DIR / f"{basename}_{language}_run.json"
@@ -60,10 +110,11 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
     """Save fingerprint state after successful operation."""
     from datetime import datetime, timezone
     from .sync_determine_operation import calculate_current_hashes, Fingerprint
+    from . import __version__
     current_hashes = calculate_current_hashes(paths)
     fingerprint = Fingerprint(
-        pdd_version="0.0.41",
+        pdd_version=__version__,
         timestamp=datetime.now(timezone.utc).isoformat(),
         command=operation,
         prompt_hash=current_hashes.get('prompt_hash'),
@@ -89,12 +140,24 @@ def _execute_tests_and_create_run_report(test_file: Path, basename: str, languag
         module_name = test_file.name.replace('test_', '').replace('.py', '')
         # Use the module import path rather than file path for coverage
+        # Use environment-aware Python executable for pytest execution
+        python_executable = detect_host_python_executable()
+        # Determine coverage target based on module location
+        if base_package:
+            cov_target = f'{base_package}.{module_name}'
+        else:
+            # Dynamically discover package structure based on test file location
+            relative_path = test_file.parent.relative_to(Path.cwd())
+            package_path = str(relative_path).replace(os.sep, '.')
+            cov_target = f'{package_path}.{module_name}' if package_path else module_name
         result = subprocess.run([
-            'python', '-m', 'pytest',
+            python_executable, '-m', 'pytest',
             str(test_file),
             '-v',
             '--tb=short',
-            f'--cov=pdd.{module_name}',
+            f'--cov={cov_target}',
             '--cov-report=term-missing'
         ], capture_output=True, text=True, timeout=300)
@@ -118,8 +181,15 @@ def _execute_tests_and_create_run_report(test_file: Path, basename: str, languag
             if failed_match:
                 tests_failed = int(failed_match.group(1))
-        # Parse coverage percentage
+        # Parse coverage percentage - try multiple patterns
         coverage_match = re.search(r'TOTAL.*?(\d+)%', stdout)
+        if not coverage_match:
+            # Try alternative patterns for coverage output
+            coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
+        if not coverage_match:
+            # Try pattern with decimal
+            coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
         if coverage_match:
             coverage = float(coverage_match.group(1))
@@ -171,13 +241,66 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
     for entry in log_entries:
         timestamp = entry.get('timestamp', 'N/A')
-        decision = entry.get('decision', {})
-        operation = decision.get('operation', 'N/A')
-        reason = decision.get('reason', 'N/A')
-        print(f"[{timestamp}] Operation: {operation:<15} | Reason: {reason}")
-        if verbose and 'details' in decision and decision['details']:
-            details_str = json.dumps(decision['details'], indent=2)
-            print(f"  Details: {details_str}")
+        # Handle special event entries
+        if 'event' in entry:
+            event = entry.get('event', 'N/A')
+            print(f"[{timestamp[:19]}] EVENT: {event}")
+            if verbose and 'details' in entry:
+                details_str = json.dumps(entry['details'], indent=2)
+                print(f"  Details: {details_str}")
+            continue
+        # Handle operation entries
+        operation = entry.get('operation', 'N/A')
+        reason = entry.get('reason', 'N/A')
+        success = entry.get('success')
+        actual_cost = entry.get('actual_cost')
+        estimated_cost = entry.get('estimated_cost', 0.0)
+        duration = entry.get('duration')
+        if verbose:
+            # Verbose format
+            print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
+            decision_type = entry.get('decision_type', 'N/A')
+            confidence = entry.get('confidence', 'N/A')
+            model = entry.get('model', 'N/A')
+            budget_remaining = entry.get('details', {}).get('budget_remaining', 'N/A')
+            print(f"  Decision Type: {decision_type} | Confidence: {confidence}")
+            if actual_cost is not None:
+                print(f"  Cost: ${actual_cost:.2f} (estimated: ${estimated_cost:.2f}) | Model: {model}")
+                if duration is not None:
+                    print(f"  Duration: {duration:.1f}s | Budget Remaining: ${budget_remaining}")
+            else:
+                print(f"  Estimated Cost: ${estimated_cost:.2f}")
+            if 'details' in entry and entry['details']:
+                # Show details without budget_remaining to avoid clutter
+                details_copy = entry['details'].copy()
+                details_copy.pop('budget_remaining', None)
+                if details_copy:
+                    details_str = json.dumps(details_copy, indent=2)
+                    print(f"  Details: {details_str}")
+        else:
+            # Normal format: [timestamp] operation | reason | status cost | duration
+            status_icon = "✓" if success else "✗" if success is False else "?"
+            cost_info = ""
+            if actual_cost is not None:
+                cost_info = f" | {status_icon} ${actual_cost:.2f} (est: ${estimated_cost:.2f})"
+            else:
+                cost_info = f" | Est: ${estimated_cost:.2f}"
+            duration_info = ""
+            if duration is not None:
+                duration_info = f" | {duration:.1f}s"
+            error_info = ""
+            if entry.get('error'):
+                error_info = f" | Error: {entry['error']}"
+            print(f"[{timestamp[:19]}] {operation:<12} | {reason}{cost_info}{duration_info}{error_info}")
     print("--- End of Log ---")
     return {'success': True, 'log_entries': log_entries}
@@ -185,6 +308,7 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
 def sync_orchestration(
     basename: str,
+    target_coverage: float = 90.0,
     language: str = "python",
     prompts_dir: str = "prompts",
     code_dir: str = "src",
@@ -194,7 +318,6 @@ def sync_orchestration(
     budget: float = 10.0,
     skip_verify: bool = False,
     skip_tests: bool = False,
-    target_coverage: float = 90.0,
     log: bool = False,
     force: bool = False,
     strength: float = 0.5,
@@ -223,7 +346,7 @@ def sync_orchestration(
         pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
     except Exception as e:
         # Log the error and return early with failure status
-        console.print(f"[red]Error constructing paths: {e}[/red]")
+        print(f"Error constructing paths: {e}")
         return {
             "success": False,
             "total_cost": 0.0,
@@ -250,9 +373,16 @@ def sync_orchestration(
     errors: List[str] = []
     start_time = time.time()
     animation_thread = None
+    # Track operation history for cycle detection
+    operation_history: List[str] = []
+    MAX_CYCLE_REPEATS = 2  # Maximum times to allow crash-verify cycle
     try:
         with SyncLock(basename, language):
+            # Log lock acquisition
+            log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
             # --- Start Animation Thread ---
             animation_thread = threading.Thread(
                 target=sync_animation,
@@ -267,21 +397,101 @@ def sync_orchestration(
             # --- Main Workflow Loop ---
             while True:
+                budget_remaining = budget - current_cost_ref[0]
                 if current_cost_ref[0] >= budget:
                     errors.append(f"Budget of ${budget:.2f} exceeded.")
+                    log_sync_event(basename, language, "budget_exceeded", {
+                        "total_cost": current_cost_ref[0],
+                        "budget": budget
+                    })
                     break
-                decision = sync_determine_operation(basename, language, target_coverage, budget - current_cost_ref[0], False, prompts_dir, skip_tests, skip_verify)
+                # Log budget warning when running low
+                if budget_remaining < budget * 0.2 and budget_remaining > 0:
+                    log_sync_event(basename, language, "budget_warning", {
+                        "remaining": budget_remaining,
+                        "percentage": (budget_remaining / budget) * 100
+                    })
+                decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
                 operation = decision.operation
+                # Create log entry with decision info
+                log_entry = create_sync_log_entry(decision, budget_remaining)
+                # Track operation history
+                operation_history.append(operation)
+                # Detect crash-verify cycles
+                if len(operation_history) >= 4:
+                    # Check for repeating crash-verify pattern
+                    recent_ops = operation_history[-4:]
+                    if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
+                        recent_ops == ['verify', 'crash', 'verify', 'crash']):
+                        # Count how many times this cycle has occurred
+                        cycle_count = 0
+                        for i in range(0, len(operation_history) - 1, 2):
+                            if i + 1 < len(operation_history):
+                                if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
+                                    (operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
+                                    cycle_count += 1
+                        if cycle_count >= MAX_CYCLE_REPEATS:
+                            errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
+                            errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
+                            log_sync_event(basename, language, "cycle_detected", {
+                                "cycle_type": "crash-verify",
+                                "cycle_count": cycle_count,
+                                "operation_history": operation_history[-10:]  # Last 10 operations
+                            })
+                            break
+                # Detect consecutive fix operations (infinite fix loop protection)
+                if operation == 'fix':
+                    # Count consecutive fix operations
+                    consecutive_fixes = 0
+                    for i in range(len(operation_history) - 1, -1, -1):
+                        if operation_history[i] == 'fix':
+                            consecutive_fixes += 1
+                        else:
+                            break
+                    MAX_CONSECUTIVE_FIXES = 5  # Allow up to 5 consecutive fix attempts
+                    if consecutive_fixes >= MAX_CONSECUTIVE_FIXES:
+                        errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
+                        errors.append("The test failures may not be resolvable by automated fixes in this environment.")
+                        log_sync_event(basename, language, "cycle_detected", {
+                            "cycle_type": "consecutive-fix",
+                            "consecutive_count": consecutive_fixes,
+                            "operation_history": operation_history[-10:]  # Last 10 operations
+                        })
+                        break
                 if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
                     current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
+                    # Log these final operations
+                    success = operation in ['all_synced', 'nothing']
+                    error_msg = None
                     if operation == 'fail_and_request_manual_merge':
                         errors.append(f"Manual merge required: {decision.reason}")
+                        error_msg = f"Manual merge required: {decision.reason}"
                     elif operation == 'error':
                         errors.append(f"Error determining operation: {decision.reason}")
+                        error_msg = f"Error determining operation: {decision.reason}"
                     elif operation == 'analyze_conflict':
                         errors.append(f"Conflict detected: {decision.reason}")
+                        error_msg = f"Conflict detected: {decision.reason}"
+                    # Update log entry for final operation
+                    update_sync_log_entry(log_entry, {
+                        'success': success,
+                        'cost': 0.0,
+                        'model': 'none',
+                        'error': error_msg
+                    }, 0.0)
+                    append_sync_log(basename, language, log_entry)
                     break
                 # Handle skips
@@ -289,6 +499,17 @@ def sync_orchestration(
                     # Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
                     skipped_operations.append('verify')
                     skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
+                    # Update log entry for skipped operation
+                    update_sync_log_entry(log_entry, {
+                        'success': True,
+                        'cost': 0.0,
+                        'model': 'skipped',
+                        'error': None
+                    }, 0.0)
+                    log_entry['details']['skip_reason'] = skip_reason
+                    append_sync_log(basename, language, log_entry)
                     report_data = RunReport(
                         timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
                         exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
@@ -298,6 +519,17 @@ def sync_orchestration(
                     continue
                 if operation == 'test' and skip_tests:
                     skipped_operations.append('test')
+                    # Update log entry for skipped operation
+                    update_sync_log_entry(log_entry, {
+                        'success': True,
+                        'cost': 0.0,
+                        'model': 'skipped',
+                        'error': None
+                    }, 0.0)
+                    log_entry['details']['skip_reason'] = 'skip_tests'
+                    append_sync_log(basename, language, log_entry)
                     report_data = RunReport(
                         timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
                         exit_code=0, tests_passed=0, tests_failed=0, coverage=1.0
@@ -308,6 +540,17 @@ def sync_orchestration(
                 if operation == 'crash' and skip_tests:
                     # Skip crash operations when tests are skipped since crash fixes usually require test execution
                     skipped_operations.append('crash')
+                    # Update log entry for skipped operation
+                    update_sync_log_entry(log_entry, {
+                        'success': True,
+                        'cost': 0.0,
+                        'model': 'skipped',
+                        'error': None
+                    }, 0.0)
+                    log_entry['details']['skip_reason'] = 'skip_tests'
+                    append_sync_log(basename, language, log_entry)
                     # Create a dummy run report indicating crash was skipped
                     report_data = RunReport(
                         timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
@@ -327,6 +570,7 @@ def sync_orchestration(
                 result = {}
                 success = False
+                start_time = time.time()  # Track execution time
                 # --- Execute Operation ---
                 try:
@@ -383,6 +627,18 @@ def sync_orchestration(
                             # Skip crash operation if required files are missing
                             print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
                             skipped_operations.append('crash')
+                            # Update log entry for skipped operation
+                            update_sync_log_entry(log_entry, {
+                                'success': True,
+                                'cost': 0.0,
+                                'model': 'skipped',
+                                'error': None
+                            }, 0.0)
+                            log_entry['details']['skip_reason'] = 'missing_files'
+                            log_entry['details']['missing_files'] = [f.name for f in missing_files]
+                            append_sync_log(basename, language, log_entry)
                             # Create a dummy run report indicating crash was skipped due to missing files
                             report_data = RunReport(
                                 timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
@@ -392,23 +648,106 @@ def sync_orchestration(
                             _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
                             continue
                         else:
-                            Path("crash.log").write_text("Simulated crash error")
+                            # Check if we have a run report indicating failures that need crash fixing
+                            current_run_report = read_run_report(basename, language)
+                            crash_log_content = ""
+                            # If we have a run report with exit_code != 0, that indicates a crash that needs fixing
+                            if current_run_report and current_run_report.exit_code != 0:
+                                # We have a crash to fix based on the run report
+                                crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n\n"
+                                # Try to run the example program to get additional error details
+                                try:
+                                    example_result = subprocess.run(
+                                        ['python', str(pdd_files['example'])],
+                                        capture_output=True,
+                                        text=True,
+                                        timeout=60,
+                                        env=os.environ.copy(),
+                                        cwd=str(pdd_files['example'].parent)
+                                    )
+                                    if example_result.returncode != 0:
+                                        crash_log_content += f"Example program also failed with exit code: {example_result.returncode}\n\n"
+                                        if example_result.stdout:
+                                            crash_log_content += f"STDOUT:\n{example_result.stdout}\n\n"
+                                        if example_result.stderr:
+                                            crash_log_content += f"STDERR:\n{example_result.stderr}\n"
+                                        # Check for syntax errors specifically
+                                        if "SyntaxError" in example_result.stderr:
+                                            crash_log_content = f"SYNTAX ERROR DETECTED:\n\n{crash_log_content}"
+                                    else:
+                                        crash_log_content += "Example program runs successfully, but tests are failing.\n"
+                                        crash_log_content += "This may indicate issues with test execution or test file syntax.\n"
+                                except subprocess.TimeoutExpired:
+                                    crash_log_content += "Example program execution timed out after 60 seconds\n"
+                                    crash_log_content += "This may indicate an infinite loop or the program is waiting for input.\n"
+                                except Exception as e:
+                                    crash_log_content += f"Error running example program: {str(e)}\n"
+                                    crash_log_content += f"Program path: {pdd_files['example']}\n"
+                            else:
+                                # No crash detected, skip crash operation
+                                print("No crash detected in run report, skipping crash fix")
+                                skipped_operations.append('crash')
+                                # Update log entry for skipped operation
+                                update_sync_log_entry(log_entry, {
+                                    'success': True,
+                                    'cost': 0.0,
+                                    'model': 'skipped',
+                                    'error': None
+                                }, time.time() - start_time)
+                                log_entry['details']['skip_reason'] = 'no_crash'
+                                append_sync_log(basename, language, log_entry)
+                                report_data = RunReport(
+                                    timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                    exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
+                                )
+                                save_run_report(asdict(report_data), basename, language)
+                                _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
+                                continue
+                            # Write actual error content or fallback
+                            if not crash_log_content:
+                                crash_log_content = "Unknown crash error - program failed but no error output captured"
+                            Path("crash.log").write_text(crash_log_content)
                             try:
                                 result = crash_main(
                                     ctx,
                                     prompt_file=str(pdd_files['prompt']),
                                     code_file=str(pdd_files['code']),
                                     program_file=str(pdd_files['example']),
-                                    error_file="crash.log"
+                                    error_file="crash.log",
+                                    output=str(pdd_files['code']),
+                                    output_program=str(pdd_files['example']),
+                                    loop=True,
+                                    max_attempts=max_attempts,
+                                    budget=budget - current_cost_ref[0]
                                 )
                             except (RuntimeError, Exception) as e:
                                 error_str = str(e)
-                                if ("Simulated crash error" in error_str or
-                                    "LLM returned None" in error_str or
+                                if ("LLM returned None" in error_str or
                                     "LLM failed to analyze errors" in error_str):
-                                    # Skip crash operation for simulated errors or LLM failures
-                                    print(f"Skipping crash operation due to simulated/LLM error: {e}")
+                                    # Skip crash operation for LLM failures
+                                    print(f"Skipping crash operation due to LLM error: {e}")
                                     skipped_operations.append('crash')
+                                    # Update log entry for skipped operation
+                                    update_sync_log_entry(log_entry, {
+                                        'success': False,
+                                        'cost': 0.0,
+                                        'model': 'skipped',
+                                        'error': f"LLM error: {str(e)}"
+                                    }, time.time() - start_time)
+                                    log_entry['details']['skip_reason'] = 'llm_error'
+                                    append_sync_log(basename, language, log_entry)
                                     report_data = RunReport(
                                         timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
                                         exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
@@ -425,11 +764,13 @@ def sync_orchestration(
                             prompt_file=str(pdd_files['prompt']),
                             code_file=str(pdd_files['code']),
                             program_file=str(pdd_files['example']),
-                            output_results=None,
+                            output_results=f"{basename}_verify_results.log",
                             output_code=str(pdd_files['code']),
                             output_program=str(pdd_files['example']),
-                            loop=False,
-                            verification_program=None
+                            loop=True,
+                            verification_program=str(pdd_files['example']),
+                            max_attempts=max_attempts,
+                            budget=budget - current_cost_ref[0]
                         )
                     elif operation == 'test':
                         # First, generate the test file
@@ -474,12 +815,13 @@ def sync_orchestration(
                         # Try to get actual test failure details from latest run
                         try:
-                            from .sync_determine_operation import read_run_report
                             run_report = read_run_report(basename, language)
                             if run_report and run_report.tests_failed > 0:
                                 # Run the tests again to capture actual error output
+                                # Use environment-aware Python executable for pytest execution
+                                python_executable = detect_host_python_executable()
                                 test_result = subprocess.run([
-                                    'python', '-m', 'pytest',
+                                    python_executable, '-m', 'pytest',
                                     str(pdd_files['test']),
                                     '-v', '--tb=short'
                                 ], capture_output=True, text=True, timeout=300)
@@ -503,11 +845,11 @@ def sync_orchestration(
                             output_test=str(pdd_files['test']),
                             output_code=str(pdd_files['code']),
                             output_results=f"{basename}_fix_results.log",
-                            loop=False,
-                            verification_program=None,
+                            loop=True,
+                            verification_program=str(pdd_files['example']),
                             max_attempts=max_attempts,
                             budget=budget - current_cost_ref[0],
-                            auto_submit=False
+                            auto_submit=True
                         )
                     elif operation == 'update':
                         result = update_main(
@@ -544,6 +886,33 @@ def sync_orchestration(
                     errors.append(f"Exception during '{operation}': {e}")
                     success = False
+                # Calculate execution duration
+                duration = time.time() - start_time
+                # Extract cost and model from result for logging
+                actual_cost = 0.0
+                model_name = "unknown"
+                error_message = None
+                if success:
+                    if isinstance(result, dict):
+                        actual_cost = result.get('cost', 0.0)
+                        model_name = result.get('model', 'unknown')
+                    elif isinstance(result, tuple) and len(result) >= 3:
+                        actual_cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
+                        model_name = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else 'unknown'
+                else:
+                    error_message = errors[-1] if errors else "Operation failed"
+                # Update and save log entry with execution results
+                update_sync_log_entry(log_entry, {
+                    'success': success,
+                    'cost': actual_cost,
+                    'model': model_name,
+                    'error': error_message
+                }, duration)
+                append_sync_log(basename, language, log_entry)
                 if success:
                     operations_completed.append(operation)
                     # Extract cost and model from result based on format
@@ -558,6 +927,47 @@ def sync_orchestration(
                         model = ''
                     _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
+                    # After successful crash operation, re-run the example to generate fresh run report
+                    if operation == 'crash':
+                        try:
+                            example_file = pdd_files['example']
+                            if example_file.exists():
+                                # Run the example program to check if crash is actually fixed
+                                try:
+                                    example_result = subprocess.run(
+                                        ['python', str(example_file)],
+                                        capture_output=True,
+                                        text=True,
+                                        timeout=60,
+                                        env=os.environ.copy(),
+                                        cwd=str(example_file.parent)
+                                    )
+                                    # Create fresh run report based on actual execution
+                                    report_data = RunReport(
+                                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                        exit_code=example_result.returncode,
+                                        tests_passed=1 if example_result.returncode == 0 else 0,
+                                        tests_failed=0 if example_result.returncode == 0 else 1,
+                                        coverage=100.0 if example_result.returncode == 0 else 0.0
+                                    )
+                                    save_run_report(asdict(report_data), basename, language)
+                                    print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
+                                except subprocess.TimeoutExpired:
+                                    # Example timed out - still considered a failure
+                                    report_data = RunReport(
+                                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                        exit_code=124,  # Standard timeout exit code
+                                        tests_passed=0, tests_failed=1, coverage=0.0
+                                    )
+                                    save_run_report(asdict(report_data), basename, language)
+                                    print("Example timed out after crash fix - created failure run report")
+                        except Exception as e:
+                            # Don't fail the entire operation if example re-execution fails
+                            print(f"Warning: Post-crash example re-execution failed: {e}")
                     # After successful fix operation, execute tests to update run report
                     if operation == 'fix':
                         try:
@@ -578,6 +988,16 @@ def sync_orchestration(
     except Exception as e:
         errors.append(f"An unexpected error occurred in the orchestrator: {e}")
     finally:
+        # Log lock release
+        try:
+            log_sync_event(basename, language, "lock_released", {
+                "pid": os.getpid(),
+                "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
+                "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
+            })
+        except Exception:
+            pass  # Don't fail if logging fails
         if stop_event:
             stop_event.set()
         if animation_thread and animation_thread.is_alive():

pdd-cli 0.0.43__py3-none-any.whl → 0.0.44__py3-none-any.whl

Potentially problematic release.

pdd-cli 0.0.43__py3-none-any.whl → 0.0.44__py3-none-any.whl