pdd-cli 0.0.43__py3-none-any.whl → 0.0.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic. Click here for more details.

pdd/sync_orchestration.py CHANGED
@@ -10,6 +10,7 @@ import json
10
10
  import datetime
11
11
  import subprocess
12
12
  import re
13
+ import os
13
14
  from pathlib import Path
14
15
  from typing import Dict, Any, Optional, List
15
16
  from dataclasses import asdict
@@ -25,6 +26,7 @@ from .sync_determine_operation import (
25
26
  PDD_DIR,
26
27
  META_DIR,
27
28
  SyncLock,
29
+ read_run_report,
28
30
  )
29
31
  from .auto_deps_main import auto_deps_main
30
32
  from .code_generator_main import code_generator_main
@@ -34,6 +36,7 @@ from .fix_verification_main import fix_verification_main
34
36
  from .cmd_test_main import cmd_test_main
35
37
  from .fix_main import fix_main
36
38
  from .update_main import update_main
39
+ from .python_env_detector import detect_host_python_executable
37
40
 
38
41
  # --- Mock Helper Functions ---
39
42
 
@@ -48,6 +51,53 @@ def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
48
51
  except Exception:
49
52
  return []
50
53
 
54
def create_sync_log_entry(decision, budget_remaining: float) -> Dict[str, Any]:
    """Build the initial sync-log entry for a decision.

    Args:
        decision: Object exposing ``operation``, ``reason``, ``details``
            (a dict, or a falsy value such as ``None``), ``confidence`` and
            ``estimated_cost``.
        budget_remaining: Budget left when the decision was made; stored
            under ``details["budget_remaining"]``.

    Returns:
        A new dict describing the decision. The execution-result fields
        (``actual_cost``, ``success``, ``model``, ``duration``, ``error``)
        are initialised to ``None``.
    """
    # Normalise a missing/empty details mapping once so both uses below agree.
    decision_details = decision.details or {}
    return {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "operation": decision.operation,
        "reason": decision.reason,
        "decision_type": decision_details.get("decision_type", "heuristic"),
        "confidence": decision.confidence,
        "estimated_cost": decision.estimated_cost,
        "actual_cost": None,
        "success": None,
        "model": None,
        "duration": None,
        "error": None,
        # Build a fresh dict so the decision's own details are never mutated.
        "details": {
            **decision_details,
            "budget_remaining": budget_remaining,
        },
    }
73
+
74
def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duration: float) -> Dict[str, Any]:
    """Fill a log entry's execution-result fields in place.

    Args:
        entry: Log entry dict to update; it is mutated.
        result: Outcome mapping. Reads the optional keys ``cost``
            (default ``0.0``), ``success`` (default ``False``), ``model``
            (default ``"unknown"``) and ``error``.
        duration: Value stored under ``"duration"``.

    Returns:
        The same ``entry`` object, for call chaining.
    """
    succeeded = result.get("success", False)
    entry.update({
        "actual_cost": result.get("cost", 0.0),
        "success": succeeded,
        "model": result.get("model", "unknown"),
        "duration": duration,
        # An error message is only recorded for unsuccessful results.
        "error": None if succeeded else result.get("error"),
    })
    return entry
84
+
85
def append_sync_log(basename: str, language: str, entry: Dict[str, Any]) -> None:
    """Append one log entry to the sync log file as a single JSON line.

    The file is ``META_DIR / "{basename}_{language}_sync.log"``; ``META_DIR``
    is created first if it does not exist.

    Args:
        basename: First component of the log file name.
        language: Second component of the log file name.
        entry: Entry to serialise with ``json.dumps``.
    """
    log_file = META_DIR / f"{basename}_{language}_sync.log"
    META_DIR.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the log independent of the platform default.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.write(json.dumps(entry) + '\n')
91
+
92
def log_sync_event(basename: str, language: str, event: str, details: Optional[Dict[str, Any]] = None) -> None:
    """Log a special sync event (lock_acquired, budget_warning, etc.).

    Args:
        basename: Passed through to ``append_sync_log``.
        language: Passed through to ``append_sync_log``.
        event: Event name stored under the ``"event"`` key.
        details: Optional extra data; ``None`` or an empty mapping is
            recorded as ``{}``.
    """
    entry = {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "event": event,
        "details": details or {},
    }
    append_sync_log(basename, language, entry)
100
+
51
101
  def save_run_report(report: Dict[str, Any], basename: str, language: str):
52
102
  """Save a run report to the metadata directory."""
53
103
  report_file = META_DIR / f"{basename}_{language}_run.json"
@@ -60,10 +110,11 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
60
110
  """Save fingerprint state after successful operation."""
61
111
  from datetime import datetime, timezone
62
112
  from .sync_determine_operation import calculate_current_hashes, Fingerprint
113
+ from . import __version__
63
114
 
64
115
  current_hashes = calculate_current_hashes(paths)
65
116
  fingerprint = Fingerprint(
66
- pdd_version="0.0.41",
117
+ pdd_version=__version__,
67
118
  timestamp=datetime.now(timezone.utc).isoformat(),
68
119
  command=operation,
69
120
  prompt_hash=current_hashes.get('prompt_hash'),
@@ -89,12 +140,24 @@ def _execute_tests_and_create_run_report(test_file: Path, basename: str, languag
89
140
  module_name = test_file.name.replace('test_', '').replace('.py', '')
90
141
 
91
142
  # Use the module import path rather than file path for coverage
143
+ # Use environment-aware Python executable for pytest execution
144
+ python_executable = detect_host_python_executable()
145
+
146
+ # Determine coverage target based on module location
147
+ if base_package:
148
+ cov_target = f'{base_package}.{module_name}'
149
+ else:
150
+ # Dynamically discover package structure based on test file location
151
+ relative_path = test_file.parent.relative_to(Path.cwd())
152
+ package_path = str(relative_path).replace(os.sep, '.')
153
+ cov_target = f'{package_path}.{module_name}' if package_path else module_name
154
+
92
155
  result = subprocess.run([
93
- 'python', '-m', 'pytest',
156
+ python_executable, '-m', 'pytest',
94
157
  str(test_file),
95
158
  '-v',
96
159
  '--tb=short',
97
- f'--cov=pdd.{module_name}',
160
+ f'--cov={cov_target}',
98
161
  '--cov-report=term-missing'
99
162
  ], capture_output=True, text=True, timeout=300)
100
163
 
@@ -118,8 +181,15 @@ def _execute_tests_and_create_run_report(test_file: Path, basename: str, languag
118
181
  if failed_match:
119
182
  tests_failed = int(failed_match.group(1))
120
183
 
121
- # Parse coverage percentage
184
+ # Parse coverage percentage - try multiple patterns
122
185
  coverage_match = re.search(r'TOTAL.*?(\d+)%', stdout)
186
+ if not coverage_match:
187
+ # Try alternative patterns for coverage output
188
+ coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
189
+ if not coverage_match:
190
+ # Try pattern with decimal
191
+ coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
192
+
123
193
  if coverage_match:
124
194
  coverage = float(coverage_match.group(1))
125
195
 
@@ -171,13 +241,66 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
171
241
 
172
242
  for entry in log_entries:
173
243
  timestamp = entry.get('timestamp', 'N/A')
174
- decision = entry.get('decision', {})
175
- operation = decision.get('operation', 'N/A')
176
- reason = decision.get('reason', 'N/A')
177
- print(f"[{timestamp}] Operation: {operation:<15} | Reason: {reason}")
178
- if verbose and 'details' in decision and decision['details']:
179
- details_str = json.dumps(decision['details'], indent=2)
180
- print(f" Details: {details_str}")
244
+
245
+ # Handle special event entries
246
+ if 'event' in entry:
247
+ event = entry.get('event', 'N/A')
248
+ print(f"[{timestamp[:19]}] EVENT: {event}")
249
+ if verbose and 'details' in entry:
250
+ details_str = json.dumps(entry['details'], indent=2)
251
+ print(f" Details: {details_str}")
252
+ continue
253
+
254
+ # Handle operation entries
255
+ operation = entry.get('operation', 'N/A')
256
+ reason = entry.get('reason', 'N/A')
257
+ success = entry.get('success')
258
+ actual_cost = entry.get('actual_cost')
259
+ estimated_cost = entry.get('estimated_cost', 0.0)
260
+ duration = entry.get('duration')
261
+
262
+ if verbose:
263
+ # Verbose format
264
+ print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
265
+ decision_type = entry.get('decision_type', 'N/A')
266
+ confidence = entry.get('confidence', 'N/A')
267
+ model = entry.get('model', 'N/A')
268
+ budget_remaining = entry.get('details', {}).get('budget_remaining', 'N/A')
269
+
270
+ print(f" Decision Type: {decision_type} | Confidence: {confidence}")
271
+ if actual_cost is not None:
272
+ print(f" Cost: ${actual_cost:.2f} (estimated: ${estimated_cost:.2f}) | Model: {model}")
273
+ if duration is not None:
274
+ print(f" Duration: {duration:.1f}s | Budget Remaining: ${budget_remaining}")
275
+ else:
276
+ print(f" Estimated Cost: ${estimated_cost:.2f}")
277
+
278
+ if 'details' in entry and entry['details']:
279
+ # Show details without budget_remaining to avoid clutter
280
+ details_copy = entry['details'].copy()
281
+ details_copy.pop('budget_remaining', None)
282
+ if details_copy:
283
+ details_str = json.dumps(details_copy, indent=2)
284
+ print(f" Details: {details_str}")
285
+ else:
286
+ # Normal format: [timestamp] operation | reason | status cost | duration
287
+ status_icon = "✓" if success else "✗" if success is False else "?"
288
+
289
+ cost_info = ""
290
+ if actual_cost is not None:
291
+ cost_info = f" | {status_icon} ${actual_cost:.2f} (est: ${estimated_cost:.2f})"
292
+ else:
293
+ cost_info = f" | Est: ${estimated_cost:.2f}"
294
+
295
+ duration_info = ""
296
+ if duration is not None:
297
+ duration_info = f" | {duration:.1f}s"
298
+
299
+ error_info = ""
300
+ if entry.get('error'):
301
+ error_info = f" | Error: {entry['error']}"
302
+
303
+ print(f"[{timestamp[:19]}] {operation:<12} | {reason}{cost_info}{duration_info}{error_info}")
181
304
 
182
305
  print("--- End of Log ---")
183
306
  return {'success': True, 'log_entries': log_entries}
@@ -185,6 +308,7 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
185
308
 
186
309
  def sync_orchestration(
187
310
  basename: str,
311
+ target_coverage: float = 90.0,
188
312
  language: str = "python",
189
313
  prompts_dir: str = "prompts",
190
314
  code_dir: str = "src",
@@ -194,7 +318,6 @@ def sync_orchestration(
194
318
  budget: float = 10.0,
195
319
  skip_verify: bool = False,
196
320
  skip_tests: bool = False,
197
- target_coverage: float = 90.0,
198
321
  log: bool = False,
199
322
  force: bool = False,
200
323
  strength: float = 0.5,
@@ -223,7 +346,7 @@ def sync_orchestration(
223
346
  pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
224
347
  except Exception as e:
225
348
  # Log the error and return early with failure status
226
- console.print(f"[red]Error constructing paths: {e}[/red]")
349
+ print(f"Error constructing paths: {e}")
227
350
  return {
228
351
  "success": False,
229
352
  "total_cost": 0.0,
@@ -250,9 +373,16 @@ def sync_orchestration(
250
373
  errors: List[str] = []
251
374
  start_time = time.time()
252
375
  animation_thread = None
376
+
377
+ # Track operation history for cycle detection
378
+ operation_history: List[str] = []
379
+ MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
253
380
 
254
381
  try:
255
382
  with SyncLock(basename, language):
383
+ # Log lock acquisition
384
+ log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
385
+
256
386
  # --- Start Animation Thread ---
257
387
  animation_thread = threading.Thread(
258
388
  target=sync_animation,
@@ -267,21 +397,101 @@ def sync_orchestration(
267
397
 
268
398
  # --- Main Workflow Loop ---
269
399
  while True:
400
+ budget_remaining = budget - current_cost_ref[0]
270
401
  if current_cost_ref[0] >= budget:
271
402
  errors.append(f"Budget of ${budget:.2f} exceeded.")
403
+ log_sync_event(basename, language, "budget_exceeded", {
404
+ "total_cost": current_cost_ref[0],
405
+ "budget": budget
406
+ })
272
407
  break
273
408
 
274
- decision = sync_determine_operation(basename, language, target_coverage, budget - current_cost_ref[0], False, prompts_dir, skip_tests, skip_verify)
409
+ # Log budget warning when running low
410
+ if budget_remaining < budget * 0.2 and budget_remaining > 0:
411
+ log_sync_event(basename, language, "budget_warning", {
412
+ "remaining": budget_remaining,
413
+ "percentage": (budget_remaining / budget) * 100
414
+ })
415
+
416
+ decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
275
417
  operation = decision.operation
418
+
419
+ # Create log entry with decision info
420
+ log_entry = create_sync_log_entry(decision, budget_remaining)
421
+
422
+ # Track operation history
423
+ operation_history.append(operation)
424
+
425
+ # Detect crash-verify cycles
426
+ if len(operation_history) >= 4:
427
+ # Check for repeating crash-verify pattern
428
+ recent_ops = operation_history[-4:]
429
+ if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
430
+ recent_ops == ['verify', 'crash', 'verify', 'crash']):
431
+ # Count how many times this cycle has occurred
432
+ cycle_count = 0
433
+ for i in range(0, len(operation_history) - 1, 2):
434
+ if i + 1 < len(operation_history):
435
+ if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
436
+ (operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
437
+ cycle_count += 1
438
+
439
+ if cycle_count >= MAX_CYCLE_REPEATS:
440
+ errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
441
+ errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
442
+ log_sync_event(basename, language, "cycle_detected", {
443
+ "cycle_type": "crash-verify",
444
+ "cycle_count": cycle_count,
445
+ "operation_history": operation_history[-10:] # Last 10 operations
446
+ })
447
+ break
448
+
449
+ # Detect consecutive fix operations (infinite fix loop protection)
450
+ if operation == 'fix':
451
+ # Count consecutive fix operations
452
+ consecutive_fixes = 0
453
+ for i in range(len(operation_history) - 1, -1, -1):
454
+ if operation_history[i] == 'fix':
455
+ consecutive_fixes += 1
456
+ else:
457
+ break
458
+
459
+ MAX_CONSECUTIVE_FIXES = 5 # Allow up to 5 consecutive fix attempts
460
+ if consecutive_fixes >= MAX_CONSECUTIVE_FIXES:
461
+ errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
462
+ errors.append("The test failures may not be resolvable by automated fixes in this environment.")
463
+ log_sync_event(basename, language, "cycle_detected", {
464
+ "cycle_type": "consecutive-fix",
465
+ "consecutive_count": consecutive_fixes,
466
+ "operation_history": operation_history[-10:] # Last 10 operations
467
+ })
468
+ break
276
469
 
277
470
  if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
278
471
  current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
472
+
473
+ # Log these final operations
474
+ success = operation in ['all_synced', 'nothing']
475
+ error_msg = None
279
476
  if operation == 'fail_and_request_manual_merge':
280
477
  errors.append(f"Manual merge required: {decision.reason}")
478
+ error_msg = f"Manual merge required: {decision.reason}"
281
479
  elif operation == 'error':
282
480
  errors.append(f"Error determining operation: {decision.reason}")
481
+ error_msg = f"Error determining operation: {decision.reason}"
283
482
  elif operation == 'analyze_conflict':
284
483
  errors.append(f"Conflict detected: {decision.reason}")
484
+ error_msg = f"Conflict detected: {decision.reason}"
485
+
486
+ # Update log entry for final operation
487
+ update_sync_log_entry(log_entry, {
488
+ 'success': success,
489
+ 'cost': 0.0,
490
+ 'model': 'none',
491
+ 'error': error_msg
492
+ }, 0.0)
493
+ append_sync_log(basename, language, log_entry)
494
+
285
495
  break
286
496
 
287
497
  # Handle skips
@@ -289,6 +499,17 @@ def sync_orchestration(
289
499
  # Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
290
500
  skipped_operations.append('verify')
291
501
  skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
502
+
503
+ # Update log entry for skipped operation
504
+ update_sync_log_entry(log_entry, {
505
+ 'success': True,
506
+ 'cost': 0.0,
507
+ 'model': 'skipped',
508
+ 'error': None
509
+ }, 0.0)
510
+ log_entry['details']['skip_reason'] = skip_reason
511
+ append_sync_log(basename, language, log_entry)
512
+
292
513
  report_data = RunReport(
293
514
  timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
294
515
  exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
@@ -298,6 +519,17 @@ def sync_orchestration(
298
519
  continue
299
520
  if operation == 'test' and skip_tests:
300
521
  skipped_operations.append('test')
522
+
523
+ # Update log entry for skipped operation
524
+ update_sync_log_entry(log_entry, {
525
+ 'success': True,
526
+ 'cost': 0.0,
527
+ 'model': 'skipped',
528
+ 'error': None
529
+ }, 0.0)
530
+ log_entry['details']['skip_reason'] = 'skip_tests'
531
+ append_sync_log(basename, language, log_entry)
532
+
301
533
  report_data = RunReport(
302
534
  timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
303
535
  exit_code=0, tests_passed=0, tests_failed=0, coverage=1.0
@@ -308,6 +540,17 @@ def sync_orchestration(
308
540
  if operation == 'crash' and skip_tests:
309
541
  # Skip crash operations when tests are skipped since crash fixes usually require test execution
310
542
  skipped_operations.append('crash')
543
+
544
+ # Update log entry for skipped operation
545
+ update_sync_log_entry(log_entry, {
546
+ 'success': True,
547
+ 'cost': 0.0,
548
+ 'model': 'skipped',
549
+ 'error': None
550
+ }, 0.0)
551
+ log_entry['details']['skip_reason'] = 'skip_tests'
552
+ append_sync_log(basename, language, log_entry)
553
+
311
554
  # Create a dummy run report indicating crash was skipped
312
555
  report_data = RunReport(
313
556
  timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
@@ -327,6 +570,7 @@ def sync_orchestration(
327
570
 
328
571
  result = {}
329
572
  success = False
573
+ start_time = time.time() # Track execution time
330
574
 
331
575
  # --- Execute Operation ---
332
576
  try:
@@ -383,6 +627,18 @@ def sync_orchestration(
383
627
  # Skip crash operation if required files are missing
384
628
  print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
385
629
  skipped_operations.append('crash')
630
+
631
+ # Update log entry for skipped operation
632
+ update_sync_log_entry(log_entry, {
633
+ 'success': True,
634
+ 'cost': 0.0,
635
+ 'model': 'skipped',
636
+ 'error': None
637
+ }, 0.0)
638
+ log_entry['details']['skip_reason'] = 'missing_files'
639
+ log_entry['details']['missing_files'] = [f.name for f in missing_files]
640
+ append_sync_log(basename, language, log_entry)
641
+
386
642
  # Create a dummy run report indicating crash was skipped due to missing files
387
643
  report_data = RunReport(
388
644
  timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
@@ -392,23 +648,106 @@ def sync_orchestration(
392
648
  _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
393
649
  continue
394
650
  else:
395
- Path("crash.log").write_text("Simulated crash error")
651
+ # Check if we have a run report indicating failures that need crash fixing
652
+ current_run_report = read_run_report(basename, language)
653
+ crash_log_content = ""
654
+
655
+ # If we have a run report with exit_code != 0, that indicates a crash that needs fixing
656
+ if current_run_report and current_run_report.exit_code != 0:
657
+ # We have a crash to fix based on the run report
658
+ crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n\n"
659
+
660
+ # Try to run the example program to get additional error details
661
+ try:
662
+ example_result = subprocess.run(
663
+ ['python', str(pdd_files['example'])],
664
+ capture_output=True,
665
+ text=True,
666
+ timeout=60,
667
+ env=os.environ.copy(),
668
+ cwd=str(pdd_files['example'].parent)
669
+ )
670
+
671
+ if example_result.returncode != 0:
672
+ crash_log_content += f"Example program also failed with exit code: {example_result.returncode}\n\n"
673
+ if example_result.stdout:
674
+ crash_log_content += f"STDOUT:\n{example_result.stdout}\n\n"
675
+ if example_result.stderr:
676
+ crash_log_content += f"STDERR:\n{example_result.stderr}\n"
677
+
678
+ # Check for syntax errors specifically
679
+ if "SyntaxError" in example_result.stderr:
680
+ crash_log_content = f"SYNTAX ERROR DETECTED:\n\n{crash_log_content}"
681
+ else:
682
+ crash_log_content += "Example program runs successfully, but tests are failing.\n"
683
+ crash_log_content += "This may indicate issues with test execution or test file syntax.\n"
684
+
685
+ except subprocess.TimeoutExpired:
686
+ crash_log_content += "Example program execution timed out after 60 seconds\n"
687
+ crash_log_content += "This may indicate an infinite loop or the program is waiting for input.\n"
688
+ except Exception as e:
689
+ crash_log_content += f"Error running example program: {str(e)}\n"
690
+ crash_log_content += f"Program path: {pdd_files['example']}\n"
691
+ else:
692
+ # No crash detected, skip crash operation
693
+ print("No crash detected in run report, skipping crash fix")
694
+ skipped_operations.append('crash')
695
+
696
+ # Update log entry for skipped operation
697
+ update_sync_log_entry(log_entry, {
698
+ 'success': True,
699
+ 'cost': 0.0,
700
+ 'model': 'skipped',
701
+ 'error': None
702
+ }, time.time() - start_time)
703
+ log_entry['details']['skip_reason'] = 'no_crash'
704
+ append_sync_log(basename, language, log_entry)
705
+
706
+ report_data = RunReport(
707
+ timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
708
+ exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
709
+ )
710
+ save_run_report(asdict(report_data), basename, language)
711
+ _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
712
+ continue
713
+
714
+ # Write actual error content or fallback
715
+ if not crash_log_content:
716
+ crash_log_content = "Unknown crash error - program failed but no error output captured"
717
+
718
+ Path("crash.log").write_text(crash_log_content)
719
+
396
720
  try:
397
721
  result = crash_main(
398
722
  ctx,
399
723
  prompt_file=str(pdd_files['prompt']),
400
724
  code_file=str(pdd_files['code']),
401
725
  program_file=str(pdd_files['example']),
402
- error_file="crash.log"
726
+ error_file="crash.log",
727
+ output=str(pdd_files['code']),
728
+ output_program=str(pdd_files['example']),
729
+ loop=True,
730
+ max_attempts=max_attempts,
731
+ budget=budget - current_cost_ref[0]
403
732
  )
404
733
  except (RuntimeError, Exception) as e:
405
734
  error_str = str(e)
406
- if ("Simulated crash error" in error_str or
407
- "LLM returned None" in error_str or
735
+ if ("LLM returned None" in error_str or
408
736
  "LLM failed to analyze errors" in error_str):
409
- # Skip crash operation for simulated errors or LLM failures
410
- print(f"Skipping crash operation due to simulated/LLM error: {e}")
737
+ # Skip crash operation for LLM failures
738
+ print(f"Skipping crash operation due to LLM error: {e}")
411
739
  skipped_operations.append('crash')
740
+
741
+ # Update log entry for skipped operation
742
+ update_sync_log_entry(log_entry, {
743
+ 'success': False,
744
+ 'cost': 0.0,
745
+ 'model': 'skipped',
746
+ 'error': f"LLM error: {str(e)}"
747
+ }, time.time() - start_time)
748
+ log_entry['details']['skip_reason'] = 'llm_error'
749
+ append_sync_log(basename, language, log_entry)
750
+
412
751
  report_data = RunReport(
413
752
  timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
414
753
  exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
@@ -425,11 +764,13 @@ def sync_orchestration(
425
764
  prompt_file=str(pdd_files['prompt']),
426
765
  code_file=str(pdd_files['code']),
427
766
  program_file=str(pdd_files['example']),
428
- output_results=None,
767
+ output_results=f"{basename}_verify_results.log",
429
768
  output_code=str(pdd_files['code']),
430
769
  output_program=str(pdd_files['example']),
431
- loop=False,
432
- verification_program=None
770
+ loop=True,
771
+ verification_program=str(pdd_files['example']),
772
+ max_attempts=max_attempts,
773
+ budget=budget - current_cost_ref[0]
433
774
  )
434
775
  elif operation == 'test':
435
776
  # First, generate the test file
@@ -474,12 +815,13 @@ def sync_orchestration(
474
815
 
475
816
  # Try to get actual test failure details from latest run
476
817
  try:
477
- from .sync_determine_operation import read_run_report
478
818
  run_report = read_run_report(basename, language)
479
819
  if run_report and run_report.tests_failed > 0:
480
820
  # Run the tests again to capture actual error output
821
+ # Use environment-aware Python executable for pytest execution
822
+ python_executable = detect_host_python_executable()
481
823
  test_result = subprocess.run([
482
- 'python', '-m', 'pytest',
824
+ python_executable, '-m', 'pytest',
483
825
  str(pdd_files['test']),
484
826
  '-v', '--tb=short'
485
827
  ], capture_output=True, text=True, timeout=300)
@@ -503,11 +845,11 @@ def sync_orchestration(
503
845
  output_test=str(pdd_files['test']),
504
846
  output_code=str(pdd_files['code']),
505
847
  output_results=f"{basename}_fix_results.log",
506
- loop=False,
507
- verification_program=None,
848
+ loop=True,
849
+ verification_program=str(pdd_files['example']),
508
850
  max_attempts=max_attempts,
509
851
  budget=budget - current_cost_ref[0],
510
- auto_submit=False
852
+ auto_submit=True
511
853
  )
512
854
  elif operation == 'update':
513
855
  result = update_main(
@@ -544,6 +886,33 @@ def sync_orchestration(
544
886
  errors.append(f"Exception during '{operation}': {e}")
545
887
  success = False
546
888
 
889
+ # Calculate execution duration
890
+ duration = time.time() - start_time
891
+
892
+ # Extract cost and model from result for logging
893
+ actual_cost = 0.0
894
+ model_name = "unknown"
895
+ error_message = None
896
+
897
+ if success:
898
+ if isinstance(result, dict):
899
+ actual_cost = result.get('cost', 0.0)
900
+ model_name = result.get('model', 'unknown')
901
+ elif isinstance(result, tuple) and len(result) >= 3:
902
+ actual_cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
903
+ model_name = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else 'unknown'
904
+ else:
905
+ error_message = errors[-1] if errors else "Operation failed"
906
+
907
+ # Update and save log entry with execution results
908
+ update_sync_log_entry(log_entry, {
909
+ 'success': success,
910
+ 'cost': actual_cost,
911
+ 'model': model_name,
912
+ 'error': error_message
913
+ }, duration)
914
+ append_sync_log(basename, language, log_entry)
915
+
547
916
  if success:
548
917
  operations_completed.append(operation)
549
918
  # Extract cost and model from result based on format
@@ -558,6 +927,47 @@ def sync_orchestration(
558
927
  model = ''
559
928
  _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
560
929
 
930
+ # After successful crash operation, re-run the example to generate fresh run report
931
+ if operation == 'crash':
932
+ try:
933
+ example_file = pdd_files['example']
934
+ if example_file.exists():
935
+ # Run the example program to check if crash is actually fixed
936
+ try:
937
+ example_result = subprocess.run(
938
+ ['python', str(example_file)],
939
+ capture_output=True,
940
+ text=True,
941
+ timeout=60,
942
+ env=os.environ.copy(),
943
+ cwd=str(example_file.parent)
944
+ )
945
+
946
+ # Create fresh run report based on actual execution
947
+ report_data = RunReport(
948
+ timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
949
+ exit_code=example_result.returncode,
950
+ tests_passed=1 if example_result.returncode == 0 else 0,
951
+ tests_failed=0 if example_result.returncode == 0 else 1,
952
+ coverage=100.0 if example_result.returncode == 0 else 0.0
953
+ )
954
+ save_run_report(asdict(report_data), basename, language)
955
+ print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
956
+
957
+ except subprocess.TimeoutExpired:
958
+ # Example timed out - still considered a failure
959
+ report_data = RunReport(
960
+ timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
961
+ exit_code=124, # Standard timeout exit code
962
+ tests_passed=0, tests_failed=1, coverage=0.0
963
+ )
964
+ save_run_report(asdict(report_data), basename, language)
965
+ print("Example timed out after crash fix - created failure run report")
966
+
967
+ except Exception as e:
968
+ # Don't fail the entire operation if example re-execution fails
969
+ print(f"Warning: Post-crash example re-execution failed: {e}")
970
+
561
971
  # After successful fix operation, execute tests to update run report
562
972
  if operation == 'fix':
563
973
  try:
@@ -578,6 +988,16 @@ def sync_orchestration(
578
988
  except Exception as e:
579
989
  errors.append(f"An unexpected error occurred in the orchestrator: {e}")
580
990
  finally:
991
+ # Log lock release
992
+ try:
993
+ log_sync_event(basename, language, "lock_released", {
994
+ "pid": os.getpid(),
995
+ "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
996
+ "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
997
+ })
998
+ except Exception:
999
+ pass # Don't fail if logging fails
1000
+
581
1001
  if stop_event:
582
1002
  stop_event.set()
583
1003
  if animation_thread and animation_thread.is_alive():