pdd-cli 0.0.43__py3-none-any.whl → 0.0.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +1 -1
- pdd/cli.py +2 -2
- pdd/cmd_test_main.py +9 -0
- pdd/data/language_format.csv +1 -0
- pdd/data/llm_model.csv +2 -2
- pdd/fix_code_loop.py +2 -2
- pdd/fix_error_loop.py +5 -2
- pdd/fix_verification_errors_loop.py +14 -1
- pdd/fix_verification_main.py +29 -8
- pdd/get_jwt_token.py +39 -7
- pdd/increase_tests.py +7 -0
- pdd/llm_invoke.py +9 -7
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/fix_code_module_errors_LLM.prompt +13 -3
- pdd/pytest_output.py +72 -20
- pdd/python_env_detector.py +151 -0
- pdd/summarize_directory.py +7 -1
- pdd/sync_determine_operation.py +396 -109
- pdd/sync_main.py +1 -1
- pdd/sync_orchestration.py +448 -28
- {pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/METADATA +4 -4
- {pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/RECORD +26 -25
- {pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.43.dist-info → pdd_cli-0.0.44.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py
CHANGED
|
@@ -10,6 +10,7 @@ import json
|
|
|
10
10
|
import datetime
|
|
11
11
|
import subprocess
|
|
12
12
|
import re
|
|
13
|
+
import os
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from typing import Dict, Any, Optional, List
|
|
15
16
|
from dataclasses import asdict
|
|
@@ -25,6 +26,7 @@ from .sync_determine_operation import (
|
|
|
25
26
|
PDD_DIR,
|
|
26
27
|
META_DIR,
|
|
27
28
|
SyncLock,
|
|
29
|
+
read_run_report,
|
|
28
30
|
)
|
|
29
31
|
from .auto_deps_main import auto_deps_main
|
|
30
32
|
from .code_generator_main import code_generator_main
|
|
@@ -34,6 +36,7 @@ from .fix_verification_main import fix_verification_main
|
|
|
34
36
|
from .cmd_test_main import cmd_test_main
|
|
35
37
|
from .fix_main import fix_main
|
|
36
38
|
from .update_main import update_main
|
|
39
|
+
from .python_env_detector import detect_host_python_executable
|
|
37
40
|
|
|
38
41
|
# --- Mock Helper Functions ---
|
|
39
42
|
|
|
@@ -48,6 +51,53 @@ def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
|
|
|
48
51
|
except Exception:
|
|
49
52
|
return []
|
|
50
53
|
|
|
54
|
+
def create_sync_log_entry(decision, budget_remaining: float) -> Dict[str, Any]:
    """Build the initial sync-log record for a decision.

    The record carries the decision's operation, reason, confidence and
    estimated cost together with a UTC ISO-8601 timestamp.  The fields
    describing the actual execution (``actual_cost``, ``success``, ``model``,
    ``duration``, ``error``) start out as ``None``.

    Args:
        decision: Object exposing ``operation``, ``reason``, ``details``,
            ``confidence`` and ``estimated_cost`` attributes.
        budget_remaining: Budget left when the decision was taken; stored
            under ``details["budget_remaining"]``.

    Returns:
        A new dict; ``decision.details`` itself is copied, not mutated.
    """
    # A missing or empty details mapping is treated as "no details".
    decision_details = decision.details if decision.details else {}

    # Copy the decision details, then let budget_remaining take precedence
    # over any same-named key the decision may have carried.
    merged_details = dict(decision_details)
    merged_details["budget_remaining"] = budget_remaining

    log_record: Dict[str, Any] = {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "operation": decision.operation,
        "reason": decision.reason,
        "decision_type": decision_details.get("decision_type", "heuristic"),
        "confidence": decision.confidence,
        "estimated_cost": decision.estimated_cost,
        # Execution results are unknown until the operation has run.
        "actual_cost": None,
        "success": None,
        "model": None,
        "duration": None,
        "error": None,
        "details": merged_details,
    }
    return log_record
|
|
73
|
+
|
|
74
|
+
def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duration: float) -> Dict[str, Any]:
    """Fill in the execution-result fields of an existing log entry.

    Args:
        entry: Log entry to update in place.
        result: Outcome mapping; the keys ``cost``, ``success``, ``model`` and
            ``error`` are read, with defaults of ``0.0``, ``False`` and
            ``"unknown"`` for the first three.
        duration: Execution time to record under ``duration``.

    Returns:
        The same ``entry`` object, after mutation.
    """
    # An error message is only recorded when the result is not successful.
    if result.get("success"):
        recorded_error = None
    else:
        recorded_error = result.get("error")

    entry["actual_cost"] = result.get("cost", 0.0)
    entry["success"] = result.get("success", False)
    entry["model"] = result.get("model", "unknown")
    entry["duration"] = duration
    entry["error"] = recorded_error
    return entry
|
|
84
|
+
|
|
85
|
+
def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
    """Append one log entry to the sync log as a single JSON line.

    The log lives at ``META_DIR / "<basename>_<language>_sync.log"``; the
    metadata directory is created first if it does not exist yet.
    """
    META_DIR.mkdir(parents=True, exist_ok=True)
    target_path = META_DIR / f"{basename}_{language}_sync.log"
    serialized_line = json.dumps(entry) + '\n'
    with open(target_path, 'a') as log_handle:
        log_handle.write(serialized_line)
|
|
91
|
+
|
|
92
|
+
def log_sync_event(basename: str, language: str, event: str, details: Optional[Dict[str, Any]] = None) -> None:
    """Log a special sync event (lock_acquired, budget_warning, etc.).

    Builds a record with a UTC ISO-8601 timestamp, the event name and its
    details, then appends it to the sync log via ``append_sync_log``.

    Args:
        basename: Base name used to locate the sync log.
        language: Language used to locate the sync log.
        event: Name of the event being recorded.
        details: Optional extra data for the event; ``None`` (or any falsy
            value) is stored as an empty dict.
    """
    entry = {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "event": event,
        # Fall back to a fresh dict so the log always carries a mapping here.
        "details": details or {}
    }
    append_sync_log(basename, language, entry)
|
|
100
|
+
|
|
51
101
|
def save_run_report(report: Dict[str, Any], basename: str, language: str):
|
|
52
102
|
"""Save a run report to the metadata directory."""
|
|
53
103
|
report_file = META_DIR / f"{basename}_{language}_run.json"
|
|
@@ -60,10 +110,11 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
|
|
|
60
110
|
"""Save fingerprint state after successful operation."""
|
|
61
111
|
from datetime import datetime, timezone
|
|
62
112
|
from .sync_determine_operation import calculate_current_hashes, Fingerprint
|
|
113
|
+
from . import __version__
|
|
63
114
|
|
|
64
115
|
current_hashes = calculate_current_hashes(paths)
|
|
65
116
|
fingerprint = Fingerprint(
|
|
66
|
-
pdd_version=
|
|
117
|
+
pdd_version=__version__,
|
|
67
118
|
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
68
119
|
command=operation,
|
|
69
120
|
prompt_hash=current_hashes.get('prompt_hash'),
|
|
@@ -89,12 +140,24 @@ def _execute_tests_and_create_run_report(test_file: Path, basename: str, languag
|
|
|
89
140
|
module_name = test_file.name.replace('test_', '').replace('.py', '')
|
|
90
141
|
|
|
91
142
|
# Use the module import path rather than file path for coverage
|
|
143
|
+
# Use environment-aware Python executable for pytest execution
|
|
144
|
+
python_executable = detect_host_python_executable()
|
|
145
|
+
|
|
146
|
+
# Determine coverage target based on module location
|
|
147
|
+
if base_package:
|
|
148
|
+
cov_target = f'{base_package}.{module_name}'
|
|
149
|
+
else:
|
|
150
|
+
# Dynamically discover package structure based on test file location
|
|
151
|
+
relative_path = test_file.parent.relative_to(Path.cwd())
|
|
152
|
+
package_path = str(relative_path).replace(os.sep, '.')
|
|
153
|
+
cov_target = f'{package_path}.{module_name}' if package_path else module_name
|
|
154
|
+
|
|
92
155
|
result = subprocess.run([
|
|
93
|
-
|
|
156
|
+
python_executable, '-m', 'pytest',
|
|
94
157
|
str(test_file),
|
|
95
158
|
'-v',
|
|
96
159
|
'--tb=short',
|
|
97
|
-
f'--cov=
|
|
160
|
+
f'--cov={cov_target}',
|
|
98
161
|
'--cov-report=term-missing'
|
|
99
162
|
], capture_output=True, text=True, timeout=300)
|
|
100
163
|
|
|
@@ -118,8 +181,15 @@ def _execute_tests_and_create_run_report(test_file: Path, basename: str, languag
|
|
|
118
181
|
if failed_match:
|
|
119
182
|
tests_failed = int(failed_match.group(1))
|
|
120
183
|
|
|
121
|
-
# Parse coverage percentage
|
|
184
|
+
# Parse coverage percentage - try multiple patterns
|
|
122
185
|
coverage_match = re.search(r'TOTAL.*?(\d+)%', stdout)
|
|
186
|
+
if not coverage_match:
|
|
187
|
+
# Try alternative patterns for coverage output
|
|
188
|
+
coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
|
|
189
|
+
if not coverage_match:
|
|
190
|
+
# Try pattern with decimal
|
|
191
|
+
coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
|
|
192
|
+
|
|
123
193
|
if coverage_match:
|
|
124
194
|
coverage = float(coverage_match.group(1))
|
|
125
195
|
|
|
@@ -171,13 +241,66 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
|
|
|
171
241
|
|
|
172
242
|
for entry in log_entries:
|
|
173
243
|
timestamp = entry.get('timestamp', 'N/A')
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
244
|
+
|
|
245
|
+
# Handle special event entries
|
|
246
|
+
if 'event' in entry:
|
|
247
|
+
event = entry.get('event', 'N/A')
|
|
248
|
+
print(f"[{timestamp[:19]}] EVENT: {event}")
|
|
249
|
+
if verbose and 'details' in entry:
|
|
250
|
+
details_str = json.dumps(entry['details'], indent=2)
|
|
251
|
+
print(f" Details: {details_str}")
|
|
252
|
+
continue
|
|
253
|
+
|
|
254
|
+
# Handle operation entries
|
|
255
|
+
operation = entry.get('operation', 'N/A')
|
|
256
|
+
reason = entry.get('reason', 'N/A')
|
|
257
|
+
success = entry.get('success')
|
|
258
|
+
actual_cost = entry.get('actual_cost')
|
|
259
|
+
estimated_cost = entry.get('estimated_cost', 0.0)
|
|
260
|
+
duration = entry.get('duration')
|
|
261
|
+
|
|
262
|
+
if verbose:
|
|
263
|
+
# Verbose format
|
|
264
|
+
print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
|
|
265
|
+
decision_type = entry.get('decision_type', 'N/A')
|
|
266
|
+
confidence = entry.get('confidence', 'N/A')
|
|
267
|
+
model = entry.get('model', 'N/A')
|
|
268
|
+
budget_remaining = entry.get('details', {}).get('budget_remaining', 'N/A')
|
|
269
|
+
|
|
270
|
+
print(f" Decision Type: {decision_type} | Confidence: {confidence}")
|
|
271
|
+
if actual_cost is not None:
|
|
272
|
+
print(f" Cost: ${actual_cost:.2f} (estimated: ${estimated_cost:.2f}) | Model: {model}")
|
|
273
|
+
if duration is not None:
|
|
274
|
+
print(f" Duration: {duration:.1f}s | Budget Remaining: ${budget_remaining}")
|
|
275
|
+
else:
|
|
276
|
+
print(f" Estimated Cost: ${estimated_cost:.2f}")
|
|
277
|
+
|
|
278
|
+
if 'details' in entry and entry['details']:
|
|
279
|
+
# Show details without budget_remaining to avoid clutter
|
|
280
|
+
details_copy = entry['details'].copy()
|
|
281
|
+
details_copy.pop('budget_remaining', None)
|
|
282
|
+
if details_copy:
|
|
283
|
+
details_str = json.dumps(details_copy, indent=2)
|
|
284
|
+
print(f" Details: {details_str}")
|
|
285
|
+
else:
|
|
286
|
+
# Normal format: [timestamp] operation | reason | status cost | duration
|
|
287
|
+
status_icon = "✓" if success else "✗" if success is False else "?"
|
|
288
|
+
|
|
289
|
+
cost_info = ""
|
|
290
|
+
if actual_cost is not None:
|
|
291
|
+
cost_info = f" | {status_icon} ${actual_cost:.2f} (est: ${estimated_cost:.2f})"
|
|
292
|
+
else:
|
|
293
|
+
cost_info = f" | Est: ${estimated_cost:.2f}"
|
|
294
|
+
|
|
295
|
+
duration_info = ""
|
|
296
|
+
if duration is not None:
|
|
297
|
+
duration_info = f" | {duration:.1f}s"
|
|
298
|
+
|
|
299
|
+
error_info = ""
|
|
300
|
+
if entry.get('error'):
|
|
301
|
+
error_info = f" | Error: {entry['error']}"
|
|
302
|
+
|
|
303
|
+
print(f"[{timestamp[:19]}] {operation:<12} | {reason}{cost_info}{duration_info}{error_info}")
|
|
181
304
|
|
|
182
305
|
print("--- End of Log ---")
|
|
183
306
|
return {'success': True, 'log_entries': log_entries}
|
|
@@ -185,6 +308,7 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
|
|
|
185
308
|
|
|
186
309
|
def sync_orchestration(
|
|
187
310
|
basename: str,
|
|
311
|
+
target_coverage: float = 90.0,
|
|
188
312
|
language: str = "python",
|
|
189
313
|
prompts_dir: str = "prompts",
|
|
190
314
|
code_dir: str = "src",
|
|
@@ -194,7 +318,6 @@ def sync_orchestration(
|
|
|
194
318
|
budget: float = 10.0,
|
|
195
319
|
skip_verify: bool = False,
|
|
196
320
|
skip_tests: bool = False,
|
|
197
|
-
target_coverage: float = 90.0,
|
|
198
321
|
log: bool = False,
|
|
199
322
|
force: bool = False,
|
|
200
323
|
strength: float = 0.5,
|
|
@@ -223,7 +346,7 @@ def sync_orchestration(
|
|
|
223
346
|
pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
|
|
224
347
|
except Exception as e:
|
|
225
348
|
# Log the error and return early with failure status
|
|
226
|
-
|
|
349
|
+
print(f"Error constructing paths: {e}")
|
|
227
350
|
return {
|
|
228
351
|
"success": False,
|
|
229
352
|
"total_cost": 0.0,
|
|
@@ -250,9 +373,16 @@ def sync_orchestration(
|
|
|
250
373
|
errors: List[str] = []
|
|
251
374
|
start_time = time.time()
|
|
252
375
|
animation_thread = None
|
|
376
|
+
|
|
377
|
+
# Track operation history for cycle detection
|
|
378
|
+
operation_history: List[str] = []
|
|
379
|
+
MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
|
|
253
380
|
|
|
254
381
|
try:
|
|
255
382
|
with SyncLock(basename, language):
|
|
383
|
+
# Log lock acquisition
|
|
384
|
+
log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
|
|
385
|
+
|
|
256
386
|
# --- Start Animation Thread ---
|
|
257
387
|
animation_thread = threading.Thread(
|
|
258
388
|
target=sync_animation,
|
|
@@ -267,21 +397,101 @@ def sync_orchestration(
|
|
|
267
397
|
|
|
268
398
|
# --- Main Workflow Loop ---
|
|
269
399
|
while True:
|
|
400
|
+
budget_remaining = budget - current_cost_ref[0]
|
|
270
401
|
if current_cost_ref[0] >= budget:
|
|
271
402
|
errors.append(f"Budget of ${budget:.2f} exceeded.")
|
|
403
|
+
log_sync_event(basename, language, "budget_exceeded", {
|
|
404
|
+
"total_cost": current_cost_ref[0],
|
|
405
|
+
"budget": budget
|
|
406
|
+
})
|
|
272
407
|
break
|
|
273
408
|
|
|
274
|
-
|
|
409
|
+
# Log budget warning when running low
|
|
410
|
+
if budget_remaining < budget * 0.2 and budget_remaining > 0:
|
|
411
|
+
log_sync_event(basename, language, "budget_warning", {
|
|
412
|
+
"remaining": budget_remaining,
|
|
413
|
+
"percentage": (budget_remaining / budget) * 100
|
|
414
|
+
})
|
|
415
|
+
|
|
416
|
+
decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
|
|
275
417
|
operation = decision.operation
|
|
418
|
+
|
|
419
|
+
# Create log entry with decision info
|
|
420
|
+
log_entry = create_sync_log_entry(decision, budget_remaining)
|
|
421
|
+
|
|
422
|
+
# Track operation history
|
|
423
|
+
operation_history.append(operation)
|
|
424
|
+
|
|
425
|
+
# Detect crash-verify cycles
|
|
426
|
+
if len(operation_history) >= 4:
|
|
427
|
+
# Check for repeating crash-verify pattern
|
|
428
|
+
recent_ops = operation_history[-4:]
|
|
429
|
+
if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
|
|
430
|
+
recent_ops == ['verify', 'crash', 'verify', 'crash']):
|
|
431
|
+
# Count how many times this cycle has occurred
|
|
432
|
+
cycle_count = 0
|
|
433
|
+
for i in range(0, len(operation_history) - 1, 2):
|
|
434
|
+
if i + 1 < len(operation_history):
|
|
435
|
+
if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
|
|
436
|
+
(operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
|
|
437
|
+
cycle_count += 1
|
|
438
|
+
|
|
439
|
+
if cycle_count >= MAX_CYCLE_REPEATS:
|
|
440
|
+
errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
|
|
441
|
+
errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
|
|
442
|
+
log_sync_event(basename, language, "cycle_detected", {
|
|
443
|
+
"cycle_type": "crash-verify",
|
|
444
|
+
"cycle_count": cycle_count,
|
|
445
|
+
"operation_history": operation_history[-10:] # Last 10 operations
|
|
446
|
+
})
|
|
447
|
+
break
|
|
448
|
+
|
|
449
|
+
# Detect consecutive fix operations (infinite fix loop protection)
|
|
450
|
+
if operation == 'fix':
|
|
451
|
+
# Count consecutive fix operations
|
|
452
|
+
consecutive_fixes = 0
|
|
453
|
+
for i in range(len(operation_history) - 1, -1, -1):
|
|
454
|
+
if operation_history[i] == 'fix':
|
|
455
|
+
consecutive_fixes += 1
|
|
456
|
+
else:
|
|
457
|
+
break
|
|
458
|
+
|
|
459
|
+
MAX_CONSECUTIVE_FIXES = 5 # Allow up to 5 consecutive fix attempts
|
|
460
|
+
if consecutive_fixes >= MAX_CONSECUTIVE_FIXES:
|
|
461
|
+
errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
|
|
462
|
+
errors.append("The test failures may not be resolvable by automated fixes in this environment.")
|
|
463
|
+
log_sync_event(basename, language, "cycle_detected", {
|
|
464
|
+
"cycle_type": "consecutive-fix",
|
|
465
|
+
"consecutive_count": consecutive_fixes,
|
|
466
|
+
"operation_history": operation_history[-10:] # Last 10 operations
|
|
467
|
+
})
|
|
468
|
+
break
|
|
276
469
|
|
|
277
470
|
if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
|
|
278
471
|
current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
|
|
472
|
+
|
|
473
|
+
# Log these final operations
|
|
474
|
+
success = operation in ['all_synced', 'nothing']
|
|
475
|
+
error_msg = None
|
|
279
476
|
if operation == 'fail_and_request_manual_merge':
|
|
280
477
|
errors.append(f"Manual merge required: {decision.reason}")
|
|
478
|
+
error_msg = f"Manual merge required: {decision.reason}"
|
|
281
479
|
elif operation == 'error':
|
|
282
480
|
errors.append(f"Error determining operation: {decision.reason}")
|
|
481
|
+
error_msg = f"Error determining operation: {decision.reason}"
|
|
283
482
|
elif operation == 'analyze_conflict':
|
|
284
483
|
errors.append(f"Conflict detected: {decision.reason}")
|
|
484
|
+
error_msg = f"Conflict detected: {decision.reason}"
|
|
485
|
+
|
|
486
|
+
# Update log entry for final operation
|
|
487
|
+
update_sync_log_entry(log_entry, {
|
|
488
|
+
'success': success,
|
|
489
|
+
'cost': 0.0,
|
|
490
|
+
'model': 'none',
|
|
491
|
+
'error': error_msg
|
|
492
|
+
}, 0.0)
|
|
493
|
+
append_sync_log(basename, language, log_entry)
|
|
494
|
+
|
|
285
495
|
break
|
|
286
496
|
|
|
287
497
|
# Handle skips
|
|
@@ -289,6 +499,17 @@ def sync_orchestration(
|
|
|
289
499
|
# Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
|
|
290
500
|
skipped_operations.append('verify')
|
|
291
501
|
skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
|
|
502
|
+
|
|
503
|
+
# Update log entry for skipped operation
|
|
504
|
+
update_sync_log_entry(log_entry, {
|
|
505
|
+
'success': True,
|
|
506
|
+
'cost': 0.0,
|
|
507
|
+
'model': 'skipped',
|
|
508
|
+
'error': None
|
|
509
|
+
}, 0.0)
|
|
510
|
+
log_entry['details']['skip_reason'] = skip_reason
|
|
511
|
+
append_sync_log(basename, language, log_entry)
|
|
512
|
+
|
|
292
513
|
report_data = RunReport(
|
|
293
514
|
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
294
515
|
exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
|
|
@@ -298,6 +519,17 @@ def sync_orchestration(
|
|
|
298
519
|
continue
|
|
299
520
|
if operation == 'test' and skip_tests:
|
|
300
521
|
skipped_operations.append('test')
|
|
522
|
+
|
|
523
|
+
# Update log entry for skipped operation
|
|
524
|
+
update_sync_log_entry(log_entry, {
|
|
525
|
+
'success': True,
|
|
526
|
+
'cost': 0.0,
|
|
527
|
+
'model': 'skipped',
|
|
528
|
+
'error': None
|
|
529
|
+
}, 0.0)
|
|
530
|
+
log_entry['details']['skip_reason'] = 'skip_tests'
|
|
531
|
+
append_sync_log(basename, language, log_entry)
|
|
532
|
+
|
|
301
533
|
report_data = RunReport(
|
|
302
534
|
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
303
535
|
exit_code=0, tests_passed=0, tests_failed=0, coverage=1.0
|
|
@@ -308,6 +540,17 @@ def sync_orchestration(
|
|
|
308
540
|
if operation == 'crash' and skip_tests:
|
|
309
541
|
# Skip crash operations when tests are skipped since crash fixes usually require test execution
|
|
310
542
|
skipped_operations.append('crash')
|
|
543
|
+
|
|
544
|
+
# Update log entry for skipped operation
|
|
545
|
+
update_sync_log_entry(log_entry, {
|
|
546
|
+
'success': True,
|
|
547
|
+
'cost': 0.0,
|
|
548
|
+
'model': 'skipped',
|
|
549
|
+
'error': None
|
|
550
|
+
}, 0.0)
|
|
551
|
+
log_entry['details']['skip_reason'] = 'skip_tests'
|
|
552
|
+
append_sync_log(basename, language, log_entry)
|
|
553
|
+
|
|
311
554
|
# Create a dummy run report indicating crash was skipped
|
|
312
555
|
report_data = RunReport(
|
|
313
556
|
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
@@ -327,6 +570,7 @@ def sync_orchestration(
|
|
|
327
570
|
|
|
328
571
|
result = {}
|
|
329
572
|
success = False
|
|
573
|
+
start_time = time.time() # Track execution time
|
|
330
574
|
|
|
331
575
|
# --- Execute Operation ---
|
|
332
576
|
try:
|
|
@@ -383,6 +627,18 @@ def sync_orchestration(
|
|
|
383
627
|
# Skip crash operation if required files are missing
|
|
384
628
|
print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
|
|
385
629
|
skipped_operations.append('crash')
|
|
630
|
+
|
|
631
|
+
# Update log entry for skipped operation
|
|
632
|
+
update_sync_log_entry(log_entry, {
|
|
633
|
+
'success': True,
|
|
634
|
+
'cost': 0.0,
|
|
635
|
+
'model': 'skipped',
|
|
636
|
+
'error': None
|
|
637
|
+
}, 0.0)
|
|
638
|
+
log_entry['details']['skip_reason'] = 'missing_files'
|
|
639
|
+
log_entry['details']['missing_files'] = [f.name for f in missing_files]
|
|
640
|
+
append_sync_log(basename, language, log_entry)
|
|
641
|
+
|
|
386
642
|
# Create a dummy run report indicating crash was skipped due to missing files
|
|
387
643
|
report_data = RunReport(
|
|
388
644
|
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
@@ -392,23 +648,106 @@ def sync_orchestration(
|
|
|
392
648
|
_save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
|
|
393
649
|
continue
|
|
394
650
|
else:
|
|
395
|
-
|
|
651
|
+
# Check if we have a run report indicating failures that need crash fixing
|
|
652
|
+
current_run_report = read_run_report(basename, language)
|
|
653
|
+
crash_log_content = ""
|
|
654
|
+
|
|
655
|
+
# If we have a run report with exit_code != 0, that indicates a crash that needs fixing
|
|
656
|
+
if current_run_report and current_run_report.exit_code != 0:
|
|
657
|
+
# We have a crash to fix based on the run report
|
|
658
|
+
crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n\n"
|
|
659
|
+
|
|
660
|
+
# Try to run the example program to get additional error details
|
|
661
|
+
try:
|
|
662
|
+
example_result = subprocess.run(
|
|
663
|
+
['python', str(pdd_files['example'])],
|
|
664
|
+
capture_output=True,
|
|
665
|
+
text=True,
|
|
666
|
+
timeout=60,
|
|
667
|
+
env=os.environ.copy(),
|
|
668
|
+
cwd=str(pdd_files['example'].parent)
|
|
669
|
+
)
|
|
670
|
+
|
|
671
|
+
if example_result.returncode != 0:
|
|
672
|
+
crash_log_content += f"Example program also failed with exit code: {example_result.returncode}\n\n"
|
|
673
|
+
if example_result.stdout:
|
|
674
|
+
crash_log_content += f"STDOUT:\n{example_result.stdout}\n\n"
|
|
675
|
+
if example_result.stderr:
|
|
676
|
+
crash_log_content += f"STDERR:\n{example_result.stderr}\n"
|
|
677
|
+
|
|
678
|
+
# Check for syntax errors specifically
|
|
679
|
+
if "SyntaxError" in example_result.stderr:
|
|
680
|
+
crash_log_content = f"SYNTAX ERROR DETECTED:\n\n{crash_log_content}"
|
|
681
|
+
else:
|
|
682
|
+
crash_log_content += "Example program runs successfully, but tests are failing.\n"
|
|
683
|
+
crash_log_content += "This may indicate issues with test execution or test file syntax.\n"
|
|
684
|
+
|
|
685
|
+
except subprocess.TimeoutExpired:
|
|
686
|
+
crash_log_content += "Example program execution timed out after 60 seconds\n"
|
|
687
|
+
crash_log_content += "This may indicate an infinite loop or the program is waiting for input.\n"
|
|
688
|
+
except Exception as e:
|
|
689
|
+
crash_log_content += f"Error running example program: {str(e)}\n"
|
|
690
|
+
crash_log_content += f"Program path: {pdd_files['example']}\n"
|
|
691
|
+
else:
|
|
692
|
+
# No crash detected, skip crash operation
|
|
693
|
+
print("No crash detected in run report, skipping crash fix")
|
|
694
|
+
skipped_operations.append('crash')
|
|
695
|
+
|
|
696
|
+
# Update log entry for skipped operation
|
|
697
|
+
update_sync_log_entry(log_entry, {
|
|
698
|
+
'success': True,
|
|
699
|
+
'cost': 0.0,
|
|
700
|
+
'model': 'skipped',
|
|
701
|
+
'error': None
|
|
702
|
+
}, time.time() - start_time)
|
|
703
|
+
log_entry['details']['skip_reason'] = 'no_crash'
|
|
704
|
+
append_sync_log(basename, language, log_entry)
|
|
705
|
+
|
|
706
|
+
report_data = RunReport(
|
|
707
|
+
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
708
|
+
exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
|
|
709
|
+
)
|
|
710
|
+
save_run_report(asdict(report_data), basename, language)
|
|
711
|
+
_save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
|
|
712
|
+
continue
|
|
713
|
+
|
|
714
|
+
# Write actual error content or fallback
|
|
715
|
+
if not crash_log_content:
|
|
716
|
+
crash_log_content = "Unknown crash error - program failed but no error output captured"
|
|
717
|
+
|
|
718
|
+
Path("crash.log").write_text(crash_log_content)
|
|
719
|
+
|
|
396
720
|
try:
|
|
397
721
|
result = crash_main(
|
|
398
722
|
ctx,
|
|
399
723
|
prompt_file=str(pdd_files['prompt']),
|
|
400
724
|
code_file=str(pdd_files['code']),
|
|
401
725
|
program_file=str(pdd_files['example']),
|
|
402
|
-
error_file="crash.log"
|
|
726
|
+
error_file="crash.log",
|
|
727
|
+
output=str(pdd_files['code']),
|
|
728
|
+
output_program=str(pdd_files['example']),
|
|
729
|
+
loop=True,
|
|
730
|
+
max_attempts=max_attempts,
|
|
731
|
+
budget=budget - current_cost_ref[0]
|
|
403
732
|
)
|
|
404
733
|
except (RuntimeError, Exception) as e:
|
|
405
734
|
error_str = str(e)
|
|
406
|
-
if ("
|
|
407
|
-
"LLM returned None" in error_str or
|
|
735
|
+
if ("LLM returned None" in error_str or
|
|
408
736
|
"LLM failed to analyze errors" in error_str):
|
|
409
|
-
# Skip crash operation for
|
|
410
|
-
print(f"Skipping crash operation due to
|
|
737
|
+
# Skip crash operation for LLM failures
|
|
738
|
+
print(f"Skipping crash operation due to LLM error: {e}")
|
|
411
739
|
skipped_operations.append('crash')
|
|
740
|
+
|
|
741
|
+
# Update log entry for skipped operation
|
|
742
|
+
update_sync_log_entry(log_entry, {
|
|
743
|
+
'success': False,
|
|
744
|
+
'cost': 0.0,
|
|
745
|
+
'model': 'skipped',
|
|
746
|
+
'error': f"LLM error: {str(e)}"
|
|
747
|
+
}, time.time() - start_time)
|
|
748
|
+
log_entry['details']['skip_reason'] = 'llm_error'
|
|
749
|
+
append_sync_log(basename, language, log_entry)
|
|
750
|
+
|
|
412
751
|
report_data = RunReport(
|
|
413
752
|
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
414
753
|
exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
|
|
@@ -425,11 +764,13 @@ def sync_orchestration(
|
|
|
425
764
|
prompt_file=str(pdd_files['prompt']),
|
|
426
765
|
code_file=str(pdd_files['code']),
|
|
427
766
|
program_file=str(pdd_files['example']),
|
|
428
|
-
output_results=
|
|
767
|
+
output_results=f"{basename}_verify_results.log",
|
|
429
768
|
output_code=str(pdd_files['code']),
|
|
430
769
|
output_program=str(pdd_files['example']),
|
|
431
|
-
loop=
|
|
432
|
-
verification_program=
|
|
770
|
+
loop=True,
|
|
771
|
+
verification_program=str(pdd_files['example']),
|
|
772
|
+
max_attempts=max_attempts,
|
|
773
|
+
budget=budget - current_cost_ref[0]
|
|
433
774
|
)
|
|
434
775
|
elif operation == 'test':
|
|
435
776
|
# First, generate the test file
|
|
@@ -474,12 +815,13 @@ def sync_orchestration(
|
|
|
474
815
|
|
|
475
816
|
# Try to get actual test failure details from latest run
|
|
476
817
|
try:
|
|
477
|
-
from .sync_determine_operation import read_run_report
|
|
478
818
|
run_report = read_run_report(basename, language)
|
|
479
819
|
if run_report and run_report.tests_failed > 0:
|
|
480
820
|
# Run the tests again to capture actual error output
|
|
821
|
+
# Use environment-aware Python executable for pytest execution
|
|
822
|
+
python_executable = detect_host_python_executable()
|
|
481
823
|
test_result = subprocess.run([
|
|
482
|
-
|
|
824
|
+
python_executable, '-m', 'pytest',
|
|
483
825
|
str(pdd_files['test']),
|
|
484
826
|
'-v', '--tb=short'
|
|
485
827
|
], capture_output=True, text=True, timeout=300)
|
|
@@ -503,11 +845,11 @@ def sync_orchestration(
|
|
|
503
845
|
output_test=str(pdd_files['test']),
|
|
504
846
|
output_code=str(pdd_files['code']),
|
|
505
847
|
output_results=f"{basename}_fix_results.log",
|
|
506
|
-
loop=
|
|
507
|
-
verification_program=
|
|
848
|
+
loop=True,
|
|
849
|
+
verification_program=str(pdd_files['example']),
|
|
508
850
|
max_attempts=max_attempts,
|
|
509
851
|
budget=budget - current_cost_ref[0],
|
|
510
|
-
auto_submit=
|
|
852
|
+
auto_submit=True
|
|
511
853
|
)
|
|
512
854
|
elif operation == 'update':
|
|
513
855
|
result = update_main(
|
|
@@ -544,6 +886,33 @@ def sync_orchestration(
|
|
|
544
886
|
errors.append(f"Exception during '{operation}': {e}")
|
|
545
887
|
success = False
|
|
546
888
|
|
|
889
|
+
# Calculate execution duration
|
|
890
|
+
duration = time.time() - start_time
|
|
891
|
+
|
|
892
|
+
# Extract cost and model from result for logging
|
|
893
|
+
actual_cost = 0.0
|
|
894
|
+
model_name = "unknown"
|
|
895
|
+
error_message = None
|
|
896
|
+
|
|
897
|
+
if success:
|
|
898
|
+
if isinstance(result, dict):
|
|
899
|
+
actual_cost = result.get('cost', 0.0)
|
|
900
|
+
model_name = result.get('model', 'unknown')
|
|
901
|
+
elif isinstance(result, tuple) and len(result) >= 3:
|
|
902
|
+
actual_cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
|
|
903
|
+
model_name = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else 'unknown'
|
|
904
|
+
else:
|
|
905
|
+
error_message = errors[-1] if errors else "Operation failed"
|
|
906
|
+
|
|
907
|
+
# Update and save log entry with execution results
|
|
908
|
+
update_sync_log_entry(log_entry, {
|
|
909
|
+
'success': success,
|
|
910
|
+
'cost': actual_cost,
|
|
911
|
+
'model': model_name,
|
|
912
|
+
'error': error_message
|
|
913
|
+
}, duration)
|
|
914
|
+
append_sync_log(basename, language, log_entry)
|
|
915
|
+
|
|
547
916
|
if success:
|
|
548
917
|
operations_completed.append(operation)
|
|
549
918
|
# Extract cost and model from result based on format
|
|
@@ -558,6 +927,47 @@ def sync_orchestration(
|
|
|
558
927
|
model = ''
|
|
559
928
|
_save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
|
|
560
929
|
|
|
930
|
+
# After successful crash operation, re-run the example to generate fresh run report
|
|
931
|
+
if operation == 'crash':
|
|
932
|
+
try:
|
|
933
|
+
example_file = pdd_files['example']
|
|
934
|
+
if example_file.exists():
|
|
935
|
+
# Run the example program to check if crash is actually fixed
|
|
936
|
+
try:
|
|
937
|
+
example_result = subprocess.run(
|
|
938
|
+
['python', str(example_file)],
|
|
939
|
+
capture_output=True,
|
|
940
|
+
text=True,
|
|
941
|
+
timeout=60,
|
|
942
|
+
env=os.environ.copy(),
|
|
943
|
+
cwd=str(example_file.parent)
|
|
944
|
+
)
|
|
945
|
+
|
|
946
|
+
# Create fresh run report based on actual execution
|
|
947
|
+
report_data = RunReport(
|
|
948
|
+
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
949
|
+
exit_code=example_result.returncode,
|
|
950
|
+
tests_passed=1 if example_result.returncode == 0 else 0,
|
|
951
|
+
tests_failed=0 if example_result.returncode == 0 else 1,
|
|
952
|
+
coverage=100.0 if example_result.returncode == 0 else 0.0
|
|
953
|
+
)
|
|
954
|
+
save_run_report(asdict(report_data), basename, language)
|
|
955
|
+
print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
|
|
956
|
+
|
|
957
|
+
except subprocess.TimeoutExpired:
|
|
958
|
+
# Example timed out - still considered a failure
|
|
959
|
+
report_data = RunReport(
|
|
960
|
+
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
961
|
+
exit_code=124, # Standard timeout exit code
|
|
962
|
+
tests_passed=0, tests_failed=1, coverage=0.0
|
|
963
|
+
)
|
|
964
|
+
save_run_report(asdict(report_data), basename, language)
|
|
965
|
+
print("Example timed out after crash fix - created failure run report")
|
|
966
|
+
|
|
967
|
+
except Exception as e:
|
|
968
|
+
# Don't fail the entire operation if example re-execution fails
|
|
969
|
+
print(f"Warning: Post-crash example re-execution failed: {e}")
|
|
970
|
+
|
|
561
971
|
# After successful fix operation, execute tests to update run report
|
|
562
972
|
if operation == 'fix':
|
|
563
973
|
try:
|
|
@@ -578,6 +988,16 @@ def sync_orchestration(
|
|
|
578
988
|
except Exception as e:
|
|
579
989
|
errors.append(f"An unexpected error occurred in the orchestrator: {e}")
|
|
580
990
|
finally:
|
|
991
|
+
# Log lock release
|
|
992
|
+
try:
|
|
993
|
+
log_sync_event(basename, language, "lock_released", {
|
|
994
|
+
"pid": os.getpid(),
|
|
995
|
+
"total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
|
|
996
|
+
"total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
|
|
997
|
+
})
|
|
998
|
+
except Exception:
|
|
999
|
+
pass # Don't fail if logging fails
|
|
1000
|
+
|
|
581
1001
|
if stop_event:
|
|
582
1002
|
stop_event.set()
|
|
583
1003
|
if animation_thread and animation_thread.is_alive():
|