pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py
CHANGED
@@ -12,21 +12,33 @@ import subprocess
 import re
 import os
 from pathlib import Path
-from typing import Dict, Any, Optional, List
-from dataclasses import asdict
+from typing import Dict, Any, Optional, List, Callable
+from dataclasses import asdict, dataclass, field
+import tempfile
+import sys

 import click
+import logging
+
+# --- Constants ---
+MAX_CONSECUTIVE_TESTS = 3  # Allow up to 3 consecutive test attempts
+MAX_TEST_EXTEND_ATTEMPTS = 2  # Allow up to 2 attempts to extend tests for coverage
+MAX_CONSECUTIVE_CRASHES = 3  # Allow up to 3 consecutive crash attempts (Bug #157 fix)

 # --- Real PDD Component Imports ---
-from .
+from .sync_tui import SyncApp
 from .sync_determine_operation import (
     sync_determine_operation,
     get_pdd_file_paths,
     RunReport,
+    SyncDecision,
     PDD_DIR,
     META_DIR,
     SyncLock,
     read_run_report,
+    calculate_sha256,
+    calculate_current_hashes,
+    _safe_basename,
 )
 from .auto_deps_main import auto_deps_main
 from .code_generator_main import code_generator_main
@@ -37,12 +49,114 @@ from .cmd_test_main import cmd_test_main
 from .fix_main import fix_main
 from .update_main import update_main
 from .python_env_detector import detect_host_python_executable
+from .get_run_command import get_run_command_for_file
+from .pytest_output import extract_failing_files_from_output
+from . import DEFAULT_STRENGTH
+
+
+# --- Helper Functions ---
+# Note: _safe_basename is imported from sync_determine_operation
+
+
+# --- Atomic State Update (Issue #159 Fix) ---
+
+@dataclass
+class PendingStateUpdate:
+    """Holds pending state updates for atomic commit."""
+    run_report: Optional[Dict[str, Any]] = None
+    fingerprint: Optional[Dict[str, Any]] = None
+    run_report_path: Optional[Path] = None
+    fingerprint_path: Optional[Path] = None
+
+
+class AtomicStateUpdate:
+    """
+    Context manager for atomic state updates.
+
+    Ensures run_report and fingerprint are both written or neither is written.
+    This fixes Issue #159 where non-atomic writes caused state desynchronization.
+
+    Usage:
+        with AtomicStateUpdate(basename, language) as state:
+            state.set_run_report(report_dict, report_path)
+            state.set_fingerprint(fingerprint_dict, fp_path)
+            # On successful exit, both files are written atomically
+            # On exception, neither file is written (rollback)
+    """
+
+    def __init__(self, basename: str, language: str):
+        self.basename = basename
+        self.language = language
+        self.pending = PendingStateUpdate()
+        self._temp_files: List[str] = []
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is None:
+            self._commit()
+        else:
+            self._rollback()
+        return False  # Don't suppress exceptions
+
+    def set_run_report(self, report: Dict[str, Any], path: Path):
+        """Buffer a run report for atomic write."""
+        self.pending.run_report = report
+        self.pending.run_report_path = path
+
+    def set_fingerprint(self, fingerprint: Dict[str, Any], path: Path):
+        """Buffer a fingerprint for atomic write."""
+        self.pending.fingerprint = fingerprint
+        self.pending.fingerprint_path = path
+
+    def _atomic_write(self, data: Dict[str, Any], target_path: Path) -> None:
+        """Write data to file atomically using temp file + rename pattern."""
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write to temp file in same directory (required for atomic rename)
+        fd, temp_path = tempfile.mkstemp(
+            dir=target_path.parent,
+            prefix=f".{target_path.stem}_",
+            suffix=".tmp"
+        )
+        self._temp_files.append(temp_path)
+
+        try:
+            with os.fdopen(fd, 'w') as f:
+                json.dump(data, f, indent=2, default=str)
+
+            # Atomic rename - guaranteed atomic on POSIX systems
+            os.replace(temp_path, target_path)
+            self._temp_files.remove(temp_path)  # Successfully moved, stop tracking
+        except Exception:
+            # Leave temp file for rollback to clean up
+            raise
+
+    def _commit(self):
+        """Commit all pending state updates atomically."""
+        # Write fingerprint first (checkpoint), then run_report
+        if self.pending.fingerprint and self.pending.fingerprint_path:
+            self._atomic_write(self.pending.fingerprint, self.pending.fingerprint_path)
+        if self.pending.run_report and self.pending.run_report_path:
+            self._atomic_write(self.pending.run_report, self.pending.run_report_path)
+
+    def _rollback(self):
+        """Clean up any temp files without committing changes."""
+        for temp_path in self._temp_files:
+            try:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+            except OSError:
+                pass  # Best effort cleanup
+        self._temp_files.clear()
+

 # --- Mock Helper Functions ---

 def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
     """Load sync log entries for a basename and language."""
-    log_file = META_DIR / f"{basename}_{language}_sync.log"
+    log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
     if not log_file.exists():
         return []
     try:
@@ -84,7 +198,7 @@ def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duratio

 def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
     """Append completed log entry to the sync log file."""
-    log_file = META_DIR / f"{basename}_{language}_sync.log"
+    log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
     META_DIR.mkdir(parents=True, exist_ok=True)
     with open(log_file, 'a') as f:
         f.write(json.dumps(entry) + '\n')
@@ -98,20 +212,44 @@ def log_sync_event(basename: str, language: str, event: str, details: Dict[str,
     }
     append_sync_log(basename, language, entry)

-def save_run_report(report: Dict[str, Any], basename: str, language: str
-
-
-
-
-
+def save_run_report(report: Dict[str, Any], basename: str, language: str,
+                    atomic_state: Optional['AtomicStateUpdate'] = None):
+    """Save a run report to the metadata directory.
+
+    Args:
+        report: The run report dictionary to save.
+        basename: The module basename.
+        language: The programming language.
+        atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+    """
+    report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
+    if atomic_state:
+        # Buffer for atomic write
+        atomic_state.set_run_report(report, report_file)
+    else:
+        # Legacy direct write
+        META_DIR.mkdir(parents=True, exist_ok=True)
+        with open(report_file, 'w') as f:
+            json.dump(report, f, indent=2, default=str)
+
+def _save_operation_fingerprint(basename: str, language: str, operation: str,
+                                paths: Dict[str, Path], cost: float, model: str,
+                                atomic_state: Optional['AtomicStateUpdate'] = None):
+    """Save fingerprint state after successful operation.

-
-
-
+    Args:
+        basename: The module basename.
+        language: The programming language.
+        operation: The operation that was performed.
+        paths: Dictionary of PDD file paths.
+        cost: The cost of the operation.
+        model: The model used.
+        atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+    """
     from datetime import datetime, timezone
     from .sync_determine_operation import calculate_current_hashes, Fingerprint
     from . import __version__
-
+
     current_hashes = calculate_current_hashes(paths)
     fingerprint = Fingerprint(
         pdd_version=__version__,
@@ -120,103 +258,543 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
         prompt_hash=current_hashes.get('prompt_hash'),
         code_hash=current_hashes.get('code_hash'),
         example_hash=current_hashes.get('example_hash'),
-        test_hash=current_hashes.get('test_hash')
+        test_hash=current_hashes.get('test_hash'),
+        test_files=current_hashes.get('test_files'),  # Bug #156
     )
-
-    META_DIR.mkdir(parents=True, exist_ok=True)
-    fingerprint_file = META_DIR / f"{basename}_{language}.json"
-    with open(fingerprint_file, 'w') as f:
-        json.dump(asdict(fingerprint), f, indent=2, default=str)

-
+    fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
+    if atomic_state:
+        # Buffer for atomic write
+        atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
+    else:
+        # Legacy direct write
+        META_DIR.mkdir(parents=True, exist_ok=True)
+        with open(fingerprint_file, 'w') as f:
+            json.dump(asdict(fingerprint), f, indent=2, default=str)
+
+def _python_cov_target_for_code_file(code_file: Path) -> str:
+    """Return a `pytest-cov` `--cov` target for a Python code file.
+
+    - If the file is inside a Python package (directories with `__init__.py`),
+      returns a dotted module path (e.g., `pdd.sync_orchestration`).
+    - Otherwise falls back to the filename stem (e.g., `admin_get_users`).
+    """
+    if code_file.suffix != ".py":
+        return code_file.stem
+
+    package_dir: Optional[Path] = None
+    current = code_file.parent
+    while (current / "__init__.py").exists():
+        package_dir = current
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+
+    if package_dir:
+        relative_module = code_file.relative_to(package_dir.parent).with_suffix("")
+        return str(relative_module).replace(os.sep, ".")
+
+    return code_file.stem
+
+
+def _python_cov_target_for_test_and_code(test_file: Path, code_file: Path, fallback: str) -> str:
+    """Choose the best `--cov` target based on how tests import the code.
+
+    In some repos, tests add a directory to `sys.path` and import modules by their
+    filename stem (e.g., `from admin_get_users import ...`) even when the code
+    also lives under a package (e.g., `backend.functions.admin_get_users`).
+
+    Heuristic:
+    - Prefer the code file stem when the test file imports it directly.
+    - Otherwise, prefer the dotted module path derived from the package layout.
+    - Fall back to the provided fallback (usually the basename).
+    """
+
+    def _imports_module(source: str, module: str) -> bool:
+        escaped = re.escape(module)
+        return bool(
+            re.search(rf"^\s*import\s+{escaped}\b", source, re.MULTILINE)
+            or re.search(rf"^\s*from\s+{escaped}\b", source, re.MULTILINE)
+        )
+
+    stem = code_file.stem
+    dotted = _python_cov_target_for_code_file(code_file)

-def _execute_tests_and_create_run_report(test_file: Path, basename: str, language: str, target_coverage: float = 90.0) -> RunReport:
-    """Execute tests and create a RunReport with actual results."""
-    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
-
     try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        test_source = test_file.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        test_source = ""
+
+    if stem and _imports_module(test_source, stem):
+        return stem
+
+    if dotted and dotted != stem:
+        if _imports_module(test_source, dotted):
+            return dotted
+
+        if "." in dotted:
+            parent = dotted.rsplit(".", 1)[0]
+            # e.g. `from backend.functions import admin_get_users`
+            if re.search(
+                rf"^\s*from\s+{re.escape(parent)}\s+import\s+.*\b{re.escape(stem)}\b",
+                test_source,
+                re.MULTILINE,
+            ):
+                return dotted
+            # e.g. `import backend.functions.admin_get_users`
+            if re.search(
+                rf"^\s*import\s+{re.escape(parent)}\.{re.escape(stem)}\b",
+                test_source,
+                re.MULTILINE,
+            ):
+                return dotted
+
+        return dotted
+
+    return stem or fallback
+
+
+def _parse_test_output(output: str, language: str) -> tuple[int, int, float]:
+    """
+    Parse test output to extract passed/failed/coverage.
+
+    Args:
+        output: Combined stdout/stderr from test runner
+        language: Language name (e.g., 'python', 'typescript', 'go')
+
+    Returns:
+        (tests_passed, tests_failed, coverage)
+    """
+    tests_passed = 0
+    tests_failed = 0
+    coverage = 0.0
+
+    lang = language.lower()
+
+    # Python (pytest)
+    if lang == 'python':
+        if 'passed' in output:
+            passed_match = re.search(r'(\d+) passed', output)
             if passed_match:
                 tests_passed = int(passed_match.group(1))
-
-
-        failed_match = re.search(r'(\d+) failed', stdout)
+        if 'failed' in output:
+            failed_match = re.search(r'(\d+) failed', output)
             if failed_match:
                 tests_failed = int(failed_match.group(1))
-
-
-
+        if 'error' in output:
+            error_match = re.search(r'(\d+) error', output)
+            if error_match:
+                tests_failed += int(error_match.group(1))
+        coverage_match = re.search(r'TOTAL.*?(\d+)%', output)
         if not coverage_match:
-
-            coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
+            coverage_match = re.search(r'(\d+)%\s*$', output, re.MULTILINE)
         if not coverage_match:
-
-            coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
-
+            coverage_match = re.search(r'(\d+(?:\.\d+)?)%', output)
         if coverage_match:
             coverage = float(coverage_match.group(1))
-
-
+
+    # Jest/Vitest (JavaScript/TypeScript)
+    elif lang in ('javascript', 'typescript', 'typescriptreact'):
+        # "Tests: X passed, Y failed" or "Tests: X passed, Y failed, Z total"
+        match = re.search(r'Tests:\s*(\d+)\s+passed', output)
+        if match:
+            tests_passed = int(match.group(1))
+        match = re.search(r'Tests:.*?(\d+)\s+failed', output)
+        if match:
+            tests_failed = int(match.group(1))
+
+        # Alternative Mocha-style: "X passing, Y failing"
+        if tests_passed == 0:
+            pass_match = re.search(r'(\d+)\s+pass(?:ing)?', output, re.I)
+            if pass_match:
+                tests_passed = int(pass_match.group(1))
+        if tests_failed == 0:
+            fail_match = re.search(r'(\d+)\s+fail(?:ing)?', output, re.I)
+            if fail_match:
+                tests_failed = int(fail_match.group(1))
+
+        # Coverage: "All files | XX.XX |"
+        cov_match = re.search(r'All files[^|]*\|\s*(\d+\.?\d*)', output)
+        if cov_match:
+            coverage = float(cov_match.group(1))
+
+    # Go
+    elif lang == 'go':
+        # Count PASS and FAIL occurrences for individual tests
+        tests_passed = len(re.findall(r'--- PASS:', output))
+        tests_failed = len(re.findall(r'--- FAIL:', output))
+
+        # Fallback: check for overall PASS/FAIL
+        if tests_passed == 0 and 'PASS' in output and 'FAIL' not in output:
+            tests_passed = 1
+        if tests_failed == 0 and 'FAIL' in output:
+            tests_failed = 1
+
+        # coverage: XX.X% of statements
+        cov_match = re.search(r'coverage:\s*(\d+\.?\d*)%', output)
+        if cov_match:
+            coverage = float(cov_match.group(1))
+
+    # Rust (cargo test)
+    elif lang == 'rust':
+        # "test result: ok. X passed; Y failed;"
+        match = re.search(r'(\d+)\s+passed', output)
+        if match:
+            tests_passed = int(match.group(1))
+        match = re.search(r'(\d+)\s+failed', output)
+        if match:
+            tests_failed = int(match.group(1))
+
+    # Fallback: try generic patterns
+    else:
+        pass_match = re.search(r'(\d+)\s+(?:tests?\s+)?pass(?:ed)?', output, re.I)
+        fail_match = re.search(r'(\d+)\s+(?:tests?\s+)?fail(?:ed)?', output, re.I)
+        if pass_match:
+            tests_passed = int(pass_match.group(1))
+        if fail_match:
+            tests_failed = int(fail_match.group(1))
+
+    return tests_passed, tests_failed, coverage
+
+
+def _detect_example_errors(output: str) -> tuple[bool, str]:
+    """
+    Detect if example output contains error indicators.
+
+    Only detects true crashes/errors:
+    - Python tracebacks (catches ALL unhandled exceptions)
+    - ERROR level log messages
+
+    Intentionally does NOT detect:
+    - HTTP status codes (examples may test error responses)
+    - Individual exception type names (causes false positives, redundant with traceback)
+
+    Returns:
+        (has_errors, error_summary)
+    """
+    error_patterns = [
+        (r'Traceback \(most recent call last\):', 'Python traceback'),
+        (r' - ERROR - ', 'Error log message'),  # Python logging format
+    ]
+
+    errors_found = []
+    for pattern, description in error_patterns:
+        if re.search(pattern, output, re.MULTILINE):
+            errors_found.append(description)
+
+    if errors_found:
+        return True, '; '.join(errors_found)
+    return False, ''
+
+
+def _try_auto_fix_import_error(
+    error_output: str,
+    code_file: Path,
+    example_file: Path,
+) -> tuple[bool, str]:
+    """
+    Try to automatically fix common import errors before calling expensive agentic fix.
+
+    Returns:
+        (fixed, message): Whether a fix was attempted and what was done.
+    """
+    import re
+
+    # Check for ModuleNotFoundError or ImportError
+    module_not_found = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", error_output)
+    import_error = re.search(r"ImportError: cannot import name ['\"]([^'\"]+)['\"]", error_output)
+
+    if not module_not_found and not import_error:
+        return False, "No import error detected"
+
+    if module_not_found:
+        missing_module = module_not_found.group(1)
+        # Split by . to get the top-level package
+        top_level_package = missing_module.split('.')[0]
+
+        # Check if this is the module we're trying to import (local module)
+        code_module_name = code_file.stem  # e.g., "data_validator" from "data_validator.py"
+
+        if top_level_package == code_module_name:
+            # It's trying to import our own generated code - fix the example's sys.path
+            # Read the example and fix the path manipulation
+            try:
+                example_content = example_file.read_text(encoding='utf-8')
+                code_dir = str(code_file.parent.resolve())
+
+                # Look for existing sys.path manipulation
+                if 'sys.path' in example_content:
+                    # Try to fix the existing path manipulation
+                    # Common pattern: module_path = os.path.abspath(os.path.join(...))
+                    # Replace with correct path
+                    fixed_content = re.sub(
+                        r"module_path\s*=\s*os\.path\.abspath\([^)]+\)",
+                        f"module_path = '{code_dir}'",
+                        example_content
+                    )
+                    if fixed_content != example_content:
+                        example_file.write_text(fixed_content, encoding='utf-8')
+                        return True, f"Fixed sys.path to point to {code_dir}"
+
+                # If no existing sys.path, add one at the start after imports
+                lines = example_content.split('\n')
+                insert_pos = 0
+                for i, line in enumerate(lines):
+                    if line.startswith('import ') or line.startswith('from '):
+                        if 'sys' in line or 'os' in line:
+                            insert_pos = i + 1
+                        continue
+                    if line.strip() and not line.startswith('#') and not line.startswith('import') and not line.startswith('from'):
+                        insert_pos = i
+                        break
+
+                path_fix = f"\n# Auto-added by pdd to fix import\nimport sys\nsys.path.insert(0, '{code_dir}')\n"
+                lines.insert(insert_pos, path_fix)
+                example_file.write_text('\n'.join(lines), encoding='utf-8')
+                return True, f"Added sys.path.insert(0, '{code_dir}') to example"
+
+            except Exception as e:
+                return False, f"Failed to fix import path: {e}"
+
+        else:
+            # It's an external package - try pip install
+            try:
+                result = subprocess.run(
+                    [sys.executable, '-m', 'pip', 'install', top_level_package],
+                    capture_output=True,
+                    text=True,
+                    timeout=120
+                )
+                if result.returncode == 0:
+                    return True, f"Installed missing package: {top_level_package}"
+                else:
+                    return False, f"Failed to install {top_level_package}: {result.stderr}"
+            except Exception as e:
+                return False, f"Failed to run pip install: {e}"
+
+    return False, "Import error detected but no auto-fix available"
+
+
+def _run_example_with_error_detection(
+    cmd_parts: list[str],
+    env: dict,
+    cwd: Optional[str] = None,
+    timeout: int = 60
+) -> tuple[int, str, str]:
+    """
+    Run example file, detecting errors from output.
+
+    For server-style examples that block, this runs until timeout
+    then analyzes output for errors. No errors = success.
+
+    Returns:
+        (returncode, stdout, stderr)
+        - returncode: 0 if no errors detected, positive if errors found or process failed
+    """
+    import threading
+
+    proc = subprocess.Popen(
+        cmd_parts,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        stdin=subprocess.DEVNULL,
+        env=env,
+        cwd=cwd,
+        start_new_session=True,
+    )
+
+    stdout_chunks = []
+    stderr_chunks = []
+
+    def read_pipe(pipe, chunks):
+        try:
+            for line in iter(pipe.readline, b''):
+                chunks.append(line)
+        except Exception:
+            pass
+
+    t_out = threading.Thread(target=read_pipe, args=(proc.stdout, stdout_chunks), daemon=True)
+    t_err = threading.Thread(target=read_pipe, args=(proc.stderr, stderr_chunks), daemon=True)
+    t_out.start()
+    t_err.start()
+
+    # Wait for process or timeout
+    try:
+        proc.wait(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        proc.terminate()
+        try:
+            proc.wait(timeout=5)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.wait()
+
+    t_out.join(timeout=2)
+    t_err.join(timeout=2)
+
+    stdout = b''.join(stdout_chunks).decode('utf-8', errors='replace')
+    stderr = b''.join(stderr_chunks).decode('utf-8', errors='replace')
+    combined = stdout + '\n' + stderr
+
+    # Check for errors in output
+    has_errors, error_summary = _detect_example_errors(combined)
+
+    # Determine result (check returncode first, then use error detection for signal-killed):
+    # - Zero exit code → success (trust the exit code)
+    # - Positive exit code (process failed normally, e.g., sys.exit(1)) → failure
+    # - Negative exit code (killed by signal, e.g., -9 for SIGKILL) → check output
+    #
+    # IMPORTANT: When we kill the process after timeout, returncode is negative
+    # (the signal number). This is NOT a failure if output has no errors.
+    if proc.returncode is not None and proc.returncode == 0:
+        return 0, stdout, stderr  # Clean exit = success (trust exit code)
+    elif proc.returncode is not None and proc.returncode > 0:
+        return proc.returncode, stdout, stderr  # Process exited with error
+    else:
+        # Killed by signal (returncode < 0 or None) - use error detection
+        # Server-style examples may run until timeout, need to check output
+        if has_errors:
+            return 1, stdout, stderr  # Errors detected in output
+        return 0, stdout, stderr  # No errors, server was running fine
+
+
+def _execute_tests_and_create_run_report(
+    test_file: Path,
+    basename: str,
+    language: str,
+    target_coverage: float = 90.0,
+    *,
+    code_file: Optional[Path] = None,
+    atomic_state: Optional['AtomicStateUpdate'] = None,
+    test_files: Optional[List[Path]] = None,  # Bug #156: Support multiple test files
+) -> RunReport:
+    """Execute tests and create a RunReport with actual results.
+
+    Now supports multiple languages by using get_test_command_for_file()
+    to determine the appropriate test runner.
+
+    Args:
+        test_file: Primary test file (for backward compat)
+        test_files: Optional list of all test files to run (Bug #156)
+    """
+    from .get_test_command import get_test_command_for_file
+
+    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
+
+    # Bug #156: Use test_files if provided, otherwise just the single test_file
+    all_test_files = test_files if test_files else [test_file]
+
+    # Calculate test file hash for staleness detection (primary file for backward compat)
+    test_hash = calculate_sha256(test_file) if test_file.exists() else None
+
+    # Bug #156: Calculate hashes for ALL test files
+    test_file_hashes = {
+        f.name: calculate_sha256(f)
+        for f in all_test_files
+        if f.exists()
+    } if all_test_files else None
+
+    # Use clean env without TUI-specific vars
+    clean_env = os.environ.copy()
+    for var in ['FORCE_COLOR', 'COLUMNS']:
+        clean_env.pop(var, None)
+
+    try:
+        lang_lower = language.lower()
+
+        # Python: use existing pytest logic with coverage
+        if lang_lower == "python":
+            module_name = test_file.name.replace('test_', '').replace('.py', '')
+            python_executable = detect_host_python_executable()
+
+            cov_target = None
+            if code_file is not None:
+                cov_target = _python_cov_target_for_test_and_code(test_file, code_file, basename or module_name)
+            else:
+                cov_target = basename or module_name
+
+            if not cov_target:
+                cov_target = basename or module_name
+
+            # Bug #156: Run pytest on ALL test files
+            pytest_args = [
+                python_executable, '-m', 'pytest',
+            ] + [str(f) for f in all_test_files] + [
+                '-v',
+                '--tb=short',
+                f'--cov={cov_target}',
+                '--cov-report=term-missing'
+            ]
+            result = subprocess.run(
+                pytest_args,
+                capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
+            )
+
+            exit_code = result.returncode
+            stdout = result.stdout + (result.stderr or '')
+            tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
+        else:
+            # Non-Python: use language-appropriate test command
+            test_cmd = get_test_command_for_file(str(test_file), language)
+
+            if test_cmd is None:
+                # No test command available - return report indicating this
+                report = RunReport(
+                    timestamp=timestamp,
+                    exit_code=127,  # Command not found
+                    tests_passed=0,
+                    tests_failed=0,
+                    coverage=0.0,
+                    test_hash=test_hash,
+                    test_files=test_file_hashes,  # Bug #156
+                )
+                save_run_report(asdict(report), basename, language, atomic_state)
+                return report
+
+            # Run the test command
+            result = subprocess.run(
+                test_cmd,
+                shell=True,
+                capture_output=True,
+                text=True,
+                timeout=300,
+                env=clean_env,
+                cwd=str(test_file.parent),
+                stdin=subprocess.DEVNULL,
+                start_new_session=True
+            )
+
+            exit_code = result.returncode
+            stdout = (result.stdout or '') + '\n' + (result.stderr or '')
+
+            # Parse results based on language
+            tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
         report = RunReport(
             timestamp=timestamp,
             exit_code=exit_code,
             tests_passed=tests_passed,
             tests_failed=tests_failed,
-            coverage=coverage
+            coverage=coverage,
+            test_hash=test_hash,
+            test_files=test_file_hashes,  # Bug #156
         )
-
+
     except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception) as e:
-        # If test execution fails, create a report indicating failure
         report = RunReport(
             timestamp=timestamp,
             exit_code=1,
             tests_passed=0,
             tests_failed=1,
-            coverage=0.0
+            coverage=0.0,
+            test_hash=test_hash,
+            test_files=test_file_hashes,  # Bug #156
         )
-
-    # Save the run report
-    save_run_report(asdict(report), basename, language)
-    return report

-
+    save_run_report(asdict(report), basename, language, atomic_state)
+    return report

 def _create_mock_context(**kwargs) -> click.Context:
     """Creates a mock Click context object to pass parameters to command functions."""
@@ -227,7 +805,7 @@ def _create_mock_context(**kwargs) -> click.Context:

 def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Dict[str, Any]:
     """Displays the sync log for a given basename and language."""
-    log_file = META_DIR / f"{basename}_{language}_sync.log"
+    log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
    if not log_file.exists():
         print(f"No sync log found for '{basename}' in language '{language}'.")
         return {'success': False, 'errors': ['Log file not found.'], 'log_entries': []}
@@ -242,7 +820,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
     for entry in log_entries:
         timestamp = entry.get('timestamp', 'N/A')

-        # Handle special event entries
         if 'event' in entry:
             event = entry.get('event', 'N/A')
             print(f"[{timestamp[:19]}] EVENT: {event}")
@@ -251,7 +828,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
                 print(f"  Details: {details_str}")
             continue

-        # Handle operation entries
         operation = entry.get('operation', 'N/A')
         reason = entry.get('reason', 'N/A')
         success = entry.get('success')
@@ -260,7 +836,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
         duration = entry.get('duration')

         if verbose:
-            # Verbose format
             print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
             decision_type = entry.get('decision_type', 'N/A')
             confidence = entry.get('confidence', 'N/A')
@@ -276,14 +851,12 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
             print(f"  Estimated Cost: ${estimated_cost:.2f}")

             if 'details' in entry and entry['details']:
-                # Show details without budget_remaining to avoid clutter
                 details_copy = entry['details'].copy()
                 details_copy.pop('budget_remaining', None)
                 if details_copy:
                     details_str = json.dumps(details_copy, indent=2)
                     print(f"  Details: {details_str}")
         else:
-            # Normal format: [timestamp] operation | reason | status cost | duration
             status_icon = "✓" if success else "✗" if success is False else "?"

             cost_info = ""
@@ -318,45 +891,68 @@ def sync_orchestration(
     budget: float = 10.0,
     skip_verify: bool = False,
     skip_tests: bool = False,
-
+    dry_run: bool = False,
     force: bool = False,
-    strength: float =
+    strength: float = DEFAULT_STRENGTH,
     temperature: float = 0.0,
-    time_param: float = 0.25,
+    time_param: float = 0.25,
     verbose: bool = False,
     quiet: bool = False,
     output_cost: Optional[str] = None,
     review_examples: bool = False,
     local: bool = False,
     context_config: Optional[Dict[str, str]] = None,
+    context_override: Optional[str] = None,
+    confirm_callback: Optional[Callable[[str, str], bool]] = None,
 ) -> Dict[str, Any]:
     """
     Orchestrates the complete PDD sync workflow with parallel animation.
-
-    If log=True, displays the sync log instead of running sync operations.
-    The verbose flag controls the detail level of the log output.
-
-    Returns a dictionary summarizing the outcome of the sync process.
     """
-
+    # Handle None values from CLI (Issue #194) - defense in depth
+    if target_coverage is None:
+        target_coverage = 90.0
+    if budget is None:
+        budget = 10.0
+    if max_attempts is None:
+        max_attempts = 3
+
+    # Import get_extension at function scope
+    from .sync_determine_operation import get_extension
+
+    if dry_run:
         return _display_sync_log(basename, language, verbose)

     # --- Initialize State and Paths ---
     try:
-        pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
+        pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+    except FileNotFoundError as e:
+        if "test_config.py" in str(e) or "tests/test_" in str(e):
+            pdd_files = {
+                'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
+                'code': Path(f"src/{basename}.{get_extension(language)}"),
+                'example': Path(f"context/{basename}_example.{get_extension(language)}"),
+                'test': Path(f"tests/test_{basename}.{get_extension(language)}")
+            }
+            if not quiet:
+                print(f"Note: Test file missing, continuing with sync workflow to generate it")
+        else:
+            print(f"Error constructing paths: {e}")
+            return {
+                "success": False,
+                "error": f"Failed to construct paths: {str(e)}",
+                "operations_completed": [],
+                "errors": [f"Path construction failed: {str(e)}"]
+            }
     except Exception as e:
-        # Log the error and return early with failure status
         print(f"Error constructing paths: {e}")
         return {
             "success": False,
-            "total_cost": 0.0,
-            "model_name": "",
             "error": f"Failed to construct paths: {str(e)}",
             "operations_completed": [],
             "errors": [f"Path construction failed: {str(e)}"]
         }

-    # Shared state for animation
+    # Shared state for animation (passed to App)
     current_function_name_ref = ["initializing"]
     stop_event = threading.Event()
     current_cost_ref = [0.0]
@@ -364,696 +960,744 @@ def sync_orchestration(
|
|
|
364
960
|
code_path_ref = [str(pdd_files.get('code', 'N/A'))]
|
|
365
961
|
example_path_ref = [str(pdd_files.get('example', 'N/A'))]
|
|
366
962
|
tests_path_ref = [str(pdd_files.get('test', 'N/A'))]
|
|
367
|
-
prompt_box_color_ref
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
operations_completed: List[str] = []
|
|
372
|
-
skipped_operations: List[str] = []
|
|
373
|
-
errors: List[str] = []
|
|
374
|
-
start_time = time.time()
|
|
375
|
-
animation_thread = None
|
|
376
|
-
|
|
377
|
-
# Track operation history for cycle detection
|
|
378
|
-
operation_history: List[str] = []
|
|
379
|
-
MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
|
|
963
|
+
prompt_box_color_ref = ["blue"]
|
|
964
|
+
code_box_color_ref = ["blue"]
|
|
965
|
+
example_box_color_ref = ["blue"]
|
|
966
|
+
tests_box_color_ref = ["blue"]
|
|
380
967
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
|
|
385
|
-
|
|
386
|
-
# --- Start Animation Thread ---
|
|
387
|
-
animation_thread = threading.Thread(
|
|
388
|
-
target=sync_animation,
|
|
389
|
-
args=(
|
|
390
|
-
current_function_name_ref, stop_event, basename, current_cost_ref, budget,
|
|
391
|
-
prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref,
|
|
392
|
-
prompt_path_ref, code_path_ref, example_path_ref, tests_path_ref
|
|
393
|
-
),
|
|
394
|
-
daemon=True
|
|
395
|
-
)
|
|
396
|
-
animation_thread.start()
|
|
397
|
-
|
|
398
|
-
# --- Main Workflow Loop ---
|
|
399
|
-
while True:
|
|
400
|
-
budget_remaining = budget - current_cost_ref[0]
|
|
401
|
-
if current_cost_ref[0] >= budget:
|
|
402
|
-
errors.append(f"Budget of ${budget:.2f} exceeded.")
|
|
403
|
-
log_sync_event(basename, language, "budget_exceeded", {
|
|
404
|
-
"total_cost": current_cost_ref[0],
|
|
405
|
-
"budget": budget
|
|
406
|
-
})
|
|
407
|
-
break
|
|
408
|
-
|
|
409
|
-
# Log budget warning when running low
|
|
410
|
-
if budget_remaining < budget * 0.2 and budget_remaining > 0:
|
|
411
|
-
log_sync_event(basename, language, "budget_warning", {
|
|
412
|
-
"remaining": budget_remaining,
|
|
413
|
-
"percentage": (budget_remaining / budget) * 100
|
|
414
|
-
})
|
|
415
|
-
|
|
416
|
-
decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
|
|
417
|
-
operation = decision.operation
|
|
418
|
-
|
|
419
|
-
# Create log entry with decision info
|
|
420
|
-
log_entry = create_sync_log_entry(decision, budget_remaining)
|
|
421
|
-
|
|
422
|
-
# Track operation history
|
|
423
|
-
operation_history.append(operation)
|
|
424
|
-
|
|
425
|
-
# Detect crash-verify cycles
|
|
426
|
-
if len(operation_history) >= 4:
|
|
427
|
-
# Check for repeating crash-verify pattern
|
|
428
|
-
recent_ops = operation_history[-4:]
|
|
429
|
-
if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
|
|
430
|
-
recent_ops == ['verify', 'crash', 'verify', 'crash']):
|
|
431
|
-
# Count how many times this cycle has occurred
|
|
432
|
-
cycle_count = 0
|
|
433
|
-
for i in range(0, len(operation_history) - 1, 2):
|
|
434
|
-
if i + 1 < len(operation_history):
|
|
435
|
-
if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
|
|
436
|
-
(operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
|
|
437
|
-
cycle_count += 1
|
|
438
|
-
|
|
439
|
-
if cycle_count >= MAX_CYCLE_REPEATS:
|
|
440
|
-
errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
|
|
441
|
-
errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
|
|
442
|
-
log_sync_event(basename, language, "cycle_detected", {
|
|
443
|
-
"cycle_type": "crash-verify",
|
|
444
|
-
"cycle_count": cycle_count,
|
|
445
|
-
"operation_history": operation_history[-10:] # Last 10 operations
|
|
446
|
-
})
|
|
447
|
-
break
|
|
968
|
+
# Mutable container for the app reference (set after app creation)
|
|
969
|
+
# This allows the worker to access app.request_confirmation()
|
|
970
|
+
app_ref: List[Optional['SyncApp']] = [None]
|
|
448
971
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
972
|
+
# Progress callback ref for TUI ProgressBar updates during auto-deps
|
|
973
|
+
progress_callback_ref: List[Optional[Callable[[int, int], None]]] = [None]
|
|
974
|
+
|
|
975
|
+
# Track if user has already confirmed overwrite (to avoid asking multiple times)
|
|
976
|
+
user_confirmed_overwrite: List[bool] = [False]
|
|
977
|
+
|
|
978
|
+
def get_confirm_callback() -> Optional[Callable[[str, str], bool]]:
|
|
979
|
+
"""Get the confirmation callback from the app if available.
|
|
980
|
+
|
|
981
|
+
Once user confirms, we remember it so subsequent operations don't ask again.
|
|
982
|
+
|
|
983
|
+
Fix for Issue #277: In headless mode, we now return a wrapper callback
|
|
984
|
+
that uses click.confirm AND sets user_confirmed_overwrite[0] = True,
|
|
985
|
+
so subsequent calls auto-confirm instead of prompting repeatedly.
|
|
986
|
+
"""
|
|
987
|
+
if user_confirmed_overwrite[0]:
|
|
988
|
+
# User already confirmed, return a callback that always returns True
|
|
989
|
+
return lambda msg, title: True
|
|
990
|
+
|
|
991
|
+
if app_ref[0] is not None:
|
|
992
|
+
def confirming_callback(msg: str, title: str) -> bool:
|
|
993
|
+
result = app_ref[0].request_confirmation(msg, title)
|
|
994
|
+
if result:
|
|
995
|
+
user_confirmed_overwrite[0] = True
|
|
996
|
+
return result
|
|
997
|
+
return confirming_callback
|
|
998
|
+
|
|
999
|
+
# Fix #277: In headless mode (app_ref is None), create a wrapper callback
|
|
1000
|
+
# that sets the flag after confirmation, preventing repeated prompts
|
|
1001
|
+
if confirm_callback is None:
|
|
1002
|
+
def headless_confirming_callback(msg: str, title: str) -> bool:
|
|
1003
|
+
"""Headless mode callback that remembers user confirmation."""
|
|
1004
|
+
try:
|
|
1005
|
+
prompt = msg or "Overwrite existing files?"
|
|
1006
|
+
result = click.confirm(
|
|
1007
|
+
click.style(prompt, fg="yellow"),
|
|
1008
|
+
default=True,
|
|
1009
|
+
show_default=True
|
|
1010
|
+
)
|
|
1011
|
+
except (click.Abort, EOFError):
|
|
1012
|
+
return False
|
|
1013
|
+
if result:
|
|
1014
|
+
user_confirmed_overwrite[0] = True
|
|
1015
|
+
return result
|
|
1016
|
+
return headless_confirming_callback
|
|
1017
|
+
|
|
1018
|
+
return confirm_callback # Fall back to provided callback
|
|
1019
|
+
|
|
1020
|
+
+    def sync_worker_logic():
+        """
+        The main loop of sync logic, run in a worker thread by Textual App.
+        """
+        operations_completed: List[str] = []
+        skipped_operations: List[str] = []
+        errors: List[str] = []
+        start_time = time.time()
+        last_model_name: str = ""
+        operation_history: List[str] = []
+        MAX_CYCLE_REPEATS = 2
+
+        # Helper function to print inside worker (goes to RichLog via redirection)
+        # print() will work if sys.stdout is redirected.
+
+        try:
+            with SyncLock(basename, language):
+                log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
+
+                while True:
+                    budget_remaining = budget - current_cost_ref[0]
+                    if current_cost_ref[0] >= budget:
+                        errors.append(f"Budget of ${budget:.2f} exceeded.")
+                        log_sync_event(basename, language, "budget_exceeded", {
+                            "total_cost": current_cost_ref[0],
+                            "budget": budget
                         })
                         break

-
-
-
-
-
-
-
-
-                        error_msg = f"Manual merge required: {decision.reason}"
-                    elif operation == 'error':
-                        errors.append(f"Error determining operation: {decision.reason}")
-                        error_msg = f"Error determining operation: {decision.reason}"
-                    elif operation == 'analyze_conflict':
-                        errors.append(f"Conflict detected: {decision.reason}")
-                        error_msg = f"Conflict detected: {decision.reason}"
-
-                    # Update log entry for final operation
-                    update_sync_log_entry(log_entry, {
-                        'success': success,
-                        'cost': 0.0,
-                        'model': 'none',
-                        'error': error_msg
-                    }, 0.0)
-                    append_sync_log(basename, language, log_entry)
-
-                    break
-
-                    # Handle skips
-                    if operation == 'verify' and (skip_verify or skip_tests):
-                        # Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
-                        skipped_operations.append('verify')
-                        skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
-
-                        # Update log entry for skipped operation
-                        update_sync_log_entry(log_entry, {
-                            'success': True,
-                            'cost': 0.0,
-                            'model': 'skipped',
-                            'error': None
-                        }, 0.0)
-                        log_entry['details']['skip_reason'] = skip_reason
-                        append_sync_log(basename, language, log_entry)
+                    if budget_remaining < budget * 0.2 and budget_remaining > 0:
+                        log_sync_event(basename, language, "budget_warning", {
+                            "remaining": budget_remaining,
+                            "percentage": (budget_remaining / budget) * 100
+                        })
+
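The budget guard above stops the loop outright once spend reaches the budget and emits a warning event when less than 20% remains. A small sketch of that policy in isolation, with the same thresholds as the code:

```python
def budget_status(spent: float, budget: float) -> str:
    """Classify remaining budget: exceeded, warning (<20% left), or ok."""
    remaining = budget - spent
    if spent >= budget:
        return "exceeded"
    if 0 < remaining < budget * 0.2:
        return "warning"
    return "ok"

assert budget_status(5.0, 5.0) == "exceeded"
assert budget_status(4.5, 5.0) == "warning"
assert budget_status(1.0, 5.0) == "ok"
```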
+                    decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
+                    operation = decision.operation

-
-
-                            exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                        )
-                        save_run_report(asdict(report_data), basename, language)
-                        _save_operation_fingerprint(basename, language, 'verify', pdd_files, 0.0, skip_reason)
-                        continue
-                    if operation == 'test' and skip_tests:
-                        skipped_operations.append('test')
+                    log_entry = create_sync_log_entry(decision, budget_remaining)
+                    operation_history.append(operation)

-                        #
-
-                        '
-
-
-
-
-
-
+                    # Cycle detection logic
+                    if len(operation_history) >= 3:
+                        recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
+                        if len(recent_auto_deps) >= 2:
+                            errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
+                            log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
+                            operation = 'generate'
+                            decision.operation = 'generate'  # Update decision too
+
+                    # Bug #4 fix: Detect crash-verify cycle pattern
+                    # The pattern [crash, verify, crash, verify] or [verify, crash, verify, crash]
+                    # represents 2 iterations of the alternating cycle, so break immediately
+                    if len(operation_history) >= 4:
+                        recent_ops = operation_history[-4:]
+                        if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
+                                recent_ops == ['verify', 'crash', 'verify', 'crash']):
+                            # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
+                            errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
+                            log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
+                            break
+
+                    # Bug #4 fix: Detect test-fix cycle pattern
+                    # The pattern [test, fix, test, fix] or [fix, test, fix, test]
+                    # represents 2 iterations of the alternating cycle, so break immediately
+                    if len(operation_history) >= 4:
+                        recent_ops = operation_history[-4:]
+                        if (recent_ops == ['test', 'fix', 'test', 'fix'] or
+                                recent_ops == ['fix', 'test', 'fix', 'test']):
+                            # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
+                            errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
+                            log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
+                            break
+
+                    if operation == 'fix':
+                        consecutive_fixes = 0
+                        for i in range(len(operation_history) - 1, -1, -1):
+                            if operation_history[i] == 'fix':
+                                consecutive_fixes += 1
+                            else:
+                                break
+                        if consecutive_fixes >= 5:
+                            errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
+                            break
+
+                    if operation == 'test':
+                        consecutive_tests = 0
+                        for i in range(len(operation_history) - 1, -1, -1):
+                            if operation_history[i] == 'test':
+                                consecutive_tests += 1
+                            else:
+                                break
+                        if consecutive_tests >= MAX_CONSECUTIVE_TESTS:
+                            errors.append(f"Detected {consecutive_tests} consecutive test operations. Breaking infinite test loop.")
+                            break
+
+                    # Bug #157 fix: Prevent infinite crash retry loops
+                    if operation == 'crash':
+                        consecutive_crashes = 0
+                        for i in range(len(operation_history) - 1, -1, -1):
+                            if operation_history[i] == 'crash':
+                                consecutive_crashes += 1
+                            else:
+                                break
+                        if consecutive_crashes >= MAX_CONSECUTIVE_CRASHES:
+                            errors.append(f"Detected {consecutive_crashes} consecutive crash operations. Breaking infinite crash loop.")
+                            break
+
+                    if operation == 'test_extend':
+                        # Count test_extend attempts to prevent infinite loop
+                        extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
+                        if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
+                            # Accept current coverage after max attempts
+                            log_sync_event(basename, language, "test_extend_limit", {
+                                "attempts": extend_attempts,
+                                "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
+                                "reason": "Accepting current coverage after max extend attempts"
+                            })
+                            success = True
+                            break
+
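The loop guards above boil down to two checks on the operation history: a count of trailing consecutive occurrences of one operation, and a check for an alternating pair repeated twice. A minimal sketch of those two checks, with illustrative thresholds:

```python
from typing import List

def trailing_count(history: List[str], op: str) -> int:
    """How many times `op` occurs at the end of the history, uninterrupted."""
    count = 0
    for entry in reversed(history):   # walk back from the most recent entry
        if entry != op:
            break
        count += 1
    return count

def is_alternating_cycle(history: List[str], a: str, b: str) -> bool:
    """True when the last four operations alternate a/b (two full iterations)."""
    recent = history[-4:]
    return recent in ([a, b, a, b], [b, a, b, a])

history = ["generate", "crash", "verify", "crash", "verify"]
assert trailing_count(history, "verify") == 1
assert is_alternating_cycle(history, "crash", "verify") is True
```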
+                    if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
+                        current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
+                        success = operation in ['all_synced', 'nothing']
+                        error_msg = None
+                        if operation == 'fail_and_request_manual_merge':
+                            errors.append(f"Manual merge required: {decision.reason}")
+                            error_msg = decision.reason
+                        elif operation == 'error':
+                            errors.append(f"Error determining operation: {decision.reason}")
+                            error_msg = decision.reason
+                        elif operation == 'analyze_conflict':
+                            errors.append(f"Conflict detected: {decision.reason}")
+                            error_msg = decision.reason
+
+                        update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
+                        append_sync_log(basename, language, log_entry)
+                        break

-
-
-
+                    # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
+                    # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
+                    if operation == 'verify' and (skip_verify or skip_tests):
+                        skipped_operations.append('verify')
+                        update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                        append_sync_log(basename, language, log_entry)
+                        # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                        _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
+                        continue
+                    if operation == 'test' and skip_tests:
+                        skipped_operations.append('test')
+                        update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                        append_sync_log(basename, language, log_entry)
+                        # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                        _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
+                        continue
+                    if operation == 'crash' and (skip_tests or skip_verify):
+                        skipped_operations.append('crash')
+                        update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                        append_sync_log(basename, language, log_entry)
+                        # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                        _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
+                        # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
+                        # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
+                        current_hashes = calculate_current_hashes(pdd_files)
+                        synthetic_report = RunReport(
+                            timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                            exit_code=0,  # Assume success since we're skipping validation
+                            tests_passed=0,
+                            tests_failed=0,
+                            coverage=0.0,
+                            test_hash=current_hashes.get('test_hash')
+                        )
+                        save_run_report(asdict(synthetic_report), basename, language)
+                        continue
+
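The `skip:` prefix lets later completion checks treat a deliberately skipped operation as satisfied while still distinguishing it from an executed one. A hypothetical sketch of how a consumer of those fingerprints might interpret them (the helper name is illustrative, not pdd's `_is_workflow_complete`):

```python
def operation_satisfied(fingerprint_op: str, required_op: str) -> bool:
    """Treat both executed and explicitly skipped operations as done."""
    if fingerprint_op == required_op:
        return True                          # actually executed
    if fingerprint_op == f"skip:{required_op}":
        return True                          # skipped on purpose, still counts
    return False

assert operation_satisfied("skip:verify", "verify")
assert not operation_satisfied("skip:verify", "test")
```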
+                    current_function_name_ref[0] = operation
+                    ctx = _create_mock_context(
+                        force=force, strength=strength, temperature=temperature, time=time_param,
+                        verbose=verbose, quiet=quiet, output_cost=output_cost,
+                        review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
+                        max_attempts=max_attempts, target_coverage=target_coverage,
+                        confirm_callback=get_confirm_callback(),
+                        context=context_override
                     )
-                        save_run_report(asdict(report_data), basename, language)
-                        _save_operation_fingerprint(basename, language, 'test', pdd_files, 0.0, 'skipped')
-                        continue
-                    if operation == 'crash' and skip_tests:
-                        # Skip crash operations when tests are skipped since crash fixes usually require test execution
-                        skipped_operations.append('crash')

-
-
-
-                            'cost': 0.0,
-                            'model': 'skipped',
-                            'error': None
-                        }, 0.0)
-                        log_entry['details']['skip_reason'] = 'skip_tests'
-                        append_sync_log(basename, language, log_entry)
-
-                        # Create a dummy run report indicating crash was skipped
-                        report_data = RunReport(
-                            timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                            exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                        )
-                        save_run_report(asdict(report_data), basename, language)
-                        _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped')
-                        continue
-
-                    current_function_name_ref[0] = operation
-                    ctx = _create_mock_context(
-                        force=force, strength=strength, temperature=temperature, time=time_param,
-                        verbose=verbose, quiet=quiet, output_cost=output_cost,
-                        review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
-                        max_attempts=max_attempts, target_coverage=target_coverage
-                    )
-
-                    result = {}
-                    success = False
-                    start_time = time.time()  # Track execution time
+                    result = {}
+                    success = False
+                    op_start_time = time.time()

-
-
-
-                    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                output=str(pdd_files['example'])
-                            )
-                        elif operation == 'crash':
-                            # Validate required files exist before attempting crash operation
-                            required_files = [pdd_files['code'], pdd_files['example']]
-                            missing_files = [f for f in required_files if not f.exists()]
-
-                            if missing_files:
-                                # Skip crash operation if required files are missing
-                                print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
-                                skipped_operations.append('crash')
-
-                                # Update log entry for skipped operation
-                                update_sync_log_entry(log_entry, {
-                                    'success': True,
-                                    'cost': 0.0,
-                                    'model': 'skipped',
-                                    'error': None
-                                }, 0.0)
-                                log_entry['details']['skip_reason'] = 'missing_files'
-                                log_entry['details']['missing_files'] = [f.name for f in missing_files]
-                                append_sync_log(basename, language, log_entry)
+                    # Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
+                    with AtomicStateUpdate(basename, language) as atomic_state:
+
+                        # --- Execute Operation ---
+                        try:
+                            if operation == 'auto-deps':
+                                temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
+                                original_content = pdd_files['prompt'].read_text(encoding='utf-8')
+                                result = auto_deps_main(
+                                    ctx,
+                                    prompt_file=str(pdd_files['prompt']),
+                                    directory_path=examples_dir,
+                                    auto_deps_csv_path="project_dependencies.csv",
+                                    output=temp_output,
+                                    force_scan=False,
+                                    progress_callback=progress_callback_ref[0]
+                                )
+                                if Path(temp_output).exists():
+                                    import shutil
+                                    new_content = Path(temp_output).read_text(encoding='utf-8')
+                                    if new_content != original_content:
+                                        shutil.move(temp_output, str(pdd_files['prompt']))
+                                    else:
+                                        Path(temp_output).unlink()
+                                        result = (new_content, 0.0, 'no-changes')
+                            elif operation == 'generate':
+                                # Ensure code directory exists before generating
+                                pdd_files['code'].parent.mkdir(parents=True, exist_ok=True)
+                                # Use absolute paths to avoid path_resolution_mode mismatch between sync (cwd) and generate (config_base)
+                                result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt'].resolve()), output=str(pdd_files['code'].resolve()), original_prompt_file_path=None, force_incremental_flag=False)
+                                # Clear stale run_report so crash/verify is required for newly generated code
+                                run_report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
+                                run_report_file.unlink(missing_ok=True)
+                            elif operation == 'example':
+                                # Ensure example directory exists before generating
+                                pdd_files['example'].parent.mkdir(parents=True, exist_ok=True)
+                                # Use absolute paths to avoid path_resolution_mode mismatch between sync (cwd) and example (config_base)
+                                result = context_generator_main(ctx, prompt_file=str(pdd_files['prompt'].resolve()), code_file=str(pdd_files['code'].resolve()), output=str(pdd_files['example'].resolve()))
+                            elif operation == 'crash':
+                                required_files = [pdd_files['code'], pdd_files['example']]
+                                missing_files = [f for f in required_files if not f.exists()]
+                                if missing_files:
+                                    skipped_operations.append('crash')
+                                    continue

-
-
-
-                                    exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                                )
-                                save_run_report(asdict(report_data), basename, language)
-                                _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
-                                continue
-                            else:
-                                # Check if we have a run report indicating failures that need crash fixing
-                                current_run_report = read_run_report(basename, language)
-                                crash_log_content = ""
+                                # Crash handling logic (simplified copy from original)
+                                current_run_report = read_run_report(basename, language)
+                                crash_log_content = ""

-
-
-
-
-
-
-
-
-
-
-
-
-                                    env
-
+                                # Check for crash condition (either run report says so, or we check manually)
+                                has_crash = False
+                                if current_run_report and current_run_report.exit_code != 0:
+                                    has_crash = True
+                                    crash_log_content = f"Test execution failed exit code: {current_run_report.exit_code}\n"
+                                else:
+                                    # Manual check - run the example to see if it crashes
+                                    env = os.environ.copy()
+                                    src_dir = Path.cwd() / 'src'
+                                    env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
+                                    # Remove TUI-specific env vars that might contaminate subprocess
+                                    for var in ['FORCE_COLOR', 'COLUMNS']:
+                                        env.pop(var, None)
+                                    # Bug fix: Use sys.executable to match crash_main's Python interpreter
+                                    # and do NOT set cwd - inherit from pdd invocation directory
+                                    # to match crash_main behavior. Setting cwd to example's parent breaks imports.
+                                    example_path = str(pdd_files['example'].resolve())
+                                    cmd_parts = [sys.executable, example_path]
+                                    # Use error-detection runner that handles server-style examples
+                                    returncode, stdout, stderr = _run_example_with_error_detection(
+                                        cmd_parts,
+                                        env=env,
+                                        timeout=60
                                     )
-
-
-
-
-
-
-
-
-
-
+
+                                    class ExampleResult:
+                                        def __init__(self, rc, out, err):
+                                            self.returncode = rc
+                                            self.stdout = out
+                                            self.stderr = err
+
+                                    ex_res = ExampleResult(returncode, stdout, stderr)
+                                    if ex_res.returncode != 0:
+                                        has_crash = True
+                                        crash_log_content = f"Example failed exit code: {ex_res.returncode}\nSTDOUT:\n{ex_res.stdout}\nSTDERR:\n{ex_res.stderr}\n"
+                                        if "SyntaxError" in ex_res.stderr:
+                                            crash_log_content = "SYNTAX ERROR DETECTED:\n" + crash_log_content
                                     else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                        update_sync_log_entry(log_entry, {
-                                            'success': True,
-                                            'cost': 0.0,
-                                            'model': 'skipped',
-                                            'error': None
-                                        }, time.time() - start_time)
-                                        log_entry['details']['skip_reason'] = 'no_crash'
-                                        append_sync_log(basename, language, log_entry)
-
-                                        report_data = RunReport(
-                                            timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                            exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                                        )
-                                        save_run_report(asdict(report_data), basename, language)
-                                        _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
-                                        continue
-
-                                # Write actual error content or fallback
-                                if not crash_log_content:
-                                    crash_log_content = "Unknown crash error - program failed but no error output captured"
-
-                                Path("crash.log").write_text(crash_log_content)
-
-                                try:
-                                    result = crash_main(
-                                        ctx,
-                                        prompt_file=str(pdd_files['prompt']),
-                                        code_file=str(pdd_files['code']),
-                                        program_file=str(pdd_files['example']),
-                                        error_file="crash.log",
-                                        output=str(pdd_files['code']),
-                                        output_program=str(pdd_files['example']),
-                                        loop=True,
-                                        max_attempts=max_attempts,
-                                        budget=budget - current_cost_ref[0]
-                                    )
-                                except (RuntimeError, Exception) as e:
-                                    error_str = str(e)
-                                    if ("LLM returned None" in error_str or
-                                            "LLM failed to analyze errors" in error_str):
-                                        # Skip crash operation for LLM failures
-                                        print(f"Skipping crash operation due to LLM error: {e}")
-                                        skipped_operations.append('crash')
-
-                                        # Update log entry for skipped operation
-                                        update_sync_log_entry(log_entry, {
-                                            'success': False,
-                                            'cost': 0.0,
-                                            'model': 'skipped',
-                                            'error': f"LLM error: {str(e)}"
-                                        }, time.time() - start_time)
-                                        log_entry['details']['skip_reason'] = 'llm_error'
-                                        append_sync_log(basename, language, log_entry)
+                                        # No crash - save run report with exit_code=0 so sync_determine_operation
+                                        # knows the example was tested and passed (prevents infinite loop)
+                                        # Include test_hash for staleness detection
+                                        test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                        report = RunReport(
+                                            datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                            exit_code=0,
+                                            tests_passed=1,
+                                            tests_failed=0,
+                                            coverage=0.0,
+                                            test_hash=test_hash
+                                        )
+                                        save_run_report(asdict(report), basename, language)
+                                        skipped_operations.append('crash')
+                                        continue

-
-
-
+                                if has_crash:
+                                    # Try auto-fix for common import errors before expensive agentic call
+                                    auto_fixed, auto_fix_msg = _try_auto_fix_import_error(
+                                        crash_log_content,
+                                        pdd_files['code'],
+                                        pdd_files['example']
                                     )
-
-
+                                    if auto_fixed:
+                                        log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
+                                        # Retry running the example after auto-fix
+                                        retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
+                                            cmd_parts,
+                                            env=env,
+                                            timeout=60
+                                        )
+                                        if retry_returncode == 0:
+                                            # Auto-fix worked! Save run report and continue
+                                            log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
+                                            test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                            report = RunReport(
+                                                datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                                exit_code=0,
+                                                tests_passed=1,
+                                                tests_failed=0,
+                                                coverage=0.0,
+                                                test_hash=test_hash
+                                            )
+                                            save_run_report(asdict(report), basename, language)
+                                            result = (True, 0.0, 'auto-fix')
+                                            success = True
+                                            actual_cost = 0.0
+                                            model_name = 'auto-fix'
+                                            # Update crash_log_content for logging
+                                            crash_log_content = f"Auto-fixed: {auto_fix_msg}"
+                                            continue  # Skip crash_main, move to next operation
+                                        else:
+                                            # Auto-fix didn't fully work, update error log and proceed
+                                            crash_log_content = f"Auto-fix attempted ({auto_fix_msg}) but still failing:\nRETRY STDOUT:\n{retry_stdout}\nRETRY STDERR:\n{retry_stderr}\n"
+
+                                    Path("crash.log").write_text(crash_log_content)
+                                    try:
+                                        # For non-Python languages, set max_attempts=0 to skip iterative loop
+                                        # and go directly to agentic fallback
+                                        effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
+                                        result = crash_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), error_file="crash.log", output=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
+                                    except Exception as e:
+                                        print(f"Crash fix failed: {e}")
+                                        skipped_operations.append('crash')
+                                        continue
+
+                            elif operation == 'verify':
+                                if not pdd_files['example'].exists():
+                                    skipped_operations.append('verify')
                                     continue
-
-
-
-
-
-
-
-
-
-
-                                    output_program=str(pdd_files['example']),
-                                    loop=True,
-                                    verification_program=str(pdd_files['example']),
-                                    max_attempts=max_attempts,
-                                    budget=budget - current_cost_ref[0]
-                                )
-                            elif operation == 'test':
-                                # First, generate the test file
-                                result = cmd_test_main(
-                                    ctx,
-                                    prompt_file=str(pdd_files['prompt']),
-                                    code_file=str(pdd_files['code']),
-                                    output=str(pdd_files['test']),
-                                    language=language,
-                                    coverage_report=None,
-                                    existing_tests=None,
-                                    target_coverage=target_coverage,
-                                    merge=False
-                                )
-
-                                # After successful test generation, execute the tests and create run report
-                                # This enables the next sync iteration to detect test failures and trigger fix
-                                if isinstance(result, dict) and result.get('success', False):
-                                    try:
-                                        test_file = pdd_files['test']
-                                        if test_file.exists():
+                                # For non-Python languages, set max_attempts=0 to skip iterative loop
+                                # and go directly to agentic fallback
+                                effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
+                                result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
+                            elif operation == 'test':
+                                pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
+                                # Use merge=True when test file exists to preserve fixes and append new tests
+                                # instead of regenerating from scratch (which would overwrite fixes)
+                                test_file_exists = pdd_files['test'].exists()
+                                result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=[str(pdd_files['test'])] if test_file_exists else None, target_coverage=target_coverage, merge=test_file_exists, strength=strength, temperature=temperature)
+                                if pdd_files['test'].exists():
                                     _execute_tests_and_create_run_report(
-
+                                        pdd_files['test'],
+                                        basename,
+                                        language,
+                                        target_coverage,
+                                        code_file=pdd_files.get("code"),
+                                        atomic_state=atomic_state,
+                                        test_files=pdd_files.get('test_files'),  # Bug #156
+                                    )
+                            elif operation == 'test_extend':
+                                # Extend existing tests to improve coverage
+                                # Uses existing_tests and merge=True to add more test cases
+                                pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
+                                if pdd_files['test'].exists():
+                                    existing_test_path = str(pdd_files['test'])
+                                    result = cmd_test_main(
+                                        ctx,
+                                        prompt_file=str(pdd_files['prompt']),
+                                        code_file=str(pdd_files['code']),
+                                        output=str(pdd_files['test']),
+                                        language=language,
+                                        coverage_report=None,
+                                        existing_tests=[existing_test_path],
+                                        target_coverage=target_coverage,
+                                        merge=True,
+                                        strength=strength,
+                                        temperature=temperature
                                     )
-                                    except Exception as e:
-                                        # Don't fail the entire operation if test execution fails
-                                        # Just log it - the test file generation was successful
-                                        print(f"Warning: Test execution failed: {e}")
-                                elif isinstance(result, tuple) and len(result) >= 3:
-                                    # Handle tuple return format - assume success and execute tests
-                                    try:
-                                        test_file = pdd_files['test']
-                                        if test_file.exists():
                                     _execute_tests_and_create_run_report(
-
+                                        pdd_files['test'],
+                                        basename,
+                                        language,
+                                        target_coverage,
+                                        code_file=pdd_files.get("code"),
+                                        atomic_state=atomic_state,
+                                        test_files=pdd_files.get('test_files'),  # Bug #156
                                     )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                                else:
+                                    # No existing test file, fall back to regular test generation
+                                    result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=None, target_coverage=target_coverage, merge=False, strength=strength, temperature=temperature)
+                                    if pdd_files['test'].exists():
+                                        _execute_tests_and_create_run_report(
+                                            pdd_files['test'],
+                                            basename,
+                                            language,
+                                            target_coverage,
+                                            code_file=pdd_files.get("code"),
+                                            atomic_state=atomic_state,
+                                            test_files=pdd_files.get('test_files'),  # Bug #156
+                                        )
+                            elif operation == 'fix':
+                                error_file_path = Path("fix_errors.log")
+                                # Capture errors using language-appropriate test command
+                                try:
+                                    from .get_test_command import get_test_command_for_file
+                                    test_cmd = get_test_command_for_file(str(pdd_files['test']), language)
+
+                                    # Use clean env without TUI-specific vars
+                                    clean_env = os.environ.copy()
+                                    for var in ['FORCE_COLOR', 'COLUMNS']:
+                                        clean_env.pop(var, None)
+
+                                    if test_cmd:
+                                        # Run language-appropriate test command
+                                        if language.lower() == 'python':
+                                            # Use pytest directly for Python
+                                            python_executable = detect_host_python_executable()
+                                            # Bug #156: Run pytest on ALL matching test files
+                                            test_files = pdd_files.get('test_files', [pdd_files['test']])
+                                            pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
+                                            # Bug fix: Run from project root (no cwd), matching _run_tests_and_report pattern
+                                            # Using cwd=test.parent with paths like 'backend/tests/test_foo.py' causes
+                                            # pytest to look for 'backend/tests/backend/tests/test_foo.py' (not found)
+                                            test_result = subprocess.run(
+                                                pytest_args,
+                                                capture_output=True, text=True, timeout=300,
+                                                stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
+                                            )
+                                        else:
+                                            # Use shell command for non-Python
+                                            test_result = subprocess.run(
+                                                test_cmd,
+                                                shell=True,
+                                                capture_output=True, text=True, timeout=300,
+                                                stdin=subprocess.DEVNULL, env=clean_env,
+                                                cwd=str(pdd_files['test'].parent),
+                                                start_new_session=True
+                                            )
+                                        error_content = f"Test output:\n{test_result.stdout}\n{test_result.stderr}"
+                                    else:
+                                        # No test command available - trigger agentic fallback with context
+                                        error_content = f"No test command available for {language}. Please run tests manually and provide error output."
+                                except Exception as e:
+                                    error_content = f"Test execution error: {e}"
+                                error_file_path.write_text(error_content)
+
+                                # Bug #156 fix: Parse pytest output to find actual failing files
+                                # and pass the correct file to fix_main
+                                failing_files = extract_failing_files_from_output(error_content)
+                                unit_test_file_for_fix = str(pdd_files['test'])  # Default to tracked file
+
+                                if failing_files:
+                                    # Try to resolve the failing file paths
+                                    test_dir = pdd_files['test'].parent
+                                    tracked_file_name = pdd_files['test'].name
+
+                                    # Check if the tracked file is among the failures
+                                    tracked_in_failures = any(
+                                        Path(ff).name == tracked_file_name for ff in failing_files
+                                    )
+
+                                    if not tracked_in_failures:
+                                        # Failures are in a different file - use the first failing file
+                                        for ff in failing_files:
+                                            # Try to resolve the path relative to test directory
+                                            ff_path = Path(ff)
+                                            if ff_path.is_absolute() and ff_path.exists():
+                                                unit_test_file_for_fix = str(ff_path)
+                                                break
+                                            else:
+                                                # Try to find it in the test directory
+                                                candidate = test_dir / ff_path.name
+                                                if candidate.exists():
+                                                    unit_test_file_for_fix = str(candidate)
+                                                    break
+                                                # Also try the path as-is relative to cwd
+                                                if ff_path.exists():
+                                                    unit_test_file_for_fix = str(ff_path.resolve())
+                                                    break
+
+                                # For non-Python languages, set max_attempts=0 to skip iterative loop
+                                # and go directly to agentic fallback
+                                effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
+                                result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
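The fix branch above locates the file that actually failed by scanning the pytest output. A hypothetical sketch of that kind of extraction, based on pytest's "FAILED path::test - reason" summary lines; pdd's real `extract_failing_files_from_output` may use a different approach:

```python
import re
from typing import List

def failing_files_from_pytest_output(output: str) -> List[str]:
    """Collect unique file paths from pytest 'FAILED <path>::<test>' summary lines."""
    files: List[str] = []
    for match in re.finditer(r"^FAILED ([^\s:]+)::", output, flags=re.MULTILINE):
        path = match.group(1)
        if path not in files:
            files.append(path)
    return files

out = "FAILED backend/tests/test_api.py::test_login - AssertionError\n"
assert failing_files_from_pytest_output(out) == ["backend/tests/test_api.py"]
```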
+                            elif operation == 'update':
+                                result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
+                            else:
+                                errors.append(f"Unknown operation {operation}")
+                                result = {'success': False}
+
+                            # Result parsing
+                            if isinstance(result, dict):
+                                success = result.get('success', False)
+                                current_cost_ref[0] += result.get('cost', 0.0)
+                            elif isinstance(result, tuple) and len(result) >= 3:
+                                if operation == 'test': success = pdd_files['test'].exists()
+                                else: success = bool(result[0])
+                                cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
+                                current_cost_ref[0] += cost
                             else:
-
+                                success = result is not None
+
                         except Exception as e:
-
-
-                                error_file_path.write_text(error_content)
-
-                                result = fix_main(
-                                    ctx,
-                                    prompt_file=str(pdd_files['prompt']),
-                                    code_file=str(pdd_files['code']),
-                                    unit_test_file=str(pdd_files['test']),
-                                    error_file=str(error_file_path),
-                                    output_test=str(pdd_files['test']),
-                                    output_code=str(pdd_files['code']),
-                                    output_results=f"{basename}_fix_results.log",
-                                    loop=True,
-                                    verification_program=str(pdd_files['example']),
-                                    max_attempts=max_attempts,
-                                    budget=budget - current_cost_ref[0],
-                                    auto_submit=True
-                                )
-                            elif operation == 'update':
-                                result = update_main(
-                                    ctx,
-                                    input_prompt_file=str(pdd_files['prompt']),
-                                    modified_code_file=str(pdd_files['code']),
-                                    input_code_file=None,
-                                    output=str(pdd_files['prompt']),
-                                    git=True
-                                )
-                            else:
-                                errors.append(f"Unknown operation '{operation}' requested.")
-                                result = {'success': False, 'cost': 0.0}
+                            errors.append(f"Exception during '{operation}': {e}")
+                            success = False

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                # Unknown return format
-                                success = result is not None
-                                current_cost_ref[0] += 0.0
-
-                        except Exception as e:
-                            errors.append(f"Exception during '{operation}': {e}")
-                            success = False
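The result parsing and the cost/model extraction below handle two return shapes: a dict with `success`/`cost`/`model` keys and a `(value, cost, model)` tuple. A small convenience sketch of the same normalization, written here only to make the shape explicit; it is not a helper that exists in pdd:

```python
from typing import Any, Tuple

def normalize_result(result: Any) -> Tuple[bool, float, str]:
    """Reduce dict-style or tuple-style command results to (success, cost, model)."""
    if isinstance(result, dict):
        return bool(result.get('success', False)), float(result.get('cost', 0.0)), str(result.get('model', 'unknown'))
    if isinstance(result, tuple) and len(result) >= 3:
        value, cost, model = result[0], result[-2], result[-1]
        cost = cost if isinstance(cost, (int, float)) else 0.0
        return bool(value), float(cost), str(model)
    return result is not None, 0.0, 'unknown'

assert normalize_result({'success': True, 'cost': 0.25, 'model': 'gpt'}) == (True, 0.25, 'gpt')
assert normalize_result(("ok", 0.1, 'sonnet')) == (True, 0.1, 'sonnet')
```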
+                        # Log update
+                        duration = time.time() - op_start_time
+                        actual_cost = 0.0
+                        model_name = "unknown"
+                        if success:
+                            if isinstance(result, dict):
+                                actual_cost = result.get('cost', 0.0)
+                                model_name = result.get('model', 'unknown')
+                            elif isinstance(result, tuple) and len(result) >= 3:
+                                actual_cost = result[-2] if len(result) >= 2 else 0.0
+                                model_name = result[-1] if len(result) >= 1 else 'unknown'
+                            last_model_name = str(model_name)
+                            operations_completed.append(operation)
+                            _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)

-
-
+                        update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
+                        append_sync_log(basename, language, log_entry)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
-                                model = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else ''
-                            else:
-                                cost = 0.0
-                                model = ''
-                            _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
-
-                            # After successful crash operation, re-run the example to generate fresh run report
-                            if operation == 'crash':
-                                try:
-                                    example_file = pdd_files['example']
-                                    if example_file.exists():
-                                        # Run the example program to check if crash is actually fixed
-                                        try:
-                                            example_result = subprocess.run(
-                                                ['python', str(example_file)],
-                                                capture_output=True,
-                                                text=True,
-                                                timeout=60,
-                                                env=os.environ.copy(),
-                                                cwd=str(example_file.parent)
-                                            )
-
-                                            # Create fresh run report based on actual execution
-                                            report_data = RunReport(
-                                                timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                                exit_code=example_result.returncode,
-                                                tests_passed=1 if example_result.returncode == 0 else 0,
-                                                tests_failed=0 if example_result.returncode == 0 else 1,
-                                                coverage=100.0 if example_result.returncode == 0 else 0.0
-                                            )
-                                            save_run_report(asdict(report_data), basename, language)
-                                            print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
-
-                                        except subprocess.TimeoutExpired:
-                                            # Example timed out - still considered a failure
-                                            report_data = RunReport(
-                                                timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                                exit_code=124,  # Standard timeout exit code
-                                                tests_passed=0, tests_failed=1, coverage=0.0
-                                            )
-                                            save_run_report(asdict(report_data), basename, language)
-                                            print("Example timed out after crash fix - created failure run report")
-
-                                except Exception as e:
-                                    # Don't fail the entire operation if example re-execution fails
-                                    print(f"Warning: Post-crash example re-execution failed: {e}")
+                        # Post-operation checks (simplified)
+                        if success and operation == 'crash':
+                            # Re-run example to verify crash fix worked
+                            try:
+                                # Use clean env without TUI-specific vars
+                                clean_env = os.environ.copy()
+                                for var in ['FORCE_COLOR', 'COLUMNS']:
+                                    clean_env.pop(var, None)
+                                # Bug fix: Use sys.executable to ensure same Python interpreter as
+                                # crash_main (fix_code_loop.py:477). When both venv and conda are
+                                # active, PATH lookup for 'python' may resolve to a different
+                                # interpreter, causing infinite crash loops.
+                                # Bug fix: Do NOT set cwd - inherit from pdd invocation directory
+                                # to match crash_main behavior. Setting cwd to example's parent breaks imports.
+                                example_path = str(pdd_files['example'].resolve())
+                                cmd_parts = [sys.executable, example_path]
+                                # Use error-detection runner that handles server-style examples
+                                returncode, stdout, stderr = _run_example_with_error_detection(
+                                    cmd_parts,
+                                    env=clean_env,
+                                    timeout=60
+                                )
+                                # Include test_hash for staleness detection
+                                test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
+                                save_run_report(asdict(report), basename, language)
+                            except Exception as e:
+                                # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
+                                error_msg = f"Post-crash verification failed: {e}"
+                                errors.append(error_msg)
+                                log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})

-
-
-
-
-                            if test_file.exists():
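The post-crash check above re-runs the generated example with the same interpreter and a cleaned environment. A minimal sketch of that pattern using plain `subprocess`, since `_run_example_with_error_detection` is pdd-internal and its exact behavior (e.g. handling of server-style examples) is not reproduced here:

```python
import os
import subprocess
import sys

def rerun_example(example_path: str, timeout: int = 60):
    """Run an example with sys.executable and a TUI-free environment."""
    env = os.environ.copy()
    for var in ('FORCE_COLOR', 'COLUMNS'):   # strip TUI-specific vars
        env.pop(var, None)
    proc = subprocess.run(
        [sys.executable, example_path],      # same interpreter as the orchestrator
        capture_output=True, text=True, env=env, timeout=timeout,
    )
    return proc.returncode, proc.stdout, proc.stderr
```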
+                        if success and operation == 'fix':
+                            # Re-run tests to update run_report after successful fix
+                            # This prevents infinite loop by updating the state machine
+                            if pdd_files['test'].exists():
                                 _execute_tests_and_create_run_report(
-
+                                    pdd_files['test'],
+                                    basename,
+                                    language,
+                                    target_coverage,
+                                    code_file=pdd_files.get("code"),
+                                    atomic_state=atomic_state,
+                                    test_files=pdd_files.get('test_files'),  # Bug #156
                                 )
-
-
-
-
-                        errors.append(f"Operation '{operation}' failed.")
-                        break
+
+                        if not success:
+                            errors.append(f"Operation '{operation}' failed.")
+                            break

-
-
-
-
-
-
-
-
-
-                    "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
-                    "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
-                })
-            except Exception:
-                pass  # Don't fail if logging fails
+        except BaseException as e:
+            errors.append(f"An unexpected error occurred in the orchestrator: {type(e).__name__}: {e}")
+            # Log the full traceback for debugging
+            import traceback
+            traceback.print_exc()
+        finally:
+            try:
+                log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
+            except: pass

-
-
-
-
-
-
-
-
-
-
+        # Return result dict
+        return {
+            'success': not errors,
+            'operations_completed': operations_completed,
+            'skipped_operations': skipped_operations,
+            'total_cost': current_cost_ref[0],
+            'total_time': time.time() - start_time,
+            'final_state': {p: {'exists': f.exists(), 'path': str(f)} for p, f in pdd_files.items() if p != 'test_files'},
+            'errors': errors,
+            'error': "; ".join(errors) if errors else None,  # Add this line
+            'model_name': last_model_name,
+        }
+
+    # Detect headless mode (no TTY, CI environment, or quiet mode)
+    headless = quiet or not sys.stdout.isatty() or os.environ.get('CI')
+
+    if headless:
+        # Set PDD_FORCE to also skip API key prompts in headless mode
+        os.environ['PDD_FORCE'] = '1'
+        # Run worker logic directly without TUI in headless mode
+        if not quiet:
+            print(f"Running sync in headless mode (CI/non-TTY environment)...")
+        result = sync_worker_logic()
+        # No TUI app, so no worker_exception to check
+        worker_exception = None
+    else:
+        # Instantiate and run Textual App
+        app = SyncApp(
+            basename=basename,
+            budget=budget,
+            worker_func=sync_worker_logic,
+            function_name_ref=current_function_name_ref,
+            cost_ref=current_cost_ref,
+            prompt_path_ref=prompt_path_ref,
+            code_path_ref=code_path_ref,
+            example_path_ref=example_path_ref,
+            tests_path_ref=tests_path_ref,
+            prompt_color_ref=prompt_box_color_ref,
+            code_color_ref=code_box_color_ref,
+            example_color_ref=example_box_color_ref,
+            tests_color_ref=tests_box_color_ref,
+            stop_event=stop_event,
+            progress_callback_ref=progress_callback_ref
+        )
+
+        # Store app reference so worker can access request_confirmation
+        app_ref[0] = app
+
+        result = app.run()
+
+        # Show exit animation if not quiet
+        from .sync_tui import show_exit_animation
+        show_exit_animation()
+
+        worker_exception = app.worker_exception
+
+    # Check for worker exception that might have caused a crash (TUI mode only)
+    if not headless and worker_exception:
+        print(f"\n[Error] Worker thread crashed with exception: {worker_exception}", file=sys.stderr)
+
+        if hasattr(app, 'captured_logs') and app.captured_logs:
+            print("\n[Captured Logs (last 20 lines)]", file=sys.stderr)
+            for line in app.captured_logs[-20:]:  # Print last 20 lines
+                print(f"  {line}", file=sys.stderr)
+
+        import traceback
+        # Use trace module to print the stored exception's traceback if available
+        if hasattr(worker_exception, '__traceback__'):
+            traceback.print_exception(type(worker_exception), worker_exception, worker_exception.__traceback__, file=sys.stderr)
+
+    if result is None:
+        return {
+            "success": False,
+            "total_cost": current_cost_ref[0],
+            "model_name": "",
+            "error": "Sync process interrupted or returned no result.",
+            "operations_completed": [],
+            "errors": ["App exited without result"]
+        }

-    return
-        'success': not errors,
-        'operations_completed': operations_completed,
-        'skipped_operations': skipped_operations,
-        'total_cost': current_cost_ref[0],
-        'total_time': total_time,
-        'final_state': final_state,
-        'errors': errors,
-    }
+    return result

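Headless mode above is detected from three signals: the `--quiet` flag, the absence of a TTY on stdout, and the `CI` environment variable. A tiny sketch of that check in isolation, under the assumption that any truthy `CI` value counts:

```python
import os
import sys

def is_headless(quiet: bool) -> bool:
    """True when the TUI should be skipped: quiet mode, no TTY, or a CI environment."""
    return bool(quiet or not sys.stdout.isatty() or os.environ.get('CI'))
```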
 if __name__ == '__main__':
-    # Example usage
-    # This simulates running `pdd sync my_calculator` from the command line.
-
-    print("--- Running Basic Sync Orchestration Example ---")
-
-    # Setup a dummy project structure
+    # Example usage
     Path("./prompts").mkdir(exist_ok=True)
     Path("./src").mkdir(exist_ok=True)
     Path("./examples").mkdir(exist_ok=True)
     Path("./tests").mkdir(exist_ok=True)
     Path("./prompts/my_calculator_python.prompt").write_text("Create a calculator.")
-
-    # Ensure PDD meta directory exists for logs and locks
     PDD_DIR.mkdir(exist_ok=True)
     META_DIR.mkdir(exist_ok=True)
-
-    result = sync_orchestration(
-        basename="my_calculator",
-        language="python",
-        quiet=True  # Suppress mock command output for cleaner example run
-    )
-
-    print("\n--- Sync Orchestration Finished ---")
+    result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
     print(json.dumps(result, indent=2))
-
-    if result['success']:
-        print("\n✅ Sync completed successfully.")
-    else:
-        print(f"\n❌ Sync failed. Errors: {result['errors']}")
-
-    print("\n--- Running Sync Log Example ---")
-    # This will now show the log from the run we just completed.
-    log_result = sync_orchestration(
-        basename="my_calculator",
-        language="python",
-        log=True
-    )