pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +80 -19
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +281 -81
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -62
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +331 -77
- pdd/fix_error_loop.py +209 -60
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +319 -272
- pdd/fix_verification_main.py +57 -17
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +48 -9
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/increase_tests.py +7 -0
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1278 -110
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +98 -101
- pdd/prompts/change_LLM.prompt +1 -3
- pdd/prompts/detect_change_LLM.prompt +562 -3
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -2
- pdd/prompts/insert_includes_LLM.prompt +1181 -6
- pdd/prompts/split_LLM.prompt +1 -62
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/prompts/xml_convertor_LLM.prompt +3246 -7
- pdd/pytest_output.py +188 -21
- pdd/python_env_detector.py +151 -0
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +56 -7
- pdd/sync_determine_operation.py +918 -186
- pdd/sync_main.py +82 -32
- pdd/sync_orchestration.py +1456 -453
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.42.dist-info/RECORD +0 -115
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py
CHANGED
@@ -10,21 +10,34 @@ import json
 import datetime
 import subprocess
 import re
+import os
 from pathlib import Path
-from typing import Dict, Any, Optional, List
-from dataclasses import asdict
+from typing import Dict, Any, Optional, List, Callable
+from dataclasses import asdict, dataclass, field
+import tempfile
+import sys
 
 import click
+import logging
+
+# --- Constants ---
+MAX_CONSECUTIVE_TESTS = 3 # Allow up to 3 consecutive test attempts
+MAX_TEST_EXTEND_ATTEMPTS = 2 # Allow up to 2 attempts to extend tests for coverage
+MAX_CONSECUTIVE_CRASHES = 3 # Allow up to 3 consecutive crash attempts (Bug #157 fix)
 
 # --- Real PDD Component Imports ---
-from .
+from .sync_tui import SyncApp
 from .sync_determine_operation import (
     sync_determine_operation,
     get_pdd_file_paths,
     RunReport,
+    SyncDecision,
     PDD_DIR,
     META_DIR,
     SyncLock,
+    read_run_report,
+    calculate_sha256,
+    calculate_current_hashes,
 )
 from .auto_deps_main import auto_deps_main
 from .code_generator_main import code_generator_main
@@ -34,6 +47,105 @@ from .fix_verification_main import fix_verification_main
 from .cmd_test_main import cmd_test_main
 from .fix_main import fix_main
 from .update_main import update_main
+from .python_env_detector import detect_host_python_executable
+from .get_run_command import get_run_command_for_file
+from .pytest_output import extract_failing_files_from_output
+from . import DEFAULT_STRENGTH
+
+
+# --- Atomic State Update (Issue #159 Fix) ---
+
+@dataclass
+class PendingStateUpdate:
+    """Holds pending state updates for atomic commit."""
+    run_report: Optional[Dict[str, Any]] = None
+    fingerprint: Optional[Dict[str, Any]] = None
+    run_report_path: Optional[Path] = None
+    fingerprint_path: Optional[Path] = None
+
+
+class AtomicStateUpdate:
+    """
+    Context manager for atomic state updates.
+
+    Ensures run_report and fingerprint are both written or neither is written.
+    This fixes Issue #159 where non-atomic writes caused state desynchronization.
+
+    Usage:
+        with AtomicStateUpdate(basename, language) as state:
+            state.set_run_report(report_dict, report_path)
+            state.set_fingerprint(fingerprint_dict, fp_path)
+            # On successful exit, both files are written atomically
+            # On exception, neither file is written (rollback)
+    """
+
+    def __init__(self, basename: str, language: str):
+        self.basename = basename
+        self.language = language
+        self.pending = PendingStateUpdate()
+        self._temp_files: List[str] = []
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is None:
+            self._commit()
+        else:
+            self._rollback()
+        return False # Don't suppress exceptions
+
+    def set_run_report(self, report: Dict[str, Any], path: Path):
+        """Buffer a run report for atomic write."""
+        self.pending.run_report = report
+        self.pending.run_report_path = path
+
+    def set_fingerprint(self, fingerprint: Dict[str, Any], path: Path):
+        """Buffer a fingerprint for atomic write."""
+        self.pending.fingerprint = fingerprint
+        self.pending.fingerprint_path = path
+
+    def _atomic_write(self, data: Dict[str, Any], target_path: Path) -> None:
+        """Write data to file atomically using temp file + rename pattern."""
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write to temp file in same directory (required for atomic rename)
+        fd, temp_path = tempfile.mkstemp(
+            dir=target_path.parent,
+            prefix=f".{target_path.stem}_",
+            suffix=".tmp"
+        )
+        self._temp_files.append(temp_path)
+
+        try:
+            with os.fdopen(fd, 'w') as f:
+                json.dump(data, f, indent=2, default=str)
+
+            # Atomic rename - guaranteed atomic on POSIX systems
+            os.replace(temp_path, target_path)
+            self._temp_files.remove(temp_path) # Successfully moved, stop tracking
+        except Exception:
+            # Leave temp file for rollback to clean up
+            raise
+
+    def _commit(self):
+        """Commit all pending state updates atomically."""
+        # Write fingerprint first (checkpoint), then run_report
+        if self.pending.fingerprint and self.pending.fingerprint_path:
+            self._atomic_write(self.pending.fingerprint, self.pending.fingerprint_path)
+        if self.pending.run_report and self.pending.run_report_path:
+            self._atomic_write(self.pending.run_report, self.pending.run_report_path)
+
+    def _rollback(self):
+        """Clean up any temp files without committing changes."""
+        for temp_path in self._temp_files:
+            try:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+            except OSError:
+                pass # Best effort cleanup
+        self._temp_files.clear()
+
 
 # --- Mock Helper Functions ---
 
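For context, a minimal usage sketch of the new AtomicStateUpdate API, following the docstring shown above; the module name, report dict, and file paths below are illustrative placeholders, not values pdd itself produces.

    from pathlib import Path

    # Both buffered writes are committed together on a clean exit via the
    # temp-file + os.replace pattern; if an exception escapes the block,
    # neither file is written (rollback).
    with AtomicStateUpdate("my_module", "python") as state:
        state.set_run_report({"exit_code": 0}, Path("meta/my_module_python_run.json"))
        state.set_fingerprint({"command": "test"}, Path("meta/my_module_python.json"))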
@@ -48,105 +160,637 @@ def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
     except Exception:
         return []
 
-def
-    """
-
+def create_sync_log_entry(decision, budget_remaining: float) -> Dict[str, Any]:
+    """Create initial log entry from decision with all fields (actual results set to None initially)."""
+    return {
+        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "operation": decision.operation,
+        "reason": decision.reason,
+        "decision_type": decision.details.get("decision_type", "heuristic") if decision.details else "heuristic",
+        "confidence": decision.confidence,
+        "estimated_cost": decision.estimated_cost,
+        "actual_cost": None,
+        "success": None,
+        "model": None,
+        "duration": None,
+        "error": None,
+        "details": {
+            **(decision.details if decision.details else {}),
+            "budget_remaining": budget_remaining
+        }
+    }
+
+def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duration: float) -> Dict[str, Any]:
+    """Update log entry with execution results (actual_cost, success, model, duration, error)."""
+    entry.update({
+        "actual_cost": result.get("cost", 0.0),
+        "success": result.get("success", False),
+        "model": result.get("model", "unknown"),
+        "duration": duration,
+        "error": result.get("error") if not result.get("success") else None
+    })
+    return entry
+
+def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
+    """Append completed log entry to the sync log file."""
+    log_file = META_DIR / f"{basename}_{language}_sync.log"
     META_DIR.mkdir(parents=True, exist_ok=True)
-    with open(
-        json.
+    with open(log_file, 'a') as f:
+        f.write(json.dumps(entry) + '\n')
+
+def log_sync_event(basename: str, language: str, event: str, details: Dict[str, Any] = None):
+    """Log a special sync event (lock_acquired, budget_warning, etc.)."""
+    entry = {
+        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "event": event,
+        "details": details or {}
+    }
+    append_sync_log(basename, language, entry)
+
+def save_run_report(report: Dict[str, Any], basename: str, language: str,
+                    atomic_state: Optional['AtomicStateUpdate'] = None):
+    """Save a run report to the metadata directory.
+
+    Args:
+        report: The run report dictionary to save.
+        basename: The module basename.
+        language: The programming language.
+        atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+    """
+    report_file = META_DIR / f"{basename}_{language}_run.json"
+    if atomic_state:
+        # Buffer for atomic write
+        atomic_state.set_run_report(report, report_file)
+    else:
+        # Legacy direct write
+        META_DIR.mkdir(parents=True, exist_ok=True)
+        with open(report_file, 'w') as f:
+            json.dump(report, f, indent=2, default=str)
+
+def _save_operation_fingerprint(basename: str, language: str, operation: str,
+                                paths: Dict[str, Path], cost: float, model: str,
+                                atomic_state: Optional['AtomicStateUpdate'] = None):
+    """Save fingerprint state after successful operation.
 
-
-
-
+    Args:
+        basename: The module basename.
+        language: The programming language.
+        operation: The operation that was performed.
+        paths: Dictionary of PDD file paths.
+        cost: The cost of the operation.
+        model: The model used.
+        atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+    """
     from datetime import datetime, timezone
     from .sync_determine_operation import calculate_current_hashes, Fingerprint
-
+    from . import __version__
+
     current_hashes = calculate_current_hashes(paths)
     fingerprint = Fingerprint(
-        pdd_version=
+        pdd_version=__version__,
         timestamp=datetime.now(timezone.utc).isoformat(),
         command=operation,
         prompt_hash=current_hashes.get('prompt_hash'),
         code_hash=current_hashes.get('code_hash'),
         example_hash=current_hashes.get('example_hash'),
-        test_hash=current_hashes.get('test_hash')
+        test_hash=current_hashes.get('test_hash'),
+        test_files=current_hashes.get('test_files'), # Bug #156
     )
-
-    META_DIR.mkdir(parents=True, exist_ok=True)
+
     fingerprint_file = META_DIR / f"{basename}_{language}.json"
-
-
+    if atomic_state:
+        # Buffer for atomic write
+        atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
+    else:
+        # Legacy direct write
+        META_DIR.mkdir(parents=True, exist_ok=True)
+        with open(fingerprint_file, 'w') as f:
+            json.dump(asdict(fingerprint), f, indent=2, default=str)
 
-
+def _python_cov_target_for_code_file(code_file: Path) -> str:
+    """Return a `pytest-cov` `--cov` target for a Python code file.
+
+    - If the file is inside a Python package (directories with `__init__.py`),
+      returns a dotted module path (e.g., `pdd.sync_orchestration`).
+    - Otherwise falls back to the filename stem (e.g., `admin_get_users`).
+    """
+    if code_file.suffix != ".py":
+        return code_file.stem
+
+    package_dir: Optional[Path] = None
+    current = code_file.parent
+    while (current / "__init__.py").exists():
+        package_dir = current
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+
+    if package_dir:
+        relative_module = code_file.relative_to(package_dir.parent).with_suffix("")
+        return str(relative_module).replace(os.sep, ".")
+
+    return code_file.stem
+
+
+def _python_cov_target_for_test_and_code(test_file: Path, code_file: Path, fallback: str) -> str:
+    """Choose the best `--cov` target based on how tests import the code.
+
+    In some repos, tests add a directory to `sys.path` and import modules by their
+    filename stem (e.g., `from admin_get_users import ...`) even when the code
+    also lives under a package (e.g., `backend.functions.admin_get_users`).
+
+    Heuristic:
+    - Prefer the code file stem when the test file imports it directly.
+    - Otherwise, prefer the dotted module path derived from the package layout.
+    - Fall back to the provided fallback (usually the basename).
+    """
+
+    def _imports_module(source: str, module: str) -> bool:
+        escaped = re.escape(module)
+        return bool(
+            re.search(rf"^\s*import\s+{escaped}\b", source, re.MULTILINE)
+            or re.search(rf"^\s*from\s+{escaped}\b", source, re.MULTILINE)
+        )
+
+    stem = code_file.stem
+    dotted = _python_cov_target_for_code_file(code_file)
 
-def _execute_tests_and_create_run_report(test_file: Path, basename: str, language: str, target_coverage: float = 90.0) -> RunReport:
-    """Execute tests and create a RunReport with actual results."""
-    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
-
     try:
- … (26 removed lines not shown in this view)
+        test_source = test_file.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        test_source = ""
+
+    if stem and _imports_module(test_source, stem):
+        return stem
+
+    if dotted and dotted != stem:
+        if _imports_module(test_source, dotted):
+            return dotted
+
+        if "." in dotted:
+            parent = dotted.rsplit(".", 1)[0]
+            # e.g. `from backend.functions import admin_get_users`
+            if re.search(
+                rf"^\s*from\s+{re.escape(parent)}\s+import\s+.*\b{re.escape(stem)}\b",
+                test_source,
+                re.MULTILINE,
+            ):
+                return dotted
+            # e.g. `import backend.functions.admin_get_users`
+            if re.search(
+                rf"^\s*import\s+{re.escape(parent)}\.{re.escape(stem)}\b",
+                test_source,
+                re.MULTILINE,
+            ):
+                return dotted
+
+        return dotted
+
+    return stem or fallback
+
+
+def _parse_test_output(output: str, language: str) -> tuple[int, int, float]:
+    """
+    Parse test output to extract passed/failed/coverage.
+
+    Args:
+        output: Combined stdout/stderr from test runner
+        language: Language name (e.g., 'python', 'typescript', 'go')
+
+    Returns:
+        (tests_passed, tests_failed, coverage)
+    """
+    tests_passed = 0
+    tests_failed = 0
+    coverage = 0.0
+
+    lang = language.lower()
+
+    # Python (pytest)
+    if lang == 'python':
+        if 'passed' in output:
+            passed_match = re.search(r'(\d+) passed', output)
             if passed_match:
                 tests_passed = int(passed_match.group(1))
-
-
-        failed_match = re.search(r'(\d+) failed', stdout)
+        if 'failed' in output:
+            failed_match = re.search(r'(\d+) failed', output)
             if failed_match:
                 tests_failed = int(failed_match.group(1))
-
-
-
+        if 'error' in output:
+            error_match = re.search(r'(\d+) error', output)
+            if error_match:
+                tests_failed += int(error_match.group(1))
+        coverage_match = re.search(r'TOTAL.*?(\d+)%', output)
+        if not coverage_match:
+            coverage_match = re.search(r'(\d+)%\s*$', output, re.MULTILINE)
+        if not coverage_match:
+            coverage_match = re.search(r'(\d+(?:\.\d+)?)%', output)
         if coverage_match:
             coverage = float(coverage_match.group(1))
-
-
+
+    # Jest/Vitest (JavaScript/TypeScript)
+    elif lang in ('javascript', 'typescript', 'typescriptreact'):
+        # "Tests: X passed, Y failed" or "Tests: X passed, Y failed, Z total"
+        match = re.search(r'Tests:\s*(\d+)\s+passed', output)
+        if match:
+            tests_passed = int(match.group(1))
+        match = re.search(r'Tests:.*?(\d+)\s+failed', output)
+        if match:
+            tests_failed = int(match.group(1))
+
+        # Alternative Mocha-style: "X passing, Y failing"
+        if tests_passed == 0:
+            pass_match = re.search(r'(\d+)\s+pass(?:ing)?', output, re.I)
+            if pass_match:
+                tests_passed = int(pass_match.group(1))
+        if tests_failed == 0:
+            fail_match = re.search(r'(\d+)\s+fail(?:ing)?', output, re.I)
+            if fail_match:
+                tests_failed = int(fail_match.group(1))
+
+        # Coverage: "All files | XX.XX |"
+        cov_match = re.search(r'All files[^|]*\|\s*(\d+\.?\d*)', output)
+        if cov_match:
+            coverage = float(cov_match.group(1))
+
+    # Go
+    elif lang == 'go':
+        # Count PASS and FAIL occurrences for individual tests
+        tests_passed = len(re.findall(r'--- PASS:', output))
+        tests_failed = len(re.findall(r'--- FAIL:', output))
+
+        # Fallback: check for overall PASS/FAIL
+        if tests_passed == 0 and 'PASS' in output and 'FAIL' not in output:
+            tests_passed = 1
+        if tests_failed == 0 and 'FAIL' in output:
+            tests_failed = 1
+
+        # coverage: XX.X% of statements
+        cov_match = re.search(r'coverage:\s*(\d+\.?\d*)%', output)
+        if cov_match:
+            coverage = float(cov_match.group(1))
+
+    # Rust (cargo test)
+    elif lang == 'rust':
+        # "test result: ok. X passed; Y failed;"
+        match = re.search(r'(\d+)\s+passed', output)
+        if match:
+            tests_passed = int(match.group(1))
+        match = re.search(r'(\d+)\s+failed', output)
+        if match:
+            tests_failed = int(match.group(1))
+
+    # Fallback: try generic patterns
+    else:
+        pass_match = re.search(r'(\d+)\s+(?:tests?\s+)?pass(?:ed)?', output, re.I)
+        fail_match = re.search(r'(\d+)\s+(?:tests?\s+)?fail(?:ed)?', output, re.I)
+        if pass_match:
+            tests_passed = int(pass_match.group(1))
+        if fail_match:
+            tests_failed = int(fail_match.group(1))
+
+    return tests_passed, tests_failed, coverage
+
+
+def _detect_example_errors(output: str) -> tuple[bool, str]:
+    """
+    Detect if example output contains error indicators.
+
+    Only detects true crashes/errors:
+    - Python tracebacks (catches ALL unhandled exceptions)
+    - ERROR level log messages
+
+    Intentionally does NOT detect:
+    - HTTP status codes (examples may test error responses)
+    - Individual exception type names (causes false positives, redundant with traceback)
+
+    Returns:
+        (has_errors, error_summary)
+    """
+    error_patterns = [
+        (r'Traceback \(most recent call last\):', 'Python traceback'),
+        (r' - ERROR - ', 'Error log message'), # Python logging format
+    ]
+
+    errors_found = []
+    for pattern, description in error_patterns:
+        if re.search(pattern, output, re.MULTILINE):
+            errors_found.append(description)
+
+    if errors_found:
+        return True, '; '.join(errors_found)
+    return False, ''
+
+
+def _try_auto_fix_import_error(
+    error_output: str,
+    code_file: Path,
+    example_file: Path,
+) -> tuple[bool, str]:
+    """
+    Try to automatically fix common import errors before calling expensive agentic fix.
+
+    Returns:
+        (fixed, message): Whether a fix was attempted and what was done.
+    """
+    import re
+
+    # Check for ModuleNotFoundError or ImportError
+    module_not_found = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", error_output)
+    import_error = re.search(r"ImportError: cannot import name ['\"]([^'\"]+)['\"]", error_output)
+
+    if not module_not_found and not import_error:
+        return False, "No import error detected"
+
+    if module_not_found:
+        missing_module = module_not_found.group(1)
+        # Split by . to get the top-level package
+        top_level_package = missing_module.split('.')[0]
+
+        # Check if this is the module we're trying to import (local module)
+        code_module_name = code_file.stem # e.g., "data_validator" from "data_validator.py"
+
+        if top_level_package == code_module_name:
+            # It's trying to import our own generated code - fix the example's sys.path
+            # Read the example and fix the path manipulation
+            try:
+                example_content = example_file.read_text(encoding='utf-8')
+                code_dir = str(code_file.parent.resolve())
+
+                # Look for existing sys.path manipulation
+                if 'sys.path' in example_content:
+                    # Try to fix the existing path manipulation
+                    # Common pattern: module_path = os.path.abspath(os.path.join(...))
+                    # Replace with correct path
+                    fixed_content = re.sub(
+                        r"module_path\s*=\s*os\.path\.abspath\([^)]+\)",
+                        f"module_path = '{code_dir}'",
+                        example_content
+                    )
+                    if fixed_content != example_content:
+                        example_file.write_text(fixed_content, encoding='utf-8')
+                        return True, f"Fixed sys.path to point to {code_dir}"
+
+                # If no existing sys.path, add one at the start after imports
+                lines = example_content.split('\n')
+                insert_pos = 0
+                for i, line in enumerate(lines):
+                    if line.startswith('import ') or line.startswith('from '):
+                        if 'sys' in line or 'os' in line:
+                            insert_pos = i + 1
+                        continue
+                    if line.strip() and not line.startswith('#') and not line.startswith('import') and not line.startswith('from'):
+                        insert_pos = i
+                        break
+
+                path_fix = f"\n# Auto-added by pdd to fix import\nimport sys\nsys.path.insert(0, '{code_dir}')\n"
+                lines.insert(insert_pos, path_fix)
+                example_file.write_text('\n'.join(lines), encoding='utf-8')
+                return True, f"Added sys.path.insert(0, '{code_dir}') to example"
+
+            except Exception as e:
+                return False, f"Failed to fix import path: {e}"
+
+        else:
+            # It's an external package - try pip install
+            try:
+                result = subprocess.run(
+                    [sys.executable, '-m', 'pip', 'install', top_level_package],
+                    capture_output=True,
+                    text=True,
+                    timeout=120
+                )
+                if result.returncode == 0:
+                    return True, f"Installed missing package: {top_level_package}"
+                else:
+                    return False, f"Failed to install {top_level_package}: {result.stderr}"
+            except Exception as e:
+                return False, f"Failed to run pip install: {e}"
+
+    return False, "Import error detected but no auto-fix available"
+
+
+def _run_example_with_error_detection(
+    cmd_parts: list[str],
+    env: dict,
+    cwd: str,
+    timeout: int = 60
+) -> tuple[int, str, str]:
+    """
+    Run example file, detecting errors from output.
+
+    For server-style examples that block, this runs until timeout
+    then analyzes output for errors. No errors = success.
+
+    Returns:
+        (returncode, stdout, stderr)
+        - returncode: 0 if no errors detected, positive if errors found or process failed
+    """
+    import threading
+
+    proc = subprocess.Popen(
+        cmd_parts,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        stdin=subprocess.DEVNULL,
+        env=env,
+        cwd=cwd,
+        start_new_session=True,
+    )
+
+    stdout_chunks = []
+    stderr_chunks = []
+
+    def read_pipe(pipe, chunks):
+        try:
+            for line in iter(pipe.readline, b''):
+                chunks.append(line)
+        except Exception:
+            pass
+
+    t_out = threading.Thread(target=read_pipe, args=(proc.stdout, stdout_chunks), daemon=True)
+    t_err = threading.Thread(target=read_pipe, args=(proc.stderr, stderr_chunks), daemon=True)
+    t_out.start()
+    t_err.start()
+
+    # Wait for process or timeout
+    try:
+        proc.wait(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        proc.terminate()
+        try:
+            proc.wait(timeout=5)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.wait()
+
+    t_out.join(timeout=2)
+    t_err.join(timeout=2)
+
+    stdout = b''.join(stdout_chunks).decode('utf-8', errors='replace')
+    stderr = b''.join(stderr_chunks).decode('utf-8', errors='replace')
+    combined = stdout + '\n' + stderr
+
+    # Check for errors in output
+    has_errors, error_summary = _detect_example_errors(combined)
+
+    # Determine result:
+    # - Errors in output → failure
+    # - Positive exit code (process failed normally, e.g., sys.exit(1)) → failure
+    # - Negative exit code (killed by signal, e.g., -9 for SIGKILL) → check output
+    # - Zero exit code → success
+    #
+    # IMPORTANT: When we kill the process after timeout, returncode is negative
+    # (the signal number). This is NOT a failure if output has no errors.
+    if has_errors:
+        return 1, stdout, stderr # Errors detected in output
+    elif proc.returncode is not None and proc.returncode > 0:
+        return proc.returncode, stdout, stderr # Process exited with error
+    else:
+        # Success cases:
+        # - returncode == 0 (clean exit)
+        # - returncode < 0 (killed by signal, but no errors in output)
+        # - returncode is None (shouldn't happen after wait, but safe fallback)
+        return 0, stdout, stderr
+
+
+def _execute_tests_and_create_run_report(
+    test_file: Path,
+    basename: str,
+    language: str,
+    target_coverage: float = 90.0,
+    *,
+    code_file: Optional[Path] = None,
+    atomic_state: Optional['AtomicStateUpdate'] = None,
+    test_files: Optional[List[Path]] = None, # Bug #156: Support multiple test files
+) -> RunReport:
+    """Execute tests and create a RunReport with actual results.
+
+    Now supports multiple languages by using get_test_command_for_file()
+    to determine the appropriate test runner.
+
+    Args:
+        test_file: Primary test file (for backward compat)
+        test_files: Optional list of all test files to run (Bug #156)
+    """
+    from .get_test_command import get_test_command_for_file
+
+    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
+
+    # Bug #156: Use test_files if provided, otherwise just the single test_file
+    all_test_files = test_files if test_files else [test_file]
+
+    # Calculate test file hash for staleness detection (primary file for backward compat)
+    test_hash = calculate_sha256(test_file) if test_file.exists() else None
+
+    # Bug #156: Calculate hashes for ALL test files
+    test_file_hashes = {
+        f.name: calculate_sha256(f)
+        for f in all_test_files
+        if f.exists()
+    } if all_test_files else None
+
+    # Use clean env without TUI-specific vars
+    clean_env = os.environ.copy()
+    for var in ['FORCE_COLOR', 'COLUMNS']:
+        clean_env.pop(var, None)
+
+    try:
+        lang_lower = language.lower()
+
+        # Python: use existing pytest logic with coverage
+        if lang_lower == "python":
+            module_name = test_file.name.replace('test_', '').replace('.py', '')
+            python_executable = detect_host_python_executable()
+
+            cov_target = None
+            if code_file is not None:
+                cov_target = _python_cov_target_for_test_and_code(test_file, code_file, basename or module_name)
+            else:
+                cov_target = basename or module_name
+
+            if not cov_target:
+                cov_target = basename or module_name
+
+            # Bug #156: Run pytest on ALL test files
+            pytest_args = [
+                python_executable, '-m', 'pytest',
+            ] + [str(f) for f in all_test_files] + [
+                '-v',
+                '--tb=short',
+                f'--cov={cov_target}',
+                '--cov-report=term-missing'
+            ]
+            result = subprocess.run(
+                pytest_args,
+                capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
+            )
+
+            exit_code = result.returncode
+            stdout = result.stdout + (result.stderr or '')
+            tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
+        else:
+            # Non-Python: use language-appropriate test command
+            test_cmd = get_test_command_for_file(str(test_file), language)
+
+            if test_cmd is None:
+                # No test command available - return report indicating this
+                report = RunReport(
+                    timestamp=timestamp,
+                    exit_code=127, # Command not found
+                    tests_passed=0,
+                    tests_failed=0,
+                    coverage=0.0,
+                    test_hash=test_hash,
+                    test_files=test_file_hashes, # Bug #156
+                )
+                save_run_report(asdict(report), basename, language, atomic_state)
+                return report
+
+            # Run the test command
+            result = subprocess.run(
+                test_cmd,
+                shell=True,
+                capture_output=True,
+                text=True,
+                timeout=300,
+                env=clean_env,
+                cwd=str(test_file.parent),
+                stdin=subprocess.DEVNULL,
+                start_new_session=True
+            )
+
+            exit_code = result.returncode
+            stdout = (result.stdout or '') + '\n' + (result.stderr or '')
+
+            # Parse results based on language
+            tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
         report = RunReport(
             timestamp=timestamp,
             exit_code=exit_code,
             tests_passed=tests_passed,
             tests_failed=tests_failed,
-            coverage=coverage
+            coverage=coverage,
+            test_hash=test_hash,
+            test_files=test_file_hashes, # Bug #156
         )
-
+
     except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception) as e:
-        # If test execution fails, create a report indicating failure
         report = RunReport(
             timestamp=timestamp,
             exit_code=1,
             tests_passed=0,
             tests_failed=1,
-            coverage=0.0
+            coverage=0.0,
+            test_hash=test_hash,
+            test_files=test_file_hashes, # Bug #156
         )
-
-    # Save the run report
-    save_run_report(asdict(report), basename, language)
-    return report
 
-
+    save_run_report(asdict(report), basename, language, atomic_state)
+    return report
 
 def _create_mock_context(**kwargs) -> click.Context:
     """Creates a mock Click context object to pass parameters to command functions."""
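As an aside, a hedged sketch of how the new _parse_test_output helper behaves on a typical pytest summary; the sample string below is made up for illustration only.

    sample = "===== 12 passed, 2 failed in 3.21s =====\nTOTAL    87%"
    passed, failed, cov = _parse_test_output(sample, "python")
    # passed == 12, failed == 2, cov == 87.0 (taken from the "TOTAL ... 87%" line)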
@@ -171,13 +815,61 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
 
     for entry in log_entries:
         timestamp = entry.get('timestamp', 'N/A')
-
-
-
-
-
-
-
+
+        if 'event' in entry:
+            event = entry.get('event', 'N/A')
+            print(f"[{timestamp[:19]}] EVENT: {event}")
+            if verbose and 'details' in entry:
+                details_str = json.dumps(entry['details'], indent=2)
+                print(f" Details: {details_str}")
+            continue
+
+        operation = entry.get('operation', 'N/A')
+        reason = entry.get('reason', 'N/A')
+        success = entry.get('success')
+        actual_cost = entry.get('actual_cost')
+        estimated_cost = entry.get('estimated_cost', 0.0)
+        duration = entry.get('duration')
+
+        if verbose:
+            print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
+            decision_type = entry.get('decision_type', 'N/A')
+            confidence = entry.get('confidence', 'N/A')
+            model = entry.get('model', 'N/A')
+            budget_remaining = entry.get('details', {}).get('budget_remaining', 'N/A')
+
+            print(f" Decision Type: {decision_type} | Confidence: {confidence}")
+            if actual_cost is not None:
+                print(f" Cost: ${actual_cost:.2f} (estimated: ${estimated_cost:.2f}) | Model: {model}")
+            if duration is not None:
+                print(f" Duration: {duration:.1f}s | Budget Remaining: ${budget_remaining}")
+            else:
+                print(f" Estimated Cost: ${estimated_cost:.2f}")
+
+            if 'details' in entry and entry['details']:
+                details_copy = entry['details'].copy()
+                details_copy.pop('budget_remaining', None)
+                if details_copy:
+                    details_str = json.dumps(details_copy, indent=2)
+                    print(f" Details: {details_str}")
+        else:
+            status_icon = "✓" if success else "✗" if success is False else "?"
+
+            cost_info = ""
+            if actual_cost is not None:
+                cost_info = f" | {status_icon} ${actual_cost:.2f} (est: ${estimated_cost:.2f})"
+            else:
+                cost_info = f" | Est: ${estimated_cost:.2f}"
+
+            duration_info = ""
+            if duration is not None:
+                duration_info = f" | {duration:.1f}s"
+
+            error_info = ""
+            if entry.get('error'):
+                error_info = f" | Error: {entry['error']}"
+
+            print(f"[{timestamp[:19]}] {operation:<12} | {reason}{cost_info}{duration_info}{error_info}")
 
     print("--- End of Log ---")
     return {'success': True, 'log_entries': log_entries}
@@ -185,6 +877,7 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
 
 def sync_orchestration(
     basename: str,
+    target_coverage: float = 90.0,
     language: str = "python",
     prompts_dir: str = "prompts",
     code_dir: str = "src",
@@ -194,46 +887,60 @@ def sync_orchestration(
     budget: float = 10.0,
     skip_verify: bool = False,
     skip_tests: bool = False,
-
-    log: bool = False,
+    dry_run: bool = False,
     force: bool = False,
-    strength: float =
+    strength: float = DEFAULT_STRENGTH,
     temperature: float = 0.0,
-    time_param: float = 0.25,
+    time_param: float = 0.25,
     verbose: bool = False,
     quiet: bool = False,
     output_cost: Optional[str] = None,
     review_examples: bool = False,
     local: bool = False,
     context_config: Optional[Dict[str, str]] = None,
+    context_override: Optional[str] = None,
+    confirm_callback: Optional[Callable[[str, str], bool]] = None,
 ) -> Dict[str, Any]:
     """
     Orchestrates the complete PDD sync workflow with parallel animation.
-
-    If log=True, displays the sync log instead of running sync operations.
-    The verbose flag controls the detail level of the log output.
-
-    Returns a dictionary summarizing the outcome of the sync process.
     """
-
+    # Import get_extension at function scope
+    from .sync_determine_operation import get_extension
+
+    if dry_run:
         return _display_sync_log(basename, language, verbose)
 
     # --- Initialize State and Paths ---
     try:
-        pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
+        pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+    except FileNotFoundError as e:
+        if "test_config.py" in str(e) or "tests/test_" in str(e):
+            pdd_files = {
+                'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
+                'code': Path(f"src/{basename}.{get_extension(language)}"),
+                'example': Path(f"context/{basename}_example.{get_extension(language)}"),
+                'test': Path(f"tests/test_{basename}.{get_extension(language)}")
+            }
+            if not quiet:
+                print(f"Note: Test file missing, continuing with sync workflow to generate it")
+        else:
+            print(f"Error constructing paths: {e}")
+            return {
+                "success": False,
+                "error": f"Failed to construct paths: {str(e)}",
+                "operations_completed": [],
+                "errors": [f"Path construction failed: {str(e)}"]
+            }
     except Exception as e:
-
-        console.print(f"[red]Error constructing paths: {e}[/red]")
+        print(f"Error constructing paths: {e}")
         return {
             "success": False,
-            "total_cost": 0.0,
-            "model_name": "",
             "error": f"Failed to construct paths: {str(e)}",
             "operations_completed": [],
             "errors": [f"Path construction failed: {str(e)}"]
         }
 
-    # Shared state for animation
+    # Shared state for animation (passed to App)
     current_function_name_ref = ["initializing"]
     stop_event = threading.Event()
     current_cost_ref = [0.0]
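For orientation, a minimal calling sketch of the updated signature; dry_run=True replaces the old log flag and displays the sync log instead of running operations (the basename here is hypothetical).

    result = sync_orchestration("my_module", language="python", dry_run=True)
    # Per _display_sync_log above, this prints the log and returns
    # {'success': True, 'log_entries': [...]} without executing any operations.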
@@ -241,399 +948,695 @@ def sync_orchestration(
|
|
|
241
948
|
code_path_ref = [str(pdd_files.get('code', 'N/A'))]
|
|
242
949
|
example_path_ref = [str(pdd_files.get('example', 'N/A'))]
|
|
243
950
|
tests_path_ref = [str(pdd_files.get('test', 'N/A'))]
|
|
244
|
-
prompt_box_color_ref
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
operations_completed: List[str] = []
|
|
249
|
-
skipped_operations: List[str] = []
|
|
250
|
-
errors: List[str] = []
|
|
251
|
-
start_time = time.time()
|
|
252
|
-
animation_thread = None
|
|
951
|
+
prompt_box_color_ref = ["blue"]
|
|
952
|
+
code_box_color_ref = ["blue"]
|
|
953
|
+
example_box_color_ref = ["blue"]
|
|
954
|
+
tests_box_color_ref = ["blue"]
|
|
253
955
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
skipped_operations.append('test')
|
|
301
|
-
report_data = RunReport(
|
|
302
|
-
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
303
|
-
exit_code=0, tests_passed=0, tests_failed=0, coverage=1.0
|
|
304
|
-
)
|
|
305
|
-
save_run_report(asdict(report_data), basename, language)
|
|
306
|
-
_save_operation_fingerprint(basename, language, 'test', pdd_files, 0.0, 'skipped')
|
|
307
|
-
continue
|
|
308
|
-
if operation == 'crash' and skip_tests:
|
|
309
|
-
# Skip crash operations when tests are skipped since crash fixes usually require test execution
|
|
310
|
-
skipped_operations.append('crash')
|
|
311
|
-
# Create a dummy run report indicating crash was skipped
|
|
312
|
-
report_data = RunReport(
|
|
313
|
-
timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
314
|
-
exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
|
|
315
|
-
)
|
|
316
|
-
save_run_report(asdict(report_data), basename, language)
|
|
317
|
-
_save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped')
|
|
318
|
-
continue
|
|
319
|
-
|
|
320
|
-
current_function_name_ref[0] = operation
|
|
321
|
-
ctx = _create_mock_context(
|
|
322
|
-
force=force, strength=strength, temperature=temperature, time=time_param,
|
|
323
|
-
verbose=verbose, quiet=quiet, output_cost=output_cost,
|
|
324
|
-
review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
|
|
325
|
-
max_attempts=max_attempts, target_coverage=target_coverage
|
|
326
|
-
)
|
|
956
|
+
# Mutable container for the app reference (set after app creation)
|
|
957
|
+
# This allows the worker to access app.request_confirmation()
|
|
958
|
+
app_ref: List[Optional['SyncApp']] = [None]
|
|
959
|
+
|
|
960
|
+
# Progress callback ref for TUI ProgressBar updates during auto-deps
|
|
961
|
+
progress_callback_ref: List[Optional[Callable[[int, int], None]]] = [None]
|
|
962
|
+
|
|
963
|
+
# Track if user has already confirmed overwrite (to avoid asking multiple times)
|
|
964
|
+
user_confirmed_overwrite: List[bool] = [False]
|
|
965
|
+
|
|
966
|
+
def get_confirm_callback() -> Optional[Callable[[str, str], bool]]:
|
|
967
|
+
"""Get the confirmation callback from the app if available.
|
|
968
|
+
|
|
969
|
+
Once user confirms, we remember it so subsequent operations don't ask again.
|
|
970
|
+
"""
|
|
971
|
+
if user_confirmed_overwrite[0]:
|
|
972
|
+
# User already confirmed, return a callback that always returns True
|
|
973
|
+
return lambda msg, title: True
|
|
974
|
+
|
|
975
|
+
if app_ref[0] is not None:
|
|
976
|
+
def confirming_callback(msg: str, title: str) -> bool:
|
|
977
|
+
result = app_ref[0].request_confirmation(msg, title)
|
|
978
|
+
if result:
|
|
979
|
+
user_confirmed_overwrite[0] = True
|
|
980
|
+
return result
|
|
981
|
+
return confirming_callback
|
|
982
|
+
return confirm_callback # Fall back to provided callback
|
|
983
|
+
|
|
984
|
+
def sync_worker_logic():
|
|
985
|
+
"""
|
|
986
|
+
The main loop of sync logic, run in a worker thread by Textual App.
|
|
987
|
+
"""
|
|
988
|
+
operations_completed: List[str] = []
|
|
989
|
+
skipped_operations: List[str] = []
|
|
990
|
+
errors: List[str] = []
|
|
991
|
+
start_time = time.time()
|
|
992
|
+
last_model_name: str = ""
|
|
993
|
+
operation_history: List[str] = []
|
|
994
|
+
MAX_CYCLE_REPEATS = 2
|
|
995
|
+
|
|
996
|
+
# Helper function to print inside worker (goes to RichLog via redirection)
|
|
997
|
+
# print() will work if sys.stdout is redirected.
|
|
998
|
+
|
|
999
|
+
try:
|
|
1000
|
+
with SyncLock(basename, language):
|
|
1001
|
+
log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
|
|
327
1002
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
1003
|
+
while True:
|
|
1004
|
+
budget_remaining = budget - current_cost_ref[0]
|
|
1005
|
+
if current_cost_ref[0] >= budget:
|
|
1006
|
+
errors.append(f"Budget of ${budget:.2f} exceeded.")
|
|
1007
|
+
log_sync_event(basename, language, "budget_exceeded", {
|
|
1008
|
+
"total_cost": current_cost_ref[0],
|
|
1009
|
+
"budget": budget
|
|
1010
|
+
})
|
|
1011
|
+
break
|
|
1012
|
+
|
|
1013
|
+
if budget_remaining < budget * 0.2 and budget_remaining > 0:
|
|
1014
|
+
log_sync_event(basename, language, "budget_warning", {
|
|
1015
|
+
"remaining": budget_remaining,
|
|
1016
|
+
"percentage": (budget_remaining / budget) * 100
|
|
1017
|
+
})
|
|
1018
|
+
|
|
1019
|
+
decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
|
|
1020
|
+
operation = decision.operation
|
|
1021
|
+
|
|
1022
|
+
log_entry = create_sync_log_entry(decision, budget_remaining)
|
|
1023
|
+
operation_history.append(operation)
|
|
1024
|
+
|
|
1025
|
+
# Cycle detection logic
|
|
1026
|
+
if len(operation_history) >= 3:
|
|
1027
|
+
recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
|
|
1028
|
+
if len(recent_auto_deps) >= 2:
|
|
1029
|
+
errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
|
|
1030
|
+
log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
|
|
1031
|
+
operation = 'generate'
|
|
1032
|
+
decision.operation = 'generate' # Update decision too
|
|
1033
|
+
|
|
1034
|
+
# Bug #4 fix: Detect crash-verify cycle pattern
|
|
1035
|
+
# The pattern [crash, verify, crash, verify] or [verify, crash, verify, crash]
|
|
1036
|
+
# represents 2 iterations of the alternating cycle, so break immediately
|
|
1037
|
+
if len(operation_history) >= 4:
|
|
1038
|
+
recent_ops = operation_history[-4:]
|
|
1039
|
+
if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
|
|
1040
|
+
recent_ops == ['verify', 'crash', 'verify', 'crash']):
|
|
1041
|
+
# Pattern detected - this represents MAX_CYCLE_REPEATS iterations
|
|
1042
|
+
errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
|
|
1043
|
+
log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
|
|
1044
|
+
break
|
|
1045
|
+
|
|
1046
|
+
# Bug #4 fix: Detect test-fix cycle pattern
|
|
1047
|
+
# The pattern [test, fix, test, fix] or [fix, test, fix, test]
|
|
1048
|
+
# represents 2 iterations of the alternating cycle, so break immediately
|
|
1049
|
+
if len(operation_history) >= 4:
|
|
1050
|
+
recent_ops = operation_history[-4:]
|
|
1051
|
+
if (recent_ops == ['test', 'fix', 'test', 'fix'] or
|
|
1052
|
+
recent_ops == ['fix', 'test', 'fix', 'test']):
|
|
1053
|
+
# Pattern detected - this represents MAX_CYCLE_REPEATS iterations
|
|
1054
|
+
errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
|
|
1055
|
+
log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
|
|
1056
|
+
break
|
|
1057
|
+
|
|
1058
|
+
if operation == 'fix':
|
|
1059
|
+
consecutive_fixes = 0
|
|
1060
|
+
for i in range(len(operation_history) - 1, -1, -1):
|
|
1061
|
+
if operation_history[i] == 'fix':
|
|
1062
|
+
consecutive_fixes += 1
|
|
355
1063
|
else:
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
+                            break
+                    if consecutive_fixes >= 5:
+                        errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
+                        break
+
+                if operation == 'test':
+                    consecutive_tests = 0
+                    for i in range(len(operation_history) - 1, -1, -1):
+                        if operation_history[i] == 'test':
+                            consecutive_tests += 1
+                        else:
+                            break
+                    if consecutive_tests >= MAX_CONSECUTIVE_TESTS:
+                        errors.append(f"Detected {consecutive_tests} consecutive test operations. Breaking infinite test loop.")
+                        break
+
+                # Bug #157 fix: Prevent infinite crash retry loops
+                if operation == 'crash':
+                    consecutive_crashes = 0
+                    for i in range(len(operation_history) - 1, -1, -1):
+                        if operation_history[i] == 'crash':
+                            consecutive_crashes += 1
+                        else:
+                            break
+                    if consecutive_crashes >= MAX_CONSECUTIVE_CRASHES:
+                        errors.append(f"Detected {consecutive_crashes} consecutive crash operations. Breaking infinite crash loop.")
+                        break
+
+                if operation == 'test_extend':
+                    # Count test_extend attempts to prevent infinite loop
+                    extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
+                    if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
+                        # Accept current coverage after max attempts
+                        log_sync_event(basename, language, "test_extend_limit", {
+                            "attempts": extend_attempts,
+                            "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
+                            "reason": "Accepting current coverage after max extend attempts"
+                        })
+                        success = True
+                        break
+
+                if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
+                    current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
+                    success = operation in ['all_synced', 'nothing']
+                    error_msg = None
+                    if operation == 'fail_and_request_manual_merge':
+                        errors.append(f"Manual merge required: {decision.reason}")
+                        error_msg = decision.reason
+                    elif operation == 'error':
+                        errors.append(f"Error determining operation: {decision.reason}")
+                        error_msg = decision.reason
+                    elif operation == 'analyze_conflict':
+                        errors.append(f"Conflict detected: {decision.reason}")
+                        error_msg = decision.reason

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    break
+
+                # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
+                # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
+                if operation == 'verify' and (skip_verify or skip_tests):
+                    skipped_operations.append('verify')
+                    update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                    _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
+                    continue
+                if operation == 'test' and skip_tests:
+                    skipped_operations.append('test')
+                    update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                    _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
+                    continue
+                if operation == 'crash' and (skip_tests or skip_verify):
+                    skipped_operations.append('crash')
+                    update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                    _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
+                    # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
+                    # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
+                    current_hashes = calculate_current_hashes(pdd_files)
+                    synthetic_report = RunReport(
+                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                        exit_code=0,  # Assume success since we're skipping validation
+                        tests_passed=0,
+                        tests_failed=0,
+                        coverage=0.0,
+                        test_hash=current_hashes.get('test_hash')
+                    )
+                    save_run_report(asdict(synthetic_report), basename, language)
+                    continue
+
+                current_function_name_ref[0] = operation
+                ctx = _create_mock_context(
+                    force=force, strength=strength, temperature=temperature, time=time_param,
+                    verbose=verbose, quiet=quiet, output_cost=output_cost,
+                    review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
+                    max_attempts=max_attempts, target_coverage=target_coverage,
+                    confirm_callback=get_confirm_callback(),
+                    context=context_override
+                )
+
+                result = {}
+                success = False
+                op_start_time = time.time()
+
+                # Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
+                with AtomicStateUpdate(basename, language) as atomic_state:
+
+                    # --- Execute Operation ---
+                    try:
+                        if operation == 'auto-deps':
+                            temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
+                            original_content = pdd_files['prompt'].read_text(encoding='utf-8')
+                            result = auto_deps_main(
+                                ctx,
+                                prompt_file=str(pdd_files['prompt']),
+                                directory_path=examples_dir,
+                                auto_deps_csv_path="project_dependencies.csv",
+                                output=temp_output,
+                                force_scan=False,
+                                progress_callback=progress_callback_ref[0]
                             )
-
-
-
-
-
-
-
+                            if Path(temp_output).exists():
+                                import shutil
+                                new_content = Path(temp_output).read_text(encoding='utf-8')
+                                if new_content != original_content:
+                                    shutil.move(temp_output, str(pdd_files['prompt']))
+                                else:
+                                    Path(temp_output).unlink()
+                                result = (new_content, 0.0, 'no-changes')
+                        elif operation == 'generate':
+                            result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt']), output=str(pdd_files['code']), original_prompt_file_path=None, force_incremental_flag=False)
+                            # Clear stale run_report so crash/verify is required for newly generated code
+                            run_report_file = META_DIR / f"{basename}_{language}_run.json"
+                            run_report_file.unlink(missing_ok=True)
+                        elif operation == 'example':
+                            result = context_generator_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['example']))
+                        elif operation == 'crash':
+                            required_files = [pdd_files['code'], pdd_files['example']]
+                            missing_files = [f for f in required_files if not f.exists()]
+                            if missing_files:
                                 skipped_operations.append('crash')
-                                report_data = RunReport(
-                                    timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                    exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                                )
-                                save_run_report(asdict(report_data), basename, language)
-                                _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_llm_error')
                                 continue
+
+                            # Crash handling logic (simplified copy from original)
+                            current_run_report = read_run_report(basename, language)
+                            crash_log_content = ""
+
+                            # Check for crash condition (either run report says so, or we check manually)
+                            has_crash = False
+                            if current_run_report and current_run_report.exit_code != 0:
+                                has_crash = True
+                                crash_log_content = f"Test execution failed exit code: {current_run_report.exit_code}\n"
                             else:
-                                #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                                # Manual check - run the example to see if it crashes
+                                env = os.environ.copy()
+                                src_dir = Path.cwd() / 'src'
+                                env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
+                                # Remove TUI-specific env vars that might contaminate subprocess
+                                for var in ['FORCE_COLOR', 'COLUMNS']:
+                                    env.pop(var, None)
+                                # Get language-appropriate run command from language_format.csv
+                                example_path = str(pdd_files['example'])
+                                run_cmd = get_run_command_for_file(example_path)
+                                if run_cmd:
+                                    # Use the language-specific interpreter (e.g., node for .js)
+                                    cmd_parts = run_cmd.split()
+                                else:
+                                    # Fallback to Python if no run command found
+                                    cmd_parts = ['python', example_path]
+                                # Use error-detection runner that handles server-style examples
+                                returncode, stdout, stderr = _run_example_with_error_detection(
+                                    cmd_parts,
+                                    env=env,
+                                    cwd=str(pdd_files['example'].parent),
+                                    timeout=60
+                                )
+
+                                class ExampleResult:
+                                    def __init__(self, rc, out, err):
+                                        self.returncode = rc
+                                        self.stdout = out
+                                        self.stderr = err
+
+                                ex_res = ExampleResult(returncode, stdout, stderr)
+                                if ex_res.returncode != 0:
+                                    has_crash = True
+                                    crash_log_content = f"Example failed exit code: {ex_res.returncode}\nSTDOUT:\n{ex_res.stdout}\nSTDERR:\n{ex_res.stderr}\n"
+                                    if "SyntaxError" in ex_res.stderr:
+                                        crash_log_content = "SYNTAX ERROR DETECTED:\n" + crash_log_content
+                                else:
+                                    # No crash - save run report with exit_code=0 so sync_determine_operation
+                                    # knows the example was tested and passed (prevents infinite loop)
+                                    # Include test_hash for staleness detection
+                                    test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                    report = RunReport(
+                                        datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                        exit_code=0,
+                                        tests_passed=1,
+                                        tests_failed=0,
+                                        coverage=0.0,
+                                        test_hash=test_hash
+                                    )
+                                    save_run_report(asdict(report), basename, language)
+                                    skipped_operations.append('crash')
+                                    continue
+
+                            if has_crash:
+                                # Try auto-fix for common import errors before expensive agentic call
+                                auto_fixed, auto_fix_msg = _try_auto_fix_import_error(
+                                    crash_log_content,
+                                    pdd_files['code'],
+                                    pdd_files['example']
+                                )
+                                if auto_fixed:
+                                    log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
+                                    # Retry running the example after auto-fix
+                                    retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
+                                        cmd_parts,
+                                        env=env,
+                                        cwd=str(pdd_files['example'].parent),
+                                        timeout=60
+                                    )
+                                    if retry_returncode == 0:
+                                        # Auto-fix worked! Save run report and continue
+                                        log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
+                                        test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                        report = RunReport(
+                                            datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                            exit_code=0,
+                                            tests_passed=1,
+                                            tests_failed=0,
+                                            coverage=0.0,
+                                            test_hash=test_hash
+                                        )
+                                        save_run_report(asdict(report), basename, language)
+                                        result = (True, 0.0, 'auto-fix')
+                                        success = True
+                                        actual_cost = 0.0
+                                        model_name = 'auto-fix'
+                                        # Update crash_log_content for logging
+                                        crash_log_content = f"Auto-fixed: {auto_fix_msg}"
+                                        continue  # Skip crash_main, move to next operation
+                                    else:
+                                        # Auto-fix didn't fully work, update error log and proceed
+                                        crash_log_content = f"Auto-fix attempted ({auto_fix_msg}) but still failing:\nRETRY STDOUT:\n{retry_stdout}\nRETRY STDERR:\n{retry_stderr}\n"
+
+                                Path("crash.log").write_text(crash_log_content)
+                                try:
+                                    result = crash_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), error_file="crash.log", output=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
+                                except Exception as e:
+                                    print(f"Crash fix failed: {e}")
+                                    skipped_operations.append('crash')
+                                    continue
+
+                        elif operation == 'verify':
+                            if not pdd_files['example'].exists():
+                                skipped_operations.append('verify')
+                                continue
+                            result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
+                        elif operation == 'test':
+                            pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
+                            # Use merge=True when test file exists to preserve fixes and append new tests
+                            # instead of regenerating from scratch (which would overwrite fixes)
+                            test_file_exists = pdd_files['test'].exists()
+                            result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=[str(pdd_files['test'])] if test_file_exists else None, target_coverage=target_coverage, merge=test_file_exists, strength=strength, temperature=temperature)
+                            if pdd_files['test'].exists():
                                 _execute_tests_and_create_run_report(
-
+                                    pdd_files['test'],
+                                    basename,
+                                    language,
+                                    target_coverage,
+                                    code_file=pdd_files.get("code"),
+                                    atomic_state=atomic_state,
+                                    test_files=pdd_files.get('test_files'),  # Bug #156
+                                )
+                        elif operation == 'test_extend':
+                            # Extend existing tests to improve coverage
+                            # Uses existing_tests and merge=True to add more test cases
+                            pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
+                            if pdd_files['test'].exists():
+                                existing_test_path = str(pdd_files['test'])
+                                result = cmd_test_main(
+                                    ctx,
+                                    prompt_file=str(pdd_files['prompt']),
+                                    code_file=str(pdd_files['code']),
+                                    output=str(pdd_files['test']),
+                                    language=language,
+                                    coverage_report=None,
+                                    existing_tests=[existing_test_path],
+                                    target_coverage=target_coverage,
+                                    merge=True,
+                                    strength=strength,
+                                    temperature=temperature
                                 )
-                        except Exception as e:
-                            # Don't fail the entire operation if test execution fails
-                            # Just log it - the test file generation was successful
-                            print(f"Warning: Test execution failed: {e}")
-                        elif isinstance(result, tuple) and len(result) >= 3:
-                            # Handle tuple return format - assume success and execute tests
-                            try:
-                                test_file = pdd_files['test']
-                                if test_file.exists():
                                 _execute_tests_and_create_run_report(
-
+                                    pdd_files['test'],
+                                    basename,
+                                    language,
+                                    target_coverage,
+                                    code_file=pdd_files.get("code"),
+                                    atomic_state=atomic_state,
+                                    test_files=pdd_files.get('test_files'),  # Bug #156
                                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                            else:
+                                # No existing test file, fall back to regular test generation
+                                result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=None, target_coverage=target_coverage, merge=False, strength=strength, temperature=temperature)
+                                if pdd_files['test'].exists():
+                                    _execute_tests_and_create_run_report(
+                                        pdd_files['test'],
+                                        basename,
+                                        language,
+                                        target_coverage,
+                                        code_file=pdd_files.get("code"),
+                                        atomic_state=atomic_state,
+                                        test_files=pdd_files.get('test_files'),  # Bug #156
+                                    )
+                        elif operation == 'fix':
+                            error_file_path = Path("fix_errors.log")
+                            # Capture errors using language-appropriate test command
+                            try:
+                                from .get_test_command import get_test_command_for_file
+                                test_cmd = get_test_command_for_file(str(pdd_files['test']), language)
+
+                                # Use clean env without TUI-specific vars
+                                clean_env = os.environ.copy()
+                                for var in ['FORCE_COLOR', 'COLUMNS']:
+                                    clean_env.pop(var, None)
+
+                                if test_cmd:
+                                    # Run language-appropriate test command
+                                    if language.lower() == 'python':
+                                        # Use pytest directly for Python
+                                        python_executable = detect_host_python_executable()
+                                        # Bug #156: Run pytest on ALL matching test files
+                                        test_files = pdd_files.get('test_files', [pdd_files['test']])
+                                        pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
+                                        test_result = subprocess.run(
+                                            pytest_args,
+                                            capture_output=True, text=True, timeout=300,
+                                            stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True,
+                                            cwd=str(pdd_files['test'].parent)
+                                        )
+                                    else:
+                                        # Use shell command for non-Python
+                                        test_result = subprocess.run(
+                                            test_cmd,
+                                            shell=True,
+                                            capture_output=True, text=True, timeout=300,
+                                            stdin=subprocess.DEVNULL, env=clean_env,
+                                            cwd=str(pdd_files['test'].parent),
+                                            start_new_session=True
+                                        )
+                                    error_content = f"Test output:\n{test_result.stdout}\n{test_result.stderr}"
+                                else:
+                                    # No test command available - trigger agentic fallback with context
+                                    error_content = f"No test command available for {language}. Please run tests manually and provide error output."
+                            except Exception as e:
+                                error_content = f"Test execution error: {e}"
+                            error_file_path.write_text(error_content)
+
+                            # Bug #156 fix: Parse pytest output to find actual failing files
+                            # and pass the correct file to fix_main
+                            failing_files = extract_failing_files_from_output(error_content)
+                            unit_test_file_for_fix = str(pdd_files['test'])  # Default to tracked file
+
+                            if failing_files:
+                                # Try to resolve the failing file paths
+                                test_dir = pdd_files['test'].parent
+                                tracked_file_name = pdd_files['test'].name
+
+                                # Check if the tracked file is among the failures
+                                tracked_in_failures = any(
+                                    Path(ff).name == tracked_file_name for ff in failing_files
+                                )
+
+                                if not tracked_in_failures:
+                                    # Failures are in a different file - use the first failing file
+                                    for ff in failing_files:
+                                        # Try to resolve the path relative to test directory
+                                        ff_path = Path(ff)
+                                        if ff_path.is_absolute() and ff_path.exists():
+                                            unit_test_file_for_fix = str(ff_path)
+                                            break
+                                        else:
+                                            # Try to find it in the test directory
+                                            candidate = test_dir / ff_path.name
+                                            if candidate.exists():
+                                                unit_test_file_for_fix = str(candidate)
+                                                break
+                                            # Also try the path as-is relative to cwd
+                                            if ff_path.exists():
+                                                unit_test_file_for_fix = str(ff_path.resolve())
+                                                break
+
+                            result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
+                        elif operation == 'update':
+                            result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
                         else:
-
+                            errors.append(f"Unknown operation {operation}")
+                            result = {'success': False}
+
+                        # Result parsing
+                        if isinstance(result, dict):
+                            success = result.get('success', False)
+                            current_cost_ref[0] += result.get('cost', 0.0)
+                        elif isinstance(result, tuple) and len(result) >= 3:
+                            if operation == 'test': success = pdd_files['test'].exists()
+                            else: success = bool(result[0])
+                            cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
+                            current_cost_ref[0] += cost
+                        else:
+                            success = result is not None
+
                     except Exception as e:
-
-
-                        error_file_path.write_text(error_content)
-
-                        result = fix_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            code_file=str(pdd_files['code']),
-                            unit_test_file=str(pdd_files['test']),
-                            error_file=str(error_file_path),
-                            output_test=str(pdd_files['test']),
-                            output_code=str(pdd_files['code']),
-                            output_results=f"{basename}_fix_results.log",
-                            loop=False,
-                            verification_program=None,
-                            max_attempts=max_attempts,
-                            budget=budget - current_cost_ref[0],
-                            auto_submit=False
-                        )
-                    elif operation == 'update':
-                        result = update_main(
-                            ctx,
-                            input_prompt_file=str(pdd_files['prompt']),
-                            modified_code_file=str(pdd_files['code']),
-                            input_code_file=None,
-                            output=str(pdd_files['prompt']),
-                            git=True
-                        )
-                    else:
-                        errors.append(f"Unknown operation '{operation}' requested.")
-                        result = {'success': False, 'cost': 0.0}
+                        errors.append(f"Exception during '{operation}': {e}")
+                        success = False

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        success
-
-
-                    except Exception as e:
-                        errors.append(f"Exception during '{operation}': {e}")
-                        success = False
+                    # Log update
+                    duration = time.time() - op_start_time
+                    actual_cost = 0.0
+                    model_name = "unknown"
+                    if success:
+                        if isinstance(result, dict):
+                            actual_cost = result.get('cost', 0.0)
+                            model_name = result.get('model', 'unknown')
+                        elif isinstance(result, tuple) and len(result) >= 3:
+                            actual_cost = result[-2] if len(result) >= 2 else 0.0
+                            model_name = result[-1] if len(result) >= 1 else 'unknown'
+                        last_model_name = str(model_name)
+                        operations_completed.append(operation)
+                        _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
+
+                    update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
+                    append_sync_log(basename, language, log_entry)

-
-
-
-
-
-
-
-
-
-
-
-
-
+                    # Post-operation checks (simplified)
+                    if success and operation == 'crash':
+                        # Re-run example to verify crash fix worked
+                        try:
+                            # Use clean env without TUI-specific vars
+                            clean_env = os.environ.copy()
+                            for var in ['FORCE_COLOR', 'COLUMNS']:
+                                clean_env.pop(var, None)
+                            # Get language-appropriate run command
+                            example_path = str(pdd_files['example'])
+                            run_cmd = get_run_command_for_file(example_path)
+                            if run_cmd:
+                                cmd_parts = run_cmd.split()
+                            else:
+                                cmd_parts = ['python', example_path]
+                            # Use error-detection runner that handles server-style examples
+                            returncode, stdout, stderr = _run_example_with_error_detection(
+                                cmd_parts,
+                                env=clean_env,
+                                cwd=str(pdd_files['example'].parent),
+                                timeout=60
+                            )
+                            # Include test_hash for staleness detection
+                            test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                            report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
+                            save_run_report(asdict(report), basename, language)
+                        except Exception as e:
+                            # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
+                            error_msg = f"Post-crash verification failed: {e}"
+                            errors.append(error_msg)
+                            log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})

-
-
-
-
-                            if test_file.exists():
+                    if success and operation == 'fix':
+                        # Re-run tests to update run_report after successful fix
+                        # This prevents infinite loop by updating the state machine
+                        if pdd_files['test'].exists():
                             _execute_tests_and_create_run_report(
-
+                                pdd_files['test'],
+                                basename,
+                                language,
+                                target_coverage,
+                                code_file=pdd_files.get("code"),
+                                atomic_state=atomic_state,
+                                test_files=pdd_files.get('test_files'),  # Bug #156
                             )
-
-
-
-
-                            errors.append(f"Operation '{operation}' failed.")
-                            break
+
+                    if not success:
+                        errors.append(f"Operation '{operation}' failed.")
+                        break

-
-
-
-
-
-
-
-
-
+        except BaseException as e:
+            errors.append(f"An unexpected error occurred in the orchestrator: {type(e).__name__}: {e}")
+            # Log the full traceback for debugging
+            import traceback
+            traceback.print_exc()
+        finally:
+            try:
+                log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
+            except: pass
+
+        # Return result dict
+        return {
+            'success': not errors,
+            'operations_completed': operations_completed,
+            'skipped_operations': skipped_operations,
+            'total_cost': current_cost_ref[0],
+            'total_time': time.time() - start_time,
+            'final_state': {p: {'exists': f.exists(), 'path': str(f)} for p, f in pdd_files.items() if p != 'test_files'},
+            'errors': errors,
+            'error': "; ".join(errors) if errors else None,  # Add this line
+            'model_name': last_model_name,
+        }
+
+    # Instantiate and run Textual App
+    app = SyncApp(
+        basename=basename,
+        budget=budget,
+        worker_func=sync_worker_logic,
+        function_name_ref=current_function_name_ref,
+        cost_ref=current_cost_ref,
+        prompt_path_ref=prompt_path_ref,
+        code_path_ref=code_path_ref,
+        example_path_ref=example_path_ref,
+        tests_path_ref=tests_path_ref,
+        prompt_color_ref=prompt_box_color_ref,
+        code_color_ref=code_box_color_ref,
+        example_color_ref=example_box_color_ref,
+        tests_color_ref=tests_box_color_ref,
+        stop_event=stop_event,
+        progress_callback_ref=progress_callback_ref
+    )
+
+    # Store app reference so worker can access request_confirmation
+    app_ref[0] = app
+
+    result = app.run()
+
+    # Show exit animation if not quiet
+    if not quiet:
+        from .sync_tui import show_exit_animation
+        show_exit_animation()
+
+    # Check for worker exception that might have caused a crash
+    if app.worker_exception:
+        print(f"\n[Error] Worker thread crashed with exception: {app.worker_exception}", file=sys.stderr)

-
-
-
-
-
+        if hasattr(app, 'captured_logs') and app.captured_logs:
+            print("\n[Captured Logs (last 20 lines)]", file=sys.stderr)
+            for line in app.captured_logs[-20:]:  # Print last 20 lines
+                print(f" {line}", file=sys.stderr)
+
+        import traceback
+        # Use trace module to print the stored exception's traceback if available
+        if hasattr(app.worker_exception, '__traceback__'):
+            traceback.print_exception(type(app.worker_exception), app.worker_exception, app.worker_exception.__traceback__, file=sys.stderr)
+
+    if result is None:
+        return {
+            "success": False,
+            "total_cost": current_cost_ref[0],
+            "model_name": "",
+            "error": "Sync process interrupted or returned no result.",
+            "operations_completed": [],
+            "errors": ["App exited without result"]
+        }

-    return
-        'success': not errors,
-        'operations_completed': operations_completed,
-        'skipped_operations': skipped_operations,
-        'total_cost': current_cost_ref[0],
-        'total_time': total_time,
-        'final_state': final_state,
-        'errors': errors,
-    }
+    return result

 if __name__ == '__main__':
-    # Example usage
-    # This simulates running `pdd sync my_calculator` from the command line.
-
-    print("--- Running Basic Sync Orchestration Example ---")
-
-    # Setup a dummy project structure
+    # Example usage
     Path("./prompts").mkdir(exist_ok=True)
     Path("./src").mkdir(exist_ok=True)
     Path("./examples").mkdir(exist_ok=True)
     Path("./tests").mkdir(exist_ok=True)
     Path("./prompts/my_calculator_python.prompt").write_text("Create a calculator.")
-
-    # Ensure PDD meta directory exists for logs and locks
     PDD_DIR.mkdir(exist_ok=True)
     META_DIR.mkdir(exist_ok=True)
-
-    result = sync_orchestration(
-        basename="my_calculator",
-        language="python",
-        quiet=True  # Suppress mock command output for cleaner example run
-    )
-
-    print("\n--- Sync Orchestration Finished ---")
+    result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
     print(json.dumps(result, indent=2))
-
-    if result['success']:
-        print("\n✅ Sync completed successfully.")
-    else:
-        print(f"\n❌ Sync failed. Errors: {result['errors']}")
-
-    print("\n--- Running Sync Log Example ---")
-    # This will now show the log from the run we just completed.
-    log_result = sync_orchestration(
-        basename="my_calculator",
-        language="python",
-        log=True
-    )