pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +73 -21
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +258 -82
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +330 -76
- pdd/fix_error_loop.py +207 -61
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +306 -272
- pdd/fix_verification_main.py +28 -9
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +9 -2
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1269 -103
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +100 -905
- pdd/prompts/detect_change_LLM.prompt +122 -20
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +228 -108
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +49 -6
- pdd/sync_determine_operation.py +543 -98
- pdd/sync_main.py +81 -31
- pdd/sync_orchestration.py +1334 -751
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py
CHANGED
@@ -12,21 +12,32 @@ import subprocess
 import re
 import os
 from pathlib import Path
-from typing import Dict, Any, Optional, List
-from dataclasses import asdict
+from typing import Dict, Any, Optional, List, Callable
+from dataclasses import asdict, dataclass, field
+import tempfile
+import sys

 import click
+import logging
+
+# --- Constants ---
+MAX_CONSECUTIVE_TESTS = 3  # Allow up to 3 consecutive test attempts
+MAX_TEST_EXTEND_ATTEMPTS = 2  # Allow up to 2 attempts to extend tests for coverage
+MAX_CONSECUTIVE_CRASHES = 3  # Allow up to 3 consecutive crash attempts (Bug #157 fix)

 # --- Real PDD Component Imports ---
-from .
+from .sync_tui import SyncApp
 from .sync_determine_operation import (
     sync_determine_operation,
     get_pdd_file_paths,
     RunReport,
+    SyncDecision,
     PDD_DIR,
     META_DIR,
     SyncLock,
     read_run_report,
+    calculate_sha256,
+    calculate_current_hashes,
 )
 from .auto_deps_main import auto_deps_main
 from .code_generator_main import code_generator_main
@@ -37,6 +48,104 @@ from .cmd_test_main import cmd_test_main
 from .fix_main import fix_main
 from .update_main import update_main
 from .python_env_detector import detect_host_python_executable
+from .get_run_command import get_run_command_for_file
+from .pytest_output import extract_failing_files_from_output
+from . import DEFAULT_STRENGTH
+
+
+# --- Atomic State Update (Issue #159 Fix) ---
+
+@dataclass
+class PendingStateUpdate:
+    """Holds pending state updates for atomic commit."""
+    run_report: Optional[Dict[str, Any]] = None
+    fingerprint: Optional[Dict[str, Any]] = None
+    run_report_path: Optional[Path] = None
+    fingerprint_path: Optional[Path] = None
+
+
+class AtomicStateUpdate:
+    """
+    Context manager for atomic state updates.
+
+    Ensures run_report and fingerprint are both written or neither is written.
+    This fixes Issue #159 where non-atomic writes caused state desynchronization.
+
+    Usage:
+        with AtomicStateUpdate(basename, language) as state:
+            state.set_run_report(report_dict, report_path)
+            state.set_fingerprint(fingerprint_dict, fp_path)
+            # On successful exit, both files are written atomically
+            # On exception, neither file is written (rollback)
+    """
+
+    def __init__(self, basename: str, language: str):
+        self.basename = basename
+        self.language = language
+        self.pending = PendingStateUpdate()
+        self._temp_files: List[str] = []
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is None:
+            self._commit()
+        else:
+            self._rollback()
+        return False  # Don't suppress exceptions
+
+    def set_run_report(self, report: Dict[str, Any], path: Path):
+        """Buffer a run report for atomic write."""
+        self.pending.run_report = report
+        self.pending.run_report_path = path
+
+    def set_fingerprint(self, fingerprint: Dict[str, Any], path: Path):
+        """Buffer a fingerprint for atomic write."""
+        self.pending.fingerprint = fingerprint
+        self.pending.fingerprint_path = path
+
+    def _atomic_write(self, data: Dict[str, Any], target_path: Path) -> None:
+        """Write data to file atomically using temp file + rename pattern."""
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write to temp file in same directory (required for atomic rename)
+        fd, temp_path = tempfile.mkstemp(
+            dir=target_path.parent,
+            prefix=f".{target_path.stem}_",
+            suffix=".tmp"
+        )
+        self._temp_files.append(temp_path)
+
+        try:
+            with os.fdopen(fd, 'w') as f:
+                json.dump(data, f, indent=2, default=str)
+
+            # Atomic rename - guaranteed atomic on POSIX systems
+            os.replace(temp_path, target_path)
+            self._temp_files.remove(temp_path)  # Successfully moved, stop tracking
+        except Exception:
+            # Leave temp file for rollback to clean up
+            raise
+
+    def _commit(self):
+        """Commit all pending state updates atomically."""
+        # Write fingerprint first (checkpoint), then run_report
+        if self.pending.fingerprint and self.pending.fingerprint_path:
+            self._atomic_write(self.pending.fingerprint, self.pending.fingerprint_path)
+        if self.pending.run_report and self.pending.run_report_path:
+            self._atomic_write(self.pending.run_report, self.pending.run_report_path)
+
+    def _rollback(self):
+        """Clean up any temp files without committing changes."""
+        for temp_path in self._temp_files:
+            try:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+            except OSError:
+                pass  # Best effort cleanup
+        self._temp_files.clear()
+

 # --- Mock Helper Functions ---

@@ -98,20 +207,44 @@ def log_sync_event(basename: str, language: str, event: str, details: Dict[str,
     }
     append_sync_log(basename, language, entry)

-def save_run_report(report: Dict[str, Any], basename: str, language: str
-
+def save_run_report(report: Dict[str, Any], basename: str, language: str,
+                    atomic_state: Optional['AtomicStateUpdate'] = None):
+    """Save a run report to the metadata directory.
+
+    Args:
+        report: The run report dictionary to save.
+        basename: The module basename.
+        language: The programming language.
+        atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+    """
     report_file = META_DIR / f"{basename}_{language}_run.json"
-
-
-
+    if atomic_state:
+        # Buffer for atomic write
+        atomic_state.set_run_report(report, report_file)
+    else:
+        # Legacy direct write
+        META_DIR.mkdir(parents=True, exist_ok=True)
+        with open(report_file, 'w') as f:
+            json.dump(report, f, indent=2, default=str)
+
+def _save_operation_fingerprint(basename: str, language: str, operation: str,
+                                paths: Dict[str, Path], cost: float, model: str,
+                                atomic_state: Optional['AtomicStateUpdate'] = None):
+    """Save fingerprint state after successful operation.

-
-
-
+    Args:
+        basename: The module basename.
+        language: The programming language.
+        operation: The operation that was performed.
+        paths: Dictionary of PDD file paths.
+        cost: The cost of the operation.
+        model: The model used.
+        atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+    """
     from datetime import datetime, timezone
     from .sync_determine_operation import calculate_current_hashes, Fingerprint
     from . import __version__
-
+
     current_hashes = calculate_current_hashes(paths)
     fingerprint = Fingerprint(
         pdd_version=__version__,
@@ -120,103 +253,544 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
         prompt_hash=current_hashes.get('prompt_hash'),
         code_hash=current_hashes.get('code_hash'),
         example_hash=current_hashes.get('example_hash'),
-        test_hash=current_hashes.get('test_hash')
+        test_hash=current_hashes.get('test_hash'),
+        test_files=current_hashes.get('test_files'),  # Bug #156
     )
-
-    META_DIR.mkdir(parents=True, exist_ok=True)
+
     fingerprint_file = META_DIR / f"{basename}_{language}.json"
-
-
+    if atomic_state:
+        # Buffer for atomic write
+        atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
+    else:
+        # Legacy direct write
+        META_DIR.mkdir(parents=True, exist_ok=True)
+        with open(fingerprint_file, 'w') as f:
+            json.dump(asdict(fingerprint), f, indent=2, default=str)

-
+def _python_cov_target_for_code_file(code_file: Path) -> str:
+    """Return a `pytest-cov` `--cov` target for a Python code file.
+
+    - If the file is inside a Python package (directories with `__init__.py`),
+      returns a dotted module path (e.g., `pdd.sync_orchestration`).
+    - Otherwise falls back to the filename stem (e.g., `admin_get_users`).
+    """
+    if code_file.suffix != ".py":
+        return code_file.stem
+
+    package_dir: Optional[Path] = None
+    current = code_file.parent
+    while (current / "__init__.py").exists():
+        package_dir = current
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+
+    if package_dir:
+        relative_module = code_file.relative_to(package_dir.parent).with_suffix("")
+        return str(relative_module).replace(os.sep, ".")
+
+    return code_file.stem
+
+
+def _python_cov_target_for_test_and_code(test_file: Path, code_file: Path, fallback: str) -> str:
+    """Choose the best `--cov` target based on how tests import the code.
+
+    In some repos, tests add a directory to `sys.path` and import modules by their
+    filename stem (e.g., `from admin_get_users import ...`) even when the code
+    also lives under a package (e.g., `backend.functions.admin_get_users`).
+
+    Heuristic:
+    - Prefer the code file stem when the test file imports it directly.
+    - Otherwise, prefer the dotted module path derived from the package layout.
+    - Fall back to the provided fallback (usually the basename).
+    """
+
+    def _imports_module(source: str, module: str) -> bool:
+        escaped = re.escape(module)
+        return bool(
+            re.search(rf"^\s*import\s+{escaped}\b", source, re.MULTILINE)
+            or re.search(rf"^\s*from\s+{escaped}\b", source, re.MULTILINE)
+        )
+
+    stem = code_file.stem
+    dotted = _python_cov_target_for_code_file(code_file)

-def _execute_tests_and_create_run_report(test_file: Path, basename: str, language: str, target_coverage: float = 90.0) -> RunReport:
-    """Execute tests and create a RunReport with actual results."""
-    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
-
    try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        test_source = test_file.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        test_source = ""
+
+    if stem and _imports_module(test_source, stem):
+        return stem
+
+    if dotted and dotted != stem:
+        if _imports_module(test_source, dotted):
+            return dotted
+
+        if "." in dotted:
+            parent = dotted.rsplit(".", 1)[0]
+            # e.g. `from backend.functions import admin_get_users`
+            if re.search(
+                rf"^\s*from\s+{re.escape(parent)}\s+import\s+.*\b{re.escape(stem)}\b",
+                test_source,
+                re.MULTILINE,
+            ):
+                return dotted
+            # e.g. `import backend.functions.admin_get_users`
+            if re.search(
+                rf"^\s*import\s+{re.escape(parent)}\.{re.escape(stem)}\b",
+                test_source,
+                re.MULTILINE,
+            ):
+                return dotted
+
+        return dotted
+
+    return stem or fallback
+
+
+def _parse_test_output(output: str, language: str) -> tuple[int, int, float]:
+    """
+    Parse test output to extract passed/failed/coverage.
+
+    Args:
+        output: Combined stdout/stderr from test runner
+        language: Language name (e.g., 'python', 'typescript', 'go')
+
+    Returns:
+        (tests_passed, tests_failed, coverage)
+    """
+    tests_passed = 0
+    tests_failed = 0
+    coverage = 0.0
+
+    lang = language.lower()
+
+    # Python (pytest)
+    if lang == 'python':
+        if 'passed' in output:
+            passed_match = re.search(r'(\d+) passed', output)
            if passed_match:
                tests_passed = int(passed_match.group(1))
-
-
-        failed_match = re.search(r'(\d+) failed', stdout)
+        if 'failed' in output:
+            failed_match = re.search(r'(\d+) failed', output)
            if failed_match:
                tests_failed = int(failed_match.group(1))
-
-
-
+        if 'error' in output:
+            error_match = re.search(r'(\d+) error', output)
+            if error_match:
+                tests_failed += int(error_match.group(1))
+        coverage_match = re.search(r'TOTAL.*?(\d+)%', output)
        if not coverage_match:
-
-            coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
+            coverage_match = re.search(r'(\d+)%\s*$', output, re.MULTILINE)
        if not coverage_match:
-
-            coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
-
+            coverage_match = re.search(r'(\d+(?:\.\d+)?)%', output)
        if coverage_match:
            coverage = float(coverage_match.group(1))
-
-
+
+    # Jest/Vitest (JavaScript/TypeScript)
+    elif lang in ('javascript', 'typescript', 'typescriptreact'):
+        # "Tests: X passed, Y failed" or "Tests: X passed, Y failed, Z total"
+        match = re.search(r'Tests:\s*(\d+)\s+passed', output)
+        if match:
+            tests_passed = int(match.group(1))
+        match = re.search(r'Tests:.*?(\d+)\s+failed', output)
+        if match:
+            tests_failed = int(match.group(1))
+
+        # Alternative Mocha-style: "X passing, Y failing"
+        if tests_passed == 0:
+            pass_match = re.search(r'(\d+)\s+pass(?:ing)?', output, re.I)
+            if pass_match:
+                tests_passed = int(pass_match.group(1))
+        if tests_failed == 0:
+            fail_match = re.search(r'(\d+)\s+fail(?:ing)?', output, re.I)
+            if fail_match:
+                tests_failed = int(fail_match.group(1))
+
+        # Coverage: "All files | XX.XX |"
+        cov_match = re.search(r'All files[^|]*\|\s*(\d+\.?\d*)', output)
+        if cov_match:
+            coverage = float(cov_match.group(1))
+
+    # Go
+    elif lang == 'go':
+        # Count PASS and FAIL occurrences for individual tests
+        tests_passed = len(re.findall(r'--- PASS:', output))
+        tests_failed = len(re.findall(r'--- FAIL:', output))
+
+        # Fallback: check for overall PASS/FAIL
+        if tests_passed == 0 and 'PASS' in output and 'FAIL' not in output:
+            tests_passed = 1
+        if tests_failed == 0 and 'FAIL' in output:
+            tests_failed = 1
+
+        # coverage: XX.X% of statements
+        cov_match = re.search(r'coverage:\s*(\d+\.?\d*)%', output)
+        if cov_match:
+            coverage = float(cov_match.group(1))
+
+    # Rust (cargo test)
+    elif lang == 'rust':
+        # "test result: ok. X passed; Y failed;"
+        match = re.search(r'(\d+)\s+passed', output)
+        if match:
+            tests_passed = int(match.group(1))
+        match = re.search(r'(\d+)\s+failed', output)
+        if match:
+            tests_failed = int(match.group(1))
+
+    # Fallback: try generic patterns
+    else:
+        pass_match = re.search(r'(\d+)\s+(?:tests?\s+)?pass(?:ed)?', output, re.I)
+        fail_match = re.search(r'(\d+)\s+(?:tests?\s+)?fail(?:ed)?', output, re.I)
+        if pass_match:
+            tests_passed = int(pass_match.group(1))
+        if fail_match:
+            tests_failed = int(fail_match.group(1))
+
+    return tests_passed, tests_failed, coverage
+
+
+def _detect_example_errors(output: str) -> tuple[bool, str]:
+    """
+    Detect if example output contains error indicators.
+
+    Only detects true crashes/errors:
+    - Python tracebacks (catches ALL unhandled exceptions)
+    - ERROR level log messages
+
+    Intentionally does NOT detect:
+    - HTTP status codes (examples may test error responses)
+    - Individual exception type names (causes false positives, redundant with traceback)
+
+    Returns:
+        (has_errors, error_summary)
+    """
+    error_patterns = [
+        (r'Traceback \(most recent call last\):', 'Python traceback'),
+        (r' - ERROR - ', 'Error log message'),  # Python logging format
+    ]
+
+    errors_found = []
+    for pattern, description in error_patterns:
+        if re.search(pattern, output, re.MULTILINE):
+            errors_found.append(description)
+
+    if errors_found:
+        return True, '; '.join(errors_found)
+    return False, ''
+
+
+def _try_auto_fix_import_error(
+    error_output: str,
+    code_file: Path,
+    example_file: Path,
+) -> tuple[bool, str]:
+    """
+    Try to automatically fix common import errors before calling expensive agentic fix.
+
+    Returns:
+        (fixed, message): Whether a fix was attempted and what was done.
+    """
+    import re
+
+    # Check for ModuleNotFoundError or ImportError
+    module_not_found = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", error_output)
+    import_error = re.search(r"ImportError: cannot import name ['\"]([^'\"]+)['\"]", error_output)
+
+    if not module_not_found and not import_error:
+        return False, "No import error detected"
+
+    if module_not_found:
+        missing_module = module_not_found.group(1)
+        # Split by . to get the top-level package
+        top_level_package = missing_module.split('.')[0]
+
+        # Check if this is the module we're trying to import (local module)
+        code_module_name = code_file.stem  # e.g., "data_validator" from "data_validator.py"
+
+        if top_level_package == code_module_name:
+            # It's trying to import our own generated code - fix the example's sys.path
+            # Read the example and fix the path manipulation
+            try:
+                example_content = example_file.read_text(encoding='utf-8')
+                code_dir = str(code_file.parent.resolve())
+
+                # Look for existing sys.path manipulation
+                if 'sys.path' in example_content:
+                    # Try to fix the existing path manipulation
+                    # Common pattern: module_path = os.path.abspath(os.path.join(...))
+                    # Replace with correct path
+                    fixed_content = re.sub(
+                        r"module_path\s*=\s*os\.path\.abspath\([^)]+\)",
+                        f"module_path = '{code_dir}'",
+                        example_content
+                    )
+                    if fixed_content != example_content:
+                        example_file.write_text(fixed_content, encoding='utf-8')
+                        return True, f"Fixed sys.path to point to {code_dir}"
+
+                # If no existing sys.path, add one at the start after imports
+                lines = example_content.split('\n')
+                insert_pos = 0
+                for i, line in enumerate(lines):
+                    if line.startswith('import ') or line.startswith('from '):
+                        if 'sys' in line or 'os' in line:
+                            insert_pos = i + 1
+                        continue
+                    if line.strip() and not line.startswith('#') and not line.startswith('import') and not line.startswith('from'):
+                        insert_pos = i
+                        break
+
+                path_fix = f"\n# Auto-added by pdd to fix import\nimport sys\nsys.path.insert(0, '{code_dir}')\n"
+                lines.insert(insert_pos, path_fix)
+                example_file.write_text('\n'.join(lines), encoding='utf-8')
+                return True, f"Added sys.path.insert(0, '{code_dir}') to example"
+
+            except Exception as e:
+                return False, f"Failed to fix import path: {e}"
+
+        else:
+            # It's an external package - try pip install
+            try:
+                result = subprocess.run(
+                    [sys.executable, '-m', 'pip', 'install', top_level_package],
+                    capture_output=True,
+                    text=True,
+                    timeout=120
+                )
+                if result.returncode == 0:
+                    return True, f"Installed missing package: {top_level_package}"
+                else:
+                    return False, f"Failed to install {top_level_package}: {result.stderr}"
+            except Exception as e:
+                return False, f"Failed to run pip install: {e}"
+
+    return False, "Import error detected but no auto-fix available"
+
+
+def _run_example_with_error_detection(
+    cmd_parts: list[str],
+    env: dict,
+    cwd: str,
+    timeout: int = 60
+) -> tuple[int, str, str]:
+    """
+    Run example file, detecting errors from output.
+
+    For server-style examples that block, this runs until timeout
+    then analyzes output for errors. No errors = success.
+
+    Returns:
+        (returncode, stdout, stderr)
+        - returncode: 0 if no errors detected, positive if errors found or process failed
+    """
+    import threading
+
+    proc = subprocess.Popen(
+        cmd_parts,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        stdin=subprocess.DEVNULL,
+        env=env,
+        cwd=cwd,
+        start_new_session=True,
+    )
+
+    stdout_chunks = []
+    stderr_chunks = []
+
+    def read_pipe(pipe, chunks):
+        try:
+            for line in iter(pipe.readline, b''):
+                chunks.append(line)
+        except Exception:
+            pass
+
+    t_out = threading.Thread(target=read_pipe, args=(proc.stdout, stdout_chunks), daemon=True)
+    t_err = threading.Thread(target=read_pipe, args=(proc.stderr, stderr_chunks), daemon=True)
+    t_out.start()
+    t_err.start()
+
+    # Wait for process or timeout
+    try:
+        proc.wait(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        proc.terminate()
+        try:
+            proc.wait(timeout=5)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.wait()
+
+    t_out.join(timeout=2)
+    t_err.join(timeout=2)
+
+    stdout = b''.join(stdout_chunks).decode('utf-8', errors='replace')
+    stderr = b''.join(stderr_chunks).decode('utf-8', errors='replace')
+    combined = stdout + '\n' + stderr
+
+    # Check for errors in output
+    has_errors, error_summary = _detect_example_errors(combined)
+
+    # Determine result:
+    # - Errors in output → failure
+    # - Positive exit code (process failed normally, e.g., sys.exit(1)) → failure
+    # - Negative exit code (killed by signal, e.g., -9 for SIGKILL) → check output
+    # - Zero exit code → success
+    #
+    # IMPORTANT: When we kill the process after timeout, returncode is negative
+    # (the signal number). This is NOT a failure if output has no errors.
+    if has_errors:
+        return 1, stdout, stderr  # Errors detected in output
+    elif proc.returncode is not None and proc.returncode > 0:
+        return proc.returncode, stdout, stderr  # Process exited with error
+    else:
+        # Success cases:
+        # - returncode == 0 (clean exit)
+        # - returncode < 0 (killed by signal, but no errors in output)
+        # - returncode is None (shouldn't happen after wait, but safe fallback)
+        return 0, stdout, stderr
+
+
+def _execute_tests_and_create_run_report(
+    test_file: Path,
+    basename: str,
+    language: str,
+    target_coverage: float = 90.0,
+    *,
+    code_file: Optional[Path] = None,
+    atomic_state: Optional['AtomicStateUpdate'] = None,
+    test_files: Optional[List[Path]] = None,  # Bug #156: Support multiple test files
+) -> RunReport:
+    """Execute tests and create a RunReport with actual results.
+
+    Now supports multiple languages by using get_test_command_for_file()
+    to determine the appropriate test runner.
+
+    Args:
+        test_file: Primary test file (for backward compat)
+        test_files: Optional list of all test files to run (Bug #156)
+    """
+    from .get_test_command import get_test_command_for_file
+
+    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
+
+    # Bug #156: Use test_files if provided, otherwise just the single test_file
+    all_test_files = test_files if test_files else [test_file]
+
+    # Calculate test file hash for staleness detection (primary file for backward compat)
+    test_hash = calculate_sha256(test_file) if test_file.exists() else None
+
+    # Bug #156: Calculate hashes for ALL test files
+    test_file_hashes = {
+        f.name: calculate_sha256(f)
+        for f in all_test_files
+        if f.exists()
+    } if all_test_files else None
+
+    # Use clean env without TUI-specific vars
+    clean_env = os.environ.copy()
+    for var in ['FORCE_COLOR', 'COLUMNS']:
+        clean_env.pop(var, None)
+
+    try:
+        lang_lower = language.lower()
+
+        # Python: use existing pytest logic with coverage
+        if lang_lower == "python":
+            module_name = test_file.name.replace('test_', '').replace('.py', '')
+            python_executable = detect_host_python_executable()
+
+            cov_target = None
+            if code_file is not None:
+                cov_target = _python_cov_target_for_test_and_code(test_file, code_file, basename or module_name)
+            else:
+                cov_target = basename or module_name
+
+            if not cov_target:
+                cov_target = basename or module_name
+
+            # Bug #156: Run pytest on ALL test files
+            pytest_args = [
+                python_executable, '-m', 'pytest',
+            ] + [str(f) for f in all_test_files] + [
+                '-v',
+                '--tb=short',
+                f'--cov={cov_target}',
+                '--cov-report=term-missing'
+            ]
+            result = subprocess.run(
+                pytest_args,
+                capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
+            )
+
+            exit_code = result.returncode
+            stdout = result.stdout + (result.stderr or '')
+            tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
+        else:
+            # Non-Python: use language-appropriate test command
+            test_cmd = get_test_command_for_file(str(test_file), language)
+
+            if test_cmd is None:
+                # No test command available - return report indicating this
+                report = RunReport(
+                    timestamp=timestamp,
+                    exit_code=127,  # Command not found
+                    tests_passed=0,
+                    tests_failed=0,
+                    coverage=0.0,
+                    test_hash=test_hash,
+                    test_files=test_file_hashes,  # Bug #156
+                )
+                save_run_report(asdict(report), basename, language, atomic_state)
+                return report
+
+            # Run the test command
+            result = subprocess.run(
+                test_cmd,
+                shell=True,
+                capture_output=True,
+                text=True,
+                timeout=300,
+                env=clean_env,
+                cwd=str(test_file.parent),
+                stdin=subprocess.DEVNULL,
+                start_new_session=True
+            )
+
+            exit_code = result.returncode
+            stdout = (result.stdout or '') + '\n' + (result.stderr or '')
+
+            # Parse results based on language
+            tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
        report = RunReport(
            timestamp=timestamp,
            exit_code=exit_code,
            tests_passed=tests_passed,
            tests_failed=tests_failed,
-            coverage=coverage
+            coverage=coverage,
+            test_hash=test_hash,
+            test_files=test_file_hashes,  # Bug #156
        )
-
+
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception) as e:
-        # If test execution fails, create a report indicating failure
        report = RunReport(
            timestamp=timestamp,
            exit_code=1,
            tests_passed=0,
            tests_failed=1,
-            coverage=0.0
+            coverage=0.0,
+            test_hash=test_hash,
+            test_files=test_file_hashes,  # Bug #156
        )
-
-    # Save the run report
-    save_run_report(asdict(report), basename, language)
-    return report

-
+    save_run_report(asdict(report), basename, language, atomic_state)
+    return report

 def _create_mock_context(**kwargs) -> click.Context:
     """Creates a mock Click context object to pass parameters to command functions."""
@@ -242,7 +816,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
     for entry in log_entries:
         timestamp = entry.get('timestamp', 'N/A')

-        # Handle special event entries
         if 'event' in entry:
             event = entry.get('event', 'N/A')
             print(f"[{timestamp[:19]}] EVENT: {event}")
@@ -251,7 +824,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
                 print(f"  Details: {details_str}")
             continue

-        # Handle operation entries
         operation = entry.get('operation', 'N/A')
         reason = entry.get('reason', 'N/A')
         success = entry.get('success')
@@ -260,7 +832,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
         duration = entry.get('duration')

         if verbose:
-            # Verbose format
             print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
             decision_type = entry.get('decision_type', 'N/A')
             confidence = entry.get('confidence', 'N/A')
@@ -276,14 +847,12 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
                 print(f"  Estimated Cost: ${estimated_cost:.2f}")

             if 'details' in entry and entry['details']:
-                # Show details without budget_remaining to avoid clutter
                 details_copy = entry['details'].copy()
                 details_copy.pop('budget_remaining', None)
                 if details_copy:
                     details_str = json.dumps(details_copy, indent=2)
                     print(f"  Details: {details_str}")
         else:
-            # Normal format: [timestamp] operation | reason | status cost | duration
             status_icon = "✓" if success else "✗" if success is False else "?"

             cost_info = ""
@@ -318,45 +887,60 @@ def sync_orchestration(
     budget: float = 10.0,
     skip_verify: bool = False,
     skip_tests: bool = False,
-
+    dry_run: bool = False,
     force: bool = False,
-    strength: float =
+    strength: float = DEFAULT_STRENGTH,
     temperature: float = 0.0,
-    time_param: float = 0.25,
+    time_param: float = 0.25,
     verbose: bool = False,
     quiet: bool = False,
     output_cost: Optional[str] = None,
     review_examples: bool = False,
     local: bool = False,
     context_config: Optional[Dict[str, str]] = None,
+    context_override: Optional[str] = None,
+    confirm_callback: Optional[Callable[[str, str], bool]] = None,
 ) -> Dict[str, Any]:
     """
     Orchestrates the complete PDD sync workflow with parallel animation.
-
-    If log=True, displays the sync log instead of running sync operations.
-    The verbose flag controls the detail level of the log output.
-
-    Returns a dictionary summarizing the outcome of the sync process.
     """
-
+    # Import get_extension at function scope
+    from .sync_determine_operation import get_extension
+
+    if dry_run:
         return _display_sync_log(basename, language, verbose)

     # --- Initialize State and Paths ---
     try:
-        pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
+        pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+    except FileNotFoundError as e:
+        if "test_config.py" in str(e) or "tests/test_" in str(e):
+            pdd_files = {
+                'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
+                'code': Path(f"src/{basename}.{get_extension(language)}"),
+                'example': Path(f"context/{basename}_example.{get_extension(language)}"),
+                'test': Path(f"tests/test_{basename}.{get_extension(language)}")
+            }
+            if not quiet:
+                print(f"Note: Test file missing, continuing with sync workflow to generate it")
+        else:
+            print(f"Error constructing paths: {e}")
+            return {
+                "success": False,
+                "error": f"Failed to construct paths: {str(e)}",
+                "operations_completed": [],
+                "errors": [f"Path construction failed: {str(e)}"]
+            }
     except Exception as e:
-        # Log the error and return early with failure status
         print(f"Error constructing paths: {e}")
         return {
             "success": False,
-            "total_cost": 0.0,
-            "model_name": "",
             "error": f"Failed to construct paths: {str(e)}",
             "operations_completed": [],
             "errors": [f"Path construction failed: {str(e)}"]
         }

-    # Shared state for animation
+    # Shared state for animation (passed to App)
     current_function_name_ref = ["initializing"]
     stop_event = threading.Event()
     current_cost_ref = [0.0]
@@ -364,696 +948,695 @@ def sync_orchestration(
|
|
|
364
948
|
code_path_ref = [str(pdd_files.get('code', 'N/A'))]
|
|
365
949
|
example_path_ref = [str(pdd_files.get('example', 'N/A'))]
|
|
366
950
|
tests_path_ref = [str(pdd_files.get('test', 'N/A'))]
|
|
367
|
-
prompt_box_color_ref
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
operations_completed: List[str] = []
|
|
372
|
-
skipped_operations: List[str] = []
|
|
373
|
-
errors: List[str] = []
|
|
374
|
-
start_time = time.time()
|
|
375
|
-
animation_thread = None
|
|
376
|
-
|
|
377
|
-
# Track operation history for cycle detection
|
|
378
|
-
operation_history: List[str] = []
|
|
379
|
-
MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
|
|
951
|
+
prompt_box_color_ref = ["blue"]
|
|
952
|
+
code_box_color_ref = ["blue"]
|
|
953
|
+
example_box_color_ref = ["blue"]
|
|
954
|
+
tests_box_color_ref = ["blue"]
|
|
380
955
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
|
|
385
|
-
|
|
386
|
-
# --- Start Animation Thread ---
|
|
387
|
-
animation_thread = threading.Thread(
|
|
388
|
-
target=sync_animation,
|
|
389
|
-
args=(
|
|
390
|
-
current_function_name_ref, stop_event, basename, current_cost_ref, budget,
|
|
391
|
-
prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref,
|
|
392
|
-
prompt_path_ref, code_path_ref, example_path_ref, tests_path_ref
|
|
393
|
-
),
|
|
394
|
-
daemon=True
|
|
395
|
-
)
|
|
396
|
-
animation_thread.start()
|
|
397
|
-
|
|
398
|
-
# --- Main Workflow Loop ---
|
|
399
|
-
while True:
|
|
400
|
-
budget_remaining = budget - current_cost_ref[0]
|
|
401
|
-
if current_cost_ref[0] >= budget:
|
|
402
|
-
errors.append(f"Budget of ${budget:.2f} exceeded.")
|
|
403
|
-
log_sync_event(basename, language, "budget_exceeded", {
|
|
404
|
-
"total_cost": current_cost_ref[0],
|
|
405
|
-
"budget": budget
|
|
406
|
-
})
|
|
407
|
-
break
|
|
408
|
-
|
|
409
|
-
# Log budget warning when running low
|
|
410
|
-
if budget_remaining < budget * 0.2 and budget_remaining > 0:
|
|
411
|
-
log_sync_event(basename, language, "budget_warning", {
|
|
412
|
-
"remaining": budget_remaining,
|
|
413
|
-
"percentage": (budget_remaining / budget) * 100
|
|
414
|
-
})
|
|
415
|
-
|
|
416
|
-
decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
|
|
417
|
-
operation = decision.operation
|
|
418
|
-
|
|
419
|
-
# Create log entry with decision info
|
|
420
|
-
log_entry = create_sync_log_entry(decision, budget_remaining)
|
|
421
|
-
|
|
422
|
-
# Track operation history
|
|
423
|
-
operation_history.append(operation)
|
|
424
|
-
|
|
425
|
-
# Detect crash-verify cycles
|
|
426
|
-
if len(operation_history) >= 4:
|
|
427
|
-
# Check for repeating crash-verify pattern
|
|
428
|
-
recent_ops = operation_history[-4:]
|
|
429
|
-
if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
|
|
430
|
-
recent_ops == ['verify', 'crash', 'verify', 'crash']):
|
|
431
|
-
# Count how many times this cycle has occurred
|
|
432
|
-
cycle_count = 0
|
|
433
|
-
for i in range(0, len(operation_history) - 1, 2):
|
|
434
|
-
if i + 1 < len(operation_history):
|
|
435
|
-
if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
|
|
436
|
-
(operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
|
|
437
|
-
cycle_count += 1
|
|
438
|
-
|
|
439
|
-
if cycle_count >= MAX_CYCLE_REPEATS:
|
|
440
|
-
errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
|
|
441
|
-
errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
|
|
442
|
-
log_sync_event(basename, language, "cycle_detected", {
|
|
443
|
-
"cycle_type": "crash-verify",
|
|
444
|
-
"cycle_count": cycle_count,
|
|
445
|
-
"operation_history": operation_history[-10:] # Last 10 operations
|
|
446
|
-
})
|
|
447
|
-
break
|
|
956
|
+
# Mutable container for the app reference (set after app creation)
|
|
957
|
+
# This allows the worker to access app.request_confirmation()
|
|
958
|
+
app_ref: List[Optional['SyncApp']] = [None]
|
|
448
959
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
960
|
+
# Progress callback ref for TUI ProgressBar updates during auto-deps
|
|
961
|
+
progress_callback_ref: List[Optional[Callable[[int, int], None]]] = [None]
|
|
962
|
+
|
|
963
|
+
# Track if user has already confirmed overwrite (to avoid asking multiple times)
|
|
964
|
+
user_confirmed_overwrite: List[bool] = [False]
|
|
965
|
+
|
|
966
|
+
def get_confirm_callback() -> Optional[Callable[[str, str], bool]]:
|
|
967
|
+
"""Get the confirmation callback from the app if available.
|
|
968
|
+
|
|
969
|
+
Once user confirms, we remember it so subsequent operations don't ask again.
|
|
970
|
+
"""
|
|
971
|
+
if user_confirmed_overwrite[0]:
|
|
972
|
+
# User already confirmed, return a callback that always returns True
|
|
973
|
+
return lambda msg, title: True
|
|
974
|
+
|
|
975
|
+
if app_ref[0] is not None:
|
|
976
|
+
def confirming_callback(msg: str, title: str) -> bool:
|
|
977
|
+
result = app_ref[0].request_confirmation(msg, title)
|
|
978
|
+
if result:
|
|
979
|
+
user_confirmed_overwrite[0] = True
|
|
980
|
+
return result
|
|
981
|
+
return confirming_callback
|
|
982
|
+
return confirm_callback # Fall back to provided callback
|
|
983
|
+
|
|
984
|
+
def sync_worker_logic():
|
|
985
|
+
"""
|
|
986
|
+
The main loop of sync logic, run in a worker thread by Textual App.
|
|
987
|
+
"""
|
|
988
|
+
operations_completed: List[str] = []
|
|
989
|
+
skipped_operations: List[str] = []
|
|
990
|
+
errors: List[str] = []
|
|
991
|
+
start_time = time.time()
|
|
992
|
+
last_model_name: str = ""
|
|
993
|
+
operation_history: List[str] = []
|
|
994
|
+
MAX_CYCLE_REPEATS = 2
|
|
995
|
+
|
|
996
|
+
# Helper function to print inside worker (goes to RichLog via redirection)
|
|
997
|
+
# print() will work if sys.stdout is redirected.
|
|
998
|
+
|
|
999
|
+
try:
|
|
1000
|
+
with SyncLock(basename, language):
|
|
1001
|
+
log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
|
|
1002
|
+
|
|
1003
|
+
while True:
|
|
1004
|
+
budget_remaining = budget - current_cost_ref[0]
|
|
1005
|
+
if current_cost_ref[0] >= budget:
|
|
1006
|
+
errors.append(f"Budget of ${budget:.2f} exceeded.")
|
|
1007
|
+
log_sync_event(basename, language, "budget_exceeded", {
|
|
1008
|
+
"total_cost": current_cost_ref[0],
|
|
1009
|
+
"budget": budget
|
|
467
1010
|
})
|
|
468
1011
|
break
|
|
469
1012
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
error_msg = f"Manual merge required: {decision.reason}"
|
|
479
|
-
elif operation == 'error':
|
|
480
|
-
errors.append(f"Error determining operation: {decision.reason}")
|
|
481
|
-
error_msg = f"Error determining operation: {decision.reason}"
|
|
482
|
-
elif operation == 'analyze_conflict':
|
|
483
|
-
errors.append(f"Conflict detected: {decision.reason}")
|
|
484
|
-
error_msg = f"Conflict detected: {decision.reason}"
|
|
485
|
-
|
|
486
|
-
# Update log entry for final operation
|
|
487
|
-
update_sync_log_entry(log_entry, {
|
|
488
|
-
'success': success,
|
|
489
|
-
'cost': 0.0,
|
|
490
|
-
'model': 'none',
|
|
491
|
-
'error': error_msg
|
|
492
|
-
}, 0.0)
|
|
493
|
-
append_sync_log(basename, language, log_entry)
|
|
494
|
-
|
|
495
|
-
break
|
|
496
|
-
|
|
497
|
-
# Handle skips
|
|
498
|
-
if operation == 'verify' and (skip_verify or skip_tests):
|
|
499
|
-
# Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
|
|
500
|
-
skipped_operations.append('verify')
|
|
501
|
-
skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
|
|
502
|
-
|
|
503
|
-
# Update log entry for skipped operation
|
|
504
|
-
update_sync_log_entry(log_entry, {
|
|
505
|
-
'success': True,
|
|
506
|
-
'cost': 0.0,
|
|
507
|
-
'model': 'skipped',
|
|
508
|
-
'error': None
|
|
509
|
-
}, 0.0)
|
|
510
|
-
log_entry['details']['skip_reason'] = skip_reason
|
|
511
|
-
append_sync_log(basename, language, log_entry)
|
|
1013
|
+
if budget_remaining < budget * 0.2 and budget_remaining > 0:
|
|
1014
|
+
log_sync_event(basename, language, "budget_warning", {
|
|
1015
|
+
"remaining": budget_remaining,
|
|
1016
|
+
"percentage": (budget_remaining / budget) * 100
|
|
1017
|
+
})
|
|
1018
|
+
|
|
1019
|
+
decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
|
|
1020
|
+
operation = decision.operation
|
|
512
1021
|
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
|
|
516
|
-
)
|
|
517
|
-
save_run_report(asdict(report_data), basename, language)
|
|
518
|
-
_save_operation_fingerprint(basename, language, 'verify', pdd_files, 0.0, skip_reason)
|
|
519
|
-
continue
|
|
520
|
-
if operation == 'test' and skip_tests:
|
|
521
|
-
skipped_operations.append('test')
|
|
1022
|
+
log_entry = create_sync_log_entry(decision, budget_remaining)
|
|
1023
|
+
operation_history.append(operation)
|
|
522
1024
|
|
|
523
|
-
#
|
|
524
|
-
|
|
525
|
-
'
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
1025
|
+
# Cycle detection logic
|
|
1026
|
+
if len(operation_history) >= 3:
|
|
1027
|
+
recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
|
|
1028
|
+
if len(recent_auto_deps) >= 2:
|
|
1029
|
+
errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
|
|
1030
|
+
log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
|
|
1031
|
+
operation = 'generate'
|
|
1032
|
+
decision.operation = 'generate' # Update decision too
|
|
1033
|
+
|
|
1034
|
+
# Bug #4 fix: Detect crash-verify cycle pattern
|
|
1035
|
+
# The pattern [crash, verify, crash, verify] or [verify, crash, verify, crash]
|
|
1036
|
+
# represents 2 iterations of the alternating cycle, so break immediately
|
|
1037
|
+
if len(operation_history) >= 4:
|
|
1038
|
+
recent_ops = operation_history[-4:]
|
|
1039
|
+
if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
|
|
1040
|
+
recent_ops == ['verify', 'crash', 'verify', 'crash']):
|
|
1041
|
+
# Pattern detected - this represents MAX_CYCLE_REPEATS iterations
|
|
1042
|
+
errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
|
|
1043
|
+
log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
|
|
1044
|
+
break
|
|
1045
|
+
|
|
1046
|
+
# Bug #4 fix: Detect test-fix cycle pattern
|
|
1047
|
+
# The pattern [test, fix, test, fix] or [fix, test, fix, test]
|
|
1048
|
+
# represents 2 iterations of the alternating cycle, so break immediately
|
|
1049
|
+
if len(operation_history) >= 4:
|
|
1050
|
     + recent_ops = operation_history[-4:]
1051 + if (recent_ops == ['test', 'fix', 'test', 'fix'] or
1052 + recent_ops == ['fix', 'test', 'fix', 'test']):
1053 + # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1054 + errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1055 + log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
1056 + break
1057 +
1058 + if operation == 'fix':
1059 + consecutive_fixes = 0
1060 + for i in range(len(operation_history) - 1, -1, -1):
1061 + if operation_history[i] == 'fix':
1062 + consecutive_fixes += 1
1063 + else:
1064 + break
1065 + if consecutive_fixes >= 5:
1066 + errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
1067 + break
1068 +
1069 + if operation == 'test':
1070 + consecutive_tests = 0
1071 + for i in range(len(operation_history) - 1, -1, -1):
1072 + if operation_history[i] == 'test':
1073 + consecutive_tests += 1
1074 + else:
1075 + break
1076 + if consecutive_tests >= MAX_CONSECUTIVE_TESTS:
1077 + errors.append(f"Detected {consecutive_tests} consecutive test operations. Breaking infinite test loop.")
1078 + break
1079 +
1080 + # Bug #157 fix: Prevent infinite crash retry loops
1081 + if operation == 'crash':
1082 + consecutive_crashes = 0
1083 + for i in range(len(operation_history) - 1, -1, -1):
1084 + if operation_history[i] == 'crash':
1085 + consecutive_crashes += 1
1086 + else:
1087 + break
1088 + if consecutive_crashes >= MAX_CONSECUTIVE_CRASHES:
1089 + errors.append(f"Detected {consecutive_crashes} consecutive crash operations. Breaking infinite crash loop.")
1090 + break
1091 +
1092 + if operation == 'test_extend':
1093 + # Count test_extend attempts to prevent infinite loop
1094 + extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
1095 + if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
1096 + # Accept current coverage after max attempts
1097 + log_sync_event(basename, language, "test_extend_limit", {
1098 + "attempts": extend_attempts,
1099 + "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
1100 + "reason": "Accepting current coverage after max extend attempts"
1101 + })
1102 + success = True
1103 + break
1104 +
1105 + if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
1106 + current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
1107 + success = operation in ['all_synced', 'nothing']
1108 + error_msg = None
1109 + if operation == 'fail_and_request_manual_merge':
1110 + errors.append(f"Manual merge required: {decision.reason}")
1111 + error_msg = decision.reason
1112 + elif operation == 'error':
1113 + errors.append(f"Error determining operation: {decision.reason}")
1114 + error_msg = decision.reason
1115 + elif operation == 'analyze_conflict':
1116 + errors.append(f"Conflict detected: {decision.reason}")
1117 + error_msg = decision.reason
1118 +
1119 + update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
1120 + append_sync_log(basename, language, log_entry)
1121 + break
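The guard above keys off only the trailing window of operation_history. A minimal standalone sketch of the same windowed check follows; the constant values and the detect_loop helper are illustrative assumptions, not the package's API.

    # Illustrative sketch of the cycle guard above; not the package's API.
    from typing import List, Optional

    MAX_CYCLE_REPEATS = 2        # assumed value, for illustration only
    MAX_CONSECUTIVE_TESTS = 3    # assumed value, for illustration only

    def detect_loop(history: List[str]) -> Optional[str]:
        """Return a reason string when the recent history looks like a loop."""
        recent = history[-4:]
        if recent in (['test', 'fix'] * 2, ['fix', 'test'] * 2):
            return f"test-fix cycle repeated {MAX_CYCLE_REPEATS} times"
        run = 0
        for op in reversed(history):          # length of the trailing run
            if op == history[-1]:
                run += 1
            else:
                break
        if history and history[-1] == 'test' and run >= MAX_CONSECUTIVE_TESTS:
            return f"{run} consecutive test operations"
        return None

    print(detect_loop(['generate', 'test', 'fix', 'test', 'fix']))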
532 1122
533 -
534 -
535 -
1123 + # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
1124 + # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
1125 + if operation == 'verify' and (skip_verify or skip_tests):
1126 + skipped_operations.append('verify')
1127 + update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1128 + append_sync_log(basename, language, log_entry)
1129 + # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1130 + _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
1131 + continue
1132 + if operation == 'test' and skip_tests:
1133 + skipped_operations.append('test')
1134 + update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1135 + append_sync_log(basename, language, log_entry)
1136 + # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1137 + _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
1138 + continue
1139 + if operation == 'crash' and (skip_tests or skip_verify):
1140 + skipped_operations.append('crash')
1141 + update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1142 + append_sync_log(basename, language, log_entry)
1143 + # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1144 + _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
1145 + # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
1146 + # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
1147 + current_hashes = calculate_current_hashes(pdd_files)
1148 + synthetic_report = RunReport(
1149 + timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
1150 + exit_code=0, # Assume success since we're skipping validation
1151 + tests_passed=0,
1152 + tests_failed=0,
1153 + coverage=0.0,
1154 + test_hash=current_hashes.get('test_hash')
1155 + )
1156 + save_run_report(asdict(synthetic_report), basename, language)
1157 + continue
1158 +
1159 + current_function_name_ref[0] = operation
1160 + ctx = _create_mock_context(
1161 + force=force, strength=strength, temperature=temperature, time=time_param,
1162 + verbose=verbose, quiet=quiet, output_cost=output_cost,
1163 + review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
1164 + max_attempts=max_attempts, target_coverage=target_coverage,
1165 + confirm_callback=get_confirm_callback(),
1166 + context=context_override
536 1167 )
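The 'skip:' prefix noted above lets later completeness checks treat a deliberately skipped operation differently from one that actually ran. A small sketch of that convention; the op_satisfied helper is hypothetical and only illustrates the idea.

    # Sketch of the 'skip:' fingerprint convention; not the package's
    # _is_workflow_complete implementation.
    def op_satisfied(fingerprints: dict, op: str) -> bool:
        """An operation counts as handled if it ran, or was explicitly skipped."""
        return op in fingerprints or f"skip:{op}" in fingerprints

    fingerprints = {"generate": "abc123", "skip:verify": "skipped"}
    print(op_satisfied(fingerprints, "verify"))  # True - skipped, but accounted for
    print(op_satisfied(fingerprints, "test"))    # False - still pending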
537 - save_run_report(asdict(report_data), basename, language)
538 - _save_operation_fingerprint(basename, language, 'test', pdd_files, 0.0, 'skipped')
539 - continue
540 - if operation == 'crash' and skip_tests:
541 - # Skip crash operations when tests are skipped since crash fixes usually require test execution
542 - skipped_operations.append('crash')
543 1168
544-585 - (removed lines; content not shown in this diff view)
586 - prompt_file=str(pdd_files['prompt']),
587 - directory_path=examples_dir,
588 - auto_deps_csv_path="project_dependencies.csv",
589 - output=temp_output,
590 - force_scan=False # Don't force scan every time
591 - )
592 -
593 - # Only move the temp file back if content actually changed
594 - if Path(temp_output).exists():
595 - import shutil
596 - new_content = Path(temp_output).read_text(encoding='utf-8')
597 - if new_content != original_content:
598 - shutil.move(temp_output, str(pdd_files['prompt']))
599 - else:
600 - # No changes needed, remove temp file
601 - Path(temp_output).unlink()
602 - # Mark as successful with no changes
603 - result = (new_content, 0.0, 'no-changes')
604 - elif operation == 'generate':
605 - result = code_generator_main(
606 - ctx,
607 - prompt_file=str(pdd_files['prompt']),
608 - output=str(pdd_files['code']),
609 - original_prompt_file_path=None,
610 - force_incremental_flag=False
611 - )
612 - elif operation == 'example':
613 - print(f"DEBUG SYNC: pdd_files['example'] = {pdd_files['example']}")
614 - print(f"DEBUG SYNC: str(pdd_files['example']) = {str(pdd_files['example'])}")
615 - result = context_generator_main(
616 - ctx,
617 - prompt_file=str(pdd_files['prompt']),
618 - code_file=str(pdd_files['code']),
619 - output=str(pdd_files['example'])
620 - )
621 - elif operation == 'crash':
622 - # Validate required files exist before attempting crash operation
623 - required_files = [pdd_files['code'], pdd_files['example']]
624 - missing_files = [f for f in required_files if not f.exists()]
625 -
626 - if missing_files:
627 - # Skip crash operation if required files are missing
628 - print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
629 - skipped_operations.append('crash')
630 -
631 - # Update log entry for skipped operation
632 - update_sync_log_entry(log_entry, {
633 - 'success': True,
634 - 'cost': 0.0,
635 - 'model': 'skipped',
636 - 'error': None
637 - }, 0.0)
638 - log_entry['details']['skip_reason'] = 'missing_files'
639 - log_entry['details']['missing_files'] = [f.name for f in missing_files]
640 - append_sync_log(basename, language, log_entry)
1169 + result = {}
1170 + success = False
1171 + op_start_time = time.time()
1172 +
1173 + # Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
1174 + with AtomicStateUpdate(basename, language) as atomic_state:
1175 +
1176 + # --- Execute Operation ---
1177 + try:
1178 + if operation == 'auto-deps':
1179 + temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
1180 + original_content = pdd_files['prompt'].read_text(encoding='utf-8')
1181 + result = auto_deps_main(
1182 + ctx,
1183 + prompt_file=str(pdd_files['prompt']),
1184 + directory_path=examples_dir,
1185 + auto_deps_csv_path="project_dependencies.csv",
1186 + output=temp_output,
1187 + force_scan=False,
1188 + progress_callback=progress_callback_ref[0]
1189 + )
1190 + if Path(temp_output).exists():
1191 + import shutil
1192 + new_content = Path(temp_output).read_text(encoding='utf-8')
1193 + if new_content != original_content:
1194 + shutil.move(temp_output, str(pdd_files['prompt']))
1195 + else:
1196 + Path(temp_output).unlink()
1197 + result = (new_content, 0.0, 'no-changes')
1198 + elif operation == 'generate':
1199 + result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt']), output=str(pdd_files['code']), original_prompt_file_path=None, force_incremental_flag=False)
1200 + # Clear stale run_report so crash/verify is required for newly generated code
1201 + run_report_file = META_DIR / f"{basename}_{language}_run.json"
1202 + run_report_file.unlink(missing_ok=True)
1203 + elif operation == 'example':
1204 + result = context_generator_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['example']))
1205 + elif operation == 'crash':
1206 + required_files = [pdd_files['code'], pdd_files['example']]
1207 + missing_files = [f for f in required_files if not f.exists()]
1208 + if missing_files:
1209 + skipped_operations.append('crash')
1210 + continue
641 1211
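The new `with AtomicStateUpdate(...)` wrapper groups the run-report and fingerprint writes so they land together. A minimal stand-in illustrating commit-on-success semantics; the real class in the package may differ in name, API, and storage format.

    # Commit-on-success sketch, assuming JSON state written via atomic rename.
    import json, os, tempfile
    from pathlib import Path

    class AtomicStateSketch:
        def __init__(self, path: Path):
            self.path = path
            self.pending = {}

        def stage(self, key, value):
            self.pending[key] = value

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            if exc_type is None:  # commit only when the block succeeded
                tmp = tempfile.NamedTemporaryFile('w', dir=self.path.parent, delete=False)
                json.dump(self.pending, tmp)
                tmp.close()
                os.replace(tmp.name, self.path)  # atomic on POSIX filesystems
            return False  # never swallow exceptions

    with AtomicStateSketch(Path("state.json")) as state:
        state.stage("run_report", {"exit_code": 0})
        state.stage("fingerprint", {"operation": "generate"})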
642 -
643 -
644 -
645 - exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
646 - )
647 - save_run_report(asdict(report_data), basename, language)
648 - _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
649 - continue
650 - else:
651 - # Check if we have a run report indicating failures that need crash fixing
652 - current_run_report = read_run_report(basename, language)
653 - crash_log_content = ""
1212 + # Crash handling logic (simplified copy from original)
1213 + current_run_report = read_run_report(basename, language)
1214 + crash_log_content = ""
654 1215
655-666 - (removed lines; content not shown in this diff view)
667 - env
668 -
1216 + # Check for crash condition (either run report says so, or we check manually)
1217 + has_crash = False
1218 + if current_run_report and current_run_report.exit_code != 0:
1219 + has_crash = True
1220 + crash_log_content = f"Test execution failed exit code: {current_run_report.exit_code}\n"
1221 + else:
1222 + # Manual check - run the example to see if it crashes
1223 + env = os.environ.copy()
1224 + src_dir = Path.cwd() / 'src'
1225 + env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
1226 + # Remove TUI-specific env vars that might contaminate subprocess
1227 + for var in ['FORCE_COLOR', 'COLUMNS']:
1228 + env.pop(var, None)
1229 + # Get language-appropriate run command from language_format.csv
1230 + example_path = str(pdd_files['example'])
1231 + run_cmd = get_run_command_for_file(example_path)
1232 + if run_cmd:
1233 + # Use the language-specific interpreter (e.g., node for .js)
1234 + cmd_parts = run_cmd.split()
1235 + else:
1236 + # Fallback to Python if no run command found
1237 + cmd_parts = ['python', example_path]
1238 + # Use error-detection runner that handles server-style examples
1239 + returncode, stdout, stderr = _run_example_with_error_detection(
1240 + cmd_parts,
1241 + env=env,
1242 + cwd=str(pdd_files['example'].parent),
1243 + timeout=60
669 1244 )
670-680 - (removed lines; content not shown in this diff view)
1245 +
1246 + class ExampleResult:
1247 + def __init__(self, rc, out, err):
1248 + self.returncode = rc
1249 + self.stdout = out
1250 + self.stderr = err
1251 +
1252 + ex_res = ExampleResult(returncode, stdout, stderr)
1253 + if ex_res.returncode != 0:
1254 + has_crash = True
1255 + crash_log_content = f"Example failed exit code: {ex_res.returncode}\nSTDOUT:\n{ex_res.stdout}\nSTDERR:\n{ex_res.stderr}\n"
1256 + if "SyntaxError" in ex_res.stderr:
1257 + crash_log_content = "SYNTAX ERROR DETECTED:\n" + crash_log_content
681 1258 else:
682-696 - (removed lines; content not shown in this diff view)
697 - update_sync_log_entry(log_entry, {
698 - 'success': True,
699 - 'cost': 0.0,
700 - 'model': 'skipped',
701 - 'error': None
702 - }, time.time() - start_time)
703 - log_entry['details']['skip_reason'] = 'no_crash'
704 - append_sync_log(basename, language, log_entry)
705 -
706 - report_data = RunReport(
707 - timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
708 - exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
709 - )
710 - save_run_report(asdict(report_data), basename, language)
711 - _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
712 - continue
713 -
714 - # Write actual error content or fallback
715 - if not crash_log_content:
716 - crash_log_content = "Unknown crash error - program failed but no error output captured"
717 -
718 - Path("crash.log").write_text(crash_log_content)
719 -
720 - try:
721 - result = crash_main(
722 - ctx,
723 - prompt_file=str(pdd_files['prompt']),
724 - code_file=str(pdd_files['code']),
725 - program_file=str(pdd_files['example']),
726 - error_file="crash.log",
727 - output=str(pdd_files['code']),
728 - output_program=str(pdd_files['example']),
729 - loop=True,
730 - max_attempts=max_attempts,
731 - budget=budget - current_cost_ref[0]
732 - )
733 - except (RuntimeError, Exception) as e:
734 - error_str = str(e)
735 - if ("LLM returned None" in error_str or
736 - "LLM failed to analyze errors" in error_str):
737 - # Skip crash operation for LLM failures
738 - print(f"Skipping crash operation due to LLM error: {e}")
739 - skipped_operations.append('crash')
740 -
741 - # Update log entry for skipped operation
742 - update_sync_log_entry(log_entry, {
743 - 'success': False,
744 - 'cost': 0.0,
745 - 'model': 'skipped',
746 - 'error': f"LLM error: {str(e)}"
747 - }, time.time() - start_time)
748 - log_entry['details']['skip_reason'] = 'llm_error'
749 - append_sync_log(basename, language, log_entry)
1259 + # No crash - save run report with exit_code=0 so sync_determine_operation
1260 + # knows the example was tested and passed (prevents infinite loop)
1261 + # Include test_hash for staleness detection
1262 + test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1263 + report = RunReport(
1264 + datetime.datetime.now(datetime.timezone.utc).isoformat(),
1265 + exit_code=0,
1266 + tests_passed=1,
1267 + tests_failed=0,
1268 + coverage=0.0,
1269 + test_hash=test_hash
1270 + )
1271 + save_run_report(asdict(report), basename, language)
1272 + skipped_operations.append('crash')
1273 + continue
750 1274
751 -
752 -
753 -
1275 + if has_crash:
1276 + # Try auto-fix for common import errors before expensive agentic call
1277 + auto_fixed, auto_fix_msg = _try_auto_fix_import_error(
1278 + crash_log_content,
1279 + pdd_files['code'],
1280 + pdd_files['example']
754 1281 )
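The run report now records a test_hash so a stored "passing" report stops counting once the test file changes. A small illustrative sketch of that staleness check; calculate_sha256 and report_is_stale here are local stand-ins, not the package's functions.

    # Hash-based staleness check, mirroring the test_hash field recorded above.
    import hashlib
    from pathlib import Path

    def calculate_sha256(path: Path) -> str:
        return hashlib.sha256(path.read_bytes()).hexdigest()

    def report_is_stale(report: dict, test_file: Path) -> bool:
        """A run report no longer counts once the hashed test file has changed."""
        if not test_file.exists():
            return True
        return report.get("test_hash") != calculate_sha256(test_file)

    test_file = Path("test_example.py")
    test_file.write_text("def test_ok():\n    assert True\n")
    report = {"exit_code": 0, "test_hash": calculate_sha256(test_file)}
    print(report_is_stale(report, test_file))  # False until the test file is edited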
755 -
756 -
1282 + if auto_fixed:
1283 + log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
1284 + # Retry running the example after auto-fix
1285 + retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
1286 + cmd_parts,
1287 + env=env,
1288 + cwd=str(pdd_files['example'].parent),
1289 + timeout=60
1290 + )
1291 + if retry_returncode == 0:
1292 + # Auto-fix worked! Save run report and continue
1293 + log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
1294 + test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1295 + report = RunReport(
1296 + datetime.datetime.now(datetime.timezone.utc).isoformat(),
1297 + exit_code=0,
1298 + tests_passed=1,
1299 + tests_failed=0,
1300 + coverage=0.0,
1301 + test_hash=test_hash
1302 + )
1303 + save_run_report(asdict(report), basename, language)
1304 + result = (True, 0.0, 'auto-fix')
1305 + success = True
1306 + actual_cost = 0.0
1307 + model_name = 'auto-fix'
1308 + # Update crash_log_content for logging
1309 + crash_log_content = f"Auto-fixed: {auto_fix_msg}"
1310 + continue # Skip crash_main, move to next operation
1311 + else:
1312 + # Auto-fix didn't fully work, update error log and proceed
1313 + crash_log_content = f"Auto-fix attempted ({auto_fix_msg}) but still failing:\nRETRY STDOUT:\n{retry_stdout}\nRETRY STDERR:\n{retry_stderr}\n"
1314 +
1315 + Path("crash.log").write_text(crash_log_content)
1316 + try:
1317 + result = crash_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), error_file="crash.log", output=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1318 + except Exception as e:
1319 + print(f"Crash fix failed: {e}")
1320 + skipped_operations.append('crash')
1321 + continue
1322 +
1323 + elif operation == 'verify':
1324 + if not pdd_files['example'].exists():
1325 + skipped_operations.append('verify')
757 1326 continue
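The hunk above tries a cheap local auto-fix and re-runs the example before falling back to the expensive LLM crash fixer. A control-flow sketch of that escalation under stated assumptions: run_example and try_auto_fix are hypothetical stand-ins for the package's _run_example_with_error_detection and _try_auto_fix_import_error helpers, and the "fix" here only classifies the error rather than patching anything.

    # Cheap-fix-first escalation sketch; helpers are illustrative stand-ins.
    import subprocess, sys
    from typing import Tuple

    def run_example(path: str) -> Tuple[int, str]:
        proc = subprocess.run([sys.executable, path], capture_output=True,
                              text=True, timeout=60)
        return proc.returncode, proc.stderr

    def try_auto_fix(stderr: str) -> bool:
        # In the real flow this would patch a bad import; here we only detect it.
        return "ModuleNotFoundError" in stderr

    def crash_workflow(path: str) -> str:
        code, stderr = run_example(path)
        if code == 0:
            return "no crash"
        if try_auto_fix(stderr):
            code, stderr = run_example(path)   # retry after the cheap fix
            if code == 0:
                return "auto-fixed"
        return "escalate to LLM crash fix"     # expensive path only as a last resort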
758-765 - (removed lines; content not shown in this diff view)
766 - program_file=str(pdd_files['example']),
767 - output_results=f"{basename}_verify_results.log",
768 - output_code=str(pdd_files['code']),
769 - output_program=str(pdd_files['example']),
770 - loop=True,
771 - verification_program=str(pdd_files['example']),
772 - max_attempts=max_attempts,
773 - budget=budget - current_cost_ref[0]
774 - )
775 - elif operation == 'test':
776 - # First, generate the test file
777 - result = cmd_test_main(
778 - ctx,
779 - prompt_file=str(pdd_files['prompt']),
780 - code_file=str(pdd_files['code']),
781 - output=str(pdd_files['test']),
782 - language=language,
783 - coverage_report=None,
784 - existing_tests=None,
785 - target_coverage=target_coverage,
786 - merge=False
787 - )
788 -
789 - # After successful test generation, execute the tests and create run report
790 - # This enables the next sync iteration to detect test failures and trigger fix
791 - if isinstance(result, dict) and result.get('success', False):
792 - try:
793 - test_file = pdd_files['test']
794 - if test_file.exists():
1327 + result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1328 + elif operation == 'test':
1329 + pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1330 + # Use merge=True when test file exists to preserve fixes and append new tests
1331 + # instead of regenerating from scratch (which would overwrite fixes)
1332 + test_file_exists = pdd_files['test'].exists()
1333 + result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=[str(pdd_files['test'])] if test_file_exists else None, target_coverage=target_coverage, merge=test_file_exists, strength=strength, temperature=temperature)
1334 + if pdd_files['test'].exists():
795 1335 _execute_tests_and_create_run_report(
796 -
1336 + pdd_files['test'],
1337 + basename,
1338 + language,
1339 + target_coverage,
1340 + code_file=pdd_files.get("code"),
1341 + atomic_state=atomic_state,
1342 + test_files=pdd_files.get('test_files'), # Bug #156
1343 + )
1344 + elif operation == 'test_extend':
1345 + # Extend existing tests to improve coverage
1346 + # Uses existing_tests and merge=True to add more test cases
1347 + pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1348 + if pdd_files['test'].exists():
1349 + existing_test_path = str(pdd_files['test'])
1350 + result = cmd_test_main(
1351 + ctx,
1352 + prompt_file=str(pdd_files['prompt']),
1353 + code_file=str(pdd_files['code']),
1354 + output=str(pdd_files['test']),
1355 + language=language,
1356 + coverage_report=None,
1357 + existing_tests=[existing_test_path],
1358 + target_coverage=target_coverage,
1359 + merge=True,
1360 + strength=strength,
1361 + temperature=temperature
797 1362 )
798 - except Exception as e:
799 - # Don't fail the entire operation if test execution fails
800 - # Just log it - the test file generation was successful
801 - print(f"Warning: Test execution failed: {e}")
802 - elif isinstance(result, tuple) and len(result) >= 3:
803 - # Handle tuple return format - assume success and execute tests
804 - try:
805 - test_file = pdd_files['test']
806 - if test_file.exists():
807 1363 _execute_tests_and_create_run_report(
808 -
1364 + pdd_files['test'],
1365 + basename,
1366 + language,
1367 + target_coverage,
1368 + code_file=pdd_files.get("code"),
1369 + atomic_state=atomic_state,
1370 + test_files=pdd_files.get('test_files'), # Bug #156
809 1371 )
810-831 - (removed lines; content not shown in this diff view)
1372 + else:
1373 + # No existing test file, fall back to regular test generation
1374 + result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=None, target_coverage=target_coverage, merge=False, strength=strength, temperature=temperature)
1375 + if pdd_files['test'].exists():
1376 + _execute_tests_and_create_run_report(
1377 + pdd_files['test'],
1378 + basename,
1379 + language,
1380 + target_coverage,
1381 + code_file=pdd_files.get("code"),
1382 + atomic_state=atomic_state,
1383 + test_files=pdd_files.get('test_files'), # Bug #156
1384 + )
1385 + elif operation == 'fix':
1386 + error_file_path = Path("fix_errors.log")
1387 + # Capture errors using language-appropriate test command
1388 + try:
1389 + from .get_test_command import get_test_command_for_file
1390 + test_cmd = get_test_command_for_file(str(pdd_files['test']), language)
1391 +
1392 + # Use clean env without TUI-specific vars
1393 + clean_env = os.environ.copy()
1394 + for var in ['FORCE_COLOR', 'COLUMNS']:
1395 + clean_env.pop(var, None)
1396 +
1397 + if test_cmd:
1398 + # Run language-appropriate test command
1399 + if language.lower() == 'python':
1400 + # Use pytest directly for Python
1401 + python_executable = detect_host_python_executable()
1402 + # Bug #156: Run pytest on ALL matching test files
1403 + test_files = pdd_files.get('test_files', [pdd_files['test']])
1404 + pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
1405 + test_result = subprocess.run(
1406 + pytest_args,
1407 + capture_output=True, text=True, timeout=300,
1408 + stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True,
1409 + cwd=str(pdd_files['test'].parent)
1410 + )
1411 + else:
1412 + # Use shell command for non-Python
1413 + test_result = subprocess.run(
1414 + test_cmd,
1415 + shell=True,
1416 + capture_output=True, text=True, timeout=300,
1417 + stdin=subprocess.DEVNULL, env=clean_env,
1418 + cwd=str(pdd_files['test'].parent),
1419 + start_new_session=True
1420 + )
1421 + error_content = f"Test output:\n{test_result.stdout}\n{test_result.stderr}"
1422 + else:
1423 + # No test command available - trigger agentic fallback with context
1424 + error_content = f"No test command available for {language}. Please run tests manually and provide error output."
1425 + except Exception as e:
1426 + error_content = f"Test execution error: {e}"
1427 + error_file_path.write_text(error_content)
1428 +
1429 + # Bug #156 fix: Parse pytest output to find actual failing files
1430 + # and pass the correct file to fix_main
1431 + failing_files = extract_failing_files_from_output(error_content)
1432 + unit_test_file_for_fix = str(pdd_files['test']) # Default to tracked file
1433 +
1434 + if failing_files:
1435 + # Try to resolve the failing file paths
1436 + test_dir = pdd_files['test'].parent
1437 + tracked_file_name = pdd_files['test'].name
1438 +
1439 + # Check if the tracked file is among the failures
1440 + tracked_in_failures = any(
1441 + Path(ff).name == tracked_file_name for ff in failing_files
1442 + )
1443 +
1444 + if not tracked_in_failures:
1445 + # Failures are in a different file - use the first failing file
1446 + for ff in failing_files:
1447 + # Try to resolve the path relative to test directory
1448 + ff_path = Path(ff)
1449 + if ff_path.is_absolute() and ff_path.exists():
1450 + unit_test_file_for_fix = str(ff_path)
1451 + break
1452 + else:
1453 + # Try to find it in the test directory
1454 + candidate = test_dir / ff_path.name
1455 + if candidate.exists():
1456 + unit_test_file_for_fix = str(candidate)
1457 + break
1458 + # Also try the path as-is relative to cwd
1459 + if ff_path.exists():
1460 + unit_test_file_for_fix = str(ff_path.resolve())
1461 + break
1462 +
1463 + result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
1464 + elif operation == 'update':
1465 + result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
832 1466 else:
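The Bug #156 handling above depends on pulling failing test file paths out of captured pytest output. A rough sketch of that extraction, in the spirit of extract_failing_files_from_output; the package's actual parser may handle more output formats, and failing_files_from_pytest here is purely illustrative.

    # Minimal failing-file extraction from pytest-style output.
    import re

    def failing_files_from_pytest(output: str) -> list:
        # Matches lines like "FAILED tests/test_calc.py::test_add - AssertionError"
        return sorted({m.group(1) for m in
                       re.finditer(r"^FAILED\s+([^\s:]+)::", output, re.MULTILINE)})

    sample = (
        "tests/test_calc.py::test_add PASSED\n"
        "FAILED tests/test_calc_extra.py::test_div - ZeroDivisionError\n"
    )
    print(failing_files_from_pytest(sample))  # ['tests/test_calc_extra.py']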
833 -
1467 + errors.append(f"Unknown operation {operation}")
1468 + result = {'success': False}
1469 +
1470 + # Result parsing
1471 + if isinstance(result, dict):
1472 + success = result.get('success', False)
1473 + current_cost_ref[0] += result.get('cost', 0.0)
1474 + elif isinstance(result, tuple) and len(result) >= 3:
1475 + if operation == 'test': success = pdd_files['test'].exists()
1476 + else: success = bool(result[0])
1477 + cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
1478 + current_cost_ref[0] += cost
1479 + else:
1480 + success = result is not None
1481 +
834 1482 except Exception as e:
835 -
836 -
837 - error_file_path.write_text(error_content)
838 -
839 - result = fix_main(
840 - ctx,
841 - prompt_file=str(pdd_files['prompt']),
842 - code_file=str(pdd_files['code']),
843 - unit_test_file=str(pdd_files['test']),
844 - error_file=str(error_file_path),
845 - output_test=str(pdd_files['test']),
846 - output_code=str(pdd_files['code']),
847 - output_results=f"{basename}_fix_results.log",
848 - loop=True,
849 - verification_program=str(pdd_files['example']),
850 - max_attempts=max_attempts,
851 - budget=budget - current_cost_ref[0],
852 - auto_submit=True
853 - )
854 - elif operation == 'update':
855 - result = update_main(
856 - ctx,
857 - input_prompt_file=str(pdd_files['prompt']),
858 - modified_code_file=str(pdd_files['code']),
859 - input_code_file=None,
860 - output=str(pdd_files['prompt']),
861 - git=True
862 - )
863 - else:
864 - errors.append(f"Unknown operation '{operation}' requested.")
865 - result = {'success': False, 'cost': 0.0}
1483 + errors.append(f"Exception during '{operation}': {e}")
1484 + success = False
866 1485
867-880 - (removed lines; content not shown in this diff view)
881 - # Unknown return format
882 - success = result is not None
883 - current_cost_ref[0] += 0.0
884 -
885 - except Exception as e:
886 - errors.append(f"Exception during '{operation}': {e}")
887 - success = False
1486 + # Log update
1487 + duration = time.time() - op_start_time
1488 + actual_cost = 0.0
1489 + model_name = "unknown"
1490 + if success:
1491 + if isinstance(result, dict):
1492 + actual_cost = result.get('cost', 0.0)
1493 + model_name = result.get('model', 'unknown')
1494 + elif isinstance(result, tuple) and len(result) >= 3:
1495 + actual_cost = result[-2] if len(result) >= 2 else 0.0
1496 + model_name = result[-1] if len(result) >= 1 else 'unknown'
1497 + last_model_name = str(model_name)
1498 + operations_completed.append(operation)
1499 + _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
888 1500
889 -
890 -
1501 + update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
1502 + append_sync_log(basename, language, log_entry)
891 1503
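The result-parsing and log-update blocks above repeatedly distinguish dict results (with 'success'/'cost'/'model' keys) from (value, cost, model) tuples. A compact sketch of that normalization; normalize_result is an illustrative helper, not a function in the package.

    # Normalizing the two result shapes handled above.
    from typing import Any, Tuple

    def normalize_result(result: Any) -> Tuple[bool, float, str]:
        if isinstance(result, dict):
            return (bool(result.get('success', False)),
                    float(result.get('cost', 0.0)),
                    str(result.get('model', 'unknown')))
        if isinstance(result, tuple) and len(result) >= 3:
            cost = result[-2] if isinstance(result[-2], (int, float)) else 0.0
            model = result[-1] if isinstance(result[-1], str) else 'unknown'
            return bool(result[0]), float(cost), model
        return result is not None, 0.0, 'unknown'

    print(normalize_result({'success': True, 'cost': 0.02, 'model': 'gpt'}))
    print(normalize_result(("generated code", 0.01, 'sonnet')))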
892-922 - (removed lines; content not shown in this diff view)
923 - cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
924 - model = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else ''
925 - else:
926 - cost = 0.0
927 - model = ''
928 - _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
929 -
930 - # After successful crash operation, re-run the example to generate fresh run report
931 - if operation == 'crash':
932 - try:
933 - example_file = pdd_files['example']
934 - if example_file.exists():
935 - # Run the example program to check if crash is actually fixed
936 - try:
937 - example_result = subprocess.run(
938 - ['python', str(example_file)],
939 - capture_output=True,
940 - text=True,
941 - timeout=60,
942 - env=os.environ.copy(),
943 - cwd=str(example_file.parent)
944 - )
945 -
946 - # Create fresh run report based on actual execution
947 - report_data = RunReport(
948 - timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
949 - exit_code=example_result.returncode,
950 - tests_passed=1 if example_result.returncode == 0 else 0,
951 - tests_failed=0 if example_result.returncode == 0 else 1,
952 - coverage=100.0 if example_result.returncode == 0 else 0.0
953 - )
954 - save_run_report(asdict(report_data), basename, language)
955 - print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
956 -
957 - except subprocess.TimeoutExpired:
958 - # Example timed out - still considered a failure
959 - report_data = RunReport(
960 - timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
961 - exit_code=124, # Standard timeout exit code
962 - tests_passed=0, tests_failed=1, coverage=0.0
963 - )
964 - save_run_report(asdict(report_data), basename, language)
965 - print("Example timed out after crash fix - created failure run report")
966 -
967 - except Exception as e:
968 - # Don't fail the entire operation if example re-execution fails
969 - print(f"Warning: Post-crash example re-execution failed: {e}")
1504 + # Post-operation checks (simplified)
1505 + if success and operation == 'crash':
1506 + # Re-run example to verify crash fix worked
1507 + try:
1508 + # Use clean env without TUI-specific vars
1509 + clean_env = os.environ.copy()
1510 + for var in ['FORCE_COLOR', 'COLUMNS']:
1511 + clean_env.pop(var, None)
1512 + # Get language-appropriate run command
1513 + example_path = str(pdd_files['example'])
1514 + run_cmd = get_run_command_for_file(example_path)
1515 + if run_cmd:
1516 + cmd_parts = run_cmd.split()
1517 + else:
1518 + cmd_parts = ['python', example_path]
1519 + # Use error-detection runner that handles server-style examples
1520 + returncode, stdout, stderr = _run_example_with_error_detection(
1521 + cmd_parts,
1522 + env=clean_env,
1523 + cwd=str(pdd_files['example'].parent),
1524 + timeout=60
1525 + )
1526 + # Include test_hash for staleness detection
1527 + test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1528 + report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
1529 + save_run_report(asdict(report), basename, language)
1530 + except Exception as e:
1531 + # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
1532 + error_msg = f"Post-crash verification failed: {e}"
1533 + errors.append(error_msg)
1534 + log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})
970 1535
971 -
972 -
973 -
974 -
975 - if test_file.exists():
1536 + if success and operation == 'fix':
1537 + # Re-run tests to update run_report after successful fix
1538 + # This prevents infinite loop by updating the state machine
1539 + if pdd_files['test'].exists():
976 1540 _execute_tests_and_create_run_report(
977 -
1541 + pdd_files['test'],
1542 + basename,
1543 + language,
1544 + target_coverage,
1545 + code_file=pdd_files.get("code"),
1546 + atomic_state=atomic_state,
1547 + test_files=pdd_files.get('test_files'), # Bug #156
978 1548 )
979 -
980 -
981 -
982 -
983 - errors.append(f"Operation '{operation}' failed.")
984 - break
1549 +
1550 + if not success:
1551 + errors.append(f"Operation '{operation}' failed.")
1552 + break
985 1553
986-994 - (removed lines; content not shown in this diff view)
995 - "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
996 - "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
997 - })
998 - except Exception:
999 - pass # Don't fail if logging fails
1554 + except BaseException as e:
1555 + errors.append(f"An unexpected error occurred in the orchestrator: {type(e).__name__}: {e}")
1556 + # Log the full traceback for debugging
1557 + import traceback
1558 + traceback.print_exc()
1559 + finally:
1560 + try:
1561 + log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
1562 + except: pass
1000 1563
1001-1004 - (removed lines; content not shown in this diff view)
1564 + # Return result dict
1565 + return {
1566 + 'success': not errors,
1567 + 'operations_completed': operations_completed,
1568 + 'skipped_operations': skipped_operations,
1569 + 'total_cost': current_cost_ref[0],
1570 + 'total_time': time.time() - start_time,
1571 + 'final_state': {p: {'exists': f.exists(), 'path': str(f)} for p, f in pdd_files.items() if p != 'test_files'},
1572 + 'errors': errors,
1573 + 'error': "; ".join(errors) if errors else None, # Add this line
1574 + 'model_name': last_model_name,
1575 + }
1576 +
1577 + # Instantiate and run Textual App
1578 + app = SyncApp(
1579 + basename=basename,
1580 + budget=budget,
1581 + worker_func=sync_worker_logic,
1582 + function_name_ref=current_function_name_ref,
1583 + cost_ref=current_cost_ref,
1584 + prompt_path_ref=prompt_path_ref,
1585 + code_path_ref=code_path_ref,
1586 + example_path_ref=example_path_ref,
1587 + tests_path_ref=tests_path_ref,
1588 + prompt_color_ref=prompt_box_color_ref,
1589 + code_color_ref=code_box_color_ref,
1590 + example_color_ref=example_box_color_ref,
1591 + tests_color_ref=tests_box_color_ref,
1592 + stop_event=stop_event,
1593 + progress_callback_ref=progress_callback_ref
1594 + )
1595 +
1596 + # Store app reference so worker can access request_confirmation
1597 + app_ref[0] = app
1598 +
1599 + result = app.run()
1600 +
1601 + # Show exit animation if not quiet
1602 + if not quiet:
1603 + from .sync_tui import show_exit_animation
1604 + show_exit_animation()
1605 +
1606 + # Check for worker exception that might have caused a crash
1607 + if app.worker_exception:
1608 + print(f"\n[Error] Worker thread crashed with exception: {app.worker_exception}", file=sys.stderr)
1005 1609
1006-1010 - (removed lines; content not shown in this diff view)
1610 + if hasattr(app, 'captured_logs') and app.captured_logs:
1611 + print("\n[Captured Logs (last 20 lines)]", file=sys.stderr)
1612 + for line in app.captured_logs[-20:]: # Print last 20 lines
1613 + print(f" {line}", file=sys.stderr)
1614 +
1615 + import traceback
1616 + # Use trace module to print the stored exception's traceback if available
1617 + if hasattr(app.worker_exception, '__traceback__'):
1618 + traceback.print_exception(type(app.worker_exception), app.worker_exception, app.worker_exception.__traceback__, file=sys.stderr)
1619 +
1620 + if result is None:
1621 + return {
1622 + "success": False,
1623 + "total_cost": current_cost_ref[0],
1624 + "model_name": "",
1625 + "error": "Sync process interrupted or returned no result.",
1626 + "operations_completed": [],
1627 + "errors": ["App exited without result"]
1628 + }
1011 1629
1012 - return
1013 - 'success': not errors,
1014 - 'operations_completed': operations_completed,
1015 - 'skipped_operations': skipped_operations,
1016 - 'total_cost': current_cost_ref[0],
1017 - 'total_time': total_time,
1018 - 'final_state': final_state,
1019 - 'errors': errors,
1020 - }
1630 + return result
1021 1631
1022 1632 if __name__ == '__main__':
1023 - # Example usage
1024 - # This simulates running `pdd sync my_calculator` from the command line.
1025 -
1026 - print("--- Running Basic Sync Orchestration Example ---")
1027 -
1028 - # Setup a dummy project structure
1633 + # Example usage
1029 1634 Path("./prompts").mkdir(exist_ok=True)
1030 1635 Path("./src").mkdir(exist_ok=True)
1031 1636 Path("./examples").mkdir(exist_ok=True)
1032 1637 Path("./tests").mkdir(exist_ok=True)
1033 1638 Path("./prompts/my_calculator_python.prompt").write_text("Create a calculator.")
1034 -
1035 - # Ensure PDD meta directory exists for logs and locks
1036 1639 PDD_DIR.mkdir(exist_ok=True)
1037 1640 META_DIR.mkdir(exist_ok=True)
1038 -
1039 - result = sync_orchestration(
1040 - basename="my_calculator",
1041 - language="python",
1042 - quiet=True # Suppress mock command output for cleaner example run
1043 - )
1044 -
1045 - print("\n--- Sync Orchestration Finished ---")
1641 + result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
1046 1642 print(json.dumps(result, indent=2))
1047 -
1048 - if result['success']:
1049 - print("\n✅ Sync completed successfully.")
1050 - else:
1051 - print(f"\n❌ Sync failed. Errors: {result['errors']}")
1052 -
1053 - print("\n--- Running Sync Log Example ---")
1054 - # This will now show the log from the run we just completed.
1055 - log_result = sync_orchestration(
1056 - basename="my_calculator",
1057 - language="python",
1058 - log=True
1059 - )