pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py CHANGED
@@ -12,21 +12,32 @@ import subprocess
  import re
  import os
  from pathlib import Path
- from typing import Dict, Any, Optional, List
- from dataclasses import asdict
+ from typing import Dict, Any, Optional, List, Callable
+ from dataclasses import asdict, dataclass, field
+ import tempfile
+ import sys

  import click
+ import logging
+
+ # --- Constants ---
+ MAX_CONSECUTIVE_TESTS = 3 # Allow up to 3 consecutive test attempts
+ MAX_TEST_EXTEND_ATTEMPTS = 2 # Allow up to 2 attempts to extend tests for coverage
+ MAX_CONSECUTIVE_CRASHES = 3 # Allow up to 3 consecutive crash attempts (Bug #157 fix)

  # --- Real PDD Component Imports ---
- from .sync_animation import sync_animation
+ from .sync_tui import SyncApp
  from .sync_determine_operation import (
  sync_determine_operation,
  get_pdd_file_paths,
  RunReport,
+ SyncDecision,
  PDD_DIR,
  META_DIR,
  SyncLock,
  read_run_report,
+ calculate_sha256,
+ calculate_current_hashes,
  )
  from .auto_deps_main import auto_deps_main
  from .code_generator_main import code_generator_main
@@ -37,6 +48,104 @@ from .cmd_test_main import cmd_test_main
  from .fix_main import fix_main
  from .update_main import update_main
  from .python_env_detector import detect_host_python_executable
+ from .get_run_command import get_run_command_for_file
+ from .pytest_output import extract_failing_files_from_output
+ from . import DEFAULT_STRENGTH
+
+
+ # --- Atomic State Update (Issue #159 Fix) ---
+
+ @dataclass
+ class PendingStateUpdate:
+ """Holds pending state updates for atomic commit."""
+ run_report: Optional[Dict[str, Any]] = None
+ fingerprint: Optional[Dict[str, Any]] = None
+ run_report_path: Optional[Path] = None
+ fingerprint_path: Optional[Path] = None
+
+
+ class AtomicStateUpdate:
+ """
+ Context manager for atomic state updates.
+
+ Ensures run_report and fingerprint are both written or neither is written.
+ This fixes Issue #159 where non-atomic writes caused state desynchronization.
+
+ Usage:
+ with AtomicStateUpdate(basename, language) as state:
+ state.set_run_report(report_dict, report_path)
+ state.set_fingerprint(fingerprint_dict, fp_path)
+ # On successful exit, both files are written atomically
+ # On exception, neither file is written (rollback)
+ """
+
+ def __init__(self, basename: str, language: str):
+ self.basename = basename
+ self.language = language
+ self.pending = PendingStateUpdate()
+ self._temp_files: List[str] = []
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if exc_type is None:
+ self._commit()
+ else:
+ self._rollback()
+ return False # Don't suppress exceptions
+
+ def set_run_report(self, report: Dict[str, Any], path: Path):
+ """Buffer a run report for atomic write."""
+ self.pending.run_report = report
+ self.pending.run_report_path = path
+
+ def set_fingerprint(self, fingerprint: Dict[str, Any], path: Path):
+ """Buffer a fingerprint for atomic write."""
+ self.pending.fingerprint = fingerprint
+ self.pending.fingerprint_path = path
+
+ def _atomic_write(self, data: Dict[str, Any], target_path: Path) -> None:
+ """Write data to file atomically using temp file + rename pattern."""
+ target_path.parent.mkdir(parents=True, exist_ok=True)
+
+ # Write to temp file in same directory (required for atomic rename)
+ fd, temp_path = tempfile.mkstemp(
+ dir=target_path.parent,
+ prefix=f".{target_path.stem}_",
+ suffix=".tmp"
+ )
+ self._temp_files.append(temp_path)
+
+ try:
+ with os.fdopen(fd, 'w') as f:
+ json.dump(data, f, indent=2, default=str)
+
+ # Atomic rename - guaranteed atomic on POSIX systems
+ os.replace(temp_path, target_path)
+ self._temp_files.remove(temp_path) # Successfully moved, stop tracking
+ except Exception:
+ # Leave temp file for rollback to clean up
+ raise
+
+ def _commit(self):
+ """Commit all pending state updates atomically."""
+ # Write fingerprint first (checkpoint), then run_report
+ if self.pending.fingerprint and self.pending.fingerprint_path:
+ self._atomic_write(self.pending.fingerprint, self.pending.fingerprint_path)
+ if self.pending.run_report and self.pending.run_report_path:
+ self._atomic_write(self.pending.run_report, self.pending.run_report_path)
+
+ def _rollback(self):
+ """Clean up any temp files without committing changes."""
+ for temp_path in self._temp_files:
+ try:
+ if os.path.exists(temp_path):
+ os.unlink(temp_path)
+ except OSError:
+ pass # Best effort cleanup
+ self._temp_files.clear()
+

  # --- Mock Helper Functions ---

@@ -98,20 +207,44 @@ def log_sync_event(basename: str, language: str, event: str, details: Dict[str,
  }
  append_sync_log(basename, language, entry)

- def save_run_report(report: Dict[str, Any], basename: str, language: str):
- """Save a run report to the metadata directory."""
+ def save_run_report(report: Dict[str, Any], basename: str, language: str,
+ atomic_state: Optional['AtomicStateUpdate'] = None):
+ """Save a run report to the metadata directory.
+
+ Args:
+ report: The run report dictionary to save.
+ basename: The module basename.
+ language: The programming language.
+ atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+ """
  report_file = META_DIR / f"{basename}_{language}_run.json"
- META_DIR.mkdir(parents=True, exist_ok=True)
- with open(report_file, 'w') as f:
- json.dump(report, f, indent=2, default=str)
+ if atomic_state:
+ # Buffer for atomic write
+ atomic_state.set_run_report(report, report_file)
+ else:
+ # Legacy direct write
+ META_DIR.mkdir(parents=True, exist_ok=True)
+ with open(report_file, 'w') as f:
+ json.dump(report, f, indent=2, default=str)
+
+ def _save_operation_fingerprint(basename: str, language: str, operation: str,
+ paths: Dict[str, Path], cost: float, model: str,
+ atomic_state: Optional['AtomicStateUpdate'] = None):
+ """Save fingerprint state after successful operation.

- def _save_operation_fingerprint(basename: str, language: str, operation: str,
- paths: Dict[str, Path], cost: float, model: str):
- """Save fingerprint state after successful operation."""
+ Args:
+ basename: The module basename.
+ language: The programming language.
+ operation: The operation that was performed.
+ paths: Dictionary of PDD file paths.
+ cost: The cost of the operation.
+ model: The model used.
+ atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+ """
  from datetime import datetime, timezone
  from .sync_determine_operation import calculate_current_hashes, Fingerprint
  from . import __version__
-
+
  current_hashes = calculate_current_hashes(paths)
  fingerprint = Fingerprint(
  pdd_version=__version__,
@@ -120,103 +253,544 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
120
253
  prompt_hash=current_hashes.get('prompt_hash'),
121
254
  code_hash=current_hashes.get('code_hash'),
122
255
  example_hash=current_hashes.get('example_hash'),
123
- test_hash=current_hashes.get('test_hash')
256
+ test_hash=current_hashes.get('test_hash'),
257
+ test_files=current_hashes.get('test_files'), # Bug #156
124
258
  )
125
-
126
- META_DIR.mkdir(parents=True, exist_ok=True)
259
+
127
260
  fingerprint_file = META_DIR / f"{basename}_{language}.json"
128
- with open(fingerprint_file, 'w') as f:
129
- json.dump(asdict(fingerprint), f, indent=2, default=str)
261
+ if atomic_state:
262
+ # Buffer for atomic write
263
+ atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
264
+ else:
265
+ # Legacy direct write
266
+ META_DIR.mkdir(parents=True, exist_ok=True)
267
+ with open(fingerprint_file, 'w') as f:
268
+ json.dump(asdict(fingerprint), f, indent=2, default=str)
130
269
 
131
- # SyncLock class now imported from sync_determine_operation module
270
+ def _python_cov_target_for_code_file(code_file: Path) -> str:
271
+ """Return a `pytest-cov` `--cov` target for a Python code file.
272
+
273
+ - If the file is inside a Python package (directories with `__init__.py`),
274
+ returns a dotted module path (e.g., `pdd.sync_orchestration`).
275
+ - Otherwise falls back to the filename stem (e.g., `admin_get_users`).
276
+ """
277
+ if code_file.suffix != ".py":
278
+ return code_file.stem
279
+
280
+ package_dir: Optional[Path] = None
281
+ current = code_file.parent
282
+ while (current / "__init__.py").exists():
283
+ package_dir = current
284
+ parent = current.parent
285
+ if parent == current:
286
+ break
287
+ current = parent
288
+
289
+ if package_dir:
290
+ relative_module = code_file.relative_to(package_dir.parent).with_suffix("")
291
+ return str(relative_module).replace(os.sep, ".")
292
+
293
+ return code_file.stem
294
+
295
+
296
+ def _python_cov_target_for_test_and_code(test_file: Path, code_file: Path, fallback: str) -> str:
297
+ """Choose the best `--cov` target based on how tests import the code.
298
+
299
+ In some repos, tests add a directory to `sys.path` and import modules by their
300
+ filename stem (e.g., `from admin_get_users import ...`) even when the code
301
+ also lives under a package (e.g., `backend.functions.admin_get_users`).
302
+
303
+ Heuristic:
304
+ - Prefer the code file stem when the test file imports it directly.
305
+ - Otherwise, prefer the dotted module path derived from the package layout.
306
+ - Fall back to the provided fallback (usually the basename).
307
+ """
308
+
309
+ def _imports_module(source: str, module: str) -> bool:
310
+ escaped = re.escape(module)
311
+ return bool(
312
+ re.search(rf"^\s*import\s+{escaped}\b", source, re.MULTILINE)
313
+ or re.search(rf"^\s*from\s+{escaped}\b", source, re.MULTILINE)
314
+ )
315
+
316
+ stem = code_file.stem
317
+ dotted = _python_cov_target_for_code_file(code_file)
132
318
 
133
- def _execute_tests_and_create_run_report(test_file: Path, basename: str, language: str, target_coverage: float = 90.0) -> RunReport:
134
- """Execute tests and create a RunReport with actual results."""
135
- timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
136
-
137
319
  try:
138
- # Execute pytest with coverage reporting on the specific module
139
- # Extract module name from test file (e.g., test_factorial.py -> factorial)
140
- module_name = test_file.name.replace('test_', '').replace('.py', '')
141
-
142
- # Use the module import path rather than file path for coverage
143
- # Use environment-aware Python executable for pytest execution
144
- python_executable = detect_host_python_executable()
145
-
146
- # Determine coverage target based on module location
147
- if base_package:
148
- cov_target = f'{base_package}.{module_name}'
149
- else:
150
- # Dynamically discover package structure based on test file location
151
- relative_path = test_file.parent.relative_to(Path.cwd())
152
- package_path = str(relative_path).replace(os.sep, '.')
153
- cov_target = f'{package_path}.{module_name}' if package_path else module_name
154
-
155
- result = subprocess.run([
156
- python_executable, '-m', 'pytest',
157
- str(test_file),
158
- '-v',
159
- '--tb=short',
160
- f'--cov={cov_target}',
161
- '--cov-report=term-missing'
162
- ], capture_output=True, text=True, timeout=300)
163
-
164
- exit_code = result.returncode
165
- stdout = result.stdout
166
- stderr = result.stderr
167
-
168
- # Parse test results from pytest output
169
- tests_passed = 0
170
- tests_failed = 0
171
- coverage = 0.0
172
-
173
- # Parse passed/failed tests
174
- if 'passed' in stdout:
175
- passed_match = re.search(r'(\d+) passed', stdout)
320
+ test_source = test_file.read_text(encoding="utf-8", errors="ignore")
321
+ except Exception:
322
+ test_source = ""
323
+
324
+ if stem and _imports_module(test_source, stem):
325
+ return stem
326
+
327
+ if dotted and dotted != stem:
328
+ if _imports_module(test_source, dotted):
329
+ return dotted
330
+
331
+ if "." in dotted:
332
+ parent = dotted.rsplit(".", 1)[0]
333
+ # e.g. `from backend.functions import admin_get_users`
334
+ if re.search(
335
+ rf"^\s*from\s+{re.escape(parent)}\s+import\s+.*\b{re.escape(stem)}\b",
336
+ test_source,
337
+ re.MULTILINE,
338
+ ):
339
+ return dotted
340
+ # e.g. `import backend.functions.admin_get_users`
341
+ if re.search(
342
+ rf"^\s*import\s+{re.escape(parent)}\.{re.escape(stem)}\b",
343
+ test_source,
344
+ re.MULTILINE,
345
+ ):
346
+ return dotted
347
+
348
+ return dotted
349
+
350
+ return stem or fallback
351
+
352
+
353
+ def _parse_test_output(output: str, language: str) -> tuple[int, int, float]:
354
+ """
355
+ Parse test output to extract passed/failed/coverage.
356
+
357
+ Args:
358
+ output: Combined stdout/stderr from test runner
359
+ language: Language name (e.g., 'python', 'typescript', 'go')
360
+
361
+ Returns:
362
+ (tests_passed, tests_failed, coverage)
363
+ """
364
+ tests_passed = 0
365
+ tests_failed = 0
366
+ coverage = 0.0
367
+
368
+ lang = language.lower()
369
+
370
+ # Python (pytest)
371
+ if lang == 'python':
372
+ if 'passed' in output:
373
+ passed_match = re.search(r'(\d+) passed', output)
176
374
  if passed_match:
177
375
  tests_passed = int(passed_match.group(1))
178
-
179
- if 'failed' in stdout:
180
- failed_match = re.search(r'(\d+) failed', stdout)
376
+ if 'failed' in output:
377
+ failed_match = re.search(r'(\d+) failed', output)
181
378
  if failed_match:
182
379
  tests_failed = int(failed_match.group(1))
183
-
184
- # Parse coverage percentage - try multiple patterns
185
- coverage_match = re.search(r'TOTAL.*?(\d+)%', stdout)
380
+ if 'error' in output:
381
+ error_match = re.search(r'(\d+) error', output)
382
+ if error_match:
383
+ tests_failed += int(error_match.group(1))
384
+ coverage_match = re.search(r'TOTAL.*?(\d+)%', output)
186
385
  if not coverage_match:
187
- # Try alternative patterns for coverage output
188
- coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
386
+ coverage_match = re.search(r'(\d+)%\s*$', output, re.MULTILINE)
189
387
  if not coverage_match:
190
- # Try pattern with decimal
191
- coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
192
-
388
+ coverage_match = re.search(r'(\d+(?:\.\d+)?)%', output)
193
389
  if coverage_match:
194
390
  coverage = float(coverage_match.group(1))
195
-
196
- # Create and save run report
391
+
392
+ # Jest/Vitest (JavaScript/TypeScript)
393
+ elif lang in ('javascript', 'typescript', 'typescriptreact'):
394
+ # "Tests: X passed, Y failed" or "Tests: X passed, Y failed, Z total"
395
+ match = re.search(r'Tests:\s*(\d+)\s+passed', output)
396
+ if match:
397
+ tests_passed = int(match.group(1))
398
+ match = re.search(r'Tests:.*?(\d+)\s+failed', output)
399
+ if match:
400
+ tests_failed = int(match.group(1))
401
+
402
+ # Alternative Mocha-style: "X passing, Y failing"
403
+ if tests_passed == 0:
404
+ pass_match = re.search(r'(\d+)\s+pass(?:ing)?', output, re.I)
405
+ if pass_match:
406
+ tests_passed = int(pass_match.group(1))
407
+ if tests_failed == 0:
408
+ fail_match = re.search(r'(\d+)\s+fail(?:ing)?', output, re.I)
409
+ if fail_match:
410
+ tests_failed = int(fail_match.group(1))
411
+
412
+ # Coverage: "All files | XX.XX |"
413
+ cov_match = re.search(r'All files[^|]*\|\s*(\d+\.?\d*)', output)
414
+ if cov_match:
415
+ coverage = float(cov_match.group(1))
416
+
417
+ # Go
418
+ elif lang == 'go':
419
+ # Count PASS and FAIL occurrences for individual tests
420
+ tests_passed = len(re.findall(r'--- PASS:', output))
421
+ tests_failed = len(re.findall(r'--- FAIL:', output))
422
+
423
+ # Fallback: check for overall PASS/FAIL
424
+ if tests_passed == 0 and 'PASS' in output and 'FAIL' not in output:
425
+ tests_passed = 1
426
+ if tests_failed == 0 and 'FAIL' in output:
427
+ tests_failed = 1
428
+
429
+ # coverage: XX.X% of statements
430
+ cov_match = re.search(r'coverage:\s*(\d+\.?\d*)%', output)
431
+ if cov_match:
432
+ coverage = float(cov_match.group(1))
433
+
434
+ # Rust (cargo test)
435
+ elif lang == 'rust':
436
+ # "test result: ok. X passed; Y failed;"
437
+ match = re.search(r'(\d+)\s+passed', output)
438
+ if match:
439
+ tests_passed = int(match.group(1))
440
+ match = re.search(r'(\d+)\s+failed', output)
441
+ if match:
442
+ tests_failed = int(match.group(1))
443
+
444
+ # Fallback: try generic patterns
445
+ else:
446
+ pass_match = re.search(r'(\d+)\s+(?:tests?\s+)?pass(?:ed)?', output, re.I)
447
+ fail_match = re.search(r'(\d+)\s+(?:tests?\s+)?fail(?:ed)?', output, re.I)
448
+ if pass_match:
449
+ tests_passed = int(pass_match.group(1))
450
+ if fail_match:
451
+ tests_failed = int(fail_match.group(1))
452
+
453
+ return tests_passed, tests_failed, coverage
454
+
455
+
456
+ def _detect_example_errors(output: str) -> tuple[bool, str]:
457
+ """
458
+ Detect if example output contains error indicators.
459
+
460
+ Only detects true crashes/errors:
461
+ - Python tracebacks (catches ALL unhandled exceptions)
462
+ - ERROR level log messages
463
+
464
+ Intentionally does NOT detect:
465
+ - HTTP status codes (examples may test error responses)
466
+ - Individual exception type names (causes false positives, redundant with traceback)
467
+
468
+ Returns:
469
+ (has_errors, error_summary)
470
+ """
471
+ error_patterns = [
472
+ (r'Traceback \(most recent call last\):', 'Python traceback'),
473
+ (r' - ERROR - ', 'Error log message'), # Python logging format
474
+ ]
475
+
476
+ errors_found = []
477
+ for pattern, description in error_patterns:
478
+ if re.search(pattern, output, re.MULTILINE):
479
+ errors_found.append(description)
480
+
481
+ if errors_found:
482
+ return True, '; '.join(errors_found)
483
+ return False, ''
484
+
485
+
486
+ def _try_auto_fix_import_error(
487
+ error_output: str,
488
+ code_file: Path,
489
+ example_file: Path,
490
+ ) -> tuple[bool, str]:
491
+ """
492
+ Try to automatically fix common import errors before calling expensive agentic fix.
493
+
494
+ Returns:
495
+ (fixed, message): Whether a fix was attempted and what was done.
496
+ """
497
+ import re
498
+
499
+ # Check for ModuleNotFoundError or ImportError
500
+ module_not_found = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", error_output)
501
+ import_error = re.search(r"ImportError: cannot import name ['\"]([^'\"]+)['\"]", error_output)
502
+
503
+ if not module_not_found and not import_error:
504
+ return False, "No import error detected"
505
+
506
+ if module_not_found:
507
+ missing_module = module_not_found.group(1)
508
+ # Split by . to get the top-level package
509
+ top_level_package = missing_module.split('.')[0]
510
+
511
+ # Check if this is the module we're trying to import (local module)
512
+ code_module_name = code_file.stem # e.g., "data_validator" from "data_validator.py"
513
+
514
+ if top_level_package == code_module_name:
515
+ # It's trying to import our own generated code - fix the example's sys.path
516
+ # Read the example and fix the path manipulation
517
+ try:
518
+ example_content = example_file.read_text(encoding='utf-8')
519
+ code_dir = str(code_file.parent.resolve())
520
+
521
+ # Look for existing sys.path manipulation
522
+ if 'sys.path' in example_content:
523
+ # Try to fix the existing path manipulation
524
+ # Common pattern: module_path = os.path.abspath(os.path.join(...))
525
+ # Replace with correct path
526
+ fixed_content = re.sub(
527
+ r"module_path\s*=\s*os\.path\.abspath\([^)]+\)",
528
+ f"module_path = '{code_dir}'",
529
+ example_content
530
+ )
531
+ if fixed_content != example_content:
532
+ example_file.write_text(fixed_content, encoding='utf-8')
533
+ return True, f"Fixed sys.path to point to {code_dir}"
534
+
535
+ # If no existing sys.path, add one at the start after imports
536
+ lines = example_content.split('\n')
537
+ insert_pos = 0
538
+ for i, line in enumerate(lines):
539
+ if line.startswith('import ') or line.startswith('from '):
540
+ if 'sys' in line or 'os' in line:
541
+ insert_pos = i + 1
542
+ continue
543
+ if line.strip() and not line.startswith('#') and not line.startswith('import') and not line.startswith('from'):
544
+ insert_pos = i
545
+ break
546
+
547
+ path_fix = f"\n# Auto-added by pdd to fix import\nimport sys\nsys.path.insert(0, '{code_dir}')\n"
548
+ lines.insert(insert_pos, path_fix)
549
+ example_file.write_text('\n'.join(lines), encoding='utf-8')
550
+ return True, f"Added sys.path.insert(0, '{code_dir}') to example"
551
+
552
+ except Exception as e:
553
+ return False, f"Failed to fix import path: {e}"
554
+
555
+ else:
556
+ # It's an external package - try pip install
557
+ try:
558
+ result = subprocess.run(
559
+ [sys.executable, '-m', 'pip', 'install', top_level_package],
560
+ capture_output=True,
561
+ text=True,
562
+ timeout=120
563
+ )
564
+ if result.returncode == 0:
565
+ return True, f"Installed missing package: {top_level_package}"
566
+ else:
567
+ return False, f"Failed to install {top_level_package}: {result.stderr}"
568
+ except Exception as e:
569
+ return False, f"Failed to run pip install: {e}"
570
+
571
+ return False, "Import error detected but no auto-fix available"
572
+
573
+
574
+ def _run_example_with_error_detection(
575
+ cmd_parts: list[str],
576
+ env: dict,
577
+ cwd: str,
578
+ timeout: int = 60
579
+ ) -> tuple[int, str, str]:
580
+ """
581
+ Run example file, detecting errors from output.
582
+
583
+ For server-style examples that block, this runs until timeout
584
+ then analyzes output for errors. No errors = success.
585
+
586
+ Returns:
587
+ (returncode, stdout, stderr)
588
+ - returncode: 0 if no errors detected, positive if errors found or process failed
589
+ """
590
+ import threading
591
+
592
+ proc = subprocess.Popen(
593
+ cmd_parts,
594
+ stdout=subprocess.PIPE,
595
+ stderr=subprocess.PIPE,
596
+ stdin=subprocess.DEVNULL,
597
+ env=env,
598
+ cwd=cwd,
599
+ start_new_session=True,
600
+ )
601
+
602
+ stdout_chunks = []
603
+ stderr_chunks = []
604
+
605
+ def read_pipe(pipe, chunks):
606
+ try:
607
+ for line in iter(pipe.readline, b''):
608
+ chunks.append(line)
609
+ except Exception:
610
+ pass
611
+
612
+ t_out = threading.Thread(target=read_pipe, args=(proc.stdout, stdout_chunks), daemon=True)
613
+ t_err = threading.Thread(target=read_pipe, args=(proc.stderr, stderr_chunks), daemon=True)
614
+ t_out.start()
615
+ t_err.start()
616
+
617
+ # Wait for process or timeout
618
+ try:
619
+ proc.wait(timeout=timeout)
620
+ except subprocess.TimeoutExpired:
621
+ proc.terminate()
622
+ try:
623
+ proc.wait(timeout=5)
624
+ except subprocess.TimeoutExpired:
625
+ proc.kill()
626
+ proc.wait()
627
+
628
+ t_out.join(timeout=2)
629
+ t_err.join(timeout=2)
630
+
631
+ stdout = b''.join(stdout_chunks).decode('utf-8', errors='replace')
632
+ stderr = b''.join(stderr_chunks).decode('utf-8', errors='replace')
633
+ combined = stdout + '\n' + stderr
634
+
635
+ # Check for errors in output
636
+ has_errors, error_summary = _detect_example_errors(combined)
637
+
638
+ # Determine result:
639
+ # - Errors in output → failure
640
+ # - Positive exit code (process failed normally, e.g., sys.exit(1)) → failure
641
+ # - Negative exit code (killed by signal, e.g., -9 for SIGKILL) → check output
642
+ # - Zero exit code → success
643
+ #
644
+ # IMPORTANT: When we kill the process after timeout, returncode is negative
645
+ # (the signal number). This is NOT a failure if output has no errors.
646
+ if has_errors:
647
+ return 1, stdout, stderr # Errors detected in output
648
+ elif proc.returncode is not None and proc.returncode > 0:
649
+ return proc.returncode, stdout, stderr # Process exited with error
650
+ else:
651
+ # Success cases:
652
+ # - returncode == 0 (clean exit)
653
+ # - returncode < 0 (killed by signal, but no errors in output)
654
+ # - returncode is None (shouldn't happen after wait, but safe fallback)
655
+ return 0, stdout, stderr
656
+
657
+
658
+ def _execute_tests_and_create_run_report(
659
+ test_file: Path,
660
+ basename: str,
661
+ language: str,
662
+ target_coverage: float = 90.0,
663
+ *,
664
+ code_file: Optional[Path] = None,
665
+ atomic_state: Optional['AtomicStateUpdate'] = None,
666
+ test_files: Optional[List[Path]] = None, # Bug #156: Support multiple test files
667
+ ) -> RunReport:
668
+ """Execute tests and create a RunReport with actual results.
669
+
670
+ Now supports multiple languages by using get_test_command_for_file()
671
+ to determine the appropriate test runner.
672
+
673
+ Args:
674
+ test_file: Primary test file (for backward compat)
675
+ test_files: Optional list of all test files to run (Bug #156)
676
+ """
677
+ from .get_test_command import get_test_command_for_file
678
+
679
+ timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
680
+
681
+ # Bug #156: Use test_files if provided, otherwise just the single test_file
682
+ all_test_files = test_files if test_files else [test_file]
683
+
684
+ # Calculate test file hash for staleness detection (primary file for backward compat)
685
+ test_hash = calculate_sha256(test_file) if test_file.exists() else None
686
+
687
+ # Bug #156: Calculate hashes for ALL test files
688
+ test_file_hashes = {
689
+ f.name: calculate_sha256(f)
690
+ for f in all_test_files
691
+ if f.exists()
692
+ } if all_test_files else None
693
+
694
+ # Use clean env without TUI-specific vars
695
+ clean_env = os.environ.copy()
696
+ for var in ['FORCE_COLOR', 'COLUMNS']:
697
+ clean_env.pop(var, None)
698
+
699
+ try:
700
+ lang_lower = language.lower()
701
+
702
+ # Python: use existing pytest logic with coverage
703
+ if lang_lower == "python":
704
+ module_name = test_file.name.replace('test_', '').replace('.py', '')
705
+ python_executable = detect_host_python_executable()
706
+
707
+ cov_target = None
708
+ if code_file is not None:
709
+ cov_target = _python_cov_target_for_test_and_code(test_file, code_file, basename or module_name)
710
+ else:
711
+ cov_target = basename or module_name
712
+
713
+ if not cov_target:
714
+ cov_target = basename or module_name
715
+
716
+ # Bug #156: Run pytest on ALL test files
717
+ pytest_args = [
718
+ python_executable, '-m', 'pytest',
719
+ ] + [str(f) for f in all_test_files] + [
720
+ '-v',
721
+ '--tb=short',
722
+ f'--cov={cov_target}',
723
+ '--cov-report=term-missing'
724
+ ]
725
+ result = subprocess.run(
726
+ pytest_args,
727
+ capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
728
+ )
729
+
730
+ exit_code = result.returncode
731
+ stdout = result.stdout + (result.stderr or '')
732
+ tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
733
+
734
+ else:
735
+ # Non-Python: use language-appropriate test command
736
+ test_cmd = get_test_command_for_file(str(test_file), language)
737
+
738
+ if test_cmd is None:
739
+ # No test command available - return report indicating this
740
+ report = RunReport(
741
+ timestamp=timestamp,
742
+ exit_code=127, # Command not found
743
+ tests_passed=0,
744
+ tests_failed=0,
745
+ coverage=0.0,
746
+ test_hash=test_hash,
747
+ test_files=test_file_hashes, # Bug #156
748
+ )
749
+ save_run_report(asdict(report), basename, language, atomic_state)
750
+ return report
751
+
752
+ # Run the test command
753
+ result = subprocess.run(
754
+ test_cmd,
755
+ shell=True,
756
+ capture_output=True,
757
+ text=True,
758
+ timeout=300,
759
+ env=clean_env,
760
+ cwd=str(test_file.parent),
761
+ stdin=subprocess.DEVNULL,
762
+ start_new_session=True
763
+ )
764
+
765
+ exit_code = result.returncode
766
+ stdout = (result.stdout or '') + '\n' + (result.stderr or '')
767
+
768
+ # Parse results based on language
769
+ tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
770
+
197
771
  report = RunReport(
198
772
  timestamp=timestamp,
199
773
  exit_code=exit_code,
200
774
  tests_passed=tests_passed,
201
775
  tests_failed=tests_failed,
202
- coverage=coverage
776
+ coverage=coverage,
777
+ test_hash=test_hash,
778
+ test_files=test_file_hashes, # Bug #156
203
779
  )
204
-
780
+
205
781
  except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception) as e:
206
- # If test execution fails, create a report indicating failure
207
782
  report = RunReport(
208
783
  timestamp=timestamp,
209
784
  exit_code=1,
210
785
  tests_passed=0,
211
786
  tests_failed=1,
212
- coverage=0.0
787
+ coverage=0.0,
788
+ test_hash=test_hash,
789
+ test_files=test_file_hashes, # Bug #156
213
790
  )
214
-
215
- # Save the run report
216
- save_run_report(asdict(report), basename, language)
217
- return report
218
791
 
219
- # --- Helper for Click Context ---
792
+ save_run_report(asdict(report), basename, language, atomic_state)
793
+ return report
220
794
 
221
795
  def _create_mock_context(**kwargs) -> click.Context:
222
796
  """Creates a mock Click context object to pass parameters to command functions."""
@@ -242,7 +816,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  for entry in log_entries:
  timestamp = entry.get('timestamp', 'N/A')

- # Handle special event entries
  if 'event' in entry:
  event = entry.get('event', 'N/A')
  print(f"[{timestamp[:19]}] EVENT: {event}")
@@ -251,7 +824,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  print(f" Details: {details_str}")
  continue

- # Handle operation entries
  operation = entry.get('operation', 'N/A')
  reason = entry.get('reason', 'N/A')
  success = entry.get('success')
@@ -260,7 +832,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  duration = entry.get('duration')

  if verbose:
- # Verbose format
  print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
  decision_type = entry.get('decision_type', 'N/A')
  confidence = entry.get('confidence', 'N/A')
@@ -276,14 +847,12 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
  print(f" Estimated Cost: ${estimated_cost:.2f}")

  if 'details' in entry and entry['details']:
- # Show details without budget_remaining to avoid clutter
  details_copy = entry['details'].copy()
  details_copy.pop('budget_remaining', None)
  if details_copy:
  details_str = json.dumps(details_copy, indent=2)
  print(f" Details: {details_str}")
  else:
- # Normal format: [timestamp] operation | reason | status cost | duration
  status_icon = "✓" if success else "✗" if success is False else "?"

  cost_info = ""
@@ -318,45 +887,60 @@ def sync_orchestration(
  budget: float = 10.0,
  skip_verify: bool = False,
  skip_tests: bool = False,
- log: bool = False,
+ dry_run: bool = False,
  force: bool = False,
- strength: float = 0.5,
+ strength: float = DEFAULT_STRENGTH,
  temperature: float = 0.0,
- time_param: float = 0.25, # Renamed to avoid conflict with `time` module
+ time_param: float = 0.25,
  verbose: bool = False,
  quiet: bool = False,
  output_cost: Optional[str] = None,
  review_examples: bool = False,
  local: bool = False,
  context_config: Optional[Dict[str, str]] = None,
+ context_override: Optional[str] = None,
+ confirm_callback: Optional[Callable[[str, str], bool]] = None,
  ) -> Dict[str, Any]:
  """
  Orchestrates the complete PDD sync workflow with parallel animation.
-
- If log=True, displays the sync log instead of running sync operations.
- The verbose flag controls the detail level of the log output.
-
- Returns a dictionary summarizing the outcome of the sync process.
  """
- if log:
+ # Import get_extension at function scope
+ from .sync_determine_operation import get_extension
+
+ if dry_run:
  return _display_sync_log(basename, language, verbose)

  # --- Initialize State and Paths ---
  try:
- pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
+ pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+ except FileNotFoundError as e:
+ if "test_config.py" in str(e) or "tests/test_" in str(e):
+ pdd_files = {
+ 'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
+ 'code': Path(f"src/{basename}.{get_extension(language)}"),
+ 'example': Path(f"context/{basename}_example.{get_extension(language)}"),
+ 'test': Path(f"tests/test_{basename}.{get_extension(language)}")
+ }
+ if not quiet:
+ print(f"Note: Test file missing, continuing with sync workflow to generate it")
+ else:
+ print(f"Error constructing paths: {e}")
+ return {
+ "success": False,
+ "error": f"Failed to construct paths: {str(e)}",
+ "operations_completed": [],
+ "errors": [f"Path construction failed: {str(e)}"]
+ }
  except Exception as e:
- # Log the error and return early with failure status
  print(f"Error constructing paths: {e}")
  return {
  "success": False,
- "total_cost": 0.0,
- "model_name": "",
  "error": f"Failed to construct paths: {str(e)}",
  "operations_completed": [],
  "errors": [f"Path construction failed: {str(e)}"]
  }

- # Shared state for animation thread
+ # Shared state for animation (passed to App)
  current_function_name_ref = ["initializing"]
  stop_event = threading.Event()
  current_cost_ref = [0.0]
@@ -364,696 +948,695 @@ def sync_orchestration(
364
948
  code_path_ref = [str(pdd_files.get('code', 'N/A'))]
365
949
  example_path_ref = [str(pdd_files.get('example', 'N/A'))]
366
950
  tests_path_ref = [str(pdd_files.get('test', 'N/A'))]
367
- prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref = \
368
- ["blue"], ["blue"], ["blue"], ["blue"]
369
-
370
- # Orchestration state
371
- operations_completed: List[str] = []
372
- skipped_operations: List[str] = []
373
- errors: List[str] = []
374
- start_time = time.time()
375
- animation_thread = None
376
-
377
- # Track operation history for cycle detection
378
- operation_history: List[str] = []
379
- MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
951
+ prompt_box_color_ref = ["blue"]
952
+ code_box_color_ref = ["blue"]
953
+ example_box_color_ref = ["blue"]
954
+ tests_box_color_ref = ["blue"]
380
955
 
381
- try:
382
- with SyncLock(basename, language):
383
- # Log lock acquisition
384
- log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
385
-
386
- # --- Start Animation Thread ---
387
- animation_thread = threading.Thread(
388
- target=sync_animation,
389
- args=(
390
- current_function_name_ref, stop_event, basename, current_cost_ref, budget,
391
- prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref,
392
- prompt_path_ref, code_path_ref, example_path_ref, tests_path_ref
393
- ),
394
- daemon=True
395
- )
396
- animation_thread.start()
397
-
398
- # --- Main Workflow Loop ---
399
- while True:
400
- budget_remaining = budget - current_cost_ref[0]
401
- if current_cost_ref[0] >= budget:
402
- errors.append(f"Budget of ${budget:.2f} exceeded.")
403
- log_sync_event(basename, language, "budget_exceeded", {
404
- "total_cost": current_cost_ref[0],
405
- "budget": budget
406
- })
407
- break
408
-
409
- # Log budget warning when running low
410
- if budget_remaining < budget * 0.2 and budget_remaining > 0:
411
- log_sync_event(basename, language, "budget_warning", {
412
- "remaining": budget_remaining,
413
- "percentage": (budget_remaining / budget) * 100
414
- })
415
-
416
- decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
417
- operation = decision.operation
418
-
419
- # Create log entry with decision info
420
- log_entry = create_sync_log_entry(decision, budget_remaining)
421
-
422
- # Track operation history
423
- operation_history.append(operation)
424
-
425
- # Detect crash-verify cycles
426
- if len(operation_history) >= 4:
427
- # Check for repeating crash-verify pattern
428
- recent_ops = operation_history[-4:]
429
- if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
430
- recent_ops == ['verify', 'crash', 'verify', 'crash']):
431
- # Count how many times this cycle has occurred
432
- cycle_count = 0
433
- for i in range(0, len(operation_history) - 1, 2):
434
- if i + 1 < len(operation_history):
435
- if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
436
- (operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
437
- cycle_count += 1
438
-
439
- if cycle_count >= MAX_CYCLE_REPEATS:
440
- errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
441
- errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
442
- log_sync_event(basename, language, "cycle_detected", {
443
- "cycle_type": "crash-verify",
444
- "cycle_count": cycle_count,
445
- "operation_history": operation_history[-10:] # Last 10 operations
446
- })
447
- break
956
+ # Mutable container for the app reference (set after app creation)
957
+ # This allows the worker to access app.request_confirmation()
958
+ app_ref: List[Optional['SyncApp']] = [None]
448
959
 
449
- # Detect consecutive fix operations (infinite fix loop protection)
450
- if operation == 'fix':
451
- # Count consecutive fix operations
452
- consecutive_fixes = 0
453
- for i in range(len(operation_history) - 1, -1, -1):
454
- if operation_history[i] == 'fix':
455
- consecutive_fixes += 1
456
- else:
457
- break
458
-
459
- MAX_CONSECUTIVE_FIXES = 5 # Allow up to 5 consecutive fix attempts
460
- if consecutive_fixes >= MAX_CONSECUTIVE_FIXES:
461
- errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
462
- errors.append("The test failures may not be resolvable by automated fixes in this environment.")
463
- log_sync_event(basename, language, "cycle_detected", {
464
- "cycle_type": "consecutive-fix",
465
- "consecutive_count": consecutive_fixes,
466
- "operation_history": operation_history[-10:] # Last 10 operations
960
+ # Progress callback ref for TUI ProgressBar updates during auto-deps
961
+ progress_callback_ref: List[Optional[Callable[[int, int], None]]] = [None]
962
+
963
+ # Track if user has already confirmed overwrite (to avoid asking multiple times)
964
+ user_confirmed_overwrite: List[bool] = [False]
965
+
966
+ def get_confirm_callback() -> Optional[Callable[[str, str], bool]]:
967
+ """Get the confirmation callback from the app if available.
968
+
969
+ Once user confirms, we remember it so subsequent operations don't ask again.
970
+ """
971
+ if user_confirmed_overwrite[0]:
972
+ # User already confirmed, return a callback that always returns True
973
+ return lambda msg, title: True
974
+
975
+ if app_ref[0] is not None:
976
+ def confirming_callback(msg: str, title: str) -> bool:
977
+ result = app_ref[0].request_confirmation(msg, title)
978
+ if result:
979
+ user_confirmed_overwrite[0] = True
980
+ return result
981
+ return confirming_callback
982
+ return confirm_callback # Fall back to provided callback
983
+
984
+ def sync_worker_logic():
985
+ """
986
+ The main loop of sync logic, run in a worker thread by Textual App.
987
+ """
988
+ operations_completed: List[str] = []
989
+ skipped_operations: List[str] = []
990
+ errors: List[str] = []
991
+ start_time = time.time()
992
+ last_model_name: str = ""
993
+ operation_history: List[str] = []
994
+ MAX_CYCLE_REPEATS = 2
995
+
996
+ # Helper function to print inside worker (goes to RichLog via redirection)
997
+ # print() will work if sys.stdout is redirected.
998
+
999
+ try:
1000
+ with SyncLock(basename, language):
1001
+ log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
1002
+
1003
+ while True:
1004
+ budget_remaining = budget - current_cost_ref[0]
1005
+ if current_cost_ref[0] >= budget:
1006
+ errors.append(f"Budget of ${budget:.2f} exceeded.")
1007
+ log_sync_event(basename, language, "budget_exceeded", {
1008
+ "total_cost": current_cost_ref[0],
1009
+ "budget": budget
467
1010
  })
468
1011
  break
469
1012
 
470
- if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
471
- current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
472
-
473
- # Log these final operations
474
- success = operation in ['all_synced', 'nothing']
475
- error_msg = None
476
- if operation == 'fail_and_request_manual_merge':
477
- errors.append(f"Manual merge required: {decision.reason}")
478
- error_msg = f"Manual merge required: {decision.reason}"
479
- elif operation == 'error':
480
- errors.append(f"Error determining operation: {decision.reason}")
481
- error_msg = f"Error determining operation: {decision.reason}"
482
- elif operation == 'analyze_conflict':
483
- errors.append(f"Conflict detected: {decision.reason}")
484
- error_msg = f"Conflict detected: {decision.reason}"
485
-
486
- # Update log entry for final operation
487
- update_sync_log_entry(log_entry, {
488
- 'success': success,
489
- 'cost': 0.0,
490
- 'model': 'none',
491
- 'error': error_msg
492
- }, 0.0)
493
- append_sync_log(basename, language, log_entry)
494
-
495
- break
496
-
497
- # Handle skips
498
- if operation == 'verify' and (skip_verify or skip_tests):
499
- # Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
500
- skipped_operations.append('verify')
501
- skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
502
-
503
- # Update log entry for skipped operation
504
- update_sync_log_entry(log_entry, {
505
- 'success': True,
506
- 'cost': 0.0,
507
- 'model': 'skipped',
508
- 'error': None
509
- }, 0.0)
510
- log_entry['details']['skip_reason'] = skip_reason
511
- append_sync_log(basename, language, log_entry)
1013
+ if budget_remaining < budget * 0.2 and budget_remaining > 0:
1014
+ log_sync_event(basename, language, "budget_warning", {
1015
+ "remaining": budget_remaining,
1016
+ "percentage": (budget_remaining / budget) * 100
1017
+ })
1018
+
1019
+ decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
1020
+ operation = decision.operation
512
1021
 
513
- report_data = RunReport(
514
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
515
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
516
- )
517
- save_run_report(asdict(report_data), basename, language)
518
- _save_operation_fingerprint(basename, language, 'verify', pdd_files, 0.0, skip_reason)
519
- continue
520
- if operation == 'test' and skip_tests:
521
- skipped_operations.append('test')
1022
+ log_entry = create_sync_log_entry(decision, budget_remaining)
1023
+ operation_history.append(operation)
522
1024
 
523
- # Update log entry for skipped operation
524
- update_sync_log_entry(log_entry, {
525
- 'success': True,
526
- 'cost': 0.0,
527
- 'model': 'skipped',
528
- 'error': None
529
- }, 0.0)
530
- log_entry['details']['skip_reason'] = 'skip_tests'
531
- append_sync_log(basename, language, log_entry)
1025
+ # Cycle detection logic
1026
+ if len(operation_history) >= 3:
1027
+ recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
1028
+ if len(recent_auto_deps) >= 2:
1029
+ errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
1030
+ log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
1031
+ operation = 'generate'
1032
+ decision.operation = 'generate' # Update decision too
1033
+
1034
+ # Bug #4 fix: Detect crash-verify cycle pattern
1035
+ # The pattern [crash, verify, crash, verify] or [verify, crash, verify, crash]
1036
+ # represents 2 iterations of the alternating cycle, so break immediately
1037
+ if len(operation_history) >= 4:
1038
+ recent_ops = operation_history[-4:]
1039
+ if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
1040
+ recent_ops == ['verify', 'crash', 'verify', 'crash']):
1041
+ # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1042
+ errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1043
+ log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
1044
+ break
1045
+
1046
+ # Bug #4 fix: Detect test-fix cycle pattern
1047
+ # The pattern [test, fix, test, fix] or [fix, test, fix, test]
1048
+ # represents 2 iterations of the alternating cycle, so break immediately
1049
+ if len(operation_history) >= 4:
1050
+ recent_ops = operation_history[-4:]
1051
+ if (recent_ops == ['test', 'fix', 'test', 'fix'] or
1052
+ recent_ops == ['fix', 'test', 'fix', 'test']):
1053
+ # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1054
+ errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1055
+ log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
1056
+ break
1057
+
1058
+ if operation == 'fix':
1059
+ consecutive_fixes = 0
1060
+ for i in range(len(operation_history) - 1, -1, -1):
1061
+ if operation_history[i] == 'fix':
1062
+ consecutive_fixes += 1
1063
+ else:
1064
+ break
1065
+ if consecutive_fixes >= 5:
1066
+ errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
1067
+ break
1068
+
1069
+ if operation == 'test':
1070
+ consecutive_tests = 0
1071
+ for i in range(len(operation_history) - 1, -1, -1):
1072
+ if operation_history[i] == 'test':
1073
+ consecutive_tests += 1
1074
+ else:
1075
+ break
1076
+ if consecutive_tests >= MAX_CONSECUTIVE_TESTS:
1077
+ errors.append(f"Detected {consecutive_tests} consecutive test operations. Breaking infinite test loop.")
1078
+ break
1079
+
1080
+ # Bug #157 fix: Prevent infinite crash retry loops
1081
+ if operation == 'crash':
1082
+ consecutive_crashes = 0
1083
+ for i in range(len(operation_history) - 1, -1, -1):
1084
+ if operation_history[i] == 'crash':
1085
+ consecutive_crashes += 1
1086
+ else:
1087
+ break
1088
+ if consecutive_crashes >= MAX_CONSECUTIVE_CRASHES:
1089
+ errors.append(f"Detected {consecutive_crashes} consecutive crash operations. Breaking infinite crash loop.")
1090
+ break
1091
+
1092
+ if operation == 'test_extend':
1093
+ # Count test_extend attempts to prevent infinite loop
1094
+ extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
1095
+ if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
1096
+ # Accept current coverage after max attempts
1097
+ log_sync_event(basename, language, "test_extend_limit", {
1098
+ "attempts": extend_attempts,
1099
+ "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
1100
+ "reason": "Accepting current coverage after max extend attempts"
1101
+ })
1102
+ success = True
1103
+ break
1104
+
1105
+ if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
1106
+ current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
1107
+ success = operation in ['all_synced', 'nothing']
1108
+ error_msg = None
1109
+ if operation == 'fail_and_request_manual_merge':
1110
+ errors.append(f"Manual merge required: {decision.reason}")
1111
+ error_msg = decision.reason
1112
+ elif operation == 'error':
1113
+ errors.append(f"Error determining operation: {decision.reason}")
1114
+ error_msg = decision.reason
1115
+ elif operation == 'analyze_conflict':
1116
+ errors.append(f"Conflict detected: {decision.reason}")
1117
+ error_msg = decision.reason
1118
+
1119
+ update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
1120
+ append_sync_log(basename, language, log_entry)
1121
+ break
532
1122
 
533
- report_data = RunReport(
534
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
535
- exit_code=0, tests_passed=0, tests_failed=0, coverage=1.0
1123
+ # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
1124
+ # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
1125
+ if operation == 'verify' and (skip_verify or skip_tests):
1126
+ skipped_operations.append('verify')
1127
+ update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1128
+ append_sync_log(basename, language, log_entry)
1129
+ # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1130
+ _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
1131
+ continue
1132
+ if operation == 'test' and skip_tests:
1133
+ skipped_operations.append('test')
1134
+ update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1135
+ append_sync_log(basename, language, log_entry)
1136
+ # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1137
+ _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
1138
+ continue
1139
+ if operation == 'crash' and (skip_tests or skip_verify):
1140
+ skipped_operations.append('crash')
1141
+ update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1142
+ append_sync_log(basename, language, log_entry)
1143
+ # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1144
+ _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
1145
+ # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
1146
+ # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
1147
+ current_hashes = calculate_current_hashes(pdd_files)
1148
+ synthetic_report = RunReport(
1149
+ timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
1150
+ exit_code=0, # Assume success since we're skipping validation
1151
+ tests_passed=0,
1152
+ tests_failed=0,
1153
+ coverage=0.0,
1154
+ test_hash=current_hashes.get('test_hash')
1155
+ )
1156
+ save_run_report(asdict(synthetic_report), basename, language)
1157
+ continue
1158
+
1159
+ current_function_name_ref[0] = operation
1160
+ ctx = _create_mock_context(
1161
+ force=force, strength=strength, temperature=temperature, time=time_param,
1162
+ verbose=verbose, quiet=quiet, output_cost=output_cost,
1163
+ review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
1164
+ max_attempts=max_attempts, target_coverage=target_coverage,
1165
+ confirm_callback=get_confirm_callback(),
1166
+ context=context_override
536
1167
  )
537
- save_run_report(asdict(report_data), basename, language)
538
- _save_operation_fingerprint(basename, language, 'test', pdd_files, 0.0, 'skipped')
539
- continue
540
- if operation == 'crash' and skip_tests:
541
- # Skip crash operations when tests are skipped since crash fixes usually require test execution
542
- skipped_operations.append('crash')
543
1168
 
544
- # Update log entry for skipped operation
545
- update_sync_log_entry(log_entry, {
546
- 'success': True,
547
- 'cost': 0.0,
548
- 'model': 'skipped',
549
- 'error': None
550
- }, 0.0)
551
- log_entry['details']['skip_reason'] = 'skip_tests'
552
- append_sync_log(basename, language, log_entry)
553
-
554
- # Create a dummy run report indicating crash was skipped
555
- report_data = RunReport(
556
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
557
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
558
- )
559
- save_run_report(asdict(report_data), basename, language)
560
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped')
561
- continue
562
-
563
- current_function_name_ref[0] = operation
564
- ctx = _create_mock_context(
565
- force=force, strength=strength, temperature=temperature, time=time_param,
566
- verbose=verbose, quiet=quiet, output_cost=output_cost,
567
- review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
568
- max_attempts=max_attempts, target_coverage=target_coverage
569
- )
570
-
571
- result = {}
572
- success = False
573
- start_time = time.time() # Track execution time
574
-
575
- # --- Execute Operation ---
576
- try:
577
- if operation == 'auto-deps':
578
- # Save the modified prompt to a temporary location
579
- temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
580
-
581
- # Read original prompt content to compare later
582
- original_content = pdd_files['prompt'].read_text(encoding='utf-8')
583
-
584
- result = auto_deps_main(
585
- ctx,
586
- prompt_file=str(pdd_files['prompt']),
587
- directory_path=examples_dir,
588
- auto_deps_csv_path="project_dependencies.csv",
589
- output=temp_output,
590
- force_scan=False # Don't force scan every time
591
- )
592
-
593
- # Only move the temp file back if content actually changed
594
- if Path(temp_output).exists():
595
- import shutil
596
- new_content = Path(temp_output).read_text(encoding='utf-8')
597
- if new_content != original_content:
598
- shutil.move(temp_output, str(pdd_files['prompt']))
599
- else:
600
- # No changes needed, remove temp file
601
- Path(temp_output).unlink()
602
- # Mark as successful with no changes
603
- result = (new_content, 0.0, 'no-changes')
604
- elif operation == 'generate':
605
- result = code_generator_main(
606
- ctx,
607
- prompt_file=str(pdd_files['prompt']),
608
- output=str(pdd_files['code']),
609
- original_prompt_file_path=None,
610
- force_incremental_flag=False
611
- )
612
- elif operation == 'example':
613
- print(f"DEBUG SYNC: pdd_files['example'] = {pdd_files['example']}")
614
- print(f"DEBUG SYNC: str(pdd_files['example']) = {str(pdd_files['example'])}")
615
- result = context_generator_main(
616
- ctx,
617
- prompt_file=str(pdd_files['prompt']),
618
- code_file=str(pdd_files['code']),
619
- output=str(pdd_files['example'])
620
- )
621
- elif operation == 'crash':
622
- # Validate required files exist before attempting crash operation
623
- required_files = [pdd_files['code'], pdd_files['example']]
624
- missing_files = [f for f in required_files if not f.exists()]
625
-
626
- if missing_files:
627
- # Skip crash operation if required files are missing
628
- print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
629
- skipped_operations.append('crash')
630
-
631
- # Update log entry for skipped operation
632
- update_sync_log_entry(log_entry, {
633
- 'success': True,
634
- 'cost': 0.0,
635
- 'model': 'skipped',
636
- 'error': None
637
- }, 0.0)
638
- log_entry['details']['skip_reason'] = 'missing_files'
639
- log_entry['details']['missing_files'] = [f.name for f in missing_files]
640
- append_sync_log(basename, language, log_entry)
1169
+ result = {}
1170
+ success = False
1171
+ op_start_time = time.time()
1172
+
1173
+ # Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
1174
+ with AtomicStateUpdate(basename, language) as atomic_state:
1175
+
1176
+ # --- Execute Operation ---
1177
+ try:
1178
+ if operation == 'auto-deps':
1179
+ temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
1180
+ original_content = pdd_files['prompt'].read_text(encoding='utf-8')
1181
+ result = auto_deps_main(
1182
+ ctx,
1183
+ prompt_file=str(pdd_files['prompt']),
1184
+ directory_path=examples_dir,
1185
+ auto_deps_csv_path="project_dependencies.csv",
1186
+ output=temp_output,
1187
+ force_scan=False,
1188
+ progress_callback=progress_callback_ref[0]
1189
+ )
1190
+ if Path(temp_output).exists():
1191
+ import shutil
1192
+ new_content = Path(temp_output).read_text(encoding='utf-8')
1193
+ if new_content != original_content:
1194
+ shutil.move(temp_output, str(pdd_files['prompt']))
1195
+ else:
1196
+ Path(temp_output).unlink()
1197
+ result = (new_content, 0.0, 'no-changes')
1198
+ elif operation == 'generate':
1199
+ result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt']), output=str(pdd_files['code']), original_prompt_file_path=None, force_incremental_flag=False)
1200
+ # Clear stale run_report so crash/verify is required for newly generated code
1201
+ run_report_file = META_DIR / f"{basename}_{language}_run.json"
1202
+ run_report_file.unlink(missing_ok=True)
1203
+ elif operation == 'example':
1204
+ result = context_generator_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['example']))
1205
+ elif operation == 'crash':
1206
+ required_files = [pdd_files['code'], pdd_files['example']]
1207
+ missing_files = [f for f in required_files if not f.exists()]
1208
+ if missing_files:
1209
+ skipped_operations.append('crash')
1210
+ continue
641
1211
 
642
- # Create a dummy run report indicating crash was skipped due to missing files
643
- report_data = RunReport(
644
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
645
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
646
- )
647
- save_run_report(asdict(report_data), basename, language)
648
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
649
- continue
650
- else:
651
- # Check if we have a run report indicating failures that need crash fixing
652
- current_run_report = read_run_report(basename, language)
653
- crash_log_content = ""
1212
+ # Crash handling logic (simplified copy from original)
1213
+ current_run_report = read_run_report(basename, language)
1214
+ crash_log_content = ""
654
1215
 
655
- # If we have a run report with exit_code != 0, that indicates a crash that needs fixing
656
- if current_run_report and current_run_report.exit_code != 0:
657
- # We have a crash to fix based on the run report
658
- crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n\n"
659
-
660
- # Try to run the example program to get additional error details
661
- try:
662
- example_result = subprocess.run(
663
- ['python', str(pdd_files['example'])],
664
- capture_output=True,
665
- text=True,
666
- timeout=60,
667
- env=os.environ.copy(),
668
- cwd=str(pdd_files['example'].parent)
1216
+ # Check for crash condition (either run report says so, or we check manually)
1217
+ has_crash = False
1218
+ if current_run_report and current_run_report.exit_code != 0:
1219
+ has_crash = True
1220
+ crash_log_content = f"Test execution failed exit code: {current_run_report.exit_code}\n"
1221
+ else:
1222
+ # Manual check - run the example to see if it crashes
1223
+ env = os.environ.copy()
1224
+ src_dir = Path.cwd() / 'src'
1225
+ env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
1226
+ # Remove TUI-specific env vars that might contaminate subprocess
1227
+ for var in ['FORCE_COLOR', 'COLUMNS']:
1228
+ env.pop(var, None)
1229
+ # Get language-appropriate run command from language_format.csv
1230
+ example_path = str(pdd_files['example'])
1231
+ run_cmd = get_run_command_for_file(example_path)
1232
+ if run_cmd:
1233
+ # Use the language-specific interpreter (e.g., node for .js)
1234
+ cmd_parts = run_cmd.split()
1235
+ else:
1236
+ # Fallback to Python if no run command found
1237
+ cmd_parts = ['python', example_path]
1238
+ # Use error-detection runner that handles server-style examples
1239
+ returncode, stdout, stderr = _run_example_with_error_detection(
1240
+ cmd_parts,
1241
+ env=env,
1242
+ cwd=str(pdd_files['example'].parent),
1243
+ timeout=60
669
1244
  )
670
-
671
- if example_result.returncode != 0:
672
- crash_log_content += f"Example program also failed with exit code: {example_result.returncode}\n\n"
673
- if example_result.stdout:
674
- crash_log_content += f"STDOUT:\n{example_result.stdout}\n\n"
675
- if example_result.stderr:
676
- crash_log_content += f"STDERR:\n{example_result.stderr}\n"
677
-
678
- # Check for syntax errors specifically
679
- if "SyntaxError" in example_result.stderr:
680
- crash_log_content = f"SYNTAX ERROR DETECTED:\n\n{crash_log_content}"
1245
+
1246
+ class ExampleResult:
1247
+ def __init__(self, rc, out, err):
1248
+ self.returncode = rc
1249
+ self.stdout = out
1250
+ self.stderr = err
1251
+
1252
+ ex_res = ExampleResult(returncode, stdout, stderr)
1253
+ if ex_res.returncode != 0:
1254
+ has_crash = True
1255
+ crash_log_content = f"Example failed exit code: {ex_res.returncode}\nSTDOUT:\n{ex_res.stdout}\nSTDERR:\n{ex_res.stderr}\n"
1256
+ if "SyntaxError" in ex_res.stderr:
1257
+ crash_log_content = "SYNTAX ERROR DETECTED:\n" + crash_log_content
681
1258
  else:
682
- crash_log_content += "Example program runs successfully, but tests are failing.\n"
683
- crash_log_content += "This may indicate issues with test execution or test file syntax.\n"
684
-
685
- except subprocess.TimeoutExpired:
686
- crash_log_content += "Example program execution timed out after 60 seconds\n"
687
- crash_log_content += "This may indicate an infinite loop or the program is waiting for input.\n"
688
- except Exception as e:
689
- crash_log_content += f"Error running example program: {str(e)}\n"
690
- crash_log_content += f"Program path: {pdd_files['example']}\n"
691
- else:
692
- # No crash detected, skip crash operation
693
- print("No crash detected in run report, skipping crash fix")
694
- skipped_operations.append('crash')
695
-
696
- # Update log entry for skipped operation
697
- update_sync_log_entry(log_entry, {
698
- 'success': True,
699
- 'cost': 0.0,
700
- 'model': 'skipped',
701
- 'error': None
702
- }, time.time() - start_time)
703
- log_entry['details']['skip_reason'] = 'no_crash'
704
- append_sync_log(basename, language, log_entry)
705
-
706
- report_data = RunReport(
707
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
708
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
709
- )
710
- save_run_report(asdict(report_data), basename, language)
711
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
712
- continue
713
-
714
- # Write actual error content or fallback
715
- if not crash_log_content:
716
- crash_log_content = "Unknown crash error - program failed but no error output captured"
717
-
718
- Path("crash.log").write_text(crash_log_content)
719
-
720
- try:
721
- result = crash_main(
722
- ctx,
723
- prompt_file=str(pdd_files['prompt']),
724
- code_file=str(pdd_files['code']),
725
- program_file=str(pdd_files['example']),
726
- error_file="crash.log",
727
- output=str(pdd_files['code']),
728
- output_program=str(pdd_files['example']),
729
- loop=True,
730
- max_attempts=max_attempts,
731
- budget=budget - current_cost_ref[0]
732
- )
733
- except (RuntimeError, Exception) as e:
734
- error_str = str(e)
735
- if ("LLM returned None" in error_str or
736
- "LLM failed to analyze errors" in error_str):
737
- # Skip crash operation for LLM failures
738
- print(f"Skipping crash operation due to LLM error: {e}")
739
- skipped_operations.append('crash')
740
-
741
- # Update log entry for skipped operation
742
- update_sync_log_entry(log_entry, {
743
- 'success': False,
744
- 'cost': 0.0,
745
- 'model': 'skipped',
746
- 'error': f"LLM error: {str(e)}"
747
- }, time.time() - start_time)
748
- log_entry['details']['skip_reason'] = 'llm_error'
749
- append_sync_log(basename, language, log_entry)
1259
+ # No crash - save run report with exit_code=0 so sync_determine_operation
1260
+ # knows the example was tested and passed (prevents infinite loop)
1261
+ # Include test_hash for staleness detection
1262
+ test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1263
+ report = RunReport(
1264
+ datetime.datetime.now(datetime.timezone.utc).isoformat(),
1265
+ exit_code=0,
1266
+ tests_passed=1,
1267
+ tests_failed=0,
1268
+ coverage=0.0,
1269
+ test_hash=test_hash
1270
+ )
1271
+ save_run_report(asdict(report), basename, language)
1272
+ skipped_operations.append('crash')
1273
+ continue
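The run report saved just above is the signal that sync_determine_operation reads on the next pass: exit_code=0 records that the example actually ran, and test_hash lets a later pass notice that the test file changed after the report was written. A rough sketch of that staleness check, assuming a simplified RunReport limited to the fields used in this hunk:

    import hashlib
    from dataclasses import dataclass
    from pathlib import Path
    from typing import Optional

    @dataclass
    class RunReport:
        timestamp: str
        exit_code: int
        tests_passed: int
        tests_failed: int
        coverage: float
        test_hash: Optional[str] = None

    def sha256_of(path: Path) -> Optional[str]:
        return hashlib.sha256(path.read_bytes()).hexdigest() if path.exists() else None

    def report_is_stale(report: RunReport, test_file: Path) -> bool:
        # If the test file's current hash differs from the hash recorded when
        # the report was written, the report no longer describes these tests.
        return report.test_hash != sha256_of(test_file)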
750
1274
 
751
- report_data = RunReport(
752
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
753
- exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
1275
+ if has_crash:
1276
+ # Try auto-fix for common import errors before expensive agentic call
1277
+ auto_fixed, auto_fix_msg = _try_auto_fix_import_error(
1278
+ crash_log_content,
1279
+ pdd_files['code'],
1280
+ pdd_files['example']
754
1281
  )
755
- save_run_report(asdict(report_data), basename, language)
756
- _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_llm_error')
1282
+ if auto_fixed:
1283
+ log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
1284
+ # Retry running the example after auto-fix
1285
+ retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
1286
+ cmd_parts,
1287
+ env=env,
1288
+ cwd=str(pdd_files['example'].parent),
1289
+ timeout=60
1290
+ )
1291
+ if retry_returncode == 0:
1292
+ # Auto-fix worked! Save run report and continue
1293
+ log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
1294
+ test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1295
+ report = RunReport(
1296
+ datetime.datetime.now(datetime.timezone.utc).isoformat(),
1297
+ exit_code=0,
1298
+ tests_passed=1,
1299
+ tests_failed=0,
1300
+ coverage=0.0,
1301
+ test_hash=test_hash
1302
+ )
1303
+ save_run_report(asdict(report), basename, language)
1304
+ result = (True, 0.0, 'auto-fix')
1305
+ success = True
1306
+ actual_cost = 0.0
1307
+ model_name = 'auto-fix'
1308
+ # Update crash_log_content for logging
1309
+ crash_log_content = f"Auto-fixed: {auto_fix_msg}"
1310
+ continue # Skip crash_main, move to next operation
1311
+ else:
1312
+ # Auto-fix didn't fully work, update error log and proceed
1313
+ crash_log_content = f"Auto-fix attempted ({auto_fix_msg}) but still failing:\nRETRY STDOUT:\n{retry_stdout}\nRETRY STDERR:\n{retry_stderr}\n"
1314
+
1315
+ Path("crash.log").write_text(crash_log_content)
1316
+ try:
1317
+ result = crash_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), error_file="crash.log", output=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1318
+ except Exception as e:
1319
+ print(f"Crash fix failed: {e}")
1320
+ skipped_operations.append('crash')
1321
+ continue
1322
+
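The _try_auto_fix_import_error call earlier in this branch acts as a cheap gate: if the crash log looks like a plain import failure, a mechanical retry is attempted before paying for an agentic fix. That helper's implementation is not shown in this hunk, so the sketch below only illustrates the kind of stderr pattern-matching such a gate could perform; the function name, signature, and return shape are assumptions.

    import re
    from typing import Optional, Tuple

    IMPORT_ERROR = re.compile(
        r"(ModuleNotFoundError|ImportError): No module named ['\"]?([\w\.]+)"
    )

    def classify_import_error(stderr_text: str) -> Tuple[bool, Optional[str]]:
        # Returns (looks_like_import_error, missing_module_name). A caller can
        # use this to attempt a trivial remedy and re-run the example before
        # escalating to an LLM-based crash fix.
        match = IMPORT_ERROR.search(stderr_text)
        if match:
            return True, match.group(2)
        return False, None

    # classify_import_error("ModuleNotFoundError: No module named 'calculator'")
    # -> (True, 'calculator')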
1323
+ elif operation == 'verify':
1324
+ if not pdd_files['example'].exists():
1325
+ skipped_operations.append('verify')
757
1326
  continue
758
- else:
759
- # Re-raise other exceptions
760
- raise
761
- elif operation == 'verify':
762
- result = fix_verification_main(
763
- ctx,
764
- prompt_file=str(pdd_files['prompt']),
765
- code_file=str(pdd_files['code']),
766
- program_file=str(pdd_files['example']),
767
- output_results=f"{basename}_verify_results.log",
768
- output_code=str(pdd_files['code']),
769
- output_program=str(pdd_files['example']),
770
- loop=True,
771
- verification_program=str(pdd_files['example']),
772
- max_attempts=max_attempts,
773
- budget=budget - current_cost_ref[0]
774
- )
775
- elif operation == 'test':
776
- # First, generate the test file
777
- result = cmd_test_main(
778
- ctx,
779
- prompt_file=str(pdd_files['prompt']),
780
- code_file=str(pdd_files['code']),
781
- output=str(pdd_files['test']),
782
- language=language,
783
- coverage_report=None,
784
- existing_tests=None,
785
- target_coverage=target_coverage,
786
- merge=False
787
- )
788
-
789
- # After successful test generation, execute the tests and create run report
790
- # This enables the next sync iteration to detect test failures and trigger fix
791
- if isinstance(result, dict) and result.get('success', False):
792
- try:
793
- test_file = pdd_files['test']
794
- if test_file.exists():
1327
+ result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1328
+ elif operation == 'test':
1329
+ pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1330
+ # Use merge=True when test file exists to preserve fixes and append new tests
1331
+ # instead of regenerating from scratch (which would overwrite fixes)
1332
+ test_file_exists = pdd_files['test'].exists()
1333
+ result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=[str(pdd_files['test'])] if test_file_exists else None, target_coverage=target_coverage, merge=test_file_exists, strength=strength, temperature=temperature)
1334
+ if pdd_files['test'].exists():
795
1335
  _execute_tests_and_create_run_report(
796
- test_file, basename, language, target_coverage
1336
+ pdd_files['test'],
1337
+ basename,
1338
+ language,
1339
+ target_coverage,
1340
+ code_file=pdd_files.get("code"),
1341
+ atomic_state=atomic_state,
1342
+ test_files=pdd_files.get('test_files'), # Bug #156
1343
+ )
1344
+ elif operation == 'test_extend':
1345
+ # Extend existing tests to improve coverage
1346
+ # Uses existing_tests and merge=True to add more test cases
1347
+ pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1348
+ if pdd_files['test'].exists():
1349
+ existing_test_path = str(pdd_files['test'])
1350
+ result = cmd_test_main(
1351
+ ctx,
1352
+ prompt_file=str(pdd_files['prompt']),
1353
+ code_file=str(pdd_files['code']),
1354
+ output=str(pdd_files['test']),
1355
+ language=language,
1356
+ coverage_report=None,
1357
+ existing_tests=[existing_test_path],
1358
+ target_coverage=target_coverage,
1359
+ merge=True,
1360
+ strength=strength,
1361
+ temperature=temperature
797
1362
  )
798
- except Exception as e:
799
- # Don't fail the entire operation if test execution fails
800
- # Just log it - the test file generation was successful
801
- print(f"Warning: Test execution failed: {e}")
802
- elif isinstance(result, tuple) and len(result) >= 3:
803
- # Handle tuple return format - assume success and execute tests
804
- try:
805
- test_file = pdd_files['test']
806
- if test_file.exists():
807
1363
  _execute_tests_and_create_run_report(
808
- test_file, basename, language, target_coverage
1364
+ pdd_files['test'],
1365
+ basename,
1366
+ language,
1367
+ target_coverage,
1368
+ code_file=pdd_files.get("code"),
1369
+ atomic_state=atomic_state,
1370
+ test_files=pdd_files.get('test_files'), # Bug #156
809
1371
  )
810
- except Exception as e:
811
- print(f"Warning: Test execution failed: {e}")
812
- elif operation == 'fix':
813
- # Create error file with actual test failure information
814
- error_file_path = Path("fix_errors.log")
815
-
816
- # Try to get actual test failure details from latest run
817
- try:
818
- run_report = read_run_report(basename, language)
819
- if run_report and run_report.tests_failed > 0:
820
- # Run the tests again to capture actual error output
821
- # Use environment-aware Python executable for pytest execution
822
- python_executable = detect_host_python_executable()
823
- test_result = subprocess.run([
824
- python_executable, '-m', 'pytest',
825
- str(pdd_files['test']),
826
- '-v', '--tb=short'
827
- ], capture_output=True, text=True, timeout=300)
828
-
829
- error_content = f"Test failures detected ({run_report.tests_failed} failed tests):\n\n"
830
- error_content += "STDOUT:\n" + test_result.stdout + "\n\n"
831
- error_content += "STDERR:\n" + test_result.stderr
1372
+ else:
1373
+ # No existing test file, fall back to regular test generation
1374
+ result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=None, target_coverage=target_coverage, merge=False, strength=strength, temperature=temperature)
1375
+ if pdd_files['test'].exists():
1376
+ _execute_tests_and_create_run_report(
1377
+ pdd_files['test'],
1378
+ basename,
1379
+ language,
1380
+ target_coverage,
1381
+ code_file=pdd_files.get("code"),
1382
+ atomic_state=atomic_state,
1383
+ test_files=pdd_files.get('test_files'), # Bug #156
1384
+ )
1385
+ elif operation == 'fix':
1386
+ error_file_path = Path("fix_errors.log")
1387
+ # Capture errors using language-appropriate test command
1388
+ try:
1389
+ from .get_test_command import get_test_command_for_file
1390
+ test_cmd = get_test_command_for_file(str(pdd_files['test']), language)
1391
+
1392
+ # Use clean env without TUI-specific vars
1393
+ clean_env = os.environ.copy()
1394
+ for var in ['FORCE_COLOR', 'COLUMNS']:
1395
+ clean_env.pop(var, None)
1396
+
1397
+ if test_cmd:
1398
+ # Run language-appropriate test command
1399
+ if language.lower() == 'python':
1400
+ # Use pytest directly for Python
1401
+ python_executable = detect_host_python_executable()
1402
+ # Bug #156: Run pytest on ALL matching test files
1403
+ test_files = pdd_files.get('test_files', [pdd_files['test']])
1404
+ pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
1405
+ test_result = subprocess.run(
1406
+ pytest_args,
1407
+ capture_output=True, text=True, timeout=300,
1408
+ stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True,
1409
+ cwd=str(pdd_files['test'].parent)
1410
+ )
1411
+ else:
1412
+ # Use shell command for non-Python
1413
+ test_result = subprocess.run(
1414
+ test_cmd,
1415
+ shell=True,
1416
+ capture_output=True, text=True, timeout=300,
1417
+ stdin=subprocess.DEVNULL, env=clean_env,
1418
+ cwd=str(pdd_files['test'].parent),
1419
+ start_new_session=True
1420
+ )
1421
+ error_content = f"Test output:\n{test_result.stdout}\n{test_result.stderr}"
1422
+ else:
1423
+ # No test command available - trigger agentic fallback with context
1424
+ error_content = f"No test command available for {language}. Please run tests manually and provide error output."
1425
+ except Exception as e:
1426
+ error_content = f"Test execution error: {e}"
1427
+ error_file_path.write_text(error_content)
1428
+
1429
+ # Bug #156 fix: Parse pytest output to find actual failing files
1430
+ # and pass the correct file to fix_main
1431
+ failing_files = extract_failing_files_from_output(error_content)
1432
+ unit_test_file_for_fix = str(pdd_files['test']) # Default to tracked file
1433
+
1434
+ if failing_files:
1435
+ # Try to resolve the failing file paths
1436
+ test_dir = pdd_files['test'].parent
1437
+ tracked_file_name = pdd_files['test'].name
1438
+
1439
+ # Check if the tracked file is among the failures
1440
+ tracked_in_failures = any(
1441
+ Path(ff).name == tracked_file_name for ff in failing_files
1442
+ )
1443
+
1444
+ if not tracked_in_failures:
1445
+ # Failures are in a different file - use the first failing file
1446
+ for ff in failing_files:
1447
+ # Try to resolve the path relative to test directory
1448
+ ff_path = Path(ff)
1449
+ if ff_path.is_absolute() and ff_path.exists():
1450
+ unit_test_file_for_fix = str(ff_path)
1451
+ break
1452
+ else:
1453
+ # Try to find it in the test directory
1454
+ candidate = test_dir / ff_path.name
1455
+ if candidate.exists():
1456
+ unit_test_file_for_fix = str(candidate)
1457
+ break
1458
+ # Also try the path as-is relative to cwd
1459
+ if ff_path.exists():
1460
+ unit_test_file_for_fix = str(ff_path.resolve())
1461
+ break
1462
+
1463
+ result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
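The Bug #156 handling above depends on extract_failing_files_from_output pulling file paths out of the captured test output, so that fix_main is pointed at the file that actually failed rather than only at the tracked test file. The shipped function is defined elsewhere in the module; the sketch below shows one plausible way to parse pytest's "FAILED path::test" summary lines, with an illustrative regex and de-duplication that are not taken from the real implementation.

    import re
    from typing import List

    FAILED_LINE = re.compile(r"^FAILED\s+([^\s:]+)::", re.MULTILINE)

    def failing_files_from_pytest_output(output: str) -> List[str]:
        # pytest's short summary prints lines like
        #   FAILED tests/test_calculator.py::test_add - AssertionError
        # so collecting the path before '::' yields the failing files in order,
        # without duplicates.
        seen: List[str] = []
        for path in FAILED_LINE.findall(output):
            if path not in seen:
                seen.append(path)
        return seen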
1464
+ elif operation == 'update':
1465
+ result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
832
1466
  else:
833
- error_content = "Simulated test failures"
1467
+ errors.append(f"Unknown operation {operation}")
1468
+ result = {'success': False}
1469
+
1470
+ # Result parsing
1471
+ if isinstance(result, dict):
1472
+ success = result.get('success', False)
1473
+ current_cost_ref[0] += result.get('cost', 0.0)
1474
+ elif isinstance(result, tuple) and len(result) >= 3:
1475
+ if operation == 'test': success = pdd_files['test'].exists()
1476
+ else: success = bool(result[0])
1477
+ cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
1478
+ current_cost_ref[0] += cost
1479
+ else:
1480
+ success = result is not None
1481
+
834
1482
  except Exception as e:
835
- error_content = f"Could not capture test failures: {e}\nUsing simulated test failures"
836
-
837
- error_file_path.write_text(error_content)
838
-
839
- result = fix_main(
840
- ctx,
841
- prompt_file=str(pdd_files['prompt']),
842
- code_file=str(pdd_files['code']),
843
- unit_test_file=str(pdd_files['test']),
844
- error_file=str(error_file_path),
845
- output_test=str(pdd_files['test']),
846
- output_code=str(pdd_files['code']),
847
- output_results=f"{basename}_fix_results.log",
848
- loop=True,
849
- verification_program=str(pdd_files['example']),
850
- max_attempts=max_attempts,
851
- budget=budget - current_cost_ref[0],
852
- auto_submit=True
853
- )
854
- elif operation == 'update':
855
- result = update_main(
856
- ctx,
857
- input_prompt_file=str(pdd_files['prompt']),
858
- modified_code_file=str(pdd_files['code']),
859
- input_code_file=None,
860
- output=str(pdd_files['prompt']),
861
- git=True
862
- )
863
- else:
864
- errors.append(f"Unknown operation '{operation}' requested.")
865
- result = {'success': False, 'cost': 0.0}
1483
+ errors.append(f"Exception during '{operation}': {e}")
1484
+ success = False
866
1485
 
867
- # Handle different return formats from command functions
868
- if isinstance(result, dict):
869
- # Dictionary return (e.g., from some commands)
870
- success = result.get('success', False)
871
- current_cost_ref[0] += result.get('cost', 0.0)
872
- elif isinstance(result, tuple) and len(result) >= 3:
873
- # Tuple return (e.g., from code_generator_main, context_generator_main)
874
- # For tuples, success is determined by no exceptions and valid return content
875
- # Check if the first element (generated content) is None, which indicates failure
876
- success = result[0] is not None
877
- # Extract cost from tuple (usually second-to-last element)
878
- cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
879
- current_cost_ref[0] += cost
880
- else:
881
- # Unknown return format
882
- success = result is not None
883
- current_cost_ref[0] += 0.0
884
-
885
- except Exception as e:
886
- errors.append(f"Exception during '{operation}': {e}")
887
- success = False
1486
+ # Log update
1487
+ duration = time.time() - op_start_time
1488
+ actual_cost = 0.0
1489
+ model_name = "unknown"
1490
+ if success:
1491
+ if isinstance(result, dict):
1492
+ actual_cost = result.get('cost', 0.0)
1493
+ model_name = result.get('model', 'unknown')
1494
+ elif isinstance(result, tuple) and len(result) >= 3:
1495
+ actual_cost = result[-2] if len(result) >= 2 else 0.0
1496
+ model_name = result[-1] if len(result) >= 1 else 'unknown'
1497
+ last_model_name = str(model_name)
1498
+ operations_completed.append(operation)
1499
+ _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
888
1500
 
889
- # Calculate execution duration
890
- duration = time.time() - start_time
1501
+ update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
1502
+ append_sync_log(basename, language, log_entry)
891
1503
 
892
- # Extract cost and model from result for logging
893
- actual_cost = 0.0
894
- model_name = "unknown"
895
- error_message = None
896
-
897
- if success:
898
- if isinstance(result, dict):
899
- actual_cost = result.get('cost', 0.0)
900
- model_name = result.get('model', 'unknown')
901
- elif isinstance(result, tuple) and len(result) >= 3:
902
- actual_cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
903
- model_name = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else 'unknown'
904
- else:
905
- error_message = errors[-1] if errors else "Operation failed"
906
-
907
- # Update and save log entry with execution results
908
- update_sync_log_entry(log_entry, {
909
- 'success': success,
910
- 'cost': actual_cost,
911
- 'model': model_name,
912
- 'error': error_message
913
- }, duration)
914
- append_sync_log(basename, language, log_entry)
915
-
916
- if success:
917
- operations_completed.append(operation)
918
- # Extract cost and model from result based on format
919
- if isinstance(result, dict):
920
- cost = result.get('cost', 0.0)
921
- model = result.get('model', '')
922
- elif isinstance(result, tuple) and len(result) >= 3:
923
- cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
924
- model = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else ''
925
- else:
926
- cost = 0.0
927
- model = ''
928
- _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
929
-
930
- # After successful crash operation, re-run the example to generate fresh run report
931
- if operation == 'crash':
932
- try:
933
- example_file = pdd_files['example']
934
- if example_file.exists():
935
- # Run the example program to check if crash is actually fixed
936
- try:
937
- example_result = subprocess.run(
938
- ['python', str(example_file)],
939
- capture_output=True,
940
- text=True,
941
- timeout=60,
942
- env=os.environ.copy(),
943
- cwd=str(example_file.parent)
944
- )
945
-
946
- # Create fresh run report based on actual execution
947
- report_data = RunReport(
948
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
949
- exit_code=example_result.returncode,
950
- tests_passed=1 if example_result.returncode == 0 else 0,
951
- tests_failed=0 if example_result.returncode == 0 else 1,
952
- coverage=100.0 if example_result.returncode == 0 else 0.0
953
- )
954
- save_run_report(asdict(report_data), basename, language)
955
- print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
956
-
957
- except subprocess.TimeoutExpired:
958
- # Example timed out - still considered a failure
959
- report_data = RunReport(
960
- timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
961
- exit_code=124, # Standard timeout exit code
962
- tests_passed=0, tests_failed=1, coverage=0.0
963
- )
964
- save_run_report(asdict(report_data), basename, language)
965
- print("Example timed out after crash fix - created failure run report")
966
-
967
- except Exception as e:
968
- # Don't fail the entire operation if example re-execution fails
969
- print(f"Warning: Post-crash example re-execution failed: {e}")
1504
+ # Post-operation checks (simplified)
1505
+ if success and operation == 'crash':
1506
+ # Re-run example to verify crash fix worked
1507
+ try:
1508
+ # Use clean env without TUI-specific vars
1509
+ clean_env = os.environ.copy()
1510
+ for var in ['FORCE_COLOR', 'COLUMNS']:
1511
+ clean_env.pop(var, None)
1512
+ # Get language-appropriate run command
1513
+ example_path = str(pdd_files['example'])
1514
+ run_cmd = get_run_command_for_file(example_path)
1515
+ if run_cmd:
1516
+ cmd_parts = run_cmd.split()
1517
+ else:
1518
+ cmd_parts = ['python', example_path]
1519
+ # Use error-detection runner that handles server-style examples
1520
+ returncode, stdout, stderr = _run_example_with_error_detection(
1521
+ cmd_parts,
1522
+ env=clean_env,
1523
+ cwd=str(pdd_files['example'].parent),
1524
+ timeout=60
1525
+ )
1526
+ # Include test_hash for staleness detection
1527
+ test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1528
+ report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
1529
+ save_run_report(asdict(report), basename, language)
1530
+ except Exception as e:
1531
+ # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
1532
+ error_msg = f"Post-crash verification failed: {e}"
1533
+ errors.append(error_msg)
1534
+ log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})
970
1535
 
971
- # After successful fix operation, execute tests to update run report
972
- if operation == 'fix':
973
- try:
974
- test_file = pdd_files['test']
975
- if test_file.exists():
1536
+ if success and operation == 'fix':
1537
+ # Re-run tests to update run_report after successful fix
1538
+ # This prevents infinite loop by updating the state machine
1539
+ if pdd_files['test'].exists():
976
1540
  _execute_tests_and_create_run_report(
977
- test_file, basename, language, target_coverage
1541
+ pdd_files['test'],
1542
+ basename,
1543
+ language,
1544
+ target_coverage,
1545
+ code_file=pdd_files.get("code"),
1546
+ atomic_state=atomic_state,
1547
+ test_files=pdd_files.get('test_files'), # Bug #156
978
1548
  )
979
- except Exception as e:
980
- # Don't fail the entire operation if test execution fails
981
- print(f"Warning: Post-fix test execution failed: {e}")
982
- else:
983
- errors.append(f"Operation '{operation}' failed.")
984
- break
1549
+
1550
+ if not success:
1551
+ errors.append(f"Operation '{operation}' failed.")
1552
+ break
985
1553
 
986
- except TimeoutError:
987
- errors.append(f"Could not acquire lock for '{basename}'. Another sync process may be running.")
988
- except Exception as e:
989
- errors.append(f"An unexpected error occurred in the orchestrator: {e}")
990
- finally:
991
- # Log lock release
992
- try:
993
- log_sync_event(basename, language, "lock_released", {
994
- "pid": os.getpid(),
995
- "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
996
- "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
997
- })
998
- except Exception:
999
- pass # Don't fail if logging fails
1554
+ except BaseException as e:
1555
+ errors.append(f"An unexpected error occurred in the orchestrator: {type(e).__name__}: {e}")
1556
+ # Log the full traceback for debugging
1557
+ import traceback
1558
+ traceback.print_exc()
1559
+ finally:
1560
+ try:
1561
+ log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
1562
+ except: pass
1000
1563
 
1001
- if stop_event:
1002
- stop_event.set()
1003
- if animation_thread and animation_thread.is_alive():
1004
- animation_thread.join(timeout=5)
1564
+ # Return result dict
1565
+ return {
1566
+ 'success': not errors,
1567
+ 'operations_completed': operations_completed,
1568
+ 'skipped_operations': skipped_operations,
1569
+ 'total_cost': current_cost_ref[0],
1570
+ 'total_time': time.time() - start_time,
1571
+ 'final_state': {p: {'exists': f.exists(), 'path': str(f)} for p, f in pdd_files.items() if p != 'test_files'},
1572
+ 'errors': errors,
1573
+ 'error': "; ".join(errors) if errors else None, # Add this line
1574
+ 'model_name': last_model_name,
1575
+ }
1576
+
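The dictionary returned above is the contract between the worker and the caller of sync_orchestration: success simply means no errors were recorded, error collapses the error list into a single string, and model_name reports the last model used. A short usage sketch of how a caller might consume it; the key names come from the hunk above, while the summary formatting is illustrative.

    def summarize_sync_result(result: dict) -> str:
        # Works with the keys produced above; missing keys fall back to
        # defaults so the helper also tolerates the early-return error dicts.
        status = "ok" if result.get("success") else "failed"
        ops = ", ".join(result.get("operations_completed", [])) or "none"
        cost = result.get("total_cost", 0.0)
        line = f"sync {status}: operations=[{ops}] cost=${cost:.4f}"
        if result.get("error"):
            line += f" error={result['error']}"
        return line

    # summarize_sync_result({"success": True,
    #                        "operations_completed": ["generate"],
    #                        "total_cost": 0.02})
    # -> 'sync ok: operations=[generate] cost=$0.0200'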
1577
+ # Instantiate and run Textual App
1578
+ app = SyncApp(
1579
+ basename=basename,
1580
+ budget=budget,
1581
+ worker_func=sync_worker_logic,
1582
+ function_name_ref=current_function_name_ref,
1583
+ cost_ref=current_cost_ref,
1584
+ prompt_path_ref=prompt_path_ref,
1585
+ code_path_ref=code_path_ref,
1586
+ example_path_ref=example_path_ref,
1587
+ tests_path_ref=tests_path_ref,
1588
+ prompt_color_ref=prompt_box_color_ref,
1589
+ code_color_ref=code_box_color_ref,
1590
+ example_color_ref=example_box_color_ref,
1591
+ tests_color_ref=tests_box_color_ref,
1592
+ stop_event=stop_event,
1593
+ progress_callback_ref=progress_callback_ref
1594
+ )
1595
+
1596
+ # Store app reference so worker can access request_confirmation
1597
+ app_ref[0] = app
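The *_ref arguments passed to SyncApp above, and app_ref[0] = app itself, use single-element lists as mutable cells: the worker thread writes the latest status, cost, and paths into index 0, and the Textual UI reads them back, while storing the app in app_ref lets the worker reach the app's confirmation prompt. A small sketch of the pattern outside of Textual, using hypothetical names:

    import threading
    import time

    def worker(status_ref: list, cost_ref: list) -> None:
        # The worker only ever assigns to index 0; readers always see the most
        # recently written value, since simple list item assignment is
        # effectively atomic under CPython's GIL.
        for step in ("generate", "example", "test"):
            status_ref[0] = step
            cost_ref[0] += 0.01
            time.sleep(0.01)

    status_ref, cost_ref = ["idle"], [0.0]
    t = threading.Thread(target=worker, args=(status_ref, cost_ref))
    t.start()
    t.join()
    print(status_ref[0], round(cost_ref[0], 2))  # -> test 0.03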
1598
+
1599
+ result = app.run()
1600
+
1601
+ # Show exit animation if not quiet
1602
+ if not quiet:
1603
+ from .sync_tui import show_exit_animation
1604
+ show_exit_animation()
1605
+
1606
+ # Check for worker exception that might have caused a crash
1607
+ if app.worker_exception:
1608
+ print(f"\n[Error] Worker thread crashed with exception: {app.worker_exception}", file=sys.stderr)
1005
1609
 
1006
- total_time = time.time() - start_time
1007
- final_state = {
1008
- p_name: {'exists': p_path.exists(), 'path': str(p_path)}
1009
- for p_name, p_path in pdd_files.items()
1010
- }
1610
+ if hasattr(app, 'captured_logs') and app.captured_logs:
1611
+ print("\n[Captured Logs (last 20 lines)]", file=sys.stderr)
1612
+ for line in app.captured_logs[-20:]: # Print last 20 lines
1613
+ print(f" {line}", file=sys.stderr)
1614
+
1615
+ import traceback
1616
+ # Use trace module to print the stored exception's traceback if available
1617
+ if hasattr(app.worker_exception, '__traceback__'):
1618
+ traceback.print_exception(type(app.worker_exception), app.worker_exception, app.worker_exception.__traceback__, file=sys.stderr)
1619
+
1620
+ if result is None:
1621
+ return {
1622
+ "success": False,
1623
+ "total_cost": current_cost_ref[0],
1624
+ "model_name": "",
1625
+ "error": "Sync process interrupted or returned no result.",
1626
+ "operations_completed": [],
1627
+ "errors": ["App exited without result"]
1628
+ }
1011
1629
 
1012
- return {
1013
- 'success': not errors,
1014
- 'operations_completed': operations_completed,
1015
- 'skipped_operations': skipped_operations,
1016
- 'total_cost': current_cost_ref[0],
1017
- 'total_time': total_time,
1018
- 'final_state': final_state,
1019
- 'errors': errors,
1020
- }
1630
+ return result
1021
1631
 
1022
1632
  if __name__ == '__main__':
1023
- # Example usage of the sync_orchestration module.
1024
- # This simulates running `pdd sync my_calculator` from the command line.
1025
-
1026
- print("--- Running Basic Sync Orchestration Example ---")
1027
-
1028
- # Setup a dummy project structure
1633
+ # Example usage
1029
1634
  Path("./prompts").mkdir(exist_ok=True)
1030
1635
  Path("./src").mkdir(exist_ok=True)
1031
1636
  Path("./examples").mkdir(exist_ok=True)
1032
1637
  Path("./tests").mkdir(exist_ok=True)
1033
1638
  Path("./prompts/my_calculator_python.prompt").write_text("Create a calculator.")
1034
-
1035
- # Ensure PDD meta directory exists for logs and locks
1036
1639
  PDD_DIR.mkdir(exist_ok=True)
1037
1640
  META_DIR.mkdir(exist_ok=True)
1038
-
1039
- result = sync_orchestration(
1040
- basename="my_calculator",
1041
- language="python",
1042
- quiet=True # Suppress mock command output for cleaner example run
1043
- )
1044
-
1045
- print("\n--- Sync Orchestration Finished ---")
1641
+ result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
1046
1642
  print(json.dumps(result, indent=2))
1047
-
1048
- if result['success']:
1049
- print("\n✅ Sync completed successfully.")
1050
- else:
1051
- print(f"\n❌ Sync failed. Errors: {result['errors']}")
1052
-
1053
- print("\n--- Running Sync Log Example ---")
1054
- # This will now show the log from the run we just completed.
1055
- log_result = sync_orchestration(
1056
- basename="my_calculator",
1057
- language="python",
1058
- log=True
1059
- )