pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py CHANGED
@@ -12,21 +12,33 @@ import subprocess
  import re
  import os
  from pathlib import Path
- from typing import Dict, Any, Optional, List
- from dataclasses import asdict
+ from typing import Dict, Any, Optional, List, Callable
+ from dataclasses import asdict, dataclass, field
+ import tempfile
+ import sys

  import click
+ import logging
+
+ # --- Constants ---
+ MAX_CONSECUTIVE_TESTS = 3  # Allow up to 3 consecutive test attempts
+ MAX_TEST_EXTEND_ATTEMPTS = 2  # Allow up to 2 attempts to extend tests for coverage
+ MAX_CONSECUTIVE_CRASHES = 3  # Allow up to 3 consecutive crash attempts (Bug #157 fix)

  # --- Real PDD Component Imports ---
- from .sync_animation import sync_animation
+ from .sync_tui import SyncApp
  from .sync_determine_operation import (
      sync_determine_operation,
      get_pdd_file_paths,
      RunReport,
+     SyncDecision,
      PDD_DIR,
      META_DIR,
      SyncLock,
      read_run_report,
+     calculate_sha256,
+     calculate_current_hashes,
+     _safe_basename,
  )
  from .auto_deps_main import auto_deps_main
  from .code_generator_main import code_generator_main
@@ -37,12 +49,114 @@ from .cmd_test_main import cmd_test_main
  from .fix_main import fix_main
  from .update_main import update_main
  from .python_env_detector import detect_host_python_executable
+ from .get_run_command import get_run_command_for_file
+ from .pytest_output import extract_failing_files_from_output
+ from . import DEFAULT_STRENGTH
+
+
+ # --- Helper Functions ---
+ # Note: _safe_basename is imported from sync_determine_operation
+
+
+ # --- Atomic State Update (Issue #159 Fix) ---
+
+ @dataclass
+ class PendingStateUpdate:
+     """Holds pending state updates for atomic commit."""
+     run_report: Optional[Dict[str, Any]] = None
+     fingerprint: Optional[Dict[str, Any]] = None
+     run_report_path: Optional[Path] = None
+     fingerprint_path: Optional[Path] = None
+
+
+ class AtomicStateUpdate:
+     """
+     Context manager for atomic state updates.
+
+     Ensures run_report and fingerprint are both written or neither is written.
+     This fixes Issue #159 where non-atomic writes caused state desynchronization.
+
+     Usage:
+         with AtomicStateUpdate(basename, language) as state:
+             state.set_run_report(report_dict, report_path)
+             state.set_fingerprint(fingerprint_dict, fp_path)
+         # On successful exit, both files are written atomically
+         # On exception, neither file is written (rollback)
+     """
+
+     def __init__(self, basename: str, language: str):
+         self.basename = basename
+         self.language = language
+         self.pending = PendingStateUpdate()
+         self._temp_files: List[str] = []
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         if exc_type is None:
+             self._commit()
+         else:
+             self._rollback()
+         return False  # Don't suppress exceptions
+
+     def set_run_report(self, report: Dict[str, Any], path: Path):
+         """Buffer a run report for atomic write."""
+         self.pending.run_report = report
+         self.pending.run_report_path = path
+
+     def set_fingerprint(self, fingerprint: Dict[str, Any], path: Path):
+         """Buffer a fingerprint for atomic write."""
+         self.pending.fingerprint = fingerprint
+         self.pending.fingerprint_path = path
+
+     def _atomic_write(self, data: Dict[str, Any], target_path: Path) -> None:
+         """Write data to file atomically using temp file + rename pattern."""
+         target_path.parent.mkdir(parents=True, exist_ok=True)
+
+         # Write to temp file in same directory (required for atomic rename)
+         fd, temp_path = tempfile.mkstemp(
+             dir=target_path.parent,
+             prefix=f".{target_path.stem}_",
+             suffix=".tmp"
+         )
+         self._temp_files.append(temp_path)
+
+         try:
+             with os.fdopen(fd, 'w') as f:
+                 json.dump(data, f, indent=2, default=str)
+
+             # Atomic rename - guaranteed atomic on POSIX systems
+             os.replace(temp_path, target_path)
+             self._temp_files.remove(temp_path)  # Successfully moved, stop tracking
+         except Exception:
+             # Leave temp file for rollback to clean up
+             raise
+
+     def _commit(self):
+         """Commit all pending state updates atomically."""
+         # Write fingerprint first (checkpoint), then run_report
+         if self.pending.fingerprint and self.pending.fingerprint_path:
+             self._atomic_write(self.pending.fingerprint, self.pending.fingerprint_path)
+         if self.pending.run_report and self.pending.run_report_path:
+             self._atomic_write(self.pending.run_report, self.pending.run_report_path)
+
+     def _rollback(self):
+         """Clean up any temp files without committing changes."""
+         for temp_path in self._temp_files:
+             try:
+                 if os.path.exists(temp_path):
+                     os.unlink(temp_path)
+             except OSError:
+                 pass  # Best effort cleanup
+         self._temp_files.clear()
+

  # --- Mock Helper Functions ---

  def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
      """Load sync log entries for a basename and language."""
-     log_file = META_DIR / f"{basename}_{language}_sync.log"
+     log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
      if not log_file.exists():
          return []
      try:
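The `_atomic_write` method above is the classic temp-file-plus-rename idiom: `os.replace` is atomic on POSIX only when source and destination sit on the same filesystem, which is why the temp file is created in the target's own directory. A minimal standalone sketch of the same pattern, assuming nothing beyond the standard library (the function and file names here are illustrative, not part of the package):

import json
import os
import tempfile
from pathlib import Path

def atomic_json_write(data: dict, target: Path) -> None:
    # Create the temp file beside the target so os.replace never crosses filesystems.
    fd, tmp = tempfile.mkstemp(dir=target.parent, suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp, target)  # atomic: readers see the old or the new file, never a partial one
    except Exception:
        os.unlink(tmp)  # best-effort cleanup of the orphaned temp file
        raise

atomic_json_write({"ok": True}, Path("example_state.json"))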
@@ -84,7 +198,7 @@ def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duratio

  def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
      """Append completed log entry to the sync log file."""
-     log_file = META_DIR / f"{basename}_{language}_sync.log"
+     log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
      META_DIR.mkdir(parents=True, exist_ok=True)
      with open(log_file, 'a') as f:
          f.write(json.dumps(entry) + '\n')
@@ -98,20 +212,44 @@ def log_sync_event(basename: str, language: str, event: str, details: Dict[str,
      }
      append_sync_log(basename, language, entry)

- def save_run_report(report: Dict[str, Any], basename: str, language: str):
-     """Save a run report to the metadata directory."""
-     report_file = META_DIR / f"{basename}_{language}_run.json"
-     META_DIR.mkdir(parents=True, exist_ok=True)
-     with open(report_file, 'w') as f:
-         json.dump(report, f, indent=2, default=str)
+ def save_run_report(report: Dict[str, Any], basename: str, language: str,
+                     atomic_state: Optional['AtomicStateUpdate'] = None):
+     """Save a run report to the metadata directory.
+
+     Args:
+         report: The run report dictionary to save.
+         basename: The module basename.
+         language: The programming language.
+         atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+     """
+     report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
+     if atomic_state:
+         # Buffer for atomic write
+         atomic_state.set_run_report(report, report_file)
+     else:
+         # Legacy direct write
+         META_DIR.mkdir(parents=True, exist_ok=True)
+         with open(report_file, 'w') as f:
+             json.dump(report, f, indent=2, default=str)
+
+ def _save_operation_fingerprint(basename: str, language: str, operation: str,
+                                 paths: Dict[str, Path], cost: float, model: str,
+                                 atomic_state: Optional['AtomicStateUpdate'] = None):
+     """Save fingerprint state after successful operation.

- def _save_operation_fingerprint(basename: str, language: str, operation: str,
-                                 paths: Dict[str, Path], cost: float, model: str):
-     """Save fingerprint state after successful operation."""
+     Args:
+         basename: The module basename.
+         language: The programming language.
+         operation: The operation that was performed.
+         paths: Dictionary of PDD file paths.
+         cost: The cost of the operation.
+         model: The model used.
+         atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
+     """
      from datetime import datetime, timezone
      from .sync_determine_operation import calculate_current_hashes, Fingerprint
      from . import __version__
-
+
      current_hashes = calculate_current_hashes(paths)
      fingerprint = Fingerprint(
          pdd_version=__version__,
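With the optional `atomic_state` argument, both writers above merely buffer their payloads into the context manager; nothing touches disk until `_commit` runs on a clean exit, fingerprint first and run report second. A sketch of the intended call pattern under that reading (the `report`, `paths`, `cost`, and `model` values are illustrative):

with AtomicStateUpdate(basename, language) as state:
    # Both calls buffer into `state` instead of writing immediately.
    save_run_report(asdict(report), basename, language, atomic_state=state)
    _save_operation_fingerprint(basename, language, 'test', paths, cost, model,
                                atomic_state=state)
# Clean exit: both files are then written via the temp-file rename path.
# An exception inside the block commits neither file.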
@@ -120,103 +258,543 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
          prompt_hash=current_hashes.get('prompt_hash'),
          code_hash=current_hashes.get('code_hash'),
          example_hash=current_hashes.get('example_hash'),
-         test_hash=current_hashes.get('test_hash')
+         test_hash=current_hashes.get('test_hash'),
+         test_files=current_hashes.get('test_files'),  # Bug #156
      )
-
-     META_DIR.mkdir(parents=True, exist_ok=True)
-     fingerprint_file = META_DIR / f"{basename}_{language}.json"
-     with open(fingerprint_file, 'w') as f:
-         json.dump(asdict(fingerprint), f, indent=2, default=str)

- # SyncLock class now imported from sync_determine_operation module
+     fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
+     if atomic_state:
+         # Buffer for atomic write
+         atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
+     else:
+         # Legacy direct write
+         META_DIR.mkdir(parents=True, exist_ok=True)
+         with open(fingerprint_file, 'w') as f:
+             json.dump(asdict(fingerprint), f, indent=2, default=str)
+
+ def _python_cov_target_for_code_file(code_file: Path) -> str:
+     """Return a `pytest-cov` `--cov` target for a Python code file.
+
+     - If the file is inside a Python package (directories with `__init__.py`),
+       returns a dotted module path (e.g., `pdd.sync_orchestration`).
+     - Otherwise falls back to the filename stem (e.g., `admin_get_users`).
+     """
+     if code_file.suffix != ".py":
+         return code_file.stem
+
+     package_dir: Optional[Path] = None
+     current = code_file.parent
+     while (current / "__init__.py").exists():
+         package_dir = current
+         parent = current.parent
+         if parent == current:
+             break
+         current = parent
+
+     if package_dir:
+         relative_module = code_file.relative_to(package_dir.parent).with_suffix("")
+         return str(relative_module).replace(os.sep, ".")
+
+     return code_file.stem
+
+
+ def _python_cov_target_for_test_and_code(test_file: Path, code_file: Path, fallback: str) -> str:
+     """Choose the best `--cov` target based on how tests import the code.
+
+     In some repos, tests add a directory to `sys.path` and import modules by their
+     filename stem (e.g., `from admin_get_users import ...`) even when the code
+     also lives under a package (e.g., `backend.functions.admin_get_users`).
+
+     Heuristic:
+     - Prefer the code file stem when the test file imports it directly.
+     - Otherwise, prefer the dotted module path derived from the package layout.
+     - Fall back to the provided fallback (usually the basename).
+     """
+
+     def _imports_module(source: str, module: str) -> bool:
+         escaped = re.escape(module)
+         return bool(
+             re.search(rf"^\s*import\s+{escaped}\b", source, re.MULTILINE)
+             or re.search(rf"^\s*from\s+{escaped}\b", source, re.MULTILINE)
+         )
+
+     stem = code_file.stem
+     dotted = _python_cov_target_for_code_file(code_file)

- def _execute_tests_and_create_run_report(test_file: Path, basename: str, language: str, target_coverage: float = 90.0) -> RunReport:
-     """Execute tests and create a RunReport with actual results."""
-     timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
-
      try:
-         # Execute pytest with coverage reporting on the specific module
-         # Extract module name from test file (e.g., test_factorial.py -> factorial)
-         module_name = test_file.name.replace('test_', '').replace('.py', '')
-
-         # Use the module import path rather than file path for coverage
-         # Use environment-aware Python executable for pytest execution
-         python_executable = detect_host_python_executable()
-
-         # Determine coverage target based on module location
-         if base_package:
-             cov_target = f'{base_package}.{module_name}'
-         else:
-             # Dynamically discover package structure based on test file location
-             relative_path = test_file.parent.relative_to(Path.cwd())
-             package_path = str(relative_path).replace(os.sep, '.')
-             cov_target = f'{package_path}.{module_name}' if package_path else module_name
-
-         result = subprocess.run([
-             python_executable, '-m', 'pytest',
-             str(test_file),
-             '-v',
-             '--tb=short',
-             f'--cov={cov_target}',
-             '--cov-report=term-missing'
-         ], capture_output=True, text=True, timeout=300)
-
-         exit_code = result.returncode
-         stdout = result.stdout
-         stderr = result.stderr
-
-         # Parse test results from pytest output
-         tests_passed = 0
-         tests_failed = 0
-         coverage = 0.0
-
-         # Parse passed/failed tests
-         if 'passed' in stdout:
-             passed_match = re.search(r'(\d+) passed', stdout)
+         test_source = test_file.read_text(encoding="utf-8", errors="ignore")
+     except Exception:
+         test_source = ""
+
+     if stem and _imports_module(test_source, stem):
+         return stem
+
+     if dotted and dotted != stem:
+         if _imports_module(test_source, dotted):
+             return dotted
+
+         if "." in dotted:
+             parent = dotted.rsplit(".", 1)[0]
+             # e.g. `from backend.functions import admin_get_users`
+             if re.search(
+                 rf"^\s*from\s+{re.escape(parent)}\s+import\s+.*\b{re.escape(stem)}\b",
+                 test_source,
+                 re.MULTILINE,
+             ):
+                 return dotted
+             # e.g. `import backend.functions.admin_get_users`
+             if re.search(
+                 rf"^\s*import\s+{re.escape(parent)}\.{re.escape(stem)}\b",
+                 test_source,
+                 re.MULTILINE,
+             ):
+                 return dotted
+
+         return dotted
+
+     return stem or fallback
+
+
+ def _parse_test_output(output: str, language: str) -> tuple[int, int, float]:
+     """
+     Parse test output to extract passed/failed/coverage.
+
+     Args:
+         output: Combined stdout/stderr from test runner
+         language: Language name (e.g., 'python', 'typescript', 'go')
+
+     Returns:
+         (tests_passed, tests_failed, coverage)
+     """
+     tests_passed = 0
+     tests_failed = 0
+     coverage = 0.0
+
+     lang = language.lower()
+
+     # Python (pytest)
+     if lang == 'python':
+         if 'passed' in output:
+             passed_match = re.search(r'(\d+) passed', output)
              if passed_match:
                  tests_passed = int(passed_match.group(1))
-
-         if 'failed' in stdout:
-             failed_match = re.search(r'(\d+) failed', stdout)
+         if 'failed' in output:
+             failed_match = re.search(r'(\d+) failed', output)
              if failed_match:
                  tests_failed = int(failed_match.group(1))
-
-         # Parse coverage percentage - try multiple patterns
-         coverage_match = re.search(r'TOTAL.*?(\d+)%', stdout)
+         if 'error' in output:
+             error_match = re.search(r'(\d+) error', output)
+             if error_match:
+                 tests_failed += int(error_match.group(1))
+         coverage_match = re.search(r'TOTAL.*?(\d+)%', output)
          if not coverage_match:
-             # Try alternative patterns for coverage output
-             coverage_match = re.search(r'(\d+)%\s*$', stdout, re.MULTILINE)
+             coverage_match = re.search(r'(\d+)%\s*$', output, re.MULTILINE)
          if not coverage_match:
-             # Try pattern with decimal
-             coverage_match = re.search(r'(\d+(?:\.\d+)?)%', stdout)
-
+             coverage_match = re.search(r'(\d+(?:\.\d+)?)%', output)
          if coverage_match:
              coverage = float(coverage_match.group(1))
-
-         # Create and save run report
+
+     # Jest/Vitest (JavaScript/TypeScript)
+     elif lang in ('javascript', 'typescript', 'typescriptreact'):
+         # "Tests: X passed, Y failed" or "Tests: X passed, Y failed, Z total"
+         match = re.search(r'Tests:\s*(\d+)\s+passed', output)
+         if match:
+             tests_passed = int(match.group(1))
+         match = re.search(r'Tests:.*?(\d+)\s+failed', output)
+         if match:
+             tests_failed = int(match.group(1))
+
+         # Alternative Mocha-style: "X passing, Y failing"
+         if tests_passed == 0:
+             pass_match = re.search(r'(\d+)\s+pass(?:ing)?', output, re.I)
+             if pass_match:
+                 tests_passed = int(pass_match.group(1))
+         if tests_failed == 0:
+             fail_match = re.search(r'(\d+)\s+fail(?:ing)?', output, re.I)
+             if fail_match:
+                 tests_failed = int(fail_match.group(1))
+
+         # Coverage: "All files | XX.XX |"
+         cov_match = re.search(r'All files[^|]*\|\s*(\d+\.?\d*)', output)
+         if cov_match:
+             coverage = float(cov_match.group(1))
+
+     # Go
+     elif lang == 'go':
+         # Count PASS and FAIL occurrences for individual tests
+         tests_passed = len(re.findall(r'--- PASS:', output))
+         tests_failed = len(re.findall(r'--- FAIL:', output))
+
+         # Fallback: check for overall PASS/FAIL
+         if tests_passed == 0 and 'PASS' in output and 'FAIL' not in output:
+             tests_passed = 1
+         if tests_failed == 0 and 'FAIL' in output:
+             tests_failed = 1
+
+         # coverage: XX.X% of statements
+         cov_match = re.search(r'coverage:\s*(\d+\.?\d*)%', output)
+         if cov_match:
+             coverage = float(cov_match.group(1))
+
+     # Rust (cargo test)
+     elif lang == 'rust':
+         # "test result: ok. X passed; Y failed;"
+         match = re.search(r'(\d+)\s+passed', output)
+         if match:
+             tests_passed = int(match.group(1))
+         match = re.search(r'(\d+)\s+failed', output)
+         if match:
+             tests_failed = int(match.group(1))
+
+     # Fallback: try generic patterns
+     else:
+         pass_match = re.search(r'(\d+)\s+(?:tests?\s+)?pass(?:ed)?', output, re.I)
+         fail_match = re.search(r'(\d+)\s+(?:tests?\s+)?fail(?:ed)?', output, re.I)
+         if pass_match:
+             tests_passed = int(pass_match.group(1))
+         if fail_match:
+             tests_failed = int(fail_match.group(1))
+
+     return tests_passed, tests_failed, coverage
+
+
+ def _detect_example_errors(output: str) -> tuple[bool, str]:
+     """
+     Detect if example output contains error indicators.
+
+     Only detects true crashes/errors:
+     - Python tracebacks (catches ALL unhandled exceptions)
+     - ERROR level log messages
+
+     Intentionally does NOT detect:
+     - HTTP status codes (examples may test error responses)
+     - Individual exception type names (causes false positives, redundant with traceback)
+
+     Returns:
+         (has_errors, error_summary)
+     """
+     error_patterns = [
+         (r'Traceback \(most recent call last\):', 'Python traceback'),
+         (r' - ERROR - ', 'Error log message'),  # Python logging format
+     ]
+
+     errors_found = []
+     for pattern, description in error_patterns:
+         if re.search(pattern, output, re.MULTILINE):
+             errors_found.append(description)
+
+     if errors_found:
+         return True, '; '.join(errors_found)
+     return False, ''
+
+
+ def _try_auto_fix_import_error(
+     error_output: str,
+     code_file: Path,
+     example_file: Path,
+ ) -> tuple[bool, str]:
+     """
+     Try to automatically fix common import errors before calling expensive agentic fix.
+
+     Returns:
+         (fixed, message): Whether a fix was attempted and what was done.
+     """
+     import re
+
+     # Check for ModuleNotFoundError or ImportError
+     module_not_found = re.search(r"ModuleNotFoundError: No module named ['\"]([^'\"]+)['\"]", error_output)
+     import_error = re.search(r"ImportError: cannot import name ['\"]([^'\"]+)['\"]", error_output)
+
+     if not module_not_found and not import_error:
+         return False, "No import error detected"
+
+     if module_not_found:
+         missing_module = module_not_found.group(1)
+         # Split by . to get the top-level package
+         top_level_package = missing_module.split('.')[0]
+
+         # Check if this is the module we're trying to import (local module)
+         code_module_name = code_file.stem  # e.g., "data_validator" from "data_validator.py"
+
+         if top_level_package == code_module_name:
+             # It's trying to import our own generated code - fix the example's sys.path
+             # Read the example and fix the path manipulation
+             try:
+                 example_content = example_file.read_text(encoding='utf-8')
+                 code_dir = str(code_file.parent.resolve())
+
+                 # Look for existing sys.path manipulation
+                 if 'sys.path' in example_content:
+                     # Try to fix the existing path manipulation
+                     # Common pattern: module_path = os.path.abspath(os.path.join(...))
+                     # Replace with correct path
+                     fixed_content = re.sub(
+                         r"module_path\s*=\s*os\.path\.abspath\([^)]+\)",
+                         f"module_path = '{code_dir}'",
+                         example_content
+                     )
+                     if fixed_content != example_content:
+                         example_file.write_text(fixed_content, encoding='utf-8')
+                         return True, f"Fixed sys.path to point to {code_dir}"
+
+                 # If no existing sys.path, add one at the start after imports
+                 lines = example_content.split('\n')
+                 insert_pos = 0
+                 for i, line in enumerate(lines):
+                     if line.startswith('import ') or line.startswith('from '):
+                         if 'sys' in line or 'os' in line:
+                             insert_pos = i + 1
+                         continue
+                     if line.strip() and not line.startswith('#') and not line.startswith('import') and not line.startswith('from'):
+                         insert_pos = i
+                         break
+
+                 path_fix = f"\n# Auto-added by pdd to fix import\nimport sys\nsys.path.insert(0, '{code_dir}')\n"
+                 lines.insert(insert_pos, path_fix)
+                 example_file.write_text('\n'.join(lines), encoding='utf-8')
+                 return True, f"Added sys.path.insert(0, '{code_dir}') to example"
+
+             except Exception as e:
+                 return False, f"Failed to fix import path: {e}"
+
+         else:
+             # It's an external package - try pip install
+             try:
+                 result = subprocess.run(
+                     [sys.executable, '-m', 'pip', 'install', top_level_package],
+                     capture_output=True,
+                     text=True,
+                     timeout=120
+                 )
+                 if result.returncode == 0:
+                     return True, f"Installed missing package: {top_level_package}"
+                 else:
+                     return False, f"Failed to install {top_level_package}: {result.stderr}"
+             except Exception as e:
+                 return False, f"Failed to run pip install: {e}"
+
+     return False, "Import error detected but no auto-fix available"
+
+
+ def _run_example_with_error_detection(
+     cmd_parts: list[str],
+     env: dict,
+     cwd: Optional[str] = None,
+     timeout: int = 60
+ ) -> tuple[int, str, str]:
+     """
+     Run example file, detecting errors from output.
+
+     For server-style examples that block, this runs until timeout
+     then analyzes output for errors. No errors = success.
+
+     Returns:
+         (returncode, stdout, stderr)
+         - returncode: 0 if no errors detected, positive if errors found or process failed
+     """
+     import threading
+
+     proc = subprocess.Popen(
+         cmd_parts,
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+         stdin=subprocess.DEVNULL,
+         env=env,
+         cwd=cwd,
+         start_new_session=True,
+     )
+
+     stdout_chunks = []
+     stderr_chunks = []
+
+     def read_pipe(pipe, chunks):
+         try:
+             for line in iter(pipe.readline, b''):
+                 chunks.append(line)
+         except Exception:
+             pass
+
+     t_out = threading.Thread(target=read_pipe, args=(proc.stdout, stdout_chunks), daemon=True)
+     t_err = threading.Thread(target=read_pipe, args=(proc.stderr, stderr_chunks), daemon=True)
+     t_out.start()
+     t_err.start()
+
+     # Wait for process or timeout
+     try:
+         proc.wait(timeout=timeout)
+     except subprocess.TimeoutExpired:
+         proc.terminate()
+         try:
+             proc.wait(timeout=5)
+         except subprocess.TimeoutExpired:
+             proc.kill()
+             proc.wait()
+
+     t_out.join(timeout=2)
+     t_err.join(timeout=2)
+
+     stdout = b''.join(stdout_chunks).decode('utf-8', errors='replace')
+     stderr = b''.join(stderr_chunks).decode('utf-8', errors='replace')
+     combined = stdout + '\n' + stderr
+
+     # Check for errors in output
+     has_errors, error_summary = _detect_example_errors(combined)
+
+     # Determine result (check returncode first, then use error detection for signal-killed):
+     # - Zero exit code → success (trust the exit code)
+     # - Positive exit code (process failed normally, e.g., sys.exit(1)) → failure
+     # - Negative exit code (killed by signal, e.g., -9 for SIGKILL) → check output
+     #
+     # IMPORTANT: When we kill the process after timeout, returncode is negative
+     # (the signal number). This is NOT a failure if output has no errors.
+     if proc.returncode is not None and proc.returncode == 0:
+         return 0, stdout, stderr  # Clean exit = success (trust exit code)
+     elif proc.returncode is not None and proc.returncode > 0:
+         return proc.returncode, stdout, stderr  # Process exited with error
+     else:
+         # Killed by signal (returncode < 0 or None) - use error detection
+         # Server-style examples may run until timeout, need to check output
+         if has_errors:
+             return 1, stdout, stderr  # Errors detected in output
+         return 0, stdout, stderr  # No errors, server was running fine
+
+
+ def _execute_tests_and_create_run_report(
+     test_file: Path,
+     basename: str,
+     language: str,
+     target_coverage: float = 90.0,
+     *,
+     code_file: Optional[Path] = None,
+     atomic_state: Optional['AtomicStateUpdate'] = None,
+     test_files: Optional[List[Path]] = None,  # Bug #156: Support multiple test files
+ ) -> RunReport:
+     """Execute tests and create a RunReport with actual results.
+
+     Now supports multiple languages by using get_test_command_for_file()
+     to determine the appropriate test runner.
+
+     Args:
+         test_file: Primary test file (for backward compat)
+         test_files: Optional list of all test files to run (Bug #156)
+     """
+     from .get_test_command import get_test_command_for_file
+
+     timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
+
+     # Bug #156: Use test_files if provided, otherwise just the single test_file
+     all_test_files = test_files if test_files else [test_file]
+
+     # Calculate test file hash for staleness detection (primary file for backward compat)
+     test_hash = calculate_sha256(test_file) if test_file.exists() else None
+
+     # Bug #156: Calculate hashes for ALL test files
+     test_file_hashes = {
+         f.name: calculate_sha256(f)
+         for f in all_test_files
+         if f.exists()
+     } if all_test_files else None
+
+     # Use clean env without TUI-specific vars
+     clean_env = os.environ.copy()
+     for var in ['FORCE_COLOR', 'COLUMNS']:
+         clean_env.pop(var, None)
+
+     try:
+         lang_lower = language.lower()
+
+         # Python: use existing pytest logic with coverage
+         if lang_lower == "python":
+             module_name = test_file.name.replace('test_', '').replace('.py', '')
+             python_executable = detect_host_python_executable()
+
+             cov_target = None
+             if code_file is not None:
+                 cov_target = _python_cov_target_for_test_and_code(test_file, code_file, basename or module_name)
+             else:
+                 cov_target = basename or module_name
+
+             if not cov_target:
+                 cov_target = basename or module_name
+
+             # Bug #156: Run pytest on ALL test files
+             pytest_args = [
+                 python_executable, '-m', 'pytest',
+             ] + [str(f) for f in all_test_files] + [
+                 '-v',
+                 '--tb=short',
+                 f'--cov={cov_target}',
+                 '--cov-report=term-missing'
+             ]
+             result = subprocess.run(
+                 pytest_args,
+                 capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
+             )
+
+             exit_code = result.returncode
+             stdout = result.stdout + (result.stderr or '')
+             tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
+         else:
+             # Non-Python: use language-appropriate test command
+             test_cmd = get_test_command_for_file(str(test_file), language)
+
+             if test_cmd is None:
+                 # No test command available - return report indicating this
+                 report = RunReport(
+                     timestamp=timestamp,
+                     exit_code=127,  # Command not found
+                     tests_passed=0,
+                     tests_failed=0,
+                     coverage=0.0,
+                     test_hash=test_hash,
+                     test_files=test_file_hashes,  # Bug #156
+                 )
+                 save_run_report(asdict(report), basename, language, atomic_state)
+                 return report
+
+             # Run the test command
+             result = subprocess.run(
+                 test_cmd,
+                 shell=True,
+                 capture_output=True,
+                 text=True,
+                 timeout=300,
+                 env=clean_env,
+                 cwd=str(test_file.parent),
+                 stdin=subprocess.DEVNULL,
+                 start_new_session=True
+             )
+
+             exit_code = result.returncode
+             stdout = (result.stdout or '') + '\n' + (result.stderr or '')
+
+             # Parse results based on language
+             tests_passed, tests_failed, coverage = _parse_test_output(stdout, language)
+
          report = RunReport(
              timestamp=timestamp,
              exit_code=exit_code,
              tests_passed=tests_passed,
              tests_failed=tests_failed,
-             coverage=coverage
+             coverage=coverage,
+             test_hash=test_hash,
+             test_files=test_file_hashes,  # Bug #156
          )
-
+
      except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception) as e:
-         # If test execution fails, create a report indicating failure
          report = RunReport(
              timestamp=timestamp,
              exit_code=1,
              tests_passed=0,
              tests_failed=1,
-             coverage=0.0
+             coverage=0.0,
+             test_hash=test_hash,
+             test_files=test_file_hashes,  # Bug #156
          )
-
-     # Save the run report
-     save_run_report(asdict(report), basename, language)
-     return report

- # --- Helper for Click Context ---
+     save_run_report(asdict(report), basename, language, atomic_state)
+     return report

  def _create_mock_context(**kwargs) -> click.Context:
      """Creates a mock Click context object to pass parameters to command functions."""
@@ -227,7 +805,7 @@ def _create_mock_context(**kwargs) -> click.Context:

  def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Dict[str, Any]:
      """Displays the sync log for a given basename and language."""
-     log_file = META_DIR / f"{basename}_{language}_sync.log"
+     log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
      if not log_file.exists():
          print(f"No sync log found for '{basename}' in language '{language}'.")
          return {'success': False, 'errors': ['Log file not found.'], 'log_entries': []}
@@ -242,7 +820,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
      for entry in log_entries:
          timestamp = entry.get('timestamp', 'N/A')

-         # Handle special event entries
          if 'event' in entry:
              event = entry.get('event', 'N/A')
              print(f"[{timestamp[:19]}] EVENT: {event}")
@@ -251,7 +828,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
                  print(f" Details: {details_str}")
              continue

-         # Handle operation entries
          operation = entry.get('operation', 'N/A')
          reason = entry.get('reason', 'N/A')
          success = entry.get('success')
@@ -260,7 +836,6 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
          duration = entry.get('duration')

          if verbose:
-             # Verbose format
              print(f"[{timestamp[:19]}] {operation:<12} | {reason}")
              decision_type = entry.get('decision_type', 'N/A')
              confidence = entry.get('confidence', 'N/A')
@@ -276,14 +851,12 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
              print(f" Estimated Cost: ${estimated_cost:.2f}")

              if 'details' in entry and entry['details']:
-                 # Show details without budget_remaining to avoid clutter
                  details_copy = entry['details'].copy()
                  details_copy.pop('budget_remaining', None)
                  if details_copy:
                      details_str = json.dumps(details_copy, indent=2)
                      print(f" Details: {details_str}")
          else:
-             # Normal format: [timestamp] operation | reason | status cost | duration
              status_icon = "✓" if success else "✗" if success is False else "?"

              cost_info = ""
@@ -318,45 +891,68 @@ def sync_orchestration(
      budget: float = 10.0,
      skip_verify: bool = False,
      skip_tests: bool = False,
-     log: bool = False,
+     dry_run: bool = False,
      force: bool = False,
-     strength: float = 0.5,
+     strength: float = DEFAULT_STRENGTH,
      temperature: float = 0.0,
-     time_param: float = 0.25,  # Renamed to avoid conflict with `time` module
+     time_param: float = 0.25,
      verbose: bool = False,
      quiet: bool = False,
      output_cost: Optional[str] = None,
      review_examples: bool = False,
      local: bool = False,
      context_config: Optional[Dict[str, str]] = None,
+     context_override: Optional[str] = None,
+     confirm_callback: Optional[Callable[[str, str], bool]] = None,
  ) -> Dict[str, Any]:
      """
      Orchestrates the complete PDD sync workflow with parallel animation.
-
-     If log=True, displays the sync log instead of running sync operations.
-     The verbose flag controls the detail level of the log output.
-
-     Returns a dictionary summarizing the outcome of the sync process.
      """
-     if log:
+     # Handle None values from CLI (Issue #194) - defense in depth
+     if target_coverage is None:
+         target_coverage = 90.0
+     if budget is None:
+         budget = 10.0
+     if max_attempts is None:
+         max_attempts = 3
+
+     # Import get_extension at function scope
+     from .sync_determine_operation import get_extension
+
+     if dry_run:
          return _display_sync_log(basename, language, verbose)

      # --- Initialize State and Paths ---
      try:
-         pdd_files = get_pdd_file_paths(basename, language, prompts_dir)
+         pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+     except FileNotFoundError as e:
+         if "test_config.py" in str(e) or "tests/test_" in str(e):
+             pdd_files = {
+                 'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
+                 'code': Path(f"src/{basename}.{get_extension(language)}"),
+                 'example': Path(f"context/{basename}_example.{get_extension(language)}"),
+                 'test': Path(f"tests/test_{basename}.{get_extension(language)}")
+             }
+             if not quiet:
+                 print(f"Note: Test file missing, continuing with sync workflow to generate it")
+         else:
+             print(f"Error constructing paths: {e}")
+             return {
+                 "success": False,
+                 "error": f"Failed to construct paths: {str(e)}",
+                 "operations_completed": [],
+                 "errors": [f"Path construction failed: {str(e)}"]
+             }
      except Exception as e:
-         # Log the error and return early with failure status
          print(f"Error constructing paths: {e}")
          return {
              "success": False,
-             "total_cost": 0.0,
-             "model_name": "",
              "error": f"Failed to construct paths: {str(e)}",
              "operations_completed": [],
              "errors": [f"Path construction failed: {str(e)}"]
          }

-     # Shared state for animation thread
+     # Shared state for animation (passed to App)
      current_function_name_ref = ["initializing"]
      stop_event = threading.Event()
      current_cost_ref = [0.0]
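A hedged sketch of how a caller might drive the reworked entry point, using only parameters visible in this diff (the leading `basename`/`language`/`prompts_dir` arguments and the result keys appear elsewhere in the function; the values are illustrative):

result = sync_orchestration(
    basename="factorial",
    language="python",
    prompts_dir="prompts",
    budget=5.0,     # hard cost ceiling checked at the top of the worker loop
    dry_run=True,   # replaces the old `log` flag: display the sync log and return
)
if not result["success"]:
    print(result["errors"])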
@@ -364,696 +960,744 @@ def sync_orchestration(
364
960
  code_path_ref = [str(pdd_files.get('code', 'N/A'))]
365
961
  example_path_ref = [str(pdd_files.get('example', 'N/A'))]
366
962
  tests_path_ref = [str(pdd_files.get('test', 'N/A'))]
367
- prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref = \
368
- ["blue"], ["blue"], ["blue"], ["blue"]
369
-
370
- # Orchestration state
371
- operations_completed: List[str] = []
372
- skipped_operations: List[str] = []
373
- errors: List[str] = []
374
- start_time = time.time()
375
- animation_thread = None
376
-
377
- # Track operation history for cycle detection
378
- operation_history: List[str] = []
379
- MAX_CYCLE_REPEATS = 2 # Maximum times to allow crash-verify cycle
963
+ prompt_box_color_ref = ["blue"]
964
+ code_box_color_ref = ["blue"]
965
+ example_box_color_ref = ["blue"]
966
+ tests_box_color_ref = ["blue"]
380
967
 
381
- try:
382
- with SyncLock(basename, language):
383
- # Log lock acquisition
384
- log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
385
-
386
- # --- Start Animation Thread ---
387
- animation_thread = threading.Thread(
388
- target=sync_animation,
389
- args=(
390
- current_function_name_ref, stop_event, basename, current_cost_ref, budget,
391
- prompt_box_color_ref, code_box_color_ref, example_box_color_ref, tests_box_color_ref,
392
- prompt_path_ref, code_path_ref, example_path_ref, tests_path_ref
393
- ),
394
- daemon=True
395
- )
396
- animation_thread.start()
397
-
398
- # --- Main Workflow Loop ---
399
- while True:
400
- budget_remaining = budget - current_cost_ref[0]
401
- if current_cost_ref[0] >= budget:
402
- errors.append(f"Budget of ${budget:.2f} exceeded.")
403
- log_sync_event(basename, language, "budget_exceeded", {
404
- "total_cost": current_cost_ref[0],
405
- "budget": budget
406
- })
407
- break
408
-
409
- # Log budget warning when running low
410
- if budget_remaining < budget * 0.2 and budget_remaining > 0:
411
- log_sync_event(basename, language, "budget_warning", {
412
- "remaining": budget_remaining,
413
- "percentage": (budget_remaining / budget) * 100
414
- })
415
-
416
- decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify)
417
- operation = decision.operation
418
-
419
- # Create log entry with decision info
420
- log_entry = create_sync_log_entry(decision, budget_remaining)
421
-
422
- # Track operation history
423
- operation_history.append(operation)
424
-
425
- # Detect crash-verify cycles
426
- if len(operation_history) >= 4:
427
- # Check for repeating crash-verify pattern
428
- recent_ops = operation_history[-4:]
429
- if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
430
- recent_ops == ['verify', 'crash', 'verify', 'crash']):
431
- # Count how many times this cycle has occurred
432
- cycle_count = 0
433
- for i in range(0, len(operation_history) - 1, 2):
434
- if i + 1 < len(operation_history):
435
- if ((operation_history[i] == 'crash' and operation_history[i+1] == 'verify') or
436
- (operation_history[i] == 'verify' and operation_history[i+1] == 'crash')):
437
- cycle_count += 1
438
-
439
- if cycle_count >= MAX_CYCLE_REPEATS:
440
- errors.append(f"Detected crash-verify cycle repeated {cycle_count} times. Breaking cycle.")
441
- errors.append("The example file may have syntax errors that couldn't be automatically fixed.")
442
- log_sync_event(basename, language, "cycle_detected", {
443
- "cycle_type": "crash-verify",
444
- "cycle_count": cycle_count,
445
- "operation_history": operation_history[-10:] # Last 10 operations
446
- })
447
- break
968
+ # Mutable container for the app reference (set after app creation)
969
+ # This allows the worker to access app.request_confirmation()
970
+ app_ref: List[Optional['SyncApp']] = [None]
448
971
 
449
- # Detect consecutive fix operations (infinite fix loop protection)
450
- if operation == 'fix':
451
- # Count consecutive fix operations
452
- consecutive_fixes = 0
453
- for i in range(len(operation_history) - 1, -1, -1):
454
- if operation_history[i] == 'fix':
455
- consecutive_fixes += 1
456
- else:
457
- break
458
-
459
- MAX_CONSECUTIVE_FIXES = 5 # Allow up to 5 consecutive fix attempts
460
- if consecutive_fixes >= MAX_CONSECUTIVE_FIXES:
461
- errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
462
- errors.append("The test failures may not be resolvable by automated fixes in this environment.")
463
- log_sync_event(basename, language, "cycle_detected", {
464
- "cycle_type": "consecutive-fix",
465
- "consecutive_count": consecutive_fixes,
466
- "operation_history": operation_history[-10:] # Last 10 operations
972
+ # Progress callback ref for TUI ProgressBar updates during auto-deps
973
+ progress_callback_ref: List[Optional[Callable[[int, int], None]]] = [None]
974
+
975
+ # Track if user has already confirmed overwrite (to avoid asking multiple times)
976
+ user_confirmed_overwrite: List[bool] = [False]
977
+
978
+ def get_confirm_callback() -> Optional[Callable[[str, str], bool]]:
979
+ """Get the confirmation callback from the app if available.
980
+
981
+ Once user confirms, we remember it so subsequent operations don't ask again.
982
+
983
+ Fix for Issue #277: In headless mode, we now return a wrapper callback
984
+ that uses click.confirm AND sets user_confirmed_overwrite[0] = True,
985
+ so subsequent calls auto-confirm instead of prompting repeatedly.
986
+ """
987
+ if user_confirmed_overwrite[0]:
988
+ # User already confirmed, return a callback that always returns True
989
+ return lambda msg, title: True
990
+
991
+ if app_ref[0] is not None:
992
+ def confirming_callback(msg: str, title: str) -> bool:
993
+ result = app_ref[0].request_confirmation(msg, title)
994
+ if result:
995
+ user_confirmed_overwrite[0] = True
996
+ return result
997
+ return confirming_callback
998
+
999
+ # Fix #277: In headless mode (app_ref is None), create a wrapper callback
1000
+ # that sets the flag after confirmation, preventing repeated prompts
1001
+ if confirm_callback is None:
1002
+ def headless_confirming_callback(msg: str, title: str) -> bool:
1003
+ """Headless mode callback that remembers user confirmation."""
1004
+ try:
1005
+ prompt = msg or "Overwrite existing files?"
1006
+ result = click.confirm(
1007
+ click.style(prompt, fg="yellow"),
1008
+ default=True,
1009
+ show_default=True
1010
+ )
1011
+ except (click.Abort, EOFError):
1012
+ return False
1013
+ if result:
1014
+ user_confirmed_overwrite[0] = True
1015
+ return result
1016
+ return headless_confirming_callback
1017
+
1018
+ return confirm_callback # Fall back to provided callback
1019
+
1020
+ def sync_worker_logic():
1021
+ """
1022
+ The main loop of sync logic, run in a worker thread by Textual App.
1023
+ """
1024
+ operations_completed: List[str] = []
1025
+ skipped_operations: List[str] = []
1026
+ errors: List[str] = []
1027
+ start_time = time.time()
1028
+ last_model_name: str = ""
1029
+ operation_history: List[str] = []
1030
+ MAX_CYCLE_REPEATS = 2
1031
+
1032
+ # Helper function to print inside worker (goes to RichLog via redirection)
1033
+ # print() will work if sys.stdout is redirected.
1034
+
1035
+ try:
1036
+ with SyncLock(basename, language):
1037
+ log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
1038
+
1039
+ while True:
1040
+ budget_remaining = budget - current_cost_ref[0]
1041
+ if current_cost_ref[0] >= budget:
1042
+ errors.append(f"Budget of ${budget:.2f} exceeded.")
1043
+ log_sync_event(basename, language, "budget_exceeded", {
1044
+ "total_cost": current_cost_ref[0],
1045
+ "budget": budget
467
1046
  })
468
1047
  break
469
1048
 
470
- if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
471
- current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
472
-
473
- # Log these final operations
474
- success = operation in ['all_synced', 'nothing']
475
- error_msg = None
476
- if operation == 'fail_and_request_manual_merge':
477
- errors.append(f"Manual merge required: {decision.reason}")
478
- error_msg = f"Manual merge required: {decision.reason}"
479
- elif operation == 'error':
480
- errors.append(f"Error determining operation: {decision.reason}")
481
- error_msg = f"Error determining operation: {decision.reason}"
482
- elif operation == 'analyze_conflict':
483
- errors.append(f"Conflict detected: {decision.reason}")
484
- error_msg = f"Conflict detected: {decision.reason}"
485
-
486
- # Update log entry for final operation
487
- update_sync_log_entry(log_entry, {
488
- 'success': success,
489
- 'cost': 0.0,
490
- 'model': 'none',
491
- 'error': error_msg
492
- }, 0.0)
493
- append_sync_log(basename, language, log_entry)
494
-
495
- break
496
-
497
- # Handle skips
498
- if operation == 'verify' and (skip_verify or skip_tests):
499
- # Skip verification if explicitly requested OR if tests are skipped (can't verify without tests)
500
- skipped_operations.append('verify')
501
- skip_reason = 'skip_verify' if skip_verify else 'skip_tests_implies_skip_verify'
502
-
503
- # Update log entry for skipped operation
504
- update_sync_log_entry(log_entry, {
505
- 'success': True,
506
- 'cost': 0.0,
507
- 'model': 'skipped',
508
- 'error': None
509
- }, 0.0)
510
- log_entry['details']['skip_reason'] = skip_reason
511
- append_sync_log(basename, language, log_entry)
1049
+ if budget_remaining < budget * 0.2 and budget_remaining > 0:
1050
+ log_sync_event(basename, language, "budget_warning", {
1051
+ "remaining": budget_remaining,
1052
+ "percentage": (budget_remaining / budget) * 100
1053
+ })
1054
+
1055
+ decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
1056
+ operation = decision.operation
 
-                    report_data = RunReport(
-                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                        exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                    )
-                    save_run_report(asdict(report_data), basename, language)
-                    _save_operation_fingerprint(basename, language, 'verify', pdd_files, 0.0, skip_reason)
-                    continue
-                if operation == 'test' and skip_tests:
-                    skipped_operations.append('test')
+                log_entry = create_sync_log_entry(decision, budget_remaining)
+                operation_history.append(operation)
 
-                    # Update log entry for skipped operation
-                    update_sync_log_entry(log_entry, {
-                        'success': True,
-                        'cost': 0.0,
-                        'model': 'skipped',
-                        'error': None
-                    }, 0.0)
-                    log_entry['details']['skip_reason'] = 'skip_tests'
-                    append_sync_log(basename, language, log_entry)
+                # Cycle detection logic
+                if len(operation_history) >= 3:
+                    recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
+                    if len(recent_auto_deps) >= 2:
+                        errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
+                        log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
+                        operation = 'generate'
+                        decision.operation = 'generate'  # Update decision too
+
+                # Bug #4 fix: Detect crash-verify cycle pattern
+                # The pattern [crash, verify, crash, verify] or [verify, crash, verify, crash]
+                # represents 2 iterations of the alternating cycle, so break immediately
+                if len(operation_history) >= 4:
+                    recent_ops = operation_history[-4:]
+                    if (recent_ops == ['crash', 'verify', 'crash', 'verify'] or
+                            recent_ops == ['verify', 'crash', 'verify', 'crash']):
+                        # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
+                        errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
+                        log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
+                        break
+
+                # Bug #4 fix: Detect test-fix cycle pattern
+                # The pattern [test, fix, test, fix] or [fix, test, fix, test]
+                # represents 2 iterations of the alternating cycle, so break immediately
+                if len(operation_history) >= 4:
+                    recent_ops = operation_history[-4:]
+                    if (recent_ops == ['test', 'fix', 'test', 'fix'] or
+                            recent_ops == ['fix', 'test', 'fix', 'test']):
+                        # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
+                        errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
+                        log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
+                        break
+
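The two alternating-cycle checks added above share one shape: a window of the last four operations matching either phase of an A-B-A-B pattern means the loop is ping-ponging between two operations without converging. A compact sketch of that check, factored into a helper (hypothetical name, not part of the package):

```python
# Detects the A-B-A-B ping-pong pattern used by the crash-verify and
# test-fix cycle guards in the diff above.
def is_alternating_cycle(history: list[str], a: str, b: str) -> bool:
    if len(history) < 4:
        return False
    tail = history[-4:]
    return tail == [a, b, a, b] or tail == [b, a, b, a]

assert is_alternating_cycle(['generate', 'crash', 'verify', 'crash', 'verify'], 'crash', 'verify')
assert not is_alternating_cycle(['test', 'fix', 'test', 'test'], 'test', 'fix')
```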
+                if operation == 'fix':
+                    consecutive_fixes = 0
+                    for i in range(len(operation_history) - 1, -1, -1):
+                        if operation_history[i] == 'fix':
+                            consecutive_fixes += 1
+                        else:
+                            break
+                    if consecutive_fixes >= 5:
+                        errors.append(f"Detected {consecutive_fixes} consecutive fix operations. Breaking infinite fix loop.")
+                        break
+
+                if operation == 'test':
+                    consecutive_tests = 0
+                    for i in range(len(operation_history) - 1, -1, -1):
+                        if operation_history[i] == 'test':
+                            consecutive_tests += 1
+                        else:
+                            break
+                    if consecutive_tests >= MAX_CONSECUTIVE_TESTS:
+                        errors.append(f"Detected {consecutive_tests} consecutive test operations. Breaking infinite test loop.")
+                        break
+
+                # Bug #157 fix: Prevent infinite crash retry loops
+                if operation == 'crash':
+                    consecutive_crashes = 0
+                    for i in range(len(operation_history) - 1, -1, -1):
+                        if operation_history[i] == 'crash':
+                            consecutive_crashes += 1
+                        else:
+                            break
+                    if consecutive_crashes >= MAX_CONSECUTIVE_CRASHES:
+                        errors.append(f"Detected {consecutive_crashes} consecutive crash operations. Breaking infinite crash loop.")
+                        break
+
+                if operation == 'test_extend':
+                    # Count test_extend attempts to prevent infinite loop
+                    extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
+                    if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
+                        # Accept current coverage after max attempts
+                        log_sync_event(basename, language, "test_extend_limit", {
+                            "attempts": extend_attempts,
+                            "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
+                            "reason": "Accepting current coverage after max extend attempts"
+                        })
+                        success = True
+                        break
+
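The three consecutive-operation guards above (fix, test, crash) all count the trailing run of identical entries in the history with a reverse loop. An equivalent standalone helper, shown only to make the shared shape explicit (illustrative, not the package's code):

```python
# Count how many times `op` repeats at the end of the history.
from itertools import takewhile

def trailing_run(history: list[str], op: str) -> int:
    return sum(1 for _ in takewhile(lambda x: x == op, reversed(history)))

assert trailing_run(['test', 'fix', 'fix', 'fix'], 'fix') == 3
assert trailing_run(['fix', 'test'], 'fix') == 0
```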
+                if operation in ['all_synced', 'nothing', 'fail_and_request_manual_merge', 'error', 'analyze_conflict']:
+                    current_function_name_ref[0] = "synced" if operation in ['all_synced', 'nothing'] else "conflict"
+                    success = operation in ['all_synced', 'nothing']
+                    error_msg = None
+                    if operation == 'fail_and_request_manual_merge':
+                        errors.append(f"Manual merge required: {decision.reason}")
+                        error_msg = decision.reason
+                    elif operation == 'error':
+                        errors.append(f"Error determining operation: {decision.reason}")
+                        error_msg = decision.reason
+                    elif operation == 'analyze_conflict':
+                        errors.append(f"Conflict detected: {decision.reason}")
+                        error_msg = decision.reason
+
+                    update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    break
 
+                # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
+                # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
+                if operation == 'verify' and (skip_verify or skip_tests):
+                    skipped_operations.append('verify')
+                    update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                    _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
+                    continue
+                if operation == 'test' and skip_tests:
+                    skipped_operations.append('test')
+                    update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                    _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
+                    continue
+                if operation == 'crash' and (skip_tests or skip_verify):
+                    skipped_operations.append('crash')
+                    update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
+                    append_sync_log(basename, language, log_entry)
+                    # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
+                    _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
+                    # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
+                    # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
+                    current_hashes = calculate_current_hashes(pdd_files)
+                    synthetic_report = RunReport(
+                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                        exit_code=0,  # Assume success since we're skipping validation
+                        tests_passed=0,
+                        tests_failed=0,
+                        coverage=0.0,
+                        test_hash=current_hashes.get('test_hash')
+                    )
+                    save_run_report(asdict(synthetic_report), basename, language)
+                    continue
+
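The skip bookkeeping added above has two moving parts: a 'skip:'-prefixed fingerprint so completeness checks can tell "ran" from "skipped", and (for a skipped crash) a passing synthetic run report so the decision engine stops proposing 'crash'. A simplified sketch of the fingerprint side; the file layout and field names here are assumptions for illustration only:

```python
# Hedged sketch: persist a skip marker the way the 'skip:' prefix above
# implies. Real fingerprint files in the package may be shaped differently.
import datetime
import json
from pathlib import Path

def record_skip(meta_dir: Path, basename: str, operation: str) -> None:
    fingerprint = {
        "operation": f"skip:{operation}",  # prefix marks a skip, not a run
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "cost": 0.0,
    }
    path = meta_dir / f"{basename}_{operation}_fingerprint.json"
    path.write_text(json.dumps(fingerprint, indent=2))
```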
+                current_function_name_ref[0] = operation
+                ctx = _create_mock_context(
+                    force=force, strength=strength, temperature=temperature, time=time_param,
+                    verbose=verbose, quiet=quiet, output_cost=output_cost,
+                    review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
+                    max_attempts=max_attempts, target_coverage=target_coverage,
+                    confirm_callback=get_confirm_callback(),
+                    context=context_override
                 )
-                    save_run_report(asdict(report_data), basename, language)
-                    _save_operation_fingerprint(basename, language, 'test', pdd_files, 0.0, 'skipped')
-                    continue
-                if operation == 'crash' and skip_tests:
-                    # Skip crash operations when tests are skipped since crash fixes usually require test execution
-                    skipped_operations.append('crash')
 
-                    # Update log entry for skipped operation
-                    update_sync_log_entry(log_entry, {
-                        'success': True,
-                        'cost': 0.0,
-                        'model': 'skipped',
-                        'error': None
-                    }, 0.0)
-                    log_entry['details']['skip_reason'] = 'skip_tests'
-                    append_sync_log(basename, language, log_entry)
-
-                    # Create a dummy run report indicating crash was skipped
-                    report_data = RunReport(
-                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                        exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                    )
-                    save_run_report(asdict(report_data), basename, language)
-                    _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped')
-                    continue
-
-                current_function_name_ref[0] = operation
-                ctx = _create_mock_context(
-                    force=force, strength=strength, temperature=temperature, time=time_param,
-                    verbose=verbose, quiet=quiet, output_cost=output_cost,
-                    review_examples=review_examples, local=local, budget=budget - current_cost_ref[0],
-                    max_attempts=max_attempts, target_coverage=target_coverage
-                )
-
-                result = {}
-                success = False
-                start_time = time.time()  # Track execution time
+                result = {}
+                success = False
+                op_start_time = time.time()
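The worker drives the same command entry points the CLI uses, so it fabricates the small slice of a Click context those functions read. A minimal sketch of that idea, under the assumption that the command mains only touch per-invocation options; the real _create_mock_context may differ:

```python
# Hedged stand-in for a click.Context: enough surface for command mains
# that read shared options and parameters.
from types import SimpleNamespace

def make_mock_ctx(**params):
    return SimpleNamespace(obj=dict(params), params=dict(params))

ctx = make_mock_ctx(force=False, quiet=True, budget=2.5)
assert ctx.obj["budget"] == 2.5
```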
 
-                # --- Execute Operation ---
-                try:
-                    if operation == 'auto-deps':
-                        # Save the modified prompt to a temporary location
-                        temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
-
-                        # Read original prompt content to compare later
-                        original_content = pdd_files['prompt'].read_text(encoding='utf-8')
-
-                        result = auto_deps_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            directory_path=examples_dir,
-                            auto_deps_csv_path="project_dependencies.csv",
-                            output=temp_output,
-                            force_scan=False  # Don't force scan every time
-                        )
-
-                        # Only move the temp file back if content actually changed
-                        if Path(temp_output).exists():
-                            import shutil
-                            new_content = Path(temp_output).read_text(encoding='utf-8')
-                            if new_content != original_content:
-                                shutil.move(temp_output, str(pdd_files['prompt']))
-                            else:
-                                # No changes needed, remove temp file
-                                Path(temp_output).unlink()
-                                # Mark as successful with no changes
-                                result = (new_content, 0.0, 'no-changes')
-                    elif operation == 'generate':
-                        result = code_generator_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            output=str(pdd_files['code']),
-                            original_prompt_file_path=None,
-                            force_incremental_flag=False
-                        )
-                    elif operation == 'example':
-                        print(f"DEBUG SYNC: pdd_files['example'] = {pdd_files['example']}")
-                        print(f"DEBUG SYNC: str(pdd_files['example']) = {str(pdd_files['example'])}")
-                        result = context_generator_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            code_file=str(pdd_files['code']),
-                            output=str(pdd_files['example'])
-                        )
-                    elif operation == 'crash':
-                        # Validate required files exist before attempting crash operation
-                        required_files = [pdd_files['code'], pdd_files['example']]
-                        missing_files = [f for f in required_files if not f.exists()]
-
-                        if missing_files:
-                            # Skip crash operation if required files are missing
-                            print(f"Skipping crash operation - missing files: {[f.name for f in missing_files]}")
-                            skipped_operations.append('crash')
-
-                            # Update log entry for skipped operation
-                            update_sync_log_entry(log_entry, {
-                                'success': True,
-                                'cost': 0.0,
-                                'model': 'skipped',
-                                'error': None
-                            }, 0.0)
-                            log_entry['details']['skip_reason'] = 'missing_files'
-                            log_entry['details']['missing_files'] = [f.name for f in missing_files]
-                            append_sync_log(basename, language, log_entry)
-
-                            # Create a dummy run report indicating crash was skipped due to missing files
-                            report_data = RunReport(
-                                timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                            )
-                            save_run_report(asdict(report_data), basename, language)
-                            _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_missing_files')
-                            continue
-                        else:
-                            # Check if we have a run report indicating failures that need crash fixing
-                            current_run_report = read_run_report(basename, language)
-                            crash_log_content = ""
+                # Issue #159 fix: Use atomic state for consistent run_report + fingerprint writes
+                with AtomicStateUpdate(basename, language) as atomic_state:
+
+                    # --- Execute Operation ---
+                    try:
+                        if operation == 'auto-deps':
+                            temp_output = str(pdd_files['prompt']).replace('.prompt', '_with_deps.prompt')
+                            original_content = pdd_files['prompt'].read_text(encoding='utf-8')
+                            result = auto_deps_main(
+                                ctx,
+                                prompt_file=str(pdd_files['prompt']),
+                                directory_path=examples_dir,
+                                auto_deps_csv_path="project_dependencies.csv",
+                                output=temp_output,
+                                force_scan=False,
+                                progress_callback=progress_callback_ref[0]
+                            )
+                            if Path(temp_output).exists():
+                                import shutil
+                                new_content = Path(temp_output).read_text(encoding='utf-8')
+                                if new_content != original_content:
+                                    shutil.move(temp_output, str(pdd_files['prompt']))
+                                else:
+                                    Path(temp_output).unlink()
+                                    result = (new_content, 0.0, 'no-changes')
+                        elif operation == 'generate':
+                            # Ensure code directory exists before generating
+                            pdd_files['code'].parent.mkdir(parents=True, exist_ok=True)
+                            # Use absolute paths to avoid path_resolution_mode mismatch between sync (cwd) and generate (config_base)
+                            result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt'].resolve()), output=str(pdd_files['code'].resolve()), original_prompt_file_path=None, force_incremental_flag=False)
+                            # Clear stale run_report so crash/verify is required for newly generated code
+                            run_report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
+                            run_report_file.unlink(missing_ok=True)
+                        elif operation == 'example':
+                            # Ensure example directory exists before generating
+                            pdd_files['example'].parent.mkdir(parents=True, exist_ok=True)
+                            # Use absolute paths to avoid path_resolution_mode mismatch between sync (cwd) and example (config_base)
+                            result = context_generator_main(ctx, prompt_file=str(pdd_files['prompt'].resolve()), code_file=str(pdd_files['code'].resolve()), output=str(pdd_files['example'].resolve()))
+                        elif operation == 'crash':
+                            required_files = [pdd_files['code'], pdd_files['example']]
+                            missing_files = [f for f in required_files if not f.exists()]
+                            if missing_files:
+                                skipped_operations.append('crash')
+                                continue
 
-                            # If we have a run report with exit_code != 0, that indicates a crash that needs fixing
-                            if current_run_report and current_run_report.exit_code != 0:
-                                # We have a crash to fix based on the run report
-                                crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n\n"
-
-                                # Try to run the example program to get additional error details
-                                try:
-                                    example_result = subprocess.run(
-                                        ['python', str(pdd_files['example'])],
-                                        capture_output=True,
-                                        text=True,
-                                        timeout=60,
-                                        env=os.environ.copy(),
-                                        cwd=str(pdd_files['example'].parent)
+                            # Crash handling logic (simplified copy from original)
+                            current_run_report = read_run_report(basename, language)
+                            crash_log_content = ""
 
+                            # Check for crash condition (either run report says so, or we check manually)
+                            has_crash = False
+                            if current_run_report and current_run_report.exit_code != 0:
+                                has_crash = True
+                                crash_log_content = f"Test execution failed with exit code: {current_run_report.exit_code}\n"
+                            else:
+                                # Manual check - run the example to see if it crashes
+                                env = os.environ.copy()
+                                src_dir = Path.cwd() / 'src'
+                                env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
+                                # Remove TUI-specific env vars that might contaminate subprocess
+                                for var in ['FORCE_COLOR', 'COLUMNS']:
+                                    env.pop(var, None)
+                                # Bug fix: Use sys.executable to match crash_main's Python interpreter
+                                # and do NOT set cwd - inherit from the pdd invocation directory
+                                # to match crash_main behavior. Setting cwd to the example's parent breaks imports.
+                                example_path = str(pdd_files['example'].resolve())
+                                cmd_parts = [sys.executable, example_path]
+                                # Use error-detection runner that handles server-style examples
+                                returncode, stdout, stderr = _run_example_with_error_detection(
+                                    cmd_parts,
+                                    env=env,
+                                    timeout=60
                                 )
 
-                                    if example_result.returncode != 0:
-                                        crash_log_content += f"Example program also failed with exit code: {example_result.returncode}\n\n"
-                                        if example_result.stdout:
-                                            crash_log_content += f"STDOUT:\n{example_result.stdout}\n\n"
-                                        if example_result.stderr:
-                                            crash_log_content += f"STDERR:\n{example_result.stderr}\n"
-
-                                        # Check for syntax errors specifically
-                                        if "SyntaxError" in example_result.stderr:
-                                            crash_log_content = f"SYNTAX ERROR DETECTED:\n\n{crash_log_content}"
+                                class ExampleResult:
+                                    def __init__(self, rc, out, err):
+                                        self.returncode = rc
+                                        self.stdout = out
+                                        self.stderr = err
+
+                                ex_res = ExampleResult(returncode, stdout, stderr)
+                                if ex_res.returncode != 0:
+                                    has_crash = True
+                                    crash_log_content = f"Example failed with exit code: {ex_res.returncode}\nSTDOUT:\n{ex_res.stdout}\nSTDERR:\n{ex_res.stderr}\n"
+                                    if "SyntaxError" in ex_res.stderr:
+                                        crash_log_content = "SYNTAX ERROR DETECTED:\n" + crash_log_content
                                 else:
-                                        crash_log_content += "Example program runs successfully, but tests are failing.\n"
-                                        crash_log_content += "This may indicate issues with test execution or test file syntax.\n"
-
-                                except subprocess.TimeoutExpired:
-                                    crash_log_content += "Example program execution timed out after 60 seconds\n"
-                                    crash_log_content += "This may indicate an infinite loop or the program is waiting for input.\n"
-                                except Exception as e:
-                                    crash_log_content += f"Error running example program: {str(e)}\n"
-                                    crash_log_content += f"Program path: {pdd_files['example']}\n"
-                            else:
-                                # No crash detected, skip crash operation
-                                print("No crash detected in run report, skipping crash fix")
-                                skipped_operations.append('crash')
-
-                                # Update log entry for skipped operation
-                                update_sync_log_entry(log_entry, {
-                                    'success': True,
-                                    'cost': 0.0,
-                                    'model': 'skipped',
-                                    'error': None
-                                }, time.time() - start_time)
-                                log_entry['details']['skip_reason'] = 'no_crash'
-                                append_sync_log(basename, language, log_entry)
-
-                                report_data = RunReport(
-                                    timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                    exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                                )
-                                save_run_report(asdict(report_data), basename, language)
-                                _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'no_crash')
-                                continue
-
-                            # Write actual error content or fallback
-                            if not crash_log_content:
-                                crash_log_content = "Unknown crash error - program failed but no error output captured"
-
-                            Path("crash.log").write_text(crash_log_content)
-
-                            try:
-                                result = crash_main(
-                                    ctx,
-                                    prompt_file=str(pdd_files['prompt']),
-                                    code_file=str(pdd_files['code']),
-                                    program_file=str(pdd_files['example']),
-                                    error_file="crash.log",
-                                    output=str(pdd_files['code']),
-                                    output_program=str(pdd_files['example']),
-                                    loop=True,
-                                    max_attempts=max_attempts,
-                                    budget=budget - current_cost_ref[0]
-                                )
-                            except (RuntimeError, Exception) as e:
-                                error_str = str(e)
-                                if ("LLM returned None" in error_str or
-                                        "LLM failed to analyze errors" in error_str):
-                                    # Skip crash operation for LLM failures
-                                    print(f"Skipping crash operation due to LLM error: {e}")
-                                    skipped_operations.append('crash')
-
-                                    # Update log entry for skipped operation
-                                    update_sync_log_entry(log_entry, {
-                                        'success': False,
-                                        'cost': 0.0,
-                                        'model': 'skipped',
-                                        'error': f"LLM error: {str(e)}"
-                                    }, time.time() - start_time)
-                                    log_entry['details']['skip_reason'] = 'llm_error'
-                                    append_sync_log(basename, language, log_entry)
-
-                                    report_data = RunReport(
-                                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                        exit_code=0, tests_passed=0, tests_failed=0, coverage=0.0
-                                    )
-                                    save_run_report(asdict(report_data), basename, language)
-                                    _save_operation_fingerprint(basename, language, 'crash', pdd_files, 0.0, 'skipped_llm_error')
+                                    # No crash - save run report with exit_code=0 so sync_determine_operation
+                                    # knows the example was tested and passed (prevents infinite loop)
+                                    # Include test_hash for staleness detection
+                                    test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                    report = RunReport(
+                                        datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                        exit_code=0,
+                                        tests_passed=1,
+                                        tests_failed=0,
+                                        coverage=0.0,
+                                        test_hash=test_hash
+                                    )
+                                    save_run_report(asdict(report), basename, language)
+                                    skipped_operations.append('crash')
                                     continue
 
+                            if has_crash:
+                                # Try auto-fix for common import errors before expensive agentic call
+                                auto_fixed, auto_fix_msg = _try_auto_fix_import_error(
+                                    crash_log_content,
+                                    pdd_files['code'],
+                                    pdd_files['example']
                                 )
+                                if auto_fixed:
+                                    log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
+                                    # Retry running the example after auto-fix
+                                    retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
+                                        cmd_parts,
+                                        env=env,
+                                        timeout=60
+                                    )
+                                    if retry_returncode == 0:
+                                        # Auto-fix worked! Save run report and continue
+                                        log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
+                                        test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                                        report = RunReport(
+                                            datetime.datetime.now(datetime.timezone.utc).isoformat(),
+                                            exit_code=0,
+                                            tests_passed=1,
+                                            tests_failed=0,
+                                            coverage=0.0,
+                                            test_hash=test_hash
+                                        )
+                                        save_run_report(asdict(report), basename, language)
+                                        result = (True, 0.0, 'auto-fix')
+                                        success = True
+                                        actual_cost = 0.0
+                                        model_name = 'auto-fix'
+                                        # Update crash_log_content for logging
+                                        crash_log_content = f"Auto-fixed: {auto_fix_msg}"
+                                        continue  # Skip crash_main, move to next operation
+                                    else:
+                                        # Auto-fix didn't fully work, update error log and proceed
+                                        crash_log_content = f"Auto-fix attempted ({auto_fix_msg}) but still failing:\nRETRY STDOUT:\n{retry_stdout}\nRETRY STDERR:\n{retry_stderr}\n"
+
+                                Path("crash.log").write_text(crash_log_content)
+                                try:
+                                    # For non-Python languages, set max_attempts=0 to skip iterative loop
+                                    # and go directly to agentic fallback
+                                    effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
+                                    result = crash_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), error_file="crash.log", output=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
+                                except Exception as e:
+                                    print(f"Crash fix failed: {e}")
+                                    skipped_operations.append('crash')
+                                    continue
+
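The crash branch above leans on two internal helpers whose implementations are not shown in this diff: _run_example_with_error_detection (run the example, but treat a traceback from a long-running, server-style program as failure) and _try_auto_fix_import_error (patch trivial import mistakes before paying for an agentic fix). A hedged sketch of the runner's plausible shape; the real helpers may differ:

```python
# Hedged sketch: run a command with a timeout, and on timeout decide
# pass/fail by whether stderr already contains a traceback, so a
# still-running server is not misreported as a crash.
import subprocess

def run_with_error_detection(cmd, env=None, timeout=60):
    try:
        proc = subprocess.run(cmd, env=env, timeout=timeout,
                              capture_output=True, text=True)
        return proc.returncode, proc.stdout, proc.stderr
    except subprocess.TimeoutExpired as exc:
        # Captured output on timeout may be str, bytes, or None.
        def _coerce(value):
            if value is None:
                return ""
            return value.decode() if isinstance(value, bytes) else value
        out, err = _coerce(exc.stdout), _coerce(exc.stderr)
        rc = 1 if "Traceback" in err else 0
        return rc, out, err
```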
+                        elif operation == 'verify':
+                            if not pdd_files['example'].exists():
+                                skipped_operations.append('verify')
                                 continue
-                        else:
-                            # Re-raise other exceptions
-                            raise
-                    elif operation == 'verify':
-                        result = fix_verification_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            code_file=str(pdd_files['code']),
-                            program_file=str(pdd_files['example']),
-                            output_results=f"{basename}_verify_results.log",
-                            output_code=str(pdd_files['code']),
-                            output_program=str(pdd_files['example']),
-                            loop=True,
-                            verification_program=str(pdd_files['example']),
-                            max_attempts=max_attempts,
-                            budget=budget - current_cost_ref[0]
-                        )
-                    elif operation == 'test':
-                        # First, generate the test file
-                        result = cmd_test_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            code_file=str(pdd_files['code']),
-                            output=str(pdd_files['test']),
-                            language=language,
-                            coverage_report=None,
-                            existing_tests=None,
-                            target_coverage=target_coverage,
-                            merge=False
-                        )
-
-                        # After successful test generation, execute the tests and create run report
-                        # This enables the next sync iteration to detect test failures and trigger fix
-                        if isinstance(result, dict) and result.get('success', False):
-                            try:
-                                test_file = pdd_files['test']
-                                if test_file.exists():
+                            # For non-Python languages, set max_attempts=0 to skip iterative loop
+                            # and go directly to agentic fallback
+                            effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
+                            result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
+                        elif operation == 'test':
+                            pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
+                            # Use merge=True when test file exists to preserve fixes and append new tests
+                            # instead of regenerating from scratch (which would overwrite fixes)
+                            test_file_exists = pdd_files['test'].exists()
+                            result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=[str(pdd_files['test'])] if test_file_exists else None, target_coverage=target_coverage, merge=test_file_exists, strength=strength, temperature=temperature)
+                            if pdd_files['test'].exists():
                                 _execute_tests_and_create_run_report(
-                                    test_file, basename, language, target_coverage
+                                    pdd_files['test'],
+                                    basename,
+                                    language,
+                                    target_coverage,
+                                    code_file=pdd_files.get("code"),
+                                    atomic_state=atomic_state,
+                                    test_files=pdd_files.get('test_files'),  # Bug #156
+                                )
+                        elif operation == 'test_extend':
+                            # Extend existing tests to improve coverage
+                            # Uses existing_tests and merge=True to add more test cases
+                            pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
+                            if pdd_files['test'].exists():
+                                existing_test_path = str(pdd_files['test'])
+                                result = cmd_test_main(
+                                    ctx,
+                                    prompt_file=str(pdd_files['prompt']),
+                                    code_file=str(pdd_files['code']),
+                                    output=str(pdd_files['test']),
+                                    language=language,
+                                    coverage_report=None,
+                                    existing_tests=[existing_test_path],
+                                    target_coverage=target_coverage,
+                                    merge=True,
+                                    strength=strength,
+                                    temperature=temperature
                                 )
-                            except Exception as e:
-                                # Don't fail the entire operation if test execution fails
-                                # Just log it - the test file generation was successful
-                                print(f"Warning: Test execution failed: {e}")
-                        elif isinstance(result, tuple) and len(result) >= 3:
-                            # Handle tuple return format - assume success and execute tests
-                            try:
-                                test_file = pdd_files['test']
-                                if test_file.exists():
                                 _execute_tests_and_create_run_report(
-                                    test_file, basename, language, target_coverage
+                                    pdd_files['test'],
+                                    basename,
+                                    language,
+                                    target_coverage,
+                                    code_file=pdd_files.get("code"),
+                                    atomic_state=atomic_state,
+                                    test_files=pdd_files.get('test_files'),  # Bug #156
                                 )
-                            except Exception as e:
-                                print(f"Warning: Test execution failed: {e}")
-                    elif operation == 'fix':
-                        # Create error file with actual test failure information
-                        error_file_path = Path("fix_errors.log")
-
-                        # Try to get actual test failure details from latest run
-                        try:
-                            run_report = read_run_report(basename, language)
-                            if run_report and run_report.tests_failed > 0:
-                                # Run the tests again to capture actual error output
-                                # Use environment-aware Python executable for pytest execution
-                                python_executable = detect_host_python_executable()
-                                test_result = subprocess.run([
-                                    python_executable, '-m', 'pytest',
-                                    str(pdd_files['test']),
-                                    '-v', '--tb=short'
-                                ], capture_output=True, text=True, timeout=300)
-
-                                error_content = f"Test failures detected ({run_report.tests_failed} failed tests):\n\n"
-                                error_content += "STDOUT:\n" + test_result.stdout + "\n\n"
-                                error_content += "STDERR:\n" + test_result.stderr
+                            else:
+                                # No existing test file, fall back to regular test generation
+                                result = cmd_test_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), output=str(pdd_files['test']), language=language, coverage_report=None, existing_tests=None, target_coverage=target_coverage, merge=False, strength=strength, temperature=temperature)
+                                if pdd_files['test'].exists():
+                                    _execute_tests_and_create_run_report(
+                                        pdd_files['test'],
+                                        basename,
+                                        language,
+                                        target_coverage,
+                                        code_file=pdd_files.get("code"),
+                                        atomic_state=atomic_state,
+                                        test_files=pdd_files.get('test_files'),  # Bug #156
+                                    )
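Both the test and test_extend branches end by calling _execute_tests_and_create_run_report so the next iteration of the loop can see fresh pass/fail state. A hedged sketch of what that step plausibly does; the report fields, file layout, and pass/fail counting below are illustrative assumptions, not the package's exact implementation:

```python
# Hedged sketch: run pytest on a generated test file and persist a small
# JSON run report that the decision engine reads on the next iteration.
import datetime
import json
import subprocess
import sys
from pathlib import Path

def execute_tests_and_report(test_file: Path, report_path: Path) -> None:
    proc = subprocess.run(
        [sys.executable, "-m", "pytest", str(test_file), "-v", "--tb=short"],
        capture_output=True, text=True, timeout=300,
    )
    report = {
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "exit_code": proc.returncode,
        # Rough heuristic counts from pytest's verbose output.
        "tests_passed": proc.stdout.count(" PASSED"),
        "tests_failed": proc.stdout.count(" FAILED"),
    }
    report_path.write_text(json.dumps(report, indent=2))
```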
+                        elif operation == 'fix':
+                            error_file_path = Path("fix_errors.log")
+                            # Capture errors using language-appropriate test command
+                            try:
+                                from .get_test_command import get_test_command_for_file
+                                test_cmd = get_test_command_for_file(str(pdd_files['test']), language)
+
+                                # Use clean env without TUI-specific vars
+                                clean_env = os.environ.copy()
+                                for var in ['FORCE_COLOR', 'COLUMNS']:
+                                    clean_env.pop(var, None)
+
+                                if test_cmd:
+                                    # Run language-appropriate test command
+                                    if language.lower() == 'python':
+                                        # Use pytest directly for Python
+                                        python_executable = detect_host_python_executable()
+                                        # Bug #156: Run pytest on ALL matching test files
+                                        test_files = pdd_files.get('test_files', [pdd_files['test']])
+                                        pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
+                                        # Bug fix: Run from project root (no cwd), matching _run_tests_and_report pattern
+                                        # Using cwd=test.parent with paths like 'backend/tests/test_foo.py' causes
+                                        # pytest to look for 'backend/tests/backend/tests/test_foo.py' (not found)
+                                        test_result = subprocess.run(
+                                            pytest_args,
+                                            capture_output=True, text=True, timeout=300,
+                                            stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
+                                        )
+                                    else:
+                                        # Use shell command for non-Python
+                                        test_result = subprocess.run(
+                                            test_cmd,
+                                            shell=True,
+                                            capture_output=True, text=True, timeout=300,
+                                            stdin=subprocess.DEVNULL, env=clean_env,
+                                            cwd=str(pdd_files['test'].parent),
+                                            start_new_session=True
+                                        )
+                                    error_content = f"Test output:\n{test_result.stdout}\n{test_result.stderr}"
+                                else:
+                                    # No test command available - trigger agentic fallback with context
+                                    error_content = f"No test command available for {language}. Please run tests manually and provide error output."
+                            except Exception as e:
+                                error_content = f"Test execution error: {e}"
+                            error_file_path.write_text(error_content)
+
+                            # Bug #156 fix: Parse pytest output to find actual failing files
+                            # and pass the correct file to fix_main
+                            failing_files = extract_failing_files_from_output(error_content)
+                            unit_test_file_for_fix = str(pdd_files['test'])  # Default to tracked file
+
+                            if failing_files:
+                                # Try to resolve the failing file paths
+                                test_dir = pdd_files['test'].parent
+                                tracked_file_name = pdd_files['test'].name
+
+                                # Check if the tracked file is among the failures
+                                tracked_in_failures = any(
+                                    Path(ff).name == tracked_file_name for ff in failing_files
+                                )
+
+                                if not tracked_in_failures:
+                                    # Failures are in a different file - use the first failing file
+                                    for ff in failing_files:
+                                        # Try to resolve the path relative to test directory
+                                        ff_path = Path(ff)
+                                        if ff_path.is_absolute() and ff_path.exists():
+                                            unit_test_file_for_fix = str(ff_path)
+                                            break
+                                        else:
+                                            # Try to find it in the test directory
+                                            candidate = test_dir / ff_path.name
+                                            if candidate.exists():
+                                                unit_test_file_for_fix = str(candidate)
+                                                break
+                                            # Also try the path as-is relative to cwd
+                                            if ff_path.exists():
+                                                unit_test_file_for_fix = str(ff_path.resolve())
+                                                break
+
+                            # For non-Python languages, set max_attempts=0 to skip iterative loop
+                            # and go directly to agentic fallback
+                            effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
+                            result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
+                        elif operation == 'update':
+                            result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
+                        else:
+                            errors.append(f"Unknown operation {operation}")
+                            result = {'success': False}
+
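The Bug #156 handling in the fix branch above depends on extract_failing_files_from_output to locate which test file actually failed. A hedged sketch of what such a parser plausibly looks like; the real function may differ, and the regex below is an illustrative assumption keyed to pytest's "FAILED path::test_name" summary lines:

```python
# Hedged sketch: pull file paths out of pytest's failure summary lines,
# de-duplicated in first-seen order.
import re

def extract_failing_files(pytest_output: str) -> list[str]:
    files: list[str] = []
    for match in re.finditer(r"^FAILED\s+([^\s:]+)::", pytest_output, re.MULTILINE):
        path = match.group(1)
        if path not in files:
            files.append(path)
    return files

out = "FAILED tests/test_api.py::test_get\nFAILED tests/test_api.py::test_post\n"
assert extract_failing_files(out) == ["tests/test_api.py"]
```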
+                        # Result parsing
+                        if isinstance(result, dict):
+                            success = result.get('success', False)
+                            current_cost_ref[0] += result.get('cost', 0.0)
+                        elif isinstance(result, tuple) and len(result) >= 3:
+                            if operation == 'test': success = pdd_files['test'].exists()
+                            else: success = bool(result[0])
+                            cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
+                            current_cost_ref[0] += cost
                         else:
-                                error_content = "Simulated test failures"
+                            success = result is not None
+
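The dict/tuple branching above normalizes the two return conventions the command mains use: a dict with explicit success/cost keys, or a tuple whose second-to-last element is the cost. An equivalent standalone helper, shown for clarity (illustrative; the test-operation special case is approximated by the artifact_exists flag):

```python
# Normalize dict- or tuple-shaped command results to (success, cost).
def normalize_result(result, artifact_exists: bool = True):
    if isinstance(result, dict):
        return result.get('success', False), result.get('cost', 0.0)
    if isinstance(result, tuple) and len(result) >= 3:
        cost = result[-2] if isinstance(result[-2], (int, float)) else 0.0
        return bool(result[0]) and artifact_exists, cost
    return result is not None, 0.0

assert normalize_result({'success': True, 'cost': 0.02}) == (True, 0.02)
assert normalize_result(("generated code", 0.01, "some-model")) == (True, 0.01)
```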
                     except Exception as e:
-                            error_content = f"Could not capture test failures: {e}\nUsing simulated test failures"
-
-                        error_file_path.write_text(error_content)
-
-                        result = fix_main(
-                            ctx,
-                            prompt_file=str(pdd_files['prompt']),
-                            code_file=str(pdd_files['code']),
-                            unit_test_file=str(pdd_files['test']),
-                            error_file=str(error_file_path),
-                            output_test=str(pdd_files['test']),
-                            output_code=str(pdd_files['code']),
-                            output_results=f"{basename}_fix_results.log",
-                            loop=True,
-                            verification_program=str(pdd_files['example']),
-                            max_attempts=max_attempts,
-                            budget=budget - current_cost_ref[0],
-                            auto_submit=True
-                        )
-                    elif operation == 'update':
-                        result = update_main(
-                            ctx,
-                            input_prompt_file=str(pdd_files['prompt']),
-                            modified_code_file=str(pdd_files['code']),
-                            input_code_file=None,
-                            output=str(pdd_files['prompt']),
-                            git=True
-                        )
-                    else:
-                        errors.append(f"Unknown operation '{operation}' requested.")
-                        result = {'success': False, 'cost': 0.0}
+                        errors.append(f"Exception during '{operation}': {e}")
+                        success = False
 
-                    # Handle different return formats from command functions
-                    if isinstance(result, dict):
-                        # Dictionary return (e.g., from some commands)
-                        success = result.get('success', False)
-                        current_cost_ref[0] += result.get('cost', 0.0)
-                    elif isinstance(result, tuple) and len(result) >= 3:
-                        # Tuple return (e.g., from code_generator_main, context_generator_main)
-                        # For tuples, success is determined by no exceptions and valid return content
-                        # Check if the first element (generated content) is None, which indicates failure
-                        success = result[0] is not None
-                        # Extract cost from tuple (usually second-to-last element)
-                        cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
-                        current_cost_ref[0] += cost
-                    else:
-                        # Unknown return format
-                        success = result is not None
-                        current_cost_ref[0] += 0.0
-
-                except Exception as e:
-                    errors.append(f"Exception during '{operation}': {e}")
-                    success = False
+                    # Log update
+                    duration = time.time() - op_start_time
+                    actual_cost = 0.0
+                    model_name = "unknown"
+                    if success:
+                        if isinstance(result, dict):
+                            actual_cost = result.get('cost', 0.0)
+                            model_name = result.get('model', 'unknown')
+                        elif isinstance(result, tuple) and len(result) >= 3:
+                            actual_cost = result[-2] if len(result) >= 2 else 0.0
+                            model_name = result[-1] if len(result) >= 1 else 'unknown'
+                        last_model_name = str(model_name)
+                        operations_completed.append(operation)
+                        _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
 
-                # Calculate execution duration
-                duration = time.time() - start_time
+                    update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
+                    append_sync_log(basename, language, log_entry)
 
-                # Extract cost and model from result for logging
-                actual_cost = 0.0
-                model_name = "unknown"
-                error_message = None
-
-                if success:
-                    if isinstance(result, dict):
-                        actual_cost = result.get('cost', 0.0)
-                        model_name = result.get('model', 'unknown')
-                    elif isinstance(result, tuple) and len(result) >= 3:
-                        actual_cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
-                        model_name = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else 'unknown'
-                else:
-                    error_message = errors[-1] if errors else "Operation failed"
-
-                # Update and save log entry with execution results
-                update_sync_log_entry(log_entry, {
-                    'success': success,
-                    'cost': actual_cost,
-                    'model': model_name,
-                    'error': error_message
-                }, duration)
-                append_sync_log(basename, language, log_entry)
-
-                if success:
-                    operations_completed.append(operation)
-                    # Extract cost and model from result based on format
-                    if isinstance(result, dict):
-                        cost = result.get('cost', 0.0)
-                        model = result.get('model', '')
-                    elif isinstance(result, tuple) and len(result) >= 3:
-                        cost = result[-2] if len(result) >= 2 and isinstance(result[-2], (int, float)) else 0.0
-                        model = result[-1] if len(result) >= 1 and isinstance(result[-1], str) else ''
-                    else:
-                        cost = 0.0
-                        model = ''
-                    _save_operation_fingerprint(basename, language, operation, pdd_files, cost, model)
-
-                    # After successful crash operation, re-run the example to generate fresh run report
-                    if operation == 'crash':
-                        try:
-                            example_file = pdd_files['example']
-                            if example_file.exists():
-                                # Run the example program to check if crash is actually fixed
-                                try:
-                                    example_result = subprocess.run(
-                                        ['python', str(example_file)],
-                                        capture_output=True,
-                                        text=True,
-                                        timeout=60,
-                                        env=os.environ.copy(),
-                                        cwd=str(example_file.parent)
-                                    )
-
-                                    # Create fresh run report based on actual execution
-                                    report_data = RunReport(
-                                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                        exit_code=example_result.returncode,
-                                        tests_passed=1 if example_result.returncode == 0 else 0,
-                                        tests_failed=0 if example_result.returncode == 0 else 1,
-                                        coverage=100.0 if example_result.returncode == 0 else 0.0
-                                    )
-                                    save_run_report(asdict(report_data), basename, language)
-                                    print(f"Re-ran example after crash fix: exit_code={example_result.returncode}")
-
-                                except subprocess.TimeoutExpired:
-                                    # Example timed out - still considered a failure
-                                    report_data = RunReport(
-                                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
-                                        exit_code=124,  # Standard timeout exit code
-                                        tests_passed=0, tests_failed=1, coverage=0.0
-                                    )
-                                    save_run_report(asdict(report_data), basename, language)
-                                    print("Example timed out after crash fix - created failure run report")
-
-                        except Exception as e:
-                            # Don't fail the entire operation if example re-execution fails
-                            print(f"Warning: Post-crash example re-execution failed: {e}")
+                    # Post-operation checks (simplified)
+                    if success and operation == 'crash':
+                        # Re-run example to verify crash fix worked
+                        try:
+                            # Use clean env without TUI-specific vars
+                            clean_env = os.environ.copy()
+                            for var in ['FORCE_COLOR', 'COLUMNS']:
+                                clean_env.pop(var, None)
+                            # Bug fix: Use sys.executable to ensure same Python interpreter as
+                            # crash_main (fix_code_loop.py:477). When both venv and conda are
+                            # active, PATH lookup for 'python' may resolve to a different
+                            # interpreter, causing infinite crash loops.
+                            # Bug fix: Do NOT set cwd - inherit from the pdd invocation directory
+                            # to match crash_main behavior. Setting cwd to the example's parent breaks imports.
+                            example_path = str(pdd_files['example'].resolve())
+                            cmd_parts = [sys.executable, example_path]
+                            # Use error-detection runner that handles server-style examples
+                            returncode, stdout, stderr = _run_example_with_error_detection(
+                                cmd_parts,
+                                env=clean_env,
+                                timeout=60
+                            )
+                            # Include test_hash for staleness detection
+                            test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
+                            report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode == 0 else 0, 0 if returncode == 0 else 1, 100.0 if returncode == 0 else 0.0, test_hash=test_hash)
+                            save_run_report(asdict(report), basename, language)
+                        except Exception as e:
+                            # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
+                            error_msg = f"Post-crash verification failed: {e}"
+                            errors.append(error_msg)
+                            log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})
 
-                    # After successful fix operation, execute tests to update run report
-                    if operation == 'fix':
-                        try:
-                            test_file = pdd_files['test']
-                            if test_file.exists():
+                    if success and operation == 'fix':
+                        # Re-run tests to update run_report after successful fix
+                        # This prevents infinite loop by updating the state machine
+                        if pdd_files['test'].exists():
                             _execute_tests_and_create_run_report(
-                                test_file, basename, language, target_coverage
+                                pdd_files['test'],
+                                basename,
+                                language,
+                                target_coverage,
+                                code_file=pdd_files.get("code"),
+                                atomic_state=atomic_state,
+                                test_files=pdd_files.get('test_files'),  # Bug #156
                             )
-                        except Exception as e:
-                            # Don't fail the entire operation if test execution fails
-                            print(f"Warning: Post-fix test execution failed: {e}")
-                else:
-                    errors.append(f"Operation '{operation}' failed.")
-                    break
+
+                    if not success:
+                        errors.append(f"Operation '{operation}' failed.")
+                        break
 
-    except TimeoutError:
-        errors.append(f"Could not acquire lock for '{basename}'. Another sync process may be running.")
-    except Exception as e:
-        errors.append(f"An unexpected error occurred in the orchestrator: {e}")
-    finally:
-        # Log lock release
-        try:
-            log_sync_event(basename, language, "lock_released", {
-                "pid": os.getpid(),
-                "total_operations": len(operations_completed) if 'operations_completed' in locals() else 0,
-                "total_cost": current_cost_ref[0] if 'current_cost_ref' in locals() else 0.0
-            })
-        except Exception:
-            pass  # Don't fail if logging fails
+    except BaseException as e:
+        errors.append(f"An unexpected error occurred in the orchestrator: {type(e).__name__}: {e}")
+        # Log the full traceback for debugging
+        import traceback
+        traceback.print_exc()
+    finally:
+        try:
+            log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
+        except: pass
 
-    if stop_event:
-        stop_event.set()
-    if animation_thread and animation_thread.is_alive():
-        animation_thread.join(timeout=5)
-
-    total_time = time.time() - start_time
-    final_state = {
-        p_name: {'exists': p_path.exists(), 'path': str(p_path)}
-        for p_name, p_path in pdd_files.items()
-    }
+    # Return result dict
+    return {
+        'success': not errors,
+        'operations_completed': operations_completed,
+        'skipped_operations': skipped_operations,
+        'total_cost': current_cost_ref[0],
+        'total_time': time.time() - start_time,
+        'final_state': {p: {'exists': f.exists(), 'path': str(f)} for p, f in pdd_files.items() if p != 'test_files'},
+        'errors': errors,
+        'error': "; ".join(errors) if errors else None,  # Aggregated error summary for callers
+        'model_name': last_model_name,
+    }
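The returned dict exposes both the raw error list and a single joined 'error' string, with success defined simply as "no errors recorded". A tiny illustrative consumer of that contract (not part of the package):

```python
# Illustrative consumer of the worker's result dict.
def summarize(outcome: dict) -> str:
    if outcome.get('success'):
        return f"ok: {', '.join(outcome.get('operations_completed', []))}"
    return f"failed: {outcome.get('error') or 'unknown error'}"

assert summarize({'success': False, 'errors': ['x'], 'error': 'x'}) == "failed: x"
```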
+
+    # Detect headless mode (no TTY, CI environment, or quiet mode)
+    headless = quiet or not sys.stdout.isatty() or os.environ.get('CI')
+
+    if headless:
+        # Set PDD_FORCE to also skip API key prompts in headless mode
+        os.environ['PDD_FORCE'] = '1'
+        # Run worker logic directly without TUI in headless mode
+        if not quiet:
+            print("Running sync in headless mode (CI/non-TTY environment)...")
+        result = sync_worker_logic()
+        # No TUI app, so no worker_exception to check
+        worker_exception = None
+    else:
+        # Instantiate and run Textual App
+        app = SyncApp(
+            basename=basename,
+            budget=budget,
+            worker_func=sync_worker_logic,
+            function_name_ref=current_function_name_ref,
+            cost_ref=current_cost_ref,
+            prompt_path_ref=prompt_path_ref,
+            code_path_ref=code_path_ref,
+            example_path_ref=example_path_ref,
+            tests_path_ref=tests_path_ref,
+            prompt_color_ref=prompt_box_color_ref,
+            code_color_ref=code_box_color_ref,
+            example_color_ref=example_box_color_ref,
+            tests_color_ref=tests_box_color_ref,
+            stop_event=stop_event,
+            progress_callback_ref=progress_callback_ref
+        )
+
+        # Store app reference so worker can access request_confirmation
+        app_ref[0] = app
+
+        result = app.run()
+
+        # Show exit animation if not quiet
+        from .sync_tui import show_exit_animation
+        show_exit_animation()
+
+        worker_exception = app.worker_exception
+
+    # Check for worker exception that might have caused a crash (TUI mode only)
+    if not headless and worker_exception:
+        print(f"\n[Error] Worker thread crashed with exception: {worker_exception}", file=sys.stderr)
+
+        if hasattr(app, 'captured_logs') and app.captured_logs:
+            print("\n[Captured Logs (last 20 lines)]", file=sys.stderr)
+            for line in app.captured_logs[-20:]:  # Print last 20 lines
+                print(f"  {line}", file=sys.stderr)
+
+        import traceback
+        # Use the traceback module to print the stored exception's traceback if available
+        if hasattr(worker_exception, '__traceback__'):
+            traceback.print_exception(type(worker_exception), worker_exception, worker_exception.__traceback__, file=sys.stderr)
+
+    if result is None:
+        return {
+            "success": False,
+            "total_cost": current_cost_ref[0],
+            "model_name": "",
+            "error": "Sync process interrupted or returned no result.",
+            "operations_completed": [],
+            "errors": ["App exited without result"]
+        }
 
-    return {
-        'success': not errors,
-        'operations_completed': operations_completed,
-        'skipped_operations': skipped_operations,
-        'total_cost': current_cost_ref[0],
-        'total_time': total_time,
-        'final_state': final_state,
-        'errors': errors,
-    }
+    return result
 
 if __name__ == '__main__':
-    # Example usage of the sync_orchestration module.
-    # This simulates running `pdd sync my_calculator` from the command line.
-
-    print("--- Running Basic Sync Orchestration Example ---")
-
-    # Setup a dummy project structure
+    # Example usage
     Path("./prompts").mkdir(exist_ok=True)
     Path("./src").mkdir(exist_ok=True)
     Path("./examples").mkdir(exist_ok=True)
     Path("./tests").mkdir(exist_ok=True)
     Path("./prompts/my_calculator_python.prompt").write_text("Create a calculator.")
-
-    # Ensure PDD meta directory exists for logs and locks
     PDD_DIR.mkdir(exist_ok=True)
     META_DIR.mkdir(exist_ok=True)
-
-    result = sync_orchestration(
-        basename="my_calculator",
-        language="python",
-        quiet=True  # Suppress mock command output for cleaner example run
-    )
-
-    print("\n--- Sync Orchestration Finished ---")
+    result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
     print(json.dumps(result, indent=2))
-
-    if result['success']:
-        print("\n✅ Sync completed successfully.")
-    else:
-        print(f"\n❌ Sync failed. Errors: {result['errors']}")
-
-    print("\n--- Running Sync Log Example ---")
-    # This will now show the log from the run we just completed.
-    log_result = sync_orchestration(
-        basename="my_calculator",
-        language="python",
-        log=True
-    )