pdd-cli 0.0.118__py3-none-any.whl → 0.0.121__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (39)
  1. pdd/__init__.py +1 -1
  2. pdd/agentic_bug_orchestrator.py +15 -6
  3. pdd/agentic_change_orchestrator.py +18 -7
  4. pdd/agentic_common.py +68 -40
  5. pdd/agentic_crash.py +2 -1
  6. pdd/agentic_e2e_fix_orchestrator.py +165 -9
  7. pdd/agentic_update.py +2 -1
  8. pdd/agentic_verify.py +3 -2
  9. pdd/auto_include.py +51 -0
  10. pdd/commands/analysis.py +32 -25
  11. pdd/commands/connect.py +69 -1
  12. pdd/commands/fix.py +31 -13
  13. pdd/commands/generate.py +5 -0
  14. pdd/commands/modify.py +47 -11
  15. pdd/commands/utility.py +12 -7
  16. pdd/core/cli.py +17 -4
  17. pdd/core/dump.py +68 -20
  18. pdd/fix_main.py +4 -2
  19. pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
  20. pdd/frontend/dist/index.html +1 -1
  21. pdd/llm_invoke.py +82 -12
  22. pdd/operation_log.py +342 -0
  23. pdd/postprocess.py +122 -100
  24. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +11 -2
  25. pdd/prompts/generate_test_LLM.prompt +0 -1
  26. pdd/prompts/generate_test_from_example_LLM.prompt +251 -0
  27. pdd/prompts/prompt_code_diff_LLM.prompt +29 -25
  28. pdd/server/routes/prompts.py +26 -1
  29. pdd/server/terminal_spawner.py +15 -7
  30. pdd/sync_orchestration.py +164 -147
  31. pdd/sync_order.py +304 -0
  32. pdd/update_main.py +48 -24
  33. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +3 -3
  34. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/RECORD +37 -35
  35. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +0 -449
  36. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
  37. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
  38. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
  39. {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py CHANGED
@@ -27,6 +27,16 @@ MAX_CONSECUTIVE_CRASHES = 3 # Allow up to 3 consecutive crash attempts (Bug #15
27
27
 
28
28
  # --- Real PDD Component Imports ---
29
29
  from .sync_tui import SyncApp
30
+ from .operation_log import (
31
+ load_operation_log,
32
+ create_log_entry,
33
+ update_log_entry,
34
+ append_log_entry,
35
+ log_event,
36
+ save_fingerprint,
37
+ save_run_report,
38
+ clear_run_report,
39
+ )
30
40
  from .sync_determine_operation import (
31
41
  sync_determine_operation,
32
42
  get_pdd_file_paths,
@@ -50,7 +60,7 @@ from .fix_main import fix_main
50
60
  from .update_main import update_main
51
61
  from .python_env_detector import detect_host_python_executable
52
62
  from .get_run_command import get_run_command_for_file
53
- from .pytest_output import extract_failing_files_from_output
63
+ from .pytest_output import extract_failing_files_from_output, _find_project_root
54
64
  from . import DEFAULT_STRENGTH
55
65
 
56
66
 
@@ -152,69 +162,11 @@ class AtomicStateUpdate:
152
162
  self._temp_files.clear()
153
163
 
154
164
 
155
- # --- Mock Helper Functions ---
165
+ # --- State Management Wrappers ---
156
166
 
157
- def load_sync_log(basename: str, language: str) -> List[Dict[str, Any]]:
158
- """Load sync log entries for a basename and language."""
159
- log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
160
- if not log_file.exists():
161
- return []
162
- try:
163
- with open(log_file, 'r') as f:
164
- return [json.loads(line) for line in f if line.strip()]
165
- except Exception:
166
- return []
167
-
168
- def create_sync_log_entry(decision, budget_remaining: float) -> Dict[str, Any]:
169
- """Create initial log entry from decision with all fields (actual results set to None initially)."""
170
- return {
171
- "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
172
- "operation": decision.operation,
173
- "reason": decision.reason,
174
- "decision_type": decision.details.get("decision_type", "heuristic") if decision.details else "heuristic",
175
- "confidence": decision.confidence,
176
- "estimated_cost": decision.estimated_cost,
177
- "actual_cost": None,
178
- "success": None,
179
- "model": None,
180
- "duration": None,
181
- "error": None,
182
- "details": {
183
- **(decision.details if decision.details else {}),
184
- "budget_remaining": budget_remaining
185
- }
186
- }
187
-
188
- def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duration: float) -> Dict[str, Any]:
189
- """Update log entry with execution results (actual_cost, success, model, duration, error)."""
190
- entry.update({
191
- "actual_cost": result.get("cost", 0.0),
192
- "success": result.get("success", False),
193
- "model": result.get("model", "unknown"),
194
- "duration": duration,
195
- "error": result.get("error") if not result.get("success") else None
196
- })
197
- return entry
198
-
199
- def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
200
- """Append completed log entry to the sync log file."""
201
- log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
202
- META_DIR.mkdir(parents=True, exist_ok=True)
203
- with open(log_file, 'a') as f:
204
- f.write(json.dumps(entry) + '\n')
205
-
206
- def log_sync_event(basename: str, language: str, event: str, details: Dict[str, Any] = None):
207
- """Log a special sync event (lock_acquired, budget_warning, etc.)."""
208
- entry = {
209
- "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
210
- "event": event,
211
- "details": details or {}
212
- }
213
- append_sync_log(basename, language, entry)
214
-
215
- def save_run_report(report: Dict[str, Any], basename: str, language: str,
167
+ def _save_run_report_atomic(report: Dict[str, Any], basename: str, language: str,
216
168
  atomic_state: Optional['AtomicStateUpdate'] = None):
217
- """Save a run report to the metadata directory.
169
+ """Save a run report to the metadata directory, supporting atomic updates.
218
170
 
219
171
  Args:
220
172
  report: The run report dictionary to save.
@@ -222,20 +174,18 @@ def save_run_report(report: Dict[str, Any], basename: str, language: str,
222
174
  language: The programming language.
223
175
  atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
224
176
  """
225
- report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
226
177
  if atomic_state:
227
178
  # Buffer for atomic write
179
+ report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
228
180
  atomic_state.set_run_report(report, report_file)
229
181
  else:
230
- # Legacy direct write
231
- META_DIR.mkdir(parents=True, exist_ok=True)
232
- with open(report_file, 'w') as f:
233
- json.dump(report, f, indent=2, default=str)
182
+ # Direct write using operation_log
183
+ save_run_report(basename, language, report)
234
184
 
235
- def _save_operation_fingerprint(basename: str, language: str, operation: str,
185
+ def _save_fingerprint_atomic(basename: str, language: str, operation: str,
236
186
  paths: Dict[str, Path], cost: float, model: str,
237
187
  atomic_state: Optional['AtomicStateUpdate'] = None):
238
- """Save fingerprint state after successful operation.
188
+ """Save fingerprint state after successful operation, supporting atomic updates.
239
189
 
240
190
  Args:
241
191
  basename: The module basename.
@@ -246,31 +196,29 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
246
196
  model: The model used.
247
197
  atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
248
198
  """
249
- from datetime import datetime, timezone
250
- from .sync_determine_operation import calculate_current_hashes, Fingerprint
251
- from . import __version__
252
-
253
- current_hashes = calculate_current_hashes(paths)
254
- fingerprint = Fingerprint(
255
- pdd_version=__version__,
256
- timestamp=datetime.now(timezone.utc).isoformat(),
257
- command=operation,
258
- prompt_hash=current_hashes.get('prompt_hash'),
259
- code_hash=current_hashes.get('code_hash'),
260
- example_hash=current_hashes.get('example_hash'),
261
- test_hash=current_hashes.get('test_hash'),
262
- test_files=current_hashes.get('test_files'), # Bug #156
263
- )
264
-
265
- fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
266
199
  if atomic_state:
267
200
  # Buffer for atomic write
201
+ from datetime import datetime, timezone
202
+ from .sync_determine_operation import calculate_current_hashes, Fingerprint
203
+ from . import __version__
204
+
205
+ current_hashes = calculate_current_hashes(paths)
206
+ fingerprint = Fingerprint(
207
+ pdd_version=__version__,
208
+ timestamp=datetime.now(timezone.utc).isoformat(),
209
+ command=operation,
210
+ prompt_hash=current_hashes.get('prompt_hash'),
211
+ code_hash=current_hashes.get('code_hash'),
212
+ example_hash=current_hashes.get('example_hash'),
213
+ test_hash=current_hashes.get('test_hash'),
214
+ test_files=current_hashes.get('test_files'), # Bug #156
215
+ )
216
+
217
+ fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
268
218
  atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
269
219
  else:
270
- # Legacy direct write
271
- META_DIR.mkdir(parents=True, exist_ok=True)
272
- with open(fingerprint_file, 'w') as f:
273
- json.dump(asdict(fingerprint), f, indent=2, default=str)
220
+ # Direct write using operation_log
221
+ save_fingerprint(basename, language, operation, paths, cost, model)
274
222
 
275
223
  def _python_cov_target_for_code_file(code_file: Path) -> str:
276
224
  """Return a `pytest-cov` `--cov` target for a Python code file.
@@ -717,6 +665,10 @@ def _execute_tests_and_create_run_report(
717
665
  if not cov_target:
718
666
  cov_target = basename or module_name
719
667
 
668
+ # Find project root for proper pytest configuration (Bug fix: infinite fix loop)
669
+ # This matches the logic in pytest_output.py to ensure consistent behavior
670
+ project_root = _find_project_root(test_file)
671
+
720
672
  # Bug #156: Run pytest on ALL test files
721
673
  pytest_args = [
722
674
  python_executable, '-m', 'pytest',
@@ -726,10 +678,37 @@ def _execute_tests_and_create_run_report(
726
678
  f'--cov={cov_target}',
727
679
  '--cov-report=term-missing'
728
680
  ]
729
- result = subprocess.run(
730
- pytest_args,
731
- capture_output=True, text=True, timeout=300, stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
732
- )
681
+
682
+ # Set up project root configuration to prevent parent config interference
683
+ subprocess_cwd = None
684
+ if project_root is not None:
685
+ # Add PYTHONPATH to include project root and src/ directory
686
+ paths_to_add = [str(project_root)]
687
+ src_dir = project_root / "src"
688
+ if src_dir.is_dir():
689
+ paths_to_add.insert(0, str(src_dir))
690
+ existing_pythonpath = clean_env.get("PYTHONPATH", "")
691
+ if existing_pythonpath:
692
+ paths_to_add.append(existing_pythonpath)
693
+ clean_env["PYTHONPATH"] = os.pathsep.join(paths_to_add)
694
+
695
+ # Add --rootdir and -c /dev/null to prevent parent config discovery
696
+ pytest_args.extend([f'--rootdir={project_root}', '-c', '/dev/null'])
697
+ subprocess_cwd = str(project_root)
698
+
699
+ # Build subprocess kwargs - only include cwd if project root was found
700
+ subprocess_kwargs = {
701
+ 'capture_output': True,
702
+ 'text': True,
703
+ 'timeout': 300,
704
+ 'stdin': subprocess.DEVNULL,
705
+ 'env': clean_env,
706
+ 'start_new_session': True,
707
+ }
708
+ if subprocess_cwd is not None:
709
+ subprocess_kwargs['cwd'] = subprocess_cwd
710
+
711
+ result = subprocess.run(pytest_args, **subprocess_kwargs)
733
712
 
734
713
  exit_code = result.returncode
735
714
  stdout = result.stdout + (result.stderr or '')
@@ -750,7 +729,7 @@ def _execute_tests_and_create_run_report(
750
729
  test_hash=test_hash,
751
730
  test_files=test_file_hashes, # Bug #156
752
731
  )
753
- save_run_report(asdict(report), basename, language, atomic_state)
732
+ _save_run_report_atomic(asdict(report), basename, language, atomic_state)
754
733
  return report
755
734
 
756
735
  # Run the test command
@@ -793,7 +772,7 @@ def _execute_tests_and_create_run_report(
793
772
  test_files=test_file_hashes, # Bug #156
794
773
  )
795
774
 
796
- save_run_report(asdict(report), basename, language, atomic_state)
775
+ _save_run_report_atomic(asdict(report), basename, language, atomic_state)
797
776
  return report
798
777
 
799
778
  def _create_mock_context(**kwargs) -> click.Context:
@@ -810,7 +789,7 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
810
789
  print(f"No sync log found for '{basename}' in language '{language}'.")
811
790
  return {'success': False, 'errors': ['Log file not found.'], 'log_entries': []}
812
791
 
813
- log_entries = load_sync_log(basename, language)
792
+ log_entries = load_operation_log(basename, language)
814
793
  print(f"--- Sync Log for {basename} ({language}) ---")
815
794
 
816
795
  if not log_entries:
@@ -1034,28 +1013,39 @@ def sync_orchestration(
1034
1013
 
1035
1014
  try:
1036
1015
  with SyncLock(basename, language):
1037
- log_sync_event(basename, language, "lock_acquired", {"pid": os.getpid()})
1016
+ log_event(basename, language, "lock_acquired", {"pid": os.getpid()}, invocation_mode="sync")
1038
1017
 
1039
1018
  while True:
1040
1019
  budget_remaining = budget - current_cost_ref[0]
1041
1020
  if current_cost_ref[0] >= budget:
1042
1021
  errors.append(f"Budget of ${budget:.2f} exceeded.")
1043
- log_sync_event(basename, language, "budget_exceeded", {
1022
+ log_event(basename, language, "budget_exceeded", {
1044
1023
  "total_cost": current_cost_ref[0],
1045
1024
  "budget": budget
1046
- })
1025
+ }, invocation_mode="sync")
1047
1026
  break
1048
1027
 
1049
1028
  if budget_remaining < budget * 0.2 and budget_remaining > 0:
1050
- log_sync_event(basename, language, "budget_warning", {
1029
+ log_event(basename, language, "budget_warning", {
1051
1030
  "remaining": budget_remaining,
1052
1031
  "percentage": (budget_remaining / budget) * 100
1053
- })
1032
+ }, invocation_mode="sync")
1054
1033
 
1055
1034
  decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
1056
1035
  operation = decision.operation
1057
1036
 
1058
- log_entry = create_sync_log_entry(decision, budget_remaining)
1037
+ log_entry = create_log_entry(
1038
+ operation=decision.operation,
1039
+ reason=decision.reason,
1040
+ invocation_mode="sync",
1041
+ estimated_cost=decision.estimated_cost,
1042
+ confidence=decision.confidence,
1043
+ decision_type=decision.details.get("decision_type", "heuristic") if decision.details else "heuristic"
1044
+ )
1045
+ if decision.details:
1046
+ log_entry.setdefault('details', {}).update(decision.details)
1047
+ log_entry.setdefault('details', {})['budget_remaining'] = budget_remaining
1048
+
1059
1049
  operation_history.append(operation)
1060
1050
 
1061
1051
  # Cycle detection logic
@@ -1063,7 +1053,7 @@ def sync_orchestration(
1063
1053
  recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
1064
1054
  if len(recent_auto_deps) >= 2:
1065
1055
  errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
1066
- log_sync_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"})
1056
+ log_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"}, invocation_mode="sync")
1067
1057
  operation = 'generate'
1068
1058
  decision.operation = 'generate' # Update decision too
1069
1059
 
@@ -1076,7 +1066,7 @@ def sync_orchestration(
1076
1066
  recent_ops == ['verify', 'crash', 'verify', 'crash']):
1077
1067
  # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1078
1068
  errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1079
- log_sync_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS})
1069
+ log_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS}, invocation_mode="sync")
1080
1070
  break
1081
1071
 
1082
1072
  # Bug #4 fix: Detect test-fix cycle pattern
@@ -1088,7 +1078,7 @@ def sync_orchestration(
1088
1078
  recent_ops == ['fix', 'test', 'fix', 'test']):
1089
1079
  # Pattern detected - this represents MAX_CYCLE_REPEATS iterations
1090
1080
  errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
1091
- log_sync_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS})
1081
+ log_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS}, invocation_mode="sync")
1092
1082
  break
1093
1083
 
1094
1084
  if operation == 'fix':
@@ -1130,11 +1120,11 @@ def sync_orchestration(
1130
1120
  extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
1131
1121
  if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
1132
1122
  # Accept current coverage after max attempts
1133
- log_sync_event(basename, language, "test_extend_limit", {
1123
+ log_event(basename, language, "test_extend_limit", {
1134
1124
  "attempts": extend_attempts,
1135
1125
  "max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
1136
1126
  "reason": "Accepting current coverage after max extend attempts"
1137
- })
1127
+ }, invocation_mode="sync")
1138
1128
  success = True
1139
1129
  break
1140
1130
 
@@ -1152,32 +1142,32 @@ def sync_orchestration(
1152
1142
  errors.append(f"Conflict detected: {decision.reason}")
1153
1143
  error_msg = decision.reason
1154
1144
 
1155
- update_sync_log_entry(log_entry, {'success': success, 'cost': 0.0, 'model': 'none', 'error': error_msg}, 0.0)
1156
- append_sync_log(basename, language, log_entry)
1145
+ update_log_entry(log_entry, success=success, cost=0.0, model='none', duration=0.0, error=error_msg)
1146
+ append_log_entry(basename, language, log_entry)
1157
1147
  break
1158
1148
 
1159
1149
  # Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
1160
1150
  # Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
1161
1151
  if operation == 'verify' and (skip_verify or skip_tests):
1162
1152
  skipped_operations.append('verify')
1163
- update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1164
- append_sync_log(basename, language, log_entry)
1153
+ update_log_entry(log_entry, success=True, cost=0.0, model='skipped', duration=0.0, error=None)
1154
+ append_log_entry(basename, language, log_entry)
1165
1155
  # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1166
- _save_operation_fingerprint(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
1156
+ _save_fingerprint_atomic(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
1167
1157
  continue
1168
1158
  if operation == 'test' and skip_tests:
1169
1159
  skipped_operations.append('test')
1170
- update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1171
- append_sync_log(basename, language, log_entry)
1160
+ update_log_entry(log_entry, success=True, cost=0.0, model='skipped', duration=0.0, error=None)
1161
+ append_log_entry(basename, language, log_entry)
1172
1162
  # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1173
- _save_operation_fingerprint(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
1163
+ _save_fingerprint_atomic(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
1174
1164
  continue
1175
1165
  if operation == 'crash' and (skip_tests or skip_verify):
1176
1166
  skipped_operations.append('crash')
1177
- update_sync_log_entry(log_entry, {'success': True, 'cost': 0.0, 'model': 'skipped', 'error': None}, 0.0)
1178
- append_sync_log(basename, language, log_entry)
1167
+ update_log_entry(log_entry, success=True, cost=0.0, model='skipped', duration=0.0, error=None)
1168
+ append_log_entry(basename, language, log_entry)
1179
1169
  # Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
1180
- _save_operation_fingerprint(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
1170
+ _save_fingerprint_atomic(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
1181
1171
  # FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
1182
1172
  # Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
1183
1173
  current_hashes = calculate_current_hashes(pdd_files)
@@ -1189,7 +1179,7 @@ def sync_orchestration(
1189
1179
  coverage=0.0,
1190
1180
  test_hash=current_hashes.get('test_hash')
1191
1181
  )
1192
- save_run_report(asdict(synthetic_report), basename, language)
1182
+ _save_run_report_atomic(asdict(synthetic_report), basename, language)
1193
1183
  continue
1194
1184
 
1195
1185
  current_function_name_ref[0] = operation
@@ -1237,8 +1227,7 @@ def sync_orchestration(
1237
1227
  # Use absolute paths to avoid path_resolution_mode mismatch between sync (cwd) and generate (config_base)
1238
1228
  result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt'].resolve()), output=str(pdd_files['code'].resolve()), original_prompt_file_path=None, force_incremental_flag=False)
1239
1229
  # Clear stale run_report so crash/verify is required for newly generated code
1240
- run_report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
1241
- run_report_file.unlink(missing_ok=True)
1230
+ clear_run_report(basename, language)
1242
1231
  elif operation == 'example':
1243
1232
  # Ensure example directory exists before generating
1244
1233
  pdd_files['example'].parent.mkdir(parents=True, exist_ok=True)
@@ -1263,8 +1252,8 @@ def sync_orchestration(
1263
1252
  else:
1264
1253
  # Manual check - run the example to see if it crashes
1265
1254
  env = os.environ.copy()
1266
- src_dir = Path.cwd() / 'src'
1267
- env['PYTHONPATH'] = f"{src_dir}:{env.get('PYTHONPATH', '')}"
1255
+ code_dir = pdd_files['code'].resolve().parent
1256
+ env['PYTHONPATH'] = f"{code_dir}:{env.get('PYTHONPATH', '')}"
1268
1257
  # Remove TUI-specific env vars that might contaminate subprocess
1269
1258
  for var in ['FORCE_COLOR', 'COLUMNS']:
1270
1259
  env.pop(var, None)
@@ -1305,7 +1294,7 @@ def sync_orchestration(
1305
1294
  coverage=0.0,
1306
1295
  test_hash=test_hash
1307
1296
  )
1308
- save_run_report(asdict(report), basename, language)
1297
+ _save_run_report_atomic(asdict(report), basename, language)
1309
1298
  skipped_operations.append('crash')
1310
1299
  continue
1311
1300
 
@@ -1317,7 +1306,7 @@ def sync_orchestration(
1317
1306
  pdd_files['example']
1318
1307
  )
1319
1308
  if auto_fixed:
1320
- log_sync_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg})
1309
+ log_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg}, invocation_mode="sync")
1321
1310
  # Retry running the example after auto-fix
1322
1311
  retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
1323
1312
  cmd_parts,
@@ -1326,7 +1315,7 @@ def sync_orchestration(
1326
1315
  )
1327
1316
  if retry_returncode == 0:
1328
1317
  # Auto-fix worked! Save run report and continue
1329
- log_sync_event(basename, language, "auto_fix_success", {"message": auto_fix_msg})
1318
+ log_event(basename, language, "auto_fix_success", {"message": auto_fix_msg}, invocation_mode="sync")
1330
1319
  test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1331
1320
  report = RunReport(
1332
1321
  datetime.datetime.now(datetime.timezone.utc).isoformat(),
@@ -1336,7 +1325,7 @@ def sync_orchestration(
1336
1325
  coverage=0.0,
1337
1326
  test_hash=test_hash
1338
1327
  )
1339
- save_run_report(asdict(report), basename, language)
1328
+ _save_run_report_atomic(asdict(report), basename, language)
1340
1329
  result = (True, 0.0, 'auto-fix')
1341
1330
  success = True
1342
1331
  actual_cost = 0.0
@@ -1366,7 +1355,7 @@ def sync_orchestration(
1366
1355
  # For non-Python languages, set max_attempts=0 to skip iterative loop
1367
1356
  # and go directly to agentic fallback
1368
1357
  effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
1369
- result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1358
+ result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename.replace('/', '_')}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
1370
1359
  elif operation == 'test':
1371
1360
  pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
1372
1361
  # Use merge=True when test file exists to preserve fixes and append new tests
@@ -1444,14 +1433,37 @@ def sync_orchestration(
1444
1433
  # Bug #156: Run pytest on ALL matching test files
1445
1434
  test_files = pdd_files.get('test_files', [pdd_files['test']])
1446
1435
  pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
1447
- # Bug fix: Run from project root (no cwd), matching _run_tests_and_report pattern
1448
- # Using cwd=test.parent with paths like 'backend/tests/test_foo.py' causes
1449
- # pytest to look for 'backend/tests/backend/tests/test_foo.py' (not found)
1450
- test_result = subprocess.run(
1451
- pytest_args,
1452
- capture_output=True, text=True, timeout=300,
1453
- stdin=subprocess.DEVNULL, env=clean_env, start_new_session=True
1454
- )
1436
+
1437
+ # Bug fix: Find project root for proper pytest configuration
1438
+ # This matches the fix in _execute_tests_and_create_run_report()
1439
+ project_root = _find_project_root(pdd_files['test'])
1440
+
1441
+ # Set up subprocess kwargs
1442
+ subprocess_kwargs = {
1443
+ 'capture_output': True,
1444
+ 'text': True,
1445
+ 'timeout': 300,
1446
+ 'stdin': subprocess.DEVNULL,
1447
+ 'env': clean_env,
1448
+ 'start_new_session': True
1449
+ }
1450
+
1451
+ if project_root is not None:
1452
+ # Add PYTHONPATH to include project root and src/ directory
1453
+ paths_to_add = [str(project_root)]
1454
+ src_dir = project_root / "src"
1455
+ if src_dir.is_dir():
1456
+ paths_to_add.insert(0, str(src_dir))
1457
+ existing_pythonpath = clean_env.get("PYTHONPATH", "")
1458
+ if existing_pythonpath:
1459
+ paths_to_add.append(existing_pythonpath)
1460
+ clean_env["PYTHONPATH"] = os.pathsep.join(paths_to_add)
1461
+
1462
+ # Add --rootdir and -c /dev/null to prevent parent config discovery
1463
+ pytest_args.extend([f'--rootdir={project_root}', '-c', '/dev/null'])
1464
+ subprocess_kwargs['cwd'] = str(project_root)
1465
+
1466
+ test_result = subprocess.run(pytest_args, **subprocess_kwargs)
1455
1467
  else:
1456
1468
  # Use shell command for non-Python
1457
1469
  test_result = subprocess.run(
@@ -1507,7 +1519,7 @@ def sync_orchestration(
1507
1519
  # For non-Python languages, set max_attempts=0 to skip iterative loop
1508
1520
  # and go directly to agentic fallback
1509
1521
  effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
1510
- result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
1522
+ result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename.replace('/', '_')}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
1511
1523
  elif operation == 'update':
1512
1524
  result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
1513
1525
  else:
@@ -1526,8 +1538,12 @@ def sync_orchestration(
1526
1538
  else:
1527
1539
  success = result is not None
1528
1540
 
1541
+ except click.Abort:
1542
+ errors.append(f"Operation '{operation}' was cancelled (user declined or non-interactive environment)")
1543
+ success = False
1529
1544
  except Exception as e:
1530
- errors.append(f"Exception during '{operation}': {e}")
1545
+ error_msg = str(e) if str(e) else type(e).__name__
1546
+ errors.append(f"Exception during '{operation}': {error_msg}")
1531
1547
  success = False
1532
1548
 
1533
1549
  # Log update
@@ -1543,10 +1559,10 @@ def sync_orchestration(
1543
1559
  model_name = result[-1] if len(result) >= 1 else 'unknown'
1544
1560
  last_model_name = str(model_name)
1545
1561
  operations_completed.append(operation)
1546
- _save_operation_fingerprint(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
1562
+ _save_fingerprint_atomic(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
1547
1563
 
1548
- update_sync_log_entry(log_entry, {'success': success, 'cost': actual_cost, 'model': model_name, 'error': errors[-1] if errors and not success else None}, duration)
1549
- append_sync_log(basename, language, log_entry)
1564
+ update_log_entry(log_entry, success=success, cost=actual_cost, model=model_name, duration=duration, error=errors[-1] if errors and not success else None)
1565
+ append_log_entry(basename, language, log_entry)
1550
1566
 
1551
1567
  # Post-operation checks (simplified)
1552
1568
  if success and operation == 'crash':
@@ -1573,12 +1589,12 @@ def sync_orchestration(
1573
1589
  # Include test_hash for staleness detection
1574
1590
  test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
1575
1591
  report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
1576
- save_run_report(asdict(report), basename, language)
1592
+ _save_run_report_atomic(asdict(report), basename, language)
1577
1593
  except Exception as e:
1578
1594
  # Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
1579
1595
  error_msg = f"Post-crash verification failed: {e}"
1580
1596
  errors.append(error_msg)
1581
- log_sync_event(basename, language, "post_crash_verification_failed", {"error": str(e)})
1597
+ log_event(basename, language, "post_crash_verification_failed", {"error": str(e)}, invocation_mode="sync")
1582
1598
 
1583
1599
  if success and operation == 'fix':
1584
1600
  # Re-run tests to update run_report after successful fix
@@ -1595,7 +1611,8 @@ def sync_orchestration(
1595
1611
  )
1596
1612
 
1597
1613
  if not success:
1598
- errors.append(f"Operation '{operation}' failed.")
1614
+ if not errors:
1615
+ errors.append(f"Operation '{operation}' failed.")
1599
1616
  break
1600
1617
 
1601
1618
  except BaseException as e:
@@ -1605,7 +1622,7 @@ def sync_orchestration(
1605
1622
  traceback.print_exc()
1606
1623
  finally:
1607
1624
  try:
1608
- log_sync_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]})
1625
+ log_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]}, invocation_mode="sync")
1609
1626
  except: pass
1610
1627
 
1611
1628
  # Return result dict
@@ -1700,4 +1717,4 @@ if __name__ == '__main__':
1700
1717
  PDD_DIR.mkdir(exist_ok=True)
1701
1718
  META_DIR.mkdir(exist_ok=True)
1702
1719
  result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
1703
- print(json.dumps(result, indent=2))
1720
+ print(json.dumps(result, indent=2))