pdd-cli 0.0.118__py3-none-any.whl → 0.0.121__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +1 -1
- pdd/agentic_bug_orchestrator.py +15 -6
- pdd/agentic_change_orchestrator.py +18 -7
- pdd/agentic_common.py +68 -40
- pdd/agentic_crash.py +2 -1
- pdd/agentic_e2e_fix_orchestrator.py +165 -9
- pdd/agentic_update.py +2 -1
- pdd/agentic_verify.py +3 -2
- pdd/auto_include.py +51 -0
- pdd/commands/analysis.py +32 -25
- pdd/commands/connect.py +69 -1
- pdd/commands/fix.py +31 -13
- pdd/commands/generate.py +5 -0
- pdd/commands/modify.py +47 -11
- pdd/commands/utility.py +12 -7
- pdd/core/cli.py +17 -4
- pdd/core/dump.py +68 -20
- pdd/fix_main.py +4 -2
- pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
- pdd/frontend/dist/index.html +1 -1
- pdd/llm_invoke.py +82 -12
- pdd/operation_log.py +342 -0
- pdd/postprocess.py +122 -100
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +11 -2
- pdd/prompts/generate_test_LLM.prompt +0 -1
- pdd/prompts/generate_test_from_example_LLM.prompt +251 -0
- pdd/prompts/prompt_code_diff_LLM.prompt +29 -25
- pdd/server/routes/prompts.py +26 -1
- pdd/server/terminal_spawner.py +15 -7
- pdd/sync_orchestration.py +164 -147
- pdd/sync_order.py +304 -0
- pdd/update_main.py +48 -24
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +3 -3
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/RECORD +37 -35
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +0 -449
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.118.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/sync_orchestration.py
CHANGED
|
@@ -27,6 +27,16 @@ MAX_CONSECUTIVE_CRASHES = 3 # Allow up to 3 consecutive crash attempts (Bug #15
|
|
|
27
27
|
|
|
28
28
|
# --- Real PDD Component Imports ---
|
|
29
29
|
from .sync_tui import SyncApp
|
|
30
|
+
from .operation_log import (
|
|
31
|
+
load_operation_log,
|
|
32
|
+
create_log_entry,
|
|
33
|
+
update_log_entry,
|
|
34
|
+
append_log_entry,
|
|
35
|
+
log_event,
|
|
36
|
+
save_fingerprint,
|
|
37
|
+
save_run_report,
|
|
38
|
+
clear_run_report,
|
|
39
|
+
)
|
|
30
40
|
from .sync_determine_operation import (
|
|
31
41
|
sync_determine_operation,
|
|
32
42
|
get_pdd_file_paths,
|
|
@@ -50,7 +60,7 @@ from .fix_main import fix_main
|
|
|
50
60
|
from .update_main import update_main
|
|
51
61
|
from .python_env_detector import detect_host_python_executable
|
|
52
62
|
from .get_run_command import get_run_command_for_file
|
|
53
|
-
from .pytest_output import extract_failing_files_from_output
|
|
63
|
+
from .pytest_output import extract_failing_files_from_output, _find_project_root
|
|
54
64
|
from . import DEFAULT_STRENGTH
|
|
55
65
|
|
|
56
66
|
|
|
@@ -152,69 +162,11 @@ class AtomicStateUpdate:
|
|
|
152
162
|
self._temp_files.clear()
|
|
153
163
|
|
|
154
164
|
|
|
155
|
-
# ---
|
|
165
|
+
# --- State Management Wrappers ---
|
|
156
166
|
|
|
157
|
-
def
|
|
158
|
-
"""Load sync log entries for a basename and language."""
|
|
159
|
-
log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
|
|
160
|
-
if not log_file.exists():
|
|
161
|
-
return []
|
|
162
|
-
try:
|
|
163
|
-
with open(log_file, 'r') as f:
|
|
164
|
-
return [json.loads(line) for line in f if line.strip()]
|
|
165
|
-
except Exception:
|
|
166
|
-
return []
|
|
167
|
-
|
|
168
|
-
def create_sync_log_entry(decision, budget_remaining: float) -> Dict[str, Any]:
|
|
169
|
-
"""Create initial log entry from decision with all fields (actual results set to None initially)."""
|
|
170
|
-
return {
|
|
171
|
-
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
172
|
-
"operation": decision.operation,
|
|
173
|
-
"reason": decision.reason,
|
|
174
|
-
"decision_type": decision.details.get("decision_type", "heuristic") if decision.details else "heuristic",
|
|
175
|
-
"confidence": decision.confidence,
|
|
176
|
-
"estimated_cost": decision.estimated_cost,
|
|
177
|
-
"actual_cost": None,
|
|
178
|
-
"success": None,
|
|
179
|
-
"model": None,
|
|
180
|
-
"duration": None,
|
|
181
|
-
"error": None,
|
|
182
|
-
"details": {
|
|
183
|
-
**(decision.details if decision.details else {}),
|
|
184
|
-
"budget_remaining": budget_remaining
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
def update_sync_log_entry(entry: Dict[str, Any], result: Dict[str, Any], duration: float) -> Dict[str, Any]:
|
|
189
|
-
"""Update log entry with execution results (actual_cost, success, model, duration, error)."""
|
|
190
|
-
entry.update({
|
|
191
|
-
"actual_cost": result.get("cost", 0.0),
|
|
192
|
-
"success": result.get("success", False),
|
|
193
|
-
"model": result.get("model", "unknown"),
|
|
194
|
-
"duration": duration,
|
|
195
|
-
"error": result.get("error") if not result.get("success") else None
|
|
196
|
-
})
|
|
197
|
-
return entry
|
|
198
|
-
|
|
199
|
-
def append_sync_log(basename: str, language: str, entry: Dict[str, Any]):
|
|
200
|
-
"""Append completed log entry to the sync log file."""
|
|
201
|
-
log_file = META_DIR / f"{_safe_basename(basename)}_{language}_sync.log"
|
|
202
|
-
META_DIR.mkdir(parents=True, exist_ok=True)
|
|
203
|
-
with open(log_file, 'a') as f:
|
|
204
|
-
f.write(json.dumps(entry) + '\n')
|
|
205
|
-
|
|
206
|
-
def log_sync_event(basename: str, language: str, event: str, details: Dict[str, Any] = None):
|
|
207
|
-
"""Log a special sync event (lock_acquired, budget_warning, etc.)."""
|
|
208
|
-
entry = {
|
|
209
|
-
"timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
210
|
-
"event": event,
|
|
211
|
-
"details": details or {}
|
|
212
|
-
}
|
|
213
|
-
append_sync_log(basename, language, entry)
|
|
214
|
-
|
|
215
|
-
def save_run_report(report: Dict[str, Any], basename: str, language: str,
|
|
167
|
+
def _save_run_report_atomic(report: Dict[str, Any], basename: str, language: str,
|
|
216
168
|
atomic_state: Optional['AtomicStateUpdate'] = None):
|
|
217
|
-
"""Save a run report to the metadata directory.
|
|
169
|
+
"""Save a run report to the metadata directory, supporting atomic updates.
|
|
218
170
|
|
|
219
171
|
Args:
|
|
220
172
|
report: The run report dictionary to save.
|
|
@@ -222,20 +174,18 @@ def save_run_report(report: Dict[str, Any], basename: str, language: str,
|
|
|
222
174
|
language: The programming language.
|
|
223
175
|
atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
|
|
224
176
|
"""
|
|
225
|
-
report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
|
|
226
177
|
if atomic_state:
|
|
227
178
|
# Buffer for atomic write
|
|
179
|
+
report_file = META_DIR / f"{_safe_basename(basename)}_{language}_run.json"
|
|
228
180
|
atomic_state.set_run_report(report, report_file)
|
|
229
181
|
else:
|
|
230
|
-
#
|
|
231
|
-
|
|
232
|
-
with open(report_file, 'w') as f:
|
|
233
|
-
json.dump(report, f, indent=2, default=str)
|
|
182
|
+
# Direct write using operation_log
|
|
183
|
+
save_run_report(basename, language, report)
|
|
234
184
|
|
|
235
|
-
def _save_operation_fingerprint(basename: str, language: str, operation: str,
|
|
185
|
+
def _save_fingerprint_atomic(basename: str, language: str, operation: str,
|
|
236
186
|
paths: Dict[str, Path], cost: float, model: str,
|
|
237
187
|
atomic_state: Optional['AtomicStateUpdate'] = None):
|
|
238
|
-
"""Save fingerprint state after successful operation.
|
|
188
|
+
"""Save fingerprint state after successful operation, supporting atomic updates.
|
|
239
189
|
|
|
240
190
|
Args:
|
|
241
191
|
basename: The module basename.
|
|
@@ -246,31 +196,29 @@ def _save_operation_fingerprint(basename: str, language: str, operation: str,
|
|
|
246
196
|
model: The model used.
|
|
247
197
|
atomic_state: Optional AtomicStateUpdate for atomic writes (Issue #159 fix).
|
|
248
198
|
"""
|
|
249
|
-
from datetime import datetime, timezone
|
|
250
|
-
from .sync_determine_operation import calculate_current_hashes, Fingerprint
|
|
251
|
-
from . import __version__
|
|
252
|
-
|
|
253
|
-
current_hashes = calculate_current_hashes(paths)
|
|
254
|
-
fingerprint = Fingerprint(
|
|
255
|
-
pdd_version=__version__,
|
|
256
|
-
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
257
|
-
command=operation,
|
|
258
|
-
prompt_hash=current_hashes.get('prompt_hash'),
|
|
259
|
-
code_hash=current_hashes.get('code_hash'),
|
|
260
|
-
example_hash=current_hashes.get('example_hash'),
|
|
261
|
-
test_hash=current_hashes.get('test_hash'),
|
|
262
|
-
test_files=current_hashes.get('test_files'), # Bug #156
|
|
263
|
-
)
|
|
264
|
-
|
|
265
|
-
fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
|
|
266
199
|
if atomic_state:
|
|
267
200
|
# Buffer for atomic write
|
|
201
|
+
from datetime import datetime, timezone
|
|
202
|
+
from .sync_determine_operation import calculate_current_hashes, Fingerprint
|
|
203
|
+
from . import __version__
|
|
204
|
+
|
|
205
|
+
current_hashes = calculate_current_hashes(paths)
|
|
206
|
+
fingerprint = Fingerprint(
|
|
207
|
+
pdd_version=__version__,
|
|
208
|
+
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
209
|
+
command=operation,
|
|
210
|
+
prompt_hash=current_hashes.get('prompt_hash'),
|
|
211
|
+
code_hash=current_hashes.get('code_hash'),
|
|
212
|
+
example_hash=current_hashes.get('example_hash'),
|
|
213
|
+
test_hash=current_hashes.get('test_hash'),
|
|
214
|
+
test_files=current_hashes.get('test_files'), # Bug #156
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
fingerprint_file = META_DIR / f"{_safe_basename(basename)}_{language}.json"
|
|
268
218
|
atomic_state.set_fingerprint(asdict(fingerprint), fingerprint_file)
|
|
269
219
|
else:
|
|
270
|
-
#
|
|
271
|
-
|
|
272
|
-
with open(fingerprint_file, 'w') as f:
|
|
273
|
-
json.dump(asdict(fingerprint), f, indent=2, default=str)
|
|
220
|
+
# Direct write using operation_log
|
|
221
|
+
save_fingerprint(basename, language, operation, paths, cost, model)
|
|
274
222
|
|
|
275
223
|
def _python_cov_target_for_code_file(code_file: Path) -> str:
|
|
276
224
|
"""Return a `pytest-cov` `--cov` target for a Python code file.
|
|
@@ -717,6 +665,10 @@ def _execute_tests_and_create_run_report(
|
|
|
717
665
|
if not cov_target:
|
|
718
666
|
cov_target = basename or module_name
|
|
719
667
|
|
|
668
|
+
# Find project root for proper pytest configuration (Bug fix: infinite fix loop)
|
|
669
|
+
# This matches the logic in pytest_output.py to ensure consistent behavior
|
|
670
|
+
project_root = _find_project_root(test_file)
|
|
671
|
+
|
|
720
672
|
# Bug #156: Run pytest on ALL test files
|
|
721
673
|
pytest_args = [
|
|
722
674
|
python_executable, '-m', 'pytest',
|
|
@@ -726,10 +678,37 @@ def _execute_tests_and_create_run_report(
|
|
|
726
678
|
f'--cov={cov_target}',
|
|
727
679
|
'--cov-report=term-missing'
|
|
728
680
|
]
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
681
|
+
|
|
682
|
+
# Set up project root configuration to prevent parent config interference
|
|
683
|
+
subprocess_cwd = None
|
|
684
|
+
if project_root is not None:
|
|
685
|
+
# Add PYTHONPATH to include project root and src/ directory
|
|
686
|
+
paths_to_add = [str(project_root)]
|
|
687
|
+
src_dir = project_root / "src"
|
|
688
|
+
if src_dir.is_dir():
|
|
689
|
+
paths_to_add.insert(0, str(src_dir))
|
|
690
|
+
existing_pythonpath = clean_env.get("PYTHONPATH", "")
|
|
691
|
+
if existing_pythonpath:
|
|
692
|
+
paths_to_add.append(existing_pythonpath)
|
|
693
|
+
clean_env["PYTHONPATH"] = os.pathsep.join(paths_to_add)
|
|
694
|
+
|
|
695
|
+
# Add --rootdir and -c /dev/null to prevent parent config discovery
|
|
696
|
+
pytest_args.extend([f'--rootdir={project_root}', '-c', '/dev/null'])
|
|
697
|
+
subprocess_cwd = str(project_root)
|
|
698
|
+
|
|
699
|
+
# Build subprocess kwargs - only include cwd if project root was found
|
|
700
|
+
subprocess_kwargs = {
|
|
701
|
+
'capture_output': True,
|
|
702
|
+
'text': True,
|
|
703
|
+
'timeout': 300,
|
|
704
|
+
'stdin': subprocess.DEVNULL,
|
|
705
|
+
'env': clean_env,
|
|
706
|
+
'start_new_session': True,
|
|
707
|
+
}
|
|
708
|
+
if subprocess_cwd is not None:
|
|
709
|
+
subprocess_kwargs['cwd'] = subprocess_cwd
|
|
710
|
+
|
|
711
|
+
result = subprocess.run(pytest_args, **subprocess_kwargs)
|
|
733
712
|
|
|
734
713
|
exit_code = result.returncode
|
|
735
714
|
stdout = result.stdout + (result.stderr or '')
|
|
@@ -750,7 +729,7 @@ def _execute_tests_and_create_run_report(
|
|
|
750
729
|
test_hash=test_hash,
|
|
751
730
|
test_files=test_file_hashes, # Bug #156
|
|
752
731
|
)
|
|
753
|
-
|
|
732
|
+
_save_run_report_atomic(asdict(report), basename, language, atomic_state)
|
|
754
733
|
return report
|
|
755
734
|
|
|
756
735
|
# Run the test command
|
|
@@ -793,7 +772,7 @@ def _execute_tests_and_create_run_report(
|
|
|
793
772
|
test_files=test_file_hashes, # Bug #156
|
|
794
773
|
)
|
|
795
774
|
|
|
796
|
-
|
|
775
|
+
_save_run_report_atomic(asdict(report), basename, language, atomic_state)
|
|
797
776
|
return report
|
|
798
777
|
|
|
799
778
|
def _create_mock_context(**kwargs) -> click.Context:
|
|
@@ -810,7 +789,7 @@ def _display_sync_log(basename: str, language: str, verbose: bool = False) -> Di
|
|
|
810
789
|
print(f"No sync log found for '{basename}' in language '{language}'.")
|
|
811
790
|
return {'success': False, 'errors': ['Log file not found.'], 'log_entries': []}
|
|
812
791
|
|
|
813
|
-
log_entries =
|
|
792
|
+
log_entries = load_operation_log(basename, language)
|
|
814
793
|
print(f"--- Sync Log for {basename} ({language}) ---")
|
|
815
794
|
|
|
816
795
|
if not log_entries:
|
|
@@ -1034,28 +1013,39 @@ def sync_orchestration(
|
|
|
1034
1013
|
|
|
1035
1014
|
try:
|
|
1036
1015
|
with SyncLock(basename, language):
|
|
1037
|
-
|
|
1016
|
+
log_event(basename, language, "lock_acquired", {"pid": os.getpid()}, invocation_mode="sync")
|
|
1038
1017
|
|
|
1039
1018
|
while True:
|
|
1040
1019
|
budget_remaining = budget - current_cost_ref[0]
|
|
1041
1020
|
if current_cost_ref[0] >= budget:
|
|
1042
1021
|
errors.append(f"Budget of ${budget:.2f} exceeded.")
|
|
1043
|
-
|
|
1022
|
+
log_event(basename, language, "budget_exceeded", {
|
|
1044
1023
|
"total_cost": current_cost_ref[0],
|
|
1045
1024
|
"budget": budget
|
|
1046
|
-
})
|
|
1025
|
+
}, invocation_mode="sync")
|
|
1047
1026
|
break
|
|
1048
1027
|
|
|
1049
1028
|
if budget_remaining < budget * 0.2 and budget_remaining > 0:
|
|
1050
|
-
|
|
1029
|
+
log_event(basename, language, "budget_warning", {
|
|
1051
1030
|
"remaining": budget_remaining,
|
|
1052
1031
|
"percentage": (budget_remaining / budget) * 100
|
|
1053
|
-
})
|
|
1032
|
+
}, invocation_mode="sync")
|
|
1054
1033
|
|
|
1055
1034
|
decision = sync_determine_operation(basename, language, target_coverage, budget_remaining, False, prompts_dir, skip_tests, skip_verify, context_override)
|
|
1056
1035
|
operation = decision.operation
|
|
1057
1036
|
|
|
1058
|
-
log_entry =
|
|
1037
|
+
log_entry = create_log_entry(
|
|
1038
|
+
operation=decision.operation,
|
|
1039
|
+
reason=decision.reason,
|
|
1040
|
+
invocation_mode="sync",
|
|
1041
|
+
estimated_cost=decision.estimated_cost,
|
|
1042
|
+
confidence=decision.confidence,
|
|
1043
|
+
decision_type=decision.details.get("decision_type", "heuristic") if decision.details else "heuristic"
|
|
1044
|
+
)
|
|
1045
|
+
if decision.details:
|
|
1046
|
+
log_entry.setdefault('details', {}).update(decision.details)
|
|
1047
|
+
log_entry.setdefault('details', {})['budget_remaining'] = budget_remaining
|
|
1048
|
+
|
|
1059
1049
|
operation_history.append(operation)
|
|
1060
1050
|
|
|
1061
1051
|
# Cycle detection logic
|
|
@@ -1063,7 +1053,7 @@ def sync_orchestration(
|
|
|
1063
1053
|
recent_auto_deps = [op for op in operation_history[-3:] if op == 'auto-deps']
|
|
1064
1054
|
if len(recent_auto_deps) >= 2:
|
|
1065
1055
|
errors.append("Detected auto-deps infinite loop. Force advancing to generate operation.")
|
|
1066
|
-
|
|
1056
|
+
log_event(basename, language, "cycle_detected", {"cycle_type": "auto-deps-infinite"}, invocation_mode="sync")
|
|
1067
1057
|
operation = 'generate'
|
|
1068
1058
|
decision.operation = 'generate' # Update decision too
|
|
1069
1059
|
|
|
@@ -1076,7 +1066,7 @@ def sync_orchestration(
|
|
|
1076
1066
|
recent_ops == ['verify', 'crash', 'verify', 'crash']):
|
|
1077
1067
|
# Pattern detected - this represents MAX_CYCLE_REPEATS iterations
|
|
1078
1068
|
errors.append(f"Detected crash-verify cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
|
|
1079
|
-
|
|
1069
|
+
log_event(basename, language, "cycle_detected", {"cycle_type": "crash-verify", "count": MAX_CYCLE_REPEATS}, invocation_mode="sync")
|
|
1080
1070
|
break
|
|
1081
1071
|
|
|
1082
1072
|
# Bug #4 fix: Detect test-fix cycle pattern
|
|
@@ -1088,7 +1078,7 @@ def sync_orchestration(
|
|
|
1088
1078
|
recent_ops == ['fix', 'test', 'fix', 'test']):
|
|
1089
1079
|
# Pattern detected - this represents MAX_CYCLE_REPEATS iterations
|
|
1090
1080
|
errors.append(f"Detected test-fix cycle repeated {MAX_CYCLE_REPEATS} times. Breaking cycle.")
|
|
1091
|
-
|
|
1081
|
+
log_event(basename, language, "cycle_detected", {"cycle_type": "test-fix", "count": MAX_CYCLE_REPEATS}, invocation_mode="sync")
|
|
1092
1082
|
break
|
|
1093
1083
|
|
|
1094
1084
|
if operation == 'fix':
|
|
@@ -1130,11 +1120,11 @@ def sync_orchestration(
|
|
|
1130
1120
|
extend_attempts = sum(1 for op in operation_history if op == 'test_extend')
|
|
1131
1121
|
if extend_attempts >= MAX_TEST_EXTEND_ATTEMPTS:
|
|
1132
1122
|
# Accept current coverage after max attempts
|
|
1133
|
-
|
|
1123
|
+
log_event(basename, language, "test_extend_limit", {
|
|
1134
1124
|
"attempts": extend_attempts,
|
|
1135
1125
|
"max_attempts": MAX_TEST_EXTEND_ATTEMPTS,
|
|
1136
1126
|
"reason": "Accepting current coverage after max extend attempts"
|
|
1137
|
-
})
|
|
1127
|
+
}, invocation_mode="sync")
|
|
1138
1128
|
success = True
|
|
1139
1129
|
break
|
|
1140
1130
|
|
|
@@ -1152,32 +1142,32 @@ def sync_orchestration(
|
|
|
1152
1142
|
errors.append(f"Conflict detected: {decision.reason}")
|
|
1153
1143
|
error_msg = decision.reason
|
|
1154
1144
|
|
|
1155
|
-
|
|
1156
|
-
|
|
1145
|
+
update_log_entry(log_entry, success=success, cost=0.0, model='none', duration=0.0, error=error_msg)
|
|
1146
|
+
append_log_entry(basename, language, log_entry)
|
|
1157
1147
|
break
|
|
1158
1148
|
|
|
1159
1149
|
# Handle skips - save fingerprint with 'skip:' prefix to distinguish from actual execution
|
|
1160
1150
|
# Bug #11 fix: Use 'skip:' prefix so _is_workflow_complete() knows the op was skipped
|
|
1161
1151
|
if operation == 'verify' and (skip_verify or skip_tests):
|
|
1162
1152
|
skipped_operations.append('verify')
|
|
1163
|
-
|
|
1164
|
-
|
|
1153
|
+
update_log_entry(log_entry, success=True, cost=0.0, model='skipped', duration=0.0, error=None)
|
|
1154
|
+
append_log_entry(basename, language, log_entry)
|
|
1165
1155
|
# Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
|
|
1166
|
-
|
|
1156
|
+
_save_fingerprint_atomic(basename, language, 'skip:verify', pdd_files, 0.0, 'skipped')
|
|
1167
1157
|
continue
|
|
1168
1158
|
if operation == 'test' and skip_tests:
|
|
1169
1159
|
skipped_operations.append('test')
|
|
1170
|
-
|
|
1171
|
-
|
|
1160
|
+
update_log_entry(log_entry, success=True, cost=0.0, model='skipped', duration=0.0, error=None)
|
|
1161
|
+
append_log_entry(basename, language, log_entry)
|
|
1172
1162
|
# Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
|
|
1173
|
-
|
|
1163
|
+
_save_fingerprint_atomic(basename, language, 'skip:test', pdd_files, 0.0, 'skipped')
|
|
1174
1164
|
continue
|
|
1175
1165
|
if operation == 'crash' and (skip_tests or skip_verify):
|
|
1176
1166
|
skipped_operations.append('crash')
|
|
1177
|
-
|
|
1178
|
-
|
|
1167
|
+
update_log_entry(log_entry, success=True, cost=0.0, model='skipped', duration=0.0, error=None)
|
|
1168
|
+
append_log_entry(basename, language, log_entry)
|
|
1179
1169
|
# Save fingerprint with 'skip:' prefix to indicate operation was skipped, not executed
|
|
1180
|
-
|
|
1170
|
+
_save_fingerprint_atomic(basename, language, 'skip:crash', pdd_files, 0.0, 'skipped')
|
|
1181
1171
|
# FIX: Create a synthetic run_report to prevent infinite loop when crash is skipped
|
|
1182
1172
|
# Without this, sync_determine_operation keeps returning 'crash' because no run_report exists
|
|
1183
1173
|
current_hashes = calculate_current_hashes(pdd_files)
|
|
@@ -1189,7 +1179,7 @@ def sync_orchestration(
|
|
|
1189
1179
|
coverage=0.0,
|
|
1190
1180
|
test_hash=current_hashes.get('test_hash')
|
|
1191
1181
|
)
|
|
1192
|
-
|
|
1182
|
+
_save_run_report_atomic(asdict(synthetic_report), basename, language)
|
|
1193
1183
|
continue
|
|
1194
1184
|
|
|
1195
1185
|
current_function_name_ref[0] = operation
|
|
@@ -1237,8 +1227,7 @@ def sync_orchestration(
|
|
|
1237
1227
|
# Use absolute paths to avoid path_resolution_mode mismatch between sync (cwd) and generate (config_base)
|
|
1238
1228
|
result = code_generator_main(ctx, prompt_file=str(pdd_files['prompt'].resolve()), output=str(pdd_files['code'].resolve()), original_prompt_file_path=None, force_incremental_flag=False)
|
|
1239
1229
|
# Clear stale run_report so crash/verify is required for newly generated code
|
|
1240
|
-
|
|
1241
|
-
run_report_file.unlink(missing_ok=True)
|
|
1230
|
+
clear_run_report(basename, language)
|
|
1242
1231
|
elif operation == 'example':
|
|
1243
1232
|
# Ensure example directory exists before generating
|
|
1244
1233
|
pdd_files['example'].parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -1263,8 +1252,8 @@ def sync_orchestration(
|
|
|
1263
1252
|
else:
|
|
1264
1253
|
# Manual check - run the example to see if it crashes
|
|
1265
1254
|
env = os.environ.copy()
|
|
1266
|
-
|
|
1267
|
-
env['PYTHONPATH'] = f"{
|
|
1255
|
+
code_dir = pdd_files['code'].resolve().parent
|
|
1256
|
+
env['PYTHONPATH'] = f"{code_dir}:{env.get('PYTHONPATH', '')}"
|
|
1268
1257
|
# Remove TUI-specific env vars that might contaminate subprocess
|
|
1269
1258
|
for var in ['FORCE_COLOR', 'COLUMNS']:
|
|
1270
1259
|
env.pop(var, None)
|
|
@@ -1305,7 +1294,7 @@ def sync_orchestration(
|
|
|
1305
1294
|
coverage=0.0,
|
|
1306
1295
|
test_hash=test_hash
|
|
1307
1296
|
)
|
|
1308
|
-
|
|
1297
|
+
_save_run_report_atomic(asdict(report), basename, language)
|
|
1309
1298
|
skipped_operations.append('crash')
|
|
1310
1299
|
continue
|
|
1311
1300
|
|
|
@@ -1317,7 +1306,7 @@ def sync_orchestration(
|
|
|
1317
1306
|
pdd_files['example']
|
|
1318
1307
|
)
|
|
1319
1308
|
if auto_fixed:
|
|
1320
|
-
|
|
1309
|
+
log_event(basename, language, "auto_fix_attempted", {"message": auto_fix_msg}, invocation_mode="sync")
|
|
1321
1310
|
# Retry running the example after auto-fix
|
|
1322
1311
|
retry_returncode, retry_stdout, retry_stderr = _run_example_with_error_detection(
|
|
1323
1312
|
cmd_parts,
|
|
@@ -1326,7 +1315,7 @@ def sync_orchestration(
|
|
|
1326
1315
|
)
|
|
1327
1316
|
if retry_returncode == 0:
|
|
1328
1317
|
# Auto-fix worked! Save run report and continue
|
|
1329
|
-
|
|
1318
|
+
log_event(basename, language, "auto_fix_success", {"message": auto_fix_msg}, invocation_mode="sync")
|
|
1330
1319
|
test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
|
|
1331
1320
|
report = RunReport(
|
|
1332
1321
|
datetime.datetime.now(datetime.timezone.utc).isoformat(),
|
|
@@ -1336,7 +1325,7 @@ def sync_orchestration(
|
|
|
1336
1325
|
coverage=0.0,
|
|
1337
1326
|
test_hash=test_hash
|
|
1338
1327
|
)
|
|
1339
|
-
|
|
1328
|
+
_save_run_report_atomic(asdict(report), basename, language)
|
|
1340
1329
|
result = (True, 0.0, 'auto-fix')
|
|
1341
1330
|
success = True
|
|
1342
1331
|
actual_cost = 0.0
|
|
@@ -1366,7 +1355,7 @@ def sync_orchestration(
|
|
|
1366
1355
|
# For non-Python languages, set max_attempts=0 to skip iterative loop
|
|
1367
1356
|
# and go directly to agentic fallback
|
|
1368
1357
|
effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
|
|
1369
|
-
result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
|
|
1358
|
+
result = fix_verification_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), program_file=str(pdd_files['example']), output_results=f"{basename.replace('/', '_')}_verify_results.log", output_code=str(pdd_files['code']), output_program=str(pdd_files['example']), loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], strength=strength, temperature=temperature)
|
|
1370
1359
|
elif operation == 'test':
|
|
1371
1360
|
pdd_files['test'].parent.mkdir(parents=True, exist_ok=True)
|
|
1372
1361
|
# Use merge=True when test file exists to preserve fixes and append new tests
|
|
@@ -1444,14 +1433,37 @@ def sync_orchestration(
|
|
|
1444
1433
|
# Bug #156: Run pytest on ALL matching test files
|
|
1445
1434
|
test_files = pdd_files.get('test_files', [pdd_files['test']])
|
|
1446
1435
|
pytest_args = [python_executable, '-m', 'pytest'] + [str(f) for f in test_files] + ['-v', '--tb=short']
|
|
1447
|
-
|
|
1448
|
-
#
|
|
1449
|
-
#
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1436
|
+
|
|
1437
|
+
# Bug fix: Find project root for proper pytest configuration
|
|
1438
|
+
# This matches the fix in _execute_tests_and_create_run_report()
|
|
1439
|
+
project_root = _find_project_root(pdd_files['test'])
|
|
1440
|
+
|
|
1441
|
+
# Set up subprocess kwargs
|
|
1442
|
+
subprocess_kwargs = {
|
|
1443
|
+
'capture_output': True,
|
|
1444
|
+
'text': True,
|
|
1445
|
+
'timeout': 300,
|
|
1446
|
+
'stdin': subprocess.DEVNULL,
|
|
1447
|
+
'env': clean_env,
|
|
1448
|
+
'start_new_session': True
|
|
1449
|
+
}
|
|
1450
|
+
|
|
1451
|
+
if project_root is not None:
|
|
1452
|
+
# Add PYTHONPATH to include project root and src/ directory
|
|
1453
|
+
paths_to_add = [str(project_root)]
|
|
1454
|
+
src_dir = project_root / "src"
|
|
1455
|
+
if src_dir.is_dir():
|
|
1456
|
+
paths_to_add.insert(0, str(src_dir))
|
|
1457
|
+
existing_pythonpath = clean_env.get("PYTHONPATH", "")
|
|
1458
|
+
if existing_pythonpath:
|
|
1459
|
+
paths_to_add.append(existing_pythonpath)
|
|
1460
|
+
clean_env["PYTHONPATH"] = os.pathsep.join(paths_to_add)
|
|
1461
|
+
|
|
1462
|
+
# Add --rootdir and -c /dev/null to prevent parent config discovery
|
|
1463
|
+
pytest_args.extend([f'--rootdir={project_root}', '-c', '/dev/null'])
|
|
1464
|
+
subprocess_kwargs['cwd'] = str(project_root)
|
|
1465
|
+
|
|
1466
|
+
test_result = subprocess.run(pytest_args, **subprocess_kwargs)
|
|
1455
1467
|
else:
|
|
1456
1468
|
# Use shell command for non-Python
|
|
1457
1469
|
test_result = subprocess.run(
|
|
@@ -1507,7 +1519,7 @@ def sync_orchestration(
|
|
|
1507
1519
|
# For non-Python languages, set max_attempts=0 to skip iterative loop
|
|
1508
1520
|
# and go directly to agentic fallback
|
|
1509
1521
|
effective_max_attempts = 0 if language.lower() != 'python' else max_attempts
|
|
1510
|
-
result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
|
|
1522
|
+
result = fix_main(ctx, prompt_file=str(pdd_files['prompt']), code_file=str(pdd_files['code']), unit_test_file=unit_test_file_for_fix, error_file=str(error_file_path), output_test=str(pdd_files['test']), output_code=str(pdd_files['code']), output_results=f"{basename.replace('/', '_')}_fix_results.log", loop=True, verification_program=str(pdd_files['example']), max_attempts=effective_max_attempts, budget=budget - current_cost_ref[0], auto_submit=True, strength=strength, temperature=temperature)
|
|
1511
1523
|
elif operation == 'update':
|
|
1512
1524
|
result = update_main(ctx, input_prompt_file=str(pdd_files['prompt']), modified_code_file=str(pdd_files['code']), input_code_file=None, output=str(pdd_files['prompt']), use_git=True, strength=strength, temperature=temperature)
|
|
1513
1525
|
else:
|
|
@@ -1526,8 +1538,12 @@ def sync_orchestration(
|
|
|
1526
1538
|
else:
|
|
1527
1539
|
success = result is not None
|
|
1528
1540
|
|
|
1541
|
+
except click.Abort:
|
|
1542
|
+
errors.append(f"Operation '{operation}' was cancelled (user declined or non-interactive environment)")
|
|
1543
|
+
success = False
|
|
1529
1544
|
except Exception as e:
|
|
1530
|
-
|
|
1545
|
+
error_msg = str(e) if str(e) else type(e).__name__
|
|
1546
|
+
errors.append(f"Exception during '{operation}': {error_msg}")
|
|
1531
1547
|
success = False
|
|
1532
1548
|
|
|
1533
1549
|
# Log update
|
|
@@ -1543,10 +1559,10 @@ def sync_orchestration(
|
|
|
1543
1559
|
model_name = result[-1] if len(result) >= 1 else 'unknown'
|
|
1544
1560
|
last_model_name = str(model_name)
|
|
1545
1561
|
operations_completed.append(operation)
|
|
1546
|
-
|
|
1562
|
+
_save_fingerprint_atomic(basename, language, operation, pdd_files, actual_cost, str(model_name), atomic_state=atomic_state)
|
|
1547
1563
|
|
|
1548
|
-
|
|
1549
|
-
|
|
1564
|
+
update_log_entry(log_entry, success=success, cost=actual_cost, model=model_name, duration=duration, error=errors[-1] if errors and not success else None)
|
|
1565
|
+
append_log_entry(basename, language, log_entry)
|
|
1550
1566
|
|
|
1551
1567
|
# Post-operation checks (simplified)
|
|
1552
1568
|
if success and operation == 'crash':
|
|
@@ -1573,12 +1589,12 @@ def sync_orchestration(
|
|
|
1573
1589
|
# Include test_hash for staleness detection
|
|
1574
1590
|
test_hash = calculate_sha256(pdd_files['test']) if pdd_files['test'].exists() else None
|
|
1575
1591
|
report = RunReport(datetime.datetime.now(datetime.timezone.utc).isoformat(), returncode, 1 if returncode==0 else 0, 0 if returncode==0 else 1, 100.0 if returncode==0 else 0.0, test_hash=test_hash)
|
|
1576
|
-
|
|
1592
|
+
_save_run_report_atomic(asdict(report), basename, language)
|
|
1577
1593
|
except Exception as e:
|
|
1578
1594
|
# Bug #8 fix: Don't silently swallow exceptions - log them and mark as error
|
|
1579
1595
|
error_msg = f"Post-crash verification failed: {e}"
|
|
1580
1596
|
errors.append(error_msg)
|
|
1581
|
-
|
|
1597
|
+
log_event(basename, language, "post_crash_verification_failed", {"error": str(e)}, invocation_mode="sync")
|
|
1582
1598
|
|
|
1583
1599
|
if success and operation == 'fix':
|
|
1584
1600
|
# Re-run tests to update run_report after successful fix
|
|
@@ -1595,7 +1611,8 @@ def sync_orchestration(
|
|
|
1595
1611
|
)
|
|
1596
1612
|
|
|
1597
1613
|
if not success:
|
|
1598
|
-
|
|
1614
|
+
if not errors:
|
|
1615
|
+
errors.append(f"Operation '{operation}' failed.")
|
|
1599
1616
|
break
|
|
1600
1617
|
|
|
1601
1618
|
except BaseException as e:
|
|
@@ -1605,7 +1622,7 @@ def sync_orchestration(
|
|
|
1605
1622
|
traceback.print_exc()
|
|
1606
1623
|
finally:
|
|
1607
1624
|
try:
|
|
1608
|
-
|
|
1625
|
+
log_event(basename, language, "lock_released", {"pid": os.getpid(), "total_cost": current_cost_ref[0]}, invocation_mode="sync")
|
|
1609
1626
|
except: pass
|
|
1610
1627
|
|
|
1611
1628
|
# Return result dict
|
|
@@ -1700,4 +1717,4 @@ if __name__ == '__main__':
|
|
|
1700
1717
|
PDD_DIR.mkdir(exist_ok=True)
|
|
1701
1718
|
META_DIR.mkdir(exist_ok=True)
|
|
1702
1719
|
result = sync_orchestration(basename="my_calculator", language="python", quiet=True)
|
|
1703
|
-
print(json.dumps(result, indent=2))
|
|
1720
|
+
print(json.dumps(result, indent=2))
|