pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +80 -19
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +281 -81
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -62
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +331 -77
- pdd/fix_error_loop.py +209 -60
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +319 -272
- pdd/fix_verification_main.py +57 -17
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +48 -9
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/increase_tests.py +7 -0
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1278 -110
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +98 -101
- pdd/prompts/change_LLM.prompt +1 -3
- pdd/prompts/detect_change_LLM.prompt +562 -3
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -2
- pdd/prompts/insert_includes_LLM.prompt +1181 -6
- pdd/prompts/split_LLM.prompt +1 -62
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/prompts/xml_convertor_LLM.prompt +3246 -7
- pdd/pytest_output.py +188 -21
- pdd/python_env_detector.py +151 -0
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +56 -7
- pdd/sync_determine_operation.py +918 -186
- pdd/sync_main.py +82 -32
- pdd/sync_orchestration.py +1456 -453
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.42.dist-info/RECORD +0 -115
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_determine_operation.py
CHANGED

@@ -56,7 +56,8 @@ LOCKS_DIR = get_locks_dir()
 
 # Export constants for other modules
 __all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
-           'sync_determine_operation', 'analyze_conflict_with_llm'
+           'sync_determine_operation', 'analyze_conflict_with_llm', 'read_run_report', 'get_pdd_file_paths',
+           '_check_example_success_history']
 
 
 @dataclass
@@ -68,7 +69,8 @@ class Fingerprint:
     prompt_hash: Optional[str]
     code_hash: Optional[str]
     example_hash: Optional[str]
-    test_hash: Optional[str]
+    test_hash: Optional[str]  # Keep for backward compat (primary test file)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
@@ -79,17 +81,19 @@ class RunReport:
     tests_passed: int
     tests_failed: int
     coverage: float
+    test_hash: Optional[str] = None  # Hash of test file when tests were run (for staleness detection)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}
 
 
 @dataclass
 class SyncDecision:
     """Represents a decision about what PDD operation to run next."""
-    operation: str  # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'nothing'
-    reason: str
-
-    estimated_cost: float = 0.0
-
-    prerequisites: List[str] =
+    operation: str  # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'nothing', 'all_synced', 'error', 'fail_and_request_manual_merge'
+    reason: str  # A human-readable explanation for the decision
+    confidence: float = 1.0  # Confidence level in the decision, 0.0 to 1.0, default 1.0 for deterministic decisions
+    estimated_cost: float = 0.0  # Estimated cost for the operation in dollars, default 0.0
+    details: Optional[Dict[str, Any]] = None  # Extra context for logging and debugging, default None
+    prerequisites: Optional[List[str]] = None  # List of operations that should be completed first, default None
 
 
 class SyncLock:
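The SyncDecision dataclass now carries a confidence score and a free-form details dict alongside the operation and reason. A minimal sketch of how a caller might gate execution on the new fields, assuming the field definitions above (the should_auto_run helper and its 0.8 threshold are illustrative, not part of the package):

    from dataclasses import dataclass
    from typing import Any, Dict, List, Optional

    @dataclass
    class SyncDecision:
        operation: str
        reason: str
        confidence: float = 1.0
        estimated_cost: float = 0.0
        details: Optional[Dict[str, Any]] = None
        prerequisites: Optional[List[str]] = None

    def should_auto_run(decision: SyncDecision, min_confidence: float = 0.8) -> bool:
        # Only run automatically when the heuristic is confident enough.
        return decision.operation != 'nothing' and decision.confidence >= min_confidence

    decision = SyncDecision(
        operation='generate',
        reason='Prompt changed - regenerate code',
        confidence=0.90,
        estimated_cost=0.50,
        details={'decision_type': 'heuristic', 'prompt_changed': True},
    )
    assert should_auto_run(decision)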
@@ -208,51 +212,131 @@ def get_extension(language: str) -> str:
     return extensions.get(language.lower(), language.lower())
 
 
-def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
+def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts", context_override: Optional[str] = None) -> Dict[str, Path]:
     """Returns a dictionary mapping file types to their expected Path objects."""
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"get_pdd_file_paths called: basename={basename}, language={language}, prompts_dir={prompts_dir}")
+
     try:
         # Use construct_paths to get configuration-aware paths
         prompt_filename = f"{basename}_{language}.prompt"
         prompt_path = str(Path(prompts_dir) / prompt_filename)
+        logger.info(f"Checking prompt_path={prompt_path}, exists={Path(prompt_path).exists()}")
 
-        # Check if prompt file exists - if not, we
+        # Check if prompt file exists - if not, we still need configuration-aware paths
         if not Path(prompt_path).exists():
-            #
+            # Use construct_paths with minimal inputs to get configuration-aware paths
+            # even when prompt doesn't exist
            extension = get_extension(language)
-
-
-
-
-
-
+            try:
+                # Call construct_paths with empty input_file_paths to get configured output paths
+                resolved_config, _, output_paths, _ = construct_paths(
+                    input_file_paths={},  # Empty dict since files don't exist yet
+                    force=True,
+                    quiet=True,
+                    command="sync",
+                    command_options={"basename": basename, "language": language},
+                    context_override=context_override
+                )
+
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"resolved_config: {resolved_config}")
+                logger.info(f"output_paths: {output_paths}")
+
+                # Extract directory configuration from resolved_config
+                # Note: construct_paths sets tests_dir, examples_dir, code_dir keys
+                test_dir = resolved_config.get('tests_dir', 'tests/')
+                example_dir = resolved_config.get('examples_dir', 'examples/')
+                code_dir = resolved_config.get('code_dir', './')
+
+                logger.info(f"Extracted dirs - test: {test_dir}, example: {example_dir}, code: {code_dir}")
+
+                # Ensure directories end with /
+                if test_dir and not test_dir.endswith('/'):
+                    test_dir = test_dir + '/'
+                if example_dir and not example_dir.endswith('/'):
+                    example_dir = example_dir + '/'
+                if code_dir and not code_dir.endswith('/'):
+                    code_dir = code_dir + '/'
+
+                # Construct the full paths
+                test_path = f"{test_dir}test_{basename}.{extension}"
+                example_path = f"{example_dir}{basename}_example.{extension}"
+                code_path = f"{code_dir}{basename}.{extension}"
+
+                logger.debug(f"Final paths: test={test_path}, example={example_path}, code={code_path}")
+
+                # Convert to Path objects
+                test_path = Path(test_path)
+                example_path = Path(example_path)
+                code_path = Path(code_path)
+
+                # Bug #156: Find all matching test files
+                test_dir_path = test_path.parent
+                test_stem = f"test_{basename}"
+                if test_dir_path.exists():
+                    matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
+                else:
+                    matching_test_files = [test_path] if test_path.exists() else []
+
+                result = {
+                    'prompt': Path(prompt_path),
+                    'code': code_path,
+                    'example': example_path,
+                    'test': test_path,
+                    'test_files': matching_test_files or [test_path]  # Bug #156
+                }
+                logger.debug(f"get_pdd_file_paths returning (prompt missing): test={test_path}")
+                return result
+            except Exception as e:
+                # If construct_paths fails, fall back to current directory paths
+                # This maintains backward compatibility
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.debug(f"construct_paths failed for non-existent prompt, using defaults: {e}")
+                fallback_test_path = Path(f"test_{basename}.{extension}")
+                # Bug #156: Find matching test files even in fallback
+                if Path('.').exists():
+                    fallback_matching = sorted(Path('.').glob(f"test_{basename}*.{extension}"))
+                else:
+                    fallback_matching = [fallback_test_path] if fallback_test_path.exists() else []
+                return {
+                    'prompt': Path(prompt_path),
+                    'code': Path(f"{basename}.{extension}"),
+                    'example': Path(f"{basename}_example.{extension}"),
+                    'test': fallback_test_path,
+                    'test_files': fallback_matching or [fallback_test_path]  # Bug #156
+                }
 
         input_file_paths = {
             "prompt_file": prompt_path
         }
 
-        #
+        # Call construct_paths to get configuration-aware paths
         resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
             input_file_paths=input_file_paths,
             force=True,  # Use force=True to avoid interactive prompts during sync
             quiet=True,
-            command="
-            command_options={}
+            command="sync",  # Use sync command to get more tolerant path handling
+            command_options={"basename": basename, "language": language},
+            context_override=context_override
         )
 
-        #
-        #
-
-
-
-
-        # For other commands, we need to construct the full set of paths
-
-        # Get the code file path from output_file_paths
-        code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
+        # For sync command, output_file_paths contains the configured paths
+        # Extract the code path from output_file_paths
+        code_path = output_file_paths.get('generate_output_path', '')
+        if not code_path:
+            # Try other possible keys
+            code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
         if not code_path:
-            # Fallback to constructing from basename
+            # Fallback to constructing from basename with configuration
             extension = get_extension(language)
-
+            code_dir = resolved_config.get('generate_output_path', './')
+            if code_dir and not code_dir.endswith('/'):
+                code_dir = code_dir + '/'
+            code_path = f"{code_dir}{basename}.{extension}"
 
         # Get configured paths for example and test files using construct_paths
         # Note: construct_paths requires files to exist, so we need to handle the case
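Bug #156 changes test discovery from a single hard-coded test_<basename>.<ext> file to every file matching the test_<basename>* prefix, as seen in the glob calls above. A self-contained sketch of that pattern (directory and module names are hypothetical):

    from pathlib import Path

    def find_test_files(tests_dir: Path, basename: str, extension: str) -> list:
        # Matches test_calculator.py, test_calculator_edge_cases.py, etc.;
        # sorted() keeps the ordering stable across runs.
        return sorted(tests_dir.glob(f"test_{basename}*.{extension}"))

    tests = find_test_files(Path("tests"), "calculator", "py")
    print(tests)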
@@ -268,18 +352,27 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
 
         try:
             # Get example path using example command
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            # Get test path using test command
-
-
-
-
+
+            # Get test path using test command - handle case where test file doesn't exist yet
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except FileNotFoundError:
+                # Test file doesn't exist yet - create default path
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         finally:
             # Clean up temporary file if we created it
@@ -297,17 +390,26 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
         # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
         try:
             # Get configured directories by using construct_paths with just the prompt file
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-
-
-
-
-
+
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except Exception:
+                # If test path construction fails, use default naming
+                test_path = Path(f"test_{basename}.{get_extension(language)}")
 
         except Exception:
             # Final fallback to deriving from code path if all else fails
@@ -318,21 +420,47 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
             example_path = code_dir / f"{code_stem}_example{code_ext}"
             test_path = code_dir / f"test_{code_stem}{code_ext}"
 
+        # Ensure all paths are Path objects
+        if isinstance(code_path, str):
+            code_path = Path(code_path)
+
+        # Keep paths as they are (absolute or relative as returned by construct_paths)
+        # This ensures consistency with how construct_paths expects them
+
+        # Bug #156: Find all matching test files
+        test_dir = test_path.parent
+        test_stem = f"test_{basename}"
+        extension = get_extension(language)
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
+
         return {
             'prompt': Path(prompt_path),
-            'code':
+            'code': code_path,
             'example': example_path,
-            'test': test_path
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
     except Exception as e:
         # Fallback to simple naming if construct_paths fails
         extension = get_extension(language)
+        test_path = Path(f"test_{basename}.{extension}")
+        # Bug #156: Try to find matching test files even in fallback
+        test_dir = Path('.')
+        test_stem = f"test_{basename}"
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
         return {
             'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
             'code': Path(f"{basename}.{extension}"),
             'example': Path(f"{basename}_example.{extension}"),
-            'test':
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }
 
 
@@ -371,7 +499,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             prompt_hash=data.get('prompt_hash'),
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
-            test_hash=data.get('test_hash')
+            test_hash=data.get('test_hash'),
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -395,19 +524,29 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
             exit_code=data['exit_code'],
             tests_passed=data['tests_passed'],
             tests_failed=data['tests_failed'],
-            coverage=data['coverage']
+            coverage=data['coverage'],
+            test_hash=data.get('test_hash'),  # Optional for backward compatibility
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
 
 
-def calculate_current_hashes(paths: Dict[str,
+def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
     """Computes the hashes for all current files on disk."""
     # Return hash keys that match what the fingerprint expects
-
-
-
-
+    hashes = {}
+    for file_type, file_path in paths.items():
+        if file_type == 'test_files':
+            # Bug #156: Calculate hashes for all test files
+            hashes['test_files'] = {
+                f.name: calculate_sha256(f)
+                for f in file_path
+                if isinstance(f, Path) and f.exists()
+            }
+        elif isinstance(file_path, Path):
+            hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
+    return hashes
 
 
 def get_git_diff(file_path: Path) -> str:
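calculate_current_hashes relies on a calculate_sha256 helper defined elsewhere in the module. A minimal stand-in using hashlib, for reference (the chunked read is an implementation choice, not taken from this diff):

    import hashlib
    from pathlib import Path

    def calculate_sha256(file_path: Path) -> str:
        # Hash in 64 KiB chunks so large files are never fully loaded into memory.
        digest = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                digest.update(chunk)
        return digest.hexdigest()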
@@ -428,6 +567,27 @@ def get_git_diff(file_path: Path) -> str:
         return ""
 
 
+def estimate_operation_cost(operation: str, language: str = "python") -> float:
+    """Returns estimated cost in dollars for each operation based on typical LLM usage."""
+    cost_map = {
+        'auto-deps': 0.10,
+        'generate': 0.50,
+        'example': 0.30,
+        'crash': 0.40,
+        'verify': 0.35,
+        'test': 0.60,
+        'test_extend': 0.60,  # Same cost as test - generates additional tests
+        'fix': 0.45,
+        'update': 0.25,
+        'analyze_conflict': 0.20,
+        'nothing': 0.0,
+        'all_synced': 0.0,
+        'error': 0.0,
+        'fail_and_request_manual_merge': 0.0
+    }
+    return cost_map.get(operation, 0.0)
+
+
 def validate_expected_files(fingerprint: Optional[Fingerprint], paths: Dict[str, Path]) -> Dict[str, bool]:
     """
     Validate that files expected to exist based on fingerprint actually exist.
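The per-operation estimates give sync a way to stop before a budget is exhausted. A sketch of that kind of check, assuming the cost table above (the running-total helper is illustrative, not the package's budget logic):

    def plan_within_budget(planned_ops, budget, cost_of):
        # Keep operations while the cumulative estimate stays within budget.
        total, affordable = 0.0, []
        for op in planned_ops:
            total += cost_of(op)
            if total > budget:
                break
            affordable.append(op)
        return affordable

    cost_map = {'generate': 0.50, 'example': 0.30, 'test': 0.60}
    print(plan_within_budget(['generate', 'example', 'test'], 1.0,
                             lambda op: cost_map.get(op, 0.0)))
    # ['generate', 'example']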
@@ -492,17 +652,27 @@ def _handle_missing_expected_files(
             return SyncDecision(
                 operation='auto-deps',
                 reason='Code file missing, prompt has dependencies - regenerate from auto-deps',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('auto-deps'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'prompt_path': str(paths['prompt']),
+                    'has_dependencies': True
+                }
             )
         else:
             return SyncDecision(
                 operation='generate',
                 reason='Code file missing - regenerate from prompt',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('generate'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'prompt_path': str(paths['prompt']),
+                    'has_dependencies': False
+                }
             )
 
     elif 'example' in missing_files and paths['code'].exists():
@@ -510,9 +680,13 @@ def _handle_missing_expected_files(
         return SyncDecision(
             operation='example',
             reason='Example file missing - regenerate example',
-
-            estimated_cost=
-
+            confidence=1.0,
+            estimated_cost=estimate_operation_cost('example'),
+            details={
+                'decision_type': 'heuristic',
+                'missing_files': missing_files,
+                'code_path': str(paths['code'])
+            }
         )
 
     elif 'test' in missing_files and paths['code'].exists() and paths['example'].exists():
@@ -522,47 +696,137 @@ def _handle_missing_expected_files(
             return SyncDecision(
                 operation='nothing',
                 reason='Test file missing but --skip-tests specified - workflow complete',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('nothing'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'skip_tests': True
+                }
             )
         else:
             return SyncDecision(
                 operation='test',
                 reason='Test file missing - regenerate tests',
-
-                estimated_cost=
-
+                confidence=1.0,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'missing_files': missing_files,
+                    'code_path': str(paths['code'])
+                }
             )
 
     # Fallback - regenerate everything
     return SyncDecision(
         operation='generate',
         reason='Multiple files missing - regenerate from prompt',
-
-        estimated_cost=
-
+        confidence=1.0,
+        estimated_cost=estimate_operation_cost('generate'),
+        details={
+            'decision_type': 'heuristic',
+            'missing_files': missing_files
+        }
     )
 
 
-def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False
+def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False,
+                          basename: str = None, language: str = None) -> bool:
     """
     Check if workflow is complete considering skip flags.
-
+
     Args:
         paths: Dict mapping file types to their expected Path objects
         skip_tests: If True, test files are not required for completion
         skip_verify: If True, verification operations are not required
-
+        basename: Module basename (required for run_report check)
+        language: Module language (required for run_report check)
+
     Returns:
-        True if all required files exist
+        True if all required files exist AND have been validated (run_report exists)
     """
     required_files = ['code', 'example']
-
+
     if not skip_tests:
         required_files.append('test')
-
-
+
+    # Check all required files exist
+    if not all(paths[f].exists() for f in required_files):
+        return False
+
+    # Also check that run_report exists and code works (exit_code == 0)
+    # Without this, newly generated code would incorrectly be marked as "complete"
+    if basename and language:
+        run_report = read_run_report(basename, language)
+        if not run_report or run_report.exit_code != 0:
+            return False
+
+        # Check that run_report corresponds to current test files (staleness detection)
+        # If any test file changed since run_report was created, we can't trust the results
+        if not skip_tests:
+            # Bug #156: Check ALL test files, not just the primary one
+            if 'test_files' in paths and run_report.test_files:
+                # New multi-file comparison
+                current_test_hashes = {
+                    f.name: calculate_sha256(f)
+                    for f in paths['test_files']
+                    if f.exists()
+                }
+                stored_test_hashes = run_report.test_files
+
+                # Check if any test file changed or new ones added/removed
+                if set(current_test_hashes.keys()) != set(stored_test_hashes.keys()):
+                    return False  # Test files added or removed
+
+                for fname, current_hash in current_test_hashes.items():
+                    if stored_test_hashes.get(fname) != current_hash:
+                        return False  # Test file content changed
+            elif 'test' in paths and paths['test'].exists():
+                # Backward compat: single file check
+                current_test_hash = calculate_sha256(paths['test'])
+                if run_report.test_hash and current_test_hash != run_report.test_hash:
+                    # run_report was created for a different version of the test file
+                    return False
+                if not run_report.test_hash:
+                    # Legacy run_report without test_hash - check fingerprint timestamp as fallback
+                    fingerprint = read_fingerprint(basename, language)
+                    if fingerprint:
+                        # If fingerprint is newer than run_report, run_report might be stale
+                        from datetime import datetime
+                        try:
+                            fp_time = datetime.fromisoformat(fingerprint.timestamp.replace('Z', '+00:00'))
+                            rr_time = datetime.fromisoformat(run_report.timestamp.replace('Z', '+00:00'))
+                            if fp_time > rr_time:
+                                return False  # run_report predates fingerprint, might be stale
+                        except (ValueError, AttributeError):
+                            pass  # If timestamps can't be parsed, skip this check
+
+        # Check verify has been done (unless skip_verify)
+        # Without this, workflow would be "complete" after crash even though verify hasn't run
+        # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+        if not skip_verify:
+            fingerprint = read_fingerprint(basename, language)
+            if fingerprint:
+                # If command starts with 'skip:', the operation was skipped, not completed
+                if fingerprint.command.startswith('skip:'):
+                    return False
+                if fingerprint.command not in ['verify', 'test', 'fix', 'update']:
+                    return False
+
+        # CRITICAL FIX: Check tests have been run (unless skip_tests)
+        # Without this, workflow would be "complete" after verify even though tests haven't run
+        # This prevents false positive success when skip_verify=True but tests are still required
+        # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+        if not skip_tests:
+            fp = read_fingerprint(basename, language)
+            if fp:
+                # If command starts with 'skip:', the operation was skipped, not completed
+                if fp.command.startswith('skip:'):
+                    return False
+                if fp.command not in ['test', 'fix', 'update']:
+                    return False
+
+    return True
 
 
 def check_for_dependencies(prompt_content: str) -> bool:
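The staleness detection compares the per-file hash map stored in the run report against freshly computed hashes; any added, removed, or modified test file invalidates the report. The core comparison, reduced to a self-contained sketch:

    def test_files_unchanged(stored, current):
        # Stale if the set of test files differs or any individual hash changed.
        if set(stored) != set(current):
            return False
        return all(stored[name] == current[name] for name in current)

    stored = {"test_foo.py": "aaa", "test_foo_extra.py": "bbb"}
    assert test_files_unchanged(stored, dict(stored))
    assert not test_files_unchanged(stored, {"test_foo.py": "aaa"})            # file removed
    assert not test_files_unchanged(stored, {**stored, "test_foo.py": "ccc"})  # content changed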
@@ -594,7 +858,60 @@ def check_for_dependencies(prompt_content: str) -> bool:
     return has_xml_deps or has_explicit_deps
 
 
-def
+def _check_example_success_history(basename: str, language: str) -> bool:
+    """
+    Check if the example has run successfully before by examining historical fingerprints and run reports.
+
+    Args:
+        basename: The base name for the PDD unit
+        language: The programming language
+
+    Returns:
+        True if the example has run successfully before, False otherwise
+    """
+    meta_dir = get_meta_dir()
+
+    # Strategy 1: Check if there's a fingerprint with 'verify' command (indicates successful example run)
+    # Cache fingerprint and run report to avoid redundant I/O operations
+    fingerprint = read_fingerprint(basename, language)
+    current_run_report = read_run_report(basename, language)
+
+    # Strategy 1: Check if there's a fingerprint with 'verify' command (indicates successful example run)
+    if fingerprint and fingerprint.command == 'verify':
+        return True
+
+    # Strategy 2: Check current run report for successful runs (exit_code == 0)
+    # Note: We check the current run report for successful history since it's updated
+    # This allows for a simple check of recent success
+    if current_run_report and current_run_report.exit_code == 0:
+        return True
+
+    # Strategy 2b: Look for historical run reports with exit_code == 0
+    # Check all run report files in the meta directory that match the pattern
+    run_report_pattern = f"{basename}_{language}_run"
+    for file in meta_dir.glob(f"{run_report_pattern}*.json"):
+        try:
+            with open(file, 'r') as f:
+                data = json.load(f)
+
+            # If we find any historical run with exit_code == 0, the example has run successfully
+            if data.get('exit_code') == 0:
+                return True
+        except (json.JSONDecodeError, KeyError, IOError):
+            continue
+
+    # Strategy 3: Check if fingerprint has example_hash and was created after successful operations
+    # Commands that indicate example was working: 'example', 'verify', 'test', 'fix'
+    if fingerprint and fingerprint.example_hash:
+        successful_commands = {'example', 'verify', 'test', 'fix'}
+        if fingerprint.command in successful_commands:
+            # If the fingerprint was created after these commands, the example likely worked
+            return True
+
+    return False
+
+
+def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Core decision-making function for sync operations with skip flag awareness.
 
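_check_example_success_history treats any historical run report with exit_code == 0 as evidence that the example once worked. The scan over the meta directory, as a self-contained sketch (file naming follows the pattern in the diff; the directory itself is hypothetical):

    import json
    from pathlib import Path

    def has_successful_run(meta_dir: Path, basename: str, language: str) -> bool:
        # Any historical run report with exit_code == 0 counts as past success.
        for report in meta_dir.glob(f"{basename}_{language}_run*.json"):
            try:
                if json.loads(report.read_text()).get('exit_code') == 0:
                    return True
            except (json.JSONDecodeError, OSError):
                continue
        return False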
@@ -614,14 +931,14 @@ def sync_determine_operation(basename: str, language: str, target_coverage: floa
 
     if log_mode:
         # Skip locking for read-only analysis
-        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
     else:
         # Normal exclusive locking for actual operations
         with SyncLock(basename, language) as lock:
-            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
 
 
-def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Perform the sync state analysis without locking concerns.
 
@@ -650,37 +967,149 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
 
     # Read fingerprint early since we need it for crash verification
     fingerprint = read_fingerprint(basename, language)
-
+
+    # Check if auto-deps just completed - ALWAYS regenerate code after auto-deps
+    # This must be checked early, before any run_report processing, because:
+    # 1. Old run_report (if exists) is stale and should be ignored
+    # 2. auto-deps updates dependencies but doesn't regenerate code
+    if fingerprint and fingerprint.command == 'auto-deps':
+        paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+        return SyncDecision(
+            operation='generate',
+            reason='Auto-deps completed - regenerate code with updated prompt',
+            confidence=0.90,
+            estimated_cost=estimate_operation_cost('generate'),
+            details={
+                'decision_type': 'heuristic',
+                'previous_command': 'auto-deps',
+                'code_exists': paths['code'].exists() if paths.get('code') else False,
+                'regenerate_after_autodeps': True
+            }
+        )
+
     run_report = read_run_report(basename, language)
-    if
-
-
+    # Only process runtime signals (crash/fix/test) if we have a fingerprint
+    # Without a fingerprint, run_report is stale/orphaned and should be ignored
+    if run_report and fingerprint:
+        # Check for prompt changes FIRST - prompt changes take priority over runtime signals
+        # If the user modified the prompt, we need to regenerate regardless of runtime state
+        if fingerprint:
+            paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            current_prompt_hash = calculate_sha256(paths['prompt'])
+            if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
+                prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
+                has_deps = check_for_dependencies(prompt_content)
+                return SyncDecision(
+                    operation='auto-deps' if has_deps else 'generate',
+                    reason='Prompt changed - regenerating (takes priority over runtime signals)',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_changed': True,
+                        'previous_command': fingerprint.command,
+                        'runtime_state_ignored': True
+                    }
+                )
+
+        # Check if we just completed a crash operation and need verification FIRST
+        # This takes priority over test failures because we need to verify the crash fix worked
+        # BUT only proceed to verify if exit_code == 0 (crash fix succeeded)
+        if fingerprint and fingerprint.command == 'crash' and not skip_verify:
+            if run_report.exit_code != 0:
+                # Crash fix didn't work - need to re-run crash
+                return SyncDecision(
+                    operation='crash',
+                    reason=f'Previous crash operation failed (exit_code={run_report.exit_code}) - retry crash fix',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'previous_command': 'crash',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_retry'
+                    }
+                )
             return SyncDecision(
-                operation='
-                reason=
-
-                estimated_cost=
-
+                operation='verify',
+                reason='Previous crash operation completed - verify example runs correctly',
+                confidence=0.90,
+                estimated_cost=estimate_operation_cost('verify'),
+                details={
+                    'decision_type': 'heuristic',
+                    'previous_command': 'crash',
+                    'current_exit_code': run_report.exit_code,
+                    'fingerprint_command': fingerprint.command
+                }
             )
 
+        # Check test failures (after crash verification check)
+        if run_report.tests_failed > 0:
+            # First check if the test file actually exists
+            pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            test_file = pdd_files.get('test')
+
+            # Only suggest 'fix' if test file exists
+            if test_file and test_file.exists():
+                return SyncDecision(
+                    operation='fix',
+                    reason=f'Test failures detected: {run_report.tests_failed} failed tests',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'tests_failed': run_report.tests_failed,
+                        'exit_code': run_report.exit_code,
+                        'coverage': run_report.coverage
+                    }
+                )
+            # If test file doesn't exist but we have test failures in run report,
+            # we need to generate the test first
+            else:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test failures reported but test file missing - need to generate tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'run_report_shows_failures': True,
+                        'test_file_exists': False
+                    }
+                )
+
         # Then check for runtime crashes (only if no test failures)
         if run_report.exit_code != 0:
-            #
-
+            # Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
+            has_example_run_successfully = _check_example_success_history(basename, language)
+
+            if has_example_run_successfully:
                 return SyncDecision(
-                    operation='
-                    reason='
-
-                    estimated_cost=
-
+                    operation='fix',
+                    reason='Runtime error detected but example has run successfully before - prefer fix over crash',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'timestamp': run_report.timestamp,
+                        'example_success_history': True,
+                        'decision_rationale': 'prefer_fix_over_crash'
+                    }
                 )
             else:
                 return SyncDecision(
                     operation='crash',
-                    reason='Runtime error detected in last run',
-
-                    estimated_cost=
-
+                    reason='Runtime error detected in last run - no successful example history',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'timestamp': run_report.timestamp,
+                        'example_success_history': False,
+                        'decision_rationale': 'crash_without_history'
+                    }
                 )
 
         if run_report.coverage < target_coverage:
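The hunk above encodes an explicit precedence: a prompt edit outranks every runtime signal, a just-finished crash fix is verified (or retried) before anything else, failing tests outrank a generic crash, and a crash with a history of success is routed to fix rather than crash. That ordering can be read as a first-match rule table; a condensed sketch (the state keys and rule set are simplified from the diff, not the package's actual control flow):

    RULES = [
        ('generate', lambda s: s['prompt_changed']),                                # prompt edits win
        ('crash',    lambda s: s['last_command'] == 'crash' and s['exit_code'] != 0),
        ('verify',   lambda s: s['last_command'] == 'crash'),                       # crash fixed - verify it
        ('fix',      lambda s: s['tests_failed'] > 0),
        ('crash',    lambda s: s['exit_code'] != 0),
        ('test',     lambda s: s['coverage'] < s['target_coverage']),
    ]

    def first_operation(state):
        # First matching rule wins; list order encodes priority.
        for op, predicate in RULES:
            if predicate(state):
                return op
        return None

    state = {'prompt_changed': False, 'last_command': 'crash', 'exit_code': 0,
             'tests_failed': 0, 'coverage': 95.0, 'target_coverage': 90.0}
    print(first_operation(state))  # verify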
@@ -690,21 +1119,50 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 return SyncDecision(
                     operation='all_synced',
                     reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% but tests skipped',
-
-                    estimated_cost=
-
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('all_synced'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'current_coverage': run_report.coverage,
+                        'target_coverage': target_coverage,
+                        'tests_skipped': True,
+                        'skip_tests': True
+                    }
+                )
+            elif run_report.tests_failed == 0 and run_report.tests_passed > 0:
+                # Tests pass but coverage is below target
+                # Return 'test_extend' to signal we need to ADD more tests, not regenerate
+                return SyncDecision(
+                    operation='test_extend',
+                    reason=f'Tests pass ({run_report.tests_passed} passed) but coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% - extending tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'current_coverage': run_report.coverage,
+                        'target_coverage': target_coverage,
+                        'tests_passed': run_report.tests_passed,
+                        'tests_failed': run_report.tests_failed,
+                        'extend_tests': True
+                    }
                 )
             else:
                 return SyncDecision(
                     operation='test',
                     reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}%',
-
-                    estimated_cost=
-
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'current_coverage': run_report.coverage,
+                        'target_coverage': target_coverage,
+                        'tests_passed': run_report.tests_passed,
+                        'tests_failed': run_report.tests_failed
+                    }
                 )
 
     # 2. Analyze File State
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
     current_hashes = calculate_current_hashes(paths)
 
     # 3. Implement the Decision Tree
@@ -716,25 +1174,39 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             return SyncDecision(
                 operation='auto-deps',
                 reason='New prompt with dependencies detected',
-
-                estimated_cost=
-
+                confidence=0.80,
+                estimated_cost=estimate_operation_cost('auto-deps'),
+                details={
+                    'decision_type': 'heuristic',
+                    'prompt_path': str(paths['prompt']),
+                    'fingerprint_found': False,
+                    'has_dependencies': True
+                }
             )
         else:
             return SyncDecision(
                 operation='generate',
                 reason='New prompt ready for code generation',
-
-                estimated_cost=
-
+                confidence=0.90,
+                estimated_cost=estimate_operation_cost('generate'),
+                details={
+                    'decision_type': 'heuristic',
+                    'prompt_path': str(paths['prompt']),
+                    'fingerprint_found': False,
+                    'has_dependencies': False
+                }
             )
     else:
         return SyncDecision(
             operation='nothing',
             reason='No prompt file and no history - nothing to do',
-
-            estimated_cost=
-
+            confidence=1.0,
+            estimated_cost=estimate_operation_cost('nothing'),
+            details={
+                'decision_type': 'heuristic',
+                'prompt_exists': False,
+                'fingerprint_found': False
+            }
         )
 
     # CRITICAL FIX: Validate expected files exist before hash comparison
@@ -767,54 +1239,203 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
767
1239
|
|
|
768
1240
|
if not changes:
|
|
769
1241
|
# No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
|
|
770
|
-
if _is_workflow_complete(paths, skip_tests, skip_verify):
|
|
1242
|
+
if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
|
|
771
1243
|
return SyncDecision(
|
|
772
1244
|
operation='nothing',
|
|
773
1245
|
reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
|
|
774
|
-
|
|
775
|
-
estimated_cost=
|
|
776
|
-
|
|
1246
|
+
confidence=1.0,
|
|
1247
|
+
estimated_cost=estimate_operation_cost('nothing'),
|
|
1248
|
+
details={
|
|
1249
|
+
'decision_type': 'heuristic',
|
|
1250
|
+
'skip_tests': skip_tests,
|
|
1251
|
+
'skip_verify': skip_verify,
|
|
1252
|
+
'workflow_complete': True
|
|
1253
|
+
}
|
|
777
1254
|
)
|
|
778
|
-
|
|
1255
|
+
|
|
1256
|
+
# Handle incomplete workflow when all files exist (including test)
|
|
1257
|
+
# This addresses the blind spot where crash/verify/test logic only runs when test is missing
|
|
1258
|
+
if (paths['code'].exists() and paths['example'].exists() and paths['test'].exists()):
|
|
1259
|
+
run_report = read_run_report(basename, language)
|
|
1260
|
+
|
|
1261
|
+
# BUG 4 & 1: No run_report OR crash detected (exit_code != 0)
|
|
1262
|
+
if not run_report or run_report.exit_code != 0:
|
|
1263
|
+
return SyncDecision(
|
|
1264
|
+
operation='crash',
|
|
1265
|
+
reason='All files exist but needs validation' +
|
|
1266
|
+
(' - no run_report' if not run_report else f' - exit_code={run_report.exit_code}'),
|
|
1267
|
+
confidence=0.85,
|
|
1268
|
+
estimated_cost=estimate_operation_cost('crash'),
|
|
1269
|
+
details={
|
|
1270
|
+
'decision_type': 'heuristic',
|
|
1271
|
+
'all_files_exist': True,
|
|
1272
|
+
'run_report_missing': not run_report,
|
|
1273
|
+
'exit_code': None if not run_report else run_report.exit_code,
|
|
1274
|
+
'workflow_stage': 'post_regeneration_validation'
|
|
1275
|
+
}
|
|
1276
|
+
)
|
|
1277
|
+
|
|
1278
|
+
# BUG 2: Verify not run yet (run_report exists, exit_code=0, but command != verify/test)
|
|
1279
|
+
if fingerprint and fingerprint.command not in ['verify', 'test', 'fix', 'update'] and not skip_verify:
|
|
1280
|
+
return SyncDecision(
|
|
1281
|
+
operation='verify',
|
|
1282
|
+
reason='All files exist but verification not completed',
|
|
1283
|
+
confidence=0.85,
|
|
1284
|
+
estimated_cost=estimate_operation_cost('verify'),
|
|
1285
|
+
details={
|
|
1286
|
+
'decision_type': 'heuristic',
|
|
1287
|
+
'all_files_exist': True,
|
|
1288
|
+
'last_command': fingerprint.command,
|
|
1289
|
+
'workflow_stage': 'verification_pending'
|
|
1290
|
+
}
|
|
1291
|
+
)
|
|
1292
|
+
|
|
1293
|
+
# Stale run_report detected: _is_workflow_complete returned False but all other conditions passed
|
|
1294
|
+
# This happens when run_report.test_hash doesn't match current test file, or
|
|
1295
|
+
# when fingerprint timestamp > run_report timestamp (legacy detection)
|
|
1296
|
+
# Need to re-run tests to get accurate results
|
|
1297
|
+
if run_report and run_report.exit_code == 0:
|
|
1298
|
+
return SyncDecision(
|
|
1299
|
+
operation='test',
|
|
1300
|
+
reason='Run report is stale - need to re-run tests to verify current state',
|
|
1301
|
+
confidence=0.9,
|
|
1302
|
+
estimated_cost=estimate_operation_cost('test'),
|
|
1303
|
+
details={
|
|
1304
|
+
'decision_type': 'heuristic',
|
|
1305
|
+
'all_files_exist': True,
|
|
1306
|
+
'run_report_stale': True,
|
|
1307
|
+
'run_report_test_hash': run_report.test_hash,
|
|
1308
|
+
'workflow_stage': 'revalidation'
|
|
1309
|
+
}
|
|
1310
|
+
)
|
|
1311
|
+
|
|
779
1312
|
# Progress workflow considering skip flags
|
|
780
1313
|
if paths['code'].exists() and not paths['example'].exists():
|
|
781
1314
|
return SyncDecision(
|
|
782
1315
|
                 operation='example',
                 reason='Code exists but example missing - progress workflow',
-
-                estimated_cost=
-
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('example'),
+                details={
+                    'decision_type': 'heuristic',
+                    'code_path': str(paths['code']),
+                    'code_exists': True,
+                    'example_exists': False
+                }
             )
 
         if (paths['code'].exists() and paths['example'].exists() and
                 not skip_tests and not paths['test'].exists()):
-
-
-
-
-
-
-
+
+            # Check if example has been crash-tested and verified before allowing test generation
+            run_report = read_run_report(basename, language)
+            if not run_report and not skip_verify:
+                # No run report exists - need to test the example first
+                # But if skip_verify is True, skip crash/verify and go to test generation
+                return SyncDecision(
+                    operation='crash',
+                    reason='Example exists but needs runtime testing before test generation',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'code_path': str(paths['code']),
+                        'example_path': str(paths['example']),
+                        'no_run_report': True,
+                        'workflow_stage': 'crash_validation'
+                    }
+                )
+            elif run_report and run_report.exit_code != 0 and not skip_verify:
+                # Example crashed - fix it before proceeding
+                # But if skip_verify is True, skip crash fix and proceed
+                return SyncDecision(
+                    operation='crash',
+                    reason='Example crashes - fix runtime errors before test generation',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_fix'
+                    }
+                )
+            elif fingerprint and fingerprint.command != 'verify' and not skip_verify:
+                # Example runs but hasn't been verified yet
+                return SyncDecision(
+                    operation='verify',
+                    reason='Example runs but needs verification before test generation',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'exit_code': run_report.exit_code,
+                        'last_command': fingerprint.command,
+                        'workflow_stage': 'verify_validation'
+                    }
+                )
+            else:
+                # Example runs and is verified (or verify is skipped) - now safe to generate tests
+                return SyncDecision(
+                    operation='test',
+                    reason='Example validated - ready for test generation',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'code_path': str(paths['code']),
+                        'example_path': str(paths['example']),
+                        'code_exists': True,
+                        'example_exists': True,
+                        'test_exists': False,
+                        'workflow_stage': 'test_generation'
+                    }
+                )
 
         # Some files are missing but no changes detected
         if not paths['code'].exists():
             if paths['prompt'].exists():
+                # CRITICAL FIX: Check if auto-deps was just completed to prevent infinite loop
+                if fingerprint and fingerprint.command == 'auto-deps':
+                    return SyncDecision(
+                        operation='generate',
+                        reason='Auto-deps completed, now generate missing code file',
+                        confidence=0.90,
+                        estimated_cost=estimate_operation_cost('generate'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'prompt_path': str(paths['prompt']),
+                            'code_exists': False,
+                            'auto_deps_completed': True,
+                            'previous_command': fingerprint.command
+                        }
+                    )
+
                 prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
                 if check_for_dependencies(prompt_content):
                     return SyncDecision(
                         operation='auto-deps',
                         reason='Missing code file, prompt has dependencies',
-
-                        estimated_cost=
-
+                        confidence=0.80,
+                        estimated_cost=estimate_operation_cost('auto-deps'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'prompt_path': str(paths['prompt']),
+                            'code_exists': False,
+                            'has_dependencies': True
+                        }
                     )
                 else:
                     return SyncDecision(
                         operation='generate',
                         reason='Missing code file - generate from prompt',
-
-                        estimated_cost=
-
+                        confidence=0.90,
+                        estimated_cost=estimate_operation_cost('generate'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'prompt_path': str(paths['prompt']),
+                            'code_exists': False,
+                            'has_dependencies': False
+                        }
                     )
 
     elif len(changes) == 1:
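Every branch in these hunks returns a SyncDecision priced via estimate_operation_cost; neither definition is visible in this part of the diff. The following is a minimal sketch of the shapes the hunks assume: the field set mirrors the keyword arguments used above, while the cost table is purely hypothetical.

    from dataclasses import dataclass, field
    from typing import Any, Dict, List

    @dataclass
    class SyncDecision:
        operation: str                      # e.g. 'generate', 'crash', 'verify', 'test'
        reason: str                         # human-readable justification
        confidence: float = 0.0             # heuristic or LLM confidence in the choice
        estimated_cost: float = 0.0         # rough cost of running the operation
        details: Dict[str, Any] = field(default_factory=dict)
        prerequisites: List[str] = field(default_factory=list)

    def estimate_operation_cost(operation: str) -> float:
        # Hypothetical price table; the shipped implementation may differ.
        table = {'generate': 0.10, 'auto-deps': 0.05, 'crash': 0.08, 'verify': 0.08,
                 'test': 0.12, 'update': 0.06, 'analyze_conflict': 0.15,
                 'fail_and_request_manual_merge': 0.0, 'nothing': 0.0}
        return table.get(operation, 0.0)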
@@ -827,67 +1448,156 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 return SyncDecision(
                     operation='auto-deps',
                     reason='Prompt changed and dependencies need updating',
-
-                    estimated_cost=
-
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('auto-deps'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_file': 'prompt',
+                        'has_dependencies': True,
+                        'prompt_changed': True
+                    }
                 )
             else:
                 return SyncDecision(
                     operation='generate',
                     reason='Prompt changed - regenerate code',
-
-                    estimated_cost=
-
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_file': 'prompt',
+                        'has_dependencies': False,
+                        'prompt_changed': True
+                    }
                 )
 
         elif change == 'code':
             return SyncDecision(
                 operation='update',
                 reason='Code changed - update prompt to reflect changes',
-
-                estimated_cost=
-
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('update'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_file': 'code',
+                    'code_changed': True
+                }
             )
 
         elif change == 'test':
             return SyncDecision(
                 operation='test',
                 reason='Test changed - run new tests',
-
-                estimated_cost=
-
+                confidence=0.80,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_file': 'test',
+                    'test_changed': True
+                }
             )
 
         elif change == 'example':
             return SyncDecision(
                 operation='verify',
                 reason='Example changed - verify new example',
-
-                estimated_cost=
-
+                confidence=0.80,
+                estimated_cost=estimate_operation_cost('verify'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_file': 'example',
+                    'example_changed': True
+                }
             )
 
         else:
-            # Complex Changes (Multiple Files Modified
-
-
-
-
-
-
-
+            # Complex Changes (Multiple Files Modified)
+            # CRITICAL: Only treat as conflict if prompt changed along with derived artifacts
+            # If only derived artifacts changed (code, example, test), this is NOT a conflict
+            # per PDD doctrine - all are derived from the unchanged prompt
+
+            if 'prompt' in changes:
+                # True conflict: prompt (source of truth) changed along with derived artifacts
+                return SyncDecision(
+                    operation='analyze_conflict',
+                    reason='Prompt and derived files changed - requires conflict analysis',
+                    confidence=0.70,
+                    estimated_cost=estimate_operation_cost('analyze_conflict'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'num_changes': len(changes),
+                        'prompt_changed': True
+                    }
+                )
+            else:
+                # Only derived artifacts changed - prompt (source of truth) is unchanged
+                # Continue workflow from where it was interrupted
+
+                # If code changed, need to re-verify
+                if 'code' in changes:
+                    return SyncDecision(
+                        operation='verify',
+                        reason='Derived files changed (prompt unchanged) - verify code works',
+                        confidence=0.85,
+                        estimated_cost=estimate_operation_cost('verify'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'changed_files': changes,
+                            'num_changes': len(changes),
+                            'prompt_changed': False,
+                            'workflow_stage': 'continue_after_interruption'
+                        }
+                    )
+                # If only example/test changed
+                elif 'example' in changes:
+                    return SyncDecision(
+                        operation='verify',
+                        reason='Example changed (prompt unchanged) - verify example runs',
+                        confidence=0.85,
+                        estimated_cost=estimate_operation_cost('verify'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'changed_files': changes,
+                            'prompt_changed': False
+                        }
+                    )
+                elif 'test' in changes:
+                    return SyncDecision(
+                        operation='test',
+                        reason='Test changed (prompt unchanged) - run tests',
+                        confidence=0.85,
+                        estimated_cost=estimate_operation_cost('test'),
+                        details={
+                            'decision_type': 'heuristic',
+                            'changed_files': changes,
+                            'prompt_changed': False
+                        }
+                    )
 
     # Fallback - should not reach here normally
     return SyncDecision(
         operation='nothing',
         reason='No clear operation determined',
-
-        estimated_cost=
-
+        confidence=0.50,
+        estimated_cost=estimate_operation_cost('nothing'),
+        details={
+            'decision_type': 'heuristic',
+            'fingerprint_exists': fingerprint is not None,
+            'changes': changes,
+            'fallback': True
+        }
     )
 
 
-def analyze_conflict_with_llm(
+def analyze_conflict_with_llm(
+    basename: str,
+    language: str,
+    fingerprint: Fingerprint,
+    changed_files: List[str],
+    prompts_dir: str = "prompts",
+    context_override: Optional[str] = None,
+) -> SyncDecision:
     """
     Resolve complex sync conflicts using an LLM.
 
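The hunk above replaces the old single-line signature with a keyword-friendly one and threads context_override through to path resolution. A hypothetical call site, assuming a Fingerprint already loaded from the sync metadata (all argument values here are illustrative):

    # Illustrative call; the basename and changed_files values are made up.
    decision = analyze_conflict_with_llm(
        basename="calculator",
        language="python",
        fingerprint=fingerprint,            # last recorded sync state
        changed_files=["prompt", "code"],   # source of truth and artifact both changed
        prompts_dir="prompts",
        context_override=None,
    )
    if decision.operation == 'fail_and_request_manual_merge':
        print(decision.reason)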
@@ -910,13 +1620,17 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason='LLM analysis template not found - manual merge required',
-
-            estimated_cost=
-
+            confidence=0.0,
+            estimated_cost=estimate_operation_cost('fail_and_request_manual_merge'),
+            details={
+                'decision_type': 'llm',
+                'error': 'Template not available',
+                'changed_files': changed_files
+            }
         )
 
     # 2. Gather file paths and diffs
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
 
     # Generate diffs for changed files
     diffs = {}
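The loop that populates diffs falls outside the hunks shown here. A plausible shape, using only the standard library; the per-file content attributes on the fingerprint are assumptions, not the shipped code:

    import difflib

    # Assumed: diff each changed file against the content recorded in the fingerprint.
    for name in changed_files:
        path = paths.get(name)
        if path is None or not path.exists():
            continue
        current = path.read_text(encoding='utf-8', errors='ignore').splitlines(keepends=True)
        recorded = getattr(fingerprint, f'{name}_content', '').splitlines(keepends=True)
        diffs[name] = ''.join(difflib.unified_diff(
            recorded, current,
            fromfile=f'{name} (fingerprint)', tofile=f'{name} (current)'))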
@@ -974,9 +1688,14 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason=f'LLM confidence too low ({confidence:.2f}) - manual merge required',
-
+            confidence=confidence,
             estimated_cost=response.get('cost', 0.0),
-
+            details={
+                'decision_type': 'llm',
+                'llm_response': llm_result,
+                'changed_files': changed_files,
+                'confidence_threshold': 0.75
+            }
         )
 
     # Extract operation and details
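The confidence_threshold of 0.75 recorded above implies a gate of roughly this shape just before the early return; only the threshold value is attested by the hunk, the rest is an assumption:

    # Assumed gating logic ahead of the low-confidence branch above.
    confidence = float(llm_result.get('confidence', 0.0))
    if confidence < 0.75:
        # falls into the fail_and_request_manual_merge decision shown above
        ...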
@@ -988,14 +1707,15 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation=operation,
             reason=f"LLM analysis: {reason}",
+            confidence=confidence,
+            estimated_cost=response.get('cost', 0.0),
             details={
+                'decision_type': 'llm',
                 'llm_response': llm_result,
                 'changed_files': changed_files,
                 'merge_strategy': merge_strategy,
                 'follow_up_operations': follow_up_operations
             },
-            estimated_cost=response.get('cost', 0.0),
-            confidence=confidence,
             prerequisites=follow_up_operations
         )
 
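The success return above consumes operation, reason, merge_strategy, and follow_up_operations from the parsed LLM reply. A sketch of the extraction this implies, assuming llm_result is a dict already parsed from the model's JSON (key names match the details dict above; the defaults are assumptions):

    operation = llm_result['operation']
    reason = llm_result.get('reason', '')
    merge_strategy = llm_result.get('merge_strategy')
    follow_up_operations = llm_result.get('follow_up_operations', [])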
@@ -1004,9 +1724,15 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason=f'Invalid LLM response: {e} - manual merge required',
-
+            confidence=0.0,
             estimated_cost=response.get('cost', 0.0),
-
+            details={
+                'decision_type': 'llm',
+                'error': str(e),
+                'raw_response': response.get('result', ''),
+                'changed_files': changed_files,
+                'llm_error': True
+            }
         )
 
     except Exception as e:
@@ -1014,22 +1740,28 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
         return SyncDecision(
             operation='fail_and_request_manual_merge',
             reason=f'Error during LLM analysis: {e} - manual merge required',
-
-            estimated_cost=
-
+            confidence=0.0,
+            estimated_cost=estimate_operation_cost('fail_and_request_manual_merge'),
+            details={
+                'decision_type': 'llm',
+                'error': str(e),
+                'changed_files': changed_files,
+                'llm_error': True
+            }
         )
 
 
 if __name__ == "__main__":
     # Example usage
-    if len(sys.argv)
-        print("Usage: python sync_determine_operation.py <basename> <language>")
+    if len(sys.argv) < 3 or len(sys.argv) > 4:
+        print("Usage: python sync_determine_operation.py <basename> <language> [target_coverage]")
         sys.exit(1)
 
     basename = sys.argv[1]
     language = sys.argv[2]
+    target_coverage = float(sys.argv[3]) if len(sys.argv) == 4 else 90.0
 
-    decision = sync_determine_operation(basename, language, target_coverage
+    decision = sync_determine_operation(basename, language, target_coverage)
 
     print(f"Operation: {decision.operation}")
     print(f"Reason: {decision.reason}")