pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +73 -21
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +258 -82
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +330 -76
- pdd/fix_error_loop.py +207 -61
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +306 -272
- pdd/fix_verification_main.py +28 -9
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +9 -2
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1269 -103
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +100 -905
- pdd/prompts/detect_change_LLM.prompt +122 -20
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +228 -108
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +49 -6
- pdd/sync_determine_operation.py +543 -98
- pdd/sync_main.py +81 -31
- pdd/sync_orchestration.py +1334 -751
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/sync_determine_operation.py
CHANGED
@@ -69,7 +69,8 @@ class Fingerprint:
     prompt_hash: Optional[str]
     code_hash: Optional[str]
     example_hash: Optional[str]
-    test_hash: Optional[str]
+    test_hash: Optional[str]  # Keep for backward compat (primary test file)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}


 @dataclass
@@ -80,6 +81,8 @@ class RunReport:
     tests_passed: int
     tests_failed: int
     coverage: float
+    test_hash: Optional[str] = None  # Hash of test file when tests were run (for staleness detection)
+    test_files: Optional[Dict[str, str]] = None  # Bug #156: {"test_foo.py": "hash1", ...}


 @dataclass
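Note: the new `test_files` field records one SHA-256 digest per test file, which is what lets sync detect staleness anywhere in a multi-file test suite rather than only in the primary test file. A minimal, self-contained sketch of that comparison (`sha256_of` is a stand-in for the module's `calculate_sha256` helper, not its actual API):

```python
import hashlib
from pathlib import Path
from typing import Dict, List

def sha256_of(path: Path) -> str:
    # Stand-in for the module's calculate_sha256 helper.
    return hashlib.sha256(path.read_bytes()).hexdigest()

def tests_are_stale(stored: Dict[str, str], test_files: List[Path]) -> bool:
    """True when the recorded hashes no longer describe the files on disk."""
    current = {f.name: sha256_of(f) for f in test_files if f.exists()}
    if set(current) != set(stored):
        return True  # a test file was added or removed since the report was written
    return any(stored[name] != digest for name, digest in current.items())
```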
@@ -209,51 +212,131 @@ def get_extension(language: str) -> str:
     return extensions.get(language.lower(), language.lower())


-def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
+def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts", context_override: Optional[str] = None) -> Dict[str, Path]:
     """Returns a dictionary mapping file types to their expected Path objects."""
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info(f"get_pdd_file_paths called: basename={basename}, language={language}, prompts_dir={prompts_dir}")
+
     try:
         # Use construct_paths to get configuration-aware paths
         prompt_filename = f"{basename}_{language}.prompt"
         prompt_path = str(Path(prompts_dir) / prompt_filename)
+        logger.info(f"Checking prompt_path={prompt_path}, exists={Path(prompt_path).exists()}")

-        # Check if prompt file exists - if not, we
+        # Check if prompt file exists - if not, we still need configuration-aware paths
         if not Path(prompt_path).exists():
-            #
+            # Use construct_paths with minimal inputs to get configuration-aware paths
+            # even when prompt doesn't exist
             extension = get_extension(language)
-
-
-
-
-
-
+            try:
+                # Call construct_paths with empty input_file_paths to get configured output paths
+                resolved_config, _, output_paths, _ = construct_paths(
+                    input_file_paths={},  # Empty dict since files don't exist yet
+                    force=True,
+                    quiet=True,
+                    command="sync",
+                    command_options={"basename": basename, "language": language},
+                    context_override=context_override
+                )
+
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.info(f"resolved_config: {resolved_config}")
+                logger.info(f"output_paths: {output_paths}")
+
+                # Extract directory configuration from resolved_config
+                # Note: construct_paths sets tests_dir, examples_dir, code_dir keys
+                test_dir = resolved_config.get('tests_dir', 'tests/')
+                example_dir = resolved_config.get('examples_dir', 'examples/')
+                code_dir = resolved_config.get('code_dir', './')
+
+                logger.info(f"Extracted dirs - test: {test_dir}, example: {example_dir}, code: {code_dir}")
+
+                # Ensure directories end with /
+                if test_dir and not test_dir.endswith('/'):
+                    test_dir = test_dir + '/'
+                if example_dir and not example_dir.endswith('/'):
+                    example_dir = example_dir + '/'
+                if code_dir and not code_dir.endswith('/'):
+                    code_dir = code_dir + '/'
+
+                # Construct the full paths
+                test_path = f"{test_dir}test_{basename}.{extension}"
+                example_path = f"{example_dir}{basename}_example.{extension}"
+                code_path = f"{code_dir}{basename}.{extension}"
+
+                logger.debug(f"Final paths: test={test_path}, example={example_path}, code={code_path}")
+
+                # Convert to Path objects
+                test_path = Path(test_path)
+                example_path = Path(example_path)
+                code_path = Path(code_path)
+
+                # Bug #156: Find all matching test files
+                test_dir_path = test_path.parent
+                test_stem = f"test_{basename}"
+                if test_dir_path.exists():
+                    matching_test_files = sorted(test_dir_path.glob(f"{test_stem}*.{extension}"))
+                else:
+                    matching_test_files = [test_path] if test_path.exists() else []
+
+                result = {
+                    'prompt': Path(prompt_path),
+                    'code': code_path,
+                    'example': example_path,
+                    'test': test_path,
+                    'test_files': matching_test_files or [test_path]  # Bug #156
+                }
+                logger.debug(f"get_pdd_file_paths returning (prompt missing): test={test_path}")
+                return result
+            except Exception as e:
+                # If construct_paths fails, fall back to current directory paths
+                # This maintains backward compatibility
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.debug(f"construct_paths failed for non-existent prompt, using defaults: {e}")
+                fallback_test_path = Path(f"test_{basename}.{extension}")
+                # Bug #156: Find matching test files even in fallback
+                if Path('.').exists():
+                    fallback_matching = sorted(Path('.').glob(f"test_{basename}*.{extension}"))
+                else:
+                    fallback_matching = [fallback_test_path] if fallback_test_path.exists() else []
+                return {
+                    'prompt': Path(prompt_path),
+                    'code': Path(f"{basename}.{extension}"),
+                    'example': Path(f"{basename}_example.{extension}"),
+                    'test': fallback_test_path,
+                    'test_files': fallback_matching or [fallback_test_path]  # Bug #156
+                }

         input_file_paths = {
             "prompt_file": prompt_path
         }

-        #
+        # Call construct_paths to get configuration-aware paths
         resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
             input_file_paths=input_file_paths,
             force=True,  # Use force=True to avoid interactive prompts during sync
             quiet=True,
-            command="
-            command_options={}
+            command="sync",  # Use sync command to get more tolerant path handling
+            command_options={"basename": basename, "language": language},
+            context_override=context_override
         )

-        #
-        #
-
-
-
-
-        # For other commands, we need to construct the full set of paths
-
-        # Get the code file path from output_file_paths
-        code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
+        # For sync command, output_file_paths contains the configured paths
+        # Extract the code path from output_file_paths
+        code_path = output_file_paths.get('generate_output_path', '')
+        if not code_path:
+            # Try other possible keys
+            code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
         if not code_path:
-            # Fallback to constructing from basename
+            # Fallback to constructing from basename with configuration
             extension = get_extension(language)
-
+            code_dir = resolved_config.get('generate_output_path', './')
+            if code_dir and not code_dir.endswith('/'):
+                code_dir = code_dir + '/'
+            code_path = f"{code_dir}{basename}.{extension}"

         # Get configured paths for example and test files using construct_paths
         # Note: construct_paths requires files to exist, so we need to handle the case
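Note: the Bug #156 discovery rule that recurs throughout this function is a single glob: every file in the test directory named `test_<basename>*.<ext>` belongs to the module's suite. Isolated as a sketch (the helper name is illustrative, not part of the module):

```python
from pathlib import Path
from typing import List

def find_test_files(test_dir: Path, basename: str, extension: str) -> List[Path]:
    """Mirror the glob used above; returns [] when the directory is absent."""
    if not test_dir.exists():
        return []
    return sorted(test_dir.glob(f"test_{basename}*.{extension}"))

# find_test_files(Path("tests"), "parser", "py") would match both
# tests/test_parser.py and tests/test_parser_edge_cases.py.
```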
@@ -269,18 +352,27 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts

         try:
             # Get example path using example command
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-            # Get test path using test command
-
-
-
-
-
+
+            # Get test path using test command - handle case where test file doesn't exist yet
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except FileNotFoundError:
+                # Test file doesn't exist yet - create default path
+                test_path = Path(f"test_{basename}.{get_extension(language)}")

         finally:
             # Clean up temporary file if we created it
@@ -298,17 +390,26 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
         # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
         try:
             # Get configured directories by using construct_paths with just the prompt file
+            # Pass path_resolution_mode="cwd" so paths resolve relative to CWD (not project root)
             _, _, example_output_paths, _ = construct_paths(
                 input_file_paths={"prompt_file": prompt_path},
-                force=True, quiet=True, command="example", command_options={}
+                force=True, quiet=True, command="example", command_options={},
+                context_override=context_override,
+                path_resolution_mode="cwd"
             )
             example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
-
-
-
-
-
-
+
+            try:
+                _, _, test_output_paths, _ = construct_paths(
+                    input_file_paths={"prompt_file": prompt_path},
+                    force=True, quiet=True, command="test", command_options={},
+                    context_override=context_override,
+                    path_resolution_mode="cwd"
+                )
+                test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
+            except Exception:
+                # If test path construction fails, use default naming
+                test_path = Path(f"test_{basename}.{get_extension(language)}")

         except Exception:
             # Final fallback to deriving from code path if all else fails
@@ -319,21 +420,47 @@ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts
             example_path = code_dir / f"{code_stem}_example{code_ext}"
             test_path = code_dir / f"test_{code_stem}{code_ext}"

+        # Ensure all paths are Path objects
+        if isinstance(code_path, str):
+            code_path = Path(code_path)
+
+        # Keep paths as they are (absolute or relative as returned by construct_paths)
+        # This ensures consistency with how construct_paths expects them
+
+        # Bug #156: Find all matching test files
+        test_dir = test_path.parent
+        test_stem = f"test_{basename}"
+        extension = get_extension(language)
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
+
         return {
             'prompt': Path(prompt_path),
-            'code':
+            'code': code_path,
             'example': example_path,
-            'test': test_path
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }

     except Exception as e:
         # Fallback to simple naming if construct_paths fails
         extension = get_extension(language)
+        test_path = Path(f"test_{basename}.{extension}")
+        # Bug #156: Try to find matching test files even in fallback
+        test_dir = Path('.')
+        test_stem = f"test_{basename}"
+        if test_dir.exists():
+            matching_test_files = sorted(test_dir.glob(f"{test_stem}*.{extension}"))
+        else:
+            matching_test_files = [test_path] if test_path.exists() else []
         return {
             'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
             'code': Path(f"{basename}.{extension}"),
             'example': Path(f"{basename}_example.{extension}"),
-            'test':
+            'test': test_path,
+            'test_files': matching_test_files or [test_path]  # Bug #156: All matching test files
         }

@@ -372,7 +499,8 @@ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
             prompt_hash=data.get('prompt_hash'),
             code_hash=data.get('code_hash'),
             example_hash=data.get('example_hash'),
-            test_hash=data.get('test_hash')
+            test_hash=data.get('test_hash'),
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None
@@ -396,19 +524,29 @@ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
             exit_code=data['exit_code'],
             tests_passed=data['tests_passed'],
             tests_failed=data['tests_failed'],
-            coverage=data['coverage']
+            coverage=data['coverage'],
+            test_hash=data.get('test_hash'),  # Optional for backward compatibility
+            test_files=data.get('test_files')  # Bug #156
         )
     except (json.JSONDecodeError, KeyError, IOError):
         return None


-def calculate_current_hashes(paths: Dict[str,
+def calculate_current_hashes(paths: Dict[str, Any]) -> Dict[str, Any]:
     """Computes the hashes for all current files on disk."""
     # Return hash keys that match what the fingerprint expects
-
-
-
-
+    hashes = {}
+    for file_type, file_path in paths.items():
+        if file_type == 'test_files':
+            # Bug #156: Calculate hashes for all test files
+            hashes['test_files'] = {
+                f.name: calculate_sha256(f)
+                for f in file_path
+                if isinstance(f, Path) and f.exists()
+            }
+        elif isinstance(file_path, Path):
+            hashes[f"{file_type}_hash"] = calculate_sha256(file_path)
+    return hashes


 def get_git_diff(file_path: Path) -> str:
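Note: both readers deliberately switch the new fields to `dict.get()`. Run reports and fingerprints written by older releases lack `test_hash` and `test_files`, and `.get()` degrades those to `None` instead of raising `KeyError`, so old metadata still loads:

```python
# A report written by an older release: the new keys are absent.
old_report = {"exit_code": 0, "tests_passed": 3, "tests_failed": 0, "coverage": 81.0}

# data["test_hash"] would raise KeyError; .get() returns None instead.
assert old_report.get("test_hash") is None
assert old_report.get("test_files") is None
```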
@@ -438,6 +576,7 @@ def estimate_operation_cost(operation: str, language: str = "python") -> float:
         'crash': 0.40,
         'verify': 0.35,
         'test': 0.60,
+        'test_extend': 0.60,  # Same cost as test - generates additional tests
         'fix': 0.45,
         'update': 0.25,
         'analyze_conflict': 0.20,
@@ -591,24 +730,103 @@ def _handle_missing_expected_files(
     )


-def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False
+def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False,
+                          basename: str = None, language: str = None) -> bool:
     """
     Check if workflow is complete considering skip flags.
-
+
     Args:
         paths: Dict mapping file types to their expected Path objects
         skip_tests: If True, test files are not required for completion
         skip_verify: If True, verification operations are not required
-
+        basename: Module basename (required for run_report check)
+        language: Module language (required for run_report check)
+
     Returns:
-        True if all required files exist
+        True if all required files exist AND have been validated (run_report exists)
     """
     required_files = ['code', 'example']
-
+
     if not skip_tests:
         required_files.append('test')
-
-
+
+    # Check all required files exist
+    if not all(paths[f].exists() for f in required_files):
+        return False
+
+    # Also check that run_report exists and code works (exit_code == 0)
+    # Without this, newly generated code would incorrectly be marked as "complete"
+    if basename and language:
+        run_report = read_run_report(basename, language)
+        if not run_report or run_report.exit_code != 0:
+            return False
+
+        # Check that run_report corresponds to current test files (staleness detection)
+        # If any test file changed since run_report was created, we can't trust the results
+        if not skip_tests:
+            # Bug #156: Check ALL test files, not just the primary one
+            if 'test_files' in paths and run_report.test_files:
+                # New multi-file comparison
+                current_test_hashes = {
+                    f.name: calculate_sha256(f)
+                    for f in paths['test_files']
+                    if f.exists()
+                }
+                stored_test_hashes = run_report.test_files
+
+                # Check if any test file changed or new ones added/removed
+                if set(current_test_hashes.keys()) != set(stored_test_hashes.keys()):
+                    return False  # Test files added or removed
+
+                for fname, current_hash in current_test_hashes.items():
+                    if stored_test_hashes.get(fname) != current_hash:
+                        return False  # Test file content changed
+            elif 'test' in paths and paths['test'].exists():
+                # Backward compat: single file check
+                current_test_hash = calculate_sha256(paths['test'])
+                if run_report.test_hash and current_test_hash != run_report.test_hash:
+                    # run_report was created for a different version of the test file
+                    return False
+                if not run_report.test_hash:
+                    # Legacy run_report without test_hash - check fingerprint timestamp as fallback
+                    fingerprint = read_fingerprint(basename, language)
+                    if fingerprint:
+                        # If fingerprint is newer than run_report, run_report might be stale
+                        from datetime import datetime
+                        try:
+                            fp_time = datetime.fromisoformat(fingerprint.timestamp.replace('Z', '+00:00'))
+                            rr_time = datetime.fromisoformat(run_report.timestamp.replace('Z', '+00:00'))
+                            if fp_time > rr_time:
+                                return False  # run_report predates fingerprint, might be stale
+                        except (ValueError, AttributeError):
+                            pass  # If timestamps can't be parsed, skip this check
+
+    # Check verify has been done (unless skip_verify)
+    # Without this, workflow would be "complete" after crash even though verify hasn't run
+    # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+    if not skip_verify:
+        fingerprint = read_fingerprint(basename, language)
+        if fingerprint:
+            # If command starts with 'skip:', the operation was skipped, not completed
+            if fingerprint.command.startswith('skip:'):
+                return False
+            if fingerprint.command not in ['verify', 'test', 'fix', 'update']:
+                return False
+
+    # CRITICAL FIX: Check tests have been run (unless skip_tests)
+    # Without this, workflow would be "complete" after verify even though tests haven't run
+    # This prevents false positive success when skip_verify=True but tests are still required
+    # Bug #23 fix: Also check for 'skip:' prefix which indicates operation was skipped, not executed
+    if not skip_tests:
+        fp = read_fingerprint(basename, language)
+        if fp:
+            # If command starts with 'skip:', the operation was skipped, not completed
+            if fp.command.startswith('skip:'):
+                return False
+            if fp.command not in ['test', 'fix', 'update']:
+                return False
+
+    return True


 def check_for_dependencies(prompt_content: str) -> bool:
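Note: condensed, the rewritten predicate layers several gates on top of the old existence check. A simplified sketch using the module's own helper names (the test-hash staleness comparison shown earlier is elided here):

```python
def workflow_complete_sketch(paths, basename, language,
                             skip_tests=False, skip_verify=False):
    required = ['code', 'example'] + ([] if skip_tests else ['test'])
    if not all(paths[f].exists() for f in required):
        return False                     # gate 1: required files exist
    report = read_run_report(basename, language)
    if not report or report.exit_code != 0:
        return False                     # gate 2: the code actually ran cleanly
    fp = read_fingerprint(basename, language)
    if not skip_verify and fp and (
            fp.command.startswith('skip:')
            or fp.command not in ['verify', 'test', 'fix', 'update']):
        return False                     # gate 3: verify really executed
    if not skip_tests and fp and (
            fp.command.startswith('skip:')
            or fp.command not in ['test', 'fix', 'update']):
        return False                     # gate 4: tests really executed
    return True
```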
@@ -693,7 +911,7 @@ def _check_example_success_history(basename: str, language: str) -> bool:
     return False


-def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Core decision-making function for sync operations with skip flag awareness.

@@ -713,14 +931,14 @@ def sync_determine_operation(basename: str, language: str, target_coverage: floa

     if log_mode:
         # Skip locking for read-only analysis
-        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+        return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)
     else:
         # Normal exclusive locking for actual operations
         with SyncLock(basename, language) as lock:
-            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
+            return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify, context_override)


-def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
+def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False, context_override: Optional[str] = None) -> SyncDecision:
     """
     Perform the sync state analysis without locking concerns.

@@ -749,26 +967,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,

     # Read fingerprint early since we need it for crash verification
     fingerprint = read_fingerprint(basename, language)
-
+
+    # Check if auto-deps just completed - ALWAYS regenerate code after auto-deps
+    # This must be checked early, before any run_report processing, because:
+    # 1. Old run_report (if exists) is stale and should be ignored
+    # 2. auto-deps updates dependencies but doesn't regenerate code
+    if fingerprint and fingerprint.command == 'auto-deps':
+        paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+        return SyncDecision(
+            operation='generate',
+            reason='Auto-deps completed - regenerate code with updated prompt',
+            confidence=0.90,
+            estimated_cost=estimate_operation_cost('generate'),
+            details={
+                'decision_type': 'heuristic',
+                'previous_command': 'auto-deps',
+                'code_exists': paths['code'].exists() if paths.get('code') else False,
+                'regenerate_after_autodeps': True
+            }
+        )
+
     run_report = read_run_report(basename, language)
-    if
-
-
-
-
-
-
-
-
-
-
-
-    '
-
-
-
-
+    # Only process runtime signals (crash/fix/test) if we have a fingerprint
+    # Without a fingerprint, run_report is stale/orphaned and should be ignored
+    if run_report and fingerprint:
+        # Check for prompt changes FIRST - prompt changes take priority over runtime signals
+        # If the user modified the prompt, we need to regenerate regardless of runtime state
+        if fingerprint:
+            paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            current_prompt_hash = calculate_sha256(paths['prompt'])
+            if current_prompt_hash and current_prompt_hash != fingerprint.prompt_hash:
+                prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore') if paths['prompt'].exists() else ""
+                has_deps = check_for_dependencies(prompt_content)
+                return SyncDecision(
+                    operation='auto-deps' if has_deps else 'generate',
+                    reason='Prompt changed - regenerating (takes priority over runtime signals)',
+                    confidence=0.95,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_changed': True,
+                        'previous_command': fingerprint.command,
+                        'runtime_state_ignored': True
+                    }
+                )
+
+        # Check if we just completed a crash operation and need verification FIRST
+        # This takes priority over test failures because we need to verify the crash fix worked
+        # BUT only proceed to verify if exit_code == 0 (crash fix succeeded)
         if fingerprint and fingerprint.command == 'crash' and not skip_verify:
+            if run_report.exit_code != 0:
+                # Crash fix didn't work - need to re-run crash
+                return SyncDecision(
+                    operation='crash',
+                    reason=f'Previous crash operation failed (exit_code={run_report.exit_code}) - retry crash fix',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'previous_command': 'crash',
+                        'exit_code': run_report.exit_code,
+                        'workflow_stage': 'crash_retry'
+                    }
+                )
             return SyncDecision(
                 operation='verify',
                 reason='Previous crash operation completed - verify example runs correctly',
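Note: stripped of the `SyncDecision` plumbing, this block establishes a strict precedence: a completed `auto-deps` always forces regeneration, a prompt edit outranks every runtime signal, and only then does the crash/verify state matter. A hedged summary (the helper signature is illustrative, not the module's API):

```python
def next_op_sketch(fingerprint, current_prompt_hash, run_report, has_deps):
    # 1. auto-deps just completed: regenerate unconditionally.
    if fingerprint and fingerprint.command == 'auto-deps':
        return 'generate'
    # 2. Prompt changed: regenerate (via auto-deps when the prompt declares deps).
    if fingerprint and current_prompt_hash != fingerprint.prompt_hash:
        return 'auto-deps' if has_deps else 'generate'
    # 3. Only now do runtime signals count: retry a failed crash fix...
    if fingerprint and fingerprint.command == 'crash' and run_report.exit_code != 0:
        return 'crash'
    return 'verify'  # ...or confirm a successful one.
```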
@@ -782,6 +1043,41 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                 }
             )

+        # Check test failures (after crash verification check)
+        if run_report.tests_failed > 0:
+            # First check if the test file actually exists
+            pdd_files = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
+            test_file = pdd_files.get('test')
+
+            # Only suggest 'fix' if test file exists
+            if test_file and test_file.exists():
+                return SyncDecision(
+                    operation='fix',
+                    reason=f'Test failures detected: {run_report.tests_failed} failed tests',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('fix'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'tests_failed': run_report.tests_failed,
+                        'exit_code': run_report.exit_code,
+                        'coverage': run_report.coverage
+                    }
+                )
+            # If test file doesn't exist but we have test failures in run report,
+            # we need to generate the test first
+            else:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test failures reported but test file missing - need to generate tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'run_report_shows_failures': True,
+                        'test_file_exists': False
+                    }
+                )
+
         # Then check for runtime crashes (only if no test failures)
         if run_report.exit_code != 0:
             # Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
@@ -833,6 +1129,23 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'skip_tests': True
                 }
             )
+        elif run_report.tests_failed == 0 and run_report.tests_passed > 0:
+            # Tests pass but coverage is below target
+            # Return 'test_extend' to signal we need to ADD more tests, not regenerate
+            return SyncDecision(
+                operation='test_extend',
+                reason=f'Tests pass ({run_report.tests_passed} passed) but coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% - extending tests',
+                confidence=0.85,
+                estimated_cost=estimate_operation_cost('test'),
+                details={
+                    'decision_type': 'heuristic',
+                    'current_coverage': run_report.coverage,
+                    'target_coverage': target_coverage,
+                    'tests_passed': run_report.tests_passed,
+                    'tests_failed': run_report.tests_failed,
+                    'extend_tests': True
+                }
+            )
         else:
             return SyncDecision(
                 operation='test',
@@ -849,7 +1162,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             )

     # 2. Analyze File State
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)
     current_hashes = calculate_current_hashes(paths)

     # 3. Implement the Decision Tree
@@ -926,7 +1239,7 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,

     if not changes:
         # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
-        if _is_workflow_complete(paths, skip_tests, skip_verify):
+        if _is_workflow_complete(paths, skip_tests, skip_verify, basename, language):
             return SyncDecision(
                 operation='nothing',
                 reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
@@ -939,7 +1252,63 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'workflow_complete': True
                 }
             )
-
+
+        # Handle incomplete workflow when all files exist (including test)
+        # This addresses the blind spot where crash/verify/test logic only runs when test is missing
+        if (paths['code'].exists() and paths['example'].exists() and paths['test'].exists()):
+            run_report = read_run_report(basename, language)
+
+            # BUG 4 & 1: No run_report OR crash detected (exit_code != 0)
+            if not run_report or run_report.exit_code != 0:
+                return SyncDecision(
+                    operation='crash',
+                    reason='All files exist but needs validation' +
+                           (' - no run_report' if not run_report else f' - exit_code={run_report.exit_code}'),
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('crash'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'run_report_missing': not run_report,
+                        'exit_code': None if not run_report else run_report.exit_code,
+                        'workflow_stage': 'post_regeneration_validation'
+                    }
+                )
+
+            # BUG 2: Verify not run yet (run_report exists, exit_code=0, but command != verify/test)
+            if fingerprint and fingerprint.command not in ['verify', 'test', 'fix', 'update'] and not skip_verify:
+                return SyncDecision(
+                    operation='verify',
+                    reason='All files exist but verification not completed',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'last_command': fingerprint.command,
+                        'workflow_stage': 'verification_pending'
+                    }
+                )
+
+            # Stale run_report detected: _is_workflow_complete returned False but all other conditions passed
+            # This happens when run_report.test_hash doesn't match current test file, or
+            # when fingerprint timestamp > run_report timestamp (legacy detection)
+            # Need to re-run tests to get accurate results
+            if run_report and run_report.exit_code == 0:
+                return SyncDecision(
+                    operation='test',
+                    reason='Run report is stale - need to re-run tests to verify current state',
+                    confidence=0.9,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'all_files_exist': True,
+                        'run_report_stale': True,
+                        'run_report_test_hash': run_report.test_hash,
+                        'workflow_stage': 'revalidation'
+                    }
+                )
+
         # Progress workflow considering skip flags
         if paths['code'].exists() and not paths['example'].exists():
             return SyncDecision(
@@ -960,8 +1329,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,

         # Check if example has been crash-tested and verified before allowing test generation
         run_report = read_run_report(basename, language)
-        if not run_report:
+        if not run_report and not skip_verify:
             # No run report exists - need to test the example first
+            # But if skip_verify is True, skip crash/verify and go to test generation
             return SyncDecision(
                 operation='crash',
                 reason='Example exists but needs runtime testing before test generation',
@@ -975,8 +1345,9 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
                     'workflow_stage': 'crash_validation'
                 }
             )
-        elif run_report.exit_code != 0:
+        elif run_report and run_report.exit_code != 0 and not skip_verify:
             # Example crashed - fix it before proceeding
+            # But if skip_verify is True, skip crash fix and proceed
             return SyncDecision(
                 operation='crash',
                 reason='Example crashes - fix runtime errors before test generation',
@@ -1023,6 +1394,22 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     # Some files are missing but no changes detected
     if not paths['code'].exists():
         if paths['prompt'].exists():
+            # CRITICAL FIX: Check if auto-deps was just completed to prevent infinite loop
+            if fingerprint and fingerprint.command == 'auto-deps':
+                return SyncDecision(
+                    operation='generate',
+                    reason='Auto-deps completed, now generate missing code file',
+                    confidence=0.90,
+                    estimated_cost=estimate_operation_cost('generate'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'prompt_path': str(paths['prompt']),
+                        'code_exists': False,
+                        'auto_deps_completed': True,
+                        'previous_command': fingerprint.command
+                    }
+                )
+
             prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
             if check_for_dependencies(prompt_content):
                 return SyncDecision(
@@ -1124,18 +1511,69 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
             )

     else:
-        # Complex Changes (Multiple Files Modified
-
-
-
-
-
-
-
-        '
-        '
-
-
+        # Complex Changes (Multiple Files Modified)
+        # CRITICAL: Only treat as conflict if prompt changed along with derived artifacts
+        # If only derived artifacts changed (code, example, test), this is NOT a conflict
+        # per PDD doctrine - all are derived from the unchanged prompt
+
+        if 'prompt' in changes:
+            # True conflict: prompt (source of truth) changed along with derived artifacts
+            return SyncDecision(
+                operation='analyze_conflict',
+                reason='Prompt and derived files changed - requires conflict analysis',
+                confidence=0.70,
+                estimated_cost=estimate_operation_cost('analyze_conflict'),
+                details={
+                    'decision_type': 'heuristic',
+                    'changed_files': changes,
+                    'num_changes': len(changes),
+                    'prompt_changed': True
+                }
+            )
+        else:
+            # Only derived artifacts changed - prompt (source of truth) is unchanged
+            # Continue workflow from where it was interrupted
+
+            # If code changed, need to re-verify
+            if 'code' in changes:
+                return SyncDecision(
+                    operation='verify',
+                    reason='Derived files changed (prompt unchanged) - verify code works',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'num_changes': len(changes),
+                        'prompt_changed': False,
+                        'workflow_stage': 'continue_after_interruption'
+                    }
+                )
+            # If only example/test changed
+            elif 'example' in changes:
+                return SyncDecision(
+                    operation='verify',
+                    reason='Example changed (prompt unchanged) - verify example runs',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('verify'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'prompt_changed': False
+                    }
+                )
+            elif 'test' in changes:
+                return SyncDecision(
+                    operation='test',
+                    reason='Test changed (prompt unchanged) - run tests',
+                    confidence=0.85,
+                    estimated_cost=estimate_operation_cost('test'),
+                    details={
+                        'decision_type': 'heuristic',
+                        'changed_files': changes,
+                        'prompt_changed': False
+                    }
+                )

     # Fallback - should not reach here normally
     return SyncDecision(
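Note: the doctrine encoded above fits in a few lines. The prompt is the source of truth, so only a prompt edit counts as a conflict; changes confined to derived artifacts simply resume the workflow. A sketch of the dispatch:

```python
from typing import List

def dispatch_sketch(changes: List[str]) -> str:
    if 'prompt' in changes:
        return 'analyze_conflict'  # source of truth diverged from artifacts
    if 'code' in changes or 'example' in changes:
        return 'verify'            # re-check that the derived code still runs
    if 'test' in changes:
        return 'test'              # just re-run the edited tests
    return 'fallback'              # should not normally be reached
```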
@@ -1152,7 +1590,14 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
     )


-def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerprint, changed_files: List[str], prompts_dir: str = "prompts") -> SyncDecision:
+def analyze_conflict_with_llm(
+    basename: str,
+    language: str,
+    fingerprint: Fingerprint,
+    changed_files: List[str],
+    prompts_dir: str = "prompts",
+    context_override: Optional[str] = None,
+) -> SyncDecision:
     """
     Resolve complex sync conflicts using an LLM.

@@ -1185,7 +1630,7 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
     )

     # 2. Gather file paths and diffs
-    paths = get_pdd_file_paths(basename, language, prompts_dir)
+    paths = get_pdd_file_paths(basename, language, prompts_dir, context_override=context_override)

     # Generate diffs for changed files
     diffs = {}