pdd-cli 0.0.42__py3-none-any.whl → 0.0.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +1 -1
- pdd/cli.py +2 -2
- pdd/cmd_test_main.py +9 -0
- pdd/construct_paths.py +26 -2
- pdd/data/language_format.csv +1 -0
- pdd/data/llm_model.csv +2 -2
- pdd/fix_code_loop.py +2 -2
- pdd/fix_error_loop.py +5 -2
- pdd/fix_verification_errors_loop.py +14 -1
- pdd/fix_verification_main.py +29 -8
- pdd/get_jwt_token.py +39 -7
- pdd/increase_tests.py +7 -0
- pdd/llm_invoke.py +9 -7
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/fix_code_module_errors_LLM.prompt +13 -3
- pdd/pytest_output.py +72 -20
- pdd/python_env_detector.py +151 -0
- pdd/summarize_directory.py +7 -1
- pdd/sync_determine_operation.py +396 -109
- pdd/sync_main.py +1 -1
- pdd/sync_orchestration.py +448 -28
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.44.dist-info}/METADATA +4 -4
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.44.dist-info}/RECORD +27 -26
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.44.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.44.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.44.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.44.dist-info}/top_level.txt +0 -0
pdd/sync_determine_operation.py
CHANGED
|
@@ -56,7 +56,8 @@ LOCKS_DIR = get_locks_dir()
|
|
|
56
56
|
|
|
57
57
|
# Export constants for other modules
|
|
58
58
|
__all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
|
|
59
|
-
'sync_determine_operation', 'analyze_conflict_with_llm'
|
|
59
|
+
'sync_determine_operation', 'analyze_conflict_with_llm', 'read_run_report', 'get_pdd_file_paths',
|
|
60
|
+
'_check_example_success_history']
|
|
60
61
|
|
|
61
62
|
|
|
62
63
|
@dataclass
|
|
@@ -84,12 +85,12 @@ class RunReport:
|
|
|
84
85
|
@dataclass
|
|
85
86
|
class SyncDecision:
|
|
86
87
|
"""Represents a decision about what PDD operation to run next."""
|
|
87
|
-
operation: str # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'nothing'
|
|
88
|
-
reason: str
|
|
89
|
-
|
|
90
|
-
estimated_cost: float = 0.0
|
|
91
|
-
|
|
92
|
-
prerequisites: List[str] =
|
|
88
|
+
operation: str # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'nothing', 'all_synced', 'error', 'fail_and_request_manual_merge'
|
|
89
|
+
reason: str # A human-readable explanation for the decision
|
|
90
|
+
confidence: float = 1.0 # Confidence level in the decision, 0.0 to 1.0, default 1.0 for deterministic decisions
|
|
91
|
+
estimated_cost: float = 0.0 # Estimated cost for the operation in dollars, default 0.0
|
|
92
|
+
details: Optional[Dict[str, Any]] = None # Extra context for logging and debugging, default None
|
|
93
|
+
prerequisites: Optional[List[str]] = None # List of operations that should be completed first, default None
|
|
93
94
|
|
|
94
95
|
|
|
95
96
|
class SyncLock:
|
|
@@ -428,6 +429,26 @@ def get_git_diff(file_path: Path) -> str:
|
|
|
428
429
|
return ""
|
|
429
430
|
|
|
430
431
|
|
|
432
|
+
def estimate_operation_cost(operation: str, language: str = "python") -> float:
|
|
433
|
+
"""Returns estimated cost in dollars for each operation based on typical LLM usage."""
|
|
434
|
+
cost_map = {
|
|
435
|
+
'auto-deps': 0.10,
|
|
436
|
+
'generate': 0.50,
|
|
437
|
+
'example': 0.30,
|
|
438
|
+
'crash': 0.40,
|
|
439
|
+
'verify': 0.35,
|
|
440
|
+
'test': 0.60,
|
|
441
|
+
'fix': 0.45,
|
|
442
|
+
'update': 0.25,
|
|
443
|
+
'analyze_conflict': 0.20,
|
|
444
|
+
'nothing': 0.0,
|
|
445
|
+
'all_synced': 0.0,
|
|
446
|
+
'error': 0.0,
|
|
447
|
+
'fail_and_request_manual_merge': 0.0
|
|
448
|
+
}
|
|
449
|
+
return cost_map.get(operation, 0.0)
|
|
450
|
+
|
|
451
|
+
|
|
431
452
|
def validate_expected_files(fingerprint: Optional[Fingerprint], paths: Dict[str, Path]) -> Dict[str, bool]:
|
|
432
453
|
"""
|
|
433
454
|
Validate that files expected to exist based on fingerprint actually exist.
|
|
@@ -492,17 +513,27 @@ def _handle_missing_expected_files(
|
|
|
492
513
|
return SyncDecision(
|
|
493
514
|
operation='auto-deps',
|
|
494
515
|
reason='Code file missing, prompt has dependencies - regenerate from auto-deps',
|
|
495
|
-
|
|
496
|
-
estimated_cost=
|
|
497
|
-
|
|
516
|
+
confidence=1.0,
|
|
517
|
+
estimated_cost=estimate_operation_cost('auto-deps'),
|
|
518
|
+
details={
|
|
519
|
+
'decision_type': 'heuristic',
|
|
520
|
+
'missing_files': missing_files,
|
|
521
|
+
'prompt_path': str(paths['prompt']),
|
|
522
|
+
'has_dependencies': True
|
|
523
|
+
}
|
|
498
524
|
)
|
|
499
525
|
else:
|
|
500
526
|
return SyncDecision(
|
|
501
527
|
operation='generate',
|
|
502
528
|
reason='Code file missing - regenerate from prompt',
|
|
503
|
-
|
|
504
|
-
estimated_cost=
|
|
505
|
-
|
|
529
|
+
confidence=1.0,
|
|
530
|
+
estimated_cost=estimate_operation_cost('generate'),
|
|
531
|
+
details={
|
|
532
|
+
'decision_type': 'heuristic',
|
|
533
|
+
'missing_files': missing_files,
|
|
534
|
+
'prompt_path': str(paths['prompt']),
|
|
535
|
+
'has_dependencies': False
|
|
536
|
+
}
|
|
506
537
|
)
|
|
507
538
|
|
|
508
539
|
elif 'example' in missing_files and paths['code'].exists():
|
|
@@ -510,9 +541,13 @@ def _handle_missing_expected_files(
|
|
|
510
541
|
return SyncDecision(
|
|
511
542
|
operation='example',
|
|
512
543
|
reason='Example file missing - regenerate example',
|
|
513
|
-
|
|
514
|
-
estimated_cost=
|
|
515
|
-
|
|
544
|
+
confidence=1.0,
|
|
545
|
+
estimated_cost=estimate_operation_cost('example'),
|
|
546
|
+
details={
|
|
547
|
+
'decision_type': 'heuristic',
|
|
548
|
+
'missing_files': missing_files,
|
|
549
|
+
'code_path': str(paths['code'])
|
|
550
|
+
}
|
|
516
551
|
)
|
|
517
552
|
|
|
518
553
|
elif 'test' in missing_files and paths['code'].exists() and paths['example'].exists():
|
|
@@ -522,26 +557,37 @@ def _handle_missing_expected_files(
|
|
|
522
557
|
return SyncDecision(
|
|
523
558
|
operation='nothing',
|
|
524
559
|
reason='Test file missing but --skip-tests specified - workflow complete',
|
|
525
|
-
|
|
526
|
-
estimated_cost=
|
|
527
|
-
|
|
560
|
+
confidence=1.0,
|
|
561
|
+
estimated_cost=estimate_operation_cost('nothing'),
|
|
562
|
+
details={
|
|
563
|
+
'decision_type': 'heuristic',
|
|
564
|
+
'missing_files': missing_files,
|
|
565
|
+
'skip_tests': True
|
|
566
|
+
}
|
|
528
567
|
)
|
|
529
568
|
else:
|
|
530
569
|
return SyncDecision(
|
|
531
570
|
operation='test',
|
|
532
571
|
reason='Test file missing - regenerate tests',
|
|
533
|
-
|
|
534
|
-
estimated_cost=
|
|
535
|
-
|
|
572
|
+
confidence=1.0,
|
|
573
|
+
estimated_cost=estimate_operation_cost('test'),
|
|
574
|
+
details={
|
|
575
|
+
'decision_type': 'heuristic',
|
|
576
|
+
'missing_files': missing_files,
|
|
577
|
+
'code_path': str(paths['code'])
|
|
578
|
+
}
|
|
536
579
|
)
|
|
537
580
|
|
|
538
581
|
# Fallback - regenerate everything
|
|
539
582
|
return SyncDecision(
|
|
540
583
|
operation='generate',
|
|
541
584
|
reason='Multiple files missing - regenerate from prompt',
|
|
542
|
-
|
|
543
|
-
estimated_cost=
|
|
544
|
-
|
|
585
|
+
confidence=1.0,
|
|
586
|
+
estimated_cost=estimate_operation_cost('generate'),
|
|
587
|
+
details={
|
|
588
|
+
'decision_type': 'heuristic',
|
|
589
|
+
'missing_files': missing_files
|
|
590
|
+
}
|
|
545
591
|
)
|
|
546
592
|
|
|
547
593
|
|
|
@@ -594,6 +640,59 @@ def check_for_dependencies(prompt_content: str) -> bool:
|
|
|
594
640
|
return has_xml_deps or has_explicit_deps
|
|
595
641
|
|
|
596
642
|
|
|
643
|
+
def _check_example_success_history(basename: str, language: str) -> bool:
|
|
644
|
+
"""
|
|
645
|
+
Check if the example has run successfully before by examining historical fingerprints and run reports.
|
|
646
|
+
|
|
647
|
+
Args:
|
|
648
|
+
basename: The base name for the PDD unit
|
|
649
|
+
language: The programming language
|
|
650
|
+
|
|
651
|
+
Returns:
|
|
652
|
+
True if the example has run successfully before, False otherwise
|
|
653
|
+
"""
|
|
654
|
+
meta_dir = get_meta_dir()
|
|
655
|
+
|
|
656
|
+
# Strategy 1: Check if there's a fingerprint with 'verify' command (indicates successful example run)
|
|
657
|
+
# Cache fingerprint and run report to avoid redundant I/O operations
|
|
658
|
+
fingerprint = read_fingerprint(basename, language)
|
|
659
|
+
current_run_report = read_run_report(basename, language)
|
|
660
|
+
|
|
661
|
+
# Strategy 1: Check if there's a fingerprint with 'verify' command (indicates successful example run)
|
|
662
|
+
if fingerprint and fingerprint.command == 'verify':
|
|
663
|
+
return True
|
|
664
|
+
|
|
665
|
+
# Strategy 2: Check current run report for successful runs (exit_code == 0)
|
|
666
|
+
# Note: We check the current run report for successful history since it's updated
|
|
667
|
+
# This allows for a simple check of recent success
|
|
668
|
+
if current_run_report and current_run_report.exit_code == 0:
|
|
669
|
+
return True
|
|
670
|
+
|
|
671
|
+
# Strategy 2b: Look for historical run reports with exit_code == 0
|
|
672
|
+
# Check all run report files in the meta directory that match the pattern
|
|
673
|
+
run_report_pattern = f"{basename}_{language}_run"
|
|
674
|
+
for file in meta_dir.glob(f"{run_report_pattern}*.json"):
|
|
675
|
+
try:
|
|
676
|
+
with open(file, 'r') as f:
|
|
677
|
+
data = json.load(f)
|
|
678
|
+
|
|
679
|
+
# If we find any historical run with exit_code == 0, the example has run successfully
|
|
680
|
+
if data.get('exit_code') == 0:
|
|
681
|
+
return True
|
|
682
|
+
except (json.JSONDecodeError, KeyError, IOError):
|
|
683
|
+
continue
|
|
684
|
+
|
|
685
|
+
# Strategy 3: Check if fingerprint has example_hash and was created after successful operations
|
|
686
|
+
# Commands that indicate example was working: 'example', 'verify', 'test', 'fix'
|
|
687
|
+
if fingerprint and fingerprint.example_hash:
|
|
688
|
+
successful_commands = {'example', 'verify', 'test', 'fix'}
|
|
689
|
+
if fingerprint.command in successful_commands:
|
|
690
|
+
# If the fingerprint was created after these commands, the example likely worked
|
|
691
|
+
return True
|
|
692
|
+
|
|
693
|
+
return False
|
|
694
|
+
|
|
695
|
+
|
|
597
696
|
def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
|
|
598
697
|
"""
|
|
599
698
|
Core decision-making function for sync operations with skip flag awareness.
|
|
@@ -658,29 +757,63 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
658
757
|
return SyncDecision(
|
|
659
758
|
operation='fix',
|
|
660
759
|
reason=f'Test failures detected: {run_report.tests_failed} failed tests',
|
|
661
|
-
|
|
662
|
-
estimated_cost=
|
|
663
|
-
|
|
760
|
+
confidence=0.90,
|
|
761
|
+
estimated_cost=estimate_operation_cost('fix'),
|
|
762
|
+
details={
|
|
763
|
+
'decision_type': 'heuristic',
|
|
764
|
+
'tests_failed': run_report.tests_failed,
|
|
765
|
+
'exit_code': run_report.exit_code,
|
|
766
|
+
'coverage': run_report.coverage
|
|
767
|
+
}
|
|
768
|
+
)
|
|
769
|
+
|
|
770
|
+
# Check if we just completed a crash operation and need verification
|
|
771
|
+
if fingerprint and fingerprint.command == 'crash' and not skip_verify:
|
|
772
|
+
return SyncDecision(
|
|
773
|
+
operation='verify',
|
|
774
|
+
reason='Previous crash operation completed - verify example runs correctly',
|
|
775
|
+
confidence=0.90,
|
|
776
|
+
estimated_cost=estimate_operation_cost('verify'),
|
|
777
|
+
details={
|
|
778
|
+
'decision_type': 'heuristic',
|
|
779
|
+
'previous_command': 'crash',
|
|
780
|
+
'current_exit_code': run_report.exit_code,
|
|
781
|
+
'fingerprint_command': fingerprint.command
|
|
782
|
+
}
|
|
664
783
|
)
|
|
665
784
|
|
|
666
785
|
# Then check for runtime crashes (only if no test failures)
|
|
667
786
|
if run_report.exit_code != 0:
|
|
668
|
-
#
|
|
669
|
-
|
|
787
|
+
# Context-aware decision: prefer 'fix' over 'crash' when example has run successfully before
|
|
788
|
+
has_example_run_successfully = _check_example_success_history(basename, language)
|
|
789
|
+
|
|
790
|
+
if has_example_run_successfully:
|
|
670
791
|
return SyncDecision(
|
|
671
|
-
operation='
|
|
672
|
-
reason='
|
|
673
|
-
|
|
674
|
-
estimated_cost=
|
|
675
|
-
|
|
792
|
+
operation='fix',
|
|
793
|
+
reason='Runtime error detected but example has run successfully before - prefer fix over crash',
|
|
794
|
+
confidence=0.90,
|
|
795
|
+
estimated_cost=estimate_operation_cost('fix'),
|
|
796
|
+
details={
|
|
797
|
+
'decision_type': 'heuristic',
|
|
798
|
+
'exit_code': run_report.exit_code,
|
|
799
|
+
'timestamp': run_report.timestamp,
|
|
800
|
+
'example_success_history': True,
|
|
801
|
+
'decision_rationale': 'prefer_fix_over_crash'
|
|
802
|
+
}
|
|
676
803
|
)
|
|
677
804
|
else:
|
|
678
805
|
return SyncDecision(
|
|
679
806
|
operation='crash',
|
|
680
|
-
reason='Runtime error detected in last run',
|
|
681
|
-
|
|
682
|
-
estimated_cost=
|
|
683
|
-
|
|
807
|
+
reason='Runtime error detected in last run - no successful example history',
|
|
808
|
+
confidence=0.95,
|
|
809
|
+
estimated_cost=estimate_operation_cost('crash'),
|
|
810
|
+
details={
|
|
811
|
+
'decision_type': 'heuristic',
|
|
812
|
+
'exit_code': run_report.exit_code,
|
|
813
|
+
'timestamp': run_report.timestamp,
|
|
814
|
+
'example_success_history': False,
|
|
815
|
+
'decision_rationale': 'crash_without_history'
|
|
816
|
+
}
|
|
684
817
|
)
|
|
685
818
|
|
|
686
819
|
if run_report.coverage < target_coverage:
|
|
@@ -690,17 +823,29 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
690
823
|
return SyncDecision(
|
|
691
824
|
operation='all_synced',
|
|
692
825
|
reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% but tests skipped',
|
|
693
|
-
|
|
694
|
-
estimated_cost=
|
|
695
|
-
|
|
826
|
+
confidence=0.90,
|
|
827
|
+
estimated_cost=estimate_operation_cost('all_synced'),
|
|
828
|
+
details={
|
|
829
|
+
'decision_type': 'heuristic',
|
|
830
|
+
'current_coverage': run_report.coverage,
|
|
831
|
+
'target_coverage': target_coverage,
|
|
832
|
+
'tests_skipped': True,
|
|
833
|
+
'skip_tests': True
|
|
834
|
+
}
|
|
696
835
|
)
|
|
697
836
|
else:
|
|
698
837
|
return SyncDecision(
|
|
699
838
|
operation='test',
|
|
700
839
|
reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}%',
|
|
701
|
-
|
|
702
|
-
estimated_cost=
|
|
703
|
-
|
|
840
|
+
confidence=0.85,
|
|
841
|
+
estimated_cost=estimate_operation_cost('test'),
|
|
842
|
+
details={
|
|
843
|
+
'decision_type': 'heuristic',
|
|
844
|
+
'current_coverage': run_report.coverage,
|
|
845
|
+
'target_coverage': target_coverage,
|
|
846
|
+
'tests_passed': run_report.tests_passed,
|
|
847
|
+
'tests_failed': run_report.tests_failed
|
|
848
|
+
}
|
|
704
849
|
)
|
|
705
850
|
|
|
706
851
|
# 2. Analyze File State
|
|
@@ -716,25 +861,39 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
716
861
|
return SyncDecision(
|
|
717
862
|
operation='auto-deps',
|
|
718
863
|
reason='New prompt with dependencies detected',
|
|
719
|
-
|
|
720
|
-
estimated_cost=
|
|
721
|
-
|
|
864
|
+
confidence=0.80,
|
|
865
|
+
estimated_cost=estimate_operation_cost('auto-deps'),
|
|
866
|
+
details={
|
|
867
|
+
'decision_type': 'heuristic',
|
|
868
|
+
'prompt_path': str(paths['prompt']),
|
|
869
|
+
'fingerprint_found': False,
|
|
870
|
+
'has_dependencies': True
|
|
871
|
+
}
|
|
722
872
|
)
|
|
723
873
|
else:
|
|
724
874
|
return SyncDecision(
|
|
725
875
|
operation='generate',
|
|
726
876
|
reason='New prompt ready for code generation',
|
|
727
|
-
|
|
728
|
-
estimated_cost=
|
|
729
|
-
|
|
877
|
+
confidence=0.90,
|
|
878
|
+
estimated_cost=estimate_operation_cost('generate'),
|
|
879
|
+
details={
|
|
880
|
+
'decision_type': 'heuristic',
|
|
881
|
+
'prompt_path': str(paths['prompt']),
|
|
882
|
+
'fingerprint_found': False,
|
|
883
|
+
'has_dependencies': False
|
|
884
|
+
}
|
|
730
885
|
)
|
|
731
886
|
else:
|
|
732
887
|
return SyncDecision(
|
|
733
888
|
operation='nothing',
|
|
734
889
|
reason='No prompt file and no history - nothing to do',
|
|
735
|
-
|
|
736
|
-
estimated_cost=
|
|
737
|
-
|
|
890
|
+
confidence=1.0,
|
|
891
|
+
estimated_cost=estimate_operation_cost('nothing'),
|
|
892
|
+
details={
|
|
893
|
+
'decision_type': 'heuristic',
|
|
894
|
+
'prompt_exists': False,
|
|
895
|
+
'fingerprint_found': False
|
|
896
|
+
}
|
|
738
897
|
)
|
|
739
898
|
|
|
740
899
|
# CRITICAL FIX: Validate expected files exist before hash comparison
|
|
@@ -771,9 +930,14 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
771
930
|
return SyncDecision(
|
|
772
931
|
operation='nothing',
|
|
773
932
|
reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
|
|
774
|
-
|
|
775
|
-
estimated_cost=
|
|
776
|
-
|
|
933
|
+
confidence=1.0,
|
|
934
|
+
estimated_cost=estimate_operation_cost('nothing'),
|
|
935
|
+
details={
|
|
936
|
+
'decision_type': 'heuristic',
|
|
937
|
+
'skip_tests': skip_tests,
|
|
938
|
+
'skip_verify': skip_verify,
|
|
939
|
+
'workflow_complete': True
|
|
940
|
+
}
|
|
777
941
|
)
|
|
778
942
|
|
|
779
943
|
# Progress workflow considering skip flags
|
|
@@ -781,20 +945,80 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
781
945
|
return SyncDecision(
|
|
782
946
|
operation='example',
|
|
783
947
|
reason='Code exists but example missing - progress workflow',
|
|
784
|
-
|
|
785
|
-
estimated_cost=
|
|
786
|
-
|
|
948
|
+
confidence=0.85,
|
|
949
|
+
estimated_cost=estimate_operation_cost('example'),
|
|
950
|
+
details={
|
|
951
|
+
'decision_type': 'heuristic',
|
|
952
|
+
'code_path': str(paths['code']),
|
|
953
|
+
'code_exists': True,
|
|
954
|
+
'example_exists': False
|
|
955
|
+
}
|
|
787
956
|
)
|
|
788
957
|
|
|
789
958
|
if (paths['code'].exists() and paths['example'].exists() and
|
|
790
959
|
not skip_tests and not paths['test'].exists()):
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
960
|
+
|
|
961
|
+
# Check if example has been crash-tested and verified before allowing test generation
|
|
962
|
+
run_report = read_run_report(basename, language)
|
|
963
|
+
if not run_report:
|
|
964
|
+
# No run report exists - need to test the example first
|
|
965
|
+
return SyncDecision(
|
|
966
|
+
operation='crash',
|
|
967
|
+
reason='Example exists but needs runtime testing before test generation',
|
|
968
|
+
confidence=0.85,
|
|
969
|
+
estimated_cost=estimate_operation_cost('crash'),
|
|
970
|
+
details={
|
|
971
|
+
'decision_type': 'heuristic',
|
|
972
|
+
'code_path': str(paths['code']),
|
|
973
|
+
'example_path': str(paths['example']),
|
|
974
|
+
'no_run_report': True,
|
|
975
|
+
'workflow_stage': 'crash_validation'
|
|
976
|
+
}
|
|
977
|
+
)
|
|
978
|
+
elif run_report.exit_code != 0:
|
|
979
|
+
# Example crashed - fix it before proceeding
|
|
980
|
+
return SyncDecision(
|
|
981
|
+
operation='crash',
|
|
982
|
+
reason='Example crashes - fix runtime errors before test generation',
|
|
983
|
+
confidence=0.90,
|
|
984
|
+
estimated_cost=estimate_operation_cost('crash'),
|
|
985
|
+
details={
|
|
986
|
+
'decision_type': 'heuristic',
|
|
987
|
+
'exit_code': run_report.exit_code,
|
|
988
|
+
'workflow_stage': 'crash_fix'
|
|
989
|
+
}
|
|
990
|
+
)
|
|
991
|
+
elif fingerprint and fingerprint.command != 'verify' and not skip_verify:
|
|
992
|
+
# Example runs but hasn't been verified yet
|
|
993
|
+
return SyncDecision(
|
|
994
|
+
operation='verify',
|
|
995
|
+
reason='Example runs but needs verification before test generation',
|
|
996
|
+
confidence=0.85,
|
|
997
|
+
estimated_cost=estimate_operation_cost('verify'),
|
|
998
|
+
details={
|
|
999
|
+
'decision_type': 'heuristic',
|
|
1000
|
+
'exit_code': run_report.exit_code,
|
|
1001
|
+
'last_command': fingerprint.command,
|
|
1002
|
+
'workflow_stage': 'verify_validation'
|
|
1003
|
+
}
|
|
1004
|
+
)
|
|
1005
|
+
else:
|
|
1006
|
+
# Example runs and is verified (or verify is skipped) - now safe to generate tests
|
|
1007
|
+
return SyncDecision(
|
|
1008
|
+
operation='test',
|
|
1009
|
+
reason='Example validated - ready for test generation',
|
|
1010
|
+
confidence=0.85,
|
|
1011
|
+
estimated_cost=estimate_operation_cost('test'),
|
|
1012
|
+
details={
|
|
1013
|
+
'decision_type': 'heuristic',
|
|
1014
|
+
'code_path': str(paths['code']),
|
|
1015
|
+
'example_path': str(paths['example']),
|
|
1016
|
+
'code_exists': True,
|
|
1017
|
+
'example_exists': True,
|
|
1018
|
+
'test_exists': False,
|
|
1019
|
+
'workflow_stage': 'test_generation'
|
|
1020
|
+
}
|
|
1021
|
+
)
|
|
798
1022
|
|
|
799
1023
|
# Some files are missing but no changes detected
|
|
800
1024
|
if not paths['code'].exists():
|
|
@@ -804,17 +1028,27 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
804
1028
|
return SyncDecision(
|
|
805
1029
|
operation='auto-deps',
|
|
806
1030
|
reason='Missing code file, prompt has dependencies',
|
|
807
|
-
|
|
808
|
-
estimated_cost=
|
|
809
|
-
|
|
1031
|
+
confidence=0.80,
|
|
1032
|
+
estimated_cost=estimate_operation_cost('auto-deps'),
|
|
1033
|
+
details={
|
|
1034
|
+
'decision_type': 'heuristic',
|
|
1035
|
+
'prompt_path': str(paths['prompt']),
|
|
1036
|
+
'code_exists': False,
|
|
1037
|
+
'has_dependencies': True
|
|
1038
|
+
}
|
|
810
1039
|
)
|
|
811
1040
|
else:
|
|
812
1041
|
return SyncDecision(
|
|
813
1042
|
operation='generate',
|
|
814
1043
|
reason='Missing code file - generate from prompt',
|
|
815
|
-
|
|
816
|
-
estimated_cost=
|
|
817
|
-
|
|
1044
|
+
confidence=0.90,
|
|
1045
|
+
estimated_cost=estimate_operation_cost('generate'),
|
|
1046
|
+
details={
|
|
1047
|
+
'decision_type': 'heuristic',
|
|
1048
|
+
'prompt_path': str(paths['prompt']),
|
|
1049
|
+
'code_exists': False,
|
|
1050
|
+
'has_dependencies': False
|
|
1051
|
+
}
|
|
818
1052
|
)
|
|
819
1053
|
|
|
820
1054
|
elif len(changes) == 1:
|
|
@@ -827,44 +1061,66 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
827
1061
|
return SyncDecision(
|
|
828
1062
|
operation='auto-deps',
|
|
829
1063
|
reason='Prompt changed and dependencies need updating',
|
|
830
|
-
|
|
831
|
-
estimated_cost=
|
|
832
|
-
|
|
1064
|
+
confidence=0.85,
|
|
1065
|
+
estimated_cost=estimate_operation_cost('auto-deps'),
|
|
1066
|
+
details={
|
|
1067
|
+
'decision_type': 'heuristic',
|
|
1068
|
+
'changed_file': 'prompt',
|
|
1069
|
+
'has_dependencies': True,
|
|
1070
|
+
'prompt_changed': True
|
|
1071
|
+
}
|
|
833
1072
|
)
|
|
834
1073
|
else:
|
|
835
1074
|
return SyncDecision(
|
|
836
1075
|
operation='generate',
|
|
837
1076
|
reason='Prompt changed - regenerate code',
|
|
838
|
-
|
|
839
|
-
estimated_cost=
|
|
840
|
-
|
|
1077
|
+
confidence=0.90,
|
|
1078
|
+
estimated_cost=estimate_operation_cost('generate'),
|
|
1079
|
+
details={
|
|
1080
|
+
'decision_type': 'heuristic',
|
|
1081
|
+
'changed_file': 'prompt',
|
|
1082
|
+
'has_dependencies': False,
|
|
1083
|
+
'prompt_changed': True
|
|
1084
|
+
}
|
|
841
1085
|
)
|
|
842
1086
|
|
|
843
1087
|
elif change == 'code':
|
|
844
1088
|
return SyncDecision(
|
|
845
1089
|
operation='update',
|
|
846
1090
|
reason='Code changed - update prompt to reflect changes',
|
|
847
|
-
|
|
848
|
-
estimated_cost=
|
|
849
|
-
|
|
1091
|
+
confidence=0.85,
|
|
1092
|
+
estimated_cost=estimate_operation_cost('update'),
|
|
1093
|
+
details={
|
|
1094
|
+
'decision_type': 'heuristic',
|
|
1095
|
+
'changed_file': 'code',
|
|
1096
|
+
'code_changed': True
|
|
1097
|
+
}
|
|
850
1098
|
)
|
|
851
1099
|
|
|
852
1100
|
elif change == 'test':
|
|
853
1101
|
return SyncDecision(
|
|
854
1102
|
operation='test',
|
|
855
1103
|
reason='Test changed - run new tests',
|
|
856
|
-
|
|
857
|
-
estimated_cost=
|
|
858
|
-
|
|
1104
|
+
confidence=0.80,
|
|
1105
|
+
estimated_cost=estimate_operation_cost('test'),
|
|
1106
|
+
details={
|
|
1107
|
+
'decision_type': 'heuristic',
|
|
1108
|
+
'changed_file': 'test',
|
|
1109
|
+
'test_changed': True
|
|
1110
|
+
}
|
|
859
1111
|
)
|
|
860
1112
|
|
|
861
1113
|
elif change == 'example':
|
|
862
1114
|
return SyncDecision(
|
|
863
1115
|
operation='verify',
|
|
864
1116
|
reason='Example changed - verify new example',
|
|
865
|
-
|
|
866
|
-
estimated_cost=
|
|
867
|
-
|
|
1117
|
+
confidence=0.80,
|
|
1118
|
+
estimated_cost=estimate_operation_cost('verify'),
|
|
1119
|
+
details={
|
|
1120
|
+
'decision_type': 'heuristic',
|
|
1121
|
+
'changed_file': 'example',
|
|
1122
|
+
'example_changed': True
|
|
1123
|
+
}
|
|
868
1124
|
)
|
|
869
1125
|
|
|
870
1126
|
else:
|
|
@@ -872,18 +1128,27 @@ def _perform_sync_analysis(basename: str, language: str, target_coverage: float,
|
|
|
872
1128
|
return SyncDecision(
|
|
873
1129
|
operation='analyze_conflict',
|
|
874
1130
|
reason='Multiple files changed - requires conflict analysis',
|
|
875
|
-
|
|
876
|
-
estimated_cost=
|
|
877
|
-
|
|
1131
|
+
confidence=0.70,
|
|
1132
|
+
estimated_cost=estimate_operation_cost('analyze_conflict'),
|
|
1133
|
+
details={
|
|
1134
|
+
'decision_type': 'heuristic',
|
|
1135
|
+
'changed_files': changes,
|
|
1136
|
+
'num_changes': len(changes)
|
|
1137
|
+
}
|
|
878
1138
|
)
|
|
879
1139
|
|
|
880
1140
|
# Fallback - should not reach here normally
|
|
881
1141
|
return SyncDecision(
|
|
882
1142
|
operation='nothing',
|
|
883
1143
|
reason='No clear operation determined',
|
|
884
|
-
|
|
885
|
-
estimated_cost=
|
|
886
|
-
|
|
1144
|
+
confidence=0.50,
|
|
1145
|
+
estimated_cost=estimate_operation_cost('nothing'),
|
|
1146
|
+
details={
|
|
1147
|
+
'decision_type': 'heuristic',
|
|
1148
|
+
'fingerprint_exists': fingerprint is not None,
|
|
1149
|
+
'changes': changes,
|
|
1150
|
+
'fallback': True
|
|
1151
|
+
}
|
|
887
1152
|
)
|
|
888
1153
|
|
|
889
1154
|
|
|
@@ -910,9 +1175,13 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
|
|
|
910
1175
|
return SyncDecision(
|
|
911
1176
|
operation='fail_and_request_manual_merge',
|
|
912
1177
|
reason='LLM analysis template not found - manual merge required',
|
|
913
|
-
|
|
914
|
-
estimated_cost=
|
|
915
|
-
|
|
1178
|
+
confidence=0.0,
|
|
1179
|
+
estimated_cost=estimate_operation_cost('fail_and_request_manual_merge'),
|
|
1180
|
+
details={
|
|
1181
|
+
'decision_type': 'llm',
|
|
1182
|
+
'error': 'Template not available',
|
|
1183
|
+
'changed_files': changed_files
|
|
1184
|
+
}
|
|
916
1185
|
)
|
|
917
1186
|
|
|
918
1187
|
# 2. Gather file paths and diffs
|
|
@@ -974,9 +1243,14 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
|
|
|
974
1243
|
return SyncDecision(
|
|
975
1244
|
operation='fail_and_request_manual_merge',
|
|
976
1245
|
reason=f'LLM confidence too low ({confidence:.2f}) - manual merge required',
|
|
977
|
-
|
|
1246
|
+
confidence=confidence,
|
|
978
1247
|
estimated_cost=response.get('cost', 0.0),
|
|
979
|
-
|
|
1248
|
+
details={
|
|
1249
|
+
'decision_type': 'llm',
|
|
1250
|
+
'llm_response': llm_result,
|
|
1251
|
+
'changed_files': changed_files,
|
|
1252
|
+
'confidence_threshold': 0.75
|
|
1253
|
+
}
|
|
980
1254
|
)
|
|
981
1255
|
|
|
982
1256
|
# Extract operation and details
|
|
@@ -988,14 +1262,15 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
|
|
|
988
1262
|
return SyncDecision(
|
|
989
1263
|
operation=operation,
|
|
990
1264
|
reason=f"LLM analysis: {reason}",
|
|
1265
|
+
confidence=confidence,
|
|
1266
|
+
estimated_cost=response.get('cost', 0.0),
|
|
991
1267
|
details={
|
|
1268
|
+
'decision_type': 'llm',
|
|
992
1269
|
'llm_response': llm_result,
|
|
993
1270
|
'changed_files': changed_files,
|
|
994
1271
|
'merge_strategy': merge_strategy,
|
|
995
1272
|
'follow_up_operations': follow_up_operations
|
|
996
1273
|
},
|
|
997
|
-
estimated_cost=response.get('cost', 0.0),
|
|
998
|
-
confidence=confidence,
|
|
999
1274
|
prerequisites=follow_up_operations
|
|
1000
1275
|
)
|
|
1001
1276
|
|
|
@@ -1004,9 +1279,15 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
|
|
|
1004
1279
|
return SyncDecision(
|
|
1005
1280
|
operation='fail_and_request_manual_merge',
|
|
1006
1281
|
reason=f'Invalid LLM response: {e} - manual merge required',
|
|
1007
|
-
|
|
1282
|
+
confidence=0.0,
|
|
1008
1283
|
estimated_cost=response.get('cost', 0.0),
|
|
1009
|
-
|
|
1284
|
+
details={
|
|
1285
|
+
'decision_type': 'llm',
|
|
1286
|
+
'error': str(e),
|
|
1287
|
+
'raw_response': response.get('result', ''),
|
|
1288
|
+
'changed_files': changed_files,
|
|
1289
|
+
'llm_error': True
|
|
1290
|
+
}
|
|
1010
1291
|
)
|
|
1011
1292
|
|
|
1012
1293
|
except Exception as e:
|
|
@@ -1014,22 +1295,28 @@ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerp
|
|
|
1014
1295
|
return SyncDecision(
|
|
1015
1296
|
operation='fail_and_request_manual_merge',
|
|
1016
1297
|
reason=f'Error during LLM analysis: {e} - manual merge required',
|
|
1017
|
-
|
|
1018
|
-
estimated_cost=
|
|
1019
|
-
|
|
1298
|
+
confidence=0.0,
|
|
1299
|
+
estimated_cost=estimate_operation_cost('fail_and_request_manual_merge'),
|
|
1300
|
+
details={
|
|
1301
|
+
'decision_type': 'llm',
|
|
1302
|
+
'error': str(e),
|
|
1303
|
+
'changed_files': changed_files,
|
|
1304
|
+
'llm_error': True
|
|
1305
|
+
}
|
|
1020
1306
|
)
|
|
1021
1307
|
|
|
1022
1308
|
|
|
1023
1309
|
if __name__ == "__main__":
|
|
1024
1310
|
# Example usage
|
|
1025
|
-
if len(sys.argv)
|
|
1026
|
-
print("Usage: python sync_determine_operation.py <basename> <language>")
|
|
1311
|
+
if len(sys.argv) < 3 or len(sys.argv) > 4:
|
|
1312
|
+
print("Usage: python sync_determine_operation.py <basename> <language> [target_coverage]")
|
|
1027
1313
|
sys.exit(1)
|
|
1028
1314
|
|
|
1029
1315
|
basename = sys.argv[1]
|
|
1030
1316
|
language = sys.argv[2]
|
|
1317
|
+
target_coverage = float(sys.argv[3]) if len(sys.argv) == 4 else 90.0
|
|
1031
1318
|
|
|
1032
|
-
decision = sync_determine_operation(basename, language, target_coverage
|
|
1319
|
+
decision = sync_determine_operation(basename, language, target_coverage)
|
|
1033
1320
|
|
|
1034
1321
|
print(f"Operation: {decision.operation}")
|
|
1035
1322
|
print(f"Reason: {decision.reason}")
|