PyPI - empathy-framework - Versions diffs - 4.6.6__py3-none-any.whl → 4.7.1__py3-none-any.whl - Mend

empathy-framework 4.6.6__py3-none-any.whl → 4.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (273) hide show

empathy_framework-4.7.1.dist-info/METADATA +690 -0
empathy_framework-4.7.1.dist-info/RECORD +379 -0
{empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/top_level.txt +1 -2
empathy_healthcare_plugin/monitors/monitoring/__init__.py +9 -9
empathy_llm_toolkit/agent_factory/__init__.py +6 -6
empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +7 -10
empathy_llm_toolkit/agents_md/__init__.py +22 -0
empathy_llm_toolkit/agents_md/loader.py +218 -0
empathy_llm_toolkit/agents_md/parser.py +271 -0
empathy_llm_toolkit/agents_md/registry.py +307 -0
empathy_llm_toolkit/commands/__init__.py +51 -0
empathy_llm_toolkit/commands/context.py +375 -0
empathy_llm_toolkit/commands/loader.py +301 -0
empathy_llm_toolkit/commands/models.py +231 -0
empathy_llm_toolkit/commands/parser.py +371 -0
empathy_llm_toolkit/commands/registry.py +429 -0
empathy_llm_toolkit/config/__init__.py +8 -8
empathy_llm_toolkit/config/unified.py +3 -7
empathy_llm_toolkit/context/__init__.py +22 -0
empathy_llm_toolkit/context/compaction.py +455 -0
empathy_llm_toolkit/context/manager.py +434 -0
empathy_llm_toolkit/hooks/__init__.py +24 -0
empathy_llm_toolkit/hooks/config.py +306 -0
empathy_llm_toolkit/hooks/executor.py +289 -0
empathy_llm_toolkit/hooks/registry.py +302 -0
empathy_llm_toolkit/hooks/scripts/__init__.py +39 -0
empathy_llm_toolkit/hooks/scripts/evaluate_session.py +201 -0
empathy_llm_toolkit/hooks/scripts/first_time_init.py +285 -0
empathy_llm_toolkit/hooks/scripts/pre_compact.py +207 -0
empathy_llm_toolkit/hooks/scripts/session_end.py +183 -0
empathy_llm_toolkit/hooks/scripts/session_start.py +163 -0
empathy_llm_toolkit/hooks/scripts/suggest_compact.py +225 -0
empathy_llm_toolkit/learning/__init__.py +30 -0
empathy_llm_toolkit/learning/evaluator.py +438 -0
empathy_llm_toolkit/learning/extractor.py +514 -0
empathy_llm_toolkit/learning/storage.py +560 -0
empathy_llm_toolkit/providers.py +4 -11
empathy_llm_toolkit/security/__init__.py +17 -17
empathy_llm_toolkit/utils/tokens.py +2 -5
empathy_os/__init__.py +202 -70
empathy_os/cache_monitor.py +5 -3
empathy_os/cli/__init__.py +11 -55
empathy_os/cli/__main__.py +29 -15
empathy_os/cli/commands/inspection.py +21 -12
empathy_os/cli/commands/memory.py +4 -12
empathy_os/cli/commands/profiling.py +198 -0
empathy_os/cli/commands/utilities.py +27 -7
empathy_os/cli.py +28 -57
empathy_os/cli_unified.py +525 -1164
empathy_os/cost_tracker.py +9 -3
empathy_os/dashboard/server.py +200 -2
empathy_os/hot_reload/__init__.py +7 -7
empathy_os/hot_reload/config.py +6 -7
empathy_os/hot_reload/integration.py +35 -35
empathy_os/hot_reload/reloader.py +57 -57
empathy_os/hot_reload/watcher.py +28 -28
empathy_os/hot_reload/websocket.py +2 -2
empathy_os/memory/__init__.py +11 -4
empathy_os/memory/claude_memory.py +1 -1
empathy_os/memory/cross_session.py +8 -12
empathy_os/memory/edges.py +6 -6
empathy_os/memory/file_session.py +770 -0
empathy_os/memory/graph.py +30 -30
empathy_os/memory/nodes.py +6 -6
empathy_os/memory/short_term.py +15 -9
empathy_os/memory/unified.py +606 -140
empathy_os/meta_workflows/agent_creator.py +3 -9
empathy_os/meta_workflows/cli_meta_workflows.py +113 -53
empathy_os/meta_workflows/form_engine.py +6 -18
empathy_os/meta_workflows/intent_detector.py +64 -24
empathy_os/meta_workflows/models.py +3 -1
empathy_os/meta_workflows/pattern_learner.py +13 -31
empathy_os/meta_workflows/plan_generator.py +55 -47
empathy_os/meta_workflows/session_context.py +2 -3
empathy_os/meta_workflows/workflow.py +20 -51
empathy_os/models/cli.py +2 -2
empathy_os/models/tasks.py +1 -2
empathy_os/models/telemetry.py +4 -1
empathy_os/models/token_estimator.py +3 -1
empathy_os/monitoring/alerts.py +938 -9
empathy_os/monitoring/alerts_cli.py +346 -183
empathy_os/orchestration/execution_strategies.py +12 -29
empathy_os/orchestration/pattern_learner.py +20 -26
empathy_os/orchestration/real_tools.py +6 -15
empathy_os/platform_utils.py +2 -1
empathy_os/plugins/__init__.py +2 -2
empathy_os/plugins/base.py +64 -64
empathy_os/plugins/registry.py +32 -32
empathy_os/project_index/index.py +49 -15
empathy_os/project_index/models.py +1 -2
empathy_os/project_index/reports.py +1 -1
empathy_os/project_index/scanner.py +1 -0
empathy_os/redis_memory.py +10 -7
empathy_os/resilience/__init__.py +1 -1
empathy_os/resilience/health.py +10 -10
empathy_os/routing/__init__.py +7 -7
empathy_os/routing/chain_executor.py +37 -37
empathy_os/routing/classifier.py +36 -36
empathy_os/routing/smart_router.py +40 -40
empathy_os/routing/{wizard_registry.py → workflow_registry.py} +47 -47
empathy_os/scaffolding/__init__.py +8 -8
empathy_os/scaffolding/__main__.py +1 -1
empathy_os/scaffolding/cli.py +28 -28
empathy_os/socratic/__init__.py +3 -19
empathy_os/socratic/ab_testing.py +25 -36
empathy_os/socratic/blueprint.py +38 -38
empathy_os/socratic/cli.py +34 -20
empathy_os/socratic/collaboration.py +30 -28
empathy_os/socratic/domain_templates.py +9 -1
empathy_os/socratic/embeddings.py +17 -13
empathy_os/socratic/engine.py +135 -70
empathy_os/socratic/explainer.py +70 -60
empathy_os/socratic/feedback.py +24 -19
empathy_os/socratic/forms.py +15 -10
empathy_os/socratic/generator.py +51 -35
empathy_os/socratic/llm_analyzer.py +25 -23
empathy_os/socratic/mcp_server.py +99 -159
empathy_os/socratic/session.py +19 -13
empathy_os/socratic/storage.py +98 -67
empathy_os/socratic/success.py +38 -27
empathy_os/socratic/visual_editor.py +51 -39
empathy_os/socratic/web_ui.py +99 -66
empathy_os/telemetry/cli.py +3 -1
empathy_os/telemetry/usage_tracker.py +1 -3
empathy_os/test_generator/__init__.py +3 -3
empathy_os/test_generator/cli.py +28 -28
empathy_os/test_generator/generator.py +64 -66
empathy_os/test_generator/risk_analyzer.py +11 -11
empathy_os/vscode_bridge 2.py +173 -0
empathy_os/vscode_bridge.py +173 -0
empathy_os/workflows/__init__.py +212 -120
empathy_os/workflows/batch_processing.py +8 -24
empathy_os/workflows/bug_predict.py +1 -1
empathy_os/workflows/code_review.py +20 -5
empathy_os/workflows/code_review_pipeline.py +13 -8
empathy_os/workflows/keyboard_shortcuts/workflow.py +6 -2
empathy_os/workflows/manage_documentation.py +1 -0
empathy_os/workflows/orchestrated_health_check.py +6 -11
empathy_os/workflows/orchestrated_release_prep.py +3 -3
empathy_os/workflows/pr_review.py +18 -10
empathy_os/workflows/progressive/README 2.md +454 -0
empathy_os/workflows/progressive/__init__ 2.py +92 -0
empathy_os/workflows/progressive/__init__.py +2 -12
empathy_os/workflows/progressive/cli 2.py +242 -0
empathy_os/workflows/progressive/cli.py +14 -37
empathy_os/workflows/progressive/core 2.py +488 -0
empathy_os/workflows/progressive/core.py +12 -12
empathy_os/workflows/progressive/orchestrator 2.py +701 -0
empathy_os/workflows/progressive/orchestrator.py +166 -144
empathy_os/workflows/progressive/reports 2.py +528 -0
empathy_os/workflows/progressive/reports.py +22 -31
empathy_os/workflows/progressive/telemetry 2.py +280 -0
empathy_os/workflows/progressive/telemetry.py +8 -14
empathy_os/workflows/progressive/test_gen 2.py +514 -0
empathy_os/workflows/progressive/test_gen.py +29 -48
empathy_os/workflows/progressive/workflow 2.py +628 -0
empathy_os/workflows/progressive/workflow.py +31 -70
empathy_os/workflows/release_prep.py +21 -6
empathy_os/workflows/release_prep_crew.py +1 -0
empathy_os/workflows/secure_release.py +13 -6
empathy_os/workflows/security_audit.py +8 -3
empathy_os/workflows/test_coverage_boost_crew.py +3 -2
empathy_os/workflows/test_maintenance_crew.py +1 -0
empathy_os/workflows/test_runner.py +16 -12
empathy_software_plugin/SOFTWARE_PLUGIN_README.md +25 -703
empathy_software_plugin/cli.py +0 -122
patterns/README.md +119 -0
patterns/__init__.py +95 -0
patterns/behavior.py +298 -0
patterns/code_review_memory.json +441 -0
patterns/core.py +97 -0
patterns/debugging.json +3763 -0
patterns/empathy.py +268 -0
patterns/health_check_memory.json +505 -0
patterns/input.py +161 -0
patterns/memory_graph.json +8 -0
patterns/refactoring_memory.json +1113 -0
patterns/registry.py +663 -0
patterns/security_memory.json +8 -0
patterns/structural.py +415 -0
patterns/validation.py +194 -0
coach_wizards/__init__.py +0 -45
coach_wizards/accessibility_wizard.py +0 -91
coach_wizards/api_wizard.py +0 -91
coach_wizards/base_wizard.py +0 -209
coach_wizards/cicd_wizard.py +0 -91
coach_wizards/code_reviewer_README.md +0 -60
coach_wizards/code_reviewer_wizard.py +0 -180
coach_wizards/compliance_wizard.py +0 -91
coach_wizards/database_wizard.py +0 -91
coach_wizards/debugging_wizard.py +0 -91
coach_wizards/documentation_wizard.py +0 -91
coach_wizards/generate_wizards.py +0 -347
coach_wizards/localization_wizard.py +0 -173
coach_wizards/migration_wizard.py +0 -91
coach_wizards/monitoring_wizard.py +0 -91
coach_wizards/observability_wizard.py +0 -91
coach_wizards/performance_wizard.py +0 -91
coach_wizards/prompt_engineering_wizard.py +0 -661
coach_wizards/refactoring_wizard.py +0 -91
coach_wizards/scaling_wizard.py +0 -90
coach_wizards/security_wizard.py +0 -92
coach_wizards/testing_wizard.py +0 -91
empathy_framework-4.6.6.dist-info/METADATA +0 -1597
empathy_framework-4.6.6.dist-info/RECORD +0 -410
empathy_llm_toolkit/wizards/__init__.py +0 -43
empathy_llm_toolkit/wizards/base_wizard.py +0 -364
empathy_llm_toolkit/wizards/customer_support_wizard.py +0 -190
empathy_llm_toolkit/wizards/healthcare_wizard.py +0 -378
empathy_llm_toolkit/wizards/patient_assessment_README.md +0 -64
empathy_llm_toolkit/wizards/patient_assessment_wizard.py +0 -193
empathy_llm_toolkit/wizards/technology_wizard.py +0 -209
empathy_os/wizard_factory_cli.py +0 -170
empathy_software_plugin/wizards/__init__.py +0 -42
empathy_software_plugin/wizards/advanced_debugging_wizard.py +0 -395
empathy_software_plugin/wizards/agent_orchestration_wizard.py +0 -511
empathy_software_plugin/wizards/ai_collaboration_wizard.py +0 -503
empathy_software_plugin/wizards/ai_context_wizard.py +0 -441
empathy_software_plugin/wizards/ai_documentation_wizard.py +0 -503
empathy_software_plugin/wizards/base_wizard.py +0 -288
empathy_software_plugin/wizards/book_chapter_wizard.py +0 -519
empathy_software_plugin/wizards/code_review_wizard.py +0 -604
empathy_software_plugin/wizards/debugging/__init__.py +0 -50
empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +0 -414
empathy_software_plugin/wizards/debugging/config_loaders.py +0 -446
empathy_software_plugin/wizards/debugging/fix_applier.py +0 -469
empathy_software_plugin/wizards/debugging/language_patterns.py +0 -385
empathy_software_plugin/wizards/debugging/linter_parsers.py +0 -470
empathy_software_plugin/wizards/debugging/verification.py +0 -369
empathy_software_plugin/wizards/enhanced_testing_wizard.py +0 -537
empathy_software_plugin/wizards/memory_enhanced_debugging_wizard.py +0 -816
empathy_software_plugin/wizards/multi_model_wizard.py +0 -501
empathy_software_plugin/wizards/pattern_extraction_wizard.py +0 -422
empathy_software_plugin/wizards/pattern_retriever_wizard.py +0 -400
empathy_software_plugin/wizards/performance/__init__.py +0 -9
empathy_software_plugin/wizards/performance/bottleneck_detector.py +0 -221
empathy_software_plugin/wizards/performance/profiler_parsers.py +0 -278
empathy_software_plugin/wizards/performance/trajectory_analyzer.py +0 -429
empathy_software_plugin/wizards/performance_profiling_wizard.py +0 -305
empathy_software_plugin/wizards/prompt_engineering_wizard.py +0 -425
empathy_software_plugin/wizards/rag_pattern_wizard.py +0 -461
empathy_software_plugin/wizards/security/__init__.py +0 -32
empathy_software_plugin/wizards/security/exploit_analyzer.py +0 -290
empathy_software_plugin/wizards/security/owasp_patterns.py +0 -241
empathy_software_plugin/wizards/security/vulnerability_scanner.py +0 -604
empathy_software_plugin/wizards/security_analysis_wizard.py +0 -322
empathy_software_plugin/wizards/security_learning_wizard.py +0 -740
empathy_software_plugin/wizards/tech_debt_wizard.py +0 -726
empathy_software_plugin/wizards/testing/__init__.py +0 -27
empathy_software_plugin/wizards/testing/coverage_analyzer.py +0 -459
empathy_software_plugin/wizards/testing/quality_analyzer.py +0 -525
empathy_software_plugin/wizards/testing/test_suggester.py +0 -533
empathy_software_plugin/wizards/testing_wizard.py +0 -274
wizards/__init__.py +0 -82
wizards/admission_assessment_wizard.py +0 -644
wizards/care_plan.py +0 -321
wizards/clinical_assessment.py +0 -769
wizards/discharge_planning.py +0 -77
wizards/discharge_summary_wizard.py +0 -468
wizards/dosage_calculation.py +0 -497
wizards/incident_report_wizard.py +0 -454
wizards/medication_reconciliation.py +0 -85
wizards/nursing_assessment.py +0 -171
wizards/patient_education.py +0 -654
wizards/quality_improvement.py +0 -705
wizards/sbar_report.py +0 -324
wizards/sbar_wizard.py +0 -608
wizards/shift_handoff_wizard.py +0 -535
wizards/soap_note_wizard.py +0 -679
wizards/treatment_plan.py +0 -15
{empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/WHEEL +0 -0
{empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/entry_points.txt +0 -0
{empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/licenses/LICENSE +0 -0

empathy_os/workflows/progressive/core 2.py ADDED Viewed

@@ -0,0 +1,488 @@
+"""Core data structures for progressive tier escalation.
+This module defines the fundamental data structures used throughout the
+progressive escalation system, including failure analysis, quality metrics,
+tier results, and configuration.
+"""
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any
class Tier(Enum):
    """Model tier levels for progressive escalation.

    Tiers are ordered ``CHEAP < CAPABLE < PREMIUM``. All four ordering
    operators (``<``, ``<=``, ``>``, ``>=``) work between two ``Tier``
    members. Ordering a ``Tier`` against a value of any other type raises
    ``TypeError`` (the comparison methods return ``NotImplemented``).

    Attributes:
        CHEAP: Low-cost models (e.g., gpt-4o-mini, claude-3-haiku)
        CAPABLE: Mid-tier models (e.g., claude-3-5-sonnet, gpt-4o)
        PREMIUM: High-end models (e.g., claude-opus-4, o1)

    Example:
        >>> Tier.CHEAP < Tier.PREMIUM
        True
        >>> Tier.CAPABLE >= Tier.CAPABLE
        True
        >>> sorted([Tier.PREMIUM, Tier.CHEAP])
        [<Tier.CHEAP: 'cheap'>, <Tier.PREMIUM: 'premium'>]
    """

    CHEAP = "cheap"
    CAPABLE = "capable"
    PREMIUM = "premium"

    def _rank(self) -> int:
        """Return this tier's position in the escalation order (0 = cheapest)."""
        # Built inside the method: a class-level dict would be turned into
        # an enum member by the Enum machinery.
        return {Tier.CHEAP: 0, Tier.CAPABLE: 1, Tier.PREMIUM: 2}[self]

    def __lt__(self, other: object) -> bool:
        """Return True if this tier is strictly below ``other``."""
        if not isinstance(other, Tier):
            # Let Python try the reflected operation / raise TypeError
            # instead of failing with an unrelated KeyError.
            return NotImplemented
        return self._rank() < other._rank()

    def __le__(self, other: object) -> bool:
        """Return True if this tier is below or equal to ``other``."""
        if not isinstance(other, Tier):
            return NotImplemented
        return self._rank() <= other._rank()

    def __gt__(self, other: object) -> bool:
        """Return True if this tier is strictly above ``other``."""
        if not isinstance(other, Tier):
            return NotImplemented
        return self._rank() > other._rank()

    def __ge__(self, other: object) -> bool:
        """Return True if this tier is above or equal to ``other``."""
        if not isinstance(other, Tier):
            return NotImplemented
        return self._rank() >= other._rank()
@dataclass
class FailureAnalysis:
    """Multi-signal failure detection and quality analysis.

    Combines multiple signals to provide robust failure detection:

    1. Syntax errors in generated code
    2. Execution failures (test pass rate)
    3. Quality metrics (coverage, assertion depth)
    4. LLM confidence signals

    The composite quality score (CQS) provides an objective measure
    that combines all signals with appropriate weighting.

    Attributes:
        syntax_errors: List of syntax errors found in generated code
        test_failures: List of test execution failures
        test_pass_rate: Percentage of tests that passed (0.0-1.0)
        coverage_percent: Code coverage percentage (0.0-100.0)
        assertion_depth: Average number of assertions per test
        confidence_score: LLM confidence level (0.0-1.0)
        llm_uncertainty_signals: Uncertainty phrases detected in LLM response

    Example:
        >>> analysis = FailureAnalysis(
        ...     test_pass_rate=0.85,
        ...     coverage_percent=78.0,
        ...     assertion_depth=5.2,
        ...     confidence_score=0.92
        ... )
        >>> round(analysis.calculate_quality_score(), 2)
        77.7
        >>> analysis.should_escalate
        False
    """

    syntax_errors: list[SyntaxError] = field(default_factory=list)
    test_failures: list[dict[str, Any]] = field(default_factory=list)
    test_pass_rate: float = 0.0
    coverage_percent: float = 0.0
    assertion_depth: float = 0.0
    confidence_score: float = 0.0
    llm_uncertainty_signals: list[str] = field(default_factory=list)

    @staticmethod
    def _clamp_percent(value: float) -> float:
        """Clamp a component score into the 0-100 range."""
        return max(0.0, min(value, 100.0))

    def calculate_quality_score(self) -> float:
        """Calculate composite quality score (CQS) from 0-100.

        Formula:
            CQS = (
                0.40 × test_pass_rate +
                0.25 × code_coverage +
                0.20 × assertion_quality +
                0.15 × llm_confidence
            ) × syntax_error_penalty

        Weights:
            - Test pass rate: 40% (most important - functionality must work)
            - Code coverage: 25% (thoroughness matters)
            - Assertion quality: 20% (test depth is important)
            - LLM confidence: 15% (signals potential brittleness)

        Penalties:
            - Syntax errors: 50% penalty (halves the score)

        Each component is clamped to 0-100 before weighting, so an
        out-of-range input (e.g. a negative assertion depth or coverage
        above 100) cannot push the score outside the documented range or
        compensate for a weak component.

        Returns:
            Quality score from 0.0 (worst) to 100.0 (perfect)

        Example:
            >>> analysis = FailureAnalysis(
            ...     test_pass_rate=0.90,
            ...     coverage_percent=85.0,
            ...     assertion_depth=6.0,
            ...     confidence_score=0.95
            ... )
            >>> round(analysis.calculate_quality_score(), 2)
            83.5
        """
        # Component scores (convert to 0-100 scale)
        pass_rate_score = self._clamp_percent(self.test_pass_rate * 100)
        coverage_score = self._clamp_percent(self.coverage_percent)
        # Assertion quality: cap at 100% (10 assertions = 100%)
        assertion_quality_score = self._clamp_percent(self.assertion_depth * 10)
        confidence_score_scaled = self._clamp_percent(self.confidence_score * 100)

        # Weighted composite
        cqs = (
            0.40 * pass_rate_score
            + 0.25 * coverage_score
            + 0.20 * assertion_quality_score
            + 0.15 * confidence_score_scaled
        )

        # Apply syntax error penalty: any syntax error halves the score
        if self.syntax_errors:
            cqs *= 0.5

        # Guard against floating-point drift above the documented maximum
        return min(cqs, 100.0)

    @property
    def should_escalate(self) -> bool:
        """Determine if this result should trigger escalation.

        Multi-criteria decision based on:
        - Low CQS (<70)
        - Multiple syntax errors (>3)
        - Low test pass rate (<70%)
        - Low coverage (<60%)

        Returns:
            True if escalation is recommended, False otherwise

        Example:
            >>> analysis = FailureAnalysis(test_pass_rate=0.50)
            >>> analysis.should_escalate
            True
        """
        cqs = self.calculate_quality_score()
        return (
            cqs < 70
            or len(self.syntax_errors) > 3
            or self.test_pass_rate < 0.7
            or self.coverage_percent < 60
        )

    @property
    def failure_severity(self) -> str:
        """Determine severity level of failures.

        The checks run from most to least severe; the first match wins.

        Returns:
            "CRITICAL": Severe failures, consider skipping to Premium
            "HIGH": Significant failures, escalate to next tier
            "MODERATE": Minor failures, retry at current tier
            "LOW": Acceptable quality, no escalation needed

        Example:
            >>> analysis = FailureAnalysis(test_pass_rate=0.25)
            >>> analysis.failure_severity
            'CRITICAL'
        """
        # CRITICAL depends only on raw signals, so check it before scoring
        if len(self.syntax_errors) > 5 or self.test_pass_rate < 0.3:
            return "CRITICAL"

        cqs = self.calculate_quality_score()
        if cqs < 70 or self.test_pass_rate < 0.5:
            return "HIGH"
        if cqs < 80 or self.test_pass_rate < 0.7:
            return "MODERATE"
        return "LOW"
@dataclass
class TierResult:
    """Results from a single tier execution attempt.

    Captures all information about a tier's execution including
    generated artifacts, quality analysis, cost, and escalation decision.

    Attributes:
        tier: Which tier executed (CHEAP, CAPABLE, or PREMIUM)
        model: Specific model used (e.g., "gpt-4o-mini")
        attempt: Attempt number at this tier (1-based)
        timestamp: When this execution occurred
        generated_items: Generated artifacts (tests, code, etc.)
        failure_analysis: Quality and failure analysis
        cost: Cost in USD for this execution
        duration: Execution time in seconds
        tokens_used: Token counts for this execution, keyed by name
        escalated: Whether this result triggered escalation
        escalation_reason: Human-readable reason for escalation

    Example:
        >>> result = TierResult(
        ...     tier=Tier.CHEAP,
        ...     model="gpt-4o-mini",
        ...     attempt=1,
        ...     timestamp=datetime.now(),
        ...     generated_items=[{"code": "test_foo()"}],
        ...     failure_analysis=FailureAnalysis(test_pass_rate=0.65),
        ...     cost=0.15,
        ...     duration=12.5
        ... )
        >>> result.quality_score
        26.0
    """

    tier: Tier
    model: str
    attempt: int
    timestamp: datetime

    # Generated artifacts
    generated_items: list[dict[str, Any]] = field(default_factory=list)

    # Analysis
    failure_analysis: FailureAnalysis = field(default_factory=FailureAnalysis)
    cost: float = 0.0
    duration: float = 0.0
    tokens_used: dict[str, int] = field(default_factory=dict)

    # Decision
    escalated: bool = False
    escalation_reason: str = ""

    @property
    def quality_score(self) -> float:
        """Get composite quality score for this tier result.

        Delegates to ``failure_analysis.calculate_quality_score()``.

        Returns:
            CQS from 0.0 to 100.0
        """
        return self.failure_analysis.calculate_quality_score()

    @property
    def success_count(self) -> int:
        """Count of successfully generated items (CQS >= 80).

        An item counts as successful when its ``"quality_score"`` entry is
        at least 80. Items with a missing or ``None`` score count as
        unsuccessful rather than raising.

        Returns:
            Number of items meeting quality threshold
        """
        return sum(
            1
            for item in self.generated_items
            # "or 0" treats an explicit None score like a missing one
            if (item.get("quality_score") or 0) >= 80
        )

    @property
    def success_rate(self) -> float:
        """Fraction of generated items that met the quality threshold.

        Returns:
            Success rate from 0.0 to 1.0 (0.0 when nothing was generated)
        """
        if not self.generated_items:
            return 0.0
        return self.success_count / len(self.generated_items)
@dataclass
class ProgressiveWorkflowResult:
    """Complete results from a progressive workflow execution.

    Captures the full progression history across all tiers, including
    costs, quality metrics, and escalation decisions.

    Attributes:
        workflow_name: Name of the workflow (e.g., "test-gen")
        task_id: Unique identifier for this execution
        tier_results: Chronological list of tier execution results
        final_result: The last tier result (may be successful or failed)
        total_cost: Total cost in USD across all tiers
        total_duration: Total execution time in seconds
        success: Whether the workflow completed successfully

    Example:
        >>> result = ProgressiveWorkflowResult(
        ...     workflow_name="test-gen",
        ...     task_id="test-gen-20260117-143022",
        ...     tier_results=[cheap_result, capable_result],
        ...     final_result=capable_result,
        ...     total_cost=0.75,
        ...     total_duration=45.2,
        ...     success=True
        ... )
        >>> print(result.generate_report())
        🎯 PROGRESSIVE ESCALATION REPORT
        ...
    """

    workflow_name: str
    task_id: str
    tier_results: list[TierResult]
    final_result: TierResult
    total_cost: float
    total_duration: float
    success: bool

    # Assumed Premium cost per generated item, in USD (conservative estimate).
    # Deliberately unannotated so the dataclass does not treat it as a field.
    _PREMIUM_COST_PER_ITEM = 0.05

    def generate_report(self) -> str:
        """Generate human-readable progression report.

        Creates a detailed report showing:
        - Tier-by-tier breakdown
        - Quality scores and success rates
        - Cost analysis and savings
        - Escalation decisions

        Returns:
            Formatted report string
        """
        # Imported here rather than at module level; the implementation
        # lives in the reports module.
        from empathy_os.workflows.progressive.reports import generate_progression_report

        return generate_progression_report(self)

    def save_to_disk(self, storage_path: str) -> None:
        """Save detailed results to disk.

        Creates a directory with:
        - summary.json: High-level metrics
        - tier_N_<tier_name>.json: Detailed tier results
        - report.txt: Human-readable report

        Args:
            storage_path: Base path for saving results
        """
        from empathy_os.workflows.progressive.reports import save_results_to_disk

        save_results_to_disk(self, storage_path)

    def _all_premium_cost(self) -> float:
        """Estimate the cost of producing every generated item at Premium tier."""
        total_items = sum(len(r.generated_items) for r in self.tier_results)
        return total_items * self._PREMIUM_COST_PER_ITEM

    @property
    def cost_savings(self) -> float:
        """Calculate cost savings vs running all items at Premium tier.

        Returns:
            Dollar amount saved by using progressive escalation; never
            negative (0.0 when the run cost more than the estimate)
        """
        savings = self._all_premium_cost() - self.total_cost
        return max(savings, 0.0)

    @property
    def cost_savings_percent(self) -> float:
        """Calculate percentage of cost saved.

        Returns:
            Savings percentage (0-100); 0.0 when no items were generated
        """
        all_premium_cost = self._all_premium_cost()
        if all_premium_cost == 0:
            # No items generated: avoid dividing by zero
            return 0.0
        return (self.cost_savings / all_premium_cost) * 100
@dataclass
class EscalationConfig:
    """Settings that drive progressive tier escalation.

    Groups the knobs for retries, escalation thresholds, stagnation
    detection, budget handling, and result storage in one place.

    Attributes:
        enabled: Whether progressive escalation is active
        tiers: Ordered list of tiers to use (default: all three)

        Retry configuration:
            cheap_min_attempts: Minimum attempts at cheap tier
            cheap_max_attempts: Maximum attempts at cheap tier
            capable_min_attempts: Minimum attempts at capable tier
            capable_max_attempts: Maximum attempts at capable tier
            premium_max_attempts: Maximum attempts at premium tier

        Thresholds (Cheap → Capable):
            cheap_to_capable_failure_rate: Max failure rate before escalation
            cheap_to_capable_min_cqs: Min quality score to avoid escalation
            cheap_to_capable_max_syntax_errors: Max syntax errors allowed

        Thresholds (Capable → Premium):
            capable_to_premium_failure_rate: Max failure rate before escalation
            capable_to_premium_min_cqs: Min quality score to avoid escalation
            capable_to_premium_max_syntax_errors: Max syntax errors allowed

        Stagnation detection:
            improvement_threshold: Min CQS improvement to avoid stagnation (%)
            consecutive_stagnation_limit: Consecutive stagnations before escalation

        Cost management:
            max_cost: Maximum total cost in USD
            auto_approve_under: Auto-approve escalations under this cost
            warn_on_budget_exceeded: Print warning if budget exceeded
            abort_on_budget_exceeded: Abort execution if budget exceeded

        Storage:
            save_tier_results: Whether to save tier results to disk
            storage_path: Directory for saving results

    Example:
        >>> config = EscalationConfig(
        ...     enabled=True,
        ...     max_cost=10.00,
        ...     auto_approve_under=5.00,
        ...     cheap_min_attempts=2,
        ...     capable_max_attempts=6
        ... )
    """

    # --- Global settings ---
    enabled: bool = False
    tiers: list[Tier] = field(default_factory=lambda: [Tier.CHEAP, Tier.CAPABLE, Tier.PREMIUM])

    # --- Retry configuration ---
    cheap_min_attempts: int = 2
    cheap_max_attempts: int = 3
    capable_min_attempts: int = 2
    capable_max_attempts: int = 6
    premium_max_attempts: int = 1

    # --- Thresholds: Cheap → Capable ---
    cheap_to_capable_failure_rate: float = 0.30
    cheap_to_capable_min_cqs: float = 70.0
    cheap_to_capable_max_syntax_errors: int = 3

    # --- Thresholds: Capable → Premium ---
    capable_to_premium_failure_rate: float = 0.20
    capable_to_premium_min_cqs: float = 80.0
    capable_to_premium_max_syntax_errors: int = 1

    # --- Stagnation detection ---
    improvement_threshold: float = 5.0  # 5% CQS improvement required
    consecutive_stagnation_limit: int = 2

    # --- Cost management ---
    max_cost: float = 5.00
    auto_approve_under: float | None = None
    warn_on_budget_exceeded: bool = True
    abort_on_budget_exceeded: bool = False

    # --- Storage ---
    save_tier_results: bool = True
    storage_path: str = ".empathy/progressive_runs"

    def get_max_attempts(self, tier: Tier) -> int:
        """Look up the attempt ceiling for ``tier``.

        Args:
            tier: The tier to query

        Returns:
            Maximum number of attempts allowed. Any tier other than CHEAP
            or CAPABLE gets the premium limit.
        """
        ceilings = (
            (Tier.CHEAP, self.cheap_max_attempts),
            (Tier.CAPABLE, self.capable_max_attempts),
        )
        for candidate, ceiling in ceilings:
            if tier == candidate:
                return ceiling
        # Everything else is handled as PREMIUM
        return self.premium_max_attempts

    def get_min_attempts(self, tier: Tier) -> int:
        """Look up the attempt floor for ``tier``.

        Args:
            tier: The tier to query

        Returns:
            Minimum number of attempts required. Any tier other than CHEAP
            or CAPABLE gets a fixed floor of 1.
        """
        floors = (
            (Tier.CHEAP, self.cheap_min_attempts),
            (Tier.CAPABLE, self.capable_min_attempts),
        )
        for candidate, floor in floors:
            if tier == candidate:
                return floor
        # PREMIUM: always exactly 1 attempt
        return 1

empathy_os/workflows/progressive/core.py CHANGED Viewed

@@ -19,6 +19,7 @@ class Tier(Enum):
         CAPABLE: Mid-tier models (e.g., claude-3-5-sonnet, gpt-4o)
         PREMIUM: High-end models (e.g., claude-opus-4, o1)
     """
     CHEAP = "cheap"
     CAPABLE = "capable"
     PREMIUM = "premium"
@@ -116,10 +117,10 @@ class FailureAnalysis:
         # Weighted composite
         cqs = (
-            0.40 * pass_rate_score +
-            0.25 * coverage_score +
-            0.20 * assertion_quality_score +
-            0.15 * confidence_score_scaled
+            0.40 * pass_rate_score
+            + 0.25 * coverage_score
+            + 0.20 * assertion_quality_score
+            + 0.15 * confidence_score_scaled
         )
         # Apply syntax error penalty
@@ -148,10 +149,10 @@ class FailureAnalysis:
         """
         cqs = self.calculate_quality_score()
         return (
-            cqs < 70 or
-            len(self.syntax_errors) > 3 or
-            self.test_pass_rate < 0.7 or
-            self.coverage_percent < 60
+            cqs < 70
+            or len(self.syntax_errors) > 3
+            or self.test_pass_rate < 0.7
+            or self.coverage_percent < 60
         )
     @property
@@ -249,10 +250,7 @@ class TierResult:
         Returns:
             Number of items meeting quality threshold
         """
-        return sum(
-            1 for item in self.generated_items
-            if item.get("quality_score", 0) >= 80
-        )
+        return sum(1 for item in self.generated_items if item.get("quality_score", 0) >= 80)
     @property
     def success_rate(self) -> float:
@@ -320,6 +318,7 @@ class ProgressiveWorkflowResult:
         """
         # Implementation will be in reports.py module
         from empathy_os.workflows.progressive.reports import generate_progression_report
         return generate_progression_report(self)
     def save_to_disk(self, storage_path: str) -> None:
@@ -334,6 +333,7 @@ class ProgressiveWorkflowResult:
             storage_path: Base path for saving results
         """
         from empathy_os.workflows.progressive.reports import save_results_to_disk
         save_results_to_disk(self, storage_path)
     @property

empathy-framework 4.6.6__py3-none-any.whl → 4.7.1__py3-none-any.whl

empathy-framework 4.6.6__py3-none-any.whl → 4.7.1__py3-none-any.whl