empathy-framework 5.0.3__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/METADATA +259 -142
  2. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/RECORD +56 -26
  3. empathy_framework-5.1.0.dist-info/licenses/LICENSE +201 -0
  4. empathy_framework-5.1.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
  5. empathy_os/__init__.py +1 -1
  6. empathy_os/cli/commands/batch.py +5 -5
  7. empathy_os/cli/commands/routing.py +1 -1
  8. empathy_os/cli/commands/workflow.py +2 -1
  9. empathy_os/cli/parsers/cache 2.py +65 -0
  10. empathy_os/cli_minimal.py +3 -3
  11. empathy_os/cli_router 2.py +416 -0
  12. empathy_os/dashboard/__init__.py +1 -2
  13. empathy_os/dashboard/app 2.py +512 -0
  14. empathy_os/dashboard/app.py +1 -1
  15. empathy_os/dashboard/simple_server 2.py +403 -0
  16. empathy_os/dashboard/standalone_server 2.py +536 -0
  17. empathy_os/memory/types 2.py +441 -0
  18. empathy_os/models/__init__.py +19 -0
  19. empathy_os/models/adaptive_routing 2.py +437 -0
  20. empathy_os/models/auth_cli.py +444 -0
  21. empathy_os/models/auth_strategy.py +450 -0
  22. empathy_os/project_index/scanner_parallel 2.py +291 -0
  23. empathy_os/telemetry/agent_coordination 2.py +478 -0
  24. empathy_os/telemetry/agent_coordination.py +3 -3
  25. empathy_os/telemetry/agent_tracking 2.py +350 -0
  26. empathy_os/telemetry/agent_tracking.py +1 -2
  27. empathy_os/telemetry/approval_gates 2.py +563 -0
  28. empathy_os/telemetry/event_streaming 2.py +405 -0
  29. empathy_os/telemetry/event_streaming.py +3 -3
  30. empathy_os/telemetry/feedback_loop 2.py +557 -0
  31. empathy_os/telemetry/feedback_loop.py +1 -1
  32. empathy_os/vscode_bridge 2.py +173 -0
  33. empathy_os/workflows/__init__.py +8 -0
  34. empathy_os/workflows/autonomous_test_gen.py +569 -0
  35. empathy_os/workflows/bug_predict.py +45 -0
  36. empathy_os/workflows/code_review.py +92 -22
  37. empathy_os/workflows/document_gen.py +594 -62
  38. empathy_os/workflows/llm_base.py +363 -0
  39. empathy_os/workflows/perf_audit.py +69 -0
  40. empathy_os/workflows/progressive/README 2.md +454 -0
  41. empathy_os/workflows/progressive/__init__ 2.py +92 -0
  42. empathy_os/workflows/progressive/cli 2.py +242 -0
  43. empathy_os/workflows/progressive/core 2.py +488 -0
  44. empathy_os/workflows/progressive/orchestrator 2.py +701 -0
  45. empathy_os/workflows/progressive/reports 2.py +528 -0
  46. empathy_os/workflows/progressive/telemetry 2.py +280 -0
  47. empathy_os/workflows/progressive/test_gen 2.py +514 -0
  48. empathy_os/workflows/progressive/workflow 2.py +628 -0
  49. empathy_os/workflows/release_prep.py +54 -0
  50. empathy_os/workflows/security_audit.py +154 -79
  51. empathy_os/workflows/test_gen.py +60 -0
  52. empathy_os/workflows/test_gen_behavioral.py +477 -0
  53. empathy_os/workflows/test_gen_parallel.py +341 -0
  54. empathy_framework-5.0.3.dist-info/licenses/LICENSE +0 -139
  55. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/WHEEL +0 -0
  56. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/entry_points.txt +0 -0
  57. {empathy_framework-5.0.3.dist-info → empathy_framework-5.1.0.dist-info}/top_level.txt +0 -0
"""Base class for progressive workflows with tier escalation.

This module provides the foundation for workflows that support progressive
tier escalation, handling retry logic, escalation decisions, cost management,
and approval prompts.
"""

import logging
from datetime import datetime
from typing import Any

from empathy_os.workflows.progressive.core import (
    EscalationConfig,
    FailureAnalysis,
    ProgressiveWorkflowResult,
    Tier,
    TierResult,
)
from empathy_os.workflows.progressive.orchestrator import MetaOrchestrator
from empathy_os.workflows.progressive.telemetry import ProgressiveTelemetry

# Module-level logger, named after the package path per logging convention.
logger = logging.getLogger(__name__)
23
+
24
+
25
class BudgetExceededError(Exception):
    """Signals that accumulated execution cost has gone past the configured budget."""
28
+
29
+
30
class UserCancelledError(Exception):
    """Signals that the user declined to proceed at an approval prompt."""
33
+
34
+
35
class ProgressiveWorkflow:
    """Base class for workflows with progressive tier escalation.

    Implements the core progressive escalation logic:
    1. Start with cheap tier
    2. Analyze results with multi-signal failure detection
    3. Escalate to capable tier if needed
    4. Use LLM-guided retries with stagnation detection
    5. Escalate to premium tier if capable tier stagnates
    6. Request human review if premium tier fails

    Subclasses should implement:
    - _execute_tier_impl(): Tier-specific execution logic
    - _analyze_item(): Item-specific quality analysis

    Example:
        class MyProgressiveWorkflow(ProgressiveWorkflow):
            def _execute_tier_impl(self, tier, items, context):
                # Generate items using appropriate model
                return generated_items

            def _analyze_item(self, item):
                # Analyze item quality
                return FailureAnalysis(...)

    Attributes:
        config: Escalation configuration
        tier_results: List of tier execution results
        meta_orchestrator: Meta-agent for orchestration decisions
    """

    def __init__(self, config: EscalationConfig | None = None, user_id: str | None = None):
        """Initialize progressive workflow.

        Args:
            config: Escalation configuration (uses defaults if None)
            user_id: Optional user identifier for telemetry (will be hashed)
        """
        self.config = config or EscalationConfig()
        self.tier_results: list[TierResult] = []
        self.meta_orchestrator = MetaOrchestrator()
        self.user_id = user_id
        self.telemetry: ProgressiveTelemetry | None = None  # Initialized per workflow

    def execute(self, **kwargs) -> ProgressiveWorkflowResult:
        """Execute workflow with progressive tier escalation.

        This is the main entry point. Subclasses typically override this
        to provide workflow-specific logic, then call _execute_progressive()
        to handle the escalation.

        Args:
            **kwargs: Workflow-specific parameters

        Returns:
            Complete workflow results with progression history

        Raises:
            BudgetExceededError: If cost exceeds budget
            UserCancelledError: If user declines approval
        """
        raise NotImplementedError("Subclasses must implement execute()")

    def _execute_progressive(
        self,
        items: list[Any],
        workflow_name: str,
        **kwargs,
    ) -> ProgressiveWorkflowResult:
        """Execute items with progressive tier escalation.

        Core progressive escalation loop:
        1. Execute at current tier
        2. Analyze results
        3. Separate successful and failed items
        4. Decide: escalate, retry, or complete
        5. Repeat with failed items at next tier

        Args:
            items: Items to process (functions, files, etc.)
            workflow_name: Name of workflow for reporting
            **kwargs: Additional parameters passed to tier execution

        Returns:
            Complete workflow results

        Raises:
            BudgetExceededError: If cost exceeds budget and
                config.abort_on_budget_exceeded is set.
            UserCancelledError: If the user declines the initial approval.
        """
        # Initialize telemetry for this workflow
        self.telemetry = ProgressiveTelemetry(workflow_name, self.user_id)

        if not self.config.enabled:
            # Progressive escalation disabled, use default tier
            logger.info("Progressive escalation disabled, using default tier")
            return self._execute_single_tier(items, workflow_name, **kwargs)

        # Estimate cost and request approval before doing any work
        estimated_cost = self._estimate_total_cost(len(items))
        if not self._request_approval(
            f"Execute {workflow_name} on {len(items)} items",
            estimated_cost,
        ):
            raise UserCancelledError("User declined to proceed")

        # Start with cheapest tier (config.tiers is ordered cheap -> premium)
        current_tier = self.config.tiers[0]
        remaining_items = items
        context: dict[str, Any] | None = None

        while remaining_items and current_tier:
            logger.info(
                f"Executing {len(remaining_items)} items at {current_tier.value} tier"
            )

            # Execute at current tier
            tier_result = self._execute_tier(
                current_tier,
                remaining_items,
                context,
                **kwargs,
            )

            self.tier_results.append(tier_result)

            # Track tier execution in telemetry
            if self.telemetry:
                self.telemetry.track_tier_execution(
                    tier_result=tier_result,
                    attempt=tier_result.attempt,
                    escalated=False,  # Will update if escalation happens
                )

            # Check budget; may raise BudgetExceededError
            self._check_budget()

            # Separate successful and failed items.
            # Quality score is on a 0-100 scale; >= 80 counts as success.
            successful = [
                item for item in tier_result.generated_items
                if item.get("quality_score", 0) >= 80
            ]
            failed = [
                item for item in tier_result.generated_items
                if item.get("quality_score", 0) < 80
            ]

            logger.info(
                f"{current_tier.value} tier: {len(successful)}/{len(tier_result.generated_items)} "
                f"successful (CQS={tier_result.quality_score:.1f})"
            )

            # Update remaining items (partial escalation: only failures move on)
            remaining_items = failed

            # Decide: retry, escalate, or complete
            if not remaining_items:
                # All items successful
                break

            should_escalate, reason = self._should_escalate(
                current_tier,
                tier_result,
                attempt=tier_result.attempt,
            )

            if should_escalate:
                # Escalate to next tier
                next_tier = self._get_next_tier(current_tier)

                if next_tier is None:
                    # No higher tier available
                    logger.warning(
                        f"Cannot escalate beyond {current_tier.value} tier, "
                        f"{len(remaining_items)} items incomplete"
                    )
                    tier_result.escalated = True
                    tier_result.escalation_reason = "No higher tier available"
                    break

                logger.info(
                    f"Escalating {len(remaining_items)} items from "
                    f"{current_tier.value} to {next_tier.value}: {reason}"
                )

                # Track escalation in telemetry
                if self.telemetry:
                    current_cost = sum(r.cost for r in self.tier_results)
                    self.telemetry.track_escalation(
                        from_tier=current_tier,
                        to_tier=next_tier,
                        reason=reason,
                        item_count=len(remaining_items),
                        current_cost=current_cost,
                    )

                # Build context for next tier so it can learn from the failures
                context = {
                    "previous_tier": current_tier,
                    "previous_cqs": tier_result.quality_score,
                    "failures": failed,
                    "examples": tier_result.generated_items[-3:],  # Last 3 attempts
                    "reason": reason,
                }

                # Request approval for escalation (costs rise per tier)
                escalation_cost = self._estimate_tier_cost(next_tier, len(remaining_items))
                if not self._request_escalation_approval(
                    current_tier,
                    next_tier,
                    len(remaining_items),
                    escalation_cost,
                ):
                    logger.info("User declined escalation, stopping")
                    break

                tier_result.escalated = True
                tier_result.escalation_reason = reason
                current_tier = next_tier

            else:
                # No escalation needed (retry at same tier or success)
                break

        # Compile final result
        task_id = f"{workflow_name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"

        # Should always have at least one tier result; raise (not assert) so the
        # check survives python -O.
        if not self.tier_results:
            raise RuntimeError("No tier results generated")

        result = ProgressiveWorkflowResult(
            workflow_name=workflow_name,
            task_id=task_id,
            tier_results=self.tier_results,
            final_result=self.tier_results[-1],
            total_cost=sum(r.cost for r in self.tier_results),
            total_duration=sum(r.duration for r in self.tier_results),
            success=len(remaining_items) == 0,
        )

        # Track workflow completion in telemetry
        if self.telemetry:
            self.telemetry.track_workflow_completion(result)

        return result

    def _execute_single_tier(
        self,
        items: list[Any],
        workflow_name: str,
        **kwargs,
    ) -> ProgressiveWorkflowResult:
        """Execute without progressive escalation (single tier).

        Used when progressive escalation is disabled.

        Args:
            items: Items to process
            workflow_name: Workflow name
            **kwargs: Additional parameters

        Returns:
            Workflow results with single tier
        """
        # Use middle tier (capable) as default
        default_tier = Tier.CAPABLE

        tier_result = self._execute_tier(default_tier, items, None, **kwargs)
        self.tier_results.append(tier_result)

        task_id = f"{workflow_name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"

        return ProgressiveWorkflowResult(
            workflow_name=workflow_name,
            task_id=task_id,
            tier_results=[tier_result],
            final_result=tier_result,
            total_cost=tier_result.cost,
            total_duration=tier_result.duration,
            success=tier_result.quality_score >= 80,
        )

    def _execute_tier(
        self,
        tier: Tier,
        items: list[Any],
        context: dict[str, Any] | None,
        **kwargs,
    ) -> TierResult:
        """Execute items at a specific tier.

        Wrapper that handles timing, cost tracking, and error handling.
        Delegates actual execution to _execute_tier_impl().

        Args:
            tier: Which tier to execute at
            items: Items to process
            context: Context from previous tier (if escalating)
            **kwargs: Additional parameters

        Returns:
            Tier execution result (a failed-but-escalated result on exception,
            never a raised error)
        """
        start_time = datetime.now()

        try:
            # Let subclass handle actual execution
            generated_items = self._execute_tier_impl(tier, items, context, **kwargs)

            # Analyze overall quality
            failure_analysis = self._analyze_tier_result(generated_items)

            # Calculate cost (placeholder - will be implemented)
            cost = self._calculate_tier_cost(tier, len(items))

            duration = (datetime.now() - start_time).total_seconds()

            return TierResult(
                tier=tier,
                model=self._get_model_for_tier(tier),
                attempt=1,  # Simplified for now
                timestamp=start_time,
                generated_items=generated_items,
                failure_analysis=failure_analysis,
                cost=cost,
                duration=duration,
            )

        except Exception:
            # Lazy %-formatting per logging convention; exception() logs traceback.
            logger.exception("Error executing tier %s", tier.value)
            # Return failed result instead of propagating, so the caller's
            # escalation loop can decide what to do next.
            duration = (datetime.now() - start_time).total_seconds()
            import sys
            exc = sys.exc_info()[1]
            return TierResult(
                tier=tier,
                model=self._get_model_for_tier(tier),
                attempt=1,
                timestamp=start_time,
                generated_items=[],
                failure_analysis=FailureAnalysis(),
                cost=0.0,
                duration=duration,
                escalated=True,
                escalation_reason=f"Execution error: {str(exc)}",
            )

    def _execute_tier_impl(
        self,
        tier: Tier,
        items: list[Any],
        context: dict[str, Any] | None,
        **kwargs,
    ) -> list[dict[str, Any]]:
        """Execute items at specific tier (to be implemented by subclasses).

        Args:
            tier: Which tier to execute at
            items: Items to process
            context: Context from previous tier
            **kwargs: Additional parameters

        Returns:
            List of generated items with quality scores
        """
        raise NotImplementedError("Subclasses must implement _execute_tier_impl()")

    def _analyze_tier_result(self, generated_items: list[dict[str, Any]]) -> FailureAnalysis:
        """Analyze overall quality of tier execution.

        Args:
            generated_items: Items generated at this tier

        Returns:
            Aggregated failure analysis
        """
        if not generated_items:
            return FailureAnalysis()

        # Aggregate metrics across all items
        total_items = len(generated_items)
        passed = sum(1 for item in generated_items if item.get("passed", False))
        syntax_errors = sum(len(item.get("syntax_errors", [])) for item in generated_items)

        avg_coverage = sum(item.get("coverage", 0) for item in generated_items) / total_items
        avg_assertions = sum(item.get("assertions", 0) for item in generated_items) / total_items
        avg_confidence = sum(item.get("confidence", 0) for item in generated_items) / total_items

        return FailureAnalysis(
            # Placeholder SyntaxError instances, capped at 10 to bound memory
            syntax_errors=[SyntaxError(f"Syntax error {i}") for i in range(min(syntax_errors, 10))],
            test_pass_rate=passed / total_items if total_items > 0 else 0.0,
            coverage_percent=avg_coverage,
            assertion_depth=avg_assertions,
            confidence_score=avg_confidence,
        )

    def _should_escalate(
        self,
        tier: Tier,
        result: TierResult,
        attempt: int,
    ) -> tuple[bool, str]:
        """Determine if escalation is needed.

        Uses meta-orchestrator to make intelligent escalation decisions
        based on tier, quality score, and attempt number.

        Args:
            tier: Current tier
            result: Tier execution result
            attempt: Attempt number at this tier

        Returns:
            Tuple of (should_escalate, reason)
        """
        return self.meta_orchestrator.should_escalate(
            tier,
            result,
            attempt,
            self.config,
        )

    def _get_next_tier(self, current_tier: Tier) -> Tier | None:
        """Get the next tier in the progression.

        Args:
            current_tier: Current tier

        Returns:
            Next tier, or None if at highest tier (or tier not configured)
        """
        try:
            current_index = self.config.tiers.index(current_tier)
            if current_index < len(self.config.tiers) - 1:
                return self.config.tiers[current_index + 1]
        except ValueError:
            # current_tier not in configured tier list; treat as terminal
            pass

        return None

    def _estimate_total_cost(self, item_count: int) -> float:
        """Estimate total cost with probabilistic escalation.

        Args:
            item_count: Number of items to process

        Returns:
            Estimated total cost in USD
        """
        # Base cost: all items at cheap tier
        cheap_cost = self._estimate_tier_cost(Tier.CHEAP, item_count)

        # Estimated escalation (30% to capable, 10% to premium)
        capable_cost = self._estimate_tier_cost(Tier.CAPABLE, int(item_count * 0.3))
        premium_cost = self._estimate_tier_cost(Tier.PREMIUM, int(item_count * 0.1))

        return cheap_cost + capable_cost + premium_cost

    def _estimate_tier_cost(self, tier: Tier, item_count: int) -> float:
        """Estimate cost for specific tier.

        Args:
            tier: Which tier
            item_count: Number of items

        Returns:
            Estimated cost in USD
        """
        # Cost per item (approximate, based on typical token usage)
        COST_PER_ITEM = {
            Tier.CHEAP: 0.003,    # ~$0.003 per item (gpt-4o-mini)
            Tier.CAPABLE: 0.015,  # ~$0.015 per item (claude-3-5-sonnet)
            Tier.PREMIUM: 0.05,   # ~$0.05 per item (claude-opus-4)
        }

        return COST_PER_ITEM[tier] * item_count

    def _calculate_tier_cost(self, tier: Tier, item_count: int) -> float:
        """Calculate actual cost for tier execution.

        TODO: Implement based on actual token usage.

        Args:
            tier: Which tier
            item_count: Number of items processed

        Returns:
            Actual cost in USD
        """
        # For now, use estimate
        return self._estimate_tier_cost(tier, item_count)

    def _request_approval(self, message: str, estimated_cost: float) -> bool:
        """Request user approval for execution.

        Args:
            message: Description of what will be executed
            estimated_cost: Estimated cost in USD

        Returns:
            True if approved, False if declined
        """
        # Check auto-approve threshold (skipped if unset/zero)
        if self.config.auto_approve_under and estimated_cost <= self.config.auto_approve_under:
            logger.info(f"Auto-approved: ${estimated_cost:.2f} <= ${self.config.auto_approve_under:.2f}")
            return True

        # Check if under default threshold ($1.00)
        threshold = 1.00
        if estimated_cost <= threshold:
            return True

        # Prompt user interactively; default answer is "no"
        print("\n⚠️  Cost Estimate:")
        print(f"   {message}")
        print(f"   Estimated total: ${estimated_cost:.2f}")
        print(f"   (Exceeds threshold of ${threshold:.2f})")
        print()

        response = input("Proceed? [y/N]: ").strip().lower()
        return response == 'y'

    def _request_escalation_approval(
        self,
        from_tier: Tier,
        to_tier: Tier,
        item_count: int,
        additional_cost: float,
    ) -> bool:
        """Request approval for tier escalation.

        Args:
            from_tier: Current tier
            to_tier: Target tier
            item_count: Number of items to escalate
            additional_cost: Additional cost for escalation

        Returns:
            True if approved, False if declined
        """
        # Check auto-approve against the projected TOTAL cost, not just the delta
        total_cost = sum(r.cost for r in self.tier_results) + additional_cost
        if self.config.auto_approve_under and total_cost <= self.config.auto_approve_under:
            logger.info(f"Auto-approved escalation: total ${total_cost:.2f}")
            return True

        # Prompt user interactively; default answer is "yes"
        print("\n⚠️  Escalation needed:")
        print(f"   {item_count} items from {from_tier.value} → {to_tier.value}")
        print(f"   Additional cost: ~${additional_cost:.2f}")
        print(f"   Total so far: ${sum(r.cost for r in self.tier_results):.2f}")
        print()

        response = input("Proceed? [Y/n]: ").strip().lower()
        return response != 'n'

    def _check_budget(self) -> None:
        """Check if budget has been exceeded.

        Raises:
            BudgetExceededError: If abort_on_budget_exceeded is True
        """
        current_cost = sum(r.cost for r in self.tier_results)

        if current_cost > self.config.max_cost:
            # Track budget exceeded event
            if self.telemetry:
                action = "abort" if self.config.abort_on_budget_exceeded else "warn"
                self.telemetry.track_budget_exceeded(
                    current_cost=current_cost,
                    max_budget=self.config.max_cost,
                    action=action,
                )

            if self.config.abort_on_budget_exceeded:
                raise BudgetExceededError(
                    f"Cost ${current_cost:.2f} exceeds budget ${self.config.max_cost:.2f}"
                )
            elif self.config.warn_on_budget_exceeded:
                logger.warning(
                    f"Cost ${current_cost:.2f} exceeds budget ${self.config.max_cost:.2f}"
                )

    def _get_model_for_tier(self, tier: Tier) -> str:
        """Get model name for specific tier.

        Args:
            tier: Which tier

        Returns:
            Model name (e.g., "gpt-4o-mini")
        """
        # TODO: Make this configurable
        MODEL_MAP = {
            Tier.CHEAP: "gpt-4o-mini",
            Tier.CAPABLE: "claude-3-5-sonnet",
            Tier.PREMIUM: "claude-opus-4",
        }

        return MODEL_MAP.get(tier, "claude-3-5-sonnet")