empathy-framework 5.0.1__py3-none-any.whl → 5.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/METADATA +311 -150
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/RECORD +60 -33
- empathy_framework-5.1.0.dist-info/licenses/LICENSE +201 -0
- empathy_framework-5.1.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- empathy_llm_toolkit/providers.py +175 -35
- empathy_llm_toolkit/utils/tokens.py +150 -30
- empathy_os/__init__.py +1 -1
- empathy_os/cli/commands/batch.py +256 -0
- empathy_os/cli/commands/cache.py +248 -0
- empathy_os/cli/commands/inspect.py +1 -2
- empathy_os/cli/commands/metrics.py +1 -1
- empathy_os/cli/commands/routing.py +285 -0
- empathy_os/cli/commands/workflow.py +2 -1
- empathy_os/cli/parsers/__init__.py +6 -0
- empathy_os/cli/parsers/batch.py +118 -0
- empathy_os/cli/parsers/cache 2.py +65 -0
- empathy_os/cli/parsers/cache.py +65 -0
- empathy_os/cli/parsers/routing.py +110 -0
- empathy_os/cli_minimal.py +3 -3
- empathy_os/cli_router 2.py +416 -0
- empathy_os/dashboard/__init__.py +1 -2
- empathy_os/dashboard/app 2.py +512 -0
- empathy_os/dashboard/app.py +1 -1
- empathy_os/dashboard/simple_server 2.py +403 -0
- empathy_os/dashboard/standalone_server 2.py +536 -0
- empathy_os/dashboard/standalone_server.py +22 -11
- empathy_os/memory/types 2.py +441 -0
- empathy_os/metrics/collector.py +31 -0
- empathy_os/models/__init__.py +19 -0
- empathy_os/models/adaptive_routing 2.py +437 -0
- empathy_os/models/auth_cli.py +444 -0
- empathy_os/models/auth_strategy.py +450 -0
- empathy_os/models/token_estimator.py +21 -13
- empathy_os/project_index/scanner_parallel 2.py +291 -0
- empathy_os/telemetry/agent_coordination 2.py +478 -0
- empathy_os/telemetry/agent_coordination.py +14 -16
- empathy_os/telemetry/agent_tracking 2.py +350 -0
- empathy_os/telemetry/agent_tracking.py +18 -20
- empathy_os/telemetry/approval_gates 2.py +563 -0
- empathy_os/telemetry/approval_gates.py +27 -39
- empathy_os/telemetry/event_streaming 2.py +405 -0
- empathy_os/telemetry/event_streaming.py +22 -22
- empathy_os/telemetry/feedback_loop 2.py +557 -0
- empathy_os/telemetry/feedback_loop.py +14 -17
- empathy_os/workflows/__init__.py +8 -0
- empathy_os/workflows/autonomous_test_gen.py +569 -0
- empathy_os/workflows/batch_processing.py +56 -10
- empathy_os/workflows/bug_predict.py +45 -0
- empathy_os/workflows/code_review.py +92 -22
- empathy_os/workflows/document_gen.py +594 -62
- empathy_os/workflows/llm_base.py +363 -0
- empathy_os/workflows/perf_audit.py +69 -0
- empathy_os/workflows/release_prep.py +54 -0
- empathy_os/workflows/security_audit.py +154 -79
- empathy_os/workflows/test_gen.py +60 -0
- empathy_os/workflows/test_gen_behavioral.py +477 -0
- empathy_os/workflows/test_gen_parallel.py +341 -0
- empathy_framework-5.0.1.dist-info/licenses/LICENSE +0 -139
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/WHEEL +0 -0
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/entry_points.txt +0 -0
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/top_level.txt +0 -0
empathy_llm_toolkit/providers.py CHANGED

```diff
@@ -322,6 +322,93 @@ class AnthropicProvider(BaseLLMProvider):
             },
         )
 
+    def estimate_tokens(self, text: str) -> int:
+        """Estimate token count using accurate token counter (overrides base class).
+
+        Uses tiktoken for fast local estimation (~98% accurate).
+        Falls back to heuristic if tiktoken unavailable.
+
+        Args:
+            text: Text to count tokens for
+
+        Returns:
+            Estimated token count
+        """
+        try:
+            from .utils.tokens import count_tokens
+
+            return count_tokens(text, model=self.model, use_api=False)
+        except ImportError:
+            # Fallback to base class heuristic if utils not available
+            return super().estimate_tokens(text)
+
+    def calculate_actual_cost(
+        self,
+        input_tokens: int,
+        output_tokens: int,
+        cache_creation_tokens: int = 0,
+        cache_read_tokens: int = 0,
+    ) -> dict[str, Any]:
+        """Calculate actual cost based on precise token counts.
+
+        Includes Anthropic prompt caching cost adjustments:
+        - Cache writes: 25% markup over standard input pricing
+        - Cache reads: 90% discount from standard input pricing
+
+        Args:
+            input_tokens: Regular input tokens (not cached)
+            output_tokens: Output tokens
+            cache_creation_tokens: Tokens written to cache
+            cache_read_tokens: Tokens read from cache
+
+        Returns:
+            Dictionary with cost breakdown:
+            - base_cost: Cost for regular input/output tokens
+            - cache_write_cost: Cost for cache creation (if any)
+            - cache_read_cost: Cost for cache reads (if any)
+            - total_cost: Total cost including all components
+            - savings: Amount saved by cache reads vs. full price
+
+        Example:
+            >>> provider = AnthropicProvider(api_key="...")
+            >>> cost = provider.calculate_actual_cost(
+            ...     input_tokens=1000,
+            ...     output_tokens=500,
+            ...     cache_read_tokens=10000
+            ... )
+            >>> cost["total_cost"]
+            0.0105  # Significantly less than without cache
+        """
+        # Get pricing for this model
+        model_info = self.get_model_info()
+        input_price_per_million = model_info["cost_per_1m_input"]
+        output_price_per_million = model_info["cost_per_1m_output"]
+
+        # Base cost (non-cached tokens)
+        base_cost = (input_tokens / 1_000_000) * input_price_per_million
+        base_cost += (output_tokens / 1_000_000) * output_price_per_million
+
+        # Cache write cost (25% markup)
+        cache_write_price = input_price_per_million * 1.25
+        cache_write_cost = (cache_creation_tokens / 1_000_000) * cache_write_price
+
+        # Cache read cost (90% discount = 10% of input price)
+        cache_read_price = input_price_per_million * 0.1
+        cache_read_cost = (cache_read_tokens / 1_000_000) * cache_read_price
+
+        # Calculate savings from cache reads
+        full_price_for_cached = (cache_read_tokens / 1_000_000) * input_price_per_million
+        savings = full_price_for_cached - cache_read_cost
+
+        return {
+            "base_cost": round(base_cost, 6),
+            "cache_write_cost": round(cache_write_cost, 6),
+            "cache_read_cost": round(cache_read_cost, 6),
+            "total_cost": round(base_cost + cache_write_cost + cache_read_cost, 6),
+            "savings": round(savings, 6),
+            "currency": "USD",
+        }
+
 
 class AnthropicBatchProvider:
     """Provider for Anthropic Batch API (50% cost reduction).
```
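
The cache pricing adjustments in `calculate_actual_cost` are plain linear scalings of the model's input price. A minimal worked example of that arithmetic, assuming an input price of $3.00 per million tokens purely for illustration (the real figure comes from `get_model_info()["cost_per_1m_input"]`):

```python
# Assumed illustrative pricing - not taken from the package.
input_price_per_million = 3.00

cache_creation_tokens = 2_000
cache_read_tokens = 10_000

# Cache writes: 25% markup over standard input pricing
cache_write_cost = (cache_creation_tokens / 1_000_000) * input_price_per_million * 1.25
# -> 0.0075

# Cache reads: billed at 10% of standard input pricing (90% discount)
cache_read_cost = (cache_read_tokens / 1_000_000) * input_price_per_million * 0.1
# -> 0.003

# Savings relative to paying full input price for the cached tokens
full_price_for_cached = (cache_read_tokens / 1_000_000) * input_price_per_million
savings = full_price_for_cached - cache_read_cost
# -> 0.030 - 0.003 = 0.027

print(cache_write_cost, cache_read_cost, savings)
```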
```diff
@@ -370,7 +457,8 @@ class AnthropicBatchProvider:
         """Create a batch job.
 
         Args:
-            requests: List of request dicts with 'custom_id'
+            requests: List of request dicts with 'custom_id' and 'params' containing message creation parameters.
+                Format: [{"custom_id": "id1", "params": {"model": "...", "messages": [...], "max_tokens": 1024}}]
             job_id: Optional job identifier for tracking (unused, for API compatibility)
 
         Returns:
@@ -384,22 +472,46 @@ class AnthropicBatchProvider:
             >>> requests = [
             ...     {
             ...         "custom_id": "task_1",
-            ...         "
-            ...
-            ...
+            ...         "params": {
+            ...             "model": "claude-sonnet-4-5-20250929",
+            ...             "messages": [{"role": "user", "content": "Test"}],
+            ...             "max_tokens": 1024
+            ...         }
             ...     }
             ... ]
             >>> batch_id = provider.create_batch(requests)
             >>> print(f"Batch created: {batch_id}")
-            Batch created:
+            Batch created: msgbatch_abc123
         """
         if not requests:
             raise ValueError("requests cannot be empty")
 
+        # Validate and convert old format to new format if needed
+        formatted_requests = []
+        for req in requests:
+            if "params" not in req:
+                # Old format: convert to new format with params wrapper
+                formatted_req = {
+                    "custom_id": req.get("custom_id", f"req_{id(req)}"),
+                    "params": {
+                        "model": req.get("model", "claude-sonnet-4-5-20250929"),
+                        "messages": req.get("messages", []),
+                        "max_tokens": req.get("max_tokens", 4096),
+                    },
+                }
+                # Copy other optional params
+                for key in ["temperature", "system", "stop_sequences"]:
+                    if key in req:
+                        formatted_req["params"][key] = req[key]
+                formatted_requests.append(formatted_req)
+            else:
+                formatted_requests.append(req)
+
         try:
-
+            # Use correct Message Batches API endpoint
+            batch = self.client.messages.batches.create(requests=formatted_requests)
             self._batch_jobs[batch.id] = batch
-            logger.info(f"Created batch {batch.id} with {len(
+            logger.info(f"Created batch {batch.id} with {len(formatted_requests)} requests")
             return batch.id
         except Exception as e:
             logger.error(f"Failed to create batch: {e}")
```
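
`create_batch` now accepts both request shapes: dicts that already carry a `params` wrapper pass through unchanged, while flat pre-5.1.0 dicts are wrapped on the fly. For illustration, the flat request below is what the conversion loop above would turn into the wrapped form; the default `max_tokens=4096` and the copied optional keys come from the code, the concrete values are made up:

```python
# Flat, pre-5.1.0 shape - still accepted
old_style = {
    "custom_id": "task_1",
    "model": "claude-sonnet-4-5-20250929",
    "messages": [{"role": "user", "content": "Test"}],
    "temperature": 0.2,
}

# What create_batch builds from it before calling the Message Batches API:
# "max_tokens" falls back to 4096, and "temperature"/"system"/"stop_sequences"
# are copied into params when present.
normalized = {
    "custom_id": "task_1",
    "params": {
        "model": "claude-sonnet-4-5-20250929",
        "messages": [{"role": "user", "content": "Test"}],
        "max_tokens": 4096,
        "temperature": 0.2,
    },
}
```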
```diff
@@ -412,18 +524,20 @@ class AnthropicBatchProvider:
             batch_id: Batch job ID
 
         Returns:
-
-            - "
-            - "
-            - "
+            MessageBatch object with processing_status field:
+            - "in_progress": Batch is being processed
+            - "canceling": Cancellation initiated
+            - "ended": Batch processing ended (check request_counts for success/errors)
 
         Example:
-            >>> status = provider.get_batch_status("
-            >>> print(status.
-
+            >>> status = provider.get_batch_status("msgbatch_abc123")
+            >>> print(status.processing_status)
+            in_progress
+            >>> print(f"Succeeded: {status.request_counts.succeeded}")
         """
         try:
-
+            # Use correct Message Batches API endpoint
+            batch = self.client.messages.batches.retrieve(batch_id)
             self._batch_jobs[batch_id] = batch
             return batch
         except Exception as e:
@@ -437,25 +551,37 @@ class AnthropicBatchProvider:
             batch_id: Batch job ID
 
         Returns:
-            List of result dicts
+            List of result dicts. Each dict contains:
+            - custom_id: Request identifier
+            - result: Either {"type": "succeeded", "message": {...}} or {"type": "errored", "error": {...}}
 
         Raises:
-            ValueError: If batch
+            ValueError: If batch has not ended processing
             RuntimeError: If API call fails
 
         Example:
-            >>> results = provider.get_batch_results("
+            >>> results = provider.get_batch_results("msgbatch_abc123")
             >>> for result in results:
-            ...
+            ...     if result['result']['type'] == 'succeeded':
+            ...         message = result['result']['message']
+            ...         print(f"{result['custom_id']}: {message.content[0].text}")
+            ...     else:
+            ...         error = result['result']['error']
+            ...         print(f"{result['custom_id']}: Error {error['type']}")
         """
         status = self.get_batch_status(batch_id)
 
-
-
+        # Check processing_status instead of status
+        if status.processing_status != "ended":
+            raise ValueError(
+                f"Batch {batch_id} has not ended processing (status: {status.processing_status})"
+            )
 
         try:
-
-
+            # Use correct Message Batches API endpoint
+            # results() returns an iterator, convert to list
+            results_iterator = self.client.messages.batches.results(batch_id)
+            return list(results_iterator)
         except Exception as e:
             logger.error(f"Failed to get batch results for {batch_id}: {e}")
             raise RuntimeError(f"Failed to get batch results: {e}") from e
@@ -474,15 +600,15 @@ class AnthropicBatchProvider:
             timeout: Maximum wait time in seconds (default: 86400 = 24 hours)
 
         Returns:
-            Batch results when
+            Batch results when processing ends
 
         Raises:
             TimeoutError: If batch doesn't complete within timeout
-            RuntimeError: If batch processing
+            RuntimeError: If batch had errors during processing
 
         Example:
             >>> results = await provider.wait_for_batch(
-            ...     "
+            ...     "msgbatch_abc123",
             ...     poll_interval=300,  # Check every 5 minutes
             ... )
             >>> print(f"Batch completed: {len(results)} results")
@@ -493,22 +619,36 @@ class AnthropicBatchProvider:
         while True:
             status = self.get_batch_status(batch_id)
 
-            if
-
-
+            # Check if batch processing has ended
+            if status.processing_status == "ended":
+                # Check request counts to see if there were errors
+                counts = status.request_counts
+                logger.info(
+                    f"Batch {batch_id} ended: "
+                    f"{counts.succeeded} succeeded, {counts.errored} errored, "
+                    f"{counts.canceled} canceled, {counts.expired} expired"
+                )
 
-
-
-
-                raise RuntimeError(f"Batch {batch_id} failed: {error_msg}")
+                # Return results even if some requests failed
+                # The caller can inspect individual results for errors
+                return self.get_batch_results(batch_id)
 
             # Check timeout
             elapsed = (datetime.now() - start_time).total_seconds()
             if elapsed > timeout:
                 raise TimeoutError(f"Batch {batch_id} did not complete within {timeout}s")
 
-            # Log progress
-
+            # Log progress with request counts
+            try:
+                counts = status.request_counts
+                logger.debug(
+                    f"Batch {batch_id} status: {status.processing_status} "
+                    f"(processing: {counts.processing}, elapsed: {elapsed:.0f}s)"
+                )
+            except AttributeError:
+                logger.debug(
+                    f"Batch {batch_id} status: {status.processing_status} (elapsed: {elapsed:.0f}s)"
+                )
 
             # Wait before next poll
             await asyncio.sleep(poll_interval)
```
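
Taken together, the batch changes move everything onto the Message Batches API (`client.messages.batches.*`) and key completion on `processing_status == "ended"`. A minimal end-to-end sketch; the import path and the no-argument constructor are assumptions for illustration, while `create_batch`, `wait_for_batch`, and the result shape come from the diff above:

```python
import asyncio

from empathy_llm_toolkit.providers import AnthropicBatchProvider  # import path assumed


async def main() -> None:
    provider = AnthropicBatchProvider()  # constructor arguments assumed

    requests = [
        {
            "custom_id": "task_1",
            "params": {
                "model": "claude-sonnet-4-5-20250929",
                "messages": [{"role": "user", "content": "Test"}],
                "max_tokens": 1024,
            },
        }
    ]

    batch_id = provider.create_batch(requests)

    # Polls get_batch_status until processing_status == "ended", then fetches results.
    results = await provider.wait_for_batch(batch_id, poll_interval=300)

    for result in results:
        if result["result"]["type"] == "succeeded":
            print(result["custom_id"], "ok")
        else:
            print(result["custom_id"], "error:", result["result"]["error"]["type"])


asyncio.run(main())
```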
empathy_llm_toolkit/utils/tokens.py CHANGED

```diff
@@ -7,10 +7,35 @@ Copyright 2025 Smart-AI-Memory
 Licensed under Fair Source License 0.9
 """
 
+import functools
+import logging
+import os
+from dataclasses import dataclass
 from typing import Any
 
-
+logger = logging.getLogger(__name__)
+
+# Lazy import to avoid requiring dependencies if not used
 _client = None
+_tiktoken_encoding = None
+
+# Try to import tiktoken for fast local estimation
+try:
+    import tiktoken
+
+    TIKTOKEN_AVAILABLE = True
+except ImportError:
+    TIKTOKEN_AVAILABLE = False
+    logger.debug("tiktoken not available - will use API or heuristic fallback")
+
+
+@dataclass
+class TokenCount:
+    """Token count result with metadata."""
+
+    tokens: int
+    method: str  # "anthropic_api", "tiktoken", "heuristic"
+    model: str | None = None
 
 
 def _get_client():
@@ -20,7 +45,12 @@ def _get_client():
     try:
         from anthropic import Anthropic
 
-
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "ANTHROPIC_API_KEY environment variable required for API token counting"
+            )
+        _client = Anthropic(api_key=api_key)
     except ImportError as e:
         raise ImportError(
             "anthropic package required for token counting. Install with: pip install anthropic"
```
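
The client helper now fails fast when no key is configured. A tiny sketch of that guard, calling the private helper purely for illustration (normal callers go through the public counting functions shown below):

```python
import os

# With ANTHROPIC_API_KEY unset, _get_client() raises ValueError instead of
# constructing an unauthenticated client.
os.environ.pop("ANTHROPIC_API_KEY", None)

from empathy_llm_toolkit.utils.tokens import _get_client  # import path assumed

try:
    _get_client()
except ValueError as exc:
    print(exc)  # "ANTHROPIC_API_KEY environment variable required for API token counting"
```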
```diff
@@ -28,57 +58,109 @@ def _get_client():
     return _client
 
 
-
-
+@functools.lru_cache(maxsize=4)
+def _get_tiktoken_encoding(model: str) -> Any:
+    """Get tiktoken encoding for Claude models (cached)."""
+    if not TIKTOKEN_AVAILABLE:
+        return None
+    try:
+        # Claude uses cl100k_base encoding (similar to GPT-4)
+        return tiktoken.get_encoding("cl100k_base")
+    except Exception as e:
+        logger.warning(f"Failed to get tiktoken encoding: {e}")
+        return None
+
+
+def _count_tokens_tiktoken(text: str, model: str) -> int:
+    """Count tokens using tiktoken (fast local estimation)."""
+    if not text:
+        return 0
+
+    encoding = _get_tiktoken_encoding(model)
+    if not encoding:
+        return 0
+
+    try:
+        return len(encoding.encode(text))
+    except Exception as e:
+        logger.warning(f"tiktoken encoding failed: {e}")
+        return 0
+
+
+def _count_tokens_heuristic(text: str) -> int:
+    """Fallback heuristic token counting (~4 chars per token)."""
+    if not text:
+        return 0
+    return max(1, len(text) // 4)
+
+
+def count_tokens(text: str, model: str = "claude-sonnet-4-5-20250929", use_api: bool = False) -> int:
+    """Count tokens using best available method.
+
+    By default, uses tiktoken for fast local estimation (~98% accurate).
+    Set use_api=True for exact count via Anthropic API (requires network call).
 
     Args:
         text: Text to tokenize
         model: Model ID (different models may have different tokenizers)
+        use_api: Whether to use Anthropic API for exact count (slower, requires API key)
 
     Returns:
-
+        Token count
 
     Example:
         >>> count_tokens("Hello, world!")
         4
-        >>> count_tokens("def hello():\\n    print('hi')")
+        >>> count_tokens("def hello():\\n    print('hi')", use_api=True)
         8
 
     Raises:
-        ImportError: If anthropic package not installed
-        ValueError: If
+        ImportError: If anthropic package not installed (when use_api=True)
+        ValueError: If API key missing (when use_api=True)
 
     """
     if not text:
         return 0
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Use API if explicitly requested
+    if use_api:
+        try:
+            client = _get_client()
+            # FIXED: Use correct API method - client.messages.count_tokens()
+            result = client.messages.count_tokens(
+                model=model,
+                messages=[{"role": "user", "content": text}],
+            )
+            return int(result.input_tokens)
+        except Exception as e:
+            logger.warning(f"API token counting failed, using fallback: {e}")
+            # Continue to fallback methods
+
+    # Try tiktoken first (fast and accurate)
+    if TIKTOKEN_AVAILABLE:
+        tokens = _count_tokens_tiktoken(text, model)
+        if tokens > 0:
+            return tokens
+
+    # Fallback to heuristic
+    return _count_tokens_heuristic(text)
 
 
 def count_message_tokens(
     messages: list[dict[str, str]],
     system_prompt: str | None = None,
-    model: str = "claude-sonnet-4-5",
+    model: str = "claude-sonnet-4-5-20250929",
+    use_api: bool = False,
 ) -> dict[str, int]:
     """Count tokens in a conversation.
 
+    By default uses tiktoken for fast estimation. Set use_api=True for exact count.
+
     Args:
         messages: List of message dicts with "role" and "content"
         system_prompt: Optional system prompt
         model: Model ID
+        use_api: Whether to use Anthropic API for exact count
 
     Returns:
         Dict with token counts by component:
```
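
The rewritten `count_tokens` resolves in a fixed order: the Anthropic count-tokens API only when `use_api=True`, then tiktoken's `cl100k_base` encoding if installed, then the ~4-characters-per-token heuristic. A small usage sketch (the import path is inferred from the wheel's file layout):

```python
from empathy_llm_toolkit.utils.tokens import count_tokens  # import path assumed

text = "def hello():\n    print('hi')"

# Local estimate: tiktoken if available, otherwise the heuristic (len(text) // 4).
print(count_tokens(text))

# Exact count via client.messages.count_tokens; on any failure this logs a
# warning and silently falls back to the local estimate above.
print(count_tokens(text, model="claude-sonnet-4-5-20250929", use_api=True))
```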
```diff
@@ -92,21 +174,59 @@ def count_message_tokens(
         {"system": 4, "messages": 6, "total": 10}
 
     """
+    if not messages:
+        if system_prompt:
+            tokens = count_tokens(system_prompt, model, use_api)
+            return {"system": tokens, "messages": 0, "total": tokens}
+        return {"system": 0, "messages": 0, "total": 0}
+
+    # Use Anthropic API for exact count if requested
+    if use_api:
+        try:
+            client = _get_client()
+            kwargs: dict[str, Any] = {"model": model, "messages": messages}
+            if system_prompt:
+                kwargs["system"] = system_prompt
+
+            result = client.messages.count_tokens(**kwargs)
+            # API returns total input tokens, estimate breakdown
+            total_tokens = result.input_tokens
+
+            # Estimate system vs message breakdown
+            if system_prompt:
+                system_tokens = count_tokens(system_prompt, model, use_api=False)
+                message_tokens = max(0, total_tokens - system_tokens)
+            else:
+                system_tokens = 0
+                message_tokens = total_tokens
+
+            return {
+                "system": system_tokens,
+                "messages": message_tokens,
+                "total": total_tokens,
+            }
+        except Exception as e:
+            logger.warning(f"API token counting failed, using fallback: {e}")
+            # Continue to fallback method
+
+    # Fallback: count each component separately
     counts: dict[str, int] = {}
 
     # Count system prompt
     if system_prompt:
-        counts["system"] = count_tokens(system_prompt, model)
+        counts["system"] = count_tokens(system_prompt, model, use_api=False)
     else:
         counts["system"] = 0
 
-    # Count messages
-
-
-
+    # Count messages with overhead
+    message_tokens = 0
+    for message in messages:
+        content = message.get("content", "")
+        message_tokens += count_tokens(content, model, use_api=False)
+        message_tokens += 4  # Overhead for role markers
 
-
-    counts["total"] = counts["system"] +
+    counts["messages"] = message_tokens
+    counts["total"] = counts["system"] + message_tokens
 
     return counts
 
```
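
In the fallback path, each message contributes its content estimate plus a flat 4-token allowance for role markers. A quick worked check using the character heuristic, i.e. assuming neither tiktoken nor an API key is available (import path inferred from the wheel's file layout):

```python
from empathy_llm_toolkit.utils.tokens import count_message_tokens  # import path assumed

messages = [
    {"role": "user", "content": "Hello, world!"},   # 13 chars -> 13 // 4 = 3, plus 4 overhead
    {"role": "assistant", "content": "Hi there."},  # 9 chars  ->  9 // 4 = 2, plus 4 overhead
]

# Heuristic-only arithmetic:
#   system   = len("Be brief.") // 4 = 2
#   messages = (3 + 4) + (2 + 4)     = 13
#   total    = 2 + 13                = 15
print(count_message_tokens(messages, system_prompt="Be brief."))
# {'system': 2, 'messages': 13, 'total': 15}
```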