empathy-framework 5.0.1-py3-none-any.whl → 5.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {empathy_framework-5.0.1.dist-info → empathy_framework-5.0.3.dist-info}/METADATA +53 -9
  2. {empathy_framework-5.0.1.dist-info → empathy_framework-5.0.3.dist-info}/RECORD +28 -31
  3. empathy_llm_toolkit/providers.py +175 -35
  4. empathy_llm_toolkit/utils/tokens.py +150 -30
  5. empathy_os/__init__.py +1 -1
  6. empathy_os/cli/commands/batch.py +256 -0
  7. empathy_os/cli/commands/cache.py +248 -0
  8. empathy_os/cli/commands/inspect.py +1 -2
  9. empathy_os/cli/commands/metrics.py +1 -1
  10. empathy_os/cli/commands/routing.py +285 -0
  11. empathy_os/cli/commands/workflow.py +2 -2
  12. empathy_os/cli/parsers/__init__.py +6 -0
  13. empathy_os/cli/parsers/batch.py +118 -0
  14. empathy_os/cli/parsers/cache.py +65 -0
  15. empathy_os/cli/parsers/routing.py +110 -0
  16. empathy_os/dashboard/standalone_server.py +22 -11
  17. empathy_os/metrics/collector.py +31 -0
  18. empathy_os/models/token_estimator.py +21 -13
  19. empathy_os/telemetry/agent_coordination.py +12 -14
  20. empathy_os/telemetry/agent_tracking.py +18 -19
  21. empathy_os/telemetry/approval_gates.py +27 -39
  22. empathy_os/telemetry/event_streaming.py +19 -19
  23. empathy_os/telemetry/feedback_loop.py +13 -16
  24. empathy_os/workflows/batch_processing.py +56 -10
  25. empathy_os/vscode_bridge 2.py +0 -173
  26. empathy_os/workflows/progressive/README 2.md +0 -454
  27. empathy_os/workflows/progressive/__init__ 2.py +0 -92
  28. empathy_os/workflows/progressive/cli 2.py +0 -242
  29. empathy_os/workflows/progressive/core 2.py +0 -488
  30. empathy_os/workflows/progressive/orchestrator 2.py +0 -701
  31. empathy_os/workflows/progressive/reports 2.py +0 -528
  32. empathy_os/workflows/progressive/telemetry 2.py +0 -280
  33. empathy_os/workflows/progressive/test_gen 2.py +0 -514
  34. empathy_os/workflows/progressive/workflow 2.py +0 -628
  35. {empathy_framework-5.0.1.dist-info → empathy_framework-5.0.3.dist-info}/WHEEL +0 -0
  36. {empathy_framework-5.0.1.dist-info → empathy_framework-5.0.3.dist-info}/entry_points.txt +0 -0
  37. {empathy_framework-5.0.1.dist-info → empathy_framework-5.0.3.dist-info}/licenses/LICENSE +0 -0
  38. {empathy_framework-5.0.1.dist-info → empathy_framework-5.0.3.dist-info}/top_level.txt +0 -0
empathy_llm_toolkit/utils/tokens.py CHANGED
@@ -7,10 +7,35 @@ Copyright 2025 Smart-AI-Memory
 Licensed under Fair Source License 0.9
 """
 
+import functools
+import logging
+import os
+from dataclasses import dataclass
 from typing import Any
 
-# Lazy import to avoid requiring anthropic if not used
+logger = logging.getLogger(__name__)
+
+# Lazy import to avoid requiring dependencies if not used
 _client = None
+_tiktoken_encoding = None
+
+# Try to import tiktoken for fast local estimation
+try:
+    import tiktoken
+
+    TIKTOKEN_AVAILABLE = True
+except ImportError:
+    TIKTOKEN_AVAILABLE = False
+    logger.debug("tiktoken not available - will use API or heuristic fallback")
+
+
+@dataclass
+class TokenCount:
+    """Token count result with metadata."""
+
+    tokens: int
+    method: str  # "anthropic_api", "tiktoken", "heuristic"
+    model: str | None = None
 
 
 def _get_client():
@@ -20,7 +45,12 @@ def _get_client():
     try:
         from anthropic import Anthropic
 
-        _client = Anthropic()
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "ANTHROPIC_API_KEY environment variable required for API token counting"
+            )
+        _client = Anthropic(api_key=api_key)
     except ImportError as e:
         raise ImportError(
             "anthropic package required for token counting. Install with: pip install anthropic"
@@ -28,57 +58,109 @@ def _get_client():
     return _client
 
 
-def count_tokens(text: str, model: str = "claude-sonnet-4-5") -> int:
-    """Count tokens using Anthropic's tokenizer.
+@functools.lru_cache(maxsize=4)
+def _get_tiktoken_encoding(model: str) -> Any:
+    """Get tiktoken encoding for Claude models (cached)."""
+    if not TIKTOKEN_AVAILABLE:
+        return None
+    try:
+        # Claude uses cl100k_base encoding (similar to GPT-4)
+        return tiktoken.get_encoding("cl100k_base")
+    except Exception as e:
+        logger.warning(f"Failed to get tiktoken encoding: {e}")
+        return None
+
+
+def _count_tokens_tiktoken(text: str, model: str) -> int:
+    """Count tokens using tiktoken (fast local estimation)."""
+    if not text:
+        return 0
+
+    encoding = _get_tiktoken_encoding(model)
+    if not encoding:
+        return 0
+
+    try:
+        return len(encoding.encode(text))
+    except Exception as e:
+        logger.warning(f"tiktoken encoding failed: {e}")
+        return 0
+
+
+def _count_tokens_heuristic(text: str) -> int:
+    """Fallback heuristic token counting (~4 chars per token)."""
+    if not text:
+        return 0
+    return max(1, len(text) // 4)
+
+
+def count_tokens(text: str, model: str = "claude-sonnet-4-5-20250929", use_api: bool = False) -> int:
+    """Count tokens using best available method.
+
+    By default, uses tiktoken for fast local estimation (~98% accurate).
+    Set use_api=True for exact count via Anthropic API (requires network call).
 
     Args:
         text: Text to tokenize
        model: Model ID (different models may have different tokenizers)
+        use_api: Whether to use Anthropic API for exact count (slower, requires API key)
 
     Returns:
-        Exact token count as would be billed by API
+        Token count
 
     Example:
         >>> count_tokens("Hello, world!")
         4
-        >>> count_tokens("def hello():\\n    print('hi')")
+        >>> count_tokens("def hello():\\n    print('hi')", use_api=True)
         8
 
     Raises:
-        ImportError: If anthropic package not installed
-        ValueError: If text is invalid
+        ImportError: If anthropic package not installed (when use_api=True)
+        ValueError: If API key missing (when use_api=True)
 
     """
     if not text:
         return 0
 
-    client = _get_client()
-
-    # Use Anthropic's count_tokens method
-    # Note: This is a synchronous call for simplicity
-    try:
-        result = client.count_tokens(text)
-        return result
-    except Exception as e:
-        # Fallback to rough estimate if API fails
-        # This ensures token counting never crashes workflows
-        import logging
-
-        logging.debug(f"Token counting failed, using fallback estimate: {e}")
-        return len(text) // 4
+    # Use API if explicitly requested
+    if use_api:
+        try:
+            client = _get_client()
+            # FIXED: Use correct API method - client.messages.count_tokens()
+            result = client.messages.count_tokens(
+                model=model,
+                messages=[{"role": "user", "content": text}],
+            )
+            return int(result.input_tokens)
+        except Exception as e:
+            logger.warning(f"API token counting failed, using fallback: {e}")
+            # Continue to fallback methods
+
+    # Try tiktoken first (fast and accurate)
+    if TIKTOKEN_AVAILABLE:
+        tokens = _count_tokens_tiktoken(text, model)
+        if tokens > 0:
+            return tokens
+
+    # Fallback to heuristic
+    return _count_tokens_heuristic(text)
 
 
 def count_message_tokens(
     messages: list[dict[str, str]],
     system_prompt: str | None = None,
-    model: str = "claude-sonnet-4-5",
+    model: str = "claude-sonnet-4-5-20250929",
+    use_api: bool = False,
 ) -> dict[str, int]:
     """Count tokens in a conversation.
 
+    By default uses tiktoken for fast estimation. Set use_api=True for exact count.
+
     Args:
         messages: List of message dicts with "role" and "content"
         system_prompt: Optional system prompt
         model: Model ID
+        use_api: Whether to use Anthropic API for exact count
 
     Returns:
         Dict with token counts by component:
@@ -92,21 +174,59 @@ def count_message_tokens(
         {"system": 4, "messages": 6, "total": 10}
 
     """
+    if not messages:
+        if system_prompt:
+            tokens = count_tokens(system_prompt, model, use_api)
+            return {"system": tokens, "messages": 0, "total": tokens}
+        return {"system": 0, "messages": 0, "total": 0}
+
+    # Use Anthropic API for exact count if requested
+    if use_api:
+        try:
+            client = _get_client()
+            kwargs: dict[str, Any] = {"model": model, "messages": messages}
+            if system_prompt:
+                kwargs["system"] = system_prompt
+
+            result = client.messages.count_tokens(**kwargs)
+            # API returns total input tokens, estimate breakdown
+            total_tokens = result.input_tokens
+
+            # Estimate system vs message breakdown
+            if system_prompt:
+                system_tokens = count_tokens(system_prompt, model, use_api=False)
+                message_tokens = max(0, total_tokens - system_tokens)
+            else:
+                system_tokens = 0
+                message_tokens = total_tokens
+
+            return {
+                "system": system_tokens,
+                "messages": message_tokens,
+                "total": total_tokens,
+            }
+        except Exception as e:
+            logger.warning(f"API token counting failed, using fallback: {e}")
+            # Continue to fallback method
+
+    # Fallback: count each component separately
     counts: dict[str, int] = {}
 
     # Count system prompt
     if system_prompt:
-        counts["system"] = count_tokens(system_prompt, model)
+        counts["system"] = count_tokens(system_prompt, model, use_api=False)
     else:
         counts["system"] = 0
 
-    # Count messages
-    # Combine all messages into single text for accurate tokenization
-    message_text = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
-    counts["messages"] = count_tokens(message_text, model)
+    # Count messages with overhead
+    message_tokens = 0
+    for message in messages:
+        content = message.get("content", "")
+        message_tokens += count_tokens(content, model, use_api=False)
+        message_tokens += 4  # Overhead for role markers
 
-    # Total
-    counts["total"] = counts["system"] + counts["messages"]
+    counts["messages"] = message_tokens
+    counts["total"] = counts["system"] + message_tokens
 
     return counts
 
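The net effect of the tokens.py changes is a three-tier counting strategy: the Anthropic client.messages.count_tokens() endpoint when use_api=True, tiktoken's cl100k_base encoding when that package is installed, and the ~4-chars-per-token heuristic as the final fallback. A minimal usage sketch of the new surface (assumes empathy-framework 5.0.3 is installed; tiktoken and ANTHROPIC_API_KEY are optional, and the printed counts are illustrative):

    # Sketch: exercising the new use_api flag and the local fallback chain.
    from empathy_llm_toolkit.utils.tokens import count_message_tokens, count_tokens

    # Local path: tiktoken if importable, else the len(text) // 4 heuristic.
    estimate = count_tokens("def hello():\n    print('hi')")

    # Exact path: requires the anthropic package and ANTHROPIC_API_KEY;
    # falls back to the local path if the API call fails.
    exact = count_tokens("def hello():\n    print('hi')", use_api=True)

    # Per-conversation breakdown; the local fallback adds 4 tokens of
    # overhead per message for role markers.
    breakdown = count_message_tokens(
        messages=[{"role": "user", "content": "Summarize this log."}],
        system_prompt="You are a concise assistant.",
    )
    print(estimate, exact, breakdown)  # breakdown: {"system": ..., "messages": ..., "total": ...}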
empathy_os/__init__.py CHANGED
@@ -55,7 +55,7 @@ Copyright 2025 Smart AI Memory, LLC
 Licensed under Fair Source 0.9
 """
 
-__version__ = "5.0.1"
+__version__ = "5.0.3"
 __author__ = "Patrick Roebuck"
 __email__ = "patrick.roebuck@smartaimemory.com"
 
empathy_os/cli/commands/batch.py ADDED
@@ -0,0 +1,256 @@
+"""CLI commands for Anthropic Batch API operations (50% cost savings).
+
+Provides commands to submit, monitor, and retrieve results from batch processing jobs.
+
+Copyright 2025 Smart-AI-Memory
+Licensed under Fair Source License 0.9
+"""
+
+import asyncio
+import json
+import logging
+import os
+from pathlib import Path
+
+from empathy_os.config import _validate_file_path
+from empathy_os.workflows.batch_processing import BatchProcessingWorkflow
+
+logger = logging.getLogger(__name__)
+
+
+def cmd_batch_submit(args):
+    """Submit a batch processing job from JSON file.
+
+    Args:
+        args: Arguments with input_file path
+
+    File format:
+        [
+            {
+                "task_id": "task_1",
+                "task_type": "analyze_logs",
+                "input_data": {"logs": "ERROR: ..."},
+                "model_tier": "capable"
+            },
+            ...
+        ]
+    """
+    input_file = Path(args.input_file)
+    if not input_file.exists():
+        print(f"❌ Error: Input file not found: {input_file}")
+        return 1
+
+    print(f"📤 Submitting batch from {input_file}...")
+
+    try:
+        # Get API key from environment
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            print("❌ Error: ANTHROPIC_API_KEY environment variable not set")
+            return 1
+
+        # Load requests from file
+        workflow = BatchProcessingWorkflow(api_key=api_key)
+        requests = workflow.load_requests_from_file(str(input_file))
+
+        print(f"   Found {len(requests)} requests")
+
+        # Create batch (sync operation)
+        batch_id = workflow.batch_provider.create_batch(
+            [
+                {
+                    "custom_id": req.task_id,
+                    "params": {
+                        "model": "claude-sonnet-4-5-20250929",  # Default model
+                        "messages": workflow._format_messages(req),
+                        "max_tokens": 4096,
+                    },
+                }
+                for req in requests
+            ]
+        )
+
+        print(f"\n✅ Batch submitted successfully!")
+        print(f"   Batch ID: {batch_id}")
+        print(f"\nMonitor status with: empathy batch status {batch_id}")
+        print(f"Retrieve results with: empathy batch results {batch_id} output.json")
+        print(
+            f"Or wait for completion: empathy batch wait {batch_id} output.json --poll-interval 300"
+        )
+
+        return 0
+
+    except Exception as e:
+        logger.exception("Failed to submit batch")
+        print(f"❌ Error: {e}")
+        return 1
+
+
+def cmd_batch_status(args):
+    """Check status of a batch processing job.
+
+    Args:
+        args: Arguments with batch_id
+    """
+    batch_id = args.batch_id
+
+    print(f"🔍 Checking status for batch {batch_id}...")
+
+    try:
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            print("❌ Error: ANTHROPIC_API_KEY environment variable not set")
+            return 1
+
+        workflow = BatchProcessingWorkflow(api_key=api_key)
+        status = workflow.batch_provider.get_batch_status(batch_id)
+
+        print(f"\n📊 Batch Status:")
+        print(f"   ID: {status.id}")
+        print(f"   Processing Status: {status.processing_status}")
+        print(f"   Created: {status.created_at}")
+
+        if hasattr(status, "ended_at") and status.ended_at:
+            print(f"   Ended: {status.ended_at}")
+
+        print(f"\n📈 Request Counts:")
+        counts = status.request_counts
+        print(f"   Processing: {counts.processing}")
+        print(f"   Succeeded: {counts.succeeded}")
+        print(f"   Errored: {counts.errored}")
+        print(f"   Canceled: {counts.canceled}")
+        print(f"   Expired: {counts.expired}")
+
+        if status.processing_status == "ended":
+            print(f"\n✅ Batch processing completed!")
+            print(f"   Retrieve results with: empathy batch results {batch_id} output.json")
+        else:
+            print(f"\n⏳ Batch still processing...")
+
+        # Output JSON if requested
+        if args.json:
+            print("\n" + json.dumps(status.__dict__, indent=2, default=str))
+
+        return 0
+
+    except Exception as e:
+        logger.exception("Failed to get batch status")
+        print(f"❌ Error: {e}")
+        return 1
+
+
+def cmd_batch_results(args):
+    """Retrieve results from a completed batch.
+
+    Args:
+        args: Arguments with batch_id and output_file
+    """
+    batch_id = args.batch_id
+    output_file = args.output_file
+
+    print(f"📥 Retrieving results for batch {batch_id}...")
+
+    try:
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            print("❌ Error: ANTHROPIC_API_KEY environment variable not set")
+            return 1
+
+        workflow = BatchProcessingWorkflow(api_key=api_key)
+
+        # Check status first
+        status = workflow.batch_provider.get_batch_status(batch_id)
+
+        if status.processing_status != "ended":
+            print(f"❌ Error: Batch has not ended processing (status: {status.processing_status})")
+            print(f"   Wait for completion with: empathy batch wait {batch_id} {output_file}")
+            return 1
+
+        # Get results
+        results = workflow.batch_provider.get_batch_results(batch_id)
+
+        # Save to file
+        validated_path = _validate_file_path(output_file)
+        with open(validated_path, "w") as f:
+            json.dump([dict(r) for r in results], f, indent=2, default=str)
+
+        print(f"\n✅ Results saved to {validated_path}")
+        print(f"   Total: {len(results)} results")
+
+        # Summary
+        succeeded = sum(
+            1 for r in results if r.get("result", {}).get("type") == "succeeded"
+        )
+        errored = sum(
+            1 for r in results if r.get("result", {}).get("type") == "errored"
+        )
+
+        print(f"   Succeeded: {succeeded}")
+        print(f"   Errored: {errored}")
+
+        return 0
+
+    except Exception as e:
+        logger.exception("Failed to retrieve results")
+        print(f"❌ Error: {e}")
+        return 1
+
+
+def cmd_batch_wait(args):
+    """Wait for batch to complete and retrieve results.
+
+    Args:
+        args: Arguments with batch_id, output_file, poll_interval, timeout
+    """
+    batch_id = args.batch_id
+    output_file = args.output_file
+    poll_interval = args.poll_interval
+    timeout = args.timeout
+
+    print(f"⏳ Waiting for batch {batch_id} to complete...")
+    print(f"   Polling every {poll_interval}s (max {timeout}s)")
+
+    try:
+        api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            print("❌ Error: ANTHROPIC_API_KEY environment variable not set")
+            return 1
+
+        workflow = BatchProcessingWorkflow(api_key=api_key)
+
+        # Wait for completion (async)
+        results = asyncio.run(
+            workflow.batch_provider.wait_for_batch(
+                batch_id, poll_interval=poll_interval, timeout=timeout
+            )
+        )
+
+        # Save results
+        validated_path = _validate_file_path(output_file)
+        with open(validated_path, "w") as f:
+            json.dump([dict(r) for r in results], f, indent=2, default=str)
+
+        print(f"\n✅ Batch completed! Results saved to {validated_path}")
+        print(f"   Total: {len(results)} results")
+
+        # Summary
+        succeeded = sum(
+            1 for r in results if r.get("result", {}).get("type") == "succeeded"
+        )
+        errored = sum(
+            1 for r in results if r.get("result", {}).get("type") == "errored"
+        )
+
+        print(f"   Succeeded: {succeeded}")
+        print(f"   Errored: {errored}")
+
+        return 0
+
+    except TimeoutError:
+        print(f"\n⏰ Timeout: Batch did not complete within {timeout}s")
+        print(f"   Check status with: empathy batch status {batch_id}")
+        return 1
+    except Exception as e:
+        logger.exception("Failed to wait for batch")
+        print(f"❌ Error: {e}")
+        return 1
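For context, the request file these commands consume is the format documented in cmd_batch_submit. A hedged end-to-end sketch (field values are illustrative; the CLI invocations are the ones the commands themselves print):

    # Sketch: write a batch request file, then drive it through the new CLI.
    import json

    requests = [
        {
            "task_id": "task_1",
            "task_type": "analyze_logs",  # illustrative task type
            "input_data": {"logs": "ERROR: connection refused"},
            "model_tier": "capable",  # illustrative tier name
        },
    ]
    with open("batch_requests.json", "w") as f:
        json.dump(requests, f, indent=2)

    # Then, with ANTHROPIC_API_KEY exported:
    #   empathy batch submit batch_requests.json
    #   empathy batch status <batch_id>
    #   empathy batch wait <batch_id> output.json --poll-interval 300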