kalibr 1.1.3a0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +41 -3
- kalibr/cli/capsule_cmd.py +3 -3
- kalibr/cli/main.py +3 -3
- kalibr/cli/run.py +2 -2
- kalibr/client.py +1 -1
- kalibr/collector.py +227 -48
- kalibr/context.py +42 -0
- kalibr/cost_adapter.py +36 -104
- kalibr/instrumentation/anthropic_instr.py +34 -40
- kalibr/instrumentation/base.py +27 -9
- kalibr/instrumentation/google_instr.py +34 -39
- kalibr/instrumentation/openai_instr.py +34 -28
- kalibr/instrumentation/registry.py +38 -13
- kalibr/intelligence.py +662 -0
- kalibr/middleware/auto_tracer.py +1 -1
- kalibr/pricing.py +245 -0
- kalibr/router.py +499 -0
- kalibr/simple_tracer.py +16 -15
- kalibr/trace_capsule.py +19 -12
- kalibr/utils.py +2 -2
- kalibr-1.3.0.dist-info/LICENSE +190 -0
- kalibr-1.3.0.dist-info/METADATA +296 -0
- kalibr-1.3.0.dist-info/RECORD +52 -0
- {kalibr-1.1.3a0.dist-info → kalibr-1.3.0.dist-info}/WHEEL +1 -1
- kalibr_crewai/__init__.py +1 -1
- kalibr_crewai/callbacks.py +122 -14
- kalibr_crewai/instrumentor.py +196 -33
- kalibr_langchain/__init__.py +4 -2
- kalibr_langchain/callback.py +26 -0
- kalibr_langchain/chat_model.py +103 -0
- kalibr_openai_agents/__init__.py +1 -1
- kalibr-1.1.3a0.dist-info/METADATA +0 -236
- kalibr-1.1.3a0.dist-info/RECORD +0 -48
- kalibr-1.1.3a0.dist-info/licenses/LICENSE +0 -21
- {kalibr-1.1.3a0.dist-info → kalibr-1.3.0.dist-info}/entry_points.txt +0 -0
- {kalibr-1.1.3a0.dist-info → kalibr-1.3.0.dist-info}/top_level.txt +0 -0
kalibr/router.py
ADDED
@@ -0,0 +1,499 @@
+"""
+Kalibr Router - Intelligent model routing with outcome learning.
+"""
+
+import os
+import logging
+import uuid
+from typing import Any, Callable, Dict, List, Optional, Union
+
+from opentelemetry import trace as otel_trace
+from opentelemetry.trace import SpanContext, TraceFlags, NonRecordingSpan, set_span_in_context
+from opentelemetry.context import Context
+
+logger = logging.getLogger(__name__)
+
+# Type for paths - either string or dict
+PathSpec = Union[str, Dict[str, Any]]
+
+
+def _create_context_with_trace_id(trace_id_hex: str) -> Optional[Context]:
+    """Create an OTel context with a specific trace_id.
+
+    This allows child spans to inherit the intelligence service's trace_id,
+    enabling JOINs between outcomes and traces tables.
+    """
+    try:
+        # Convert 32-char hex string to 128-bit int
+        trace_id_int = int(trace_id_hex, 16)
+        if trace_id_int == 0:
+            return None
+
+        # Create span context with our trace_id
+        span_context = SpanContext(
+            trace_id=trace_id_int,
+            span_id=0xDEADBEEF,  # Placeholder, real span will have its own
+            is_remote=True,  # Treat as remote parent so new span_id is generated
+            trace_flags=TraceFlags(TraceFlags.SAMPLED),
+        )
+
+        # Create a non-recording parent span and set in context
+        parent_span = NonRecordingSpan(span_context)
+        return set_span_in_context(parent_span)
+    except (ValueError, TypeError) as e:
+        logger.warning(f"Could not create OTel context with trace_id: {e}")
+        return None
+
+
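The helper above is what makes the outcome-to-trace JOINs possible: wrapping a hand-built SpanContext in a NonRecordingSpan makes any span started under the returned context adopt the injected trace_id while still receiving a fresh span_id. A minimal verification sketch, not part of the package, assuming opentelemetry-sdk is installed and no tracer provider has been set yet:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    from kalibr.router import _create_context_with_trace_id

    trace.set_tracer_provider(TracerProvider())
    ctx = _create_context_with_trace_id("ab" * 16)  # any nonzero 32-char hex string
    with trace.get_tracer("sketch").start_as_current_span("child", context=ctx) as span:
        # The child adopts the injected 128-bit trace_id but gets its own span_id.
        assert format(span.get_span_context().trace_id, "032x") == "ab" * 16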
+class Router:
+    """
+    Routes LLM requests to the best model based on learned outcomes.
+
+    Examples:
+        # Auto-reporting with a simple length check
+        router = Router(
+            goal="summarize",
+            paths=["gpt-4o", "claude-3-sonnet"],
+            success_when=lambda out: len(out) > 100
+        )
+        response = router.completion(messages=[...])
+        # report() called automatically
+
+        # Auto-reporting with a key-string check
+        router = Router(
+            goal="extract_email",
+            paths=["gpt-4o", "claude-sonnet-4"],
+            success_when=lambda out: "@" in out
+        )
+        response = router.completion(messages=[...])
+
+        # Manual reporting for complex validation
+        router = Router(
+            goal="book_meeting",
+            paths=["gpt-4o", "claude-sonnet-4"]
+        )
+        response = router.completion(messages=[...])
+        # ... complex validation logic ...
+        router.report(success=meeting_booked)
+
+    Warning:
+        Router is not thread-safe. For concurrent requests, create separate
+        Router instances per thread/task. For sequential requests in a single
+        thread, Router can be reused across multiple completion() calls.
+    """
+
+    def __init__(
+        self,
+        goal: str,
+        paths: Optional[List[PathSpec]] = None,
+        success_when: Optional[Callable[[str], bool]] = None,
+        exploration_rate: Optional[float] = None,
+        auto_register: bool = True,
+    ):
+        """
+        Initialize router.
+
+        Args:
+            goal: Name of the goal (e.g., "book_meeting", "summarize")
+            paths: List of models or path configs. Examples:
+                ["gpt-4o", "claude-3-sonnet"]
+                [{"model": "gpt-4o", "tools": ["search"]}]
+                [{"model": "gpt-4o", "params": {"temperature": 0.7}}]
+            success_when: Optional function to auto-evaluate success from LLM output.
+                Takes the output string and returns True/False.
+                When provided, report() is called automatically after completion().
+                Use for simple validations (output length, contains key string).
+                For complex validation (API calls, multi-step checks), omit this
+                and call report() manually.
+                Examples:
+                    success_when=lambda out: len(out) > 0  # Not empty
+                    success_when=lambda out: "@" in out    # Contains email
+            exploration_rate: Override exploration rate (0.0-1.0)
+            auto_register: If True, register paths on init
+        """
+        self.goal = goal
+
+        # Validate required environment variables
+        api_key = os.environ.get('KALIBR_API_KEY')
+        tenant_id = os.environ.get('KALIBR_TENANT_ID')
+
+        if not api_key:
+            raise ValueError(
+                "KALIBR_API_KEY environment variable not set.\n"
+                "Get your API key from: https://dashboard.kalibr.systems/settings\n"
+                "Then run: export KALIBR_API_KEY=your-key-here"
+            )
+
+        if not tenant_id:
+            raise ValueError(
+                "KALIBR_TENANT_ID environment variable not set.\n"
+                "Find your Tenant ID at: https://dashboard.kalibr.systems/settings\n"
+                "Then run: export KALIBR_TENANT_ID=your-tenant-id"
+            )
+
+        self.success_when = success_when
+        self.exploration_rate = exploration_rate
+        self._last_trace_id: Optional[str] = None
+        self._last_model_id: Optional[str] = None
+        self._last_decision: Optional[dict] = None
+        self._outcome_reported = False
+
+        # Normalize paths to list of dicts
+        self._paths = self._normalize_paths(paths or ["gpt-4o"])
+
+        # Register paths if requested
+        if auto_register:
+            self._register_paths()
+
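Construction fails fast on missing credentials, so both variables must be set before the first Router is created. A setup sketch using the placeholder values from the error messages above:

    import os

    os.environ["KALIBR_API_KEY"] = "your-key-here"     # placeholder
    os.environ["KALIBR_TENANT_ID"] = "your-tenant-id"  # placeholder

    router = Router(goal="summarize", paths=["gpt-4o"])  # raises ValueError if either is missing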
+    def _normalize_paths(self, paths: List[PathSpec]) -> List[Dict[str, Any]]:
+        """Convert paths to consistent format."""
+        normalized = []
+        for p in paths:
+            if isinstance(p, str):
+                normalized.append({"model": p, "tools": None, "params": None})
+            elif isinstance(p, dict):
+                normalized.append({
+                    "model": p.get("model") or p.get("model_id"),
+                    "tools": p.get("tools") or p.get("tool_id"),
+                    "params": p.get("params"),
+                })
+            else:
+                raise ValueError(f"Invalid path spec: {p}")
+        return normalized
+
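Both accepted spellings ("model"/"model_id", "tools"/"tool_id") collapse to one shape, so the rest of the class only ever reads "model", "tools", and "params". An illustration of the mapping the code above produces:

    # "gpt-4o"
    #     -> {"model": "gpt-4o", "tools": None, "params": None}
    # {"model_id": "claude-sonnet-4", "params": {"temperature": 0.2}}
    #     -> {"model": "claude-sonnet-4", "tools": None, "params": {"temperature": 0.2}}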
+    def _register_paths(self):
+        """Register paths with intelligence service."""
+        from kalibr.intelligence import register_path
+
+        for path in self._paths:
+            try:
+                register_path(
+                    goal=self.goal,
+                    model_id=path["model"],
+                    tool_id=path["tools"][0] if isinstance(path["tools"], list) and path["tools"] else path["tools"],
+                    params=path["params"],
+                )
+            except Exception as e:
+                # Log but don't fail - path might already exist
+                logger.debug(f"Path registration note: {e}")
+
+    def completion(
+        self,
+        messages: List[Dict[str, str]],
+        force_model: Optional[str] = None,
+        **kwargs
+    ) -> Any:
+        """
+        Make a completion request with intelligent routing.
+
+        Args:
+            messages: OpenAI-format messages
+            force_model: Override routing and use this model
+            **kwargs: Additional args passed to provider
+
+        Returns:
+            OpenAI-compatible ChatCompletion response with added attribute:
+            - kalibr_trace_id: Trace ID for explicit outcome reporting
+        """
+        from kalibr.intelligence import decide
+
+        # Reset state for new request
+        self._outcome_reported = False
+
+        # Step 1: Get routing decision FIRST (before creating span)
+        decision = None
+        model_id = None
+        tool_id = None
+        params = {}
+
+        if force_model:
+            model_id = force_model
+            self._last_decision = {"model_id": model_id, "forced": True}
+        else:
+            try:
+                decision = decide(goal=self.goal)
+                model_id = decision.get("model_id") or self._paths[0]["model"]
+                tool_id = decision.get("tool_id")
+                params = decision.get("params") or {}
+                self._last_decision = decision
+            except Exception as e:
+                logger.warning(f"Routing failed, using fallback: {e}")
+                model_id = self._paths[0]["model"]
+                tool_id = self._paths[0].get("tools")
+                params = self._paths[0].get("params") or {}
+                self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
+
+        # Step 2: Determine trace_id
+        decision_trace_id = self._last_decision.get("trace_id") if self._last_decision else None
+
+        if decision_trace_id:
+            trace_id = decision_trace_id
+        else:
+            trace_id = uuid.uuid4().hex  # Fallback: generate OTel-compatible format
+
+        self._last_trace_id = trace_id
+        self._last_model_id = model_id
+
+        # Step 3: Create OTel context with intelligence trace_id
+        otel_context = _create_context_with_trace_id(trace_id) if trace_id else None
+
+        # Step 4: Create span with custom context (child spans inherit trace_id)
+        tracer = otel_trace.get_tracer("kalibr.router")
+
+        with tracer.start_as_current_span(
+            "kalibr.router.completion",
+            context=otel_context,
+            attributes={
+                "kalibr.goal": self.goal,
+                "kalibr.trace_id": trace_id,
+                "kalibr.model_id": model_id,
+            }
+        ) as router_span:
+            # Add decision attributes
+            if force_model:
+                router_span.set_attribute("kalibr.forced", True)
+            elif decision:
+                router_span.set_attribute("kalibr.path_id", decision.get("path_id", ""))
+                router_span.set_attribute("kalibr.reason", decision.get("reason", ""))
+                router_span.set_attribute("kalibr.exploration", decision.get("exploration", False))
+                router_span.set_attribute("kalibr.confidence", decision.get("confidence", 0.0))
+            else:
+                router_span.set_attribute("kalibr.fallback", True)
+
+            # Step 5: Dispatch to provider
+            try:
+                response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
+
+                # Auto-report if success_when provided
+                if self.success_when and not self._outcome_reported:
+                    try:
+                        output = response.choices[0].message.content or ""
+                        success = self.success_when(output)
+                        self.report(success=success)
+                    except Exception as e:
+                        logger.warning(f"Auto-outcome evaluation failed: {e}")
+
+                # Add trace_id to response for explicit linkage
+                response.kalibr_trace_id = trace_id
+                return response
+
+            except Exception as e:
+                router_span.set_attribute("error", True)
+                router_span.set_attribute("error.type", type(e).__name__)
+
+                # Auto-report failure
+                if not self._outcome_reported:
+                    try:
+                        self.report(success=False, reason=f"provider_error: {type(e).__name__}")
+                    except Exception:
+                        pass
+                raise
+
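End to end, completion() runs decide → trace_id → span → dispatch → auto-report. A hedged usage sketch, assuming KALIBR_API_KEY, KALIBR_TENANT_ID, and a provider key such as OPENAI_API_KEY are set:

    router = Router(
        goal="summarize",
        paths=["gpt-4o", "claude-sonnet-4"],
        success_when=lambda out: len(out) > 100,
    )
    response = router.completion(
        messages=[{"role": "user", "content": "Summarize: ..."}],
    )
    print(response.choices[0].message.content)
    print(response.kalibr_trace_id)  # stable ID for later report() calls and trace JOINs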
+    def report(
+        self,
+        success: bool,
+        reason: Optional[str] = None,
+        score: Optional[float] = None,
+        trace_id: Optional[str] = None,
+    ):
+        """
+        Report outcome for the last completion.
+
+        Args:
+            success: Whether the task succeeded
+            reason: Optional failure reason
+            score: Optional quality score (0.0-1.0)
+            trace_id: Optional explicit trace ID (uses last completion's trace_id if not provided)
+        """
+        if self._outcome_reported:
+            logger.warning("Outcome already reported for this completion. Each completion() requires a separate report() call.")
+            return
+
+        from kalibr.intelligence import report_outcome
+
+        trace_id = trace_id or self._last_trace_id
+        if not trace_id:
+            raise ValueError("Must call completion() before report(). No trace_id available.")
+
+        try:
+            report_outcome(
+                trace_id=trace_id,
+                goal=self.goal,
+                success=success,
+                score=score,
+                failure_reason=reason,
+                model_id=self._last_model_id,
+            )
+            self._outcome_reported = True
+        except Exception as e:
+            logger.warning(f"Failed to report outcome: {e}")
+
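For routers constructed without success_when, the caller owns the outcome. A hypothetical manual-reporting sketch (check_calendar is a placeholder for your own validation logic):

    response = router.completion(messages=[{"role": "user", "content": "Book a slot"}])
    meeting_booked = check_calendar()  # placeholder
    router.report(
        success=meeting_booked,
        score=0.9 if meeting_booked else 0.0,
        trace_id=response.kalibr_trace_id,  # optional; defaults to the last completion's trace_id
    )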
+    def add_path(
+        self,
+        model: str,
+        tools: Optional[List[str]] = None,
+        params: Optional[Dict] = None,
+    ):
+        """Add a new path dynamically."""
+        from kalibr.intelligence import register_path
+
+        path = {"model": model, "tools": tools, "params": params}
+        self._paths.append(path)
+
+        register_path(
+            goal=self.goal,
+            model_id=model,
+            tool_id=tools[0] if tools else None,
+            params=params,
+        )
+
+    def _dispatch(
+        self,
+        model_id: str,
+        messages: List[Dict],
+        tools: Optional[Any] = None,
+        **kwargs
+    ) -> Any:
+        """Dispatch to the appropriate provider."""
+        if model_id.startswith(("gpt-", "o1-", "o3-")):
+            return self._call_openai(model_id, messages, tools, **kwargs)
+        elif model_id.startswith("claude-"):
+            return self._call_anthropic(model_id, messages, tools, **kwargs)
+        elif model_id.startswith(("gemini-", "models/gemini")):
+            return self._call_google(model_id, messages, tools, **kwargs)
+        else:
+            # Default to OpenAI-compatible
+            logger.info(f"Unknown model prefix '{model_id}', trying OpenAI")
+            return self._call_openai(model_id, messages, tools, **kwargs)
+
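Dispatch is purely prefix-based, and force_model bypasses decide() while still going through the same prefix table. A sketch, assuming GOOGLE_API_KEY is set:

    response = router.completion(
        messages=[{"role": "user", "content": "hi"}],
        force_model="gemini-1.5-pro",  # "gemini-" prefix routes to _call_google
    )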
+    def _call_openai(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+        """Call OpenAI API."""
+        try:
+            from openai import OpenAI
+        except ImportError:
+            raise ImportError("Install 'openai' package: pip install openai")
+
+        client = OpenAI()
+
+        call_kwargs = {"model": model, "messages": messages, **kwargs}
+        if tools:
+            call_kwargs["tools"] = tools
+
+        return client.chat.completions.create(**call_kwargs)
+
+    def _call_anthropic(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+        """Call Anthropic API and convert response to OpenAI format."""
+        try:
+            from anthropic import Anthropic
+        except ImportError:
+            raise ImportError("Install 'anthropic' package: pip install anthropic")
+
+        client = Anthropic()
+
+        # Convert messages (handle system message)
+        system = None
+        anthropic_messages = []
+        for m in messages:
+            if m["role"] == "system":
+                system = m["content"]
+            else:
+                anthropic_messages.append({"role": m["role"], "content": m["content"]})
+
+        call_kwargs = {"model": model, "messages": anthropic_messages, "max_tokens": kwargs.pop("max_tokens", 4096)}
+        if system:
+            call_kwargs["system"] = system
+        if tools:
+            call_kwargs["tools"] = tools
+        call_kwargs.update(kwargs)
+
+        response = client.messages.create(**call_kwargs)
+
+        # Convert to OpenAI format
+        return self._anthropic_to_openai_response(response, model)
+
+    def _call_google(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+        """Call Google API and convert response to OpenAI format."""
+        try:
+            import google.generativeai as genai
+        except ImportError:
+            raise ImportError("Install 'google-generativeai' package: pip install google-generativeai")
+
+        # Configure if API key available
+        api_key = os.environ.get("GOOGLE_API_KEY")
+        if api_key:
+            genai.configure(api_key=api_key)
+
+        # Convert messages to Google format
+        model_name = model.replace("models/", "") if model.startswith("models/") else model
+        gmodel = genai.GenerativeModel(model_name)
+
+        # Simple conversion - concatenate messages
+        prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
+
+        response = gmodel.generate_content(prompt)
+
+        # Convert to OpenAI format
+        return self._google_to_openai_response(response, model)
+
+    def _anthropic_to_openai_response(self, response: Any, model: str) -> Any:
+        """Convert Anthropic response to OpenAI format."""
+        from types import SimpleNamespace
+
+        content = ""
+        if response.content:
+            content = response.content[0].text if hasattr(response.content[0], "text") else str(response.content[0])
+
+        return SimpleNamespace(
+            id=response.id,
+            model=model,
+            choices=[
+                SimpleNamespace(
+                    index=0,
+                    message=SimpleNamespace(
+                        role="assistant",
+                        content=content,
+                    ),
+                    finish_reason=response.stop_reason,
+                )
+            ],
+            usage=SimpleNamespace(
+                prompt_tokens=response.usage.input_tokens,
+                completion_tokens=response.usage.output_tokens,
+                total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+            ),
+        )
+
+    def _google_to_openai_response(self, response: Any, model: str) -> Any:
+        """Convert Google response to OpenAI format."""
+        from types import SimpleNamespace
+        import uuid
+
+        content = response.text if hasattr(response, "text") else str(response)
+
+        return SimpleNamespace(
+            id=f"google-{uuid.uuid4().hex[:8]}",
+            model=model,
+            choices=[
+                SimpleNamespace(
+                    index=0,
+                    message=SimpleNamespace(
+                        role="assistant",
+                        content=content,
+                    ),
+                    finish_reason="stop",
+                )
+            ],
+            usage=SimpleNamespace(
+                prompt_tokens=getattr(response, "usage_metadata", {}).get("prompt_token_count", 0),
+                completion_tokens=getattr(response, "usage_metadata", {}).get("candidates_token_count", 0),
+                total_tokens=getattr(response, "usage_metadata", {}).get("total_token_count", 0),
+            ),
+        )
+
+    def as_langchain(self):
+        """Return a LangChain-compatible chat model."""
+        try:
+            from kalibr_langchain.chat_model import KalibrChatModel
+            return KalibrChatModel(router=self)
+        except ImportError:
+            raise ImportError("Install 'kalibr-langchain' package for LangChain integration")
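Whichever branch of _dispatch runs, callers see one response shape. The converters above guarantee at least this surface (a sketch, not an exhaustive schema):

    text = response.choices[0].message.content
    finish = response.choices[0].finish_reason
    total = response.usage.total_tokens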
kalibr/simple_tracer.py
CHANGED
@@ -19,6 +19,8 @@ Capsule Usage (automatic when middleware is active):
     def process_request(request: Request, prompt: str):
         # Capsule automatically updated with this hop
         return llm_call(prompt)
+
+Note: Uses centralized pricing from kalibr.pricing module.
 """
 
 import json
@@ -31,6 +33,8 @@ from datetime import datetime, timezone
 from functools import wraps
 from typing import Callable, Optional
 
+from kalibr.pricing import compute_cost
+
 try:
     import requests
 except ImportError:
@@ -53,7 +57,7 @@ def send_event(payload: dict):
         print("[Kalibr SDK] ❌ requests library not available")
         return
 
-    url = os.getenv("KALIBR_COLLECTOR_URL", "https://
+    url = os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest")
     api_key = os.getenv("KALIBR_API_KEY")
     if not api_key:
         print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, traces will not be sent")
@@ -155,21 +159,18 @@ def trace(
         actual_input_tokens = input_tokens or kwargs.get("input_tokens", 1000)
         actual_output_tokens = output_tokens or kwargs.get("output_tokens", 500)
 
-        # Cost calculation
-        #
-
-
-
-
-
-
-        # Get unit price
-        provider_pricing = pricing_map.get(provider, {})
-        unit_price_usd = provider_pricing.get(model, 0.00002000)  # Default $0.02/1M
+        # Cost calculation using centralized pricing
+        # This ensures consistency with all other cost adapters
+        total_cost_usd = compute_cost(
+            vendor=provider,
+            model_name=model,
+            input_tokens=actual_input_tokens,
+            output_tokens=actual_output_tokens,
+        )
 
-        # Calculate total cost
-
+        # Calculate unit price for backward compatibility (total cost / total tokens)
+        total_tokens = actual_input_tokens + actual_output_tokens
+        unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
 
         # Build payload
         payload = {
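The derived unit price is just total cost averaged over all tokens, kept only for backward compatibility with payloads that expect a per-token rate. A hypothetical arithmetic sketch (the dollar figure is made up):

    total_cost_usd = 0.0150    # e.g., what compute_cost() returned
    total_tokens = 1000 + 500  # input + output
    unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
    # -> 0.00001 USD/token, a blended average of the input and output rates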
kalibr/trace_capsule.py
CHANGED
@@ -28,6 +28,7 @@ Usage:
 """
 
 import json
+import threading
 import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional
@@ -85,12 +86,16 @@ class TraceCapsule:
         # Phase 3C: Context token propagation (keep as UUID for consistency)
         self.context_token = context_token or str(uuid.uuid4())
         self.parent_context_token = parent_context_token
+        # Thread-safety: Lock for protecting concurrent append_hop operations
+        self._lock = threading.Lock()
 
     def append_hop(self, hop: Dict[str, Any]) -> None:
         """Append a new hop to the capsule.
 
         Maintains a rolling window of last N hops to keep payload compact.
         Updates aggregate metrics automatically.
+
+        Thread-safe: Uses internal lock to protect concurrent modifications.
 
         Args:
             hop: Dictionary containing hop metadata
@@ -111,22 +116,24 @@ class TraceCapsule:
                 "agent_name": "code-writer"
             })
         """
-        #
-
+        # Thread-safe update of capsule state
+        with self._lock:
+            # Add hop_index
+            hop["hop_index"] = len(self.last_n_hops)
 
-
-
+            # Append to history
+            self.last_n_hops.append(hop)
 
-
-
-
+            # Maintain rolling window (keep last N hops)
+            if len(self.last_n_hops) > self.MAX_HOPS:
+                self.last_n_hops.pop(0)
 
-
-
-
+            # Update aggregates
+            self.aggregate_cost_usd += hop.get("cost_usd", 0.0)
+            self.aggregate_latency_ms += hop.get("duration_ms", 0.0)
 
-
-
+            # Update timestamp
+            self.timestamp = datetime.now(timezone.utc).isoformat()
 
     def get_last_hop(self) -> Optional[Dict[str, Any]]:
         """Get the most recent hop.
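With the lock in place, concurrent hops can no longer interleave the index assignment, window trim, and aggregate updates. A hypothetical concurrency sketch (the capsule's constructor arguments are not shown in this hunk, so its creation is elided):

    import threading

    capsule = make_capsule()  # placeholder for however the capsule is constructed

    def add_hop():
        capsule.append_hop({"cost_usd": 0.001, "duration_ms": 12.5})

    threads = [threading.Thread(target=add_hop) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # aggregate_cost_usd is now exactly 8 * 0.001; without the lock, updates could be lost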
kalibr/utils.py
CHANGED
@@ -38,8 +38,8 @@ def load_config_from_env() -> Dict[str, str]:
         "workflow_id": os.getenv("KALIBR_WORKFLOW_ID", "default-workflow"),
         "sandbox_id": os.getenv("SANDBOX_ID", "local"),
         "runtime_env": os.getenv("RUNTIME_ENV", "local"),
-        "api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://
-        "collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://
+        "api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://kalibr-backend.fly.dev/api/v1/traces"),
+        "collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest"),
     }
     return config
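The new defaults only apply when the variables are unset, so a deployment can still point at its own endpoints. A hedged sketch (the URL is hypothetical):

    import os
    from kalibr.utils import load_config_from_env

    os.environ["KALIBR_COLLECTOR_URL"] = "https://collector.internal.example/api/ingest"
    config = load_config_from_env()
    assert config["collector_url"] == "https://collector.internal.example/api/ingest"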