kalibr 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
kalibr/__init__.py CHANGED
@@ -56,7 +56,17 @@ from .collector import is_configured as is_collector_configured
  from .collector import (
  setup_collector,
  )
- from .context import get_parent_span_id, get_trace_id, new_trace_id, trace_context
+ from .context import (
+ get_parent_span_id,
+ get_trace_id,
+ new_trace_id,
+ trace_context,
+ # Goal context (v1.3.0)
+ goal,
+ set_goal,
+ get_goal,
+ clear_goal,
+ )
  from .cost_adapter import (
  AnthropicCostAdapter,
  BaseCostAdapter,
@@ -79,6 +89,8 @@ from .intelligence import (
  get_policy,
  report_outcome,
  get_recommendation,
+ register_path,
+ decide,
  )

  if os.getenv("KALIBR_AUTO_INSTRUMENT", "true").lower() == "true":
@@ -114,6 +126,11 @@ __all__ = [
  "get_trace_id",
  "get_parent_span_id",
  "new_trace_id",
+ # Goal Context (v1.3.0)
+ "goal",
+ "set_goal",
+ "get_goal",
+ "clear_goal",
  # Tracer
  "Tracer",
  "SpanContext",
@@ -144,4 +161,6 @@ __all__ = [
  "get_policy",
  "report_outcome",
  "get_recommendation",
+ "register_path",
+ "decide",
  ]
kalibr/context.py CHANGED
@@ -8,6 +8,7 @@ HTTP requests to SDK calls (OpenAI, Anthropic, Google).
  import random
  import string
  import uuid
+ from contextlib import contextmanager
  from contextvars import ContextVar
  from typing import Dict, Optional

@@ -130,3 +131,44 @@ def inject_kalibr_context_into_span(span: Span):
  span.set_attribute("kalibr.http_trace_id", ctx["trace_id"])
  if ctx.get("span_id"):
  span.set_attribute("kalibr.http_span_id", ctx["span_id"])
+
+
+ # ============================================================================
+ # Goal Context for Outcome Tracking (v1.3.0)
+ # ============================================================================
+
+ _goal_context: ContextVar[Optional[str]] = ContextVar("goal_context", default=None)
+
+
+ def set_goal(goal: str):
+ """Set the current goal for all subsequent Kalibr traces."""
+ _goal_context.set(goal)
+
+
+ def get_goal() -> Optional[str]:
+ """Get the current goal."""
+ return _goal_context.get()
+
+
+ def clear_goal():
+ """Clear the current goal."""
+ _goal_context.set(None)
+
+
+ @contextmanager
+ def goal(goal_name: str):
+ """Context manager to set goal for a block of code.
+
+ Usage:
+ with kalibr.goal("research_company"):
+ agent.run("Research Weights & Biases")
+ """
+ previous = get_goal()
+ set_goal(goal_name)
+ try:
+ yield
+ finally:
+ if previous:
+ set_goal(previous)
+ else:
+ clear_goal()
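
A minimal sketch of how the goal-context helpers added above behave, assuming kalibr 1.2.2 is installed; `run_research_agent` is a hypothetical stand-in for your own instrumented code. The kalibr_langchain handler changed later in this diff stamps every emitted event with whatever goal is active here.

```python
# Sketch: tagging traces with goals via the new context helpers.
# `run_research_agent` is a hypothetical placeholder for instrumented work.
from kalibr import goal, set_goal, get_goal, clear_goal

def run_research_agent(company: str) -> str:
    return f"report for {company}"  # placeholder for real, traced calls

# Context manager: the goal applies only inside the block, and any
# previously set goal is restored on exit (see the finally clause above).
set_goal("default_research")
with goal("research_company"):
    assert get_goal() == "research_company"
    run_research_agent("Weights & Biases")
assert get_goal() == "default_research"  # outer goal restored

# Manual control for long-lived workers.
set_goal("book_meeting")
run_research_agent("Acme")
clear_goal()
assert get_goal() is None
```
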
kalibr/intelligence.py CHANGED
@@ -4,7 +4,7 @@ This module enables the outcome-conditioned routing loop:
  1. Before executing: query get_policy() to get the best path for your goal
  2. After executing: call report_outcome() to teach Kalibr what worked

- Example:
+ Example - Policy-based routing:
  from kalibr import get_policy, report_outcome

  # Before executing - get best path
@@ -17,6 +17,17 @@ Example:
  goal="book_meeting",
  success=True
  )
+
+ Example - Path registration and intelligent routing:
+ from kalibr import register_path, decide
+
+ # Register paths for a goal
+ register_path(goal="book_meeting", model_id="gpt-4", tool_id="calendar_tool")
+ register_path(goal="book_meeting", model_id="claude-3-opus")
+
+ # Get intelligent routing decision
+ decision = decide(goal="book_meeting")
+ model = decision["model_id"] # Selected based on outcomes
  """

  from __future__ import annotations
@@ -64,6 +75,7 @@ class KalibrIntelligence:
  method: str,
  path: str,
  json: dict | None = None,
+ params: dict | None = None,
  ) -> httpx.Response:
  """Make authenticated request to intelligence API."""
  headers = {
@@ -73,7 +85,7 @@ class KalibrIntelligence:
  }

  url = f"{self.base_url}{path}"
- response = self._client.request(method, url, json=json, headers=headers)
+ response = self._client.request(method, url, json=json, params=params, headers=headers)
  response.raise_for_status()
  return response

@@ -139,6 +151,8 @@ class KalibrIntelligence:
  score: float | None = None,
  failure_reason: str | None = None,
  metadata: dict | None = None,
+ tool_id: str | None = None,
+ execution_params: dict | None = None,
  ) -> dict[str, Any]:
  """Report execution outcome for a goal.

@@ -152,6 +166,8 @@ class KalibrIntelligence:
  score: Optional quality score (0-1) for more granular feedback
  failure_reason: Optional reason for failure (helps with debugging)
  metadata: Optional additional context as a dict
+ tool_id: Optional tool that was used (e.g., "serper", "browserless")
+ execution_params: Optional execution parameters (e.g., {"temperature": 0.3})

  Returns:
  dict with:
@@ -184,6 +200,8 @@ class KalibrIntelligence:
  "score": score,
  "failure_reason": failure_reason,
  "metadata": metadata,
+ "tool_id": tool_id,
+ "execution_params": execution_params,
  },
  )
  return response.json()
@@ -230,6 +248,252 @@ class KalibrIntelligence:
  )
  return response.json()

+ # =========================================================================
+ # ROUTING METHODS
+ # =========================================================================
+
+ def register_path(
+ self,
+ goal: str,
+ model_id: str,
+ tool_id: str | None = None,
+ params: dict | None = None,
+ risk_level: str = "low",
+ ) -> dict[str, Any]:
+ """Register a new routing path for a goal.
+
+ Creates a path that maps a goal to a specific model (and optionally tool)
+ configuration. This path can then be selected by the decide() method.
+
+ Args:
+ goal: The goal this path is for (e.g., "book_meeting", "resolve_ticket")
+ model_id: The model identifier to use (e.g., "gpt-4", "claude-3-opus")
+ tool_id: Optional tool identifier if this path uses a specific tool
+ params: Optional parameters dict for the path configuration
+ risk_level: Risk level for this path - "low", "medium", or "high"
+
+ Returns:
+ dict with the created path including:
+ - path_id: Unique identifier for the path
+ - goal: The goal
+ - model_id: The model
+ - tool_id: The tool (if specified)
+ - params: The parameters (if specified)
+ - risk_level: The risk level
+ - created_at: Creation timestamp
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ path = intelligence.register_path(
+ goal="book_meeting",
+ model_id="gpt-4",
+ tool_id="calendar_tool",
+ risk_level="low"
+ )
+ print(f"Created path: {path['path_id']}")
+ """
+ response = self._request(
+ "POST",
+ "/api/v1/routing/paths",
+ json={
+ "goal": goal,
+ "model_id": model_id,
+ "tool_id": tool_id,
+ "params": params,
+ "risk_level": risk_level,
+ },
+ )
+ return response.json()
+
+ def list_paths(
+ self,
+ goal: str | None = None,
+ include_disabled: bool = False,
+ ) -> dict[str, Any]:
+ """List registered routing paths.
+
+ Args:
+ goal: Optional goal to filter paths by
+ include_disabled: Whether to include disabled paths (default False)
+
+ Returns:
+ dict with:
+ - paths: List of path objects
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ result = intelligence.list_paths(goal="book_meeting")
+ for path in result["paths"]:
+ print(f"{path['path_id']}: {path['model_id']}")
+ """
+ params = {}
+ if goal is not None:
+ params["goal"] = goal
+ if include_disabled:
+ params["include_disabled"] = "true"
+
+ response = self._request(
+ "GET",
+ "/api/v1/routing/paths",
+ params=params if params else None,
+ )
+ return response.json()
+
+ def disable_path(self, path_id: str) -> dict[str, Any]:
+ """Disable a routing path.
+
+ Disables a path so it won't be selected by decide(). The path
+ data is retained for historical analysis.
+
+ Args:
+ path_id: The unique identifier of the path to disable
+
+ Returns:
+ dict with:
+ - status: "disabled" if successful
+ - path_id: The disabled path ID
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ result = intelligence.disable_path("path_abc123")
+ print(f"Status: {result['status']}")
+ """
+ response = self._request(
+ "DELETE",
+ f"/api/v1/routing/paths/{path_id}",
+ )
+ return response.json()
+
+ def decide(
+ self,
+ goal: str,
+ task_risk_level: str = "low",
+ ) -> dict[str, Any]:
+ """Get routing decision for a goal.
+
+ Uses outcome data and exploration/exploitation strategy to decide
+ which path to use for achieving the specified goal.
+
+ Args:
+ goal: The goal to route for (e.g., "book_meeting")
+ task_risk_level: Risk tolerance for this task - "low", "medium", or "high"
+
+ Returns:
+ dict with:
+ - model_id: The selected model
+ - tool_id: The selected tool (if any)
+ - params: Additional parameters (if any)
+ - reason: Human-readable explanation of the decision
+ - confidence: Confidence score (0-1)
+ - is_exploration: Whether this is an exploration choice
+ - path_id: The selected path ID
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ decision = intelligence.decide(goal="book_meeting")
+ model = decision["model_id"]
+ print(f"Using {model} ({decision['reason']})")
+ """
+ response = self._request(
+ "POST",
+ "/api/v1/routing/decide",
+ json={
+ "goal": goal,
+ "task_risk_level": task_risk_level,
+ },
+ )
+ return response.json()
+
+ def set_exploration_config(
+ self,
+ goal: str = "*",
+ exploration_rate: float = 0.1,
+ min_samples_before_exploit: int = 20,
+ rollback_threshold: float = 0.3,
+ staleness_days: int = 7,
+ exploration_on_high_risk: bool = False,
+ ) -> dict[str, Any]:
+ """Set exploration/exploitation configuration for routing.
+
+ Configures how the decide() method balances exploring new paths
+ vs exploiting known good paths.
+
+ Args:
+ goal: Goal to configure, or "*" for default config
+ exploration_rate: Probability of exploring (0-1, default 0.1)
+ min_samples_before_exploit: Minimum outcomes before exploiting (default 20)
+ rollback_threshold: Performance drop threshold to rollback (default 0.3)
+ staleness_days: Days before reexploring stale paths (default 7)
+ exploration_on_high_risk: Whether to explore on high-risk tasks (default False)
+
+ Returns:
+ dict with the saved configuration
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ config = intelligence.set_exploration_config(
+ goal="book_meeting",
+ exploration_rate=0.2,
+ min_samples_before_exploit=10
+ )
+ """
+ response = self._request(
+ "POST",
+ "/api/v1/routing/config",
+ json={
+ "goal": goal,
+ "exploration_rate": exploration_rate,
+ "min_samples_before_exploit": min_samples_before_exploit,
+ "rollback_threshold": rollback_threshold,
+ "staleness_days": staleness_days,
+ "exploration_on_high_risk": exploration_on_high_risk,
+ },
+ )
+ return response.json()
+
+ def get_exploration_config(self, goal: str | None = None) -> dict[str, Any]:
+ """Get exploration/exploitation configuration.
+
+ Args:
+ goal: Optional goal to get config for (returns default if not found)
+
+ Returns:
+ dict with configuration values:
+ - goal: The goal this config applies to
+ - exploration_rate: Exploration probability
+ - min_samples_before_exploit: Minimum samples before exploiting
+ - rollback_threshold: Rollback threshold
+ - staleness_days: Staleness threshold in days
+ - exploration_on_high_risk: Whether exploration is allowed on high-risk
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ config = intelligence.get_exploration_config(goal="book_meeting")
+ print(f"Exploration rate: {config['exploration_rate']}")
+ """
+ params = {}
+ if goal is not None:
+ params["goal"] = goal
+
+ response = self._request(
+ "GET",
+ "/api/v1/routing/config",
+ params=params if params else None,
+ )
+ return response.json()
+
  def close(self):
  """Close the HTTP client."""
  self._client.close()
@@ -291,7 +555,7 @@ def report_outcome(trace_id: str, goal: str, success: bool, tenant_id: str | Non
  goal: The goal this execution was trying to achieve
  success: Whether the goal was achieved
  tenant_id: Optional tenant ID override (default: uses KALIBR_TENANT_ID env var)
- **kwargs: Additional arguments (score, failure_reason, metadata)
+ **kwargs: Additional arguments (score, failure_reason, metadata, tool_id, execution_params)

  Returns:
  Response dict with status confirmation
@@ -315,3 +579,72 @@ def get_recommendation(task_type: str, **kwargs) -> dict[str, Any]:
  See KalibrIntelligence.get_recommendation for full documentation.
  """
  return _get_intelligence_client().get_recommendation(task_type, **kwargs)
+
+
+ def register_path(
+ goal: str,
+ model_id: str,
+ tool_id: str | None = None,
+ params: dict | None = None,
+ risk_level: str = "low",
+ tenant_id: str | None = None,
+ ) -> dict[str, Any]:
+ """Register a new routing path for a goal.
+
+ Convenience function that uses the default intelligence client.
+ See KalibrIntelligence.register_path for full documentation.
+
+ Args:
+ goal: The goal this path is for
+ model_id: The model identifier to use
+ tool_id: Optional tool identifier
+ params: Optional parameters dict
+ risk_level: Risk level - "low", "medium", or "high"
+ tenant_id: Optional tenant ID override
+
+ Returns:
+ dict with the created path
+
+ Example:
+ from kalibr import register_path
+
+ path = register_path(
+ goal="book_meeting",
+ model_id="gpt-4",
+ tool_id="calendar_tool"
+ )
+ """
+ client = _get_intelligence_client()
+ if tenant_id:
+ client = KalibrIntelligence(tenant_id=tenant_id)
+ return client.register_path(goal, model_id, tool_id, params, risk_level)
+
+
+ def decide(
+ goal: str,
+ task_risk_level: str = "low",
+ tenant_id: str | None = None,
+ ) -> dict[str, Any]:
+ """Get routing decision for a goal.
+
+ Convenience function that uses the default intelligence client.
+ See KalibrIntelligence.decide for full documentation.
+
+ Args:
+ goal: The goal to route for
+ task_risk_level: Risk tolerance - "low", "medium", or "high"
+ tenant_id: Optional tenant ID override
+
+ Returns:
+ dict with model_id, tool_id, params, reason, confidence, etc.
+
+ Example:
+ from kalibr import decide
+
+ decision = decide(goal="book_meeting")
+ model = decision["model_id"]
+ """
+ client = _get_intelligence_client()
+ if tenant_id:
+ client = KalibrIntelligence(tenant_id=tenant_id)
+ return client.decide(goal, task_risk_level)
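
Pulling the additions above together, a minimal sketch of the outcome-conditioned routing loop using the module-level helpers re-exported from `kalibr`. `run_with_model` is a hypothetical stand-in for your own execution code, and `KALIBR_API_KEY` / `KALIBR_TENANT_ID` are assumed to be configured in the environment.

```python
# Sketch: register paths, ask for a routing decision, execute, report the outcome.
from kalibr import register_path, decide, report_outcome, new_trace_id

def run_with_model(model_id: str, tool_id, params) -> bool:
    # Hypothetical executor: call the selected model/tool and judge success.
    return True

# 1. Register candidate paths for the goal (one call per path).
register_path(goal="book_meeting", model_id="gpt-4", tool_id="calendar_tool")
register_path(goal="book_meeting", model_id="claude-3-opus", risk_level="medium")

# 2. Ask for a routing decision before executing.
decision = decide(goal="book_meeting", task_risk_level="low")
print(decision["model_id"], decision["reason"], decision["is_exploration"])

# 3. Execute with the selected path, then close the loop with the outcome.
trace_id = new_trace_id()  # in real use, the trace_id of the traced execution
success = run_with_model(decision["model_id"], decision.get("tool_id"), decision.get("params"))
report_outcome(
    trace_id=trace_id,
    goal="book_meeting",
    success=success,
    tool_id=decision.get("tool_id"),        # new in this release
    execution_params={"temperature": 0.3},  # new in this release
)
```

Exploration behaviour (exploration rate, minimum samples before exploiting, rollback threshold) is tuned per goal through `KalibrIntelligence.set_exploration_config`, which has no module-level wrapper in this diff.
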
kalibr-1.2.0.dist-info/METADATA → kalibr-1.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.4
+ Metadata-Version: 2.2
  Name: kalibr
- Version: 1.2.0
+ Version: 1.2.2
  Summary: Unified LLM Observability & Multi-Model AI Integration Framework - Deploy to GPT, Claude, Gemini, Copilot with full telemetry.
  Author-email: Kalibr Team <support@kalibr.systems>
  License: Apache-2.0
@@ -64,7 +64,6 @@ Requires-Dist: pytest>=7.4.0; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
  Requires-Dist: black>=23.0.0; extra == "dev"
  Requires-Dist: ruff>=0.1.0; extra == "dev"
- Dynamic: license-file

  # Kalibr Python SDK

@@ -188,6 +187,41 @@ policy = get_policy(
  )
  ```

+ ### Intelligent Routing with decide()
+
+ Register execution paths and let Kalibr decide the best strategy:
+
+ ```python
+ from kalibr import register_path, decide
+
+ # Register available paths
+ register_path(goal="book_meeting", model_id="gpt-4o", tool_id="calendar_api")
+ register_path(goal="book_meeting", model_id="claude-3-sonnet")
+
+ # Get intelligent routing decision
+ decision = decide(goal="book_meeting")
+ model = decision["model_id"] # Selected based on outcomes
+ tool = decision.get("tool_id") # If tool routing enabled
+ print(decision["exploration"]) # True if exploring new paths
+ ```
+
+ ### Goal Context
+
+ Tag traces with goals for outcome tracking:
+
+ ```python
+ from kalibr import goal, set_goal, get_goal, clear_goal
+
+ # Context manager (recommended)
+ with goal("book_meeting"):
+ response = openai.chat.completions.create(...)
+
+ # Or manual control
+ set_goal("book_meeting")
+ response = openai.chat.completions.create(...)
+ clear_goal()
+ ```
+
  ## TraceCapsule - Cross-Agent Tracing

  Propagate trace context across agent boundaries:
@@ -285,7 +319,7 @@ Configure via environment variables:
  | `KALIBR_API_KEY` | API key for authentication | *Required* |
  | `KALIBR_TENANT_ID` | Tenant identifier | `default` |
  | `KALIBR_COLLECTOR_URL` | Collector endpoint URL | `https://api.kalibr.systems/api/ingest` |
- | `KALIBR_INTELLIGENCE_URL` | Intelligence API URL | `https://kalibr-intelligence.fly.dev` |
+ | `KALIBR_INTELLIGENCE_URL` | Intelligence API URL | `https://dashboard.kalibr.systems/intelligence` |
  | `KALIBR_SERVICE_NAME` | Service name for spans | `kalibr-app` |
  | `KALIBR_ENVIRONMENT` | Environment (prod/staging/dev) | `prod` |
  | `KALIBR_WORKFLOW_ID` | Workflow identifier | `default` |
@@ -294,20 +328,20 @@ Configure via environment variables:
  ## CLI Commands

  ```bash
- # Serve your app with tracing
- kalibr serve myapp.py
+ # Show version
+ kalibr version

- # Run with managed runtime
- kalibr run myapp.py --port 8000
+ # Validate configuration
+ kalibr validate

- # Deploy to cloud platforms
- kalibr deploy myapp.py --runtime fly.io
+ # Check connection status
+ kalibr status

- # Fetch trace capsule by ID
- kalibr capsule <trace-id>
+ # Package for deployment
+ kalibr package

- # Show version
- kalibr version
+ # Update schemas
+ kalibr update_schemas
  ```

  ## Supported Providers
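
Since the README's environment-variable table and the `KALIBR_INTELLIGENCE_URL` default changed above, a minimal sketch of configuring the SDK from Python before import; the values are illustrative, and the self-hosting scenario is an assumption.

```python
# Sketch: setting the documented configuration variables before importing kalibr.
import os

os.environ.setdefault("KALIBR_API_KEY", "sk-kalibr-...")   # required; illustrative value
os.environ.setdefault("KALIBR_TENANT_ID", "acme")
os.environ.setdefault("KALIBR_SERVICE_NAME", "booking-service")
os.environ.setdefault("KALIBR_ENVIRONMENT", "staging")
# The intelligence API now defaults to the dashboard URL; override only if needed.
os.environ.setdefault("KALIBR_INTELLIGENCE_URL", "https://dashboard.kalibr.systems/intelligence")

import kalibr  # auto-instrumentation runs on import when KALIBR_AUTO_INSTRUMENT=true
```
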
kalibr-1.2.0.dist-info/RECORD → kalibr-1.2.2.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
- kalibr/__init__.py,sha256=16g-LPXiB_10TUcUeNzTy_EL5npqCFGYWJF-IhWpWDY,4889
+ kalibr/__init__.py,sha256=N0FRcMM5Rq845MPDjogsY1iRZu7K7NoUHAGqW8-JQDQ,5148
  kalibr/__main__.py,sha256=jO96I4pqinwHg7ONRvNVKbySBh5pSIhOAiNrgSQrNlY,110
  kalibr/capsule_middleware.py,sha256=pXG_wORgCqo3wHjtkn_zY4doLyiDmTwJtB7XiZNnbPk,3163
  kalibr/client.py,sha256=6D1paakE6zgWJStaow3ak9t0R8afodQhSSpUO3WTs_8,9732
  kalibr/collector.py,sha256=rtTKQLe6NkDSblBIfFooQ-ESFcP0Q1HUp4Bcqqg8JFo,5818
- kalibr/context.py,sha256=hBxWXZx0gcmeGqDMS1rstke_DmrujoRBIsfrG26WKUY,3755
+ kalibr/context.py,sha256=FgN9-WyMQMDgg2Vqwje4r2_jKRvnMeI8t4fIE1VRn_8,4777
  kalibr/cost_adapter.py,sha256=NerJ7ywaJjBn97gVFr7qKX7318e3Kmy2qqeNlGl9nPE,6439
  kalibr/decorators.py,sha256=m-XBXxWMDVrzaNsljACiGmeGhgiHj_MqSfj6OGK3L5I,4380
- kalibr/intelligence.py,sha256=oW_GFDHj5NEa-9L2y4jZcDsEQt81P77PpCuY--aIzLY,10889
+ kalibr/intelligence.py,sha256=JOckaykWrMloZV_MH1e9kvVxPRQKavIgLSdgqiJjxC4,22158
  kalibr/kalibr.py,sha256=cNXC3W_TX5SvGsy1lRopkwFqsHOpyd1kkVjEMOz1Yr4,6084
  kalibr/kalibr_app.py,sha256=ItZwEh0FZPx9_BE-zPQajC2yxI2y9IHYwJD0k9tbHvY,2773
  kalibr/models.py,sha256=HwD_-iysZMSnCzMQYO1Qcf0aeXySupY7yJeBwl_dLS0,1024
@@ -33,17 +33,17 @@ kalibr/instrumentation/openai_instr.py,sha256=UU0Pi1Gq1FqgetYWDacQhNFdjemuPrc0hR
  kalibr/instrumentation/registry.py,sha256=sfQnXhbPOI5LVon2kFhe8KcXQwWmuKW1XUe50B2AaBc,4749
  kalibr/middleware/__init__.py,sha256=qyDUn_irAX67MS-IkuDVxg4RmFnJHDf_BfIT3qfGoBI,115
  kalibr/middleware/auto_tracer.py,sha256=ZBSBM0O3a6rwVzfik1n5NUmQDah8_iaf86rU64aPYT4,13037
- kalibr-1.2.0.dist-info/licenses/LICENSE,sha256=5mwAnB38l3_PjmOQn6_L6cZnJvus143DUjMBPIH1yso,10768
  kalibr_crewai/__init__.py,sha256=b0HFTiE80eArtSMBOIEKu1JM6KU0tCjEylKCVVVF29Q,1796
- kalibr_crewai/callbacks.py,sha256=UBgGw0vdT0Jf9x8fNrHfsUR4unqX4nxNFta07OoSgaI,17162
- kalibr_crewai/instrumentor.py,sha256=AfnK5t7Ynb-7ytZF7XdOSPpr0o8hDf3sFkyzhc1ogY0,19465
+ kalibr_crewai/callbacks.py,sha256=_d1M4J-6XfKqrVIxnOgOQu57jpFKVv-VIsmPV0HNgZ4,20419
+ kalibr_crewai/instrumentor.py,sha256=x26v0RcriImkPiC8KB1Hmez1XOYLcDa9o-g35BMu5Ek,24420
  kalibr_langchain/__init__.py,sha256=O4XYVyhLp1v-Y1kGZw3zD-tUK9wp0UX8Jt6oN0QTHN4,1373
  kalibr_langchain/async_callback.py,sha256=_Mj_YrKbULNtfxixZ7iwiHyWEV9l178ZA5Oy5A5Pakk,27748
- kalibr_langchain/callback.py,sha256=VVPAvksS8TFMC21QlGj-1NRFsWnkLKPyzqhfA3kmT4c,34265
+ kalibr_langchain/callback.py,sha256=SNM1aHOXdG55grHmGyTwbXOeM6hjZTub2REiZD2H-d8,35216
  kalibr_openai_agents/__init__.py,sha256=wL59LzGstptKigfQDrKKt_7hcMO1JGVQtVAsE0lz-Zw,1367
  kalibr_openai_agents/processor.py,sha256=F550sdRf3rpguP1yOlgAUQWDLPBy4hSACV3-zOyCpOU,18257
- kalibr-1.2.0.dist-info/METADATA,sha256=45tJcZAcqg575gr2HSIMRArUhbz9juYec_Mi8LdiW9E,10339
- kalibr-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- kalibr-1.2.0.dist-info/entry_points.txt,sha256=Kojlc6WRX8V1qS9lOMdDPZpTUVHCtzGtHqXusErgmLY,47
- kalibr-1.2.0.dist-info/top_level.txt,sha256=dIfBOWUnnHGFDwgz5zfIx5_0bU3wOUgAbYr4JcFHZmo,59
- kalibr-1.2.0.dist-info/RECORD,,
+ kalibr-1.2.2.dist-info/LICENSE,sha256=5mwAnB38l3_PjmOQn6_L6cZnJvus143DUjMBPIH1yso,10768
+ kalibr-1.2.2.dist-info/METADATA,sha256=XqhSOZekE5D5WQVT0pH2h1HCZWXiruGM9CqOelAZnf0,11201
+ kalibr-1.2.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+ kalibr-1.2.2.dist-info/entry_points.txt,sha256=Kojlc6WRX8V1qS9lOMdDPZpTUVHCtzGtHqXusErgmLY,47
+ kalibr-1.2.2.dist-info/top_level.txt,sha256=dIfBOWUnnHGFDwgz5zfIx5_0bU3wOUgAbYr4JcFHZmo,59
+ kalibr-1.2.2.dist-info/RECORD,,
kalibr-1.2.0.dist-info/WHEEL → kalibr-1.2.2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (76.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
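
The two kalibr_crewai sections that follow (callbacks.py and instrumentor.py) teach the CrewAI integration to pull the real provider/model from each agent's LLM configuration and to compute and aggregate cost. A minimal sketch of how that wiring is used, assuming `crewai` is installed; the Agent/Task/Crew arguments are illustrative, and the no-argument `KalibrCrewAIInstrumentor()` constructor is an assumption (its parameters are not shown in this diff).

```python
# Sketch: CrewAI auto-instrumentation plus the new agent-aware step callback.
from crewai import Agent, Crew, Task
from kalibr_crewai import KalibrAgentCallback
from kalibr_crewai.instrumentor import KalibrCrewAIInstrumentor

# Patches crew.kickoff / agent.execute_task / task.execute_sync; crew-level
# events now carry the summed child token counts and cost_usd.
KalibrCrewAIInstrumentor().instrument()

callback = KalibrAgentCallback(service="research-app", workflow_id="demo")
researcher = Agent(
    role="Researcher",
    goal="Find information",
    backstory="Analyst for the demo crew.",
    llm="openai/gpt-4o-mini",   # string form is split into provider + model
    step_callback=callback,
)
# New in this release: give the callback the agent so step events report the
# real provider/model and a non-zero cost instead of "crewai"/"agent"/0.0.
callback.set_agent(researcher)

task = Task(
    description="Research Weights & Biases",
    expected_output="A short summary",
    agent=researcher,
)
Crew(agents=[researcher], tasks=[task]).kickoff()
```
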
kalibr_crewai/callbacks.py CHANGED
@@ -60,6 +60,72 @@ def _get_provider_from_model(model: str) -> str:
  return "openai"


+ def _extract_model_from_agent(agent) -> tuple[str, str]:
+ """Extract model name and provider from agent's LLM config.
+
+ Args:
+ agent: CrewAI agent instance
+
+ Returns:
+ Tuple of (model_name, provider)
+ """
+ model_name = "unknown"
+ provider = "openai"
+
+ if not hasattr(agent, "llm"):
+ return model_name, provider
+
+ llm = agent.llm
+
+ # Case 1: LLM is a string like "openai/gpt-4o-mini" or "gpt-4"
+ if isinstance(llm, str):
+ if "/" in llm:
+ parts = llm.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ model_name = llm
+ provider = _get_provider_from_model(llm)
+ return model_name, provider
+
+ # Case 2: LLM has model or model_name attribute
+ if hasattr(llm, "model"):
+ model_name = str(llm.model)
+ elif hasattr(llm, "model_name"):
+ model_name = str(llm.model_name)
+
+ # Parse provider from model string if it contains "/"
+ if "/" in model_name:
+ parts = model_name.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ provider = _get_provider_from_model(model_name)
+
+ return model_name, provider
+
+
+ def _calculate_cost(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
+ """Calculate cost using CostAdapterFactory.
+
+ Args:
+ provider: Provider name (openai, anthropic, etc.)
+ model: Model name
+ input_tokens: Number of input tokens
+ output_tokens: Number of output tokens
+
+ Returns:
+ Cost in USD
+ """
+ if CostAdapterFactory is None:
+ return 0.0
+
+ try:
+ return CostAdapterFactory.compute_cost(provider, model, input_tokens, output_tokens)
+ except Exception:
+ return 0.0
+
+
  class EventBatcher:
  """Shared event batching for callbacks."""

@@ -198,6 +264,7 @@ class KalibrAgentCallback:
  service: Service name
  workflow_id: Workflow identifier
  metadata: Additional metadata for all events
+ agent: Optional agent reference for model extraction

  Usage:
  from kalibr_crewai import KalibrAgentCallback
@@ -210,6 +277,7 @@ class KalibrAgentCallback:
  goal="Find information",
  step_callback=callback,
  )
+ callback.set_agent(agent) # Set agent reference for model extraction
  """

  def __init__(
@@ -221,6 +289,7 @@ class KalibrAgentCallback:
  service: Optional[str] = None,
  workflow_id: Optional[str] = None,
  metadata: Optional[Dict[str, Any]] = None,
+ agent: Optional[Any] = None,
  ):
  self.api_key = api_key or os.getenv("KALIBR_API_KEY", "")
  self.endpoint = endpoint or os.getenv(
@@ -232,6 +301,7 @@ class KalibrAgentCallback:
  self.service = service or os.getenv("KALIBR_SERVICE", "crewai-app")
  self.workflow_id = workflow_id or os.getenv("KALIBR_WORKFLOW_ID", "default-workflow")
  self.default_metadata = metadata or {}
+ self._agent = agent

  # Get shared batcher
  self._batcher = EventBatcher.get_instance(
@@ -244,6 +314,14 @@ class KalibrAgentCallback:
  self._agent_span_id: Optional[str] = None
  self._step_count: int = 0

+ def set_agent(self, agent: Any) -> None:
+ """Set the agent reference for model extraction.
+
+ Args:
+ agent: CrewAI agent instance
+ """
+ self._agent = agent
+
  def __call__(self, step_output: Any) -> None:
  """Called after each agent step.

@@ -271,6 +349,12 @@ class KalibrAgentCallback:

  span_id = str(uuid.uuid4())

+ # Extract model from agent if available
+ model_name = "unknown"
+ provider = "openai"
+ if self._agent:
+ model_name, provider = _extract_model_from_agent(self._agent)
+
  # Extract step information
  step_type = "agent_step"
  operation = "agent_step"
@@ -307,8 +391,11 @@ class KalibrAgentCallback:
  output_text = str(step_output)

  # Count tokens
- input_tokens = _count_tokens(tool_input or "", "gpt-4")
- output_tokens = _count_tokens(output_text, "gpt-4")
+ input_tokens = _count_tokens(tool_input or "", model_name)
+ output_tokens = _count_tokens(output_text, model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)

  # Build event
  event = {
@@ -318,9 +405,9 @@ class KalibrAgentCallback:
  "parent_span_id": self._agent_span_id,
  "tenant_id": self.tenant_id,
  "workflow_id": self.workflow_id,
- "provider": "crewai",
- "model_id": "agent",
- "model_name": "crewai-agent",
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": operation,
  "endpoint": operation,
  "duration_ms": 0, # Step timing not available
@@ -328,8 +415,8 @@ class KalibrAgentCallback:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0, # Cost tracked at LLM level
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "timestamp": now.isoformat(),
  "ts_start": now.isoformat(),
@@ -376,6 +463,7 @@ class KalibrTaskCallback:
  service: Service name
  workflow_id: Workflow identifier
  metadata: Additional metadata for all events
+ agent: Optional agent reference for model extraction

  Usage:
  from kalibr_crewai import KalibrTaskCallback
@@ -388,6 +476,7 @@ class KalibrTaskCallback:
  agent=my_agent,
  callback=callback,
  )
+ callback.set_agent(my_agent) # Set agent reference for model extraction
  """

  def __init__(
@@ -399,6 +488,7 @@ class KalibrTaskCallback:
  service: Optional[str] = None,
  workflow_id: Optional[str] = None,
  metadata: Optional[Dict[str, Any]] = None,
+ agent: Optional[Any] = None,
  ):
  self.api_key = api_key or os.getenv("KALIBR_API_KEY", "")
  self.endpoint = endpoint or os.getenv(
@@ -410,6 +500,7 @@ class KalibrTaskCallback:
  self.service = service or os.getenv("KALIBR_SERVICE", "crewai-app")
  self.workflow_id = workflow_id or os.getenv("KALIBR_WORKFLOW_ID", "default-workflow")
  self.default_metadata = metadata or {}
+ self._agent = agent

  # Get shared batcher
  self._batcher = EventBatcher.get_instance(
@@ -421,6 +512,14 @@ class KalibrTaskCallback:
  self._trace_id: Optional[str] = None
  self._crew_span_id: Optional[str] = None

+ def set_agent(self, agent: Any) -> None:
+ """Set the agent reference for model extraction.
+
+ Args:
+ agent: CrewAI agent instance
+ """
+ self._agent = agent
+
  def __call__(self, task_output: Any) -> None:
  """Called when task completes.

@@ -467,9 +566,18 @@ class KalibrTaskCallback:
  if hasattr(task_output, "agent"):
  agent_role = str(task_output.agent)

+ # Extract model from agent if available
+ model_name = "unknown"
+ provider = "openai"
+ if self._agent:
+ model_name, provider = _extract_model_from_agent(self._agent)
+
  # Token counting
- input_tokens = _count_tokens(description, "gpt-4")
- output_tokens = _count_tokens(raw_output, "gpt-4")
+ input_tokens = _count_tokens(description, model_name)
+ output_tokens = _count_tokens(raw_output, model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)

  # Build operation name from description
  operation = "task_complete"
@@ -486,9 +594,9 @@ class KalibrTaskCallback:
  "parent_span_id": self._crew_span_id,
  "tenant_id": self.tenant_id,
  "workflow_id": self.workflow_id,
- "provider": "crewai",
- "model_id": "task",
- "model_name": agent_role,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": operation,
  "endpoint": "task_complete",
  "duration_ms": 0, # Task timing not available in callback
@@ -496,8 +604,8 @@ class KalibrTaskCallback:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0, # Cost tracked at LLM level
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": "success",
  "timestamp": now.isoformat(),
  "ts_start": now.isoformat(),
kalibr_crewai/instrumentor.py CHANGED
@@ -21,6 +21,72 @@ except ImportError:
  CostAdapterFactory = None


+ def _extract_model_from_agent(agent) -> tuple[str, str]:
+ """Extract model name and provider from agent's LLM config.
+
+ Args:
+ agent: CrewAI agent instance
+
+ Returns:
+ Tuple of (model_name, provider)
+ """
+ model_name = "unknown"
+ provider = "openai"
+
+ if not hasattr(agent, "llm"):
+ return model_name, provider
+
+ llm = agent.llm
+
+ # Case 1: LLM is a string like "openai/gpt-4o-mini" or "gpt-4"
+ if isinstance(llm, str):
+ if "/" in llm:
+ parts = llm.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ model_name = llm
+ provider = _get_provider_from_model(llm)
+ return model_name, provider
+
+ # Case 2: LLM has model or model_name attribute
+ if hasattr(llm, "model"):
+ model_name = str(llm.model)
+ elif hasattr(llm, "model_name"):
+ model_name = str(llm.model_name)
+
+ # Parse provider from model string if it contains "/"
+ if "/" in model_name:
+ parts = model_name.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ provider = _get_provider_from_model(model_name)
+
+ return model_name, provider
+
+
+ def _calculate_cost(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
+ """Calculate cost using CostAdapterFactory.
+
+ Args:
+ provider: Provider name (openai, anthropic, etc.)
+ model: Model name
+ input_tokens: Number of input tokens
+ output_tokens: Number of output tokens
+
+ Returns:
+ Cost in USD
+ """
+ if CostAdapterFactory is None:
+ return 0.0
+
+ try:
+ return CostAdapterFactory.compute_cost(provider, model, input_tokens, output_tokens)
+ except Exception:
+ return 0.0
+
+
  class KalibrCrewAIInstrumentor:
  """Auto-instrumentation for CrewAI.

@@ -84,6 +150,10 @@ class KalibrCrewAIInstrumentor:
  # Instrumentation state
  self._is_instrumented = False

+ # Accumulated metrics for crew-level aggregation
+ self._accumulated_tokens = {"input": 0, "output": 0}
+ self._accumulated_cost = 0.0
+
  def instrument(self) -> bool:
  """Instrument CrewAI classes.

@@ -170,11 +240,22 @@ class KalibrCrewAIInstrumentor:
  start_time = time.time()
  ts_start = datetime.now(timezone.utc)

+ # Reset accumulators before crew execution
+ instrumentor._accumulated_tokens = {"input": 0, "output": 0}
+ instrumentor._accumulated_cost = 0.0
+
  # Capture crew info
  crew_name = getattr(crew_self, "name", None) or "unnamed_crew"
- agent_count = len(getattr(crew_self, "agents", []))
+ agents = getattr(crew_self, "agents", [])
+ agent_count = len(agents)
  task_count = len(getattr(crew_self, "tasks", []))

+ # Extract model from first agent if available
+ model_name = "unknown"
+ provider = "crewai"
+ if agents:
+ model_name, provider = _extract_model_from_agent(agents[0])
+
  status = "success"
  error_type = None
  error_message = None
@@ -199,7 +280,13 @@ class KalibrCrewAIInstrumentor:
  if instrumentor.capture_output and result is not None:
  output_preview = str(result)[:500]

- # Create event
+ # Get accumulated metrics from child agent/task executions
+ input_tokens = instrumentor._accumulated_tokens["input"]
+ output_tokens = instrumentor._accumulated_tokens["output"]
+ total_tokens = input_tokens + output_tokens
+ cost_usd = instrumentor._accumulated_cost
+
+ # Create event with aggregated metrics
  event = {
  "schema_version": "1.0",
  "trace_id": trace_id,
@@ -207,18 +294,18 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "crew",
- "model_name": crew_name,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"crew:{crew_name}",
  "endpoint": "crew.kickoff",
  "duration_ms": duration_ms,
  "latency_ms": duration_ms,
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "input_tokens": input_tokens,
+ "output_tokens": output_tokens,
+ "total_tokens": total_tokens,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
@@ -255,10 +342,21 @@ class KalibrCrewAIInstrumentor:
  start_time = time.time()
  ts_start = datetime.now(timezone.utc)

+ # Reset accumulators before crew execution
+ instrumentor._accumulated_tokens = {"input": 0, "output": 0}
+ instrumentor._accumulated_cost = 0.0
+
  crew_name = getattr(crew_self, "name", None) or "unnamed_crew"
- agent_count = len(getattr(crew_self, "agents", []))
+ agents = getattr(crew_self, "agents", [])
+ agent_count = len(agents)
  task_count = len(getattr(crew_self, "tasks", []))

+ # Extract model from first agent if available
+ model_name = "unknown"
+ provider = "crewai"
+ if agents:
+ model_name, provider = _extract_model_from_agent(agents[0])
+
  status = "success"
  error_type = None
  error_message = None
@@ -282,6 +380,13 @@ class KalibrCrewAIInstrumentor:
  if instrumentor.capture_output and result is not None:
  output_preview = str(result)[:500]

+ # Get accumulated metrics from child agent/task executions
+ input_tokens = instrumentor._accumulated_tokens["input"]
+ output_tokens = instrumentor._accumulated_tokens["output"]
+ total_tokens = input_tokens + output_tokens
+ cost_usd = instrumentor._accumulated_cost
+
+ # Create event with aggregated metrics
  event = {
  "schema_version": "1.0",
  "trace_id": trace_id,
@@ -289,18 +394,18 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "crew",
- "model_name": crew_name,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"crew:{crew_name}",
  "endpoint": "crew.kickoff_async",
  "duration_ms": duration_ms,
  "latency_ms": duration_ms,
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "input_tokens": input_tokens,
+ "output_tokens": output_tokens,
+ "total_tokens": total_tokens,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
@@ -341,6 +446,9 @@ class KalibrCrewAIInstrumentor:
  role = getattr(agent_self, "role", "unknown")
  goal = getattr(agent_self, "goal", "")

+ # Extract model from agent's LLM config
+ model_name, provider = _extract_model_from_agent(agent_self)
+
  # Get task info
  task_description = ""
  if hasattr(task, "description"):
@@ -370,8 +478,16 @@ class KalibrCrewAIInstrumentor:
  output_preview = str(result)[:500]

  # Token estimation
- input_tokens = _count_tokens(task_description + goal, "gpt-4")
- output_tokens = _count_tokens(output_preview or "", "gpt-4")
+ input_tokens = _count_tokens(task_description + goal, model_name)
+ output_tokens = _count_tokens(output_preview or "", model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)
+
+ # Accumulate metrics for crew-level aggregation
+ instrumentor._accumulated_tokens["input"] += input_tokens
+ instrumentor._accumulated_tokens["output"] += output_tokens
+ instrumentor._accumulated_cost += cost_usd

  event = {
  "schema_version": "1.0",
@@ -380,9 +496,9 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "agent",
- "model_name": role,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"agent:{role}",
  "endpoint": "agent.execute_task",
  "duration_ms": duration_ms,
@@ -390,8 +506,8 @@ class KalibrCrewAIInstrumentor:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
@@ -430,6 +546,13 @@ class KalibrCrewAIInstrumentor:
  description = getattr(task_self, "description", "")
  expected_output = getattr(task_self, "expected_output", "")

+ # Try to extract model from task's agent
+ model_name = "unknown"
+ provider = "openai"
+ agent = getattr(task_self, "agent", None)
+ if agent:
+ model_name, provider = _extract_model_from_agent(agent)
+
  status = "success"
  error_type = None
  error_message = None
@@ -456,8 +579,16 @@ class KalibrCrewAIInstrumentor:
  else:
  output_preview = str(result)[:500]

- input_tokens = _count_tokens(description, "gpt-4")
- output_tokens = _count_tokens(output_preview or "", "gpt-4")
+ input_tokens = _count_tokens(description, model_name)
+ output_tokens = _count_tokens(output_preview or "", model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)
+
+ # Accumulate metrics for crew-level aggregation
+ instrumentor._accumulated_tokens["input"] += input_tokens
+ instrumentor._accumulated_tokens["output"] += output_tokens
+ instrumentor._accumulated_cost += cost_usd

  event = {
  "schema_version": "1.0",
@@ -466,9 +597,9 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "task",
- "model_name": "crewai-task",
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"task:{description[:30]}..." if len(description) > 30 else f"task:{description}",
  "endpoint": "task.execute_sync",
  "duration_ms": duration_ms,
@@ -476,8 +607,8 @@ class KalibrCrewAIInstrumentor:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
kalibr_langchain/callback.py CHANGED
@@ -29,6 +29,8 @@ try:
  except ImportError:
  CostAdapterFactory = None

+ from kalibr.context import get_goal
+
  # Import tiktoken for token counting
  try:
  import tiktoken
@@ -288,6 +290,25 @@ class KalibrCallbackHandler(BaseCallbackHandler):
  # Compute cost
  cost_usd = self._compute_cost(provider, model, input_tokens, output_tokens)

+ # Extract tool_id from operation if this is a tool span
+ tool_id = ""
+ tool_input = ""
+ tool_output = ""
+
+ if span.get("span_type") == "tool":
+ operation = span.get("operation", "")
+ if operation.startswith("tool:"):
+ tool_id = operation[5:] # Extract "browserless" from "tool:browserless"
+
+ # Get tool input/output from span (truncate to 10KB)
+ if span.get("input"):
+ tool_input = str(span["input"])[:10000]
+ if metadata and metadata.get("output"):
+ tool_output = str(metadata["output"])[:10000]
+
+ # Get goal from context (thread-safe)
+ current_goal = get_goal() or ""
+
  # Build event
  event = {
  "schema_version": "1.0",
@@ -318,6 +339,11 @@ class KalibrCallbackHandler(BaseCallbackHandler):
  "service": self.service,
  "runtime_env": os.getenv("RUNTIME_ENV", "local"),
  "sandbox_id": os.getenv("SANDBOX_ID", "local"),
+ # New fields for tool/goal tracking
+ "tool_id": tool_id,
+ "tool_input": tool_input,
+ "tool_output": tool_output,
+ "goal": current_goal,
  "metadata": {
  **self.default_metadata,
  "span_type": span.get("span_type", "llm"),