kalibr 1.1.2a0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +49 -53
- kalibr/cli/capsule_cmd.py +3 -3
- kalibr/cli/main.py +3 -3
- kalibr/cli/run.py +2 -2
- kalibr/client.py +1 -1
- kalibr/collector.py +227 -48
- kalibr/context.py +42 -0
- kalibr/cost_adapter.py +36 -104
- kalibr/instrumentation/anthropic_instr.py +34 -40
- kalibr/instrumentation/base.py +27 -9
- kalibr/instrumentation/google_instr.py +34 -39
- kalibr/instrumentation/openai_instr.py +34 -28
- kalibr/instrumentation/registry.py +38 -13
- kalibr/intelligence.py +662 -0
- kalibr/middleware/auto_tracer.py +1 -1
- kalibr/pricing.py +245 -0
- kalibr/router.py +499 -0
- kalibr/simple_tracer.py +17 -16
- kalibr/trace_capsule.py +19 -12
- kalibr/utils.py +2 -2
- kalibr-1.3.0.dist-info/LICENSE +190 -0
- kalibr-1.3.0.dist-info/METADATA +296 -0
- kalibr-1.3.0.dist-info/RECORD +52 -0
- {kalibr-1.1.2a0.dist-info → kalibr-1.3.0.dist-info}/WHEEL +1 -1
- kalibr_crewai/__init__.py +1 -1
- kalibr_crewai/callbacks.py +124 -16
- kalibr_crewai/instrumentor.py +197 -34
- kalibr_langchain/__init__.py +4 -2
- kalibr_langchain/async_callback.py +1 -1
- kalibr_langchain/callback.py +27 -1
- kalibr_langchain/chat_model.py +103 -0
- kalibr_openai_agents/__init__.py +1 -1
- kalibr_openai_agents/processor.py +1 -1
- kalibr-1.1.2a0.dist-info/METADATA +0 -236
- kalibr-1.1.2a0.dist-info/RECORD +0 -48
- kalibr-1.1.2a0.dist-info/licenses/LICENSE +0 -21
- {kalibr-1.1.2a0.dist-info → kalibr-1.3.0.dist-info}/entry_points.txt +0 -0
- {kalibr-1.1.2a0.dist-info → kalibr-1.3.0.dist-info}/top_level.txt +0 -0
kalibr/__init__.py
CHANGED
|
@@ -1,36 +1,24 @@
|
|
|
1
|
-
"""Kalibr SDK v1.
|
|
2
|
-
|
|
3
|
-
This SDK combines:
|
|
4
|
-
1. Full LLM Observability with tracing, cost tracking, and analytics
|
|
5
|
-
2. Multi-Model AI Integration (GPT, Claude, Gemini, Copilot)
|
|
6
|
-
3. One-line deployment with Docker and runtime router
|
|
7
|
-
4. Schema generation for all major AI platforms
|
|
8
|
-
5. **NEW in 1.1.0**: Auto-instrumentation of LLM SDKs (OpenAI, Anthropic, Google)
|
|
1
|
+
"""Kalibr SDK v1.2.7 - LLM Observability & Tracing Framework
|
|
9
2
|
|
|
10
3
|
Features:
|
|
11
4
|
- **Auto-Instrumentation**: Zero-config tracing of OpenAI, Anthropic, Google SDK calls
|
|
12
5
|
- **OpenTelemetry**: OTel-compatible spans with OTLP export
|
|
13
6
|
- **Tracing**: Complete telemetry with @trace decorator
|
|
14
7
|
- **Cost Tracking**: Multi-vendor cost calculation (OpenAI, Anthropic, etc.)
|
|
15
|
-
- **Deployment**: One-command deployment to Fly.io, Render, or local
|
|
16
|
-
- **Schema Generation**: Auto-generate schemas for GPT Actions, Claude MCP, Gemini, Copilot
|
|
17
8
|
- **Error Handling**: Automatic error capture with stack traces
|
|
18
9
|
- **Analytics**: ClickHouse-backed analytics and alerting
|
|
19
10
|
|
|
20
|
-
Usage - Auto-Instrumentation
|
|
21
|
-
from kalibr import
|
|
11
|
+
Usage - Auto-Instrumentation:
|
|
12
|
+
from kalibr import auto_instrument
|
|
22
13
|
import openai # Automatically instrumented!
|
|
23
14
|
|
|
24
|
-
|
|
15
|
+
auto_instrument(["openai", "anthropic", "google"])
|
|
25
16
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
messages=[{"role": "user", "content": message}]
|
|
32
|
-
)
|
|
33
|
-
return response.choices[0].message.content
|
|
17
|
+
# All LLM calls are now traced automatically
|
|
18
|
+
response = openai.chat.completions.create(
|
|
19
|
+
model="gpt-4",
|
|
20
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
21
|
+
)
|
|
34
22
|
|
|
35
23
|
Usage - Manual Tracing:
|
|
36
24
|
from kalibr import trace
|
|
@@ -44,13 +32,11 @@ Usage - Manual Tracing:
|
|
|
44
32
|
return response
|
|
45
33
|
|
|
46
34
|
CLI Usage:
|
|
47
|
-
kalibr serve my_app.py # Run locally
|
|
48
|
-
kalibr deploy my_app.py --runtime fly # Deploy to Fly.io
|
|
49
35
|
kalibr run my_app.py # Run with auto-tracing
|
|
50
36
|
kalibr version # Show version
|
|
51
37
|
"""
|
|
52
38
|
|
|
53
|
-
__version__ = "1.
|
|
39
|
+
__version__ = "1.2.7"
|
|
54
40
|
|
|
55
41
|
# Auto-instrument LLM SDKs on import (can be disabled via env var)
|
|
56
42
|
import os
|
|
@@ -70,7 +56,17 @@ from .collector import is_configured as is_collector_configured
|
|
|
70
56
|
from .collector import (
|
|
71
57
|
setup_collector,
|
|
72
58
|
)
|
|
73
|
-
from .context import
|
|
59
|
+
from .context import (
|
|
60
|
+
get_parent_span_id,
|
|
61
|
+
get_trace_id,
|
|
62
|
+
new_trace_id,
|
|
63
|
+
trace_context,
|
|
64
|
+
# Goal context (v1.3.0)
|
|
65
|
+
goal,
|
|
66
|
+
set_goal,
|
|
67
|
+
get_goal,
|
|
68
|
+
clear_goal,
|
|
69
|
+
)
|
|
74
70
|
from .cost_adapter import (
|
|
75
71
|
AnthropicCostAdapter,
|
|
76
72
|
BaseCostAdapter,
|
|
@@ -79,25 +75,25 @@ from .cost_adapter import (
|
|
|
79
75
|
)
|
|
80
76
|
from .instrumentation import auto_instrument, get_instrumented_providers
|
|
81
77
|
|
|
82
|
-
# ============================================================================
|
|
83
|
-
# SDK & DEPLOYMENT (from 1.0.30)
|
|
84
|
-
# ============================================================================
|
|
85
|
-
from .kalibr import Kalibr
|
|
86
|
-
from .kalibr_app import KalibrApp
|
|
87
78
|
from .models import EventData, TraceConfig
|
|
88
|
-
from .schemas import (
|
|
89
|
-
generate_copilot_schema,
|
|
90
|
-
generate_gemini_schema,
|
|
91
|
-
generate_mcp_schema,
|
|
92
|
-
get_base_url,
|
|
93
|
-
get_supported_models,
|
|
94
|
-
)
|
|
95
79
|
from .simple_tracer import trace
|
|
96
80
|
from .trace_capsule import TraceCapsule, get_or_create_capsule
|
|
97
81
|
from .tracer import SpanContext, Tracer
|
|
98
|
-
from .types import FileUpload, Session
|
|
99
82
|
from .utils import load_config_from_env
|
|
100
83
|
|
|
84
|
+
# ============================================================================
|
|
85
|
+
# INTELLIGENCE & OUTCOME ROUTING (v1.2.0)
|
|
86
|
+
# ============================================================================
|
|
87
|
+
from .intelligence import (
|
|
88
|
+
KalibrIntelligence,
|
|
89
|
+
get_policy,
|
|
90
|
+
report_outcome,
|
|
91
|
+
get_recommendation,
|
|
92
|
+
register_path,
|
|
93
|
+
decide,
|
|
94
|
+
)
|
|
95
|
+
from .router import Router
|
|
96
|
+
|
|
101
97
|
if os.getenv("KALIBR_AUTO_INSTRUMENT", "true").lower() == "true":
|
|
102
98
|
# Setup OpenTelemetry collector
|
|
103
99
|
try:
|
|
@@ -131,6 +127,11 @@ __all__ = [
|
|
|
131
127
|
"get_trace_id",
|
|
132
128
|
"get_parent_span_id",
|
|
133
129
|
"new_trace_id",
|
|
130
|
+
# Goal Context (v1.3.0)
|
|
131
|
+
"goal",
|
|
132
|
+
"set_goal",
|
|
133
|
+
"get_goal",
|
|
134
|
+
"clear_goal",
|
|
134
135
|
# Tracer
|
|
135
136
|
"Tracer",
|
|
136
137
|
"SpanContext",
|
|
@@ -145,21 +146,6 @@ __all__ = [
|
|
|
145
146
|
# Utils
|
|
146
147
|
"load_config_from_env",
|
|
147
148
|
# ========================================================================
|
|
148
|
-
# SDK & DEPLOYMENT
|
|
149
|
-
# ========================================================================
|
|
150
|
-
# SDK Classes
|
|
151
|
-
"Kalibr",
|
|
152
|
-
"KalibrApp",
|
|
153
|
-
# Types
|
|
154
|
-
"FileUpload",
|
|
155
|
-
"Session",
|
|
156
|
-
# Schema Generation
|
|
157
|
-
"get_base_url",
|
|
158
|
-
"generate_mcp_schema",
|
|
159
|
-
"generate_gemini_schema",
|
|
160
|
-
"generate_copilot_schema",
|
|
161
|
-
"get_supported_models",
|
|
162
|
-
# ========================================================================
|
|
163
149
|
# PHASE 1: SDK INSTRUMENTATION & OPENTELEMETRY (v1.1.0)
|
|
164
150
|
# ========================================================================
|
|
165
151
|
# Auto-instrumentation
|
|
@@ -169,4 +155,14 @@ __all__ = [
|
|
|
169
155
|
"setup_collector",
|
|
170
156
|
"get_tracer_provider",
|
|
171
157
|
"is_collector_configured",
|
|
158
|
+
# ========================================================================
|
|
159
|
+
# INTELLIGENCE & OUTCOME ROUTING (v1.2.0)
|
|
160
|
+
# ========================================================================
|
|
161
|
+
"KalibrIntelligence",
|
|
162
|
+
"get_policy",
|
|
163
|
+
"report_outcome",
|
|
164
|
+
"get_recommendation",
|
|
165
|
+
"register_path",
|
|
166
|
+
"decide",
|
|
167
|
+
"Router",
|
|
172
168
|
]
|
kalibr/cli/capsule_cmd.py
CHANGED
|
@@ -23,7 +23,7 @@ def capsule(
|
|
|
23
23
|
None,
|
|
24
24
|
"--api-url",
|
|
25
25
|
"-u",
|
|
26
|
-
help="Kalibr API base URL (default: from env KALIBR_API_URL or
|
|
26
|
+
help="Kalibr API base URL (default: from env KALIBR_API_URL or https://kalibr-backend.fly.dev)",
|
|
27
27
|
envvar="KALIBR_API_URL",
|
|
28
28
|
),
|
|
29
29
|
output: Optional[Path] = typer.Option(
|
|
@@ -63,10 +63,10 @@ def capsule(
|
|
|
63
63
|
kalibr capsule abc-123-def --export --output capsule.json
|
|
64
64
|
|
|
65
65
|
# Specify custom API URL
|
|
66
|
-
kalibr capsule abc-123-def -u https://
|
|
66
|
+
kalibr capsule abc-123-def -u https://kalibr-backend.fly.dev
|
|
67
67
|
"""
|
|
68
68
|
# Determine API base URL
|
|
69
|
-
base_url = api_url or "
|
|
69
|
+
base_url = api_url or "https://kalibr-backend.fly.dev"
|
|
70
70
|
base_url = base_url.rstrip("/")
|
|
71
71
|
|
|
72
72
|
# Build endpoint URL
|
kalibr/cli/main.py
CHANGED
|
@@ -30,9 +30,9 @@ def version():
|
|
|
30
30
|
from kalibr import __version__
|
|
31
31
|
|
|
32
32
|
console.print(f"[bold]Kalibr SDK version:[/bold] {__version__}")
|
|
33
|
-
console.print("
|
|
34
|
-
console.print("
|
|
35
|
-
console.print("GitHub: https://github.com/
|
|
33
|
+
console.print("LLM Observability & Execution Intelligence")
|
|
34
|
+
console.print("Auto-instrumentation for OpenAI, Anthropic, Google AI")
|
|
35
|
+
console.print("GitHub: https://github.com/kalibr-ai/kalibr-sdk-python")
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
@app.command()
|
kalibr/cli/run.py
CHANGED
|
@@ -47,7 +47,7 @@ def run(
|
|
|
47
47
|
kalibr run weather.py --runtime fly.io
|
|
48
48
|
|
|
49
49
|
# Custom backend
|
|
50
|
-
kalibr run weather.py --backend-url https://
|
|
50
|
+
kalibr run weather.py --backend-url https://kalibr-backend.fly.dev
|
|
51
51
|
"""
|
|
52
52
|
# Validate file exists
|
|
53
53
|
agent_path = Path(file_path).resolve()
|
|
@@ -56,7 +56,7 @@ def run(
|
|
|
56
56
|
raise typer.Exit(1)
|
|
57
57
|
|
|
58
58
|
# Configure backend
|
|
59
|
-
backend = backend_url or os.getenv("KALIBR_BACKEND_URL", "
|
|
59
|
+
backend = backend_url or os.getenv("KALIBR_BACKEND_URL", "https://kalibr-backend.fly.dev")
|
|
60
60
|
api_key = os.getenv("KALIBR_API_KEY")
|
|
61
61
|
if not api_key:
|
|
62
62
|
console.print("[yellow]⚠️ KALIBR_API_KEY not set. Set it for trace authentication.[/yellow]")
|
kalibr/client.py
CHANGED
|
@@ -70,7 +70,7 @@ class KalibrClient:
|
|
|
70
70
|
|
|
71
71
|
self.api_key = api_key or env_config.get("auth_token", "")
|
|
72
72
|
self.endpoint = endpoint or env_config.get(
|
|
73
|
-
"api_endpoint", "
|
|
73
|
+
"api_endpoint", "https://kalibr-backend.fly.dev/api/v1/traces"
|
|
74
74
|
)
|
|
75
75
|
self.tenant_id = tenant_id or env_config.get("tenant_id", "default")
|
|
76
76
|
self.environment = environment or env_config.get("environment", "prod")
|
kalibr/collector.py
CHANGED
|
@@ -3,14 +3,21 @@ OpenTelemetry Collector Setup
|
|
|
3
3
|
|
|
4
4
|
Configures OpenTelemetry tracer provider with multiple exporters:
|
|
5
5
|
1. OTLP exporter for sending to OpenTelemetry collectors
|
|
6
|
-
2.
|
|
6
|
+
2. Kalibr HTTP exporter for sending to Kalibr backend
|
|
7
|
+
3. File exporter for local JSONL fallback
|
|
8
|
+
|
|
9
|
+
Thread-safe singleton pattern for collector setup.
|
|
7
10
|
"""
|
|
8
11
|
|
|
9
12
|
import json
|
|
10
13
|
import os
|
|
14
|
+
import threading
|
|
15
|
+
from datetime import datetime, timezone
|
|
11
16
|
from pathlib import Path
|
|
12
17
|
from typing import Optional
|
|
13
18
|
|
|
19
|
+
import requests
|
|
20
|
+
|
|
14
21
|
from opentelemetry import trace
|
|
15
22
|
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
16
23
|
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
|
|
@@ -21,6 +28,7 @@ from opentelemetry.sdk.trace.export import (
|
|
|
21
28
|
SpanExporter,
|
|
22
29
|
SpanExportResult,
|
|
23
30
|
)
|
|
31
|
+
from opentelemetry.trace import StatusCode
|
|
24
32
|
|
|
25
33
|
try:
|
|
26
34
|
from opentelemetry.sdk.trace import ReadableSpan
|
|
@@ -81,8 +89,156 @@ class FileSpanExporter(SpanExporter):
|
|
|
81
89
|
}
|
|
82
90
|
|
|
83
91
|
|
|
92
|
+
class KalibrHTTPSpanExporter(SpanExporter):
    """Export finished OpenTelemetry spans to the Kalibr backend via HTTP POST.

    Every batch handed to :meth:`export` is converted into a list of Kalibr
    event dicts and sent as a single ``{"events": [...]}`` JSON request.
    """

    DEFAULT_URL = "https://kalibr-backend.fly.dev/api/ingest"

    def __init__(
        self,
        url: Optional[str] = None,
        api_key: Optional[str] = None,
        tenant_id: Optional[str] = None,
    ):
        """Initialize the Kalibr HTTP exporter.

        Args:
            url: Kalibr collector URL (default: KALIBR_COLLECTOR_URL env var,
                falling back to ``DEFAULT_URL``)
            api_key: API key (default: from KALIBR_API_KEY env var)
            tenant_id: Tenant ID (default: KALIBR_TENANT_ID env var,
                falling back to "default")
        """
        self.url = url or os.getenv("KALIBR_COLLECTOR_URL", self.DEFAULT_URL)
        self.api_key = api_key or os.getenv("KALIBR_API_KEY")
        self.tenant_id = tenant_id or os.getenv("KALIBR_TENANT_ID", "default")
        self.environment = os.getenv("KALIBR_ENVIRONMENT", "production")

    def export(self, spans) -> SpanExportResult:
        """Export spans to Kalibr backend.

        Args:
            spans: Finished spans to convert and send.

        Returns:
            ``SpanExportResult.SUCCESS`` when the request succeeded, when the
            batch was empty, or when no API key is configured (the spans are
            dropped after printing a warning). ``SpanExportResult.FAILURE``
            when the backend answers with an error status or anything raises
            while converting or sending.
        """
        if not self.api_key:
            print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, spans will not be sent to backend")
            return SpanExportResult.SUCCESS

        # Nothing to send: skip the network round-trip entirely.
        if not spans:
            return SpanExportResult.SUCCESS

        # An exporter must never raise into the span processor, so every
        # failure is reported through the return value instead.
        try:
            events = [self._convert_span(span) for span in spans]

            headers = {
                "X-API-Key": self.api_key,
                "X-Tenant-ID": self.tenant_id,
                "Content-Type": "application/json",
            }

            payload = {"events": events}

            response = requests.post(
                self.url,
                headers=headers,
                json=payload,
                timeout=30,
            )

            if not response.ok:
                print(
                    f"[Kalibr SDK] ❌ Backend rejected spans: {response.status_code} - {response.text}"
                )
                return SpanExportResult.FAILURE

            return SpanExportResult.SUCCESS

        except Exception as e:
            print(f"[Kalibr SDK] ❌ Failed to export spans to backend: {e}")
            return SpanExportResult.FAILURE

    def shutdown(self):
        """Shutdown the exporter (no-op: no resources are held between exports)."""
        pass

    def _nanos_to_iso(self, nanos: Optional[int]) -> str:
        """Convert nanoseconds since epoch to an ISO-8601 UTC timestamp.

        ``None`` is mapped to the current UTC time.
        """
        if nanos is None:
            return datetime.now(timezone.utc).isoformat()
        seconds = nanos / 1_000_000_000
        dt = datetime.fromtimestamp(seconds, tz=timezone.utc)
        return dt.isoformat()

    def _get_attr(self, span, *keys, default=None):
        """Return the value of the first key in ``keys`` present on the span.

        Args:
            span: Span whose ``attributes`` mapping is searched.
            *keys: Attribute names, tried in order.
            default: Value returned when none of the keys is present.
        """
        # Read the mapping in place; copying it into a dict for each of the
        # many lookups done per span is wasted work.
        attrs = span.attributes or {}
        for key in keys:
            if key in attrs:
                return attrs[key]
        return default

    def _convert_span(self, span) -> dict:
        """Convert OTel span to Kalibr event format.

        Args:
            span: Finished span to convert.

        Returns:
            A dict holding the event fields sent to the backend.
        """

        # Calculate duration from span times (nanoseconds to milliseconds)
        duration_ms = 0
        if span.start_time and span.end_time:
            duration_ms = int((span.end_time - span.start_time) / 1_000_000)

        # Determine status
        is_error = (
            hasattr(span.status, "status_code") and span.status.status_code == StatusCode.ERROR
        )
        status = "error" if is_error else "success"

        # Extract provider and model. The legacy keys keep priority; the
        # extra gen_ai.* name is only a fallback.
        provider = self._get_attr(span, "llm.vendor", "llm.system", "gen_ai.system", default="")
        model_id = self._get_attr(
            span,
            "llm.request.model",
            "llm.response.model",
            "gen_ai.request.model",
            "gen_ai.response.model",
            default="",
        )

        # Extract token counts. The current GenAI semantic-convention names
        # (input_tokens / output_tokens) are tried after the older
        # prompt_tokens / completion_tokens spellings.
        input_tokens = self._get_attr(
            span,
            "llm.usage.prompt_tokens",
            "gen_ai.usage.prompt_tokens",
            "gen_ai.usage.input_tokens",
            default=0,
        )
        output_tokens = self._get_attr(
            span,
            "llm.usage.completion_tokens",
            "gen_ai.usage.completion_tokens",
            "gen_ai.usage.output_tokens",
            default=0,
        )
        total_tokens = self._get_attr(
            span, "llm.usage.total_tokens", "gen_ai.usage.total_tokens", default=0
        )

        # If total_tokens not provided, calculate it
        if not total_tokens and (input_tokens or output_tokens):
            total_tokens = (input_tokens or 0) + (output_tokens or 0)

        # Build event payload
        event = {
            "schema_version": "1.0",
            "trace_id": format(span.context.trace_id, "032x"),
            "span_id": format(span.context.span_id, "016x"),
            "parent_id": format(span.parent.span_id, "016x") if span.parent else None,
            "tenant_id": self.tenant_id,
            "provider": provider,
            "model_id": model_id,
            "model_name": model_id,
            "operation": span.name,
            "endpoint": span.name,
            "input_tokens": input_tokens or 0,
            "output_tokens": output_tokens or 0,
            "total_tokens": total_tokens or 0,
            "cost_usd": self._get_attr(span, "llm.cost_usd", "gen_ai.usage.cost", default=0.0),
            "latency_ms": self._get_attr(span, "llm.latency_ms", default=duration_ms),
            "duration_ms": duration_ms,
            "status": status,
            # Error details are only reported for spans flagged as errors.
            "error_type": self._get_attr(span, "error.type", default=None) if is_error else None,
            "error_message": (
                self._get_attr(span, "error.message", default=None) if is_error else None
            ),
            "timestamp": self._nanos_to_iso(span.end_time),
            "ts_start": self._nanos_to_iso(span.start_time),
            "ts_end": self._nanos_to_iso(span.end_time),
            "goal": self._get_attr(span, "kalibr.goal", default=""),
            "environment": self.environment,
        }

        return event
|
|
237
|
+
|
|
238
|
+
|
|
84
239
|
_tracer_provider: Optional[TracerProvider] = None
|
|
85
240
|
_is_configured = False
|
|
241
|
+
_collector_lock = threading.Lock()
|
|
86
242
|
|
|
87
243
|
|
|
88
244
|
def setup_collector(
|
|
@@ -94,6 +250,8 @@ def setup_collector(
|
|
|
94
250
|
"""
|
|
95
251
|
Setup OpenTelemetry collector with multiple exporters
|
|
96
252
|
|
|
253
|
+
Thread-safe: Uses double-checked locking to ensure single initialization.
|
|
254
|
+
|
|
97
255
|
Args:
|
|
98
256
|
service_name: Service name for the tracer provider
|
|
99
257
|
otlp_endpoint: OTLP collector endpoint (e.g., "http://localhost:4317")
|
|
@@ -106,50 +264,67 @@ def setup_collector(
|
|
|
106
264
|
"""
|
|
107
265
|
global _tracer_provider, _is_configured
|
|
108
266
|
|
|
267
|
+
# First check without lock (fast path)
|
|
109
268
|
if _is_configured and _tracer_provider:
|
|
110
269
|
return _tracer_provider
|
|
111
270
|
|
|
112
|
-
#
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
271
|
+
# Acquire lock for initialization
|
|
272
|
+
with _collector_lock:
|
|
273
|
+
# Double-check inside lock
|
|
274
|
+
if _is_configured and _tracer_provider:
|
|
275
|
+
return _tracer_provider
|
|
276
|
+
|
|
277
|
+
# Create resource with service name
|
|
278
|
+
resource = Resource(attributes={SERVICE_NAME: service_name})
|
|
279
|
+
|
|
280
|
+
# Create tracer provider
|
|
281
|
+
provider = TracerProvider(resource=resource)
|
|
282
|
+
|
|
283
|
+
# Add OTLP exporter if endpoint is configured
|
|
284
|
+
otlp_endpoint = otlp_endpoint or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
|
|
285
|
+
if otlp_endpoint:
|
|
286
|
+
try:
|
|
287
|
+
otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint)
|
|
288
|
+
provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
|
|
289
|
+
print(f"✅ OTLP exporter configured: {otlp_endpoint}")
|
|
290
|
+
except Exception as e:
|
|
291
|
+
print(f"⚠️ Failed to configure OTLP exporter: {e}")
|
|
292
|
+
|
|
293
|
+
# Add Kalibr HTTP exporter if API key is configured
|
|
294
|
+
kalibr_api_key = os.getenv("KALIBR_API_KEY")
|
|
295
|
+
if kalibr_api_key:
|
|
296
|
+
try:
|
|
297
|
+
kalibr_exporter = KalibrHTTPSpanExporter()
|
|
298
|
+
provider.add_span_processor(BatchSpanProcessor(kalibr_exporter))
|
|
299
|
+
print(f"✅ Kalibr backend exporter configured: {kalibr_exporter.url}")
|
|
300
|
+
except Exception as e:
|
|
301
|
+
print(f"⚠️ Failed to configure Kalibr backend exporter: {e}")
|
|
302
|
+
|
|
303
|
+
# Add file exporter for local fallback
|
|
304
|
+
if file_export:
|
|
305
|
+
try:
|
|
306
|
+
file_exporter = FileSpanExporter("/tmp/kalibr_otel_spans.jsonl")
|
|
307
|
+
provider.add_span_processor(BatchSpanProcessor(file_exporter))
|
|
308
|
+
print("✅ File exporter configured: /tmp/kalibr_otel_spans.jsonl")
|
|
309
|
+
except Exception as e:
|
|
310
|
+
print(f"⚠️ Failed to configure file exporter: {e}")
|
|
311
|
+
|
|
312
|
+
# Add console exporter for debugging
|
|
313
|
+
if console_export:
|
|
314
|
+
try:
|
|
315
|
+
console_exporter = ConsoleSpanExporter()
|
|
316
|
+
provider.add_span_processor(BatchSpanProcessor(console_exporter))
|
|
317
|
+
print("✅ Console exporter configured")
|
|
318
|
+
except Exception as e:
|
|
319
|
+
print(f"⚠️ Failed to configure console exporter: {e}")
|
|
320
|
+
|
|
321
|
+
# Set as global tracer provider
|
|
322
|
+
trace.set_tracer_provider(provider)
|
|
323
|
+
|
|
324
|
+
_tracer_provider = provider
|
|
325
|
+
_is_configured = True
|
|
326
|
+
|
|
327
|
+
return provider
|
|
153
328
|
|
|
154
329
|
|
|
155
330
|
def get_tracer_provider() -> Optional[TracerProvider]:
|
|
@@ -163,11 +338,15 @@ def is_configured() -> bool:
|
|
|
163
338
|
|
|
164
339
|
|
|
165
340
|
def shutdown_collector():
    """Shutdown the tracer provider and flush all spans.

    Thread-safe: the whole operation runs while holding ``_collector_lock``.
    Does nothing when no provider is currently stored.

    The module-level state is reset even if the provider's ``shutdown()``
    raises, so a failed shutdown does not leave the collector marked as
    configured; the exception still propagates to the caller.
    """
    global _tracer_provider, _is_configured

    with _collector_lock:
        if not _tracer_provider:
            return

        try:
            _tracer_provider.shutdown()
        finally:
            _tracer_provider = None
            _is_configured = False

        # Only reached when shutdown() completed without raising.
        print("✅ Tracer provider shutdown")
|
kalibr/context.py
CHANGED
|
@@ -8,6 +8,7 @@ HTTP requests to SDK calls (OpenAI, Anthropic, Google).
|
|
|
8
8
|
import random
|
|
9
9
|
import string
|
|
10
10
|
import uuid
|
|
11
|
+
from contextlib import contextmanager
|
|
11
12
|
from contextvars import ContextVar
|
|
12
13
|
from typing import Dict, Optional
|
|
13
14
|
|
|
@@ -130,3 +131,44 @@ def inject_kalibr_context_into_span(span: Span):
|
|
|
130
131
|
span.set_attribute("kalibr.http_trace_id", ctx["trace_id"])
|
|
131
132
|
if ctx.get("span_id"):
|
|
132
133
|
span.set_attribute("kalibr.http_span_id", ctx["span_id"])
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ============================================================================
|
|
137
|
+
# Goal Context for Outcome Tracking (v1.3.0)
|
|
138
|
+
# ============================================================================
|
|
139
|
+
|
|
140
|
+
# Holds the goal name for the current execution context; None means "no goal".
_goal_context: ContextVar[Optional[str]] = ContextVar("goal_context", default=None)


def set_goal(goal: str) -> None:
    """Set the current goal for all subsequent Kalibr traces.

    Args:
        goal: Goal name to store in the current context.
    """
    _goal_context.set(goal)


def get_goal() -> Optional[str]:
    """Get the current goal, or ``None`` when no goal is set."""
    return _goal_context.get()


def clear_goal() -> None:
    """Clear the current goal (stores ``None`` in the current context)."""
    _goal_context.set(None)


@contextmanager
def goal(goal_name: str):
    """Context manager to set goal for a block of code.

    The goal that was active on entry is put back on exit, including when
    the body raises, so ``goal`` blocks can be nested.

    Args:
        goal_name: Goal to use while the block runs.

    Usage:
        with kalibr.goal("research_company"):
            agent.run("Research Weights & Biases")
    """
    previous = _goal_context.get()
    _goal_context.set(goal_name)
    try:
        yield
    finally:
        # Restore exactly what was there before. A truthiness check here
        # would turn an enclosing falsy goal (e.g. "") into None.
        _goal_context.set(previous)
|