kalibr 1.0.28__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +170 -3
- kalibr/__main__.py +3 -203
- kalibr/capsule_middleware.py +108 -0
- kalibr/cli/__init__.py +5 -0
- kalibr/cli/capsule_cmd.py +174 -0
- kalibr/cli/deploy_cmd.py +114 -0
- kalibr/cli/main.py +67 -0
- kalibr/cli/run.py +200 -0
- kalibr/cli/serve.py +59 -0
- kalibr/client.py +293 -0
- kalibr/collector.py +173 -0
- kalibr/context.py +132 -0
- kalibr/cost_adapter.py +222 -0
- kalibr/decorators.py +140 -0
- kalibr/instrumentation/__init__.py +13 -0
- kalibr/instrumentation/anthropic_instr.py +282 -0
- kalibr/instrumentation/base.py +108 -0
- kalibr/instrumentation/google_instr.py +281 -0
- kalibr/instrumentation/openai_instr.py +265 -0
- kalibr/instrumentation/registry.py +153 -0
- kalibr/kalibr.py +144 -230
- kalibr/kalibr_app.py +53 -314
- kalibr/middleware/__init__.py +5 -0
- kalibr/middleware/auto_tracer.py +356 -0
- kalibr/models.py +41 -0
- kalibr/redaction.py +44 -0
- kalibr/schemas.py +116 -0
- kalibr/simple_tracer.py +255 -0
- kalibr/tokens.py +52 -0
- kalibr/trace_capsule.py +296 -0
- kalibr/trace_models.py +201 -0
- kalibr/tracer.py +354 -0
- kalibr/types.py +25 -93
- kalibr/utils.py +198 -0
- kalibr-1.1.0.dist-info/METADATA +97 -0
- kalibr-1.1.0.dist-info/RECORD +40 -0
- kalibr-1.1.0.dist-info/entry_points.txt +2 -0
- kalibr-1.1.0.dist-info/licenses/LICENSE +21 -0
- kalibr/deployment.py +0 -41
- kalibr/packager.py +0 -43
- kalibr/runtime_router.py +0 -138
- kalibr/schema_generators.py +0 -159
- kalibr/validator.py +0 -70
- kalibr-1.0.28.data/data/examples/README.md +0 -173
- kalibr-1.0.28.data/data/examples/basic_kalibr_example.py +0 -66
- kalibr-1.0.28.data/data/examples/enhanced_kalibr_example.py +0 -347
- kalibr-1.0.28.dist-info/METADATA +0 -175
- kalibr-1.0.28.dist-info/RECORD +0 -19
- kalibr-1.0.28.dist-info/entry_points.txt +0 -2
- kalibr-1.0.28.dist-info/licenses/LICENSE +0 -11
- {kalibr-1.0.28.dist-info → kalibr-1.1.0.dist-info}/WHEEL +0 -0
- {kalibr-1.0.28.dist-info → kalibr-1.1.0.dist-info}/top_level.txt +0 -0
kalibr/client.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Kalibr client with context propagation and batch flushing.
|
|
2
|
+
|
|
3
|
+
Refactored for Phase 1 - SDK Stabilization:
|
|
4
|
+
- Uses modular tracer and cost adapters
|
|
5
|
+
- Supports new event schema v1.0
|
|
6
|
+
- Enhanced error handling with stack traces
|
|
7
|
+
- Environment-based configuration
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import atexit
|
|
11
|
+
import hashlib
|
|
12
|
+
import hmac
|
|
13
|
+
import queue
|
|
14
|
+
import threading
|
|
15
|
+
import time
|
|
16
|
+
from typing import Any, Callable, Dict, Optional
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
|
|
20
|
+
from .decorators import create_trace_decorator
|
|
21
|
+
from .tracer import Tracer
|
|
22
|
+
from .utils import (
|
|
23
|
+
load_config_from_env,
|
|
24
|
+
log_error,
|
|
25
|
+
log_info,
|
|
26
|
+
log_success,
|
|
27
|
+
log_warning,
|
|
28
|
+
serialize_event,
|
|
29
|
+
validate_event,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class KalibrClient:
    """Kalibr observability client with context propagation and batch flushing.

    Events produced by traced functions are placed on a bounded in-memory
    queue. A daemon thread drains that queue and POSTs the events to the
    collector endpoint in batches.

    Features:
    - Modular tracer with cost adapters
    - New event schema v1.0
    - Enhanced error handling
    - Environment-based configuration

    Args:
        api_key: Kalibr API key (falls back to ``auth_token`` from env config)
        endpoint: Collector endpoint URL (optional, can load from env)
        tenant_id: Tenant identifier (optional, can load from env)
        environment: Environment (prod/staging/dev, can load from env)
        service: Service name (optional, can load from env)
        workflow_id: Workflow identifier (overrides the env value when given)
        workflow_version: Workflow version (overrides the env value when given)
        secret: HMAC secret for request signing
        batch_size: Max events per batch
        flush_interval: Flush interval in seconds
        max_queue_size: Max queue size (drops oldest on overflow)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        tenant_id: Optional[str] = None,
        environment: Optional[str] = None,
        service: Optional[str] = None,
        workflow_id: Optional[str] = None,
        workflow_version: Optional[str] = None,
        secret: Optional[str] = None,
        batch_size: int = 100,
        flush_interval: float = 2.0,
        max_queue_size: int = 5000,
    ):
        # Explicit arguments win; anything left unset comes from the environment.
        env_config = load_config_from_env()

        self.api_key = api_key or env_config.get("auth_token", "")
        self.endpoint = endpoint or env_config.get(
            "api_endpoint", "http://localhost:8001/api/v1/traces"
        )
        self.tenant_id = tenant_id or env_config.get("tenant_id", "default")
        self.environment = environment or env_config.get("environment", "prod")
        self.service = service or env_config.get("project_name", "kalibr-app")
        self.secret = secret

        # Workflow configuration (can override env)
        if workflow_id:
            env_config["workflow_id"] = workflow_id
        if workflow_version:
            env_config["workflow_version"] = workflow_version
        self.batch_size = batch_size
        self.flush_interval = flush_interval

        # Extract workflow and runtime fields from config
        self.workflow_id = env_config.get("workflow_id", "default-workflow")
        self.workflow_version = env_config.get("workflow_version", "1.0")
        self.sandbox_id = env_config.get("sandbox_id", "local")
        self.runtime_env = env_config.get("runtime_env", "local")
        self.parent_trace_id = env_config.get("parent_trace_id")

        # Create tracer instance
        self.tracer = Tracer(
            tenant_id=self.tenant_id,
            environment=self.environment,
            service=self.service,
            workflow_id=self.workflow_id,
            workflow_version=self.workflow_version,
            sandbox_id=self.sandbox_id,
            runtime_env=self.runtime_env,
            parent_trace_id=self.parent_trace_id,
        )

        # Bounded event queue shared between producers and the flusher thread
        self.queue = queue.Queue(maxsize=max_queue_size)
        self.max_queue_size = max_queue_size

        # HTTP client
        self.client = httpx.Client(timeout=10.0)

        # Background flusher thread (daemon so it never blocks interpreter exit)
        self._shutdown = False
        self._flush_thread = threading.Thread(target=self._flush_loop, daemon=True)
        self._flush_thread.start()

        # Flush whatever is pending when the interpreter exits
        atexit.register(self.shutdown)

        log_success(
            f"Kalibr client initialized: {self.tenant_id} @ {self.environment} (service: {self.service})"
        )

    def _flush_loop(self):
        """Background thread body: collect queued events and send them in batches.

        A batch is sent when it reaches ``batch_size`` or when it is non-empty
        and ``flush_interval`` seconds have passed since the previous send.
        Once shutdown is requested, everything still on the queue is drained
        and sent before the thread exits.
        """
        batch = []
        last_flush = time.time()

        while not self._shutdown:
            try:
                # Short timeout so the shutdown flag is re-checked frequently
                try:
                    batch.append(self.queue.get(timeout=0.1))
                except queue.Empty:
                    pass

                now = time.time()
                should_flush = len(batch) >= self.batch_size or (
                    batch and now - last_flush >= self.flush_interval
                )

                if should_flush:
                    self._send_batch(batch)
                    batch = []
                    last_flush = now

            except Exception as e:
                log_warning(f"Flush loop error: {e}")

        # Final flush on shutdown: events may still be sitting on the queue,
        # not only in the local batch, so drain the queue as well.
        while True:
            try:
                batch.append(self.queue.get_nowait())
            except queue.Empty:
                break
            if len(batch) >= self.batch_size:
                self._send_batch(batch)
                batch = []

        if batch:
            self._send_batch(batch)

    def _send_batch(self, batch: list):
        """Validate a batch and POST the valid events to the collector.

        Invalid events are skipped with a warning. Any failure (validation,
        network, non-2xx status, malformed response) is logged and swallowed:
        telemetry delivery is best effort and must not crash the flusher.

        Args:
            batch: Event dictionaries to send.
        """
        if not batch:
            return

        try:
            valid_events = []
            for event in batch:
                if validate_event(event):
                    valid_events.append(event)
                else:
                    log_warning(f"Invalid event skipped: {event.get('span_id', 'unknown')}")

            if not valid_events:
                log_warning("No valid events in batch")
                return

            # Backend expects a JSON object: {"events": [event_dict, ...]}
            payload = {"events": valid_events}
            headers = {"X-API-Key": self.api_key} if self.api_key else {}

            if self.secret:
                # Send exactly the bytes that were signed. Letting the HTTP
                # client re-serialize the dict could produce different bytes
                # than the ones covered by the signature.
                # NOTE(review): assumes serialize_event() returns JSON text
                # for the payload dict - confirm against utils.serialize_event.
                body = serialize_event(payload).encode("utf-8")
                headers["X-Signature"] = hmac.new(
                    self.secret.encode(), body, hashlib.sha256
                ).hexdigest()
                headers["Content-Type"] = "application/json"
                response = self.client.post(self.endpoint, content=body, headers=headers)
            else:
                # Unsigned requests let the HTTP client serialize the dict
                response = self.client.post(self.endpoint, json=payload, headers=headers)
            response.raise_for_status()

            result = response.json()
            accepted = result.get("accepted", 0)
            rejected = result.get("rejected", 0)

            if rejected > 0:
                log_warning(f"Batch: {accepted} accepted, {rejected} rejected")

        except Exception as e:
            log_error(f"Batch send failed: {e}")

    def _enqueue(self, event: Dict):
        """Add an event to the queue, evicting the oldest one when it is full.

        Never blocks and never raises: if the queue is still full after one
        eviction attempt (another producer won the race), the new event is
        dropped.

        Args:
            event: Event dictionary to queue for delivery.
        """
        try:
            self.queue.put_nowait(event)
            return
        except queue.Full:
            pass

        # Queue is full: make room by discarding the oldest event.
        try:
            self.queue.get_nowait()
        except queue.Empty:
            # The flusher emptied the queue in the meantime; there is room now.
            pass

        try:
            self.queue.put_nowait(event)
        except queue.Full:
            # Another producer took the freed slot; drop this event (best effort).
            pass

    def shutdown(self):
        """Shutdown client and flush remaining events.

        Safe to call more than once; later calls return immediately.
        """
        if self._shutdown:
            return

        log_info("Shutting down Kalibr client...")
        self._shutdown = True

        # Give the flusher a bounded amount of time to send what is pending
        if self._flush_thread.is_alive():
            self._flush_thread.join(timeout=5.0)

        # Close HTTP client
        self.client.close()
        log_success("Kalibr client shutdown complete")

    def trace(
        self,
        operation: str = "model_call",
        vendor: str = "unknown",
        model: str = "unknown",
        endpoint: Optional[str] = None,
    ):
        """Decorator to trace function calls using new Phase 1 tracer.

        The wrapped function runs through the tracer's own decorator. Events
        the tracer records in ``trace_context`` during the call are then put
        on this client's queue, whether the call returned or raised.

        Args:
            operation: Operation type (chat_completion, embedding, etc.)
            vendor: Vendor name (openai, anthropic, etc.)
            model: Model identifier (gpt-4, claude-3-sonnet, etc.)
            endpoint: API endpoint or function name

        Returns:
            A decorator that wraps a callable with tracing and event queueing.

        Example:
            @kalibr.trace(operation="chat_completion", vendor="openai", model="gpt-4")
            def call_openai(prompt):
                return openai.chat.completions.create(...)
        """
        import functools

        # Create decorator using the new tracer system
        trace_decorator = create_trace_decorator(self.tracer)
        decorator_func = trace_decorator(
            operation=operation, vendor=vendor, model=model, endpoint=endpoint
        )

        def enhanced_decorator(func: Callable) -> Callable:
            wrapped = decorator_func(func)

            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                from .context import trace_context

                # Work on a copy: the dict returned by get() may be the
                # ContextVar's shared default, and mutating that object would
                # leak this call's event list into every other context.
                ctx = dict(trace_context.get())
                ctx["events"] = []
                trace_context.set(ctx)

                try:
                    return wrapped(*args, **kwargs)
                finally:
                    # Enqueue events created by the tracer, including error
                    # events recorded before an exception propagated.
                    for event in trace_context.get().get("events", []):
                        self._enqueue(event)

            return wrapper

        return enhanced_decorator
|
kalibr/collector.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenTelemetry Collector Setup
|
|
3
|
+
|
|
4
|
+
Configures OpenTelemetry tracer provider with multiple exporters:
|
|
5
|
+
1. OTLP exporter for sending to OpenTelemetry collectors
|
|
6
|
+
2. File exporter for local JSONL fallback
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
from opentelemetry import trace
|
|
15
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
16
|
+
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
|
|
17
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
18
|
+
from opentelemetry.sdk.trace.export import (
|
|
19
|
+
BatchSpanProcessor,
|
|
20
|
+
ConsoleSpanExporter,
|
|
21
|
+
SpanExporter,
|
|
22
|
+
SpanExportResult,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
|
27
|
+
except ImportError:
|
|
28
|
+
ReadableSpan = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FileSpanExporter(SpanExporter):
    """Span exporter that appends every exported span to a JSONL file.

    Each span becomes one JSON object on its own line.
    """

    def __init__(self, file_path: str = "/tmp/kalibr_otel_spans.jsonl"):
        """Remember the target path and make sure its directory exists.

        Args:
            file_path: Location of the JSONL output file.
        """
        self.file_path = Path(file_path)
        output_dir = self.file_path.parent
        output_dir.mkdir(parents=True, exist_ok=True)

    def export(self, spans) -> SpanExportResult:
        """Append the given spans to the file, one JSON document per line.

        Args:
            spans: Iterable of finished spans to write.

        Returns:
            ``SpanExportResult.SUCCESS`` when every span was written,
            ``SpanExportResult.FAILURE`` if anything raised along the way.
        """
        try:
            with open(self.file_path, "a") as sink:
                for item in spans:
                    line = json.dumps(self._span_to_dict(item))
                    sink.write(line + "\n")
        except Exception as e:
            print(f"❌ Failed to export spans to file: {e}")
            return SpanExportResult.FAILURE
        return SpanExportResult.SUCCESS

    def shutdown(self):
        """Nothing to release: the file is opened and closed on every export."""
        pass

    def _span_to_dict(self, span) -> dict:
        """Build the JSON-serializable record written for one span.

        Trace and span IDs are rendered as zero-padded lowercase hex
        (32 and 16 digits respectively).

        Args:
            span: The span to convert.

        Returns:
            Dictionary with IDs, name, kind, timestamps, attributes, status
            and events of the span.
        """
        record = {}
        record["trace_id"] = f"{span.context.trace_id:032x}"
        record["span_id"] = f"{span.context.span_id:016x}"
        record["parent_span_id"] = f"{span.parent.span_id:016x}" if span.parent else None
        record["name"] = span.name

        # Prefer the enum member name; fall back to the string form otherwise
        if hasattr(span.kind, "name"):
            record["kind"] = span.kind.name
        else:
            record["kind"] = str(span.kind)

        record["start_time_unix_nano"] = span.start_time
        record["end_time_unix_nano"] = span.end_time
        record["attributes"] = dict(span.attributes) if span.attributes else {}

        if hasattr(span.status.status_code, "name"):
            code_label = span.status.status_code.name
        else:
            code_label = str(span.status.status_code)
        record["status"] = {
            "code": code_label,
            "description": getattr(span.status, "description", ""),
        }

        event_records = []
        for evt in span.events or []:
            event_records.append(
                {
                    "name": evt.name,
                    "timestamp": evt.timestamp,
                    "attributes": dict(evt.attributes) if evt.attributes else {},
                }
            )
        record["events"] = event_records

        return record
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
_tracer_provider: Optional[TracerProvider] = None
|
|
85
|
+
_is_configured = False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def setup_collector(
    service_name: str = "kalibr",
    otlp_endpoint: Optional[str] = None,
    file_export: bool = True,
    console_export: bool = False,
) -> TracerProvider:
    """
    Build the process-wide tracer provider and attach the requested exporters.

    The provider is created once. If a provider has already been configured,
    it is returned unchanged and the arguments of this call are not applied.
    A failure to set up one exporter is printed and does not stop the others.

    Args:
        service_name: Service name for the tracer provider
        otlp_endpoint: OTLP collector endpoint (e.g., "http://localhost:4317")
                      If None, reads from OTEL_EXPORTER_OTLP_ENDPOINT env var
        file_export: Whether to export spans to local JSONL file
        console_export: Whether to export spans to console (for debugging)

    Returns:
        Configured TracerProvider instance
    """
    global _tracer_provider, _is_configured

    # Reuse the existing provider instead of configuring a second one
    if _is_configured and _tracer_provider:
        return _tracer_provider

    provider = TracerProvider(resource=Resource(attributes={SERVICE_NAME: service_name}))

    # OTLP exporter: explicit argument first, then the standard env variable
    otlp_endpoint = otlp_endpoint or os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
    if otlp_endpoint:
        try:
            provider.add_span_processor(
                BatchSpanProcessor(OTLPSpanExporter(endpoint=otlp_endpoint))
            )
            print(f"✅ OTLP exporter configured: {otlp_endpoint}")
        except Exception as e:
            print(f"⚠️ Failed to configure OTLP exporter: {e}")

    # Local JSONL file as a fallback sink
    if file_export:
        try:
            provider.add_span_processor(
                BatchSpanProcessor(FileSpanExporter("/tmp/kalibr_otel_spans.jsonl"))
            )
            print("✅ File exporter configured: /tmp/kalibr_otel_spans.jsonl")
        except Exception as e:
            print(f"⚠️ Failed to configure file exporter: {e}")

    # Console output, intended for debugging
    if console_export:
        try:
            provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
            print("✅ Console exporter configured")
        except Exception as e:
            print(f"⚠️ Failed to configure console exporter: {e}")

    # Publish the provider globally and remember it for later calls
    trace.set_tracer_provider(provider)
    _tracer_provider = provider
    _is_configured = True

    return provider
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_tracer_provider() -> Optional[TracerProvider]:
    """Return the module-level tracer provider, or None if none is stored."""
    current_provider = _tracer_provider
    return current_provider
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def is_configured() -> bool:
    """Return the module-level flag recording whether the collector is set up."""
    configured_flag = _is_configured
    return configured_flag
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def shutdown_collector():
    """Shut down the stored tracer provider and reset the module state.

    Does nothing when no provider is stored.
    """
    global _tracer_provider, _is_configured

    if not _tracer_provider:
        return

    _tracer_provider.shutdown()
    # Reset state so a later setup_collector() call builds a new provider
    _tracer_provider = None
    _is_configured = False
    print("✅ Tracer provider shutdown")
|
kalibr/context.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context propagation for distributed tracing.
|
|
3
|
+
|
|
4
|
+
Phase 3: Enhanced with OpenTelemetry context propagation for linking
|
|
5
|
+
HTTP requests to SDK calls (OpenAI, Anthropic, Google).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import random
|
|
9
|
+
import string
|
|
10
|
+
import uuid
|
|
11
|
+
from contextvars import ContextVar
|
|
12
|
+
from typing import Dict, Optional
|
|
13
|
+
|
|
14
|
+
from opentelemetry import trace
|
|
15
|
+
from opentelemetry.trace import Span
|
|
16
|
+
|
|
17
|
+
# Context-local (ContextVar) state for trace and span propagation.
# NOTE: the default dict is a single shared object; copy it before mutating.
|
|
18
|
+
trace_context: ContextVar[Dict] = ContextVar("trace_context", default={})
|
|
19
|
+
|
|
20
|
+
# Phase 3: OpenTelemetry context for HTTP→SDK linking
|
|
21
|
+
_otel_request_context: ContextVar[Optional[Dict]] = ContextVar("otel_request_context", default=None)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_trace_id() -> Optional[str]:
    """Return the ``trace_id`` stored in the current trace context, if any."""
    return trace_context.get().get("trace_id")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_parent_span_id() -> Optional[str]:
    """Return the last entry of the context's ``span_stack``, or None if empty."""
    stack = trace_context.get().get("span_stack", [])
    if not stack:
        return None
    return stack[-1]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def new_span_id() -> str:
    """Create a fresh span ID: a random UUID (version 4) in string form."""
    generated = uuid.uuid4()
    return str(generated)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def new_trace_id() -> str:
    """Create a fresh trace ID: a random UUID (version 4) in string form."""
    generated = uuid.uuid4()
    return str(generated)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ============================================================================
|
|
48
|
+
# Phase 3: OpenTelemetry Context Propagation
|
|
49
|
+
# ============================================================================
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def set_otel_request_context(context_token: str, trace_id: str, span_id: str):
    """
    Store the request-level context used for HTTP→SDK span linking.

    The three values are saved together as one dictionary in the
    request context variable, replacing whatever was stored before.

    Args:
        context_token: Kalibr context token for chaining
        trace_id: OpenTelemetry trace ID (hex format)
        span_id: OpenTelemetry span ID (hex format)
    """
    request_ctx = {
        "context_token": context_token,
        "trace_id": trace_id,
        "span_id": span_id,
    }
    _otel_request_context.set(request_ctx)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_otel_request_context() -> Optional[Dict]:
    """
    Read the request-level context stored for the current execution context.

    Returns:
        Dictionary with context_token, trace_id, span_id or None
    """
    stored = _otel_request_context.get()
    return stored
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def clear_otel_request_context():
    """Reset the request-level context to None (meant for end of request)."""
    _otel_request_context.set(None)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_current_otel_span() -> Optional[Span]:
    """
    Return whatever ``trace.get_current_span()`` reports as the active span.

    Returns:
        The value from ``trace.get_current_span()``.
        NOTE(review): the OpenTelemetry API is documented to return an
        invalid placeholder span rather than None when nothing is active;
        check ``get_span_context().is_valid`` instead of comparing to None.
    """
    active_span = trace.get_current_span()
    return active_span
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_otel_trace_context() -> Dict:
    """
    Describe the active OpenTelemetry span as a small dictionary.

    Returns:
        ``{"trace_id", "span_id", "is_valid"}`` with the IDs as zero-padded
        hex strings when the active span has a valid context, otherwise
        an empty dict.
    """
    active = get_current_otel_span()
    if not active or not active.get_span_context().is_valid:
        return {}

    span_ctx = active.get_span_context()
    return {
        "trace_id": f"{span_ctx.trace_id:032x}",
        "span_id": f"{span_ctx.span_id:016x}",
        "is_valid": True,
    }
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def inject_kalibr_context_into_span(span: Span):
    """
    Copy the stored request context onto a span as ``kalibr.*`` attributes.

    Only values that are present and truthy are written. Nothing happens
    when no request context is stored.

    Args:
        span: OpenTelemetry span to annotate
    """
    request_ctx = get_otel_request_context()
    if not request_ctx:
        return

    # (key in the request context, attribute name written on the span)
    attribute_map = (
        ("context_token", "kalibr.context_token"),
        ("trace_id", "kalibr.http_trace_id"),
        ("span_id", "kalibr.http_span_id"),
    )
    for key, attribute_name in attribute_map:
        if request_ctx.get(key):
            span.set_attribute(attribute_name, request_ctx[key])
|