kalibr 1.2.6-py3-none-any.whl → 1.2.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +2 -2
- kalibr/cli/capsule_cmd.py +3 -3
- kalibr/cli/run.py +2 -2
- kalibr/client.py +1 -1
- kalibr/collector.py +227 -48
- kalibr/cost_adapter.py +36 -104
- kalibr/instrumentation/anthropic_instr.py +34 -40
- kalibr/instrumentation/base.py +27 -9
- kalibr/instrumentation/google_instr.py +34 -39
- kalibr/instrumentation/openai_instr.py +34 -28
- kalibr/instrumentation/registry.py +38 -13
- kalibr/intelligence.py +28 -16
- kalibr/middleware/auto_tracer.py +1 -1
- kalibr/pricing.py +245 -0
- kalibr/router.py +144 -34
- kalibr/simple_tracer.py +16 -15
- kalibr/trace_capsule.py +19 -12
- kalibr/utils.py +2 -2
- {kalibr-1.2.6.dist-info → kalibr-1.2.9.dist-info}/METADATA +114 -14
- {kalibr-1.2.6.dist-info → kalibr-1.2.9.dist-info}/RECORD +24 -23
- {kalibr-1.2.6.dist-info → kalibr-1.2.9.dist-info}/LICENSE +0 -0
- {kalibr-1.2.6.dist-info → kalibr-1.2.9.dist-info}/WHEEL +0 -0
- {kalibr-1.2.6.dist-info → kalibr-1.2.9.dist-info}/entry_points.txt +0 -0
- {kalibr-1.2.6.dist-info → kalibr-1.2.9.dist-info}/top_level.txt +0 -0
kalibr/pricing.py
ADDED
@@ -0,0 +1,245 @@
"""Centralized pricing data for all LLM vendors.

This module serves as the single source of truth for model pricing across
the entire Kalibr SDK. All cost adapters and instrumentation modules should
use this pricing data to ensure consistency.

All prices are in USD per 1 million tokens, matching the format used by
major LLM providers (OpenAI, Anthropic, etc.) on their pricing pages.

Version: 2026-01
Last Updated: January 2026
"""

from typing import Dict, Optional, Tuple

# Pricing version for tracking updates
PRICING_VERSION = "2026-01"

# All prices in USD per 1M tokens
MODEL_PRICING: Dict[str, Dict[str, Dict[str, float]]] = {
    "openai": {
        # GPT-5 models (future-proofing)
        "gpt-5": {"input": 5.00, "output": 15.00},
        "gpt-5-turbo": {"input": 2.50, "output": 7.50},
        # GPT-4 models
        "gpt-4": {"input": 30.00, "output": 60.00},
        "gpt-4-turbo": {"input": 10.00, "output": 30.00},
        "gpt-4o": {"input": 2.50, "output": 10.00},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        # GPT-3.5 models
        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
        "gpt-3.5-turbo-16k": {"input": 1.00, "output": 2.00},
    },
    "anthropic": {
        # Claude 4 models (future-proofing)
        "claude-4-opus": {"input": 15.00, "output": 75.00},
        "claude-4-sonnet": {"input": 3.00, "output": 15.00},
        # Claude 3.5/3.7 models (Sonnet 4 is actually Claude 3.7)
        "claude-sonnet-4": {"input": 3.00, "output": 15.00},
        "claude-3-7-sonnet": {"input": 3.00, "output": 15.00},
        "claude-3-5-sonnet": {"input": 3.00, "output": 15.00},
        # Claude 3 models
        "claude-3-opus": {"input": 15.00, "output": 75.00},
        "claude-3-sonnet": {"input": 3.00, "output": 15.00},
        "claude-3-haiku": {"input": 0.25, "output": 1.25},
        # Claude 2 models
        "claude-2.1": {"input": 8.00, "output": 24.00},
        "claude-2.0": {"input": 8.00, "output": 24.00},
        "claude-instant-1.2": {"input": 0.80, "output": 2.40},
    },
    "google": {
        # Gemini 2.5 models
        "gemini-2.5-pro": {"input": 1.25, "output": 5.00},
        "gemini-2.5-flash": {"input": 0.075, "output": 0.30},
        # Gemini 2.0 models
        "gemini-2.0-flash": {"input": 0.075, "output": 0.30},
        "gemini-2.0-flash-thinking": {"input": 0.075, "output": 0.30},
        # Gemini 1.5 models
        "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
        "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
        "gemini-1.5-flash-8b": {"input": 0.0375, "output": 0.15},
        # Gemini 1.0 models
        "gemini-1.0-pro": {"input": 0.50, "output": 1.50},
        "gemini-pro": {"input": 0.50, "output": 1.50},  # Alias
    },
}

# Default fallback pricing per vendor (highest tier pricing for safety)
DEFAULT_PRICING: Dict[str, Dict[str, float]] = {
    "openai": {"input": 30.00, "output": 60.00},  # GPT-4 pricing
    "anthropic": {"input": 15.00, "output": 75.00},  # Claude 3 Opus pricing
    "google": {"input": 1.25, "output": 5.00},  # Gemini 1.5 Pro pricing
}


def normalize_model_name(vendor: str, model_name: str) -> str:
    """Normalize model name to match pricing table keys.

    Handles version suffixes, date stamps, and common variations.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Raw model name from API

    Returns:
        Normalized model name that matches pricing table, or original if no match

    Example:
        >>> normalize_model_name("openai", "gpt-4o-2024-05-13")
        'gpt-4o'
        >>> normalize_model_name("anthropic", "claude-3-5-sonnet-20240620")
        'claude-3-5-sonnet'
    """
    vendor = vendor.lower()
    model_lower = model_name.lower()

    # Get vendor pricing table
    vendor_models = MODEL_PRICING.get(vendor, {})

    # Direct match
    if model_lower in vendor_models:
        return model_lower

    # OpenAI fuzzy matching
    if vendor == "openai":
        # Remove date suffixes like -20240513
        base_model = model_lower.split("-2")[0] if "-2" in model_lower else model_lower

        # Try direct match on base
        if base_model in vendor_models:
            return base_model

        # Fuzzy match in priority order
        if "gpt-4o-mini" in model_lower:
            return "gpt-4o-mini"
        elif "gpt-4o" in model_lower:
            return "gpt-4o"
        elif "gpt-5-turbo" in model_lower:
            return "gpt-5-turbo"
        elif "gpt-5" in model_lower:
            return "gpt-5"
        elif "gpt-4-turbo" in model_lower:
            return "gpt-4-turbo"
        elif "gpt-4" in model_lower:
            return "gpt-4"
        elif "gpt-3.5-turbo-16k" in model_lower:
            return "gpt-3.5-turbo-16k"
        elif "gpt-3.5" in model_lower:
            return "gpt-3.5-turbo"

    # Anthropic fuzzy matching
    elif vendor == "anthropic":
        # Try fuzzy matching for versioned models
        if "claude-3.5-sonnet" in model_lower or "claude-3-5-sonnet" in model_lower:
            return "claude-3-5-sonnet"
        elif "claude-sonnet-4" in model_lower or "sonnet-4" in model_lower:
            return "claude-sonnet-4"
        elif "claude-3-7-sonnet" in model_lower:
            return "claude-3-7-sonnet"
        elif "claude-4-opus" in model_lower:
            return "claude-4-opus"
        elif "claude-4-sonnet" in model_lower:
            return "claude-4-sonnet"
        elif "claude-3-opus" in model_lower:
            return "claude-3-opus"
        elif "claude-3-sonnet" in model_lower:
            return "claude-3-sonnet"
        elif "claude-3-haiku" in model_lower:
            return "claude-3-haiku"
        elif "claude-2.1" in model_lower:
            return "claude-2.1"
        elif "claude-2.0" in model_lower or "claude-2" in model_lower:
            return "claude-2.0"
        elif "claude-instant" in model_lower:
            return "claude-instant-1.2"

    # Google fuzzy matching
    elif vendor == "google":
        # Try fuzzy matching for versioned models
        if "gemini-2.5-pro" in model_lower:
            return "gemini-2.5-pro"
        elif "gemini-2.5-flash" in model_lower:
            return "gemini-2.5-flash"
        elif "gemini-2.0-flash-thinking" in model_lower:
            return "gemini-2.0-flash-thinking"
        elif "gemini-2.0-flash" in model_lower:
            return "gemini-2.0-flash"
        elif "gemini-1.5-flash-8b" in model_lower:
            return "gemini-1.5-flash-8b"
        elif "gemini-1.5-flash" in model_lower:
            return "gemini-1.5-flash"
        elif "gemini-1.5-pro" in model_lower:
            return "gemini-1.5-pro"
        elif "gemini-1.0-pro" in model_lower or "gemini-pro" in model_lower:
            return "gemini-pro"

    # Return original if no match found
    return model_lower


def get_pricing(
    vendor: str, model_name: str
) -> Tuple[Dict[str, float], str]:
    """Get pricing for a specific vendor and model.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Model identifier

    Returns:
        Tuple of (pricing dict with 'input' and 'output' keys in USD per 1M tokens,
        normalized model name used)

    Example:
        >>> pricing, normalized = get_pricing("openai", "gpt-4o")
        >>> print(pricing)
        {'input': 2.50, 'output': 10.00}
        >>> print(normalized)
        'gpt-4o'
    """
    vendor = vendor.lower()
    normalized_model = normalize_model_name(vendor, model_name)

    # Get vendor pricing table
    vendor_models = MODEL_PRICING.get(vendor, {})

    # Try to get pricing for normalized model
    pricing = vendor_models.get(normalized_model)

    # Fall back to default vendor pricing if not found
    if pricing is None:
        pricing = DEFAULT_PRICING.get(vendor, {"input": 20.00, "output": 60.00})

    return pricing, normalized_model


def compute_cost(
    vendor: str, model_name: str, input_tokens: int, output_tokens: int
) -> float:
    """Compute cost in USD for given vendor, model, and token counts.

    This is a convenience function that combines pricing lookup and cost calculation.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Model identifier
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens

    Returns:
        Cost in USD (rounded to 6 decimal places)

    Example:
        >>> cost = compute_cost("openai", "gpt-4o", 1000, 500)
        >>> print(f"${cost:.6f}")
        $0.007500
    """
    pricing, _ = get_pricing(vendor, model_name)

    # Calculate cost (pricing is per 1M tokens)
    input_cost = (input_tokens / 1_000_000) * pricing["input"]
    output_cost = (output_tokens / 1_000_000) * pricing["output"]

    return round(input_cost + output_cost, 6)
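As a quick illustration of how the new module ties name normalization and cost math together (the token counts below are invented for the example; the rates come from the table above):

    from kalibr.pricing import compute_cost, get_pricing

    # A dated model name is normalized before lookup
    pricing, normalized = get_pricing("anthropic", "claude-3-5-sonnet-20240620")
    # normalized == "claude-3-5-sonnet"; pricing == {"input": 3.00, "output": 15.00}

    # 1,200 input tokens and 800 output tokens:
    # (1200 / 1e6) * 3.00 + (800 / 1e6) * 15.00 = 0.0036 + 0.012 = 0.0156
    cost = compute_cost("anthropic", "claude-3-5-sonnet-20240620", 1200, 800)
    print(f"${cost:.6f}")  # $0.015600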
kalibr/router.py
CHANGED
@@ -4,14 +4,47 @@ Kalibr Router - Intelligent model routing with outcome learning.
 
 import os
 import logging
+import uuid
 from typing import Any, Callable, Dict, List, Optional, Union
 
+from opentelemetry import trace as otel_trace
+from opentelemetry.trace import SpanContext, TraceFlags, NonRecordingSpan, set_span_in_context
+from opentelemetry.context import Context
+
 logger = logging.getLogger(__name__)
 
 # Type for paths - either string or dict
 PathSpec = Union[str, Dict[str, Any]]
 
 
+def _create_context_with_trace_id(trace_id_hex: str) -> Optional[Context]:
+    """Create an OTel context with a specific trace_id.
+
+    This allows child spans to inherit the intelligence service's trace_id,
+    enabling JOINs between outcomes and traces tables.
+    """
+    try:
+        # Convert 32-char hex string to 128-bit int
+        trace_id_int = int(trace_id_hex, 16)
+        if trace_id_int == 0:
+            return None
+
+        # Create span context with our trace_id
+        span_context = SpanContext(
+            trace_id=trace_id_int,
+            span_id=0xDEADBEEF,  # Placeholder, real span will have its own
+            is_remote=True,  # Treat as remote parent so new span_id is generated
+            trace_flags=TraceFlags(TraceFlags.SAMPLED),
+        )
+
+        # Create a non-recording parent span and set in context
+        parent_span = NonRecordingSpan(span_context)
+        return set_span_in_context(parent_span)
+    except (ValueError, TypeError) as e:
+        logger.warning(f"Could not create OTel context with trace_id: {e}")
+        return None
+
+
 class Router:
     """
     Routes LLM requests to the best model based on learned outcomes.
@@ -23,6 +56,30 @@ class Router:
         success_when=lambda out: len(out) > 100
     )
     response = router.completion(messages=[...])
+
+    Examples:
+        # Simple auto-reporting
+        router = Router(
+            goal="extract_email",
+            paths=["gpt-4o", "claude-sonnet-4"],
+            success_when=lambda out: "@" in out
+        )
+        response = router.completion(messages=[...])
+        # report() called automatically
+
+        # Manual reporting for complex validation
+        router = Router(
+            goal="book_meeting",
+            paths=["gpt-4o", "claude-sonnet-4"]
+        )
+        response = router.completion(messages=[...])
+        # ... complex validation logic ...
+        router.report(success=meeting_booked)
+
+    Warning:
+        Router is not thread-safe. For concurrent requests, create separate
+        Router instances per thread/task. For sequential requests in a single
+        thread, Router can be reused across multiple completion() calls.
     """
 
     def __init__(
@@ -41,7 +98,16 @@ class Router:
             paths: List of models or path configs. Examples:
                 ["gpt-4o", "claude-3-sonnet"]
                 [{"model": "gpt-4o", "tools": ["search"]}]
-
+                [{"model": "gpt-4o", "params": {"temperature": 0.7}}]
+            success_when: Optional function to auto-evaluate success from LLM output.
+                Takes the output string and returns True/False.
+                When provided, report() is called automatically after completion().
+                Use for simple validations (output length, contains key string).
+                For complex validation (API calls, multi-step checks), omit this
+                and call report() manually.
+                Examples:
+                    success_when=lambda out: len(out) > 0  # Not empty
+                    success_when=lambda out: "@" in out  # Contains email
            exploration_rate: Override exploration rate (0.0-1.0)
            auto_register: If True, register paths on init
        """
@@ -49,6 +115,7 @@ class Router:
         self.success_when = success_when
         self.exploration_rate = exploration_rate
         self._last_trace_id: Optional[str] = None
+        self._last_model_id: Optional[str] = None
         self._last_decision: Optional[dict] = None
         self._outcome_reported = False
 
@@ -106,19 +173,22 @@ class Router:
             **kwargs: Additional args passed to provider
 
         Returns:
-            OpenAI-compatible ChatCompletion response
+            OpenAI-compatible ChatCompletion response with added attribute:
+            - kalibr_trace_id: Trace ID for explicit outcome reporting
        """
        from kalibr.intelligence import decide
-        from kalibr.context import get_trace_id
 
        # Reset state for new request
        self._outcome_reported = False
 
-        # Get routing decision (
+        # Step 1: Get routing decision FIRST (before creating span)
+        decision = None
+        model_id = None
+        tool_id = None
+        params = {}
+
        if force_model:
            model_id = force_model
-            tool_id = None
-            params = {}
            self._last_decision = {"model_id": model_id, "forced": True}
        else:
            try:
@@ -128,44 +198,84 @@ class Router:
                 params = decision.get("params") or {}
                 self._last_decision = decision
             except Exception as e:
-                # Fallback to first path if routing fails
                 logger.warning(f"Routing failed, using fallback: {e}")
                 model_id = self._paths[0]["model"]
                 tool_id = self._paths[0].get("tools")
                 params = self._paths[0].get("params") or {}
                 self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
 
-        #
-
-        response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
-        self._last_trace_id = get_trace_id()
+        # Step 2: Determine trace_id
+        decision_trace_id = self._last_decision.get("trace_id") if self._last_decision else None
 
-
-
-
-
-
-
-
-
+        if decision_trace_id:
+            trace_id = decision_trace_id
+        else:
+            trace_id = uuid.uuid4().hex  # Fallback: generate OTel-compatible format
+
+        self._last_trace_id = trace_id
+        self._last_model_id = model_id
+
+        # Step 3: Create OTel context with intelligence trace_id
+        otel_context = _create_context_with_trace_id(trace_id) if decision_trace_id else None
+
+        # Step 4: Create span with custom context (child spans inherit trace_id)
+        tracer = otel_trace.get_tracer("kalibr.router")
+
+        with tracer.start_as_current_span(
+            "kalibr.router.completion",
+            context=otel_context,
+            attributes={
+                "kalibr.goal": self.goal,
+                "kalibr.trace_id": trace_id,
+                "kalibr.model_id": model_id,
+            }
+        ) as router_span:
+            # Add decision attributes
+            if force_model:
+                router_span.set_attribute("kalibr.forced", True)
+            elif decision:
+                router_span.set_attribute("kalibr.path_id", decision.get("path_id", ""))
+                router_span.set_attribute("kalibr.reason", decision.get("reason", ""))
+                router_span.set_attribute("kalibr.exploration", decision.get("exploration", False))
+                router_span.set_attribute("kalibr.confidence", decision.get("confidence", 0.0))
+            else:
+                router_span.set_attribute("kalibr.fallback", True)
 
-
+            # Step 5: Dispatch to provider
+            try:
+                response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
 
-
-
-
-
-
-
-
-
-
+                # Auto-report if success_when provided
+                if self.success_when and not self._outcome_reported:
+                    try:
+                        output = response.choices[0].message.content or ""
+                        success = self.success_when(output)
+                        self.report(success=success)
+                    except Exception as e:
+                        logger.warning(f"Auto-outcome evaluation failed: {e}")
+
+                # Add trace_id to response for explicit linkage
+                response.kalibr_trace_id = trace_id
+                return response
+
+            except Exception as e:
+                router_span.set_attribute("error", True)
+                router_span.set_attribute("error.type", type(e).__name__)
+
+                # Auto-report failure
+                if not self._outcome_reported:
+                    try:
+                        self.report(success=False, reason=f"provider_error: {type(e).__name__}")
+                    except:
+                        pass
+                raise
 
     def report(
         self,
         success: bool,
         reason: Optional[str] = None,
         score: Optional[float] = None,
+        trace_id: Optional[str] = None,
     ):
         """
         Report outcome for the last completion.
@@ -174,18 +284,17 @@ class Router:
             success: Whether the task succeeded
             reason: Optional failure reason
             score: Optional quality score (0.0-1.0)
+            trace_id: Optional explicit trace ID (uses last completion's trace_id if not provided)
         """
         if self._outcome_reported:
-            logger.warning("Outcome already reported for this
+            logger.warning("Outcome already reported for this completion. Each completion() requires a separate report() call.")
             return
 
         from kalibr.intelligence import report_outcome
-        from kalibr.context import get_trace_id
 
-        trace_id = self._last_trace_id
+        trace_id = trace_id or self._last_trace_id
         if not trace_id:
-
-            return
+            raise ValueError("Must call completion() before report(). No trace_id available.")
 
         try:
             report_outcome(
@@ -194,6 +303,7 @@ class Router:
                 success=success,
                 score=score,
                 failure_reason=reason,
+                model_id=self._last_model_id,
             )
             self._outcome_reported = True
         except Exception as e:
kalibr/simple_tracer.py
CHANGED
@@ -19,6 +19,8 @@ Capsule Usage (automatic when middleware is active):
     def process_request(request: Request, prompt: str):
         # Capsule automatically updated with this hop
         return llm_call(prompt)
+
+Note: Uses centralized pricing from kalibr.pricing module.
 """
 
 import json
@@ -31,6 +33,8 @@ from datetime import datetime, timezone
 from functools import wraps
 from typing import Callable, Optional
 
+from kalibr.pricing import compute_cost
+
 try:
     import requests
 except ImportError:
@@ -53,7 +57,7 @@ def send_event(payload: dict):
         print("[Kalibr SDK] ❌ requests library not available")
         return
 
-    url = os.getenv("KALIBR_COLLECTOR_URL", "https://
+    url = os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest")
     api_key = os.getenv("KALIBR_API_KEY")
     if not api_key:
         print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, traces will not be sent")
@@ -155,21 +159,18 @@ def trace(
            actual_input_tokens = input_tokens or kwargs.get("input_tokens", 1000)
            actual_output_tokens = output_tokens or kwargs.get("output_tokens", 500)
 
-            # Cost calculation
-            #
-
-
-
-
-
-
-
-            # Get unit price
-            provider_pricing = pricing_map.get(provider, {})
-            unit_price_usd = provider_pricing.get(model, 0.00002000)  # Default $0.02/1M
+            # Cost calculation using centralized pricing
+            # This ensures consistency with all other cost adapters
+            total_cost_usd = compute_cost(
+                vendor=provider,
+                model_name=model,
+                input_tokens=actual_input_tokens,
+                output_tokens=actual_output_tokens,
+            )
 
-            # Calculate total cost
-
+            # Calculate unit price for backward compatibility (total cost / total tokens)
+            total_tokens = actual_input_tokens + actual_output_tokens
+            unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
 
            # Build payload
            payload = {
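To make the backward-compatibility math concrete (rates from the gpt-4o entry in the pricing table; decorator internals elided):

    from kalibr.pricing import compute_cost

    input_tokens, output_tokens = 1000, 500
    total_cost_usd = compute_cost("openai", "gpt-4o", input_tokens, output_tokens)
    # (1000 / 1e6) * 2.50 + (500 / 1e6) * 10.00 = 0.0025 + 0.005 = 0.0075

    # The legacy payload field is now derived rather than looked up:
    unit_price_usd = total_cost_usd / (input_tokens + output_tokens)
    print(unit_price_usd)  # 5e-06, i.e. a blended $5.00 per 1M tokens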
kalibr/trace_capsule.py
CHANGED
@@ -28,6 +28,7 @@ Usage:
 """
 
 import json
+import threading
 import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional
@@ -85,12 +86,16 @@ class TraceCapsule:
         # Phase 3C: Context token propagation (keep as UUID for consistency)
         self.context_token = context_token or str(uuid.uuid4())
         self.parent_context_token = parent_context_token
+        # Thread-safety: Lock for protecting concurrent append_hop operations
+        self._lock = threading.Lock()
 
     def append_hop(self, hop: Dict[str, Any]) -> None:
         """Append a new hop to the capsule.
 
         Maintains a rolling window of last N hops to keep payload compact.
         Updates aggregate metrics automatically.
+
+        Thread-safe: Uses internal lock to protect concurrent modifications.
 
         Args:
             hop: Dictionary containing hop metadata
@@ -111,22 +116,24 @@ class TraceCapsule:
                 "agent_name": "code-writer"
             })
         """
-        #
-
+        # Thread-safe update of capsule state
+        with self._lock:
+            # Add hop_index
+            hop["hop_index"] = len(self.last_n_hops)
 
-
-
+            # Append to history
+            self.last_n_hops.append(hop)
 
-
-
-
+            # Maintain rolling window (keep last N hops)
+            if len(self.last_n_hops) > self.MAX_HOPS:
+                self.last_n_hops.pop(0)
 
-
-
-
+            # Update aggregates
+            self.aggregate_cost_usd += hop.get("cost_usd", 0.0)
+            self.aggregate_latency_ms += hop.get("duration_ms", 0.0)
 
-
-
+            # Update timestamp
+            self.timestamp = datetime.now(timezone.utc).isoformat()
 
     def get_last_hop(self) -> Optional[Dict[str, Any]]:
         """Get the most recent hop.
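A small sketch of why the lock matters, assuming TraceCapsule can be constructed with defaults (its full constructor signature is not shown in this diff):

    import threading
    from kalibr.trace_capsule import TraceCapsule

    capsule = TraceCapsule()  # assumed default constructor

    def record(i: int) -> None:
        # Each hop mutates last_n_hops and the aggregates; without the lock,
        # concurrent appends could interleave and corrupt hop_index/aggregates.
        capsule.append_hop({"cost_usd": 0.001, "duration_ms": 12.5, "agent_name": f"worker-{i}"})

    threads = [threading.Thread(target=record, args=(i,)) for i in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    print(round(capsule.aggregate_cost_usd, 6))  # 0.008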
kalibr/utils.py
CHANGED
@@ -38,8 +38,8 @@ def load_config_from_env() -> Dict[str, str]:
         "workflow_id": os.getenv("KALIBR_WORKFLOW_ID", "default-workflow"),
         "sandbox_id": os.getenv("SANDBOX_ID", "local"),
         "runtime_env": os.getenv("RUNTIME_ENV", "local"),
-        "api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://
-        "collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://
+        "api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://kalibr-backend.fly.dev/api/v1/traces"),
+        "collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest"),
     }
     return config
 
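Both defaults remain env-overridable, so a minimal sketch of pointing the SDK at a self-hosted collector (the internal URL is a placeholder):

    import os

    # Override before the SDK reads its config
    os.environ["KALIBR_COLLECTOR_URL"] = "https://collector.internal.example/api/ingest"

    from kalibr.utils import load_config_from_env

    config = load_config_from_env()
    print(config["collector_url"])  # https://collector.internal.example/api/ingest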