kalibr 1.2.5__py3-none-any.whl → 1.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +2 -2
- kalibr/cli/capsule_cmd.py +3 -3
- kalibr/cli/run.py +2 -2
- kalibr/client.py +1 -1
- kalibr/collector.py +227 -48
- kalibr/cost_adapter.py +36 -104
- kalibr/instrumentation/anthropic_instr.py +34 -40
- kalibr/instrumentation/base.py +27 -9
- kalibr/instrumentation/google_instr.py +34 -39
- kalibr/instrumentation/openai_instr.py +34 -28
- kalibr/instrumentation/registry.py +38 -13
- kalibr/intelligence.py +28 -16
- kalibr/middleware/auto_tracer.py +1 -1
- kalibr/pricing.py +245 -0
- kalibr/router.py +139 -53
- kalibr/simple_tracer.py +16 -15
- kalibr/trace_capsule.py +19 -12
- kalibr/utils.py +2 -2
- {kalibr-1.2.5.dist-info → kalibr-1.2.8.dist-info}/METADATA +115 -15
- {kalibr-1.2.5.dist-info → kalibr-1.2.8.dist-info}/RECORD +24 -23
- {kalibr-1.2.5.dist-info → kalibr-1.2.8.dist-info}/LICENSE +0 -0
- {kalibr-1.2.5.dist-info → kalibr-1.2.8.dist-info}/WHEEL +0 -0
- {kalibr-1.2.5.dist-info → kalibr-1.2.8.dist-info}/entry_points.txt +0 -0
- {kalibr-1.2.5.dist-info → kalibr-1.2.8.dist-info}/top_level.txt +0 -0
kalibr/pricing.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""Centralized pricing data for all LLM vendors.
|
|
2
|
+
|
|
3
|
+
This module serves as the single source of truth for model pricing across
|
|
4
|
+
the entire Kalibr SDK. All cost adapters and instrumentation modules should
|
|
5
|
+
use this pricing data to ensure consistency.
|
|
6
|
+
|
|
7
|
+
All prices are in USD per 1 million tokens, matching the format used by
|
|
8
|
+
major LLM providers (OpenAI, Anthropic, etc.) on their pricing pages.
|
|
9
|
+
|
|
10
|
+
Version: 2026-01
|
|
11
|
+
Last Updated: January 2026
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Dict, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
# Pricing version for tracking updates
|
|
17
|
+
PRICING_VERSION = "2026-01"
|
|
18
|
+
|
|
19
|
+
# All prices in USD per 1M tokens
|
|
20
|
+
MODEL_PRICING: Dict[str, Dict[str, Dict[str, float]]] = {
|
|
21
|
+
"openai": {
|
|
22
|
+
# GPT-5 models (future-proofing)
|
|
23
|
+
"gpt-5": {"input": 5.00, "output": 15.00},
|
|
24
|
+
"gpt-5-turbo": {"input": 2.50, "output": 7.50},
|
|
25
|
+
# GPT-4 models
|
|
26
|
+
"gpt-4": {"input": 30.00, "output": 60.00},
|
|
27
|
+
"gpt-4-turbo": {"input": 10.00, "output": 30.00},
|
|
28
|
+
"gpt-4o": {"input": 2.50, "output": 10.00},
|
|
29
|
+
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
|
|
30
|
+
# GPT-3.5 models
|
|
31
|
+
"gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
|
|
32
|
+
"gpt-3.5-turbo-16k": {"input": 1.00, "output": 2.00},
|
|
33
|
+
},
|
|
34
|
+
"anthropic": {
|
|
35
|
+
# Claude 4 models (future-proofing)
|
|
36
|
+
"claude-4-opus": {"input": 15.00, "output": 75.00},
|
|
37
|
+
"claude-4-sonnet": {"input": 3.00, "output": 15.00},
|
|
38
|
+
# Claude 3.5/3.7 models (Sonnet 4 is actually Claude 3.7)
|
|
39
|
+
"claude-sonnet-4": {"input": 3.00, "output": 15.00},
|
|
40
|
+
"claude-3-7-sonnet": {"input": 3.00, "output": 15.00},
|
|
41
|
+
"claude-3-5-sonnet": {"input": 3.00, "output": 15.00},
|
|
42
|
+
# Claude 3 models
|
|
43
|
+
"claude-3-opus": {"input": 15.00, "output": 75.00},
|
|
44
|
+
"claude-3-sonnet": {"input": 3.00, "output": 15.00},
|
|
45
|
+
"claude-3-haiku": {"input": 0.25, "output": 1.25},
|
|
46
|
+
# Claude 2 models
|
|
47
|
+
"claude-2.1": {"input": 8.00, "output": 24.00},
|
|
48
|
+
"claude-2.0": {"input": 8.00, "output": 24.00},
|
|
49
|
+
"claude-instant-1.2": {"input": 0.80, "output": 2.40},
|
|
50
|
+
},
|
|
51
|
+
"google": {
|
|
52
|
+
# Gemini 2.5 models
|
|
53
|
+
"gemini-2.5-pro": {"input": 1.25, "output": 5.00},
|
|
54
|
+
"gemini-2.5-flash": {"input": 0.075, "output": 0.30},
|
|
55
|
+
# Gemini 2.0 models
|
|
56
|
+
"gemini-2.0-flash": {"input": 0.075, "output": 0.30},
|
|
57
|
+
"gemini-2.0-flash-thinking": {"input": 0.075, "output": 0.30},
|
|
58
|
+
# Gemini 1.5 models
|
|
59
|
+
"gemini-1.5-pro": {"input": 1.25, "output": 5.00},
|
|
60
|
+
"gemini-1.5-flash": {"input": 0.075, "output": 0.30},
|
|
61
|
+
"gemini-1.5-flash-8b": {"input": 0.0375, "output": 0.15},
|
|
62
|
+
# Gemini 1.0 models
|
|
63
|
+
"gemini-1.0-pro": {"input": 0.50, "output": 1.50},
|
|
64
|
+
"gemini-pro": {"input": 0.50, "output": 1.50}, # Alias
|
|
65
|
+
},
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
# Default fallback pricing per vendor (highest tier pricing for safety)
|
|
69
|
+
DEFAULT_PRICING: Dict[str, Dict[str, float]] = {
|
|
70
|
+
"openai": {"input": 30.00, "output": 60.00}, # GPT-4 pricing
|
|
71
|
+
"anthropic": {"input": 15.00, "output": 75.00}, # Claude 3 Opus pricing
|
|
72
|
+
"google": {"input": 1.25, "output": 5.00}, # Gemini 1.5 Pro pricing
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def normalize_model_name(vendor: str, model_name: str) -> str:
    """Map a raw API model name onto a key in the pricing table.

    Strips date/version suffixes and resolves common aliases so names like
    ``gpt-4o-2024-05-13`` resolve to ``gpt-4o``.

    Args:
        vendor: Vendor name (openai, anthropic, google), case-insensitive.
        model_name: Raw model name as reported by the provider API.

    Returns:
        A lowercased key present in MODEL_PRICING when a match is found,
        otherwise the lowercased original name.

    Example:
        >>> normalize_model_name("openai", "gpt-4o-2024-05-13")
        'gpt-4o'
        >>> normalize_model_name("anthropic", "claude-3-5-sonnet-20240620")
        'claude-3-5-sonnet'
    """
    vendor_key = vendor.lower()
    raw = model_name.lower()
    known = MODEL_PRICING.get(vendor_key, {})

    # An exact hit against the pricing table wins immediately.
    if raw in known:
        return raw

    if vendor_key == "openai":
        # Date-stamped releases look like "gpt-4o-2024-05-13"; chop at the
        # first "-2" to recover the base name and retry the exact lookup.
        stem = raw.split("-2")[0] if "-2" in raw else raw
        if stem in known:
            return stem

    # Ordered substring rules per vendor; the first match wins, so more
    # specific names must precede their prefixes (e.g. "gpt-4o-mini"
    # before "gpt-4o", "gemini-1.5-flash-8b" before "gemini-1.5-flash").
    fuzzy_rules = {
        "openai": [
            (("gpt-4o-mini",), "gpt-4o-mini"),
            (("gpt-4o",), "gpt-4o"),
            (("gpt-5-turbo",), "gpt-5-turbo"),
            (("gpt-5",), "gpt-5"),
            (("gpt-4-turbo",), "gpt-4-turbo"),
            (("gpt-4",), "gpt-4"),
            (("gpt-3.5-turbo-16k",), "gpt-3.5-turbo-16k"),
            (("gpt-3.5",), "gpt-3.5-turbo"),
        ],
        "anthropic": [
            (("claude-3.5-sonnet", "claude-3-5-sonnet"), "claude-3-5-sonnet"),
            (("claude-sonnet-4", "sonnet-4"), "claude-sonnet-4"),
            (("claude-3-7-sonnet",), "claude-3-7-sonnet"),
            (("claude-4-opus",), "claude-4-opus"),
            (("claude-4-sonnet",), "claude-4-sonnet"),
            (("claude-3-opus",), "claude-3-opus"),
            (("claude-3-sonnet",), "claude-3-sonnet"),
            (("claude-3-haiku",), "claude-3-haiku"),
            (("claude-2.1",), "claude-2.1"),
            (("claude-2.0", "claude-2"), "claude-2.0"),
            (("claude-instant",), "claude-instant-1.2"),
        ],
        "google": [
            (("gemini-2.5-pro",), "gemini-2.5-pro"),
            (("gemini-2.5-flash",), "gemini-2.5-flash"),
            (("gemini-2.0-flash-thinking",), "gemini-2.0-flash-thinking"),
            (("gemini-2.0-flash",), "gemini-2.0-flash"),
            (("gemini-1.5-flash-8b",), "gemini-1.5-flash-8b"),
            (("gemini-1.5-flash",), "gemini-1.5-flash"),
            (("gemini-1.5-pro",), "gemini-1.5-pro"),
            (("gemini-1.0-pro", "gemini-pro"), "gemini-pro"),
        ],
    }

    for needles, canonical in fuzzy_rules.get(vendor_key, []):
        if any(needle in raw for needle in needles):
            return canonical

    # No rule matched — hand back the lowercased original unchanged.
    return raw
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_pricing(
    vendor: str, model_name: str
) -> Tuple[Dict[str, float], str]:
    """Resolve the per-1M-token price entry for a vendor/model pair.

    Args:
        vendor: Vendor name (openai, anthropic, google), case-insensitive.
        model_name: Model identifier as reported by the provider.

    Returns:
        Tuple of (pricing dict with 'input' and 'output' keys in USD per
        1M tokens, normalized model name used for the lookup).

    Example:
        >>> pricing, normalized = get_pricing("openai", "gpt-4o")
        >>> pricing["input"], pricing["output"], normalized
        (2.5, 10.0, 'gpt-4o')
    """
    vendor_key = vendor.lower()
    canonical = normalize_model_name(vendor_key, model_name)

    # Exact lookup against the vendor's table; unknown vendors yield {}.
    entry = MODEL_PRICING.get(vendor_key, {}).get(canonical)
    if entry is not None:
        return entry, canonical

    # Unknown model: use the vendor's conservative default, or a generic
    # high-tier fallback when even the vendor is unrecognized.
    fallback = DEFAULT_PRICING.get(vendor_key, {"input": 20.00, "output": 60.00})
    return fallback, canonical
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def compute_cost(
    vendor: str, model_name: str, input_tokens: int, output_tokens: int
) -> float:
    """Compute the USD cost of a single LLM call.

    Convenience wrapper that resolves pricing via get_pricing() and applies
    it to the supplied token counts.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Model identifier
        input_tokens: Number of prompt/input tokens
        output_tokens: Number of completion/output tokens

    Returns:
        Cost in USD (rounded to 6 decimal places)

    Example:
        >>> compute_cost("openai", "gpt-4o", 1000, 500)
        0.0075
    """
    rates, _ = get_pricing(vendor, model_name)

    # Rates are quoted per 1M tokens, so scale each count down first.
    prompt_cost = (input_tokens / 1_000_000) * rates["input"]
    completion_cost = (output_tokens / 1_000_000) * rates["output"]

    # Micro-dollar precision is enough for aggregation downstream.
    return round(prompt_cost + completion_cost, 6)
|
|
245
|
+
|
kalibr/router.py
CHANGED
|
@@ -4,8 +4,11 @@ Kalibr Router - Intelligent model routing with outcome learning.
|
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
6
|
import logging
|
|
7
|
+
import uuid
|
|
7
8
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
8
9
|
|
|
10
|
+
from opentelemetry import trace as otel_trace
|
|
11
|
+
|
|
9
12
|
logger = logging.getLogger(__name__)
|
|
10
13
|
|
|
11
14
|
# Type for paths - either string or dict
|
|
@@ -23,6 +26,30 @@ class Router:
|
|
|
23
26
|
success_when=lambda out: len(out) > 100
|
|
24
27
|
)
|
|
25
28
|
response = router.completion(messages=[...])
|
|
29
|
+
|
|
30
|
+
Examples:
|
|
31
|
+
# Simple auto-reporting
|
|
32
|
+
router = Router(
|
|
33
|
+
goal="extract_email",
|
|
34
|
+
paths=["gpt-4o", "claude-sonnet-4"],
|
|
35
|
+
success_when=lambda out: "@" in out
|
|
36
|
+
)
|
|
37
|
+
response = router.completion(messages=[...])
|
|
38
|
+
# report() called automatically
|
|
39
|
+
|
|
40
|
+
# Manual reporting for complex validation
|
|
41
|
+
router = Router(
|
|
42
|
+
goal="book_meeting",
|
|
43
|
+
paths=["gpt-4o", "claude-sonnet-4"]
|
|
44
|
+
)
|
|
45
|
+
response = router.completion(messages=[...])
|
|
46
|
+
# ... complex validation logic ...
|
|
47
|
+
router.report(success=meeting_booked)
|
|
48
|
+
|
|
49
|
+
Warning:
|
|
50
|
+
Router is not thread-safe. For concurrent requests, create separate
|
|
51
|
+
Router instances per thread/task. For sequential requests in a single
|
|
52
|
+
thread, Router can be reused across multiple completion() calls.
|
|
26
53
|
"""
|
|
27
54
|
|
|
28
55
|
def __init__(
|
|
@@ -41,7 +68,16 @@ class Router:
|
|
|
41
68
|
paths: List of models or path configs. Examples:
|
|
42
69
|
["gpt-4o", "claude-3-sonnet"]
|
|
43
70
|
[{"model": "gpt-4o", "tools": ["search"]}]
|
|
44
|
-
|
|
71
|
+
[{"model": "gpt-4o", "params": {"temperature": 0.7}}]
|
|
72
|
+
success_when: Optional function to auto-evaluate success from LLM output.
|
|
73
|
+
Takes the output string and returns True/False.
|
|
74
|
+
When provided, report() is called automatically after completion().
|
|
75
|
+
Use for simple validations (output length, contains key string).
|
|
76
|
+
For complex validation (API calls, multi-step checks), omit this
|
|
77
|
+
and call report() manually.
|
|
78
|
+
Examples:
|
|
79
|
+
success_when=lambda out: len(out) > 0 # Not empty
|
|
80
|
+
success_when=lambda out: "@" in out # Contains email
|
|
45
81
|
exploration_rate: Override exploration rate (0.0-1.0)
|
|
46
82
|
auto_register: If True, register paths on init
|
|
47
83
|
"""
|
|
@@ -49,6 +85,7 @@ class Router:
|
|
|
49
85
|
self.success_when = success_when
|
|
50
86
|
self.exploration_rate = exploration_rate
|
|
51
87
|
self._last_trace_id: Optional[str] = None
|
|
88
|
+
self._last_model_id: Optional[str] = None
|
|
52
89
|
self._last_decision: Optional[dict] = None
|
|
53
90
|
self._outcome_reported = False
|
|
54
91
|
|
|
@@ -106,66 +143,115 @@ class Router:
|
|
|
106
143
|
**kwargs: Additional args passed to provider
|
|
107
144
|
|
|
108
145
|
Returns:
|
|
109
|
-
OpenAI-compatible ChatCompletion response
|
|
146
|
+
OpenAI-compatible ChatCompletion response with added attribute:
|
|
147
|
+
- kalibr_trace_id: Trace ID for explicit outcome reporting
|
|
148
|
+
|
|
149
|
+
Raises:
|
|
150
|
+
openai.OpenAIError: If OpenAI API call fails
|
|
151
|
+
anthropic.AnthropicError: If Anthropic API call fails
|
|
152
|
+
google.generativeai.GenerativeAIError: If Google API call fails
|
|
153
|
+
ImportError: If required provider SDK is not installed
|
|
110
154
|
"""
|
|
111
155
|
from kalibr.intelligence import decide
|
|
112
|
-
from kalibr.context import get_trace_id
|
|
113
156
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
tool_id = self._paths[0].get("tools")
|
|
135
|
-
params = self._paths[0].get("params") or {}
|
|
136
|
-
self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
|
|
137
|
-
|
|
138
|
-
# Dispatch to provider
|
|
139
|
-
try:
|
|
140
|
-
response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
|
|
141
|
-
self._last_trace_id = get_trace_id()
|
|
142
|
-
|
|
143
|
-
# Auto-report if success_when provided
|
|
144
|
-
if self.success_when and not self._outcome_reported:
|
|
157
|
+
tracer = otel_trace.get_tracer("kalibr.router")
|
|
158
|
+
|
|
159
|
+
with tracer.start_as_current_span(
|
|
160
|
+
"kalibr.router.completion",
|
|
161
|
+
attributes={
|
|
162
|
+
"kalibr.goal": self.goal,
|
|
163
|
+
}
|
|
164
|
+
) as router_span:
|
|
165
|
+
# Reset state for new request
|
|
166
|
+
self._outcome_reported = False
|
|
167
|
+
|
|
168
|
+
# Get routing decision (or use forced model)
|
|
169
|
+
if force_model:
|
|
170
|
+
model_id = force_model
|
|
171
|
+
tool_id = None
|
|
172
|
+
params = {}
|
|
173
|
+
self._last_decision = {"model_id": model_id, "forced": True}
|
|
174
|
+
router_span.set_attribute("kalibr.model_id", model_id)
|
|
175
|
+
router_span.set_attribute("kalibr.forced", True)
|
|
176
|
+
else:
|
|
145
177
|
try:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
178
|
+
decision = decide(goal=self.goal)
|
|
179
|
+
model_id = decision.get("model_id") or self._paths[0]["model"]
|
|
180
|
+
tool_id = decision.get("tool_id")
|
|
181
|
+
params = decision.get("params") or {}
|
|
182
|
+
self._last_decision = decision
|
|
183
|
+
|
|
184
|
+
# Add decision attributes to span
|
|
185
|
+
router_span.set_attribute("kalibr.path_id", decision.get("path_id", ""))
|
|
186
|
+
router_span.set_attribute("kalibr.model_id", model_id)
|
|
187
|
+
router_span.set_attribute("kalibr.reason", decision.get("reason", ""))
|
|
188
|
+
router_span.set_attribute("kalibr.exploration", decision.get("exploration", False))
|
|
189
|
+
router_span.set_attribute("kalibr.confidence", decision.get("confidence", 0.0))
|
|
149
190
|
except Exception as e:
|
|
150
|
-
|
|
191
|
+
# Fallback to first path if routing fails
|
|
192
|
+
logger.warning(f"Routing failed, using fallback: {e}")
|
|
193
|
+
model_id = self._paths[0]["model"]
|
|
194
|
+
tool_id = self._paths[0].get("tools")
|
|
195
|
+
params = self._paths[0].get("params") or {}
|
|
196
|
+
self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
|
|
197
|
+
router_span.set_attribute("kalibr.model_id", model_id)
|
|
198
|
+
router_span.set_attribute("kalibr.fallback", True)
|
|
199
|
+
router_span.set_attribute("kalibr.fallback_reason", str(e))
|
|
200
|
+
|
|
201
|
+
# Use trace_id from decision if available (links outcome to routing decision)
|
|
202
|
+
# Fall back to OTel span trace_id for backwards compatibility
|
|
203
|
+
decision_trace_id = self._last_decision.get("trace_id") if self._last_decision else None
|
|
204
|
+
|
|
205
|
+
if decision_trace_id:
|
|
206
|
+
trace_id = decision_trace_id
|
|
207
|
+
else:
|
|
208
|
+
# Fallback: generate from OTel span or UUID
|
|
209
|
+
span_context = router_span.get_span_context()
|
|
210
|
+
trace_id = format(span_context.trace_id, "032x")
|
|
211
|
+
if trace_id == "0" * 32:
|
|
212
|
+
trace_id = uuid.uuid4().hex
|
|
213
|
+
|
|
214
|
+
logger.debug(f"Using trace_id={trace_id} (from_decision={bool(decision_trace_id)})")
|
|
215
|
+
self._last_trace_id = trace_id
|
|
216
|
+
self._last_model_id = model_id
|
|
217
|
+
router_span.set_attribute("kalibr.trace_id", trace_id)
|
|
218
|
+
|
|
219
|
+
# Dispatch to provider (will be child span via auto-instrumentation)
|
|
220
|
+
try:
|
|
221
|
+
response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
|
|
151
222
|
|
|
152
|
-
|
|
223
|
+
# Auto-report if success_when provided
|
|
224
|
+
if self.success_when and not self._outcome_reported:
|
|
225
|
+
try:
|
|
226
|
+
output = response.choices[0].message.content or ""
|
|
227
|
+
success = self.success_when(output)
|
|
228
|
+
self.report(success=success)
|
|
229
|
+
except Exception as e:
|
|
230
|
+
logger.warning(f"Auto-outcome evaluation failed: {e}")
|
|
153
231
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
232
|
+
# Add trace_id to response for explicit linkage
|
|
233
|
+
response.kalibr_trace_id = trace_id
|
|
234
|
+
return response
|
|
235
|
+
|
|
236
|
+
except Exception as e:
|
|
237
|
+
# Record error on span
|
|
238
|
+
router_span.set_attribute("error", True)
|
|
239
|
+
router_span.set_attribute("error.type", type(e).__name__)
|
|
240
|
+
|
|
241
|
+
# Auto-report failure
|
|
242
|
+
if not self._outcome_reported:
|
|
243
|
+
try:
|
|
244
|
+
self.report(success=False, reason=f"provider_error: {type(e).__name__}")
|
|
245
|
+
except:
|
|
246
|
+
pass
|
|
247
|
+
raise
|
|
163
248
|
|
|
164
249
|
def report(
|
|
165
250
|
self,
|
|
166
251
|
success: bool,
|
|
167
252
|
reason: Optional[str] = None,
|
|
168
253
|
score: Optional[float] = None,
|
|
254
|
+
trace_id: Optional[str] = None,
|
|
169
255
|
):
|
|
170
256
|
"""
|
|
171
257
|
Report outcome for the last completion.
|
|
@@ -174,18 +260,17 @@ class Router:
|
|
|
174
260
|
success: Whether the task succeeded
|
|
175
261
|
reason: Optional failure reason
|
|
176
262
|
score: Optional quality score (0.0-1.0)
|
|
263
|
+
trace_id: Optional explicit trace ID (uses last completion's trace_id if not provided)
|
|
177
264
|
"""
|
|
178
265
|
if self._outcome_reported:
|
|
179
|
-
logger.warning("Outcome already reported for this
|
|
266
|
+
logger.warning("Outcome already reported for this completion. Each completion() requires a separate report() call.")
|
|
180
267
|
return
|
|
181
268
|
|
|
182
269
|
from kalibr.intelligence import report_outcome
|
|
183
|
-
from kalibr.context import get_trace_id
|
|
184
270
|
|
|
185
|
-
trace_id = self._last_trace_id
|
|
271
|
+
trace_id = trace_id or self._last_trace_id
|
|
186
272
|
if not trace_id:
|
|
187
|
-
|
|
188
|
-
return
|
|
273
|
+
raise ValueError("Must call completion() before report(). No trace_id available.")
|
|
189
274
|
|
|
190
275
|
try:
|
|
191
276
|
report_outcome(
|
|
@@ -194,6 +279,7 @@ class Router:
|
|
|
194
279
|
success=success,
|
|
195
280
|
score=score,
|
|
196
281
|
failure_reason=reason,
|
|
282
|
+
model_id=self._last_model_id,
|
|
197
283
|
)
|
|
198
284
|
self._outcome_reported = True
|
|
199
285
|
except Exception as e:
|
kalibr/simple_tracer.py
CHANGED
|
@@ -19,6 +19,8 @@ Capsule Usage (automatic when middleware is active):
|
|
|
19
19
|
def process_request(request: Request, prompt: str):
|
|
20
20
|
# Capsule automatically updated with this hop
|
|
21
21
|
return llm_call(prompt)
|
|
22
|
+
|
|
23
|
+
Note: Uses centralized pricing from kalibr.pricing module.
|
|
22
24
|
"""
|
|
23
25
|
|
|
24
26
|
import json
|
|
@@ -31,6 +33,8 @@ from datetime import datetime, timezone
|
|
|
31
33
|
from functools import wraps
|
|
32
34
|
from typing import Callable, Optional
|
|
33
35
|
|
|
36
|
+
from kalibr.pricing import compute_cost
|
|
37
|
+
|
|
34
38
|
try:
|
|
35
39
|
import requests
|
|
36
40
|
except ImportError:
|
|
@@ -53,7 +57,7 @@ def send_event(payload: dict):
|
|
|
53
57
|
print("[Kalibr SDK] ❌ requests library not available")
|
|
54
58
|
return
|
|
55
59
|
|
|
56
|
-
url = os.getenv("KALIBR_COLLECTOR_URL", "https://
|
|
60
|
+
url = os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest")
|
|
57
61
|
api_key = os.getenv("KALIBR_API_KEY")
|
|
58
62
|
if not api_key:
|
|
59
63
|
print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, traces will not be sent")
|
|
@@ -155,21 +159,18 @@ def trace(
|
|
|
155
159
|
actual_input_tokens = input_tokens or kwargs.get("input_tokens", 1000)
|
|
156
160
|
actual_output_tokens = output_tokens or kwargs.get("output_tokens", 500)
|
|
157
161
|
|
|
158
|
-
# Cost calculation
|
|
159
|
-
#
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
# Get unit price
|
|
168
|
-
provider_pricing = pricing_map.get(provider, {})
|
|
169
|
-
unit_price_usd = provider_pricing.get(model, 0.00002000) # Default $0.02/1M
|
|
162
|
+
# Cost calculation using centralized pricing
|
|
163
|
+
# This ensures consistency with all other cost adapters
|
|
164
|
+
total_cost_usd = compute_cost(
|
|
165
|
+
vendor=provider,
|
|
166
|
+
model_name=model,
|
|
167
|
+
input_tokens=actual_input_tokens,
|
|
168
|
+
output_tokens=actual_output_tokens,
|
|
169
|
+
)
|
|
170
170
|
|
|
171
|
-
# Calculate total cost
|
|
172
|
-
|
|
171
|
+
# Calculate unit price for backward compatibility (total cost / total tokens)
|
|
172
|
+
total_tokens = actual_input_tokens + actual_output_tokens
|
|
173
|
+
unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
|
|
173
174
|
|
|
174
175
|
# Build payload
|
|
175
176
|
payload = {
|
kalibr/trace_capsule.py
CHANGED
|
@@ -28,6 +28,7 @@ Usage:
|
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
30
|
import json
|
|
31
|
+
import threading
|
|
31
32
|
import uuid
|
|
32
33
|
from datetime import datetime, timezone
|
|
33
34
|
from typing import Any, Dict, List, Optional
|
|
@@ -85,12 +86,16 @@ class TraceCapsule:
|
|
|
85
86
|
# Phase 3C: Context token propagation (keep as UUID for consistency)
|
|
86
87
|
self.context_token = context_token or str(uuid.uuid4())
|
|
87
88
|
self.parent_context_token = parent_context_token
|
|
89
|
+
# Thread-safety: Lock for protecting concurrent append_hop operations
|
|
90
|
+
self._lock = threading.Lock()
|
|
88
91
|
|
|
89
92
|
def append_hop(self, hop: Dict[str, Any]) -> None:
    """Append a new hop to the capsule.

    Maintains a rolling window of the last MAX_HOPS hops to keep the
    payload compact, and updates the aggregate cost/latency metrics and
    the capsule timestamp automatically.

    Thread-safe: all state changes happen under the capsule's internal
    lock, so concurrent append_hop() calls cannot interleave.

    NOTE: mutates the caller's ``hop`` dict in place by adding a
    ``hop_index`` key (the hop's position within the current window at
    insertion time — not a global monotonic counter, since the window
    is trimmed).

    Args:
        hop: Dictionary containing hop metadata. ``cost_usd`` and
            ``duration_ms`` keys, when present, are folded into the
            capsule aggregates; missing keys default to 0.0.
    """
    # Thread-safe update of capsule state
    with self._lock:
        # Record position within the current window before appending
        hop["hop_index"] = len(self.last_n_hops)

        # Append to history
        self.last_n_hops.append(hop)

        # Maintain rolling window (drop the oldest hop beyond MAX_HOPS)
        if len(self.last_n_hops) > self.MAX_HOPS:
            self.last_n_hops.pop(0)

        # Fold this hop into the running aggregates
        self.aggregate_cost_usd += hop.get("cost_usd", 0.0)
        self.aggregate_latency_ms += hop.get("duration_ms", 0.0)

        # Refresh the capsule's last-modified timestamp (UTC, ISO 8601)
        self.timestamp = datetime.now(timezone.utc).isoformat()
|
|
130
137
|
|
|
131
138
|
def get_last_hop(self) -> Optional[Dict[str, Any]]:
|
|
132
139
|
"""Get the most recent hop.
|
kalibr/utils.py
CHANGED
|
@@ -38,8 +38,8 @@ def load_config_from_env() -> Dict[str, str]:
|
|
|
38
38
|
"workflow_id": os.getenv("KALIBR_WORKFLOW_ID", "default-workflow"),
|
|
39
39
|
"sandbox_id": os.getenv("SANDBOX_ID", "local"),
|
|
40
40
|
"runtime_env": os.getenv("RUNTIME_ENV", "local"),
|
|
41
|
-
"api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://
|
|
42
|
-
"collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://
|
|
41
|
+
"api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://kalibr-backend.fly.dev/api/v1/traces"),
|
|
42
|
+
"collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest"),
|
|
43
43
|
}
|
|
44
44
|
return config
|
|
45
45
|
|