kalibr 1.1.3a0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +41 -3
- kalibr/cli/capsule_cmd.py +3 -3
- kalibr/cli/main.py +3 -3
- kalibr/cli/run.py +2 -2
- kalibr/client.py +1 -1
- kalibr/collector.py +227 -48
- kalibr/context.py +42 -0
- kalibr/cost_adapter.py +36 -104
- kalibr/instrumentation/anthropic_instr.py +34 -40
- kalibr/instrumentation/base.py +27 -9
- kalibr/instrumentation/google_instr.py +34 -39
- kalibr/instrumentation/openai_instr.py +34 -28
- kalibr/instrumentation/registry.py +38 -13
- kalibr/intelligence.py +662 -0
- kalibr/middleware/auto_tracer.py +1 -1
- kalibr/pricing.py +245 -0
- kalibr/router.py +545 -0
- kalibr/simple_tracer.py +16 -15
- kalibr/tokens.py +20 -5
- kalibr/trace_capsule.py +19 -12
- kalibr/utils.py +2 -2
- kalibr-1.4.0.dist-info/LICENSE +190 -0
- kalibr-1.4.0.dist-info/METADATA +306 -0
- kalibr-1.4.0.dist-info/RECORD +52 -0
- {kalibr-1.1.3a0.dist-info → kalibr-1.4.0.dist-info}/WHEEL +1 -1
- kalibr_crewai/__init__.py +1 -1
- kalibr_crewai/callbacks.py +122 -14
- kalibr_crewai/instrumentor.py +196 -33
- kalibr_langchain/__init__.py +4 -2
- kalibr_langchain/callback.py +26 -0
- kalibr_langchain/chat_model.py +103 -0
- kalibr_openai_agents/__init__.py +1 -1
- kalibr-1.1.3a0.dist-info/METADATA +0 -236
- kalibr-1.1.3a0.dist-info/RECORD +0 -48
- kalibr-1.1.3a0.dist-info/licenses/LICENSE +0 -21
- {kalibr-1.1.3a0.dist-info → kalibr-1.4.0.dist-info}/entry_points.txt +0 -0
- {kalibr-1.1.3a0.dist-info → kalibr-1.4.0.dist-info}/top_level.txt +0 -0
kalibr/router.py
ADDED
@@ -0,0 +1,545 @@
+"""
+Kalibr Router - Intelligent model routing with outcome learning.
+"""
+
+import os
+import logging
+import uuid
+from typing import Any, Callable, Dict, List, Optional, Union
+
+from opentelemetry import trace as otel_trace
+from opentelemetry.trace import SpanContext, TraceFlags, NonRecordingSpan, set_span_in_context
+from opentelemetry.context import Context
+
+logger = logging.getLogger(__name__)
+
+# Type for paths - either string or dict
+PathSpec = Union[str, Dict[str, Any]]
+
+
+def _create_context_with_trace_id(trace_id_hex: str) -> Optional[Context]:
+    """Create an OTel context with a specific trace_id.
+
+    This allows child spans to inherit the intelligence service's trace_id,
+    enabling JOINs between outcomes and traces tables.
+    """
+    try:
+        # Convert 32-char hex string to 128-bit int
+        trace_id_int = int(trace_id_hex, 16)
+        if trace_id_int == 0:
+            return None
+
+        # Create span context with our trace_id
+        span_context = SpanContext(
+            trace_id=trace_id_int,
+            span_id=0xDEADBEEF,  # Placeholder, real span will have its own
+            is_remote=True,  # Treat as remote parent so new span_id is generated
+            trace_flags=TraceFlags(TraceFlags.SAMPLED),
+        )
+
+        # Create a non-recording parent span and set in context
+        parent_span = NonRecordingSpan(span_context)
+        return set_span_in_context(parent_span)
+    except (ValueError, TypeError) as e:
+        logger.warning(f"Could not create OTel context with trace_id: {e}")
+        return None
+
+
+class Router:
+    """
+    Routes LLM requests to the best model based on learned outcomes.
+
+    Example:
+        router = Router(
+            goal="summarize",
+            paths=["gpt-4o", "claude-3-sonnet"],
+            success_when=lambda out: len(out) > 100
+        )
+        response = router.completion(messages=[...])
+
+    Examples:
+        # Simple auto-reporting
+        router = Router(
+            goal="extract_email",
+            paths=["gpt-4o", "claude-sonnet-4"],
+            success_when=lambda out: "@" in out
+        )
+        response = router.completion(messages=[...])
+        # report() called automatically
+
+        # Manual reporting for complex validation
+        router = Router(
+            goal="book_meeting",
+            paths=["gpt-4o", "claude-sonnet-4"]
+        )
+        response = router.completion(messages=[...])
+        # ... complex validation logic ...
+        router.report(success=meeting_booked)
+
+    Warning:
+        Router is not thread-safe. For concurrent requests, create separate
+        Router instances per thread/task. For sequential requests in a single
+        thread, Router can be reused across multiple completion() calls.
+    """
+
+    def __init__(
+        self,
+        goal: str,
+        paths: Optional[List[PathSpec]] = None,
+        success_when: Optional[Callable[[str], bool]] = None,
+        exploration_rate: Optional[float] = None,
+        auto_register: bool = True,
+    ):
+        """
+        Initialize router.
+
+        Args:
+            goal: Name of the goal (e.g., "book_meeting", "summarize")
+            paths: List of models or path configs. Examples:
+                ["gpt-4o", "claude-3-sonnet"]
+                [{"model": "gpt-4o", "tools": ["search"]}]
+                [{"model": "gpt-4o", "params": {"temperature": 0.7}}]
+            success_when: Optional function to auto-evaluate success from LLM output.
+                Takes the output string and returns True/False.
+                When provided, report() is called automatically after completion().
+                Use for simple validations (output length, contains key string).
+                For complex validation (API calls, multi-step checks), omit this
+                and call report() manually.
+                Examples:
+                    success_when=lambda out: len(out) > 0  # Not empty
+                    success_when=lambda out: "@" in out  # Contains email
+            exploration_rate: Override exploration rate (0.0-1.0)
+            auto_register: If True, register paths on init
+        """
+        self.goal = goal
+
+        # Validate required environment variables
+        api_key = os.environ.get('KALIBR_API_KEY')
+        tenant_id = os.environ.get('KALIBR_TENANT_ID')
+
+        if not api_key:
+            raise ValueError(
+                "KALIBR_API_KEY environment variable not set.\n"
+                "Get your API key from: https://dashboard.kalibr.systems/settings\n"
+                "Then run: export KALIBR_API_KEY=your-key-here"
+            )
+
+        if not tenant_id:
+            raise ValueError(
+                "KALIBR_TENANT_ID environment variable not set.\n"
+                "Find your Tenant ID at: https://dashboard.kalibr.systems/settings\n"
+                "Then run: export KALIBR_TENANT_ID=your-tenant-id"
+            )
+
+        self.success_when = success_when
+        self.exploration_rate = exploration_rate
+        self._last_trace_id: Optional[str] = None
+        self._last_model_id: Optional[str] = None
+        self._last_decision: Optional[dict] = None
+        self._outcome_reported = False
+
+        # Normalize paths to list of dicts
+        self._paths = self._normalize_paths(paths or ["gpt-4o"])
+
+        # Register paths if requested
+        if auto_register:
+            self._register_paths()
+
+    def _normalize_paths(self, paths: List[PathSpec]) -> List[Dict[str, Any]]:
+        """Convert paths to consistent format."""
+        normalized = []
+        for p in paths:
+            if isinstance(p, str):
+                normalized.append({"model": p, "tools": None, "params": None})
+            elif isinstance(p, dict):
+                normalized.append({
+                    "model": p.get("model") or p.get("model_id"),
+                    "tools": p.get("tools") or p.get("tool_id"),
+                    "params": p.get("params"),
+                })
+            else:
+                raise ValueError(f"Invalid path spec: {p}")
+        return normalized
+
+    def _register_paths(self):
+        """Register paths with intelligence service."""
+        from kalibr.intelligence import register_path
+
+        for path in self._paths:
+            try:
+                register_path(
+                    goal=self.goal,
+                    model_id=path["model"],
+                    tool_id=path["tools"][0] if isinstance(path["tools"], list) and path["tools"] else path["tools"],
+                    params=path["params"],
+                )
+            except Exception as e:
+                # Log but don't fail - path might already exist
+                logger.debug(f"Path registration note: {e}")
+
+    def completion(
+        self,
+        messages: List[Dict[str, str]],
+        force_model: Optional[str] = None,
+        **kwargs
+    ) -> Any:
+        """
+        Make a completion request with intelligent routing.
+
+        Args:
+            messages: OpenAI-format messages
+            force_model: Override routing and use this model
+            **kwargs: Additional args passed to provider
+
+        Returns:
+            OpenAI-compatible ChatCompletion response with added attribute:
+            - kalibr_trace_id: Trace ID for explicit outcome reporting
+        """
+        from kalibr.intelligence import decide
+
+        # Reset state for new request
+        self._outcome_reported = False
+
+        # Step 1: Get routing decision FIRST (before creating span)
+        decision = None
+        model_id = None
+        tool_id = None
+        params = {}
+
+        if force_model:
+            model_id = force_model
+            self._last_decision = {"model_id": model_id, "forced": True}
+        else:
+            try:
+                decision = decide(goal=self.goal)
+                model_id = decision.get("model_id") or self._paths[0]["model"]
+                tool_id = decision.get("tool_id")
+                params = decision.get("params") or {}
+                self._last_decision = decision
+            except Exception as e:
+                logger.warning(f"Routing failed, using fallback: {e}")
+                model_id = self._paths[0]["model"]
+                tool_id = self._paths[0].get("tools")
+                params = self._paths[0].get("params") or {}
+                self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
+
+        # Step 2: Determine trace_id
+        decision_trace_id = self._last_decision.get("trace_id") if self._last_decision else None
+
+        if decision_trace_id:
+            trace_id = decision_trace_id
+        else:
+            trace_id = uuid.uuid4().hex  # Fallback: generate OTel-compatible format
+
+        self._last_trace_id = trace_id
+        self._last_model_id = model_id
+
+        # Step 3: Create OTel context with intelligence trace_id
+        otel_context = _create_context_with_trace_id(trace_id) if trace_id else None
+
+        # Step 4: Create span with custom context (child spans inherit trace_id)
+        tracer = otel_trace.get_tracer("kalibr.router")
+
+        with tracer.start_as_current_span(
+            "kalibr.router.completion",
+            context=otel_context,
+            attributes={
+                "kalibr.goal": self.goal,
+                "kalibr.trace_id": trace_id,
+                "kalibr.model_id": model_id,
+            }
+        ) as router_span:
+            # Add decision attributes
+            if force_model:
+                router_span.set_attribute("kalibr.forced", True)
+            elif decision:
+                router_span.set_attribute("kalibr.path_id", decision.get("path_id", ""))
+                router_span.set_attribute("kalibr.reason", decision.get("reason", ""))
+                router_span.set_attribute("kalibr.exploration", decision.get("exploration", False))
+                router_span.set_attribute("kalibr.confidence", decision.get("confidence", 0.0))
+            else:
+                router_span.set_attribute("kalibr.fallback", True)
+
+            # Step 5: Build ordered candidate paths for fallback
+            # First: intelligence-selected path, then remaining registered paths
+            candidate_paths = []
+            selected_path = {"model": model_id, "tools": tool_id, "params": params}
+            candidate_paths.append(selected_path)
+
+            # Add remaining paths, skipping duplicates of the selected model
+            for path in self._paths:
+                if path["model"] != model_id:
+                    candidate_paths.append(path)
+
+            # Step 6: Try each candidate path with fallback
+            from kalibr.intelligence import report_outcome
+
+            last_exception = None
+            for i, candidate in enumerate(candidate_paths):
+                candidate_model = candidate["model"]
+                candidate_tools = candidate.get("tools")
+                candidate_params = candidate.get("params") or {}
+
+                is_fallback = (i > 0)
+                if is_fallback:
+                    logger.warning(f"Primary path failed, trying fallback: {candidate_model}")
+
+                try:
+                    response = self._dispatch(
+                        candidate_model,
+                        messages,
+                        candidate_tools,
+                        **{**candidate_params, **kwargs}
+                    )
+
+                    # Success! Update state to reflect which model succeeded
+                    self._last_model_id = candidate_model
+
+                    # Auto-report success if success_when provided
+                    if self.success_when and not self._outcome_reported:
+                        try:
+                            output = response.choices[0].message.content or ""
+                            success = self.success_when(output)
+                            self.report(success=success)
+                        except Exception as e:
+                            logger.warning(f"Auto-outcome evaluation failed: {e}")
+
+                    # Add trace_id to response for explicit linkage
+                    response.kalibr_trace_id = trace_id
+                    return response
+
+                except Exception as e:
+                    last_exception = e
+
+                    # Log the failure with model name and error
+                    logger.warning(f"Model {candidate_model} failed: {type(e).__name__}: {e}")
+
+                    # Report failure for this path to enable Thompson Sampling learning
+                    try:
+                        report_outcome(
+                            trace_id=trace_id,
+                            goal=self.goal,
+                            success=False,
+                            failure_reason=f"provider_error: {type(e).__name__}",
+                            model_id=candidate_model,
+                        )
+                    except Exception:
+                        pass
+
+                    # Continue to next candidate
+                    continue
+
+            # All paths failed - set error attributes and raise
+            router_span.set_attribute("error", True)
+            router_span.set_attribute("error.type", type(last_exception).__name__)
+            self._outcome_reported = True  # Prevent double-reporting on raise
+            raise last_exception
+
+    def report(
+        self,
+        success: bool,
+        reason: Optional[str] = None,
+        score: Optional[float] = None,
+        trace_id: Optional[str] = None,
+    ):
+        """
+        Report outcome for the last completion.
+
+        Args:
+            success: Whether the task succeeded
+            reason: Optional failure reason
+            score: Optional quality score (0.0-1.0)
+            trace_id: Optional explicit trace ID (uses last completion's trace_id if not provided)
+        """
+        if self._outcome_reported:
+            logger.warning("Outcome already reported for this completion. Each completion() requires a separate report() call.")
+            return
+
+        from kalibr.intelligence import report_outcome
+
+        trace_id = trace_id or self._last_trace_id
+        if not trace_id:
+            raise ValueError("Must call completion() before report(). No trace_id available.")
+
+        try:
+            report_outcome(
+                trace_id=trace_id,
+                goal=self.goal,
+                success=success,
+                score=score,
+                failure_reason=reason,
+                model_id=self._last_model_id,
+            )
+            self._outcome_reported = True
+        except Exception as e:
+            logger.warning(f"Failed to report outcome: {e}")
+
+    def add_path(
+        self,
+        model: str,
+        tools: Optional[List[str]] = None,
+        params: Optional[Dict] = None,
+    ):
+        """Add a new path dynamically."""
+        from kalibr.intelligence import register_path
+
+        path = {"model": model, "tools": tools, "params": params}
+        self._paths.append(path)
+
+        register_path(
+            goal=self.goal,
+            model_id=model,
+            tool_id=tools[0] if tools else None,
+            params=params,
+        )
+
+    def _dispatch(
+        self,
+        model_id: str,
+        messages: List[Dict],
+        tools: Optional[Any] = None,
+        **kwargs
+    ) -> Any:
+        """Dispatch to the appropriate provider."""
+        if model_id.startswith(("gpt-", "o1-", "o3-")):
+            return self._call_openai(model_id, messages, tools, **kwargs)
+        elif model_id.startswith("claude-"):
+            return self._call_anthropic(model_id, messages, tools, **kwargs)
+        elif model_id.startswith(("gemini-", "models/gemini")):
+            return self._call_google(model_id, messages, tools, **kwargs)
+        else:
+            # Default to OpenAI-compatible
+            logger.info(f"Unknown model prefix '{model_id}', trying OpenAI")
+            return self._call_openai(model_id, messages, tools, **kwargs)
+
+    def _call_openai(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+        """Call OpenAI API."""
+        try:
+            from openai import OpenAI
+        except ImportError:
+            raise ImportError("Install 'openai' package: pip install openai")
+
+        client = OpenAI()
+
+        call_kwargs = {"model": model, "messages": messages, **kwargs}
+        if tools:
+            call_kwargs["tools"] = tools
+
+        return client.chat.completions.create(**call_kwargs)
+
+    def _call_anthropic(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+        """Call Anthropic API and convert response to OpenAI format."""
+        try:
+            from anthropic import Anthropic
+        except ImportError:
+            raise ImportError("Install 'anthropic' package: pip install anthropic")
+
+        client = Anthropic()
+
+        # Convert messages (handle system message)
+        system = None
+        anthropic_messages = []
+        for m in messages:
+            if m["role"] == "system":
+                system = m["content"]
+            else:
+                anthropic_messages.append({"role": m["role"], "content": m["content"]})
+
+        call_kwargs = {"model": model, "messages": anthropic_messages, "max_tokens": kwargs.pop("max_tokens", 4096)}
+        if system:
+            call_kwargs["system"] = system
+        if tools:
+            call_kwargs["tools"] = tools
+        call_kwargs.update(kwargs)
+
+        response = client.messages.create(**call_kwargs)
+
+        # Convert to OpenAI format
+        return self._anthropic_to_openai_response(response, model)
+
+    def _call_google(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+        """Call Google API and convert response to OpenAI format."""
+        try:
+            import google.generativeai as genai
+        except ImportError:
+            raise ImportError("Install 'google-generativeai' package: pip install google-generativeai")
+
+        # Configure if API key available
+        api_key = os.environ.get("GOOGLE_API_KEY")
+        if api_key:
+            genai.configure(api_key=api_key)
+
+        # Convert messages to Google format
+        model_name = model.replace("models/", "") if model.startswith("models/") else model
+        gmodel = genai.GenerativeModel(model_name)
+
+        # Simple conversion - concatenate messages
+        prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
+
+        response = gmodel.generate_content(prompt)
+
+        # Convert to OpenAI format
+        return self._google_to_openai_response(response, model)
+
+    def _anthropic_to_openai_response(self, response: Any, model: str) -> Any:
+        """Convert Anthropic response to OpenAI format."""
+        from types import SimpleNamespace
+
+        content = ""
+        if response.content:
+            content = response.content[0].text if hasattr(response.content[0], "text") else str(response.content[0])
+
+        return SimpleNamespace(
+            id=response.id,
+            model=model,
+            choices=[
+                SimpleNamespace(
+                    index=0,
+                    message=SimpleNamespace(
+                        role="assistant",
+                        content=content,
+                    ),
+                    finish_reason=response.stop_reason,
+                )
+            ],
+            usage=SimpleNamespace(
+                prompt_tokens=response.usage.input_tokens,
+                completion_tokens=response.usage.output_tokens,
+                total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+            ),
+        )
+
+    def _google_to_openai_response(self, response: Any, model: str) -> Any:
+        """Convert Google response to OpenAI format."""
+        from types import SimpleNamespace
+        import uuid
+
+        content = response.text if hasattr(response, "text") else str(response)
+
+        return SimpleNamespace(
+            id=f"google-{uuid.uuid4().hex[:8]}",
+            model=model,
+            choices=[
+                SimpleNamespace(
+                    index=0,
+                    message=SimpleNamespace(
+                        role="assistant",
+                        content=content,
+                    ),
+                    finish_reason="stop",
+                )
+            ],
+            usage=SimpleNamespace(
+                prompt_tokens=getattr(response, "usage_metadata", {}).get("prompt_token_count", 0),
+                completion_tokens=getattr(response, "usage_metadata", {}).get("candidates_token_count", 0),
+                total_tokens=getattr(response, "usage_metadata", {}).get("total_token_count", 0),
+            ),
+        )
+
+    def as_langchain(self):
+        """Return a LangChain-compatible chat model."""
+        try:
+            from kalibr_langchain.chat_model import KalibrChatModel
+            return KalibrChatModel(router=self)
+        except ImportError:
+            raise ImportError("Install 'kalibr-langchain' package for LangChain integration")
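For orientation, a minimal usage sketch of the new Router API, assembled from the docstring and __init__ validation above; the goal name, paths, predicate, and placeholder credentials are illustrative, not part of the package:

import os
from kalibr.router import Router

# Router.__init__ raises ValueError unless both variables are set (see above)
os.environ.setdefault("KALIBR_API_KEY", "your-key-here")      # placeholder value
os.environ.setdefault("KALIBR_TENANT_ID", "your-tenant-id")   # placeholder value

router = Router(
    goal="summarize",
    paths=["gpt-4o", "claude-3-sonnet"],
    success_when=lambda out: len(out) > 100,  # simple check, so report() runs automatically
)

response = router.completion(messages=[{"role": "user", "content": "Summarize this article..."}])
print(response.choices[0].message.content)
print(response.kalibr_trace_id)  # attached by completion() for explicit outcome linkage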
kalibr/simple_tracer.py
CHANGED
@@ -19,6 +19,8 @@ Capsule Usage (automatic when middleware is active):
     def process_request(request: Request, prompt: str):
         # Capsule automatically updated with this hop
         return llm_call(prompt)
+
+Note: Uses centralized pricing from kalibr.pricing module.
 """
 
 import json
@@ -31,6 +33,8 @@ from datetime import datetime, timezone
 from functools import wraps
 from typing import Callable, Optional
 
+from kalibr.pricing import compute_cost
+
 try:
     import requests
 except ImportError:
@@ -53,7 +57,7 @@ def send_event(payload: dict):
         print("[Kalibr SDK] ❌ requests library not available")
         return
 
-    url = os.getenv("KALIBR_COLLECTOR_URL", "https://
+    url = os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest")
     api_key = os.getenv("KALIBR_API_KEY")
    if not api_key:
         print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, traces will not be sent")
@@ -155,21 +159,18 @@ def trace(
         actual_input_tokens = input_tokens or kwargs.get("input_tokens", 1000)
         actual_output_tokens = output_tokens or kwargs.get("output_tokens", 500)
 
-        # Cost calculation
-        #
-
-
-
-
-
-
-
-        # Get unit price
-        provider_pricing = pricing_map.get(provider, {})
-        unit_price_usd = provider_pricing.get(model, 0.00002000)  # Default $0.02/1M
+        # Cost calculation using centralized pricing
+        # This ensures consistency with all other cost adapters
+        total_cost_usd = compute_cost(
+            vendor=provider,
+            model_name=model,
+            input_tokens=actual_input_tokens,
+            output_tokens=actual_output_tokens,
+        )
 
-        # Calculate total cost
-
+        # Calculate unit price for backward compatibility (total cost / total tokens)
+        total_tokens = actual_input_tokens + actual_output_tokens
+        unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
 
         # Build payload
         payload = {
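The unit price kept for backward compatibility is simply the centralized total cost spread across all tokens. A small sketch of the arithmetic, assuming compute_cost returns total USD as the diff implies; the vendor, model, and token counts are illustrative:

from kalibr.pricing import compute_cost

input_tokens, output_tokens = 1000, 500  # the defaults used by trace() above

# Total cost comes from the centralized pricing table
total_cost_usd = compute_cost(
    vendor="openai",
    model_name="gpt-4o",
    input_tokens=input_tokens,
    output_tokens=output_tokens,
)

# Backward-compatible unit price: total cost / total tokens, guarded against zero
total_tokens = input_tokens + output_tokens
unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0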
kalibr/tokens.py
CHANGED
@@ -2,13 +2,18 @@
 
 from typing import Optional
 
-
+# Import tiktoken optionally for token counting
+try:
+    import tiktoken
+    HAS_TIKTOKEN = True
+except ImportError:
+    HAS_TIKTOKEN = False
 
 # Cache for tokenizer instances
 _tokenizer_cache = {}
 
 
-def count_tokens(text: str, model_id: str) -> int:
+def count_tokens(text: str, model_id: str = "gpt-4o") -> int:
     """Count tokens for given text and model.
 
     Args:
@@ -16,11 +21,15 @@ def count_tokens(text: str, model_id: str) -> int:
         model_id: Model identifier
 
     Returns:
-        Token count (approximate)
+        Token count (approximate if tiktoken is not installed)
     """
     if not text:
         return 0
 
+    if not HAS_TIKTOKEN:
+        # Fallback: rough estimate of 4 chars per token
+        return len(text) // 4
+
     # Try to get exact tokenizer for OpenAI models
     if "gpt" in model_id.lower():
         try:
@@ -34,7 +43,13 @@ def count_tokens(text: str, model_id: str) -> int:
 
 
 def get_openai_encoding(model_id: str):
-    """Get tiktoken encoding for OpenAI model.
+    """Get tiktoken encoding for OpenAI model.
+
+    Returns None if tiktoken is not installed.
+    """
+    if not HAS_TIKTOKEN:
+        return None
+
     if model_id in _tokenizer_cache:
         return _tokenizer_cache[model_id]
 
@@ -48,5 +63,5 @@ def get_openai_encoding(model_id: str):
         _tokenizer_cache[model_id] = encoding
         return encoding
     except Exception as e:
-        print(f"
+        print(f"Warning: Failed to load tokenizer for {model_id}: {e}")
         raise
|