kalibr 1.1.3a0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kalibr/router.py ADDED
@@ -0,0 +1,545 @@
+ """
+ Kalibr Router - Intelligent model routing with outcome learning.
+ """
+
+ import os
+ import logging
+ import uuid
+ from typing import Any, Callable, Dict, List, Optional, Union
+
+ from opentelemetry import trace as otel_trace
+ from opentelemetry.trace import SpanContext, TraceFlags, NonRecordingSpan, set_span_in_context
+ from opentelemetry.context import Context
+
+ logger = logging.getLogger(__name__)
+
+ # Type for paths - either string or dict
+ PathSpec = Union[str, Dict[str, Any]]
+
+
+ def _create_context_with_trace_id(trace_id_hex: str) -> Optional[Context]:
+     """Create an OTel context with a specific trace_id.
+
+     This allows child spans to inherit the intelligence service's trace_id,
+     enabling JOINs between outcomes and traces tables.
+     """
+     try:
+         # Convert 32-char hex string to 128-bit int
+         trace_id_int = int(trace_id_hex, 16)
+         if trace_id_int == 0:
+             return None
+
+         # Create span context with our trace_id
+         span_context = SpanContext(
+             trace_id=trace_id_int,
+             span_id=0xDEADBEEF,  # Placeholder, real span will have its own
+             is_remote=True,  # Treat as remote parent so new span_id is generated
+             trace_flags=TraceFlags(TraceFlags.SAMPLED),
+         )
+
+         # Create a non-recording parent span and set in context
+         parent_span = NonRecordingSpan(span_context)
+         return set_span_in_context(parent_span)
+     except (ValueError, TypeError) as e:
+         logger.warning(f"Could not create OTel context with trace_id: {e}")
+         return None
+
+
+ class Router:
+     """
+     Routes LLM requests to the best model based on learned outcomes.
+
+     Examples:
+         # Simple auto-reporting
+         router = Router(
+             goal="extract_email",
+             paths=["gpt-4o", "claude-sonnet-4"],
+             success_when=lambda out: "@" in out
+         )
+         response = router.completion(messages=[...])
+         # report() called automatically
+
+         # Manual reporting for complex validation
+         router = Router(
+             goal="book_meeting",
+             paths=["gpt-4o", "claude-sonnet-4"]
+         )
+         response = router.completion(messages=[...])
+         # ... complex validation logic ...
+         router.report(success=meeting_booked)
+
+     Warning:
+         Router is not thread-safe. For concurrent requests, create separate
+         Router instances per thread/task. For sequential requests in a single
+         thread, a Router can be reused across multiple completion() calls.
+     """
+
+     def __init__(
+         self,
+         goal: str,
+         paths: Optional[List[PathSpec]] = None,
+         success_when: Optional[Callable[[str], bool]] = None,
+         exploration_rate: Optional[float] = None,
+         auto_register: bool = True,
+     ):
+         """
+         Initialize router.
+
+         Args:
+             goal: Name of the goal (e.g., "book_meeting", "summarize")
+             paths: List of models or path configs. Examples:
+                 ["gpt-4o", "claude-3-sonnet"]
+                 [{"model": "gpt-4o", "tools": ["search"]}]
+                 [{"model": "gpt-4o", "params": {"temperature": 0.7}}]
+             success_when: Optional function to auto-evaluate success from LLM output.
+                 Takes the output string and returns True/False.
+                 When provided, report() is called automatically after completion().
+                 Use for simple validations (output length, contains key string).
+                 For complex validation (API calls, multi-step checks), omit this
+                 and call report() manually.
+                 Examples:
+                     success_when=lambda out: len(out) > 0  # Not empty
+                     success_when=lambda out: "@" in out  # Contains email
+             exploration_rate: Override exploration rate (0.0-1.0)
+             auto_register: If True, register paths on init
+         """
+         self.goal = goal
+
+         # Validate required environment variables
+         api_key = os.environ.get('KALIBR_API_KEY')
+         tenant_id = os.environ.get('KALIBR_TENANT_ID')
+
+         if not api_key:
+             raise ValueError(
+                 "KALIBR_API_KEY environment variable not set.\n"
+                 "Get your API key from: https://dashboard.kalibr.systems/settings\n"
+                 "Then run: export KALIBR_API_KEY=your-key-here"
+             )
+
+         if not tenant_id:
+             raise ValueError(
+                 "KALIBR_TENANT_ID environment variable not set.\n"
+                 "Find your Tenant ID at: https://dashboard.kalibr.systems/settings\n"
+                 "Then run: export KALIBR_TENANT_ID=your-tenant-id"
+             )
+
+         self.success_when = success_when
+         self.exploration_rate = exploration_rate
+         self._last_trace_id: Optional[str] = None
+         self._last_model_id: Optional[str] = None
+         self._last_decision: Optional[dict] = None
+         self._outcome_reported = False
+
+         # Normalize paths to list of dicts
+         self._paths = self._normalize_paths(paths or ["gpt-4o"])
+
+         # Register paths if requested
+         if auto_register:
+             self._register_paths()
+
+     def _normalize_paths(self, paths: List[PathSpec]) -> List[Dict[str, Any]]:
+         """Convert paths to consistent format."""
+         normalized = []
+         for p in paths:
+             if isinstance(p, str):
+                 normalized.append({"model": p, "tools": None, "params": None})
+             elif isinstance(p, dict):
+                 normalized.append({
+                     "model": p.get("model") or p.get("model_id"),
+                     "tools": p.get("tools") or p.get("tool_id"),
+                     "params": p.get("params"),
+                 })
+             else:
+                 raise ValueError(f"Invalid path spec: {p}")
+         return normalized
+
+     def _register_paths(self):
+         """Register paths with intelligence service."""
+         from kalibr.intelligence import register_path
+
+         for path in self._paths:
+             try:
+                 register_path(
+                     goal=self.goal,
+                     model_id=path["model"],
+                     tool_id=path["tools"][0] if isinstance(path["tools"], list) and path["tools"] else path["tools"],
+                     params=path["params"],
+                 )
+             except Exception as e:
+                 # Log but don't fail - path might already exist
+                 logger.debug(f"Path registration note: {e}")
+
+     def completion(
+         self,
+         messages: List[Dict[str, str]],
+         force_model: Optional[str] = None,
+         **kwargs
+     ) -> Any:
+         """
+         Make a completion request with intelligent routing.
+
+         Args:
+             messages: OpenAI-format messages
+             force_model: Override routing and use this model
+             **kwargs: Additional args passed to provider
+
+         Returns:
+             OpenAI-compatible ChatCompletion response with added attribute:
+             - kalibr_trace_id: Trace ID for explicit outcome reporting
+         """
+         from kalibr.intelligence import decide
+
+         # Reset state for new request
+         self._outcome_reported = False
+
+         # Step 1: Get routing decision FIRST (before creating span)
+         decision = None
+         model_id = None
+         tool_id = None
+         params = {}
+
+         if force_model:
+             model_id = force_model
+             self._last_decision = {"model_id": model_id, "forced": True}
+         else:
+             try:
+                 decision = decide(goal=self.goal)
+                 model_id = decision.get("model_id") or self._paths[0]["model"]
+                 tool_id = decision.get("tool_id")
+                 params = decision.get("params") or {}
+                 self._last_decision = decision
+             except Exception as e:
+                 logger.warning(f"Routing failed, using fallback: {e}")
+                 model_id = self._paths[0]["model"]
+                 tool_id = self._paths[0].get("tools")
+                 params = self._paths[0].get("params") or {}
+                 self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
+
+         # Step 2: Determine trace_id
+         decision_trace_id = self._last_decision.get("trace_id") if self._last_decision else None
+
+         if decision_trace_id:
+             trace_id = decision_trace_id
+         else:
+             trace_id = uuid.uuid4().hex  # Fallback: generate OTel-compatible format
+
+         self._last_trace_id = trace_id
+         self._last_model_id = model_id
+
+         # Step 3: Create OTel context with intelligence trace_id
+         otel_context = _create_context_with_trace_id(trace_id) if trace_id else None
+
+         # Step 4: Create span with custom context (child spans inherit trace_id)
+         tracer = otel_trace.get_tracer("kalibr.router")
+
+         with tracer.start_as_current_span(
+             "kalibr.router.completion",
+             context=otel_context,
+             attributes={
+                 "kalibr.goal": self.goal,
+                 "kalibr.trace_id": trace_id,
+                 "kalibr.model_id": model_id,
+             }
+         ) as router_span:
+             # Add decision attributes
+             if force_model:
+                 router_span.set_attribute("kalibr.forced", True)
+             elif decision:
+                 router_span.set_attribute("kalibr.path_id", decision.get("path_id", ""))
+                 router_span.set_attribute("kalibr.reason", decision.get("reason", ""))
+                 router_span.set_attribute("kalibr.exploration", decision.get("exploration", False))
+                 router_span.set_attribute("kalibr.confidence", decision.get("confidence", 0.0))
+             else:
+                 router_span.set_attribute("kalibr.fallback", True)
+
+             # Step 5: Build ordered candidate paths for fallback
+             # First: intelligence-selected path, then remaining registered paths
+             candidate_paths = []
+             selected_path = {"model": model_id, "tools": tool_id, "params": params}
+             candidate_paths.append(selected_path)
+
+             # Add remaining paths, skipping duplicates of the selected model
+             for path in self._paths:
+                 if path["model"] != model_id:
+                     candidate_paths.append(path)
+
+             # Step 6: Try each candidate path with fallback
+             from kalibr.intelligence import report_outcome
+
+             last_exception = None
+             for i, candidate in enumerate(candidate_paths):
+                 candidate_model = candidate["model"]
+                 candidate_tools = candidate.get("tools")
+                 candidate_params = candidate.get("params") or {}
+
+                 is_fallback = (i > 0)
+                 if is_fallback:
+                     logger.warning(f"Primary path failed, trying fallback: {candidate_model}")
+
+                 try:
+                     response = self._dispatch(
+                         candidate_model,
+                         messages,
+                         candidate_tools,
+                         **{**candidate_params, **kwargs}
+                     )
+
+                     # Success! Update state to reflect which model succeeded
+                     self._last_model_id = candidate_model
+
+                     # Auto-report success if success_when provided
+                     if self.success_when and not self._outcome_reported:
+                         try:
+                             output = response.choices[0].message.content or ""
+                             success = self.success_when(output)
+                             self.report(success=success)
+                         except Exception as e:
+                             logger.warning(f"Auto-outcome evaluation failed: {e}")
+
+                     # Add trace_id to response for explicit linkage
+                     response.kalibr_trace_id = trace_id
+                     return response
+
+                 except Exception as e:
+                     last_exception = e
+
+                     # Log the failure with model name and error
+                     logger.warning(f"Model {candidate_model} failed: {type(e).__name__}: {e}")
+
+                     # Report failure for this path to enable Thompson Sampling learning
+                     try:
+                         report_outcome(
+                             trace_id=trace_id,
+                             goal=self.goal,
+                             success=False,
+                             failure_reason=f"provider_error: {type(e).__name__}",
+                             model_id=candidate_model,
+                         )
+                     except Exception:
+                         pass
+
+                     # Continue to next candidate
+                     continue
+
+             # All paths failed - set error attributes and raise
+             router_span.set_attribute("error", True)
+             router_span.set_attribute("error.type", type(last_exception).__name__)
+             self._outcome_reported = True  # Prevent double-reporting on raise
+             raise last_exception
+
+     def report(
+         self,
+         success: bool,
+         reason: Optional[str] = None,
+         score: Optional[float] = None,
+         trace_id: Optional[str] = None,
+     ):
+         """
+         Report outcome for the last completion.
+
+         Args:
+             success: Whether the task succeeded
+             reason: Optional failure reason
+             score: Optional quality score (0.0-1.0)
+             trace_id: Optional explicit trace ID (uses last completion's trace_id if not provided)
+         """
+         if self._outcome_reported:
+             logger.warning("Outcome already reported for this completion. Each completion() requires a separate report() call.")
+             return
+
+         from kalibr.intelligence import report_outcome
+
+         trace_id = trace_id or self._last_trace_id
+         if not trace_id:
+             raise ValueError("Must call completion() before report(). No trace_id available.")
+
+         try:
+             report_outcome(
+                 trace_id=trace_id,
+                 goal=self.goal,
+                 success=success,
+                 score=score,
+                 failure_reason=reason,
+                 model_id=self._last_model_id,
+             )
+             self._outcome_reported = True
+         except Exception as e:
+             logger.warning(f"Failed to report outcome: {e}")
+
+     def add_path(
+         self,
+         model: str,
+         tools: Optional[List[str]] = None,
+         params: Optional[Dict] = None,
+     ):
+         """Add a new path dynamically."""
+         from kalibr.intelligence import register_path
+
+         path = {"model": model, "tools": tools, "params": params}
+         self._paths.append(path)
+
+         register_path(
+             goal=self.goal,
+             model_id=model,
+             tool_id=tools[0] if tools else None,
+             params=params,
+         )
+
+     def _dispatch(
+         self,
+         model_id: str,
+         messages: List[Dict],
+         tools: Optional[Any] = None,
+         **kwargs
+     ) -> Any:
+         """Dispatch to the appropriate provider."""
+         if model_id.startswith(("gpt-", "o1-", "o3-")):
+             return self._call_openai(model_id, messages, tools, **kwargs)
+         elif model_id.startswith("claude-"):
+             return self._call_anthropic(model_id, messages, tools, **kwargs)
+         elif model_id.startswith(("gemini-", "models/gemini")):
+             return self._call_google(model_id, messages, tools, **kwargs)
+         else:
+             # Default to OpenAI-compatible
+             logger.info(f"Unknown model prefix '{model_id}', trying OpenAI")
+             return self._call_openai(model_id, messages, tools, **kwargs)
+
+     def _call_openai(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+         """Call OpenAI API."""
+         try:
+             from openai import OpenAI
+         except ImportError:
+             raise ImportError("Install 'openai' package: pip install openai")
+
+         client = OpenAI()
+
+         call_kwargs = {"model": model, "messages": messages, **kwargs}
+         if tools:
+             call_kwargs["tools"] = tools
+
+         return client.chat.completions.create(**call_kwargs)
+
+     def _call_anthropic(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+         """Call Anthropic API and convert response to OpenAI format."""
+         try:
+             from anthropic import Anthropic
+         except ImportError:
+             raise ImportError("Install 'anthropic' package: pip install anthropic")
+
+         client = Anthropic()
+
+         # Convert messages (handle system message)
+         system = None
+         anthropic_messages = []
+         for m in messages:
+             if m["role"] == "system":
+                 system = m["content"]
+             else:
+                 anthropic_messages.append({"role": m["role"], "content": m["content"]})
+
+         call_kwargs = {"model": model, "messages": anthropic_messages, "max_tokens": kwargs.pop("max_tokens", 4096)}
+         if system:
+             call_kwargs["system"] = system
+         if tools:
+             call_kwargs["tools"] = tools
+         call_kwargs.update(kwargs)
+
+         response = client.messages.create(**call_kwargs)
+
+         # Convert to OpenAI format
+         return self._anthropic_to_openai_response(response, model)
+
+     def _call_google(self, model: str, messages: List[Dict], tools: Any, **kwargs) -> Any:
+         """Call Google API and convert response to OpenAI format."""
+         try:
+             import google.generativeai as genai
+         except ImportError:
+             raise ImportError("Install 'google-generativeai' package: pip install google-generativeai")
+
+         # Configure if API key available
+         api_key = os.environ.get("GOOGLE_API_KEY")
+         if api_key:
+             genai.configure(api_key=api_key)
+
+         # Convert messages to Google format
+         model_name = model.replace("models/", "") if model.startswith("models/") else model
+         gmodel = genai.GenerativeModel(model_name)
+
+         # Simple conversion - concatenate messages
+         prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
+
+         response = gmodel.generate_content(prompt)
+
+         # Convert to OpenAI format
+         return self._google_to_openai_response(response, model)
+
+     def _anthropic_to_openai_response(self, response: Any, model: str) -> Any:
+         """Convert Anthropic response to OpenAI format."""
+         from types import SimpleNamespace
+
+         content = ""
+         if response.content:
+             content = response.content[0].text if hasattr(response.content[0], "text") else str(response.content[0])
+
+         return SimpleNamespace(
+             id=response.id,
+             model=model,
+             choices=[
+                 SimpleNamespace(
+                     index=0,
+                     message=SimpleNamespace(
+                         role="assistant",
+                         content=content,
+                     ),
+                     finish_reason=response.stop_reason,
+                 )
+             ],
+             usage=SimpleNamespace(
+                 prompt_tokens=response.usage.input_tokens,
+                 completion_tokens=response.usage.output_tokens,
+                 total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+             ),
+         )
+
+     def _google_to_openai_response(self, response: Any, model: str) -> Any:
+         """Convert Google response to OpenAI format."""
+         from types import SimpleNamespace
+
+         content = response.text if hasattr(response, "text") else str(response)
+
+         # usage_metadata is an attribute object (not a dict), so read its
+         # fields with getattr rather than .get()
+         usage_metadata = getattr(response, "usage_metadata", None)
+
+         return SimpleNamespace(
+             id=f"google-{uuid.uuid4().hex[:8]}",
+             model=model,
+             choices=[
+                 SimpleNamespace(
+                     index=0,
+                     message=SimpleNamespace(
+                         role="assistant",
+                         content=content,
+                     ),
+                     finish_reason="stop",
+                 )
+             ],
+             usage=SimpleNamespace(
+                 prompt_tokens=getattr(usage_metadata, "prompt_token_count", 0),
+                 completion_tokens=getattr(usage_metadata, "candidates_token_count", 0),
+                 total_tokens=getattr(usage_metadata, "total_token_count", 0),
+             ),
+         )
+
+     def as_langchain(self):
+         """Return a LangChain-compatible chat model."""
+         try:
+             from kalibr_langchain.chat_model import KalibrChatModel
+             return KalibrChatModel(router=self)
+         except ImportError:
+             raise ImportError("Install 'kalibr-langchain' package for LangChain integration")
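Taken together, the new Router supports the two outcome-reporting styles described in its docstrings: automatic (via success_when) and manual (via report()). A minimal usage sketch, assuming KALIBR_API_KEY and KALIBR_TENANT_ID are exported and the openai package is installed; message contents are illustrative:

from kalibr.router import Router

# Auto-reporting: success_when is evaluated on the output string and
# report() is called automatically after completion().
router = Router(
    goal="extract_email",
    paths=["gpt-4o", "claude-sonnet-4"],
    success_when=lambda out: "@" in out,
)
response = router.completion(
    messages=[{"role": "user", "content": "Extract the email: reach me at bob@example.com"}],
)
print(response.choices[0].message.content)
print(response.kalibr_trace_id)  # attached for explicit outcome linkage

# Manual reporting: omit success_when, validate however you like,
# then report exactly once per completion().
manual = Router(goal="book_meeting", paths=["gpt-4o"])
response = manual.completion(messages=[{"role": "user", "content": "Book a meeting for Friday."}])
meeting_booked = True  # placeholder for real validation logic
manual.report(success=meeting_booked, score=0.9)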
kalibr/simple_tracer.py CHANGED
@@ -19,6 +19,8 @@ Capsule Usage (automatic when middleware is active):
      def process_request(request: Request, prompt: str):
          # Capsule automatically updated with this hop
          return llm_call(prompt)
+
+ Note: Uses centralized pricing from kalibr.pricing module.
  """
 
  import json
@@ -31,6 +33,8 @@ from datetime import datetime, timezone
  from functools import wraps
  from typing import Callable, Optional
 
+ from kalibr.pricing import compute_cost
+
  try:
      import requests
  except ImportError:
@@ -53,7 +57,7 @@ def send_event(payload: dict):
          print("[Kalibr SDK] ❌ requests library not available")
          return
 
-     url = os.getenv("KALIBR_COLLECTOR_URL", "https://api.kalibr.systems/api/ingest")
+     url = os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest")
      api_key = os.getenv("KALIBR_API_KEY")
      if not api_key:
          print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, traces will not be sent")
@@ -155,21 +159,18 @@ def trace(
      actual_input_tokens = input_tokens or kwargs.get("input_tokens", 1000)
      actual_output_tokens = output_tokens or kwargs.get("output_tokens", 500)
 
-     # Cost calculation (simplified pricing)
-     # OpenAI GPT-4o: ~$2.50/1M input, ~$10/1M output
-     # Anthropic Claude-3-Sonnet: ~$3/1M input, ~$15/1M output
-     pricing_map = {
-         "openai": {"gpt-4o": 0.00000250, "gpt-4": 0.00003000},
-         "anthropic": {"claude-3-sonnet": 0.00000300, "claude-3-opus": 0.00001500},
-         "google": {"gemini-pro": 0.00000125},
-     }
-
-     # Get unit price
-     provider_pricing = pricing_map.get(provider, {})
-     unit_price_usd = provider_pricing.get(model, 0.00002000)  # Default $0.02/1M
+     # Cost calculation using centralized pricing
+     # This ensures consistency with all other cost adapters
+     total_cost_usd = compute_cost(
+         vendor=provider,
+         model_name=model,
+         input_tokens=actual_input_tokens,
+         output_tokens=actual_output_tokens,
+     )
 
-     # Calculate total cost
-     total_cost_usd = (actual_input_tokens + actual_output_tokens) * unit_price_usd
+     # Calculate unit price for backward compatibility (total cost / total tokens)
+     total_tokens = actual_input_tokens + actual_output_tokens
+     unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
 
      # Build payload
      payload = {
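The kalibr.pricing module itself does not appear in this diff, so the real rate table is unknown. A sketch of the shape implied by the call site, with separate input/output per-token rates and a flat default for unknown models (the names and numbers below are illustrative assumptions, not Kalibr's actual prices):

# Illustrative only: kalibr.pricing's real table is not part of this diff.
_RATES = {
    # (vendor, model): (USD per input token, USD per output token)
    ("openai", "gpt-4o"): (2.50e-06, 1.00e-05),
    ("anthropic", "claude-3-sonnet"): (3.00e-06, 1.50e-05),
}
_DEFAULT_RATE = (2.00e-05, 2.00e-05)

def compute_cost(vendor: str, model_name: str, input_tokens: int, output_tokens: int) -> float:
    """Return the total request cost in USD, with a flat default for unknown models."""
    in_rate, out_rate = _RATES.get((vendor, model_name), _DEFAULT_RATE)
    return input_tokens * in_rate + output_tokens * out_rate

Under this shape, the backward-compatible unit_price_usd becomes a blended rate: for 1000 input and 500 output tokens on gpt-4o, (1000 × 2.5e-06 + 500 × 1.0e-05) / 1500 ≈ 5.0e-06 USD per token.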
kalibr/tokens.py CHANGED
@@ -2,13 +2,18 @@
 
  from typing import Optional
 
- import tiktoken
+ # Import tiktoken optionally for token counting
+ try:
+     import tiktoken
+     HAS_TIKTOKEN = True
+ except ImportError:
+     HAS_TIKTOKEN = False
 
  # Cache for tokenizer instances
  _tokenizer_cache = {}
 
 
- def count_tokens(text: str, model_id: str) -> int:
+ def count_tokens(text: str, model_id: str = "gpt-4o") -> int:
      """Count tokens for given text and model.
 
      Args:
@@ -16,11 +21,15 @@ def count_tokens(text: str, model_id: str) -> int:
          model_id: Model identifier
 
      Returns:
-         Token count (approximate)
+         Token count (approximate if tiktoken is not installed)
      """
      if not text:
          return 0
 
+     if not HAS_TIKTOKEN:
+         # Fallback: rough estimate of 4 chars per token
+         return len(text) // 4
+
      # Try to get exact tokenizer for OpenAI models
      if "gpt" in model_id.lower():
          try:
@@ -34,7 +43,13 @@ def count_tokens(text: str, model_id: str) -> int:
 
 
  def get_openai_encoding(model_id: str):
-     """Get tiktoken encoding for OpenAI model."""
+     """Get tiktoken encoding for OpenAI model.
+
+     Returns None if tiktoken is not installed.
+     """
+     if not HAS_TIKTOKEN:
+         return None
+
      if model_id in _tokenizer_cache:
          return _tokenizer_cache[model_id]
 
@@ -48,5 +63,5 @@ def get_openai_encoding(model_id: str):
          _tokenizer_cache[model_id] = encoding
          return encoding
      except Exception as e:
-         print(f"⚠️ Failed to load tokenizer for {model_id}: {e}")
+         print(f"Warning: Failed to load tokenizer for {model_id}: {e}")
          raise
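With tiktoken now optional, count_tokens degrades to a rough 4-characters-per-token heuristic instead of raising ImportError at import time, and model_id defaults to "gpt-4o". A quick sketch of both behaviors:

from kalibr.tokens import count_tokens

text = "Routing requests to the best model."  # 35 characters

# With tiktoken installed: an exact BPE count via the model's encoding.
# Without it: the fallback returns len(text) // 4, i.e. 35 // 4 == 8.
print(count_tokens(text))  # model_id defaults to "gpt-4o"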