kalibr 1.2.6__py3-none-any.whl → 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kalibr/pricing.py ADDED
@@ -0,0 +1,245 @@
1
+ """Centralized pricing data for all LLM vendors.
2
+
3
+ This module serves as the single source of truth for model pricing across
4
+ the entire Kalibr SDK. All cost adapters and instrumentation modules should
5
+ use this pricing data to ensure consistency.
6
+
7
+ All prices are in USD per 1 million tokens, matching the format used by
8
+ major LLM providers (OpenAI, Anthropic, etc.) on their pricing pages.
9
+
10
+ Version: 2026-01
11
+ Last Updated: January 2026
12
+ """
13
+
14
+ from typing import Dict, Optional, Tuple
15
+
16
+ # Pricing version for tracking updates
17
+ PRICING_VERSION = "2026-01"
18
+
19
+ # All prices in USD per 1M tokens
20
MODEL_PRICING: Dict[str, Dict[str, Dict[str, float]]] = {
    "openai": {
        # GPT-5 models (future-proofing)
        "gpt-5": {"input": 5.00, "output": 15.00},
        "gpt-5-turbo": {"input": 2.50, "output": 7.50},
        # GPT-4 models
        "gpt-4": {"input": 30.00, "output": 60.00},
        "gpt-4-turbo": {"input": 10.00, "output": 30.00},
        "gpt-4o": {"input": 2.50, "output": 10.00},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        # GPT-3.5 models
        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
        "gpt-3.5-turbo-16k": {"input": 1.00, "output": 2.00},
    },
    "anthropic": {
        # Claude 4 models (future-proofing)
        "claude-4-opus": {"input": 15.00, "output": 75.00},
        "claude-4-sonnet": {"input": 3.00, "output": 15.00},
        # Sonnet-tier models: Claude Sonnet 4 and Claude 3.5/3.7 Sonnet
        # (listed separately; all share the same price in this table)
        "claude-sonnet-4": {"input": 3.00, "output": 15.00},
        "claude-3-7-sonnet": {"input": 3.00, "output": 15.00},
        "claude-3-5-sonnet": {"input": 3.00, "output": 15.00},
        # Claude 3 models
        "claude-3-opus": {"input": 15.00, "output": 75.00},
        "claude-3-sonnet": {"input": 3.00, "output": 15.00},
        "claude-3-haiku": {"input": 0.25, "output": 1.25},
        # Claude 2 models
        "claude-2.1": {"input": 8.00, "output": 24.00},
        "claude-2.0": {"input": 8.00, "output": 24.00},
        "claude-instant-1.2": {"input": 0.80, "output": 2.40},
    },
    "google": {
        # Gemini 2.5 models
        "gemini-2.5-pro": {"input": 1.25, "output": 5.00},
        "gemini-2.5-flash": {"input": 0.075, "output": 0.30},
        # Gemini 2.0 models
        "gemini-2.0-flash": {"input": 0.075, "output": 0.30},
        "gemini-2.0-flash-thinking": {"input": 0.075, "output": 0.30},
        # Gemini 1.5 models
        "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
        "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
        "gemini-1.5-flash-8b": {"input": 0.0375, "output": 0.15},
        # Gemini 1.0 models
        "gemini-1.0-pro": {"input": 0.50, "output": 1.50},
        "gemini-pro": {"input": 0.50, "output": 1.50},  # Alias
    },
}

# Default fallback pricing per vendor (highest tier pricing for safety)
DEFAULT_PRICING: Dict[str, Dict[str, float]] = {
    "openai": {"input": 30.00, "output": 60.00},  # GPT-4 pricing
    "anthropic": {"input": 15.00, "output": 75.00},  # Claude 3 Opus pricing
    "google": {"input": 1.25, "output": 5.00},  # Gemini 1.5 Pro pricing
}

# Last-resort pricing used when the vendor itself is not in DEFAULT_PRICING.
_UNKNOWN_VENDOR_PRICING: Dict[str, float] = {"input": 20.00, "output": 60.00}

# Per-vendor fuzzy-match rules: (substring, pricing-table key).
# ORDER MATTERS: the first substring found in the model name wins, so more
# specific names must come before their prefixes (e.g. "gpt-4o-mini" before
# "gpt-4o" before "gpt-4").
_FUZZY_PATTERNS: Dict[str, Tuple[Tuple[str, str], ...]] = {
    "openai": (
        ("gpt-4o-mini", "gpt-4o-mini"),
        ("gpt-4o", "gpt-4o"),
        ("gpt-5-turbo", "gpt-5-turbo"),
        ("gpt-5", "gpt-5"),
        ("gpt-4-turbo", "gpt-4-turbo"),
        ("gpt-4", "gpt-4"),
        ("gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k"),
        ("gpt-3.5", "gpt-3.5-turbo"),
    ),
    "anthropic": (
        ("claude-3.5-sonnet", "claude-3-5-sonnet"),
        ("claude-3-5-sonnet", "claude-3-5-sonnet"),
        ("claude-sonnet-4", "claude-sonnet-4"),
        ("sonnet-4", "claude-sonnet-4"),
        ("claude-3-7-sonnet", "claude-3-7-sonnet"),
        ("claude-4-opus", "claude-4-opus"),
        ("claude-4-sonnet", "claude-4-sonnet"),
        ("claude-3-opus", "claude-3-opus"),
        ("claude-3-sonnet", "claude-3-sonnet"),
        ("claude-3-haiku", "claude-3-haiku"),
        ("claude-2.1", "claude-2.1"),
        ("claude-2.0", "claude-2.0"),
        ("claude-2", "claude-2.0"),
        ("claude-instant", "claude-instant-1.2"),
    ),
    "google": (
        ("gemini-2.5-pro", "gemini-2.5-pro"),
        ("gemini-2.5-flash", "gemini-2.5-flash"),
        ("gemini-2.0-flash-thinking", "gemini-2.0-flash-thinking"),
        ("gemini-2.0-flash", "gemini-2.0-flash"),
        ("gemini-1.5-flash-8b", "gemini-1.5-flash-8b"),
        ("gemini-1.5-flash", "gemini-1.5-flash"),
        ("gemini-1.5-pro", "gemini-1.5-pro"),
        # Versioned 1.0 names resolve to the "gemini-pro" alias (same price).
        ("gemini-1.0-pro", "gemini-pro"),
        ("gemini-pro", "gemini-pro"),
    ),
}


def normalize_model_name(vendor: str, model_name: str) -> str:
    """Normalize model name to match pricing table keys.

    Handles version suffixes, date stamps, and common variations. Matching is
    case-insensitive and ignores surrounding whitespace; ``None`` is treated
    as an empty string.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Raw model name from API

    Returns:
        Normalized model name that matches pricing table, or the lower-cased
        original if no match

    Example:
        >>> normalize_model_name("openai", "gpt-4o-2024-05-13")
        'gpt-4o'
        >>> normalize_model_name("anthropic", "claude-3-5-sonnet-20240620")
        'claude-3-5-sonnet'
    """
    vendor_key = (vendor or "").strip().lower()
    model_lower = (model_name or "").strip().lower()

    vendor_models = MODEL_PRICING.get(vendor_key, {})

    # Direct match
    if model_lower in vendor_models:
        return model_lower

    if vendor_key == "openai":
        # Drop date suffixes such as "-2024-05-13": keep everything before
        # the first "-2" (the whole name when there is none).
        base_model = model_lower.partition("-2")[0]
        if base_model in vendor_models:
            return base_model

    # Fuzzy match in priority order; unknown vendors have no rules.
    for needle, canonical in _FUZZY_PATTERNS.get(vendor_key, ()):
        if needle in model_lower:
            return canonical

    # Return original if no match found
    return model_lower


def get_pricing(
    vendor: str, model_name: str
) -> Tuple[Dict[str, float], str]:
    """Get pricing for a specific vendor and model.

    Falls back to the vendor's entry in ``DEFAULT_PRICING`` when the model is
    unknown, and to a generic fallback when the vendor is unknown too.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Model identifier

    Returns:
        Tuple of (pricing dict with 'input' and 'output' keys in USD per 1M tokens,
        normalized model name used). The pricing dict is a fresh copy, so
        callers may modify it without affecting the shared pricing tables.

    Example:
        >>> pricing, normalized = get_pricing("openai", "gpt-4o")
        >>> pricing
        {'input': 2.5, 'output': 10.0}
        >>> normalized
        'gpt-4o'
    """
    vendor_key = (vendor or "").strip().lower()
    normalized_model = normalize_model_name(vendor_key, model_name)

    pricing = MODEL_PRICING.get(vendor_key, {}).get(normalized_model)

    # Fall back to default vendor pricing if not found
    if pricing is None:
        pricing = DEFAULT_PRICING.get(vendor_key, _UNKNOWN_VENDOR_PRICING)

    # Copy so that caller-side mutation cannot corrupt the module tables.
    return dict(pricing), normalized_model


def compute_cost(
    vendor: str, model_name: str, input_tokens: int, output_tokens: int
) -> float:
    """Compute cost in USD for given vendor, model, and token counts.

    This is a convenience function that combines pricing lookup and cost calculation.

    Args:
        vendor: Vendor name (openai, anthropic, google)
        model_name: Model identifier
        input_tokens: Number of input tokens (``None`` counts as 0)
        output_tokens: Number of output tokens (``None`` counts as 0)

    Returns:
        Cost in USD (rounded to 6 decimal places)

    Example:
        >>> cost = compute_cost("openai", "gpt-4o", 1000, 500)
        >>> print(f"${cost:.6f}")
        $0.007500
    """
    pricing, _ = get_pricing(vendor, model_name)

    # Calculate cost (pricing is per 1M tokens)
    input_cost = ((input_tokens or 0) / 1_000_000) * pricing["input"]
    output_cost = ((output_tokens or 0) / 1_000_000) * pricing["output"]

    return round(input_cost + output_cost, 6)
245
+
kalibr/router.py CHANGED
@@ -4,14 +4,47 @@ Kalibr Router - Intelligent model routing with outcome learning.
4
4
 
5
5
  import os
6
6
  import logging
7
+ import uuid
7
8
  from typing import Any, Callable, Dict, List, Optional, Union
8
9
 
10
+ from opentelemetry import trace as otel_trace
11
+ from opentelemetry.trace import SpanContext, TraceFlags, NonRecordingSpan, set_span_in_context
12
+ from opentelemetry.context import Context
13
+
9
14
  logger = logging.getLogger(__name__)
10
15
 
11
16
  # Type for paths - either string or dict
12
17
  PathSpec = Union[str, Dict[str, Any]]
13
18
 
14
19
 
20
def _create_context_with_trace_id(trace_id_hex: str) -> Optional[Context]:
    """Create an OTel context with a specific trace_id.

    This allows child spans to inherit the intelligence service's trace_id,
    enabling JOINs between outcomes and traces tables.

    Args:
        trace_id_hex: Trace ID as a hexadecimal string (normally 32 chars).

    Returns:
        A context whose current span is a non-recording parent carrying the
        given trace_id, or None when the value is zero, cannot be parsed as
        hex, or does not fit in the 128 bits a trace ID may occupy.
    """
    # Largest value representable in a 128-bit trace ID.
    max_trace_id = (1 << 128) - 1

    try:
        # Convert 32-char hex string to 128-bit int
        trace_id_int = int(trace_id_hex, 16)
        if trace_id_int == 0:
            return None

        # int() also accepts negative and over-long values; a SpanContext
        # built from those is invalid and the trace linkage would be lost
        # without any indication, so reject them explicitly.
        if trace_id_int < 0 or trace_id_int > max_trace_id:
            logger.warning(
                f"Could not create OTel context with trace_id: "
                f"value out of 128-bit range ({trace_id_hex!r})"
            )
            return None

        # Create span context with our trace_id
        span_context = SpanContext(
            trace_id=trace_id_int,
            span_id=0xDEADBEEF,  # Placeholder, real span will have its own
            is_remote=True,  # Treat as remote parent so new span_id is generated
            trace_flags=TraceFlags(TraceFlags.SAMPLED),
        )

        # Create a non-recording parent span and set in context
        parent_span = NonRecordingSpan(span_context)
        return set_span_in_context(parent_span)
    except (ValueError, TypeError) as e:
        logger.warning(f"Could not create OTel context with trace_id: {e}")
        return None
46
+
47
+
15
48
  class Router:
16
49
  """
17
50
  Routes LLM requests to the best model based on learned outcomes.
@@ -23,6 +56,30 @@ class Router:
23
56
  success_when=lambda out: len(out) > 100
24
57
  )
25
58
  response = router.completion(messages=[...])
59
+
60
+ Examples:
61
+ # Simple auto-reporting
62
+ router = Router(
63
+ goal="extract_email",
64
+ paths=["gpt-4o", "claude-sonnet-4"],
65
+ success_when=lambda out: "@" in out
66
+ )
67
+ response = router.completion(messages=[...])
68
+ # report() called automatically
69
+
70
+ # Manual reporting for complex validation
71
+ router = Router(
72
+ goal="book_meeting",
73
+ paths=["gpt-4o", "claude-sonnet-4"]
74
+ )
75
+ response = router.completion(messages=[...])
76
+ # ... complex validation logic ...
77
+ router.report(success=meeting_booked)
78
+
79
+ Warning:
80
+ Router is not thread-safe. For concurrent requests, create separate
81
+ Router instances per thread/task. For sequential requests in a single
82
+ thread, Router can be reused across multiple completion() calls.
26
83
  """
27
84
 
28
85
  def __init__(
@@ -41,7 +98,16 @@ class Router:
41
98
  paths: List of models or path configs. Examples:
42
99
  ["gpt-4o", "claude-3-sonnet"]
43
100
  [{"model": "gpt-4o", "tools": ["search"]}]
44
- success_when: Optional function to auto-evaluate success from output
101
+ [{"model": "gpt-4o", "params": {"temperature": 0.7}}]
102
+ success_when: Optional function to auto-evaluate success from LLM output.
103
+ Takes the output string and returns True/False.
104
+ When provided, report() is called automatically after completion().
105
+ Use for simple validations (output length, contains key string).
106
+ For complex validation (API calls, multi-step checks), omit this
107
+ and call report() manually.
108
+ Examples:
109
+ success_when=lambda out: len(out) > 0 # Not empty
110
+ success_when=lambda out: "@" in out # Contains email
45
111
  exploration_rate: Override exploration rate (0.0-1.0)
46
112
  auto_register: If True, register paths on init
47
113
  """
@@ -49,6 +115,7 @@ class Router:
49
115
  self.success_when = success_when
50
116
  self.exploration_rate = exploration_rate
51
117
  self._last_trace_id: Optional[str] = None
118
+ self._last_model_id: Optional[str] = None
52
119
  self._last_decision: Optional[dict] = None
53
120
  self._outcome_reported = False
54
121
 
@@ -106,19 +173,22 @@ class Router:
106
173
  **kwargs: Additional args passed to provider
107
174
 
108
175
  Returns:
109
- OpenAI-compatible ChatCompletion response
176
+ OpenAI-compatible ChatCompletion response with added attribute:
177
+ - kalibr_trace_id: Trace ID for explicit outcome reporting
110
178
  """
111
179
  from kalibr.intelligence import decide
112
- from kalibr.context import get_trace_id
113
180
 
114
181
  # Reset state for new request
115
182
  self._outcome_reported = False
116
183
 
117
- # Get routing decision (or use forced model)
184
+ # Step 1: Get routing decision FIRST (before creating span)
185
+ decision = None
186
+ model_id = None
187
+ tool_id = None
188
+ params = {}
189
+
118
190
  if force_model:
119
191
  model_id = force_model
120
- tool_id = None
121
- params = {}
122
192
  self._last_decision = {"model_id": model_id, "forced": True}
123
193
  else:
124
194
  try:
@@ -128,44 +198,84 @@ class Router:
128
198
  params = decision.get("params") or {}
129
199
  self._last_decision = decision
130
200
  except Exception as e:
131
- # Fallback to first path if routing fails
132
201
  logger.warning(f"Routing failed, using fallback: {e}")
133
202
  model_id = self._paths[0]["model"]
134
203
  tool_id = self._paths[0].get("tools")
135
204
  params = self._paths[0].get("params") or {}
136
205
  self._last_decision = {"model_id": model_id, "fallback": True, "error": str(e)}
137
206
 
138
- # Dispatch to provider
139
- try:
140
- response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
141
- self._last_trace_id = get_trace_id()
207
+ # Step 2: Determine trace_id
208
+ decision_trace_id = self._last_decision.get("trace_id") if self._last_decision else None
142
209
 
143
- # Auto-report if success_when provided
144
- if self.success_when and not self._outcome_reported:
145
- try:
146
- output = response.choices[0].message.content or ""
147
- success = self.success_when(output)
148
- self.report(success=success)
149
- except Exception as e:
150
- logger.warning(f"Auto-outcome evaluation failed: {e}")
210
+ if decision_trace_id:
211
+ trace_id = decision_trace_id
212
+ else:
213
+ trace_id = uuid.uuid4().hex # Fallback: generate OTel-compatible format
214
+
215
+ self._last_trace_id = trace_id
216
+ self._last_model_id = model_id
217
+
218
+ # Step 3: Create OTel context with intelligence trace_id
219
+ otel_context = _create_context_with_trace_id(trace_id) if decision_trace_id else None
220
+
221
+ # Step 4: Create span with custom context (child spans inherit trace_id)
222
+ tracer = otel_trace.get_tracer("kalibr.router")
223
+
224
+ with tracer.start_as_current_span(
225
+ "kalibr.router.completion",
226
+ context=otel_context,
227
+ attributes={
228
+ "kalibr.goal": self.goal,
229
+ "kalibr.trace_id": trace_id,
230
+ "kalibr.model_id": model_id,
231
+ }
232
+ ) as router_span:
233
+ # Add decision attributes
234
+ if force_model:
235
+ router_span.set_attribute("kalibr.forced", True)
236
+ elif decision:
237
+ router_span.set_attribute("kalibr.path_id", decision.get("path_id", ""))
238
+ router_span.set_attribute("kalibr.reason", decision.get("reason", ""))
239
+ router_span.set_attribute("kalibr.exploration", decision.get("exploration", False))
240
+ router_span.set_attribute("kalibr.confidence", decision.get("confidence", 0.0))
241
+ else:
242
+ router_span.set_attribute("kalibr.fallback", True)
151
243
 
152
- return response
244
+ # Step 5: Dispatch to provider
245
+ try:
246
+ response = self._dispatch(model_id, messages, tool_id, **{**params, **kwargs})
153
247
 
154
- except Exception as e:
155
- # Auto-report failure
156
- self._last_trace_id = get_trace_id()
157
- if not self._outcome_reported:
158
- try:
159
- self.report(success=False, reason=f"provider_error: {type(e).__name__}")
160
- except:
161
- pass
162
- raise
248
+ # Auto-report if success_when provided
249
+ if self.success_when and not self._outcome_reported:
250
+ try:
251
+ output = response.choices[0].message.content or ""
252
+ success = self.success_when(output)
253
+ self.report(success=success)
254
+ except Exception as e:
255
+ logger.warning(f"Auto-outcome evaluation failed: {e}")
256
+
257
+ # Add trace_id to response for explicit linkage
258
+ response.kalibr_trace_id = trace_id
259
+ return response
260
+
261
+ except Exception as e:
262
+ router_span.set_attribute("error", True)
263
+ router_span.set_attribute("error.type", type(e).__name__)
264
+
265
+ # Auto-report failure
266
+ if not self._outcome_reported:
267
+ try:
268
+ self.report(success=False, reason=f"provider_error: {type(e).__name__}")
269
+ except:
270
+ pass
271
+ raise
163
272
 
164
273
  def report(
165
274
  self,
166
275
  success: bool,
167
276
  reason: Optional[str] = None,
168
277
  score: Optional[float] = None,
278
+ trace_id: Optional[str] = None,
169
279
  ):
170
280
  """
171
281
  Report outcome for the last completion.
@@ -174,18 +284,17 @@ class Router:
174
284
  success: Whether the task succeeded
175
285
  reason: Optional failure reason
176
286
  score: Optional quality score (0.0-1.0)
287
+ trace_id: Optional explicit trace ID (uses last completion's trace_id if not provided)
177
288
  """
178
289
  if self._outcome_reported:
179
- logger.warning("Outcome already reported for this request")
290
+ logger.warning("Outcome already reported for this completion. Each completion() requires a separate report() call.")
180
291
  return
181
292
 
182
293
  from kalibr.intelligence import report_outcome
183
- from kalibr.context import get_trace_id
184
294
 
185
- trace_id = self._last_trace_id or get_trace_id()
295
+ trace_id = trace_id or self._last_trace_id
186
296
  if not trace_id:
187
- logger.warning("No trace_id available for outcome reporting")
188
- return
297
+ raise ValueError("Must call completion() before report(). No trace_id available.")
189
298
 
190
299
  try:
191
300
  report_outcome(
@@ -194,6 +303,7 @@ class Router:
194
303
  success=success,
195
304
  score=score,
196
305
  failure_reason=reason,
306
+ model_id=self._last_model_id,
197
307
  )
198
308
  self._outcome_reported = True
199
309
  except Exception as e:
kalibr/simple_tracer.py CHANGED
@@ -19,6 +19,8 @@ Capsule Usage (automatic when middleware is active):
19
19
  def process_request(request: Request, prompt: str):
20
20
  # Capsule automatically updated with this hop
21
21
  return llm_call(prompt)
22
+
23
+ Note: Uses centralized pricing from kalibr.pricing module.
22
24
  """
23
25
 
24
26
  import json
@@ -31,6 +33,8 @@ from datetime import datetime, timezone
31
33
  from functools import wraps
32
34
  from typing import Callable, Optional
33
35
 
36
+ from kalibr.pricing import compute_cost
37
+
34
38
  try:
35
39
  import requests
36
40
  except ImportError:
@@ -53,7 +57,7 @@ def send_event(payload: dict):
53
57
  print("[Kalibr SDK] ❌ requests library not available")
54
58
  return
55
59
 
56
- url = os.getenv("KALIBR_COLLECTOR_URL", "https://api.kalibr.systems/api/ingest")
60
+ url = os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest")
57
61
  api_key = os.getenv("KALIBR_API_KEY")
58
62
  if not api_key:
59
63
  print("[Kalibr SDK] ⚠️ KALIBR_API_KEY not set, traces will not be sent")
@@ -155,21 +159,18 @@ def trace(
155
159
  actual_input_tokens = input_tokens or kwargs.get("input_tokens", 1000)
156
160
  actual_output_tokens = output_tokens or kwargs.get("output_tokens", 500)
157
161
 
158
- # Cost calculation (simplified pricing)
159
- # OpenAI GPT-4o: ~$2.50/1M input, ~$10/1M output
160
- # Anthropic Claude-3-Sonnet: ~$3/1M input, ~$15/1M output
161
- pricing_map = {
162
- "openai": {"gpt-4o": 0.00000250, "gpt-4": 0.00003000},
163
- "anthropic": {"claude-3-sonnet": 0.00000300, "claude-3-opus": 0.00001500},
164
- "google": {"gemini-pro": 0.00000125},
165
- }
166
-
167
- # Get unit price
168
- provider_pricing = pricing_map.get(provider, {})
169
- unit_price_usd = provider_pricing.get(model, 0.00002000) # Default $0.02/1M
162
+ # Cost calculation using centralized pricing
163
+ # This ensures consistency with all other cost adapters
164
+ total_cost_usd = compute_cost(
165
+ vendor=provider,
166
+ model_name=model,
167
+ input_tokens=actual_input_tokens,
168
+ output_tokens=actual_output_tokens,
169
+ )
170
170
 
171
- # Calculate total cost
172
- total_cost_usd = (actual_input_tokens + actual_output_tokens) * unit_price_usd
171
+ # Calculate unit price for backward compatibility (total cost / total tokens)
172
+ total_tokens = actual_input_tokens + actual_output_tokens
173
+ unit_price_usd = total_cost_usd / total_tokens if total_tokens > 0 else 0.0
173
174
 
174
175
  # Build payload
175
176
  payload = {
kalibr/trace_capsule.py CHANGED
@@ -28,6 +28,7 @@ Usage:
28
28
  """
29
29
 
30
30
  import json
31
+ import threading
31
32
  import uuid
32
33
  from datetime import datetime, timezone
33
34
  from typing import Any, Dict, List, Optional
@@ -85,12 +86,16 @@ class TraceCapsule:
85
86
  # Phase 3C: Context token propagation (keep as UUID for consistency)
86
87
  self.context_token = context_token or str(uuid.uuid4())
87
88
  self.parent_context_token = parent_context_token
89
+ # Thread-safety: Lock for protecting concurrent append_hop operations
90
+ self._lock = threading.Lock()
88
91
 
89
92
  def append_hop(self, hop: Dict[str, Any]) -> None:
90
93
  """Append a new hop to the capsule.
91
94
 
92
95
  Maintains a rolling window of last N hops to keep payload compact.
93
96
  Updates aggregate metrics automatically.
97
+
98
+ Thread-safe: Uses internal lock to protect concurrent modifications.
94
99
 
95
100
  Args:
96
101
  hop: Dictionary containing hop metadata
@@ -111,22 +116,24 @@ class TraceCapsule:
111
116
  "agent_name": "code-writer"
112
117
  })
113
118
  """
114
- # Add hop_index
115
- hop["hop_index"] = len(self.last_n_hops)
119
+ # Thread-safe update of capsule state
120
+ with self._lock:
121
+ # Add hop_index
122
+ hop["hop_index"] = len(self.last_n_hops)
116
123
 
117
- # Append to history
118
- self.last_n_hops.append(hop)
124
+ # Append to history
125
+ self.last_n_hops.append(hop)
119
126
 
120
- # Maintain rolling window (keep last N hops)
121
- if len(self.last_n_hops) > self.MAX_HOPS:
122
- self.last_n_hops.pop(0)
127
+ # Maintain rolling window (keep last N hops)
128
+ if len(self.last_n_hops) > self.MAX_HOPS:
129
+ self.last_n_hops.pop(0)
123
130
 
124
- # Update aggregates
125
- self.aggregate_cost_usd += hop.get("cost_usd", 0.0)
126
- self.aggregate_latency_ms += hop.get("duration_ms", 0.0)
131
+ # Update aggregates
132
+ self.aggregate_cost_usd += hop.get("cost_usd", 0.0)
133
+ self.aggregate_latency_ms += hop.get("duration_ms", 0.0)
127
134
 
128
- # Update timestamp
129
- self.timestamp = datetime.now(timezone.utc).isoformat()
135
+ # Update timestamp
136
+ self.timestamp = datetime.now(timezone.utc).isoformat()
130
137
 
131
138
  def get_last_hop(self) -> Optional[Dict[str, Any]]:
132
139
  """Get the most recent hop.
kalibr/utils.py CHANGED
@@ -38,8 +38,8 @@ def load_config_from_env() -> Dict[str, str]:
38
38
  "workflow_id": os.getenv("KALIBR_WORKFLOW_ID", "default-workflow"),
39
39
  "sandbox_id": os.getenv("SANDBOX_ID", "local"),
40
40
  "runtime_env": os.getenv("RUNTIME_ENV", "local"),
41
- "api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://api.kalibr.systems/api/v1/traces"),
42
- "collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://api.kalibr.systems/api/ingest"),
41
+ "api_endpoint": os.getenv("KALIBR_API_ENDPOINT", "https://kalibr-backend.fly.dev/api/v1/traces"),
42
+ "collector_url": os.getenv("KALIBR_COLLECTOR_URL", "https://kalibr-backend.fly.dev/api/ingest"),
43
43
  }
44
44
  return config
45
45