kalibr 1.0.28__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +170 -3
- kalibr/__main__.py +3 -203
- kalibr/capsule_middleware.py +108 -0
- kalibr/cli/__init__.py +5 -0
- kalibr/cli/capsule_cmd.py +174 -0
- kalibr/cli/deploy_cmd.py +114 -0
- kalibr/cli/main.py +67 -0
- kalibr/cli/run.py +200 -0
- kalibr/cli/serve.py +59 -0
- kalibr/client.py +293 -0
- kalibr/collector.py +173 -0
- kalibr/context.py +132 -0
- kalibr/cost_adapter.py +222 -0
- kalibr/decorators.py +140 -0
- kalibr/instrumentation/__init__.py +13 -0
- kalibr/instrumentation/anthropic_instr.py +282 -0
- kalibr/instrumentation/base.py +108 -0
- kalibr/instrumentation/google_instr.py +281 -0
- kalibr/instrumentation/openai_instr.py +265 -0
- kalibr/instrumentation/registry.py +153 -0
- kalibr/kalibr.py +144 -230
- kalibr/kalibr_app.py +53 -314
- kalibr/middleware/__init__.py +5 -0
- kalibr/middleware/auto_tracer.py +356 -0
- kalibr/models.py +41 -0
- kalibr/redaction.py +44 -0
- kalibr/schemas.py +116 -0
- kalibr/simple_tracer.py +255 -0
- kalibr/tokens.py +52 -0
- kalibr/trace_capsule.py +296 -0
- kalibr/trace_models.py +201 -0
- kalibr/tracer.py +354 -0
- kalibr/types.py +25 -93
- kalibr/utils.py +198 -0
- kalibr-1.1.0.dist-info/METADATA +97 -0
- kalibr-1.1.0.dist-info/RECORD +40 -0
- kalibr-1.1.0.dist-info/entry_points.txt +2 -0
- kalibr-1.1.0.dist-info/licenses/LICENSE +21 -0
- kalibr/deployment.py +0 -41
- kalibr/packager.py +0 -43
- kalibr/runtime_router.py +0 -138
- kalibr/schema_generators.py +0 -159
- kalibr/validator.py +0 -70
- kalibr-1.0.28.data/data/examples/README.md +0 -173
- kalibr-1.0.28.data/data/examples/basic_kalibr_example.py +0 -66
- kalibr-1.0.28.data/data/examples/enhanced_kalibr_example.py +0 -347
- kalibr-1.0.28.dist-info/METADATA +0 -175
- kalibr-1.0.28.dist-info/RECORD +0 -19
- kalibr-1.0.28.dist-info/entry_points.txt +0 -2
- kalibr-1.0.28.dist-info/licenses/LICENSE +0 -11
- {kalibr-1.0.28.dist-info → kalibr-1.1.0.dist-info}/WHEEL +0 -0
- {kalibr-1.0.28.dist-info → kalibr-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base instrumentation class for LLM SDKs
|
|
3
|
+
|
|
4
|
+
Provides common functionality for monkey-patching LLM SDKs and
|
|
5
|
+
emitting OpenTelemetry-compatible spans.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
from opentelemetry import trace
|
|
14
|
+
from opentelemetry.trace import SpanKind, Status, StatusCode
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseInstrumentation(ABC):
    """Base class for LLM SDK instrumentation.

    Subclasses monkey-patch one vendor SDK (OpenAI, Anthropic, Google, ...)
    and emit OpenTelemetry-compatible spans for each API call.
    """

    def __init__(self, tracer_name: str):
        # Tracer is namespaced per vendor (e.g. "kalibr.openai").
        self.tracer = trace.get_tracer(tracer_name)
        # Patch-state flag; toggled by subclasses' instrument()/uninstrument()
        # so repeated calls stay idempotent.
        self._is_instrumented = False

    @abstractmethod
    def instrument(self) -> bool:
        """
        Apply monkey-patching to instrument the SDK.

        Returns:
            bool: True if instrumentation succeeded, False otherwise
        """
        pass

    @abstractmethod
    def uninstrument(self) -> bool:
        """
        Remove monkey-patching to restore original SDK behavior.

        Returns:
            bool: True if uninstrumentation succeeded, False otherwise
        """
        pass

    @property
    def is_instrumented(self) -> bool:
        """Check if SDK is currently instrumented."""
        return self._is_instrumented

    def create_span(self, name: str, attributes: Dict[str, Any], kind: SpanKind = SpanKind.CLIENT):
        """
        Create an OpenTelemetry span with standardized attributes.

        Args:
            name: Span name (e.g., "openai.chat.completions.create")
            attributes: Span attributes following OTel semantic conventions
            kind: Span kind (default: CLIENT for LLM API calls)

        Returns:
            Context manager for the span
        """
        return self.tracer.start_as_current_span(name, kind=kind, attributes=attributes)

    @staticmethod
    def set_error(span: trace.Span, error: Exception) -> None:
        """
        Set error status and attributes on a span.

        Marks the span failed, records the exception type/message as
        attributes, and attaches the exception event. Does not swallow or
        re-raise the error — callers decide what to do with it.

        Args:
            span: The span to update
            error: The exception that occurred
        """
        span.set_status(Status(StatusCode.ERROR))
        span.set_attribute("error.type", type(error).__name__)
        span.set_attribute("error.message", str(error))
        span.record_exception(error)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class BaseCostAdapter(ABC):
    """Abstract base for per-vendor cost calculation adapters.

    Subclasses populate ``PRICING`` with per-model rates and implement
    ``calculate_cost`` to turn a token-usage dict into a USD amount.
    """

    # Per-model rate table. Keys are model identifiers; values hold
    # "input"/"output" prices per 1K tokens in USD. Empty by default;
    # each vendor subclass overrides it.
    PRICING: Dict[str, Dict[str, float]] = {}

    @abstractmethod
    def calculate_cost(self, model: str, usage: Dict[str, int]) -> float:
        """Return the USD cost of one LLM API call.

        Args:
            model: Model identifier (e.g., "gpt-4")
            usage: Token counts with prompt_tokens and completion_tokens

        Returns:
            Cost in USD (rounded to 6 decimal places)
        """
        pass

    def get_pricing(self, model: str) -> Optional[Dict[str, float]]:
        """Look up the rate entry for *model*.

        Args:
            model: Model identifier

        Returns:
            Dictionary with "input" and "output" prices per 1K tokens,
            or None when the model has no entry.
        """
        try:
            return self.PRICING[model]
        except KeyError:
            return None
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Google Generative AI SDK Instrumentation
|
|
3
|
+
|
|
4
|
+
Monkey-patches the Google Generative AI SDK to automatically emit OpenTelemetry spans
|
|
5
|
+
for all content generation API calls.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from functools import wraps
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
|
+
|
|
12
|
+
from opentelemetry.trace import SpanKind
|
|
13
|
+
|
|
14
|
+
from .base import BaseCostAdapter, BaseInstrumentation
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class GoogleCostAdapter(BaseCostAdapter):
    """Cost calculation adapter for Google Generative AI models."""

    # Pricing per 1K tokens (USD) - Updated November 2025
    PRICING = {
        # Gemini 2.5 models
        "gemini-2.5-pro": {"input": 0.00125, "output": 0.005},
        "gemini-2.5-flash": {"input": 0.000075, "output": 0.0003},
        # Gemini 2.0 models
        "gemini-2.0-flash": {"input": 0.000075, "output": 0.0003},
        "gemini-2.0-flash-thinking": {"input": 0.000075, "output": 0.0003},
        # Gemini 1.5 models
        "gemini-1.5-pro": {"input": 0.00125, "output": 0.005},
        "gemini-1.5-flash": {"input": 0.000075, "output": 0.0003},
        "gemini-1.5-flash-8b": {"input": 0.0000375, "output": 0.00015},
        # Gemini 1.0 models
        "gemini-1.0-pro": {"input": 0.0005, "output": 0.0015},
        "gemini-pro": {"input": 0.0005, "output": 0.0015},  # Alias
    }

    def calculate_cost(self, model: str, usage: Dict[str, int]) -> float:
        """Calculate cost in USD for a Google Generative AI API call.

        Args:
            model: Model identifier (case-insensitive; versioned names such
                as "gemini-1.5-pro-002" are resolved by fuzzy matching)
            usage: Token counts with prompt_tokens and completion_tokens
                (missing keys are treated as 0)

        Returns:
            Cost in USD, rounded to 6 decimal places.
        """
        # Normalize model name
        base_model = model.lower()

        # Try exact match first
        pricing = self.get_pricing(base_model)

        # Fuzzy matching for versioned model names. Iterate the most
        # specific (longest) keys first: otherwise a name like
        # "gemini-1.5-flash-8b-001" would hit "gemini-1.5-flash" before
        # "gemini-1.5-flash-8b" and be billed at double the correct rate.
        if not pricing:
            for known_model in sorted(self.PRICING, key=len, reverse=True):
                if known_model in base_model or base_model in known_model:
                    pricing = self.PRICING[known_model]
                    break

        if not pricing:
            # Default to Gemini 1.5 Pro pricing if unknown
            pricing = {"input": 0.00125, "output": 0.005}

        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)

        input_cost = (prompt_tokens / 1000) * pricing["input"]
        output_cost = (completion_tokens / 1000) * pricing["output"]

        return round(input_cost + output_cost, 6)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class GoogleInstrumentation(BaseInstrumentation):
    """Instrumentation for Google Generative AI SDK.

    Monkey-patches ``GenerativeModel.generate_content`` (and its async
    variant, when present) so every call emits an OpenTelemetry CLIENT
    span carrying token usage, cost, and latency attributes.
    """

    def __init__(self):
        super().__init__("kalibr.google")
        # Unpatched SDK methods are stashed here so uninstrument() can
        # restore them later.
        self._original_generate_content = None
        self._original_async_generate_content = None
        self.cost_adapter = GoogleCostAdapter()

    def instrument(self) -> bool:
        """Apply monkey-patching to Google Generative AI SDK.

        Returns:
            bool: True when patching succeeded (or was already applied);
            False when the SDK is not installed or patching failed.
        """
        if self._is_instrumented:
            # Idempotent: never wrap the already-wrapped methods twice.
            return True

        try:
            import google.generativeai as genai
            from google.generativeai.generative_models import GenerativeModel

            # Patch sync method
            if hasattr(GenerativeModel, "generate_content"):
                self._original_generate_content = GenerativeModel.generate_content
                GenerativeModel.generate_content = self._traced_generate_wrapper(
                    GenerativeModel.generate_content
                )

            # Patch async method (if available)
            if hasattr(GenerativeModel, "generate_content_async"):
                self._original_async_generate_content = GenerativeModel.generate_content_async
                GenerativeModel.generate_content_async = self._traced_async_generate_wrapper(
                    GenerativeModel.generate_content_async
                )

            self._is_instrumented = True
            return True

        except ImportError:
            # SDK absent is an expected condition, not an error.
            print("⚠️ Google Generative AI SDK not installed, skipping instrumentation")
            return False
        except Exception as e:
            print(f"❌ Failed to instrument Google Generative AI SDK: {e}")
            return False

    def uninstrument(self) -> bool:
        """Remove monkey-patching from Google Generative AI SDK.

        Returns:
            bool: True when the original methods were restored (or nothing
            was patched); False on failure.
        """
        if not self._is_instrumented:
            return True

        try:
            import google.generativeai as genai
            from google.generativeai.generative_models import GenerativeModel

            # Restore sync method
            if self._original_generate_content:
                GenerativeModel.generate_content = self._original_generate_content

            # Restore async method
            if self._original_async_generate_content:
                GenerativeModel.generate_content_async = self._original_async_generate_content

            self._is_instrumented = False
            return True

        except Exception as e:
            print(f"❌ Failed to uninstrument Google Generative AI SDK: {e}")
            return False

    def _traced_generate_wrapper(self, original_func):
        """Wrapper factory for the sync generate_content method."""

        @wraps(original_func)
        def wrapper(self_instance, *args, **kwargs):
            # Extract model name from instance.
            # NOTE(review): assumes GenerativeModel keeps its name in the
            # private attribute "_model_name" — confirm across SDK versions.
            model = getattr(self_instance, "_model_name", "unknown")

            # Create span with initial attributes
            with self.tracer.start_as_current_span(
                "google.generativeai.generate_content",
                kind=SpanKind.CLIENT,
                attributes={
                    "llm.vendor": "google",
                    "llm.request.model": model,
                    "llm.system": "google.generativeai",
                },
            ) as span:
                start_time = time.time()

                # Phase 3: Inject Kalibr context for HTTP→SDK linking
                try:
                    from kalibr.context import inject_kalibr_context_into_span

                    inject_kalibr_context_into_span(span)
                except Exception:
                    pass  # Fail silently if context not available

                try:
                    # Call original method
                    result = original_func(self_instance, *args, **kwargs)

                    # Extract and set response metadata
                    self._set_response_attributes(span, result, model, start_time)

                    return result

                except Exception as e:
                    # Mark the span failed, then let the caller see the error.
                    self.set_error(span, e)
                    raise

        return wrapper

    def _traced_async_generate_wrapper(self, original_func):
        """Wrapper factory for the async generate_content method.

        Mirrors _traced_generate_wrapper exactly, except the wrapped call
        is awaited.
        """

        @wraps(original_func)
        async def wrapper(self_instance, *args, **kwargs):
            # Extract model name from instance (see sync wrapper note).
            model = getattr(self_instance, "_model_name", "unknown")

            # Create span with initial attributes
            with self.tracer.start_as_current_span(
                "google.generativeai.generate_content",
                kind=SpanKind.CLIENT,
                attributes={
                    "llm.vendor": "google",
                    "llm.request.model": model,
                    "llm.system": "google.generativeai",
                },
            ) as span:
                start_time = time.time()

                # Phase 3: Inject Kalibr context for HTTP→SDK linking
                try:
                    from kalibr.context import inject_kalibr_context_into_span

                    inject_kalibr_context_into_span(span)
                except Exception:
                    pass  # Fail silently if context not available

                try:
                    # Call original async method
                    result = await original_func(self_instance, *args, **kwargs)

                    # Extract and set response metadata
                    self._set_response_attributes(span, result, model, start_time)

                    return result

                except Exception as e:
                    self.set_error(span, e)
                    raise

        return wrapper

    def _set_response_attributes(self, span, result, model: str, start_time: float) -> None:
        """Extract metadata from the response and set span attributes.

        Best-effort: any extraction failure is recorded as an attribute
        instead of propagating, so tracing never breaks the API call.
        """
        try:
            # Model (from instance)
            span.set_attribute("llm.response.model", model)

            # Token usage
            if hasattr(result, "usage_metadata") and result.usage_metadata:
                usage = result.usage_metadata

                # getattr defaults keep this tolerant of partial metadata.
                prompt_tokens = getattr(usage, "prompt_token_count", 0)
                completion_tokens = getattr(usage, "candidates_token_count", 0)
                total_tokens = getattr(
                    usage, "total_token_count", prompt_tokens + completion_tokens
                )

                span.set_attribute("llm.usage.prompt_tokens", prompt_tokens)
                span.set_attribute("llm.usage.completion_tokens", completion_tokens)
                span.set_attribute("llm.usage.total_tokens", total_tokens)

                # Calculate cost
                cost = self.cost_adapter.calculate_cost(
                    model,
                    {
                        "prompt_tokens": prompt_tokens,
                        "completion_tokens": completion_tokens,
                    },
                )
                span.set_attribute("llm.cost_usd", cost)

            # Latency
            latency_ms = (time.time() - start_time) * 1000
            span.set_attribute("llm.latency_ms", round(latency_ms, 2))

            # Finish reason (if available)
            if hasattr(result, "candidates") and result.candidates:
                candidate = result.candidates[0]
                if hasattr(candidate, "finish_reason"):
                    span.set_attribute("llm.response.finish_reason", str(candidate.finish_reason))

        except Exception as e:
            # Don't fail the call if metadata extraction fails
            span.set_attribute("llm.metadata_extraction_error", str(e))
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# Singleton instance — one patch state shared by all module-level calls.
_google_instrumentation = None


def get_instrumentation() -> GoogleInstrumentation:
    """Get or create the Google instrumentation singleton (lazy init)."""
    global _google_instrumentation
    if _google_instrumentation is None:
        _google_instrumentation = GoogleInstrumentation()
    return _google_instrumentation


def instrument() -> bool:
    """Instrument Google Generative AI SDK; returns True on success."""
    return get_instrumentation().instrument()


def uninstrument() -> bool:
    """Uninstrument Google Generative AI SDK; returns True on success."""
    return get_instrumentation().uninstrument()
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAI SDK Instrumentation
|
|
3
|
+
|
|
4
|
+
Monkey-patches the OpenAI SDK to automatically emit OpenTelemetry spans
|
|
5
|
+
for all chat completion API calls.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from functools import wraps
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
|
+
|
|
12
|
+
from opentelemetry.trace import SpanKind
|
|
13
|
+
|
|
14
|
+
from .base import BaseCostAdapter, BaseInstrumentation
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OpenAICostAdapter(BaseCostAdapter):
    """Cost calculation adapter for OpenAI models."""

    # Pricing per 1K tokens (USD) - Updated November 2025
    PRICING = {
        # GPT-5 models
        "gpt-5": {"input": 0.005, "output": 0.015},
        "gpt-5-turbo": {"input": 0.0025, "output": 0.0075},
        # GPT-4 models
        "gpt-4": {"input": 0.03, "output": 0.06},
        "gpt-4-turbo": {"input": 0.01, "output": 0.03},
        "gpt-4o": {"input": 0.0025, "output": 0.01},
        "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
        # GPT-3.5 models
        "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
        "gpt-3.5-turbo-16k": {"input": 0.001, "output": 0.002},
    }

    def calculate_cost(self, model: str, usage: Dict[str, int]) -> float:
        """Calculate cost in USD for an OpenAI API call.

        Args:
            model: Model identifier; dated variants ("gpt-4o-2024-05-13")
                and aliases ("chatgpt-4o-latest") are resolved to a base
                pricing entry where possible.
            usage: Token counts with prompt_tokens and completion_tokens
                (missing keys are treated as 0)

        Returns:
            Cost in USD, rounded to 6 decimal places.
        """
        # Normalize model name (remove date suffixes like -20240101)
        base_model = model.split("-2")[0]

        pricing = self.get_pricing(base_model)

        # Fuzzy fallback for aliased/versioned names, mirroring the Google
        # adapter. Longest keys first so the most specific entry wins
        # (e.g. "gpt-4o-mini" before "gpt-4o"); without this, any
        # unrecognized alias silently bills at the expensive GPT-4 default.
        if not pricing:
            for known_model in sorted(self.PRICING, key=len, reverse=True):
                if known_model in base_model or base_model in known_model:
                    pricing = self.PRICING[known_model]
                    break

        if not pricing:
            # Default to GPT-4 pricing if unknown
            pricing = {"input": 0.03, "output": 0.06}

        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)

        input_cost = (prompt_tokens / 1000) * pricing["input"]
        output_cost = (completion_tokens / 1000) * pricing["output"]

        return round(input_cost + output_cost, 6)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class OpenAIInstrumentation(BaseInstrumentation):
    """Instrumentation for OpenAI SDK.

    Monkey-patches ``Completions.create`` and ``AsyncCompletions.create``
    so every chat-completion call emits an OpenTelemetry CLIENT span with
    token usage, cost, and latency attributes.
    """

    def __init__(self):
        super().__init__("kalibr.openai")
        # Unpatched SDK methods are stashed here so uninstrument() can
        # restore them later.
        self._original_create = None
        self._original_async_create = None
        self.cost_adapter = OpenAICostAdapter()

    def instrument(self) -> bool:
        """Apply monkey-patching to OpenAI SDK.

        Returns:
            bool: True when patching succeeded (or was already applied);
            False when the SDK is not installed or patching failed.
        """
        if self._is_instrumented:
            # Idempotent: never wrap the already-wrapped methods twice.
            return True

        try:
            import openai
            from openai.resources.chat import completions

            # Patch sync method
            if hasattr(completions.Completions, "create"):
                self._original_create = completions.Completions.create
                completions.Completions.create = self._traced_create_wrapper(
                    completions.Completions.create
                )

            # Patch async method
            if hasattr(completions.AsyncCompletions, "create"):
                self._original_async_create = completions.AsyncCompletions.create
                completions.AsyncCompletions.create = self._traced_async_create_wrapper(
                    completions.AsyncCompletions.create
                )

            self._is_instrumented = True
            return True

        except ImportError:
            # SDK absent is an expected condition, not an error.
            print("⚠️ OpenAI SDK not installed, skipping instrumentation")
            return False
        except Exception as e:
            print(f"❌ Failed to instrument OpenAI SDK: {e}")
            return False

    def uninstrument(self) -> bool:
        """Remove monkey-patching from OpenAI SDK.

        Returns:
            bool: True when the original methods were restored (or nothing
            was patched); False on failure.
        """
        if not self._is_instrumented:
            return True

        try:
            import openai
            from openai.resources.chat import completions

            # Restore sync method
            if self._original_create:
                completions.Completions.create = self._original_create

            # Restore async method
            if self._original_async_create:
                completions.AsyncCompletions.create = self._original_async_create

            self._is_instrumented = False
            return True

        except Exception as e:
            print(f"❌ Failed to uninstrument OpenAI SDK: {e}")
            return False

    def _traced_create_wrapper(self, original_func):
        """Wrapper factory for the sync create method."""

        @wraps(original_func)
        def wrapper(self_instance, *args, **kwargs):
            # Extract model from kwargs
            model = kwargs.get("model", "unknown")

            # Create span with initial attributes
            with self.tracer.start_as_current_span(
                "openai.chat.completions.create",
                kind=SpanKind.CLIENT,
                attributes={
                    "llm.vendor": "openai",
                    "llm.request.model": model,
                    "llm.system": "openai",
                },
            ) as span:
                start_time = time.time()

                # Phase 3: Inject Kalibr context for HTTP→SDK linking
                try:
                    from kalibr.context import inject_kalibr_context_into_span

                    inject_kalibr_context_into_span(span)
                except Exception:
                    pass  # Fail silently if context not available

                try:
                    # Call original method
                    result = original_func(self_instance, *args, **kwargs)

                    # Extract and set response metadata
                    self._set_response_attributes(span, result, start_time)

                    return result

                except Exception as e:
                    # Mark the span failed, then let the caller see the error.
                    self.set_error(span, e)
                    raise

        return wrapper

    def _traced_async_create_wrapper(self, original_func):
        """Wrapper factory for the async create method.

        Mirrors _traced_create_wrapper exactly, except the wrapped call
        is awaited.
        """

        @wraps(original_func)
        async def wrapper(self_instance, *args, **kwargs):
            # Extract model from kwargs
            model = kwargs.get("model", "unknown")

            # Create span with initial attributes
            with self.tracer.start_as_current_span(
                "openai.chat.completions.create",
                kind=SpanKind.CLIENT,
                attributes={
                    "llm.vendor": "openai",
                    "llm.request.model": model,
                    "llm.system": "openai",
                },
            ) as span:
                start_time = time.time()

                # Phase 3: Inject Kalibr context for HTTP→SDK linking
                try:
                    from kalibr.context import inject_kalibr_context_into_span

                    inject_kalibr_context_into_span(span)
                except Exception:
                    pass  # Fail silently if context not available

                try:
                    # Call original async method
                    result = await original_func(self_instance, *args, **kwargs)

                    # Extract and set response metadata
                    self._set_response_attributes(span, result, start_time)

                    return result

                except Exception as e:
                    self.set_error(span, e)
                    raise

        return wrapper

    def _set_response_attributes(self, span, result, start_time: float) -> None:
        """Extract metadata from the response and set span attributes.

        Best-effort: any extraction failure is recorded as an attribute
        instead of propagating, so tracing never breaks the API call.

        FIX: the usage fields and ``result.model`` are now read via
        ``getattr`` with defaults. Previously the cost calculation accessed
        ``usage.prompt_tokens``/``usage.completion_tokens``/``result.model``
        unguarded, so a partial usage object raised inside this method and
        the latency and response-id attributes were silently dropped.
        """
        try:
            # Model
            model = getattr(result, "model", None)
            if model:
                span.set_attribute("llm.response.model", model)

            # Token usage
            usage = getattr(result, "usage", None)
            if usage:
                prompt_tokens = getattr(usage, "prompt_tokens", 0)
                completion_tokens = getattr(usage, "completion_tokens", 0)
                total_tokens = getattr(
                    usage, "total_tokens", prompt_tokens + completion_tokens
                )

                span.set_attribute("llm.usage.prompt_tokens", prompt_tokens)
                span.set_attribute("llm.usage.completion_tokens", completion_tokens)
                span.set_attribute("llm.usage.total_tokens", total_tokens)

                # Calculate cost; tolerate responses without a model field.
                cost = self.cost_adapter.calculate_cost(
                    model or "unknown",
                    {
                        "prompt_tokens": prompt_tokens,
                        "completion_tokens": completion_tokens,
                    },
                )
                span.set_attribute("llm.cost_usd", cost)

            # Latency
            latency_ms = (time.time() - start_time) * 1000
            span.set_attribute("llm.latency_ms", round(latency_ms, 2))

            # Response ID
            if hasattr(result, "id"):
                span.set_attribute("llm.response.id", result.id)

        except Exception as e:
            # Don't fail the call if metadata extraction fails
            span.set_attribute("llm.metadata_extraction_error", str(e))
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# Singleton instance — one patch state shared by all module-level calls.
_openai_instrumentation = None


def get_instrumentation() -> OpenAIInstrumentation:
    """Get or create the OpenAI instrumentation singleton (lazy init)."""
    global _openai_instrumentation
    if _openai_instrumentation is None:
        _openai_instrumentation = OpenAIInstrumentation()
    return _openai_instrumentation


def instrument() -> bool:
    """Instrument OpenAI SDK; returns True on success."""
    return get_instrumentation().instrument()


def uninstrument() -> bool:
    """Uninstrument OpenAI SDK; returns True on success."""
    return get_instrumentation().uninstrument()
|