kalibr 1.0.28__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (52)
  1. kalibr/__init__.py +170 -3
  2. kalibr/__main__.py +3 -203
  3. kalibr/capsule_middleware.py +108 -0
  4. kalibr/cli/__init__.py +5 -0
  5. kalibr/cli/capsule_cmd.py +174 -0
  6. kalibr/cli/deploy_cmd.py +114 -0
  7. kalibr/cli/main.py +67 -0
  8. kalibr/cli/run.py +200 -0
  9. kalibr/cli/serve.py +59 -0
  10. kalibr/client.py +293 -0
  11. kalibr/collector.py +173 -0
  12. kalibr/context.py +132 -0
  13. kalibr/cost_adapter.py +222 -0
  14. kalibr/decorators.py +140 -0
  15. kalibr/instrumentation/__init__.py +13 -0
  16. kalibr/instrumentation/anthropic_instr.py +282 -0
  17. kalibr/instrumentation/base.py +108 -0
  18. kalibr/instrumentation/google_instr.py +281 -0
  19. kalibr/instrumentation/openai_instr.py +265 -0
  20. kalibr/instrumentation/registry.py +153 -0
  21. kalibr/kalibr.py +144 -230
  22. kalibr/kalibr_app.py +53 -314
  23. kalibr/middleware/__init__.py +5 -0
  24. kalibr/middleware/auto_tracer.py +356 -0
  25. kalibr/models.py +41 -0
  26. kalibr/redaction.py +44 -0
  27. kalibr/schemas.py +116 -0
  28. kalibr/simple_tracer.py +255 -0
  29. kalibr/tokens.py +52 -0
  30. kalibr/trace_capsule.py +296 -0
  31. kalibr/trace_models.py +201 -0
  32. kalibr/tracer.py +354 -0
  33. kalibr/types.py +25 -93
  34. kalibr/utils.py +198 -0
  35. kalibr-1.1.0.dist-info/METADATA +97 -0
  36. kalibr-1.1.0.dist-info/RECORD +40 -0
  37. kalibr-1.1.0.dist-info/entry_points.txt +2 -0
  38. kalibr-1.1.0.dist-info/licenses/LICENSE +21 -0
  39. kalibr/deployment.py +0 -41
  40. kalibr/packager.py +0 -43
  41. kalibr/runtime_router.py +0 -138
  42. kalibr/schema_generators.py +0 -159
  43. kalibr/validator.py +0 -70
  44. kalibr-1.0.28.data/data/examples/README.md +0 -173
  45. kalibr-1.0.28.data/data/examples/basic_kalibr_example.py +0 -66
  46. kalibr-1.0.28.data/data/examples/enhanced_kalibr_example.py +0 -347
  47. kalibr-1.0.28.dist-info/METADATA +0 -175
  48. kalibr-1.0.28.dist-info/RECORD +0 -19
  49. kalibr-1.0.28.dist-info/entry_points.txt +0 -2
  50. kalibr-1.0.28.dist-info/licenses/LICENSE +0 -11
  51. {kalibr-1.0.28.dist-info → kalibr-1.1.0.dist-info}/WHEEL +0 -0
  52. {kalibr-1.0.28.dist-info → kalibr-1.1.0.dist-info}/top_level.txt +0 -0
kalibr/instrumentation/base.py (new file)
@@ -0,0 +1,108 @@
+ """
+ Base instrumentation class for LLM SDKs
+
+ Provides common functionality for monkey-patching LLM SDKs and
+ emitting OpenTelemetry-compatible spans.
+ """
+
+ import time
+ from abc import ABC, abstractmethod
+ from datetime import datetime
+ from typing import Any, Dict, Optional
+
+ from opentelemetry import trace
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+
+
+ class BaseInstrumentation(ABC):
+     """Base class for LLM SDK instrumentation"""
+
+     def __init__(self, tracer_name: str):
+         self.tracer = trace.get_tracer(tracer_name)
+         self._is_instrumented = False
+
+     @abstractmethod
+     def instrument(self) -> bool:
+         """
+         Apply monkey-patching to instrument the SDK
+
+         Returns:
+             bool: True if instrumentation succeeded, False otherwise
+         """
+         pass
+
+     @abstractmethod
+     def uninstrument(self) -> bool:
+         """
+         Remove monkey-patching to restore original SDK behavior
+
+         Returns:
+             bool: True if uninstrumentation succeeded, False otherwise
+         """
+         pass
+
+     @property
+     def is_instrumented(self) -> bool:
+         """Check if SDK is currently instrumented"""
+         return self._is_instrumented
+
+     def create_span(self, name: str, attributes: Dict[str, Any], kind: SpanKind = SpanKind.CLIENT):
+         """
+         Create an OpenTelemetry span with standardized attributes
+
+         Args:
+             name: Span name (e.g., "openai.chat.completions.create")
+             attributes: Span attributes following OTel semantic conventions
+             kind: Span kind (default: CLIENT for LLM API calls)
+
+         Returns:
+             Context manager for the span
+         """
+         return self.tracer.start_as_current_span(name, kind=kind, attributes=attributes)
+
+     @staticmethod
+     def set_error(span: trace.Span, error: Exception) -> None:
+         """
+         Set error status and attributes on a span
+
+         Args:
+             span: The span to update
+             error: The exception that occurred
+         """
+         span.set_status(Status(StatusCode.ERROR))
+         span.set_attribute("error.type", type(error).__name__)
+         span.set_attribute("error.message", str(error))
+         span.record_exception(error)
+
+
+ class BaseCostAdapter(ABC):
+     """Base class for cost calculation adapters"""
+
+     PRICING: Dict[str, Dict[str, float]] = {}
+
+     @abstractmethod
+     def calculate_cost(self, model: str, usage: Dict[str, int]) -> float:
+         """
+         Calculate cost in USD for an LLM API call
+
+         Args:
+             model: Model identifier (e.g., "gpt-4")
+             usage: Token usage dictionary with prompt_tokens, completion_tokens
+
+         Returns:
+             Cost in USD (rounded to 6 decimal places)
+         """
+         pass
+
+     def get_pricing(self, model: str) -> Optional[Dict[str, float]]:
+         """
+         Get pricing for a specific model
+
+         Args:
+             model: Model identifier
+
+         Returns:
+             Dictionary with "input" and "output" prices per 1K tokens,
+             or None if model not found
+         """
+         return self.PRICING.get(model)
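The two base classes above define the contract each provider module under kalibr/instrumentation/ fills in: instrument()/uninstrument() toggle the monkey-patching, and calculate_cost() maps a token-usage dict onto the per-1K-token PRICING table. A minimal sketch of a concrete adapter built on this base class, assuming kalibr 1.1.0 and its OpenTelemetry dependency are installed; the class name and pricing figures are hypothetical, not part of the package:

    from kalibr.instrumentation.base import BaseCostAdapter

    class FlatRateCostAdapter(BaseCostAdapter):
        # Hypothetical pricing table: USD per 1K tokens
        PRICING = {"example-model": {"input": 0.001, "output": 0.002}}

        def calculate_cost(self, model, usage):
            pricing = self.get_pricing(model) or {"input": 0.0, "output": 0.0}
            input_cost = (usage.get("prompt_tokens", 0) / 1000) * pricing["input"]
            output_cost = (usage.get("completion_tokens", 0) / 1000) * pricing["output"]
            return round(input_cost + output_cost, 6)

    # 500 prompt tokens and 200 completion tokens -> 0.0005 + 0.0004 = 0.0009 USD
    cost = FlatRateCostAdapter().calculate_cost(
        "example-model", {"prompt_tokens": 500, "completion_tokens": 200}
    )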
kalibr/instrumentation/google_instr.py (new file)
@@ -0,0 +1,281 @@
+ """
+ Google Generative AI SDK Instrumentation
+
+ Monkey-patches the Google Generative AI SDK to automatically emit OpenTelemetry spans
+ for all content generation API calls.
+ """
+
+ import time
+ from functools import wraps
+ from typing import Any, Dict, Optional
+
+ from opentelemetry.trace import SpanKind
+
+ from .base import BaseCostAdapter, BaseInstrumentation
+
+
+ class GoogleCostAdapter(BaseCostAdapter):
+     """Cost calculation adapter for Google Generative AI models"""
+
+     # Pricing per 1K tokens (USD) - Updated November 2025
+     PRICING = {
+         # Gemini 2.5 models
+         "gemini-2.5-pro": {"input": 0.00125, "output": 0.005},
+         "gemini-2.5-flash": {"input": 0.000075, "output": 0.0003},
+         # Gemini 2.0 models
+         "gemini-2.0-flash": {"input": 0.000075, "output": 0.0003},
+         "gemini-2.0-flash-thinking": {"input": 0.000075, "output": 0.0003},
+         # Gemini 1.5 models
+         "gemini-1.5-pro": {"input": 0.00125, "output": 0.005},
+         "gemini-1.5-flash": {"input": 0.000075, "output": 0.0003},
+         "gemini-1.5-flash-8b": {"input": 0.0000375, "output": 0.00015},
+         # Gemini 1.0 models
+         "gemini-1.0-pro": {"input": 0.0005, "output": 0.0015},
+         "gemini-pro": {"input": 0.0005, "output": 0.0015},  # Alias
+     }
+
+     def calculate_cost(self, model: str, usage: Dict[str, int]) -> float:
+         """Calculate cost in USD for a Google Generative AI API call"""
+         # Normalize model name
+         base_model = model.lower()
+
+         # Try exact match first
+         pricing = self.get_pricing(base_model)
+
+         # Try fuzzy matching for versioned models
+         if not pricing:
+             for known_model in self.PRICING.keys():
+                 if known_model in base_model or base_model in known_model:
+                     pricing = self.PRICING[known_model]
+                     break
+
+         if not pricing:
+             # Default to Gemini 1.5 Pro pricing if unknown
+             pricing = {"input": 0.00125, "output": 0.005}
+
+         prompt_tokens = usage.get("prompt_tokens", 0)
+         completion_tokens = usage.get("completion_tokens", 0)
+
+         input_cost = (prompt_tokens / 1000) * pricing["input"]
+         output_cost = (completion_tokens / 1000) * pricing["output"]
+
+         return round(input_cost + output_cost, 6)
+
+
+ class GoogleInstrumentation(BaseInstrumentation):
+     """Instrumentation for Google Generative AI SDK"""
+
+     def __init__(self):
+         super().__init__("kalibr.google")
+         self._original_generate_content = None
+         self._original_async_generate_content = None
+         self.cost_adapter = GoogleCostAdapter()
+
+     def instrument(self) -> bool:
+         """Apply monkey-patching to Google Generative AI SDK"""
+         if self._is_instrumented:
+             return True
+
+         try:
+             import google.generativeai as genai
+             from google.generativeai.generative_models import GenerativeModel
+
+             # Patch sync method
+             if hasattr(GenerativeModel, "generate_content"):
+                 self._original_generate_content = GenerativeModel.generate_content
+                 GenerativeModel.generate_content = self._traced_generate_wrapper(
+                     GenerativeModel.generate_content
+                 )
+
+             # Patch async method (if available)
+             if hasattr(GenerativeModel, "generate_content_async"):
+                 self._original_async_generate_content = GenerativeModel.generate_content_async
+                 GenerativeModel.generate_content_async = self._traced_async_generate_wrapper(
+                     GenerativeModel.generate_content_async
+                 )
+
+             self._is_instrumented = True
+             return True
+
+         except ImportError:
+             print("⚠️ Google Generative AI SDK not installed, skipping instrumentation")
+             return False
+         except Exception as e:
+             print(f"❌ Failed to instrument Google Generative AI SDK: {e}")
+             return False
+
+     def uninstrument(self) -> bool:
+         """Remove monkey-patching from Google Generative AI SDK"""
+         if not self._is_instrumented:
+             return True
+
+         try:
+             import google.generativeai as genai
+             from google.generativeai.generative_models import GenerativeModel
+
+             # Restore sync method
+             if self._original_generate_content:
+                 GenerativeModel.generate_content = self._original_generate_content
+
+             # Restore async method
+             if self._original_async_generate_content:
+                 GenerativeModel.generate_content_async = self._original_async_generate_content
+
+             self._is_instrumented = False
+             return True
+
+         except Exception as e:
+             print(f"❌ Failed to uninstrument Google Generative AI SDK: {e}")
+             return False
+
+     def _traced_generate_wrapper(self, original_func):
+         """Wrapper for sync generate_content method"""
+
+         @wraps(original_func)
+         def wrapper(self_instance, *args, **kwargs):
+             # Extract model name from instance
+             model = getattr(self_instance, "_model_name", "unknown")
+
+             # Create span with initial attributes
+             with self.tracer.start_as_current_span(
+                 "google.generativeai.generate_content",
+                 kind=SpanKind.CLIENT,
+                 attributes={
+                     "llm.vendor": "google",
+                     "llm.request.model": model,
+                     "llm.system": "google.generativeai",
+                 },
+             ) as span:
+                 start_time = time.time()
+
+                 # Phase 3: Inject Kalibr context for HTTP→SDK linking
+                 try:
+                     from kalibr.context import inject_kalibr_context_into_span
+
+                     inject_kalibr_context_into_span(span)
+                 except Exception:
+                     pass  # Fail silently if context not available
+
+                 try:
+                     # Call original method
+                     result = original_func(self_instance, *args, **kwargs)
+
+                     # Extract and set response metadata
+                     self._set_response_attributes(span, result, model, start_time)
+
+                     return result
+
+                 except Exception as e:
+                     self.set_error(span, e)
+                     raise
+
+         return wrapper
+
+     def _traced_async_generate_wrapper(self, original_func):
+         """Wrapper for async generate_content method"""
+
+         @wraps(original_func)
+         async def wrapper(self_instance, *args, **kwargs):
+             # Extract model name from instance
+             model = getattr(self_instance, "_model_name", "unknown")
+
+             # Create span with initial attributes
+             with self.tracer.start_as_current_span(
+                 "google.generativeai.generate_content",
+                 kind=SpanKind.CLIENT,
+                 attributes={
+                     "llm.vendor": "google",
+                     "llm.request.model": model,
+                     "llm.system": "google.generativeai",
+                 },
+             ) as span:
+                 start_time = time.time()
+
+                 # Phase 3: Inject Kalibr context for HTTP→SDK linking
+                 try:
+                     from kalibr.context import inject_kalibr_context_into_span
+
+                     inject_kalibr_context_into_span(span)
+                 except Exception:
+                     pass  # Fail silently if context not available
+
+                 try:
+                     # Call original async method
+                     result = await original_func(self_instance, *args, **kwargs)
+
+                     # Extract and set response metadata
+                     self._set_response_attributes(span, result, model, start_time)
+
+                     return result
+
+                 except Exception as e:
+                     self.set_error(span, e)
+                     raise
+
+         return wrapper
+
+     def _set_response_attributes(self, span, result, model: str, start_time: float) -> None:
+         """Extract metadata from response and set span attributes"""
+         try:
+             # Model (from instance)
+             span.set_attribute("llm.response.model", model)
+
+             # Token usage
+             if hasattr(result, "usage_metadata") and result.usage_metadata:
+                 usage = result.usage_metadata
+
+                 prompt_tokens = getattr(usage, "prompt_token_count", 0)
+                 completion_tokens = getattr(usage, "candidates_token_count", 0)
+                 total_tokens = getattr(
+                     usage, "total_token_count", prompt_tokens + completion_tokens
+                 )
+
+                 span.set_attribute("llm.usage.prompt_tokens", prompt_tokens)
+                 span.set_attribute("llm.usage.completion_tokens", completion_tokens)
+                 span.set_attribute("llm.usage.total_tokens", total_tokens)
+
+                 # Calculate cost
+                 cost = self.cost_adapter.calculate_cost(
+                     model,
+                     {
+                         "prompt_tokens": prompt_tokens,
+                         "completion_tokens": completion_tokens,
+                     },
+                 )
+                 span.set_attribute("llm.cost_usd", cost)
+
+             # Latency
+             latency_ms = (time.time() - start_time) * 1000
+             span.set_attribute("llm.latency_ms", round(latency_ms, 2))
+
+             # Finish reason (if available)
+             if hasattr(result, "candidates") and result.candidates:
+                 candidate = result.candidates[0]
+                 if hasattr(candidate, "finish_reason"):
+                     span.set_attribute("llm.response.finish_reason", str(candidate.finish_reason))
+
+         except Exception as e:
+             # Don't fail the call if metadata extraction fails
+             span.set_attribute("llm.metadata_extraction_error", str(e))
+
+
+ # Singleton instance
+ _google_instrumentation = None
+
+
+ def get_instrumentation() -> GoogleInstrumentation:
+     """Get or create the Google instrumentation singleton"""
+     global _google_instrumentation
+     if _google_instrumentation is None:
+         _google_instrumentation = GoogleInstrumentation()
+     return _google_instrumentation
+
+
+ def instrument() -> bool:
+     """Instrument Google Generative AI SDK"""
+     return get_instrumentation().instrument()
+
+
+ def uninstrument() -> bool:
+     """Uninstrument Google Generative AI SDK"""
+     return get_instrumentation().uninstrument()
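The module keeps its patching state in a module-level singleton, so instrument() is idempotent and uninstrument() restores the saved originals. A small sketch of driving that toggle, assuming kalibr 1.1.0 is installed; the actual google.generativeai call is elided since it requires an API key:

    from kalibr.instrumentation import google_instr

    # instrument() patches GenerativeModel.generate_content / generate_content_async
    if google_instr.instrument():
        try:
            # google.generativeai calls made here emit spans carrying
            # llm.usage.*, llm.cost_usd, and llm.latency_ms attributes
            ...
        finally:
            google_instr.uninstrument()  # restores the original methods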
kalibr/instrumentation/openai_instr.py (new file)
@@ -0,0 +1,265 @@
+ """
+ OpenAI SDK Instrumentation
+
+ Monkey-patches the OpenAI SDK to automatically emit OpenTelemetry spans
+ for all chat completion API calls.
+ """
+
+ import time
+ from functools import wraps
+ from typing import Any, Dict, Optional
+
+ from opentelemetry.trace import SpanKind
+
+ from .base import BaseCostAdapter, BaseInstrumentation
+
+
+ class OpenAICostAdapter(BaseCostAdapter):
+     """Cost calculation adapter for OpenAI models"""
+
+     # Pricing per 1K tokens (USD) - Updated November 2025
+     PRICING = {
+         # GPT-5 models
+         "gpt-5": {"input": 0.005, "output": 0.015},
+         "gpt-5-turbo": {"input": 0.0025, "output": 0.0075},
+         # GPT-4 models
+         "gpt-4": {"input": 0.03, "output": 0.06},
+         "gpt-4-turbo": {"input": 0.01, "output": 0.03},
+         "gpt-4o": {"input": 0.0025, "output": 0.01},
+         "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
+         # GPT-3.5 models
+         "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
+         "gpt-3.5-turbo-16k": {"input": 0.001, "output": 0.002},
+     }
+
+     def calculate_cost(self, model: str, usage: Dict[str, int]) -> float:
+         """Calculate cost in USD for an OpenAI API call"""
+         # Normalize model name (remove version suffixes)
+         base_model = model.split("-2")[0]  # Remove date suffixes like -20240101
+
+         pricing = self.get_pricing(base_model)
+         if not pricing:
+             # Default to GPT-4 pricing if unknown
+             pricing = {"input": 0.03, "output": 0.06}
+
+         prompt_tokens = usage.get("prompt_tokens", 0)
+         completion_tokens = usage.get("completion_tokens", 0)
+
+         input_cost = (prompt_tokens / 1000) * pricing["input"]
+         output_cost = (completion_tokens / 1000) * pricing["output"]
+
+         return round(input_cost + output_cost, 6)
+
+
+ class OpenAIInstrumentation(BaseInstrumentation):
+     """Instrumentation for OpenAI SDK"""
+
+     def __init__(self):
+         super().__init__("kalibr.openai")
+         self._original_create = None
+         self._original_async_create = None
+         self.cost_adapter = OpenAICostAdapter()
+
+     def instrument(self) -> bool:
+         """Apply monkey-patching to OpenAI SDK"""
+         if self._is_instrumented:
+             return True
+
+         try:
+             import openai
+             from openai.resources.chat import completions
+
+             # Patch sync method
+             if hasattr(completions.Completions, "create"):
+                 self._original_create = completions.Completions.create
+                 completions.Completions.create = self._traced_create_wrapper(
+                     completions.Completions.create
+                 )
+
+             # Patch async method
+             if hasattr(completions.AsyncCompletions, "create"):
+                 self._original_async_create = completions.AsyncCompletions.create
+                 completions.AsyncCompletions.create = self._traced_async_create_wrapper(
+                     completions.AsyncCompletions.create
+                 )
+
+             self._is_instrumented = True
+             return True
+
+         except ImportError:
+             print("⚠️ OpenAI SDK not installed, skipping instrumentation")
+             return False
+         except Exception as e:
+             print(f"❌ Failed to instrument OpenAI SDK: {e}")
+             return False
+
+     def uninstrument(self) -> bool:
+         """Remove monkey-patching from OpenAI SDK"""
+         if not self._is_instrumented:
+             return True
+
+         try:
+             import openai
+             from openai.resources.chat import completions
+
+             # Restore sync method
+             if self._original_create:
+                 completions.Completions.create = self._original_create
+
+             # Restore async method
+             if self._original_async_create:
+                 completions.AsyncCompletions.create = self._original_async_create
+
+             self._is_instrumented = False
+             return True
+
+         except Exception as e:
+             print(f"❌ Failed to uninstrument OpenAI SDK: {e}")
+             return False
+
+     def _traced_create_wrapper(self, original_func):
+         """Wrapper for sync create method"""
+
+         @wraps(original_func)
+         def wrapper(self_instance, *args, **kwargs):
+             # Extract model from kwargs
+             model = kwargs.get("model", "unknown")
+
+             # Create span with initial attributes
+             with self.tracer.start_as_current_span(
+                 "openai.chat.completions.create",
+                 kind=SpanKind.CLIENT,
+                 attributes={
+                     "llm.vendor": "openai",
+                     "llm.request.model": model,
+                     "llm.system": "openai",
+                 },
+             ) as span:
+                 start_time = time.time()
+
+                 # Phase 3: Inject Kalibr context for HTTP→SDK linking
+                 try:
+                     from kalibr.context import inject_kalibr_context_into_span
+
+                     inject_kalibr_context_into_span(span)
+                 except Exception:
+                     pass  # Fail silently if context not available
+
+                 try:
+                     # Call original method
+                     result = original_func(self_instance, *args, **kwargs)
+
+                     # Extract and set response metadata
+                     self._set_response_attributes(span, result, start_time)
+
+                     return result
+
+                 except Exception as e:
+                     self.set_error(span, e)
+                     raise
+
+         return wrapper
+
+     def _traced_async_create_wrapper(self, original_func):
+         """Wrapper for async create method"""
+
+         @wraps(original_func)
+         async def wrapper(self_instance, *args, **kwargs):
+             # Extract model from kwargs
+             model = kwargs.get("model", "unknown")
+
+             # Create span with initial attributes
+             with self.tracer.start_as_current_span(
+                 "openai.chat.completions.create",
+                 kind=SpanKind.CLIENT,
+                 attributes={
+                     "llm.vendor": "openai",
+                     "llm.request.model": model,
+                     "llm.system": "openai",
+                 },
+             ) as span:
+                 start_time = time.time()
+
+                 # Phase 3: Inject Kalibr context for HTTP→SDK linking
+                 try:
+                     from kalibr.context import inject_kalibr_context_into_span
+
+                     inject_kalibr_context_into_span(span)
+                 except Exception:
+                     pass  # Fail silently if context not available
+
+                 try:
+                     # Call original async method
+                     result = await original_func(self_instance, *args, **kwargs)
+
+                     # Extract and set response metadata
+                     self._set_response_attributes(span, result, start_time)
+
+                     return result
+
+                 except Exception as e:
+                     self.set_error(span, e)
+                     raise
+
+         return wrapper
+
+     def _set_response_attributes(self, span, result, start_time: float) -> None:
+         """Extract metadata from response and set span attributes"""
+         try:
+             # Model
+             if hasattr(result, "model"):
+                 span.set_attribute("llm.response.model", result.model)
+
+             # Token usage
+             if hasattr(result, "usage") and result.usage:
+                 usage = result.usage
+                 if hasattr(usage, "prompt_tokens"):
+                     span.set_attribute("llm.usage.prompt_tokens", usage.prompt_tokens)
+                 if hasattr(usage, "completion_tokens"):
+                     span.set_attribute("llm.usage.completion_tokens", usage.completion_tokens)
+                 if hasattr(usage, "total_tokens"):
+                     span.set_attribute("llm.usage.total_tokens", usage.total_tokens)
+
+                 # Calculate cost
+                 cost = self.cost_adapter.calculate_cost(
+                     result.model,
+                     {
+                         "prompt_tokens": usage.prompt_tokens,
+                         "completion_tokens": usage.completion_tokens,
+                     },
+                 )
+                 span.set_attribute("llm.cost_usd", cost)
+
+             # Latency
+             latency_ms = (time.time() - start_time) * 1000
+             span.set_attribute("llm.latency_ms", round(latency_ms, 2))
+
+             # Response ID
+             if hasattr(result, "id"):
+                 span.set_attribute("llm.response.id", result.id)
+
+         except Exception as e:
+             # Don't fail the call if metadata extraction fails
+             span.set_attribute("llm.metadata_extraction_error", str(e))
+
+
+ # Singleton instance
+ _openai_instrumentation = None
+
+
+ def get_instrumentation() -> OpenAIInstrumentation:
+     """Get or create the OpenAI instrumentation singleton"""
+     global _openai_instrumentation
+     if _openai_instrumentation is None:
+         _openai_instrumentation = OpenAIInstrumentation()
+     return _openai_instrumentation
+
+
+ def instrument() -> bool:
+     """Instrument OpenAI SDK"""
+     return get_instrumentation().instrument()
+
+
+ def uninstrument() -> bool:
+     """Uninstrument OpenAI SDK"""
+     return get_instrumentation().uninstrument()
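OpenAICostAdapter normalizes dated model IDs by splitting at "-2" (so "gpt-4o-2024-08-06" resolves to the "gpt-4o" entry) and falls back to GPT-4 pricing for unknown models. A worked example against the pricing table above, assuming kalibr 1.1.0 is installed:

    from kalibr.instrumentation.openai_instr import OpenAICostAdapter

    adapter = OpenAICostAdapter()
    # gpt-4o is priced at 0.0025 / 0.01 USD per 1K input/output tokens:
    # (1200 / 1000) * 0.0025 + (400 / 1000) * 0.01 = 0.003 + 0.004 = 0.007
    cost = adapter.calculate_cost(
        "gpt-4o-2024-08-06",
        {"prompt_tokens": 1200, "completion_tokens": 400},
    )
    print(cost)  # 0.007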