genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)
  1. genai_otel/__version__.py +2 -2
  2. genai_otel/auto_instrument.py +18 -1
  3. genai_otel/config.py +22 -1
  4. genai_otel/cost_calculator.py +204 -13
  5. genai_otel/cost_enrichment_processor.py +175 -0
  6. genai_otel/gpu_metrics.py +50 -0
  7. genai_otel/instrumentors/base.py +300 -44
  8. genai_otel/instrumentors/cohere_instrumentor.py +140 -76
  9. genai_otel/instrumentors/huggingface_instrumentor.py +142 -13
  10. genai_otel/instrumentors/langchain_instrumentor.py +75 -75
  11. genai_otel/instrumentors/mistralai_instrumentor.py +234 -38
  12. genai_otel/instrumentors/ollama_instrumentor.py +104 -35
  13. genai_otel/instrumentors/replicate_instrumentor.py +59 -14
  14. genai_otel/instrumentors/togetherai_instrumentor.py +120 -16
  15. genai_otel/instrumentors/vertexai_instrumentor.py +79 -15
  16. genai_otel/llm_pricing.json +869 -589
  17. genai_otel/logging_config.py +45 -45
  18. genai_otel/py.typed +2 -2
  19. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/METADATA +294 -33
  20. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/RECORD +24 -23
  21. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/WHEEL +0 -0
  22. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/entry_points.txt +0 -0
  23. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/licenses/LICENSE +0 -0
  24. {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.7.dev0.dist-info}/top_level.txt +0 -0
genai_otel/instrumentors/mistralai_instrumentor.py

@@ -10,6 +10,7 @@ Supports Mistral SDK v1.0+ with the new API structure:
 """
 
 import logging
+import time
 from typing import Any, Dict, Optional
 
 from ..config import OTelConfig
@@ -27,50 +28,245 @@ class MistralAIInstrumentor(BaseInstrumentor):
             import wrapt
             from mistralai import Mistral
 
-            # Wrap the Mistral client __init__ to instrument each instance
-            original_init = Mistral.__init__
-
-            def wrapped_init(wrapped, instance, args, kwargs):
-                result = wrapped(*args, **kwargs)
-                self._instrument_client(instance)
-                return result
-
-            Mistral.__init__ = wrapt.FunctionWrapper(original_init, wrapped_init)
-            logger.info("MistralAI instrumentation enabled (v1.0+ SDK)")
+            # Get access to the chat and embeddings modules
+            # In Mistral SDK v1.0+, structure is:
+            # - Mistral client has .chat and .embeddings properties
+            # - These are bound methods that call internal APIs
+            # Store original methods at module level before any instances are created
+            if not hasattr(Mistral, "_genai_otel_instrumented"):
+                self._wrap_mistral_methods(Mistral, wrapt)
+                Mistral._genai_otel_instrumented = True
+                logger.info("MistralAI instrumentation enabled (v1.0+ SDK)")
 
         except ImportError:
             logger.warning("mistralai package not available, skipping instrumentation")
         except Exception as e:
             logger.error(f"Failed to instrument mistralai: {e}", exc_info=True)
+            if config.fail_on_error:
+                raise
+
+    def _wrap_mistral_methods(self, Mistral, wrapt):
+        """Wrap Mistral client methods at the class level."""
+        # Import the internal classes that handle chat and embeddings
+        try:
+            from mistralai.chat import Chat
+            from mistralai.embeddings import Embeddings
+
+            # Wrap Chat.complete method
+            if hasattr(Chat, "complete"):
+                wrapt.wrap_function_wrapper(
+                    "mistralai.chat", "Chat.complete", self._wrap_chat_complete
+                )
+                logger.debug("Wrapped Mistral Chat.complete")
+
+            # Wrap Chat.stream method
+            if hasattr(Chat, "stream"):
+                wrapt.wrap_function_wrapper("mistralai.chat", "Chat.stream", self._wrap_chat_stream)
+                logger.debug("Wrapped Mistral Chat.stream")
+
+            # Wrap Embeddings.create method
+            if hasattr(Embeddings, "create"):
+                wrapt.wrap_function_wrapper(
+                    "mistralai.embeddings", "Embeddings.create", self._wrap_embeddings_create
+                )
+                logger.debug("Wrapped Mistral Embeddings.create")
+
+        except (ImportError, AttributeError) as e:
+            logger.warning(f"Could not access Mistral internal classes: {e}")
+
+    def _wrap_chat_complete(self, wrapped, instance, args, kwargs):
+        """Wrapper for chat.complete() method."""
+        model = kwargs.get("model", "mistral-small-latest")
+        span_name = f"mistralai.chat.complete {model}"
+
+        with self.tracer.start_span(span_name) as span:
+            # Set attributes
+            attributes = self._extract_chat_attributes(instance, args, kwargs)
+            for key, value in attributes.items():
+                span.set_attribute(key, value)
+
+            # Record request metric
+            if self.request_counter:
+                self.request_counter.add(1, {"model": model, "provider": "mistralai"})
+
+            # Execute the call
+            start_time = time.time()
+            try:
+                response = wrapped(*args, **kwargs)
+
+                # Record metrics from response
+                self._record_result_metrics(span, response, start_time, kwargs)
+
+                return response
+
+            except Exception as e:
+                if self.error_counter:
+                    self.error_counter.add(
+                        1, {"operation": span_name, "error.type": type(e).__name__}
+                    )
+                span.record_exception(e)
+                raise
+
+    def _wrap_chat_stream(self, wrapped, instance, args, kwargs):
+        """Wrapper for chat.stream() method - handles streaming responses."""
+        model = kwargs.get("model", "mistral-small-latest")
+        span_name = f"mistralai.chat.stream {model}"
+
+        # Start the span
+        span = self.tracer.start_span(span_name)
+
+        # Set attributes
+        attributes = self._extract_chat_attributes(instance, args, kwargs)
+        for key, value in attributes.items():
+            span.set_attribute(key, value)
+
+        # Record request metric
+        if self.request_counter:
+            self.request_counter.add(1, {"model": model, "provider": "mistralai"})
+
+        start_time = time.time()
+
+        # Execute and get the stream
+        try:
+            stream = wrapped(*args, **kwargs)
+
+            # Wrap the stream with our tracking wrapper
+            return self._StreamWrapper(stream, span, self, model, start_time, span_name)
+
+        except Exception as e:
+            if self.error_counter:
+                self.error_counter.add(1, {"operation": span_name, "error.type": type(e).__name__})
+            span.record_exception(e)
+            span.end()
+            raise
+
+    def _wrap_embeddings_create(self, wrapped, instance, args, kwargs):
+        """Wrapper for embeddings.create() method."""
+        model = kwargs.get("model", "mistral-embed")
+        span_name = f"mistralai.embeddings.create {model}"
+
+        with self.tracer.start_span(span_name) as span:
+            # Set attributes
+            attributes = self._extract_embeddings_attributes(instance, args, kwargs)
+            for key, value in attributes.items():
+                span.set_attribute(key, value)
+
+            # Record request metric
+            if self.request_counter:
+                self.request_counter.add(1, {"model": model, "provider": "mistralai"})
+
+            # Execute the call
+            start_time = time.time()
+            try:
+                response = wrapped(*args, **kwargs)
+
+                # Record metrics from response
+                self._record_result_metrics(span, response, start_time, kwargs)
+
+                return response
+
+            except Exception as e:
+                if self.error_counter:
+                    self.error_counter.add(
+                        1, {"operation": span_name, "error.type": type(e).__name__}
+                    )
+                span.record_exception(e)
+                raise
+
+    class _StreamWrapper:
+        """Wrapper for streaming responses that collects metrics."""
+
+        def __init__(self, stream, span, instrumentor, model, start_time, span_name):
+            self._stream = stream
+            self._span = span
+            self._instrumentor = instrumentor
+            self._model = model
+            self._start_time = start_time
+            self._span_name = span_name
+            self._usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+            self._response_text = ""
+            self._first_chunk = True
+            self._ttft = None
+
+        def __iter__(self):
+            return self
+
+        def __next__(self):
+            try:
+                chunk = next(self._stream)
+
+                # Record time to first token
+                if self._first_chunk:
+                    self._ttft = time.time() - self._start_time
+                    self._first_chunk = False
+
+                # Process chunk to extract usage and content
+                self._process_chunk(chunk)
+
+                return chunk
+
+            except StopIteration:
+                # Stream completed - record final metrics
+                try:
+                    # Set TTFT if we got any chunks
+                    if self._ttft is not None:
+                        self._span.set_attribute("gen_ai.server.ttft", self._ttft)
+
+                    # Record usage metrics if available
+                    if self._usage["total_tokens"] > 0:
+                        # Create a mock response object with usage for _record_result_metrics
+                        class MockUsage:
+                            def __init__(self, usage_dict):
+                                self.prompt_tokens = usage_dict["prompt_tokens"]
+                                self.completion_tokens = usage_dict["completion_tokens"]
+                                self.total_tokens = usage_dict["total_tokens"]
+
+                        class MockResponse:
+                            def __init__(self, usage_dict):
+                                self.usage = MockUsage(usage_dict)
+
+                        mock_response = MockResponse(self._usage)
+                        self._instrumentor._record_result_metrics(
+                            self._span, mock_response, self._start_time, {"model": self._model}
+                        )
+
+                finally:
+                    self._span.end()
+
+                raise
+
+        def _process_chunk(self, chunk):
+            """Process a streaming chunk to extract usage."""
+            try:
+                # Mistral streaming chunks have: data.choices[0].delta.content
+                if hasattr(chunk, "data"):
+                    data = chunk.data
+                    if hasattr(data, "choices") and len(data.choices) > 0:
+                        delta = data.choices[0].delta
+                        if hasattr(delta, "content") and delta.content:
+                            self._response_text += delta.content
+
+                    # Extract usage if available on final chunk
+                    if hasattr(data, "usage") and data.usage:
+                        usage = data.usage
+                        if hasattr(usage, "prompt_tokens"):
+                            self._usage["prompt_tokens"] = usage.prompt_tokens
+                        if hasattr(usage, "completion_tokens"):
+                            self._usage["completion_tokens"] = usage.completion_tokens
+                        if hasattr(usage, "total_tokens"):
+                            self._usage["total_tokens"] = usage.total_tokens
+
+            except Exception as e:
+                logger.debug(f"Error processing Mistral stream chunk: {e}")
+
+        def __enter__(self):
+            return self
 
-    def _instrument_client(self, client):
-        """Instrument Mistral client instance methods."""
-        # Instrument chat.complete()
-        if hasattr(client, "chat") and hasattr(client.chat, "complete"):
-            original_complete = client.chat.complete
-            instrumented_complete = self.create_span_wrapper(
-                span_name="mistralai.chat.complete",
-                extract_attributes=self._extract_chat_attributes,
-            )(original_complete)
-            client.chat.complete = instrumented_complete
-
-        # Instrument chat.stream()
-        if hasattr(client, "chat") and hasattr(client.chat, "stream"):
-            original_stream = client.chat.stream
-            instrumented_stream = self.create_span_wrapper(
-                span_name="mistralai.chat.stream",
-                extract_attributes=self._extract_chat_attributes,
-            )(original_stream)
-            client.chat.stream = instrumented_stream
-
-        # Instrument embeddings.create()
-        if hasattr(client, "embeddings") and hasattr(client.embeddings, "create"):
-            original_embeddings = client.embeddings.create
-            instrumented_embeddings = self.create_span_wrapper(
-                span_name="mistralai.embeddings.create",
-                extract_attributes=self._extract_embeddings_attributes,
-            )(original_embeddings)
-            client.embeddings.create = instrumented_embeddings
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            if exc_type is not None:
+                self._span.record_exception(exc_val)
+            self._span.end()
+            return False
 
     def _extract_chat_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
         """Extract attributes from chat.complete() or chat.stream() call."""
genai_otel/instrumentors/ollama_instrumentor.py

@@ -2,11 +2,11 @@
 
 This instrumentor automatically traces calls to Ollama models for both
 generation and chat functionalities, capturing relevant attributes such as
-the model name.
+the model name and token usage.
 """
 
 import logging
-from typing import Dict, Optional
+from typing import Any, Dict, Optional
 
 from ..config import OTelConfig
 from .base import BaseInstrumentor
@@ -22,8 +22,8 @@ class OllamaInstrumentor(BaseInstrumentor):
         super().__init__()
         self._ollama_available = False
         self._ollama_module = None
-        self._original_generate = None  # Add this
-        self._original_chat = None  # Add this
+        self._original_generate = None
+        self._original_chat = None
         self._check_availability()
 
     def _check_availability(self):
@@ -46,38 +46,107 @@ class OllamaInstrumentor(BaseInstrumentor):
         if not self._ollama_available or self._ollama_module is None:
             return
 
-        # Store original methods
-        self._original_generate = self._ollama_module.generate
-        self._original_chat = self._ollama_module.chat
-
-        def wrapped_generate(*args, **kwargs):
-            with self.tracer.start_as_current_span("ollama.generate") as span:
-                model = kwargs.get("model", "unknown")
-
-                span.set_attribute("gen_ai.system", "ollama")
-                span.set_attribute("gen_ai.request.model", model)
-
-                if self.request_counter:
-                    self.request_counter.add(1, {"model": model, "provider": "ollama"})
-
-                result = self._original_generate(*args, **kwargs)
-                return result
-
-        def wrapped_chat(*args, **kwargs):
-            with self.tracer.start_as_current_span("ollama.chat") as span:
-                model = kwargs.get("model", "unknown")
-
-                span.set_attribute("gen_ai.system", "ollama")
-                span.set_attribute("gen_ai.request.model", model)
+        try:
+            # Store original methods and wrap them
+            self._original_generate = self._ollama_module.generate
+            self._original_chat = self._ollama_module.chat
+
+            # Wrap generate method
+            wrapped_generate = self.create_span_wrapper(
+                span_name="ollama.generate",
+                extract_attributes=self._extract_generate_attributes,
+            )(self._original_generate)
+            self._ollama_module.generate = wrapped_generate
+
+            # Wrap chat method
+            wrapped_chat = self.create_span_wrapper(
+                span_name="ollama.chat",
+                extract_attributes=self._extract_chat_attributes,
+            )(self._original_chat)
+            self._ollama_module.chat = wrapped_chat
+
+            self._instrumented = True
+            logger.info("Ollama instrumentation enabled")
+
+        except Exception as e:
+            logger.error("Failed to instrument Ollama: %s", e, exc_info=True)
+            if config.fail_on_error:
+                raise
+
+    def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Ollama generate call.
+
+        Args:
+            instance: The client instance (None for module-level functions).
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+        model = kwargs.get("model", "unknown")
+
+        attrs["gen_ai.system"] = "ollama"
+        attrs["gen_ai.request.model"] = model
+        attrs["gen_ai.operation.name"] = "generate"
+
+        return attrs
+
+    def _extract_chat_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Ollama chat call.
+
+        Args:
+            instance: The client instance (None for module-level functions).
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+        model = kwargs.get("model", "unknown")
+        messages = kwargs.get("messages", [])
+
+        attrs["gen_ai.system"] = "ollama"
+        attrs["gen_ai.request.model"] = model
+        attrs["gen_ai.operation.name"] = "chat"
+        attrs["gen_ai.request.message_count"] = len(messages)
+
+        return attrs
 
-                if self.request_counter:
-                    self.request_counter.add(1, {"model": model, "provider": "ollama"})
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        """Extract token usage from Ollama response.
 
-                result = self._original_chat(*args, **kwargs)
-                return result
+        Ollama responses include:
+        - prompt_eval_count: Input tokens
+        - eval_count: Output tokens
 
-        self._ollama_module.generate = wrapped_generate
-        self._ollama_module.chat = wrapped_chat
+        Args:
+            result: The API response object or dictionary.
 
-    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
-        return None
+        Returns:
+            Optional[Dict[str, int]]: Dictionary with token counts or None.
+        """
+        try:
+            # Handle both dict and object responses
+            if isinstance(result, dict):
+                prompt_tokens = result.get("prompt_eval_count", 0)
+                completion_tokens = result.get("eval_count", 0)
+            elif hasattr(result, "prompt_eval_count") and hasattr(result, "eval_count"):
+                prompt_tokens = getattr(result, "prompt_eval_count", 0)
+                completion_tokens = getattr(result, "eval_count", 0)
+            else:
+                return None
+
+            if prompt_tokens == 0 and completion_tokens == 0:
+                return None
+
+            return {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            }
+        except Exception as e:
+            logger.debug("Failed to extract usage from Ollama response: %s", e)
+            return None
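
For reference, prompt_eval_count and eval_count are the token-count fields Ollama reports on its final responses, which is what the new _extract_usage reads. A standalone sketch of that logic, exercised against a hand-written dict shaped like an Ollama response (the token values are illustrative):

# Standalone version of the extraction logic above, run on a sample payload.
from typing import Dict, Optional

def extract_usage(result) -> Optional[Dict[str, int]]:
    if isinstance(result, dict):
        prompt_tokens = result.get("prompt_eval_count", 0)
        completion_tokens = result.get("eval_count", 0)
    else:
        prompt_tokens = getattr(result, "prompt_eval_count", 0)
        completion_tokens = getattr(result, "eval_count", 0)
    if prompt_tokens == 0 and completion_tokens == 0:
        return None  # nothing to report, e.g. a mid-stream chunk
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }

sample = {"model": "llama3", "done": True, "prompt_eval_count": 26, "eval_count": 298}
print(extract_usage(sample))
# -> {'prompt_tokens': 26, 'completion_tokens': 298, 'total_tokens': 324}

Returning None when both counts are zero keeps partial or streaming chunks from being recorded as zero-cost requests.
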
genai_otel/instrumentors/replicate_instrumentor.py

@@ -2,41 +2,86 @@
 
 This instrumentor automatically traces calls to Replicate models, capturing
 relevant attributes such as the model name.
+
+Note: Replicate uses hardware-based pricing (per second of GPU/CPU time),
+not token-based pricing. Cost tracking is not applicable as the pricing model
+is fundamentally different from token-based LLM APIs.
 """
 
-from typing import Dict, Optional
+import logging
+from typing import Any, Dict, Optional
 
 from ..config import OTelConfig
 from .base import BaseInstrumentor
 
+logger = logging.getLogger(__name__)
+
 
 class ReplicateInstrumentor(BaseInstrumentor):
-    """Instrumentor for Replicate"""
+    """Instrumentor for Replicate.
+
+    Note: Replicate uses hardware-based pricing ($/second), not token-based.
+    Cost tracking returns None as pricing is based on execution time and hardware type.
+    """
 
     def instrument(self, config: OTelConfig):
+        """Instrument Replicate SDK if available."""
         self.config = config
         try:
             import replicate
 
             original_run = replicate.run
 
-            def wrapped_run(*args, **kwargs):
-                with self.tracer.start_as_current_span("replicate.run") as span:
-                    model = args[0] if args else "unknown"
+            # Wrap using create_span_wrapper
+            wrapped_run = self.create_span_wrapper(
+                span_name="replicate.run",
+                extract_attributes=self._extract_run_attributes,
+            )(original_run)
 
-                    span.set_attribute("gen_ai.system", "replicate")
-                    span.set_attribute("gen_ai.request.model", model)
+            replicate.run = wrapped_run
+            self._instrumented = True
+            logger.info("Replicate instrumentation enabled")
 
-                    if self.request_counter:
-                        self.request_counter.add(1, {"model": model, "provider": "replicate"})
+        except ImportError:
+            logger.debug("Replicate library not installed, instrumentation will be skipped")
+        except Exception as e:
+            logger.error("Failed to instrument Replicate: %s", e, exc_info=True)
+            if config.fail_on_error:
+                raise
 
-                    result = original_run(*args, **kwargs)
-                    return result
+    def _extract_run_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Replicate run call.
 
-            replicate.run = wrapped_run
+        Args:
+            instance: The instance (None for module-level functions).
+            args: Positional arguments (first arg is typically the model).
+            kwargs: Keyword arguments.
 
-        except ImportError:
-            pass
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+        model = args[0] if args else kwargs.get("model", "unknown")
+
+        attrs["gen_ai.system"] = "replicate"
+        attrs["gen_ai.request.model"] = model
+        attrs["gen_ai.operation.name"] = "run"
+
+        return attrs
 
     def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        """Extract token usage from Replicate response.
+
+        Note: Replicate uses hardware-based pricing ($/second of GPU/CPU time),
+        not token-based pricing. Returns None as the pricing model is incompatible
+        with token-based cost calculation.
+
+        Args:
+            result: The API response.
+
+        Returns:
+            None: Replicate uses hardware-based pricing, not token-based.
+        """
+        # Replicate uses hardware-based pricing ($/second), not tokens
+        # Cannot track costs with token-based calculator
         return None
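
The new docstrings are explicit that _extract_usage returns None by design: Replicate bills per second of hardware time, so a token-based calculator has nothing to work with. If duration-based costing were ever wanted, it would have to come from prediction timing instead. A purely hypothetical sketch, where the predict_time field and the per-second rate table are assumptions for illustration, not part of this package or a confirmed Replicate contract:

# Hypothetical only: neither this helper nor the rate table exists in
# genai-otel-instrument. It assumes a metrics dict carrying "predict_time"
# (seconds of execution); the $/second rates are made-up examples.
ASSUMED_RATES_PER_SECOND = {"cpu": 0.000100, "gpu-a40-large": 0.000725}

def estimate_hardware_cost(metrics, hardware="cpu"):
    seconds = (metrics or {}).get("predict_time")
    rate = ASSUMED_RATES_PER_SECOND.get(hardware)
    if seconds is None or rate is None:
        return None  # mirror the library's "no cost available" behaviour
    return seconds * rate

print(estimate_hardware_cost({"predict_time": 12.3}, "gpu-a40-large"))
# -> 0.0089175 (dollars, under the assumed rate)
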