genai-otel-instrument 0.1.4.dev0__py3-none-any.whl → 0.1.9.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of genai-otel-instrument might be problematic.
- genai_otel/__version__.py +2 -2
- genai_otel/auto_instrument.py +7 -3
- genai_otel/config.py +19 -1
- genai_otel/cost_calculator.py +72 -6
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -177
- genai_otel/gpu_metrics.py +50 -0
- genai_otel/instrumentors/base.py +228 -4
- genai_otel/instrumentors/cohere_instrumentor.py +140 -140
- genai_otel/instrumentors/huggingface_instrumentor.py +184 -7
- genai_otel/instrumentors/langchain_instrumentor.py +75 -75
- genai_otel/instrumentors/mistralai_instrumentor.py +17 -33
- genai_otel/llm_pricing.json +869 -869
- genai_otel/logging_config.py +45 -45
- genai_otel/py.typed +2 -2
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/METADATA +256 -28
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/RECORD +21 -20
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.4.dev0.dist-info → genai_otel_instrument-0.1.9.dev0.dist-info}/top_level.txt +0 -0
genai_otel/instrumentors/huggingface_instrumentor.py

@@ -3,9 +3,11 @@
 This instrumentor automatically traces:
 1. HuggingFace Transformers pipelines (local model execution)
 2. HuggingFace Inference API calls via InferenceClient (used by smolagents)
+3. Direct model usage via AutoModelForCausalLM.generate() and forward()

 Note: Transformers runs models locally (no API costs), but InferenceClient makes
 API calls to HuggingFace endpoints which may have costs based on usage.
+Local model costs are estimated based on parameter count and token usage.
 """

 import logging
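For orientation, a minimal usage sketch (not part of the diff) of the three call paths the docstring above lists. It assumes transformers and huggingface_hub are installed and that genai-otel-instrument's auto-instrumentation has already been enabled; the tiny model name is only an example.

# Sketch only: exercises the three instrumented paths described above.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# 1. Pipeline path (local execution)
pipe = pipeline("text-generation", model="sshleifer/tiny-gpt2")
pipe("Hello", max_new_tokens=8)

# 2. Inference API path (remote call, may incur costs)
# from huggingface_hub import InferenceClient
# InferenceClient(model="gpt2").text_generation("Hello", max_new_tokens=8)

# 3. Direct model path, traced as "huggingface.model.generate" as of this release
tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
inputs = tokenizer("Hello", return_tensors="pt")
model.generate(**inputs, max_new_tokens=8)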
@@ -20,8 +22,10 @@ logger = logging.getLogger(__name__)
 class HuggingFaceInstrumentor(BaseInstrumentor):
     """Instrumentor for HuggingFace Transformers and Inference API.

-    Instruments
-    - transformers.pipeline (local execution,
+    Instruments:
+    - transformers.pipeline (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.generate() (local execution, estimated costs)
+    - transformers.AutoModelForCausalLM.forward() (local execution, estimated costs)
     - huggingface_hub.InferenceClient (API calls, may have costs)
     """

@@ -30,6 +34,7 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         super().__init__()
         self._transformers_available = False
         self._inference_client_available = False
+        self._model_classes_instrumented = False
         self._check_availability()

     def _check_availability(self):
@@ -49,17 +54,20 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
             self._inference_client_available = True
             logger.debug("HuggingFace InferenceClient detected and available for instrumentation")
         except ImportError:
-            logger.debug(
+            logger.debug(
+                "huggingface_hub not installed, InferenceClient instrumentation will be skipped"
+            )
             self._inference_client_available = False

     def instrument(self, config: OTelConfig):
-        """Instrument HuggingFace Transformers pipelines and InferenceClient."""
-        self.config
+        """Instrument HuggingFace Transformers pipelines, model classes, and InferenceClient."""
+        self._setup_config(config)

         instrumented_count = 0

-        # Instrument transformers
+        # Instrument transformers components if available
         if self._transformers_available:
+            # Instrument pipeline
             try:
                 self._instrument_transformers()
                 instrumented_count += 1
@@ -68,13 +76,24 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
                 if config.fail_on_error:
                     raise

+            # Instrument model classes (AutoModelForCausalLM, etc.)
+            try:
+                self._instrument_model_classes()
+                instrumented_count += 1
+            except Exception as e:
+                logger.error("Failed to instrument HuggingFace model classes: %s", e, exc_info=True)
+                if config.fail_on_error:
+                    raise
+
         # Instrument InferenceClient if available
         if self._inference_client_available:
             try:
                 self._instrument_inference_client()
                 instrumented_count += 1
             except Exception as e:
-                logger.error(
+                logger.error(
+                    "Failed to instrument HuggingFace InferenceClient: %s", e, exc_info=True
+                )
                 if config.fail_on_error:
                     raise

@@ -162,6 +181,164 @@ class HuggingFaceInstrumentor(BaseInstrumentor):
         InferenceClient.text_generation = wrapped_text_generation
         logger.debug("HuggingFace InferenceClient instrumented")

+    def _instrument_model_classes(self):
+        """Instrument HuggingFace model classes for direct model usage."""
+        try:
+            import wrapt
+
+            # Import GenerationMixin - the base class that provides generate() method
+            # All generative models (AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.) inherit from it
+            try:
+                from transformers.generation.utils import GenerationMixin
+            except ImportError:
+                # Fallback for older transformers versions
+                from transformers.generation import GenerationMixin
+
+            # Store reference to instrumentor for use in wrapper
+            instrumentor = self
+
+            # Wrap the generate() method at GenerationMixin level (all models inherit from this)
+            original_generate = GenerationMixin.generate
+
+            @wrapt.decorator
+            def generate_wrapper(wrapped, instance, args, kwargs):
+                """Wrapper for model.generate() method."""
+                # Extract model info
+                model_name = getattr(instance, "name_or_path", "unknown")
+                if hasattr(instance.config, "_name_or_path"):
+                    model_name = instance.config._name_or_path
+
+                # Get input token count
+                input_ids = kwargs.get("input_ids") or (args[0] if args else None)
+                prompt_tokens = 0
+                if input_ids is not None:
+                    if hasattr(input_ids, "shape"):
+                        prompt_tokens = int(input_ids.shape[-1])
+                    elif isinstance(input_ids, (list, tuple)):
+                        prompt_tokens = len(input_ids[0]) if input_ids else 0
+
+                # Create span
+                with instrumentor.tracer.start_as_current_span(
+                    "huggingface.model.generate"
+                ) as span:
+                    # Set attributes
+                    span.set_attribute("gen_ai.system", "huggingface")
+                    span.set_attribute("gen_ai.request.model", model_name)
+                    span.set_attribute("gen_ai.operation.name", "text_generation")
+                    span.set_attribute("gen_ai.request.type", "chat")
+
+                    # Extract generation parameters
+                    if "max_length" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_length"])
+                    if "max_new_tokens" in kwargs:
+                        span.set_attribute("gen_ai.request.max_tokens", kwargs["max_new_tokens"])
+                    if "temperature" in kwargs:
+                        span.set_attribute("gen_ai.request.temperature", kwargs["temperature"])
+                    if "top_p" in kwargs:
+                        span.set_attribute("gen_ai.request.top_p", kwargs["top_p"])
+
+                    # Call original generate
+                    import time
+
+                    start_time = time.time()
+                    result = wrapped(*args, **kwargs)
+                    duration = time.time() - start_time
+
+                    # Extract output token count
+                    completion_tokens = 0
+                    if hasattr(result, "shape"):
+                        # result is a tensor
+                        total_length = int(result.shape[-1])
+                        completion_tokens = max(0, total_length - prompt_tokens)
+                    elif isinstance(result, (list, tuple)):
+                        # result is a list of sequences
+                        if result and hasattr(result[0], "shape"):
+                            total_length = int(result[0].shape[-1])
+                            completion_tokens = max(0, total_length - prompt_tokens)
+
+                    total_tokens = prompt_tokens + completion_tokens
+
+                    # Set token usage attributes
+                    if prompt_tokens > 0:
+                        span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+                    if completion_tokens > 0:
+                        span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+                    if total_tokens > 0:
+                        span.set_attribute("gen_ai.usage.total_tokens", total_tokens)
+
+                    # Record metrics
+                    if instrumentor.request_counter:
+                        instrumentor.request_counter.add(
+                            1, {"model": model_name, "provider": "huggingface"}
+                        )
+
+                    if instrumentor.token_counter and total_tokens > 0:
+                        if prompt_tokens > 0:
+                            instrumentor.token_counter.add(
+                                prompt_tokens, {"token_type": "prompt", "operation": span.name}
+                            )
+                        if completion_tokens > 0:
+                            instrumentor.token_counter.add(
+                                completion_tokens,
+                                {"token_type": "completion", "operation": span.name},
+                            )
+
+                    if instrumentor.latency_histogram:
+                        instrumentor.latency_histogram.record(duration, {"operation": span.name})
+
+                    # Calculate and record cost if enabled
+                    if (
+                        instrumentor.config
+                        and instrumentor.config.enable_cost_tracking
+                        and total_tokens > 0
+                    ):
+                        try:
+                            usage = {
+                                "prompt_tokens": prompt_tokens,
+                                "completion_tokens": completion_tokens,
+                                "total_tokens": total_tokens,
+                            }
+
+                            costs = instrumentor.cost_calculator.calculate_granular_cost(
+                                model=model_name, usage=usage, call_type="chat"
+                            )
+
+                            if costs["total"] > 0:
+                                if instrumentor.cost_counter:
+                                    instrumentor.cost_counter.add(
+                                        costs["total"], {"model": model_name}
+                                    )
+                                span.set_attribute("gen_ai.usage.cost.total", costs["total"])
+                                if costs["prompt"] > 0:
+                                    span.set_attribute("gen_ai.usage.cost.prompt", costs["prompt"])
+                                if costs["completion"] > 0:
+                                    span.set_attribute(
+                                        "gen_ai.usage.cost.completion", costs["completion"]
+                                    )
+
+                            logger.debug(
+                                f"HuggingFace model {model_name}: {total_tokens} tokens, "
+                                f"cost: ${costs['total']:.6f}"
+                            )
+                        except Exception as e:
+                            logger.warning(f"Failed to calculate cost: {e}")
+
+                    return result
+
+            # Apply wrapper to GenerationMixin.generate (all models inherit this)
+            GenerationMixin.generate = generate_wrapper(original_generate)
+
+            self._model_classes_instrumented = True
+            logger.debug(
+                "HuggingFace GenerationMixin.generate() instrumented "
+                "(covers all models: AutoModelForCausalLM, AutoModelForSeq2SeqLM, etc.)"
+            )
+
+        except ImportError as e:
+            logger.debug(f"Could not import model classes for instrumentation: {e}")
+        except Exception as e:
+            raise  # Re-raise to be caught by instrument() method
+
     def _extract_inference_client_attributes(self, instance, args, kwargs) -> Dict[str, str]:
         """Extract attributes from Inference API call."""
         attrs = {}
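Aside: the new _instrument_model_classes() above rests on wrapt's decorator-and-rebind pattern, where a class-level method is replaced by a wrapper that can inspect the instance and time the call before delegating. A minimal, self-contained sketch of that pattern (not from the package; DemoModel is a stand-in for a transformers model):

import time

import wrapt


class DemoModel:
    """Stand-in for a transformers model; name_or_path mirrors the real attribute."""

    name_or_path = "demo-model"

    def generate(self, n):
        return list(range(n))


@wrapt.decorator
def generate_wrapper(wrapped, instance, args, kwargs):
    # `instance` is the model object, `wrapped` is the original bound generate()
    start = time.time()
    result = wrapped(*args, **kwargs)
    duration = time.time() - start
    model_name = getattr(instance, "name_or_path", "unknown")
    print(f"{model_name}: {len(result)} output items in {duration:.6f}s")
    return result


# Rebind at the class level, mirroring GenerationMixin.generate = generate_wrapper(original_generate)
DemoModel.generate = generate_wrapper(DemoModel.generate)

print(DemoModel().generate(5))  # timing line, then [0, 1, 2, 3, 4]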
genai_otel/instrumentors/langchain_instrumentor.py

@@ -1,75 +1,75 @@
(The entire module is removed and re-added with identical content on every line, so this appears to be a whitespace- or line-ending-only change. The re-added file:)
+"""OpenTelemetry instrumentor for the LangChain framework.
+
+This instrumentor automatically traces various components within LangChain,
+including chains and agents, capturing relevant attributes for observability.
+"""
+
+import logging
+from typing import Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+
+logger = logging.getLogger(__name__)
+
+
+class LangChainInstrumentor(BaseInstrumentor):
+    """Instrumentor for LangChain"""
+
+    def __init__(self):
+        """Initialize the instrumentor."""
+        super().__init__()
+        self._langchain_available = False
+        self._check_availability()
+
+    def _check_availability(self):
+        """Check if langchain library is available."""
+        try:
+            import langchain
+
+            self._langchain_available = True
+            logger.debug("langchain library detected and available for instrumentation")
+        except ImportError:
+            logger.debug("langchain library not installed, instrumentation will be skipped")
+            self._langchain_available = False
+
+    def instrument(self, config: OTelConfig):
+        """Instrument langchain available if available."""
+        if not self._langchain_available:
+            logger.debug("Skipping instrumentation - library not available")
+            return
+
+        self.config = config
+        try:
+            from langchain.agents.agent import AgentExecutor
+            from langchain.chains.base import Chain
+
+            # Instrument Chains
+            original_call = Chain.__call__
+
+            def wrapped_call(instance, *args, **kwargs):
+                chain_type = instance.__class__.__name__
+                with self.tracer.start_as_current_span(f"langchain.chain.{chain_type}") as span:
+                    span.set_attribute("langchain.chain.type", chain_type)
+                    result = original_call(instance, *args, **kwargs)
+                    return result
+
+            Chain.__call__ = wrapped_call
+
+            # Instrument Agents
+            original_agent_call = AgentExecutor.__call__
+
+            def wrapped_agent_call(instance, *args, **kwargs):
+                with self.tracer.start_as_current_span("langchain.agent.execute") as span:
+                    agent_name = getattr(instance, "agent", {}).get("name", "unknown")
+                    span.set_attribute("langchain.agent.name", agent_name)
+                    result = original_agent_call(instance, *args, **kwargs)
+                    return result
+
+            AgentExecutor.__call__ = wrapped_agent_call
+
+        except ImportError:
+            pass
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        return None
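Aside: the LangChain instrumentor patches Chain.__call__ on the base class so that every subclass invocation funnels through one wrapper. A minimal sketch of that pattern (not from the package; the Chain class here is a stand-in and the print stands in for opening a span):

class Chain:  # stand-in for langchain.chains.base.Chain
    def __call__(self, text):
        return text.upper()


class GreetingChain(Chain):
    pass


original_call = Chain.__call__


def wrapped_call(instance, *args, **kwargs):
    chain_type = instance.__class__.__name__
    print(f"langchain.chain.{chain_type}")  # the real wrapper opens a span with this name
    return original_call(instance, *args, **kwargs)


Chain.__call__ = wrapped_call

print(GreetingChain()("hello"))  # prints langchain.chain.GreetingChain, then HELLO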
genai_otel/instrumentors/mistralai_instrumentor.py

@@ -32,9 +32,8 @@ class MistralAIInstrumentor(BaseInstrumentor):
         # In Mistral SDK v1.0+, structure is:
         # - Mistral client has .chat and .embeddings properties
         # - These are bound methods that call internal APIs
-
         # Store original methods at module level before any instances are created
-        if not hasattr(Mistral,
+        if not hasattr(Mistral, "_genai_otel_instrumented"):
             self._wrap_mistral_methods(Mistral, wrapt)
             Mistral._genai_otel_instrumented = True
             logger.info("MistralAI instrumentation enabled (v1.0+ SDK)")
@@ -54,29 +53,21 @@ class MistralAIInstrumentor(BaseInstrumentor):
         from mistralai.embeddings import Embeddings

         # Wrap Chat.complete method
-        if hasattr(Chat,
+        if hasattr(Chat, "complete"):
             wrapt.wrap_function_wrapper(
-
-                'Chat.complete',
-                self._wrap_chat_complete
+                "mistralai.chat", "Chat.complete", self._wrap_chat_complete
             )
             logger.debug("Wrapped Mistral Chat.complete")

         # Wrap Chat.stream method
-        if hasattr(Chat,
-            wrapt.wrap_function_wrapper(
-                'mistralai.chat',
-                'Chat.stream',
-                self._wrap_chat_stream
-            )
+        if hasattr(Chat, "stream"):
+            wrapt.wrap_function_wrapper("mistralai.chat", "Chat.stream", self._wrap_chat_stream)
             logger.debug("Wrapped Mistral Chat.stream")

         # Wrap Embeddings.create method
-        if hasattr(Embeddings,
+        if hasattr(Embeddings, "create"):
             wrapt.wrap_function_wrapper(
-
-                'Embeddings.create',
-                self._wrap_embeddings_create
+                "mistralai.embeddings", "Embeddings.create", self._wrap_embeddings_create
             )
             logger.debug("Wrapped Mistral Embeddings.create")

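Aside: the hunk above fixes the wrapt.wrap_function_wrapper calls so they pass the full (module path, dotted attribute path, wrapper) triple. A minimal sketch of that API (not from the package; it patches json.dumps purely for demonstration):

import json

import wrapt


def log_calls(wrapped, instance, args, kwargs):
    # Same (wrapped, instance, args, kwargs) signature the Mistral _wrap_* callbacks use
    print(f"calling {wrapped.__name__} args={args} kwargs={kwargs}")
    return wrapped(*args, **kwargs)


# Shape of the fixed calls above: wrap_function_wrapper("module.path", "Attr.path", wrapper)
wrapt.wrap_function_wrapper("json", "dumps", log_calls)

print(json.dumps({"model": "mistral-small"}))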
@@ -140,15 +131,11 @@ class MistralAIInstrumentor(BaseInstrumentor):
             stream = wrapped(*args, **kwargs)

             # Wrap the stream with our tracking wrapper
-            return self._StreamWrapper(
-                stream, span, self, model, start_time, span_name
-            )
+            return self._StreamWrapper(stream, span, self, model, start_time, span_name)

         except Exception as e:
             if self.error_counter:
-                self.error_counter.add(
-                    1, {"operation": span_name, "error.type": type(e).__name__}
-                )
+                self.error_counter.add(1, {"operation": span_name, "error.type": type(e).__name__})
             span.record_exception(e)
             span.end()
             raise
@@ -240,10 +227,7 @@ class MistralAIInstrumentor(BaseInstrumentor):

                 mock_response = MockResponse(self._usage)
                 self._instrumentor._record_result_metrics(
-                    self._span,
-                    mock_response,
-                    self._start_time,
-                    {"model": self._model}
+                    self._span, mock_response, self._start_time, {"model": self._model}
                 )

             finally:
@@ -255,21 +239,21 @@ class MistralAIInstrumentor(BaseInstrumentor):
             """Process a streaming chunk to extract usage."""
             try:
                 # Mistral streaming chunks have: data.choices[0].delta.content
-                if hasattr(chunk,
+                if hasattr(chunk, "data"):
                     data = chunk.data
-                    if hasattr(data,
+                    if hasattr(data, "choices") and len(data.choices) > 0:
                         delta = data.choices[0].delta
-                        if hasattr(delta,
+                        if hasattr(delta, "content") and delta.content:
                             self._response_text += delta.content

                     # Extract usage if available on final chunk
-                    if hasattr(data,
+                    if hasattr(data, "usage") and data.usage:
                         usage = data.usage
-                        if hasattr(usage,
+                        if hasattr(usage, "prompt_tokens"):
                             self._usage["prompt_tokens"] = usage.prompt_tokens
-                        if hasattr(usage,
+                        if hasattr(usage, "completion_tokens"):
                             self._usage["completion_tokens"] = usage.completion_tokens
-                        if hasattr(usage,
+                        if hasattr(usage, "total_tokens"):
                             self._usage["total_tokens"] = usage.total_tokens

             except Exception as e: