genai-otel-instrument 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genai_otel/__init__.py +132 -0
- genai_otel/__version__.py +34 -0
- genai_otel/auto_instrument.py +602 -0
- genai_otel/cli.py +92 -0
- genai_otel/config.py +333 -0
- genai_otel/cost_calculator.py +467 -0
- genai_otel/cost_enriching_exporter.py +207 -0
- genai_otel/cost_enrichment_processor.py +174 -0
- genai_otel/evaluation/__init__.py +76 -0
- genai_otel/evaluation/bias_detector.py +364 -0
- genai_otel/evaluation/config.py +261 -0
- genai_otel/evaluation/hallucination_detector.py +525 -0
- genai_otel/evaluation/pii_detector.py +356 -0
- genai_otel/evaluation/prompt_injection_detector.py +262 -0
- genai_otel/evaluation/restricted_topics_detector.py +316 -0
- genai_otel/evaluation/span_processor.py +962 -0
- genai_otel/evaluation/toxicity_detector.py +406 -0
- genai_otel/exceptions.py +17 -0
- genai_otel/gpu_metrics.py +516 -0
- genai_otel/instrumentors/__init__.py +71 -0
- genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
- genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
- genai_otel/instrumentors/autogen_instrumentor.py +394 -0
- genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
- genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
- genai_otel/instrumentors/base.py +919 -0
- genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
- genai_otel/instrumentors/cohere_instrumentor.py +140 -0
- genai_otel/instrumentors/crewai_instrumentor.py +311 -0
- genai_otel/instrumentors/dspy_instrumentor.py +661 -0
- genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
- genai_otel/instrumentors/groq_instrumentor.py +106 -0
- genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
- genai_otel/instrumentors/haystack_instrumentor.py +503 -0
- genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
- genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
- genai_otel/instrumentors/instructor_instrumentor.py +425 -0
- genai_otel/instrumentors/langchain_instrumentor.py +340 -0
- genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
- genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
- genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
- genai_otel/instrumentors/ollama_instrumentor.py +197 -0
- genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
- genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
- genai_otel/instrumentors/openai_instrumentor.py +260 -0
- genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
- genai_otel/instrumentors/replicate_instrumentor.py +87 -0
- genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
- genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
- genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
- genai_otel/llm_pricing.json +1676 -0
- genai_otel/logging_config.py +45 -0
- genai_otel/mcp_instrumentors/__init__.py +14 -0
- genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
- genai_otel/mcp_instrumentors/base.py +105 -0
- genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
- genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/manager.py +139 -0
- genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
- genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
- genai_otel/metrics.py +148 -0
- genai_otel/py.typed +2 -0
- genai_otel/server_metrics.py +197 -0
- genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
- genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
- genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
- genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
- genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
- genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
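
The three instrumentor modules whose full contents appear below (LlamaIndex, Mistral AI, Ollama) plug into the same contract defined in genai_otel/instrumentors/base.py: subclass BaseInstrumentor, patch the target SDK inside instrument(config), and report token counts via _extract_usage(result). The sketch below is an assumption inferred from how those subclasses use the base class, not a copy of base.py (which runs to 919 lines and also supplies the tracer, request/error counters, create_span_wrapper, and _record_result_metrics helpers seen in the diffs).

# Assumed shape of the BaseInstrumentor contract, inferred from subclass usage.
# This is NOT the real genai_otel/instrumentors/base.py, only an orientation sketch.
from abc import ABC, abstractmethod
from typing import Dict, Optional


class BaseInstrumentor(ABC):
    def __init__(self):
        self.config = None           # set by instrument()
        self.tracer = None           # OpenTelemetry tracer supplied by the real base class
        self.request_counter = None  # OTel metric counters supplied by the real base class
        self.error_counter = None

    @abstractmethod
    def instrument(self, config) -> None:
        """Monkey-patch the target SDK so its calls emit spans and metrics."""

    @abstractmethod
    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
        """Return prompt/completion/total token counts from a provider response, if any."""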

genai_otel/instrumentors/llamaindex_instrumentor.py
@@ -0,0 +1,36 @@
+"""OpenTelemetry instrumentor for the LlamaIndex framework.
+
+This instrumentor automatically traces query engine operations within LlamaIndex,
+capturing relevant attributes such as the query text.
+"""
+
+from typing import Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+
+
+class LlamaIndexInstrumentor(BaseInstrumentor):
+    """Instrumentor for LlamaIndex"""
+
+    def instrument(self, config: OTelConfig):
+        self.config = config
+        try:
+            from llama_index.core.query_engine import BaseQueryEngine
+
+            original_query = BaseQueryEngine.query
+
+            def wrapped_query(instance, *args, **kwargs):
+                with self.tracer.start_as_current_span("llamaindex.query_engine") as span:
+                    query_text = args[0] if args else kwargs.get("query_str", "")
+                    span.set_attribute("llamaindex.query", str(query_text)[:200])
+                    result = original_query(instance, *args, **kwargs)
+                    return result
+
+            BaseQueryEngine.query = wrapped_query
+
+        except ImportError:
+            pass
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        return None
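
The LlamaIndex instrumentor above works by rebinding BaseQueryEngine.query to a closure that opens a span around the original call. The following standalone sketch shows that wrap-and-trace pattern with the public OpenTelemetry API on a toy class; ToyQueryEngine and its attribute names are illustrative and not part of this package.

# Minimal sketch of the wrap-and-trace pattern (toy class, illustrative names).
from opentelemetry import trace

tracer = trace.get_tracer("wrap-demo")


class ToyQueryEngine:
    def query(self, query_str: str) -> str:
        return f"answer to: {query_str}"


_original_query = ToyQueryEngine.query


def _traced_query(self, *args, **kwargs):
    # Open a span, record a truncated copy of the query, then delegate to the original.
    with tracer.start_as_current_span("toy.query_engine") as span:
        query_text = args[0] if args else kwargs.get("query_str", "")
        span.set_attribute("toy.query", str(query_text)[:200])
        return _original_query(self, *args, **kwargs)


ToyQueryEngine.query = _traced_query
print(ToyQueryEngine().query("what does this diff add?"))

With only opentelemetry-api installed the tracer is a no-op, so the sketch runs but exports nothing until an SDK and exporter are configured.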

genai_otel/instrumentors/mistralai_instrumentor.py
@@ -0,0 +1,315 @@
+"""OpenTelemetry instrumentor for the Mistral AI SDK (v1.0+).
+
+This instrumentor automatically traces chat calls to Mistral AI models,
+capturing relevant attributes such as the model name and token usage.
+
+Supports Mistral SDK v1.0+ with the new API structure:
+- Mistral.chat.complete()
+- Mistral.chat.stream()
+- Mistral.embeddings.create()
+"""
+
+import logging
+import time
+from typing import Any, Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+
+logger = logging.getLogger(__name__)
+
+
+class MistralAIInstrumentor(BaseInstrumentor):
+    """Instrumentor for Mistral AI SDK v1.0+"""
+
+    def instrument(self, config: OTelConfig):
+        self.config = config
+        try:
+            import wrapt
+            from mistralai import Mistral
+
+            # Get access to the chat and embeddings modules
+            # In Mistral SDK v1.0+, structure is:
+            # - Mistral client has .chat and .embeddings properties
+            # - These are bound methods that call internal APIs
+            # Store original methods at module level before any instances are created
+            if not hasattr(Mistral, "_genai_otel_instrumented"):
+                self._wrap_mistral_methods(Mistral, wrapt)
+                Mistral._genai_otel_instrumented = True
+                logger.info("MistralAI instrumentation enabled (v1.0+ SDK)")
+
+        except ImportError:
+            logger.warning("mistralai package not available, skipping instrumentation")
+        except Exception as e:
+            logger.error(f"Failed to instrument mistralai: {e}", exc_info=True)
+            if config.fail_on_error:
+                raise
+
+    def _wrap_mistral_methods(self, Mistral, wrapt):
+        """Wrap Mistral client methods at the class level."""
+        # Import the internal classes that handle chat and embeddings
+        try:
+            from mistralai.chat import Chat
+            from mistralai.embeddings import Embeddings
+
+            # Wrap Chat.complete method
+            if hasattr(Chat, "complete"):
+                wrapt.wrap_function_wrapper(
+                    "mistralai.chat", "Chat.complete", self._wrap_chat_complete
+                )
+                logger.debug("Wrapped Mistral Chat.complete")
+
+            # Wrap Chat.stream method
+            if hasattr(Chat, "stream"):
+                wrapt.wrap_function_wrapper("mistralai.chat", "Chat.stream", self._wrap_chat_stream)
+                logger.debug("Wrapped Mistral Chat.stream")
+
+            # Wrap Embeddings.create method
+            if hasattr(Embeddings, "create"):
+                wrapt.wrap_function_wrapper(
+                    "mistralai.embeddings", "Embeddings.create", self._wrap_embeddings_create
+                )
+                logger.debug("Wrapped Mistral Embeddings.create")
+
+        except (ImportError, AttributeError) as e:
+            logger.warning(f"Could not access Mistral internal classes: {e}")
+
+    def _wrap_chat_complete(self, wrapped, instance, args, kwargs):
+        """Wrapper for chat.complete() method."""
+        model = kwargs.get("model", "mistral-small-latest")
+        span_name = f"mistralai.chat.complete {model}"
+
+        with self.tracer.start_span(span_name) as span:
+            # Set attributes
+            attributes = self._extract_chat_attributes(instance, args, kwargs)
+            for key, value in attributes.items():
+                span.set_attribute(key, value)
+
+            # Record request metric
+            if self.request_counter:
+                self.request_counter.add(1, {"model": model, "provider": "mistralai"})
+
+            # Execute the call
+            start_time = time.time()
+            try:
+                response = wrapped(*args, **kwargs)
+
+                # Record metrics from response
+                self._record_result_metrics(span, response, start_time, kwargs)
+
+                return response
+
+            except Exception as e:
+                if self.error_counter:
+                    self.error_counter.add(
+                        1, {"operation": span_name, "error.type": type(e).__name__}
+                    )
+                span.record_exception(e)
+                raise
+
+    def _wrap_chat_stream(self, wrapped, instance, args, kwargs):
+        """Wrapper for chat.stream() method - handles streaming responses."""
+        model = kwargs.get("model", "mistral-small-latest")
+        span_name = f"mistralai.chat.stream {model}"
+
+        # Start the span
+        span = self.tracer.start_span(span_name)
+
+        # Set attributes
+        attributes = self._extract_chat_attributes(instance, args, kwargs)
+        for key, value in attributes.items():
+            span.set_attribute(key, value)
+
+        # Record request metric
+        if self.request_counter:
+            self.request_counter.add(1, {"model": model, "provider": "mistralai"})
+
+        start_time = time.time()
+
+        # Execute and get the stream
+        try:
+            stream = wrapped(*args, **kwargs)
+
+            # Wrap the stream with our tracking wrapper
+            return self._StreamWrapper(stream, span, self, model, start_time, span_name)
+
+        except Exception as e:
+            if self.error_counter:
+                self.error_counter.add(1, {"operation": span_name, "error.type": type(e).__name__})
+            span.record_exception(e)
+            span.end()
+            raise
+
+    def _wrap_embeddings_create(self, wrapped, instance, args, kwargs):
+        """Wrapper for embeddings.create() method."""
+        model = kwargs.get("model", "mistral-embed")
+        span_name = f"mistralai.embeddings.create {model}"
+
+        with self.tracer.start_span(span_name) as span:
+            # Set attributes
+            attributes = self._extract_embeddings_attributes(instance, args, kwargs)
+            for key, value in attributes.items():
+                span.set_attribute(key, value)
+
+            # Record request metric
+            if self.request_counter:
+                self.request_counter.add(1, {"model": model, "provider": "mistralai"})
+
+            # Execute the call
+            start_time = time.time()
+            try:
+                response = wrapped(*args, **kwargs)
+
+                # Record metrics from response
+                self._record_result_metrics(span, response, start_time, kwargs)
+
+                return response
+
+            except Exception as e:
+                if self.error_counter:
+                    self.error_counter.add(
+                        1, {"operation": span_name, "error.type": type(e).__name__}
+                    )
+                span.record_exception(e)
+                raise
+
+    class _StreamWrapper:
+        """Wrapper for streaming responses that collects metrics."""
+
+        def __init__(self, stream, span, instrumentor, model, start_time, span_name):
+            self._stream = stream
+            self._span = span
+            self._instrumentor = instrumentor
+            self._model = model
+            self._start_time = start_time
+            self._span_name = span_name
+            self._usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+            self._response_text = ""
+            self._first_chunk = True
+            self._ttft = None
+
+        def __iter__(self):
+            return self
+
+        def __next__(self):
+            try:
+                chunk = next(self._stream)
+
+                # Record time to first token
+                if self._first_chunk:
+                    self._ttft = time.time() - self._start_time
+                    self._first_chunk = False
+
+                # Process chunk to extract usage and content
+                self._process_chunk(chunk)
+
+                return chunk
+
+            except StopIteration:
+                # Stream completed - record final metrics
+                try:
+                    # Set TTFT if we got any chunks
+                    if self._ttft is not None:
+                        self._span.set_attribute("gen_ai.server.ttft", self._ttft)
+
+                    # Record usage metrics if available
+                    if self._usage["total_tokens"] > 0:
+                        # Create a mock response object with usage for _record_result_metrics
+                        class MockUsage:
+                            def __init__(self, usage_dict):
+                                self.prompt_tokens = usage_dict["prompt_tokens"]
+                                self.completion_tokens = usage_dict["completion_tokens"]
+                                self.total_tokens = usage_dict["total_tokens"]
+
+                        class MockResponse:
+                            def __init__(self, usage_dict):
+                                self.usage = MockUsage(usage_dict)
+
+                        mock_response = MockResponse(self._usage)
+                        self._instrumentor._record_result_metrics(
+                            self._span, mock_response, self._start_time, {"model": self._model}
+                        )
+
+                finally:
+                    self._span.end()
+
+                raise
+
+        def _process_chunk(self, chunk):
+            """Process a streaming chunk to extract usage."""
+            try:
+                # Mistral streaming chunks have: data.choices[0].delta.content
+                if hasattr(chunk, "data"):
+                    data = chunk.data
+                    if hasattr(data, "choices") and len(data.choices) > 0:
+                        delta = data.choices[0].delta
+                        if hasattr(delta, "content") and delta.content:
+                            self._response_text += delta.content
+
+                    # Extract usage if available on final chunk
+                    if hasattr(data, "usage") and data.usage:
+                        usage = data.usage
+                        if hasattr(usage, "prompt_tokens"):
+                            self._usage["prompt_tokens"] = usage.prompt_tokens
+                        if hasattr(usage, "completion_tokens"):
+                            self._usage["completion_tokens"] = usage.completion_tokens
+                        if hasattr(usage, "total_tokens"):
+                            self._usage["total_tokens"] = usage.total_tokens
+
+            except Exception as e:
+                logger.debug(f"Error processing Mistral stream chunk: {e}")
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            if exc_type is not None:
+                self._span.record_exception(exc_val)
+            self._span.end()
+            return False
+
+    def _extract_chat_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from chat.complete() or chat.stream() call."""
+        model = kwargs.get("model", "unknown")
+        attributes = {
+            "gen_ai.system": "mistralai",
+            "gen_ai.request.model": model,
+            "gen_ai.request.type": "chat",
+        }
+
+        # Add optional parameters
+        if "temperature" in kwargs and kwargs["temperature"] is not None:
+            attributes["gen_ai.request.temperature"] = kwargs["temperature"]
+        if "top_p" in kwargs and kwargs["top_p"] is not None:
+            attributes["gen_ai.request.top_p"] = kwargs["top_p"]
+        if "max_tokens" in kwargs and kwargs["max_tokens"] is not None:
+            attributes["gen_ai.request.max_tokens"] = kwargs["max_tokens"]
+
+        return attributes
+
+    def _extract_embeddings_attributes(
+        self, instance: Any, args: Any, kwargs: Any
+    ) -> Dict[str, Any]:
+        """Extract attributes from embeddings.create() call."""
+        model = kwargs.get("model", "mistral-embed")
+        attributes = {
+            "gen_ai.system": "mistralai",
+            "gen_ai.request.model": model,
+            "gen_ai.request.type": "embedding",
+        }
+        return attributes
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        """Extract usage information from Mistral AI response"""
+        try:
+            if hasattr(result, "usage"):
+                usage = result.usage
+                return {
+                    "prompt_tokens": getattr(usage, "prompt_tokens", 0),
+                    "completion_tokens": getattr(usage, "completion_tokens", 0),
+                    "total_tokens": getattr(usage, "total_tokens", 0),
+                }
+        except Exception as e:
+            logger.debug(f"Could not extract usage from MistralAI response: {e}")
+
+        return None
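
The Mistral instrumentor above leans on wrapt.wrap_function_wrapper, which replaces a dotted attribute on a module with a wrapper that receives (wrapped, instance, args, kwargs). Here is a small self-contained sketch of that calling convention; Greeter and timed_call are illustrative names, while the wrapt call itself mirrors the "mistralai.chat" / "Chat.complete" usage in the diff.

# Demonstrates the (wrapped, instance, args, kwargs) wrapper signature used above.
import time

import wrapt


class Greeter:
    def greet(self, name: str) -> str:
        return f"hello, {name}"


def timed_call(wrapped, instance, args, kwargs):
    # wrapped is the original bound method; instance is the Greeter object.
    start = time.time()
    try:
        return wrapped(*args, **kwargs)
    finally:
        print(f"{type(instance).__name__}.greet took {time.time() - start:.6f}s")


# Same API shape as wrapt.wrap_function_wrapper("mistralai.chat", "Chat.complete", ...).
wrapt.wrap_function_wrapper(__name__, "Greeter.greet", timed_call)
print(Greeter().greet("otel"))

Because the patch is applied at the class level, every Mistral client created afterwards picks up the tracing wrappers without per-instance work, which is why the instrumentor guards with the _genai_otel_instrumented flag to avoid double wrapping.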

genai_otel/instrumentors/ollama_instrumentor.py
@@ -0,0 +1,197 @@
+"""OpenTelemetry instrumentor for the Ollama library.
+
+This instrumentor automatically traces calls to Ollama models for both
+generation and chat functionalities, capturing relevant attributes such as
+the model name and token usage.
+
+Optionally enables server metrics polling via /api/ps endpoint to track
+VRAM usage and running models.
+"""
+
+import logging
+import os
+import sys
+from typing import Any, Dict, Optional
+
+from ..config import OTelConfig
+from .base import BaseInstrumentor
+from .ollama_server_metrics_poller import start_ollama_metrics_poller
+
+logger = logging.getLogger(__name__)
+
+
+class OllamaInstrumentor(BaseInstrumentor):
+    """Instrumentor for Ollama"""
+
+    def __init__(self):
+        """Initialize the instrumentor."""
+        super().__init__()
+        self._ollama_available = False
+        self._ollama_module = None
+        self._original_generate = None
+        self._original_chat = None
+        self._check_availability()
+
+    def _check_availability(self):
+        """Check if Ollama library is available."""
+        try:
+            import ollama
+
+            self._ollama_available = True
+            self._ollama_module = ollama
+            logger.debug("Ollama library detected and available for instrumentation")
+        except ImportError:
+            logger.debug("Ollama library not installed, instrumentation will be skipped")
+            self._ollama_available = False
+            self._ollama_module = None
+
+    def instrument(self, config: OTelConfig):
+        """Instrument the Ollama library."""
+        self.config = config
+
+        if not self._ollama_available or self._ollama_module is None:
+            return
+
+        try:
+            # Store original methods and wrap them
+            self._original_generate = self._ollama_module.generate
+            self._original_chat = self._ollama_module.chat
+
+            # Wrap generate method
+            wrapped_generate = self.create_span_wrapper(
+                span_name="ollama.generate",
+                extract_attributes=self._extract_generate_attributes,
+            )(self._original_generate)
+            self._ollama_module.generate = wrapped_generate
+
+            # Wrap chat method
+            wrapped_chat = self.create_span_wrapper(
+                span_name="ollama.chat",
+                extract_attributes=self._extract_chat_attributes,
+            )(self._original_chat)
+            self._ollama_module.chat = wrapped_chat
+
+            self._instrumented = True
+            logger.info("Ollama instrumentation enabled")
+
+            # Start server metrics poller if enabled
+            # Note: Server metrics poller requires Python 3.11+ due to implementation dependencies
+            python_version = sys.version_info
+            if python_version < (3, 11):
+                logger.debug(
+                    "Ollama server metrics poller requires Python 3.11+, skipping "
+                    f"(current: {python_version.major}.{python_version.minor})"
+                )
+                return
+
+            enable_server_metrics = (
+                os.getenv("GENAI_ENABLE_OLLAMA_SERVER_METRICS", "true").lower() == "true"
+            )
+
+            if enable_server_metrics:
+                try:
+                    # Get configuration from environment variables
+                    ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+                    ollama_metrics_interval = float(
+                        os.getenv("GENAI_OLLAMA_METRICS_INTERVAL", "5.0")
+                    )
+                    ollama_max_vram_gb = os.getenv("GENAI_OLLAMA_MAX_VRAM_GB")
+                    max_vram = float(ollama_max_vram_gb) if ollama_max_vram_gb else None
+
+                    # Start the poller
+                    start_ollama_metrics_poller(
+                        base_url=ollama_base_url,
+                        interval=ollama_metrics_interval,
+                        max_vram_gb=max_vram,
+                    )
+                    logger.info(
+                        f"Ollama server metrics poller started (url={ollama_base_url}, "
+                        f"interval={ollama_metrics_interval}s)"
+                    )
+                except Exception as e:
+                    logger.warning(f"Failed to start Ollama server metrics poller: {e}")
+                    if config.fail_on_error:
+                        raise
+
+        except Exception as e:
+            logger.error("Failed to instrument Ollama: %s", e, exc_info=True)
+            if config.fail_on_error:
+                raise
+
+    def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Ollama generate call.
+
+        Args:
+            instance: The client instance (None for module-level functions).
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+        model = kwargs.get("model", "unknown")
+
+        attrs["gen_ai.system"] = "ollama"
+        attrs["gen_ai.request.model"] = model
+        attrs["gen_ai.operation.name"] = "generate"
+
+        return attrs
+
+    def _extract_chat_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
+        """Extract attributes from Ollama chat call.
+
+        Args:
+            instance: The client instance (None for module-level functions).
+            args: Positional arguments.
+            kwargs: Keyword arguments.
+
+        Returns:
+            Dict[str, Any]: Dictionary of attributes to set on the span.
+        """
+        attrs = {}
+        model = kwargs.get("model", "unknown")
+        messages = kwargs.get("messages", [])
+
+        attrs["gen_ai.system"] = "ollama"
+        attrs["gen_ai.request.model"] = model
+        attrs["gen_ai.operation.name"] = "chat"
+        attrs["gen_ai.request.message_count"] = len(messages)
+
+        return attrs
+
+    def _extract_usage(self, result) -> Optional[Dict[str, int]]:
+        """Extract token usage from Ollama response.
+
+        Ollama responses include:
+        - prompt_eval_count: Input tokens
+        - eval_count: Output tokens
+
+        Args:
+            result: The API response object or dictionary.
+
+        Returns:
+            Optional[Dict[str, int]]: Dictionary with token counts or None.
+        """
+        try:
+            # Handle both dict and object responses
+            if isinstance(result, dict):
+                prompt_tokens = result.get("prompt_eval_count", 0)
+                completion_tokens = result.get("eval_count", 0)
+            elif hasattr(result, "prompt_eval_count") and hasattr(result, "eval_count"):
+                prompt_tokens = getattr(result, "prompt_eval_count", 0)
+                completion_tokens = getattr(result, "eval_count", 0)
+            else:
+                return None
+
+            if prompt_tokens == 0 and completion_tokens == 0:
+                return None
+
+            return {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens,
+            }
+        except Exception as e:
+            logger.debug("Failed to extract usage from Ollama response: %s", e)
+            return None
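
Two details of the Ollama instrumentor are worth restating. Token accounting maps Ollama's prompt_eval_count and eval_count response fields onto prompt and completion tokens, and the server metrics poller is configured entirely through the environment variables read in instrument(). The sketch below uses field and variable names taken from the code in this diff; the response values are made-up example data.

# Usage mapping as in _extract_usage above (example values, not real output).
ollama_response = {"model": "llama3", "prompt_eval_count": 26, "eval_count": 298}

prompt_tokens = ollama_response.get("prompt_eval_count", 0)
completion_tokens = ollama_response.get("eval_count", 0)
usage = {
    "prompt_tokens": prompt_tokens,
    "completion_tokens": completion_tokens,
    "total_tokens": prompt_tokens + completion_tokens,  # 26 + 298 = 324
}
print(usage)

# Environment knobs read by instrument(); the values shown are the code's defaults.
# GENAI_ENABLE_OLLAMA_SERVER_METRICS=true     -> set to "false" to skip the poller
# OLLAMA_BASE_URL=http://localhost:11434      -> server polled via /api/ps
# GENAI_OLLAMA_METRICS_INTERVAL=5.0           -> polling interval in seconds
# GENAI_OLLAMA_MAX_VRAM_GB=<unset>            -> optional VRAM capacity hint passed to the poller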