openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +78 -0
- openlit/__init__.py +41 -13
- openlit/instrumentation/ag2/__init__.py +9 -10
- openlit/instrumentation/ag2/ag2.py +134 -69
- openlit/instrumentation/ai21/__init__.py +6 -5
- openlit/instrumentation/ai21/ai21.py +71 -534
- openlit/instrumentation/ai21/async_ai21.py +71 -534
- openlit/instrumentation/ai21/utils.py +407 -0
- openlit/instrumentation/anthropic/__init__.py +3 -3
- openlit/instrumentation/anthropic/anthropic.py +5 -5
- openlit/instrumentation/anthropic/async_anthropic.py +5 -5
- openlit/instrumentation/assemblyai/__init__.py +2 -2
- openlit/instrumentation/assemblyai/assemblyai.py +3 -3
- openlit/instrumentation/astra/__init__.py +25 -25
- openlit/instrumentation/astra/astra.py +7 -7
- openlit/instrumentation/astra/async_astra.py +7 -7
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
- openlit/instrumentation/bedrock/__init__.py +2 -2
- openlit/instrumentation/bedrock/bedrock.py +3 -3
- openlit/instrumentation/chroma/__init__.py +9 -9
- openlit/instrumentation/chroma/chroma.py +7 -7
- openlit/instrumentation/cohere/__init__.py +7 -7
- openlit/instrumentation/cohere/async_cohere.py +10 -10
- openlit/instrumentation/cohere/cohere.py +11 -11
- openlit/instrumentation/controlflow/__init__.py +4 -4
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/__init__.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/__init__.py +3 -3
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/__init__.py +5 -5
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/__init__.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
- openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
- openlit/instrumentation/embedchain/__init__.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/__init__.py +3 -3
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +3 -3
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +5 -5
- openlit/instrumentation/gpt4all/gpt4all.py +350 -225
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +5 -5
- openlit/instrumentation/groq/async_groq.py +359 -243
- openlit/instrumentation/groq/groq.py +359 -243
- openlit/instrumentation/haystack/__init__.py +2 -2
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/__init__.py +7 -7
- openlit/instrumentation/julep/async_julep.py +6 -6
- openlit/instrumentation/julep/julep.py +6 -6
- openlit/instrumentation/langchain/__init__.py +15 -9
- openlit/instrumentation/langchain/async_langchain.py +388 -0
- openlit/instrumentation/langchain/langchain.py +110 -497
- openlit/instrumentation/letta/__init__.py +7 -7
- openlit/instrumentation/letta/letta.py +10 -8
- openlit/instrumentation/litellm/__init__.py +9 -10
- openlit/instrumentation/litellm/async_litellm.py +321 -250
- openlit/instrumentation/litellm/litellm.py +319 -248
- openlit/instrumentation/llamaindex/__init__.py +2 -2
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/__init__.py +2 -2
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/__init__.py +2 -2
- openlit/instrumentation/milvus/milvus.py +7 -7
- openlit/instrumentation/mistral/__init__.py +13 -13
- openlit/instrumentation/mistral/async_mistral.py +426 -253
- openlit/instrumentation/mistral/mistral.py +424 -250
- openlit/instrumentation/multion/__init__.py +7 -7
- openlit/instrumentation/multion/async_multion.py +9 -7
- openlit/instrumentation/multion/multion.py +9 -7
- openlit/instrumentation/ollama/__init__.py +19 -39
- openlit/instrumentation/ollama/async_ollama.py +137 -563
- openlit/instrumentation/ollama/ollama.py +136 -563
- openlit/instrumentation/ollama/utils.py +333 -0
- openlit/instrumentation/openai/__init__.py +11 -11
- openlit/instrumentation/openai/async_openai.py +25 -27
- openlit/instrumentation/openai/openai.py +25 -27
- openlit/instrumentation/phidata/__init__.py +2 -2
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/__init__.py +6 -6
- openlit/instrumentation/pinecone/pinecone.py +7 -7
- openlit/instrumentation/premai/__init__.py +5 -5
- openlit/instrumentation/premai/premai.py +268 -219
- openlit/instrumentation/qdrant/__init__.py +2 -2
- openlit/instrumentation/qdrant/async_qdrant.py +7 -7
- openlit/instrumentation/qdrant/qdrant.py +7 -7
- openlit/instrumentation/reka/__init__.py +5 -5
- openlit/instrumentation/reka/async_reka.py +93 -55
- openlit/instrumentation/reka/reka.py +93 -55
- openlit/instrumentation/together/__init__.py +9 -9
- openlit/instrumentation/together/async_together.py +284 -242
- openlit/instrumentation/together/together.py +284 -242
- openlit/instrumentation/transformers/__init__.py +3 -3
- openlit/instrumentation/transformers/transformers.py +79 -48
- openlit/instrumentation/vertexai/__init__.py +19 -69
- openlit/instrumentation/vertexai/async_vertexai.py +333 -990
- openlit/instrumentation/vertexai/vertexai.py +333 -990
- openlit/instrumentation/vllm/__init__.py +3 -3
- openlit/instrumentation/vllm/vllm.py +65 -35
- openlit/otel/events.py +85 -0
- openlit/otel/tracing.py +3 -13
- openlit/semcov/__init__.py +16 -4
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
- openlit-1.33.11.dist-info/RECORD +125 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
openlit/instrumentation/ollama/ollama.py
@@ -1,611 +1,184 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Ollama API calls.
 """
 
 import logging
-
-from opentelemetry.
+import time
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-
-
-
+    set_server_address_and_port
+)
+from openlit.instrumentation.ollama.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_embedding_response
 )
 from openlit.semcov import SemanticConvetion
 
-# Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat(
-
+def chat(version, environment, application_name,
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
-
+    class TracedSyncStream:
         """
-
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
-
-        Returns:
-            The response from the original 'chat' method.
+        Wrapper for streaming responses to collect telemetry.
         """
 
⋮ (removed old lines 55-104: content not preserved in this diff view)
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                            environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                            application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                            True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                            prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                            completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = wrapped(*args, **kwargs)
-
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._llmresponse = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._tool_calls = []
+            self._input_tokens = 0
+            self._output_tokens = 0
+            self._response_role = ''
+            self._span_name = span_name
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopIteration:
                 try:
⋮ (removed old lines 179-189: content not preserved in this diff view)
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['message']['content'],
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
                 except Exception as e:
-                    handle_exception(
+                    handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
-
-    return wrapper
-
-def generate(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
+                raise
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
-
-        Returns:
-            The response from the original 'generate' method.
+        Wraps the GenAI function call.
         """
 
-        # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)
 
-
-
-            # Special handling for streaming response to accommodate the nature of data flow
-            def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    for chunk in wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['response']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        prompt_tokens = general_tokens(kwargs.get("prompt", ""))
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                            pricing_info, prompt_tokens, completion_tokens)
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        request_model = kwargs.get("model", "gpt-4o")
 
-
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                            environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                            application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                            True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                            prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                            completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
⋮ (removed old lines 386-390: content not preserved in this diff view)
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
-        # Handling for non-streaming responses
        else:
-
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                response = wrapped(*args, **kwargs)
⋮ (removed old lines 421-439: content not preserved in this diff view)
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['response'],
-                            },
-                        )
-
-                    prompt_tokens = response["prompt_eval_count"]
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            return response
 
     return wrapper
 
-def embeddings(
-
+def embeddings(version, environment, application_name,
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """
 
-
-
-
-            try:
-                prompt_tokens = general_tokens(kwargs.get('prompt', ""))
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
-                    pricing_info, prompt_tokens)
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get('model', "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                    prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                    prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                    cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get('model', "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+        request_model = kwargs.get('model', 'all-minilm')
 
-
-                return response
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
⋮ (removed old lines 604-609: content not preserved in this diff view)
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            response = process_embedding_response(
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                event_provider=event_provider,
+                start_time=start_time,
+                span=span,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+                **kwargs
+            )
+
+        return response
 
     return wrapper