openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +78 -0
- openlit/__init__.py +41 -13
- openlit/instrumentation/ag2/__init__.py +9 -10
- openlit/instrumentation/ag2/ag2.py +134 -69
- openlit/instrumentation/ai21/__init__.py +6 -5
- openlit/instrumentation/ai21/ai21.py +71 -534
- openlit/instrumentation/ai21/async_ai21.py +71 -534
- openlit/instrumentation/ai21/utils.py +407 -0
- openlit/instrumentation/anthropic/__init__.py +3 -3
- openlit/instrumentation/anthropic/anthropic.py +5 -5
- openlit/instrumentation/anthropic/async_anthropic.py +5 -5
- openlit/instrumentation/assemblyai/__init__.py +2 -2
- openlit/instrumentation/assemblyai/assemblyai.py +3 -3
- openlit/instrumentation/astra/__init__.py +25 -25
- openlit/instrumentation/astra/astra.py +7 -7
- openlit/instrumentation/astra/async_astra.py +7 -7
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
- openlit/instrumentation/bedrock/__init__.py +2 -2
- openlit/instrumentation/bedrock/bedrock.py +3 -3
- openlit/instrumentation/chroma/__init__.py +9 -9
- openlit/instrumentation/chroma/chroma.py +7 -7
- openlit/instrumentation/cohere/__init__.py +7 -7
- openlit/instrumentation/cohere/async_cohere.py +10 -10
- openlit/instrumentation/cohere/cohere.py +11 -11
- openlit/instrumentation/controlflow/__init__.py +4 -4
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/__init__.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/__init__.py +3 -3
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/__init__.py +5 -5
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/__init__.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
- openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
- openlit/instrumentation/embedchain/__init__.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/__init__.py +3 -3
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +3 -3
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +5 -5
- openlit/instrumentation/gpt4all/gpt4all.py +350 -225
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +5 -5
- openlit/instrumentation/groq/async_groq.py +359 -243
- openlit/instrumentation/groq/groq.py +359 -243
- openlit/instrumentation/haystack/__init__.py +2 -2
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/__init__.py +7 -7
- openlit/instrumentation/julep/async_julep.py +6 -6
- openlit/instrumentation/julep/julep.py +6 -6
- openlit/instrumentation/langchain/__init__.py +15 -9
- openlit/instrumentation/langchain/async_langchain.py +388 -0
- openlit/instrumentation/langchain/langchain.py +110 -497
- openlit/instrumentation/letta/__init__.py +7 -7
- openlit/instrumentation/letta/letta.py +10 -8
- openlit/instrumentation/litellm/__init__.py +9 -10
- openlit/instrumentation/litellm/async_litellm.py +321 -250
- openlit/instrumentation/litellm/litellm.py +319 -248
- openlit/instrumentation/llamaindex/__init__.py +2 -2
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/__init__.py +2 -2
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/__init__.py +2 -2
- openlit/instrumentation/milvus/milvus.py +7 -7
- openlit/instrumentation/mistral/__init__.py +13 -13
- openlit/instrumentation/mistral/async_mistral.py +426 -253
- openlit/instrumentation/mistral/mistral.py +424 -250
- openlit/instrumentation/multion/__init__.py +7 -7
- openlit/instrumentation/multion/async_multion.py +9 -7
- openlit/instrumentation/multion/multion.py +9 -7
- openlit/instrumentation/ollama/__init__.py +19 -39
- openlit/instrumentation/ollama/async_ollama.py +137 -563
- openlit/instrumentation/ollama/ollama.py +136 -563
- openlit/instrumentation/ollama/utils.py +333 -0
- openlit/instrumentation/openai/__init__.py +11 -11
- openlit/instrumentation/openai/async_openai.py +25 -27
- openlit/instrumentation/openai/openai.py +25 -27
- openlit/instrumentation/phidata/__init__.py +2 -2
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/__init__.py +6 -6
- openlit/instrumentation/pinecone/pinecone.py +7 -7
- openlit/instrumentation/premai/__init__.py +5 -5
- openlit/instrumentation/premai/premai.py +268 -219
- openlit/instrumentation/qdrant/__init__.py +2 -2
- openlit/instrumentation/qdrant/async_qdrant.py +7 -7
- openlit/instrumentation/qdrant/qdrant.py +7 -7
- openlit/instrumentation/reka/__init__.py +5 -5
- openlit/instrumentation/reka/async_reka.py +93 -55
- openlit/instrumentation/reka/reka.py +93 -55
- openlit/instrumentation/together/__init__.py +9 -9
- openlit/instrumentation/together/async_together.py +284 -242
- openlit/instrumentation/together/together.py +284 -242
- openlit/instrumentation/transformers/__init__.py +3 -3
- openlit/instrumentation/transformers/transformers.py +79 -48
- openlit/instrumentation/vertexai/__init__.py +19 -69
- openlit/instrumentation/vertexai/async_vertexai.py +333 -990
- openlit/instrumentation/vertexai/vertexai.py +333 -990
- openlit/instrumentation/vllm/__init__.py +3 -3
- openlit/instrumentation/vllm/vllm.py +65 -35
- openlit/otel/events.py +85 -0
- openlit/otel/tracing.py +3 -13
- openlit/semcov/__init__.py +16 -4
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
- openlit-1.33.11.dist-info/RECORD +125 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
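Across the instrumentation modules listed above, the wrapper factories drop the old gen_ai_endpoint/trace_content parameters in favor of event_provider and capture_message_content (see the async_ollama.py hunk below). The following is a minimal, hypothetical sketch of that factory shape; the parameter names mirror the new async_chat() signature in the hunk, while make_chat_wrapper, the span name, and the no-op fallback are illustrative and not part of openlit.

import time
from contextlib import nullcontext

def make_chat_wrapper(version, environment, application_name,
                      tracer=None, event_provider=None, pricing_info=None,
                      capture_message_content=True, metrics=None, disable_metrics=False):
    """Return a wrapper that times the wrapped call and would hand the result to a processor."""
    def wrapper(wrapped, instance, args, kwargs):
        # Use the tracer if one was supplied, otherwise a no-op context manager.
        span_cm = tracer.start_as_current_span("chat") if tracer else nullcontext()
        with span_cm:
            start_time = time.time()
            response = wrapped(*args, **kwargs)
            elapsed = time.time() - start_time
            # A real implementation would record model, tokens, cost, duration, and,
            # when capture_message_content is True, the prompt/completion content.
            _ = elapsed
            return response
    return wrapper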
openlit/instrumentation/ollama/async_ollama.py
@@ -1,610 +1,184 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Ollama API calls.
 """

 import logging
-
-from opentelemetry.
+import time
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-
-
-
+    set_server_address_and_port
+)
+from openlit.instrumentation.ollama.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_embedding_response
+)
 from openlit.semcov import SemanticConvetion

-# Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def async_chat(
-
+def async_chat(version, environment, application_name,
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """

-
+    class TracedAsyncStream:
         """
-
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
-
-        Returns:
-            The response from the original 'chat' method.
+        Wrapper for streaming responses to collect telemetry.
         """

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                            environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                            application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                            True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                            prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                            completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = await wrapped(*args, **kwargs)
-
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._llmresponse = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._tool_calls = []
+            self._input_tokens = 0
+            self._output_tokens = 0
+            self._response_role = ''
+            self._span_name = span_name
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopAsyncIteration:
                 try:
-
-
-
-
-
-
-
-
-
-
-
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['message']['content'],
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
                 except Exception as e:
-                    handle_exception(
+                    handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
-
-    return wrapper
-
-def async_generate(gen_ai_endpoint, version, environment, application_name,
-    tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
+                raise

     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
-
-        Returns:
-            The response from the original 'generate' method.
+        Wraps the GenAI function call.
         """

-        # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)

-
-
-        # Special handling for streaming response to accommodate the nature of data flow
-        async def stream_generator():
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                # Placeholder for aggregating streaming response
-                llmresponse = ""
-
-                # Loop through streaming events capturing relevant details
-                async for chunk in await wrapped(*args, **kwargs):
-                    # Collect aggregated response from events
-                    content = chunk['response']
-                    llmresponse += content
-
-                    if chunk['done'] is True:
-                        completion_tokens = chunk["eval_count"]
-
-                    yield chunk
-
-                # Handling exception ensure observability without disrupting operation
-                try:
-                    prompt_tokens = general_tokens(kwargs.get("prompt", ""))
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    # Set Span attributes
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                        True)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                # pylint: disable=line-too-long
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                            },
-                        )
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        request_model = kwargs.get("model", "gpt-4o")

-
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-
-
-
-
-
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(total_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-        return stream_generator()
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = await wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)

-        # Handling for non-streaming responses
         else:
-
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['response'],
-                            },
-                        )
-
-                    prompt_tokens = response["prompt_eval_count"]
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            return response

     return wrapper

-def async_embeddings(
-
+def async_embeddings(version, environment, application_name,
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """

     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """

-
-
-
-            try:
-                prompt_tokens = general_tokens(kwargs.get('prompt', ""))
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
-                    pricing_info, prompt_tokens)
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get('model', "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                    prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                    prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                    cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get('model', "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+        request_model = kwargs.get('model', 'all-minilm')

-
-            return response
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'

-
-
-
-
-
-
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            response = process_embedding_response(
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                event_provider=event_provider,
+                start_time=start_time,
+                span=span,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+                **kwargs
+            )
+
+        return response

     return wrapper