openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +88 -0
- openlit/__init__.py +4 -3
- openlit/instrumentation/ag2/ag2.py +5 -5
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +9 -9
- openlit/instrumentation/astra/async_astra.py +9 -9
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +9 -9
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
- openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/async_julep.py +5 -5
- openlit/instrumentation/julep/julep.py +5 -5
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +105 -492
- openlit/instrumentation/letta/letta.py +11 -9
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +318 -247
- openlit/instrumentation/litellm/litellm.py +314 -243
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/milvus.py +9 -9
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +423 -250
- openlit/instrumentation/mistral/mistral.py +420 -246
- openlit/instrumentation/multion/async_multion.py +6 -4
- openlit/instrumentation/multion/multion.py +6 -4
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +433 -410
- openlit/instrumentation/openai/openai.py +414 -394
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/pinecone.py +9 -9
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +9 -9
- openlit/instrumentation/qdrant/qdrant.py +9 -9
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +76 -45
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +330 -987
- openlit/instrumentation/vertexai/vertexai.py +330 -987
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +66 -36
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
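The largest functional change in this release, visible in the Ollama instrumentation diff below and apparently mirrored across the other provider modules listed above, is a rewrite of the wrapper factories: the gen_ai_endpoint argument is gone, spans are named after the operation type and the requested model, the newer OpenTelemetry GenAI semantic-convention attributes (input/output tokens, server address and port, TTFT/TBT) are set, and metric attributes come from the new create_metrics_attributes helper. The sketch below is illustrative only, not openlit code; it assumes the opentelemetry-api package and uses shortened attribute keys in place of the SemanticConvetion constants.

# Hedged sketch of the new wrapper-factory shape (not the openlit implementation).
import time
from opentelemetry.trace import SpanKind

def chat(version, environment, application_name,
         tracer, pricing_info, trace_content, metrics, disable_metrics):
    """Return a wrapper that traces a chat call; signature mirrors the new factories."""
    def wrapper(wrapped, instance, args, kwargs):
        request_model = kwargs.get("model", "gpt-4o")
        # Span name is now "<operation> <model>" instead of a fixed endpoint string.
        span_name = f"chat {request_model}"
        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            start_time = time.time()
            response = wrapped(*args, **kwargs)
            # Attribute keys shortened for illustration only.
            span.set_attribute("gen_ai.request.model", request_model)
            span.set_attribute("gen_ai.server.ttft", time.time() - start_time)
            return response
    return wrapper

The real wrappers additionally compute cost from pricing_info, honor the trace_content and disable_metrics flags, and record the counter and histogram metrics shown in the diff that follows.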
openlit/instrumentation/ollama/ollama.py

@@ -1,29 +1,33 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Ollama API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
     handle_exception,
+    response_as_dict,
     general_tokens,
-
-
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat(gen_ai_endpoint, version, environment, application_name,
+def chat(version, environment, application_name,
          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat to collect metrics.
+    Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the Ollama API.
@@ -32,464 +36,410 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
-        A function that wraps the chat method to add telemetry.
+        A function that wraps the chat completions method to add telemetry.
     """

-
+    class TracedSyncStream:
         """
-
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.

-
-
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.

-
-
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
        """
-… (old lines 54-117 not shown)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       True)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                            },
-                        )
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = wrapped(*args, **kwargs)
-
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                chunked = response_as_dict(chunk)
+                self._llmresponse += chunked.get('message').get('content')
+
+                if chunked.get('eval_count'):
+                    self._input_tokens = chunked.get('prompt_eval_count')
+                    self._output_tokens = chunked.get('eval_count')
+                    self._response_model = chunked.get('model')
+                    self._finish_reason = chunked.get('done_reason')
+                return chunk
+            except StopIteration:
+                # Handling exception ensure observability without disrupting operation
                try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                    # Format 'messages' into a single string
-                    message_prompt = …
+                    message_prompt = self._kwargs.get("messages", "")
                    formatted_messages = []
                    for message in message_prompt:
                        role = message["role"]
                        content = message["content"]

                        if isinstance(content, list):
-… (old lines 187-192 not shown)
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                            formatted_messages.append(f"{role}: {content_str}")
                        else:
                            formatted_messages.append(f"{role}: {content}")
                    prompt = "\n".join(formatted_messages)

-
-
-
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, self._input_tokens,
+                                               self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                             SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-… (old lines 202-206 not shown)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+
+                    # List of attributes and their config keys
+                    attributes = [
+                        (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+                        (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+                        (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
+                        (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                    ]
+
+                    # Safely get the options dictionary from kwargs
+                    options = self._kwargs.get('options', {})
+
+                    # Set each attribute if the corresponding value exists and is not None
+                    for attribute, key in attributes:
+                        # Use dictionary `get` to retrieve values from the options dictionary
+                        value = options.get(key)
+                        if value is not None:
+                            self._span.set_attribute(attribute, value)
+
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                             environment)
-
+                    self._span.set_attribute(SERVICE_NAME,
                                             application_name)
-
-
-
-
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                             True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                             self._input_tokens + self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                             cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                    if trace_content:
-
+                        self._span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                            attributes={
                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                            },
                        )
-
+                        self._span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: …
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                            },
                        )
-
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                               pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-
-                    span.set_status(Status(StatusCode.OK))
+                    self._span.set_status(Status(StatusCode.OK))

                    if disable_metrics is False:
-                        attributes = …
-
-
-                            SemanticConvetion.…
-… (old lines 253-258 not shown)
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )

+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
                        metrics["genai_requests"].add(1, attributes)
-                        metrics["…
-                        metrics["…
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)

-                    # Return original response
-                    return response
-
                except Exception as e:
-                    handle_exception(…
+                    handle_exception(self._span, e)
                    logger.error("Error in trace creation: %s", e)
-
-
-
-
-    return wrapper
-
-def generate(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
+                finally:
+                    self._span.end()
+                raise

    def wrapper(wrapped, instance, args, kwargs):
        """
-        Wraps the '…
-
+        Wraps the 'chat.completions' API call to add telemetry.
+
        This collects metrics such as execution time, cost, and token usage, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
-            wrapped: The original '…
+            wrapped: The original 'chat.completions' method to be wrapped.
            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the '…
-            kwargs: Keyword arguments for the '…
+            args: Positional arguments for the 'chat.completions' method.
+            kwargs: Keyword arguments for the 'chat.completions' method.

        Returns:
-            The response from the original '…
+            The response from the original 'chat.completions' method.
        """

        # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
-
-
-            # Placeholder for aggregating streaming response
-            llmresponse = ""
-
-            # Loop through streaming events capturing relevant details
-            for chunk in wrapped(*args, **kwargs):
-                # Collect aggregated response from events
-                content = chunk['response']
-                llmresponse += content
-
-                if chunk['done'] is True:
-                    completion_tokens = chunk["eval_count"]
-
-                yield chunk
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                prompt_tokens = general_tokens(kwargs.get("prompt", ""))
-                total_tokens = prompt_tokens + completion_tokens
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                           pricing_info, prompt_tokens, completion_tokens)
-
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                   True)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                   completion_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   total_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            # pylint: disable=line-too-long
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                        },
-                    )
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                        },
-                    )
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(total_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

        # Handling for non-streaming responses
        else:
-
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                response = wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = response_as_dict(response)

                try:
-                    # …
+                    # Format 'messages' into a single string
+                    message_prompt = kwargs.get("messages", "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str = ", ".join(
+                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                if "type" in item else f'text: {item["text"]}'
+                                for item in content
+                            )
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+
+                    input_tokens = response_dict.get('prompt_eval_count')
+                    output_tokens = response_dict.get('eval_count')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.…
-
-                    span.set_attribute(SemanticConvetion.…
-
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+
+                    # List of attributes and their config keys
+                    attributes = [
+                        (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+                        (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+                        (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
+                        (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                    ]
+
+                    # Safely get the options dictionary from kwargs
+                    options = kwargs.get('options', {})
+
+                    # Set each attribute if the corresponding value exists and is not None
+                    for attribute, key in attributes:
+                        # Use dictionary `get` to retrieve values from the options dictionary
+                        value = options.get(key)
+                        if value is not None:
+                            span.set_attribute(attribute, value)
+
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                       [response_dict.get('done_reason')])
+                    if kwargs.get('format'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           'json')
+                    else:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           'text')
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(…
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "llama3"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: …
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                            },
                        )
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                            attributes={
-
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('message').get('content')),
                            },
                        )
-
-
-
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                               pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                           str(response_dict.get('message').get('tool_calls')))

                    span.set_status(Status(StatusCode.OK))

                    if disable_metrics is False:
-                        attributes = …
-
-
-                            SemanticConvetion.…
-… (old lines 478-483 not shown)
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )

+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                        metrics["genai_requests"].add(1, attributes)
-                        metrics["…
-                        metrics["…
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)

                    # Return original response
@@ -504,13 +454,12 @@ def generate(gen_ai_endpoint, version, environment, application_name,

    return wrapper

-def embeddings(gen_ai_endpoint, version, environment, application_name,
-
+def embeddings(version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for embeddings to collect metrics.

    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the Ollama API.
@@ -539,71 +488,89 @@ def embeddings(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'embeddings' method.
        """

-
+        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+        request_model = kwargs.get('model', 'all-minilm')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()

            try:
-
+                input_tokens = general_tokens(str(kwargs.get('prompt')))
+
                # Calculate cost of the operation
-                cost = get_embed_model_cost(…
-
-
-
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)
+
+                # Set Span attributes (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.…
-
-                span.set_attribute(SemanticConvetion.…
-
-                span.set_attribute(SemanticConvetion.…
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                   environment)
-                span.set_attribute(…
+                span.set_attribute(SERVICE_NAME,
                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get('model', "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   prompt_tokens)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                   input_tokens)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
+
                if trace_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get(…
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('prompt', '')),
                        },
                    )

                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
-                    attributes = …
-
-
-                        SemanticConvetion.…
-… (old lines 585-596 not shown)
-                    metrics[…
-                    metrics[…
-                    metrics[…
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
-                logger.error(…
+                logger.error('Error in trace creation: %s', e)

                # Return original response
                return response
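For streaming chat calls the rewrite stops wrapping the response in a local generator; instead it returns a delegating iterator (TracedSyncStream above, and a corresponding async variant in async_ollama.py) that forwards each chunk, timestamps it to derive time-to-first-token and time-between-tokens, aggregates the streamed text, and closes the span once the stream is exhausted. The exact arithmetic lives in the new calculate_ttft and calculate_tbt helpers in openlit.__helpers; the stripped-down sketch below is one plausible reading of the pattern (first-chunk latency and mean inter-chunk gap), not the openlit implementation. It assumes each chunk is a plain dict with a message.content field (the real code normalizes chunks via response_as_dict), and on_finish is a made-up callback standing in for the span and metric finalization shown in the diff.

# Minimal, self-contained illustration of the delegating stream-wrapper pattern.
import time

class TracedStream:
    """Iterate over a wrapped chunk stream while measuring TTFT/TBT and aggregating text."""

    def __init__(self, wrapped, on_finish):
        self.__wrapped__ = wrapped      # underlying chunk iterator (e.g. a streaming response)
        self._on_finish = on_finish     # called once with (text, ttft, tbt) when the stream ends
        self._start = time.time()
        self._timestamps = []           # arrival time of every chunk
        self._text = ""                 # aggregated streamed content

    def __iter__(self):
        return self

    def __getattr__(self, name):
        # Delegate any other attribute access to the wrapped object.
        return getattr(self.__wrapped__, name)

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
        except StopIteration:
            ttft = (self._timestamps[0] - self._start) if self._timestamps else 0.0
            gaps = [b - a for a, b in zip(self._timestamps, self._timestamps[1:])]
            tbt = sum(gaps) / len(gaps) if gaps else 0.0
            self._on_finish(self._text, ttft, tbt)
            raise
        self._timestamps.append(time.time())
        self._text += chunk.get("message", {}).get("content", "")
        return chunk

# Usage sketch: wrap whatever streaming iterator the client returns.
# stream = TracedStream(chunk_iterator, on_finish=lambda text, ttft, tbt: print(ttft, tbt))
# for chunk in stream: ...

In the diff above, the equivalent finalization happens inside __next__'s StopIteration handler, which sets the span attributes, records the metrics, ends the span, and re-raises.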