openlit 1.34.19__py3-none-any.whl → 1.34.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +40 -0
- openlit/instrumentation/bedrock/__init__.py +19 -14
- openlit/instrumentation/bedrock/bedrock.py +169 -35
- openlit/instrumentation/bedrock/utils.py +143 -172
- openlit/instrumentation/litellm/async_litellm.py +2 -2
- openlit/instrumentation/openai/__init__.py +63 -68
- openlit/instrumentation/openai/async_openai.py +203 -1277
- openlit/instrumentation/openai/openai.py +200 -1274
- openlit/instrumentation/openai/utils.py +794 -0
- openlit/instrumentation/vertexai/__init__.py +18 -23
- openlit/instrumentation/vertexai/async_vertexai.py +46 -364
- openlit/instrumentation/vertexai/utils.py +204 -0
- openlit/instrumentation/vertexai/vertexai.py +46 -364
- {openlit-1.34.19.dist-info → openlit-1.34.22.dist-info}/METADATA +1 -1
- {openlit-1.34.19.dist-info → openlit-1.34.22.dist-info}/RECORD +17 -15
- {openlit-1.34.19.dist-info → openlit-1.34.22.dist-info}/LICENSE +0 -0
- {openlit-1.34.19.dist-info → openlit-1.34.22.dist-info}/WHEEL +0 -0
@@ -2,78 +2,55 @@
|
|
2
2
|
Module for monitoring OpenAI API calls.
|
3
3
|
"""
|
4
4
|
|
5
|
-
import logging
|
6
5
|
import time
|
7
|
-
from opentelemetry.trace import SpanKind
|
8
|
-
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
|
6
|
+
from opentelemetry.trace import SpanKind
|
9
7
|
from openlit.__helpers import (
|
10
|
-
get_chat_model_cost,
|
11
|
-
get_embed_model_cost,
|
12
|
-
get_audio_model_cost,
|
13
|
-
get_image_model_cost,
|
14
|
-
general_tokens,
|
15
8
|
handle_exception,
|
16
|
-
extract_and_format_input,
|
17
|
-
concatenate_all_contents,
|
18
|
-
response_as_dict,
|
19
|
-
calculate_ttft,
|
20
|
-
calculate_tbt,
|
21
|
-
create_metrics_attributes,
|
22
9
|
set_server_address_and_port
|
23
10
|
)
|
11
|
+
from openlit.instrumentation.openai.utils import (
|
12
|
+
process_chat_chunk,
|
13
|
+
process_response_chunk,
|
14
|
+
process_chat_response,
|
15
|
+
process_streaming_chat_response,
|
16
|
+
process_streaming_response_response,
|
17
|
+
process_response_response,
|
18
|
+
process_embedding_response,
|
19
|
+
process_image_response,
|
20
|
+
process_audio_response,
|
21
|
+
)
|
24
22
|
from openlit.semcov import SemanticConvention
|
25
23
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def responses(version, environment, application_name,
|
30
|
-
tracer, pricing_info, capture_message_content, metrics, disable_metrics):
|
24
|
+
def chat_completions(version, environment, application_name, tracer, pricing_info,
|
25
|
+
capture_message_content, metrics, disable_metrics):
|
31
26
|
"""
|
32
|
-
Generates a telemetry wrapper for chat completions
|
33
|
-
|
34
|
-
Args:
|
35
|
-
version: Version of the monitoring package.
|
36
|
-
environment: Deployment environment (e.g., production, staging).
|
37
|
-
application_name: Name of the application using the OpenAI API.
|
38
|
-
tracer: OpenTelemetry tracer for creating spans.
|
39
|
-
pricing_info: Information used for calculating the cost of OpenAI usage.
|
40
|
-
capture_message_content: Flag indicating whether to trace the actual content.
|
41
|
-
|
42
|
-
Returns:
|
43
|
-
A function that wraps the chat completions method to add telemetry.
|
27
|
+
Generates a telemetry wrapper for OpenAI chat completions.
|
44
28
|
"""
|
45
29
|
|
46
30
|
class TracedSyncStream:
|
47
31
|
"""
|
48
|
-
Wrapper for streaming responses to collect
|
49
|
-
Wraps the response to collect message IDs and aggregated response.
|
50
|
-
|
51
|
-
This class implements the '__aiter__' and '__anext__' methods that
|
52
|
-
handle asynchronous streaming responses.
|
53
|
-
|
54
|
-
This class also implements '__aenter__' and '__aexit__' methods that
|
55
|
-
handle asynchronous context management protocol.
|
32
|
+
Wrapper for streaming responses to collect telemetry.
|
56
33
|
"""
|
34
|
+
|
57
35
|
def __init__(
|
58
36
|
self,
|
59
37
|
wrapped,
|
60
38
|
span,
|
39
|
+
span_name,
|
61
40
|
kwargs,
|
62
41
|
server_address,
|
63
42
|
server_port,
|
64
|
-
**args,
|
65
43
|
):
|
66
44
|
self.__wrapped__ = wrapped
|
67
45
|
self._span = span
|
68
|
-
|
46
|
+
self._span_name = span_name
|
69
47
|
self._llmresponse = ""
|
70
48
|
self._response_id = ""
|
71
49
|
self._response_model = ""
|
72
50
|
self._finish_reason = ""
|
73
|
-
self.
|
74
|
-
self.
|
75
|
-
|
76
|
-
self._args = args
|
51
|
+
self._system_fingerprint = ""
|
52
|
+
self._service_tier = "auto"
|
53
|
+
self._tools = None
|
77
54
|
self._kwargs = kwargs
|
78
55
|
self._start_time = time.time()
|
79
56
|
self._end_time = None
|
@@ -100,383 +77,106 @@ def responses(version, environment, application_name,
|
|
100
77
|
def __next__(self):
|
101
78
|
try:
|
102
79
|
chunk = self.__wrapped__.__next__()
|
103
|
-
|
104
|
-
# Record the timestamp for the current chunk
|
105
|
-
self._timestamps.append(end_time)
|
106
|
-
|
107
|
-
if len(self._timestamps) == 1:
|
108
|
-
# Calculate time to first chunk
|
109
|
-
self._ttft = calculate_ttft(self._timestamps, self._start_time)
|
110
|
-
|
111
|
-
chunked = response_as_dict(chunk)
|
112
|
-
# Collect message IDs and aggregated response from events
|
113
|
-
if chunked.get('type') == "response.output_text.delta":
|
114
|
-
self._llmresponse += chunked.get('delta')
|
115
|
-
if chunked.get('type') == "response.completed":
|
116
|
-
self._response_id = chunked.get('response').get('id')
|
117
|
-
self._response_model = chunked.get('response').get('model')
|
118
|
-
self._finish_reason = chunked.get('response').get('status')
|
119
|
-
self._input_tokens = chunked.get('response').get('usage').get('input_tokens')
|
120
|
-
self._output_tokens = chunked.get('response').get('usage').get('output_tokens')
|
80
|
+
process_chat_chunk(self, chunk)
|
121
81
|
return chunk
|
122
82
|
except StopIteration:
|
123
|
-
# Handling exception ensure observability without disrupting operation
|
124
83
|
try:
|
125
|
-
self.
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
request_model = self._kwargs.get("model", "gpt-4o")
|
136
|
-
|
137
|
-
# Calculate cost of the operation
|
138
|
-
cost = get_chat_model_cost(request_model,
|
139
|
-
pricing_info, self._input_tokens,
|
140
|
-
self._output_tokens)
|
141
|
-
|
142
|
-
# Set Span attributes (OTel Semconv)
|
143
|
-
self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
144
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
145
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
146
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
147
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
148
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
149
|
-
request_model)
|
150
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
|
151
|
-
str(self._kwargs.get("seed", "")))
|
152
|
-
self._span.set_attribute(SemanticConvention.SERVER_PORT,
|
153
|
-
self._server_port)
|
154
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
|
155
|
-
str(self._kwargs.get("max_output_tokens", -1)))
|
156
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
|
157
|
-
str(self._kwargs.get("stop", [])))
|
158
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
|
159
|
-
str(self._kwargs.get("temperature", 1.0)))
|
160
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
|
161
|
-
str(self._kwargs.get("top_p", 1.0)))
|
162
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
163
|
-
[self._finish_reason])
|
164
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
|
165
|
-
self._response_id)
|
166
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
167
|
-
self._response_model)
|
168
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
169
|
-
self._input_tokens)
|
170
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
171
|
-
self._output_tokens)
|
172
|
-
self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
173
|
-
self._server_address)
|
174
|
-
if isinstance(self._llmresponse, str):
|
175
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
176
|
-
"text")
|
177
|
-
else:
|
178
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
179
|
-
"json")
|
180
|
-
|
181
|
-
# Set Span attributes (Extra)
|
182
|
-
self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
183
|
-
environment)
|
184
|
-
self._span.set_attribute(SERVICE_NAME,
|
185
|
-
application_name)
|
186
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
187
|
-
self._kwargs.get("user", ""))
|
188
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
189
|
-
True)
|
190
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
191
|
-
self._input_tokens + self._output_tokens)
|
192
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
193
|
-
cost)
|
194
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
|
195
|
-
self._tbt)
|
196
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
197
|
-
self._ttft)
|
198
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
199
|
-
version)
|
200
|
-
|
201
|
-
if capture_message_content:
|
202
|
-
self._span.add_event(
|
203
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
204
|
-
attributes={
|
205
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
206
|
-
},
|
207
|
-
)
|
208
|
-
self._span.add_event(
|
209
|
-
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
210
|
-
attributes={
|
211
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
|
212
|
-
},
|
213
|
-
)
|
214
|
-
self._span.set_status(Status(StatusCode.OK))
|
215
|
-
|
216
|
-
if disable_metrics is False:
|
217
|
-
attributes = create_metrics_attributes(
|
218
|
-
service_name=application_name,
|
219
|
-
deployment_environment=environment,
|
220
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
221
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
222
|
-
request_model=request_model,
|
223
|
-
server_address=self._server_address,
|
224
|
-
server_port=self._server_port,
|
225
|
-
response_model=self._response_model,
|
84
|
+
with self._span:
|
85
|
+
process_streaming_chat_response(
|
86
|
+
self,
|
87
|
+
pricing_info=pricing_info,
|
88
|
+
environment=environment,
|
89
|
+
application_name=application_name,
|
90
|
+
metrics=metrics,
|
91
|
+
capture_message_content=capture_message_content,
|
92
|
+
disable_metrics=disable_metrics,
|
93
|
+
version=version
|
226
94
|
)
|
227
|
-
|
228
|
-
metrics["genai_client_usage_tokens"].record(
|
229
|
-
self._input_tokens + self._output_tokens, attributes
|
230
|
-
)
|
231
|
-
metrics["genai_client_operation_duration"].record(
|
232
|
-
self._end_time - self._start_time, attributes
|
233
|
-
)
|
234
|
-
metrics["genai_server_tbt"].record(
|
235
|
-
self._tbt, attributes
|
236
|
-
)
|
237
|
-
metrics["genai_server_ttft"].record(
|
238
|
-
self._ttft, attributes
|
239
|
-
)
|
240
|
-
metrics["genai_requests"].add(1, attributes)
|
241
|
-
metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
|
242
|
-
metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
|
243
|
-
metrics["genai_cost"].record(cost, attributes)
|
244
|
-
|
245
95
|
except Exception as e:
|
246
96
|
handle_exception(self._span, e)
|
247
|
-
logger.error("Error in trace creation: %s", e)
|
248
|
-
finally:
|
249
|
-
self._span.end()
|
250
97
|
raise
|
251
98
|
|
252
99
|
def wrapper(wrapped, instance, args, kwargs):
|
253
100
|
"""
|
254
|
-
Wraps the
|
255
|
-
|
256
|
-
This collects metrics such as execution time, cost, and token usage, and handles errors
|
257
|
-
gracefully, adding details to the trace for observability.
|
258
|
-
|
259
|
-
Args:
|
260
|
-
wrapped: The original 'chat.completions' method to be wrapped.
|
261
|
-
instance: The instance of the class where the original method is defined.
|
262
|
-
args: Positional arguments for the 'chat.completions' method.
|
263
|
-
kwargs: Keyword arguments for the 'chat.completions' method.
|
264
|
-
|
265
|
-
Returns:
|
266
|
-
The response from the original 'chat.completions' method.
|
101
|
+
Wraps the OpenAI chat completions call.
|
267
102
|
"""
|
268
103
|
|
269
|
-
# Check if streaming is enabled for the API call
|
270
104
|
streaming = kwargs.get("stream", False)
|
271
105
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
272
106
|
request_model = kwargs.get("model", "gpt-4o")
|
273
107
|
|
274
108
|
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
275
109
|
|
276
|
-
# pylint: disable=no-else-return
|
277
110
|
if streaming:
|
278
|
-
# Special handling for streaming response to accommodate the nature of data flow
|
279
111
|
awaited_wrapped = wrapped(*args, **kwargs)
|
280
112
|
span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
|
281
113
|
|
282
|
-
return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
|
114
|
+
return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
|
283
115
|
|
284
|
-
# Handling for non-streaming responses
|
285
116
|
else:
|
286
|
-
with tracer.start_as_current_span(span_name, kind=
|
117
|
+
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
287
118
|
start_time = time.time()
|
288
119
|
response = wrapped(*args, **kwargs)
|
289
|
-
end_time = time.time()
|
290
|
-
|
291
|
-
response_dict = response_as_dict(response)
|
292
120
|
|
293
121
|
try:
|
294
|
-
|
295
|
-
|
296
|
-
prompt = concatenate_all_contents(formatted_messages)
|
297
|
-
except:
|
298
|
-
prompt = kwargs.get('input', '')
|
299
|
-
|
300
|
-
input_tokens = response_dict.get('usage').get('input_tokens')
|
301
|
-
output_tokens = response_dict.get('usage').get('output_tokens')
|
302
|
-
|
303
|
-
# Calculate cost of the operation
|
304
|
-
cost = get_chat_model_cost(request_model,
|
305
|
-
pricing_info, input_tokens,
|
306
|
-
output_tokens)
|
307
|
-
|
308
|
-
# Set base span attribues (OTel Semconv)
|
309
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
310
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
311
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
312
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
313
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
314
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
315
|
-
request_model)
|
316
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
|
317
|
-
kwargs.get("seed", ""))
|
318
|
-
span.set_attribute(SemanticConvention.SERVER_PORT,
|
319
|
-
server_port)
|
320
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
|
321
|
-
kwargs.get("max_output_tokens", -1))
|
322
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
|
323
|
-
kwargs.get("stop", []))
|
324
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
|
325
|
-
str(response_dict.get("temperature", 1.0)))
|
326
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
|
327
|
-
str(response_dict.get("top_p", 1.0)))
|
328
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
|
329
|
-
response_dict.get("id"))
|
330
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
331
|
-
response_dict.get('model'))
|
332
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
333
|
-
input_tokens)
|
334
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
335
|
-
output_tokens)
|
336
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
337
|
-
server_address)
|
338
|
-
|
339
|
-
# Set base span attribues (Extras)
|
340
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
341
|
-
environment)
|
342
|
-
span.set_attribute(SERVICE_NAME,
|
343
|
-
application_name)
|
344
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
345
|
-
kwargs.get("user", ""))
|
346
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
347
|
-
False)
|
348
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
349
|
-
input_tokens + output_tokens)
|
350
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
351
|
-
cost)
|
352
|
-
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
353
|
-
end_time - start_time)
|
354
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
355
|
-
version)
|
356
|
-
|
357
|
-
if capture_message_content:
|
358
|
-
span.add_event(
|
359
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
360
|
-
attributes={
|
361
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
362
|
-
},
|
363
|
-
)
|
364
|
-
|
365
|
-
for i in range(kwargs.get('n',1)):
|
366
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
367
|
-
[response_dict.get('status')])
|
368
|
-
try:
|
369
|
-
llm_response = response_dict.get('output')[i].get('content')[0].get('text','')
|
370
|
-
except:
|
371
|
-
llm_response = ''
|
372
|
-
|
373
|
-
if capture_message_content:
|
374
|
-
span.add_event(
|
375
|
-
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
376
|
-
attributes={
|
377
|
-
# pylint: disable=line-too-long
|
378
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
|
379
|
-
},
|
380
|
-
)
|
381
|
-
if kwargs.get('tools'):
|
382
|
-
span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
|
383
|
-
str(response_dict.get('tools')))
|
384
|
-
|
385
|
-
if isinstance(llm_response, str):
|
386
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
387
|
-
"text")
|
388
|
-
elif llm_response is not None:
|
389
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
390
|
-
"json")
|
391
|
-
|
392
|
-
span.set_status(Status(StatusCode.OK))
|
393
|
-
|
394
|
-
if disable_metrics is False:
|
395
|
-
attributes = create_metrics_attributes(
|
396
|
-
service_name=application_name,
|
397
|
-
deployment_environment=environment,
|
398
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
399
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
122
|
+
response = process_chat_response(
|
123
|
+
response=response,
|
400
124
|
request_model=request_model,
|
401
|
-
|
125
|
+
pricing_info=pricing_info,
|
402
126
|
server_port=server_port,
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
metrics
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
)
|
415
|
-
metrics["genai_requests"].add(1, attributes)
|
416
|
-
metrics["genai_completion_tokens"].add(output_tokens, attributes)
|
417
|
-
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
418
|
-
metrics["genai_cost"].record(cost, attributes)
|
419
|
-
|
420
|
-
# Return original response
|
421
|
-
return response
|
127
|
+
server_address=server_address,
|
128
|
+
environment=environment,
|
129
|
+
application_name=application_name,
|
130
|
+
metrics=metrics,
|
131
|
+
start_time=start_time,
|
132
|
+
span=span,
|
133
|
+
capture_message_content=capture_message_content,
|
134
|
+
disable_metrics=disable_metrics,
|
135
|
+
version=version,
|
136
|
+
**kwargs
|
137
|
+
)
|
422
138
|
|
423
139
|
except Exception as e:
|
424
140
|
handle_exception(span, e)
|
425
|
-
logger.error("Error in trace creation: %s", e)
|
426
141
|
|
427
|
-
|
428
|
-
return response
|
142
|
+
return response
|
429
143
|
|
430
144
|
return wrapper
|
431
145
|
|
432
|
-
def
|
433
|
-
|
146
|
+
def responses(version, environment, application_name, tracer, pricing_info,
|
147
|
+
capture_message_content, metrics, disable_metrics, **kwargs):
|
434
148
|
"""
|
435
|
-
Generates a telemetry wrapper for
|
436
|
-
|
437
|
-
Args:
|
438
|
-
version: Version of the monitoring package.
|
439
|
-
environment: Deployment environment (e.g., production, staging).
|
440
|
-
application_name: Name of the application using the OpenAI API.
|
441
|
-
tracer: OpenTelemetry tracer for creating spans.
|
442
|
-
pricing_info: Information used for calculating the cost of OpenAI usage.
|
443
|
-
capture_message_content: Flag indicating whether to trace the actual content.
|
444
|
-
|
445
|
-
Returns:
|
446
|
-
A function that wraps the chat completions method to add telemetry.
|
149
|
+
Generates a telemetry wrapper for OpenAI responses API.
|
447
150
|
"""
|
448
151
|
|
449
152
|
class TracedSyncStream:
|
450
153
|
"""
|
451
|
-
Wrapper for streaming responses to collect
|
452
|
-
Wraps the response to collect message IDs and aggregated response.
|
453
|
-
|
454
|
-
This class implements the '__aiter__' and '__anext__' methods that
|
455
|
-
handle asynchronous streaming responses.
|
456
|
-
|
457
|
-
This class also implements '__aenter__' and '__aexit__' methods that
|
458
|
-
handle asynchronous context management protocol.
|
154
|
+
Wrapper for streaming responses to collect telemetry.
|
459
155
|
"""
|
156
|
+
|
460
157
|
def __init__(
|
461
158
|
self,
|
462
159
|
wrapped,
|
463
160
|
span,
|
161
|
+
span_name,
|
464
162
|
kwargs,
|
465
163
|
server_address,
|
466
164
|
server_port,
|
467
|
-
**args,
|
468
165
|
):
|
469
166
|
self.__wrapped__ = wrapped
|
470
167
|
self._span = span
|
471
|
-
|
168
|
+
self._span_name = span_name
|
472
169
|
self._llmresponse = ""
|
473
170
|
self._response_id = ""
|
474
171
|
self._response_model = ""
|
475
172
|
self._finish_reason = ""
|
476
|
-
self.
|
477
|
-
self.
|
478
|
-
|
479
|
-
self.
|
173
|
+
self._input_tokens = 0
|
174
|
+
self._output_tokens = 0
|
175
|
+
self._reasoning_tokens = 0
|
176
|
+
self._operation_type = "responses"
|
177
|
+
self._service_tier = "default"
|
178
|
+
self._tools = None
|
179
|
+
self._response_tools = None
|
480
180
|
self._kwargs = kwargs
|
481
181
|
self._start_time = time.time()
|
482
182
|
self._end_time = None
|
@@ -503,578 +203,126 @@ def chat_completions(version, environment, application_name,
|
|
503
203
|
def __next__(self):
|
504
204
|
try:
|
505
205
|
chunk = self.__wrapped__.__next__()
|
506
|
-
|
507
|
-
# Record the timestamp for the current chunk
|
508
|
-
self._timestamps.append(end_time)
|
509
|
-
|
510
|
-
if len(self._timestamps) == 1:
|
511
|
-
# Calculate time to first chunk
|
512
|
-
self._ttft = calculate_ttft(self._timestamps, self._start_time)
|
513
|
-
|
514
|
-
chunked = response_as_dict(chunk)
|
515
|
-
# Collect message IDs and aggregated response from events
|
516
|
-
if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
|
517
|
-
'content' in chunked.get('choices')[0].get('delta'))):
|
518
|
-
|
519
|
-
content = chunked.get('choices')[0].get('delta').get('content')
|
520
|
-
if content:
|
521
|
-
self._llmresponse += content
|
522
|
-
self._response_id = chunked.get('id')
|
523
|
-
self._response_model = chunked.get('model')
|
524
|
-
try:
|
525
|
-
self._finish_reason = chunked.get('choices', [])[0].get('finish_reason')
|
526
|
-
except (IndexError, AttributeError, TypeError):
|
527
|
-
self._finish_reason = "stop"
|
528
|
-
self._openai_response_service_tier = chunked.get('service_tier') or 'auto'
|
529
|
-
self._openai_system_fingerprint = chunked.get('system_fingerprint')
|
206
|
+
process_response_chunk(self, chunk)
|
530
207
|
return chunk
|
531
208
|
except StopIteration:
|
532
|
-
# Handling exception ensure observability without disrupting operation
|
533
209
|
try:
|
534
|
-
self.
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
if isinstance(content, list):
|
546
|
-
content_str_list = []
|
547
|
-
for item in content:
|
548
|
-
if item["type"] == "text":
|
549
|
-
content_str_list.append(f'text: {item["text"]}')
|
550
|
-
elif (item["type"] == "image_url" and
|
551
|
-
not item["image_url"]["url"].startswith("data:")):
|
552
|
-
content_str_list.append(f'image_url: {item["image_url"]["url"]}')
|
553
|
-
content_str = ", ".join(content_str_list)
|
554
|
-
formatted_messages.append(f"{role}: {content_str}")
|
555
|
-
else:
|
556
|
-
formatted_messages.append(f"{role}: {content}")
|
557
|
-
prompt = "\n".join(formatted_messages)
|
558
|
-
|
559
|
-
request_model = self._kwargs.get("model", "gpt-4o")
|
560
|
-
|
561
|
-
# Calculate tokens using input prompt and aggregated response
|
562
|
-
input_tokens = general_tokens(prompt)
|
563
|
-
output_tokens = general_tokens(self._llmresponse)
|
564
|
-
|
565
|
-
# Calculate cost of the operation
|
566
|
-
cost = get_chat_model_cost(request_model,
|
567
|
-
pricing_info, input_tokens,
|
568
|
-
output_tokens)
|
569
|
-
|
570
|
-
# Set Span attributes (OTel Semconv)
|
571
|
-
self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
572
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
573
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
574
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
575
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
576
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
577
|
-
request_model)
|
578
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
|
579
|
-
str(self._kwargs.get("seed", "")))
|
580
|
-
self._span.set_attribute(SemanticConvention.SERVER_PORT,
|
581
|
-
self._server_port)
|
582
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
|
583
|
-
str(self._kwargs.get("frequency_penalty", 0.0)))
|
584
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
|
585
|
-
self._kwargs.get("max_tokens", -1))
|
586
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
587
|
-
str(self._kwargs.get("presence_penalty", 0.0)))
|
588
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
|
589
|
-
str(self._kwargs.get("stop", [])))
|
590
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
|
591
|
-
str(self._kwargs.get("temperature", 1.0)))
|
592
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
|
593
|
-
str(self._kwargs.get("top_p", 1.0)))
|
594
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
595
|
-
[self._finish_reason])
|
596
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
|
597
|
-
self._response_id)
|
598
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
599
|
-
self._response_model)
|
600
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
601
|
-
input_tokens)
|
602
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
603
|
-
output_tokens)
|
604
|
-
self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
605
|
-
self._server_address)
|
606
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
|
607
|
-
str(self._kwargs.get("service_tier", "auto")))
|
608
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
|
609
|
-
self._openai_response_service_tier)
|
610
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
|
611
|
-
self._openai_system_fingerprint)
|
612
|
-
if isinstance(self._llmresponse, str):
|
613
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
614
|
-
"text")
|
615
|
-
else:
|
616
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
617
|
-
"json")
|
618
|
-
|
619
|
-
# Set Span attributes (Extra)
|
620
|
-
self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
621
|
-
environment)
|
622
|
-
self._span.set_attribute(SERVICE_NAME,
|
623
|
-
application_name)
|
624
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
625
|
-
str(self._kwargs.get("user", "")))
|
626
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
627
|
-
True)
|
628
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
629
|
-
input_tokens + output_tokens)
|
630
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
631
|
-
cost)
|
632
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
|
633
|
-
self._tbt)
|
634
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
635
|
-
self._ttft)
|
636
|
-
self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
637
|
-
version)
|
638
|
-
if capture_message_content:
|
639
|
-
self._span.add_event(
|
640
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
641
|
-
attributes={
|
642
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
643
|
-
},
|
210
|
+
with self._span:
|
211
|
+
process_streaming_response_response(
|
212
|
+
self,
|
213
|
+
pricing_info=pricing_info,
|
214
|
+
environment=environment,
|
215
|
+
application_name=application_name,
|
216
|
+
metrics=metrics,
|
217
|
+
capture_message_content=capture_message_content,
|
218
|
+
disable_metrics=disable_metrics,
|
219
|
+
version=version
|
644
220
|
)
|
645
|
-
self._span.add_event(
|
646
|
-
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
647
|
-
attributes={
|
648
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
|
649
|
-
},
|
650
|
-
)
|
651
|
-
self._span.set_status(Status(StatusCode.OK))
|
652
|
-
|
653
|
-
if disable_metrics is False:
|
654
|
-
attributes = create_metrics_attributes(
|
655
|
-
service_name=application_name,
|
656
|
-
deployment_environment=environment,
|
657
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
658
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
659
|
-
request_model=request_model,
|
660
|
-
server_address=self._server_address,
|
661
|
-
server_port=self._server_port,
|
662
|
-
response_model=self._response_model,
|
663
|
-
)
|
664
|
-
|
665
|
-
metrics["genai_client_usage_tokens"].record(
|
666
|
-
input_tokens + output_tokens, attributes
|
667
|
-
)
|
668
|
-
metrics["genai_client_operation_duration"].record(
|
669
|
-
self._end_time - self._start_time, attributes
|
670
|
-
)
|
671
|
-
metrics["genai_server_tbt"].record(
|
672
|
-
self._tbt, attributes
|
673
|
-
)
|
674
|
-
metrics["genai_server_ttft"].record(
|
675
|
-
self._ttft, attributes
|
676
|
-
)
|
677
|
-
metrics["genai_requests"].add(1, attributes)
|
678
|
-
metrics["genai_completion_tokens"].add(output_tokens, attributes)
|
679
|
-
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
680
|
-
metrics["genai_cost"].record(cost, attributes)
|
681
|
-
|
682
221
|
except Exception as e:
|
683
222
|
handle_exception(self._span, e)
|
684
|
-
logger.error("Error in trace creation: %s", e)
|
685
|
-
finally:
|
686
|
-
self._span.end()
|
687
223
|
raise
|
688
224
|
|
689
225
|
def wrapper(wrapped, instance, args, kwargs):
|
690
226
|
"""
|
691
|
-
Wraps the
|
692
|
-
|
693
|
-
This collects metrics such as execution time, cost, and token usage, and handles errors
|
694
|
-
gracefully, adding details to the trace for observability.
|
695
|
-
|
696
|
-
Args:
|
697
|
-
wrapped: The original 'chat.completions' method to be wrapped.
|
698
|
-
instance: The instance of the class where the original method is defined.
|
699
|
-
args: Positional arguments for the 'chat.completions' method.
|
700
|
-
kwargs: Keyword arguments for the 'chat.completions' method.
|
701
|
-
|
702
|
-
Returns:
|
703
|
-
The response from the original 'chat.completions' method.
|
227
|
+
Wraps the OpenAI responses API call.
|
704
228
|
"""
|
705
229
|
|
706
|
-
# Check if streaming is enabled for the API call
|
707
230
|
streaming = kwargs.get("stream", False)
|
708
231
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
709
232
|
request_model = kwargs.get("model", "gpt-4o")
|
710
233
|
|
711
234
|
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
712
235
|
|
713
|
-
# pylint: disable=no-else-return
|
714
236
|
if streaming:
|
715
|
-
# Special handling for streaming response to accommodate the nature of data flow
|
716
237
|
awaited_wrapped = wrapped(*args, **kwargs)
|
717
238
|
span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
|
718
239
|
|
719
|
-
return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
|
240
|
+
return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
|
720
241
|
|
721
|
-
# Handling for non-streaming responses
|
722
242
|
else:
|
723
|
-
with tracer.start_as_current_span(span_name, kind=
|
243
|
+
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
724
244
|
start_time = time.time()
|
725
245
|
response = wrapped(*args, **kwargs)
|
726
|
-
end_time = time.time()
|
727
|
-
|
728
|
-
response_dict = response_as_dict(response)
|
729
246
|
|
730
247
|
try:
|
731
|
-
|
732
|
-
|
733
|
-
formatted_messages = []
|
734
|
-
for message in message_prompt:
|
735
|
-
role = message["role"]
|
736
|
-
content = message["content"]
|
737
|
-
|
738
|
-
if isinstance(content, list):
|
739
|
-
content_str = ", ".join(
|
740
|
-
f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
|
741
|
-
if "type" in item else f'text: {item["text"]}'
|
742
|
-
for item in content
|
743
|
-
)
|
744
|
-
formatted_messages.append(f"{role}: {content_str}")
|
745
|
-
else:
|
746
|
-
formatted_messages.append(f"{role}: {content}")
|
747
|
-
prompt = "\n".join(formatted_messages)
|
748
|
-
|
749
|
-
input_tokens = response_dict.get('usage').get('prompt_tokens')
|
750
|
-
output_tokens = response_dict.get('usage').get('completion_tokens')
|
751
|
-
|
752
|
-
# Calculate cost of the operation
|
753
|
-
cost = get_chat_model_cost(request_model,
|
754
|
-
pricing_info, input_tokens,
|
755
|
-
output_tokens)
|
756
|
-
|
757
|
-
# Set base span attribues (OTel Semconv)
|
758
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
759
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
760
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
761
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
762
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
763
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
764
|
-
request_model)
|
765
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
|
766
|
-
str(kwargs.get("seed", "")))
|
767
|
-
span.set_attribute(SemanticConvention.SERVER_PORT,
|
768
|
-
server_port)
|
769
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
|
770
|
-
str(kwargs.get("frequency_penalty", 0.0)))
|
771
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
|
772
|
-
str(kwargs.get("max_tokens", -1)))
|
773
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
774
|
-
str(kwargs.get("presence_penalty", 0.0)))
|
775
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
|
776
|
-
str(kwargs.get("stop", [])))
|
777
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
|
778
|
-
str(kwargs.get("temperature", 1.0)))
|
779
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
|
780
|
-
str(kwargs.get("top_p", 1.0)))
|
781
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
|
782
|
-
response_dict.get("id"))
|
783
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
784
|
-
response_dict.get('model'))
|
785
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
786
|
-
input_tokens)
|
787
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
788
|
-
output_tokens)
|
789
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
790
|
-
server_address)
|
791
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
|
792
|
-
str(kwargs.get("service_tier", "auto")))
|
793
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
|
794
|
-
response_dict.get('service_tier', 'auto'))
|
795
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
|
796
|
-
str(response_dict.get('system_fingerprint', '')))
|
797
|
-
|
798
|
-
# Set base span attribues (Extras)
|
799
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
800
|
-
environment)
|
801
|
-
span.set_attribute(SERVICE_NAME,
|
802
|
-
application_name)
|
803
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
804
|
-
kwargs.get("user", ""))
|
805
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
806
|
-
False)
|
807
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
808
|
-
input_tokens + output_tokens)
|
809
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
810
|
-
cost)
|
811
|
-
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
812
|
-
end_time - start_time)
|
813
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
814
|
-
version)
|
815
|
-
if capture_message_content:
|
816
|
-
span.add_event(
|
817
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
818
|
-
attributes={
|
819
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
820
|
-
},
|
821
|
-
)
|
822
|
-
|
823
|
-
for i in range(kwargs.get('n',1)):
|
824
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
825
|
-
[response_dict.get('choices')[i].get('finish_reason')])
|
826
|
-
if capture_message_content:
|
827
|
-
span.add_event(
|
828
|
-
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
829
|
-
attributes={
|
830
|
-
# pylint: disable=line-too-long
|
831
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
|
832
|
-
},
|
833
|
-
)
|
834
|
-
if kwargs.get('tools'):
|
835
|
-
span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
|
836
|
-
str(response_dict.get('choices')[i].get('message').get('tool_calls')))
|
837
|
-
|
838
|
-
if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
|
839
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
840
|
-
"text")
|
841
|
-
elif response_dict.get('choices')[i].get('message').get('content') is not None:
|
842
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
843
|
-
"json")
|
844
|
-
|
845
|
-
span.set_status(Status(StatusCode.OK))
|
846
|
-
|
847
|
-
if disable_metrics is False:
|
848
|
-
attributes = create_metrics_attributes(
|
849
|
-
service_name=application_name,
|
850
|
-
deployment_environment=environment,
|
851
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
852
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
248
|
+
response = process_response_response(
|
249
|
+
response=response,
|
853
250
|
request_model=request_model,
|
854
|
-
|
251
|
+
pricing_info=pricing_info,
|
855
252
|
server_port=server_port,
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
metrics
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
)
|
868
|
-
metrics["genai_requests"].add(1, attributes)
|
869
|
-
metrics["genai_completion_tokens"].add(output_tokens, attributes)
|
870
|
-
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
871
|
-
metrics["genai_cost"].record(cost, attributes)
|
872
|
-
|
873
|
-
# Return original response
|
874
|
-
return response
|
253
|
+
server_address=server_address,
|
254
|
+
environment=environment,
|
255
|
+
application_name=application_name,
|
256
|
+
metrics=metrics,
|
257
|
+
start_time=start_time,
|
258
|
+
span=span,
|
259
|
+
capture_message_content=capture_message_content,
|
260
|
+
disable_metrics=disable_metrics,
|
261
|
+
version=version,
|
262
|
+
**kwargs
|
263
|
+
)
|
875
264
|
|
876
265
|
except Exception as e:
|
877
266
|
handle_exception(span, e)
|
878
|
-
logger.error("Error in trace creation: %s", e)
|
879
267
|
|
880
|
-
|
881
|
-
return response
|
268
|
+
return response
|
882
269
|
|
883
270
|
return wrapper
|
884
271
|
|
885
|
-
def chat_completions_parse(version, environment, application_name, tracer, pricing_info,
|
886
|
-
|
272
|
+
def chat_completions_parse(version, environment, application_name, tracer, pricing_info,
|
273
|
+
capture_message_content, metrics, disable_metrics):
|
887
274
|
"""
|
888
|
-
Generates a telemetry wrapper for chat completions parse
|
889
|
-
|
890
|
-
Args:
|
891
|
-
version: Version of the monitoring package.
|
892
|
-
environment: Deployment environment (e.g., production, staging).
|
893
|
-
application_name: Name of the application using the OpenAI API.
|
894
|
-
tracer: OpenTelemetry tracer for creating spans.
|
895
|
-
pricing_info: Information used for calculating the cost of OpenAI usage.
|
896
|
-
capture_message_content: Flag indicating whether to trace the actual content.
|
897
|
-
|
898
|
-
Returns:
|
899
|
-
A function that wraps the chat completions parse method to add telemetry.
|
275
|
+
Generates a telemetry wrapper for OpenAI chat completions parse.
|
900
276
|
"""
|
901
277
|
|
902
278
|
def wrapper(wrapped, instance, args, kwargs):
|
903
279
|
"""
|
904
|
-
Wraps the
|
905
|
-
|
906
|
-
This collects metrics such as execution time, cost, and token usage, and handles errors
|
907
|
-
gracefully, adding details to the trace for observability.
|
908
|
-
|
909
|
-
Args:
|
910
|
-
wrapped: The original 'chat.completions' method to be wrapped.
|
911
|
-
instance: The instance of the class where the original method is defined.
|
912
|
-
args: Positional arguments for the 'chat.completions' method.
|
913
|
-
kwargs: Keyword arguments for the 'chat.completions' method.
|
914
|
-
|
915
|
-
Returns:
|
916
|
-
The response from the original 'chat.completions.parse' method.
|
280
|
+
Wraps the OpenAI chat completions parse call.
|
917
281
|
"""
|
282
|
+
|
918
283
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
919
284
|
request_model = kwargs.get("model", "gpt-4o")
|
285
|
+
|
920
286
|
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
921
287
|
|
922
288
|
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
923
289
|
start_time = time.time()
|
924
|
-
|
925
|
-
# Execute the original 'parse' method
|
926
|
-
response = wrapped(*args, **kwargs)
|
927
|
-
end_time = time.time()
|
928
|
-
|
929
|
-
response_dict = response_as_dict(response)
|
930
|
-
|
931
|
-
# Format 'messages' from kwargs to calculate input tokens
|
932
|
-
message_prompt = kwargs.get("messages", "")
|
933
|
-
formatted_messages = []
|
934
|
-
for message in message_prompt:
|
935
|
-
role = message.get("role")
|
936
|
-
content = message.get("content")
|
937
|
-
if content:
|
938
|
-
formatted_messages.append(f"{role}: {content}")
|
939
|
-
prompt = "\n".join(formatted_messages)
|
940
|
-
|
941
|
-
input_tokens = response_dict.get('usage').get('prompt_tokens')
|
942
|
-
output_tokens = response_dict.get('usage').get('completion_tokens')
|
943
|
-
|
944
|
-
# Calculate cost
|
945
|
-
cost = get_chat_model_cost(request_model,
|
946
|
-
pricing_info, input_tokens,
|
947
|
-
output_tokens)
|
948
|
-
|
949
|
-
# Set base span attribues (OTel Semconv)
|
950
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
951
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
952
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
953
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
|
954
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, str(kwargs.get("seed", "")))
|
955
|
-
span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
|
956
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
|
957
|
-
str(kwargs.get("frequency_penalty", 0.0)))
|
958
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, str(kwargs.get("max_tokens", -1)))
|
959
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
960
|
-
str(kwargs.get("presence_penalty", 0.0)))
|
961
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, str(kwargs.get("stop", [])))
|
962
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, str(kwargs.get("temperature", 1.0)))
|
963
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, str(kwargs.get("top_p", 1.0)))
|
964
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, response_dict.get("id"))
|
965
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_dict.get('model'))
|
966
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
|
967
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
|
968
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
|
969
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
|
970
|
-
str(kwargs.get("service_tier", "auto")))
|
971
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
|
972
|
-
response_dict.get('service_tier', 'auto'))
|
973
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
|
974
|
-
str(response_dict.get('system_fingerprint', '')))
|
975
|
-
|
976
|
-
# Set base span attribues (Extras)
|
977
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
|
978
|
-
span.set_attribute(SERVICE_NAME, application_name)
|
979
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, kwargs.get("user", ""))
|
980
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
|
981
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, False)
|
982
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
|
983
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
|
984
|
-
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, end_time - start_time)
|
985
|
-
|
986
|
-
if capture_message_content:
|
987
|
-
span.add_event(
|
988
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
989
|
-
attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
|
990
|
-
)
|
290
|
+
response = wrapped(*args, **kwargs)
|
991
291
|
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
if capture_message_content:
|
996
|
-
span.add_event(
|
997
|
-
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
998
|
-
attributes={
|
999
|
-
# pylint: disable=line-too-long
|
1000
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(
|
1001
|
-
response_dict.get('choices')[i].get('message').get('content')),
|
1002
|
-
},
|
1003
|
-
)
|
1004
|
-
if kwargs.get('tools'):
|
1005
|
-
span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
|
1006
|
-
str(response_dict.get('choices')[i].get('message').get('tool_calls')))
|
1007
|
-
|
1008
|
-
if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
|
1009
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
1010
|
-
"text")
|
1011
|
-
elif response_dict.get('choices')[i].get('message').get('content') is not None:
|
1012
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
1013
|
-
"json")
|
1014
|
-
|
1015
|
-
span.set_status(Status(StatusCode.OK))
|
1016
|
-
|
1017
|
-
if not disable_metrics:
|
1018
|
-
attributes = create_metrics_attributes(
|
1019
|
-
service_name=application_name,
|
1020
|
-
deployment_environment=environment,
|
1021
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
1022
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
292
|
+
try:
|
293
|
+
response = process_chat_response(
|
294
|
+
response=response,
|
1023
295
|
request_model=request_model,
|
1024
|
-
|
296
|
+
pricing_info=pricing_info,
|
1025
297
|
server_port=server_port,
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
metrics
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
298
|
+
server_address=server_address,
|
299
|
+
environment=environment,
|
300
|
+
application_name=application_name,
|
301
|
+
metrics=metrics,
|
302
|
+
start_time=start_time,
|
303
|
+
span=span,
|
304
|
+
capture_message_content=capture_message_content,
|
305
|
+
disable_metrics=disable_metrics,
|
306
|
+
version=version,
|
307
|
+
**kwargs
|
308
|
+
)
|
1037
309
|
|
1038
310
|
except Exception as e:
|
1039
311
|
handle_exception(span, e)
|
1040
|
-
|
1041
|
-
|
1042
|
-
raise
|
312
|
+
|
313
|
+
return response
|
1043
314
|
|
1044
315
|
return wrapper
|
1045
316
|
|
1046
|
-
def embedding(version, environment, application_name,
|
1047
|
-
|
317
|
+
def embedding(version, environment, application_name, tracer, pricing_info,
|
318
|
+
capture_message_content, metrics, disable_metrics, **kwargs):
|
1048
319
|
"""
|
1049
|
-
Generates a telemetry wrapper for embeddings
|
1050
|
-
|
1051
|
-
Args:
|
1052
|
-
version: Version of the monitoring package.
|
1053
|
-
environment: Deployment environment (e.g., production, staging).
|
1054
|
-
application_name: Name of the application using the OpenAI API.
|
1055
|
-
tracer: OpenTelemetry tracer for creating spans.
|
1056
|
-
pricing_info: Information used for calculating the cost of OpenAI usage.
|
1057
|
-
capture_message_content: Flag indicating whether to trace the actual content.
|
1058
|
-
|
1059
|
-
Returns:
|
1060
|
-
A function that wraps the embeddings method to add telemetry.
|
320
|
+
Generates a telemetry wrapper for OpenAI embeddings.
|
1061
321
|
"""
|
1062
322
|
|
1063
323
|
def wrapper(wrapped, instance, args, kwargs):
|
1064
324
|
"""
|
1065
|
-
Wraps the
|
1066
|
-
|
1067
|
-
This collects metrics such as execution time, cost, and token usage, and handles errors
|
1068
|
-
gracefully, adding details to the trace for observability.
|
1069
|
-
|
1070
|
-
Args:
|
1071
|
-
wrapped: The original 'embeddings' method to be wrapped.
|
1072
|
-
instance: The instance of the class where the original method is defined.
|
1073
|
-
args: Positional arguments for the 'embeddings' method.
|
1074
|
-
kwargs: Keyword arguments for the 'embeddings' method.
|
1075
|
-
|
1076
|
-
Returns:
|
1077
|
-
The response from the original 'embeddings' method.
|
325
|
+
Wraps the OpenAI embeddings call.
|
1078
326
|
"""
|
1079
327
|
|
1080
328
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
@@ -1082,127 +330,44 @@ def embedding(version, environment, application_name,
|
|
1082
330
|
|
1083
331
|
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
|
1084
332
|
|
1085
|
-
with tracer.start_as_current_span(span_name, kind=
|
333
|
+
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
1086
334
|
start_time = time.time()
|
1087
335
|
response = wrapped(*args, **kwargs)
|
1088
|
-
end_time = time.time()
|
1089
336
|
|
1090
|
-
response_dict = response_as_dict(response)
|
1091
337
|
try:
|
1092
|
-
|
1093
|
-
|
1094
|
-
# Calculate cost of the operation
|
1095
|
-
cost = get_embed_model_cost(request_model,
|
1096
|
-
pricing_info, input_tokens)
|
1097
|
-
|
1098
|
-
# Set Span attributes (OTel Semconv)
|
1099
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
1100
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
1101
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
|
1102
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
1103
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
1104
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
1105
|
-
request_model)
|
1106
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
|
1107
|
-
[kwargs.get('encoding_format', 'float')])
|
1108
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
1109
|
-
request_model)
|
1110
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
1111
|
-
server_address)
|
1112
|
-
span.set_attribute(SemanticConvention.SERVER_PORT,
|
1113
|
-
server_port)
|
1114
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
1115
|
-
input_tokens)
|
1116
|
-
|
1117
|
-
# Set Span attributes (Extras)
|
1118
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
1119
|
-
environment)
|
1120
|
-
span.set_attribute(SERVICE_NAME,
|
1121
|
-
application_name)
|
1122
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
1123
|
-
kwargs.get("user", ""))
|
1124
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
1125
|
-
input_tokens)
|
1126
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
1127
|
-
cost)
|
1128
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
1129
|
-
version)
|
1130
|
-
|
1131
|
-
if capture_message_content:
|
1132
|
-
span.add_event(
|
1133
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
1134
|
-
attributes={
|
1135
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
|
1136
|
-
},
|
1137
|
-
)
|
1138
|
-
|
1139
|
-
span.set_status(Status(StatusCode.OK))
|
1140
|
-
|
1141
|
-
if disable_metrics is False:
|
1142
|
-
attributes = create_metrics_attributes(
|
1143
|
-
service_name=application_name,
|
1144
|
-
deployment_environment=environment,
|
1145
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
|
1146
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
338
|
+
response = process_embedding_response(
|
339
|
+
response=response,
|
1147
340
|
request_model=request_model,
|
1148
|
-
|
341
|
+
pricing_info=pricing_info,
|
1149
342
|
server_port=server_port,
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
# Return original response
|
1163
|
-
return response
|
343
|
+
server_address=server_address,
|
344
|
+
environment=environment,
|
345
|
+
application_name=application_name,
|
346
|
+
metrics=metrics,
|
347
|
+
start_time=start_time,
|
348
|
+
span=span,
|
349
|
+
capture_message_content=capture_message_content,
|
350
|
+
disable_metrics=disable_metrics,
|
351
|
+
version=version,
|
352
|
+
**kwargs
|
353
|
+
)
|
1164
354
|
|
1165
355
|
except Exception as e:
|
1166
356
|
handle_exception(span, e)
|
1167
|
-
logger.error("Error in trace creation: %s", e)
|
1168
357
|
|
1169
|
-
|
1170
|
-
return response
|
358
|
+
return response
|
1171
359
|
|
1172
360
|
return wrapper
|
1173
361
|
|
1174
|
-
def image_generate(version, environment, application_name,
|
1175
|
-
|
362
|
+
def image_generate(version, environment, application_name, tracer, pricing_info,
|
363
|
+
capture_message_content, metrics, disable_metrics, **kwargs):
|
1176
364
|
"""
|
1177
|
-
Generates a telemetry wrapper for image generation
|
1178
|
-
|
1179
|
-
Args:
|
1180
|
-
version: Version of the monitoring package.
|
1181
|
-
environment: Deployment environment (e.g., production, staging).
|
1182
|
-
application_name: Name of the application using the OpenAI API.
|
1183
|
-
tracer: OpenTelemetry tracer for creating spans.
|
1184
|
-
pricing_info: Information used for calculating the cost of OpenAI image generation.
|
1185
|
-
capture_message_content: Flag indicating whether to trace the input prompt and generated images.
|
1186
|
-
|
1187
|
-
Returns:
|
1188
|
-
A function that wraps the image generation method to add telemetry.
|
365
|
+
Generates a telemetry wrapper for OpenAI image generation.
|
1189
366
|
"""
|
1190
367
|
|
1191
368
|
def wrapper(wrapped, instance, args, kwargs):
|
1192
369
|
"""
|
1193
|
-
Wraps the
|
1194
|
-
|
1195
|
-
This collects metrics such as execution time, cost, and handles errors
|
1196
|
-
gracefully, adding details to the trace for observability.
|
1197
|
-
|
1198
|
-
Args:
|
1199
|
-
wrapped: The original 'images.generate' method to be wrapped.
|
1200
|
-
instance: The instance of the class where the original method is defined.
|
1201
|
-
args: Positional arguments for the 'images.generate' method.
|
1202
|
-
kwargs: Keyword arguments for the 'images.generate' method.
|
1203
|
-
|
1204
|
-
Returns:
|
1205
|
-
The response from the original 'images.generate' method.
|
370
|
+
Wraps the OpenAI image generation call.
|
1206
371
|
"""
|
1207
372
|
|
1208
373
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
@@ -1210,146 +375,46 @@ def image_generate(version, environment, application_name,
|
|
1210
375
|
|
1211
376
|
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
|
1212
377
|
|
1213
|
-
with tracer.start_as_current_span(span_name, kind=
|
378
|
+
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
1214
379
|
start_time = time.time()
|
1215
380
|
response = wrapped(*args, **kwargs)
|
1216
381
|
end_time = time.time()
|
1217
382
|
|
1218
|
-
images_count = 0
|
1219
|
-
|
1220
383
|
try:
|
1221
|
-
|
1222
|
-
|
1223
|
-
image = "b64_json"
|
1224
|
-
else:
|
1225
|
-
image = "url"
|
1226
|
-
|
1227
|
-
# Calculate cost of the operation
|
1228
|
-
cost = get_image_model_cost(request_model,
|
1229
|
-
pricing_info, kwargs.get("size", "1024x1024"),
|
1230
|
-
kwargs.get("quality", "standard"))
|
1231
|
-
|
1232
|
-
for items in response.data:
|
1233
|
-
# Set Span attributes (OTel Semconv)
|
1234
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
1235
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
1236
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE)
|
1237
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
1238
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
1239
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
1240
|
-
request_model)
|
1241
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
1242
|
-
server_address)
|
1243
|
-
span.set_attribute(SemanticConvention.SERVER_PORT,
|
1244
|
-
server_port)
|
1245
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
|
1246
|
-
response.created)
|
1247
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
1248
|
-
request_model)
|
1249
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
1250
|
-
"image")
|
1251
|
-
|
1252
|
-
# Set Span attributes (Extras)
|
1253
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
1254
|
-
environment)
|
1255
|
-
span.set_attribute(SERVICE_NAME,
|
1256
|
-
application_name)
|
1257
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE,
|
1258
|
-
kwargs.get("size", "1024x1024"))
|
1259
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_QUALITY,
|
1260
|
-
kwargs.get("quality", "standard"))
|
1261
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_STYLE,
|
1262
|
-
kwargs.get("style", "vivid"))
|
1263
|
-
span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT,
|
1264
|
-
items.revised_prompt if items.revised_prompt else "")
|
1265
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
1266
|
-
kwargs.get("user", ""))
|
1267
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
1268
|
-
version)
|
1269
|
-
|
1270
|
-
if capture_message_content:
|
1271
|
-
span.add_event(
|
1272
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
1273
|
-
attributes={
|
1274
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
|
1275
|
-
},
|
1276
|
-
)
|
1277
|
-
attribute_name = f"{SemanticConvention.GEN_AI_RESPONSE_IMAGE}.{images_count}"
|
1278
|
-
span.add_event(
|
1279
|
-
name=attribute_name,
|
1280
|
-
attributes={
|
1281
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
|
1282
|
-
},
|
1283
|
-
)
|
1284
|
-
|
1285
|
-
images_count+=1
|
1286
|
-
|
1287
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
1288
|
-
len(response.data) * cost)
|
1289
|
-
span.set_status(Status(StatusCode.OK))
|
1290
|
-
|
1291
|
-
if disable_metrics is False:
|
1292
|
-
attributes = create_metrics_attributes(
|
1293
|
-
service_name=application_name,
|
1294
|
-
deployment_environment=environment,
|
1295
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE,
|
1296
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
384
|
+
response = process_image_response(
|
385
|
+
response=response,
|
1297
386
|
request_model=request_model,
|
1298
|
-
|
387
|
+
pricing_info=pricing_info,
|
1299
388
|
server_port=server_port,
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
metrics
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
389
|
+
server_address=server_address,
|
390
|
+
environment=environment,
|
391
|
+
application_name=application_name,
|
392
|
+
metrics=metrics,
|
393
|
+
start_time=start_time,
|
394
|
+
end_time=end_time,
|
395
|
+
span=span,
|
396
|
+
capture_message_content=capture_message_content,
|
397
|
+
disable_metrics=disable_metrics,
|
398
|
+
version=version,
|
399
|
+
**kwargs
|
400
|
+
)
|
1311
401
|
|
1312
402
|
except Exception as e:
|
1313
403
|
handle_exception(span, e)
|
1314
|
-
logger.error("Error in trace creation: %s", e)
|
1315
404
|
|
1316
|
-
|
1317
|
-
return response
|
405
|
+
return response
|
1318
406
|
|
1319
407
|
return wrapper
|
1320
408
|
|
1321
|
-
def image_variatons(version, environment, application_name,
|
1322
|
-
|
409
|
+
def image_variatons(version, environment, application_name, tracer, pricing_info,
|
410
|
+
capture_message_content, metrics, disable_metrics):
|
1323
411
|
"""
|
1324
|
-
Generates a telemetry wrapper for
|
1325
|
-
|
1326
|
-
Args:
|
1327
|
-
version: Version of the monitoring package.
|
1328
|
-
environment: Deployment environment (e.g., production, staging).
|
1329
|
-
application_name: Name of the application using the OpenAI API.
|
1330
|
-
tracer: OpenTelemetry tracer for creating spans.
|
1331
|
-
pricing_info: Information used for calculating the cost of generating image variations.
|
1332
|
-
capture_message_content: Flag indicating whether to trace the input image and generated variations.
|
1333
|
-
|
1334
|
-
Returns:
|
1335
|
-
A function that wraps the image variations creation method to add telemetry.
|
412
|
+
Generates a telemetry wrapper for OpenAI image variations.
|
1336
413
|
"""
|
1337
414
|
|
1338
415
|
def wrapper(wrapped, instance, args, kwargs):
|
1339
416
|
"""
|
1340
|
-
Wraps the
|
1341
|
-
|
1342
|
-
This collects metrics such as execution time, cost, and handles errors
|
1343
|
-
gracefully, adding details to the trace for observability.
|
1344
|
-
|
1345
|
-
Args:
|
1346
|
-
wrapped: The original 'images.create.variations' method to be wrapped.
|
1347
|
-
instance: The instance of the class where the original method is defined.
|
1348
|
-
args: Positional arguments for the method.
|
1349
|
-
kwargs: Keyword arguments for the method.
|
1350
|
-
|
1351
|
-
Returns:
|
1352
|
-
The response from the original 'images.create.variations' method.
|
417
|
+
Wraps the OpenAI image variations call.
|
1353
418
|
"""
|
1354
419
|
|
1355
420
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
@@ -1357,135 +422,46 @@ def image_variatons(version, environment, application_name,
|
|
1357
422
|
|
1358
423
|
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
|
1359
424
|
|
1360
|
-
with tracer.start_as_current_span(span_name, kind=
|
425
|
+
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
1361
426
|
start_time = time.time()
|
1362
427
|
response = wrapped(*args, **kwargs)
|
1363
428
|
end_time = time.time()
|
1364
429
|
|
1365
|
-
images_count = 0
|
1366
|
-
|
1367
430
|
try:
|
1368
|
-
|
1369
|
-
|
1370
|
-
image = "b64_json"
|
1371
|
-
else:
|
1372
|
-
image = "url"
|
1373
|
-
|
1374
|
-
# Calculate cost of the operation
|
1375
|
-
cost = get_image_model_cost(request_model, pricing_info,
|
1376
|
-
kwargs.get("size", "1024x1024"), "standard")
|
1377
|
-
|
1378
|
-
for items in response.data:
|
1379
|
-
# Set Span attributes (OTel Semconv)
|
1380
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
1381
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
1382
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE)
|
1383
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
1384
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
1385
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
1386
|
-
request_model)
|
1387
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
1388
|
-
server_address)
|
1389
|
-
span.set_attribute(SemanticConvention.SERVER_PORT,
|
1390
|
-
server_port)
|
1391
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
|
1392
|
-
response.created)
|
1393
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
1394
|
-
request_model)
|
1395
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
1396
|
-
"image")
|
1397
|
-
|
1398
|
-
# Set Span attributes (Extras)
|
1399
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
1400
|
-
environment)
|
1401
|
-
span.set_attribute(SERVICE_NAME,
|
1402
|
-
application_name)
|
1403
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE,
|
1404
|
-
kwargs.get("size", "1024x1024"))
|
1405
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_QUALITY,
|
1406
|
-
"standard")
|
1407
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
|
1408
|
-
kwargs.get("user", ""))
|
1409
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
1410
|
-
version)
|
1411
|
-
|
1412
|
-
if capture_message_content:
|
1413
|
-
attribute_name = f"{SemanticConvention.GEN_AI_RESPONSE_IMAGE}.{images_count}"
|
1414
|
-
span.add_event(
|
1415
|
-
name=attribute_name,
|
1416
|
-
attributes={
|
1417
|
-
SemanticConvention.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
|
1418
|
-
},
|
1419
|
-
)
|
1420
|
-
|
1421
|
-
images_count+=1
|
1422
|
-
|
1423
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
1424
|
-
len(response.data) * cost)
|
1425
|
-
span.set_status(Status(StatusCode.OK))
|
1426
|
-
|
1427
|
-
if disable_metrics is False:
|
1428
|
-
attributes = create_metrics_attributes(
|
1429
|
-
service_name=application_name,
|
1430
|
-
deployment_environment=environment,
|
1431
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE,
|
1432
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
431
|
+
response = process_image_response(
|
432
|
+
response=response,
|
1433
433
|
request_model=request_model,
|
1434
|
-
|
434
|
+
pricing_info=pricing_info,
|
1435
435
|
server_port=server_port,
|
1436
|
-
|
1437
|
-
|
1438
|
-
|
1439
|
-
metrics
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
436
|
+
server_address=server_address,
|
437
|
+
environment=environment,
|
438
|
+
application_name=application_name,
|
439
|
+
metrics=metrics,
|
440
|
+
start_time=start_time,
|
441
|
+
end_time=end_time,
|
442
|
+
span=span,
|
443
|
+
capture_message_content=capture_message_content,
|
444
|
+
disable_metrics=disable_metrics,
|
445
|
+
version=version,
|
446
|
+
**kwargs
|
447
|
+
)
|
1447
448
|
|
1448
449
|
except Exception as e:
|
1449
450
|
handle_exception(span, e)
|
1450
|
-
logger.error("Error in trace creation: %s", e)
|
1451
451
|
|
1452
|
-
|
1453
|
-
return response
|
452
|
+
return response
|
1454
453
|
|
1455
454
|
return wrapper
|
1456
455
|
|
1457
|
-
def audio_create(version, environment, application_name,
|
1458
|
-
|
456
|
+
def audio_create(version, environment, application_name, tracer, pricing_info,
|
457
|
+
capture_message_content, metrics, disable_metrics):
|
1459
458
|
"""
|
1460
|
-
Generates a telemetry wrapper for
|
1461
|
-
|
1462
|
-
Args:
|
1463
|
-
version: Version of the monitoring package.
|
1464
|
-
environment: Deployment environment (e.g., production, staging).
|
1465
|
-
application_name: Name of the application using the OpenAI API.
|
1466
|
-
tracer: OpenTelemetry tracer for creating spans.
|
1467
|
-
pricing_info: Information used for calculating the cost of generating speech audio.
|
1468
|
-
capture_message_content: Flag indicating whether to trace the input text and generated audio.
|
1469
|
-
|
1470
|
-
Returns:
|
1471
|
-
A function that wraps the speech audio creation method to add telemetry.
|
459
|
+
Generates a telemetry wrapper for OpenAI audio creation.
|
1472
460
|
"""
|
1473
461
|
|
1474
462
|
def wrapper(wrapped, instance, args, kwargs):
|
1475
463
|
"""
|
1476
|
-
Wraps the
|
1477
|
-
|
1478
|
-
This collects metrics such as execution time, cost, and handles errors
|
1479
|
-
gracefully, adding details to the trace for observability.
|
1480
|
-
|
1481
|
-
Args:
|
1482
|
-
wrapped: The original 'audio.speech.create' method to be wrapped.
|
1483
|
-
instance: The instance of the class where the original method is defined.
|
1484
|
-
args: Positional arguments for the 'audio.speech.create' method.
|
1485
|
-
kwargs: Keyword arguments for the 'audio.speech.create' method.
|
1486
|
-
|
1487
|
-
Returns:
|
1488
|
-
The response from the original 'audio.speech.create' method.
|
464
|
+
Wraps the OpenAI audio creation call.
|
1489
465
|
"""
|
1490
466
|
|
1491
467
|
server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
|
@@ -1499,77 +475,27 @@ def audio_create(version, environment, application_name,
|
|
1499
475
|
end_time = time.time()
|
1500
476
|
|
1501
477
|
try:
|
1502
|
-
|
1503
|
-
|
1504
|
-
pricing_info, kwargs.get("input", ""))
|
1505
|
-
|
1506
|
-
# Set Span attributes
|
1507
|
-
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
1508
|
-
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
1509
|
-
SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO)
|
1510
|
-
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
1511
|
-
SemanticConvention.GEN_AI_SYSTEM_OPENAI)
|
1512
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
1513
|
-
request_model)
|
1514
|
-
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
1515
|
-
server_address)
|
1516
|
-
span.set_attribute(SemanticConvention.SERVER_PORT,
|
1517
|
-
server_port)
|
1518
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
1519
|
-
request_model)
|
1520
|
-
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
1521
|
-
"speech")
|
1522
|
-
|
1523
|
-
span.set_attribute(DEPLOYMENT_ENVIRONMENT,
|
1524
|
-
environment)
|
1525
|
-
span.set_attribute(SERVICE_NAME,
|
1526
|
-
application_name)
|
1527
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_VOICE,
|
1528
|
-
kwargs.get("voice", "alloy"))
|
1529
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
|
1530
|
-
kwargs.get("response_format", "mp3"))
|
1531
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_SPEED,
|
1532
|
-
kwargs.get("speed", 1))
|
1533
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
1534
|
-
cost)
|
1535
|
-
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
1536
|
-
version)
|
1537
|
-
if capture_message_content:
|
1538
|
-
span.add_event(
|
1539
|
-
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
1540
|
-
attributes={
|
1541
|
-
SemanticConvention.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
|
1542
|
-
},
|
1543
|
-
)
|
1544
|
-
|
1545
|
-
span.set_status(Status(StatusCode.OK))
|
1546
|
-
|
1547
|
-
if disable_metrics is False:
|
1548
|
-
attributes = create_metrics_attributes(
|
1549
|
-
service_name=application_name,
|
1550
|
-
deployment_environment=environment,
|
1551
|
-
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO,
|
1552
|
-
system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
|
478
|
+
response = process_audio_response(
|
479
|
+
response=response,
|
1553
480
|
request_model=request_model,
|
1554
|
-
|
481
|
+
pricing_info=pricing_info,
|
1555
482
|
server_port=server_port,
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1559
|
-
metrics
|
1560
|
-
|
1561
|
-
|
1562
|
-
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
483
|
+
server_address=server_address,
|
484
|
+
environment=environment,
|
485
|
+
application_name=application_name,
|
486
|
+
metrics=metrics,
|
487
|
+
start_time=start_time,
|
488
|
+
end_time=end_time,
|
489
|
+
span=span,
|
490
|
+
capture_message_content=capture_message_content,
|
491
|
+
disable_metrics=disable_metrics,
|
492
|
+
version=version,
|
493
|
+
**kwargs
|
494
|
+
)
|
1567
495
|
|
1568
496
|
except Exception as e:
|
1569
497
|
handle_exception(span, e)
|
1570
|
-
logger.error("Error in trace creation: %s", e)
|
1571
498
|
|
1572
|
-
|
1573
|
-
return response
|
499
|
+
return response
|
1574
500
|
|
1575
501
|
return wrapper
|