openlit 1.34.20__py3-none-any.whl → 1.34.22__py3-none-any.whl
This diff shows the contents of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- openlit/__helpers.py +40 -0
- openlit/instrumentation/openai/__init__.py +63 -68
- openlit/instrumentation/openai/async_openai.py +203 -1277
- openlit/instrumentation/openai/openai.py +200 -1274
- openlit/instrumentation/openai/utils.py +794 -0
- openlit/instrumentation/vertexai/__init__.py +18 -23
- openlit/instrumentation/vertexai/async_vertexai.py +46 -364
- openlit/instrumentation/vertexai/utils.py +204 -0
- openlit/instrumentation/vertexai/vertexai.py +46 -364
- {openlit-1.34.20.dist-info → openlit-1.34.22.dist-info}/METADATA +1 -1
- {openlit-1.34.20.dist-info → openlit-1.34.22.dist-info}/RECORD +13 -11
- {openlit-1.34.20.dist-info → openlit-1.34.22.dist-info}/LICENSE +0 -0
- {openlit-1.34.20.dist-info → openlit-1.34.22.dist-info}/WHEEL +0 -0
openlit/instrumentation/openai/utils.py
@@ -0,0 +1,794 @@
+"""
+OpenAI OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    get_chat_model_cost,
+    get_embed_model_cost,
+    get_audio_model_cost,
+    get_image_model_cost,
+    general_tokens,
+    record_completion_metrics,
+    record_embedding_metrics,
+    record_audio_metrics,
+    record_image_metrics,
+    common_span_attributes,
+)
+from openlit.semcov import SemanticConvention
+
+def format_content(messages):
+    """
+    Format the messages into a string for span events.
+    Handles both chat completions format and responses API input format.
+    """
+
+    if not messages:
+        return ""
+
+    # Handle string input (simple case)
+    if isinstance(messages, str):
+        return messages
+
+    # Handle list of messages
+    formatted_messages = []
+    for message in messages:
+        role = message.get("role", "user")
+        content = message.get("content", "")
+
+        if isinstance(content, list):
+            content_str_list = []
+            for item in content:
+                # Chat completions format
+                if item.get("type") == "text":
+                    content_str_list.append(f'text: {item.get("text", "")}')
+                elif (item.get("type") == "image_url" and
+                      not item.get("image_url", {}).get("url", "").startswith("data:")):
+                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+
+                # Responses API format
+                elif item.get("type") == "input_text":
+                    content_str_list.append(f'text: {item.get("text", "")}')
+                elif item.get("type") == "input_image":
+                    image_url = item.get("image_url", "")
+                    if image_url and not image_url.startswith("data:"):
+                        content_str_list.append(f'image_url: {image_url}')
+
+            content_str = ", ".join(content_str_list)
+            formatted_messages.append(f"{role}: {content_str}")
+        else:
+            formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
+
+def process_chat_chunk(scope, chunk):
+    """
+    Process a chunk of chat response data and update state.
+    """
+
+    end_time = time.time()
+    scope._timestamps.append(end_time)
+
+    if len(scope._timestamps) == 1:
+        scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Extract content from chat completions
+    if (len(chunked.get("choices", [])) > 0 and
+        "delta" in chunked.get("choices")[0]):
+
+        delta = chunked.get("choices")[0]["delta"]
+        content = delta.get("content")
+        if content:
+            scope._llmresponse += content
+
+        # Handle tool calls in streaming - optimized
+        delta_tools = delta.get("tool_calls")
+        if delta_tools:
+            scope._tools = scope._tools or []
+
+            for tool in delta_tools:
+                idx = tool.get("index", 0)
+
+                # Extend list if needed
+                scope._tools.extend([{}] * (idx + 1 - len(scope._tools)))
+
+                if tool.get("id"):  # New tool (id exists)
+                    func = tool.get("function", {})
+                    scope._tools[idx] = {
+                        "id": tool["id"],
+                        "function": {"name": func.get("name", ""), "arguments": func.get("arguments", "")},
+                        "type": tool.get("type", "function")
+                    }
+                elif scope._tools[idx] and "function" in tool:  # Append args (id is None)
+                    scope._tools[idx]["function"]["arguments"] += tool["function"].get("arguments", "")
+
+    # Extract metadata
+    scope._response_id = chunked.get("id") or scope._response_id
+    scope._response_model = chunked.get("model") or scope._response_model
+
+    try:
+        scope._finish_reason = chunked.get("choices", [])[0].get("finish_reason") or scope._finish_reason
+    except (IndexError, AttributeError, TypeError):
+        scope._finish_reason = "stop"
+
+    scope._system_fingerprint = chunked.get("system_fingerprint") or scope._system_fingerprint
+    scope._service_tier = chunked.get("service_tier") or scope._service_tier
+
+def process_response_chunk(scope, chunk):
+    """
+    Process a chunk of response API data and update state.
+    """
+
+    end_time = time.time()
+    scope._timestamps.append(end_time)
+
+    if len(scope._timestamps) == 1:
+        scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Extract content from responses API
+    if chunked.get("type") == "response.output_text.delta":
+        scope._llmresponse += chunked.get("delta", "")
+
+    # Handle tool calls in streaming for responses API
+    elif chunked.get("type") == "response.output_item.added":
+        # New tool call item added
+        if not hasattr(scope, "_response_tools") or scope._response_tools is None:
+            scope._response_tools = []
+
+        item = chunked.get("item", {})
+        if item.get("type") == "function_call":
+            scope._response_tools.append({
+                "id": item.get("id", ""),
+                "call_id": item.get("call_id", ""),
+                "name": item.get("name", ""),
+                "type": item.get("type", "function_call"),
+                "arguments": item.get("arguments", ""),
+                "status": item.get("status", "in_progress")
+            })
+
+    elif chunked.get("type") == "response.function_call_arguments.delta":
+        # Tool arguments being streamed
+        if hasattr(scope, "_response_tools") and scope._response_tools:
+            item_id = chunked.get("item_id", "")
+            delta = chunked.get("delta", "")
+
+            # Find the tool by item_id and append arguments
+            for tool in scope._response_tools:
+                if tool.get("id") == item_id:
+                    tool["arguments"] += delta
+                    break
+
+    elif chunked.get("type") == "response.function_call_arguments.done":
+        # Tool arguments complete
+        if hasattr(scope, "_response_tools") and scope._response_tools:
+            item_id = chunked.get("item_id", "")
+            final_arguments = chunked.get("arguments", "")
+
+            # Update the tool with final arguments
+            for tool in scope._response_tools:
+                if tool.get("id") == item_id:
+                    tool["arguments"] = final_arguments
+                    break
+
+    elif chunked.get("type") == "response.output_item.done":
+        # Tool call item complete
+        if hasattr(scope, "_response_tools") and scope._response_tools:
+            item = chunked.get("item", {})
+            item_id = item.get("id", "")
+
+            # Update the tool with final status and data
+            for tool in scope._response_tools:
+                if tool.get("id") == item_id:
+                    tool.update({
+                        "call_id": item.get("call_id", tool.get("call_id", "")),
+                        "name": item.get("name", tool.get("name", "")),
+                        "arguments": item.get("arguments", tool.get("arguments", "")),
+                        "status": item.get("status", "completed")
+                    })
+                    break
+
+    elif chunked.get("type") == "response.completed":
+        response_data = chunked.get("response", {})
+        scope._response_id = response_data.get("id")
+        scope._response_model = response_data.get("model")
+        scope._finish_reason = response_data.get("status")
+
+        usage = response_data.get("usage", {})
+        scope._input_tokens = usage.get("input_tokens", 0)
+        scope._output_tokens = usage.get("output_tokens", 0)
+
+        # Handle reasoning tokens
+        output_tokens_details = usage.get("output_tokens_details", {})
+        scope._reasoning_tokens = output_tokens_details.get("reasoning_tokens", 0)
+
+def common_response_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process responses API request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    # For responses API, format input using the same function as chat completions
+    input_data = scope._kwargs.get("input", "")
+    prompt = format_content(input_data)
+    request_model = scope._kwargs.get("model", "gpt-4o")
+
+    # Calculate tokens and cost
+    if hasattr(scope, "_input_tokens") and scope._input_tokens:
+        input_tokens = scope._input_tokens
+        output_tokens = scope._output_tokens
+    else:
+        input_tokens = general_tokens(prompt)
+        output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters specific to responses API
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_output_tokens", -1))
+
+    # Reasoning parameters
+    reasoning = scope._kwargs.get("reasoning", {})
+    if reasoning:
+        if reasoning.get("effort"):
+            scope._span.set_attribute("gen_ai.request.reasoning_effort", reasoning.get("effort"))
+
+    # Responses API specific attributes
+    if hasattr(scope, "_service_tier"):
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER, scope._service_tier)
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+
+    # Span Attributes for Tools (responses API structure) - optimized
+    if hasattr(scope, "_response_tools") and scope._response_tools:
+        tools = scope._response_tools if isinstance(scope._response_tools, list) else [scope._response_tools]
+
+        names, ids, args = zip(*[
+            (t.get("name", ""),
+             str(t.get("call_id", "")),  # Use call_id for responses API
+             str(t.get("arguments", "")))
+            for t in tools if isinstance(t, dict) and t
+        ]) if tools else ([], [], [])
+
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Reasoning tokens
+    if hasattr(scope, "_reasoning_tokens") and scope._reasoning_tokens > 0:
+        scope._span.set_attribute("gen_ai.usage.reasoning_tokens", scope._reasoning_tokens)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_streaming_response_response(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content=False, disable_metrics=False, version=""):
+    """
+    Process streaming responses API response and generate telemetry.
+    """
+
+    common_response_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_response_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process non-streaming responses API response and generate telemetry.
+    """
+
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+
+    # Extract content from responses API structure with reasoning support
+    output = response_dict.get("output", [])
+    scope._llmresponse = ""
+    scope._response_tools = None
+
+    if output:
+        # Find the message item in the output array (might not be first if reasoning is present)
+        message_item = None
+        for item in output:
+            if item.get("type") == "message":
+                message_item = item
+                break
+            if item.get("type") == "function_call":
+                # Handle tool call
+                scope._response_tools = [{
+                    "id": item.get("id", ""),
+                    "call_id": item.get("call_id", ""),
+                    "name": item.get("name", ""),
+                    "type": item.get("type", "function_call"),
+                    "arguments": item.get("arguments", ""),
+                    "status": item.get("status", "")
+                }]
+
+        # Extract content from message item if found
+        if message_item:
+            content = message_item.get("content", [])
+            if content and len(content) > 0:
+                scope._llmresponse = content[0].get("text", "")
+
+    scope._response_id = response_dict.get("id")
+    scope._response_model = response_dict.get("model")
+
+    # Handle token usage including reasoning tokens
+    usage = response_dict.get("usage", {})
+    scope._input_tokens = usage.get("input_tokens", 0)
+    scope._output_tokens = usage.get("output_tokens", 0)
+
+    output_tokens_details = usage.get("output_tokens_details", {})
+    scope._reasoning_tokens = output_tokens_details.get("reasoning_tokens", 0)
+
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+    scope._service_tier = response_dict.get("service_tier", "default")
+    scope._finish_reason = response_dict.get("status", "completed")
+
+    common_response_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    # Format messages for chat operations
+    if hasattr(scope, "_operation_type") and scope._operation_type == "responses":
+        # Handle responses API input format using format_content
+        input_data = scope._kwargs.get("input", "")
+        prompt = format_content(input_data)
+    else:
+        # Handle standard chat format
+        prompt = format_content(scope._kwargs.get("messages", []))
+
+    request_model = scope._kwargs.get("model", "gpt-4o")
+
+    # Calculate tokens and cost
+    if hasattr(scope, "_input_tokens") and scope._input_tokens:
+        input_tokens = scope._input_tokens
+        output_tokens = scope._output_tokens
+    else:
+        input_tokens = general_tokens(prompt)
+        output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, str(scope._kwargs.get("seed", "")))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # OpenAI-specific attributes
+    if hasattr(scope, "_system_fingerprint"):
+        scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT, scope._system_fingerprint)
+    if hasattr(scope, "_service_tier"):
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER, scope._service_tier)
+
+    # Span Attributes for Tools - optimized
+    if hasattr(scope, "_tools") and scope._tools:
+        tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
+
+        names, ids, args = zip(*[
+            (t.get("function", {}).get("name", ""),
+             str(t.get("id", "")),
+             str(t.get("function", {}).get("arguments", "")))
+            for t in tools if isinstance(t, dict) and t
+        ]) if tools else ([], [], [])
+
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content=False, disable_metrics=False, version=""):
+    """
+    Process streaming chat response and generate telemetry.
+    """
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process non-streaming chat response and generate telemetry.
+    """
+
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._llmresponse = " ".join(
+        (choice.get("message", {}).get("content") or "")
+        for choice in response_dict.get("choices", [])
+    )
+    scope._response_id = response_dict.get("id")
+    scope._response_model = response_dict.get("model")
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+    scope._system_fingerprint = response_dict.get("system_fingerprint", "")
+    scope._service_tier = response_dict.get("service_tier", "auto")
+    scope._finish_reason = str(response_dict.get("choices", [])[0].get("finish_reason", "")) if response_dict.get("choices") else ""
+
+    # Handle operation type for responses API
+    if kwargs.get("_operation_type") == "responses":
+        scope._operation_type = "responses"
+
+    # Handle tool calls
+    if kwargs.get("tools"):
+        scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+    else:
+        scope._tools = None
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+def common_embedding_logic(scope, request_model, pricing_info, environment, application_name,
+    metrics, capture_message_content, disable_metrics, version):
+    """
+    Common logic for processing embedding operations.
+    """
+
+    # Calculate cost
+    cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, False, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, [scope._kwargs.get("encoding_format", "float")])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        input_data = scope._kwargs.get("input", "")
+        formatted_content = format_content(input_data) if isinstance(input_data, (list, dict)) else str(input_data)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, formatted_content)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics
+    if not disable_metrics:
+        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, cost)
+
+def common_image_logic(scope, request_model, pricing_info, environment, application_name,
+    metrics, capture_message_content, disable_metrics, version):
+    """
+    Common logic for processing image operations.
+    """
+
+    # Calculate cost
+    cost = get_image_model_cost(request_model, pricing_info,
+        scope._kwargs.get("size", "1024x1024"),
+        scope._kwargs.get("quality", "standard"))
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, False, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE, scope._kwargs.get("size", "1024x1024"))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_QUALITY, scope._kwargs.get("quality", "standard"))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+    # Extract response data
+    response_dict = scope._response_dict
+    images_data = response_dict.get("data", [])
+    response_created = response_dict.get("created")
+    response_size = response_dict.get("size")
+    response_quality = response_dict.get("quality")
+    response_output_format = response_dict.get("output_format")
+
+    # Span Attributes for Response
+    if response_created:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, str(response_created))
+
+    # Process image data and collect URLs/base64 content
+    if images_data:
+        # Collect image URLs or base64 content
+        image_contents = []
+
+        for image in images_data:
+            # Collect image content (URL or base64)
+            if image.get("url"):
+                image_contents.append(image["url"])
+            elif image.get("b64_json"):
+                # For base64, we typically don't want to store the full content in spans
+                # Just indicate it's base64 format
+                image_contents.append("[base64_image_data]")
+
+        # Set image response data using semantic conventions
+        if image_contents:
+            scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_IMAGE, image_contents)
+
+        # Response-level attributes if different from request
+        if response_size:
+            scope._span.set_attribute("gen_ai.response.image_size", response_size)
+        if response_quality:
+            scope._span.set_attribute("gen_ai.response.image_quality", response_quality)
+        if response_output_format:
+            scope._span.set_attribute("gen_ai.response.output_format", response_output_format)
+
+    # Span Attributes for Cost
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        # Always collect the original prompt
+        prompt = scope._kwargs.get("prompt", "")
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+
+        # Collect and set revised prompts if available
+        if images_data:
+            revised_prompts = []
+            for image in images_data:
+                if image.get("revised_prompt"):
+                    revised_prompts.append(image["revised_prompt"])
+
+            # Set revised prompts as span attribute if any were found
+            if revised_prompts:
+                scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT, revised_prompts)
+
+            # Add revised prompt events for detailed tracking
+            for i, image in enumerate(images_data):
+                if image.get("revised_prompt"):
+                    scope._span.add_event(
+                        name=SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT,
+                        attributes={
+                            SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT: image["revised_prompt"],
+                            "image_index": i,
+                        },
+                    )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics
+    if not disable_metrics:
+        record_image_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, cost)
+
+def common_audio_logic(scope, request_model, pricing_info, environment, application_name,
+    metrics, capture_message_content, disable_metrics, version):
+    """
+    Common logic for processing audio operations.
+    """
+
+    # Calculate cost
+    input_text = scope._kwargs.get("input", "")
+    cost = get_audio_model_cost(request_model, pricing_info, input_text)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, False, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_VOICE, scope._kwargs.get("voice", "alloy"))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT, scope._kwargs.get("response_format", "mp3"))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_SPEED, scope._kwargs.get("speed", 1.0))
+
+    # Span Attributes for Cost
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        input_text = scope._kwargs.get("input", "")
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, input_text)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Record metrics
+    if not disable_metrics:
+        record_audio_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, cost)
+
+def process_audio_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, end_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process audio generation response and generate telemetry.
+    """
+
+    scope = type("GenericScope", (), {})()
+
+    scope._start_time = start_time
+    scope._end_time = end_time
+    scope._span = span
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    common_audio_logic(scope, request_model, pricing_info, environment, application_name,
+        metrics, capture_message_content, disable_metrics, version)
+
+    return response
+
+def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process embedding response and generate telemetry.
+    """
+
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    common_embedding_logic(scope, request_model, pricing_info, environment, application_name,
+        metrics, capture_message_content, disable_metrics, version)
+
+    return response
+
+def process_image_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, end_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process image generation response and generate telemetry.
+    """
+
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = end_time
+    scope._span = span
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+    scope._response_dict = response_dict
+
+    common_image_logic(scope, request_model, pricing_info, environment, application_name,
+        metrics, capture_message_content, disable_metrics, version)
+
+    return response
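
Since format_content in the new utils module is a pure helper, its behavior can be checked in isolation. Below is a minimal sketch, assuming openlit 1.34.22 is installed; the sample messages are made up for illustration and do not come from the diff.

    # Minimal sketch: exercising format_content from the new OpenAI utils module.
    # Assumes openlit 1.34.22 is installed; the message payloads are hypothetical.
    from openlit.instrumentation.openai.utils import format_content

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ]},
    ]

    # Multi-part content is flattened to "type: value" pairs; data: URLs are skipped.
    print(format_content(messages))
    # system: You are a helpful assistant.
    # user: text: What is in this image?, image_url: https://example.com/cat.png

The same helper also accepts the Responses API input format (input_text / input_image items), which is why both sets of branches appear in the item loop above.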