openlit 1.34.20__py3-none-any.whl → 1.34.22__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,794 @@
+ """
+ OpenAI OpenTelemetry instrumentation utility functions
+ """
+ import time
+
+ from opentelemetry.trace import Status, StatusCode
+
+ from openlit.__helpers import (
+     calculate_ttft,
+     response_as_dict,
+     calculate_tbt,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     get_audio_model_cost,
+     get_image_model_cost,
+     general_tokens,
+     record_completion_metrics,
+     record_embedding_metrics,
+     record_audio_metrics,
+     record_image_metrics,
+     common_span_attributes,
+ )
+ from openlit.semcov import SemanticConvention
+
+ def format_content(messages):
+     """
+     Format the messages into a string for span events.
+     Handles both chat completions format and responses API input format.
+     """
+
+     if not messages:
+         return ""
+
+     # Handle string input (simple case)
+     if isinstance(messages, str):
+         return messages
+
+     # Handle list of messages
+     formatted_messages = []
+     for message in messages:
+         role = message.get("role", "user")
+         content = message.get("content", "")
+
+         if isinstance(content, list):
+             content_str_list = []
+             for item in content:
+                 # Chat completions format
+                 if item.get("type") == "text":
+                     content_str_list.append(f'text: {item.get("text", "")}')
+                 elif (item.get("type") == "image_url" and
+                       not item.get("image_url", {}).get("url", "").startswith("data:")):
+                     content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+
+                 # Responses API format
+                 elif item.get("type") == "input_text":
+                     content_str_list.append(f'text: {item.get("text", "")}')
+                 elif item.get("type") == "input_image":
+                     image_url = item.get("image_url", "")
+                     if image_url and not image_url.startswith("data:"):
+                         content_str_list.append(f'image_url: {image_url}')
+
+             content_str = ", ".join(content_str_list)
+             formatted_messages.append(f"{role}: {content_str}")
+         else:
+             formatted_messages.append(f"{role}: {content}")
+
+     return "\n".join(formatted_messages)
+
+ def process_chat_chunk(scope, chunk):
+     """
+     Process a chunk of chat response data and update state.
+     """
+
+     end_time = time.time()
+     scope._timestamps.append(end_time)
+
+     if len(scope._timestamps) == 1:
+         scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+     chunked = response_as_dict(chunk)
+
+     # Extract content from chat completions
+     if (len(chunked.get("choices", [])) > 0 and
+             "delta" in chunked.get("choices")[0]):
+
+         delta = chunked.get("choices")[0]["delta"]
+         content = delta.get("content")
+         if content:
+             scope._llmresponse += content
+
+         # Handle tool calls in streaming - optimized
+         delta_tools = delta.get("tool_calls")
+         if delta_tools:
+             scope._tools = scope._tools or []
+
+             for tool in delta_tools:
+                 idx = tool.get("index", 0)
+
+                 # Extend list if needed
+                 scope._tools.extend([{}] * (idx + 1 - len(scope._tools)))
+
+                 if tool.get("id"):  # New tool (id exists)
+                     func = tool.get("function", {})
+                     scope._tools[idx] = {
+                         "id": tool["id"],
+                         "function": {"name": func.get("name", ""), "arguments": func.get("arguments", "")},
+                         "type": tool.get("type", "function")
+                     }
+                 elif scope._tools[idx] and "function" in tool:  # Append args (id is None)
+                     scope._tools[idx]["function"]["arguments"] += tool["function"].get("arguments", "")
+
+     # Extract metadata
+     scope._response_id = chunked.get("id") or scope._response_id
+     scope._response_model = chunked.get("model") or scope._response_model
+
+     try:
+         scope._finish_reason = chunked.get("choices", [])[0].get("finish_reason") or scope._finish_reason
+     except (IndexError, AttributeError, TypeError):
+         scope._finish_reason = "stop"
+
+     scope._system_fingerprint = chunked.get("system_fingerprint") or scope._system_fingerprint
+     scope._service_tier = chunked.get("service_tier") or scope._service_tier
+
+ def process_response_chunk(scope, chunk):
+     """
+     Process a chunk of response API data and update state.
+     """
+
+     end_time = time.time()
+     scope._timestamps.append(end_time)
+
+     if len(scope._timestamps) == 1:
+         scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+     chunked = response_as_dict(chunk)
+
+     # Extract content from responses API
+     if chunked.get("type") == "response.output_text.delta":
+         scope._llmresponse += chunked.get("delta", "")
+
+     # Handle tool calls in streaming for responses API
+     elif chunked.get("type") == "response.output_item.added":
+         # New tool call item added
+         if not hasattr(scope, "_response_tools") or scope._response_tools is None:
+             scope._response_tools = []
+
+         item = chunked.get("item", {})
+         if item.get("type") == "function_call":
+             scope._response_tools.append({
+                 "id": item.get("id", ""),
+                 "call_id": item.get("call_id", ""),
+                 "name": item.get("name", ""),
+                 "type": item.get("type", "function_call"),
+                 "arguments": item.get("arguments", ""),
+                 "status": item.get("status", "in_progress")
+             })
+
+     elif chunked.get("type") == "response.function_call_arguments.delta":
+         # Tool arguments being streamed
+         if hasattr(scope, "_response_tools") and scope._response_tools:
+             item_id = chunked.get("item_id", "")
+             delta = chunked.get("delta", "")
+
+             # Find the tool by item_id and append arguments
+             for tool in scope._response_tools:
+                 if tool.get("id") == item_id:
+                     tool["arguments"] += delta
+                     break
+
+     elif chunked.get("type") == "response.function_call_arguments.done":
+         # Tool arguments complete
+         if hasattr(scope, "_response_tools") and scope._response_tools:
+             item_id = chunked.get("item_id", "")
+             final_arguments = chunked.get("arguments", "")
+
+             # Update the tool with final arguments
+             for tool in scope._response_tools:
+                 if tool.get("id") == item_id:
+                     tool["arguments"] = final_arguments
+                     break
+
+     elif chunked.get("type") == "response.output_item.done":
+         # Tool call item complete
+         if hasattr(scope, "_response_tools") and scope._response_tools:
+             item = chunked.get("item", {})
+             item_id = item.get("id", "")
+
+             # Update the tool with final status and data
+             for tool in scope._response_tools:
+                 if tool.get("id") == item_id:
+                     tool.update({
+                         "call_id": item.get("call_id", tool.get("call_id", "")),
+                         "name": item.get("name", tool.get("name", "")),
+                         "arguments": item.get("arguments", tool.get("arguments", "")),
+                         "status": item.get("status", "completed")
+                     })
+                     break
+
+     elif chunked.get("type") == "response.completed":
+         response_data = chunked.get("response", {})
+         scope._response_id = response_data.get("id")
+         scope._response_model = response_data.get("model")
+         scope._finish_reason = response_data.get("status")
+
+         usage = response_data.get("usage", {})
+         scope._input_tokens = usage.get("input_tokens", 0)
+         scope._output_tokens = usage.get("output_tokens", 0)
+
+         # Handle reasoning tokens
+         output_tokens_details = usage.get("output_tokens_details", {})
+         scope._reasoning_tokens = output_tokens_details.get("reasoning_tokens", 0)
+
+ def common_response_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version, is_stream):
+     """
+     Process responses API request and generate Telemetry
+     """
+
+     scope._end_time = time.time()
+     if len(scope._timestamps) > 1:
+         scope._tbt = calculate_tbt(scope._timestamps)
+
+     # For responses API, format input using the same function as chat completions
+     input_data = scope._kwargs.get("input", "")
+     prompt = format_content(input_data)
+     request_model = scope._kwargs.get("model", "gpt-4o")
+
+     # Calculate tokens and cost
+     if hasattr(scope, "_input_tokens") and scope._input_tokens:
+         input_tokens = scope._input_tokens
+         output_tokens = scope._output_tokens
+     else:
+         input_tokens = general_tokens(prompt)
+         output_tokens = general_tokens(scope._llmresponse)
+
+     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters specific to responses API
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_output_tokens", -1))
+
+     # Reasoning parameters
+     reasoning = scope._kwargs.get("reasoning", {})
+     if reasoning:
+         if reasoning.get("effort"):
+             scope._span.set_attribute("gen_ai.request.reasoning_effort", reasoning.get("effort"))
+
+     # Responses API specific attributes
+     if hasattr(scope, "_service_tier"):
+         scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER, scope._service_tier)
+
+     # Span Attributes for Response parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
+
+     # Span Attributes for Tools (responses API structure) - optimized
+     if hasattr(scope, "_response_tools") and scope._response_tools:
+         tools = scope._response_tools if isinstance(scope._response_tools, list) else [scope._response_tools]
+
+         names, ids, args = zip(*[
+             (t.get("name", ""),
+              str(t.get("call_id", "")),  # Use call_id for responses API
+              str(t.get("arguments", "")))
+             for t in tools if isinstance(t, dict) and t
+         ]) if tools else ([], [], [])
+
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Reasoning tokens
+     if hasattr(scope, "_reasoning_tokens") and scope._reasoning_tokens > 0:
+         scope._span.set_attribute("gen_ai.usage.reasoning_tokens", scope._reasoning_tokens)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+         # To be removed once the change to span_attributes (from span events) is complete
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+             },
+         )
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Record metrics
+     if not disable_metrics:
+         record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+             cost, scope._tbt, scope._ttft)
+
+ def process_streaming_response_response(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content=False, disable_metrics=False, version=""):
+     """
+     Process streaming responses API response and generate telemetry.
+     """
+
+     common_response_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=True)
+
+ def process_response_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process non-streaming responses API response and generate telemetry.
+     """
+
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+
+     # Extract content from responses API structure with reasoning support
+     output = response_dict.get("output", [])
+     scope._llmresponse = ""
+     scope._response_tools = None
+
+     if output:
+         # Find the message item in the output array (might not be first if reasoning is present)
+         message_item = None
+         for item in output:
+             if item.get("type") == "message":
+                 message_item = item
+                 break
+             if item.get("type") == "function_call":
+                 # Handle tool call
+                 scope._response_tools = [{
+                     "id": item.get("id", ""),
+                     "call_id": item.get("call_id", ""),
+                     "name": item.get("name", ""),
+                     "type": item.get("type", "function_call"),
+                     "arguments": item.get("arguments", ""),
+                     "status": item.get("status", "")
+                 }]
+
+         # Extract content from message item if found
+         if message_item:
+             content = message_item.get("content", [])
+             if content and len(content) > 0:
+                 scope._llmresponse = content[0].get("text", "")
+
+     scope._response_id = response_dict.get("id")
+     scope._response_model = response_dict.get("model")
+
+     # Handle token usage including reasoning tokens
+     usage = response_dict.get("usage", {})
+     scope._input_tokens = usage.get("input_tokens", 0)
+     scope._output_tokens = usage.get("output_tokens", 0)
+
+     output_tokens_details = usage.get("output_tokens_details", {})
+     scope._reasoning_tokens = output_tokens_details.get("reasoning_tokens", 0)
+
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._service_tier = response_dict.get("service_tier", "default")
+     scope._finish_reason = response_dict.get("status", "completed")
+
+     common_response_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=False)
+
+     return response
+
+ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version, is_stream):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     scope._end_time = time.time()
+     if len(scope._timestamps) > 1:
+         scope._tbt = calculate_tbt(scope._timestamps)
+
+     # Format messages for chat operations
+     if hasattr(scope, "_operation_type") and scope._operation_type == "responses":
+         # Handle responses API input format using format_content
+         input_data = scope._kwargs.get("input", "")
+         prompt = format_content(input_data)
+     else:
+         # Handle standard chat format
+         prompt = format_content(scope._kwargs.get("messages", []))
+
+     request_model = scope._kwargs.get("model", "gpt-4o")
+
+     # Calculate tokens and cost
+     if hasattr(scope, "_input_tokens") and scope._input_tokens:
+         input_tokens = scope._input_tokens
+         output_tokens = scope._output_tokens
+     else:
+         input_tokens = general_tokens(prompt)
+         output_tokens = general_tokens(scope._llmresponse)
+
+     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, str(scope._kwargs.get("seed", "")))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+     # Span Attributes for Response parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+     # OpenAI-specific attributes
+     if hasattr(scope, "_system_fingerprint"):
+         scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT, scope._system_fingerprint)
+     if hasattr(scope, "_service_tier"):
+         scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER, scope._service_tier)
+
+     # Span Attributes for Tools - optimized
+     if hasattr(scope, "_tools") and scope._tools:
+         tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
+
+         names, ids, args = zip(*[
+             (t.get("function", {}).get("name", ""),
+              str(t.get("id", "")),
+              str(t.get("function", {}).get("arguments", "")))
+             for t in tools if isinstance(t, dict) and t
+         ]) if tools else ([], [], [])
+
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+         # To be removed once the change to span_attributes (from span events) is complete
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+             },
+         )
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Record metrics
+     if not disable_metrics:
+         record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+             cost, scope._tbt, scope._ttft)
+
+ def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content=False, disable_metrics=False, version=""):
+     """
+     Process streaming chat response and generate telemetry.
+     """
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=True)
+
+ def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process non-streaming chat response and generate telemetry.
+     """
+
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     scope._llmresponse = " ".join(
+         (choice.get("message", {}).get("content") or "")
+         for choice in response_dict.get("choices", [])
+     )
+     scope._response_id = response_dict.get("id")
+     scope._response_model = response_dict.get("model")
+     scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+     scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._system_fingerprint = response_dict.get("system_fingerprint", "")
+     scope._service_tier = response_dict.get("service_tier", "auto")
+     scope._finish_reason = str(response_dict.get("choices", [])[0].get("finish_reason", "")) if response_dict.get("choices") else ""
+
+     # Handle operation type for responses API
+     if kwargs.get("_operation_type") == "responses":
+         scope._operation_type = "responses"
+
+     # Handle tool calls
+     if kwargs.get("tools"):
+         scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+     else:
+         scope._tools = None
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=False)
+
+     return response
+
+ def common_embedding_logic(scope, request_model, pricing_info, environment, application_name,
+     metrics, capture_message_content, disable_metrics, version):
+     """
+     Common logic for processing embedding operations.
+     """
+
+     # Calculate cost
+     cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+         scope._server_address, scope._server_port, request_model, request_model,
+         environment, application_name, False, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, [scope._kwargs.get("encoding_format", "float")])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         input_data = scope._kwargs.get("input", "")
+         formatted_content = format_content(input_data) if isinstance(input_data, (list, dict)) else str(input_data)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, formatted_content)
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Record metrics
+     if not disable_metrics:
+         record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+             scope._server_address, scope._server_port, request_model, request_model, environment,
+             application_name, scope._start_time, scope._end_time, scope._input_tokens, cost)
+
+ def common_image_logic(scope, request_model, pricing_info, environment, application_name,
+     metrics, capture_message_content, disable_metrics, version):
+     """
+     Common logic for processing image operations.
+     """
+
+     # Calculate cost
+     cost = get_image_model_cost(request_model, pricing_info,
+         scope._kwargs.get("size", "1024x1024"),
+         scope._kwargs.get("quality", "standard"))
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+         scope._server_address, scope._server_port, request_model, request_model,
+         environment, application_name, False, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE, scope._kwargs.get("size", "1024x1024"))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_QUALITY, scope._kwargs.get("quality", "standard"))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+     # Extract response data
+     response_dict = scope._response_dict
+     images_data = response_dict.get("data", [])
+     response_created = response_dict.get("created")
+     response_size = response_dict.get("size")
+     response_quality = response_dict.get("quality")
+     response_output_format = response_dict.get("output_format")
+
+     # Span Attributes for Response
+     if response_created:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, str(response_created))
+
+     # Process image data and collect URLs/base64 content
+     if images_data:
+         # Collect image URLs or base64 content
+         image_contents = []
+
+         for image in images_data:
+             # Collect image content (URL or base64)
+             if image.get("url"):
+                 image_contents.append(image["url"])
+             elif image.get("b64_json"):
+                 # For base64, we typically don't want to store the full content in spans
+                 # Just indicate it's base64 format
+                 image_contents.append("[base64_image_data]")
+
+         # Set image response data using semantic conventions
+         if image_contents:
+             scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_IMAGE, image_contents)
+
+         # Response-level attributes if different from request
+         if response_size:
+             scope._span.set_attribute("gen_ai.response.image_size", response_size)
+         if response_quality:
+             scope._span.set_attribute("gen_ai.response.image_quality", response_quality)
+         if response_output_format:
+             scope._span.set_attribute("gen_ai.response.output_format", response_output_format)
+
+     # Span Attributes for Cost
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         # Always collect the original prompt
+         prompt = scope._kwargs.get("prompt", "")
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+
+         # Collect and set revised prompts if available
+         if images_data:
+             revised_prompts = []
+             for image in images_data:
+                 if image.get("revised_prompt"):
+                     revised_prompts.append(image["revised_prompt"])
+
+             # Set revised prompts as span attribute if any were found
+             if revised_prompts:
+                 scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT, revised_prompts)
+
+             # Add revised prompt events for detailed tracking
+             for i, image in enumerate(images_data):
+                 if image.get("revised_prompt"):
+                     scope._span.add_event(
+                         name=SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT,
+                         attributes={
+                             SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT: image["revised_prompt"],
+                             "image_index": i,
+                         },
+                     )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Record metrics
+     if not disable_metrics:
+         record_image_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+             scope._server_address, scope._server_port, request_model, request_model, environment,
+             application_name, scope._start_time, scope._end_time, cost)
+
+ def common_audio_logic(scope, request_model, pricing_info, environment, application_name,
+     metrics, capture_message_content, disable_metrics, version):
+     """
+     Common logic for processing audio operations.
+     """
+
+     # Calculate cost
+     input_text = scope._kwargs.get("input", "")
+     cost = get_audio_model_cost(request_model, pricing_info, input_text)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+         scope._server_address, scope._server_port, request_model, request_model,
+         environment, application_name, False, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_VOICE, scope._kwargs.get("voice", "alloy"))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT, scope._kwargs.get("response_format", "mp3"))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_SPEED, scope._kwargs.get("speed", 1.0))
+
+     # Span Attributes for Cost
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         input_text = scope._kwargs.get("input", "")
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, input_text)
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Record metrics
+     if not disable_metrics:
+         record_audio_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO, SemanticConvention.GEN_AI_SYSTEM_OPENAI,
+             scope._server_address, scope._server_port, request_model, request_model, environment,
+             application_name, scope._start_time, scope._end_time, cost)
+
+ def process_audio_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, end_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process audio generation response and generate telemetry.
+     """
+
+     scope = type("GenericScope", (), {})()
+
+     scope._start_time = start_time
+     scope._end_time = end_time
+     scope._span = span
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+
+     common_audio_logic(scope, request_model, pricing_info, environment, application_name,
+         metrics, capture_message_content, disable_metrics, version)
+
+     return response
+
+ def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process embedding response and generate telemetry.
+     """
+
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+
+     common_embedding_logic(scope, request_model, pricing_info, environment, application_name,
+         metrics, capture_message_content, disable_metrics, version)
+
+     return response
+
+ def process_image_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, end_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process image generation response and generate telemetry.
+     """
+
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = end_time
+     scope._span = span
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._response_dict = response_dict
+
+     common_image_logic(scope, request_model, pricing_info, environment, application_name,
+         metrics, capture_message_content, disable_metrics, version)
+
+     return response
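
For reference, below is a minimal usage sketch of the format_content helper added in this release, showing how it flattens both input shapes into the string that lands on span events. This sketch is not part of the diff: the import path is an assumption inferred from the module docstring (the diff does not name the file it creates), and the printed results are traced by hand from the function body above.

# Hypothetical usage sketch; the module path below is an assumption.
from openlit.instrumentation.openai.utils import format_content

# Chat completions shape: content may be a plain string or a list of typed parts.
chat_messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ]},
]
print(format_content(chat_messages))
# system: You are terse.
# user: text: Describe this image., image_url: https://example.com/cat.png

# Responses API shape: input_text / input_image parts; data: URLs are dropped.
responses_input = [
    {"role": "user", "content": [
        {"type": "input_text", "text": "What is in the photo?"},
        {"type": "input_image", "image_url": "data:image/png;base64,AAAA"},
    ]},
]
print(format_content(responses_input))
# user: text: What is in the photo?

Note how the base64 data: URL in the second example is filtered out, matching the startswith("data:") checks in format_content, which keep large inline payloads off the span.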