paid-python: paid_python-0.3.4-py3-none-any.whl → paid_python-0.3.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. paid/_vendor/__init__.py +0 -0
  2. paid/_vendor/opentelemetry/__init__.py +0 -0
  3. paid/_vendor/opentelemetry/instrumentation/__init__.py +0 -0
  4. paid/_vendor/opentelemetry/instrumentation/openai/__init__.py +54 -0
  5. paid/_vendor/opentelemetry/instrumentation/openai/shared/__init__.py +399 -0
  6. paid/_vendor/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1192 -0
  7. paid/_vendor/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +292 -0
  8. paid/_vendor/opentelemetry/instrumentation/openai/shared/config.py +15 -0
  9. paid/_vendor/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +311 -0
  10. paid/_vendor/opentelemetry/instrumentation/openai/shared/event_emitter.py +108 -0
  11. paid/_vendor/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
  12. paid/_vendor/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
  13. paid/_vendor/opentelemetry/instrumentation/openai/shared/span_utils.py +0 -0
  14. paid/_vendor/opentelemetry/instrumentation/openai/utils.py +190 -0
  15. paid/_vendor/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
  16. paid/_vendor/opentelemetry/instrumentation/openai/v1/__init__.py +358 -0
  17. paid/_vendor/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +329 -0
  18. paid/_vendor/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +134 -0
  19. paid/_vendor/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +996 -0
  20. paid/_vendor/opentelemetry/instrumentation/openai/version.py +1 -0
  21. paid/tracing/autoinstrumentation.py +5 -6
  22. paid/tracing/tracing.py +14 -3
  23. {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/METADATA +2 -3
  24. {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/RECORD +26 -6
  25. {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/LICENSE +0 -0
  26. {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/WHEEL +0 -0
paid/_vendor/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
@@ -0,0 +1,1192 @@
+ import copy
+ import json
+ import logging
+ import threading
+ import time
+ from functools import singledispatch
+ from typing import List, Optional, Union
+
+ from opentelemetry import context as context_api
+ import pydantic
+ from paid._vendor.opentelemetry.instrumentation.openai.shared import (
+ OPENAI_LLM_USAGE_TOKEN_TYPES,
+ _get_openai_base_url,
+ _set_client_attributes,
+ _set_functions_attributes,
+ _set_request_attributes,
+ _set_response_attributes,
+ _set_span_attribute,
+ _set_span_stream_usage,
+ _token_type,
+ is_streaming_response,
+ metric_shared_attributes,
+ model_as_dict,
+ propagate_trace_context,
+ set_tools_attributes,
+ )
+ from paid._vendor.opentelemetry.instrumentation.openai.shared.config import Config
+ from paid._vendor.opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
+ from paid._vendor.opentelemetry.instrumentation.openai.shared.event_models import (
+ ChoiceEvent,
+ MessageEvent,
+ ToolCall,
+ )
+ from paid._vendor.opentelemetry.instrumentation.openai.utils import (
+ _with_chat_telemetry_wrapper,
+ dont_throw,
+ is_openai_v1,
+ run_async,
+ should_emit_events,
+ should_send_prompts,
+ )
+ from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+ from opentelemetry.metrics import Counter, Histogram
+ from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+ from opentelemetry.semconv._incubating.attributes import (
+ gen_ai_attributes as GenAIAttributes,
+ )
+ from opentelemetry.semconv_ai import (
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+ LLMRequestTypeValues,
+ SpanAttributes,
+ )
+ from opentelemetry.trace import SpanKind, Tracer
+ from opentelemetry import trace
+ from opentelemetry.trace.status import Status, StatusCode
+ from wrapt import ObjectProxy
+
+ SPAN_NAME = "openai.chat"
+ PROMPT_FILTER_KEY = "prompt_filter_results"
+ CONTENT_FILTER_KEY = "content_filter_results"
+
+ LLM_REQUEST_TYPE = LLMRequestTypeValues.CHAT
+
+ logger = logging.getLogger(__name__)
+
+
+ @_with_chat_telemetry_wrapper
+ def chat_wrapper(
+ tracer: Tracer,
+ token_counter: Counter,
+ choice_counter: Counter,
+ duration_histogram: Histogram,
+ exception_counter: Counter,
+ streaming_time_to_first_token: Histogram,
+ streaming_time_to_generate: Histogram,
+ wrapped,
+ instance,
+ args,
+ kwargs,
+ ):
+ if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+ ):
+ return wrapped(*args, **kwargs)
+ # span needs to be opened and closed manually because the response is a generator
+
+ span = tracer.start_span(
+ SPAN_NAME,
+ kind=SpanKind.CLIENT,
+ attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+ )
+
+ # Use the span as current context to ensure events get proper trace context
+ with trace.use_span(span, end_on_exit=False):
+ run_async(_handle_request(span, kwargs, instance))
+ try:
+ start_time = time.time()
+ response = wrapped(*args, **kwargs)
+ end_time = time.time()
+ except Exception as e: # pylint: disable=broad-except
+ end_time = time.time()
+ duration = end_time - start_time if "start_time" in locals() else 0
+
+ attributes = {
+ "error.type": e.__class__.__name__,
+ }
+
+ if duration > 0 and duration_histogram:
+ duration_histogram.record(duration, attributes=attributes)
+ if exception_counter:
+ exception_counter.add(1, attributes=attributes)
+
+ span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+ span.record_exception(e)
+ span.set_status(Status(StatusCode.ERROR, str(e)))
+ span.end()
+
+ raise
+
+ if is_streaming_response(response):
+ # span will be closed after the generator is done
+ if is_openai_v1():
+ return ChatStream(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+ else:
+ return _build_from_streaming_response(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+
+ duration = end_time - start_time
+
+ _handle_response(
+ response,
+ span,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ duration,
+ )
+
+ span.end()
+
+ return response
+
+
+ @_with_chat_telemetry_wrapper
+ async def achat_wrapper(
+ tracer: Tracer,
+ token_counter: Counter,
+ choice_counter: Counter,
+ duration_histogram: Histogram,
+ exception_counter: Counter,
+ streaming_time_to_first_token: Histogram,
+ streaming_time_to_generate: Histogram,
+ wrapped,
+ instance,
+ args,
+ kwargs,
+ ):
+ if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+ ):
+ return await wrapped(*args, **kwargs)
+
+ span = tracer.start_span(
+ SPAN_NAME,
+ kind=SpanKind.CLIENT,
+ attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+ )
+
+ # Use the span as current context to ensure events get proper trace context
+ with trace.use_span(span, end_on_exit=False):
+ await _handle_request(span, kwargs, instance)
+
+ try:
+ start_time = time.time()
+ response = await wrapped(*args, **kwargs)
+ end_time = time.time()
+ except Exception as e: # pylint: disable=broad-except
+ end_time = time.time()
+ duration = end_time - start_time if "start_time" in locals() else 0
+
+ common_attributes = Config.get_common_metrics_attributes()
+ attributes = {
+ **common_attributes,
+ "error.type": e.__class__.__name__,
+ }
+
+ if duration > 0 and duration_histogram:
+ duration_histogram.record(duration, attributes=attributes)
+ if exception_counter:
+ exception_counter.add(1, attributes=attributes)
+
+ span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+ span.record_exception(e)
+ span.set_status(Status(StatusCode.ERROR, str(e)))
+ span.end()
+
+ raise
+
+ if is_streaming_response(response):
+ # span will be closed after the generator is done
+ if is_openai_v1():
+ return ChatStream(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+ else:
+ return _abuild_from_streaming_response(
+ span,
+ response,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ streaming_time_to_first_token,
+ streaming_time_to_generate,
+ start_time,
+ kwargs,
+ )
+
+ duration = end_time - start_time
+
+ _handle_response(
+ response,
+ span,
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ duration,
+ )
+
+ span.end()
+
+ return response
+
+
+ @dont_throw
+ async def _handle_request(span, kwargs, instance):
+ _set_request_attributes(span, kwargs, instance)
+ _set_client_attributes(span, instance)
+ if should_emit_events():
+ for message in kwargs.get("messages", []):
+ emit_event(
+ MessageEvent(
+ content=message.get("content"),
+ role=message.get("role"),
+ tool_calls=_parse_tool_calls(
+ message.get("tool_calls", None)),
+ )
+ )
+ else:
+ if should_send_prompts():
+ await _set_prompts(span, kwargs.get("messages"))
+ if kwargs.get("functions"):
+ _set_functions_attributes(span, kwargs.get("functions"))
+ elif kwargs.get("tools"):
+ set_tools_attributes(span, kwargs.get("tools"))
+ if Config.enable_trace_context_propagation:
+ propagate_trace_context(span, kwargs)
+
+ # Reasoning request attributes
+ reasoning_effort = kwargs.get("reasoning_effort")
+ _set_span_attribute(
+ span,
+ SpanAttributes.LLM_REQUEST_REASONING_EFFORT,
+ reasoning_effort or ()
+ )
+
+
+ @dont_throw
+ def _handle_response(
+ response,
+ span,
+ instance=None,
+ token_counter=None,
+ choice_counter=None,
+ duration_histogram=None,
+ duration=None,
+ is_streaming: bool = False,
+ ):
+ if is_openai_v1():
+ response_dict = model_as_dict(response)
+ else:
+ response_dict = response
+
+ # metrics record
+ _set_chat_metrics(
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ response_dict,
+ duration,
+ is_streaming,
+ )
+
+ # span attributes
+ _set_response_attributes(span, response_dict)
+
+ # Reasoning usage attributes
+ usage = response_dict.get("usage")
+ reasoning_tokens = None
+ if usage:
+ # Support both dict-style and object-style `usage`
+ tokens_details = (
+ usage.get("completion_tokens_details") if isinstance(usage, dict)
+ else getattr(usage, "completion_tokens_details", None)
+ )
+
+ if tokens_details:
+ reasoning_tokens = (
+ tokens_details.get("reasoning_tokens", None) if isinstance(tokens_details, dict)
+ else getattr(tokens_details, "reasoning_tokens", None)
+ )
+
+ _set_span_attribute(
+ span,
+ SpanAttributes.LLM_USAGE_REASONING_TOKENS,
+ reasoning_tokens or 0,
+ )
+
+ if should_emit_events():
+ if response.choices is not None:
+ for choice in response.choices:
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(span, response_dict.get("choices"))
+
+ return response
+
+
+ def _set_chat_metrics(
+ instance,
+ token_counter,
+ choice_counter,
+ duration_histogram,
+ response_dict,
+ duration,
+ is_streaming: bool = False,
+ ):
+ shared_attributes = metric_shared_attributes(
+ response_model=response_dict.get("model") or None,
+ operation="chat",
+ server_address=_get_openai_base_url(instance),
+ is_streaming=is_streaming,
+ )
+
+ # token metrics
+ usage = response_dict.get("usage") # type: dict
+ if usage and token_counter:
+ _set_token_counter_metrics(token_counter, usage, shared_attributes)
+
+ # choices metrics
+ choices = response_dict.get("choices")
+ if choices and choice_counter:
+ _set_choice_counter_metrics(choice_counter, choices, shared_attributes)
+
+ # duration metrics
+ if duration and isinstance(duration, (float, int)) and duration_histogram:
+ duration_histogram.record(duration, attributes=shared_attributes)
+
+
+ def _set_choice_counter_metrics(choice_counter, choices, shared_attributes):
+ choice_counter.add(len(choices), attributes=shared_attributes)
+ for choice in choices:
+ attributes_with_reason = {**shared_attributes}
+ if choice.get("finish_reason"):
+ attributes_with_reason[SpanAttributes.LLM_RESPONSE_FINISH_REASON] = (
+ choice.get("finish_reason")
+ )
+ choice_counter.add(1, attributes=attributes_with_reason)
+
+
+ def _set_token_counter_metrics(token_counter, usage, shared_attributes):
+ for name, val in usage.items():
+ if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
+ attributes_with_token_type = {
+ **shared_attributes,
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: _token_type(name),
+ }
+ token_counter.record(val, attributes=attributes_with_token_type)
+
+
+ def _is_base64_image(item):
+ if not isinstance(item, dict):
+ return False
+
+ if not isinstance(item.get("image_url"), dict):
+ return False
+
+ if "data:image/" not in item.get("image_url", {}).get("url", ""):
+ return False
+
+ return True
+
+
+ async def _process_image_item(item, trace_id, span_id, message_index, content_index):
+ if not Config.upload_base64_image:
+ return item
+
+ image_format = item["image_url"]["url"].split(";")[0].split("/")[1]
+ image_name = f"message_{message_index}_content_{content_index}.{image_format}"
+ base64_string = item["image_url"]["url"].split(",")[1]
+ # Convert trace_id and span_id to strings as expected by upload function
+ url = await Config.upload_base64_image(str(trace_id), str(span_id), image_name, base64_string)
+
+ return {"type": "image_url", "image_url": {"url": url}}
+
+
+ @dont_throw
+ async def _set_prompts(span, messages):
+ if not span.is_recording() or messages is None:
+ return
+
+ for i, msg in enumerate(messages):
+ prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{i}"
+ msg = msg if isinstance(msg, dict) else model_as_dict(msg)
+
+ _set_span_attribute(span, f"{prefix}.role", msg.get("role"))
+ if msg.get("content"):
+ content = copy.deepcopy(msg.get("content"))
+ if isinstance(content, list):
+ content = [
+ (
+ await _process_image_item(
+ item, span.context.trace_id, span.context.span_id, i, j
+ )
+ if _is_base64_image(item)
+ else item
+ )
+ for j, item in enumerate(content)
+ ]
+
+ content = json.dumps(content)
+ _set_span_attribute(span, f"{prefix}.content", content)
+ if msg.get("tool_call_id"):
+ _set_span_attribute(
+ span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
+ tool_calls = msg.get("tool_calls")
+ if tool_calls:
+ for i, tool_call in enumerate(tool_calls):
+ if is_openai_v1():
+ tool_call = model_as_dict(tool_call)
+
+ function = tool_call.get("function")
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.{i}.id",
+ tool_call.get("id"),
+ )
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.{i}.name",
+ function.get("name"),
+ )
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.{i}.arguments",
+ function.get("arguments"),
+ )
+
+
+ def _set_completions(span, choices):
+ if choices is None:
+ return
+
+ for choice in choices:
+ index = choice.get("index")
+ prefix = f"{GenAIAttributes.GEN_AI_COMPLETION}.{index}"
+ _set_span_attribute(
+ span, f"{prefix}.finish_reason", choice.get("finish_reason")
+ )
+
+ if choice.get("content_filter_results"):
+ _set_span_attribute(
+ span,
+ f"{prefix}.{CONTENT_FILTER_KEY}",
+ json.dumps(choice.get("content_filter_results")),
+ )
+
+ if choice.get("finish_reason") == "content_filter":
+ _set_span_attribute(span, f"{prefix}.role", "assistant")
+ _set_span_attribute(span, f"{prefix}.content", "FILTERED")
+
+ return
+
+ message = choice.get("message")
+ if not message:
+ return
+
+ _set_span_attribute(span, f"{prefix}.role", message.get("role"))
+
+ if message.get("refusal"):
+ _set_span_attribute(
+ span, f"{prefix}.refusal", message.get("refusal"))
+ else:
+ _set_span_attribute(
+ span, f"{prefix}.content", message.get("content"))
+
+ function_call = message.get("function_call")
+ if function_call:
+ _set_span_attribute(
+ span, f"{prefix}.tool_calls.0.name", function_call.get("name")
+ )
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.0.arguments",
+ function_call.get("arguments"),
+ )
+
+ tool_calls = message.get("tool_calls")
+ if tool_calls:
+ for i, tool_call in enumerate(tool_calls):
+ function = tool_call.get("function")
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.{i}.id",
+ tool_call.get("id"),
+ )
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.{i}.name",
+ function.get("name"),
+ )
+ _set_span_attribute(
+ span,
+ f"{prefix}.tool_calls.{i}.arguments",
+ function.get("arguments"),
+ )
+
+
+ @dont_throw
+ def _set_streaming_token_metrics(
+ request_kwargs, complete_response, span, token_counter, shared_attributes
+ ):
+ prompt_usage = -1
+ completion_usage = -1
+
+ # Use token usage from API response only
+ if complete_response.get("usage"):
+ usage = complete_response["usage"]
+ if usage.get("prompt_tokens"):
+ prompt_usage = usage["prompt_tokens"]
+ if usage.get("completion_tokens"):
+ completion_usage = usage["completion_tokens"]
+
+ # span record
+ _set_span_stream_usage(span, prompt_usage, completion_usage)
+
+ # metrics record
+ if token_counter:
+ if isinstance(prompt_usage, int) and prompt_usage >= 0:
+ attributes_with_token_type = {
+ **shared_attributes,
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: "input",
+ }
+ token_counter.record(
+ prompt_usage, attributes=attributes_with_token_type)
+
+ if isinstance(completion_usage, int) and completion_usage >= 0:
+ attributes_with_token_type = {
+ **shared_attributes,
+ GenAIAttributes.GEN_AI_TOKEN_TYPE: "output",
+ }
+ token_counter.record(
+ completion_usage, attributes=attributes_with_token_type
+ )
+
+
+ class ChatStream(ObjectProxy):
+ _span = None
+ _instance = None
+ _token_counter = None
+ _choice_counter = None
+ _duration_histogram = None
+ _streaming_time_to_first_token = None
+ _streaming_time_to_generate = None
+ _start_time = None
+ _request_kwargs = None
+
+ def __init__(
+ self,
+ span,
+ response,
+ instance=None,
+ token_counter=None,
+ choice_counter=None,
+ duration_histogram=None,
+ streaming_time_to_first_token=None,
+ streaming_time_to_generate=None,
+ start_time=None,
+ request_kwargs=None,
+ ):
+ super().__init__(response)
+
+ self._span = span
+ self._instance = instance
+ self._token_counter = token_counter
+ self._choice_counter = choice_counter
+ self._duration_histogram = duration_histogram
+ self._streaming_time_to_first_token = streaming_time_to_first_token
+ self._streaming_time_to_generate = streaming_time_to_generate
+ self._start_time = start_time
+ self._request_kwargs = request_kwargs
+
+ self._first_token = True
+ # will be updated when first token is received
+ self._time_of_first_token = self._start_time
+ self._complete_response = {"choices": [], "model": ""}
+
+ # Cleanup state tracking to prevent duplicate operations
+ self._cleanup_completed = False
+ self._cleanup_lock = threading.Lock()
+
+ def __del__(self):
+ """Cleanup when object is garbage collected"""
+ if hasattr(self, '_cleanup_completed') and not self._cleanup_completed:
+ self._ensure_cleanup()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ cleanup_exception = None
+ try:
+ self._ensure_cleanup()
+ except Exception as e:
+ cleanup_exception = e
+ # Don't re-raise to avoid masking original exception
+
+ result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+ if cleanup_exception:
+ # Log cleanup exception but don't affect context manager behavior
+ logger.debug(
+ "Error during ChatStream cleanup in __exit__: %s", cleanup_exception)
+
+ return result
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
+ await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb)
+
+ def __iter__(self):
+ return self
+
+ def __aiter__(self):
+ return self
+
+ def __next__(self):
+ try:
+ chunk = self.__wrapped__.__next__()
+ except Exception as e:
+ if isinstance(e, StopIteration):
+ self._process_complete_response()
+ else:
+ # Handle cleanup for other exceptions during stream iteration
+ self._ensure_cleanup()
+ if self._span and self._span.is_recording():
+ self._span.set_status(Status(StatusCode.ERROR, str(e)))
+ raise
+ else:
+ self._process_item(chunk)
+ return chunk
+
+ async def __anext__(self):
+ try:
+ chunk = await self.__wrapped__.__anext__()
+ except Exception as e:
+ if isinstance(e, StopAsyncIteration):
+ self._process_complete_response()
+ else:
+ # Handle cleanup for other exceptions during stream iteration
+ self._ensure_cleanup()
+ if self._span and self._span.is_recording():
+ self._span.set_status(Status(StatusCode.ERROR, str(e)))
+ raise
+ else:
+ self._process_item(chunk)
+ return chunk
+
+ def _process_item(self, item):
+ self._span.add_event(
+ name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+
+ if self._first_token and self._streaming_time_to_first_token:
+ self._time_of_first_token = time.time()
+ self._streaming_time_to_first_token.record(
+ self._time_of_first_token - self._start_time,
+ attributes=self._shared_attributes(),
+ )
+ self._first_token = False
+
+ _accumulate_stream_items(item, self._complete_response)
+
+ def _shared_attributes(self):
+ return metric_shared_attributes(
+ response_model=self._complete_response.get("model")
+ or self._request_kwargs.get("model")
+ or None,
+ operation="chat",
+ server_address=_get_openai_base_url(self._instance),
+ is_streaming=True,
+ )
+
+ @dont_throw
+ def _process_complete_response(self):
+ _set_streaming_token_metrics(
+ self._request_kwargs,
+ self._complete_response,
+ self._span,
+ self._token_counter,
+ self._shared_attributes(),
+ )
+
+ # choice metrics
+ if self._choice_counter and self._complete_response.get("choices"):
+ _set_choice_counter_metrics(
+ self._choice_counter,
+ self._complete_response.get("choices"),
+ self._shared_attributes(),
+ )
+
+ # duration metrics
+ if self._start_time and isinstance(self._start_time, (float, int)):
+ duration = time.time() - self._start_time
+ else:
+ duration = None
+ if duration and isinstance(duration, (float, int)) and self._duration_histogram:
+ self._duration_histogram.record(
+ duration, attributes=self._shared_attributes()
+ )
+ if self._streaming_time_to_generate and self._time_of_first_token:
+ self._streaming_time_to_generate.record(
+ time.time() - self._time_of_first_token,
+ attributes=self._shared_attributes(),
+ )
+
+ _set_response_attributes(self._span, self._complete_response)
+ if should_emit_events():
+ for choice in self._complete_response.get("choices", []):
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(
+ self._span, self._complete_response.get("choices"))
+
+ self._span.set_status(Status(StatusCode.OK))
+ self._span.end()
+ self._cleanup_completed = True
+
+ @dont_throw
+ def _ensure_cleanup(self):
+ """Thread-safe cleanup method that handles different cleanup scenarios"""
+ with self._cleanup_lock:
+ if self._cleanup_completed:
+ logger.debug("ChatStream cleanup already completed, skipping")
+ return
+
+ try:
+ logger.debug("Starting ChatStream cleanup")
+
+ # Calculate partial metrics based on available data
+ self._record_partial_metrics()
+
+ # Set span status and close it
+ if self._span and self._span.is_recording():
+ self._span.set_status(Status(StatusCode.OK))
+ self._span.end()
+ logger.debug("ChatStream span closed successfully")
+
+ self._cleanup_completed = True
+ logger.debug("ChatStream cleanup completed successfully")
+
+ except Exception as e:
+ # Log cleanup errors but don't propagate to avoid masking original issues
+ logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+ # Still try to close the span even if metrics recording failed
+ try:
+ if self._span and self._span.is_recording():
+ self._span.set_status(
+ Status(StatusCode.ERROR, "Cleanup failed"))
+ self._span.end()
+ self._cleanup_completed = True
+ except Exception:
+ # Final fallback - just mark as completed to prevent infinite loops
+ self._cleanup_completed = True
+
+ @dont_throw
+ def _record_partial_metrics(self):
+ """Record metrics based on available partial data"""
+ # Always record duration if we have start time
+ if self._start_time and isinstance(self._start_time, (float, int)) and self._duration_histogram:
+ duration = time.time() - self._start_time
+ self._duration_histogram.record(
+ duration, attributes=self._shared_attributes()
+ )
+
+ # Record basic span attributes even without complete response
+ if self._span and self._span.is_recording():
+ _set_response_attributes(self._span, self._complete_response)
+
+ # Record partial token metrics if we have any data
+ if self._complete_response.get("choices") or self._request_kwargs:
+ _set_streaming_token_metrics(
+ self._request_kwargs,
+ self._complete_response,
+ self._span,
+ self._token_counter,
+ self._shared_attributes(),
+ )
+
+ # Record choice metrics if we have any choices processed
+ if self._choice_counter and self._complete_response.get("choices"):
+ _set_choice_counter_metrics(
+ self._choice_counter,
+ self._complete_response.get("choices"),
+ self._shared_attributes(),
+ )
+
+
+ # Backward compatibility with OpenAI v0
+
+
+ @dont_throw
+ def _build_from_streaming_response(
+ span,
+ response,
+ instance=None,
+ token_counter=None,
+ choice_counter=None,
+ duration_histogram=None,
+ streaming_time_to_first_token=None,
+ streaming_time_to_generate=None,
+ start_time=None,
+ request_kwargs=None,
+ ):
+ complete_response = {"choices": [], "model": "", "id": ""}
+
+ first_token = True
+ time_of_first_token = start_time # will be updated when first token is received
+
+ for item in response:
+ span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+
+ item_to_yield = item
+
+ if first_token and streaming_time_to_first_token:
+ time_of_first_token = time.time()
+ streaming_time_to_first_token.record(
+ time_of_first_token - start_time)
+ first_token = False
+
+ _accumulate_stream_items(item, complete_response)
+
+ yield item_to_yield
+
+ shared_attributes = {
+ GenAIAttributes.GEN_AI_RESPONSE_MODEL: complete_response.get("model") or None,
+ "server.address": _get_openai_base_url(instance),
+ "stream": True,
+ }
+
+ _set_streaming_token_metrics(
+ request_kwargs, complete_response, span, token_counter, shared_attributes
+ )
+
+ # choice metrics
+ if choice_counter and complete_response.get("choices"):
+ _set_choice_counter_metrics(
+ choice_counter, complete_response.get("choices"), shared_attributes
+ )
+
+ # duration metrics
+ if start_time and isinstance(start_time, (float, int)):
+ duration = time.time() - start_time
+ else:
+ duration = None
+ if duration and isinstance(duration, (float, int)) and duration_histogram:
+ duration_histogram.record(duration, attributes=shared_attributes)
+ if streaming_time_to_generate and time_of_first_token:
+ streaming_time_to_generate.record(time.time() - time_of_first_token)
+
+ _set_response_attributes(span, complete_response)
+ if should_emit_events():
+ for choice in complete_response.get("choices", []):
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(span, complete_response.get("choices"))
+
+ span.set_status(Status(StatusCode.OK))
+ span.end()
+
+
+ @dont_throw
+ async def _abuild_from_streaming_response(
+ span,
+ response,
+ instance=None,
+ token_counter=None,
+ choice_counter=None,
+ duration_histogram=None,
+ streaming_time_to_first_token=None,
+ streaming_time_to_generate=None,
+ start_time=None,
+ request_kwargs=None,
+ ):
+ complete_response = {"choices": [], "model": "", "id": ""}
+
+ first_token = True
+ time_of_first_token = start_time # will be updated when first token is received
+
+ async for item in response:
+ span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
+
+ item_to_yield = item
+
+ if first_token and streaming_time_to_first_token:
+ time_of_first_token = time.time()
+ streaming_time_to_first_token.record(
+ time_of_first_token - start_time)
+ first_token = False
+
+ _accumulate_stream_items(item, complete_response)
+
+ yield item_to_yield
+
+ shared_attributes = {
+ GenAIAttributes.GEN_AI_RESPONSE_MODEL: complete_response.get("model") or None,
+ "server.address": _get_openai_base_url(instance),
+ "stream": True,
+ }
+
+ _set_streaming_token_metrics(
+ request_kwargs, complete_response, span, token_counter, shared_attributes
+ )
+
+ # choice metrics
+ if choice_counter and complete_response.get("choices"):
+ _set_choice_counter_metrics(
+ choice_counter, complete_response.get("choices"), shared_attributes
+ )
+
+ # duration metrics
+ if start_time and isinstance(start_time, (float, int)):
+ duration = time.time() - start_time
+ else:
+ duration = None
+ if duration and isinstance(duration, (float, int)) and duration_histogram:
+ duration_histogram.record(duration, attributes=shared_attributes)
+ if streaming_time_to_generate and time_of_first_token:
+ streaming_time_to_generate.record(time.time() - time_of_first_token)
+
+ _set_response_attributes(span, complete_response)
+ if should_emit_events():
+ for choice in complete_response.get("choices", []):
+ emit_event(_parse_choice_event(choice))
+ else:
+ if should_send_prompts():
+ _set_completions(span, complete_response.get("choices"))
+
+ span.set_status(Status(StatusCode.OK))
+ span.end()
+
+
+ # pydantic.BaseModel here is ChatCompletionMessageFunctionToolCall (as of openai 1.99.7)
+ # but we keep to a parent type to support older versions
+ def _parse_tool_calls(
+ tool_calls: Optional[List[Union[dict, pydantic.BaseModel]]],
+ ) -> Union[List[ToolCall], None]:
+ """
+ Util to correctly parse the tool calls data from the OpenAI API to this module's
+ standard `ToolCall`.
+ """
+ if tool_calls is None:
+ return tool_calls
+
+ result = []
+
+ for tool_call in tool_calls:
+ tool_call_data = None
+
+ if isinstance(tool_call, dict):
+ tool_call_data = copy.deepcopy(tool_call)
+ elif _is_chat_message_function_tool_call(tool_call):
+ tool_call_data = tool_call.model_dump()
+ elif _is_function_call(tool_call):
+ function_call = tool_call.model_dump()
+ tool_call_data = ToolCall(
+ id="",
+ function={
+ "name": function_call.get("name"),
+ "arguments": function_call.get("arguments"),
+ },
+ type="function",
+ )
+
+ result.append(tool_call_data)
+ return result
+
+
+ def _is_chat_message_function_tool_call(model: Union[dict, pydantic.BaseModel]) -> bool:
+ try:
+ from openai.types.chat.chat_completion_message_function_tool_call import (
+ ChatCompletionMessageFunctionToolCall,
+ )
+
+ return isinstance(model, ChatCompletionMessageFunctionToolCall)
+ except Exception:
+ try:
+ # Since OpenAI 1.99.3, ChatCompletionMessageToolCall is a Union,
+ # and the isinstance check will fail. This is fine, because in all
+ # those versions, the check above will succeed.
+ from openai.types.chat.chat_completion_message_tool_call import (
+ ChatCompletionMessageToolCall,
+ )
+ return isinstance(model, ChatCompletionMessageToolCall)
+ except Exception:
+ return False
+
+
+ def _is_function_call(model: Union[dict, pydantic.BaseModel]) -> bool:
+ try:
+ from openai.types.chat.chat_completion_message import FunctionCall
+ return isinstance(model, FunctionCall)
+ except Exception:
+ return False
+
+
+ @singledispatch
+ def _parse_choice_event(choice) -> ChoiceEvent:
+ has_message = choice.message is not None
+ has_finish_reason = choice.finish_reason is not None
+ has_tool_calls = has_message and choice.message.tool_calls
+ has_function_call = has_message and choice.message.function_call
+
+ content = choice.message.content if has_message else None
+ role = choice.message.role if has_message else "unknown"
+ finish_reason = choice.finish_reason if has_finish_reason else "unknown"
+
+ if has_tool_calls and has_function_call:
+ tool_calls = choice.message.tool_calls + [choice.message.function_call]
+ elif has_tool_calls:
+ tool_calls = choice.message.tool_calls
+ elif has_function_call:
+ tool_calls = [choice.message.function_call]
+ else:
+ tool_calls = None
+
+ return ChoiceEvent(
+ index=choice.index,
+ message={"content": content, "role": role},
+ finish_reason=finish_reason,
+ tool_calls=_parse_tool_calls(tool_calls),
+ )
+
+
+ @_parse_choice_event.register
+ def _(choice: dict) -> ChoiceEvent:
+ message = choice.get("message")
+ has_message = message is not None
+ has_finish_reason = choice.get("finish_reason") is not None
+ has_tool_calls = has_message and message.get("tool_calls")
+ has_function_call = has_message and message.get("function_call")
+
+ content = choice.get("message").get("content", "") if has_message else None
+ role = choice.get("message").get("role") if has_message else "unknown"
+ finish_reason = choice.get(
+ "finish_reason") if has_finish_reason else "unknown"
+
+ if has_tool_calls and has_function_call:
+ tool_calls = message.get("tool_calls") + [message.get("function_call")]
+ elif has_tool_calls:
+ tool_calls = message.get("tool_calls")
+ elif has_function_call:
+ tool_calls = [message.get("function_call")]
+ else:
+ tool_calls = None
+
+ if tool_calls is not None:
+ for tool_call in tool_calls:
+ tool_call["type"] = "function"
+
+ return ChoiceEvent(
+ index=choice.get("index"),
+ message={"content": content, "role": role},
+ finish_reason=finish_reason,
+ tool_calls=tool_calls,
+ )
+
+
+ def _accumulate_stream_items(item, complete_response):
+ if is_openai_v1():
+ item = model_as_dict(item)
+
+ complete_response["model"] = item.get("model")
+ complete_response["id"] = item.get("id")
+
+ # capture usage information from the last stream chunks
+ if item.get("usage"):
+ complete_response["usage"] = item.get("usage")
+ elif item.get("choices") and item["choices"][0].get("usage"):
+ # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+ complete_response["usage"] = item["choices"][0].get("usage")
+
+ # prompt filter results
+ if item.get("prompt_filter_results"):
+ complete_response["prompt_filter_results"] = item.get(
+ "prompt_filter_results")
+
+ for choice in item.get("choices"):
+ index = choice.get("index")
+ if len(complete_response.get("choices")) <= index:
+ complete_response["choices"].append(
+ {"index": index, "message": {"content": "", "role": ""}}
+ )
+ complete_choice = complete_response.get("choices")[index]
+ if choice.get("finish_reason"):
+ complete_choice["finish_reason"] = choice.get("finish_reason")
+ if choice.get("content_filter_results"):
+ complete_choice["content_filter_results"] = choice.get(
+ "content_filter_results"
+ )
+
+ delta = choice.get("delta")
+
+ if delta and delta.get("content"):
+ complete_choice["message"]["content"] += delta.get("content")
+
+ if delta and delta.get("role"):
+ complete_choice["message"]["role"] = delta.get("role")
+ if delta and delta.get("tool_calls"):
+ tool_calls = delta.get("tool_calls")
+ if not isinstance(tool_calls, list) or len(tool_calls) == 0:
+ continue
+
+ if not complete_choice["message"].get("tool_calls"):
+ complete_choice["message"]["tool_calls"] = []
+
+ for tool_call in tool_calls:
+ i = int(tool_call["index"])
+ if len(complete_choice["message"]["tool_calls"]) <= i:
+ complete_choice["message"]["tool_calls"].append(
+ {"id": "", "function": {"name": "", "arguments": ""}}
+ )
+
+ span_tool_call = complete_choice["message"]["tool_calls"][i]
+ span_function = span_tool_call["function"]
+ tool_call_function = tool_call.get("function")
+
+ if tool_call.get("id"):
+ span_tool_call["id"] = tool_call.get("id")
+ if tool_call_function and tool_call_function.get("name"):
+ span_function["name"] = tool_call_function.get("name")
+ if tool_call_function and tool_call_function.get("arguments"):
+ span_function["arguments"] += tool_call_function.get(
+ "arguments")