lmnr 0.6.18__py3-none-any.whl → 0.6.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +55 -20
  2. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +23 -0
  3. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
  4. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +442 -0
  5. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1024 -0
  6. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +297 -0
  7. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
  8. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +308 -0
  9. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +185 -0
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +358 -0
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +319 -0
  16. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +132 -0
  17. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +626 -0
  18. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
  19. lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +1 -3
  20. lmnr/sdk/browser/browser_use_otel.py +1 -1
  21. lmnr/sdk/browser/patchright_otel.py +0 -14
  22. lmnr/sdk/browser/playwright_otel.py +16 -130
  23. lmnr/sdk/browser/pw_utils.py +45 -31
  24. lmnr/version.py +1 -1
  25. {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/METADATA +2 -5
  26. {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/RECORD +28 -11
  27. {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/WHEEL +1 -1
  28. {lmnr-0.6.18.dist-info → lmnr-0.6.19.dist-info}/entry_points.txt +0 -0
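The bulk of this release is a vendored copy of the OpenTelemetry OpenAI instrumentation (files 3-18); the largest new file, chat_wrappers.py, is shown in full below. As an orientation for reading that hunk, the sketch below shows how wrappers such as chat_wrapper and achat_wrapper are typically attached to the OpenAI v1 client. The actual registration lives in the instrumentor's __init__.py files (3 and 14 above), which are not part of this hunk, so the metric instrument names, the patched module paths, and the assumption that _with_chat_telemetry_wrapper (utils.py, file 12) curries the telemetry objects into a wrapt-style (wrapped, instance, args, kwargs) wrapper are illustrative, not taken from this diff.

    from opentelemetry import metrics, trace
    from wrapt import wrap_function_wrapper

    from lmnr.opentelemetry_lib.opentelemetry.instrumentation.openai.shared.chat_wrappers import (
        achat_wrapper,
        chat_wrapper,
    )

    tracer = trace.get_tracer(__name__)
    meter = metrics.get_meter(__name__)

    # Order matches the parameters of chat_wrapper in the hunk below; the token metric is
    # created as a histogram because the wrappers call .record() on it. Instrument names
    # here are assumptions, not confirmed by this diff.
    telemetry = (
        tracer,
        meter.create_histogram("gen_ai.client.token.usage"),
        meter.create_counter("gen_ai.client.generation.choices"),
        meter.create_histogram("gen_ai.client.operation.duration"),
        meter.create_counter("llm.openai.chat.exceptions"),
        meter.create_histogram("llm.chat.streaming_time_to_first_token"),
        meter.create_histogram("llm.chat.streaming_time_to_generate"),
    )

    # chat_wrapper(*telemetry) is assumed to return a wrapt-style
    # (wrapped, instance, args, kwargs) wrapper; module paths follow the openai>=1.x layout.
    wrap_function_wrapper(
        "openai.resources.chat.completions", "Completions.create", chat_wrapper(*telemetry)
    )
    wrap_function_wrapper(
        "openai.resources.chat.completions", "AsyncCompletions.create", achat_wrapper(*telemetry)
    )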
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py (new file)
@@ -0,0 +1,1024 @@
1
+ import copy
2
+ import json
3
+ import logging
4
+ import time
5
+ from functools import singledispatch
6
+ from typing import List, Optional, Union
7
+
8
+ from opentelemetry import context as context_api
9
+ from ..shared import (
10
+ OPENAI_LLM_USAGE_TOKEN_TYPES,
11
+ _get_openai_base_url,
12
+ _set_client_attributes,
13
+ _set_functions_attributes,
14
+ _set_request_attributes,
15
+ _set_response_attributes,
16
+ _set_span_attribute,
17
+ _set_span_stream_usage,
18
+ _token_type,
19
+ get_token_count_from_string,
20
+ is_streaming_response,
21
+ metric_shared_attributes,
22
+ model_as_dict,
23
+ propagate_trace_context,
24
+ set_tools_attributes,
25
+ should_record_stream_token_usage,
26
+ )
27
+ from ..shared.config import Config
28
+ from ..shared.event_emitter import emit_event
29
+ from ..shared.event_models import (
30
+ ChoiceEvent,
31
+ MessageEvent,
32
+ ToolCall,
33
+ )
34
+ from ..utils import (
35
+ _with_chat_telemetry_wrapper,
36
+ dont_throw,
37
+ is_openai_v1,
38
+ run_async,
39
+ should_emit_events,
40
+ should_send_prompts,
41
+ )
42
+ from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
43
+ from opentelemetry.metrics import Counter, Histogram
44
+ from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
45
+ from opentelemetry.semconv_ai import (
46
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
47
+ LLMRequestTypeValues,
48
+ SpanAttributes,
49
+ )
50
+ from opentelemetry.trace import SpanKind, Tracer
51
+ from opentelemetry.trace.status import Status, StatusCode
52
+ from wrapt import ObjectProxy
53
+
54
+ from openai.types.chat import ChatCompletionMessageToolCall
55
+ from openai.types.chat.chat_completion_message import FunctionCall
56
+
57
+ SPAN_NAME = "openai.chat"
58
+ PROMPT_FILTER_KEY = "prompt_filter_results"
59
+ CONTENT_FILTER_KEY = "content_filter_results"
60
+
61
+ LLM_REQUEST_TYPE = LLMRequestTypeValues.CHAT
62
+
63
+ logger = logging.getLogger(__name__)
64
+
65
+
66
+ @_with_chat_telemetry_wrapper
67
+ def chat_wrapper(
68
+ tracer: Tracer,
69
+ token_counter: Counter,
70
+ choice_counter: Counter,
71
+ duration_histogram: Histogram,
72
+ exception_counter: Counter,
73
+ streaming_time_to_first_token: Histogram,
74
+ streaming_time_to_generate: Histogram,
75
+ wrapped,
76
+ instance,
77
+ args,
78
+ kwargs,
79
+ ):
80
+ if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
81
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
82
+ ):
83
+ return wrapped(*args, **kwargs)
84
+ # span needs to be opened and closed manually because the response is a generator
85
+
86
+ span = tracer.start_span(
87
+ SPAN_NAME,
88
+ kind=SpanKind.CLIENT,
89
+ attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
90
+ )
91
+
92
+ run_async(_handle_request(span, kwargs, instance))
93
+
94
+ try:
95
+ start_time = time.time()
96
+ response = wrapped(*args, **kwargs)
97
+ end_time = time.time()
98
+ except Exception as e: # pylint: disable=broad-except
99
+ end_time = time.time()
100
+ duration = end_time - start_time if "start_time" in locals() else 0
101
+
102
+ attributes = {
103
+ "error.type": e.__class__.__name__,
104
+ }
105
+
106
+ if duration > 0 and duration_histogram:
107
+ duration_histogram.record(duration, attributes=attributes)
108
+ if exception_counter:
109
+ exception_counter.add(1, attributes=attributes)
110
+
111
+ span.set_attribute(ERROR_TYPE, e.__class__.__name__)
112
+ span.record_exception(e)
113
+ span.set_status(Status(StatusCode.ERROR, str(e)))
114
+ span.end()
115
+
116
+ raise
117
+
118
+ if is_streaming_response(response):
119
+ # span will be closed after the generator is done
120
+ if is_openai_v1():
121
+ return ChatStream(
122
+ span,
123
+ response,
124
+ instance,
125
+ token_counter,
126
+ choice_counter,
127
+ duration_histogram,
128
+ streaming_time_to_first_token,
129
+ streaming_time_to_generate,
130
+ start_time,
131
+ kwargs,
132
+ )
133
+ else:
134
+ return _build_from_streaming_response(
135
+ span,
136
+ response,
137
+ instance,
138
+ token_counter,
139
+ choice_counter,
140
+ duration_histogram,
141
+ streaming_time_to_first_token,
142
+ streaming_time_to_generate,
143
+ start_time,
144
+ kwargs,
145
+ )
146
+
147
+ duration = end_time - start_time
148
+
149
+ _handle_response(
150
+ response,
151
+ span,
152
+ instance,
153
+ token_counter,
154
+ choice_counter,
155
+ duration_histogram,
156
+ duration,
157
+ )
158
+
159
+ span.end()
160
+
161
+ return response
162
+
163
+
164
+ @_with_chat_telemetry_wrapper
165
+ async def achat_wrapper(
166
+ tracer: Tracer,
167
+ token_counter: Counter,
168
+ choice_counter: Counter,
169
+ duration_histogram: Histogram,
170
+ exception_counter: Counter,
171
+ streaming_time_to_first_token: Histogram,
172
+ streaming_time_to_generate: Histogram,
173
+ wrapped,
174
+ instance,
175
+ args,
176
+ kwargs,
177
+ ):
178
+ if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
179
+ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
180
+ ):
181
+ return await wrapped(*args, **kwargs)
182
+
183
+ span = tracer.start_span(
184
+ SPAN_NAME,
185
+ kind=SpanKind.CLIENT,
186
+ attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
187
+ )
188
+
189
+ await _handle_request(span, kwargs, instance)
190
+
191
+ try:
192
+ start_time = time.time()
193
+ response = await wrapped(*args, **kwargs)
194
+ end_time = time.time()
195
+ except Exception as e: # pylint: disable=broad-except
196
+ end_time = time.time()
197
+ duration = end_time - start_time if "start_time" in locals() else 0
198
+
199
+ common_attributes = Config.get_common_metrics_attributes()
200
+ attributes = {
201
+ **common_attributes,
202
+ "error.type": e.__class__.__name__,
203
+ }
204
+
205
+ if duration > 0 and duration_histogram:
206
+ duration_histogram.record(duration, attributes=attributes)
207
+ if exception_counter:
208
+ exception_counter.add(1, attributes=attributes)
209
+
210
+ span.set_attribute(ERROR_TYPE, e.__class__.__name__)
211
+ span.record_exception(e)
212
+ span.set_status(Status(StatusCode.ERROR, str(e)))
213
+ span.end()
214
+
215
+ raise
216
+
217
+ if is_streaming_response(response):
218
+ # span will be closed after the generator is done
219
+ if is_openai_v1():
220
+ return ChatStream(
221
+ span,
222
+ response,
223
+ instance,
224
+ token_counter,
225
+ choice_counter,
226
+ duration_histogram,
227
+ streaming_time_to_first_token,
228
+ streaming_time_to_generate,
229
+ start_time,
230
+ kwargs,
231
+ )
232
+ else:
233
+ return _abuild_from_streaming_response(
234
+ span,
235
+ response,
236
+ instance,
237
+ token_counter,
238
+ choice_counter,
239
+ duration_histogram,
240
+ streaming_time_to_first_token,
241
+ streaming_time_to_generate,
242
+ start_time,
243
+ kwargs,
244
+ )
245
+
246
+ duration = end_time - start_time
247
+
248
+ _handle_response(
249
+ response,
250
+ span,
251
+ instance,
252
+ token_counter,
253
+ choice_counter,
254
+ duration_histogram,
255
+ duration,
256
+ )
257
+
258
+ span.end()
259
+
260
+ return response
261
+
262
+
263
+ @dont_throw
264
+ async def _handle_request(span, kwargs, instance):
265
+ _set_request_attributes(span, kwargs, instance)
266
+ _set_client_attributes(span, instance)
267
+ if should_emit_events():
268
+ for message in kwargs.get("messages", []):
269
+ emit_event(
270
+ MessageEvent(
271
+ content=message.get("content"),
272
+ role=message.get("role"),
273
+ tool_calls=_parse_tool_calls(message.get("tool_calls", None)),
274
+ )
275
+ )
276
+ else:
277
+ if should_send_prompts():
278
+ await _set_prompts(span, kwargs.get("messages"))
279
+ if kwargs.get("functions"):
280
+ _set_functions_attributes(span, kwargs.get("functions"))
281
+ elif kwargs.get("tools"):
282
+ set_tools_attributes(span, kwargs.get("tools"))
283
+ if Config.enable_trace_context_propagation:
284
+ propagate_trace_context(span, kwargs)
285
+
286
+
287
+ @dont_throw
288
+ def _handle_response(
289
+ response,
290
+ span,
291
+ instance=None,
292
+ token_counter=None,
293
+ choice_counter=None,
294
+ duration_histogram=None,
295
+ duration=None,
296
+ ):
297
+ if is_openai_v1():
298
+ response_dict = model_as_dict(response)
299
+ else:
300
+ response_dict = response
301
+
302
+ # metrics record
303
+ _set_chat_metrics(
304
+ instance,
305
+ token_counter,
306
+ choice_counter,
307
+ duration_histogram,
308
+ response_dict,
309
+ duration,
310
+ )
311
+
312
+ # span attributes
313
+ _set_response_attributes(span, response_dict)
314
+
315
+ if should_emit_events():
316
+ if response.choices is not None:
317
+ for choice in response.choices:
318
+ emit_event(_parse_choice_event(choice))
319
+ else:
320
+ if should_send_prompts():
321
+ _set_completions(span, response_dict.get("choices"))
322
+
323
+ return response
324
+
325
+
326
+ def _set_chat_metrics(
327
+ instance, token_counter, choice_counter, duration_histogram, response_dict, duration
328
+ ):
329
+ shared_attributes = metric_shared_attributes(
330
+ response_model=response_dict.get("model") or None,
331
+ operation="chat",
332
+ server_address=_get_openai_base_url(instance),
333
+ is_streaming=False,
334
+ )
335
+
336
+ # token metrics
337
+ usage = response_dict.get("usage") # type: dict
338
+ if usage and token_counter:
339
+ _set_token_counter_metrics(token_counter, usage, shared_attributes)
340
+
341
+ # choices metrics
342
+ choices = response_dict.get("choices")
343
+ if choices and choice_counter:
344
+ _set_choice_counter_metrics(choice_counter, choices, shared_attributes)
345
+
346
+ # duration metrics
347
+ if duration and isinstance(duration, (float, int)) and duration_histogram:
348
+ duration_histogram.record(duration, attributes=shared_attributes)
349
+
350
+
351
+ def _set_choice_counter_metrics(choice_counter, choices, shared_attributes):
352
+ choice_counter.add(len(choices), attributes=shared_attributes)
353
+ for choice in choices:
354
+ attributes_with_reason = {**shared_attributes}
355
+ if choice.get("finish_reason"):
356
+ attributes_with_reason[SpanAttributes.LLM_RESPONSE_FINISH_REASON] = (
357
+ choice.get("finish_reason")
358
+ )
359
+ choice_counter.add(1, attributes=attributes_with_reason)
360
+
361
+
362
+ def _set_token_counter_metrics(token_counter, usage, shared_attributes):
363
+ for name, val in usage.items():
364
+ if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
365
+ attributes_with_token_type = {
366
+ **shared_attributes,
367
+ SpanAttributes.LLM_TOKEN_TYPE: _token_type(name),
368
+ }
369
+ token_counter.record(val, attributes=attributes_with_token_type)
370
+
371
+
372
+ def _is_base64_image(item):
373
+ if not isinstance(item, dict):
374
+ return False
375
+
376
+ if not isinstance(item.get("image_url"), dict):
377
+ return False
378
+
379
+ if "data:image/" not in item.get("image_url", {}).get("url", ""):
380
+ return False
381
+
382
+ return True
383
+
384
+
385
+ async def _process_image_item(item, trace_id, span_id, message_index, content_index):
386
+ if not Config.upload_base64_image:
387
+ return item
388
+
389
+ image_format = item["image_url"]["url"].split(";")[0].split("/")[1]
390
+ image_name = f"message_{message_index}_content_{content_index}.{image_format}"
391
+ base64_string = item["image_url"]["url"].split(",")[1]
392
+ url = await Config.upload_base64_image(trace_id, span_id, image_name, base64_string)
393
+
394
+ return {"type": "image_url", "image_url": {"url": url}}
395
+
396
+
397
+ @dont_throw
398
+ async def _set_prompts(span, messages):
399
+ if not span.is_recording() or messages is None:
400
+ return
401
+
402
+ for i, msg in enumerate(messages):
403
+ prefix = f"{SpanAttributes.LLM_PROMPTS}.{i}"
404
+ msg = msg if isinstance(msg, dict) else model_as_dict(msg)
405
+
406
+ _set_span_attribute(span, f"{prefix}.role", msg.get("role"))
407
+ if msg.get("content"):
408
+ content = copy.deepcopy(msg.get("content"))
409
+ if isinstance(content, list):
410
+ content = [
411
+ (
412
+ await _process_image_item(
413
+ item, span.context.trace_id, span.context.span_id, i, j
414
+ )
415
+ if _is_base64_image(item)
416
+ else item
417
+ )
418
+ for j, item in enumerate(content)
419
+ ]
420
+
421
+ content = json.dumps(content)
422
+ _set_span_attribute(span, f"{prefix}.content", content)
423
+ if msg.get("tool_call_id"):
424
+ _set_span_attribute(span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
425
+ tool_calls = msg.get("tool_calls")
426
+ if tool_calls:
427
+ for i, tool_call in enumerate(tool_calls):
428
+ if is_openai_v1():
429
+ tool_call = model_as_dict(tool_call)
430
+
431
+ function = tool_call.get("function")
432
+ _set_span_attribute(
433
+ span,
434
+ f"{prefix}.tool_calls.{i}.id",
435
+ tool_call.get("id"),
436
+ )
437
+ _set_span_attribute(
438
+ span,
439
+ f"{prefix}.tool_calls.{i}.name",
440
+ function.get("name"),
441
+ )
442
+ _set_span_attribute(
443
+ span,
444
+ f"{prefix}.tool_calls.{i}.arguments",
445
+ function.get("arguments"),
446
+ )
447
+
448
+
449
+ def _set_completions(span, choices):
450
+ if choices is None:
451
+ return
452
+
453
+ for choice in choices:
454
+ index = choice.get("index")
455
+ prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
456
+ _set_span_attribute(
457
+ span, f"{prefix}.finish_reason", choice.get("finish_reason")
458
+ )
459
+
460
+ if choice.get("content_filter_results"):
461
+ _set_span_attribute(
462
+ span,
463
+ f"{prefix}.{CONTENT_FILTER_KEY}",
464
+ json.dumps(choice.get("content_filter_results")),
465
+ )
466
+
467
+ if choice.get("finish_reason") == "content_filter":
468
+ _set_span_attribute(span, f"{prefix}.role", "assistant")
469
+ _set_span_attribute(span, f"{prefix}.content", "FILTERED")
470
+
471
+ return
472
+
473
+ message = choice.get("message")
474
+ if not message:
475
+ return
476
+
477
+ _set_span_attribute(span, f"{prefix}.role", message.get("role"))
478
+
479
+ if message.get("refusal"):
480
+ _set_span_attribute(span, f"{prefix}.refusal", message.get("refusal"))
481
+ else:
482
+ _set_span_attribute(span, f"{prefix}.content", message.get("content"))
483
+
484
+ function_call = message.get("function_call")
485
+ if function_call:
486
+ _set_span_attribute(
487
+ span, f"{prefix}.tool_calls.0.name", function_call.get("name")
488
+ )
489
+ _set_span_attribute(
490
+ span,
491
+ f"{prefix}.tool_calls.0.arguments",
492
+ function_call.get("arguments"),
493
+ )
494
+
495
+ tool_calls = message.get("tool_calls")
496
+ if tool_calls:
497
+ for i, tool_call in enumerate(tool_calls):
498
+ function = tool_call.get("function")
499
+ _set_span_attribute(
500
+ span,
501
+ f"{prefix}.tool_calls.{i}.id",
502
+ tool_call.get("id"),
503
+ )
504
+ _set_span_attribute(
505
+ span,
506
+ f"{prefix}.tool_calls.{i}.name",
507
+ function.get("name"),
508
+ )
509
+ _set_span_attribute(
510
+ span,
511
+ f"{prefix}.tool_calls.{i}.arguments",
512
+ function.get("arguments"),
513
+ )
514
+
515
+
516
+ @dont_throw
517
+ def _set_streaming_token_metrics(
518
+ request_kwargs, complete_response, span, token_counter, shared_attributes
519
+ ):
520
+ # use tiktoken to calculate token usage
521
+ if not should_record_stream_token_usage():
522
+ return
523
+
524
+ # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
525
+ prompt_usage = -1
526
+ completion_usage = -1
527
+
528
+ # prompt_usage
529
+ if request_kwargs and request_kwargs.get("messages"):
530
+ prompt_content = ""
531
+ # setting the default model_name to gpt-4, as it uses the "cl100k_base" encoding that
532
+ # is used by most other models.
533
+ model_name = (
534
+ complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
535
+ )
536
+ for msg in request_kwargs.get("messages"):
537
+ if msg.get("content"):
538
+ prompt_content += msg.get("content")
539
+ if model_name:
540
+ prompt_usage = get_token_count_from_string(prompt_content, model_name)
541
+
542
+ # completion_usage
543
+ if complete_response.get("choices"):
544
+ completion_content = ""
545
+ # setting the default model_name to gpt-4, as it uses the "cl100k_base" encoding that
546
+ # is used by most other models.
547
+ model_name = complete_response.get("model") or "gpt-4"
548
+
549
+ for choice in complete_response.get("choices"):
550
+ if choice.get("message") and choice.get("message").get("content"):
551
+ completion_content += choice["message"]["content"]
552
+
553
+ if model_name:
554
+ completion_usage = get_token_count_from_string(
555
+ completion_content, model_name
556
+ )
557
+
558
+ # span record
559
+ _set_span_stream_usage(span, prompt_usage, completion_usage)
560
+
561
+ # metrics record
562
+ if token_counter:
563
+ if isinstance(prompt_usage, int) and prompt_usage >= 0:
564
+ attributes_with_token_type = {
565
+ **shared_attributes,
566
+ SpanAttributes.LLM_TOKEN_TYPE: "input",
567
+ }
568
+ token_counter.record(prompt_usage, attributes=attributes_with_token_type)
569
+
570
+ if isinstance(completion_usage, int) and completion_usage >= 0:
571
+ attributes_with_token_type = {
572
+ **shared_attributes,
573
+ SpanAttributes.LLM_TOKEN_TYPE: "output",
574
+ }
575
+ token_counter.record(
576
+ completion_usage, attributes=attributes_with_token_type
577
+ )
578
+
579
+
580
+ class ChatStream(ObjectProxy):
581
+ _span = None
582
+ _instance = None
583
+ _token_counter = None
584
+ _choice_counter = None
585
+ _duration_histogram = None
586
+ _streaming_time_to_first_token = None
587
+ _streaming_time_to_generate = None
588
+ _start_time = None
589
+ _request_kwargs = None
590
+
591
+ def __init__(
592
+ self,
593
+ span,
594
+ response,
595
+ instance=None,
596
+ token_counter=None,
597
+ choice_counter=None,
598
+ duration_histogram=None,
599
+ streaming_time_to_first_token=None,
600
+ streaming_time_to_generate=None,
601
+ start_time=None,
602
+ request_kwargs=None,
603
+ ):
604
+ super().__init__(response)
605
+
606
+ self._span = span
607
+ self._instance = instance
608
+ self._token_counter = token_counter
609
+ self._choice_counter = choice_counter
610
+ self._duration_histogram = duration_histogram
611
+ self._streaming_time_to_first_token = streaming_time_to_first_token
612
+ self._streaming_time_to_generate = streaming_time_to_generate
613
+ self._start_time = start_time
614
+ self._request_kwargs = request_kwargs
615
+
616
+ self._first_token = True
617
+ # will be updated when first token is received
618
+ self._time_of_first_token = self._start_time
619
+ self._complete_response = {"choices": [], "model": ""}
620
+
621
+ def __enter__(self):
622
+ return self
623
+
624
+ def __exit__(self, exc_type, exc_val, exc_tb):
625
+ self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
626
+
627
+ async def __aenter__(self):
628
+ return self
629
+
630
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
631
+ await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb)
632
+
633
+ def __iter__(self):
634
+ return self
635
+
636
+ def __aiter__(self):
637
+ return self
638
+
639
+ def __next__(self):
640
+ try:
641
+ chunk = self.__wrapped__.__next__()
642
+ except Exception as e:
643
+ if isinstance(e, StopIteration):
644
+ self._process_complete_response()
645
+ raise e
646
+ else:
647
+ self._process_item(chunk)
648
+ return chunk
649
+
650
+ async def __anext__(self):
651
+ try:
652
+ chunk = await self.__wrapped__.__anext__()
653
+ except Exception as e:
654
+ if isinstance(e, StopAsyncIteration):
655
+ self._process_complete_response()
656
+ raise e
657
+ else:
658
+ self._process_item(chunk)
659
+ return chunk
660
+
661
+ def _process_item(self, item):
662
+ self._span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
663
+
664
+ if self._first_token and self._streaming_time_to_first_token:
665
+ self._time_of_first_token = time.time()
666
+ self._streaming_time_to_first_token.record(
667
+ self._time_of_first_token - self._start_time,
668
+ attributes=self._shared_attributes(),
669
+ )
670
+ self._first_token = False
671
+
672
+ _accumulate_stream_items(item, self._complete_response)
673
+
674
+ def _shared_attributes(self):
675
+ return metric_shared_attributes(
676
+ response_model=self._complete_response.get("model")
677
+ or self._request_kwargs.get("model")
678
+ or None,
679
+ operation="chat",
680
+ server_address=_get_openai_base_url(self._instance),
681
+ is_streaming=True,
682
+ )
683
+
684
+ @dont_throw
685
+ def _process_complete_response(self):
686
+ _set_streaming_token_metrics(
687
+ self._request_kwargs,
688
+ self._complete_response,
689
+ self._span,
690
+ self._token_counter,
691
+ self._shared_attributes(),
692
+ )
693
+
694
+ # choice metrics
695
+ if self._choice_counter and self._complete_response.get("choices"):
696
+ _set_choice_counter_metrics(
697
+ self._choice_counter,
698
+ self._complete_response.get("choices"),
699
+ self._shared_attributes(),
700
+ )
701
+
702
+ # duration metrics
703
+ if self._start_time and isinstance(self._start_time, (float, int)):
704
+ duration = time.time() - self._start_time
705
+ else:
706
+ duration = None
707
+ if duration and isinstance(duration, (float, int)) and self._duration_histogram:
708
+ self._duration_histogram.record(
709
+ duration, attributes=self._shared_attributes()
710
+ )
711
+ if self._streaming_time_to_generate and self._time_of_first_token:
712
+ self._streaming_time_to_generate.record(
713
+ time.time() - self._time_of_first_token,
714
+ attributes=self._shared_attributes(),
715
+ )
716
+
717
+ _set_response_attributes(self._span, self._complete_response)
718
+ if should_emit_events():
719
+ for choice in self._complete_response.get("choices", []):
720
+ emit_event(_parse_choice_event(choice))
721
+ else:
722
+ if should_send_prompts():
723
+ _set_completions(self._span, self._complete_response.get("choices"))
724
+
725
+ self._span.set_status(Status(StatusCode.OK))
726
+ self._span.end()
727
+
728
+
729
+ # Backward compatibility with OpenAI v0
730
+
731
+
732
+ @dont_throw
733
+ def _build_from_streaming_response(
734
+ span,
735
+ response,
736
+ instance=None,
737
+ token_counter=None,
738
+ choice_counter=None,
739
+ duration_histogram=None,
740
+ streaming_time_to_first_token=None,
741
+ streaming_time_to_generate=None,
742
+ start_time=None,
743
+ request_kwargs=None,
744
+ ):
745
+ complete_response = {"choices": [], "model": "", "id": ""}
746
+
747
+ first_token = True
748
+ time_of_first_token = start_time # will be updated when first token is received
749
+
750
+ for item in response:
751
+ span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
752
+
753
+ item_to_yield = item
754
+
755
+ if first_token and streaming_time_to_first_token:
756
+ time_of_first_token = time.time()
757
+ streaming_time_to_first_token.record(time_of_first_token - start_time)
758
+ first_token = False
759
+
760
+ _accumulate_stream_items(item, complete_response)
761
+
762
+ yield item_to_yield
763
+
764
+ shared_attributes = {
765
+ SpanAttributes.LLM_RESPONSE_MODEL: complete_response.get("model") or None,
766
+ "server.address": _get_openai_base_url(instance),
767
+ "stream": True,
768
+ }
769
+
770
+ _set_streaming_token_metrics(
771
+ request_kwargs, complete_response, span, token_counter, shared_attributes
772
+ )
773
+
774
+ # choice metrics
775
+ if choice_counter and complete_response.get("choices"):
776
+ _set_choice_counter_metrics(
777
+ choice_counter, complete_response.get("choices"), shared_attributes
778
+ )
779
+
780
+ # duration metrics
781
+ if start_time and isinstance(start_time, (float, int)):
782
+ duration = time.time() - start_time
783
+ else:
784
+ duration = None
785
+ if duration and isinstance(duration, (float, int)) and duration_histogram:
786
+ duration_histogram.record(duration, attributes=shared_attributes)
787
+ if streaming_time_to_generate and time_of_first_token:
788
+ streaming_time_to_generate.record(time.time() - time_of_first_token)
789
+
790
+ _set_response_attributes(span, complete_response)
791
+ if should_emit_events():
792
+ for choice in complete_response.get("choices", []):
793
+ emit_event(_parse_choice_event(choice))
794
+ else:
795
+ if should_send_prompts():
796
+ _set_completions(span, complete_response.get("choices"))
797
+
798
+ span.set_status(Status(StatusCode.OK))
799
+ span.end()
800
+
801
+
802
+ @dont_throw
803
+ async def _abuild_from_streaming_response(
804
+ span,
805
+ response,
806
+ instance=None,
807
+ token_counter=None,
808
+ choice_counter=None,
809
+ duration_histogram=None,
810
+ streaming_time_to_first_token=None,
811
+ streaming_time_to_generate=None,
812
+ start_time=None,
813
+ request_kwargs=None,
814
+ ):
815
+ complete_response = {"choices": [], "model": "", "id": ""}
816
+
817
+ first_token = True
818
+ time_of_first_token = start_time # will be updated when first token is received
819
+
820
+ async for item in response:
821
+ span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
822
+
823
+ item_to_yield = item
824
+
825
+ if first_token and streaming_time_to_first_token:
826
+ time_of_first_token = time.time()
827
+ streaming_time_to_first_token.record(time_of_first_token - start_time)
828
+ first_token = False
829
+
830
+ _accumulate_stream_items(item, complete_response)
831
+
832
+ yield item_to_yield
833
+
834
+ shared_attributes = {
835
+ SpanAttributes.LLM_RESPONSE_MODEL: complete_response.get("model") or None,
836
+ "server.address": _get_openai_base_url(instance),
837
+ "stream": True,
838
+ }
839
+
840
+ _set_streaming_token_metrics(
841
+ request_kwargs, complete_response, span, token_counter, shared_attributes
842
+ )
843
+
844
+ # choice metrics
845
+ if choice_counter and complete_response.get("choices"):
846
+ _set_choice_counter_metrics(
847
+ choice_counter, complete_response.get("choices"), shared_attributes
848
+ )
849
+
850
+ # duration metrics
851
+ if start_time and isinstance(start_time, (float, int)):
852
+ duration = time.time() - start_time
853
+ else:
854
+ duration = None
855
+ if duration and isinstance(duration, (float, int)) and duration_histogram:
856
+ duration_histogram.record(duration, attributes=shared_attributes)
857
+ if streaming_time_to_generate and time_of_first_token:
858
+ streaming_time_to_generate.record(time.time() - time_of_first_token)
859
+
860
+ _set_response_attributes(span, complete_response)
861
+ if should_emit_events():
862
+ for choice in complete_response.get("choices", []):
863
+ emit_event(_parse_choice_event(choice))
864
+ else:
865
+ if should_send_prompts():
866
+ _set_completions(span, complete_response.get("choices"))
867
+
868
+ span.set_status(Status(StatusCode.OK))
869
+ span.end()
870
+
871
+
872
+ def _parse_tool_calls(
873
+ tool_calls: Optional[List[Union[dict, ChatCompletionMessageToolCall]]],
874
+ ) -> Union[List[ToolCall], None]:
875
+ """
876
+ Utility to parse tool call data returned by the OpenAI API into this module's
877
+ standard `ToolCall`.
878
+ """
879
+ if tool_calls is None:
880
+ return tool_calls
881
+
882
+ result = []
883
+
884
+ for tool_call in tool_calls:
885
+ tool_call_data = None
886
+
887
+ # Handle dict or ChatCompletionMessageToolCall
888
+ if isinstance(tool_call, dict):
889
+ tool_call_data = copy.deepcopy(tool_call)
890
+ elif isinstance(tool_call, ChatCompletionMessageToolCall):
891
+ tool_call_data = tool_call.model_dump()
892
+ elif isinstance(tool_call, FunctionCall):
893
+ function_call = tool_call.model_dump()
894
+ tool_call_data = ToolCall(
895
+ id="",
896
+ function={
897
+ "name": function_call.get("name"),
898
+ "arguments": function_call.get("arguments"),
899
+ },
900
+ type="function",
901
+ )
902
+
903
+ result.append(tool_call_data)
904
+ return result
905
+
906
+
907
+ @singledispatch
908
+ def _parse_choice_event(choice) -> ChoiceEvent:
909
+ has_message = choice.message is not None
910
+ has_finish_reason = choice.finish_reason is not None
911
+ has_tool_calls = has_message and choice.message.tool_calls
912
+ has_function_call = has_message and choice.message.function_call
913
+
914
+ content = choice.message.content if has_message else None
915
+ role = choice.message.role if has_message else "unknown"
916
+ finish_reason = choice.finish_reason if has_finish_reason else "unknown"
917
+
918
+ if has_tool_calls and has_function_call:
919
+ tool_calls = choice.message.tool_calls + [choice.message.function_call]
920
+ elif has_tool_calls:
921
+ tool_calls = choice.message.tool_calls
922
+ elif has_function_call:
923
+ tool_calls = [choice.message.function_call]
924
+ else:
925
+ tool_calls = None
926
+
927
+ return ChoiceEvent(
928
+ index=choice.index,
929
+ message={"content": content, "role": role},
930
+ finish_reason=finish_reason,
931
+ tool_calls=_parse_tool_calls(tool_calls),
932
+ )
933
+
934
+
935
+ @_parse_choice_event.register
936
+ def _(choice: dict) -> ChoiceEvent:
937
+ message = choice.get("message")
938
+ has_message = message is not None
939
+ has_finish_reason = choice.get("finish_reason") is not None
940
+ has_tool_calls = has_message and message.get("tool_calls")
941
+ has_function_call = has_message and message.get("function_call")
942
+
943
+ content = choice.get("message").get("content", "") if has_message else None
944
+ role = choice.get("message").get("role") if has_message else "unknown"
945
+ finish_reason = choice.get("finish_reason") if has_finish_reason else "unknown"
946
+
947
+ if has_tool_calls and has_function_call:
948
+ tool_calls = message.get("tool_calls") + [message.get("function_call")]
949
+ elif has_tool_calls:
950
+ tool_calls = message.get("tool_calls")
951
+ elif has_function_call:
952
+ tool_calls = [message.get("function_call")]
953
+ else:
954
+ tool_calls = None
955
+
956
+ if tool_calls is not None:
957
+ for tool_call in tool_calls:
958
+ tool_call["type"] = "function"
959
+
960
+ return ChoiceEvent(
961
+ index=choice.get("index"),
962
+ message={"content": content, "role": role},
963
+ finish_reason=finish_reason,
964
+ tool_calls=tool_calls,
965
+ )
966
+
967
+
968
+ def _accumulate_stream_items(item, complete_response):
969
+ if is_openai_v1():
970
+ item = model_as_dict(item)
971
+
972
+ complete_response["model"] = item.get("model")
973
+ complete_response["id"] = item.get("id")
974
+
975
+ # prompt filter results
976
+ if item.get("prompt_filter_results"):
977
+ complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
978
+
979
+ for choice in item.get("choices"):
980
+ index = choice.get("index")
981
+ if len(complete_response.get("choices")) <= index:
982
+ complete_response["choices"].append(
983
+ {"index": index, "message": {"content": "", "role": ""}}
984
+ )
985
+ complete_choice = complete_response.get("choices")[index]
986
+ if choice.get("finish_reason"):
987
+ complete_choice["finish_reason"] = choice.get("finish_reason")
988
+ if choice.get("content_filter_results"):
989
+ complete_choice["content_filter_results"] = choice.get(
990
+ "content_filter_results"
991
+ )
992
+
993
+ delta = choice.get("delta")
994
+
995
+ if delta and delta.get("content"):
996
+ complete_choice["message"]["content"] += delta.get("content")
997
+
998
+ if delta and delta.get("role"):
999
+ complete_choice["message"]["role"] = delta.get("role")
1000
+ if delta and delta.get("tool_calls"):
1001
+ tool_calls = delta.get("tool_calls")
1002
+ if not isinstance(tool_calls, list) or len(tool_calls) == 0:
1003
+ continue
1004
+
1005
+ if not complete_choice["message"].get("tool_calls"):
1006
+ complete_choice["message"]["tool_calls"] = []
1007
+
1008
+ for tool_call in tool_calls:
1009
+ i = int(tool_call["index"])
1010
+ if len(complete_choice["message"]["tool_calls"]) <= i:
1011
+ complete_choice["message"]["tool_calls"].append(
1012
+ {"id": "", "function": {"name": "", "arguments": ""}}
1013
+ )
1014
+
1015
+ span_tool_call = complete_choice["message"]["tool_calls"][i]
1016
+ span_function = span_tool_call["function"]
1017
+ tool_call_function = tool_call.get("function")
1018
+
1019
+ if tool_call.get("id"):
1020
+ span_tool_call["id"] = tool_call.get("id")
1021
+ if tool_call_function and tool_call_function.get("name"):
1022
+ span_function["name"] = tool_call_function.get("name")
1023
+ if tool_call_function and tool_call_function.get("arguments"):
1024
+ span_function["arguments"] += tool_call_function.get("arguments")
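_accumulate_stream_items merges streamed deltas into a single chat-completion-shaped dict so the span and metric helpers can treat a stream like a regular response. Below is a minimal, self-contained sketch of that accumulation (content, role, and finish_reason only; tool-call and content-filter merging omitted). It is an illustration of the logic, not the module's API, and the chunk values are made up.

    # Simplified mimic of _accumulate_stream_items for plain-dict chunks.
    def accumulate(chunk: dict, acc: dict) -> None:
        acc["model"] = chunk.get("model")
        acc["id"] = chunk.get("id")
        for choice in chunk.get("choices", []):
            index = choice.get("index", 0)
            # grow the accumulated choices list until this index exists
            while len(acc["choices"]) <= index:
                acc["choices"].append(
                    {"index": len(acc["choices"]), "message": {"content": "", "role": ""}}
                )
            target = acc["choices"][index]
            if choice.get("finish_reason"):
                target["finish_reason"] = choice["finish_reason"]
            delta = choice.get("delta") or {}
            if delta.get("content"):
                target["message"]["content"] += delta["content"]
            if delta.get("role"):
                target["message"]["role"] = delta["role"]

    acc = {"choices": [], "model": "", "id": ""}
    chunks = (
        {"id": "chatcmpl-1", "model": "gpt-4o",
         "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hel"}}]},
        {"id": "chatcmpl-1", "model": "gpt-4o",
         "choices": [{"index": 0, "delta": {"content": "lo"}, "finish_reason": "stop"}]},
    )
    for chunk in chunks:
        accumulate(chunk, acc)

    assert acc["choices"][0]["message"]["content"] == "Hello"
    assert acc["choices"][0]["finish_reason"] == "stop"

The vendored helper above additionally converts v1 chunk objects with model_as_dict, carries prompt/content filter results through, and concatenates tool-call ids, names, and argument fragments chunk by chunk before the accumulated response is handed to the span and metric code.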