opentelemetry-instrumentation-botocore 0.51b0__py3-none-any.whl → 0.52b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,7 @@ from __future__ import annotations
21
21
  import io
22
22
  import json
23
23
  import logging
24
+ from timeit import default_timer
24
25
  from typing import Any
25
26
 
26
27
  from botocore.eventstream import EventStream
@@ -29,12 +30,17 @@ from botocore.response import StreamingBody
29
30
  from opentelemetry.instrumentation.botocore.extensions.bedrock_utils import (
30
31
  ConverseStreamWrapper,
31
32
  InvokeModelWithResponseStreamWrapper,
33
+ _Choice,
34
+ genai_capture_message_content,
35
+ message_to_event,
32
36
  )
33
37
  from opentelemetry.instrumentation.botocore.extensions.types import (
34
38
  _AttributeMapT,
35
39
  _AwsSdkExtension,
36
40
  _BotoClientErrorT,
41
+ _BotocoreInstrumentorContext,
37
42
  )
43
+ from opentelemetry.metrics import Instrument, Meter
38
44
  from opentelemetry.semconv._incubating.attributes.error_attributes import (
39
45
  ERROR_TYPE,
40
46
  )
@@ -47,16 +53,56 @@ from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
47
53
  GEN_AI_REQUEST_TOP_P,
48
54
  GEN_AI_RESPONSE_FINISH_REASONS,
49
55
  GEN_AI_SYSTEM,
56
+ GEN_AI_TOKEN_TYPE,
50
57
  GEN_AI_USAGE_INPUT_TOKENS,
51
58
  GEN_AI_USAGE_OUTPUT_TOKENS,
52
59
  GenAiOperationNameValues,
53
60
  GenAiSystemValues,
61
+ GenAiTokenTypeValues,
62
+ )
63
+ from opentelemetry.semconv._incubating.metrics.gen_ai_metrics import (
64
+ GEN_AI_CLIENT_OPERATION_DURATION,
65
+ GEN_AI_CLIENT_TOKEN_USAGE,
54
66
  )
55
67
  from opentelemetry.trace.span import Span
56
68
  from opentelemetry.trace.status import Status, StatusCode
57
69
 
58
70
  _logger = logging.getLogger(__name__)
59
71
 
72
+ _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
73
+ 0.01,
74
+ 0.02,
75
+ 0.04,
76
+ 0.08,
77
+ 0.16,
78
+ 0.32,
79
+ 0.64,
80
+ 1.28,
81
+ 2.56,
82
+ 5.12,
83
+ 10.24,
84
+ 20.48,
85
+ 40.96,
86
+ 81.92,
87
+ ]
88
+
89
+ _GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
90
+ 1,
91
+ 4,
92
+ 16,
93
+ 64,
94
+ 256,
95
+ 1024,
96
+ 4096,
97
+ 16384,
98
+ 65536,
99
+ 262144,
100
+ 1048576,
101
+ 4194304,
102
+ 16777216,
103
+ 67108864,
104
+ ]
105
+
60
106
  _MODEL_ID_KEY: str = "modelId"
61
107
 
62
108
 
@@ -84,6 +130,40 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
84
130
  not in self._DONT_CLOSE_SPAN_ON_END_OPERATIONS
85
131
  )
86
132
 
133
+ def setup_metrics(self, meter: Meter, metrics: dict[str, Instrument]):
134
+ metrics[GEN_AI_CLIENT_OPERATION_DURATION] = meter.create_histogram(
135
+ name=GEN_AI_CLIENT_OPERATION_DURATION,
136
+ description="GenAI operation duration",
137
+ unit="s",
138
+ explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
139
+ )
140
+ metrics[GEN_AI_CLIENT_TOKEN_USAGE] = meter.create_histogram(
141
+ name=GEN_AI_CLIENT_TOKEN_USAGE,
142
+ description="Measures number of input and output tokens used",
143
+ unit="{token}",
144
+ explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
145
+ )
146
+
147
+ def _extract_metrics_attributes(self) -> _AttributeMapT:
148
+ attributes = {GEN_AI_SYSTEM: GenAiSystemValues.AWS_BEDROCK.value}
149
+
150
+ model_id = self._call_context.params.get(_MODEL_ID_KEY)
151
+ if not model_id:
152
+ return attributes
153
+
154
+ attributes[GEN_AI_REQUEST_MODEL] = model_id
155
+
156
+ # titan in invoke model is a text completion one
157
+ if "body" in self._call_context.params and "amazon.titan" in model_id:
158
+ attributes[GEN_AI_OPERATION_NAME] = (
159
+ GenAiOperationNameValues.TEXT_COMPLETION.value
160
+ )
161
+ else:
162
+ attributes[GEN_AI_OPERATION_NAME] = (
163
+ GenAiOperationNameValues.CHAT.value
164
+ )
165
+ return attributes
166
+
87
167
  def extract_attributes(self, attributes: _AttributeMapT):
88
168
  if self._call_context.operation not in self._HANDLED_OPERATIONS:
89
169
  return
@@ -205,41 +285,137 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
205
285
  if value is not None:
206
286
  attributes[key] = value
207
287
 
208
- def before_service_call(self, span: Span):
288
+ def _get_request_messages(self):
289
+ """Extracts and normalize system and user / assistant messages"""
290
+ input_text = None
291
+ if system := self._call_context.params.get("system", []):
292
+ system_messages = [{"role": "system", "content": system}]
293
+ else:
294
+ system_messages = []
295
+
296
+ if not (messages := self._call_context.params.get("messages", [])):
297
+ if body := self._call_context.params.get("body"):
298
+ decoded_body = json.loads(body)
299
+ if system := decoded_body.get("system"):
300
+ if isinstance(system, str):
301
+ content = [{"text": system}]
302
+ else:
303
+ content = system
304
+ system_messages = [{"role": "system", "content": content}]
305
+
306
+ messages = decoded_body.get("messages", [])
307
+ if not messages:
308
+ # transform old school amazon titan invokeModel api to messages
309
+ if input_text := decoded_body.get("inputText"):
310
+ messages = [
311
+ {"role": "user", "content": [{"text": input_text}]}
312
+ ]
313
+
314
+ return system_messages + messages
315
+
316
+ def before_service_call(
317
+ self, span: Span, instrumentor_context: _BotocoreInstrumentorContext
318
+ ):
209
319
  if self._call_context.operation not in self._HANDLED_OPERATIONS:
210
320
  return
211
321
 
212
- if not span.is_recording():
213
- return
322
+ capture_content = genai_capture_message_content()
214
323
 
215
- operation_name = span.attributes.get(GEN_AI_OPERATION_NAME, "")
216
- request_model = span.attributes.get(GEN_AI_REQUEST_MODEL, "")
217
- # avoid setting to an empty string if are not available
218
- if operation_name and request_model:
219
- span.update_name(f"{operation_name} {request_model}")
324
+ messages = self._get_request_messages()
325
+ for message in messages:
326
+ event_logger = instrumentor_context.event_logger
327
+ for event in message_to_event(message, capture_content):
328
+ event_logger.emit(event)
220
329
 
221
- # pylint: disable=no-self-use
222
- def _converse_on_success(self, span: Span, result: dict[str, Any]):
223
- if usage := result.get("usage"):
224
- if input_tokens := usage.get("inputTokens"):
225
- span.set_attribute(
226
- GEN_AI_USAGE_INPUT_TOKENS,
227
- input_tokens,
228
- )
229
- if output_tokens := usage.get("outputTokens"):
330
+ if span.is_recording():
331
+ operation_name = span.attributes.get(GEN_AI_OPERATION_NAME, "")
332
+ request_model = span.attributes.get(GEN_AI_REQUEST_MODEL, "")
333
+ # avoid setting to an empty string if are not available
334
+ if operation_name and request_model:
335
+ span.update_name(f"{operation_name} {request_model}")
336
+
337
+ # this is used to calculate the operation duration metric, duration may be skewed by request_hook
338
+ # pylint: disable=attribute-defined-outside-init
339
+ self._operation_start = default_timer()
340
+
341
+ # pylint: disable=no-self-use,too-many-locals
342
+ def _converse_on_success(
343
+ self,
344
+ span: Span,
345
+ result: dict[str, Any],
346
+ instrumentor_context: _BotocoreInstrumentorContext,
347
+ capture_content,
348
+ ):
349
+ if span.is_recording():
350
+ if usage := result.get("usage"):
351
+ if input_tokens := usage.get("inputTokens"):
352
+ span.set_attribute(
353
+ GEN_AI_USAGE_INPUT_TOKENS,
354
+ input_tokens,
355
+ )
356
+ if output_tokens := usage.get("outputTokens"):
357
+ span.set_attribute(
358
+ GEN_AI_USAGE_OUTPUT_TOKENS,
359
+ output_tokens,
360
+ )
361
+
362
+ if stop_reason := result.get("stopReason"):
230
363
  span.set_attribute(
231
- GEN_AI_USAGE_OUTPUT_TOKENS,
232
- output_tokens,
364
+ GEN_AI_RESPONSE_FINISH_REASONS,
365
+ [stop_reason],
233
366
  )
234
367
 
235
- if stop_reason := result.get("stopReason"):
236
- span.set_attribute(
237
- GEN_AI_RESPONSE_FINISH_REASONS,
238
- [stop_reason],
368
+ event_logger = instrumentor_context.event_logger
369
+ choice = _Choice.from_converse(result, capture_content)
370
+ # this path is used by streaming apis, in that case we are already out of the span
371
+ # context so need to add the span context manually
372
+ span_ctx = span.get_span_context()
373
+ event_logger.emit(
374
+ choice.to_choice_event(
375
+ trace_id=span_ctx.trace_id,
376
+ span_id=span_ctx.span_id,
377
+ trace_flags=span_ctx.trace_flags,
378
+ )
379
+ )
380
+
381
+ metrics = instrumentor_context.metrics
382
+ metrics_attributes = self._extract_metrics_attributes()
383
+ if operation_duration_histogram := metrics.get(
384
+ GEN_AI_CLIENT_OPERATION_DURATION
385
+ ):
386
+ duration = max((default_timer() - self._operation_start), 0)
387
+ operation_duration_histogram.record(
388
+ duration,
389
+ attributes=metrics_attributes,
239
390
  )
240
391
 
392
+ if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
393
+ if usage := result.get("usage"):
394
+ if input_tokens := usage.get("inputTokens"):
395
+ input_attributes = {
396
+ **metrics_attributes,
397
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
398
+ }
399
+ token_usage_histogram.record(
400
+ input_tokens, input_attributes
401
+ )
402
+
403
+ if output_tokens := usage.get("outputTokens"):
404
+ output_attributes = {
405
+ **metrics_attributes,
406
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
407
+ }
408
+ token_usage_histogram.record(
409
+ output_tokens, output_attributes
410
+ )
411
+
241
412
  def _invoke_model_on_success(
242
- self, span: Span, result: dict[str, Any], model_id: str
413
+ self,
414
+ span: Span,
415
+ result: dict[str, Any],
416
+ model_id: str,
417
+ instrumentor_context: _BotocoreInstrumentorContext,
418
+ capture_content,
243
419
  ):
244
420
  original_body = None
245
421
  try:
@@ -252,12 +428,17 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
252
428
 
253
429
  response_body = json.loads(body_content.decode("utf-8"))
254
430
  if "amazon.titan" in model_id:
255
- self._handle_amazon_titan_response(span, response_body)
431
+ self._handle_amazon_titan_response(
432
+ span, response_body, instrumentor_context, capture_content
433
+ )
256
434
  elif "amazon.nova" in model_id:
257
- self._handle_amazon_nova_response(span, response_body)
435
+ self._handle_amazon_nova_response(
436
+ span, response_body, instrumentor_context, capture_content
437
+ )
258
438
  elif "anthropic.claude" in model_id:
259
- self._handle_anthropic_claude_response(span, response_body)
260
-
439
+ self._handle_anthropic_claude_response(
440
+ span, response_body, instrumentor_context, capture_content
441
+ )
261
442
  except json.JSONDecodeError:
262
443
  _logger.debug("Error: Unable to parse the response body as JSON")
263
444
  except Exception as exc: # pylint: disable=broad-exception-caught
@@ -266,90 +447,174 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
266
447
  if original_body is not None:
267
448
  original_body.close()
268
449
 
269
- def _on_stream_error_callback(self, span: Span, exception):
450
+ def _on_stream_error_callback(
451
+ self,
452
+ span: Span,
453
+ exception,
454
+ instrumentor_context: _BotocoreInstrumentorContext,
455
+ ):
270
456
  span.set_status(Status(StatusCode.ERROR, str(exception)))
271
457
  if span.is_recording():
272
458
  span.set_attribute(ERROR_TYPE, type(exception).__qualname__)
273
459
  span.end()
274
460
 
275
- def on_success(self, span: Span, result: dict[str, Any]):
461
+ metrics = instrumentor_context.metrics
462
+ metrics_attributes = {
463
+ **self._extract_metrics_attributes(),
464
+ ERROR_TYPE: type(exception).__qualname__,
465
+ }
466
+ if operation_duration_histogram := metrics.get(
467
+ GEN_AI_CLIENT_OPERATION_DURATION
468
+ ):
469
+ duration = max((default_timer() - self._operation_start), 0)
470
+ operation_duration_histogram.record(
471
+ duration,
472
+ attributes=metrics_attributes,
473
+ )
474
+
475
+ def on_success(
476
+ self,
477
+ span: Span,
478
+ result: dict[str, Any],
479
+ instrumentor_context: _BotocoreInstrumentorContext,
480
+ ):
276
481
  if self._call_context.operation not in self._HANDLED_OPERATIONS:
277
482
  return
278
483
 
279
- if not span.is_recording():
280
- if not self.should_end_span_on_exit():
281
- span.end()
282
- return
484
+ capture_content = genai_capture_message_content()
283
485
 
284
- # ConverseStream
285
- if "stream" in result and isinstance(result["stream"], EventStream):
486
+ if self._call_context.operation == "ConverseStream":
487
+ if "stream" in result and isinstance(
488
+ result["stream"], EventStream
489
+ ):
286
490
 
287
- def stream_done_callback(response):
288
- self._converse_on_success(span, response)
289
- span.end()
491
+ def stream_done_callback(response):
492
+ self._converse_on_success(
493
+ span, response, instrumentor_context, capture_content
494
+ )
495
+ span.end()
290
496
 
291
- def stream_error_callback(exception):
292
- self._on_stream_error_callback(span, exception)
497
+ def stream_error_callback(exception):
498
+ self._on_stream_error_callback(
499
+ span, exception, instrumentor_context
500
+ )
293
501
 
294
- result["stream"] = ConverseStreamWrapper(
295
- result["stream"], stream_done_callback, stream_error_callback
502
+ result["stream"] = ConverseStreamWrapper(
503
+ result["stream"],
504
+ stream_done_callback,
505
+ stream_error_callback,
506
+ )
507
+ return
508
+ elif self._call_context.operation == "Converse":
509
+ self._converse_on_success(
510
+ span, result, instrumentor_context, capture_content
296
511
  )
297
- return
298
-
299
- # Converse
300
- self._converse_on_success(span, result)
301
512
 
302
513
  model_id = self._call_context.params.get(_MODEL_ID_KEY)
303
514
  if not model_id:
304
515
  return
305
516
 
306
- # InvokeModel
307
- if "body" in result and isinstance(result["body"], StreamingBody):
308
- self._invoke_model_on_success(span, result, model_id)
309
- return
310
-
311
- # InvokeModelWithResponseStream
312
- if "body" in result and isinstance(result["body"], EventStream):
313
-
314
- def invoke_model_stream_done_callback(response):
315
- # the callback gets data formatted as the simpler converse API
316
- self._converse_on_success(span, response)
317
- span.end()
517
+ if self._call_context.operation == "InvokeModel":
518
+ if "body" in result and isinstance(result["body"], StreamingBody):
519
+ self._invoke_model_on_success(
520
+ span,
521
+ result,
522
+ model_id,
523
+ instrumentor_context,
524
+ capture_content,
525
+ )
526
+ return
527
+ elif self._call_context.operation == "InvokeModelWithResponseStream":
528
+ if "body" in result and isinstance(result["body"], EventStream):
529
+
530
+ def invoke_model_stream_done_callback(response):
531
+ # the callback gets data formatted as the simpler converse API
532
+ self._converse_on_success(
533
+ span, response, instrumentor_context, capture_content
534
+ )
535
+ span.end()
318
536
 
319
- def invoke_model_stream_error_callback(exception):
320
- self._on_stream_error_callback(span, exception)
537
+ def invoke_model_stream_error_callback(exception):
538
+ self._on_stream_error_callback(
539
+ span, exception, instrumentor_context
540
+ )
321
541
 
322
- result["body"] = InvokeModelWithResponseStreamWrapper(
323
- result["body"],
324
- invoke_model_stream_done_callback,
325
- invoke_model_stream_error_callback,
326
- model_id,
327
- )
328
- return
542
+ result["body"] = InvokeModelWithResponseStreamWrapper(
543
+ result["body"],
544
+ invoke_model_stream_done_callback,
545
+ invoke_model_stream_error_callback,
546
+ model_id,
547
+ )
548
+ return
329
549
 
330
- # pylint: disable=no-self-use
550
+ # pylint: disable=no-self-use,too-many-locals
331
551
  def _handle_amazon_titan_response(
332
- self, span: Span, response_body: dict[str, Any]
552
+ self,
553
+ span: Span,
554
+ response_body: dict[str, Any],
555
+ instrumentor_context: _BotocoreInstrumentorContext,
556
+ capture_content: bool,
333
557
  ):
334
558
  if "inputTextTokenCount" in response_body:
335
559
  span.set_attribute(
336
560
  GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"]
337
561
  )
338
- if "results" in response_body and response_body["results"]:
339
- result = response_body["results"][0]
340
- if "tokenCount" in result:
341
- span.set_attribute(
342
- GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"]
343
- )
344
- if "completionReason" in result:
345
- span.set_attribute(
346
- GEN_AI_RESPONSE_FINISH_REASONS,
347
- [result["completionReason"]],
562
+ if "results" in response_body and response_body["results"]:
563
+ result = response_body["results"][0]
564
+ if "tokenCount" in result:
565
+ span.set_attribute(
566
+ GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"]
567
+ )
568
+ if "completionReason" in result:
569
+ span.set_attribute(
570
+ GEN_AI_RESPONSE_FINISH_REASONS,
571
+ [result["completionReason"]],
572
+ )
573
+
574
+ event_logger = instrumentor_context.event_logger
575
+ choice = _Choice.from_invoke_amazon_titan(
576
+ response_body, capture_content
577
+ )
578
+ event_logger.emit(choice.to_choice_event())
579
+
580
+ metrics = instrumentor_context.metrics
581
+ metrics_attributes = self._extract_metrics_attributes()
582
+ if operation_duration_histogram := metrics.get(
583
+ GEN_AI_CLIENT_OPERATION_DURATION
584
+ ):
585
+ duration = max((default_timer() - self._operation_start), 0)
586
+ operation_duration_histogram.record(
587
+ duration,
588
+ attributes=metrics_attributes,
589
+ )
590
+
591
+ if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
592
+ if input_tokens := response_body.get("inputTextTokenCount"):
593
+ input_attributes = {
594
+ **metrics_attributes,
595
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
596
+ }
597
+ token_usage_histogram.record(
598
+ input_tokens, input_attributes
348
599
  )
349
600
 
350
- # pylint: disable=no-self-use
601
+ if results := response_body.get("results"):
602
+ if output_tokens := results[0].get("tokenCount"):
603
+ output_attributes = {
604
+ **metrics_attributes,
605
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
606
+ }
607
+ token_usage_histogram.record(
608
+ output_tokens, output_attributes
609
+ )
610
+
611
+ # pylint: disable=no-self-use,too-many-locals
351
612
  def _handle_amazon_nova_response(
352
- self, span: Span, response_body: dict[str, Any]
613
+ self,
614
+ span: Span,
615
+ response_body: dict[str, Any],
616
+ instrumentor_context: _BotocoreInstrumentorContext,
617
+ capture_content: bool,
353
618
  ):
354
619
  if "usage" in response_body:
355
620
  usage = response_body["usage"]
@@ -366,9 +631,48 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
366
631
  GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]]
367
632
  )
368
633
 
634
+ event_logger = instrumentor_context.event_logger
635
+ choice = _Choice.from_converse(response_body, capture_content)
636
+ event_logger.emit(choice.to_choice_event())
637
+
638
+ metrics = instrumentor_context.metrics
639
+ metrics_attributes = self._extract_metrics_attributes()
640
+ if operation_duration_histogram := metrics.get(
641
+ GEN_AI_CLIENT_OPERATION_DURATION
642
+ ):
643
+ duration = max((default_timer() - self._operation_start), 0)
644
+ operation_duration_histogram.record(
645
+ duration,
646
+ attributes=metrics_attributes,
647
+ )
648
+
649
+ if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
650
+ if usage := response_body.get("usage"):
651
+ if input_tokens := usage.get("inputTokens"):
652
+ input_attributes = {
653
+ **metrics_attributes,
654
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
655
+ }
656
+ token_usage_histogram.record(
657
+ input_tokens, input_attributes
658
+ )
659
+
660
+ if output_tokens := usage.get("outputTokens"):
661
+ output_attributes = {
662
+ **metrics_attributes,
663
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
664
+ }
665
+ token_usage_histogram.record(
666
+ output_tokens, output_attributes
667
+ )
668
+
369
669
  # pylint: disable=no-self-use
370
670
  def _handle_anthropic_claude_response(
371
- self, span: Span, response_body: dict[str, Any]
671
+ self,
672
+ span: Span,
673
+ response_body: dict[str, Any],
674
+ instrumentor_context: _BotocoreInstrumentorContext,
675
+ capture_content: bool,
372
676
  ):
373
677
  if usage := response_body.get("usage"):
374
678
  if "input_tokens" in usage:
@@ -384,7 +688,49 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
384
688
  GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]]
385
689
  )
386
690
 
387
- def on_error(self, span: Span, exception: _BotoClientErrorT):
691
+ event_logger = instrumentor_context.event_logger
692
+ choice = _Choice.from_invoke_anthropic_claude(
693
+ response_body, capture_content
694
+ )
695
+ event_logger.emit(choice.to_choice_event())
696
+
697
+ metrics = instrumentor_context.metrics
698
+ metrics_attributes = self._extract_metrics_attributes()
699
+ if operation_duration_histogram := metrics.get(
700
+ GEN_AI_CLIENT_OPERATION_DURATION
701
+ ):
702
+ duration = max((default_timer() - self._operation_start), 0)
703
+ operation_duration_histogram.record(
704
+ duration,
705
+ attributes=metrics_attributes,
706
+ )
707
+
708
+ if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
709
+ if usage := response_body.get("usage"):
710
+ if input_tokens := usage.get("input_tokens"):
711
+ input_attributes = {
712
+ **metrics_attributes,
713
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
714
+ }
715
+ token_usage_histogram.record(
716
+ input_tokens, input_attributes
717
+ )
718
+
719
+ if output_tokens := usage.get("output_tokens"):
720
+ output_attributes = {
721
+ **metrics_attributes,
722
+ GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
723
+ }
724
+ token_usage_histogram.record(
725
+ output_tokens, output_attributes
726
+ )
727
+
728
+ def on_error(
729
+ self,
730
+ span: Span,
731
+ exception: _BotoClientErrorT,
732
+ instrumentor_context: _BotocoreInstrumentorContext,
733
+ ):
388
734
  if self._call_context.operation not in self._HANDLED_OPERATIONS:
389
735
  return
390
736
 
@@ -394,3 +740,17 @@ class _BedrockRuntimeExtension(_AwsSdkExtension):
394
740
 
395
741
  if not self.should_end_span_on_exit():
396
742
  span.end()
743
+
744
+ metrics = instrumentor_context.metrics
745
+ metrics_attributes = {
746
+ **self._extract_metrics_attributes(),
747
+ ERROR_TYPE: type(exception).__qualname__,
748
+ }
749
+ if operation_duration_histogram := metrics.get(
750
+ GEN_AI_CLIENT_OPERATION_DURATION
751
+ ):
752
+ duration = max((default_timer() - self._operation_start), 0)
753
+ operation_duration_histogram.record(
754
+ duration,
755
+ attributes=metrics_attributes,
756
+ )