llama-index-llms-bedrock-converse 0.8.2__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
--- a/llama_index/llms/bedrock_converse/utils.py
+++ b/llama_index/llms/bedrock_converse/utils.py
@@ -1,7 +1,18 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Literal,
+    Union,
+)
+from typing_extensions import TypedDict
 from tenacity import (
     before_sleep_log,
     retry,
@@ -20,6 +31,8 @@ from llama_index.core.base.llms.types import (
     AudioBlock,
     DocumentBlock,
     CachePoint,
+    ThinkingBlock,
+    ToolCallBlock,
 )
 
 
@@ -29,6 +42,7 @@ HUMAN_PREFIX = "\n\nHuman:"
 ASSISTANT_PREFIX = "\n\nAssistant:"
 
 BEDROCK_MODELS = {
+    "amazon.nova-premier-v1:0": 1000000,
     "amazon.nova-pro-v1:0": 300000,
     "amazon.nova-lite-v1:0": 300000,
     "amazon.nova-micro-v1:0": 128000,
@@ -47,7 +61,11 @@ BEDROCK_MODELS = {
     "anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
     "anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
     "anthropic.claude-opus-4-20250514-v1:0": 200000,
+    "anthropic.claude-opus-4-1-20250805-v1:0": 200000,
+    "anthropic.claude-opus-4-5-20251101-v1:0": 200000,
     "anthropic.claude-sonnet-4-20250514-v1:0": 200000,
+    "anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
+    "anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
     "ai21.j2-mid-v1": 8192,
     "ai21.j2-ultra-v1": 8192,
     "cohere.command-text-v14": 4096,
@@ -80,6 +98,7 @@ BEDROCK_MODELS = {
 }
 
 BEDROCK_FUNCTION_CALLING_MODELS = (
+    "amazon.nova-premier-v1:0",
     "amazon.nova-pro-v1:0",
     "amazon.nova-lite-v1:0",
     "amazon.nova-micro-v1:0",
@@ -91,7 +110,11 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
     "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
     "cohere.command-r-v1:0",
     "cohere.command-r-plus-v1:0",
     "mistral.mistral-large-2402-v1:0",
@@ -108,6 +131,7 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
 )
 
 BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
+    "amazon.nova-premier-v1:0",
     "amazon.nova-pro-v1:0",
     "amazon.nova-lite-v1:0",
     "amazon.nova-micro-v1:0",
@@ -119,7 +143,11 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
     "meta.llama3-1-8b-instruct-v1:0",
     "meta.llama3-1-70b-instruct-v1:0",
     "meta.llama3-2-1b-instruct-v1:0",
@@ -131,12 +159,43 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)
+
+BEDROCK_REASONING_MODELS = (
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
+    "deepseek.r1-v1:0",
+)
+
+
+def is_reasoning(model_name: str) -> bool:
+    model_name = get_model_name(model_name)
+    return model_name in BEDROCK_REASONING_MODELS
 
 
 def get_model_name(model_name: str) -> str:
     """Extract base model name from region-prefixed model identifier."""
-    # Check for region prefixes (us, eu, apac)
-    REGION_PREFIXES = ["us.", "eu.", "apac."]
+    # Check for region prefixes (us, eu, apac, jp, global)
+    REGION_PREFIXES = ["us.", "eu.", "apac.", "jp.", "global."]
 
     # If no region prefix, return the original model name
     if not any(prefix in model_name for prefix in REGION_PREFIXES):
@@ -159,6 +218,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
 
 
+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)
 
@@ -197,6 +260,22 @@ def _content_block_to_bedrock_format(
         return {
             "text": block.text,
         }
+    elif isinstance(block, ThinkingBlock):
+        if block.content:
+            thinking_data = {
+                "reasoningContent": {"reasoningText": {"text": block.content}}
+            }
+            if (
+                "signature" in block.additional_information
+                and block.additional_information["signature"]
+            ):
+                thinking_data["reasoningContent"]["reasoningText"]["signature"] = (
+                    block.additional_information["signature"]
+                )
+
+            return thinking_data
+        else:
+            return None
     elif isinstance(block, DocumentBlock):
         if not block.data:
             file_buffer = block.resolve_document()
@@ -230,6 +309,23 @@ def _content_block_to_bedrock_format(
     elif isinstance(block, AudioBlock):
         logger.warning("Audio blocks are not supported in Bedrock Converse API.")
         return None
+    elif isinstance(block, ToolCallBlock):
+        if isinstance(block.tool_kwargs, str):
+            try:
+                tool_input = json.loads(block.tool_kwargs or "{}")
+            except json.JSONDecodeError:
+                tool_input = {}
+        else:
+            tool_input = block.tool_kwargs
+
+        return {
+            "toolUse": {
+                "input": tool_input,
+                "toolUseId": block.tool_call_id or "",
+                "name": block.tool_name,
+            }
+        }
+
     else:
         logger.warning(f"Unsupported block type: {type(block)}")
         return None
@@ -254,12 +350,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
 
 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-) -> Tuple[Sequence[Dict[str, Any]], str]:
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
 
     Args:
         messages: List of ChatMessages
+        model: optional model name used to omit cache point if the model does not support it
 
     Returns:
         Tuple of:
@@ -268,10 +366,42 @@
 
     """
     converse_messages = []
-    system_prompt = ""
+    system_prompt = []
+    current_system_prompt = ""
+
     for message in messages:
-        if message.role == MessageRole.SYSTEM and message.content:
-            system_prompt += (message.content) + "\n"
+        unique_tool_calls = []
+        if message.role == MessageRole.SYSTEM:
+            # we iterate over blocks, if content was used, the blocks are added anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # Only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # when we find a cache point we push the current system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # we add the cache point
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
@@ -297,6 +427,13 @@
                 )
                 if bedrock_format_block:
                     content.append(bedrock_format_block)
+                    if "toolUse" in bedrock_format_block:
+                        unique_tool_calls.append(
+                            (
+                                bedrock_format_block["toolUse"]["toolUseId"],
+                                bedrock_format_block["toolUse"]["name"],
+                            )
+                        )
 
             if content:
                 converse_messages.append(
@@ -306,6 +443,7 @@
                     }
                 )
 
+        # keep this code here for compatibility with older chat histories
         # convert tool calls to the AWS Bedrock Converse format
         # NOTE tool calls might show up within any message,
        # e.g. within assistant message or in consecutive tool calls,
@@ -313,25 +451,28 @@
         tool_calls = message.additional_kwargs.get("tool_calls", [])
         content = []
         for tool_call in tool_calls:
-            assert "toolUseId" in tool_call, f"`toolUseId` not found in {tool_call}"
-            assert "input" in tool_call, f"`input` not found in {tool_call}"
-            assert "name" in tool_call, f"`name` not found in {tool_call}"
-            tool_input = tool_call["input"] if tool_call["input"] else {}
-            if isinstance(tool_input, str):
-                try:
-                    tool_input = json.loads(tool_input or "{}")
-                except json.JSONDecodeError:
-                    tool_input = {}
-
-            content.append(
-                {
-                    "toolUse": {
-                        "input": tool_input,
-                        "toolUseId": tool_call["toolUseId"],
-                        "name": tool_call["name"],
-                    }
-                }
-            )
+            try:
+                assert "toolUseId" in tool_call
+                assert "input" in tool_call
+                assert "name" in tool_call
+                if (tool_call["toolUseId"], tool_call["name"]) not in unique_tool_calls:
+                    tool_input = tool_call["input"] if tool_call["input"] else {}
+                    if isinstance(tool_input, str):
+                        try:
+                            tool_input = json.loads(tool_input or "{}")
+                        except json.JSONDecodeError:
+                            tool_input = {}
+                    content.append(
+                        {
+                            "toolUse": {
+                                "input": tool_input,
+                                "toolUseId": tool_call["toolUseId"],
+                                "name": tool_call["name"],
+                            }
+                        }
+                    )
+            except AssertionError:
+                continue
         if len(content) > 0:
             converse_messages.append(
                 {
@@ -339,14 +480,17 @@
                     "content": content,
                 }
             )
-
-    return __merge_common_role_msgs(converse_messages), system_prompt.strip()
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt
 
 
 def tools_to_converse_tools(
     tools: List["BaseTool"],
     tool_choice: Optional[dict] = None,
     tool_required: bool = False,
+    tool_caching: bool = False,
+    supports_forced_tool_calls: bool = True,
 ) -> Dict[str, Any]:
     """
     Converts a list of tools to AWS Bedrock Converse tools.
@@ -371,18 +515,35 @@
             "inputSchema": {"json": tool.metadata.get_parameters_dict()},
         }
         converse_tools.append({"toolSpec": tool_dict})
+
+    if tool_caching:
+        converse_tools.append({"cachePoint": {"type": "default"}})
+
+    if tool_choice:
+        tool_choice = tool_choice
+    elif supports_forced_tool_calls and tool_required:
+        tool_choice = {"any": {}}
+    else:
+        tool_choice = {"auto": {}}
+
     return {
         "tools": converse_tools,
         # https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
        # e.g. { "auto": {} }
-        "toolChoice": tool_choice or ({"any": {}} if tool_required else {"auto": {}}),
+        "toolChoice": tool_choice,
     }
 
 
 def force_single_tool_call(response: ChatResponse) -> None:
-    tool_calls = response.message.additional_kwargs.get("tool_calls", [])
+    tool_calls = [
+        block for block in response.message.blocks if isinstance(block, ToolCallBlock)
+    ]
     if len(tool_calls) > 1:
-        response.message.additional_kwargs["tool_calls"] = [tool_calls[0]]
+        response.message.blocks = [
+            block
+            for block in response.message.blocks
+            if not isinstance(block, ToolCallBlock)
+        ] + [tool_calls[0]]
 
 
 def _create_retry_decorator(client: Any, max_retries: int) -> Callable[[Any], Any]:
@@ -436,12 +597,15 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
+    system_prompt_caching: bool = False,
+    tool_caching: bool = False,
     max_tokens: int = 1000,
     temperature: float = 0.1,
     stream: bool = False,
     guardrail_identifier: Optional[str] = None,
     guardrail_version: Optional[str] = None,
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
     trace: Optional[str] = None,
     **kwargs: Any,
 ) -> Any:
@@ -455,32 +619,62 @@
             "temperature": temperature,
         },
     }
+    if "thinking" in kwargs:
+        converse_kwargs["additionalModelRequestFields"] = {
+            "thinking": kwargs["thinking"]
+        }
     if system_prompt:
-        converse_kwargs["system"] = [{"text": system_prompt}]
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
+            system_messages.append({"cachePoint": {"type": "default"}})
+        converse_kwargs["system"] = system_messages
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
+
     if guardrail_identifier and guardrail_version:
         converse_kwargs["guardrailConfig"] = {}
         converse_kwargs["guardrailConfig"]["guardrailIdentifier"] = guardrail_identifier
         converse_kwargs["guardrailConfig"]["guardrailVersion"] = guardrail_version
         if trace:
             converse_kwargs["guardrailConfig"]["trace"] = trace
+        if guardrail_stream_processing_mode and stream:
+            converse_kwargs["guardrailConfig"]["streamProcessingMode"] = (
+                guardrail_stream_processing_mode
+            )
+
     converse_kwargs = join_two_dicts(
         converse_kwargs,
         {
             k: v
             for k, v in kwargs.items()
-            if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
+            if k
+            not in [
+                "tools",
+                "guardrail_identifier",
+                "guardrail_version",
+                "trace",
+                "thinking",
+            ]
         },
     )
 
     @retry_decorator
-    def _conversion_with_retry(**kwargs: Any) -> Any:
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-        return client.converse(**kwargs)
+        else:
+            return client.converse(**kwargs)
 
-    return _conversion_with_retry(**converse_kwargs)
+    return _converse_with_retry(**converse_kwargs)
 
 
 async def converse_with_retry_async(
@@ -489,12 +683,15 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
+    system_prompt_caching: bool = False,
+    tool_caching: bool = False,
     max_tokens: int = 1000,
     temperature: float = 0.1,
     stream: bool = False,
     guardrail_identifier: Optional[str] = None,
     guardrail_version: Optional[str] = None,
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
     trace: Optional[str] = None,
     boto_client_kwargs: Optional[Dict[str, Any]] = None,
     **kwargs: Any,
@@ -509,22 +706,55 @@
             "temperature": temperature,
         },
     }
+    if "thinking" in kwargs:
+        converse_kwargs["additionalModelRequestFields"] = {
+            "thinking": kwargs["thinking"]
+        }
+
     if system_prompt:
-        converse_kwargs["system"] = [{"text": system_prompt}]
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
+            system_messages.append({"cachePoint": {"type": "default"}})
+        converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
+        if tool_caching and "tools" in converse_kwargs["toolConfig"]:
+            converse_kwargs["toolConfig"]["tools"].append(
+                {"cachePoint": {"type": "default"}}
+            )
     if guardrail_identifier and guardrail_version:
         converse_kwargs["guardrailConfig"] = {}
         converse_kwargs["guardrailConfig"]["guardrailIdentifier"] = guardrail_identifier
         converse_kwargs["guardrailConfig"]["guardrailVersion"] = guardrail_version
         if trace:
             converse_kwargs["guardrailConfig"]["trace"] = trace
+        if guardrail_stream_processing_mode and stream:
+            converse_kwargs["guardrailConfig"]["streamProcessingMode"] = (
+                guardrail_stream_processing_mode
+            )
     converse_kwargs = join_two_dicts(
         converse_kwargs,
         {
             k: v
             for k, v in kwargs.items()
-            if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
+            if k
+            not in [
+                "tools",
+                "guardrail_identifier",
+                "guardrail_version",
+                "trace",
+                "thinking",
+            ]
         },
     )
     _boto_client_kwargs = {}
@@ -590,3 +820,8 @@ def join_two_dicts(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, An
         else:
             new_dict[key] += value
     return new_dict
+
+
+class ThinkingDict(TypedDict):
+    type: Literal["enabled"]
+    budget_tokens: int
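
For orientation, the sketch below is illustrative only and is not part of the package. It uses only names visible in the diff above, imported from the `llama_index/llms/bedrock_converse/utils.py` module listed in the RECORD:

```py
# Illustrative sketch: exercises helpers added in 0.12.3, as shown in the diff above.
from llama_index.llms.bedrock_converse.utils import (
    ThinkingDict,
    get_model_name,
    is_bedrock_prompt_caching_supported_model,
    is_reasoning,
)

# An inference-profile (region-prefixed) model identifier.
model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"

# Region prefixes ("us.", "eu.", "apac.", "jp.", "global.") are stripped
# before the capability tables are consulted.
print(get_model_name(model_id))  # anthropic.claude-3-7-sonnet-20250219-v1:0

# Capability checks introduced in this release.
print(is_reasoning(model_id))  # True, listed in BEDROCK_REASONING_MODELS
print(is_bedrock_prompt_caching_supported_model(model_id))  # True

# Shape of the reasoning payload that converse_with_retry forwards to Bedrock
# as additionalModelRequestFields={"thinking": ...} when a "thinking" kwarg is given.
thinking: ThinkingDict = {"type": "enabled", "budget_tokens": 1024}
```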

--- a/llama_index_llms_bedrock_converse-0.8.2.dist-info/METADATA
+++ b/llama_index_llms_bedrock_converse-0.12.3.dist-info/METADATA
@@ -1,14 +1,14 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.8.2
+Version: 0.12.3
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=13.1.1
-Requires-Dist: boto3<2,>=1.34.122
-Requires-Dist: llama-index-core<0.14,>=0.13.0
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
+Requires-Dist: llama-index-core<0.15,>=0.14.5
 Description-Content-Type: text/markdown
 
 # LlamaIndex Llms Integration: Bedrock Converse
@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```
 
+### Prompt Caching System and regular messages
+
+You can cache normal and system messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example
 
 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/

--- /dev/null
+++ b/llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD
@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=xKveT_9f_O0LNq4eHl3KntdN2ADoXf7xIUAxWgg5ASc,46145
+llama_index/llms/bedrock_converse/utils.py,sha256=dP24P7SU9l6NiRC4jbBRjnsPoSwEBXDBMzqwSAgSMX0,30001
+llama_index_llms_bedrock_converse-0.12.3.dist-info/METADATA,sha256=249BYMoiTZFGMoBfcYWycS89IzF-WqbjcIN6hqlNKFY,7834
+llama_index_llms_bedrock_converse-0.12.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llama_index_llms_bedrock_converse-0.12.3.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD,,

--- a/llama_index_llms_bedrock_converse-0.8.2.dist-info/WHEEL
+++ b/llama_index_llms_bedrock_converse-0.12.3.dist-info/WHEEL
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.27.0
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any

--- a/llama_index_llms_bedrock_converse-0.8.2.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=TVamvIWu2LVURhO8o7CDZfikuu9ulbJYLBGTV4ku-Es,33802
-llama_index/llms/bedrock_converse/utils.py,sha256=ZmYMUWJjW8Ln3CpQ7Gwvc0X0BCKPzK46LffME9BKg5g,21163
-llama_index_llms_bedrock_converse-0.8.2.dist-info/METADATA,sha256=KiEZO591tVTfiJpLSWMOd4Eqv3VLMXunE41nh7Nyt3I,6563
-llama_index_llms_bedrock_converse-0.8.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.8.2.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.8.2.dist-info/RECORD,,