lex-llm-bedrock 0.3.10 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/extensions/llm/bedrock/provider.rb +140 -20
- data/lib/legion/extensions/llm/bedrock/version.rb +1 -1
- data/lib/legion/extensions/llm/bedrock.rb +4 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 52fa9adf70213041c0b2e6e96b44ff50b64220ae09f9cdab5619808f725df914
|
|
4
|
+
data.tar.gz: 12b5b4b31b712be0f0831e5e37dd045e6ca89442296bc645100c05cb41f4f8e0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3519d77bffac0e5ea90805fe64cc095f831e6069bc21bf93ffa3e0441977560ebd556b8d2c82b5fad163301c3a78c54e5efdf6bc8060f0ad4935cdce07ab2cfd
|
|
7
|
+
data.tar.gz: 96f1a2d3d394e17b0a517a3523445d027b9a11819785c889b2aae483bfd97753d8b7926aeadaab827330831525a3c86e3b4a3e2d64462b6d35113d359431e7fa
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.11 - 2026-05-31
|
|
4
|
+
|
|
5
|
+
### Security
|
|
6
|
+
- **BEDROCK-CRED-01**: Static AWS credentials now emit a deprecation warning. New setting `security.block_static_aws_credentials=true` rejects them entirely, forcing IAM role-based authentication.
|
|
7
|
+
|
|
8
|
+
### Fixed
|
|
9
|
+
- **TRANSLATION-BUG-07**: Bedrock streaming now preserves thinking (chain-of-thought) blocks in the final `Message`. Previously CoT was accumulated by the wire handler but silently dropped from the returned response.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **PROMPT-CACHE-01**: System blocks, tool definitions, and early conversation messages (first 4, never the last) now include `cache_control: { type: "cache_control" }` markers for Anthropic prompt caching via Bedrock Converse.
|
|
13
|
+
- **PROMPT-CACHE-02**: Response parser now reads `cache_read_input_tokens` and `cache_creation_input_tokens` from Bedrock usage metadata and exposes them as `Message#cached_tokens` and `Message#cache_creation_tokens`, respectively (both streaming and non-streaming responses).
|
|
14
|
+
|
|
3
15
|
## 0.3.10 - 2026-05-21
|
|
4
16
|
|
|
5
17
|
- Add `default_transport`/`default_tier` class declarations, remove `configured_transport`/`configured_tier`
|
|
@@ -77,11 +77,24 @@ module Legion
|
|
|
77
77
|
|
|
78
78
|
INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
|
|
79
79
|
|
|
80
|
-
def inference_profile_id(model)
|
|
80
|
+
def inference_profile_id(model, region: nil)
|
|
81
81
|
return model if model.start_with?('us.', 'eu.', 'ap.', 'arn:')
|
|
82
82
|
return model unless INFERENCE_PROFILE_PREFIXES.any? { |p| model.start_with?(p) }
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
prefix = region ? region_prefix(region) : 'us'
|
|
85
|
+
"#{prefix}.#{model}"
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Region-based inference profile prefix mapping.
|
|
89
|
+
# Bare model IDs (e.g. anthropic.claude-sonnet-4) get the region prefix.
|
|
90
|
+
REGION_PREFIX = {
|
|
91
|
+
'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
|
|
92
|
+
'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
|
|
93
|
+
'ap-south-1' => 'ap', 'ap-southeast-1' => 'ap', 'ap-southeast-2' => 'ap', 'ap-northeast-1' => 'ap'
|
|
94
|
+
}.freeze
|
|
95
|
+
|
|
96
|
+
def region_prefix(region)
|
|
97
|
+
REGION_PREFIX.fetch(region.to_s, 'us')
|
|
85
98
|
end
|
|
86
99
|
end
|
|
87
100
|
|
|
@@ -234,7 +247,7 @@ module Legion
|
|
|
234
247
|
log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
|
|
235
248
|
request = Utils.deep_merge(
|
|
236
249
|
{
|
|
237
|
-
model_id: self.class.inference_profile_id(model_id(model)),
|
|
250
|
+
model_id: self.class.inference_profile_id(model_id(model), region: region),
|
|
238
251
|
input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
|
|
239
252
|
},
|
|
240
253
|
params
|
|
@@ -350,22 +363,48 @@ module Legion
|
|
|
350
363
|
ctx ? { context_window: ctx } : nil
|
|
351
364
|
end
|
|
352
365
|
|
|
353
|
-
def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:)
|
|
366
|
+
def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil)
|
|
354
367
|
{
|
|
355
|
-
model_id: self.class.inference_profile_id(model_id(model)),
|
|
368
|
+
model_id: self.class.inference_profile_id(model_id(model), region: region),
|
|
356
369
|
messages: format_messages(messages.reject { |message| message.role == :system }),
|
|
357
370
|
system: format_system(messages),
|
|
358
371
|
inference_config: { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact,
|
|
359
|
-
tool_config: format_tool_config(tools, tool_prefs)
|
|
372
|
+
tool_config: format_tool_config(tools, tool_prefs),
|
|
373
|
+
guardrail_config: guardrail_config
|
|
360
374
|
}.compact
|
|
361
375
|
end
|
|
362
376
|
|
|
363
377
|
def format_messages(messages)
|
|
364
|
-
messages.
|
|
365
|
-
|
|
378
|
+
total = messages.size
|
|
379
|
+
messages.filter_map.with_index do |message, idx|
|
|
380
|
+
blocks = message.role == :tool ? tool_result_blocks(message) : content_blocks(message.content)
|
|
366
381
|
next if blocks.empty?
|
|
367
382
|
|
|
368
|
-
|
|
383
|
+
cache_blocks = should_cache_message?(idx, total) ? add_cache_control_to_blocks(blocks) : blocks
|
|
384
|
+
{ role: bedrock_role(message.role), content: cache_blocks }
|
|
385
|
+
end
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
def tool_result_blocks(message)
|
|
389
|
+
return [] unless message.tool_result?
|
|
390
|
+
|
|
391
|
+
[{
|
|
392
|
+
type: 'tool_result',
|
|
393
|
+
tool_use: { tool_use_id: message.tool_call_id },
|
|
394
|
+
content: [{ type: 'text', text: message.tool_results.to_s }]
|
|
395
|
+
}]
|
|
396
|
+
end
|
|
397
|
+
|
|
398
|
+
def should_cache_message?(index, total)
|
|
399
|
+
# Cache first 4 messages, never the last message
|
|
400
|
+
return false if index == total - 1
|
|
401
|
+
|
|
402
|
+
index < 4
|
|
403
|
+
end
|
|
404
|
+
|
|
405
|
+
def add_cache_control_to_blocks(blocks)
|
|
406
|
+
blocks.map do |block|
|
|
407
|
+
block.dup.merge(cache_control: { type: 'cache_control' })
|
|
369
408
|
end
|
|
370
409
|
end
|
|
371
410
|
|
|
@@ -378,7 +417,7 @@ module Legion
|
|
|
378
417
|
def system_blocks(system)
|
|
379
418
|
return nil if system.to_s.empty?
|
|
380
419
|
|
|
381
|
-
[{ text: system }]
|
|
420
|
+
[{ text: system, cache_control: { type: 'cache_control' } }]
|
|
382
421
|
end
|
|
383
422
|
|
|
384
423
|
def bedrock_role(role)
|
|
@@ -389,12 +428,45 @@ module Legion
|
|
|
389
428
|
raw = raw_content(content)
|
|
390
429
|
return raw if raw
|
|
391
430
|
|
|
431
|
+
return image_blocks(content) if content.respond_to?(:attachments) && !content.attachments.empty?
|
|
432
|
+
|
|
392
433
|
text = content_text(content)
|
|
393
434
|
return [] if text.strip.empty?
|
|
394
435
|
|
|
395
436
|
[{ text: text }]
|
|
396
437
|
end
|
|
397
438
|
|
|
439
|
+
def image_blocks(content)
|
|
440
|
+
blocks = []
|
|
441
|
+
text = content_text(content)
|
|
442
|
+
blocks << { text: text } if text.strip.present?
|
|
443
|
+
|
|
444
|
+
content.attachments.each do |attachment|
|
|
445
|
+
if attachment.is_a?(Legion::Extensions::Llm::Content::ImageAttachment)
|
|
446
|
+
blocks << format_image_attachment(attachment)
|
|
447
|
+
end
|
|
448
|
+
end
|
|
449
|
+
blocks
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
def format_image_attachment(attachment)
|
|
453
|
+
{
|
|
454
|
+
image: {
|
|
455
|
+
format: image_format(attachment.format),
|
|
456
|
+
source: { bytes: attachment.data }
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
end
|
|
460
|
+
|
|
461
|
+
def image_format(fmt)
|
|
462
|
+
case fmt.to_s.downcase
|
|
463
|
+
when 'jpeg', 'jpg' then 'jpeg'
|
|
464
|
+
when 'png' then 'png'
|
|
465
|
+
when 'gif' then 'gif'
|
|
466
|
+
when 'webp' then 'webp'
|
|
467
|
+
end || 'jpeg'
|
|
468
|
+
end
|
|
469
|
+
|
|
398
470
|
def raw_content(content)
|
|
399
471
|
return nil unless content.is_a?(Legion::Extensions::Llm::Content::Raw)
|
|
400
472
|
|
|
@@ -414,7 +486,14 @@ module Legion
|
|
|
414
486
|
"bedrock.provider.tools: formatting tools=#{tools.keys.map(&:to_s).sort.join(',')} " \
|
|
415
487
|
"tool_choice=#{tool_choice_label(tool_prefs)}"
|
|
416
488
|
end
|
|
417
|
-
{
|
|
489
|
+
{
|
|
490
|
+
tools: tools.values.map { |tool| tool_definition_with_cache(tool) },
|
|
491
|
+
tool_choice: tool_choice(tool_prefs)
|
|
492
|
+
}.compact
|
|
493
|
+
end
|
|
494
|
+
|
|
495
|
+
def tool_definition_with_cache(tool)
|
|
496
|
+
tool_definition(tool).merge(cache_control: { type: 'cache_control' })
|
|
418
497
|
end
|
|
419
498
|
|
|
420
499
|
def tool_definition(tool)
|
|
@@ -465,26 +544,33 @@ module Legion
|
|
|
465
544
|
tool_calls: parse_tool_calls(value(message, :content)),
|
|
466
545
|
input_tokens: value(usage, :input_tokens),
|
|
467
546
|
output_tokens: value(usage, :output_tokens),
|
|
547
|
+
cached_tokens: cache_read_tokens(usage),
|
|
548
|
+
cache_creation_tokens: cache_write_tokens(usage),
|
|
468
549
|
raw: normalize_response(response)
|
|
469
550
|
)
|
|
470
551
|
end
|
|
471
552
|
|
|
472
553
|
def stream_converse(request, fallback_model)
|
|
473
|
-
state = { accumulated: +'',
|
|
554
|
+
state = { accumulated: +'', thinking: +'', final_usage: nil, stop_reason: nil,
|
|
555
|
+
tool_use_blocks: [], current_tool_use: nil, in_thinking: false }
|
|
474
556
|
|
|
475
557
|
runtime_client.converse_stream(**request) do |stream|
|
|
476
558
|
wire_stream_handlers(stream, state, fallback_model) { |chunk| yield chunk if block_given? }
|
|
477
559
|
end
|
|
478
560
|
|
|
479
|
-
|
|
561
|
+
msg_attrs = {
|
|
480
562
|
role: :assistant,
|
|
481
563
|
content: state[:accumulated],
|
|
482
564
|
model_id: fallback_model,
|
|
483
565
|
tool_calls: build_stream_tool_calls(state[:tool_use_blocks]),
|
|
484
566
|
input_tokens: value(state[:final_usage], :input_tokens),
|
|
485
567
|
output_tokens: value(state[:final_usage], :output_tokens),
|
|
568
|
+
cached_tokens: cache_read_tokens(state[:final_usage]),
|
|
569
|
+
cache_creation_tokens: cache_write_tokens(state[:final_usage]),
|
|
486
570
|
stop_reason: state[:stop_reason]
|
|
487
|
-
|
|
571
|
+
}
|
|
572
|
+
msg_attrs[:thinking] = state[:thinking] unless state[:thinking].empty?
|
|
573
|
+
Legion::Extensions::Llm::Message.new(**msg_attrs)
|
|
488
574
|
end
|
|
489
575
|
|
|
490
576
|
def wire_stream_handlers(stream, state, fallback_model, &)
|
|
@@ -500,6 +586,13 @@ module Legion
|
|
|
500
586
|
|
|
501
587
|
stream.on_content_block_start_event do |event|
|
|
502
588
|
start = value(event, :start)
|
|
589
|
+
|
|
590
|
+
if value(start, :thinking)
|
|
591
|
+
state[:in_thinking] = true
|
|
592
|
+
next
|
|
593
|
+
end
|
|
594
|
+
|
|
595
|
+
state[:in_thinking] = false
|
|
503
596
|
tool_start = value(start, :tool_use) if start
|
|
504
597
|
next unless tool_start
|
|
505
598
|
|
|
@@ -516,10 +609,14 @@ module Legion
|
|
|
516
609
|
delta = value(event, :delta)
|
|
517
610
|
text = value(delta, :text)
|
|
518
611
|
if text
|
|
519
|
-
state[:
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
612
|
+
if state[:in_thinking]
|
|
613
|
+
state[:thinking] << text
|
|
614
|
+
else
|
|
615
|
+
state[:accumulated] << text
|
|
616
|
+
if block_given?
|
|
617
|
+
yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
|
|
618
|
+
model_id: fallback_model)
|
|
619
|
+
end
|
|
523
620
|
end
|
|
524
621
|
end
|
|
525
622
|
|
|
@@ -565,6 +662,18 @@ module Legion
|
|
|
565
662
|
end
|
|
566
663
|
end
|
|
567
664
|
|
|
665
|
+
def cache_read_tokens(usage)
|
|
666
|
+
return nil if usage.nil?
|
|
667
|
+
|
|
668
|
+
value(usage, :cache_read_input_tokens) || value(usage, 'cache_read_input_tokens')
|
|
669
|
+
end
|
|
670
|
+
|
|
671
|
+
def cache_write_tokens(usage)
|
|
672
|
+
return nil if usage.nil?
|
|
673
|
+
|
|
674
|
+
value(usage, :cache_creation_input_tokens) || value(usage, 'cache_creation_input_tokens')
|
|
675
|
+
end
|
|
676
|
+
|
|
568
677
|
def parse_embedding_response(response, model:)
|
|
569
678
|
body = parse_body(value(response, :body))
|
|
570
679
|
vectors = body['embedding'] || body['embeddings'] || body.dig('data', 0, 'embedding')
|
|
@@ -591,11 +700,11 @@ module Legion
|
|
|
591
700
|
end
|
|
592
701
|
|
|
593
702
|
def bedrock_client
|
|
594
|
-
Aws::Bedrock::Client.new(client_options)
|
|
703
|
+
@bedrock_client ||= Aws::Bedrock::Client.new(client_options)
|
|
595
704
|
end
|
|
596
705
|
|
|
597
706
|
def runtime_client
|
|
598
|
-
Aws::BedrockRuntime::Client.new(client_options)
|
|
707
|
+
@runtime_client ||= Aws::BedrockRuntime::Client.new(client_options)
|
|
599
708
|
end
|
|
600
709
|
|
|
601
710
|
def client_options
|
|
@@ -622,10 +731,21 @@ module Legion
|
|
|
622
731
|
return Aws::SharedCredentials.new(profile_name: config.bedrock_profile) if config.bedrock_profile
|
|
623
732
|
return nil unless config.bedrock_access_key_id
|
|
624
733
|
|
|
734
|
+
if static_credentials_blocked?
|
|
735
|
+
raise SecurityError,
|
|
736
|
+
'Static AWS credentials are disabled (security.block_static_aws_credentials=true); use IAM roles'
|
|
737
|
+
end
|
|
738
|
+
log.warn('[bedrock] Using static AWS credentials — prefer IAM roles for production')
|
|
625
739
|
Aws::Credentials.new(config.bedrock_access_key_id, config.bedrock_secret_access_key,
|
|
626
740
|
config.bedrock_session_token)
|
|
627
741
|
end
|
|
628
742
|
|
|
743
|
+
def static_credentials_blocked?
|
|
744
|
+
return false unless defined?(::Legion::Settings)
|
|
745
|
+
|
|
746
|
+
::Legion::Settings.dig(:extensions, :llm, :security, :block_static_aws_credentials) == true
|
|
747
|
+
end
|
|
748
|
+
|
|
629
749
|
def credential_source
|
|
630
750
|
return :static if config.bedrock_access_key_id
|
|
631
751
|
return :profile if config.bedrock_profile
|
|
@@ -22,7 +22,7 @@ module Legion
|
|
|
22
22
|
::Legion::Extensions::Llm.provider_settings(
|
|
23
23
|
family: PROVIDER_FAMILY,
|
|
24
24
|
instance: {
|
|
25
|
-
default_model: '
|
|
25
|
+
default_model: 'anthropic.claude-sonnet-4',
|
|
26
26
|
region: 'us-east-1',
|
|
27
27
|
tier: :cloud,
|
|
28
28
|
transport: :aws_sdk,
|
|
@@ -74,7 +74,7 @@ module Legion
|
|
|
74
74
|
.transform_values do |config|
|
|
75
75
|
sanitized = sanitize_instance_config(config)
|
|
76
76
|
sanitized[:capabilities] ||= DEFAULT_CAPABILITIES.dup
|
|
77
|
-
sanitized[:default_model] ||= '
|
|
77
|
+
sanitized[:default_model] ||= 'anthropic.claude-sonnet-4'
|
|
78
78
|
sanitized
|
|
79
79
|
end
|
|
80
80
|
end
|
|
@@ -198,6 +198,8 @@ module Legion
|
|
|
198
198
|
end
|
|
199
199
|
|
|
200
200
|
def self.normalize_instance_config(config)
|
|
201
|
+
return {} if config.nil?
|
|
202
|
+
|
|
201
203
|
normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
202
204
|
normalized[:bedrock_region] ||= normalized.delete(:region)
|
|
203
205
|
normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
|