lex-llm-bedrock 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/legion/extensions/llm/bedrock/provider.rb +151 -35
- data/lib/legion/extensions/llm/bedrock/version.rb +1 -1
- data/lib/legion/extensions/llm/bedrock.rb +6 -3
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 52fa9adf70213041c0b2e6e96b44ff50b64220ae09f9cdab5619808f725df914
|
|
4
|
+
data.tar.gz: 12b5b4b31b712be0f0831e5e37dd045e6ca89442296bc645100c05cb41f4f8e0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3519d77bffac0e5ea90805fe64cc095f831e6069bc21bf93ffa3e0441977560ebd556b8d2c82b5fad163301c3a78c54e5efdf6bc8060f0ad4935cdce07ab2cfd
|
|
7
|
+
data.tar.gz: 96f1a2d3d394e17b0a517a3523445d027b9a11819785c889b2aae483bfd97753d8b7926aeadaab827330831525a3c86e3b4a3e2d64462b6d35113d359431e7fa
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.11 - 2026-05-31
|
|
4
|
+
|
|
5
|
+
### Security
|
|
6
|
+
- **BEDROCK-CRED-01**: Static AWS credentials now emit a deprecation warning. New setting `security.block_static_aws_credentials=true` rejects them entirely, forcing IAM role-based authentication.
|
|
7
|
+
|
|
8
|
+
### Fixed
|
|
9
|
+
- **TRANSLATION-BUG-07**: Bedrock streaming now preserves thinking (chain-of-thought) blocks in the final `Message`. Previously CoT was accumulated by the wire handler but silently dropped from the returned response.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **PROMPT-CACHE-01**: System blocks, tool definitions, and early conversation messages (first 4, never the last) now include `cache_control: { type: "cache_control" }` markers for Anthropic prompt caching via Bedrock Converse.
|
|
13
|
+
- **PROMPT-CACHE-02**: Response parser extracts `cached_input_tokens` (`cache_read_input_tokens`) and `cache_creation_tokens` (`cache_creation_input_tokens`) from Bedrock usage metadata into `Message#cached_tokens` and `Message#cache_creation_tokens`.
|
|
14
|
+
|
|
15
|
+
## 0.3.10 - 2026-05-21
|
|
16
|
+
|
|
17
|
+
- Add `default_transport`/`default_tier` class declarations, remove `configured_transport`/`configured_tier`
|
|
18
|
+
- Add `model_allowed?` filtering in `discover_offerings` (handles ModelOffering objects)
|
|
19
|
+
- Move `DEFAULT_REGION` to `settings[:region]`
|
|
20
|
+
- Default tier corrected from `:frontier` to `:cloud`
|
|
21
|
+
- Identity headers included via base provider
|
|
22
|
+
|
|
23
|
+
|
|
3
24
|
## 0.3.9 - 2026-05-18
|
|
4
25
|
|
|
5
26
|
- Fix streaming tool call parsing: `stream_converse` now handles content_block_start/delta/stop events for tool_use blocks, capturing tool ids, names, and accumulated input JSON. Previously only text deltas were captured and tool calls were silently dropped.
|
|
@@ -14,8 +14,6 @@ module Legion
|
|
|
14
14
|
class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
|
|
15
15
|
include Legion::Logging::Helper
|
|
16
16
|
|
|
17
|
-
DEFAULT_REGION = 'us-east-1'
|
|
18
|
-
|
|
19
17
|
STATIC_MODELS = [
|
|
20
18
|
{ model: 'anthropic.claude-3-haiku-20240307-v1:0', alias: 'claude-3-haiku' },
|
|
21
19
|
{ model: 'amazon.titan-text-express-v1', alias: 'titan-text-express' },
|
|
@@ -50,6 +48,8 @@ module Legion
|
|
|
50
48
|
|
|
51
49
|
class << self
|
|
52
50
|
def slug = 'bedrock'
|
|
51
|
+
def default_transport = :aws_sdk
|
|
52
|
+
def default_tier = :cloud
|
|
53
53
|
|
|
54
54
|
def configuration_options
|
|
55
55
|
%i[
|
|
@@ -77,11 +77,24 @@ module Legion
|
|
|
77
77
|
|
|
78
78
|
INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
|
|
79
79
|
|
|
80
|
-
def inference_profile_id(model)
|
|
80
|
+
def inference_profile_id(model, region: nil)
|
|
81
81
|
return model if model.start_with?('us.', 'eu.', 'ap.', 'arn:')
|
|
82
82
|
return model unless INFERENCE_PROFILE_PREFIXES.any? { |p| model.start_with?(p) }
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
prefix = region ? region_prefix(region) : 'us'
|
|
85
|
+
"#{prefix}.#{model}"
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Region-based inference profile prefix mapping.
# Bare model IDs (e.g. anthropic.claude-sonnet-4) get the prefix of the
# geography the calling region belongs to. Bedrock publishes its Asia-Pacific
# cross-region profiles under `apac.` (not `ap.`), so ap-* regions map to 'apac'.
REGION_PREFIX = {
  'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
  'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
  'ap-south-1' => 'apac', 'ap-southeast-1' => 'apac', 'ap-southeast-2' => 'apac', 'ap-northeast-1' => 'apac'
}.freeze

# Resolves the inference profile prefix for an AWS region.
#
# Regions listed in REGION_PREFIX use the table. Any other eu-* / ap-* region
# is classified by its leading segment so that, for example, eu-north-1 does
# not silently receive a US profile. Everything else (including nil or an
# empty string) keeps the previous default of 'us'.
#
# @param region [#to_s] AWS region name, e.g. 'eu-west-1'
# @return [String] 'us', 'eu' or 'apac'
def region_prefix(region)
  name = region.to_s
  REGION_PREFIX.fetch(name) do
    case name
    when /\Aeu-/ then 'eu'
    when /\Aap-/ then 'apac'
    else 'us'
    end
  end
end
|
|
86
99
|
end
|
|
87
100
|
|
|
@@ -113,7 +126,7 @@ module Legion
|
|
|
113
126
|
def count_tokens_url = 'CountTokens'
|
|
114
127
|
|
|
115
128
|
def region
|
|
116
|
-
config.bedrock_region ||
|
|
129
|
+
config.bedrock_region || settings[:region] || 'us-east-1'
|
|
117
130
|
end
|
|
118
131
|
|
|
119
132
|
def discover_offerings(live: false, **filters)
|
|
@@ -126,8 +139,12 @@ module Legion
|
|
|
126
139
|
|
|
127
140
|
log.info { "bedrock.provider.discover_offerings: listing foundation models (region=#{region})" }
|
|
128
141
|
response = bedrock_client.list_foundation_models(**filters)
|
|
129
|
-
@cached_offerings = Array(value(response, :model_summaries)).
|
|
130
|
-
offering_from_summary(summary)
|
|
142
|
+
@cached_offerings = Array(value(response, :model_summaries)).filter_map do |summary|
|
|
143
|
+
offering = offering_from_summary(summary)
|
|
144
|
+
model_id = offering.respond_to?(:model) ? offering.model : (offering[:model] || offering[:id])
|
|
145
|
+
next unless model_allowed?(model_id.to_s)
|
|
146
|
+
|
|
147
|
+
offering
|
|
131
148
|
end
|
|
132
149
|
log.info { "bedrock.provider.discover_offerings: found #{@cached_offerings.size} models" }
|
|
133
150
|
@cached_offerings
|
|
@@ -230,7 +247,7 @@ module Legion
|
|
|
230
247
|
log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
|
|
231
248
|
request = Utils.deep_merge(
|
|
232
249
|
{
|
|
233
|
-
model_id: self.class.inference_profile_id(model_id(model)),
|
|
250
|
+
model_id: self.class.inference_profile_id(model_id(model), region: region),
|
|
234
251
|
input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
|
|
235
252
|
},
|
|
236
253
|
params
|
|
@@ -323,8 +340,8 @@ module Legion
|
|
|
323
340
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
324
341
|
provider_family: :bedrock,
|
|
325
342
|
instance_id: instance_id,
|
|
326
|
-
transport:
|
|
327
|
-
tier:
|
|
343
|
+
transport: offering_transport,
|
|
344
|
+
tier: offering_tier,
|
|
328
345
|
model: model,
|
|
329
346
|
usage_type: usage_type,
|
|
330
347
|
capabilities: capabilities || default_capabilities(model),
|
|
@@ -346,30 +363,48 @@ module Legion
|
|
|
346
363
|
ctx ? { context_window: ctx } : nil
|
|
347
364
|
end
|
|
348
365
|
|
|
349
|
-
def
|
|
350
|
-
config.respond_to?(:transport) ? config.transport : default
|
|
351
|
-
end
|
|
352
|
-
|
|
353
|
-
def configured_tier(default)
|
|
354
|
-
config.respond_to?(:tier) ? config.tier : default
|
|
355
|
-
end
|
|
356
|
-
|
|
357
|
-
def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:)
|
|
366
|
+
def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil)
|
|
358
367
|
{
|
|
359
|
-
model_id: self.class.inference_profile_id(model_id(model)),
|
|
368
|
+
model_id: self.class.inference_profile_id(model_id(model), region: region),
|
|
360
369
|
messages: format_messages(messages.reject { |message| message.role == :system }),
|
|
361
370
|
system: format_system(messages),
|
|
362
371
|
inference_config: { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact,
|
|
363
|
-
tool_config: format_tool_config(tools, tool_prefs)
|
|
372
|
+
tool_config: format_tool_config(tools, tool_prefs),
|
|
373
|
+
guardrail_config: guardrail_config
|
|
364
374
|
}.compact
|
|
365
375
|
end
|
|
366
376
|
|
|
367
377
|
def format_messages(messages)
|
|
368
|
-
messages.
|
|
369
|
-
|
|
378
|
+
total = messages.size
|
|
379
|
+
messages.filter_map.with_index do |message, idx|
|
|
380
|
+
blocks = message.role == :tool ? tool_result_blocks(message) : content_blocks(message.content)
|
|
370
381
|
next if blocks.empty?
|
|
371
382
|
|
|
372
|
-
|
|
383
|
+
cache_blocks = should_cache_message?(idx, total) ? add_cache_control_to_blocks(blocks) : blocks
|
|
384
|
+
{ role: bedrock_role(message.role), content: cache_blocks }
|
|
385
|
+
end
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
# Builds the content blocks for a tool-role message.
#
# @param message [#tool_result?, #tool_call_id, #tool_results] tool message
# @return [Array<Hash>] a single `tool_result` content block, or an empty
#   array when the message does not carry a tool result
def tool_result_blocks(message)
  return [] unless message.tool_result?

  # Converse content blocks are keyed by member name (`tool_result:`), the same
  # way the `{ text: ... }` and `{ image: ... }` blocks in this class are; the
  # request shape has no `type:` discriminator and the tool-use id lives
  # directly inside the tool_result member.
  [{
    tool_result: {
      tool_use_id: message.tool_call_id,
      content: [{ text: message.tool_results.to_s }]
    }
  }]
end
|
|
397
|
+
|
|
398
|
+
# Decides whether the message at +index+ should carry a prompt-cache marker.
# Only the first four positions qualify, and the final message never does.
#
# @param index [Integer] zero-based position of the message in the list
# @param total [Integer] number of messages in the list
# @return [Boolean]
def should_cache_message?(index, total)
  last_position = total - 1
  index < 4 && index != last_position
end
|
|
404
|
+
|
|
405
|
+
# Returns copies of the given content blocks, each extended with a
# cache-control marker. The input blocks are duplicated, not mutated.
#
# NOTE(review): the marker is emitted as `cache_control: { type: 'cache_control' }`;
# confirm this is the shape the Converse request accepts for content blocks.
#
# @param blocks [Array<Hash>] content blocks for one message
# @return [Array<Hash>] new blocks carrying the marker
def add_cache_control_to_blocks(blocks)
  blocks.map { |entry| entry.dup.merge(cache_control: { type: 'cache_control' }) }
end
|
|
375
410
|
|
|
@@ -382,7 +417,7 @@ module Legion
|
|
|
382
417
|
# Wraps a system prompt in a single system block tagged with a cache-control
# marker.
#
# NOTE(review): the marker is emitted as `cache_control: { type: 'cache_control' }`;
# confirm this is the shape the Converse request accepts for system blocks.
#
# @param system [#to_s, nil] system prompt
# @return [Array<Hash>, nil] one-element block list, or nil when the prompt
#   stringifies to an empty string
def system_blocks(system)
  prompt_missing = system.to_s.empty?
  prompt_missing ? nil : [{ text: system, cache_control: { type: 'cache_control' } }]
end
|
|
387
422
|
|
|
388
423
|
def bedrock_role(role)
|
|
@@ -393,12 +428,45 @@ module Legion
|
|
|
393
428
|
raw = raw_content(content)
|
|
394
429
|
return raw if raw
|
|
395
430
|
|
|
431
|
+
return image_blocks(content) if content.respond_to?(:attachments) && !content.attachments.empty?
|
|
432
|
+
|
|
396
433
|
text = content_text(content)
|
|
397
434
|
return [] if text.strip.empty?
|
|
398
435
|
|
|
399
436
|
[{ text: text }]
|
|
400
437
|
end
|
|
401
438
|
|
|
439
|
+
# Builds content blocks for content that carries attachments.
#
# Emits an optional leading text block followed by one image block per image
# attachment; attachments of any other class are skipped.
#
# @param content [#attachments] content object; its text is read via content_text
# @return [Array<Hash>] text and image blocks, possibly empty
def image_blocks(content)
  blocks = []
  text = content_text(content)
  # Plain-Ruby emptiness check, same as content_blocks uses. String#present?
  # only exists when ActiveSupport is loaded and would raise NoMethodError here.
  blocks << { text: text } unless text.strip.empty?

  content.attachments.each do |attachment|
    next unless attachment.is_a?(Legion::Extensions::Llm::Content::ImageAttachment)

    blocks << format_image_attachment(attachment)
  end
  blocks
end
|
|
451
|
+
|
|
452
|
+
# Converts an image attachment into an image content block.
#
# @param attachment [#format, #data] image attachment
# @return [Hash] `{ image: { format:, source: { bytes: } } }`, where the format
#   is passed through image_format and the bytes are the attachment's data
def format_image_attachment(attachment)
  kind = image_format(attachment.format)
  source = { bytes: attachment.data }
  { image: { format: kind, source: source } }
end
|
|
460
|
+
|
|
461
|
+
# Normalises an attachment format to one of 'jpeg', 'png', 'gif' or 'webp'.
# Matching is case-insensitive, 'jpg' is treated as 'jpeg', and any other
# value (including nil) falls back to 'jpeg'.
#
# @param fmt [#to_s] format name
# @return [String]
def image_format(fmt)
  supported = { 'jpeg' => 'jpeg', 'jpg' => 'jpeg', 'png' => 'png', 'gif' => 'gif', 'webp' => 'webp' }
  supported.fetch(fmt.to_s.downcase, 'jpeg')
end
|
|
469
|
+
|
|
402
470
|
def raw_content(content)
|
|
403
471
|
return nil unless content.is_a?(Legion::Extensions::Llm::Content::Raw)
|
|
404
472
|
|
|
@@ -418,7 +486,14 @@ module Legion
|
|
|
418
486
|
"bedrock.provider.tools: formatting tools=#{tools.keys.map(&:to_s).sort.join(',')} " \
|
|
419
487
|
"tool_choice=#{tool_choice_label(tool_prefs)}"
|
|
420
488
|
end
|
|
421
|
-
{
|
|
489
|
+
{
|
|
490
|
+
tools: tools.values.map { |tool| tool_definition_with_cache(tool) },
|
|
491
|
+
tool_choice: tool_choice(tool_prefs)
|
|
492
|
+
}.compact
|
|
493
|
+
end
|
|
494
|
+
|
|
495
|
+
# Builds the definition for +tool+ via tool_definition and adds a
# cache-control marker to it.
#
# NOTE(review): the marker is emitted as `cache_control: { type: 'cache_control' }`;
# confirm this is the shape the Converse request accepts for tool entries.
#
# @param tool [Object] tool accepted by tool_definition
# @return [Hash] the tool definition merged with the marker
def tool_definition_with_cache(tool)
  definition = tool_definition(tool)
  definition.merge(cache_control: { type: 'cache_control' })
end
|
|
423
498
|
|
|
424
499
|
def tool_definition(tool)
|
|
@@ -469,26 +544,33 @@ module Legion
|
|
|
469
544
|
tool_calls: parse_tool_calls(value(message, :content)),
|
|
470
545
|
input_tokens: value(usage, :input_tokens),
|
|
471
546
|
output_tokens: value(usage, :output_tokens),
|
|
547
|
+
cached_tokens: cache_read_tokens(usage),
|
|
548
|
+
cache_creation_tokens: cache_write_tokens(usage),
|
|
472
549
|
raw: normalize_response(response)
|
|
473
550
|
)
|
|
474
551
|
end
|
|
475
552
|
|
|
476
553
|
def stream_converse(request, fallback_model)
|
|
477
|
-
state = { accumulated: +'',
|
|
554
|
+
state = { accumulated: +'', thinking: +'', final_usage: nil, stop_reason: nil,
|
|
555
|
+
tool_use_blocks: [], current_tool_use: nil, in_thinking: false }
|
|
478
556
|
|
|
479
557
|
runtime_client.converse_stream(**request) do |stream|
|
|
480
558
|
wire_stream_handlers(stream, state, fallback_model) { |chunk| yield chunk if block_given? }
|
|
481
559
|
end
|
|
482
560
|
|
|
483
|
-
|
|
561
|
+
msg_attrs = {
|
|
484
562
|
role: :assistant,
|
|
485
563
|
content: state[:accumulated],
|
|
486
564
|
model_id: fallback_model,
|
|
487
565
|
tool_calls: build_stream_tool_calls(state[:tool_use_blocks]),
|
|
488
566
|
input_tokens: value(state[:final_usage], :input_tokens),
|
|
489
567
|
output_tokens: value(state[:final_usage], :output_tokens),
|
|
568
|
+
cached_tokens: cache_read_tokens(state[:final_usage]),
|
|
569
|
+
cache_creation_tokens: cache_write_tokens(state[:final_usage]),
|
|
490
570
|
stop_reason: state[:stop_reason]
|
|
491
|
-
|
|
571
|
+
}
|
|
572
|
+
msg_attrs[:thinking] = state[:thinking] unless state[:thinking].empty?
|
|
573
|
+
Legion::Extensions::Llm::Message.new(**msg_attrs)
|
|
492
574
|
end
|
|
493
575
|
|
|
494
576
|
def wire_stream_handlers(stream, state, fallback_model, &)
|
|
@@ -504,6 +586,13 @@ module Legion
|
|
|
504
586
|
|
|
505
587
|
stream.on_content_block_start_event do |event|
|
|
506
588
|
start = value(event, :start)
|
|
589
|
+
|
|
590
|
+
if value(start, :thinking)
|
|
591
|
+
state[:in_thinking] = true
|
|
592
|
+
next
|
|
593
|
+
end
|
|
594
|
+
|
|
595
|
+
state[:in_thinking] = false
|
|
507
596
|
tool_start = value(start, :tool_use) if start
|
|
508
597
|
next unless tool_start
|
|
509
598
|
|
|
@@ -520,10 +609,14 @@ module Legion
|
|
|
520
609
|
delta = value(event, :delta)
|
|
521
610
|
text = value(delta, :text)
|
|
522
611
|
if text
|
|
523
|
-
state[:
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
612
|
+
if state[:in_thinking]
|
|
613
|
+
state[:thinking] << text
|
|
614
|
+
else
|
|
615
|
+
state[:accumulated] << text
|
|
616
|
+
if block_given?
|
|
617
|
+
yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
|
|
618
|
+
model_id: fallback_model)
|
|
619
|
+
end
|
|
527
620
|
end
|
|
528
621
|
end
|
|
529
622
|
|
|
@@ -569,6 +662,18 @@ module Legion
|
|
|
569
662
|
end
|
|
570
663
|
end
|
|
571
664
|
|
|
665
|
+
# Reads the cache-read token count from usage metadata, looking the key up
# first as a symbol and then as a string via the value helper.
#
# @param usage [Object, nil] usage metadata
# @return [Object, nil] the value found under `cache_read_input_tokens`, or
#   nil when usage is nil or neither lookup yields a truthy value
def cache_read_tokens(usage)
  unless usage.nil?
    by_symbol = value(usage, :cache_read_input_tokens)
    by_symbol || value(usage, 'cache_read_input_tokens')
  end
end
|
|
670
|
+
|
|
671
|
+
# Reads the cache-creation token count from usage metadata, looking the key up
# first as a symbol and then as a string via the value helper.
#
# @param usage [Object, nil] usage metadata
# @return [Object, nil] the value found under `cache_creation_input_tokens`,
#   or nil when usage is nil or neither lookup yields a truthy value
def cache_write_tokens(usage)
  unless usage.nil?
    by_symbol = value(usage, :cache_creation_input_tokens)
    by_symbol || value(usage, 'cache_creation_input_tokens')
  end
end
|
|
676
|
+
|
|
572
677
|
def parse_embedding_response(response, model:)
|
|
573
678
|
body = parse_body(value(response, :body))
|
|
574
679
|
vectors = body['embedding'] || body['embeddings'] || body.dig('data', 0, 'embedding')
|
|
@@ -595,11 +700,11 @@ module Legion
|
|
|
595
700
|
end
|
|
596
701
|
|
|
597
702
|
def bedrock_client
|
|
598
|
-
Aws::Bedrock::Client.new(client_options)
|
|
703
|
+
@bedrock_client ||= Aws::Bedrock::Client.new(client_options)
|
|
599
704
|
end
|
|
600
705
|
|
|
601
706
|
def runtime_client
|
|
602
|
-
Aws::BedrockRuntime::Client.new(client_options)
|
|
707
|
+
@runtime_client ||= Aws::BedrockRuntime::Client.new(client_options)
|
|
603
708
|
end
|
|
604
709
|
|
|
605
710
|
def client_options
|
|
@@ -626,10 +731,21 @@ module Legion
|
|
|
626
731
|
return Aws::SharedCredentials.new(profile_name: config.bedrock_profile) if config.bedrock_profile
|
|
627
732
|
return nil unless config.bedrock_access_key_id
|
|
628
733
|
|
|
734
|
+
if static_credentials_blocked?
|
|
735
|
+
raise SecurityError,
|
|
736
|
+
'Static AWS credentials are disabled (security.block_static_aws_credentials=true); use IAM roles'
|
|
737
|
+
end
|
|
738
|
+
log.warn('[bedrock] Using static AWS credentials — prefer IAM roles for production')
|
|
629
739
|
Aws::Credentials.new(config.bedrock_access_key_id, config.bedrock_secret_access_key,
|
|
630
740
|
config.bedrock_session_token)
|
|
631
741
|
end
|
|
632
742
|
|
|
743
|
+
# Reports whether static AWS credentials have been disabled through settings.
#
# Returns false when ::Legion::Settings is not defined. Otherwise the flag at
# extensions → llm → security → block_static_aws_credentials must be exactly
# `true`; any other value leaves static credentials allowed.
#
# @return [Boolean]
def static_credentials_blocked?
  if defined?(::Legion::Settings)
    flag = ::Legion::Settings.dig(:extensions, :llm, :security, :block_static_aws_credentials)
    flag == true
  else
    false
  end
end
|
|
748
|
+
|
|
633
749
|
def credential_source
|
|
634
750
|
return :static if config.bedrock_access_key_id
|
|
635
751
|
return :profile if config.bedrock_profile
|
|
@@ -22,8 +22,9 @@ module Legion
|
|
|
22
22
|
::Legion::Extensions::Llm.provider_settings(
|
|
23
23
|
family: PROVIDER_FAMILY,
|
|
24
24
|
instance: {
|
|
25
|
-
default_model: '
|
|
26
|
-
|
|
25
|
+
default_model: 'anthropic.claude-sonnet-4',
|
|
26
|
+
region: 'us-east-1',
|
|
27
|
+
tier: :cloud,
|
|
27
28
|
transport: :aws_sdk,
|
|
28
29
|
credentials: {
|
|
29
30
|
bearer_token: nil,
|
|
@@ -73,7 +74,7 @@ module Legion
|
|
|
73
74
|
.transform_values do |config|
|
|
74
75
|
sanitized = sanitize_instance_config(config)
|
|
75
76
|
sanitized[:capabilities] ||= DEFAULT_CAPABILITIES.dup
|
|
76
|
-
sanitized[:default_model] ||= '
|
|
77
|
+
sanitized[:default_model] ||= 'anthropic.claude-sonnet-4'
|
|
77
78
|
sanitized
|
|
78
79
|
end
|
|
79
80
|
end
|
|
@@ -197,6 +198,8 @@ module Legion
|
|
|
197
198
|
end
|
|
198
199
|
|
|
199
200
|
def self.normalize_instance_config(config)
|
|
201
|
+
return {} if config.nil?
|
|
202
|
+
|
|
200
203
|
normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
201
204
|
normalized[:bedrock_region] ||= normalized.delete(:region)
|
|
202
205
|
normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
|