lex-llm-bedrock 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0d6ffe6c93fec2590c74ed5a01a944c81694fb98316bf35bde08db6efecfed9
4
- data.tar.gz: 3e5174849ecf00a36f79e8926237f430ce1b1e8a8572c4fede2463e5cc0a3bda
3
+ metadata.gz: 939ac50e6240dcde55f7bc9b24fb0a7f1f6fd527ec46cea27d8255f8a401fcd6
4
+ data.tar.gz: 899da50daa4595d92b0bb23a14bd4c873c03d41bc72235e8337b17f61fe69d99
5
5
  SHA512:
6
- metadata.gz: 273bc8934934e7eb40e9365a57de5fb33c12114c84b435b353ffb3e0e83329ced120f94016ddc59bd8cfc484c3a35094814f41beb8b0361ece513417505954d5
7
- data.tar.gz: 2d36dcff70b35577a0d5bbf6846ed6181851e6ac32d8cd65f67367429f32ec88869a084ccc4e3fdde200ab50ae3ca6c21509c84fa7a4af5ae360eaf75b85a2c5
6
+ metadata.gz: b2e824ee11517dbbfaf7710bd25a8329ee9007ccb35226776a57131d4ba859fc90fc2b8f0dc3e31ff349b2cc15e3a82778e491fca0f75b2b76d6a4783a8e7e67
7
+ data.tar.gz: c06248c9b3c047db80193c7a3c9666b0b251de5bb6a62199f135902f91295cfcfc069e2994cdc93168b66a1c11a7ad755875bc1c9ba8fd60ae0850a7277dae30
data/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.12 - 2026-06-02
4
+
5
+ ### Fixed
6
+ - **ContentBlock union validation errors** — Removed `cache_control` from text blocks, system blocks, and tool definitions. The Bedrock Converse SDK's `ContentBlock` is a strict union (text|image|tool_use|...); adding `cache_control` as a sibling key raised `ArgumentError` with "multiple values provided to union" and "unexpected value" messages (provider.rb)
7
+ - **Assistant tool_call messages rejected by SDK** — Messages with tool calls were sent as raw content blocks with `:type`/`:content` keys. Now emits proper `{ tool_use: { tool_use_id, name, input } }` blocks via new `build_content_blocks`/`assistant_tool_use_blocks` methods (provider.rb)
8
+ - **PROMPT-CACHE-01 reverted** — Bedrock Converse API does not support `cache_control` on text/document/image blocks. The markers added in 0.3.11 are removed (provider.rb)
9
+
10
+ ### Added
11
+ - **Per-provider discovery refresh actor** — New `actors/discovery_refresh.rb` that only refreshes Bedrock models, avoiding coupling to other providers' discovery cycles
12
+
13
+ ## 0.3.11 - 2026-05-31
14
+
15
+ ### Security
16
+ - **BEDROCK-CRED-01**: Static AWS credentials now emit a deprecation warning. New setting `security.block_static_aws_credentials=true` (read from the `extensions.llm` settings namespace) rejects them entirely by raising `SecurityError`, forcing IAM role-based authentication.
17
+
18
+ ### Fixed
19
+ - **TRANSLATION-BUG-07**: Bedrock streaming now preserves thinking (chain-of-thought) blocks in the final `Message`. Previously CoT was accumulated by the wire handler but silently dropped from the returned response.
20
+
21
+ ### Added
22
+ - **PROMPT-CACHE-01**: System blocks, tool definitions, and early conversation messages (first 4, never the last) now include `cache_control: { type: "cache_control" }` markers for Anthropic prompt caching via Bedrock Converse.
23
+ - **PROMPT-CACHE-02**: Response parser reads `cache_read_input_tokens` and `cache_creation_input_tokens` from Bedrock usage metadata and exposes them as `Message#cached_tokens` and `Message#cache_creation_tokens` respectively.
24
+
3
25
  ## 0.3.10 - 2026-05-21
4
26
 
5
27
  - Add `default_transport`/`default_tier` class declarations, remove `configured_transport`/`configured_tier`
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require 'legion/extensions/actors/every'
5
+ rescue LoadError => e
6
+ warn(e.message) if $VERBOSE
7
+ end
8
+
9
+ return unless defined?(Legion::Extensions::Actors::Every)
10
+
11
+ module Legion
12
+ module Extensions
13
+ module Llm
14
+ module Bedrock
15
+ module Actor
16
+ class DiscoveryRefresh < Legion::Extensions::Actors::Every # rubocop:disable Style/Documentation
17
+ include Legion::Logging::Helper
18
+
19
+ REFRESH_INTERVAL = 1800
20
+
21
+ def runner_class = self.class
22
+ def runner_function = 'manual'
23
+ def run_now? = true
24
+ def use_runner? = false
25
+ def check_subtask? = false
26
+ def generate_task? = false
27
+
28
+ def time
29
+ return REFRESH_INTERVAL unless defined?(Legion::Settings)
30
+
31
+ Legion::Settings.dig(:extensions, :llm, :bedrock, :discovery_interval) || REFRESH_INTERVAL
32
+ end
33
+
34
+ def manual
35
+ log.debug('[bedrock][discovery_refresh] refreshing model list')
36
+ return unless defined?(Legion::LLM::Discovery)
37
+
38
+ Legion::LLM::Discovery.refresh_discovered_models!(provider: :bedrock)
39
+ rescue StandardError => e
40
+ handle_exception(e, level: :warn, handled: true, operation: 'bedrock.actor.discovery_refresh')
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -77,11 +77,24 @@ module Legion
77
77
 
78
78
  INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
79
79
 
80
- def inference_profile_id(model)
80
+ def inference_profile_id(model, region: nil)
81
81
  return model if model.start_with?('us.', 'eu.', 'ap.', 'arn:')
82
82
  return model unless INFERENCE_PROFILE_PREFIXES.any? { |p| model.start_with?(p) }
83
83
 
84
- "us.#{model}"
84
+ prefix = region ? region_prefix(region) : 'us'
85
+ "#{prefix}.#{model}"
86
+ end
87
+
88
+ # Region-based inference profile prefix mapping.
89
+ # Bare model IDs (e.g. anthropic.claude-sonnet-4) get the region prefix.
90
+ REGION_PREFIX = {
91
+ 'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
92
+ 'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
93
+ 'ap-south-1' => 'ap', 'ap-southeast-1' => 'ap', 'ap-southeast-2' => 'ap', 'ap-northeast-1' => 'ap'
94
+ }.freeze
95
+
96
+ def region_prefix(region)
97
+ REGION_PREFIX.fetch(region.to_s, 'us')
85
98
  end
86
99
  end
87
100
 
@@ -234,7 +247,7 @@ module Legion
234
247
  log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
235
248
  request = Utils.deep_merge(
236
249
  {
237
- model_id: self.class.inference_profile_id(model_id(model)),
250
+ model_id: self.class.inference_profile_id(model_id(model), region: region),
238
251
  input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
239
252
  },
240
253
  params
@@ -350,25 +363,52 @@ module Legion
350
363
  ctx ? { context_window: ctx } : nil
351
364
  end
352
365
 
353
- def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:)
366
+ def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil)
354
367
  {
355
- model_id: self.class.inference_profile_id(model_id(model)),
368
+ model_id: self.class.inference_profile_id(model_id(model), region: region),
356
369
  messages: format_messages(messages.reject { |message| message.role == :system }),
357
370
  system: format_system(messages),
358
371
  inference_config: { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact,
359
- tool_config: format_tool_config(tools, tool_prefs)
372
+ tool_config: format_tool_config(tools, tool_prefs),
373
+ guardrail_config: guardrail_config
360
374
  }.compact
361
375
  end
362
376
 
363
377
  def format_messages(messages)
364
- messages.filter_map do |message|
365
- blocks = content_blocks(message.content)
378
+ total = messages.size
379
+ messages.filter_map.with_index do |message, idx|
380
+ blocks = build_content_blocks(message)
366
381
  next if blocks.empty?
367
382
 
368
- { role: bedrock_role(message.role), content: blocks }
383
+ cache_blocks = should_cache_message?(idx, total) ? add_cache_control_to_blocks(blocks) : blocks
384
+ { role: bedrock_role(message.role), content: cache_blocks }
369
385
  end
370
386
  end
371
387
 
388
+ def tool_result_blocks(message)
389
+ return [] unless message.tool_result?
390
+
391
+ [{
392
+ type: 'tool_result',
393
+ tool_use: { tool_use_id: message.tool_call_id },
394
+ content: [{ type: 'text', text: message.tool_results.to_s }]
395
+ }]
396
+ end
397
+
398
+ def should_cache_message?(index, total)
399
+ # Cache first 4 messages, never the last message
400
+ return false if index == total - 1
401
+
402
+ index < 4
403
+ end
404
+
405
+ def add_cache_control_to_blocks(blocks)
406
+ # Intentional no-op: a cache_control key beside text/image/document content is rejected by the Converse SDK.
407
+ # NOTE(review): Converse prompt caching appears to use standalone cache_point blocks, not a tool_use field — confirm.
408
+ # Blocks are returned unchanged so the SDK's ContentBlock union validation passes.
409
+ blocks
410
+ end
411
+
372
412
  def format_system(messages)
373
413
  system_messages = messages.select { |message| message.role == :system }
374
414
  system_text = system_messages.map { |message| content_text(message.content) }
@@ -385,16 +425,75 @@ module Legion
385
425
  role == :assistant ? 'assistant' : 'user'
386
426
  end
387
427
 
428
+ def build_content_blocks(message)
429
+ return tool_result_blocks(message) if message.role == :tool
430
+
431
+ # Assistant messages with tool calls: build text + tool_use blocks
432
+ return assistant_tool_use_blocks(message) if message.role == :assistant && message.tool_call?
433
+
434
+ content_blocks(message.content)
435
+ end
436
+
437
+ def assistant_tool_use_blocks(message)
438
+ blocks = []
439
+ text = content_text(message.content)
440
+ blocks << { text: text } if text && !text.strip.empty?
441
+
442
+ message.tool_calls.each do |call|
443
+ blocks << {
444
+ tool_use: {
445
+ tool_use_id: call.id,
446
+ name: call.name,
447
+ input: call.arguments || {}
448
+ }
449
+ }
450
+ end
451
+ blocks
452
+ end
453
+
388
454
  def content_blocks(content)
389
455
  raw = raw_content(content)
390
456
  return raw if raw
391
457
 
458
+ return image_blocks(content) if content.respond_to?(:attachments) && !content.attachments.empty?
459
+
392
460
  text = content_text(content)
393
461
  return [] if text.strip.empty?
394
462
 
395
463
  [{ text: text }]
396
464
  end
397
465
 
466
+ def image_blocks(content)
467
+ blocks = []
468
+ text = content_text(content)
469
+ blocks << { text: text } if text.strip.present?
470
+
471
+ content.attachments.each do |attachment|
472
+ if attachment.is_a?(Legion::Extensions::Llm::Content::ImageAttachment)
473
+ blocks << format_image_attachment(attachment)
474
+ end
475
+ end
476
+ blocks
477
+ end
478
+
479
+ def format_image_attachment(attachment)
480
+ {
481
+ image: {
482
+ format: image_format(attachment.format),
483
+ source: { bytes: attachment.data }
484
+ }
485
+ }
486
+ end
487
+
488
+ def image_format(fmt)
489
+ case fmt.to_s.downcase
490
+ when 'jpeg', 'jpg' then 'jpeg'
491
+ when 'png' then 'png'
492
+ when 'gif' then 'gif'
493
+ when 'webp' then 'webp'
494
+ end || 'jpeg'
495
+ end
496
+
398
497
  def raw_content(content)
399
498
  return nil unless content.is_a?(Legion::Extensions::Llm::Content::Raw)
400
499
 
@@ -414,7 +513,14 @@ module Legion
414
513
  "bedrock.provider.tools: formatting tools=#{tools.keys.map(&:to_s).sort.join(',')} " \
415
514
  "tool_choice=#{tool_choice_label(tool_prefs)}"
416
515
  end
417
- { tools: tools.values.map { |tool| tool_definition(tool) }, tool_choice: tool_choice(tool_prefs) }.compact
516
+ {
517
+ tools: tools.values.map { |tool| tool_definition_with_cache(tool) },
518
+ tool_choice: tool_choice(tool_prefs)
519
+ }.compact
520
+ end
521
+
522
+ def tool_definition_with_cache(tool)
523
+ tool_definition(tool)
418
524
  end
419
525
 
420
526
  def tool_definition(tool)
@@ -465,26 +571,33 @@ module Legion
465
571
  tool_calls: parse_tool_calls(value(message, :content)),
466
572
  input_tokens: value(usage, :input_tokens),
467
573
  output_tokens: value(usage, :output_tokens),
574
+ cached_tokens: cache_read_tokens(usage),
575
+ cache_creation_tokens: cache_write_tokens(usage),
468
576
  raw: normalize_response(response)
469
577
  )
470
578
  end
471
579
 
472
580
  def stream_converse(request, fallback_model)
473
- state = { accumulated: +'', final_usage: nil, stop_reason: nil, tool_use_blocks: [], current_tool_use: nil }
581
+ state = { accumulated: +'', thinking: +'', final_usage: nil, stop_reason: nil,
582
+ tool_use_blocks: [], current_tool_use: nil, in_thinking: false }
474
583
 
475
584
  runtime_client.converse_stream(**request) do |stream|
476
585
  wire_stream_handlers(stream, state, fallback_model) { |chunk| yield chunk if block_given? }
477
586
  end
478
587
 
479
- Legion::Extensions::Llm::Message.new(
588
+ msg_attrs = {
480
589
  role: :assistant,
481
590
  content: state[:accumulated],
482
591
  model_id: fallback_model,
483
592
  tool_calls: build_stream_tool_calls(state[:tool_use_blocks]),
484
593
  input_tokens: value(state[:final_usage], :input_tokens),
485
594
  output_tokens: value(state[:final_usage], :output_tokens),
595
+ cached_tokens: cache_read_tokens(state[:final_usage]),
596
+ cache_creation_tokens: cache_write_tokens(state[:final_usage]),
486
597
  stop_reason: state[:stop_reason]
487
- )
598
+ }
599
+ msg_attrs[:thinking] = state[:thinking] unless state[:thinking].empty?
600
+ Legion::Extensions::Llm::Message.new(**msg_attrs)
488
601
  end
489
602
 
490
603
  def wire_stream_handlers(stream, state, fallback_model, &)
@@ -500,6 +613,13 @@ module Legion
500
613
 
501
614
  stream.on_content_block_start_event do |event|
502
615
  start = value(event, :start)
616
+
617
+ if value(start, :thinking)
618
+ state[:in_thinking] = true
619
+ next
620
+ end
621
+
622
+ state[:in_thinking] = false
503
623
  tool_start = value(start, :tool_use) if start
504
624
  next unless tool_start
505
625
 
@@ -516,10 +636,14 @@ module Legion
516
636
  delta = value(event, :delta)
517
637
  text = value(delta, :text)
518
638
  if text
519
- state[:accumulated] << text
520
- if block_given?
521
- yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
522
- model_id: fallback_model)
639
+ if state[:in_thinking]
640
+ state[:thinking] << text
641
+ else
642
+ state[:accumulated] << text
643
+ if block_given?
644
+ yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
645
+ model_id: fallback_model)
646
+ end
523
647
  end
524
648
  end
525
649
 
@@ -565,6 +689,18 @@ module Legion
565
689
  end
566
690
  end
567
691
 
692
+ def cache_read_tokens(usage)
693
+ return nil if usage.nil?
694
+
695
+ value(usage, :cache_read_input_tokens) || value(usage, 'cache_read_input_tokens')
696
+ end
697
+
698
+ def cache_write_tokens(usage)
699
+ return nil if usage.nil?
700
+
701
+ value(usage, :cache_creation_input_tokens) || value(usage, 'cache_creation_input_tokens')
702
+ end
703
+
568
704
  def parse_embedding_response(response, model:)
569
705
  body = parse_body(value(response, :body))
570
706
  vectors = body['embedding'] || body['embeddings'] || body.dig('data', 0, 'embedding')
@@ -591,11 +727,11 @@ module Legion
591
727
  end
592
728
 
593
729
  def bedrock_client
594
- Aws::Bedrock::Client.new(client_options)
730
+ @bedrock_client ||= Aws::Bedrock::Client.new(client_options)
595
731
  end
596
732
 
597
733
  def runtime_client
598
- Aws::BedrockRuntime::Client.new(client_options)
734
+ @runtime_client ||= Aws::BedrockRuntime::Client.new(client_options)
599
735
  end
600
736
 
601
737
  def client_options
@@ -622,10 +758,21 @@ module Legion
622
758
  return Aws::SharedCredentials.new(profile_name: config.bedrock_profile) if config.bedrock_profile
623
759
  return nil unless config.bedrock_access_key_id
624
760
 
761
+ if static_credentials_blocked?
762
+ raise SecurityError,
763
+ 'Static AWS credentials are disabled (security.block_static_aws_credentials=true); use IAM roles'
764
+ end
765
+ log.warn('[bedrock] Using static AWS credentials — prefer IAM roles for production')
625
766
  Aws::Credentials.new(config.bedrock_access_key_id, config.bedrock_secret_access_key,
626
767
  config.bedrock_session_token)
627
768
  end
628
769
 
770
+ def static_credentials_blocked?
771
+ return false unless defined?(::Legion::Settings)
772
+
773
+ ::Legion::Settings.dig(:extensions, :llm, :security, :block_static_aws_credentials) == true
774
+ end
775
+
629
776
  def credential_source
630
777
  return :static if config.bedrock_access_key_id
631
778
  return :profile if config.bedrock_profile
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Bedrock
7
- VERSION = '0.3.10'
7
+ VERSION = '0.3.12'
8
8
  end
9
9
  end
10
10
  end
@@ -22,7 +22,7 @@ module Legion
22
22
  ::Legion::Extensions::Llm.provider_settings(
23
23
  family: PROVIDER_FAMILY,
24
24
  instance: {
25
- default_model: 'us.anthropic.claude-sonnet-4-6',
25
+ default_model: 'anthropic.claude-sonnet-4',
26
26
  region: 'us-east-1',
27
27
  tier: :cloud,
28
28
  transport: :aws_sdk,
@@ -74,7 +74,7 @@ module Legion
74
74
  .transform_values do |config|
75
75
  sanitized = sanitize_instance_config(config)
76
76
  sanitized[:capabilities] ||= DEFAULT_CAPABILITIES.dup
77
- sanitized[:default_model] ||= 'us.anthropic.claude-sonnet-4-6'
77
+ sanitized[:default_model] ||= 'anthropic.claude-sonnet-4'
78
78
  sanitized
79
79
  end
80
80
  end
@@ -198,6 +198,8 @@ module Legion
198
198
  end
199
199
 
200
200
  def self.normalize_instance_config(config)
201
+ return {} if config.nil?
202
+
201
203
  normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
202
204
  normalized[:bedrock_region] ||= normalized.delete(:region)
203
205
  normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-bedrock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.10
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -125,6 +125,7 @@ files:
125
125
  - README.md
126
126
  - lex-llm-bedrock.gemspec
127
127
  - lib/legion/extensions/llm/bedrock.rb
128
+ - lib/legion/extensions/llm/bedrock/actors/discovery_refresh.rb
128
129
  - lib/legion/extensions/llm/bedrock/actors/fleet_worker.rb
129
130
  - lib/legion/extensions/llm/bedrock/provider.rb
130
131
  - lib/legion/extensions/llm/bedrock/runners/fleet_worker.rb