lex-llm-bedrock 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1d0b15f1168f45e6f11211963cc8c0939085562bd92e720bcd74426367824318
4
- data.tar.gz: e68d8865e321e62f0b7c54cf16a573286174626531229b325e53b0aecbc8c3ea
3
+ metadata.gz: 52fa9adf70213041c0b2e6e96b44ff50b64220ae09f9cdab5619808f725df914
4
+ data.tar.gz: 12b5b4b31b712be0f0831e5e37dd045e6ca89442296bc645100c05cb41f4f8e0
5
5
  SHA512:
6
- metadata.gz: ac5ed2ff6e4d586891edc3d05a8935cd32b25b561862a882c04a17d2c42324f8cfde527338c8d464cc8373d06dfa4b758fc3a7ab4c1ac70072d22d9998799207
7
- data.tar.gz: 1e6f4bd752aa5fe9eeb033426405718826135763e2f6974674a7238ffbf74134024ad8f164962d02dc908aa8276417451087c7092e623beda5e0df58977efbf4
6
+ metadata.gz: 3519d77bffac0e5ea90805fe64cc095f831e6069bc21bf93ffa3e0441977560ebd556b8d2c82b5fad163301c3a78c54e5efdf6bc8060f0ad4935cdce07ab2cfd
7
+ data.tar.gz: 96f1a2d3d394e17b0a517a3523445d027b9a11819785c889b2aae483bfd97753d8b7926aeadaab827330831525a3c86e3b4a3e2d64462b6d35113d359431e7fa
data/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.11 - 2026-05-31
4
+
5
+ ### Security
6
+ - **BEDROCK-CRED-01**: Static AWS credentials now emit a deprecation warning. New setting `security.block_static_aws_credentials=true` rejects them entirely, forcing IAM role-based authentication.
7
+
8
+ ### Fixed
9
+ - **TRANSLATION-BUG-07**: Bedrock streaming now preserves thinking (chain-of-thought) blocks in the final `Message`. Previously CoT was accumulated by the wire handler but silently dropped from the returned response.
10
+
11
+ ### Added
12
+ - **PROMPT-CACHE-01**: System blocks, tool definitions, and the first four conversation messages (excluding the final message, which is never cached) now include `cache_control: { type: "cache_control" }` markers for Anthropic prompt caching via Bedrock Converse.
13
+ - **PROMPT-CACHE-02**: Response parser extracts `cache_read_input_tokens` and `cache_creation_input_tokens` from Bedrock usage metadata and exposes them as `Message#cached_tokens` and `Message#cache_creation_tokens`, respectively.
14
+
15
+ ## 0.3.10 - 2026-05-21
16
+
17
+ - Add `default_transport`/`default_tier` class declarations, remove `configured_transport`/`configured_tier`
18
+ - Add `model_allowed?` filtering in `discover_offerings` (handles ModelOffering objects)
19
+ - Move `DEFAULT_REGION` to `settings[:region]`
20
+ - Default tier corrected from `:frontier` to `:cloud`
21
+ - Identity headers included via base provider
22
+
23
+
3
24
  ## 0.3.9 - 2026-05-18
4
25
 
5
26
  - Fix streaming tool call parsing: `stream_converse` now handles content_block_start/delta/stop events for tool_use blocks, capturing tool ids, names, and accumulated input JSON. Previously only text deltas were captured and tool calls were silently dropped.
@@ -14,8 +14,6 @@ module Legion
14
14
  class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
15
15
  include Legion::Logging::Helper
16
16
 
17
- DEFAULT_REGION = 'us-east-1'
18
-
19
17
  STATIC_MODELS = [
20
18
  { model: 'anthropic.claude-3-haiku-20240307-v1:0', alias: 'claude-3-haiku' },
21
19
  { model: 'amazon.titan-text-express-v1', alias: 'titan-text-express' },
@@ -50,6 +48,8 @@ module Legion
50
48
 
51
49
  class << self
52
50
  def slug = 'bedrock'
51
+ def default_transport = :aws_sdk
52
+ def default_tier = :cloud
53
53
 
54
54
  def configuration_options
55
55
  %i[
@@ -77,11 +77,24 @@ module Legion
77
77
 
78
78
  INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
79
79
 
80
- def inference_profile_id(model)
80
+ def inference_profile_id(model, region: nil)
81
81
  return model if model.start_with?('us.', 'eu.', 'ap.', 'arn:')
82
82
  return model unless INFERENCE_PROFILE_PREFIXES.any? { |p| model.start_with?(p) }
83
83
 
84
- "us.#{model}"
84
+ prefix = region ? region_prefix(region) : 'us'
85
+ "#{prefix}.#{model}"
86
+ end
87
+
88
+ # Region-based inference profile prefix mapping.
89
+ # Bare model IDs (e.g. anthropic.claude-sonnet-4) get the region prefix.
90
+ REGION_PREFIX = {
91
+ 'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
92
+ 'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
93
+ 'ap-south-1' => 'ap', 'ap-southeast-1' => 'ap', 'ap-southeast-2' => 'ap', 'ap-northeast-1' => 'ap'
94
+ }.freeze
95
+
96
+ def region_prefix(region)
97
+ REGION_PREFIX.fetch(region.to_s, 'us')
85
98
  end
86
99
  end
87
100
 
@@ -113,7 +126,7 @@ module Legion
113
126
  def count_tokens_url = 'CountTokens'
114
127
 
115
128
  def region
116
- config.bedrock_region || DEFAULT_REGION
129
+ config.bedrock_region || settings[:region] || 'us-east-1'
117
130
  end
118
131
 
119
132
  def discover_offerings(live: false, **filters)
@@ -126,8 +139,12 @@ module Legion
126
139
 
127
140
  log.info { "bedrock.provider.discover_offerings: listing foundation models (region=#{region})" }
128
141
  response = bedrock_client.list_foundation_models(**filters)
129
- @cached_offerings = Array(value(response, :model_summaries)).map do |summary|
130
- offering_from_summary(summary)
142
+ @cached_offerings = Array(value(response, :model_summaries)).filter_map do |summary|
143
+ offering = offering_from_summary(summary)
144
+ model_id = offering.respond_to?(:model) ? offering.model : (offering[:model] || offering[:id])
145
+ next unless model_allowed?(model_id.to_s)
146
+
147
+ offering
131
148
  end
132
149
  log.info { "bedrock.provider.discover_offerings: found #{@cached_offerings.size} models" }
133
150
  @cached_offerings
@@ -230,7 +247,7 @@ module Legion
230
247
  log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
231
248
  request = Utils.deep_merge(
232
249
  {
233
- model_id: self.class.inference_profile_id(model_id(model)),
250
+ model_id: self.class.inference_profile_id(model_id(model), region: region),
234
251
  input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
235
252
  },
236
253
  params
@@ -323,8 +340,8 @@ module Legion
323
340
  Legion::Extensions::Llm::Routing::ModelOffering.new(
324
341
  provider_family: :bedrock,
325
342
  instance_id: instance_id,
326
- transport: configured_transport(:aws_sdk),
327
- tier: configured_tier(:frontier),
343
+ transport: offering_transport,
344
+ tier: offering_tier,
328
345
  model: model,
329
346
  usage_type: usage_type,
330
347
  capabilities: capabilities || default_capabilities(model),
@@ -346,30 +363,48 @@ module Legion
346
363
  ctx ? { context_window: ctx } : nil
347
364
  end
348
365
 
349
- def configured_transport(default)
350
- config.respond_to?(:transport) ? config.transport : default
351
- end
352
-
353
- def configured_tier(default)
354
- config.respond_to?(:tier) ? config.tier : default
355
- end
356
-
357
- def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:)
366
+ def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil)
358
367
  {
359
- model_id: self.class.inference_profile_id(model_id(model)),
368
+ model_id: self.class.inference_profile_id(model_id(model), region: region),
360
369
  messages: format_messages(messages.reject { |message| message.role == :system }),
361
370
  system: format_system(messages),
362
371
  inference_config: { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact,
363
- tool_config: format_tool_config(tools, tool_prefs)
372
+ tool_config: format_tool_config(tools, tool_prefs),
373
+ guardrail_config: guardrail_config
364
374
  }.compact
365
375
  end
366
376
 
367
377
  def format_messages(messages)
368
- messages.filter_map do |message|
369
- blocks = content_blocks(message.content)
378
+ total = messages.size
379
+ messages.filter_map.with_index do |message, idx|
380
+ blocks = message.role == :tool ? tool_result_blocks(message) : content_blocks(message.content)
370
381
  next if blocks.empty?
371
382
 
372
- { role: bedrock_role(message.role), content: blocks }
383
+ cache_blocks = should_cache_message?(idx, total) ? add_cache_control_to_blocks(blocks) : blocks
384
+ { role: bedrock_role(message.role), content: cache_blocks }
385
+ end
386
+ end
387
+
388
+ def tool_result_blocks(message)
389
+ return [] unless message.tool_result?
390
+
391
+ [{
392
+ type: 'tool_result',
393
+ tool_use: { tool_use_id: message.tool_call_id },
394
+ content: [{ type: 'text', text: message.tool_results.to_s }]
395
+ }]
396
+ end
397
+
398
+ def should_cache_message?(index, total)
399
+ # Cache first 4 messages, never the last message
400
+ return false if index == total - 1
401
+
402
+ index < 4
403
+ end
404
+
405
+ def add_cache_control_to_blocks(blocks)
406
+ blocks.map do |block|
407
+ block.dup.merge(cache_control: { type: 'cache_control' })
373
408
  end
374
409
  end
375
410
 
@@ -382,7 +417,7 @@ module Legion
382
417
  def system_blocks(system)
383
418
  return nil if system.to_s.empty?
384
419
 
385
- [{ text: system }]
420
+ [{ text: system, cache_control: { type: 'cache_control' } }]
386
421
  end
387
422
 
388
423
  def bedrock_role(role)
@@ -393,12 +428,45 @@ module Legion
393
428
  raw = raw_content(content)
394
429
  return raw if raw
395
430
 
431
+ return image_blocks(content) if content.respond_to?(:attachments) && !content.attachments.empty?
432
+
396
433
  text = content_text(content)
397
434
  return [] if text.strip.empty?
398
435
 
399
436
  [{ text: text }]
400
437
  end
401
438
 
439
+ def image_blocks(content)
440
+ blocks = []
441
+ text = content_text(content)
442
+ blocks << { text: text } if text.strip.present?
443
+
444
+ content.attachments.each do |attachment|
445
+ if attachment.is_a?(Legion::Extensions::Llm::Content::ImageAttachment)
446
+ blocks << format_image_attachment(attachment)
447
+ end
448
+ end
449
+ blocks
450
+ end
451
+
452
+ def format_image_attachment(attachment)
453
+ {
454
+ image: {
455
+ format: image_format(attachment.format),
456
+ source: { bytes: attachment.data }
457
+ }
458
+ }
459
+ end
460
+
461
+ def image_format(fmt)
462
+ case fmt.to_s.downcase
463
+ when 'jpeg', 'jpg' then 'jpeg'
464
+ when 'png' then 'png'
465
+ when 'gif' then 'gif'
466
+ when 'webp' then 'webp'
467
+ end || 'jpeg'
468
+ end
469
+
402
470
  def raw_content(content)
403
471
  return nil unless content.is_a?(Legion::Extensions::Llm::Content::Raw)
404
472
 
@@ -418,7 +486,14 @@ module Legion
418
486
  "bedrock.provider.tools: formatting tools=#{tools.keys.map(&:to_s).sort.join(',')} " \
419
487
  "tool_choice=#{tool_choice_label(tool_prefs)}"
420
488
  end
421
- { tools: tools.values.map { |tool| tool_definition(tool) }, tool_choice: tool_choice(tool_prefs) }.compact
489
+ {
490
+ tools: tools.values.map { |tool| tool_definition_with_cache(tool) },
491
+ tool_choice: tool_choice(tool_prefs)
492
+ }.compact
493
+ end
494
+
495
+ def tool_definition_with_cache(tool)
496
+ tool_definition(tool).merge(cache_control: { type: 'cache_control' })
422
497
  end
423
498
 
424
499
  def tool_definition(tool)
@@ -469,26 +544,33 @@ module Legion
469
544
  tool_calls: parse_tool_calls(value(message, :content)),
470
545
  input_tokens: value(usage, :input_tokens),
471
546
  output_tokens: value(usage, :output_tokens),
547
+ cached_tokens: cache_read_tokens(usage),
548
+ cache_creation_tokens: cache_write_tokens(usage),
472
549
  raw: normalize_response(response)
473
550
  )
474
551
  end
475
552
 
476
553
  def stream_converse(request, fallback_model)
477
- state = { accumulated: +'', final_usage: nil, stop_reason: nil, tool_use_blocks: [], current_tool_use: nil }
554
+ state = { accumulated: +'', thinking: +'', final_usage: nil, stop_reason: nil,
555
+ tool_use_blocks: [], current_tool_use: nil, in_thinking: false }
478
556
 
479
557
  runtime_client.converse_stream(**request) do |stream|
480
558
  wire_stream_handlers(stream, state, fallback_model) { |chunk| yield chunk if block_given? }
481
559
  end
482
560
 
483
- Legion::Extensions::Llm::Message.new(
561
+ msg_attrs = {
484
562
  role: :assistant,
485
563
  content: state[:accumulated],
486
564
  model_id: fallback_model,
487
565
  tool_calls: build_stream_tool_calls(state[:tool_use_blocks]),
488
566
  input_tokens: value(state[:final_usage], :input_tokens),
489
567
  output_tokens: value(state[:final_usage], :output_tokens),
568
+ cached_tokens: cache_read_tokens(state[:final_usage]),
569
+ cache_creation_tokens: cache_write_tokens(state[:final_usage]),
490
570
  stop_reason: state[:stop_reason]
491
- )
571
+ }
572
+ msg_attrs[:thinking] = state[:thinking] unless state[:thinking].empty?
573
+ Legion::Extensions::Llm::Message.new(**msg_attrs)
492
574
  end
493
575
 
494
576
  def wire_stream_handlers(stream, state, fallback_model, &)
@@ -504,6 +586,13 @@ module Legion
504
586
 
505
587
  stream.on_content_block_start_event do |event|
506
588
  start = value(event, :start)
589
+
590
+ if value(start, :thinking)
591
+ state[:in_thinking] = true
592
+ next
593
+ end
594
+
595
+ state[:in_thinking] = false
507
596
  tool_start = value(start, :tool_use) if start
508
597
  next unless tool_start
509
598
 
@@ -520,10 +609,14 @@ module Legion
520
609
  delta = value(event, :delta)
521
610
  text = value(delta, :text)
522
611
  if text
523
- state[:accumulated] << text
524
- if block_given?
525
- yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
526
- model_id: fallback_model)
612
+ if state[:in_thinking]
613
+ state[:thinking] << text
614
+ else
615
+ state[:accumulated] << text
616
+ if block_given?
617
+ yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
618
+ model_id: fallback_model)
619
+ end
527
620
  end
528
621
  end
529
622
 
@@ -569,6 +662,18 @@ module Legion
569
662
  end
570
663
  end
571
664
 
665
+ def cache_read_tokens(usage)
666
+ return nil if usage.nil?
667
+
668
+ value(usage, :cache_read_input_tokens) || value(usage, 'cache_read_input_tokens')
669
+ end
670
+
671
+ def cache_write_tokens(usage)
672
+ return nil if usage.nil?
673
+
674
+ value(usage, :cache_creation_input_tokens) || value(usage, 'cache_creation_input_tokens')
675
+ end
676
+
572
677
  def parse_embedding_response(response, model:)
573
678
  body = parse_body(value(response, :body))
574
679
  vectors = body['embedding'] || body['embeddings'] || body.dig('data', 0, 'embedding')
@@ -595,11 +700,11 @@ module Legion
595
700
  end
596
701
 
597
702
  def bedrock_client
598
- Aws::Bedrock::Client.new(client_options)
703
+ @bedrock_client ||= Aws::Bedrock::Client.new(client_options)
599
704
  end
600
705
 
601
706
  def runtime_client
602
- Aws::BedrockRuntime::Client.new(client_options)
707
+ @runtime_client ||= Aws::BedrockRuntime::Client.new(client_options)
603
708
  end
604
709
 
605
710
  def client_options
@@ -626,10 +731,21 @@ module Legion
626
731
  return Aws::SharedCredentials.new(profile_name: config.bedrock_profile) if config.bedrock_profile
627
732
  return nil unless config.bedrock_access_key_id
628
733
 
734
+ if static_credentials_blocked?
735
+ raise SecurityError,
736
+ 'Static AWS credentials are disabled (security.block_static_aws_credentials=true); use IAM roles'
737
+ end
738
+ log.warn('[bedrock] Using static AWS credentials — prefer IAM roles for production')
629
739
  Aws::Credentials.new(config.bedrock_access_key_id, config.bedrock_secret_access_key,
630
740
  config.bedrock_session_token)
631
741
  end
632
742
 
743
+ def static_credentials_blocked?
744
+ return false unless defined?(::Legion::Settings)
745
+
746
+ ::Legion::Settings.dig(:extensions, :llm, :security, :block_static_aws_credentials) == true
747
+ end
748
+
633
749
  def credential_source
634
750
  return :static if config.bedrock_access_key_id
635
751
  return :profile if config.bedrock_profile
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Bedrock
7
- VERSION = '0.3.9'
7
+ VERSION = '0.3.11'
8
8
  end
9
9
  end
10
10
  end
@@ -22,8 +22,9 @@ module Legion
22
22
  ::Legion::Extensions::Llm.provider_settings(
23
23
  family: PROVIDER_FAMILY,
24
24
  instance: {
25
- default_model: 'us.anthropic.claude-sonnet-4-6',
26
- tier: :frontier,
25
+ default_model: 'anthropic.claude-sonnet-4',
26
+ region: 'us-east-1',
27
+ tier: :cloud,
27
28
  transport: :aws_sdk,
28
29
  credentials: {
29
30
  bearer_token: nil,
@@ -73,7 +74,7 @@ module Legion
73
74
  .transform_values do |config|
74
75
  sanitized = sanitize_instance_config(config)
75
76
  sanitized[:capabilities] ||= DEFAULT_CAPABILITIES.dup
76
- sanitized[:default_model] ||= 'us.anthropic.claude-sonnet-4-6'
77
+ sanitized[:default_model] ||= 'anthropic.claude-sonnet-4'
77
78
  sanitized
78
79
  end
79
80
  end
@@ -197,6 +198,8 @@ module Legion
197
198
  end
198
199
 
199
200
  def self.normalize_instance_config(config)
201
+ return {} if config.nil?
202
+
200
203
  normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
201
204
  normalized[:bedrock_region] ||= normalized.delete(:region)
202
205
  normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-bedrock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.3.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO