lex-llm-bedrock 0.3.10 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0d6ffe6c93fec2590c74ed5a01a944c81694fb98316bf35bde08db6efecfed9
4
- data.tar.gz: 3e5174849ecf00a36f79e8926237f430ce1b1e8a8572c4fede2463e5cc0a3bda
3
+ metadata.gz: 52fa9adf70213041c0b2e6e96b44ff50b64220ae09f9cdab5619808f725df914
4
+ data.tar.gz: 12b5b4b31b712be0f0831e5e37dd045e6ca89442296bc645100c05cb41f4f8e0
5
5
  SHA512:
6
- metadata.gz: 273bc8934934e7eb40e9365a57de5fb33c12114c84b435b353ffb3e0e83329ced120f94016ddc59bd8cfc484c3a35094814f41beb8b0361ece513417505954d5
7
- data.tar.gz: 2d36dcff70b35577a0d5bbf6846ed6181851e6ac32d8cd65f67367429f32ec88869a084ccc4e3fdde200ab50ae3ca6c21509c84fa7a4af5ae360eaf75b85a2c5
6
+ metadata.gz: 3519d77bffac0e5ea90805fe64cc095f831e6069bc21bf93ffa3e0441977560ebd556b8d2c82b5fad163301c3a78c54e5efdf6bc8060f0ad4935cdce07ab2cfd
7
+ data.tar.gz: 96f1a2d3d394e17b0a517a3523445d027b9a11819785c889b2aae483bfd97753d8b7926aeadaab827330831525a3c86e3b4a3e2d64462b6d35113d359431e7fa
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.11 - 2026-05-31
4
+
5
+ ### Security
6
+ - **BEDROCK-CRED-01**: Static AWS credentials now emit a deprecation warning. New setting `security.block_static_aws_credentials=true` rejects them entirely, forcing IAM role-based authentication.
7
+
8
+ ### Fixed
9
+ - **TRANSLATION-BUG-07**: Bedrock streaming now preserves thinking (chain-of-thought) blocks in the final `Message`. Previously CoT was accumulated by the wire handler but silently dropped from the returned response.
10
+
11
+ ### Added
12
+ - **PROMPT-CACHE-01**: System blocks, tool definitions, and early conversation messages (first 4, never the last) now include `cache_control: { type: "cache_control" }` markers for Anthropic prompt caching via Bedrock Converse.
13
+ - **PROMPT-CACHE-02**: Response parser extracts `cached_input_tokens` (`cache_read_input_tokens`) and `cache_creation_tokens` (`cache_creation_input_tokens`) from Bedrock usage metadata into `Message#cached_tokens` and `Message#cache_creation_tokens`.
14
+
3
15
  ## 0.3.10 - 2026-05-21
4
16
 
5
17
  - Add `default_transport`/`default_tier` class declarations, remove `configured_transport`/`configured_tier`
@@ -77,11 +77,24 @@ module Legion
77
77
 
78
78
  INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
79
79
 
80
# Resolve a bare Bedrock model ID to a cross-region inference profile ID.
#
# IDs that already carry a geography prefix, full ARNs, and models whose
# vendor prefix is not listed in INFERENCE_PROFILE_PREFIXES are returned
# unchanged.
#
# @param model [String] model ID, inference profile ID, or ARN
# @param region [String, Symbol, nil] AWS region used to pick the profile
#   geography; nil keeps the historical `us` default
# @return [String] the (possibly prefixed) model identifier
def inference_profile_id(model, region: nil)
  return model if model.start_with?('us.', 'eu.', 'ap.', 'apac.', 'arn:')
  return model unless INFERENCE_PROFILE_PREFIXES.any? { |prefix| model.start_with?(prefix) }

  "#{region ? region_prefix(region) : 'us'}.#{model}"
end

# Region-based inference profile prefix mapping.
# Bare model IDs (e.g. anthropic.claude-sonnet-4) get the region prefix.
# Asia-Pacific regions map to `apac`, the geography prefix AWS uses for
# cross-region inference profiles (there is no `ap.` profile family).
REGION_PREFIX = {
  'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
  'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
  'ap-south-1' => 'apac', 'ap-southeast-1' => 'apac', 'ap-southeast-2' => 'apac', 'ap-northeast-1' => 'apac'
}.freeze

# Fallback keyed by the leading segment of the region name, so regions that
# are missing from REGION_PREFIX (e.g. eu-north-1) still resolve to their own
# geography instead of silently becoming `us`.
REGION_FAMILY_PREFIX = { 'us' => 'us', 'eu' => 'eu', 'ap' => 'apac' }.freeze

# Map an AWS region name to its inference profile geography prefix.
#
# @param region [String, Symbol, nil] AWS region name
# @return [String] the table entry, else the region-family fallback, else 'us'
def region_prefix(region)
  name = region.to_s
  REGION_PREFIX.fetch(name) do
    family = name[/\A[a-z]+/].to_s
    REGION_FAMILY_PREFIX.fetch(family, 'us')
  end
end
86
99
  end
87
100
 
@@ -234,7 +247,7 @@ module Legion
234
247
  log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
235
248
  request = Utils.deep_merge(
236
249
  {
237
- model_id: self.class.inference_profile_id(model_id(model)),
250
+ model_id: self.class.inference_profile_id(model_id(model), region: region),
238
251
  input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
239
252
  },
240
253
  params
@@ -350,22 +363,48 @@ module Legion
350
363
  ctx ? { context_window: ctx } : nil
351
364
  end
352
365
 
353
# Assemble the keyword arguments for a Bedrock Converse call.
#
# System-role messages are excluded from `messages` and rendered separately
# through `format_system`. Top-level entries that end up nil (for example an
# absent guardrail config) are removed by the final `compact`.
#
# @param messages [Enumerable] conversation messages responding to `role`
# @param model [Object] model reference understood by `model_id`
# @param temperature [Numeric, nil] omitted from inference_config when nil
# @param max_tokens [Integer, nil] falls back to `model_max_tokens(model)`
# @param tools [Object] passed through to `format_tool_config`
# @param tool_prefs [Object] passed through to `format_tool_config`
# @param guardrail_config [Hash, nil] forwarded as-is when present
# @return [Hash] request hash
def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil)
  profile_id = self.class.inference_profile_id(model_id(model), region: region)
  conversation = format_messages(messages.reject { |entry| entry.role == :system })
  system_prompt = format_system(messages)
  inference = { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact
  tool_config = format_tool_config(tools, tool_prefs)

  {
    model_id: profile_id,
    messages: conversation,
    system: system_prompt,
    inference_config: inference,
    tool_config: tool_config,
    guardrail_config: guardrail_config
  }.compact
end
362
376
 
363
377
  def format_messages(messages)
364
- messages.filter_map do |message|
365
- blocks = content_blocks(message.content)
378
+ total = messages.size
379
+ messages.filter_map.with_index do |message, idx|
380
+ blocks = message.role == :tool ? tool_result_blocks(message) : content_blocks(message.content)
366
381
  next if blocks.empty?
367
382
 
368
- { role: bedrock_role(message.role), content: blocks }
383
+ cache_blocks = should_cache_message?(idx, total) ? add_cache_control_to_blocks(blocks) : blocks
384
+ { role: bedrock_role(message.role), content: cache_blocks }
385
+ end
386
+ end
387
+
388
# Build the content blocks for a tool-role message.
#
# The result is wrapped in a single `tool_result` block keyed by the ID of
# the originating tool call. The payload uses the same bare `{ text: ... }`
# shape as the other content builders in this provider, rather than
# `type:`-tagged hashes.
#
# @param message [#tool_result?, #tool_call_id, #tool_results] tool message
# @return [Array<Hash>] one tool_result block, or [] when the message does
#   not carry a tool result
def tool_result_blocks(message)
  return [] unless message.tool_result?

  [{
    tool_result: {
      tool_use_id: message.tool_call_id,
      content: [{ text: message.tool_results.to_s }]
    }
  }]
end
397
+
398
# Decide whether the message at +index+ should receive cache markers.
#
# Only the first four positions qualify, and the final message of the
# conversation is always excluded.
#
# @param index [Integer] zero-based position of the message
# @param total [Integer] number of messages in the conversation
# @return [Boolean]
def should_cache_message?(index, total)
  last_position = total - 1
  index != last_position && index < 4
end
404
+
405
# Return copies of +blocks+ with a `cache_control` marker merged into each.
# The input hashes are left untouched.
#
# NOTE(review): the Bedrock Converse API documents cache checkpoints as
# separate `cachePoint` content blocks; confirm that a `cache_control` key on
# the block itself is accepted by the client before relying on this.
#
# @param blocks [Array<Hash>] content blocks
# @return [Array<Hash>] new hashes carrying the marker
def add_cache_control_to_blocks(blocks)
  blocks.map { |block| block.merge(cache_control: { type: 'cache_control' }) }
end
371
410
 
@@ -378,7 +417,7 @@ module Legion
378
417
  def system_blocks(system)
379
418
  return nil if system.to_s.empty?
380
419
 
381
- [{ text: system }]
420
+ [{ text: system, cache_control: { type: 'cache_control' } }]
382
421
  end
383
422
 
384
423
  def bedrock_role(role)
@@ -389,12 +428,45 @@ module Legion
389
428
  raw = raw_content(content)
390
429
  return raw if raw
391
430
 
431
+ return image_blocks(content) if content.respond_to?(:attachments) && !content.attachments.empty?
432
+
392
433
  text = content_text(content)
393
434
  return [] if text.strip.empty?
394
435
 
395
436
  [{ text: text }]
396
437
  end
397
438
 
439
# Build content blocks for content that carries attachments.
#
# A leading text block is emitted only when the content text is not blank,
# followed by one block per ImageAttachment. Attachments of any other type
# are ignored.
#
# Blankness is tested with `strip.empty?` (as the sibling text path does)
# because `String#present?` is an ActiveSupport extension and is not
# available in plain Ruby.
#
# @param content [#attachments] content object also understood by `content_text`
# @return [Array<Hash>] text block (optional) followed by image blocks
def image_blocks(content)
  text = content_text(content)
  blocks = text.strip.empty? ? [] : [{ text: text }]

  content.attachments.each do |attachment|
    next unless attachment.is_a?(Legion::Extensions::Llm::Content::ImageAttachment)

    blocks << format_image_attachment(attachment)
  end
  blocks
end
451
+
452
# Render one image attachment as an `image` content block.
#
# @param attachment [#format, #data] attachment; `format` is normalised via
#   `image_format`, `data` is passed through as the source bytes
# @return [Hash] `{ image: { format:, source: { bytes: } } }`
def format_image_attachment(attachment)
  payload = { format: image_format(attachment.format), source: { bytes: attachment.data } }
  { image: payload }
end
460
+
461
# Normalise an image format name to one of 'jpeg', 'png', 'gif' or 'webp'.
#
# Matching is case-insensitive; 'jpg' is treated as 'jpeg', and any
# unrecognised value (including nil) falls back to 'jpeg'.
#
# @param fmt [#to_s] format name
# @return [String]
def image_format(fmt)
  normalized = fmt.to_s.downcase
  return normalized if %w[png gif webp].include?(normalized)

  'jpeg'
end
469
+
398
470
  def raw_content(content)
399
471
  return nil unless content.is_a?(Legion::Extensions::Llm::Content::Raw)
400
472
 
@@ -414,7 +486,14 @@ module Legion
414
486
  "bedrock.provider.tools: formatting tools=#{tools.keys.map(&:to_s).sort.join(',')} " \
415
487
  "tool_choice=#{tool_choice_label(tool_prefs)}"
416
488
  end
417
- { tools: tools.values.map { |tool| tool_definition(tool) }, tool_choice: tool_choice(tool_prefs) }.compact
489
+ {
490
+ tools: tools.values.map { |tool| tool_definition_with_cache(tool) },
491
+ tool_choice: tool_choice(tool_prefs)
492
+ }.compact
493
+ end
494
+
495
# Return the definition produced by `tool_definition` with a `cache_control`
# marker merged in.
#
# NOTE(review): the Bedrock Converse API documents cache checkpoints as
# separate `cachePoint` entries in the tool list; confirm that a
# `cache_control` key on the tool entry itself is accepted.
#
# @param tool [Object] tool understood by `tool_definition`
# @return [Hash]
def tool_definition_with_cache(tool)
  definition = tool_definition(tool)
  definition.merge(cache_control: { type: 'cache_control' })
end
419
498
 
420
499
  def tool_definition(tool)
@@ -465,26 +544,33 @@ module Legion
465
544
  tool_calls: parse_tool_calls(value(message, :content)),
466
545
  input_tokens: value(usage, :input_tokens),
467
546
  output_tokens: value(usage, :output_tokens),
547
+ cached_tokens: cache_read_tokens(usage),
548
+ cache_creation_tokens: cache_write_tokens(usage),
468
549
  raw: normalize_response(response)
469
550
  )
470
551
  end
471
552
 
472
553
  def stream_converse(request, fallback_model)
473
- state = { accumulated: +'', final_usage: nil, stop_reason: nil, tool_use_blocks: [], current_tool_use: nil }
554
+ state = { accumulated: +'', thinking: +'', final_usage: nil, stop_reason: nil,
555
+ tool_use_blocks: [], current_tool_use: nil, in_thinking: false }
474
556
 
475
557
  runtime_client.converse_stream(**request) do |stream|
476
558
  wire_stream_handlers(stream, state, fallback_model) { |chunk| yield chunk if block_given? }
477
559
  end
478
560
 
479
- Legion::Extensions::Llm::Message.new(
561
+ msg_attrs = {
480
562
  role: :assistant,
481
563
  content: state[:accumulated],
482
564
  model_id: fallback_model,
483
565
  tool_calls: build_stream_tool_calls(state[:tool_use_blocks]),
484
566
  input_tokens: value(state[:final_usage], :input_tokens),
485
567
  output_tokens: value(state[:final_usage], :output_tokens),
568
+ cached_tokens: cache_read_tokens(state[:final_usage]),
569
+ cache_creation_tokens: cache_write_tokens(state[:final_usage]),
486
570
  stop_reason: state[:stop_reason]
487
- )
571
+ }
572
+ msg_attrs[:thinking] = state[:thinking] unless state[:thinking].empty?
573
+ Legion::Extensions::Llm::Message.new(**msg_attrs)
488
574
  end
489
575
 
490
576
  def wire_stream_handlers(stream, state, fallback_model, &)
@@ -500,6 +586,13 @@ module Legion
500
586
 
501
587
  stream.on_content_block_start_event do |event|
502
588
  start = value(event, :start)
589
+
590
+ if value(start, :thinking)
591
+ state[:in_thinking] = true
592
+ next
593
+ end
594
+
595
+ state[:in_thinking] = false
503
596
  tool_start = value(start, :tool_use) if start
504
597
  next unless tool_start
505
598
 
@@ -516,10 +609,14 @@ module Legion
516
609
  delta = value(event, :delta)
517
610
  text = value(delta, :text)
518
611
  if text
519
- state[:accumulated] << text
520
- if block_given?
521
- yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
522
- model_id: fallback_model)
612
+ if state[:in_thinking]
613
+ state[:thinking] << text
614
+ else
615
+ state[:accumulated] << text
616
+ if block_given?
617
+ yield Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text,
618
+ model_id: fallback_model)
619
+ end
523
620
  end
524
621
  end
525
622
 
@@ -565,6 +662,18 @@ module Legion
565
662
  end
566
663
  end
567
664
 
665
# Read the cache-read token count from a usage object.
#
# The symbol key is tried first and the string key second, both through the
# `value` helper.
#
# @param usage [Object, nil] usage metadata
# @return [Object, nil] nil when +usage+ is nil or neither key yields a value
def cache_read_tokens(usage)
  return nil if usage.nil?

  by_symbol = value(usage, :cache_read_input_tokens)
  return by_symbol if by_symbol

  value(usage, 'cache_read_input_tokens')
end
670
+
671
# Read the cache-creation token count from a usage object.
#
# The symbol key is tried first and the string key second, both through the
# `value` helper.
#
# @param usage [Object, nil] usage metadata
# @return [Object, nil] nil when +usage+ is nil or neither key yields a value
def cache_write_tokens(usage)
  return nil if usage.nil?

  by_symbol = value(usage, :cache_creation_input_tokens)
  return by_symbol if by_symbol

  value(usage, 'cache_creation_input_tokens')
end
676
+
568
677
  def parse_embedding_response(response, model:)
569
678
  body = parse_body(value(response, :body))
570
679
  vectors = body['embedding'] || body['embeddings'] || body.dig('data', 0, 'embedding')
@@ -591,11 +700,11 @@ module Legion
591
700
  end
592
701
 
593
702
  def bedrock_client
594
- Aws::Bedrock::Client.new(client_options)
703
+ @bedrock_client ||= Aws::Bedrock::Client.new(client_options)
595
704
  end
596
705
 
597
706
  def runtime_client
598
- Aws::BedrockRuntime::Client.new(client_options)
707
+ @runtime_client ||= Aws::BedrockRuntime::Client.new(client_options)
599
708
  end
600
709
 
601
710
  def client_options
@@ -622,10 +731,21 @@ module Legion
622
731
  return Aws::SharedCredentials.new(profile_name: config.bedrock_profile) if config.bedrock_profile
623
732
  return nil unless config.bedrock_access_key_id
624
733
 
734
+ if static_credentials_blocked?
735
+ raise SecurityError,
736
+ 'Static AWS credentials are disabled (security.block_static_aws_credentials=true); use IAM roles'
737
+ end
738
+ log.warn('[bedrock] Using static AWS credentials — prefer IAM roles for production')
625
739
  Aws::Credentials.new(config.bedrock_access_key_id, config.bedrock_secret_access_key,
626
740
  config.bedrock_session_token)
627
741
  end
628
742
 
743
# Report whether static AWS credentials have been disabled by settings.
#
# Returns false when `::Legion::Settings` is not loaded. Otherwise the
# `extensions.llm.security.block_static_aws_credentials` entry must be
# exactly `true`; any other value leaves static credentials allowed.
#
# @return [Boolean]
def static_credentials_blocked?
  if defined?(::Legion::Settings)
    ::Legion::Settings.dig(:extensions, :llm, :security, :block_static_aws_credentials) == true
  else
    false
  end
end
748
+
629
749
  def credential_source
630
750
  return :static if config.bedrock_access_key_id
631
751
  return :profile if config.bedrock_profile
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Bedrock
7
- VERSION = '0.3.10'
7
+ VERSION = '0.3.11'
8
8
  end
9
9
  end
10
10
  end
@@ -22,7 +22,7 @@ module Legion
22
22
  ::Legion::Extensions::Llm.provider_settings(
23
23
  family: PROVIDER_FAMILY,
24
24
  instance: {
25
- default_model: 'us.anthropic.claude-sonnet-4-6',
25
+ default_model: 'anthropic.claude-sonnet-4',
26
26
  region: 'us-east-1',
27
27
  tier: :cloud,
28
28
  transport: :aws_sdk,
@@ -74,7 +74,7 @@ module Legion
74
74
  .transform_values do |config|
75
75
  sanitized = sanitize_instance_config(config)
76
76
  sanitized[:capabilities] ||= DEFAULT_CAPABILITIES.dup
77
- sanitized[:default_model] ||= 'us.anthropic.claude-sonnet-4-6'
77
+ sanitized[:default_model] ||= 'anthropic.claude-sonnet-4'
78
78
  sanitized
79
79
  end
80
80
  end
@@ -198,6 +198,8 @@ module Legion
198
198
  end
199
199
 
200
200
  def self.normalize_instance_config(config)
201
+ return {} if config.nil?
202
+
201
203
  normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
202
204
  normalized[:bedrock_region] ||= normalized.delete(:region)
203
205
  normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-bedrock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.10
4
+ version: 0.3.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO