lex-llm-bedrock 0.4.4 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3aa9de1a3f3c07848fe253d6f8627c54b03bbc74bb96ab5032a407b109aad13a
4
- data.tar.gz: a3f680546d15739bdfb84f79daa70a45beae2650893aead380e23b0e9086b38e
3
+ metadata.gz: ba738761ab28ad68c0dfb013b6f13c3d17ae587295a0b53f97e7bb091d8d4a65
4
+ data.tar.gz: 3b5d2b5955816d248773ce88d48d6f5719d06bb47bf8cba685eaef6cefb2bee2
5
5
  SHA512:
6
- metadata.gz: 7b6ade385af00bbcd329278658f4fe829b312735f0141514353e4da0912d1002a9213cbd6f7411f4ba8dcad9dc9284adefe3518c17ff12297dd18d3f9db36d92
7
- data.tar.gz: 0ffcee037a6efb318802b3ec1ad1684f0516ee133799dc420bd7a74883320c0c2029fc0df916757c5c4584386d4cc2f7cc5f5f8e7bfca46a8def152adfdb9e1f
6
+ metadata.gz: 99e05143e8194b69cdca989616d2f65f36807cc1d408fa4636eb98b688d76c8b5156c6864c6f7a391b601195aef667fb25e3086d2054f082f62f7dba995dd126
7
+ data.tar.gz: fd86abdaca844a0ea28f4b5e32041824fabd9dcf0e5870344045c5379ca776aa7b566a07bc1deb1480bd3c3f007146d1287f906abb66cf5c3df701661bf22e17
data/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.9] - 2026-06-20
4
+
5
+ ### Fixed
6
+ - Stub shared registry publishing through `RegistryPublisher#schedule` in specs so async availability-event coverage stays stable after the shared publisher moved off raw `Thread.new`.
7
+
8
+ ## [0.4.8] - 2026-06-20
9
+
10
+ ### Fixed
11
+ - Stop bulk-publishing Bedrock model availability from `list_models`; discovery now emits one registry event per seen model from the shared `lex-llm` policy-filter path so blocked models stay observable without duplicate publishes.
12
+
13
+ ## [0.4.7] - 2026-06-20
14
+
15
+ ### Fixed
16
+ - Stop deriving Bedrock `us.`/`eu.`/`ap.` inference-profile prefixes from AWS regions. Model invocation now strips any existing geo prefix and prepends only an explicit Bedrock geo prefix setting, defaulting to `us`.
17
+
18
+ ## [0.4.6] - 2026-06-20
19
+
20
+ ### Fixed
21
+ - Canonicalize Bedrock embedding discovery to the shared singular `:embedding` capability and route provider/instance/model override extraction through the `lex-llm` base provider contract.
22
+
23
+ ## [0.4.5] - 2026-06-19
24
+
25
+ ### Changed
26
+ - Adopt `Legion::Extensions::Llm::Inventory::ScopedRefresher` mixin (lex-llm 0.6.0). Discovery
27
+ refresh actors now write directly to the live `Inventory` catalog via `Inventory.write_lane`.
28
+ - Pin `lex-llm >= 0.6.0` and `legion-llm >= 0.14.0` in gemspec.
29
+ - Standard `weight: 100` default added to provider instance settings schema.
30
+
3
31
  ## 0.4.4 - 2026-06-17
4
32
 
5
33
  ### Fixed
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.add_dependency 'legion-logging', '>= 1.3.2'
30
30
  spec.add_dependency 'legion-settings', '>= 1.3.14'
31
31
  spec.add_dependency 'legion-transport', '>= 1.4.14'
32
- spec.add_dependency 'lex-llm', '>= 0.5.4'
32
+ spec.add_dependency 'lex-llm', '>= 0.6.0'
33
33
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
4
+
3
5
  begin
4
6
  require 'legion/extensions/actors/every'
5
7
  rescue LoadError => e
@@ -8,15 +10,27 @@ end
8
10
 
9
11
  return unless defined?(Legion::Extensions::Actors::Every)
10
12
 
13
+ begin
14
+ require 'legion/extensions/llm/inventory/scoped_refresher'
15
+ rescue LoadError => e
16
+ warn(e.message) if $VERBOSE
17
+ end
18
+
11
19
  module Legion
12
20
  module Extensions
13
21
  module Llm
14
22
  module Bedrock
15
23
  module Actor
16
- class DiscoveryRefresh < Legion::Extensions::Actors::Every # rubocop:disable Style/Documentation
24
+ class DiscoveryRefresh < Legion::Extensions::Actors::Every # rubocop:disable Style/Documentation,Metrics/ClassLength
17
25
  include Legion::Logging::Helper
18
26
 
19
- REFRESH_INTERVAL = 1800
27
+ if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
28
+ include Legion::Extensions::Llm::Inventory::ScopedRefresher
29
+ end
30
+
31
+ EMBED_TYPES = %i[embed embedding].freeze
32
+
33
+ def self.every_seconds = 3600
20
34
 
21
35
  def runner_class = self.class
22
36
  def runner_function = 'manual'
@@ -26,26 +40,117 @@ module Legion
26
40
  def generate_task? = false
27
41
 
28
42
  def time
29
- return REFRESH_INTERVAL unless defined?(Legion::Settings)
43
+ return self.class.every_seconds unless defined?(Legion::Settings)
44
+
45
+ Legion::Settings.dig(:extensions, :llm, :bedrock, :discovery_interval) || self.class.every_seconds
46
+ end
30
47
 
31
- Legion::Settings.dig(:extensions, :llm, :bedrock, :discovery_interval) || REFRESH_INTERVAL
48
+ def scope_key(**)
49
+ { provider: :bedrock }
32
50
  end
33
51
 
34
- def manual
35
- log.debug('[bedrock][discovery_refresh] refreshing model list')
36
- return unless defined?(Legion::LLM::Discovery)
52
+ def compute_lanes_for_scope(**)
53
+ return [] unless defined?(Legion::LLM::Call::Registry)
37
54
 
38
- Legion::LLM::Discovery.refresh_discovered_models!(provider: :bedrock)
55
+ settings = Legion::Settings.dig(:extensions, :llm, :bedrock) || {}
56
+ fleet_enabled = settings.dig(:fleet, :dispatch, :enabled)
39
57
 
40
- if defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:populate_auto_rules)
41
- Legion::LLM::Router.populate_auto_rules(Legion::LLM::Discovery.discovered_instances)
42
- end
43
- if defined?(Legion::LLM::Inventory) && Legion::LLM::Inventory.respond_to?(:invalidate_offerings_cache!)
44
- Legion::LLM::Inventory.invalidate_offerings_cache!
58
+ instances = Legion::LLM::Call::Registry.all_instances.select do |e|
59
+ (e[:provider] || '').to_sym == :bedrock
45
60
  end
61
+
62
+ instances.flat_map { |inst| lanes_for_instance(inst, fleet_enabled: fleet_enabled) }
63
+ rescue StandardError => e
64
+ handle_exception(e, level: :warn, handled: true,
65
+ operation: 'bedrock.actor.discovery_refresh.compute_lanes')
66
+ []
67
+ end
68
+
69
+ def credential_hash(**)
70
+ raw = Legion::Settings.dig(:extensions, :llm, :bedrock) || {}
71
+ Digest::SHA256.hexdigest(raw[:api_key].to_s + raw[:instances].to_s)[0, 16]
72
+ end
73
+
74
+ def manual(**)
75
+ tick if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher) &&
76
+ self.class.ancestors.include?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
46
77
  rescue StandardError => e
47
78
  handle_exception(e, level: :warn, handled: true, operation: 'bedrock.actor.discovery_refresh')
48
79
  end
80
+
81
+ private
82
+
83
+ def lanes_for_instance(instance, fleet_enabled: false)
84
+ adapter = instance[:adapter]
85
+ return [] unless adapter.respond_to?(:discover_offerings)
86
+
87
+ Array(adapter.discover_offerings(live: true)).flat_map do |raw_offering|
88
+ offering = offering_to_hash(raw_offering)
89
+ next [] unless offering
90
+
91
+ build_offering_lanes(offering, instance, fleet_enabled: fleet_enabled)
92
+ end
93
+ end
94
+
95
+ def offering_to_hash(offering)
96
+ return nil if offering.nil?
97
+ return offering if offering.is_a?(Hash)
98
+
99
+ hash = offering.to_h
100
+ hash[:type] ||= hash[:usage_type]
101
+ hash[:enabled] = offering.respond_to?(:enabled?) ? offering.enabled? : true
102
+ hash
103
+ end
104
+
105
+ def build_offering_lanes(offering, instance, fleet_enabled: false)
106
+ raw_tier = offering[:tier] || :cloud
107
+ type = EMBED_TYPES.include?(offering[:type]&.to_sym) ? :embedding : :inference
108
+
109
+ lane_fields = {
110
+ tier: raw_tier,
111
+ provider_family: :bedrock,
112
+ instance_id: instance[:instance] || instance[:instance_id] || instance[:id] || 'default',
113
+ type: type,
114
+ model: offering[:model]
115
+ }
116
+
117
+ lane = build_lane(offering, lane_fields)
118
+ result = [lane]
119
+
120
+ if fleet_enabled && type == :inference
121
+ fleet_fields = lane_fields.merge(tier: :fleet)
122
+ result << lane.merge(
123
+ id: Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(fleet_fields),
124
+ tier: :fleet
125
+ )
126
+ end
127
+
128
+ result
129
+ end
130
+
131
+ def build_lane(offering, lane_fields)
132
+ capabilities = normalize_capabilities(offering[:capabilities])
133
+ {
134
+ id: Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(lane_fields),
135
+ tier: lane_fields[:tier],
136
+ provider_family: :bedrock,
137
+ instance_id: lane_fields[:instance_id],
138
+ model: offering[:model],
139
+ canonical_model_alias: offering[:canonical_model_alias],
140
+ type: lane_fields[:type],
141
+ capabilities: capabilities,
142
+ limits: offering[:limits] || {},
143
+ enabled: offering.fetch(:enabled, true),
144
+ cost: offering[:cost] || {}
145
+ }
146
+ end
147
+
148
+ def normalize_capabilities(caps)
149
+ return [] unless defined?(Legion::Extensions::Llm::Inventory::Capabilities)
150
+ return [] unless Legion::Extensions::Llm::Inventory::Capabilities.respond_to?(:normalize)
151
+
152
+ Legion::Extensions::Llm::Inventory::Capabilities.normalize(caps)
153
+ end
49
154
  end
50
155
  end
51
156
  end
@@ -66,6 +66,7 @@ module Legion
66
66
  bedrock_access_key_id
67
67
  bedrock_secret_access_key
68
68
  bedrock_session_token
69
+ bedrock_geo_prefix
69
70
  bedrock_profile
70
71
  bedrock_stub_responses
71
72
  bearer_token
@@ -85,24 +86,19 @@ module Legion
85
86
 
86
87
  INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
87
88
 
88
- def inference_profile_id(model, region: nil)
89
- return model if model.start_with?('us.', 'eu.', 'ap.', 'arn:')
90
- return model unless INFERENCE_PROFILE_PREFIXES.any? { |p| model.start_with?(p) }
89
+ def inference_profile_id(model, geo_prefix: 'us', region: nil)
90
+ return model if model.start_with?('arn:')
91
91
 
92
- prefix = region ? region_prefix(region) : 'us'
93
- "#{prefix}.#{model}"
94
- end
92
+ canonical = model.sub(/\A(?:us|eu|ap)\./, '')
93
+ return canonical unless INFERENCE_PROFILE_PREFIXES.any? { |p| canonical.start_with?(p) }
95
94
 
96
- # Region-based inference profile prefix mapping.
97
- # Bare model IDs (e.g. anthropic.claude-sonnet-4) get the region prefix.
98
- REGION_PREFIX = {
99
- 'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
100
- 'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
101
- 'ap-south-1' => 'ap', 'ap-southeast-1' => 'ap', 'ap-southeast-2' => 'ap', 'ap-northeast-1' => 'ap'
102
- }.freeze
95
+ prefix = normalize_geo_prefix(geo_prefix || region)
96
+ "#{prefix}.#{canonical}"
97
+ end
103
98
 
104
- def region_prefix(region)
105
- REGION_PREFIX.fetch(region.to_s, 'us')
99
+ def normalize_geo_prefix(value)
100
+ candidate = value.to_s.downcase
101
+ %w[us eu ap].include?(candidate) ? candidate : 'us'
106
102
  end
107
103
  end
108
104
 
@@ -127,6 +123,10 @@ module Legion
127
123
  @translator ||= Translator.new(region: region)
128
124
  end
129
125
 
126
+ def settings
127
+ Bedrock.default_settings
128
+ end
129
+
130
130
  def api_base
131
131
  config.bedrock_endpoint || "https://bedrock-runtime.#{region}.amazonaws.com"
132
132
  end
@@ -141,25 +141,9 @@ module Legion
141
141
  config.bedrock_region || settings[:region] || 'us-east-1'
142
142
  end
143
143
 
144
- def discover_offerings(live: false, **filters)
145
- unless live
146
- return @cached_offerings if @cached_offerings&.any?
147
-
148
- log.debug { 'bedrock.provider.discover_offerings: returning static catalog' }
149
- return static_offerings(**filters)
150
- end
151
-
152
- log.info { "bedrock.provider.discover_offerings: listing foundation models (region=#{region})" }
153
- response = bedrock_client.list_foundation_models(**filters)
154
- @cached_offerings = Array(value(response, :model_summaries)).filter_map do |summary|
155
- offering = offering_from_summary(summary)
156
- model_id = offering.respond_to?(:model) ? offering.model : (offering[:model] || offering[:id])
157
- next unless model_allowed?(model_id.to_s)
158
-
159
- offering
160
- end
161
- log.info { "bedrock.provider.discover_offerings: found #{@cached_offerings.size} models" }
162
- @cached_offerings
144
+ def geo_prefix
145
+ configured = config.bedrock_geo_prefix if config.respond_to?(:bedrock_geo_prefix)
146
+ self.class.normalize_geo_prefix(configured || settings[:geo_prefix])
163
147
  end
164
148
 
165
149
  def offering_for(model:, model_family: nil, instance_id: :default, **metadata)
@@ -205,15 +189,26 @@ module Legion
205
189
  end
206
190
  end
207
191
 
208
- def list_models(**)
192
+ def list_models(**filters)
193
+ request_filters = {}
194
+ request_filters[:by_provider] = filters[:by_provider] if filters[:by_provider]
195
+
209
196
  log.info { 'bedrock.provider.list_models: fetching live model list' }
210
- response = bedrock_client.list_foundation_models
197
+ response = bedrock_client.list_foundation_models(**request_filters)
211
198
  models = Array(value(response, :model_summaries)).filter_map { |summary| model_info_from_summary(summary) }
212
199
  log.info { "bedrock.provider.list_models: found #{models.size} models" }
213
- self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
214
200
  models
215
201
  end
216
202
 
203
+ def discover_offerings(live: false, **filters)
204
+ return static_offerings(**filters) unless live
205
+
206
+ provider_health = health(live:)
207
+ @cached_offerings = discover_live_offerings(filters, provider_health, live:)
208
+ log_discover_complete(@cached_offerings)
209
+ @cached_offerings
210
+ end
211
+
217
212
  def chat(
218
213
  messages:,
219
214
  model:,
@@ -310,6 +305,42 @@ module Legion
310
305
  parse_converse_response(response, model_id(model))
311
306
  end
312
307
 
308
+ def discovery_registry_readiness(provider_health, live:)
309
+ {
310
+ provider: slug.to_sym,
311
+ configured: configured?,
312
+ ready: provider_health[:ready] == true,
313
+ live: live,
314
+ health: provider_health
315
+ }
316
+ end
317
+
318
+ def discover_live_offerings(filters, provider_health, live:)
319
+ readiness = discovery_registry_readiness(provider_health, live:)
320
+ Array(list_models(live:, **filters)).filter_map do |model|
321
+ self.class.registry_publisher.publish_models_async([model], readiness:)
322
+ next unless model_matches_filters?(model, filters)
323
+ next unless model_allowed?(model.id)
324
+
325
+ log_model_discovered(model)
326
+ offering_from_model(model, health: provider_health)
327
+ end
328
+ end
329
+
330
+ def log_model_discovered(model)
331
+ log.debug(
332
+ "[#{slug}] instance=#{provider_instance_id} action=model_discovered " \
333
+ "model=#{model.id} family=#{model.family}"
334
+ )
335
+ end
336
+
337
+ def log_discover_complete(offerings)
338
+ log.info(
339
+ "[#{slug}] instance=#{provider_instance_id} action=discover_complete " \
340
+ "model_count=#{Array(offerings).size}"
341
+ )
342
+ end
343
+
313
344
  def stream(messages:, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {},
314
345
  thinking: nil, **_provider_options, &)
315
346
  enforce_model_allowed!(model_id(model))
@@ -353,7 +384,7 @@ module Legion
353
384
  log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
354
385
  request = Utils.deep_merge(
355
386
  {
356
- model_id: self.class.inference_profile_id(model_id(model), region: region),
387
+ model_id: self.class.inference_profile_id(model_id(model), geo_prefix: geo_prefix),
357
388
  input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
358
389
  },
359
390
  params
@@ -425,7 +456,7 @@ module Legion
425
456
  log.debug { "bedrock.provider.invoke_model_chat: model=#{mid} thinking=#{thinking.inspect}" }
426
457
 
427
458
  response = runtime_client.invoke_model(
428
- model_id: self.class.inference_profile_id(mid, region: region),
459
+ model_id: self.class.inference_profile_id(mid, geo_prefix: geo_prefix),
429
460
  content_type: 'application/json',
430
461
  accept: 'application/json',
431
462
  body: Legion::JSON.generate(body)
@@ -479,7 +510,7 @@ module Legion
479
510
 
480
511
  # rubocop:disable Metrics/BlockLength
481
512
  runtime_client.invoke_model_with_response_stream(
482
- model_id: self.class.inference_profile_id(mid, region: region),
513
+ model_id: self.class.inference_profile_id(mid, geo_prefix: geo_prefix),
483
514
  content_type: 'application/json',
484
515
  accept: 'application/json',
485
516
  body: Legion::JSON.generate(body)
@@ -589,7 +620,10 @@ module Legion
589
620
  body[:tools] = tool_format[:tools]
590
621
  body[:tool_choice] = tool_format[:tool_choice] if tool_format[:tool_choice]
591
622
  end
592
- body[:thinking] = invoke_model_thinking(thinking) if thinking
623
+ if thinking
624
+ body[:thinking] =
625
+ invoke_model_thinking(model: rest[:model] || model_id(rest[:model]), thinking: thinking)
626
+ end
593
627
  body
594
628
  end
595
629
 
@@ -609,11 +643,17 @@ module Legion
609
643
  parts.map { |t| { type: 'text', text: t } }
610
644
  end
611
645
 
612
- # Strip provider-specific keys (e.g. effort from OpenAI) that Bedrock/Anthropic APIs don't accept.
613
- def invoke_model_thinking(thinking)
614
- return thinking unless thinking.is_a?(Hash)
646
+ def invoke_model_thinking(model:, thinking:)
647
+ mid = model_id(model)
648
+ if mid.include?('claude-sonnet-4')
649
+ budget = if thinking.is_a?(Hash)
650
+ thinking[:budget_tokens] || thinking['budget_tokens'] ||
651
+ thinking[:budget] || thinking['budget']
652
+ end
653
+ return { type: 'enabled', budget_tokens: budget }.compact
654
+ end
615
655
 
616
- thinking.except(:effort, 'effort')
656
+ { type: 'adaptive' }
617
657
  end
618
658
 
619
659
  def format_invoke_model_messages(messages)
@@ -867,9 +907,16 @@ module Legion
867
907
  end
868
908
  end
869
909
 
870
- def offering_from_summary(summary)
871
- model = value(summary, :model_id)
872
- real = real_capabilities_from_summary(summary)
910
+ def offering_from_model(model_info, health: {})
911
+ model = model_info.respond_to?(:id) ? model_info.id : model_info
912
+ real = if model_info.respond_to?(:capabilities)
913
+ Array(model_info.capabilities).to_h do |capability|
914
+ [capability.to_s.downcase.tr('-', '_').to_sym, true]
915
+ end
916
+ else
917
+ {}
918
+ end
919
+ metadata = model_info.respond_to?(:metadata) && model_info.metadata.is_a?(Hash) ? model_info.metadata : {}
873
920
  policy = Legion::Extensions::Llm::CapabilityPolicy.resolve(
874
921
  real: real,
875
922
  provider_catalog: {},
@@ -883,11 +930,12 @@ module Legion
883
930
  build_offering(
884
931
  model: model,
885
932
  alias_name: alias_for(model),
886
- model_family: normalize_provider(value(summary, :provider_name)) || model_family_for(model),
887
- usage_type: usage_type_from_modalities(value(summary, :output_modalities)),
933
+ model_family: model_info.respond_to?(:family) ? model_info.family : model_family_for(model),
934
+ usage_type: model_info.respond_to?(:embedding?) && model_info.embedding? ? :embedding : :inference,
888
935
  capabilities: policy[:capabilities],
889
936
  capability_sources: policy[:sources],
890
- metadata: normalize_response(summary)
937
+ metadata: metadata,
938
+ health: health
891
939
  )
892
940
  end
893
941
 
@@ -908,9 +956,14 @@ module Legion
908
956
  )
909
957
  end
910
958
 
959
+ def offering_from_summary(summary, health: {})
960
+ offering_from_model(model_info_from_summary(summary), health:)
961
+ end
962
+
911
963
  def build_offering(model:, model_family:, usage_type:, instance_id: :default, alias_name: nil,
912
- capabilities: nil, capability_sources: nil, metadata: {})
964
+ capabilities: nil, capability_sources: nil, metadata: {}, health: {})
913
965
  limits = infer_limits(model)
966
+ normalized_family = model_family&.to_sym
914
967
  Legion::Extensions::Llm::Routing::ModelOffering.new(
915
968
  provider_family: :bedrock,
916
969
  instance_id: instance_id,
@@ -921,7 +974,8 @@ module Legion
921
974
  capabilities: capabilities || default_capabilities(model),
922
975
  capability_sources: capability_sources,
923
976
  limits: limits,
924
- metadata: metadata.merge(model_family: model_family, alias: alias_name).compact
977
+ health: health,
978
+ metadata: metadata.merge(model_family: normalized_family, alias: alias_name).compact
925
979
  )
926
980
  end
927
981
 
@@ -941,7 +995,7 @@ module Legion
941
995
  def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil,
942
996
  thinking: nil)
943
997
  {
944
- model_id: self.class.inference_profile_id(model_id(model), region: region),
998
+ model_id: self.class.inference_profile_id(model_id(model), geo_prefix: geo_prefix),
945
999
  messages: format_messages(messages.reject { |message| message.role == :system }),
946
1000
  system: format_system(messages),
947
1001
  inference_config: { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact,
@@ -1535,7 +1589,6 @@ module Legion
1535
1589
  caps << :streaming if value(summary, :response_streaming_supported)
1536
1590
  end
1537
1591
  caps << :vision if input_mods.include?('image')
1538
- caps << :tools if caps.include?(:completion)
1539
1592
  caps
1540
1593
  end
1541
1594
 
@@ -1545,7 +1598,7 @@ module Legion
1545
1598
  input_mods = Array(value(summary, :input_modalities)).map { |m| m.to_s.upcase }
1546
1599
  caps[:vision] = true if input_mods.include?('IMAGE')
1547
1600
  output_mods = Array(value(summary, :output_modalities)).map { |m| m.to_s.upcase }
1548
- caps[:embeddings] = true if output_mods.include?('EMBEDDING')
1601
+ caps[:embedding] = true if output_mods.include?('EMBEDDING')
1549
1602
  caps
1550
1603
  end
1551
1604
 
@@ -1554,44 +1607,6 @@ module Legion
1554
1607
  { tools: true }
1555
1608
  end
1556
1609
 
1557
- def provider_capability_config
1558
- return {} unless defined?(Legion::Extensions::Llm::CredentialSources)
1559
-
1560
- conf = Legion::Extensions::Llm::CredentialSources.setting(:extensions, :llm, :bedrock)
1561
- conf.is_a?(Hash) ? conf.to_h.except(:instances, 'instances') : {}
1562
- rescue StandardError => e
1563
- handle_exception(e, level: :debug, handled: true, operation: 'bedrock.provider_capability_config')
1564
- {}
1565
- end
1566
-
1567
- def instance_capability_config
1568
- cfg = config
1569
- result = {}
1570
- %i[capabilities enable_thinking enable_tools enable_streaming enable_vision enable_embeddings
1571
- thinking_flag tools_flag streaming_flag vision_flag embedding_flag embeddings_flag
1572
- tool_flag images_flag image_flag].each do |key|
1573
- next unless cfg.respond_to?(key)
1574
-
1575
- val = cfg.send(key)
1576
- result[key] = val unless val.nil?
1577
- rescue StandardError
1578
- next
1579
- end
1580
- result
1581
- end
1582
-
1583
- def model_capability_config(model_id)
1584
- models_conf = nil
1585
- models_conf = config.models if config.respond_to?(:models)
1586
- models_conf ||= config[:models] if config.respond_to?(:[])
1587
- return {} unless models_conf.respond_to?(:to_h)
1588
-
1589
- models_conf.to_h[model_id.to_s] || models_conf.to_h[model_id.to_sym] || {}
1590
- rescue StandardError => e
1591
- handle_exception(e, level: :debug, handled: true, operation: 'bedrock.model_capability_config')
1592
- {}
1593
- end
1594
-
1595
1610
  def model_family_for(model)
1596
1611
  normalize_provider(model.to_s.split('.').first)
1597
1612
  end
@@ -20,8 +20,9 @@ module Legion
20
20
 
21
21
  DEFAULT_MAX_TOKENS = 4096
22
22
 
23
- def initialize(region: nil)
23
+ def initialize(region: nil, geo_prefix: nil)
24
24
  @region = region
25
+ @geo_prefix = geo_prefix
25
26
  end
26
27
 
27
28
  def capabilities
@@ -150,17 +151,13 @@ module Legion
150
151
  end
151
152
 
152
153
  def inference_profile_id(model_id)
153
- return model_id if model_id.nil? || model_id.start_with?('us.', 'eu.', 'ap.', 'arn:')
154
+ return model_id if model_id.nil? || model_id.start_with?('arn:')
154
155
 
155
- return model_id unless MODEL_PREFIXED_FAMILIES.any? { |p| model_id.start_with?(p) }
156
+ canonical = model_id.sub(/\A(?:us|eu|ap)\./, '')
157
+ return canonical unless MODEL_PREFIXED_FAMILIES.any? { |p| canonical.start_with?(p) }
156
158
 
157
- region = @region || 'us-east-1'
158
- prefix = if region.include?('eu')
159
- 'eu'
160
- else
161
- region.include?('ap') ? 'ap' : 'us'
162
- end
163
- "#{prefix}.#{model_id}"
159
+ prefix = normalize_geo_prefix(@geo_prefix)
160
+ "#{prefix}.#{canonical}"
164
161
  end
165
162
 
166
163
  def build_inference_config(canonical)
@@ -187,6 +184,11 @@ module Legion
187
184
  { thinking: { type: 'enabled', budget_tokens: budget } }
188
185
  end
189
186
 
187
+ def normalize_geo_prefix(value)
188
+ candidate = value.to_s.downcase
189
+ %w[us eu ap].include?(candidate) ? candidate : 'us'
190
+ end
191
+
190
192
  def canonical_thinking_budget(canonical)
191
193
  return nil unless canonical.thinking
192
194
 
@@ -254,9 +256,14 @@ module Legion
254
256
  def build_invoke_thinking(canonical)
255
257
  return nil unless canonical.thinking
256
258
 
257
- budget = canonical_thinking_budget(canonical)
258
- budget ||= DEFAULT_MAX_TOKENS / 4
259
- { type: 'enabled', budget_tokens: budget }
259
+ model = model_from_request(canonical)
260
+ if model.to_s.include?('claude-sonnet-4')
261
+ budget = canonical_thinking_budget(canonical)
262
+ budget ||= DEFAULT_MAX_TOKENS / 4
263
+ return { type: 'enabled', budget_tokens: budget }
264
+ end
265
+
266
+ { type: 'adaptive' }
260
267
  end
261
268
 
262
269
  def render_invoke_system(canonical)
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Bedrock
7
- VERSION = '0.4.4'
7
+ VERSION = '0.4.9'
8
8
  end
9
9
  end
10
10
  end
@@ -30,6 +30,7 @@ module Legion
30
30
  instance: {
31
31
  default_model: DEFAULT_MODEL,
32
32
  region: 'us-east-1',
33
+ geo_prefix: 'us',
33
34
  tier: :cloud,
34
35
  transport: :aws_sdk,
35
36
  credentials: {
@@ -41,6 +42,7 @@ module Legion
41
42
  },
42
43
  provider: {
43
44
  region: DEFAULT_REGION,
45
+ geo_prefix: 'us',
44
46
  endpoint: nil,
45
47
  stub_responses: false
46
48
  },
@@ -85,9 +87,17 @@ module Legion
85
87
  # Resolve a default_model that never violates the configured model policy
86
88
  # (whitelist/blacklist stays authoritative over the DEFAULT_MODEL fallback).
87
89
  def self.resolve_default_model(config)
90
+ cfg = config.is_a?(Hash) ? config : {}
91
+ provider_conf = CredentialSources.setting(:extensions, :llm, PROVIDER_FAMILY)
92
+ provider_conf = {} unless provider_conf.is_a?(Hash)
93
+ global_conf = (::Legion::Settings.dig(:extensions, :llm) if defined?(::Legion::Settings))
94
+ global_conf = {} unless global_conf.is_a?(Hash)
95
+
88
96
  provider_class.policy_safe_default_model(
89
- configured: config[:default_model], fallback: DEFAULT_MODEL,
90
- **provider_class.model_policy(config, PROVIDER_FAMILY)
97
+ configured: cfg[:default_model],
98
+ fallback: DEFAULT_MODEL,
99
+ whitelist: provider_class.resolve_policy_value(cfg, provider_conf, global_conf, :model_whitelist),
100
+ blacklist: provider_class.resolve_policy_value(cfg, provider_conf, global_conf, :model_blacklist)
91
101
  )
92
102
  end
93
103
 
@@ -214,6 +224,7 @@ module Legion
214
224
 
215
225
  normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
216
226
  normalized[:bedrock_region] ||= normalized.delete(:region)
227
+ normalized[:bedrock_geo_prefix] ||= normalized.delete(:geo_prefix)
217
228
  normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
218
229
  normalized[:bedrock_endpoint] ||= normalized.delete(:base_url)
219
230
  normalized[:bedrock_endpoint] ||= normalized.delete(:api_base)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-bedrock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -99,14 +99,14 @@ dependencies:
99
99
  requirements:
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
- version: 0.5.4
102
+ version: 0.6.0
103
103
  type: :runtime
104
104
  prerelease: false
105
105
  version_requirements: !ruby/object:Gem::Requirement
106
106
  requirements:
107
107
  - - ">="
108
108
  - !ruby/object:Gem::Version
109
- version: 0.5.4
109
+ version: 0.6.0
110
110
  description: Amazon Bedrock provider integration for the LegionIO LLM routing framework.
111
111
  email:
112
112
  - matthewdiverson@gmail.com