lex-llm-bedrock 0.4.4 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/lex-llm-bedrock.gemspec +1 -1
- data/lib/legion/extensions/llm/bedrock/actors/discovery_refresh.rb +118 -13
- data/lib/legion/extensions/llm/bedrock/provider.rb +109 -94
- data/lib/legion/extensions/llm/bedrock/translator.rb +20 -13
- data/lib/legion/extensions/llm/bedrock/version.rb +1 -1
- data/lib/legion/extensions/llm/bedrock.rb +13 -2
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ba738761ab28ad68c0dfb013b6f13c3d17ae587295a0b53f97e7bb091d8d4a65
|
|
4
|
+
data.tar.gz: 3b5d2b5955816d248773ce88d48d6f5719d06bb47bf8cba685eaef6cefb2bee2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 99e05143e8194b69cdca989616d2f65f36807cc1d408fa4636eb98b688d76c8b5156c6864c6f7a391b601195aef667fb25e3086d2054f082f62f7dba995dd126
|
|
7
|
+
data.tar.gz: fd86abdaca844a0ea28f4b5e32041824fabd9dcf0e5870344045c5379ca776aa7b566a07bc1deb1480bd3c3f007146d1287f906abb66cf5c3df701661bf22e17
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.9] - 2026-06-20
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Stub shared registry publishing through `RegistryPublisher#schedule` in specs so async availability-event coverage stays stable after the shared publisher moved off raw `Thread.new`.
|
|
7
|
+
|
|
8
|
+
## [0.4.8] - 2026-06-20
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Stop bulk-publishing Bedrock model availability from `list_models`; discovery now emits one registry event per seen model from the shared `lex-llm` policy-filter path so blocked models stay observable without duplicate publishes.
|
|
12
|
+
|
|
13
|
+
## [0.4.7] - 2026-06-20
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
- Stop deriving Bedrock `us.`/`eu.`/`ap.` inference-profile prefixes from AWS regions. Model invocation now strips any existing geo prefix and prepends only an explicit Bedrock geo prefix setting, defaulting to `us`.
|
|
17
|
+
|
|
18
|
+
## [0.4.6] - 2026-06-20
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- Canonicalize Bedrock embedding discovery to the shared singular `:embedding` capability and route provider/instance/model override extraction through the `lex-llm` base provider contract.
|
|
22
|
+
|
|
23
|
+
## [0.4.5] - 2026-06-19
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
- Adopt `Legion::Extensions::Llm::Inventory::ScopedRefresher` mixin (lex-llm 0.6.0). Discovery
|
|
27
|
+
refresh actors now write directly to the live `Inventory` catalog via `Inventory.write_lane`.
|
|
28
|
+
- Pin `lex-llm >= 0.6.0` and `legion-llm >= 0.14.0` in gemspec.
|
|
29
|
+
- Standard `weight: 100` default added to provider instance settings schema.
|
|
30
|
+
|
|
3
31
|
## 0.4.4 - 2026-06-17
|
|
4
32
|
|
|
5
33
|
### Fixed
|
data/lex-llm-bedrock.gemspec
CHANGED
|
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
|
|
|
29
29
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
30
30
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
31
31
|
spec.add_dependency 'legion-transport', '>= 1.4.14'
|
|
32
|
-
spec.add_dependency 'lex-llm', '>= 0.
|
|
32
|
+
spec.add_dependency 'lex-llm', '>= 0.6.0'
|
|
33
33
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
3
5
|
begin
|
|
4
6
|
require 'legion/extensions/actors/every'
|
|
5
7
|
rescue LoadError => e
|
|
@@ -8,15 +10,27 @@ end
|
|
|
8
10
|
|
|
9
11
|
return unless defined?(Legion::Extensions::Actors::Every)
|
|
10
12
|
|
|
13
|
+
begin
|
|
14
|
+
require 'legion/extensions/llm/inventory/scoped_refresher'
|
|
15
|
+
rescue LoadError => e
|
|
16
|
+
warn(e.message) if $VERBOSE
|
|
17
|
+
end
|
|
18
|
+
|
|
11
19
|
module Legion
|
|
12
20
|
module Extensions
|
|
13
21
|
module Llm
|
|
14
22
|
module Bedrock
|
|
15
23
|
module Actor
|
|
16
|
-
class DiscoveryRefresh < Legion::Extensions::Actors::Every # rubocop:disable Style/Documentation
|
|
24
|
+
class DiscoveryRefresh < Legion::Extensions::Actors::Every # rubocop:disable Style/Documentation,Metrics/ClassLength
|
|
17
25
|
include Legion::Logging::Helper
|
|
18
26
|
|
|
19
|
-
|
|
27
|
+
if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
|
|
28
|
+
include Legion::Extensions::Llm::Inventory::ScopedRefresher
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
EMBED_TYPES = %i[embed embedding].freeze
|
|
32
|
+
|
|
33
|
+
def self.every_seconds = 3600
|
|
20
34
|
|
|
21
35
|
def runner_class = self.class
|
|
22
36
|
def runner_function = 'manual'
|
|
@@ -26,26 +40,117 @@ module Legion
|
|
|
26
40
|
def generate_task? = false
|
|
27
41
|
|
|
28
42
|
def time
|
|
29
|
-
return
|
|
43
|
+
return self.class.every_seconds unless defined?(Legion::Settings)
|
|
44
|
+
|
|
45
|
+
Legion::Settings.dig(:extensions, :llm, :bedrock, :discovery_interval) || self.class.every_seconds
|
|
46
|
+
end
|
|
30
47
|
|
|
31
|
-
|
|
48
|
+
def scope_key(**)
|
|
49
|
+
{ provider: :bedrock }
|
|
32
50
|
end
|
|
33
51
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
return unless defined?(Legion::LLM::Discovery)
|
|
52
|
+
def compute_lanes_for_scope(**)
|
|
53
|
+
return [] unless defined?(Legion::LLM::Call::Registry)
|
|
37
54
|
|
|
38
|
-
Legion::
|
|
55
|
+
settings = Legion::Settings.dig(:extensions, :llm, :bedrock) || {}
|
|
56
|
+
fleet_enabled = settings.dig(:fleet, :dispatch, :enabled)
|
|
39
57
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
end
|
|
43
|
-
if defined?(Legion::LLM::Inventory) && Legion::LLM::Inventory.respond_to?(:invalidate_offerings_cache!)
|
|
44
|
-
Legion::LLM::Inventory.invalidate_offerings_cache!
|
|
58
|
+
instances = Legion::LLM::Call::Registry.all_instances.select do |e|
|
|
59
|
+
(e[:provider] || '').to_sym == :bedrock
|
|
45
60
|
end
|
|
61
|
+
|
|
62
|
+
instances.flat_map { |inst| lanes_for_instance(inst, fleet_enabled: fleet_enabled) }
|
|
63
|
+
rescue StandardError => e
|
|
64
|
+
handle_exception(e, level: :warn, handled: true,
|
|
65
|
+
operation: 'bedrock.actor.discovery_refresh.compute_lanes')
|
|
66
|
+
[]
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def credential_hash(**)
|
|
70
|
+
raw = Legion::Settings.dig(:extensions, :llm, :bedrock) || {}
|
|
71
|
+
Digest::SHA256.hexdigest(raw[:api_key].to_s + raw[:instances].to_s)[0, 16]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def manual(**)
|
|
75
|
+
tick if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher) &&
|
|
76
|
+
self.class.ancestors.include?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
|
|
46
77
|
rescue StandardError => e
|
|
47
78
|
handle_exception(e, level: :warn, handled: true, operation: 'bedrock.actor.discovery_refresh')
|
|
48
79
|
end
|
|
80
|
+
|
|
81
|
+
private
|
|
82
|
+
|
|
83
|
+
def lanes_for_instance(instance, fleet_enabled: false)
|
|
84
|
+
adapter = instance[:adapter]
|
|
85
|
+
return [] unless adapter.respond_to?(:discover_offerings)
|
|
86
|
+
|
|
87
|
+
Array(adapter.discover_offerings(live: true)).flat_map do |raw_offering|
|
|
88
|
+
offering = offering_to_hash(raw_offering)
|
|
89
|
+
next [] unless offering
|
|
90
|
+
|
|
91
|
+
build_offering_lanes(offering, instance, fleet_enabled: fleet_enabled)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def offering_to_hash(offering)
|
|
96
|
+
return nil if offering.nil?
|
|
97
|
+
return offering if offering.is_a?(Hash)
|
|
98
|
+
|
|
99
|
+
hash = offering.to_h
|
|
100
|
+
hash[:type] ||= hash[:usage_type]
|
|
101
|
+
hash[:enabled] = offering.respond_to?(:enabled?) ? offering.enabled? : true
|
|
102
|
+
hash
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def build_offering_lanes(offering, instance, fleet_enabled: false)
|
|
106
|
+
raw_tier = offering[:tier] || :cloud
|
|
107
|
+
type = EMBED_TYPES.include?(offering[:type]&.to_sym) ? :embedding : :inference
|
|
108
|
+
|
|
109
|
+
lane_fields = {
|
|
110
|
+
tier: raw_tier,
|
|
111
|
+
provider_family: :bedrock,
|
|
112
|
+
instance_id: instance[:instance] || instance[:instance_id] || instance[:id] || 'default',
|
|
113
|
+
type: type,
|
|
114
|
+
model: offering[:model]
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
lane = build_lane(offering, lane_fields)
|
|
118
|
+
result = [lane]
|
|
119
|
+
|
|
120
|
+
if fleet_enabled && type == :inference
|
|
121
|
+
fleet_fields = lane_fields.merge(tier: :fleet)
|
|
122
|
+
result << lane.merge(
|
|
123
|
+
id: Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(fleet_fields),
|
|
124
|
+
tier: :fleet
|
|
125
|
+
)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
result
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def build_lane(offering, lane_fields)
|
|
132
|
+
capabilities = normalize_capabilities(offering[:capabilities])
|
|
133
|
+
{
|
|
134
|
+
id: Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(lane_fields),
|
|
135
|
+
tier: lane_fields[:tier],
|
|
136
|
+
provider_family: :bedrock,
|
|
137
|
+
instance_id: lane_fields[:instance_id],
|
|
138
|
+
model: offering[:model],
|
|
139
|
+
canonical_model_alias: offering[:canonical_model_alias],
|
|
140
|
+
type: lane_fields[:type],
|
|
141
|
+
capabilities: capabilities,
|
|
142
|
+
limits: offering[:limits] || {},
|
|
143
|
+
enabled: offering.fetch(:enabled, true),
|
|
144
|
+
cost: offering[:cost] || {}
|
|
145
|
+
}
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def normalize_capabilities(caps)
|
|
149
|
+
return [] unless defined?(Legion::Extensions::Llm::Inventory::Capabilities)
|
|
150
|
+
return [] unless Legion::Extensions::Llm::Inventory::Capabilities.respond_to?(:normalize)
|
|
151
|
+
|
|
152
|
+
Legion::Extensions::Llm::Inventory::Capabilities.normalize(caps)
|
|
153
|
+
end
|
|
49
154
|
end
|
|
50
155
|
end
|
|
51
156
|
end
|
|
@@ -66,6 +66,7 @@ module Legion
|
|
|
66
66
|
bedrock_access_key_id
|
|
67
67
|
bedrock_secret_access_key
|
|
68
68
|
bedrock_session_token
|
|
69
|
+
bedrock_geo_prefix
|
|
69
70
|
bedrock_profile
|
|
70
71
|
bedrock_stub_responses
|
|
71
72
|
bearer_token
|
|
@@ -85,24 +86,19 @@ module Legion
|
|
|
85
86
|
|
|
86
87
|
INFERENCE_PROFILE_PREFIXES = %w[anthropic. meta. mistral. cohere. ai21.].freeze
|
|
87
88
|
|
|
88
|
-
def inference_profile_id(model, region: nil)
|
|
89
|
-
return model if model.start_with?('
|
|
90
|
-
return model unless INFERENCE_PROFILE_PREFIXES.any? { |p| model.start_with?(p) }
|
|
89
|
+
def inference_profile_id(model, geo_prefix: 'us', region: nil)
|
|
90
|
+
return model if model.start_with?('arn:')
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
end
|
|
92
|
+
canonical = model.sub(/\A(?:us|eu|ap)\./, '')
|
|
93
|
+
return canonical unless INFERENCE_PROFILE_PREFIXES.any? { |p| canonical.start_with?(p) }
|
|
95
94
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
'us-east-1' => 'us', 'us-east-2' => 'us', 'us-west-1' => 'us', 'us-west-2' => 'us',
|
|
100
|
-
'eu-central-1' => 'eu', 'eu-west-1' => 'eu', 'eu-west-2' => 'eu', 'eu-west-3' => 'eu',
|
|
101
|
-
'ap-south-1' => 'ap', 'ap-southeast-1' => 'ap', 'ap-southeast-2' => 'ap', 'ap-northeast-1' => 'ap'
|
|
102
|
-
}.freeze
|
|
95
|
+
prefix = normalize_geo_prefix(geo_prefix || region)
|
|
96
|
+
"#{prefix}.#{canonical}"
|
|
97
|
+
end
|
|
103
98
|
|
|
104
|
-
def
|
|
105
|
-
|
|
99
|
+
def normalize_geo_prefix(value)
|
|
100
|
+
candidate = value.to_s.downcase
|
|
101
|
+
%w[us eu ap].include?(candidate) ? candidate : 'us'
|
|
106
102
|
end
|
|
107
103
|
end
|
|
108
104
|
|
|
@@ -127,6 +123,10 @@ module Legion
|
|
|
127
123
|
@translator ||= Translator.new(region: region)
|
|
128
124
|
end
|
|
129
125
|
|
|
126
|
+
def settings
|
|
127
|
+
Bedrock.default_settings
|
|
128
|
+
end
|
|
129
|
+
|
|
130
130
|
def api_base
|
|
131
131
|
config.bedrock_endpoint || "https://bedrock-runtime.#{region}.amazonaws.com"
|
|
132
132
|
end
|
|
@@ -141,25 +141,9 @@ module Legion
|
|
|
141
141
|
config.bedrock_region || settings[:region] || 'us-east-1'
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
-
def
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
log.debug { 'bedrock.provider.discover_offerings: returning static catalog' }
|
|
149
|
-
return static_offerings(**filters)
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
log.info { "bedrock.provider.discover_offerings: listing foundation models (region=#{region})" }
|
|
153
|
-
response = bedrock_client.list_foundation_models(**filters)
|
|
154
|
-
@cached_offerings = Array(value(response, :model_summaries)).filter_map do |summary|
|
|
155
|
-
offering = offering_from_summary(summary)
|
|
156
|
-
model_id = offering.respond_to?(:model) ? offering.model : (offering[:model] || offering[:id])
|
|
157
|
-
next unless model_allowed?(model_id.to_s)
|
|
158
|
-
|
|
159
|
-
offering
|
|
160
|
-
end
|
|
161
|
-
log.info { "bedrock.provider.discover_offerings: found #{@cached_offerings.size} models" }
|
|
162
|
-
@cached_offerings
|
|
144
|
+
def geo_prefix
|
|
145
|
+
configured = config.bedrock_geo_prefix if config.respond_to?(:bedrock_geo_prefix)
|
|
146
|
+
self.class.normalize_geo_prefix(configured || settings[:geo_prefix])
|
|
163
147
|
end
|
|
164
148
|
|
|
165
149
|
def offering_for(model:, model_family: nil, instance_id: :default, **metadata)
|
|
@@ -205,15 +189,26 @@ module Legion
|
|
|
205
189
|
end
|
|
206
190
|
end
|
|
207
191
|
|
|
208
|
-
def list_models(**)
|
|
192
|
+
def list_models(**filters)
|
|
193
|
+
request_filters = {}
|
|
194
|
+
request_filters[:by_provider] = filters[:by_provider] if filters[:by_provider]
|
|
195
|
+
|
|
209
196
|
log.info { 'bedrock.provider.list_models: fetching live model list' }
|
|
210
|
-
response = bedrock_client.list_foundation_models
|
|
197
|
+
response = bedrock_client.list_foundation_models(**request_filters)
|
|
211
198
|
models = Array(value(response, :model_summaries)).filter_map { |summary| model_info_from_summary(summary) }
|
|
212
199
|
log.info { "bedrock.provider.list_models: found #{models.size} models" }
|
|
213
|
-
self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
|
|
214
200
|
models
|
|
215
201
|
end
|
|
216
202
|
|
|
203
|
+
def discover_offerings(live: false, **filters)
|
|
204
|
+
return static_offerings(**filters) unless live
|
|
205
|
+
|
|
206
|
+
provider_health = health(live:)
|
|
207
|
+
@cached_offerings = discover_live_offerings(filters, provider_health, live:)
|
|
208
|
+
log_discover_complete(@cached_offerings)
|
|
209
|
+
@cached_offerings
|
|
210
|
+
end
|
|
211
|
+
|
|
217
212
|
def chat(
|
|
218
213
|
messages:,
|
|
219
214
|
model:,
|
|
@@ -310,6 +305,42 @@ module Legion
|
|
|
310
305
|
parse_converse_response(response, model_id(model))
|
|
311
306
|
end
|
|
312
307
|
|
|
308
|
+
def discovery_registry_readiness(provider_health, live:)
|
|
309
|
+
{
|
|
310
|
+
provider: slug.to_sym,
|
|
311
|
+
configured: configured?,
|
|
312
|
+
ready: provider_health[:ready] == true,
|
|
313
|
+
live: live,
|
|
314
|
+
health: provider_health
|
|
315
|
+
}
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def discover_live_offerings(filters, provider_health, live:)
|
|
319
|
+
readiness = discovery_registry_readiness(provider_health, live:)
|
|
320
|
+
Array(list_models(live:, **filters)).filter_map do |model|
|
|
321
|
+
self.class.registry_publisher.publish_models_async([model], readiness:)
|
|
322
|
+
next unless model_matches_filters?(model, filters)
|
|
323
|
+
next unless model_allowed?(model.id)
|
|
324
|
+
|
|
325
|
+
log_model_discovered(model)
|
|
326
|
+
offering_from_model(model, health: provider_health)
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
def log_model_discovered(model)
|
|
331
|
+
log.debug(
|
|
332
|
+
"[#{slug}] instance=#{provider_instance_id} action=model_discovered " \
|
|
333
|
+
"model=#{model.id} family=#{model.family}"
|
|
334
|
+
)
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
def log_discover_complete(offerings)
|
|
338
|
+
log.info(
|
|
339
|
+
"[#{slug}] instance=#{provider_instance_id} action=discover_complete " \
|
|
340
|
+
"model_count=#{Array(offerings).size}"
|
|
341
|
+
)
|
|
342
|
+
end
|
|
343
|
+
|
|
313
344
|
def stream(messages:, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {},
|
|
314
345
|
thinking: nil, **_provider_options, &)
|
|
315
346
|
enforce_model_allowed!(model_id(model))
|
|
@@ -353,7 +384,7 @@ module Legion
|
|
|
353
384
|
log.debug { "bedrock.provider.count_tokens: model=#{model_id(model)}" }
|
|
354
385
|
request = Utils.deep_merge(
|
|
355
386
|
{
|
|
356
|
-
model_id: self.class.inference_profile_id(model_id(model),
|
|
387
|
+
model_id: self.class.inference_profile_id(model_id(model), geo_prefix: geo_prefix),
|
|
357
388
|
input: { converse: { messages: format_messages(messages), system: system_blocks(system) }.compact }
|
|
358
389
|
},
|
|
359
390
|
params
|
|
@@ -425,7 +456,7 @@ module Legion
|
|
|
425
456
|
log.debug { "bedrock.provider.invoke_model_chat: model=#{mid} thinking=#{thinking.inspect}" }
|
|
426
457
|
|
|
427
458
|
response = runtime_client.invoke_model(
|
|
428
|
-
model_id: self.class.inference_profile_id(mid,
|
|
459
|
+
model_id: self.class.inference_profile_id(mid, geo_prefix: geo_prefix),
|
|
429
460
|
content_type: 'application/json',
|
|
430
461
|
accept: 'application/json',
|
|
431
462
|
body: Legion::JSON.generate(body)
|
|
@@ -479,7 +510,7 @@ module Legion
|
|
|
479
510
|
|
|
480
511
|
# rubocop:disable Metrics/BlockLength
|
|
481
512
|
runtime_client.invoke_model_with_response_stream(
|
|
482
|
-
model_id: self.class.inference_profile_id(mid,
|
|
513
|
+
model_id: self.class.inference_profile_id(mid, geo_prefix: geo_prefix),
|
|
483
514
|
content_type: 'application/json',
|
|
484
515
|
accept: 'application/json',
|
|
485
516
|
body: Legion::JSON.generate(body)
|
|
@@ -589,7 +620,10 @@ module Legion
|
|
|
589
620
|
body[:tools] = tool_format[:tools]
|
|
590
621
|
body[:tool_choice] = tool_format[:tool_choice] if tool_format[:tool_choice]
|
|
591
622
|
end
|
|
592
|
-
|
|
623
|
+
if thinking
|
|
624
|
+
body[:thinking] =
|
|
625
|
+
invoke_model_thinking(model: rest[:model] || model_id(rest[:model]), thinking: thinking)
|
|
626
|
+
end
|
|
593
627
|
body
|
|
594
628
|
end
|
|
595
629
|
|
|
@@ -609,11 +643,17 @@ module Legion
|
|
|
609
643
|
parts.map { |t| { type: 'text', text: t } }
|
|
610
644
|
end
|
|
611
645
|
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
646
|
+
def invoke_model_thinking(model:, thinking:)
|
|
647
|
+
mid = model_id(model)
|
|
648
|
+
if mid.include?('claude-sonnet-4')
|
|
649
|
+
budget = if thinking.is_a?(Hash)
|
|
650
|
+
thinking[:budget_tokens] || thinking['budget_tokens'] ||
|
|
651
|
+
thinking[:budget] || thinking['budget']
|
|
652
|
+
end
|
|
653
|
+
return { type: 'enabled', budget_tokens: budget }.compact
|
|
654
|
+
end
|
|
615
655
|
|
|
616
|
-
|
|
656
|
+
{ type: 'adaptive' }
|
|
617
657
|
end
|
|
618
658
|
|
|
619
659
|
def format_invoke_model_messages(messages)
|
|
@@ -867,9 +907,16 @@ module Legion
|
|
|
867
907
|
end
|
|
868
908
|
end
|
|
869
909
|
|
|
870
|
-
def
|
|
871
|
-
model =
|
|
872
|
-
real =
|
|
910
|
+
def offering_from_model(model_info, health: {})
|
|
911
|
+
model = model_info.respond_to?(:id) ? model_info.id : model_info
|
|
912
|
+
real = if model_info.respond_to?(:capabilities)
|
|
913
|
+
Array(model_info.capabilities).to_h do |capability|
|
|
914
|
+
[capability.to_s.downcase.tr('-', '_').to_sym, true]
|
|
915
|
+
end
|
|
916
|
+
else
|
|
917
|
+
{}
|
|
918
|
+
end
|
|
919
|
+
metadata = model_info.respond_to?(:metadata) && model_info.metadata.is_a?(Hash) ? model_info.metadata : {}
|
|
873
920
|
policy = Legion::Extensions::Llm::CapabilityPolicy.resolve(
|
|
874
921
|
real: real,
|
|
875
922
|
provider_catalog: {},
|
|
@@ -883,11 +930,12 @@ module Legion
|
|
|
883
930
|
build_offering(
|
|
884
931
|
model: model,
|
|
885
932
|
alias_name: alias_for(model),
|
|
886
|
-
model_family:
|
|
887
|
-
usage_type:
|
|
933
|
+
model_family: model_info.respond_to?(:family) ? model_info.family : model_family_for(model),
|
|
934
|
+
usage_type: model_info.respond_to?(:embedding?) && model_info.embedding? ? :embedding : :inference,
|
|
888
935
|
capabilities: policy[:capabilities],
|
|
889
936
|
capability_sources: policy[:sources],
|
|
890
|
-
metadata:
|
|
937
|
+
metadata: metadata,
|
|
938
|
+
health: health
|
|
891
939
|
)
|
|
892
940
|
end
|
|
893
941
|
|
|
@@ -908,9 +956,14 @@ module Legion
|
|
|
908
956
|
)
|
|
909
957
|
end
|
|
910
958
|
|
|
959
|
+
def offering_from_summary(summary, health: {})
|
|
960
|
+
offering_from_model(model_info_from_summary(summary), health:)
|
|
961
|
+
end
|
|
962
|
+
|
|
911
963
|
def build_offering(model:, model_family:, usage_type:, instance_id: :default, alias_name: nil,
|
|
912
|
-
capabilities: nil, capability_sources: nil, metadata: {})
|
|
964
|
+
capabilities: nil, capability_sources: nil, metadata: {}, health: {})
|
|
913
965
|
limits = infer_limits(model)
|
|
966
|
+
normalized_family = model_family&.to_sym
|
|
914
967
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
915
968
|
provider_family: :bedrock,
|
|
916
969
|
instance_id: instance_id,
|
|
@@ -921,7 +974,8 @@ module Legion
|
|
|
921
974
|
capabilities: capabilities || default_capabilities(model),
|
|
922
975
|
capability_sources: capability_sources,
|
|
923
976
|
limits: limits,
|
|
924
|
-
|
|
977
|
+
health: health,
|
|
978
|
+
metadata: metadata.merge(model_family: normalized_family, alias: alias_name).compact
|
|
925
979
|
)
|
|
926
980
|
end
|
|
927
981
|
|
|
@@ -941,7 +995,7 @@ module Legion
|
|
|
941
995
|
def converse_request(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, guardrail_config: nil,
|
|
942
996
|
thinking: nil)
|
|
943
997
|
{
|
|
944
|
-
model_id: self.class.inference_profile_id(model_id(model),
|
|
998
|
+
model_id: self.class.inference_profile_id(model_id(model), geo_prefix: geo_prefix),
|
|
945
999
|
messages: format_messages(messages.reject { |message| message.role == :system }),
|
|
946
1000
|
system: format_system(messages),
|
|
947
1001
|
inference_config: { temperature: temperature, max_tokens: max_tokens || model_max_tokens(model) }.compact,
|
|
@@ -1535,7 +1589,6 @@ module Legion
|
|
|
1535
1589
|
caps << :streaming if value(summary, :response_streaming_supported)
|
|
1536
1590
|
end
|
|
1537
1591
|
caps << :vision if input_mods.include?('image')
|
|
1538
|
-
caps << :tools if caps.include?(:completion)
|
|
1539
1592
|
caps
|
|
1540
1593
|
end
|
|
1541
1594
|
|
|
@@ -1545,7 +1598,7 @@ module Legion
|
|
|
1545
1598
|
input_mods = Array(value(summary, :input_modalities)).map { |m| m.to_s.upcase }
|
|
1546
1599
|
caps[:vision] = true if input_mods.include?('IMAGE')
|
|
1547
1600
|
output_mods = Array(value(summary, :output_modalities)).map { |m| m.to_s.upcase }
|
|
1548
|
-
caps[:
|
|
1601
|
+
caps[:embedding] = true if output_mods.include?('EMBEDDING')
|
|
1549
1602
|
caps
|
|
1550
1603
|
end
|
|
1551
1604
|
|
|
@@ -1554,44 +1607,6 @@ module Legion
|
|
|
1554
1607
|
{ tools: true }
|
|
1555
1608
|
end
|
|
1556
1609
|
|
|
1557
|
-
def provider_capability_config
|
|
1558
|
-
return {} unless defined?(Legion::Extensions::Llm::CredentialSources)
|
|
1559
|
-
|
|
1560
|
-
conf = Legion::Extensions::Llm::CredentialSources.setting(:extensions, :llm, :bedrock)
|
|
1561
|
-
conf.is_a?(Hash) ? conf.to_h.except(:instances, 'instances') : {}
|
|
1562
|
-
rescue StandardError => e
|
|
1563
|
-
handle_exception(e, level: :debug, handled: true, operation: 'bedrock.provider_capability_config')
|
|
1564
|
-
{}
|
|
1565
|
-
end
|
|
1566
|
-
|
|
1567
|
-
def instance_capability_config
|
|
1568
|
-
cfg = config
|
|
1569
|
-
result = {}
|
|
1570
|
-
%i[capabilities enable_thinking enable_tools enable_streaming enable_vision enable_embeddings
|
|
1571
|
-
thinking_flag tools_flag streaming_flag vision_flag embedding_flag embeddings_flag
|
|
1572
|
-
tool_flag images_flag image_flag].each do |key|
|
|
1573
|
-
next unless cfg.respond_to?(key)
|
|
1574
|
-
|
|
1575
|
-
val = cfg.send(key)
|
|
1576
|
-
result[key] = val unless val.nil?
|
|
1577
|
-
rescue StandardError
|
|
1578
|
-
next
|
|
1579
|
-
end
|
|
1580
|
-
result
|
|
1581
|
-
end
|
|
1582
|
-
|
|
1583
|
-
def model_capability_config(model_id)
|
|
1584
|
-
models_conf = nil
|
|
1585
|
-
models_conf = config.models if config.respond_to?(:models)
|
|
1586
|
-
models_conf ||= config[:models] if config.respond_to?(:[])
|
|
1587
|
-
return {} unless models_conf.respond_to?(:to_h)
|
|
1588
|
-
|
|
1589
|
-
models_conf.to_h[model_id.to_s] || models_conf.to_h[model_id.to_sym] || {}
|
|
1590
|
-
rescue StandardError => e
|
|
1591
|
-
handle_exception(e, level: :debug, handled: true, operation: 'bedrock.model_capability_config')
|
|
1592
|
-
{}
|
|
1593
|
-
end
|
|
1594
|
-
|
|
1595
1610
|
def model_family_for(model)
|
|
1596
1611
|
normalize_provider(model.to_s.split('.').first)
|
|
1597
1612
|
end
|
|
@@ -20,8 +20,9 @@ module Legion
|
|
|
20
20
|
|
|
21
21
|
DEFAULT_MAX_TOKENS = 4096
|
|
22
22
|
|
|
23
|
-
def initialize(region: nil)
|
|
23
|
+
def initialize(region: nil, geo_prefix: nil)
|
|
24
24
|
@region = region
|
|
25
|
+
@geo_prefix = geo_prefix
|
|
25
26
|
end
|
|
26
27
|
|
|
27
28
|
def capabilities
|
|
@@ -150,17 +151,13 @@ module Legion
|
|
|
150
151
|
end
|
|
151
152
|
|
|
152
153
|
def inference_profile_id(model_id)
|
|
153
|
-
return model_id if model_id.nil? || model_id.start_with?('
|
|
154
|
+
return model_id if model_id.nil? || model_id.start_with?('arn:')
|
|
154
155
|
|
|
155
|
-
|
|
156
|
+
canonical = model_id.sub(/\A(?:us|eu|ap)\./, '')
|
|
157
|
+
return canonical unless MODEL_PREFIXED_FAMILIES.any? { |p| canonical.start_with?(p) }
|
|
156
158
|
|
|
157
|
-
|
|
158
|
-
prefix
|
|
159
|
-
'eu'
|
|
160
|
-
else
|
|
161
|
-
region.include?('ap') ? 'ap' : 'us'
|
|
162
|
-
end
|
|
163
|
-
"#{prefix}.#{model_id}"
|
|
159
|
+
prefix = normalize_geo_prefix(@geo_prefix)
|
|
160
|
+
"#{prefix}.#{canonical}"
|
|
164
161
|
end
|
|
165
162
|
|
|
166
163
|
def build_inference_config(canonical)
|
|
@@ -187,6 +184,11 @@ module Legion
|
|
|
187
184
|
{ thinking: { type: 'enabled', budget_tokens: budget } }
|
|
188
185
|
end
|
|
189
186
|
|
|
187
|
+
def normalize_geo_prefix(value)
|
|
188
|
+
candidate = value.to_s.downcase
|
|
189
|
+
%w[us eu ap].include?(candidate) ? candidate : 'us'
|
|
190
|
+
end
|
|
191
|
+
|
|
190
192
|
def canonical_thinking_budget(canonical)
|
|
191
193
|
return nil unless canonical.thinking
|
|
192
194
|
|
|
@@ -254,9 +256,14 @@ module Legion
|
|
|
254
256
|
def build_invoke_thinking(canonical)
|
|
255
257
|
return nil unless canonical.thinking
|
|
256
258
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
259
|
+
model = model_from_request(canonical)
|
|
260
|
+
if model.to_s.include?('claude-sonnet-4')
|
|
261
|
+
budget = canonical_thinking_budget(canonical)
|
|
262
|
+
budget ||= DEFAULT_MAX_TOKENS / 4
|
|
263
|
+
return { type: 'enabled', budget_tokens: budget }
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
{ type: 'adaptive' }
|
|
260
267
|
end
|
|
261
268
|
|
|
262
269
|
def render_invoke_system(canonical)
|
|
@@ -30,6 +30,7 @@ module Legion
|
|
|
30
30
|
instance: {
|
|
31
31
|
default_model: DEFAULT_MODEL,
|
|
32
32
|
region: 'us-east-1',
|
|
33
|
+
geo_prefix: 'us',
|
|
33
34
|
tier: :cloud,
|
|
34
35
|
transport: :aws_sdk,
|
|
35
36
|
credentials: {
|
|
@@ -41,6 +42,7 @@ module Legion
|
|
|
41
42
|
},
|
|
42
43
|
provider: {
|
|
43
44
|
region: DEFAULT_REGION,
|
|
45
|
+
geo_prefix: 'us',
|
|
44
46
|
endpoint: nil,
|
|
45
47
|
stub_responses: false
|
|
46
48
|
},
|
|
@@ -85,9 +87,17 @@ module Legion
|
|
|
85
87
|
# Resolve a default_model that never violates the configured model policy
|
|
86
88
|
# (whitelist/blacklist stays authoritative over the DEFAULT_MODEL fallback).
|
|
87
89
|
def self.resolve_default_model(config)
|
|
90
|
+
cfg = config.is_a?(Hash) ? config : {}
|
|
91
|
+
provider_conf = CredentialSources.setting(:extensions, :llm, PROVIDER_FAMILY)
|
|
92
|
+
provider_conf = {} unless provider_conf.is_a?(Hash)
|
|
93
|
+
global_conf = (::Legion::Settings.dig(:extensions, :llm) if defined?(::Legion::Settings))
|
|
94
|
+
global_conf = {} unless global_conf.is_a?(Hash)
|
|
95
|
+
|
|
88
96
|
provider_class.policy_safe_default_model(
|
|
89
|
-
configured:
|
|
90
|
-
|
|
97
|
+
configured: cfg[:default_model],
|
|
98
|
+
fallback: DEFAULT_MODEL,
|
|
99
|
+
whitelist: provider_class.resolve_policy_value(cfg, provider_conf, global_conf, :model_whitelist),
|
|
100
|
+
blacklist: provider_class.resolve_policy_value(cfg, provider_conf, global_conf, :model_blacklist)
|
|
91
101
|
)
|
|
92
102
|
end
|
|
93
103
|
|
|
@@ -214,6 +224,7 @@ module Legion
|
|
|
214
224
|
|
|
215
225
|
normalized = config.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
216
226
|
normalized[:bedrock_region] ||= normalized.delete(:region)
|
|
227
|
+
normalized[:bedrock_geo_prefix] ||= normalized.delete(:geo_prefix)
|
|
217
228
|
normalized[:bedrock_endpoint] ||= normalized.delete(:endpoint)
|
|
218
229
|
normalized[:bedrock_endpoint] ||= normalized.delete(:base_url)
|
|
219
230
|
normalized[:bedrock_endpoint] ||= normalized.delete(:api_base)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm-bedrock
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.4
|
|
4
|
+
version: 0.4.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -99,14 +99,14 @@ dependencies:
|
|
|
99
99
|
requirements:
|
|
100
100
|
- - ">="
|
|
101
101
|
- !ruby/object:Gem::Version
|
|
102
|
-
version: 0.
|
|
102
|
+
version: 0.6.0
|
|
103
103
|
type: :runtime
|
|
104
104
|
prerelease: false
|
|
105
105
|
version_requirements: !ruby/object:Gem::Requirement
|
|
106
106
|
requirements:
|
|
107
107
|
- - ">="
|
|
108
108
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: 0.
|
|
109
|
+
version: 0.6.0
|
|
110
110
|
description: Amazon Bedrock provider integration for the LegionIO LLM routing framework.
|
|
111
111
|
email:
|
|
112
112
|
- matthewdiverson@gmail.com
|