legion-llm 0.11.2 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/llm/api/namespaces/openai/responses.rb +21 -2
- data/lib/legion/llm/api/native/models.rb +18 -4
- data/lib/legion/llm/api/native/tiers.rb +1 -1
- data/lib/legion/llm/api/openai/responses.rb +2 -0
- data/lib/legion/llm/api/translators/openai_response.rb +4 -1
- data/lib/legion/llm/call/lex_llm_adapter.rb +4 -1
- data/lib/legion/llm/discovery/rule_generator.rb +1 -1
- data/lib/legion/llm/discovery.rb +1 -1
- data/lib/legion/llm/inference/executor.rb +54 -1
- data/lib/legion/llm/inference.rb +1 -1
- data/lib/legion/llm/router/resolution.rb +0 -4
- data/lib/legion/llm/router.rb +7 -21
- data/lib/legion/llm/settings.rb +8 -17
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b5c5db81b61e68e29fd4a0ebdb612b8032145bac6ef1e2e70a6578b4dff3c9e4
|
|
4
|
+
data.tar.gz: 6ea03a18b9e7e448607d168af2c8c54bbccee3bb942670cd6adc5d1e673ca115
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 13ec651f801eede51e8389244609bd4d74a38ecd385815aeca5bb22ef9cb9a6df35ef12ab0bb99d92e1fa96f08a730d72d03302d5a268631db38c96fdf5ba938
|
|
7
|
+
data.tar.gz: e9798e55b838b440ec1608ddb25424124bf36885a7fef46c3c445fc662361a7c5a82951aa38aedc63934242d363fb67ebdad6ff13060eee272a234b6ef450da7
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.12.2] - 2026-06-02
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- **Codex CLI `/v1/responses` routing through non-native providers** — `RESPONSES_PROVIDER_FAMILIES` now contains only `:openai` (api.openai.com). All other providers (vLLM, Ollama, MLX, Bedrock, Gemini, Azure, etc.) use `/v1/chat/completions` and must explicitly declare `:responses` in their instance capabilities to opt in. Previously `:vllm` was included, causing `BadRequestError: Invalid request` when Codex routed through a vLLM-backed proxy (call/lex_llm_adapter.rb)
|
|
7
|
+
- **`developer` role crash on Responses API input** — The OpenAI Responses API sends `developer` as a higher-trust system role. Both Responses API handlers now map `developer` → `system` before building the message array, preventing `InvalidRoleError` from `lex-llm::Message::ROLES` validation (api/namespaces/openai/responses.rb, api/openai/responses.rb)
|
|
8
|
+
- **Non-streaming Responses API path always used `call_responses`** — Sync path now calls `call_executor_sync` which routes through `call` for non-native providers and `call_responses` only when `provider_supports_responses?` returns true (api/namespaces/openai/responses.rb)
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **`Executor#provider_supports_responses?`** — Public method that checks whether the resolved provider's adapter natively supports the Responses API. Used by the API layer to gate `call_responses` vs `call_stream`/`call` dispatch. Returns false safely when provider resolution hasn't run yet (inference/executor.rb)
|
|
12
|
+
- **`Responses.call_executor_sync`** — New method for non-streaming dispatch: routes through `call_responses` when native, otherwise `call` (api/namespaces/openai/responses.rb)
|
|
13
|
+
- **`Responses.native_responses_supported?`** — Predicate shared by streaming and sync dispatch paths (api/namespaces/openai/responses.rb)
|
|
14
|
+
|
|
3
15
|
## [0.11.2] - 2026-06-02
|
|
4
16
|
|
|
5
17
|
### Removed
|
|
@@ -63,7 +63,7 @@ module Legion
|
|
|
63
63
|
out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
|
|
64
64
|
end
|
|
65
65
|
else
|
|
66
|
-
pipeline_response =
|
|
66
|
+
pipeline_response = Responses.call_executor_sync(executor, upstream_body: body)
|
|
67
67
|
response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)
|
|
68
68
|
log.info("[llm][api][namespaces][openai][responses] action=complete request_id=#{request_id}")
|
|
69
69
|
content_type :json
|
|
@@ -165,6 +165,10 @@ module Legion
|
|
|
165
165
|
role = item[:role]&.to_s
|
|
166
166
|
next unless role
|
|
167
167
|
|
|
168
|
+
# OpenAI Responses API uses "developer" as a higher-trust system role.
|
|
169
|
+
# All downstream providers only understand the standard four roles.
|
|
170
|
+
role = 'system' if role == 'developer'
|
|
171
|
+
|
|
168
172
|
content = item[:content]
|
|
169
173
|
content = content.to_s if content && !content.is_a?(Array)
|
|
170
174
|
messages << { role: role, content: content }.compact
|
|
@@ -295,13 +299,28 @@ module Legion
|
|
|
295
299
|
end
|
|
296
300
|
|
|
297
301
|
def self.call_executor(executor, upstream_body: nil, &)
|
|
298
|
-
if
|
|
302
|
+
if native_responses_supported?(executor, upstream_body)
|
|
299
303
|
executor.call_responses(body: upstream_body, stream: true, &)
|
|
300
304
|
else
|
|
301
305
|
executor.call_stream(&)
|
|
302
306
|
end
|
|
303
307
|
end
|
|
304
308
|
|
|
309
|
+
def self.call_executor_sync(executor, upstream_body: nil)
|
|
310
|
+
if native_responses_supported?(executor, upstream_body)
|
|
311
|
+
executor.call_responses(body: upstream_body, stream: false)
|
|
312
|
+
else
|
|
313
|
+
executor.call
|
|
314
|
+
end
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def self.native_responses_supported?(executor, upstream_body)
|
|
318
|
+
upstream_body &&
|
|
319
|
+
executor.respond_to?(:call_responses) &&
|
|
320
|
+
executor.respond_to?(:provider_supports_responses?) &&
|
|
321
|
+
executor.provider_supports_responses?
|
|
322
|
+
end
|
|
323
|
+
|
|
305
324
|
def self.build_output_tool_calls(pipeline_response)
|
|
306
325
|
tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
|
|
307
326
|
return [] unless tools_data.is_a?(Array) && !tools_data.empty?
|
|
@@ -13,6 +13,7 @@ module Legion
|
|
|
13
13
|
AUTO_ROUTING_MODEL_DISPLAY = 'LegionIO'
|
|
14
14
|
AUTO_ROUTING_OFFERING_ID = 'legionio:auto:inference:legionio'
|
|
15
15
|
AUTO_ROUTING_CAPABILITIES = %w[auto_routing chat completion json_schema tools].freeze
|
|
16
|
+
AUTO_ROUTING_MODEL_ALIASES = %w[auto].freeze
|
|
16
17
|
|
|
17
18
|
def self.registered(app)
|
|
18
19
|
log.debug('[llm][api][models] registering model inventory routes')
|
|
@@ -108,9 +109,10 @@ module Legion
|
|
|
108
109
|
enabled: offerings.any? { |offering| offering[:enabled] != false }
|
|
109
110
|
}
|
|
110
111
|
if auto_routing_model?(model)
|
|
111
|
-
|
|
112
|
+
first_display = offerings.filter_map { |o| o[:display_name] }.first
|
|
113
|
+
summary[:display_name] = first_display || AUTO_ROUTING_MODEL_DISPLAY
|
|
112
114
|
summary[:auto_route] = true
|
|
113
|
-
summary[:default] =
|
|
115
|
+
summary[:default] = model.to_s == AUTO_ROUTING_MODEL_ID
|
|
114
116
|
end
|
|
115
117
|
summary
|
|
116
118
|
end
|
|
@@ -129,7 +131,18 @@ module Legion
|
|
|
129
131
|
return offerings unless auto_routing_offering_matches?(filters)
|
|
130
132
|
return offerings if offerings.any? { |offering| auto_routing_model?(offering[:model]) }
|
|
131
133
|
|
|
132
|
-
[auto_routing_offering, *offerings]
|
|
134
|
+
[auto_routing_offering, auto_routing_alias_offering, *offerings]
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def self.auto_routing_alias_offering
|
|
138
|
+
base = auto_routing_offering
|
|
139
|
+
base.merge(
|
|
140
|
+
id: 'legionio:auto:inference:auto',
|
|
141
|
+
offering_id: 'legionio:auto:inference:auto',
|
|
142
|
+
model: 'auto',
|
|
143
|
+
display_name: 'LegionIO (auto)',
|
|
144
|
+
canonical_model_alias: 'auto'
|
|
145
|
+
)
|
|
133
146
|
end
|
|
134
147
|
|
|
135
148
|
def self.auto_routing_offering
|
|
@@ -182,7 +195,8 @@ module Legion
|
|
|
182
195
|
end
|
|
183
196
|
|
|
184
197
|
def self.auto_routing_model?(model)
|
|
185
|
-
model.to_s.strip.downcase
|
|
198
|
+
m = model.to_s.strip.downcase
|
|
199
|
+
m == AUTO_ROUTING_MODEL_ID || AUTO_ROUTING_MODEL_ALIASES.include?(m)
|
|
186
200
|
end
|
|
187
201
|
end
|
|
188
202
|
end
|
|
@@ -165,7 +165,7 @@ module Legion
|
|
|
165
165
|
routing_config = Legion::Settings[:llm][:routing] || {}
|
|
166
166
|
top_level = Legion::Settings[:llm][:tier_order] || nil
|
|
167
167
|
Array(top_level || routing_config[:tier_order] || routing_config[:tier_priority] ||
|
|
168
|
-
%w[local direct fleet
|
|
168
|
+
%w[local direct fleet cloud frontier])
|
|
169
169
|
end
|
|
170
170
|
|
|
171
171
|
def self.privacy_mode?
|
|
@@ -130,6 +130,8 @@ module Legion
|
|
|
130
130
|
role = item[:role]&.to_s
|
|
131
131
|
next unless role
|
|
132
132
|
|
|
133
|
+
role = 'system' if role == 'developer'
|
|
134
|
+
|
|
133
135
|
content = item[:content]
|
|
134
136
|
content = content.to_s if content && !content.is_a?(Array)
|
|
135
137
|
messages << { role: role, content: content }.compact
|
|
@@ -159,7 +159,10 @@ module Legion
|
|
|
159
159
|
owned_by: owned_by
|
|
160
160
|
}
|
|
161
161
|
if limits.is_a?(Hash)
|
|
162
|
-
|
|
162
|
+
if limits[:context_window]
|
|
163
|
+
obj[:context_window] = limits[:context_window]
|
|
164
|
+
obj[:context_size] = limits[:context_window]
|
|
165
|
+
end
|
|
163
166
|
obj[:max_output_tokens] = limits[:max_output_tokens] if limits[:max_output_tokens]
|
|
164
167
|
end
|
|
165
168
|
obj
|
|
@@ -11,7 +11,10 @@ module Legion
|
|
|
11
11
|
include Legion::Logging::Helper
|
|
12
12
|
|
|
13
13
|
METADATA_KEYS = %i[tier capabilities enabled].freeze
|
|
14
|
-
|
|
14
|
+
# Only providers that natively expose /v1/responses (OpenAI API proper).
|
|
15
|
+
# All other providers (vLLM, Ollama, MLX, Anthropic, Bedrock, Gemini, Vertex, Azure Foundry)
|
|
16
|
+
# use /v1/chat/completions and must declare :responses in their instance capabilities explicitly.
|
|
17
|
+
RESPONSES_PROVIDER_FAMILIES = %i[openai].freeze
|
|
15
18
|
|
|
16
19
|
def initialize(provider_name, provider_class, instance_config: {})
|
|
17
20
|
@provider_name = provider_name.to_sym
|
|
@@ -26,7 +26,7 @@ module Legion
|
|
|
26
26
|
anthropic: :frontier
|
|
27
27
|
}.freeze
|
|
28
28
|
|
|
29
|
-
DEFAULT_TIER_PRIORITY = %i[local direct fleet
|
|
29
|
+
DEFAULT_TIER_PRIORITY = %i[local direct fleet cloud frontier].freeze
|
|
30
30
|
CAPABILITY_ALIASES = {
|
|
31
31
|
function_calling: :tools,
|
|
32
32
|
functions: :tools,
|
data/lib/legion/llm/discovery.rb
CHANGED
|
@@ -17,7 +17,7 @@ module Legion
|
|
|
17
17
|
@discovered_models_cache = nil
|
|
18
18
|
@discovered_models_at = nil
|
|
19
19
|
|
|
20
|
-
EMBEDDING_TIER_ORDER = %w[local direct fleet
|
|
20
|
+
EMBEDDING_TIER_ORDER = %w[local direct fleet cloud frontier].freeze
|
|
21
21
|
|
|
22
22
|
class << self
|
|
23
23
|
attr_reader :embedding_provider, :embedding_model, :embedding_instance, :embedding_fallback_chain
|
|
@@ -146,6 +146,22 @@ module Legion
|
|
|
146
146
|
clear_log_context
|
|
147
147
|
end
|
|
148
148
|
|
|
149
|
+
# Returns true when the resolved provider's adapter natively supports the Responses API.
|
|
150
|
+
# Called by the API layer before choosing call_responses vs call_stream (streaming) or call (sync).
|
|
151
|
+
# Pre-provider steps must have already run (provider is resolved) for this to be accurate;
|
|
152
|
+
# returns false safely if resolution hasn't happened yet.
|
|
153
|
+
def provider_supports_responses?
|
|
154
|
+
provider = @resolved_provider
|
|
155
|
+
return false unless provider && use_native_dispatch?(provider)
|
|
156
|
+
|
|
157
|
+
ext = Call::Registry.for(provider, instance: @resolved_instance || :default)
|
|
158
|
+
ext.respond_to?(:supports?) ? ext.supports?(:responses) : false
|
|
159
|
+
rescue StandardError => e
|
|
160
|
+
handle_exception(e, level: :warn, handled: true, operation: 'llm.executor.provider_supports_responses',
|
|
161
|
+
provider: @resolved_provider)
|
|
162
|
+
false
|
|
163
|
+
end
|
|
164
|
+
|
|
149
165
|
private
|
|
150
166
|
|
|
151
167
|
def set_log_context
|
|
@@ -368,7 +384,7 @@ module Legion
|
|
|
368
384
|
def step_routing
|
|
369
385
|
log.debug "[llm][executor] action=step_routing.enter requested_provider=#{@request.routing[:provider]} requested_model=#{@request.routing[:model]}"
|
|
370
386
|
@timestamps[:routing_start] = Time.now
|
|
371
|
-
state = resolve_routing_state(apply_proactive_tier_assignment(routing_request_state))
|
|
387
|
+
state = resolve_routing_state(apply_proactive_tier_assignment(resolve_model_to_local_provider(routing_request_state)))
|
|
372
388
|
auto_route = state[:auto_route] == true
|
|
373
389
|
|
|
374
390
|
@resolved_provider = state[:provider] ||
|
|
@@ -537,6 +553,43 @@ module Legion
|
|
|
537
553
|
state
|
|
538
554
|
end
|
|
539
555
|
|
|
556
|
+
# If the caller named a model but gave no explicit provider/tier/instance,
|
|
557
|
+
# search discovered providers for that model with a healthy circuit.
|
|
558
|
+
# On a hit: pin provider + instance so normal routing runs against the local copy.
|
|
559
|
+
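# On a miss (model discovered, but every matching circuit is open): clear the model name and set auto_route so the pipeline picks the best
|
|
560
|
+
# available provider rather than blindly forwarding a frontier model name. A model absent from discovery is passed through unchanged.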
|
|
561
|
+
def resolve_model_to_local_provider(state)
|
|
562
|
+
return state if state[:provider_explicit] || state[:tier_explicit] || state[:instance_explicit]
|
|
563
|
+
return state if state[:provider] || state[:tier] || state[:instance]
|
|
564
|
+
return state unless state[:model] && defined?(Discovery) && defined?(Router)
|
|
565
|
+
|
|
566
|
+
model = state[:model].to_s
|
|
567
|
+
all_discovered = Array(Discovery.cached_discovered_models)
|
|
568
|
+
return state if all_discovered.empty?
|
|
569
|
+
|
|
570
|
+
candidates = all_discovered.select do |m|
|
|
571
|
+
dn = m[:model].to_s
|
|
572
|
+
dn == model || dn.start_with?("#{model}:")
|
|
573
|
+
end
|
|
574
|
+
return state if candidates.empty?
|
|
575
|
+
|
|
576
|
+
healthy = candidates.find do |m|
|
|
577
|
+
Router.health_tracker.circuit_state(m[:provider], instance: m[:instance]) != :open
|
|
578
|
+
end
|
|
579
|
+
|
|
580
|
+
if healthy
|
|
581
|
+
log.info "[llm][executor] action=model_discovery_pin model=#{model} provider=#{healthy[:provider]} instance=#{healthy[:instance]}"
|
|
582
|
+
state[:provider] = healthy[:provider]
|
|
583
|
+
state[:instance] = healthy[:instance]
|
|
584
|
+
else
|
|
585
|
+
log.info "[llm][executor] action=model_discovery_miss model=#{model} falling_back=auto_route"
|
|
586
|
+
state[:model] = nil
|
|
587
|
+
state[:auto_route] = true
|
|
588
|
+
end
|
|
589
|
+
|
|
590
|
+
state
|
|
591
|
+
end
|
|
592
|
+
|
|
540
593
|
def resolve_routing_state(state)
|
|
541
594
|
return state unless defined?(Router)
|
|
542
595
|
|
data/lib/legion/llm/inference.rb
CHANGED
data/lib/legion/llm/router.rb
CHANGED
|
@@ -17,8 +17,8 @@ module Legion
|
|
|
17
17
|
PROVIDER_TIER = { bedrock: :cloud, anthropic: :frontier, openai: :frontier,
|
|
18
18
|
gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
|
|
19
19
|
PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
|
|
20
|
-
TIER_EXTERNAL = Set[:cloud, :frontier
|
|
21
|
-
TIER_RANK = { local: 0, direct: 1, fleet: 2,
|
|
20
|
+
TIER_EXTERNAL = Set[:cloud, :frontier].freeze
|
|
21
|
+
TIER_RANK = { local: 0, direct: 1, fleet: 2, cloud: 3, frontier: 4 }.freeze
|
|
22
22
|
CAPABILITY_ALIASES = {
|
|
23
23
|
function_calling: :tools,
|
|
24
24
|
functions: :tools,
|
|
@@ -142,12 +142,11 @@ module Legion
|
|
|
142
142
|
end
|
|
143
143
|
|
|
144
144
|
# Check whether a tier can be used right now.
|
|
145
|
-
# :local
|
|
146
|
-
# :direct
|
|
147
|
-
# :fleet
|
|
148
|
-
# :
|
|
149
|
-
# :
|
|
150
|
-
# :frontier — available unless privacy mode
|
|
145
|
+
# :local — always available
|
|
146
|
+
# :direct — always available (remote self-hosted instances)
|
|
147
|
+
# :fleet — available when Legion::Transport is loaded
|
|
148
|
+
# :cloud — available unless privacy mode
|
|
149
|
+
# :frontier — available unless privacy mode
|
|
151
150
|
def tier_available?(tier)
|
|
152
151
|
sym = tier.to_sym
|
|
153
152
|
if external_tier?(sym) && privacy_mode?
|
|
@@ -159,11 +158,6 @@ module Legion
|
|
|
159
158
|
log.debug "[llm][router] action=tier_available tier=fleet available=#{available}"
|
|
160
159
|
return available
|
|
161
160
|
end
|
|
162
|
-
if sym == :openai_compat
|
|
163
|
-
available = openai_compat_available?
|
|
164
|
-
log.debug "[llm][router] action=tier_available tier=openai_compat available=#{available}"
|
|
165
|
-
return available
|
|
166
|
-
end
|
|
167
161
|
|
|
168
162
|
true
|
|
169
163
|
end
|
|
@@ -403,10 +397,6 @@ module Legion
|
|
|
403
397
|
TIER_EXTERNAL.include?(tier)
|
|
404
398
|
end
|
|
405
399
|
|
|
406
|
-
def openai_compat_available?
|
|
407
|
-
!registry_entry_for_tier(:openai_compat).nil?
|
|
408
|
-
end
|
|
409
|
-
|
|
410
400
|
def pick_best(candidates)
|
|
411
401
|
return nil if candidates.empty?
|
|
412
402
|
|
|
@@ -454,8 +444,6 @@ module Legion
|
|
|
454
444
|
case sym
|
|
455
445
|
when :local, :direct, :fleet
|
|
456
446
|
:ollama
|
|
457
|
-
when :openai_compat
|
|
458
|
-
:openai
|
|
459
447
|
when :cloud
|
|
460
448
|
default = Legion::Settings[:llm][:default_provider]
|
|
461
449
|
default ? default.to_sym : :bedrock
|
|
@@ -477,8 +465,6 @@ module Legion
|
|
|
477
465
|
case sym
|
|
478
466
|
when :local, :direct, :fleet
|
|
479
467
|
default_settings_model_for_tier(sym) || 'llama3'
|
|
480
|
-
when :openai_compat
|
|
481
|
-
'gpt-4o'
|
|
482
468
|
when :cloud
|
|
483
469
|
default_settings_model_for_tier(sym) || 'us.anthropic.claude-sonnet-4-6'
|
|
484
470
|
when :frontier
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -76,17 +76,9 @@ module Legion
|
|
|
76
76
|
end
|
|
77
77
|
|
|
78
78
|
routing = settings.is_a?(Hash) ? (settings[:routing] || settings['routing'] || {}) : {}
|
|
79
|
-
if routing.is_a?(Hash)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
'routing.use_fleet has been removed; configure fleet.dispatch.enabled instead'
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
tiers = routing[:tiers] || routing['tiers'] || {}
|
|
86
|
-
openai_compat = tiers.is_a?(Hash) ? (tiers[:openai_compat] || tiers['openai_compat'] || {}) : {}
|
|
87
|
-
if openai_compat.is_a?(Hash) && (openai_compat.key?(:gateways) || openai_compat.key?('gateways'))
|
|
88
|
-
raise ArgumentError, 'routing.tiers.openai_compat.gateways has been removed; configure lex-llm-openai provider instances instead'
|
|
89
|
-
end
|
|
79
|
+
if routing.is_a?(Hash) && (routing.key?(:use_fleet) || routing.key?('use_fleet'))
|
|
80
|
+
raise ArgumentError,
|
|
81
|
+
'routing.use_fleet has been removed; configure fleet.dispatch.enabled instead'
|
|
90
82
|
end
|
|
91
83
|
|
|
92
84
|
settings
|
|
@@ -215,19 +207,18 @@ module Legion
|
|
|
215
207
|
def self.routing_defaults
|
|
216
208
|
{
|
|
217
209
|
enabled: true,
|
|
218
|
-
tier_priority: %w[local direct fleet
|
|
210
|
+
tier_priority: %w[local direct fleet cloud frontier],
|
|
219
211
|
default_intent: { privacy: 'normal', capability: 'moderate', cost: 'normal' },
|
|
220
212
|
tiers: {
|
|
221
|
-
local:
|
|
222
|
-
fleet:
|
|
213
|
+
local: { provider: 'ollama' },
|
|
214
|
+
fleet: {
|
|
223
215
|
queue: 'llm.fleet',
|
|
224
216
|
routing_style: :shared_lane,
|
|
225
217
|
timeout_seconds: 30,
|
|
226
218
|
timeouts: { embed: 10, chat: 30, generate: 30, default: 30 }
|
|
227
219
|
},
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
frontier: { providers: %w[anthropic openai] }
|
|
220
|
+
cloud: { providers: %w[bedrock azure gemini] },
|
|
221
|
+
frontier: { providers: %w[anthropic openai] }
|
|
231
222
|
},
|
|
232
223
|
health: {
|
|
233
224
|
window_seconds: 300,
|
data/lib/legion/llm/version.rb
CHANGED