legion-llm 0.11.2 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 555bff51c05efea04f283dc4a0c005703fef2344d57b9692fdd70d6ab95d9646
4
- data.tar.gz: 454c9cd0be750aec0d597c9e343a35fc8939c22a67af8d048d3887214c225cba
3
+ metadata.gz: b5c5db81b61e68e29fd4a0ebdb612b8032145bac6ef1e2e70a6578b4dff3c9e4
4
+ data.tar.gz: 6ea03a18b9e7e448607d168af2c8c54bbccee3bb942670cd6adc5d1e673ca115
5
5
  SHA512:
6
- metadata.gz: c153ef24c678502b0bd9249c6ff8d39070e07b8e06eb950686b552bfdf8bcc5d03b060c333dff2c06418cfc4a1046c3a779274756fb784e821166b3605ee430d
7
- data.tar.gz: b1ab367c71a7098292fecbcf2c67758de0734ecd7e70cb108d8ee08abc9fd3917e01adefe2a7bc2d0472a0d731e86b0bf73a9a2a7f566619502322ff0eb9d4ec
6
+ metadata.gz: 13ec651f801eede51e8389244609bd4d74a38ecd385815aeca5bb22ef9cb9a6df35ef12ab0bb99d92e1fa96f08a730d72d03302d5a268631db38c96fdf5ba938
7
+ data.tar.gz: e9798e55b838b440ec1608ddb25424124bf36885a7fef46c3c445fc662361a7c5a82951aa38aedc63934242d363fb67ebdad6ff13060eee272a234b6ef450da7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.12.2] - 2026-06-02
4
+
5
+ ### Fixed
6
+ - **Codex CLI `/v1/responses` routing through non-native providers** — `RESPONSES_PROVIDER_FAMILIES` now contains only `:openai` (api.openai.com). All other providers (vLLM, Ollama, MLX, Bedrock, Gemini, Azure, etc.) use `/v1/chat/completions` and must explicitly declare `:responses` in their instance capabilities to opt in. Previously `:vllm` was included, causing `BadRequestError: Invalid request` when Codex routed through a vLLM-backed proxy (call/lex_llm_adapter.rb)
7
+ - **`developer` role crash on Responses API input** — The OpenAI Responses API sends `developer` as a higher-trust system role. Both Responses API handlers now map `developer` → `system` before building the message array, preventing `InvalidRoleError` from `lex-llm::Message::ROLES` validation (api/namespaces/openai/responses.rb, api/openai/responses.rb)
8
+ - **Non-streaming Responses API path always used `call_responses`** — Sync path now calls `call_executor_sync` which routes through `call` for non-native providers and `call_responses` only when `provider_supports_responses?` returns true (api/namespaces/openai/responses.rb)
9
+
10
+ ### Added
11
+ - **`Executor#provider_supports_responses?`** — Public method that checks whether the resolved provider's adapter natively supports the Responses API. Used by the API layer to gate `call_responses` vs `call_stream`/`call` dispatch. Returns false safely when provider resolution hasn't run yet (inference/executor.rb)
12
+ - **`Responses.call_executor_sync`** — New method for non-streaming dispatch: routes through `call_responses` when native, otherwise `call` (api/namespaces/openai/responses.rb)
13
+ - **`Responses.native_responses_supported?`** — Predicate shared by streaming and sync dispatch paths (api/namespaces/openai/responses.rb)
14
+
3
15
  ## [0.11.2] - 2026-06-02
4
16
 
5
17
  ### Removed
@@ -63,7 +63,7 @@ module Legion
63
63
  out << "event: error\ndata: #{Legion::JSON.dump({ type: 'server_error', message: e.message })}\n\n"
64
64
  end
65
65
  else
66
- pipeline_response = executor.call_responses(body: body, stream: false)
66
+ pipeline_response = Responses.call_executor_sync(executor, upstream_body: body)
67
67
  response_body = Responses.format_response(pipeline_response, request_id: request_id, model: model)
68
68
  log.info("[llm][api][namespaces][openai][responses] action=complete request_id=#{request_id}")
69
69
  content_type :json
@@ -165,6 +165,10 @@ module Legion
165
165
  role = item[:role]&.to_s
166
166
  next unless role
167
167
 
168
+ # OpenAI Responses API uses "developer" as a higher-trust system role.
169
+ # All downstream providers only understand the standard four roles.
170
+ role = 'system' if role == 'developer'
171
+
168
172
  content = item[:content]
169
173
  content = content.to_s if content && !content.is_a?(Array)
170
174
  messages << { role: role, content: content }.compact
@@ -295,13 +299,28 @@ module Legion
295
299
  end
296
300
 
297
301
  def self.call_executor(executor, upstream_body: nil, &)
298
- if upstream_body && executor.respond_to?(:call_responses)
302
+ if native_responses_supported?(executor, upstream_body)
299
303
  executor.call_responses(body: upstream_body, stream: true, &)
300
304
  else
301
305
  executor.call_stream(&)
302
306
  end
303
307
  end
304
308
 
309
+ def self.call_executor_sync(executor, upstream_body: nil)
310
+ if native_responses_supported?(executor, upstream_body)
311
+ executor.call_responses(body: upstream_body, stream: false)
312
+ else
313
+ executor.call
314
+ end
315
+ end
316
+
317
+ def self.native_responses_supported?(executor, upstream_body)
318
+ upstream_body &&
319
+ executor.respond_to?(:call_responses) &&
320
+ executor.respond_to?(:provider_supports_responses?) &&
321
+ executor.provider_supports_responses?
322
+ end
323
+
305
324
  def self.build_output_tool_calls(pipeline_response)
306
325
  tools_data = pipeline_response.respond_to?(:tools) ? pipeline_response.tools : nil
307
326
  return [] unless tools_data.is_a?(Array) && !tools_data.empty?
@@ -13,6 +13,7 @@ module Legion
13
13
  AUTO_ROUTING_MODEL_DISPLAY = 'LegionIO'
14
14
  AUTO_ROUTING_OFFERING_ID = 'legionio:auto:inference:legionio'
15
15
  AUTO_ROUTING_CAPABILITIES = %w[auto_routing chat completion json_schema tools].freeze
16
+ AUTO_ROUTING_MODEL_ALIASES = %w[auto].freeze
16
17
 
17
18
  def self.registered(app)
18
19
  log.debug('[llm][api][models] registering model inventory routes')
@@ -108,9 +109,10 @@ module Legion
108
109
  enabled: offerings.any? { |offering| offering[:enabled] != false }
109
110
  }
110
111
  if auto_routing_model?(model)
111
- summary[:display_name] = AUTO_ROUTING_MODEL_DISPLAY
112
+ first_display = offerings.filter_map { |o| o[:display_name] }.first
113
+ summary[:display_name] = first_display || AUTO_ROUTING_MODEL_DISPLAY
112
114
  summary[:auto_route] = true
113
- summary[:default] = true
115
+ summary[:default] = model.to_s == AUTO_ROUTING_MODEL_ID
114
116
  end
115
117
  summary
116
118
  end
@@ -129,7 +131,18 @@ module Legion
129
131
  return offerings unless auto_routing_offering_matches?(filters)
130
132
  return offerings if offerings.any? { |offering| auto_routing_model?(offering[:model]) }
131
133
 
132
- [auto_routing_offering, *offerings]
134
+ [auto_routing_offering, auto_routing_alias_offering, *offerings]
135
+ end
136
+
137
+ def self.auto_routing_alias_offering
138
+ base = auto_routing_offering
139
+ base.merge(
140
+ id: 'legionio:auto:inference:auto',
141
+ offering_id: 'legionio:auto:inference:auto',
142
+ model: 'auto',
143
+ display_name: 'LegionIO (auto)',
144
+ canonical_model_alias: 'auto'
145
+ )
133
146
  end
134
147
 
135
148
  def self.auto_routing_offering
@@ -182,7 +195,8 @@ module Legion
182
195
  end
183
196
 
184
197
  def self.auto_routing_model?(model)
185
- model.to_s.strip.downcase == AUTO_ROUTING_MODEL_ID
198
+ m = model.to_s.strip.downcase
199
+ m == AUTO_ROUTING_MODEL_ID || AUTO_ROUTING_MODEL_ALIASES.include?(m)
186
200
  end
187
201
  end
188
202
  end
@@ -165,7 +165,7 @@ module Legion
165
165
  routing_config = Legion::Settings[:llm][:routing] || {}
166
166
  top_level = Legion::Settings[:llm][:tier_order] || nil
167
167
  Array(top_level || routing_config[:tier_order] || routing_config[:tier_priority] ||
168
- %w[local direct fleet openai_compat cloud frontier])
168
+ %w[local direct fleet cloud frontier])
169
169
  end
170
170
 
171
171
  def self.privacy_mode?
@@ -130,6 +130,8 @@ module Legion
130
130
  role = item[:role]&.to_s
131
131
  next unless role
132
132
 
133
+ role = 'system' if role == 'developer'
134
+
133
135
  content = item[:content]
134
136
  content = content.to_s if content && !content.is_a?(Array)
135
137
  messages << { role: role, content: content }.compact
@@ -159,7 +159,10 @@ module Legion
159
159
  owned_by: owned_by
160
160
  }
161
161
  if limits.is_a?(Hash)
162
- obj[:context_window] = limits[:context_window] if limits[:context_window]
162
+ if limits[:context_window]
163
+ obj[:context_window] = limits[:context_window]
164
+ obj[:context_size] = limits[:context_window]
165
+ end
163
166
  obj[:max_output_tokens] = limits[:max_output_tokens] if limits[:max_output_tokens]
164
167
  end
165
168
  obj
@@ -11,7 +11,10 @@ module Legion
11
11
  include Legion::Logging::Helper
12
12
 
13
13
  METADATA_KEYS = %i[tier capabilities enabled].freeze
14
- RESPONSES_PROVIDER_FAMILIES = %i[openai vllm].freeze
14
+ # Only providers that natively expose /v1/responses (OpenAI API proper).
15
+ # All other providers (vLLM, Ollama, MLX, Anthropic, Bedrock, Gemini, Vertex, Azure Foundry)
16
+ # use /v1/chat/completions and must declare :responses in their instance capabilities explicitly.
17
+ RESPONSES_PROVIDER_FAMILIES = %i[openai].freeze
15
18
 
16
19
  def initialize(provider_name, provider_class, instance_config: {})
17
20
  @provider_name = provider_name.to_sym
@@ -26,7 +26,7 @@ module Legion
26
26
  anthropic: :frontier
27
27
  }.freeze
28
28
 
29
- DEFAULT_TIER_PRIORITY = %i[local direct fleet openai_compat cloud frontier].freeze
29
+ DEFAULT_TIER_PRIORITY = %i[local direct fleet cloud frontier].freeze
30
30
  CAPABILITY_ALIASES = {
31
31
  function_calling: :tools,
32
32
  functions: :tools,
@@ -17,7 +17,7 @@ module Legion
17
17
  @discovered_models_cache = nil
18
18
  @discovered_models_at = nil
19
19
 
20
- EMBEDDING_TIER_ORDER = %w[local direct fleet openai_compat cloud frontier].freeze
20
+ EMBEDDING_TIER_ORDER = %w[local direct fleet cloud frontier].freeze
21
21
 
22
22
  class << self
23
23
  attr_reader :embedding_provider, :embedding_model, :embedding_instance, :embedding_fallback_chain
@@ -146,6 +146,22 @@ module Legion
146
146
  clear_log_context
147
147
  end
148
148
 
149
+ # Returns true when the resolved provider's adapter natively supports the Responses API.
150
+ # Called by the API layer before choosing call_responses vs call_stream.
151
+ # Pre-provider steps must have already run (provider is resolved) for this to be accurate;
152
+ # returns false safely if resolution hasn't happened yet.
153
+ def provider_supports_responses?
154
+ provider = @resolved_provider
155
+ return false unless provider && use_native_dispatch?(provider)
156
+
157
+ ext = Call::Registry.for(provider, instance: @resolved_instance || :default)
158
+ ext.respond_to?(:supports?) ? ext.supports?(:responses) : false
159
+ rescue StandardError => e
160
+ handle_exception(e, level: :warn, handled: true, operation: 'llm.executor.provider_supports_responses',
161
+ provider: @resolved_provider)
162
+ false
163
+ end
164
+
149
165
  private
150
166
 
151
167
  def set_log_context
@@ -368,7 +384,7 @@ module Legion
368
384
  def step_routing
369
385
  log.debug "[llm][executor] action=step_routing.enter requested_provider=#{@request.routing[:provider]} requested_model=#{@request.routing[:model]}"
370
386
  @timestamps[:routing_start] = Time.now
371
- state = resolve_routing_state(apply_proactive_tier_assignment(routing_request_state))
387
+ state = resolve_routing_state(apply_proactive_tier_assignment(resolve_model_to_local_provider(routing_request_state)))
372
388
  auto_route = state[:auto_route] == true
373
389
 
374
390
  @resolved_provider = state[:provider] ||
@@ -537,6 +553,43 @@ module Legion
537
553
  state
538
554
  end
539
555
 
556
+ # If the caller named a model but gave no explicit provider/tier/instance,
557
+ # search discovered providers for that model with a healthy circuit.
558
+ # On a hit: pin provider + instance so normal routing runs against the local copy.
559
+ # On a miss: clear the model name and set auto_route so the pipeline picks the best
560
+ # available provider rather than blindly forwarding a frontier model name.
561
+ def resolve_model_to_local_provider(state)
562
+ return state if state[:provider_explicit] || state[:tier_explicit] || state[:instance_explicit]
563
+ return state if state[:provider] || state[:tier] || state[:instance]
564
+ return state unless state[:model] && defined?(Discovery) && defined?(Router)
565
+
566
+ model = state[:model].to_s
567
+ all_discovered = Array(Discovery.cached_discovered_models)
568
+ return state if all_discovered.empty?
569
+
570
+ candidates = all_discovered.select do |m|
571
+ dn = m[:model].to_s
572
+ dn == model || dn.start_with?("#{model}:")
573
+ end
574
+ return state if candidates.empty?
575
+
576
+ healthy = candidates.find do |m|
577
+ Router.health_tracker.circuit_state(m[:provider], instance: m[:instance]) != :open
578
+ end
579
+
580
+ if healthy
581
+ log.info "[llm][executor] action=model_discovery_pin model=#{model} provider=#{healthy[:provider]} instance=#{healthy[:instance]}"
582
+ state[:provider] = healthy[:provider]
583
+ state[:instance] = healthy[:instance]
584
+ else
585
+ log.info "[llm][executor] action=model_discovery_miss model=#{model} falling_back=auto_route"
586
+ state[:model] = nil
587
+ state[:auto_route] = true
588
+ end
589
+
590
+ state
591
+ end
592
+
540
593
  def resolve_routing_state(state)
541
594
  return state unless defined?(Router)
542
595
 
@@ -864,7 +864,7 @@ module Legion
864
864
  alias effective_tier_is_cloud? effective_tier_is_external?
865
865
 
866
866
  def external_tier?(tier)
867
- %i[cloud frontier openai_compat].include?(tier)
867
+ %i[cloud frontier].include?(tier)
868
868
  end
869
869
  end
870
870
  end
@@ -40,10 +40,6 @@ module Legion
40
40
  @tier == :frontier
41
41
  end
42
42
 
43
- def openai_compat?
44
- @tier == :openai_compat
45
- end
46
-
47
43
  def external?
48
44
  !%i[local direct fleet].include?(@tier)
49
45
  end
@@ -17,8 +17,8 @@ module Legion
17
17
  PROVIDER_TIER = { bedrock: :cloud, anthropic: :frontier, openai: :frontier,
18
18
  gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
19
19
  PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
20
- TIER_EXTERNAL = Set[:cloud, :frontier, :openai_compat].freeze
21
- TIER_RANK = { local: 0, direct: 1, fleet: 2, openai_compat: 3, cloud: 4, frontier: 5 }.freeze
20
+ TIER_EXTERNAL = Set[:cloud, :frontier].freeze
21
+ TIER_RANK = { local: 0, direct: 1, fleet: 2, cloud: 3, frontier: 4 }.freeze
22
22
  CAPABILITY_ALIASES = {
23
23
  function_calling: :tools,
24
24
  functions: :tools,
@@ -142,12 +142,11 @@ module Legion
142
142
  end
143
143
 
144
144
  # Check whether a tier can be used right now.
145
- # :local — always available
146
- # :direct — always available (remote self-hosted instances)
147
- # :fleet — available when Legion::Transport is loaded
148
- # :openai_compat — available when OpenAI-compatible provider instances are registered
149
- # :cloud — available unless privacy mode
150
- # :frontier — available unless privacy mode
145
+ # :local — always available
146
+ # :direct — always available (remote self-hosted instances)
147
+ # :fleet — available when Legion::Transport is loaded
148
+ # :cloud — available unless privacy mode
149
+ # :frontier — available unless privacy mode
151
150
  def tier_available?(tier)
152
151
  sym = tier.to_sym
153
152
  if external_tier?(sym) && privacy_mode?
@@ -159,11 +158,6 @@ module Legion
159
158
  log.debug "[llm][router] action=tier_available tier=fleet available=#{available}"
160
159
  return available
161
160
  end
162
- if sym == :openai_compat
163
- available = openai_compat_available?
164
- log.debug "[llm][router] action=tier_available tier=openai_compat available=#{available}"
165
- return available
166
- end
167
161
 
168
162
  true
169
163
  end
@@ -403,10 +397,6 @@ module Legion
403
397
  TIER_EXTERNAL.include?(tier)
404
398
  end
405
399
 
406
- def openai_compat_available?
407
- !registry_entry_for_tier(:openai_compat).nil?
408
- end
409
-
410
400
  def pick_best(candidates)
411
401
  return nil if candidates.empty?
412
402
 
@@ -454,8 +444,6 @@ module Legion
454
444
  case sym
455
445
  when :local, :direct, :fleet
456
446
  :ollama
457
- when :openai_compat
458
- :openai
459
447
  when :cloud
460
448
  default = Legion::Settings[:llm][:default_provider]
461
449
  default ? default.to_sym : :bedrock
@@ -477,8 +465,6 @@ module Legion
477
465
  case sym
478
466
  when :local, :direct, :fleet
479
467
  default_settings_model_for_tier(sym) || 'llama3'
480
- when :openai_compat
481
- 'gpt-4o'
482
468
  when :cloud
483
469
  default_settings_model_for_tier(sym) || 'us.anthropic.claude-sonnet-4-6'
484
470
  when :frontier
@@ -76,17 +76,9 @@ module Legion
76
76
  end
77
77
 
78
78
  routing = settings.is_a?(Hash) ? (settings[:routing] || settings['routing'] || {}) : {}
79
- if routing.is_a?(Hash)
80
- if routing.key?(:use_fleet) || routing.key?('use_fleet')
81
- raise ArgumentError,
82
- 'routing.use_fleet has been removed; configure fleet.dispatch.enabled instead'
83
- end
84
-
85
- tiers = routing[:tiers] || routing['tiers'] || {}
86
- openai_compat = tiers.is_a?(Hash) ? (tiers[:openai_compat] || tiers['openai_compat'] || {}) : {}
87
- if openai_compat.is_a?(Hash) && (openai_compat.key?(:gateways) || openai_compat.key?('gateways'))
88
- raise ArgumentError, 'routing.tiers.openai_compat.gateways has been removed; configure lex-llm-openai provider instances instead'
89
- end
79
+ if routing.is_a?(Hash) && (routing.key?(:use_fleet) || routing.key?('use_fleet'))
80
+ raise ArgumentError,
81
+ 'routing.use_fleet has been removed; configure fleet.dispatch.enabled instead'
90
82
  end
91
83
 
92
84
  settings
@@ -215,19 +207,18 @@ module Legion
215
207
  def self.routing_defaults
216
208
  {
217
209
  enabled: true,
218
- tier_priority: %w[local direct fleet openai_compat cloud frontier],
210
+ tier_priority: %w[local direct fleet cloud frontier],
219
211
  default_intent: { privacy: 'normal', capability: 'moderate', cost: 'normal' },
220
212
  tiers: {
221
- local: { provider: 'ollama' },
222
- fleet: {
213
+ local: { provider: 'ollama' },
214
+ fleet: {
223
215
  queue: 'llm.fleet',
224
216
  routing_style: :shared_lane,
225
217
  timeout_seconds: 30,
226
218
  timeouts: { embed: 10, chat: 30, generate: 30, default: 30 }
227
219
  },
228
- openai_compat: {},
229
- cloud: { providers: %w[bedrock azure gemini] },
230
- frontier: { providers: %w[anthropic openai] }
220
+ cloud: { providers: %w[bedrock azure gemini] },
221
+ frontier: { providers: %w[anthropic openai] }
231
222
  },
232
223
  health: {
233
224
  window_seconds: 300,
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.11.2'
5
+ VERSION = '0.12.2'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.2
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity