legion-llm 0.5.20 → 0.5.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: f51b096c53558665dbfc074fa203c8624aa79444324cb686759aee35cd568ca6
- data.tar.gz: 95c6a41fa6839d476cff0d60a6b56a1fbc7ad5be4f914f290fc877e0198e50bc
+ metadata.gz: '08ccdbe9c1f4187acdcae49df633370d7fecb9866fa441aa1e95a3f932d5c9e6'
+ data.tar.gz: df48f1ba0ef83a0fb26ba67865bf0f69ee415a349f2fc0e4ec40dd70f7f70815
  SHA512:
- metadata.gz: 33a939afde771b78203d7c84f3fdb9ccbe226907037bc97f86a5650324c089e1034e9de3251ea1deb72d634680b3de6e263a48bbb933648621586598d1244f16
- data.tar.gz: d0f98e6c6dfbee9040953991e8ab3a0de88cd37ed32a352b2d6d0ef52163e120756238d7d5f96c6f00d028b25e778a2076c9012550879fba1b98b06339d5a63e
+ metadata.gz: 9237a7a67d3b843bef628817cbd679fdce2f690e35b35f817e38b39c2e46918cb87d1c81b0b3c5a48589f54a163cbd7f3ef5956e73ba3c96f4f0ee3c1f803c9d
+ data.tar.gz: 996f35c9bb47ff5046bfd10fe48440a783480b81956b6e0b7f1f10bd860fd4aaab9dd9fcda3378761ac2942ea7f9c886d0bd19722cee9b46936f7ae8c662aa9d
data/CHANGELOG.md CHANGED
@@ -2,6 +2,28 @@
 
  ## [Unreleased]
 
+ ## [0.5.21] - 2026-03-31
+
+ ### Added
+ - Provider health checks at boot: each SaaS provider is pinged with a test request; failures disable the provider with a log warning
+ - `resolve_llm_secrets` — resolves `env://` and `vault://` URIs in LLM settings before provider configuration (fixes late-loaded settings not being resolved)
+ - `CodexConfigLoader.read_token` — extracts valid Codex auth token for fallback credential recovery
+ - Credential recovery: when OpenAI fails health check, automatically tries `~/.codex/auth.json` token as fallback
+ - Provider summary log after health checks listing all available providers
+ - All-providers-down error log when no providers survive health checks
+ - Embedding health check for SaaS providers during boot (Ollama skipped — model-pulled check is sufficient)
+ - Direct Ollama embedding via `POST /api/embed` — bypasses RubyLLM which doesn't support Ollama embeddings
+ - Pipeline executor provider fallback: on auth/forbidden errors, automatically retries with next enabled provider
+ - `RubyLLM::Error` subclasses now caught in pipeline executor (previously only Faraday errors were rescued)
+
+ ### Changed
+ - Bedrock default model corrected from `us.anthropic.claude-sonnet-4-6-v1` to `us.anthropic.claude-sonnet-4-6`
+ - Ollama default model changed from `llama3` to `qwen3.5:latest`
+ - `nomic-embed-text` added as first preference in `ollama_preferred` embedding models
+ - `Discovery::Ollama.model_available?` now uses prefix matching (`mxbai-embed-large` matches `mxbai-embed-large:latest`)
+ - Removed redundant `ping_provider` — replaced by `verify_providers` which checks all enabled SaaS providers
+ - `ModelNotFoundError` during health check no longer disables the provider (RubyLLM registry gap, not auth failure)
+
  ## [0.5.20] - 2026-03-30
 
  ### Added
@@ -19,6 +19,20 @@ module Legion
  apply_codex_config(config)
  end
 
+ def read_token
+ return nil unless File.exist?(CODEX_AUTH)
+
+ config = read_json(CODEX_AUTH)
+ return nil if config.empty?
+ return nil unless config[:auth_mode] == 'chatgpt'
+
+ token = config.dig(:tokens, :access_token)
+ return nil unless token.is_a?(String) && !token.empty?
+ return nil unless token_valid?(token)
+
+ token
+ end
+
  def read_json(path)
  ::JSON.parse(File.read(path), symbolize_names: true)
  rescue StandardError => e
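
Taken together, the guards above define the minimal `~/.codex/auth.json` shape that `read_token` accepts: `auth_mode` must be `chatgpt`, and `tokens.access_token` must be a non-empty string that passes `token_valid?`. A hedged usage sketch, assuming the conventional constant path for the loader; the JSON shape is inferred from these guards, not from any documented Codex schema:

```ruby
# ~/.codex/auth.json shape implied by the guards (values illustrative):
#   { "auth_mode": "chatgpt", "tokens": { "access_token": "eyJhbGci..." } }
token = Legion::LLM::CodexConfigLoader.read_token
if token
  # read_token returns nil for a missing file, wrong auth_mode, blank
  # token, or failed validation, so a nil check is the only guard needed.
  RubyLLM.configure { |config| config.openai_api_key = token }
end
```
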
@@ -18,11 +18,11 @@ module Legion
  end
 
  def model_available?(name)
- model_names.include?(name)
+ model_names.any? { |n| n == name || n.start_with?("#{name}:") }
  end
 
  def model_size(name)
- models.find { |m| m['name'] == name }&.dig('size')
+ models.find { |m| m['name'] == name || m['name'].start_with?("#{name}:") }&.dig('size')
  end
 
  def refresh!
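
With the prefix match, a bare model name now matches any locally pulled tag, while partial names still miss. A sketch of the new behaviour, assuming the local Ollama registry reports two tags:

```ruby
# model_names # => ["mxbai-embed-large:latest", "qwen3.5:latest"]
model_available?('mxbai-embed-large:latest') # => true  (exact match)
model_available?('mxbai-embed-large')        # => true  (matches "mxbai-embed-large:" prefix)
model_available?('mxbai')                    # => false (no "mxbai:" tag exists)
```
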
@@ -22,6 +22,9 @@ module Legion
 
  provider ||= resolve_provider
  model ||= resolve_model(provider)
+
+ return generate_ollama(text: text, model: model) if provider&.to_sym == :ollama
+
  response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
  vector = apply_dimension_enforcement(response.vectors.first, provider)
  return dimension_error(model, provider, vector) if vector.is_a?(String)
@@ -37,7 +40,10 @@ module Legion
 
  provider ||= resolve_provider
  model ||= resolve_model(provider)
- response = RubyLLM.embed(texts, **build_opts(model, provider, dimensions))
+
+ return generate_ollama_batch(texts: texts, model: model) if provider&.to_sym == :ollama
+
+ response = RubyLLM.embed(texts, **build_opts(model, provider, dimensions))
  response.vectors.each_with_index.map do |vec, i|
  build_batch_entry(vec, model, provider, i)
  end
@@ -169,6 +175,37 @@ module Legion
  rescue StandardError
  {}
  end
+
+ def generate_ollama(text:, model:)
+ result = ollama_embed_request(model: model, input: text)
+ vector = result['embeddings']&.first
+ vector = apply_dimension_enforcement(vector, :ollama) if vector
+ return dimension_error(model, :ollama, vector) if vector.is_a?(String)
+
+ { vector: vector, model: model, provider: :ollama, dimensions: vector&.size || 0, tokens: 0 }
+ end
+
+ def generate_ollama_batch(texts:, model:)
+ result = ollama_embed_request(model: model, input: texts)
+ vectors = result['embeddings'] || []
+ vectors.each_with_index.map do |vec, i|
+ build_batch_entry(vec, model, :ollama, i)
+ end
+ end
+
+ def ollama_embed_request(model:, input:)
+ base_url = Legion::Settings.dig(:llm, :providers, :ollama, :base_url) || 'http://localhost:11434'
+ conn = Faraday.new(url: base_url) do |f|
+ f.options.timeout = 30
+ f.options.open_timeout = 5
+ f.adapter Faraday.default_adapter
+ end
+ body = { model: model, input: input }
+ response = conn.post('/api/embed', body.to_json, 'Content-Type' => 'application/json')
+ raise "Ollama embed failed: #{response.status} #{response.body}" unless response.success?
+
+ ::JSON.parse(response.body)
+ end
  end
  end
  end
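
`ollama_embed_request` targets Ollama's native embed endpoint, which accepts either a single string or an array as `input` and always answers with an `embeddings` array of vectors; that symmetry is why the single and batch paths above can share one helper. A standalone sketch of the same call, with an illustrative model name and vector size:

```ruby
require 'faraday'
require 'json'

# Direct POST to Ollama's /api/embed, bypassing RubyLLM entirely.
conn = Faraday.new(url: 'http://localhost:11434')
resp = conn.post('/api/embed',
                 { model: 'nomic-embed-text', input: %w[hello world] }.to_json,
                 'Content-Type' => 'application/json')
vectors = JSON.parse(resp.body)['embeddings']
vectors.length       # => 2 (one vector per input string)
vectors.first.length # => 768 for nomic-embed-text
```
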
@@ -141,6 +141,42 @@ module Legion
  end
 
  def step_provider_call
+ providers_tried = []
+ begin
+ execute_provider_request
+ rescue RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError,
+ Faraday::UnauthorizedError, Faraday::ForbiddenError => e
+ providers_tried << @resolved_provider
+ fallback = find_fallback_provider(exclude: providers_tried)
+ if fallback
+ if defined?(Legion::Logging)
+ Legion::Logging.warn "[pipeline] #{@resolved_provider} auth failed (#{e.class}), falling back to #{fallback[:provider]}:#{fallback[:model]}"
+ end
+ @resolved_provider = fallback[:provider]
+ @resolved_model = fallback[:model]
+ @warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
+ @timeline.record(
+ category: :provider, key: 'provider:fallback',
+ direction: :internal,
+ detail: "auth failed on #{providers_tried.last}, trying #{@resolved_provider}",
+ from: 'pipeline', to: "provider:#{@resolved_provider}"
+ )
+ retry
+ end
+ raise Legion::LLM::AuthError, e.message
+ rescue RubyLLM::RateLimitError => e
+ raise Legion::LLM::RateLimitError, e.message
+ rescue RubyLLM::ServerError, RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError,
+ Faraday::ServerError => e
+ raise Legion::LLM::ProviderError, e.message
+ rescue Faraday::TooManyRequestsError => e
+ raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
+ raise Legion::LLM::ProviderDown, e.message
+ end
+ end
+
+ def execute_provider_request
  @timestamps[:provider_start] = Time.now
  @timeline.record(
  category: :provider, key: 'provider:request_sent',
@@ -177,14 +213,6 @@ module Legion
 
  @timestamps[:provider_end] = Time.now
  record_provider_response
- rescue Faraday::UnauthorizedError, Faraday::ForbiddenError => e
- raise Legion::LLM::AuthError, e.message
- rescue Faraday::TooManyRequestsError => e
- raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
- rescue Faraday::ServerError => e
- raise Legion::LLM::ProviderError, e.message
- rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
- raise Legion::LLM::ProviderDown, e.message
  end
 
  def record_provider_response
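
The fallback hunks above reduce to a rescue/retry loop: each auth failure records the current provider as tried, swaps in the next candidate, and re-runs the whole request from the top of the `begin` block. A self-contained sketch of that shape; the providers and the simulated failure are illustrative, not Legion code:

```ruby
candidates = %i[openai anthropic gemini]
tried = []
current = candidates.first
begin
  raise 'simulated 401' if current == :openai # stand-in for an auth error
  puts "request served by #{current}"
rescue RuntimeError => e
  tried << current
  fallback = candidates.find { |c| !tried.include?(c) }
  raise e unless fallback # no candidates left: surface the original error
  current = fallback
  retry
end
# prints: request served by anthropic
```
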
@@ -220,6 +248,37 @@ module Legion
  end
 
  def step_provider_call_stream(&)
+ providers_tried = []
+ begin
+ execute_provider_request_stream(&)
+ rescue RubyLLM::UnauthorizedError, RubyLLM::ForbiddenError,
+ Faraday::UnauthorizedError, Faraday::ForbiddenError => e
+ providers_tried << @resolved_provider
+ fallback = find_fallback_provider(exclude: providers_tried)
+ if fallback
+ if defined?(Legion::Logging)
+ Legion::Logging.warn "[pipeline] #{@resolved_provider} stream auth failed (#{e.class}), " \
+ "falling back to #{fallback[:provider]}:#{fallback[:model]}"
+ end
+ @resolved_provider = fallback[:provider]
+ @resolved_model = fallback[:model]
+ @warnings << { type: :provider_fallback, original_error: e.message, fallback: "#{@resolved_provider}:#{@resolved_model}" }
+ retry
+ end
+ raise Legion::LLM::AuthError, e.message
+ rescue RubyLLM::RateLimitError => e
+ raise Legion::LLM::RateLimitError, e.message
+ rescue RubyLLM::ServerError, RubyLLM::ServiceUnavailableError, RubyLLM::OverloadedError,
+ Faraday::ServerError => e
+ raise Legion::LLM::ProviderError, e.message
+ rescue Faraday::TooManyRequestsError => e
+ raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
+ raise Legion::LLM::ProviderDown, e.message
+ end
+ end
+
+ def execute_provider_request_stream(&)
  @timestamps[:provider_start] = Time.now
  @timeline.record(
  category: :provider, key: 'provider:request_sent',
@@ -243,14 +302,19 @@ module Legion
 
  @timestamps[:provider_end] = Time.now
  record_provider_response
- rescue Faraday::UnauthorizedError, Faraday::ForbiddenError => e
- raise Legion::LLM::AuthError, e.message
- rescue Faraday::TooManyRequestsError => e
- raise Legion::LLM::RateLimitError.new(e.message, retry_after: extract_retry_after(e))
- rescue Faraday::ServerError => e
- raise Legion::LLM::ProviderError, e.message
- rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
- raise Legion::LLM::ProviderDown, e.message
+ end
+
+ def find_fallback_provider(exclude: [])
+ providers = Legion::LLM.settings[:providers] || {}
+ providers.each do |name, config|
+ next unless config.is_a?(Hash) && config[:enabled]
+ next if exclude.include?(name) || exclude.include?(name.to_s)
+ next if name == :ollama
+ next unless config[:default_model]
+
+ return { provider: name, model: config[:default_model] }
+ end
+ nil
  end
 
  def step_response_normalization; end
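
Since Ruby hashes iterate in insertion order, `find_fallback_provider` walks the settings in their configured order and returns the first enabled entry that has a default model and has not been tried yet; `:ollama` is skipped outright, presumably because a local model is not a substitute for failed SaaS credentials. An illustrative settings shape (models here are examples, not the gem's defaults):

```ruby
providers = {
  openai:    { enabled: true, default_model: 'gpt-4o' },
  anthropic: { enabled: true, default_model: 'claude-sonnet-4-6' },
  ollama:    { enabled: true, default_model: 'qwen3.5:latest' }
}
# With these settings loaded:
# find_fallback_provider(exclude: [:openai])
# => { provider: :anthropic, model: 'claude-sonnet-4-6' }
# :ollama is never returned, even when it is the only provider left.
```
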
@@ -134,6 +134,64 @@ module Legion
  end
  Legion::Logging.info "Configured Ollama provider (#{config[:base_url]})"
  end
+
+ SAAS_PROVIDERS = %i[bedrock anthropic openai gemini azure].freeze
+
+ def verify_providers
+ settings[:providers].each do |provider, config|
+ next unless config[:enabled]
+ next unless SAAS_PROVIDERS.include?(provider)
+
+ model = config[:default_model]
+ next unless model
+
+ verify_single_provider(provider, model, config)
+ end
+
+ recover_with_alternative_credentials
+
+ enabled = settings[:providers].select { |_, c| c.is_a?(Hash) && c[:enabled] }
+ if enabled.empty?
+ Legion::Logging.error 'No LLM providers available — all providers failed health checks or are disabled. ' \
+ 'LLM features (chat, inference, embeddings) will not work. ' \
+ 'Check API keys, network connectivity, and provider configuration.'
+ else
+ names = enabled.map { |name, c| "#{name}/#{c[:default_model] || 'auto'}" }
+ Legion::Logging.info "LLM providers available: #{names.join(', ')}"
+ end
+ end
+
+ def recover_with_alternative_credentials
+ recover_openai_with_codex
+ end
+
+ def recover_openai_with_codex
+ openai_config = settings.dig(:providers, :openai)
+ return unless openai_config.is_a?(Hash) && !openai_config[:enabled]
+
+ token = CodexConfigLoader.read_token
+ return unless token
+
+ Legion::Logging.info 'OpenAI disabled — trying Codex auth token as fallback'
+ openai_config[:api_key] = token
+ configure_openai(openai_config)
+ openai_config[:enabled] = true
+ verify_single_provider(:openai, openai_config[:default_model], openai_config)
+ rescue StandardError => e
+ Legion::Logging.debug "Codex credential recovery failed: #{e.message}" if defined?(Legion::Logging)
+ end
+
+ def verify_single_provider(provider, model, config)
+ start_time = Time.now
+ RubyLLM.chat(model: model, provider: provider).ask('Respond with only the word: pong')
+ elapsed = ((Time.now - start_time) * 1000).round
+ Legion::Logging.info "Health check #{provider}/#{model}: OK (#{elapsed}ms)"
+ rescue RubyLLM::ModelNotFoundError => e
+ Legion::Logging.warn "Health check #{provider}/#{model}: model not in RubyLLM registry (#{e.message}) — provider stays enabled"
+ rescue StandardError => e
+ Legion::Logging.warn "Health check failed for #{provider}/#{model}: #{e.class}: #{e.message} — disabling provider"
+ config[:enabled] = false
+ end
  end
  end
  end
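
Per the rescue clauses in `verify_single_provider`, a boot-time ping has three outcomes: a successful response keeps the provider enabled and logs the latency, `RubyLLM::ModelNotFoundError` also keeps it enabled (a RubyLLM registry gap, not bad credentials), and any other `StandardError` flips `config[:enabled]` to false. A minimal sketch of exercising it; the provider and model are illustrative, and the method is private to the configurator:

```ruby
config = { enabled: true, default_model: 'claude-sonnet-4-6' }
verify_single_provider(:anthropic, config[:default_model], config)
config[:enabled]
# => true  if the ping answered, or if only the registry lookup failed
# => false for auth, network, or any other error raised during the ping
```
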
@@ -150,7 +150,7 @@ module Legion
  bedrock: 'amazon.titan-embed-text-v2:0',
  openai: 'text-embedding-3-small'
  },
- ollama_preferred: %w[mxbai-embed-large bge-large snowflake-arctic-embed]
+ ollama_preferred: %w[nomic-embed-text mxbai-embed-large bge-large snowflake-arctic-embed]
  }
  end
 
@@ -158,7 +158,7 @@ module Legion
  {
  bedrock: {
  enabled: false,
- default_model: 'us.anthropic.claude-sonnet-4-6-v1',
+ default_model: 'us.anthropic.claude-sonnet-4-6',
  api_key: nil,
  secret_key: nil,
  session_token: nil,
@@ -189,7 +189,7 @@ module Legion
  },
  ollama: {
  enabled: false,
- default_model: 'llama3',
+ default_model: 'qwen3.5:latest',
  base_url: 'http://localhost:11434'
  }
  }
@@ -2,6 +2,6 @@
 
  module Legion
  module LLM
- VERSION = '0.5.20'
+ VERSION = '0.5.21'
  end
  end
data/lib/legion/llm.rb CHANGED
@@ -48,7 +48,9 @@ module Legion
  require 'legion/llm/codex_config_loader'
  CodexConfigLoader.load
 
+ resolve_llm_secrets
  configure_providers
+ verify_providers
  run_discovery
  detect_embedding_capability
  set_defaults
@@ -59,7 +61,6 @@ module Legion
  Legion::Settings[:llm][:connected] = true
  Legion::Logging.info 'Legion::LLM started'
  register_routes
- ping_provider
  end
 
  def shutdown
@@ -220,6 +221,14 @@ module Legion
 
  private
 
+ def resolve_llm_secrets
+ return unless defined?(Legion::Settings::Resolver)
+
+ Legion::Settings::Resolver.resolve_secrets!(settings)
+ rescue StandardError => e
+ Legion::Logging.warn "LLM settings resolution failed: #{e.message}" if defined?(Legion::Logging)
+ end
+
  def pipeline_enabled?
  settings[:pipeline_enabled] == true
  rescue StandardError => e
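
The changelog's `env://` and `vault://` URIs suggest settings of the shape below; `resolve_llm_secrets` hands the whole LLM settings hash to the resolver before `configure_providers` reads any API key, so late-loaded URI values get replaced first. Key names and URI paths here are illustrative, not a documented format:

```ruby
# Hypothetical pre-resolution settings; Legion::Settings::Resolver.resolve_secrets!
# is expected to rewrite the URI strings into the actual secret values.
llm_settings = {
  providers: {
    openai:    { enabled: true, api_key: 'env://OPENAI_API_KEY' },
    anthropic: { enabled: true, api_key: 'vault://secret/llm/anthropic' }
  }
}
```
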
@@ -603,11 +612,30 @@ module Legion
  next unless available
 
  resolved_model = available.is_a?(String) ? available : model&.to_s
+ next unless verify_embedding(provider, resolved_model)
+
  return { provider: provider, model: resolved_model }
  end
  nil
  end
 
+ def verify_embedding(provider, model)
+ return true if provider == :ollama
+ return true unless model
+
+ start_time = Time.now
+ RubyLLM.embed('health check', model: model, provider: provider)
+ elapsed = ((Time.now - start_time) * 1000).round
+ Legion::Logging.info "Embedding health check #{provider}/#{model}: OK (#{elapsed}ms)"
+ true
+ rescue RubyLLM::ModelNotFoundError => e
+ Legion::Logging.warn "Embedding health check #{provider}/#{model}: model not in RubyLLM registry (#{e.message}) — skipping"
+ false
+ rescue StandardError => e
+ Legion::Logging.warn "Embedding health check failed for #{provider}/#{model}: #{e.class}: #{e.message} — skipping"
+ false
+ end
+
  def probe_embedding_provider(provider, ollama_preferred)
  case provider
  when :ollama then detect_ollama_embedding(ollama_preferred)
@@ -651,19 +679,6 @@ module Legion
  Legion::Logging.warn "Discovery failed: #{e.message}"
  end
 
- def ping_provider
- model = settings[:default_model]
- provider = settings[:default_provider]
- return unless model && provider
-
- start_time = Time.now
- RubyLLM.chat(model: model, provider: provider).ask('Respond with only the word: pong')
- elapsed = ((Time.now - start_time) * 1000).round
- Legion::Logging.info "LLM ping #{provider}/#{model}: pong (#{elapsed}ms)"
- rescue StandardError => e
- Legion::Logging.warn "LLM ping failed for #{provider}/#{model}: #{e.message}"
- end
-
  def register_routes
  return unless defined?(Legion::API) && Legion::API.respond_to?(:register_library_routes)
 
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: legion-llm
  version: !ruby/object:Gem::Version
- version: 0.5.20
+ version: 0.5.21
  platform: ruby
  authors:
  - Esity