legion-llm 0.6.7 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c7af59b9f91a1e3c4cb4d6d03758dd927695aba9cbe4f9ac52f221b100a40294
4
- data.tar.gz: 53ad50449a3f984a1be1faeecb68ec35a72da3b4f691811395787c7ba4343a80
3
+ metadata.gz: 426b32ea868afbd22ddbe70fb975c8b684b5afc306bd8e2efa820c5f01819fff
4
+ data.tar.gz: 31dec2079d806c124824eac38098bd5642391547dfa53b88b1b8e4c0f657a693
5
5
  SHA512:
6
- metadata.gz: 9f4b5a94917ef6e131769128212860363a51ae893feb9dbe7c5b53f3697a15a78680436d152e294f5b7498cf59a167e973c98307df896186dedc83feb9614b5a
7
- data.tar.gz: 4621025b19339b1e627da7b5338b034c8c459090a29c539a409aeea80549bac8a5797d314ff6e4b9715eebd9d35941cbce2f0f4d25e6f1a6b5a40bc815a7e0bb
6
+ metadata.gz: 36d79c94a91d353e6cff51c4a23a082f8e21b7d886edca017f2cfa13d7d292495aa9375007d781a056e98dcffc1b1fda0290affbb88387dc028dacbb35ee1fb1
7
+ data.tar.gz: 4a0c36734d64cd7a86efa760d0fd932a542f52823cdb3096ffd099808274d77a0c8f7cc569abe1edc2ee0f3cf3f084a90ab040c88991bd9c4b842c612830ba4d
data/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.6.8] - 2026-04-01
6
+
7
+ ### Added
8
+ - `ASYNC_SAFE_STEPS` constant and async dispatch for fire-and-forget post-provider pipeline steps (`post_response`, `knowledge_capture`, `response_return`) via a shared `Concurrent::FixedThreadPool` — frees Puma threads immediately after response normalization; `context_store` continues to run synchronously
9
+ - `pipeline_async_post_steps` setting (default `true`) controls sync vs async post-step dispatch
10
+ - Cached embed provider fallback chain at startup (`build_embedding_fallback_chain`, `provider_enabled?`) — stops re-probing on every embed failure
11
+ - Hard gate on disabled providers in `Embeddings.generate` and `generate_batch` — providers with `enabled: false` are never tried
12
+
13
+ ### Changed
14
+ - `embedding_defaults[:provider_fallback]` no longer includes `azure` (it previously leaked in via `deep_merge` array concatenation)
15
+ - `find_fallback_provider` walks the fallback chain cached at startup instead of re-probing providers live on every failure
16
+
5
17
  ## [0.6.7] - 2026-04-01
6
18
 
7
19
  ### Added
@@ -34,8 +34,10 @@ module Legion
34
34
  return { vector: nil, model: model, provider: provider, error: 'LLM not started' } unless LLM.started?
35
35
 
36
36
  provider ||= resolve_provider
37
- model ||= resolve_model(provider)
38
- text = apply_prefix(text, model: model, task: task)
37
+ return { vector: nil, model: model, provider: provider, error: "provider #{provider} is disabled" } if provider_disabled?(provider)
38
+
39
+ model ||= resolve_model(provider)
40
+ text = apply_prefix(text, model: model, task: task)
39
41
 
40
42
  return generate_ollama(text: text, model: model) if provider&.to_sym == :ollama
41
43
  return generate_azure(text: text, model: model, dimensions: dimensions) if provider&.to_sym == :azure
@@ -54,8 +56,11 @@ module Legion
54
56
  return texts.map { |_| { vector: nil, error: 'LLM not started' } } unless LLM.started?
55
57
 
56
58
  provider ||= resolve_provider
57
- model ||= resolve_model(provider)
58
- texts = texts.map { |t| apply_prefix(t, model: model, task: task) }
59
+ disabled_result = disabled_batch_result(texts, provider, model)
60
+ return disabled_result if disabled_result
61
+
62
+ model ||= resolve_model(provider)
63
+ texts = texts.map { |t| apply_prefix(t, model: model, task: task) }
59
64
 
60
65
  return generate_ollama_batch(texts: texts, model: model) if provider&.to_sym == :ollama
61
66
  return generate_azure_batch(texts: texts, model: model, dimensions: dimensions) if provider&.to_sym == :azure
@@ -75,6 +80,24 @@ module Legion
75
80
 
76
81
  private
77
82
 
83
+ def disabled_batch_result(texts, provider, model)
84
+ return nil unless provider_disabled?(provider)
85
+
86
+ model ||= resolve_model(provider)
87
+ texts.each_with_index.map do |_, i|
88
+ { vector: nil, model: model, provider: provider, dimensions: 0, index: i, error: "provider #{provider} is disabled" }
89
+ end
90
+ end
91
+
92
+ def provider_disabled?(provider)
93
+ return false unless provider
94
+
95
+ config = Legion::Settings.dig(:llm, :providers, provider.to_sym)
96
+ config.is_a?(Hash) && config[:enabled] == false
97
+ rescue StandardError
98
+ false
99
+ end
100
+
78
101
  def build_opts(model, provider, dimensions)
79
102
  target_dim = enforce_dimension? ? TARGET_DIMENSION : dimensions
80
103
  opts = { model: model }
@@ -113,7 +136,6 @@ module Legion
113
136
  def handle_embed_failure(error, text:, failed_provider:, failed_model:)
114
137
  fallback = find_fallback_provider(failed_provider)
115
138
  if fallback
116
- Legion::Logging.info "Embedding failover: #{failed_provider} -> #{fallback[:provider]}" if defined?(Legion::Logging)
117
139
  generate(text: text, model: fallback[:model], provider: fallback[:provider])
118
140
  else
119
141
  { vector: nil, model: failed_model, provider: failed_provider, error: error.message }
@@ -121,37 +143,25 @@ module Legion
121
143
  end
122
144
 
123
145
  def find_fallback_provider(failed_provider)
124
- chain = embedding_settings[:provider_fallback] || %w[ollama bedrock openai]
125
- models = embedding_settings[:provider_models] || {}
126
- started = false
146
+ chain = LLM.embedding_fallback_chain
147
+ return nil unless chain.is_a?(Array) && chain.any?
127
148
 
128
- chain.each do |name|
129
- sym = name.to_sym
130
- if sym == failed_provider
149
+ started = false
150
+ chain.each do |entry|
151
+ if entry[:provider] == failed_provider&.to_sym
131
152
  started = true
132
153
  next
133
154
  end
134
155
  next unless started
156
+ # Skip providers that are explicitly disabled in the fallback chain
157
+ next if provider_disabled?(entry[:provider])
135
158
 
136
- available = probe_fallback_provider(sym)
137
- next unless available
138
-
139
- model = available.is_a?(String) ? available : (models[name] || models[sym])&.to_s
140
- return { provider: sym, model: model }
159
+ Legion::Logging.info "Embedding failover: #{failed_provider} -> #{entry[:provider]}" if defined?(Legion::Logging)
160
+ return entry
141
161
  end
142
162
  nil
143
163
  end
144
164
 
145
- def probe_fallback_provider(sym)
146
- case sym
147
- when :ollama
148
- LLM.send(:detect_ollama_embedding,
149
- embedding_settings[:ollama_preferred] || %w[mxbai-embed-large])
150
- else
151
- LLM.send(:detect_cloud_embedding, sym)
152
- end
153
- end
154
-
155
165
  def resolve_provider
156
166
  return LLM.embedding_provider if LLM.embedding_provider
157
167
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'concurrent'
4
+
3
5
  module Legion
4
6
  module LLM
5
7
  module Pipeline
@@ -40,6 +42,10 @@ module Legion
40
42
  response_normalization debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
41
43
  ].freeze
42
44
 
45
+ ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
46
+
47
+ ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
48
+
43
49
  def initialize(request)
44
50
  @request = request
45
51
  @profile = Profile.derive(request.caller)
@@ -515,12 +521,52 @@ module Legion
515
521
  end
516
522
 
517
523
  def execute_post_provider_steps
524
+ if async_post_enabled?
525
+ execute_post_provider_steps_mixed
526
+ else
527
+ POST_PROVIDER_STEPS.each do |step|
528
+ next if Profile.skip?(@profile, step)
529
+
530
+ execute_step(step) { send(:"step_#{step}") }
531
+ end
532
+ end
533
+ end
534
+
535
+ def execute_post_provider_steps_mixed
518
536
  POST_PROVIDER_STEPS.each do |step|
519
537
  next if Profile.skip?(@profile, step)
538
+ next if ASYNC_SAFE_STEPS.include?(step)
520
539
 
521
540
  execute_step(step) { send(:"step_#{step}") }
522
541
  end
542
+
543
+ async_steps = POST_PROVIDER_STEPS.select { |s| ASYNC_SAFE_STEPS.include?(s) }
544
+ return if async_steps.empty?
545
+
546
+ # Snapshot timeline and warnings before firing the async thread so that
547
+ # build_response (called on the main thread immediately after) reads a
548
+ # consistent, immutable view rather than racing with async writes.
549
+ @_response_timeline_snapshot = @timeline.events.dup.freeze
550
+ @_response_warnings_snapshot = @warnings.dup.freeze
551
+ @_response_participants_snapshot = @timeline.participants.dup.freeze
552
+
553
+ profile = @profile
554
+ ASYNC_THREAD_POOL.post do
555
+ async_steps.each do |step|
556
+ next if Profile.skip?(profile, step)
557
+
558
+ send(:"step_#{step}")
559
+ end
560
+ rescue StandardError => e
561
+ Legion::Logging.warn("[pipeline] async post-step error: #{e.message}") if defined?(Legion::Logging)
562
+ end
563
+ end
564
+ private :execute_post_provider_steps_mixed
565
+
566
+ def async_post_enabled?
567
+ Legion::LLM.settings[:pipeline_async_post_steps] == true
523
568
  end
569
+ private :async_post_enabled?
524
570
 
525
571
  def step_provider_call_stream(&)
526
572
  providers_tried = []
@@ -713,6 +759,12 @@ module Legion
713
759
 
714
760
  @timestamps[:returned] = Time.now
715
761
 
762
+ # Use pre-built snapshots when async post-steps are running concurrently
763
+ # to avoid reading partially-mutated timeline/warnings state.
764
+ timeline_events = @_response_timeline_snapshot || @timeline.events
765
+ timeline_parts = @_response_participants_snapshot || @timeline.participants
766
+ warnings_snapshot = @_response_warnings_snapshot || @warnings
767
+
716
768
  Response.build(
717
769
  request_id: @request.id,
718
770
  conversation_id: @request.conversation_id || "conv_#{SecureRandom.hex(8)}",
@@ -723,9 +775,9 @@ module Legion
723
775
  timestamps: @timestamps,
724
776
  enrichments: @enrichments,
725
777
  audit: @audit,
726
- timeline: @timeline.events,
727
- participants: @timeline.participants,
728
- warnings: @warnings,
778
+ timeline: timeline_events,
779
+ participants: timeline_parts,
780
+ warnings: warnings_snapshot,
729
781
  tracing: @tracing,
730
782
  caller: @request.caller,
731
783
  classification: @request.classification,
@@ -6,29 +6,30 @@ module Legion
6
6
  def self.default
7
7
  model_override = ENV.fetch('ANTHROPIC_MODEL', nil)
8
8
  {
9
- enabled: true,
10
- connected: false,
11
- pipeline_enabled: true,
12
- default_model: model_override,
13
- default_provider: nil,
14
- providers: providers,
15
- routing: routing_defaults,
16
- budget: budget_defaults,
17
- confidence: confidence_defaults,
18
- discovery: discovery_defaults,
19
- gateway: gateway_defaults,
20
- daemon: daemon_defaults,
21
- prompt_caching: prompt_caching_defaults,
22
- arbitrage: arbitrage_defaults,
23
- batch: batch_defaults,
24
- scheduling: scheduling_defaults,
25
- rag: rag_defaults,
26
- embedding: embedding_defaults,
27
- conversation: conversation_defaults,
28
- telemetry: telemetry_defaults,
29
- context_curation: context_curation_defaults,
30
- debate: debate_defaults,
31
- provider_layer: provider_layer_defaults
9
+ enabled: true,
10
+ connected: false,
11
+ pipeline_enabled: true,
12
+ pipeline_async_post_steps: true,
13
+ default_model: model_override,
14
+ default_provider: nil,
15
+ providers: providers,
16
+ routing: routing_defaults,
17
+ budget: budget_defaults,
18
+ confidence: confidence_defaults,
19
+ discovery: discovery_defaults,
20
+ gateway: gateway_defaults,
21
+ daemon: daemon_defaults,
22
+ prompt_caching: prompt_caching_defaults,
23
+ arbitrage: arbitrage_defaults,
24
+ batch: batch_defaults,
25
+ scheduling: scheduling_defaults,
26
+ rag: rag_defaults,
27
+ embedding: embedding_defaults,
28
+ conversation: conversation_defaults,
29
+ telemetry: telemetry_defaults,
30
+ context_curation: context_curation_defaults,
31
+ debate: debate_defaults,
32
+ provider_layer: provider_layer_defaults
32
33
  }
33
34
  end
34
35
 
@@ -165,7 +166,7 @@ module Legion
165
166
  {
166
167
  dimension: 1024,
167
168
  enforce_dimension: true,
168
- provider_fallback: %w[azure ollama bedrock openai],
169
+ provider_fallback: %w[ollama bedrock openai],
169
170
  provider_models: {
170
171
  ollama: 'mxbai-embed-large',
171
172
  azure: 'text-embedding-3-small',
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.6.7'
5
+ VERSION = '0.6.8'
6
6
  end
7
7
  end
data/lib/legion/llm.rb CHANGED
@@ -75,6 +75,7 @@ module Legion
75
75
  @can_embed = nil
76
76
  @embedding_provider = nil
77
77
  @embedding_model = nil
78
+ @embedding_fallback_chain = nil
78
79
  ProviderRegistry.reset!
79
80
  Legion::Logging.info 'Legion::LLM shut down'
80
81
  end
@@ -87,7 +88,7 @@ module Legion
87
88
  @can_embed == true
88
89
  end
89
90
 
90
- attr_reader :embedding_provider, :embedding_model
91
+ attr_reader :embedding_provider, :embedding_model, :embedding_fallback_chain
91
92
 
92
93
  def settings
93
94
  if Legion.const_defined?('Settings', false)
@@ -630,13 +631,16 @@ module Legion
630
631
  @can_embed = true
631
632
  @embedding_provider = found[:provider]
632
633
  @embedding_model = found[:model]
634
+ @embedding_fallback_chain = build_embedding_fallback_chain(embedding_settings)
633
635
  Legion::Logging.info "Embedding available: #{@embedding_provider}:#{@embedding_model}"
634
636
  else
635
637
  @can_embed = false
638
+ @embedding_fallback_chain = []
636
639
  Legion::Logging.info 'No embedding provider available'
637
640
  end
638
641
  rescue StandardError => e
639
642
  @can_embed = false
643
+ @embedding_fallback_chain = []
640
644
  Legion::Logging.warn "Embedding detection failed: #{e.message}" if defined?(Legion::Logging)
641
645
  end
642
646
 
@@ -705,6 +709,28 @@ module Legion
705
709
  nil
706
710
  end
707
711
 
712
+ def build_embedding_fallback_chain(embedding_settings)
713
+ fallback = embedding_settings[:provider_fallback] || %w[ollama bedrock openai]
714
+ provider_models = embedding_settings[:provider_models] || {}
715
+ ollama_preferred = embedding_settings[:ollama_preferred] || %w[mxbai-embed-large bge-large snowflake-arctic-embed]
716
+
717
+ fallback.filter_map do |provider_name|
718
+ provider = provider_name.to_sym
719
+ next unless provider_enabled?(provider)
720
+
721
+ available = probe_embedding_provider(provider, ollama_preferred)
722
+ next unless available
723
+
724
+ model = available.is_a?(String) ? available : (provider_models[provider_name] || provider_models[provider])&.to_s
725
+ { provider: provider, model: model }
726
+ end
727
+ end
728
+
729
+ def provider_enabled?(provider)
730
+ config = settings.dig(:providers, provider)
731
+ config.is_a?(Hash) && config[:enabled] != false
732
+ end
733
+
708
734
  def run_discovery
709
735
  return unless settings.dig(:providers, :ollama, :enabled)
710
736
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.7
4
+ version: 0.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity