legion-llm 0.6.7 → 0.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/legion/llm/embeddings.rb +36 -26
- data/lib/legion/llm/pipeline/executor.rb +55 -3
- data/lib/legion/llm/settings.rb +25 -24
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +27 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 426b32ea868afbd22ddbe70fb975c8b684b5afc306bd8e2efa820c5f01819fff
|
|
4
|
+
data.tar.gz: 31dec2079d806c124824eac38098bd5642391547dfa53b88b1b8e4c0f657a693
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 36d79c94a91d353e6cff51c4a23a082f8e21b7d886edca017f2cfa13d7d292495aa9375007d781a056e98dcffc1b1fda0290affbb88387dc028dacbb35ee1fb1
|
|
7
|
+
data.tar.gz: 4a0c36734d64cd7a86efa760d0fd932a542f52823cdb3096ffd099808274d77a0c8f7cc569abe1edc2ee0f3cf3f084a90ab040c88991bd9c4b842c612830ba4d
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.6.8] - 2026-04-01
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- `ASYNC_SAFE_STEPS` constant and async dispatch for fire-and-forget post-provider pipeline steps (`post_response`, `knowledge_capture`, `response_return`) via a shared `Concurrent::FixedThreadPool` — frees Puma threads immediately after response normalization; `context_store` continues to run synchronously
|
|
9
|
+
- `pipeline_async_post_steps` setting (default `true`) controls sync vs async post-step dispatch
|
|
10
|
+
- Cached embed provider fallback chain at startup (`build_embedding_fallback_chain`, `provider_enabled?`) — stops re-probing on every embed failure
|
|
11
|
+
- Hard gate on disabled providers in `Embeddings.generate` and `generate_batch` — providers with `enabled: false` are never tried
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- `embedding_defaults[:provider_fallback]` no longer includes `azure` (leaked via `deep_merge` array concat)
|
|
15
|
+
- `find_fallback_provider` walks the cached startup chain instead of re-probing live
|
|
16
|
+
|
|
5
17
|
## [0.6.7] - 2026-04-01
|
|
6
18
|
|
|
7
19
|
### Added
|
data/lib/legion/llm/embeddings.rb
CHANGED
|
@@ -34,8 +34,10 @@ module Legion
|
|
|
34
34
|
return { vector: nil, model: model, provider: provider, error: 'LLM not started' } unless LLM.started?
|
|
35
35
|
|
|
36
36
|
provider ||= resolve_provider
|
|
37
|
-
model
|
|
38
|
-
|
|
37
|
+
return { vector: nil, model: model, provider: provider, error: "provider #{provider} is disabled" } if provider_disabled?(provider)
|
|
38
|
+
|
|
39
|
+
model ||= resolve_model(provider)
|
|
40
|
+
text = apply_prefix(text, model: model, task: task)
|
|
39
41
|
|
|
40
42
|
return generate_ollama(text: text, model: model) if provider&.to_sym == :ollama
|
|
41
43
|
return generate_azure(text: text, model: model, dimensions: dimensions) if provider&.to_sym == :azure
|
|
@@ -54,8 +56,11 @@ module Legion
|
|
|
54
56
|
return texts.map { |_| { vector: nil, error: 'LLM not started' } } unless LLM.started?
|
|
55
57
|
|
|
56
58
|
provider ||= resolve_provider
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
disabled_result = disabled_batch_result(texts, provider, model)
|
|
60
|
+
return disabled_result if disabled_result
|
|
61
|
+
|
|
62
|
+
model ||= resolve_model(provider)
|
|
63
|
+
texts = texts.map { |t| apply_prefix(t, model: model, task: task) }
|
|
59
64
|
|
|
60
65
|
return generate_ollama_batch(texts: texts, model: model) if provider&.to_sym == :ollama
|
|
61
66
|
return generate_azure_batch(texts: texts, model: model, dimensions: dimensions) if provider&.to_sym == :azure
|
|
@@ -75,6 +80,24 @@ module Legion
|
|
|
75
80
|
|
|
76
81
|
private
|
|
77
82
|
|
|
83
|
+
def disabled_batch_result(texts, provider, model)
|
|
84
|
+
return nil unless provider_disabled?(provider)
|
|
85
|
+
|
|
86
|
+
model ||= resolve_model(provider)
|
|
87
|
+
texts.each_with_index.map do |_, i|
|
|
88
|
+
{ vector: nil, model: model, provider: provider, dimensions: 0, index: i, error: "provider #{provider} is disabled" }
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def provider_disabled?(provider)
|
|
93
|
+
return false unless provider
|
|
94
|
+
|
|
95
|
+
config = Legion::Settings.dig(:llm, :providers, provider.to_sym)
|
|
96
|
+
config.is_a?(Hash) && config[:enabled] == false
|
|
97
|
+
rescue StandardError
|
|
98
|
+
false
|
|
99
|
+
end
|
|
100
|
+
|
|
78
101
|
def build_opts(model, provider, dimensions)
|
|
79
102
|
target_dim = enforce_dimension? ? TARGET_DIMENSION : dimensions
|
|
80
103
|
opts = { model: model }
|
|
@@ -113,7 +136,6 @@ module Legion
|
|
|
113
136
|
def handle_embed_failure(error, text:, failed_provider:, failed_model:)
|
|
114
137
|
fallback = find_fallback_provider(failed_provider)
|
|
115
138
|
if fallback
|
|
116
|
-
Legion::Logging.info "Embedding failover: #{failed_provider} -> #{fallback[:provider]}" if defined?(Legion::Logging)
|
|
117
139
|
generate(text: text, model: fallback[:model], provider: fallback[:provider])
|
|
118
140
|
else
|
|
119
141
|
{ vector: nil, model: failed_model, provider: failed_provider, error: error.message }
|
|
@@ -121,37 +143,25 @@ module Legion
|
|
|
121
143
|
end
|
|
122
144
|
|
|
123
145
|
def find_fallback_provider(failed_provider)
|
|
124
|
-
chain =
|
|
125
|
-
|
|
126
|
-
started = false
|
|
146
|
+
chain = LLM.embedding_fallback_chain
|
|
147
|
+
return nil unless chain.is_a?(Array) && chain.any?
|
|
127
148
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
if
|
|
149
|
+
started = false
|
|
150
|
+
chain.each do |entry|
|
|
151
|
+
if entry[:provider] == failed_provider&.to_sym
|
|
131
152
|
started = true
|
|
132
153
|
next
|
|
133
154
|
end
|
|
134
155
|
next unless started
|
|
156
|
+
# Skip providers that are explicitly disabled in the fallback chain
|
|
157
|
+
next if provider_disabled?(entry[:provider])
|
|
135
158
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
model = available.is_a?(String) ? available : (models[name] || models[sym])&.to_s
|
|
140
|
-
return { provider: sym, model: model }
|
|
159
|
+
Legion::Logging.info "Embedding failover: #{failed_provider} -> #{entry[:provider]}" if defined?(Legion::Logging)
|
|
160
|
+
return entry
|
|
141
161
|
end
|
|
142
162
|
nil
|
|
143
163
|
end
|
|
144
164
|
|
|
145
|
-
def probe_fallback_provider(sym)
|
|
146
|
-
case sym
|
|
147
|
-
when :ollama
|
|
148
|
-
LLM.send(:detect_ollama_embedding,
|
|
149
|
-
embedding_settings[:ollama_preferred] || %w[mxbai-embed-large])
|
|
150
|
-
else
|
|
151
|
-
LLM.send(:detect_cloud_embedding, sym)
|
|
152
|
-
end
|
|
153
|
-
end
|
|
154
|
-
|
|
155
165
|
def resolve_provider
|
|
156
166
|
return LLM.embedding_provider if LLM.embedding_provider
|
|
157
167
|
|
data/lib/legion/llm/pipeline/executor.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'concurrent'
|
|
4
|
+
|
|
3
5
|
module Legion
|
|
4
6
|
module LLM
|
|
5
7
|
module Pipeline
|
|
@@ -40,6 +42,10 @@ module Legion
|
|
|
40
42
|
response_normalization debate confidence_scoring tool_calls context_store post_response knowledge_capture response_return
|
|
41
43
|
].freeze
|
|
42
44
|
|
|
45
|
+
ASYNC_SAFE_STEPS = %i[post_response knowledge_capture response_return].freeze
|
|
46
|
+
|
|
47
|
+
ASYNC_THREAD_POOL = Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)
|
|
48
|
+
|
|
43
49
|
def initialize(request)
|
|
44
50
|
@request = request
|
|
45
51
|
@profile = Profile.derive(request.caller)
|
|
@@ -515,12 +521,52 @@ module Legion
|
|
|
515
521
|
end
|
|
516
522
|
|
|
517
523
|
def execute_post_provider_steps
|
|
524
|
+
if async_post_enabled?
|
|
525
|
+
execute_post_provider_steps_mixed
|
|
526
|
+
else
|
|
527
|
+
POST_PROVIDER_STEPS.each do |step|
|
|
528
|
+
next if Profile.skip?(@profile, step)
|
|
529
|
+
|
|
530
|
+
execute_step(step) { send(:"step_#{step}") }
|
|
531
|
+
end
|
|
532
|
+
end
|
|
533
|
+
end
|
|
534
|
+
|
|
535
|
+
def execute_post_provider_steps_mixed
|
|
518
536
|
POST_PROVIDER_STEPS.each do |step|
|
|
519
537
|
next if Profile.skip?(@profile, step)
|
|
538
|
+
next if ASYNC_SAFE_STEPS.include?(step)
|
|
520
539
|
|
|
521
540
|
execute_step(step) { send(:"step_#{step}") }
|
|
522
541
|
end
|
|
542
|
+
|
|
543
|
+
async_steps = POST_PROVIDER_STEPS.select { |s| ASYNC_SAFE_STEPS.include?(s) }
|
|
544
|
+
return if async_steps.empty?
|
|
545
|
+
|
|
546
|
+
# Snapshot timeline and warnings before firing the async thread so that
|
|
547
|
+
# build_response (called on the main thread immediately after) reads a
|
|
548
|
+
# consistent, immutable view rather than racing with async writes.
|
|
549
|
+
@_response_timeline_snapshot = @timeline.events.dup.freeze
|
|
550
|
+
@_response_warnings_snapshot = @warnings.dup.freeze
|
|
551
|
+
@_response_participants_snapshot = @timeline.participants.dup.freeze
|
|
552
|
+
|
|
553
|
+
profile = @profile
|
|
554
|
+
ASYNC_THREAD_POOL.post do
|
|
555
|
+
async_steps.each do |step|
|
|
556
|
+
next if Profile.skip?(profile, step)
|
|
557
|
+
|
|
558
|
+
send(:"step_#{step}")
|
|
559
|
+
end
|
|
560
|
+
rescue StandardError => e
|
|
561
|
+
Legion::Logging.warn("[pipeline] async post-step error: #{e.message}") if defined?(Legion::Logging)
|
|
562
|
+
end
|
|
563
|
+
end
|
|
564
|
+
private :execute_post_provider_steps_mixed
|
|
565
|
+
|
|
566
|
+
def async_post_enabled?
|
|
567
|
+
Legion::LLM.settings[:pipeline_async_post_steps] == true
|
|
523
568
|
end
|
|
569
|
+
private :async_post_enabled?
|
|
524
570
|
|
|
525
571
|
def step_provider_call_stream(&)
|
|
526
572
|
providers_tried = []
|
|
@@ -713,6 +759,12 @@ module Legion
|
|
|
713
759
|
|
|
714
760
|
@timestamps[:returned] = Time.now
|
|
715
761
|
|
|
762
|
+
# Use pre-built snapshots when async post-steps are running concurrently
|
|
763
|
+
# to avoid reading partially-mutated timeline/warnings state.
|
|
764
|
+
timeline_events = @_response_timeline_snapshot || @timeline.events
|
|
765
|
+
timeline_parts = @_response_participants_snapshot || @timeline.participants
|
|
766
|
+
warnings_snapshot = @_response_warnings_snapshot || @warnings
|
|
767
|
+
|
|
716
768
|
Response.build(
|
|
717
769
|
request_id: @request.id,
|
|
718
770
|
conversation_id: @request.conversation_id || "conv_#{SecureRandom.hex(8)}",
|
|
@@ -723,9 +775,9 @@ module Legion
|
|
|
723
775
|
timestamps: @timestamps,
|
|
724
776
|
enrichments: @enrichments,
|
|
725
777
|
audit: @audit,
|
|
726
|
-
timeline:
|
|
727
|
-
participants:
|
|
728
|
-
warnings:
|
|
778
|
+
timeline: timeline_events,
|
|
779
|
+
participants: timeline_parts,
|
|
780
|
+
warnings: warnings_snapshot,
|
|
729
781
|
tracing: @tracing,
|
|
730
782
|
caller: @request.caller,
|
|
731
783
|
classification: @request.classification,
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -6,29 +6,30 @@ module Legion
|
|
|
6
6
|
def self.default
|
|
7
7
|
model_override = ENV.fetch('ANTHROPIC_MODEL', nil)
|
|
8
8
|
{
|
|
9
|
-
enabled:
|
|
10
|
-
connected:
|
|
11
|
-
pipeline_enabled:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
9
|
+
enabled: true,
|
|
10
|
+
connected: false,
|
|
11
|
+
pipeline_enabled: true,
|
|
12
|
+
pipeline_async_post_steps: true,
|
|
13
|
+
default_model: model_override,
|
|
14
|
+
default_provider: nil,
|
|
15
|
+
providers: providers,
|
|
16
|
+
routing: routing_defaults,
|
|
17
|
+
budget: budget_defaults,
|
|
18
|
+
confidence: confidence_defaults,
|
|
19
|
+
discovery: discovery_defaults,
|
|
20
|
+
gateway: gateway_defaults,
|
|
21
|
+
daemon: daemon_defaults,
|
|
22
|
+
prompt_caching: prompt_caching_defaults,
|
|
23
|
+
arbitrage: arbitrage_defaults,
|
|
24
|
+
batch: batch_defaults,
|
|
25
|
+
scheduling: scheduling_defaults,
|
|
26
|
+
rag: rag_defaults,
|
|
27
|
+
embedding: embedding_defaults,
|
|
28
|
+
conversation: conversation_defaults,
|
|
29
|
+
telemetry: telemetry_defaults,
|
|
30
|
+
context_curation: context_curation_defaults,
|
|
31
|
+
debate: debate_defaults,
|
|
32
|
+
provider_layer: provider_layer_defaults
|
|
32
33
|
}
|
|
33
34
|
end
|
|
34
35
|
|
|
@@ -165,7 +166,7 @@ module Legion
|
|
|
165
166
|
{
|
|
166
167
|
dimension: 1024,
|
|
167
168
|
enforce_dimension: true,
|
|
168
|
-
provider_fallback: %w[
|
|
169
|
+
provider_fallback: %w[ollama bedrock openai],
|
|
169
170
|
provider_models: {
|
|
170
171
|
ollama: 'mxbai-embed-large',
|
|
171
172
|
azure: 'text-embedding-3-small',
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -75,6 +75,7 @@ module Legion
|
|
|
75
75
|
@can_embed = nil
|
|
76
76
|
@embedding_provider = nil
|
|
77
77
|
@embedding_model = nil
|
|
78
|
+
@embedding_fallback_chain = nil
|
|
78
79
|
ProviderRegistry.reset!
|
|
79
80
|
Legion::Logging.info 'Legion::LLM shut down'
|
|
80
81
|
end
|
|
@@ -87,7 +88,7 @@ module Legion
|
|
|
87
88
|
@can_embed == true
|
|
88
89
|
end
|
|
89
90
|
|
|
90
|
-
attr_reader :embedding_provider, :embedding_model
|
|
91
|
+
attr_reader :embedding_provider, :embedding_model, :embedding_fallback_chain
|
|
91
92
|
|
|
92
93
|
def settings
|
|
93
94
|
if Legion.const_defined?('Settings', false)
|
|
@@ -630,13 +631,16 @@ module Legion
|
|
|
630
631
|
@can_embed = true
|
|
631
632
|
@embedding_provider = found[:provider]
|
|
632
633
|
@embedding_model = found[:model]
|
|
634
|
+
@embedding_fallback_chain = build_embedding_fallback_chain(embedding_settings)
|
|
633
635
|
Legion::Logging.info "Embedding available: #{@embedding_provider}:#{@embedding_model}"
|
|
634
636
|
else
|
|
635
637
|
@can_embed = false
|
|
638
|
+
@embedding_fallback_chain = []
|
|
636
639
|
Legion::Logging.info 'No embedding provider available'
|
|
637
640
|
end
|
|
638
641
|
rescue StandardError => e
|
|
639
642
|
@can_embed = false
|
|
643
|
+
@embedding_fallback_chain = []
|
|
640
644
|
Legion::Logging.warn "Embedding detection failed: #{e.message}" if defined?(Legion::Logging)
|
|
641
645
|
end
|
|
642
646
|
|
|
@@ -705,6 +709,28 @@ module Legion
|
|
|
705
709
|
nil
|
|
706
710
|
end
|
|
707
711
|
|
|
712
|
+
def build_embedding_fallback_chain(embedding_settings)
|
|
713
|
+
fallback = embedding_settings[:provider_fallback] || %w[ollama bedrock openai]
|
|
714
|
+
provider_models = embedding_settings[:provider_models] || {}
|
|
715
|
+
ollama_preferred = embedding_settings[:ollama_preferred] || %w[mxbai-embed-large bge-large snowflake-arctic-embed]
|
|
716
|
+
|
|
717
|
+
fallback.filter_map do |provider_name|
|
|
718
|
+
provider = provider_name.to_sym
|
|
719
|
+
next unless provider_enabled?(provider)
|
|
720
|
+
|
|
721
|
+
available = probe_embedding_provider(provider, ollama_preferred)
|
|
722
|
+
next unless available
|
|
723
|
+
|
|
724
|
+
model = available.is_a?(String) ? available : (provider_models[provider_name] || provider_models[provider])&.to_s
|
|
725
|
+
{ provider: provider, model: model }
|
|
726
|
+
end
|
|
727
|
+
end
|
|
728
|
+
|
|
729
|
+
def provider_enabled?(provider)
|
|
730
|
+
config = settings.dig(:providers, provider)
|
|
731
|
+
config.is_a?(Hash) && config[:enabled] != false
|
|
732
|
+
end
|
|
733
|
+
|
|
708
734
|
def run_discovery
|
|
709
735
|
return unless settings.dig(:providers, :ollama, :enabled)
|
|
710
736
|
|