legion-llm 0.5.10 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/lib/legion/llm/embeddings.rb +117 -23
- data/lib/legion/llm/settings.rb +16 -1
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +72 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 66044d638a0a53f9694fe2c208092bd62f980e46ffe6c94e11a2e4e1189faff1
|
|
4
|
+
data.tar.gz: 2c59beb39f98553ed9727adb0efb36cb1c7327780e27b7c39aa8fc39a8a1998f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 13026e19ff1630411693f56fd4cd1a122fc85b4796aa5c9361c443464a5ef2a46c1bdc07106f1d104d015e75532bc43af70e56eb4e8a094c3afe48c700032bb4
|
|
7
|
+
data.tar.gz: 0dbd5234f42d24ae82c893c11ee3446fae3a74146246cbc42417ec129112ea140dc3788231ab6629357a3634ccc72715fb11b8e4c8b4749aa04cd14886c8efa0
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.11] - 2026-03-25
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM.can_embed?` — cached boolean for embedding capability
|
|
7
|
+
- `Legion::LLM.embedding_provider` — current embedding provider symbol
|
|
8
|
+
- `Legion::LLM.embedding_model` — current embedding model string
|
|
9
|
+
- Boot-time embedding detection with configurable provider fallback chain (ollama -> bedrock -> openai)
|
|
10
|
+
- 1024-dimension enforcement on all embedding responses (truncate if larger, reject if smaller)
|
|
11
|
+
- Runtime failover: if cached embedding provider fails, walks fallback chain for next available
|
|
12
|
+
- `llm.embedding.*` settings block with `provider_fallback`, `provider_models`, `ollama_preferred`, `dimension`, `enforce_dimension`
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- `Embeddings.generate` now uses cached provider/model from boot detection when no explicit provider given
|
|
16
|
+
- `Embeddings.generate` enforces exactly 1024 dimensions by default (configurable via `enforce_dimension: false`)
|
|
17
|
+
- Bedrock Titan model updated to `amazon.titan-embed-text-v2:0`
|
|
18
|
+
|
|
3
19
|
## [0.5.10] - 2026-03-25
|
|
4
20
|
|
|
5
21
|
### Added
|
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
module Legion
|
|
4
4
|
module LLM
|
|
5
|
+
class EmbeddingUnavailableError < LLMError; end
|
|
6
|
+
|
|
5
7
|
module Embeddings
|
|
6
8
|
PROVIDER_EMBEDDING_MODELS = {
|
|
7
|
-
bedrock: 'amazon.titan-embed-text-v2',
|
|
9
|
+
bedrock: 'amazon.titan-embed-text-v2:0',
|
|
8
10
|
anthropic: nil,
|
|
9
11
|
openai: 'text-embedding-3-small',
|
|
10
12
|
gemini: 'text-embedding-004',
|
|
@@ -12,37 +14,32 @@ module Legion
|
|
|
12
14
|
ollama: 'mxbai-embed-large'
|
|
13
15
|
}.freeze
|
|
14
16
|
|
|
17
|
+
TARGET_DIMENSION = 1024
|
|
18
|
+
|
|
15
19
|
class << self
|
|
16
20
|
def generate(text:, model: nil, provider: nil, dimensions: nil)
|
|
21
|
+
return { vector: nil, model: model, provider: provider, error: 'LLM not started' } unless LLM.started?
|
|
22
|
+
|
|
17
23
|
provider ||= resolve_provider
|
|
18
24
|
model ||= resolve_model(provider)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
{
|
|
25
|
-
vector: response.vectors.first,
|
|
26
|
-
model: model,
|
|
27
|
-
provider: provider,
|
|
28
|
-
dimensions: response.vectors.first&.size || 0,
|
|
29
|
-
tokens: response.input_tokens
|
|
30
|
-
}
|
|
25
|
+
response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
|
|
26
|
+
vector = apply_dimension_enforcement(response.vectors.first, provider)
|
|
27
|
+
return dimension_error(model, provider, vector) if vector.is_a?(String)
|
|
28
|
+
|
|
29
|
+
{ vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
|
|
31
30
|
rescue StandardError => e
|
|
32
31
|
Legion::Logging.warn "Embedding failed (#{provider}/#{model}): #{e.message}" if defined?(Legion::Logging)
|
|
33
|
-
|
|
32
|
+
handle_embed_failure(e, text: text, failed_provider: provider, failed_model: model)
|
|
34
33
|
end
|
|
35
34
|
|
|
36
35
|
def generate_batch(texts:, model: nil, provider: nil, dimensions: nil)
|
|
36
|
+
return texts.map { |_| { vector: nil, error: 'LLM not started' } } unless LLM.started?
|
|
37
|
+
|
|
37
38
|
provider ||= resolve_provider
|
|
38
39
|
model ||= resolve_model(provider)
|
|
39
|
-
|
|
40
|
-
opts[:provider] = provider if provider
|
|
41
|
-
opts[:dimensions] = dimensions if dimensions
|
|
42
|
-
|
|
43
|
-
response = RubyLLM.embed(texts, **opts)
|
|
40
|
+
response = RubyLLM.embed(texts, **build_opts(model, provider, dimensions))
|
|
44
41
|
response.vectors.each_with_index.map do |vec, i|
|
|
45
|
-
|
|
42
|
+
build_batch_entry(vec, model, provider, i)
|
|
46
43
|
end
|
|
47
44
|
rescue StandardError => e
|
|
48
45
|
Legion::Logging.warn("Batch embedding failed (#{provider}/#{model}): #{e.message}") if defined?(Legion::Logging)
|
|
@@ -55,8 +52,89 @@ module Legion
|
|
|
55
52
|
|
|
56
53
|
private
|
|
57
54
|
|
|
55
|
+
def build_opts(model, provider, dimensions)
|
|
56
|
+
target_dim = enforce_dimension? ? TARGET_DIMENSION : dimensions
|
|
57
|
+
opts = { model: model }
|
|
58
|
+
opts[:provider] = provider if provider
|
|
59
|
+
opts[:dimensions] = target_dim if target_dim && provider&.to_sym == :openai
|
|
60
|
+
opts
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def apply_dimension_enforcement(vector, provider)
|
|
64
|
+
return vector unless enforce_dimension? && vector.is_a?(Array)
|
|
65
|
+
|
|
66
|
+
enforce_dimensions(vector, provider)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def dimension_error(model, provider, message)
|
|
70
|
+
{ vector: nil, model: model, provider: provider, error: "incompatible dimension: #{message}" }
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def build_batch_entry(vec, model, provider, index)
|
|
74
|
+
vec = enforce_dimensions(vec, provider) if enforce_dimension? && vec.is_a?(Array)
|
|
75
|
+
{ vector: vec.is_a?(String) ? nil : vec, model: model, provider: provider,
|
|
76
|
+
dimensions: vec.is_a?(Array) ? vec.size : 0, index: index }
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def enforce_dimension?
|
|
80
|
+
embedding_settings[:enforce_dimension] != false
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def enforce_dimensions(vector, _provider)
|
|
84
|
+
return vector if vector.size == TARGET_DIMENSION
|
|
85
|
+
return vector.first(TARGET_DIMENSION) if vector.size > TARGET_DIMENSION
|
|
86
|
+
|
|
87
|
+
"got #{vector.size}, need #{TARGET_DIMENSION} (provider cannot upscale)"
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def handle_embed_failure(error, text:, failed_provider:, failed_model:)
|
|
91
|
+
fallback = find_fallback_provider(failed_provider)
|
|
92
|
+
if fallback
|
|
93
|
+
Legion::Logging.info "Embedding failover: #{failed_provider} -> #{fallback[:provider]}" if defined?(Legion::Logging)
|
|
94
|
+
LLM.instance_variable_set(:@embedding_provider, fallback[:provider])
|
|
95
|
+
LLM.instance_variable_set(:@embedding_model, fallback[:model])
|
|
96
|
+
generate(text: text, model: fallback[:model], provider: fallback[:provider])
|
|
97
|
+
else
|
|
98
|
+
{ vector: nil, model: failed_model, provider: failed_provider, error: error.message }
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def find_fallback_provider(failed_provider)
|
|
103
|
+
chain = embedding_settings[:provider_fallback] || %w[ollama bedrock openai]
|
|
104
|
+
models = embedding_settings[:provider_models] || {}
|
|
105
|
+
started = false
|
|
106
|
+
|
|
107
|
+
chain.each do |name|
|
|
108
|
+
sym = name.to_sym
|
|
109
|
+
if sym == failed_provider
|
|
110
|
+
started = true
|
|
111
|
+
next
|
|
112
|
+
end
|
|
113
|
+
next unless started
|
|
114
|
+
|
|
115
|
+
available = probe_fallback_provider(sym)
|
|
116
|
+
next unless available
|
|
117
|
+
|
|
118
|
+
model = available.is_a?(String) ? available : (models[name] || models[sym])&.to_s
|
|
119
|
+
return { provider: sym, model: model }
|
|
120
|
+
end
|
|
121
|
+
nil
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def probe_fallback_provider(sym)
|
|
125
|
+
case sym
|
|
126
|
+
when :ollama
|
|
127
|
+
LLM.send(:detect_ollama_embedding,
|
|
128
|
+
embedding_settings[:ollama_preferred] || %w[mxbai-embed-large])
|
|
129
|
+
else
|
|
130
|
+
LLM.send(:detect_cloud_embedding, sym)
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
58
134
|
def resolve_provider
|
|
59
|
-
|
|
135
|
+
return LLM.embedding_provider if LLM.embedding_provider
|
|
136
|
+
|
|
137
|
+
configured = embedding_settings[:provider]
|
|
60
138
|
return configured&.to_sym if configured
|
|
61
139
|
|
|
62
140
|
Legion::Settings.dig(:llm, :default_provider)&.to_sym
|
|
@@ -65,15 +143,31 @@ module Legion
|
|
|
65
143
|
end
|
|
66
144
|
|
|
67
145
|
def resolve_model(provider)
|
|
68
|
-
|
|
146
|
+
return LLM.embedding_model if LLM.embedding_model && provider == LLM.embedding_provider
|
|
147
|
+
|
|
148
|
+
configured = embedding_settings[:default_model]
|
|
69
149
|
return configured if configured
|
|
70
150
|
|
|
151
|
+
resolve_model_from_settings(provider)
|
|
152
|
+
rescue StandardError
|
|
153
|
+
'text-embedding-3-small'
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def resolve_model_from_settings(provider)
|
|
157
|
+
models = embedding_settings[:provider_models] || {}
|
|
158
|
+
pm = models[provider&.to_sym] || models[provider.to_s]
|
|
159
|
+
return pm.to_s if pm
|
|
160
|
+
|
|
71
161
|
provider_default = PROVIDER_EMBEDDING_MODELS[provider&.to_sym] if provider
|
|
72
162
|
return provider_default if provider_default
|
|
73
163
|
|
|
74
164
|
'text-embedding-3-small'
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def embedding_settings
|
|
168
|
+
Legion::Settings.dig(:llm, :embedding) || {}
|
|
75
169
|
rescue StandardError
|
|
76
|
-
|
|
170
|
+
{}
|
|
77
171
|
end
|
|
78
172
|
end
|
|
79
173
|
end
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -20,7 +20,8 @@ module Legion
|
|
|
20
20
|
arbitrage: arbitrage_defaults,
|
|
21
21
|
batch: batch_defaults,
|
|
22
22
|
scheduling: scheduling_defaults,
|
|
23
|
-
rag: rag_defaults
|
|
23
|
+
rag: rag_defaults,
|
|
24
|
+
embedding: embedding_defaults
|
|
24
25
|
}
|
|
25
26
|
end
|
|
26
27
|
|
|
@@ -127,6 +128,20 @@ module Legion
|
|
|
127
128
|
}
|
|
128
129
|
end
|
|
129
130
|
|
|
131
|
+
def self.embedding_defaults
|
|
132
|
+
{
|
|
133
|
+
dimension: 1024,
|
|
134
|
+
enforce_dimension: true,
|
|
135
|
+
provider_fallback: %w[ollama bedrock openai],
|
|
136
|
+
provider_models: {
|
|
137
|
+
ollama: 'mxbai-embed-large',
|
|
138
|
+
bedrock: 'amazon.titan-embed-text-v2:0',
|
|
139
|
+
openai: 'text-embedding-3-small'
|
|
140
|
+
},
|
|
141
|
+
ollama_preferred: %w[mxbai-embed-large bge-large snowflake-arctic-embed]
|
|
142
|
+
}
|
|
143
|
+
end
|
|
144
|
+
|
|
130
145
|
def self.providers
|
|
131
146
|
{
|
|
132
147
|
bedrock: {
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -43,6 +43,7 @@ module Legion
|
|
|
43
43
|
|
|
44
44
|
configure_providers
|
|
45
45
|
run_discovery
|
|
46
|
+
detect_embedding_capability
|
|
46
47
|
set_defaults
|
|
47
48
|
|
|
48
49
|
install_hooks
|
|
@@ -56,6 +57,9 @@ module Legion
|
|
|
56
57
|
def shutdown
|
|
57
58
|
Legion::Settings[:llm][:connected] = false
|
|
58
59
|
@started = false
|
|
60
|
+
@can_embed = nil
|
|
61
|
+
@embedding_provider = nil
|
|
62
|
+
@embedding_model = nil
|
|
59
63
|
Legion::Logging.info 'Legion::LLM shut down'
|
|
60
64
|
end
|
|
61
65
|
|
|
@@ -63,6 +67,12 @@ module Legion
|
|
|
63
67
|
@started == true
|
|
64
68
|
end
|
|
65
69
|
|
|
70
|
+
def can_embed?
|
|
71
|
+
@can_embed == true
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
attr_reader :embedding_provider, :embedding_model
|
|
75
|
+
|
|
66
76
|
def settings
|
|
67
77
|
if Legion.const_defined?('Settings')
|
|
68
78
|
Legion::Settings[:llm]
|
|
@@ -558,6 +568,68 @@ module Legion
|
|
|
558
568
|
auto_configure_defaults
|
|
559
569
|
end
|
|
560
570
|
|
|
571
|
+
def detect_embedding_capability
|
|
572
|
+
embedding_settings = settings[:embedding] || {}
|
|
573
|
+
found = find_embedding_provider(embedding_settings)
|
|
574
|
+
if found
|
|
575
|
+
@can_embed = true
|
|
576
|
+
@embedding_provider = found[:provider]
|
|
577
|
+
@embedding_model = found[:model]
|
|
578
|
+
Legion::Logging.info "Embedding available: #{@embedding_provider}:#{@embedding_model}"
|
|
579
|
+
else
|
|
580
|
+
@can_embed = false
|
|
581
|
+
Legion::Logging.info 'No embedding provider available'
|
|
582
|
+
end
|
|
583
|
+
rescue StandardError => e
|
|
584
|
+
@can_embed = false
|
|
585
|
+
Legion::Logging.warn "Embedding detection failed: #{e.message}" if defined?(Legion::Logging)
|
|
586
|
+
end
|
|
587
|
+
|
|
588
|
+
def find_embedding_provider(embedding_settings)
|
|
589
|
+
fallback = embedding_settings[:provider_fallback] || %w[ollama bedrock openai]
|
|
590
|
+
provider_models = embedding_settings[:provider_models] || {}
|
|
591
|
+
ollama_preferred = embedding_settings[:ollama_preferred] || %w[mxbai-embed-large bge-large snowflake-arctic-embed]
|
|
592
|
+
|
|
593
|
+
fallback.each do |provider_name|
|
|
594
|
+
provider = provider_name.to_sym
|
|
595
|
+
model = provider_models[provider_name] || provider_models[provider]
|
|
596
|
+
available = probe_embedding_provider(provider, ollama_preferred)
|
|
597
|
+
next unless available
|
|
598
|
+
|
|
599
|
+
resolved_model = available.is_a?(String) ? available : model&.to_s
|
|
600
|
+
return { provider: provider, model: resolved_model }
|
|
601
|
+
end
|
|
602
|
+
nil
|
|
603
|
+
end
|
|
604
|
+
|
|
605
|
+
def probe_embedding_provider(provider, ollama_preferred)
|
|
606
|
+
case provider
|
|
607
|
+
when :ollama then detect_ollama_embedding(ollama_preferred)
|
|
608
|
+
else detect_cloud_embedding(provider)
|
|
609
|
+
end
|
|
610
|
+
end
|
|
611
|
+
|
|
612
|
+
def detect_ollama_embedding(preferred_models)
|
|
613
|
+
return nil unless defined?(Legion::LLM::Discovery::Ollama)
|
|
614
|
+
return nil unless settings.dig(:providers, :ollama, :enabled)
|
|
615
|
+
|
|
616
|
+
preferred_models.each do |model|
|
|
617
|
+
return model if Legion::LLM::Discovery::Ollama.model_available?(model)
|
|
618
|
+
end
|
|
619
|
+
nil
|
|
620
|
+
rescue StandardError
|
|
621
|
+
nil
|
|
622
|
+
end
|
|
623
|
+
|
|
624
|
+
def detect_cloud_embedding(provider)
|
|
625
|
+
provider_config = settings.dig(:providers, provider)
|
|
626
|
+
return nil unless provider_config.is_a?(Hash) && provider_config[:enabled]
|
|
627
|
+
|
|
628
|
+
true
|
|
629
|
+
rescue StandardError
|
|
630
|
+
nil
|
|
631
|
+
end
|
|
632
|
+
|
|
561
633
|
def run_discovery
|
|
562
634
|
return unless settings.dig(:providers, :ollama, :enabled)
|
|
563
635
|
|