legion-llm 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/lib/legion/llm/embeddings.rb +67 -0
- data/lib/legion/llm/settings.rb +2 -1
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: eac9a164ca981a40e76eb183e9a1a6809a407d28c8e898fbdbcb38d65e26406a
|
|
4
|
+
data.tar.gz: 993e3c5f37a40aced5ba745e967b5e5c662bea7b734f2fe2343d248d9b0a5e7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 46fcb27d23b7d531d3b7a9a14d9248f9c61ae3eaeca46a46b8d892d33f3ae1bbc7eff4073cd3cd4ad54aee4170a44a3d186f7d820770d8b3610f4baccc4e2a73
|
|
7
|
+
data.tar.gz: b005e7b350db1aef999dada991407da94bc44ca0911a4cf959edfb333d60ca5b90aa87f9772f0cba8a3c85726951351dcc124621eb79df29511cd98d0d50bf82
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.6.4] - 2026-04-01
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- Direct Azure OpenAI embedding provider with SNI host header injection, bypasses ruby_llm and DNS — connects to private endpoint IP with correct Host header
|
|
9
|
+
- Azure embedding supports single and batch requests, dimension enforcement, and settings-driven IP override (`llm.embedding.azure.ip`)
|
|
10
|
+
- Default embedding fallback chain: azure -> ollama -> bedrock -> openai
|
|
11
|
+
|
|
5
12
|
## [0.6.3] - 2026-03-31
|
|
6
13
|
|
|
7
14
|
### Changed
|
|
@@ -38,6 +38,7 @@ module Legion
|
|
|
38
38
|
text = apply_prefix(text, model: model, task: task)
|
|
39
39
|
|
|
40
40
|
return generate_ollama(text: text, model: model) if provider&.to_sym == :ollama
|
|
41
|
+
return generate_azure(text: text, model: model, dimensions: dimensions) if provider&.to_sym == :azure
|
|
41
42
|
|
|
42
43
|
response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
|
|
43
44
|
vector = apply_dimension_enforcement(response.vectors.first, provider)
|
|
@@ -57,6 +58,7 @@ module Legion
|
|
|
57
58
|
texts = texts.map { |t| apply_prefix(t, model: model, task: task) }
|
|
58
59
|
|
|
59
60
|
return generate_ollama_batch(texts: texts, model: model) if provider&.to_sym == :ollama
|
|
61
|
+
return generate_azure_batch(texts: texts, model: model, dimensions: dimensions) if provider&.to_sym == :azure
|
|
60
62
|
|
|
61
63
|
response = RubyLLM.embed(texts, **build_opts(model, provider, dimensions))
|
|
62
64
|
response.vectors.each_with_index.map do |vec, i|
|
|
@@ -284,6 +286,71 @@ module Legion
|
|
|
284
286
|
OLLAMA_CONTEXT_CHARS[base] || OLLAMA_DEFAULT_CONTEXT_CHARS
|
|
285
287
|
end
|
|
286
288
|
|
|
289
|
+
# ── Azure OpenAI (direct HTTP with SNI, bypasses ruby_llm) ──
|
|
290
|
+
|
|
291
|
+
# Embeds a single text via the direct Azure OpenAI request helper.
#
# @param text [String] text sent as the request input
# @param model [String] model/deployment name passed to the request helper
# @param dimensions [Integer, nil] requested vector size, forwarded unchanged
# @return [Hash] :vector, :model, :provider, :dimensions and :tokens on the
#   normal path; the result of dimension_error when enforcement returns a String
def generate_azure(text:, model:, dimensions: nil)
  payload = azure_embed_request(model: model, input: text, dimensions: dimensions)

  embedding = payload.dig('data', 0, 'embedding')
  # Enforcement only runs when the response actually carried an embedding.
  embedding &&= apply_dimension_enforcement(embedding, :azure)
  # A String result from enforcement is routed to the dimension error path.
  return dimension_error(model, :azure, embedding) if embedding.is_a?(String)

  {
    vector: embedding,
    model: model,
    provider: :azure,
    dimensions: embedding&.size || 0,
    tokens: payload.dig('usage', 'total_tokens') || 0
  }
end
|
|
300
|
+
|
|
301
|
+
# Embeds several texts with one direct Azure OpenAI request.
#
# @param texts [Array<String>] texts sent together as the request input
# @param model [String] model/deployment name passed to the request helper
# @param dimensions [Integer, nil] requested vector size, forwarded unchanged
# @return [Array<Hash>] one build_batch_entry result per item in the response
#   "data" list; on any StandardError, one error hash per input text
def generate_azure_batch(texts:, model:, dimensions: nil)
  payload = azure_embed_request(model: model, input: texts, dimensions: dimensions)
  rows = payload['data'] || []
  rows.each_with_index.map do |row, position|
    build_batch_entry(row['embedding'], model, :azure, position)
  end
rescue StandardError => e
  # Best-effort: log when the logger is loaded, then report the failure per input.
  reason = e.message
  Legion::Logging.warn("Azure batch embedding failed: #{reason}") if defined?(Legion::Logging)
  texts.map do |_|
    { vector: nil, model: model, provider: :azure, error: reason }
  end
end
|
|
310
|
+
|
|
311
|
+
# Sends one embeddings request directly to Azure OpenAI over HTTPS on port 443.
#
# The hostname is taken from the configured api_base. When the settings carry
# an IP override, the connection is opened to that IP while the hostname is
# still what Net::HTTP uses for TLS (SNI and certificate verification) and for
# the Host header. Constructing Net::HTTP with the IP itself would make the
# certificate check run against the IP instead of the hostname.
#
# @param model [String] deployment name interpolated into the request path
# @param input [String, Array<String>] value sent as the "input" field
# @param dimensions [Integer, nil] value for the "dimensions" field;
#   TARGET_DIMENSION is sent when nil
# @return [Hash] the parsed JSON response body
# @raise [RuntimeError] when api_base is missing or has no host, or when the
#   response is not a Net::HTTPSuccess
def azure_embed_request(model:, input:, dimensions: nil)
  settings = azure_embedding_settings
  api_base = settings[:api_base]
  raise 'Azure OpenAI embedding not configured (llm.providers.azure.api_base required)' unless api_base

  require 'net/http' # lazy-loaded here; also makes URI available before it is used

  host = URI.parse(api_base).host
  # An api_base without a scheme parses to a nil host; fail with a clear message.
  raise "Azure OpenAI embedding api_base has no host: #{api_base}" unless host

  http = Net::HTTP.new(host, 443)
  ip = settings[:ip]
  # Override only the connect address; a blank override is treated as unset.
  http.ipaddr = ip.to_s unless ip.to_s.empty?
  http.use_ssl = true
  http.open_timeout = 5
  http.read_timeout = 30

  req = Net::HTTP::Post.new("/openai/deployments/#{model}/embeddings?api-version=2024-02-01")
  req['Content-Type'] = 'application/json'
  req['Host'] = host
  req['api-key'] = settings[:api_key]
  req.body = ::JSON.dump({ input: input, dimensions: dimensions || TARGET_DIMENSION })

  response = http.request(req)
  raise "Azure embed failed: #{response.code} #{response.body}" unless response.is_a?(Net::HTTPSuccess)

  ::JSON.parse(response.body)
end
|
|
341
|
+
|
|
342
|
+
# Resolves the connection settings used for Azure embeddings.
#
# Values under llm.embedding.azure take precedence over llm.providers.azure.
# The API key falls back to the provider's :api_key and then :auth_token.
# The IP override is read only from llm.embedding.azure.
#
# @return [Hash] :api_base, :api_key and :ip (any of which may be nil)
def azure_embedding_settings
  provider_cfg = Legion::Settings.dig(:llm, :providers, :azure) || {}
  embed_cfg = Legion::Settings.dig(:llm, :embedding, :azure) || {}

  endpoint = embed_cfg[:api_base] || provider_cfg[:api_base]
  credential = embed_cfg[:api_key] || provider_cfg[:api_key] || provider_cfg[:auth_token]

  { api_base: endpoint, api_key: credential, ip: embed_cfg[:ip] }
end
|
|
351
|
+
|
|
352
|
+
# ── Ollama (direct HTTP, bypasses ruby_llm) ──
|
|
353
|
+
|
|
287
354
|
def ollama_embed_request(model:, input:)
|
|
288
355
|
base_url = Legion::Settings.dig(:llm, :providers, :ollama, :base_url) || 'http://localhost:11434'
|
|
289
356
|
conn = Faraday.new(url: base_url) do |f|
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -165,9 +165,10 @@ module Legion
|
|
|
165
165
|
{
|
|
166
166
|
dimension: 1024,
|
|
167
167
|
enforce_dimension: true,
|
|
168
|
-
provider_fallback: %w[ollama bedrock openai],
|
|
168
|
+
provider_fallback: %w[azure ollama bedrock openai],
|
|
169
169
|
provider_models: {
|
|
170
170
|
ollama: 'mxbai-embed-large',
|
|
171
|
+
azure: 'text-embedding-3-small',
|
|
171
172
|
bedrock: 'amazon.titan-embed-text-v2:0',
|
|
172
173
|
openai: 'text-embedding-3-small'
|
|
173
174
|
},
|
data/lib/legion/llm/version.rb
CHANGED