legion-llm 0.8.24 → 0.8.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/lib/legion/llm/call/embeddings.rb +12 -1
- data/lib/legion/llm/call/providers.rb +28 -0
- data/lib/legion/llm/call/structured_output.rb +12 -5
- data/lib/legion/llm/discovery/vllm.rb +114 -0
- data/lib/legion/llm/discovery.rb +14 -7
- data/lib/legion/llm/inference/executor.rb +1 -1
- data/lib/legion/llm/patches/ruby_llm_vllm.rb +37 -0
- data/lib/legion/llm/router.rb +13 -4
- data/lib/legion/llm/settings.rb +6 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 942f34663b8d915ee982996b5b2e63e26a7edf79a7aac17f8ce71ed1829dff01
|
|
4
|
+
data.tar.gz: dd78dd3bd79c9f1cf19d170f4ee2905fc92865dd3e21b107856c973eaf752fb5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bfc1f55dce2a3eda78b5b6ab2405b6ce5d4e58fa841a81bb304af3bbe9a5b52851023c845d898713cfa87d9e292cd5fd1545464a7e0937eadde6f8668595ccc2
|
|
7
|
+
data.tar.gz: 4cad8eb9c6b6cfc79c1ffce687b7fddbb7b47d4e22ec9bca424f2dbb061ed83fff97d4ab2bbec441d3b319922316a238b057752210f1d7908e0d7169380485e9
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.26] - 2026-04-24
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- First-class vLLM provider support. vLLM exposes an OpenAI-compatible API and is registered as a new RubyLLM provider (`:vllm`). Configured via `providers.vllm.base_url` in settings. Mapped to `:fleet` tier in the router.
|
|
7
|
+
- vLLM discovery via `/v1/models` endpoint. Caches model list with `max_model_len` (context window size) using the same TTL as Ollama discovery. Health checks via `/health` endpoint.
|
|
8
|
+
- Context overflow escalation: when vLLM rejects a request due to context length limits (32k on V100 hardware), the executor automatically falls back to cloud/frontier providers.
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- `find_fallback_provider` in `Executor` now skips all local providers (`:ollama` and `:vllm`) when searching for fallbacks, not just `:ollama`. Ensures context overflow escalates to cloud/frontier.
|
|
12
|
+
- `Router::PROVIDER_ORDER` updated: `:vllm` inserted after `:ollama` and before `:bedrock`.
|
|
13
|
+
- `default_provider_for_tier(:fleet)` returns `:vllm` when vLLM is enabled, falls back to `:ollama`.
|
|
14
|
+
|
|
15
|
+
## [0.8.25] - 2026-04-24
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- `StructuredOutput.generate`, `handle_parse_error`, and `retry_with_instruction` used hash-style access (`result[:content]`, `result[:model]`) on the return value of `chat_single`, but `chat_single` returns a `RubyLLM::Message` object which only supports method access (`.content`, `.model_id`). All four access sites now use `respond_to?` duck-typing so both hash and Message objects work. Visible as `undefined method '[]' for an instance of RubyLLM::Message` in Apollo's `llm_detects_conflict?` and any structured output caller using non-schema-capable models (e.g. ollama/qwen).
|
|
19
|
+
- `Call::Embeddings.generate` crashed with `NoMethodError` on `.size` when `response.vectors` was a flat array (`[0.007, ...]`) instead of nested (`[[0.007, ...]]`). RubyLLM's OpenAI provider unwraps single-input embedding responses. Added `normalize_vectors_first` to detect and handle both flat and nested vector formats before dimension enforcement.
|
|
20
|
+
|
|
3
21
|
## [0.8.24] - 2026-04-23
|
|
4
22
|
|
|
5
23
|
### Fixed
|
|
@@ -27,7 +27,8 @@ module Legion
|
|
|
27
27
|
|
|
28
28
|
response = RubyLLM.embed(text, **build_opts(model, provider, dimensions))
|
|
29
29
|
emit_embedding_metering(provider: provider, model: model, tokens: response.input_tokens)
|
|
30
|
-
vector =
|
|
30
|
+
vector = normalize_vectors_first(response.vectors)
|
|
31
|
+
vector = apply_dimension_enforcement(vector, provider)
|
|
31
32
|
return dimension_error(model, provider, vector) if vector.is_a?(String)
|
|
32
33
|
|
|
33
34
|
{ vector: vector, model: model, provider: provider, dimensions: vector&.size || 0, tokens: response.input_tokens }
|
|
@@ -101,6 +102,16 @@ module Legion
|
|
|
101
102
|
opts
|
|
102
103
|
end
|
|
103
104
|
|
|
105
|
+
def normalize_vectors_first(vectors)
|
|
106
|
+
return nil if vectors.nil? || (vectors.is_a?(Array) && vectors.empty?)
|
|
107
|
+
|
|
108
|
+
first = vectors.first
|
|
109
|
+
return first if first.is_a?(Array)
|
|
110
|
+
return vectors if vectors.is_a?(Array) && vectors.first.is_a?(Numeric)
|
|
111
|
+
|
|
112
|
+
first
|
|
113
|
+
end
|
|
114
|
+
|
|
104
115
|
def apply_dimension_enforcement(vector, provider)
|
|
105
116
|
return vector unless enforce_dimension? && vector.is_a?(Array)
|
|
106
117
|
|
|
@@ -76,6 +76,8 @@ module Legion
|
|
|
76
76
|
config[:api_base] && (usable_setting?(config[:api_key]) || usable_setting?(config[:auth_token]))
|
|
77
77
|
when :ollama
|
|
78
78
|
ollama_running?(config)
|
|
79
|
+
when :vllm
|
|
80
|
+
vllm_running?(config)
|
|
79
81
|
else
|
|
80
82
|
usable_setting?(config[:api_key])
|
|
81
83
|
end
|
|
@@ -106,6 +108,22 @@ module Legion
|
|
|
106
108
|
false
|
|
107
109
|
end
|
|
108
110
|
|
|
111
|
+
def vllm_running?(config)
|
|
112
|
+
require 'faraday'
|
|
113
|
+
url = config[:base_url] || 'http://localhost:8000/v1'
|
|
114
|
+
base = url.sub(%r{/+\z}, '').sub(%r{/v1\z}, '')
|
|
115
|
+
log.debug "[llm][providers] vllm_running? url=#{base}/health"
|
|
116
|
+
response = Faraday.new(url: base) do |f|
|
|
117
|
+
f.options.timeout = 2
|
|
118
|
+
f.options.open_timeout = 2
|
|
119
|
+
f.adapter Faraday.default_adapter
|
|
120
|
+
end.get('/health')
|
|
121
|
+
response.success?
|
|
122
|
+
rescue StandardError => e
|
|
123
|
+
handle_exception(e, level: :debug, operation: 'llm.providers.vllm_running', base_url: url)
|
|
124
|
+
false
|
|
125
|
+
end
|
|
126
|
+
|
|
109
127
|
def apply_provider_config(provider, config)
|
|
110
128
|
case provider
|
|
111
129
|
when :bedrock then configure_bedrock(config)
|
|
@@ -114,6 +132,7 @@ module Legion
|
|
|
114
132
|
when :gemini then configure_gemini(config)
|
|
115
133
|
when :azure then configure_azure(config)
|
|
116
134
|
when :ollama then configure_ollama(config)
|
|
135
|
+
when :vllm then configure_vllm(config)
|
|
117
136
|
else
|
|
118
137
|
log.warn "[llm][providers] unknown provider=#{provider}"
|
|
119
138
|
end
|
|
@@ -214,6 +233,15 @@ module Legion
|
|
|
214
233
|
log.info "[llm][providers] configured ollama base_url=#{config[:base_url].inspect}"
|
|
215
234
|
end
|
|
216
235
|
|
|
236
|
+
def configure_vllm(config)
|
|
237
|
+
base_url = config[:base_url] || 'http://localhost:8000/v1'
|
|
238
|
+
RubyLLM.configure do |c|
|
|
239
|
+
c.vllm_api_base = base_url
|
|
240
|
+
c.vllm_api_key = config[:api_key] if config[:api_key]
|
|
241
|
+
end
|
|
242
|
+
log.info "[llm][providers] configured vllm base_url=#{base_url.inspect}"
|
|
243
|
+
end
|
|
244
|
+
|
|
217
245
|
SAAS_PROVIDERS = %i[bedrock anthropic openai gemini azure].freeze
|
|
218
246
|
|
|
219
247
|
def verify_providers
|
|
@@ -15,8 +15,11 @@ module Legion
|
|
|
15
15
|
result = call_with_schema(messages, schema, model, provider: provider, **)
|
|
16
16
|
log.info "[llm][structured_output] model=#{model} provider=#{provider} valid=true"
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
content = result.respond_to?(:content) ? result.content : result[:content]
|
|
19
|
+
raw_model = result.respond_to?(:model_id) ? result.model_id : result[:model]
|
|
20
|
+
|
|
21
|
+
parsed = Legion::JSON.load(content)
|
|
22
|
+
{ data: parsed, raw: content, model: raw_model, valid: true }
|
|
20
23
|
rescue ::JSON::ParserError => e
|
|
21
24
|
log.warn "[llm][structured_output] model=#{model} provider=#{provider} parse_error=#{e.message}"
|
|
22
25
|
handle_parse_error(e, messages, schema, model, provider, result, **)
|
|
@@ -49,7 +52,8 @@ module Legion
|
|
|
49
52
|
if retry_enabled? && attempt < max_retries
|
|
50
53
|
retry_with_instruction(messages, schema, model, provider: provider, attempt: attempt + 1, **opts)
|
|
51
54
|
else
|
|
52
|
-
|
|
55
|
+
raw = result.respond_to?(:content) ? result&.content : result&.dig(:content)
|
|
56
|
+
{ data: nil, error: "JSON parse failed: #{error.message}", raw: raw, valid: false }
|
|
53
57
|
end
|
|
54
58
|
end
|
|
55
59
|
|
|
@@ -60,8 +64,11 @@ module Legion
|
|
|
60
64
|
model: model, provider: provider, intent: nil, tier: nil,
|
|
61
65
|
message: user_content, **opts.except(:attempt))
|
|
62
66
|
|
|
63
|
-
|
|
64
|
-
|
|
67
|
+
retry_content = result.respond_to?(:content) ? result.content : result[:content]
|
|
68
|
+
retry_model = result.respond_to?(:model_id) ? result.model_id : result[:model]
|
|
69
|
+
|
|
70
|
+
parsed = Legion::JSON.load(retry_content)
|
|
71
|
+
{ data: parsed, raw: retry_content, model: retry_model, valid: true, retried: true }
|
|
65
72
|
rescue StandardError => e
|
|
66
73
|
handle_exception(e, level: :warn)
|
|
67
74
|
{ data: nil, error: e.message, valid: false }
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'faraday'
|
|
4
|
+
|
|
5
|
+
require 'legion/logging/helper'
|
|
6
|
+
require 'legion/json'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module LLM
|
|
10
|
+
module Discovery
|
|
11
|
+
module Vllm
|
|
12
|
+
extend Legion::Logging::Helper
|
|
13
|
+
|
|
14
|
+
class << self
|
|
15
|
+
def models
|
|
16
|
+
ensure_fresh
|
|
17
|
+
@models || []
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def model_names
|
|
21
|
+
models.map { |m| m[:id] }
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def model_available?(name)
|
|
25
|
+
model_names.any? { |n| n == name }
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def max_context(name)
|
|
29
|
+
model = models.find { |m| m[:id] == name }
|
|
30
|
+
model&.dig(:max_model_len)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def healthy?
|
|
34
|
+
response = health_connection.get('/health')
|
|
35
|
+
response.success?
|
|
36
|
+
rescue StandardError => e
|
|
37
|
+
handle_exception(e, level: :debug, operation: 'llm.discovery.vllm.healthy')
|
|
38
|
+
false
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def refresh!
|
|
42
|
+
response = connection.get('/v1/models')
|
|
43
|
+
if response.success?
|
|
44
|
+
parsed = Legion::JSON.load(response.body)
|
|
45
|
+
@models = parsed[:data] || []
|
|
46
|
+
log.debug "[llm][discovery][vllm] model list refreshed count=#{@models.size}"
|
|
47
|
+
else
|
|
48
|
+
log.warn "[llm][discovery][vllm] HTTP failure status=#{response.status}"
|
|
49
|
+
@models ||= []
|
|
50
|
+
end
|
|
51
|
+
rescue StandardError => e
|
|
52
|
+
handle_exception(e, level: :warn, operation: 'llm.discovery.vllm.refresh')
|
|
53
|
+
@models ||= []
|
|
54
|
+
ensure
|
|
55
|
+
@last_refreshed_at = Time.now
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def reset!
|
|
59
|
+
@models = nil
|
|
60
|
+
@last_refreshed_at = nil
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def stale?
|
|
64
|
+
return true if @last_refreshed_at.nil?
|
|
65
|
+
|
|
66
|
+
ttl = discovery_settings[:refresh_seconds] || 60
|
|
67
|
+
Time.now - @last_refreshed_at > ttl
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
private
|
|
71
|
+
|
|
72
|
+
def ensure_fresh
|
|
73
|
+
refresh! if stale?
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def connection
|
|
77
|
+
Faraday.new(url: vllm_base_url) do |f|
|
|
78
|
+
f.options.timeout = 3
|
|
79
|
+
f.options.open_timeout = 2
|
|
80
|
+
f.adapter Faraday.default_adapter
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def health_connection
|
|
85
|
+
base = vllm_base_url.sub(%r{/+\z}, '').sub(%r{/v1\z}, '')
|
|
86
|
+
Faraday.new(url: base) do |f|
|
|
87
|
+
f.options.timeout = 2
|
|
88
|
+
f.options.open_timeout = 2
|
|
89
|
+
f.adapter Faraday.default_adapter
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def vllm_base_url
|
|
94
|
+
return 'http://localhost:8000/v1' unless Legion.const_defined?('Settings', false)
|
|
95
|
+
|
|
96
|
+
Legion::Settings[:llm].dig(:providers, :vllm, :base_url) || 'http://localhost:8000/v1'
|
|
97
|
+
rescue StandardError => e
|
|
98
|
+
handle_exception(e, level: :debug, operation: 'llm.discovery.vllm.base_url')
|
|
99
|
+
'http://localhost:8000/v1'
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def discovery_settings
|
|
103
|
+
return {} unless Legion.const_defined?('Settings', false)
|
|
104
|
+
|
|
105
|
+
Legion::Settings[:llm][:discovery] || {}
|
|
106
|
+
rescue StandardError => e
|
|
107
|
+
handle_exception(e, level: :debug, operation: 'llm.discovery.vllm.settings')
|
|
108
|
+
{}
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
data/lib/legion/llm/discovery.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
4
|
require_relative 'discovery/ollama'
|
|
5
|
+
require_relative 'discovery/vllm'
|
|
5
6
|
require_relative 'discovery/system'
|
|
6
7
|
|
|
7
8
|
module Legion
|
|
@@ -23,15 +24,21 @@ module Legion
|
|
|
23
24
|
|
|
24
25
|
def run
|
|
25
26
|
log.debug '[llm][discovery] run.enter'
|
|
26
|
-
return unless Legion::LLM.settings.dig(:providers, :ollama, :enabled)
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
if Legion::LLM.settings.dig(:providers, :ollama, :enabled)
|
|
29
|
+
Ollama.refresh!
|
|
30
|
+
System.refresh!
|
|
31
|
+
names = Ollama.model_names
|
|
32
|
+
log.info "[llm][discovery] ollama model_count=#{names.size} models=#{names.join(', ')}"
|
|
33
|
+
log.info "[llm][discovery] system total_mb=#{System.total_memory_mb} available_mb=#{System.available_memory_mb}"
|
|
34
|
+
end
|
|
30
35
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
36
|
+
if Legion::LLM.settings.dig(:providers, :vllm, :enabled)
|
|
37
|
+
Vllm.refresh!
|
|
38
|
+
names = Vllm.model_names
|
|
39
|
+
contexts = names.map { |n| "#{n}(#{Vllm.max_context(n)})" }
|
|
40
|
+
log.info "[llm][discovery] vllm model_count=#{names.size} models=#{contexts.join(', ')}"
|
|
41
|
+
end
|
|
35
42
|
rescue StandardError => e
|
|
36
43
|
handle_exception(e, level: :warn, operation: 'llm.discovery.run')
|
|
37
44
|
end
|
|
@@ -1030,7 +1030,7 @@ module Legion
|
|
|
1030
1030
|
providers.each do |name, config|
|
|
1031
1031
|
next unless config.is_a?(Hash) && config[:enabled]
|
|
1032
1032
|
next if exclude.include?(name) || exclude.include?(name.to_s)
|
|
1033
|
-
next if name == :ollama
|
|
1033
|
+
next if %i[ollama vllm].include?(name)
|
|
1034
1034
|
next unless config[:default_model]
|
|
1035
1035
|
|
|
1036
1036
|
return { provider: name, model: config[:default_model] }
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubyLLM
|
|
4
|
+
module Providers
|
|
5
|
+
class Vllm < OpenAI
|
|
6
|
+
def api_base
|
|
7
|
+
@config.vllm_api_base
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def headers
|
|
11
|
+
return {} unless @config.vllm_api_key
|
|
12
|
+
|
|
13
|
+
{ 'Authorization' => "Bearer #{@config.vllm_api_key}" }
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
class << self
|
|
17
|
+
def configuration_options
|
|
18
|
+
%i[vllm_api_base vllm_api_key]
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def configuration_requirements
|
|
22
|
+
%i[vllm_api_base]
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def local?
|
|
26
|
+
true
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def capabilities
|
|
30
|
+
nil
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
RubyLLM::Provider.register :vllm, RubyLLM::Providers::Vllm
|
data/lib/legion/llm/router.rb
CHANGED
|
@@ -15,8 +15,8 @@ module Legion
|
|
|
15
15
|
extend Legion::Logging::Helper
|
|
16
16
|
|
|
17
17
|
PROVIDER_TIER = { bedrock: :cloud, anthropic: :frontier, openai: :frontier,
|
|
18
|
-
gemini: :cloud, azure: :cloud, ollama: :local }.freeze
|
|
19
|
-
PROVIDER_ORDER = %i[ollama bedrock azure gemini anthropic openai].freeze
|
|
18
|
+
gemini: :cloud, azure: :cloud, ollama: :local, vllm: :local }.freeze
|
|
19
|
+
PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
|
|
20
20
|
|
|
21
21
|
class << self
|
|
22
22
|
# Resolve an LLM routing intent to a tier/provider/model decision.
|
|
@@ -296,8 +296,11 @@ module Legion
|
|
|
296
296
|
|
|
297
297
|
def default_provider_for_tier(tier)
|
|
298
298
|
case tier.to_sym
|
|
299
|
-
when :local
|
|
299
|
+
when :local
|
|
300
300
|
:ollama
|
|
301
|
+
when :fleet
|
|
302
|
+
vllm_config = Legion::Settings[:llm].dig(:providers, :vllm)
|
|
303
|
+
vllm_config.is_a?(Hash) && vllm_config[:enabled] ? :vllm : :ollama
|
|
301
304
|
when :openai_compat
|
|
302
305
|
:openai
|
|
303
306
|
when :cloud
|
|
@@ -316,7 +319,13 @@ module Legion
|
|
|
316
319
|
ollama = Legion::Settings[:llm].dig(:providers, :ollama) || {}
|
|
317
320
|
ollama[:default_model] || 'llama3'
|
|
318
321
|
when :fleet
|
|
319
|
-
|
|
322
|
+
vllm_config = Legion::Settings[:llm].dig(:providers, :vllm) || {}
|
|
323
|
+
if vllm_config[:enabled]
|
|
324
|
+
vllm_config[:default_model] || 'qwen3.6-27b'
|
|
325
|
+
else
|
|
326
|
+
ollama = Legion::Settings[:llm].dig(:providers, :ollama) || {}
|
|
327
|
+
ollama[:default_model] || 'llama3'
|
|
328
|
+
end
|
|
320
329
|
when :openai_compat
|
|
321
330
|
'gpt-4o'
|
|
322
331
|
when :cloud
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -375,6 +375,12 @@ module Legion
|
|
|
375
375
|
enabled: false,
|
|
376
376
|
default_model: 'qwen3.5:latest',
|
|
377
377
|
base_url: 'http://localhost:11434'
|
|
378
|
+
},
|
|
379
|
+
vllm: {
|
|
380
|
+
enabled: false,
|
|
381
|
+
default_model: 'qwen3.6-27b',
|
|
382
|
+
base_url: 'http://localhost:8000/v1',
|
|
383
|
+
api_key: nil
|
|
378
384
|
}
|
|
379
385
|
}
|
|
380
386
|
end
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.8.24
|
|
4
|
+
version: 0.8.26
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -262,6 +262,7 @@ files:
|
|
|
262
262
|
- lib/legion/llm/discovery.rb
|
|
263
263
|
- lib/legion/llm/discovery/ollama.rb
|
|
264
264
|
- lib/legion/llm/discovery/system.rb
|
|
265
|
+
- lib/legion/llm/discovery/vllm.rb
|
|
265
266
|
- lib/legion/llm/errors.rb
|
|
266
267
|
- lib/legion/llm/fleet.rb
|
|
267
268
|
- lib/legion/llm/fleet/dispatcher.rb
|
|
@@ -323,6 +324,7 @@ files:
|
|
|
323
324
|
- lib/legion/llm/metering/tracker.rb
|
|
324
325
|
- lib/legion/llm/metering/usage.rb
|
|
325
326
|
- lib/legion/llm/patches/ruby_llm_parallel_tools.rb
|
|
327
|
+
- lib/legion/llm/patches/ruby_llm_vllm.rb
|
|
326
328
|
- lib/legion/llm/quality.rb
|
|
327
329
|
- lib/legion/llm/quality/checker.rb
|
|
328
330
|
- lib/legion/llm/quality/confidence/score.rb
|