legion-llm 0.8.25 → 0.8.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/lib/legion/llm/call/providers.rb +28 -0
- data/lib/legion/llm/discovery/vllm.rb +114 -0
- data/lib/legion/llm/discovery.rb +14 -7
- data/lib/legion/llm/inference/audit_publisher.rb +18 -1
- data/lib/legion/llm/inference/executor.rb +1 -1
- data/lib/legion/llm/patches/ruby_llm_vllm.rb +78 -0
- data/lib/legion/llm/router.rb +13 -4
- data/lib/legion/llm/settings.rb +7 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -0
- metadata +3 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2cccf9351fd9f4db59b1548197bf7b78c5947e85183535e86ede5c3359d71b89
+  data.tar.gz: 14e3a1b5b6648bea618941f84473e63aeccee7edc9520366d09dde8d27b00a7b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 31ec279fcb498e5cc3308bcefcb6adc94915c36867967fd08aa3f0422d4c583f83bc0ace1db9a47ecd45371a0ce0c82542fea7015c1474f5b7386409d789e5e0
+  data.tar.gz: '083bd8e581399a574b313a31784eacca4424fadb131f82cd17a5a7e840420da14ec3e5f589efa88b7be6be8a76916439c88bbf936a6d13c9a9435bd8fd04245c'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
 # Legion LLM Changelog
 
+## [0.8.27] - 2026-04-24
+
+### Fixed
+- vLLM provider sent `developer` message role (OpenAI convention) which Qwen's chat template rejects. Added `Vllm::Chat` module that overrides `format_messages` and `format_role` to always send `system`.
+- vLLM provider called `OpenAI::Chat.render_payload` as a module function without provider instance context, causing `NoMethodError` on `openai_use_system_role`. Rewrote to use `super` with instance method overrides.
+- Audit events included the full conversation history in every message — quadratic payload growth. Now caps at the last 20 messages (configurable via `compliance.audit_max_messages`). Full conversation reconstructable via `conversation_id`.
+
+### Added
+- vLLM `chat_template_kwargs` with `enable_thinking` sent on every request so vLLM separates reasoning into the `reasoning` response field instead of inline `<think>` tags.
+- `providers.vllm.enable_thinking` setting (default: `true`). Controls whether thinking is enabled for vLLM requests. Per-request `thinking` param overrides.
+
+## [0.8.26] - 2026-04-24
+
+### Added
+- First-class vLLM provider support. vLLM exposes an OpenAI-compatible API and is registered as a new RubyLLM provider (`:vllm`). Configured via `providers.vllm.base_url` in settings. Mapped to `:fleet` tier in the router.
+- vLLM discovery via `/v1/models` endpoint. Caches model list with `max_model_len` (context window size) using the same TTL as Ollama discovery. Health checks via `/health` endpoint.
+- Context overflow escalation: when vLLM rejects a request due to context length limits (32k on V100 hardware), the executor automatically falls back to cloud/frontier providers.
+
+### Changed
+- `find_fallback_provider` in `Executor` now skips all local providers (`:ollama` and `:vllm`) when searching for fallbacks, not just `:ollama`. Ensures context overflow escalates to cloud/frontier.
+- `Router::PROVIDER_ORDER` updated: `:vllm` inserted after `:ollama` and before `:bedrock`.
+- `default_provider_for_tier(:fleet)` returns `:vllm` when vLLM is enabled, falls back to `:ollama`.
+
 ## [0.8.25] - 2026-04-24
 
 ### Fixed
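The last two 0.8.27 entries interact: `providers.vllm.enable_thinking` supplies the default, and a per-request `thinking` param wins when it is given. A minimal standalone sketch of that precedence; the helper name and settings hash are illustrative, not part of the gem's API:

```ruby
# Illustrative only: mirrors the precedence described in the changelog.
# A per-request `thinking` value overrides providers.vllm.enable_thinking,
# which itself counts as enabled unless explicitly set to false.
def resolve_enable_thinking(request_thinking, settings)
  return !!request_thinking unless request_thinking.nil?

  settings.dig(:providers, :vllm, :enable_thinking) != false
end

settings = { providers: { vllm: { enable_thinking: true } } }
resolve_enable_thinking(nil, settings)   # => true  (setting default applies)
resolve_enable_thinking(false, settings) # => false (per-request override)
resolve_enable_thinking(true, { providers: { vllm: { enable_thinking: false } } }) # => true
```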
data/lib/legion/llm/call/providers.rb CHANGED
@@ -76,6 +76,8 @@ module Legion
         config[:api_base] && (usable_setting?(config[:api_key]) || usable_setting?(config[:auth_token]))
       when :ollama
         ollama_running?(config)
+      when :vllm
+        vllm_running?(config)
       else
         usable_setting?(config[:api_key])
       end
@@ -106,6 +108,22 @@ module Legion
         false
       end
 
+      def vllm_running?(config)
+        require 'faraday'
+        url = config[:base_url] || 'http://localhost:8000/v1'
+        base = url.sub(%r{/+\z}, '').sub(%r{/v1\z}, '')
+        log.debug "[llm][providers] vllm_running? url=#{base}/health"
+        response = Faraday.new(url: base) do |f|
+          f.options.timeout = 2
+          f.options.open_timeout = 2
+          f.adapter Faraday.default_adapter
+        end.get('/health')
+        response.success?
+      rescue StandardError => e
+        handle_exception(e, level: :debug, operation: 'llm.providers.vllm_running', base_url: url)
+        false
+      end
+
       def apply_provider_config(provider, config)
         case provider
         when :bedrock then configure_bedrock(config)
@@ -114,6 +132,7 @@ module Legion
         when :gemini then configure_gemini(config)
         when :azure then configure_azure(config)
         when :ollama then configure_ollama(config)
+        when :vllm then configure_vllm(config)
         else
           log.warn "[llm][providers] unknown provider=#{provider}"
         end
@@ -214,6 +233,15 @@ module Legion
         log.info "[llm][providers] configured ollama base_url=#{config[:base_url].inspect}"
       end
 
+      def configure_vllm(config)
+        base_url = config[:base_url] || 'http://localhost:8000/v1'
+        RubyLLM.configure do |c|
+          c.vllm_api_base = base_url
+          c.vllm_api_key = config[:api_key] if config[:api_key]
+        end
+        log.info "[llm][providers] configured vllm base_url=#{base_url.inspect}"
+      end
+
       SAAS_PROVIDERS = %i[bedrock anthropic openai gemini azure].freeze
 
       def verify_providers
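The new `vllm_running?` check (and `health_connection` in the discovery module below) derives the health-check host by trimming trailing slashes and a trailing `/v1` from the configured base URL, so `/health` is requested at the server root. A standalone illustration of just that string handling:

```ruby
# Same normalization as vllm_running?/health_connection: strip trailing
# slashes, then a trailing /v1, so /health is hit on the server root.
def health_base(url)
  url.sub(%r{/+\z}, '').sub(%r{/v1\z}, '')
end

health_base('http://localhost:8000/v1')  # => "http://localhost:8000"
health_base('http://localhost:8000/v1/') # => "http://localhost:8000"
health_base('http://gpu-node:8000')      # => "http://gpu-node:8000"
```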
data/lib/legion/llm/discovery/vllm.rb ADDED
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+require 'faraday'
+
+require 'legion/logging/helper'
+require 'legion/json'
+
+module Legion
+  module LLM
+    module Discovery
+      module Vllm
+        extend Legion::Logging::Helper
+
+        class << self
+          def models
+            ensure_fresh
+            @models || []
+          end
+
+          def model_names
+            models.map { |m| m[:id] }
+          end
+
+          def model_available?(name)
+            model_names.any? { |n| n == name }
+          end
+
+          def max_context(name)
+            model = models.find { |m| m[:id] == name }
+            model&.dig(:max_model_len)
+          end
+
+          def healthy?
+            response = health_connection.get('/health')
+            response.success?
+          rescue StandardError => e
+            handle_exception(e, level: :debug, operation: 'llm.discovery.vllm.healthy')
+            false
+          end
+
+          def refresh!
+            response = connection.get('/v1/models')
+            if response.success?
+              parsed = Legion::JSON.load(response.body)
+              @models = parsed[:data] || []
+              log.debug "[llm][discovery][vllm] model list refreshed count=#{@models.size}"
+            else
+              log.warn "[llm][discovery][vllm] HTTP failure status=#{response.status}"
+              @models ||= []
+            end
+          rescue StandardError => e
+            handle_exception(e, level: :warn, operation: 'llm.discovery.vllm.refresh')
+            @models ||= []
+          ensure
+            @last_refreshed_at = Time.now
+          end
+
+          def reset!
+            @models = nil
+            @last_refreshed_at = nil
+          end
+
+          def stale?
+            return true if @last_refreshed_at.nil?
+
+            ttl = discovery_settings[:refresh_seconds] || 60
+            Time.now - @last_refreshed_at > ttl
+          end
+
+          private
+
+          def ensure_fresh
+            refresh! if stale?
+          end
+
+          def connection
+            Faraday.new(url: vllm_base_url) do |f|
+              f.options.timeout = 3
+              f.options.open_timeout = 2
+              f.adapter Faraday.default_adapter
+            end
+          end
+
+          def health_connection
+            base = vllm_base_url.sub(%r{/+\z}, '').sub(%r{/v1\z}, '')
+            Faraday.new(url: base) do |f|
+              f.options.timeout = 2
+              f.options.open_timeout = 2
+              f.adapter Faraday.default_adapter
+            end
+          end
+
+          def vllm_base_url
+            return 'http://localhost:8000/v1' unless Legion.const_defined?('Settings', false)
+
+            Legion::Settings[:llm].dig(:providers, :vllm, :base_url) || 'http://localhost:8000/v1'
+          rescue StandardError => e
+            handle_exception(e, level: :debug, operation: 'llm.discovery.vllm.base_url')
+            'http://localhost:8000/v1'
+          end
+
+          def discovery_settings
+            return {} unless Legion.const_defined?('Settings', false)
+
+            Legion::Settings[:llm][:discovery] || {}
+          rescue StandardError => e
+            handle_exception(e, level: :debug, operation: 'llm.discovery.vllm.settings')
+            {}
+          end
+        end
+      end
+    end
+  end
+end
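A usage sketch for the new discovery module. It assumes legion-llm is loaded and a vLLM server is reachable at `providers.vllm.base_url`; the model name and context size below are placeholders, not values shipped with the gem:

```ruby
require 'legion/llm'

vllm = Legion::LLM::Discovery::Vllm

vllm.healthy?                   # true when GET /health returns 2xx
vllm.model_names                # e.g. ["qwen3.6-27b"] from GET /v1/models, cached per TTL
vllm.max_context('qwen3.6-27b') # e.g. 32768, taken from the model's max_model_len
vllm.refresh!                   # force a re-read of /v1/models before the TTL expires
```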
data/lib/legion/llm/discovery.rb CHANGED
@@ -2,6 +2,7 @@
 
 require 'legion/logging/helper'
 require_relative 'discovery/ollama'
+require_relative 'discovery/vllm'
 require_relative 'discovery/system'
 
 module Legion
@@ -23,15 +24,21 @@ module Legion
 
     def run
       log.debug '[llm][discovery] run.enter'
-      return unless Legion::LLM.settings.dig(:providers, :ollama, :enabled)
 
-
-
+      if Legion::LLM.settings.dig(:providers, :ollama, :enabled)
+        Ollama.refresh!
+        System.refresh!
+        names = Ollama.model_names
+        log.info "[llm][discovery] ollama model_count=#{names.size} models=#{names.join(', ')}"
+        log.info "[llm][discovery] system total_mb=#{System.total_memory_mb} available_mb=#{System.available_memory_mb}"
+      end
 
-
-
-
-
+      if Legion::LLM.settings.dig(:providers, :vllm, :enabled)
+        Vllm.refresh!
+        names = Vllm.model_names
+        contexts = names.map { |n| "#{n}(#{Vllm.max_context(n)})" }
+        log.info "[llm][discovery] vllm model_count=#{names.size} models=#{contexts.join(', ')}"
+      end
     rescue StandardError => e
       handle_exception(e, level: :warn, operation: 'llm.discovery.run')
     end
data/lib/legion/llm/inference/audit_publisher.rb CHANGED
@@ -40,7 +40,7 @@
         timeline: compact_timeline(response.timeline),
         classification: response.classification,
         tracing: response.tracing,
-        messages: request.messages,
+        messages: current_turn_messages(request.messages),
         response_content: msg_content,
         tools_used: tools_data,
         timestamp: Time.now,
@@ -109,6 +109,23 @@ module Legion
         end
       end
 
+      def current_turn_messages(messages)
+        return messages unless messages.is_a?(Array)
+
+        max = audit_max_messages
+        return messages if messages.size <= max
+
+        messages.last(max)
+      end
+
+      def audit_max_messages
+        return 20 unless defined?(Legion::Settings)
+
+        Legion::Settings[:llm].dig(:compliance, :audit_max_messages) || 20
+      rescue StandardError
+        20
+      end
+
       def build_message_context(response:, **)
         {
           request_id: response.request_id,
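The cap is a plain tail slice, so short conversations pass through untouched and long ones keep only the most recent turns. A standalone illustration with the default of 20:

```ruby
# Mirrors current_turn_messages above: keep only the last `max` entries
# (default 20) when publishing an audit event.
def capped_messages(messages, max = 20)
  return messages unless messages.is_a?(Array)
  return messages if messages.size <= max

  messages.last(max)
end

history = (1..50).map { |i| { role: i.odd? ? 'user' : 'assistant', content: "turn #{i}" } }
capped_messages(history).size       # => 20
capped_messages(history).first      # => { role: 'user', content: 'turn 31' }
capped_messages(history[0, 5]).size # => 5 (short conversations are unchanged)
```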
data/lib/legion/llm/inference/executor.rb CHANGED
@@ -1030,7 +1030,7 @@ module Legion
       providers.each do |name, config|
         next unless config.is_a?(Hash) && config[:enabled]
         next if exclude.include?(name) || exclude.include?(name.to_s)
-        next if name == :ollama
+        next if %i[ollama vllm].include?(name)
         next unless config[:default_model]
 
         return { provider: name, model: config[:default_model] }
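A simplified standalone version of the selection loop above: skip disabled, excluded, and local (`:ollama`/`:vllm`) providers and return the first remaining one that declares a `default_model`. The provider hash here is illustrative, not the gem's settings:

```ruby
def find_fallback(providers, exclude: [])
  providers.each do |name, config|
    next unless config.is_a?(Hash) && config[:enabled]
    next if exclude.include?(name) || exclude.include?(name.to_s)
    next if %i[ollama vllm].include?(name) # local providers never serve as fallbacks
    next unless config[:default_model]

    return { provider: name, model: config[:default_model] }
  end
  nil
end

providers = {
  vllm:      { enabled: true,  default_model: 'qwen3.6-27b' },
  ollama:    { enabled: true,  default_model: 'llama3' },
  openai:    { enabled: false, default_model: 'gpt-4o' },
  anthropic: { enabled: true,  default_model: 'claude-3-5-sonnet' }
}

find_fallback(providers) # => { provider: :anthropic, model: 'claude-3-5-sonnet' }
```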
data/lib/legion/llm/patches/ruby_llm_vllm.rb ADDED
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Vllm < OpenAI
+      module Chat
+        def format_role(role)
+          role.to_s
+        end
+
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: OpenAI::Media.format_content(msg.content),
+              tool_calls: format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact.merge(OpenAI::Chat.format_thinking(msg))
+          end
+        end
+
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil,
+                           thinking: nil, tool_prefs: nil)
+          payload = super
+          enable = if thinking.nil?
+                     vllm_thinking_default
+                   else
+                     thinking ? true : false
+                   end
+          payload[:chat_template_kwargs] = { enable_thinking: enable }
+          payload
+        end
+
+        private
+
+        def vllm_thinking_default
+          return true unless defined?(Legion::Settings)
+
+          Legion::Settings[:llm].dig(:providers, :vllm, :enable_thinking) != false
+        rescue StandardError
+          true
+        end
+      end
+
+      include Vllm::Chat
+
+      def api_base
+        @config.vllm_api_base
+      end
+
+      def headers
+        return {} unless @config.vllm_api_key
+
+        { 'Authorization' => "Bearer #{@config.vllm_api_key}" }
+      end
+
+      class << self
+        def configuration_options
+          %i[vllm_api_base vllm_api_key]
+        end
+
+        def configuration_requirements
+          %i[vllm_api_base]
+        end
+
+        def local?
+          true
+        end
+
+        def capabilities
+          nil
+        end
+      end
+    end
+  end
+end
+
+RubyLLM::Provider.register :vllm, RubyLLM::Providers::Vllm
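With this patch applied, the body sent to vLLM's `/v1/chat/completions` always carries plain `system`/`user`/`assistant` roles and a `chat_template_kwargs` entry. A hand-written sketch of that shape; the model name and message content are placeholders:

```ruby
payload = {
  model: 'qwen3.6-27b',
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' }, # never 'developer'
    { role: 'user',   content: 'Summarize the last deploy.' }
  ],
  temperature: 0.7,
  stream: false,
  chat_template_kwargs: { enable_thinking: true } # reasoning lands in the `reasoning` field
}
```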
data/lib/legion/llm/router.rb CHANGED
@@ -15,8 +15,8 @@ module Legion
     extend Legion::Logging::Helper
 
     PROVIDER_TIER = { bedrock: :cloud, anthropic: :frontier, openai: :frontier,
-                      gemini: :cloud, azure: :cloud, ollama: :local }.freeze
-    PROVIDER_ORDER = %i[ollama bedrock azure gemini anthropic openai].freeze
+                      gemini: :cloud, azure: :cloud, ollama: :local, vllm: :local }.freeze
+    PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
 
     class << self
       # Resolve an LLM routing intent to a tier/provider/model decision.
@@ -296,8 +296,11 @@ module Legion
 
       def default_provider_for_tier(tier)
         case tier.to_sym
-        when :local
+        when :local
           :ollama
+        when :fleet
+          vllm_config = Legion::Settings[:llm].dig(:providers, :vllm)
+          vllm_config.is_a?(Hash) && vllm_config[:enabled] ? :vllm : :ollama
         when :openai_compat
           :openai
         when :cloud
@@ -316,7 +319,13 @@ module Legion
           ollama = Legion::Settings[:llm].dig(:providers, :ollama) || {}
           ollama[:default_model] || 'llama3'
         when :fleet
-
+          vllm_config = Legion::Settings[:llm].dig(:providers, :vllm) || {}
+          if vllm_config[:enabled]
+            vllm_config[:default_model] || 'qwen3.6-27b'
+          else
+            ollama = Legion::Settings[:llm].dig(:providers, :ollama) || {}
+            ollama[:default_model] || 'llama3'
+          end
         when :openai_compat
           'gpt-4o'
         when :cloud
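A simplified standalone version of the new `:fleet` branches: both the provider and the default model follow `providers.vllm.enabled`, with the same fallback literals as the router. The settings hashes passed in are examples only:

```ruby
def fleet_decision(settings)
  vllm   = settings.dig(:providers, :vllm) || {}
  ollama = settings.dig(:providers, :ollama) || {}

  if vllm[:enabled]
    { provider: :vllm,   model: vllm[:default_model] || 'qwen3.6-27b' }
  else
    { provider: :ollama, model: ollama[:default_model] || 'llama3' }
  end
end

fleet_decision(providers: { vllm: { enabled: true, default_model: 'qwen3.6-27b' } })
# => { provider: :vllm, model: 'qwen3.6-27b' }
fleet_decision(providers: { ollama: { enabled: true, default_model: 'qwen3.5:latest' } })
# => { provider: :ollama, model: 'qwen3.5:latest' }
```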
data/lib/legion/llm/settings.rb CHANGED
@@ -375,6 +375,13 @@
           enabled: false,
           default_model: 'qwen3.5:latest',
           base_url: 'http://localhost:11434'
+        },
+        vllm: {
+          enabled: false,
+          default_model: 'qwen3.6-27b',
+          base_url: 'http://localhost:8000/v1',
+          api_key: nil,
+          enable_thinking: true
         }
       }
     end
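The new block ships disabled. Below is an example of the keys a deployment would flip to turn the provider on; how these values reach `Legion::Settings` depends on the host application's settings loading, so treat this as the target shape rather than a literal config file:

```ruby
llm_settings = {
  providers: {
    vllm: {
      enabled: true,                       # shipped default is false
      default_model: 'qwen3.6-27b',
      base_url: 'http://gpu-node:8000/v1', # example host
      api_key: nil,                        # only set if the vLLM server requires one
      enable_thinking: true
    }
  }
}
```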
data/lib/legion/llm/version.rb CHANGED
data/lib/legion/llm.rb CHANGED
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.8.25
+  version: 0.8.27
 platform: ruby
 authors:
 - Esity
@@ -262,6 +262,7 @@ files:
 - lib/legion/llm/discovery.rb
 - lib/legion/llm/discovery/ollama.rb
 - lib/legion/llm/discovery/system.rb
+- lib/legion/llm/discovery/vllm.rb
 - lib/legion/llm/errors.rb
 - lib/legion/llm/fleet.rb
 - lib/legion/llm/fleet/dispatcher.rb
@@ -323,6 +324,7 @@ files:
 - lib/legion/llm/metering/tracker.rb
 - lib/legion/llm/metering/usage.rb
 - lib/legion/llm/patches/ruby_llm_parallel_tools.rb
+- lib/legion/llm/patches/ruby_llm_vllm.rb
 - lib/legion/llm/quality.rb
 - lib/legion/llm/quality/checker.rb
 - lib/legion/llm/quality/confidence/score.rb
|