lex-llm-vllm 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/lib/legion/extensions/llm/vllm/actors/fleet_worker.rb +11 -2
- data/lib/legion/extensions/llm/vllm/provider.rb +29 -8
- data/lib/legion/extensions/llm/vllm/runners/fleet_worker.rb +19 -0
- data/lib/legion/extensions/llm/vllm/version.rb +1 -1
- data/lib/legion/extensions/llm/vllm.rb +4 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
|
|
4
|
+
data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
|
|
7
|
+
data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.9 - 2026-05-12
|
|
4
|
+
|
|
5
|
+
- Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
|
|
6
|
+
- Add debug logging around vLLM instance discovery, fleet worker dispatch, offering construction, payload rendering, and management endpoints.
|
|
7
|
+
|
|
8
|
+
## 0.2.8 - 2026-05-07
|
|
9
|
+
|
|
10
|
+
- Read vLLM thinking defaults from the active provider instance config so per-instance `enable_thinking` settings affect chat payloads.
|
|
11
|
+
|
|
3
12
|
## 0.2.7 - 2026-05-07
|
|
4
13
|
|
|
5
14
|
- Fix merge order in `discover_instances` so a user-supplied `tier:` in instance config is no longer clobbered by the `:direct` default.
|
|
data/lib/legion/extensions/llm/vllm/actors/fleet_worker.rb
CHANGED
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
begin
|
|
4
4
|
require 'legion/extensions/actors/subscription'
|
|
5
5
|
rescue LoadError => e
|
|
6
|
-
|
|
6
|
+
require 'legion/extensions/llm/vllm'
|
|
7
|
+
unless defined?(Legion::Extensions::Actors::Subscription)
|
|
8
|
+
Legion::Extensions::Llm::Vllm.handle_exception(e, level: :warn, handled: false,
|
|
9
|
+
operation: 'vllm.fleet_worker.load_actor_runtime')
|
|
10
|
+
end
|
|
7
11
|
end
|
|
8
12
|
|
|
9
13
|
unless defined?(Legion::Extensions::Actors::Subscription)
|
|
@@ -12,6 +16,7 @@ end
|
|
|
12
16
|
|
|
13
17
|
require 'legion/extensions/llm/vllm'
|
|
14
18
|
require 'legion/extensions/llm/fleet/provider_responder'
|
|
19
|
+
require 'legion/logging'
|
|
15
20
|
|
|
16
21
|
module Legion
|
|
17
22
|
module Extensions
|
|
@@ -20,6 +25,8 @@ module Legion
|
|
|
20
25
|
module Actor
|
|
21
26
|
# Subscription actor for vLLM fleet request consumption.
|
|
22
27
|
class FleetWorker < Legion::Extensions::Actors::Subscription
|
|
28
|
+
include Legion::Logging::Helper
|
|
29
|
+
|
|
23
30
|
def runner_class
|
|
24
31
|
'Legion::Extensions::Llm::Vllm::Runners::FleetWorker'
|
|
25
32
|
end
|
|
@@ -33,7 +40,9 @@ module Legion
|
|
|
33
40
|
end
|
|
34
41
|
|
|
35
42
|
def enabled?
|
|
36
|
-
Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Vllm.discover_instances)
|
|
43
|
+
Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Vllm.discover_instances).tap do |enabled|
|
|
44
|
+
log.debug { "vLLM fleet worker enabled=#{enabled}" }
|
|
45
|
+
end
|
|
37
46
|
end
|
|
38
47
|
end
|
|
39
48
|
end
|
|
data/lib/legion/extensions/llm/vllm/provider.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
|
+
require 'legion/logging'
|
|
4
5
|
require 'uri'
|
|
5
6
|
|
|
6
7
|
module Legion
|
|
@@ -94,7 +95,9 @@ module Legion
|
|
|
94
95
|
else
|
|
95
96
|
Array(@cached_models)
|
|
96
97
|
end
|
|
97
|
-
models.map { |model_info| offering_from_model(model_info) }
|
|
98
|
+
models.map { |model_info| offering_from_model(model_info) }.tap do |offerings|
|
|
99
|
+
log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
|
|
100
|
+
end
|
|
98
101
|
rescue StandardError => e
|
|
99
102
|
handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
|
|
100
103
|
[]
|
|
@@ -106,18 +109,25 @@ module Legion
|
|
|
106
109
|
end
|
|
107
110
|
|
|
108
111
|
def reset_prefix_cache(reset_running_requests: nil, reset_external: nil)
|
|
112
|
+
log.debug do
|
|
113
|
+
"resetting vLLM prefix cache reset_running_requests=#{reset_running_requests.inspect} " \
|
|
114
|
+
"reset_external=#{reset_external.inspect}"
|
|
115
|
+
end
|
|
109
116
|
connection.post(with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:), {}).body
|
|
110
117
|
end
|
|
111
118
|
|
|
112
119
|
def reset_mm_cache
|
|
120
|
+
log.debug { 'resetting vLLM multimodal cache' }
|
|
113
121
|
connection.post(reset_mm_cache_url, {}).body
|
|
114
122
|
end
|
|
115
123
|
|
|
116
124
|
def sleep(level: 1)
|
|
125
|
+
log.debug { "putting vLLM worker to sleep level=#{level.inspect}" }
|
|
117
126
|
connection.post(with_query(sleep_url, level:), {}).body
|
|
118
127
|
end
|
|
119
128
|
|
|
120
129
|
def wake_up(tags: nil)
|
|
130
|
+
log.debug { "waking vLLM worker tags=#{Array(tags).inspect}" }
|
|
121
131
|
query = Array(tags).map { |tag| ['tags', tag] }
|
|
122
132
|
connection.post(with_query(wake_up_url, query), {}).body
|
|
123
133
|
end
|
|
@@ -150,6 +160,10 @@ module Legion
|
|
|
150
160
|
payload = super
|
|
151
161
|
payload.delete(:reasoning_effort)
|
|
152
162
|
payload[:chat_template_kwargs] = { enable_thinking: true } if thinking_enabled?(thinking)
|
|
163
|
+
log.debug do
|
|
164
|
+
"rendered vLLM payload model=#{model.respond_to?(:id) ? model.id : model} stream=#{stream} " \
|
|
165
|
+
"tools=#{tools.respond_to?(:size) ? tools.size : 0} thinking=#{payload.key?(:chat_template_kwargs)}"
|
|
166
|
+
end
|
|
153
167
|
payload
|
|
154
168
|
end
|
|
155
169
|
|
|
@@ -162,18 +176,25 @@ module Legion
|
|
|
162
176
|
end
|
|
163
177
|
|
|
164
178
|
def vllm_thinking_setting
|
|
179
|
+
instance_thinking_enabled? || global_thinking_enabled?
|
|
180
|
+
rescue StandardError => e
|
|
181
|
+
handle_exception(e, level: :debug, handled: true, operation: 'vllm.thinking_setting')
|
|
182
|
+
false
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def instance_thinking_enabled?
|
|
186
|
+
return config.enable_thinking if config.respond_to?(:enable_thinking)
|
|
187
|
+
|
|
188
|
+
config.respond_to?(:[]) && config[:enable_thinking] == true
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def global_thinking_enabled?
|
|
165
192
|
return false unless defined?(Legion::Settings)
|
|
166
193
|
|
|
167
194
|
vllm = Legion::Settings.dig(:llm, :providers, :vllm)
|
|
168
195
|
return false unless vllm.is_a?(Hash)
|
|
169
196
|
|
|
170
|
-
vllm[:enable_thinking] == true ||
|
|
171
|
-
vllm['enable_thinking'] == true ||
|
|
172
|
-
vllm.dig(:instances, :default, :enable_thinking) == true ||
|
|
173
|
-
vllm.dig('instances', 'default', 'enable_thinking') == true
|
|
174
|
-
rescue StandardError => e
|
|
175
|
-
handle_exception(e, level: :debug, handled: true, operation: 'vllm.thinking_setting')
|
|
176
|
-
false
|
|
197
|
+
vllm[:enable_thinking] == true || vllm['enable_thinking'] == true
|
|
177
198
|
end
|
|
178
199
|
|
|
179
200
|
def parse_list_models_response(response, provider, capabilities)
|
|
data/lib/legion/extensions/llm/vllm/runners/fleet_worker.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm/fleet/provider_responder'
|
|
4
4
|
require 'legion/extensions/llm/vllm'
|
|
5
|
+
require 'legion/logging'
|
|
5
6
|
|
|
6
7
|
module Legion
|
|
7
8
|
module Extensions
|
|
@@ -10,9 +11,17 @@ module Legion
|
|
|
10
11
|
module Runners
|
|
11
12
|
# Runner entrypoint for vLLM fleet request execution.
|
|
12
13
|
module FleetWorker
|
|
14
|
+
include Legion::Logging::Helper
|
|
15
|
+
extend Legion::Logging::Helper
|
|
16
|
+
|
|
13
17
|
module_function
|
|
14
18
|
|
|
15
19
|
def handle_fleet_request(payload, delivery: nil, properties: nil)
|
|
20
|
+
log.debug do
|
|
21
|
+
"handling vLLM fleet request request_id=#{payload_field(payload, :request_id).inspect} " \
|
|
22
|
+
"provider_instance=#{payload_field(payload, :provider_instance).inspect} " \
|
|
23
|
+
"operation=#{payload_field(payload, :operation).inspect}"
|
|
24
|
+
end
|
|
16
25
|
Legion::Extensions::Llm::Fleet::ProviderResponder.call(
|
|
17
26
|
payload: payload,
|
|
18
27
|
provider_family: Vllm::PROVIDER_FAMILY,
|
|
@@ -22,6 +31,16 @@ module Legion
|
|
|
22
31
|
properties: properties
|
|
23
32
|
)
|
|
24
33
|
end
|
|
34
|
+
|
|
35
|
+
def payload_field(payload, key)
|
|
36
|
+
return unless payload.respond_to?(:[])
|
|
37
|
+
|
|
38
|
+
payload[key] || payload[key.to_s]
|
|
39
|
+
rescue StandardError => e
|
|
40
|
+
handle_exception(e, level: :debug, handled: true, operation: 'vllm.fleet_worker.payload_field',
|
|
41
|
+
field: key)
|
|
42
|
+
nil
|
|
43
|
+
end
|
|
25
44
|
end
|
|
26
45
|
end
|
|
27
46
|
end
|
|
data/lib/legion/extensions/llm/vllm.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
4
|
require 'legion/extensions/llm/vllm/provider'
|
|
5
5
|
require 'legion/extensions/llm/vllm/version'
|
|
6
|
+
require 'legion/logging'
|
|
6
7
|
|
|
7
8
|
module Legion
|
|
8
9
|
module Extensions
|
|
@@ -65,6 +66,7 @@ module Legion
|
|
|
65
66
|
end
|
|
66
67
|
end
|
|
67
68
|
|
|
69
|
+
log.debug { "discovered #{instances.size} vLLM instance(s): #{instances.keys.join(', ')}" }
|
|
68
70
|
instances
|
|
69
71
|
end
|
|
70
72
|
|
|
@@ -92,7 +94,8 @@ module Legion
|
|
|
92
94
|
require 'uri'
|
|
93
95
|
host = URI.parse(url.to_s).host.to_s.downcase
|
|
94
96
|
%w[localhost 127.0.0.1 ::1].include?(host) ? :local : :direct
|
|
95
|
-
rescue URI::InvalidURIError
|
|
97
|
+
rescue URI::InvalidURIError => e
|
|
98
|
+
handle_exception(e, level: :debug, handled: true, operation: 'vllm.infer_tier_from_endpoint')
|
|
96
99
|
:direct
|
|
97
100
|
end
|
|
98
101
|
end
|