lex-llm-vllm 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/legion/extensions/llm/vllm/actors/fleet_worker.rb +11 -2
- data/lib/legion/extensions/llm/vllm/provider.rb +15 -1
- data/lib/legion/extensions/llm/vllm/runners/fleet_worker.rb +19 -0
- data/lib/legion/extensions/llm/vllm/version.rb +1 -1
- data/lib/legion/extensions/llm/vllm.rb +4 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
|
|
4
|
+
data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
|
|
7
|
+
data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.9 - 2026-05-12
|
|
4
|
+
|
|
5
|
+
- Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
|
|
6
|
+
- Add debug logging around vLLM instance discovery, fleet worker dispatch, offering construction, payload rendering, and management endpoints.
|
|
7
|
+
|
|
3
8
|
## 0.2.8 - 2026-05-07
|
|
4
9
|
|
|
5
10
|
- Read vLLM thinking defaults from the active provider instance config so per-instance `enable_thinking` settings affect chat payloads.
|
|
data/lib/legion/extensions/llm/vllm/actors/fleet_worker.rb
CHANGED
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
begin
|
|
4
4
|
require 'legion/extensions/actors/subscription'
|
|
5
5
|
rescue LoadError => e
|
|
6
|
-
|
|
6
|
+
require 'legion/extensions/llm/vllm'
|
|
7
|
+
unless defined?(Legion::Extensions::Actors::Subscription)
|
|
8
|
+
Legion::Extensions::Llm::Vllm.handle_exception(e, level: :warn, handled: false,
|
|
9
|
+
operation: 'vllm.fleet_worker.load_actor_runtime')
|
|
10
|
+
end
|
|
7
11
|
end
|
|
8
12
|
|
|
9
13
|
unless defined?(Legion::Extensions::Actors::Subscription)
|
|
@@ -12,6 +16,7 @@ end
|
|
|
12
16
|
|
|
13
17
|
require 'legion/extensions/llm/vllm'
|
|
14
18
|
require 'legion/extensions/llm/fleet/provider_responder'
|
|
19
|
+
require 'legion/logging'
|
|
15
20
|
|
|
16
21
|
module Legion
|
|
17
22
|
module Extensions
|
|
@@ -20,6 +25,8 @@ module Legion
|
|
|
20
25
|
module Actor
|
|
21
26
|
# Subscription actor for vLLM fleet request consumption.
|
|
22
27
|
class FleetWorker < Legion::Extensions::Actors::Subscription
|
|
28
|
+
include Legion::Logging::Helper
|
|
29
|
+
|
|
23
30
|
def runner_class
|
|
24
31
|
'Legion::Extensions::Llm::Vllm::Runners::FleetWorker'
|
|
25
32
|
end
|
|
@@ -33,7 +40,9 @@ module Legion
|
|
|
33
40
|
end
|
|
34
41
|
|
|
35
42
|
def enabled?
|
|
36
|
-
Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Vllm.discover_instances)
|
|
43
|
+
Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Vllm.discover_instances).tap do |enabled|
|
|
44
|
+
log.debug { "vLLM fleet worker enabled=#{enabled}" }
|
|
45
|
+
end
|
|
37
46
|
end
|
|
38
47
|
end
|
|
39
48
|
end
|
|
data/lib/legion/extensions/llm/vllm/provider.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
|
+
require 'legion/logging'
|
|
4
5
|
require 'uri'
|
|
5
6
|
|
|
6
7
|
module Legion
|
|
@@ -94,7 +95,9 @@ module Legion
|
|
|
94
95
|
else
|
|
95
96
|
Array(@cached_models)
|
|
96
97
|
end
|
|
97
|
-
models.map { |model_info| offering_from_model(model_info) }
|
|
98
|
+
models.map { |model_info| offering_from_model(model_info) }.tap do |offerings|
|
|
99
|
+
log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
|
|
100
|
+
end
|
|
98
101
|
rescue StandardError => e
|
|
99
102
|
handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
|
|
100
103
|
[]
|
|
@@ -106,18 +109,25 @@ module Legion
|
|
|
106
109
|
end
|
|
107
110
|
|
|
108
111
|
def reset_prefix_cache(reset_running_requests: nil, reset_external: nil)
|
|
112
|
+
log.debug do
|
|
113
|
+
"resetting vLLM prefix cache reset_running_requests=#{reset_running_requests.inspect} " \
|
|
114
|
+
"reset_external=#{reset_external.inspect}"
|
|
115
|
+
end
|
|
109
116
|
connection.post(with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:), {}).body
|
|
110
117
|
end
|
|
111
118
|
|
|
112
119
|
def reset_mm_cache
|
|
120
|
+
log.debug { 'resetting vLLM multimodal cache' }
|
|
113
121
|
connection.post(reset_mm_cache_url, {}).body
|
|
114
122
|
end
|
|
115
123
|
|
|
116
124
|
def sleep(level: 1)
|
|
125
|
+
log.debug { "putting vLLM worker to sleep level=#{level.inspect}" }
|
|
117
126
|
connection.post(with_query(sleep_url, level:), {}).body
|
|
118
127
|
end
|
|
119
128
|
|
|
120
129
|
def wake_up(tags: nil)
|
|
130
|
+
log.debug { "waking vLLM worker tags=#{Array(tags).inspect}" }
|
|
121
131
|
query = Array(tags).map { |tag| ['tags', tag] }
|
|
122
132
|
connection.post(with_query(wake_up_url, query), {}).body
|
|
123
133
|
end
|
|
@@ -150,6 +160,10 @@ module Legion
|
|
|
150
160
|
payload = super
|
|
151
161
|
payload.delete(:reasoning_effort)
|
|
152
162
|
payload[:chat_template_kwargs] = { enable_thinking: true } if thinking_enabled?(thinking)
|
|
163
|
+
log.debug do
|
|
164
|
+
"rendered vLLM payload model=#{model.respond_to?(:id) ? model.id : model} stream=#{stream} " \
|
|
165
|
+
"tools=#{tools.respond_to?(:size) ? tools.size : 0} thinking=#{payload.key?(:chat_template_kwargs)}"
|
|
166
|
+
end
|
|
153
167
|
payload
|
|
154
168
|
end
|
|
155
169
|
|
|
data/lib/legion/extensions/llm/vllm/runners/fleet_worker.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'legion/extensions/llm/fleet/provider_responder'
|
|
4
4
|
require 'legion/extensions/llm/vllm'
|
|
5
|
+
require 'legion/logging'
|
|
5
6
|
|
|
6
7
|
module Legion
|
|
7
8
|
module Extensions
|
|
@@ -10,9 +11,17 @@ module Legion
|
|
|
10
11
|
module Runners
|
|
11
12
|
# Runner entrypoint for vLLM fleet request execution.
|
|
12
13
|
module FleetWorker
|
|
14
|
+
include Legion::Logging::Helper
|
|
15
|
+
extend Legion::Logging::Helper
|
|
16
|
+
|
|
13
17
|
module_function
|
|
14
18
|
|
|
15
19
|
def handle_fleet_request(payload, delivery: nil, properties: nil)
|
|
20
|
+
log.debug do
|
|
21
|
+
"handling vLLM fleet request request_id=#{payload_field(payload, :request_id).inspect} " \
|
|
22
|
+
"provider_instance=#{payload_field(payload, :provider_instance).inspect} " \
|
|
23
|
+
"operation=#{payload_field(payload, :operation).inspect}"
|
|
24
|
+
end
|
|
16
25
|
Legion::Extensions::Llm::Fleet::ProviderResponder.call(
|
|
17
26
|
payload: payload,
|
|
18
27
|
provider_family: Vllm::PROVIDER_FAMILY,
|
|
@@ -22,6 +31,16 @@ module Legion
|
|
|
22
31
|
properties: properties
|
|
23
32
|
)
|
|
24
33
|
end
|
|
34
|
+
|
|
35
|
+
def payload_field(payload, key)
|
|
36
|
+
return unless payload.respond_to?(:[])
|
|
37
|
+
|
|
38
|
+
payload[key] || payload[key.to_s]
|
|
39
|
+
rescue StandardError => e
|
|
40
|
+
handle_exception(e, level: :debug, handled: true, operation: 'vllm.fleet_worker.payload_field',
|
|
41
|
+
field: key)
|
|
42
|
+
nil
|
|
43
|
+
end
|
|
25
44
|
end
|
|
26
45
|
end
|
|
27
46
|
end
|
|
data/lib/legion/extensions/llm/vllm.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require 'legion/extensions/llm'
|
|
4
4
|
require 'legion/extensions/llm/vllm/provider'
|
|
5
5
|
require 'legion/extensions/llm/vllm/version'
|
|
6
|
+
require 'legion/logging'
|
|
6
7
|
|
|
7
8
|
module Legion
|
|
8
9
|
module Extensions
|
|
@@ -65,6 +66,7 @@ module Legion
|
|
|
65
66
|
end
|
|
66
67
|
end
|
|
67
68
|
|
|
69
|
+
log.debug { "discovered #{instances.size} vLLM instance(s): #{instances.keys.join(', ')}" }
|
|
68
70
|
instances
|
|
69
71
|
end
|
|
70
72
|
|
|
@@ -92,7 +94,8 @@ module Legion
|
|
|
92
94
|
require 'uri'
|
|
93
95
|
host = URI.parse(url.to_s).host.to_s.downcase
|
|
94
96
|
%w[localhost 127.0.0.1 ::1].include?(host) ? :local : :direct
|
|
95
|
-
rescue URI::InvalidURIError
|
|
97
|
+
rescue URI::InvalidURIError => e
|
|
98
|
+
handle_exception(e, level: :debug, handled: true, operation: 'vllm.infer_tier_from_endpoint')
|
|
96
99
|
:direct
|
|
97
100
|
end
|
|
98
101
|
end
|