lex-llm-vllm 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ec67bb352cf0bd9dc3ae3f0102e7d26a27ebc8c64c56d98811742c4b75e4ae0
4
- data.tar.gz: de092940c2c186e982b0d453b4f1f94157a868626e2a7b4b16cd6c0c1596b28c
3
+ metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
4
+ data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
5
5
  SHA512:
6
- metadata.gz: aad15995696865b3cc0a6e42b6e627e2d4a5dbdb7768ec676d3867964fad107709ce10bfe908128a6575d8bc81d0a8ee9b97b45c554db093b12dbea5b67300c0
7
- data.tar.gz: ecfe08d54a3113188c09f642e1b457af39eca29542315d529c477f3f2cac4096a7a1485fd2977fb6acdb7a4c6885f904510b9a3653eb9a966da5ffb6b60ac5ba
6
+ metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
7
+ data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.9 - 2026-05-12
4
+
5
+ - Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
6
+ - Add debug logging around vLLM instance discovery, fleet worker dispatch, offering construction, payload rendering, and management endpoints.
7
+
3
8
  ## 0.2.8 - 2026-05-07
4
9
 
5
10
  - Read vLLM thinking defaults from the active provider instance config so per-instance `enable_thinking` settings affect chat payloads.
@@ -3,7 +3,11 @@
3
3
  begin
4
4
  require 'legion/extensions/actors/subscription'
5
5
  rescue LoadError => e
6
- warn(e.message) if $VERBOSE
6
+ require 'legion/extensions/llm/vllm'
7
+ unless defined?(Legion::Extensions::Actors::Subscription)
8
+ Legion::Extensions::Llm::Vllm.handle_exception(e, level: :warn, handled: false,
9
+ operation: 'vllm.fleet_worker.load_actor_runtime')
10
+ end
7
11
  end
8
12
 
9
13
  unless defined?(Legion::Extensions::Actors::Subscription)
@@ -12,6 +16,7 @@ end
12
16
 
13
17
  require 'legion/extensions/llm/vllm'
14
18
  require 'legion/extensions/llm/fleet/provider_responder'
19
+ require 'legion/logging'
15
20
 
16
21
  module Legion
17
22
  module Extensions
@@ -20,6 +25,8 @@ module Legion
20
25
  module Actor
21
26
  # Subscription actor for vLLM fleet request consumption.
22
27
  class FleetWorker < Legion::Extensions::Actors::Subscription
28
+ include Legion::Logging::Helper
29
+
23
30
  def runner_class
24
31
  'Legion::Extensions::Llm::Vllm::Runners::FleetWorker'
25
32
  end
@@ -33,7 +40,9 @@ module Legion
33
40
  end
34
41
 
35
42
  def enabled?
36
- Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Vllm.discover_instances)
43
+ Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Vllm.discover_instances).tap do |enabled|
44
+ log.debug { "vLLM fleet worker enabled=#{enabled}" }
45
+ end
37
46
  end
38
47
  end
39
48
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/extensions/llm'
4
+ require 'legion/logging'
4
5
  require 'uri'
5
6
 
6
7
  module Legion
@@ -94,7 +95,9 @@ module Legion
94
95
  else
95
96
  Array(@cached_models)
96
97
  end
97
- models.map { |model_info| offering_from_model(model_info) }
98
+ models.map { |model_info| offering_from_model(model_info) }.tap do |offerings|
99
+ log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
100
+ end
98
101
  rescue StandardError => e
99
102
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
100
103
  []
@@ -106,18 +109,25 @@ module Legion
106
109
  end
107
110
 
108
111
  def reset_prefix_cache(reset_running_requests: nil, reset_external: nil)
112
+ log.debug do
113
+ "resetting vLLM prefix cache reset_running_requests=#{reset_running_requests.inspect} " \
114
+ "reset_external=#{reset_external.inspect}"
115
+ end
109
116
  connection.post(with_query(reset_prefix_cache_url, reset_running_requests:, reset_external:), {}).body
110
117
  end
111
118
 
112
119
  def reset_mm_cache
120
+ log.debug { 'resetting vLLM multimodal cache' }
113
121
  connection.post(reset_mm_cache_url, {}).body
114
122
  end
115
123
 
116
124
  def sleep(level: 1)
125
+ log.debug { "putting vLLM worker to sleep level=#{level.inspect}" }
117
126
  connection.post(with_query(sleep_url, level:), {}).body
118
127
  end
119
128
 
120
129
  def wake_up(tags: nil)
130
+ log.debug { "waking vLLM worker tags=#{Array(tags).inspect}" }
121
131
  query = Array(tags).map { |tag| ['tags', tag] }
122
132
  connection.post(with_query(wake_up_url, query), {}).body
123
133
  end
@@ -150,6 +160,10 @@ module Legion
150
160
  payload = super
151
161
  payload.delete(:reasoning_effort)
152
162
  payload[:chat_template_kwargs] = { enable_thinking: true } if thinking_enabled?(thinking)
163
+ log.debug do
164
+ "rendered vLLM payload model=#{model.respond_to?(:id) ? model.id : model} stream=#{stream} " \
165
+ "tools=#{tools.respond_to?(:size) ? tools.size : 0} thinking=#{payload.key?(:chat_template_kwargs)}"
166
+ end
153
167
  payload
154
168
  end
155
169
 
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'legion/extensions/llm/fleet/provider_responder'
4
4
  require 'legion/extensions/llm/vllm'
5
+ require 'legion/logging'
5
6
 
6
7
  module Legion
7
8
  module Extensions
@@ -10,9 +11,17 @@ module Legion
10
11
  module Runners
11
12
  # Runner entrypoint for vLLM fleet request execution.
12
13
  module FleetWorker
14
+ include Legion::Logging::Helper
15
+ extend Legion::Logging::Helper
16
+
13
17
  module_function
14
18
 
15
19
  def handle_fleet_request(payload, delivery: nil, properties: nil)
20
+ log.debug do
21
+ "handling vLLM fleet request request_id=#{payload_field(payload, :request_id).inspect} " \
22
+ "provider_instance=#{payload_field(payload, :provider_instance).inspect} " \
23
+ "operation=#{payload_field(payload, :operation).inspect}"
24
+ end
16
25
  Legion::Extensions::Llm::Fleet::ProviderResponder.call(
17
26
  payload: payload,
18
27
  provider_family: Vllm::PROVIDER_FAMILY,
@@ -22,6 +31,16 @@ module Legion
22
31
  properties: properties
23
32
  )
24
33
  end
34
+
35
+ def payload_field(payload, key)
36
+ return unless payload.respond_to?(:[])
37
+
38
+ payload[key] || payload[key.to_s]
39
+ rescue StandardError => e
40
+ handle_exception(e, level: :debug, handled: true, operation: 'vllm.fleet_worker.payload_field',
41
+ field: key)
42
+ nil
43
+ end
25
44
  end
26
45
  end
27
46
  end
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Vllm
7
- VERSION = '0.2.8'
7
+ VERSION = '0.2.9'
8
8
  end
9
9
  end
10
10
  end
@@ -3,6 +3,7 @@
3
3
  require 'legion/extensions/llm'
4
4
  require 'legion/extensions/llm/vllm/provider'
5
5
  require 'legion/extensions/llm/vllm/version'
6
+ require 'legion/logging'
6
7
 
7
8
  module Legion
8
9
  module Extensions
@@ -65,6 +66,7 @@ module Legion
65
66
  end
66
67
  end
67
68
 
69
+ log.debug { "discovered #{instances.size} vLLM instance(s): #{instances.keys.join(', ')}" }
68
70
  instances
69
71
  end
70
72
 
@@ -92,7 +94,8 @@ module Legion
92
94
  require 'uri'
93
95
  host = URI.parse(url.to_s).host.to_s.downcase
94
96
  %w[localhost 127.0.0.1 ::1].include?(host) ? :local : :direct
95
- rescue URI::InvalidURIError
97
+ rescue URI::InvalidURIError => e
98
+ handle_exception(e, level: :debug, handled: true, operation: 'vllm.infer_tier_from_endpoint')
96
99
  :direct
97
100
  end
98
101
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-vllm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO