lex-llm-vllm 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09f2552a0f25448494068e45c5ae44d199b7311b3f1256893341c5e4fea6db45'
4
- data.tar.gz: 4ca2f6e31f494983d97d67ab4d45c56e7c0e8a768c322fbfa313119789c96557
3
+ metadata.gz: 3b6bccbfd1d8e01fd38459107474d9ca3853f7d847ff3b5d71a8df3ff7a66c4b
4
+ data.tar.gz: f2bd935851929d113f078301a08119a425a68c35907094ee66d69d10af3e5f6f
5
5
  SHA512:
6
- metadata.gz: 964bb4761e53e9141337266c38f470bc358d479f001dd3cb106ce21daeaf63fa3fe777b7f80f7200aeed3cbbcfcb121195614afbaecb26de4a13f618ae882146
7
- data.tar.gz: 367efe9b428ca024db4d2de25ffef52e79207b17934a68b47eaa6a10d7b95f6e0dcc05a43d758b09d06fd7db27246b6e4acb9b901fc950fc9b636ec555822f34
6
+ metadata.gz: 837e7ea4d14a09dd44922cb6193e4650b92aea3c4eea8cd85ed7916d766c84b7f8887961b0fb72ab8a1578d4005742f61ed44435d181235bb4f26042aa6aecf8
7
+ data.tar.gz: 8c73bfdd7921d1f99d788d4a311be574fc7cb9f61c7ebb6a79bdf7ea4a68622f020ace60858134288eea85186beb3d4c32b97c5ac714515a124b8110f3253679
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.7 - 2026-04-30
4
+
5
+ - Enable stream_usage_supported? for streaming token usage reporting
6
+ - Add render_payload override with chat_template_kwargs for vLLM thinking mode
7
+ - Add thinking_enabled? setting support from Legion::Settings
8
+
9
+ ## 0.1.6 - 2026-04-28
10
+
11
+ - Publish best-effort `llm.registry` readiness and discovered-model availability events when transport is loaded.
12
+
3
13
  ## 0.1.5 - 2026-04-28
4
14
 
5
15
  - Require current shared Legion JSON, logging, settings, and LLM extension gems.
@@ -12,11 +12,17 @@ module Legion
12
12
  include Legion::Extensions::Llm::Provider::OpenAICompatible
13
13
 
14
14
  class << self
15
+ attr_writer :registry_publisher
16
+
15
17
  def slug = 'vllm'
16
18
  def local? = true
17
19
  def configuration_options = %i[vllm_api_base vllm_api_key]
18
20
  def configuration_requirements = []
19
21
  def capabilities = Capabilities
22
+
23
+ def registry_publisher
24
+ @registry_publisher ||= RegistryPublisher.new
25
+ end
20
26
  end
21
27
 
22
28
  # Capability predicates for vLLM OpenAI-compatible model offerings.
@@ -39,6 +45,8 @@ module Legion
39
45
  end
40
46
  end
41
47
 
48
+ def stream_usage_supported? = true
49
+
42
50
  def api_base
43
51
  config.vllm_api_base || 'http://localhost:8000'
44
52
  end
@@ -61,6 +69,18 @@ module Legion
61
69
  connection.get(health_url).body
62
70
  end
63
71
 
72
+ def readiness(live: false)
73
+ super.tap do |metadata|
74
+ self.class.registry_publisher.publish_readiness_async(metadata) if live
75
+ end
76
+ end
77
+
78
+ def list_models
79
+ super.tap do |models|
80
+ self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
81
+ end
82
+ end
83
+
64
84
  def version
65
85
  connection.get(version_url).body
66
86
  end
@@ -84,6 +104,30 @@ module Legion
84
104
 
85
105
  private
86
106
 
107
+ def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
108
+ payload = super
109
+ payload.delete(:reasoning_effort)
110
+ payload[:chat_template_kwargs] = { enable_thinking: true } if thinking_enabled?(thinking)
111
+ payload
112
+ end
113
+
114
+ def thinking_enabled?(thinking)
115
+ return true if thinking.is_a?(Hash) && (thinking[:enabled] != false)
116
+ return true if thinking.respond_to?(:enabled?) && thinking.enabled?
117
+ return vllm_thinking_setting unless thinking
118
+
119
+ false
120
+ end
121
+
122
+ def vllm_thinking_setting
123
+ return false unless defined?(Legion::Settings)
124
+
125
+ vllm = Legion::Settings.dig(:llm, :providers, :vllm)
126
+ vllm.is_a?(Hash) && (vllm[:enable_thinking] == true || vllm['enable_thinking'] == true)
127
+ rescue StandardError
128
+ false
129
+ end
130
+
87
131
  def with_query(path, positional = [], **params)
88
132
  pairs = positional + params.compact.map { |key, value| [key.to_s, value] }
89
133
  return path if pairs.empty?
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Vllm
7
+ # Builds sanitized lex-llm registry envelopes for vLLM provider state.
8
+ class RegistryEventBuilder
9
+ def readiness(readiness)
10
+ registry_event_class.public_send(
11
+ readiness[:ready] ? :available : :unavailable,
12
+ provider_offering(readiness),
13
+ runtime: runtime_metadata,
14
+ health: readiness_health(readiness),
15
+ metadata: readiness_metadata(readiness)
16
+ )
17
+ end
18
+
19
+ def model_available(model, readiness:)
20
+ registry_event_class.available(
21
+ model_offering(model),
22
+ runtime: runtime_metadata,
23
+ health: model_health(readiness),
24
+ metadata: model_metadata(model)
25
+ )
26
+ end
27
+
28
+ private
29
+
30
+ def provider_offering(readiness)
31
+ {
32
+ provider_family: :vllm,
33
+ provider_instance: provider_instance,
34
+ transport: :http,
35
+ model: 'provider-readiness',
36
+ usage_type: :inference,
37
+ capabilities: [],
38
+ health: readiness_health(readiness),
39
+ metadata: { lex: :llm_vllm, provider_readiness: true }
40
+ }
41
+ end
42
+
43
+ def model_offering(model)
44
+ {
45
+ provider_family: :vllm,
46
+ provider_instance: provider_instance,
47
+ transport: :http,
48
+ model: model.id,
49
+ usage_type: usage_type_for(model),
50
+ capabilities: Array(model.capabilities).map(&:to_sym),
51
+ limits: model_limits(model),
52
+ metadata: { lex: :llm_vllm, model_name: model.name }.compact
53
+ }
54
+ end
55
+
56
+ def readiness_health(readiness)
57
+ health = {
58
+ ready: readiness[:ready] == true,
59
+ status: readiness[:ready] ? :available : :unavailable,
60
+ checked: readiness.dig(:health, :checked) != false
61
+ }
62
+ add_readiness_error(health, readiness[:health])
63
+ end
64
+
65
+ def add_readiness_error(health, source)
66
+ error = source.is_a?(Hash) ? source : {}
67
+ error_class = error[:error] || error['error']
68
+ error_message = error[:message] || error['message']
69
+ health[:error_class] = error_class if error_class
70
+ health[:error] = error_message if error_message
71
+ health
72
+ end
73
+
74
+ def model_health(readiness)
75
+ ready = readiness.fetch(:ready, true) == true
76
+ { ready:, status: ready ? :available : :degraded }
77
+ end
78
+
79
+ def readiness_metadata(readiness)
80
+ {
81
+ extension: :lex_llm_vllm,
82
+ provider: :vllm,
83
+ configured: readiness[:configured] == true,
84
+ live: readiness[:live] == true
85
+ }
86
+ end
87
+
88
+ def model_metadata(model)
89
+ { extension: :lex_llm_vllm, provider: :vllm, model_type: model.type }
90
+ end
91
+
92
+ def runtime_metadata
93
+ { node: provider_instance }
94
+ end
95
+
96
+ def model_limits(model)
97
+ {
98
+ context_window: model.context_window,
99
+ max_output_tokens: model.max_output_tokens
100
+ }.compact
101
+ end
102
+
103
+ def usage_type_for(model)
104
+ model.type == 'embedding' ? :embedding : :inference
105
+ end
106
+
107
+ def provider_instance
108
+ configured_node = (::Legion::Settings.dig(:node, :canonical_name) if defined?(::Legion::Settings))
109
+ value = configured_node.to_s.strip
110
+ value.empty? ? :vllm : value.to_sym
111
+ rescue StandardError
112
+ :vllm
113
+ end
114
+
115
+ def registry_event_class
116
+ ::Legion::Extensions::Llm::Routing::RegistryEvent
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Vllm
7
+ # Best-effort publisher for vLLM provider availability events.
8
+ class RegistryPublisher
9
+ APP_ID = 'lex-llm-vllm'
10
+
11
+ def initialize(builder: RegistryEventBuilder.new)
12
+ @builder = builder
13
+ end
14
+
15
+ def publish_readiness_async(readiness)
16
+ schedule { publish_event(@builder.readiness(readiness)) }
17
+ end
18
+
19
+ def publish_models_async(models, readiness:)
20
+ schedule do
21
+ Array(models).each do |model|
22
+ publish_event(@builder.model_available(model, readiness:))
23
+ end
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def schedule(&)
30
+ return false unless publishing_available?
31
+
32
+ Thread.new do
33
+ Thread.current.abort_on_exception = false
34
+ yield
35
+ rescue StandardError => e
36
+ log_publish_failure(e, level: :debug)
37
+ end
38
+ rescue StandardError => e
39
+ log_publish_failure(e, level: :debug)
40
+ false
41
+ end
42
+
43
+ def publish_event(event)
44
+ return false unless publishing_available?
45
+
46
+ message_class.new(event:, app_id: APP_ID).publish(spool: false)
47
+ rescue StandardError => e
48
+ log_publish_failure(e)
49
+ false
50
+ end
51
+
52
+ def publishing_available?
53
+ return false unless registry_event_available?
54
+ return false unless transport_message_available?
55
+ return true unless defined?(::Legion::Transport::Connection)
56
+ return true unless ::Legion::Transport::Connection.respond_to?(:session_open?)
57
+
58
+ ::Legion::Transport::Connection.session_open?
59
+ rescue StandardError
60
+ false
61
+ end
62
+
63
+ def registry_event_available?
64
+ defined?(::Legion::Extensions::Llm::Routing::RegistryEvent)
65
+ end
66
+
67
+ def transport_message_available?
68
+ return true if message_class_defined?
69
+ return false unless defined?(::Legion::Transport::Message) && defined?(::Legion::Transport::Exchange)
70
+
71
+ require 'legion/extensions/llm/vllm/transport/messages/registry_event'
72
+ message_class_defined?
73
+ rescue LoadError
74
+ false
75
+ end
76
+
77
+ def message_class_defined?
78
+ defined?(::Legion::Extensions::Llm::Vllm::Transport::Messages::RegistryEvent)
79
+ end
80
+
81
+ def message_class
82
+ ::Legion::Extensions::Llm::Vllm::Transport::Messages::RegistryEvent
83
+ end
84
+
85
+ def log_publish_failure(error, level: :warn)
86
+ message = "[lex-llm-vllm] llm.registry publish failed: #{error.class}: #{error.message}"
87
+ logger = ::Legion::Extensions::Llm.logger if defined?(::Legion::Extensions::Llm)
88
+ if logger.respond_to?(level)
89
+ logger.public_send(level, message)
90
+ elsif logger.respond_to?(:debug)
91
+ logger.debug(message)
92
+ end
93
+ rescue StandardError
94
+ nil
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Vllm
7
+ module Transport
8
+ module Exchanges
9
+ # Topic exchange for vLLM provider availability events.
10
+ class LlmRegistry < ::Legion::Transport::Exchange
11
+ def exchange_name
12
+ 'llm.registry'
13
+ end
14
+
15
+ def default_type
16
+ 'topic'
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/llm/vllm/transport/exchanges/llm_registry'
4
+
5
+ module Legion
6
+ module Extensions
7
+ module Llm
8
+ module Vllm
9
+ module Transport
10
+ module Messages
11
+ # Publishes lex-llm RegistryEvent envelopes to the llm.registry exchange.
12
+ class RegistryEvent < ::Legion::Transport::Message
13
+ def initialize(event:, **options)
14
+ super(**event.to_h.merge(options))
15
+ end
16
+
17
+ def exchange
18
+ Transport::Exchanges::LlmRegistry
19
+ end
20
+
21
+ def routing_key
22
+ @options[:routing_key] || "llm.registry.#{@options.fetch(:event_type)}"
23
+ end
24
+
25
+ def type
26
+ 'llm.registry.event'
27
+ end
28
+
29
+ def app_id
30
+ @options[:app_id] || RegistryPublisher::APP_ID
31
+ end
32
+
33
+ def persistent # rubocop:disable Naming/PredicateMethod
34
+ false
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Vllm
7
- VERSION = '0.1.5'
7
+ VERSION = '0.1.7'
8
8
  end
9
9
  end
10
10
  end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'legion/extensions/llm'
4
+ require 'legion/extensions/llm/vllm/registry_event_builder'
4
5
  require 'legion/extensions/llm/vllm/provider'
6
+ require 'legion/extensions/llm/vllm/registry_publisher'
5
7
  require 'legion/extensions/llm/vllm/version'
6
8
 
7
9
  module Legion
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-vllm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -84,6 +84,10 @@ files:
84
84
  - lex-llm-vllm.gemspec
85
85
  - lib/legion/extensions/llm/vllm.rb
86
86
  - lib/legion/extensions/llm/vllm/provider.rb
87
+ - lib/legion/extensions/llm/vllm/registry_event_builder.rb
88
+ - lib/legion/extensions/llm/vllm/registry_publisher.rb
89
+ - lib/legion/extensions/llm/vllm/transport/exchanges/llm_registry.rb
90
+ - lib/legion/extensions/llm/vllm/transport/messages/registry_event.rb
87
91
  - lib/legion/extensions/llm/vllm/version.rb
88
92
  homepage: https://github.com/LegionIO/lex-llm-vllm
89
93
  licenses: