lex-llm-vllm 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
4
- data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
3
+ metadata.gz: c4369ada04bb372dd59d3a57e2490c2b984c6ec1d7c4277c044b0f4126ba6136
4
+ data.tar.gz: 922947d49abecdbefbc7818dff56a82a35d1cae0978ebb94f5e9711f02a402c1
5
5
  SHA512:
6
- metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
7
- data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
6
+ metadata.gz: e2d9c3ffc63f2ba151573ebd898c0a08e8f73c52fe11ba38d452470e61569a33914d253ad8b1d08b235f90cfbc1f9613dea4bae5728d4b905539241e7034f9ff
7
+ data.tar.gz: 9cf36664ead33936f9c70ca3e892603ed6a66e1e3ef82560e7016e94207835ae6de4af7e7e9041e70eeb69b3117a48d780ca4a7e71aa3626ff8cc497800e8453
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.11 - 2026-05-21
4
+
5
+ - Add `default_transport`/`default_tier` class declarations, remove duplicate instance methods
6
+ - Add `model_allowed?` filtering in `discover_offerings`
7
+ - Include identity headers from the base provider in request `headers`
8
+ - `api_base` falls back to `settings[:endpoint]` when `vllm_api_base` is not configured
9
+
10
+
11
+ ## 0.2.10 - 2026-05-13
12
+
13
+ - Add `fetch_model_detail` to re-fetch `/v1/models` for `context_window` on a cache miss.
14
+ - Pre-warm the model detail cache during offering discovery via `cache_set` using `model_detail_cache_key`.
15
+
3
16
  ## 0.2.9 - 2026-05-12
4
17
 
5
18
  - Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
@@ -16,6 +16,8 @@ module Legion
16
16
  class << self
17
17
  def slug = 'vllm'
18
18
  def local? = false
19
+ def default_transport = :http
20
+ def default_tier = :direct
19
21
  def configuration_options = %i[vllm_api_base vllm_api_key]
20
22
  def configuration_requirements = []
21
23
  def capabilities = Capabilities
@@ -52,14 +54,14 @@ module Legion
52
54
  end
53
55
 
54
56
  def api_base
55
- normalize_url(config.vllm_api_base || 'localhost:8000')
57
+ normalize_url(config.vllm_api_base || settings[:endpoint] || 'http://localhost:8000')
56
58
  end
57
59
 
58
60
  def headers
61
+ hdrs = identity_headers
59
62
  token = config.vllm_api_key
60
- return {} if token.nil? || token.to_s.empty?
61
-
62
- { 'Authorization' => "Bearer #{token}" }
63
+ hdrs['Authorization'] = "Bearer #{token}" unless token.nil? || token.to_s.empty?
64
+ hdrs
63
65
  end
64
66
 
65
67
  def health_url = '/health'
@@ -95,9 +97,13 @@ module Legion
95
97
  else
96
98
  Array(@cached_models)
97
99
  end
98
- models.map { |model_info| offering_from_model(model_info) }.tap do |offerings|
99
- log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
100
+ offerings = models.filter_map do |model_info|
101
+ next unless model_allowed?(model_info.id)
102
+
103
+ offering_from_model(model_info)
100
104
  end
105
+ log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
106
+ offerings
101
107
  rescue StandardError => e
102
108
  handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
103
109
  []
@@ -132,9 +138,28 @@ module Legion
132
138
  connection.post(with_query(wake_up_url, query), {}).body
133
139
  end
134
140
 
141
+ def fetch_model_detail(model_name)
142
+ # vLLM provides context_length via /v1/models during discovery.
143
+ # Re-fetch from the models endpoint if we need it outside discovery.
144
+ response = @connection.get(models_url)
145
+ models = response.body.fetch('data', [])
146
+ entry = models.find { |m| m['id'] == model_name.to_s }
147
+ return nil unless entry
148
+
149
+ ctx = entry['max_model_len']
150
+ ctx ? { context_window: ctx } : nil
151
+ rescue StandardError => e
152
+ handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
153
+ model: model_name)
154
+ nil
155
+ end
156
+
135
157
  private
136
158
 
137
159
  def offering_from_model(model_info)
160
+ ctx = model_info.context_length
161
+ cache_set(model_detail_cache_key(model_info.id), { context_window: ctx }, ttl: 86_400) if ctx
162
+
138
163
  Legion::Extensions::Llm::Routing::ModelOffering.new(
139
164
  provider_family: :vllm,
140
165
  instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -143,19 +168,11 @@ module Legion
143
168
  model: model_info.id,
144
169
  usage_type: model_info.embedding? ? :embedding : :inference,
145
170
  capabilities: model_info.capabilities.map(&:to_s),
146
- limits: { context_window: model_info.context_length }.compact,
147
- metadata: { context_length: model_info.context_length }
171
+ limits: { context_window: ctx }.compact,
172
+ metadata: { context_length: ctx }
148
173
  )
149
174
  end
150
175
 
151
- def offering_transport
152
- config.respond_to?(:transport) ? config.transport : :http
153
- end
154
-
155
- def offering_tier
156
- config.respond_to?(:tier) ? config.tier : :direct
157
- end
158
-
159
176
  def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
160
177
  payload = super
161
178
  payload.delete(:reasoning_effort)
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Vllm
7
- VERSION = '0.2.9'
7
+ VERSION = '0.2.11'
8
8
  end
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-vllm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO