lex-llm-vllm 0.2.9 → 0.2.10

This diff shows the changes between package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
4
- data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
3
+ metadata.gz: a3082ce3d1b3d61f220aed833a0b87138ad199cce52cfb455683c04d57db4f10
4
+ data.tar.gz: a542a059e10c1a12a58fa68f611706e3c6007c0642ba57777ddf2e7b31829e5e
5
5
  SHA512:
6
- metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
7
- data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
6
+ metadata.gz: 864d11b0394b30b9df44c5e5ffb97dcba939311c30b87c9588ca8dea73b92f2087c5a6af6c052606f97c63e93201e964d5a358b95ca6b32905f9a087d860bf80
7
+ data.tar.gz: 22df5b5ea5c9dedabe193cd2caa83944eef6f0b107f98605216587eda77ce53efb9fe4fea596fd796415d9c1e5ea7fda2d4079890ef805fd054ba78d7f398d9f
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.10 - 2026-05-13
4
+
5
+ - Add `fetch_model_detail` to re-fetch `/v1/models` for `context_window` on a cache miss.
6
+ - Pre-warm the model detail cache during offering discovery via `cache_set` using `model_detail_cache_key`.
7
+
3
8
  ## 0.2.9 - 2026-05-12
4
9
 
5
10
  - Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
@@ -132,9 +132,28 @@ module Legion
132
132
  connection.post(with_query(wake_up_url, query), {}).body
133
133
  end
134
134
 
135
+ def fetch_model_detail(model_name)
136
+ # vLLM provides context_length via /v1/models during discovery.
137
+ # Re-fetch from the models endpoint if we need it outside discovery.
138
+ response = @connection.get(models_url)
139
+ models = response.body.fetch('data', [])
140
+ entry = models.find { |m| m['id'] == model_name.to_s }
141
+ return nil unless entry
142
+
143
+ ctx = entry['max_model_len']
144
+ ctx ? { context_window: ctx } : nil
145
+ rescue StandardError => e
146
+ handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
147
+ model: model_name)
148
+ nil
149
+ end
150
+
135
151
  private
136
152
 
137
153
  def offering_from_model(model_info)
154
+ ctx = model_info.context_length
155
+ cache_set(model_detail_cache_key(model_info.id), { context_window: ctx }, ttl: 86_400) if ctx
156
+
138
157
  Legion::Extensions::Llm::Routing::ModelOffering.new(
139
158
  provider_family: :vllm,
140
159
  instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -143,8 +162,8 @@ module Legion
143
162
  model: model_info.id,
144
163
  usage_type: model_info.embedding? ? :embedding : :inference,
145
164
  capabilities: model_info.capabilities.map(&:to_s),
146
- limits: { context_window: model_info.context_length }.compact,
147
- metadata: { context_length: model_info.context_length }
165
+ limits: { context_window: ctx }.compact,
166
+ metadata: { context_length: ctx }
148
167
  )
149
168
  end
150
169
 
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Vllm
7
- VERSION = '0.2.9'
7
+ VERSION = '0.2.10'
8
8
  end
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-vllm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO