lex-llm-vllm 0.2.9 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/legion/extensions/llm/vllm/provider.rb +21 -2
- data/lib/legion/extensions/llm/vllm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a3082ce3d1b3d61f220aed833a0b87138ad199cce52cfb455683c04d57db4f10
|
|
4
|
+
data.tar.gz: a542a059e10c1a12a58fa68f611706e3c6007c0642ba57777ddf2e7b31829e5e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 864d11b0394b30b9df44c5e5ffb97dcba939311c30b87c9588ca8dea73b92f2087c5a6af6c052606f97c63e93201e964d5a358b95ca6b32905f9a087d860bf80
|
|
7
|
+
data.tar.gz: 22df5b5ea5c9dedabe193cd2caa83944eef6f0b107f98605216587eda77ce53efb9fe4fea596fd796415d9c1e5ea7fda2d4079890ef805fd054ba78d7f398d9f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.10 - 2026-05-13
|
|
4
|
+
|
|
5
|
+
- Add `fetch_model_detail` to re-fetch `/v1/models` for `context_window` on a cache miss.
|
|
6
|
+
- Pre-warm the model detail cache during offering discovery via `cache_set` using `model_detail_cache_key`.
|
|
7
|
+
|
|
3
8
|
## 0.2.9 - 2026-05-12
|
|
4
9
|
|
|
5
10
|
- Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
|
|
@@ -132,9 +132,28 @@ module Legion
|
|
|
132
132
|
connection.post(with_query(wake_up_url, query), {}).body
|
|
133
133
|
end
|
|
134
134
|
|
|
135
|
+
def fetch_model_detail(model_name)
|
|
136
|
+
# vLLM provides context_length via /v1/models during discovery.
|
|
137
|
+
# Re-fetch from the models endpoint if we need it outside discovery.
|
|
138
|
+
response = @connection.get(models_url)
|
|
139
|
+
models = response.body.fetch('data', [])
|
|
140
|
+
entry = models.find { |m| m['id'] == model_name.to_s }
|
|
141
|
+
return nil unless entry
|
|
142
|
+
|
|
143
|
+
ctx = entry['max_model_len']
|
|
144
|
+
ctx ? { context_window: ctx } : nil
|
|
145
|
+
rescue StandardError => e
|
|
146
|
+
handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
|
|
147
|
+
model: model_name)
|
|
148
|
+
nil
|
|
149
|
+
end
|
|
150
|
+
|
|
135
151
|
private
|
|
136
152
|
|
|
137
153
|
def offering_from_model(model_info)
|
|
154
|
+
ctx = model_info.context_length
|
|
155
|
+
cache_set(model_detail_cache_key(model_info.id), { context_window: ctx }, ttl: 86_400) if ctx
|
|
156
|
+
|
|
138
157
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
139
158
|
provider_family: :vllm,
|
|
140
159
|
instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
|
|
@@ -143,8 +162,8 @@ module Legion
|
|
|
143
162
|
model: model_info.id,
|
|
144
163
|
usage_type: model_info.embedding? ? :embedding : :inference,
|
|
145
164
|
capabilities: model_info.capabilities.map(&:to_s),
|
|
146
|
-
limits: { context_window:
|
|
147
|
-
metadata: { context_length:
|
|
165
|
+
limits: { context_window: ctx }.compact,
|
|
166
|
+
metadata: { context_length: ctx }
|
|
148
167
|
)
|
|
149
168
|
end
|
|
150
169
|
|