lex-llm-vllm 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/lib/legion/extensions/llm/vllm/provider.rb +33 -16
- data/lib/legion/extensions/llm/vllm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c4369ada04bb372dd59d3a57e2490c2b984c6ec1d7c4277c044b0f4126ba6136
|
|
4
|
+
data.tar.gz: 922947d49abecdbefbc7818dff56a82a35d1cae0978ebb94f5e9711f02a402c1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e2d9c3ffc63f2ba151573ebd898c0a08e8f73c52fe11ba38d452470e61569a33914d253ad8b1d08b235f90cfbc1f9613dea4bae5728d4b905539241e7034f9ff
|
|
7
|
+
data.tar.gz: 9cf36664ead33936f9c70ca3e892603ed6a66e1e3ef82560e7016e94207835ae6de4af7e7e9041e70eeb69b3117a48d780ca4a7e71aa3626ff8cc497800e8453
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.11 - 2026-05-21
|
|
4
|
+
|
|
5
|
+
- Add `default_transport`/`default_tier` class declarations, remove duplicate instance methods
|
|
6
|
+
- Add `model_allowed?` filtering in `discover_offerings`
|
|
7
|
+
- Identity headers included via base provider
|
|
8
|
+
- api_base reads from settings[:endpoint] fallback
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
## 0.2.10 - 2026-05-13
|
|
12
|
+
|
|
13
|
+
- Add `fetch_model_detail` to re-fetch `/v1/models` for `context_window` on a cache miss.
|
|
14
|
+
- Pre-warm the model detail cache during offering discovery via `cache_set` using `model_detail_cache_key`.
|
|
15
|
+
|
|
3
16
|
## 0.2.9 - 2026-05-12
|
|
4
17
|
|
|
5
18
|
- Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.
|
|
@@ -16,6 +16,8 @@ module Legion
|
|
|
16
16
|
class << self
|
|
17
17
|
def slug = 'vllm'
|
|
18
18
|
def local? = false
|
|
19
|
+
def default_transport = :http
|
|
20
|
+
def default_tier = :direct
|
|
19
21
|
def configuration_options = %i[vllm_api_base vllm_api_key]
|
|
20
22
|
def configuration_requirements = []
|
|
21
23
|
def capabilities = Capabilities
|
|
@@ -52,14 +54,14 @@ module Legion
|
|
|
52
54
|
end
|
|
53
55
|
|
|
54
56
|
def api_base
|
|
55
|
-
normalize_url(config.vllm_api_base || 'localhost:8000')
|
|
57
|
+
normalize_url(config.vllm_api_base || settings[:endpoint] || 'http://localhost:8000')
|
|
56
58
|
end
|
|
57
59
|
|
|
58
60
|
def headers
|
|
61
|
+
hdrs = identity_headers
|
|
59
62
|
token = config.vllm_api_key
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
{ 'Authorization' => "Bearer #{token}" }
|
|
63
|
+
hdrs['Authorization'] = "Bearer #{token}" unless token.nil? || token.to_s.empty?
|
|
64
|
+
hdrs
|
|
63
65
|
end
|
|
64
66
|
|
|
65
67
|
def health_url = '/health'
|
|
@@ -95,9 +97,13 @@ module Legion
|
|
|
95
97
|
else
|
|
96
98
|
Array(@cached_models)
|
|
97
99
|
end
|
|
98
|
-
|
|
99
|
-
|
|
100
|
+
offerings = models.filter_map do |model_info|
|
|
101
|
+
next unless model_allowed?(model_info.id)
|
|
102
|
+
|
|
103
|
+
offering_from_model(model_info)
|
|
100
104
|
end
|
|
105
|
+
log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
|
|
106
|
+
offerings
|
|
101
107
|
rescue StandardError => e
|
|
102
108
|
handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
|
|
103
109
|
[]
|
|
@@ -132,9 +138,28 @@ module Legion
|
|
|
132
138
|
connection.post(with_query(wake_up_url, query), {}).body
|
|
133
139
|
end
|
|
134
140
|
|
|
141
|
+
def fetch_model_detail(model_name)
|
|
142
|
+
# vLLM provides context_length via /v1/models during discovery.
|
|
143
|
+
# Re-fetch from the models endpoint if we need it outside discovery.
|
|
144
|
+
response = @connection.get(models_url)
|
|
145
|
+
models = response.body.fetch('data', [])
|
|
146
|
+
entry = models.find { |m| m['id'] == model_name.to_s }
|
|
147
|
+
return nil unless entry
|
|
148
|
+
|
|
149
|
+
ctx = entry['max_model_len']
|
|
150
|
+
ctx ? { context_window: ctx } : nil
|
|
151
|
+
rescue StandardError => e
|
|
152
|
+
handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
|
|
153
|
+
model: model_name)
|
|
154
|
+
nil
|
|
155
|
+
end
|
|
156
|
+
|
|
135
157
|
private
|
|
136
158
|
|
|
137
159
|
def offering_from_model(model_info)
|
|
160
|
+
ctx = model_info.context_length
|
|
161
|
+
cache_set(model_detail_cache_key(model_info.id), { context_window: ctx }, ttl: 86_400) if ctx
|
|
162
|
+
|
|
138
163
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
139
164
|
provider_family: :vllm,
|
|
140
165
|
instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
|
|
@@ -143,19 +168,11 @@ module Legion
|
|
|
143
168
|
model: model_info.id,
|
|
144
169
|
usage_type: model_info.embedding? ? :embedding : :inference,
|
|
145
170
|
capabilities: model_info.capabilities.map(&:to_s),
|
|
146
|
-
limits: { context_window:
|
|
147
|
-
metadata: { context_length:
|
|
171
|
+
limits: { context_window: ctx }.compact,
|
|
172
|
+
metadata: { context_length: ctx }
|
|
148
173
|
)
|
|
149
174
|
end
|
|
150
175
|
|
|
151
|
-
def offering_transport
|
|
152
|
-
config.respond_to?(:transport) ? config.transport : :http
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
def offering_tier
|
|
156
|
-
config.respond_to?(:tier) ? config.tier : :direct
|
|
157
|
-
end
|
|
158
|
-
|
|
159
176
|
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
|
|
160
177
|
payload = super
|
|
161
178
|
payload.delete(:reasoning_effort)
|