RubyGems - lex-llm-vllm - Versions diffs - 0.2.9 → 0.2.11 - Mend

lex-llm-vllm 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

checksums.yaml +4 -4
data/CHANGELOG.md +13 -0
data/lib/legion/extensions/llm/vllm/provider.rb +33 -16
data/lib/legion/extensions/llm/vllm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
-  data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
+  metadata.gz: c4369ada04bb372dd59d3a57e2490c2b984c6ec1d7c4277c044b0f4126ba6136
+  data.tar.gz: 922947d49abecdbefbc7818dff56a82a35d1cae0978ebb94f5e9711f02a402c1
 SHA512:
-  metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
-  data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
+  metadata.gz: e2d9c3ffc63f2ba151573ebd898c0a08e8f73c52fe11ba38d452470e61569a33914d253ad8b1d08b235f90cfbc1f9613dea4bae5728d4b905539241e7034f9ff
+  data.tar.gz: 9cf36664ead33936f9c70ca3e892603ed6a66e1e3ef82560e7016e94207835ae6de4af7e7e9041e70eeb69b3117a48d780ca4a7e71aa3626ff8cc497800e8453

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # Changelog
+## 0.2.11 - 2026-05-21
+- Add `default_transport`/`default_tier` class declarations, remove duplicate instance methods
+- Add `model_allowed?` filtering in `discover_offerings`
+- Identity headers included via base provider
+- api_base reads from settings[:endpoint] fallback
+## 0.2.10 - 2026-05-13
+- Add `fetch_model_detail` to re-fetch `/v1/models` for `context_window` on a cache miss.
+- Pre-warm the model detail cache during offering discovery via `cache_set` using `model_detail_cache_key`.
 ## 0.2.9 - 2026-05-12
 - Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.

data/lib/legion/extensions/llm/vllm/provider.rb CHANGED Viewed

@@ -16,6 +16,8 @@ module Legion
           class << self
             def slug = 'vllm'
             def local? = false
+            def default_transport = :http
+            def default_tier = :direct
             def configuration_options = %i[vllm_api_base vllm_api_key]
             def configuration_requirements = []
             def capabilities = Capabilities
@@ -52,14 +54,14 @@ module Legion
           end
           def api_base
-            normalize_url(config.vllm_api_base || 'localhost:8000')
+            normalize_url(config.vllm_api_base || settings[:endpoint] || 'http://localhost:8000')
           end
           def headers
+            hdrs = identity_headers
             token = config.vllm_api_key
-            return {} if token.nil? || token.to_s.empty?
-            { 'Authorization' => "Bearer #{token}" }
+            hdrs['Authorization'] = "Bearer #{token}" unless token.nil? || token.to_s.empty?
+            hdrs
           end
           def health_url = '/health'
@@ -95,9 +97,13 @@ module Legion
                      else
                        Array(@cached_models)
                      end
-            models.map { |model_info| offering_from_model(model_info) }.tap do |offerings|
-              log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
+            offerings = models.filter_map do |model_info|
+              next unless model_allowed?(model_info.id)
+              offering_from_model(model_info)
             end
+            log.debug { "built #{offerings.size} vLLM offering(s) live=#{live}" }
+            offerings
           rescue StandardError => e
             handle_exception(e, level: :warn, handled: true, operation: 'vllm.discover_offerings')
             []
@@ -132,9 +138,28 @@ module Legion
             connection.post(with_query(wake_up_url, query), {}).body
           end
+          def fetch_model_detail(model_name)
+            # vLLM provides context_length via /v1/models during discovery.
+            # Re-fetch from the models endpoint if we need it outside discovery.
+            response = @connection.get(models_url)
+            models = response.body.fetch('data', [])
+            entry = models.find { |m| m['id'] == model_name.to_s }
+            return nil unless entry
+            ctx = entry['max_model_len']
+            ctx ? { context_window: ctx } : nil
+          rescue StandardError => e
+            handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
+                                model: model_name)
+            nil
+          end
           private
           def offering_from_model(model_info)
+            ctx = model_info.context_length
+            cache_set(model_detail_cache_key(model_info.id), { context_window: ctx }, ttl: 86_400) if ctx
             Legion::Extensions::Llm::Routing::ModelOffering.new(
               provider_family: :vllm,
               instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -143,19 +168,11 @@ module Legion
               model: model_info.id,
               usage_type: model_info.embedding? ? :embedding : :inference,
               capabilities: model_info.capabilities.map(&:to_s),
-              limits: { context_window: model_info.context_length }.compact,
-              metadata: { context_length: model_info.context_length }
+              limits: { context_window: ctx }.compact,
+              metadata: { context_length: ctx }
             )
           end
-          def offering_transport
-            config.respond_to?(:transport) ? config.transport : :http
-          end
-          def offering_tier
-            config.respond_to?(:tier) ? config.tier : :direct
-          end
           def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
             payload = super
             payload.delete(:reasoning_effort)

data/lib/legion/extensions/llm/vllm/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Legion
   module Extensions
     module Llm
       module Vllm
-        VERSION = '0.2.9'
+        VERSION = '0.2.11'
       end
     end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm-vllm
 version: !ruby/object:Gem::Version
-  version: 0.2.9
+  version: 0.2.11
 platform: ruby
 authors:
 - LegionIO