RubyGems - lex-llm-vllm - Versions diffs - 0.2.9 → 0.2.10 - Mend

lex-llm-vllm 0.2.9 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/lib/legion/extensions/llm/vllm/provider.rb +21 -2
data/lib/legion/extensions/llm/vllm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c681beae79a3636380cbb8f75f3b0deb92722ee8dcfe150569944d4cd678ecd4
-  data.tar.gz: a072817f69752bde450cb67776b9b67021c680b5c744a9c05fddd6048821b871
+  metadata.gz: a3082ce3d1b3d61f220aed833a0b87138ad199cce52cfb455683c04d57db4f10
+  data.tar.gz: a542a059e10c1a12a58fa68f611706e3c6007c0642ba57777ddf2e7b31829e5e
 SHA512:
-  metadata.gz: 9e4fdb96b3e7084371aa29f058072d2eb094c5872f0a10a4974c9bf2c16a6527d5e321563a7e24e0a9ee9ea52471ecf1c264a4e38e3c206930202739760d4135
-  data.tar.gz: f09e2f1c922a9466493281223371b57ff8b765850d15b9f783819551d9dc9aa1d8a36e5b40d6b9059d6dcf31998e2c6ca610a87c23c68ca70102e2e13fb5f193
+  metadata.gz: 864d11b0394b30b9df44c5e5ffb97dcba939311c30b87c9588ca8dea73b92f2087c5a6af6c052606f97c63e93201e964d5a358b95ca6b32905f9a087d860bf80
+  data.tar.gz: 22df5b5ea5c9dedabe193cd2caa83944eef6f0b107f98605216587eda77ce53efb9fe4fea596fd796415d9c1e5ea7fda2d4079890ef805fd054ba78d7f398d9f

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,10 @@
 # Changelog
+## 0.2.10 - 2026-05-13
+- Add `fetch_model_detail` to re-fetch `/v1/models` for `context_window` on a cache miss.
+- Pre-warm the model detail cache during offering discovery via `cache_set` using `model_detail_cache_key`.
 ## 0.2.9 - 2026-05-12
 - Route fleet actor load failures through `Legion::Logging::Helper` instead of direct warnings.

data/lib/legion/extensions/llm/vllm/provider.rb CHANGED Viewed

@@ -132,9 +132,28 @@ module Legion
             connection.post(with_query(wake_up_url, query), {}).body
           end
+          def fetch_model_detail(model_name)
+            # Re-fetch /v1/models outside discovery; returns { context_window: max_model_len } for model_name,
+            # or nil if the model/field is missing or the request raises (logged via handle_exception at :warn).
+            response = @connection.get(models_url) # NOTE(review): siblings call `connection`; confirm ivar is set
+            models = response.body.fetch('data', [])
+            entry = models.find { |m| m['id'] == model_name.to_s }
+            return nil unless entry
+            ctx = entry['max_model_len']
+            ctx ? { context_window: ctx } : nil
+          rescue StandardError => e
+            handle_exception(e, level: :warn, handled: true, operation: 'vllm.fetch_model_detail',
+                                model: model_name)
+            nil
+          end
           private
           def offering_from_model(model_info)
+            ctx = model_info.context_length
+            cache_set(model_detail_cache_key(model_info.id), { context_window: ctx }, ttl: 86_400) if ctx
             Legion::Extensions::Llm::Routing::ModelOffering.new(
               provider_family: :vllm,
               instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -143,8 +162,8 @@ module Legion
               model: model_info.id,
               usage_type: model_info.embedding? ? :embedding : :inference,
               capabilities: model_info.capabilities.map(&:to_s),
-              limits: { context_window: model_info.context_length }.compact,
-              metadata: { context_length: model_info.context_length }
+              limits: { context_window: ctx }.compact,
+              metadata: { context_length: ctx }
             )
           end

data/lib/legion/extensions/llm/vllm/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Legion
   module Extensions
     module Llm
       module Vllm
-        VERSION = '0.2.9'
+        VERSION = '0.2.10'
       end
     end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm-vllm
 version: !ruby/object:Gem::Version
-  version: 0.2.9
+  version: 0.2.10
 platform: ruby
 authors:
 - LegionIO