lex-llm-vllm 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/legion/extensions/llm/vllm/provider.rb +26 -0
- data/lib/legion/extensions/llm/vllm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3b6bccbfd1d8e01fd38459107474d9ca3853f7d847ff3b5d71a8df3ff7a66c4b
|
|
4
|
+
data.tar.gz: f2bd935851929d113f078301a08119a425a68c35907094ee66d69d10af3e5f6f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 837e7ea4d14a09dd44922cb6193e4650b92aea3c4eea8cd85ed7916d766c84b7f8887961b0fb72ab8a1578d4005742f61ed44435d181235bb4f26042aa6aecf8
|
|
7
|
+
data.tar.gz: 8c73bfdd7921d1f99d788d4a311be574fc7cb9f61c7ebb6a79bdf7ea4a68622f020ace60858134288eea85186beb3d4c32b97c5ac714515a124b8110f3253679
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.7 - 2026-04-30
|
|
4
|
+
|
|
5
|
+
- Enable stream_usage_supported? for streaming token usage reporting
|
|
6
|
+
- Add render_payload override with chat_template_kwargs for vLLM thinking mode
|
|
7
|
+
- Add thinking_enabled? setting support from Legion::Settings
|
|
8
|
+
|
|
3
9
|
## 0.1.6 - 2026-04-28
|
|
4
10
|
|
|
5
11
|
- Publish best-effort `llm.registry` readiness and discovered-model availability events when transport is loaded.
|
|
@@ -45,6 +45,8 @@ module Legion
|
|
|
45
45
|
end
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# Capability flag for token-usage reporting on streamed responses.
#
# @return [Boolean] always +true+ for this provider
def stream_usage_supported?
  true
end
|
|
49
|
+
|
|
48
50
|
# Base URL used to reach the vLLM server.
#
# @return [String] the value of +config.vllm_api_base+ when it is set,
#   otherwise the local default 'http://localhost:8000'
def api_base
  configured = config.vllm_api_base
  return configured if configured

  'http://localhost:8000'
end
|
|
@@ -102,6 +104,30 @@ module Legion
|
|
|
102
104
|
|
|
103
105
|
private
|
|
104
106
|
|
|
107
|
+
# Builds the request payload through the parent implementation and then
# adjusts it for vLLM:
#
# * the +:reasoning_effort+ key is always removed;
# * +chat_template_kwargs: { enable_thinking: true }+ is added when
#   #thinking_enabled? says thinking should be on for this request.
#
# All arguments are forwarded unchanged to +super+.
#
# @return [Object] the payload returned by +super+, mutated in place
#   (presumably a Hash — it must respond to +delete+ and +[]=+)
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
  super.tap do |body|
    body.delete(:reasoning_effort)
    body[:chat_template_kwargs] = { enable_thinking: true } if thinking_enabled?(thinking)
  end
end
|
|
113
|
+
|
|
114
|
+
# Decides whether thinking should be switched on for a request.
#
# Resolution order:
# 1. +nil+ / +false+  -> defer to the provider-level setting
#    (#vllm_thinking_setting).
# 2. literal +true+   -> enabled.
# 3. a Hash           -> enabled unless its +enabled+ entry is exactly
#    +false+. Both +:enabled+ and +'enabled'+ keys are honoured, matching
#    the Symbol/String tolerance of #vllm_thinking_setting.
# 4. anything else    -> enabled only when it responds to +enabled?+ and
#    that returns a truthy value.
#
# @param thinking [Hash, #enabled?, Boolean, nil] per-request thinking option
# @return [Boolean]
def thinking_enabled?(thinking)
  # No per-request value: fall back to the configured default.
  return vllm_thinking_setting unless thinking
  return true if thinking == true

  if thinking.is_a?(Hash)
    # Prefer the Symbol key; only consult the String key when it is absent.
    flag = thinking.key?(:enabled) ? thinking[:enabled] : thinking['enabled']
    return true unless flag == false
  end

  return true if thinking.respond_to?(:enabled?) && thinking.enabled?

  false
end
|
|
121
|
+
|
|
122
|
+
# Reads the provider-level thinking default from Legion::Settings.
#
# Looks up the +[:llm, :providers, :vllm]+ entry and reports +true+ only
# when that entry is a Hash whose +enable_thinking+ value (Symbol or String
# key) is exactly +true+. Best-effort: when Legion::Settings is not defined,
# or the lookup raises any StandardError, the answer is +false+.
#
# @return [Boolean]
def vllm_thinking_setting
  return false unless defined?(Legion::Settings)

  provider = Legion::Settings.dig(:llm, :providers, :vllm)
  return false unless provider.is_a?(Hash)

  # Accept either key spelling; stops at the first one that is exactly true.
  [:enable_thinking, 'enable_thinking'].any? { |key| provider[key] == true }
rescue StandardError
  false
end
|
|
130
|
+
|
|
105
131
|
def with_query(path, positional = [], **params)
|
|
106
132
|
pairs = positional + params.compact.map { |key, value| [key.to_s, value] }
|
|
107
133
|
return path if pairs.empty?
|