legion-llm 0.8.26 → 0.8.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/lib/legion/llm/inference/audit_publisher.rb +18 -1
- data/lib/legion/llm/inference/executor.rb +5 -1
- data/lib/legion/llm/inference.rb +2 -1
- data/lib/legion/llm/patches/ruby_llm_vllm.rb +41 -0
- data/lib/legion/llm/router.rb +20 -11
- data/lib/legion/llm/settings.rb +5 -4
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 523afac32d76644a92db4f6af5228c9ff9856521ccffb7cff0a0e8194570a432
+  data.tar.gz: b58073ec104d42eb18436fd708a33bea881931386dfab1e578f0570d891a6f55
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 205d3a1ef6f1c9e8712bc61e2d88382b88a91560343ab6be7e5c863f2b839ea3d384f5a5642240f7a62fed73ed96aff7b653855c15c1cb5517a43d342306a54b
+  data.tar.gz: 8847c3be8580a5c1c62bd61b83c72ef53db974b19a8567fd102827b748e9113bfc3dc0af7aa14e23d25b524453b2465dd8053e55f64ee2798f9ae9b387cac264
```
data/CHANGELOG.md
CHANGED

```diff
@@ -1,5 +1,25 @@
 # Legion LLM Changelog
 
+## [0.8.28] - 2026-04-24
+
+### Fixed
+
+- Model/provider mismatch when clients send a model name (e.g., `qwen3.5:latest`) without an explicit provider. The fallback paths blindly paired it with `default_provider` (typically `bedrock`), causing `RubyLLM::ModelNotFoundError`. Now infers the correct provider from model naming patterns before falling back to the global default.
+- `arbitrage_fallback` hardcoded `:cloud` tier and `:bedrock` provider when inference failed. Now uses `PROVIDER_TIER` to resolve the correct tier for the inferred provider.
+
+### Added
+
+- `Router.infer_provider_for_model(model)` — public method that maps model naming patterns to providers. Recognizes Ollama-style models (`:` or `/` in name), Bedrock (`us.*`), OpenAI (`gpt-*`, `o1-*`/`o3-*`/`o4-*`), Anthropic (`claude-*`), and Gemini (`gemini-*`).
+
+## [0.8.27] - 2026-04-24
+
+### Fixed
+
+- vLLM provider sent `developer` message role (OpenAI convention) which Qwen's chat template rejects. Added `Vllm::Chat` module that overrides `format_messages` and `format_role` to always send `system`.
+- vLLM provider called `OpenAI::Chat.render_payload` as a module function without provider instance context, causing `NoMethodError` on `openai_use_system_role`. Rewrote to use `super` with instance method overrides.
+- Audit events included the full conversation history in every message — quadratic payload growth. Now caps at the last 20 messages (configurable via `compliance.audit_max_messages`). Full conversation reconstructable via `conversation_id`.
+
+### Added
+
+- vLLM `chat_template_kwargs` with `enable_thinking` sent on every request so vLLM separates reasoning into the `reasoning` response field instead of inline `<think>` tags.
+- `providers.vllm.enable_thinking` setting (default: `true`). Controls whether thinking is enabled for vLLM requests. Per-request `thinking` param overrides.
+
 ## [0.8.26] - 2026-04-24
 
 ### Added
```
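Both releases introduce settings that the diffs below read through `Legion::Settings[:llm]`. A minimal sketch of where the new keys live, assuming settings resolve to a nested hash: only the key paths are confirmed by the code changes below, the surrounding shape is illustrative.

```ruby
# Illustrative Legion settings fragment; only the key paths are confirmed.
{
  llm: {
    compliance: {
      audit_max_messages: 20    # 0.8.27: cap on messages per audit event
    },
    providers: {
      vllm: {
        enable_thinking: true   # 0.8.27: default for vLLM chat_template_kwargs
      }
    }
  }
}
```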
data/lib/legion/llm/inference/audit_publisher.rb
CHANGED

```diff
@@ -40,7 +40,7 @@ module Legion
           timeline: compact_timeline(response.timeline),
           classification: response.classification,
           tracing: response.tracing,
-          messages: request.messages,
+          messages: current_turn_messages(request.messages),
           response_content: msg_content,
           tools_used: tools_data,
           timestamp: Time.now,
@@ -109,6 +109,23 @@ module Legion
         end
       end
 
+      def current_turn_messages(messages)
+        return messages unless messages.is_a?(Array)
+
+        max = audit_max_messages
+        return messages if messages.size <= max
+
+        messages.last(max)
+      end
+
+      def audit_max_messages
+        return 20 unless defined?(Legion::Settings)
+
+        Legion::Settings[:llm].dig(:compliance, :audit_max_messages) || 20
+      rescue StandardError
+        20
+      end
+
       def build_message_context(response:, **)
         {
           request_id: response.request_id,
```
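The cap is easiest to see in isolation. A standalone sketch of the same logic with the default limit inlined (the message history is fabricated):

```ruby
# Mirrors current_turn_messages above, with the default cap of 20 inlined.
def current_turn_messages(messages, max: 20)
  return messages unless messages.is_a?(Array)
  return messages if messages.size <= max

  messages.last(max) # keep only the most recent turns
end

history = (1..30).map { |i| { role: i.odd? ? 'user' : 'assistant', content: "msg #{i}" } }
current_turn_messages(history).length # => 20, i.e. messages 11..30
```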
data/lib/legion/llm/inference/executor.rb
CHANGED

```diff
@@ -328,7 +328,9 @@ module Legion
         end
       end
 
-      @resolved_provider = provider ||
+      @resolved_provider = provider ||
+                           (model && Router.infer_provider_for_model(model)) ||
+                           Legion::LLM.settings[:default_provider]
       @resolved_model = model || Legion::LLM.settings[:default_model]
 
       log.info "[llm][inference] resolved provider=#{@resolved_provider} model=#{@resolved_model}"
@@ -846,6 +848,8 @@ module Legion
       duration_ms = started_at ? ((finished_at - started_at) * 1000).round : nil
 
       result_str = (raw.is_a?(String) ? raw : raw.to_s)
+      result_str = result_str.encode('UTF-8', invalid: :replace, undef: :replace, replace: '�') unless result_str.valid_encoding?
+      result_str = result_str.delete("\x00")
       is_error = raw.is_a?(Hash) && (raw[:error] || raw['error']) ? true : false
 
       @pending_tool_history_mutex.synchronize do
```
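The two lines added in the second hunk scrub tool output before it enters history: invalid byte sequences become U+FFFD, and NUL bytes are dropped (NULs are valid UTF-8 but are rejected by, for example, Postgres text columns). A minimal standalone sketch with a fabricated input:

```ruby
# Fabricated tool output containing an invalid UTF-8 byte and an embedded NUL.
raw = "ok\xFF\x00done".dup.force_encoding('UTF-8')

result_str = raw.to_s
# Replace invalid/undefined byte sequences so the string is valid UTF-8.
result_str = result_str.encode('UTF-8', invalid: :replace, undef: :replace, replace: '�') unless result_str.valid_encoding?
# Strip NUL bytes.
result_str = result_str.delete("\x00")

result_str # => "ok�done"
```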
data/lib/legion/llm/inference.rb
CHANGED

```diff
@@ -496,7 +496,8 @@ module Legion
       end
 
       model ||= Legion::LLM.settings[:default_model]
-      provider ||=
+      provider ||= (model && Router.infer_provider_for_model(model)) ||
+                   Legion::LLM.settings[:default_provider]
 
       opts = {}
       opts[:model] = model if model
```
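This is the same three-step chain the executor change above uses: explicit provider first, then inference from the model name, then the global default. A condensed sketch, assuming Router is reachable as `Legion::LLM::Router` and the configured default is `:bedrock`:

```ruby
# Resolution order after 0.8.28; :bedrock stands in for
# Legion::LLM.settings[:default_provider].
def resolve_provider(provider, model)
  provider ||
    (model && Legion::LLM::Router.infer_provider_for_model(model)) ||
    :bedrock
end

resolve_provider(:openai, 'qwen3.5:latest') # => :openai  (explicit always wins)
resolve_provider(nil, 'qwen3.5:latest')     # => :ollama  (inferred, no more ModelNotFoundError)
resolve_provider(nil, 'mystery-model')      # => :bedrock (no pattern matched)
```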
data/lib/legion/llm/patches/ruby_llm_vllm.rb
CHANGED

```diff
@@ -3,6 +3,47 @@
 module RubyLLM
   module Providers
     class Vllm < OpenAI
+      module Chat
+        def format_role(role)
+          role.to_s
+        end
+
+        def format_messages(messages)
+          messages.map do |msg|
+            {
+              role: format_role(msg.role),
+              content: OpenAI::Media.format_content(msg.content),
+              tool_calls: format_tool_calls(msg.tool_calls),
+              tool_call_id: msg.tool_call_id
+            }.compact.merge(OpenAI::Chat.format_thinking(msg))
+          end
+        end
+
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil,
+                           thinking: nil, tool_prefs: nil)
+          payload = super
+          enable = if thinking.nil?
+                     vllm_thinking_default
+                   else
+                     thinking ? true : false
+                   end
+          payload[:chat_template_kwargs] = { enable_thinking: enable }
+          payload
+        end
+
+        private
+
+        def vllm_thinking_default
+          return true unless defined?(Legion::Settings)
+
+          Legion::Settings[:llm].dig(:providers, :vllm, :enable_thinking) != false
+        rescue StandardError
+          true
+        end
+      end
+
+      include Vllm::Chat
+
       def api_base
         @config.vllm_api_base
       end
```
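With the patch applied, every vLLM chat request carries the template kwarg and only standard roles. A sketch of the rendered payload as a Ruby hash (model and messages are fabricated; `chat_template_kwargs` is the field the patch adds, the rest follows the OpenAI-style payload `super` builds):

```ruby
{
  model: 'qwen3.6-27b',
  messages: [
    { role: 'system', content: 'You are helpful.' }, # never 'developer'
    { role: 'user', content: 'Hello' }
  ],
  chat_template_kwargs: { enable_thinking: true } # reasoning returned in `reasoning`, not <think> tags
}
```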
data/lib/legion/llm/router.rb
CHANGED

```diff
@@ -18,7 +18,22 @@ module Legion
                       gemini: :cloud, azure: :cloud, ollama: :local, vllm: :local }.freeze
     PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
 
+    OLLAMA_MODEL_PATTERN = %r{[:/]}
+
     class << self
+      def infer_provider_for_model(model)
+        return nil if model.nil? || model.to_s.empty?
+
+        model_s = model.to_s
+        return :bedrock if model_s.start_with?('us.')
+        return :openai if model_s.match?(/\Agpt-|\Ao[134]-/)
+        return :anthropic if model_s.start_with?('claude-')
+        return :gemini if model_s.start_with?('gemini-')
+        return :ollama if model_s.match?(OLLAMA_MODEL_PATTERN)
+
+        nil
+      end
+
       # Resolve an LLM routing intent to a tier/provider/model decision.
       #
       # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
@@ -95,18 +110,12 @@ module Legion
         model = Arbitrage.cheapest_for(capability: capability)
         return nil unless model
 
-        provider =
-
-        Resolution.new(tier: :cloud, provider: provider || :bedrock, model: model, rule: 'arbitrage_fallback')
-      end
-
-      def infer_provider(model)
-        return :ollama if model.include?('llama')
-        return :bedrock if model.start_with?('us.')
-        return :openai if model.start_with?('gpt')
-        return :google if model.start_with?('gemini')
+        provider = infer_provider_for_model(model)
+        return nil unless provider
 
-
+        tier = PROVIDER_TIER.fetch(provider, :cloud)
+        log.debug("Router: arbitrage fallback selected model=#{model} provider=#{provider} tier=#{tier}")
+        Resolution.new(tier: tier, provider: provider, model: model, rule: 'arbitrage_fallback')
       end
 
       def explicit_resolution(tier, provider, model)
```
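Expected mappings, read directly from the method above. Ordering matters: a Bedrock cross-region ID such as `us.anthropic.claude-...:0` contains a `:` and would otherwise match the Ollama pattern. Model names here are illustrative, and the sketch assumes the class is reachable as `Legion::LLM::Router`:

```ruby
router = Legion::LLM::Router
router.infer_provider_for_model('us.anthropic.claude-3-5-sonnet-v2:0') # => :bedrock ('us.' wins over ':')
router.infer_provider_for_model('gpt-4o')          # => :openai
router.infer_provider_for_model('o3-mini')         # => :openai (matches /\Ao[134]-/)
router.infer_provider_for_model('claude-3-haiku')  # => :anthropic
router.infer_provider_for_model('gemini-1.5-pro')  # => :gemini
router.infer_provider_for_model('qwen3.5:latest')  # => :ollama (':' in name)
router.infer_provider_for_model('library/mistral') # => :ollama ('/' in name)
router.infer_provider_for_model('mystery-model')   # => nil (callers fall back to default_provider)
```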
data/lib/legion/llm/settings.rb
CHANGED

```diff
@@ -377,10 +377,11 @@ module Legion
           base_url: 'http://localhost:11434'
         },
         vllm: {
-          enabled:
-          default_model:
-          base_url:
-          api_key:
+          enabled: false,
+          default_model: 'qwen3.6-27b',
+          base_url: 'http://localhost:8000/v1',
+          api_key: nil,
+          enable_thinking: true
         }
       }
     end
```
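Note how these defaults interact with the `!= false` read in the vLLM patch above: an unset `enable_thinking` key counts as enabled, so thinking stays on by default and only an explicit `false` disables it. A small sketch of that idiom:

```ruby
# Same read the patch performs, extracted for illustration.
def thinking_enabled?(settings)
  settings.dig(:providers, :vllm, :enable_thinking) != false
end

thinking_enabled?({})                                              # => true (unset key)
thinking_enabled?(providers: { vllm: { enable_thinking: true } })  # => true
thinking_enabled?(providers: { vllm: { enable_thinking: false } }) # => false
```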
data/lib/legion/llm/version.rb
CHANGED