legion-llm 0.8.26 → 0.8.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 942f34663b8d915ee982996b5b2e63e26a7edf79a7aac17f8ce71ed1829dff01
- data.tar.gz: dd78dd3bd79c9f1cf19d170f4ee2905fc92865dd3e21b107856c973eaf752fb5
+ metadata.gz: 523afac32d76644a92db4f6af5228c9ff9856521ccffb7cff0a0e8194570a432
+ data.tar.gz: b58073ec104d42eb18436fd708a33bea881931386dfab1e578f0570d891a6f55
  SHA512:
- metadata.gz: bfc1f55dce2a3eda78b5b6ab2405b6ce5d4e58fa841a81bb304af3bbe9a5b52851023c845d898713cfa87d9e292cd5fd1545464a7e0937eadde6f8668595ccc2
- data.tar.gz: 4cad8eb9c6b6cfc79c1ffce687b7fddbb7b47d4e22ec9bca424f2dbb061ed83fff97d4ab2bbec441d3b319922316a238b057752210f1d7908e0d7169380485e9
+ metadata.gz: 205d3a1ef6f1c9e8712bc61e2d88382b88a91560343ab6be7e5c863f2b839ea3d384f5a5642240f7a62fed73ed96aff7b653855c15c1cb5517a43d342306a54b
+ data.tar.gz: 8847c3be8580a5c1c62bd61b83c72ef53db974b19a8567fd102827b748e9113bfc3dc0af7aa14e23d25b524453b2465dd8053e55f64ee2798f9ae9b387cac264
data/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
  # Legion LLM Changelog
 
+ ## [0.8.28] - 2026-04-24
+
+ ### Fixed
+ - Model/provider mismatch when clients send a model name (e.g., `qwen3.5:latest`) without an explicit provider. The fallback paths blindly paired it with `default_provider` (typically `bedrock`), causing `RubyLLM::ModelNotFoundError`. Now infers the correct provider from model naming patterns before falling back to the global default.
+ - `arbitrage_fallback` hardcoded `:cloud` tier and `:bedrock` provider when inference failed. Now uses `PROVIDER_TIER` to resolve the correct tier for the inferred provider.
+
+ ### Added
+ - `Router.infer_provider_for_model(model)` — public method that maps model naming patterns to providers. Recognizes Ollama-style models (`:` or `/` in name), Bedrock (`us.*`), OpenAI (`gpt-*`, `o1-*`/`o3-*`/`o4-*`), Anthropic (`claude-*`), and Gemini (`gemini-*`).
+
+ ## [0.8.27] - 2026-04-24
+
+ ### Fixed
+ - vLLM provider sent `developer` message role (OpenAI convention) which Qwen's chat template rejects. Added `Vllm::Chat` module that overrides `format_messages` and `format_role` to always send `system`.
+ - vLLM provider called `OpenAI::Chat.render_payload` as a module function without provider instance context, causing `NoMethodError` on `openai_use_system_role`. Rewrote to use `super` with instance method overrides.
+ - Audit events included the full conversation history in every message — quadratic payload growth. Now caps at the last 20 messages (configurable via `compliance.audit_max_messages`). Full conversation reconstructable via `conversation_id`.
+
+ ### Added
+ - vLLM `chat_template_kwargs` with `enable_thinking` sent on every request so vLLM separates reasoning into the `reasoning` response field instead of inline `<think>` tags.
+ - `providers.vllm.enable_thinking` setting (default: `true`). Controls whether thinking is enabled for vLLM requests. Per-request `thinking` param overrides.
+
  ## [0.8.26] - 2026-04-24
 
  ### Added
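
The 0.8.28 entries above are backed by the `Router.infer_provider_for_model` additions further down in this diff. A minimal usage sketch, assuming the router class is reachable as `Legion::LLM::Router` (the full constant path is not shown in the diff) and using illustrative model names:

```ruby
router = Legion::LLM::Router # assumed constant path

router.infer_provider_for_model('qwen3.5:latest')             # => :ollama    (":" or "/" in the name)
router.infer_provider_for_model('us.anthropic.claude-sonnet') # => :bedrock   ("us." prefix is checked before "claude-")
router.infer_provider_for_model('gpt-4o')                     # => :openai    (gpt-*, o1-/o3-/o4-)
router.infer_provider_for_model('claude-sonnet-4')            # => :anthropic
router.infer_provider_for_model('gemini-2.5-pro')             # => :gemini
router.infer_provider_for_model('some-unknown-model')         # => nil, so callers fall back to default_provider
```

So a bare `qwen3.5:latest` now resolves to `:ollama` instead of being paired with the global `default_provider` (typically `bedrock`).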
@@ -40,7 +40,7 @@ module Legion
  timeline: compact_timeline(response.timeline),
  classification: response.classification,
  tracing: response.tracing,
- messages: request.messages,
+ messages: current_turn_messages(request.messages),
  response_content: msg_content,
  tools_used: tools_data,
  timestamp: Time.now,
@@ -109,6 +109,23 @@ module Legion
  end
  end
 
+ def current_turn_messages(messages)
+   return messages unless messages.is_a?(Array)
+
+   max = audit_max_messages
+   return messages if messages.size <= max
+
+   messages.last(max)
+ end
+
+ def audit_max_messages
+   return 20 unless defined?(Legion::Settings)
+
+   Legion::Settings[:llm].dig(:compliance, :audit_max_messages) || 20
+ rescue StandardError
+   20
+ end
+
  def build_message_context(response:, **)
  {
  request_id: response.request_id,
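
As a quick illustration of the audit cap introduced in this hunk (behavior read directly from `current_turn_messages` and `audit_max_messages` above; the sample data is made up):

```ruby
# 50-message conversation, default cap of 20 (compliance.audit_max_messages unset).
messages = (1..50).map { |i| { role: 'user', content: "message #{i}" } }

current_turn_messages(messages).size # => 20, only the most recent 20 entries are audited
current_turn_messages(nil)           # => nil, non-array payloads pass through unchanged
```

The rest of the conversation remains reconstructable from earlier audit events via `conversation_id`, as the changelog notes.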
@@ -328,7 +328,9 @@ module Legion
  end
  end
 
- @resolved_provider = provider || Legion::LLM.settings[:default_provider]
+ @resolved_provider = provider ||
+                      (model && Router.infer_provider_for_model(model)) ||
+                      Legion::LLM.settings[:default_provider]
  @resolved_model = model || Legion::LLM.settings[:default_model]
 
  log.info "[llm][inference] resolved provider=#{@resolved_provider} model=#{@resolved_model}"
@@ -846,6 +848,8 @@ module Legion
  duration_ms = started_at ? ((finished_at - started_at) * 1000).round : nil
 
  result_str = (raw.is_a?(String) ? raw : raw.to_s)
+ result_str = result_str.encode('UTF-8', invalid: :replace, undef: :replace, replace: '�') unless result_str.valid_encoding?
+ result_str = result_str.delete("\x00")
  is_error = raw.is_a?(Hash) && (raw[:error] || raw['error']) ? true : false
 
  @pending_tool_history_mutex.synchronize do
@@ -496,7 +496,8 @@ module Legion
  end
 
  model ||= Legion::LLM.settings[:default_model]
- provider ||= Legion::LLM.settings[:default_provider]
+ provider ||= (model && Router.infer_provider_for_model(model)) ||
+              Legion::LLM.settings[:default_provider]
 
  opts = {}
  opts[:model] = model if model
@@ -3,6 +3,47 @@
  module RubyLLM
  module Providers
  class Vllm < OpenAI
+ module Chat
+   def format_role(role)
+     role.to_s
+   end
+
+   def format_messages(messages)
+     messages.map do |msg|
+       {
+         role: format_role(msg.role),
+         content: OpenAI::Media.format_content(msg.content),
+         tool_calls: format_tool_calls(msg.tool_calls),
+         tool_call_id: msg.tool_call_id
+       }.compact.merge(OpenAI::Chat.format_thinking(msg))
+     end
+   end
+
+   def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil,
+                      thinking: nil, tool_prefs: nil)
+     payload = super
+     enable = if thinking.nil?
+                vllm_thinking_default
+              else
+                thinking ? true : false
+              end
+     payload[:chat_template_kwargs] = { enable_thinking: enable }
+     payload
+   end
+
+   private
+
+   def vllm_thinking_default
+     return true unless defined?(Legion::Settings)
+
+     Legion::Settings[:llm].dig(:providers, :vllm, :enable_thinking) != false
+   rescue StandardError
+     true
+   end
+ end
+
+ include Vllm::Chat
+
  def api_base
  @config.vllm_api_base
  end
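
Taken together, the `Vllm::Chat` overrides above mean every request body keeps the `system` role and always carries `chat_template_kwargs`. A sketch of the resulting payload shape (values are illustrative, not taken from this diff):

```ruby
# Approximate request body produced by render_payload for a vLLM chat call.
{
  model: 'qwen3.6-27b',
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' }, # never rewritten to "developer"
    { role: 'user', content: 'Hello' }
  ],
  stream: false,
  chat_template_kwargs: { enable_thinking: true } # providers.vllm.enable_thinking, or the per-request :thinking param
}
```

With `enable_thinking` on, vLLM is expected to return the model's reasoning in the separate `reasoning` response field rather than inline `<think>` tags, per the 0.8.27 changelog entry.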
@@ -18,7 +18,22 @@ module Legion
  gemini: :cloud, azure: :cloud, ollama: :local, vllm: :local }.freeze
  PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze
 
+ OLLAMA_MODEL_PATTERN = %r{[:/]}
+
  class << self
+ def infer_provider_for_model(model)
+   return nil if model.nil? || model.to_s.empty?
+
+   model_s = model.to_s
+   return :bedrock if model_s.start_with?('us.')
+   return :openai if model_s.match?(/\Agpt-|\Ao[134]-/)
+   return :anthropic if model_s.start_with?('claude-')
+   return :gemini if model_s.start_with?('gemini-')
+   return :ollama if model_s.match?(OLLAMA_MODEL_PATTERN)
+
+   nil
+ end
+
  # Resolve an LLM routing intent to a tier/provider/model decision.
  #
  # @param intent [Hash, nil] routing intent (capability, privacy, etc.)
@@ -95,18 +110,12 @@ module Legion
  model = Arbitrage.cheapest_for(capability: capability)
  return nil unless model
 
- provider = Arbitrage.cost_table[model] ? infer_provider(model) : nil
- log.debug("Router: arbitrage fallback selected model=#{model}")
- Resolution.new(tier: :cloud, provider: provider || :bedrock, model: model, rule: 'arbitrage_fallback')
- end
-
- def infer_provider(model)
- return :ollama if model.include?('llama')
- return :bedrock if model.start_with?('us.')
- return :openai if model.start_with?('gpt')
- return :google if model.start_with?('gemini')
+ provider = infer_provider_for_model(model)
+ return nil unless provider
 
- :anthropic if model.start_with?('claude')
+ tier = PROVIDER_TIER.fetch(provider, :cloud)
+ log.debug("Router: arbitrage fallback selected model=#{model} provider=#{provider} tier=#{tier}")
+ Resolution.new(tier: tier, provider: provider, model: model, rule: 'arbitrage_fallback')
  end
 
  def explicit_resolution(tier, provider, model)
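
One consequence of the rewritten fallback, shown here as a small sketch against the `PROVIDER_TIER` map from the earlier hunk (the model name is illustrative):

```ruby
provider = infer_provider_for_model('llama3:8b') # => :ollama (":" in the name)
PROVIDER_TIER.fetch(provider, :cloud)            # => :local, no longer hardcoded :cloud
```

When no provider can be inferred, `arbitrage_fallback` now returns `nil` instead of guessing `:bedrock`.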
@@ -377,10 +377,11 @@ module Legion
  base_url: 'http://localhost:11434'
  },
  vllm: {
- enabled: false,
- default_model: 'qwen3.6-27b',
- base_url: 'http://localhost:8000/v1',
- api_key: nil
+ enabled: false,
+ default_model: 'qwen3.6-27b',
+ base_url: 'http://localhost:8000/v1',
+ api_key: nil,
+ enable_thinking: true
  }
  }
  end
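
The defaults above, together with the `Legion::Settings[:llm].dig(...)` lookups earlier in this diff, imply an `:llm` settings tree roughly like the following. How the tree is populated (config file, environment, etc.) is not part of this diff, so treat the layout as an assumption:

```ruby
# Assumed shape of Legion::Settings[:llm] covering the options added in 0.8.27/0.8.28.
{
  default_provider: :bedrock,   # illustrative; used only when provider inference returns nil
  compliance: {
    audit_max_messages: 20      # cap on messages copied into each audit event
  },
  providers: {
    vllm: {
      enabled: false,
      default_model: 'qwen3.6-27b',
      base_url: 'http://localhost:8000/v1',
      api_key: nil,
      enable_thinking: true     # drives chat_template_kwargs[:enable_thinking]
    }
  }
}
```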
@@ -2,6 +2,6 @@
 
  module Legion
  module LLM
- VERSION = '0.8.26'
+ VERSION = '0.8.28'
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: legion-llm
  version: !ruby/object:Gem::Version
- version: 0.8.26
+ version: 0.8.28
  platform: ruby
  authors:
  - Esity