legion-llm 0.9.15 → 0.9.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 206afbe8609bb8ed7df111d216967aafba55b0d523d5939aad024f169e43f5ef
4
- data.tar.gz: 283f42c3d5b9ba07aa7857aad3e6d1f30b7559f35282f4a1d00986ea4ab2c646
3
+ metadata.gz: 4743dd41922fbca3818f72bb48d353314ed2895ce0981e779ac29315c8ffea3b
4
+ data.tar.gz: a235df9596b11ddfd94ef5f075a9785f4bce0ae9c849db8b0b5845bde83af4ac
5
5
  SHA512:
6
- metadata.gz: 54b3e821013f9ba6f73019907821e85d1aaacc766e8942767f5e6a9630d66757c1d16a8e1b6643054895b1e5de229245e45ebf562f42bdec1cfac8f609024a5c
7
- data.tar.gz: 45d349d01bef14e68527aa0c8108c4d08f71e05b63c87c331e377703bdacfcee431b903fe72dc2ad48b10c2f73a67523e9c67a0da1dd46b4b9cf3e041c290671
6
+ metadata.gz: 1c02e4859ef4bd824e854275fcbb1eadfe243b13477c9af9a9f2f3c484579eefa10bc70d0b1735c85b433b476ca9a8dd69b5fa788cdeafc651dcc370f71cfc40
7
+ data.tar.gz: 9f3ae0f1adba6bbe56653f0afce38c0eaa0dd4121b02279f5d9053be84682774f07401e346a855320c1bc006929d8ca184c88896098cd52697869c9b8d9f4630
data/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.19] - 2026-05-11
4
+
5
+ ### Added
6
+ - `GET /api/llm/tiers` — full RESTful tier hierarchy endpoint with sub-routes: `/:tier`, `/:tier/providers`, `/:tier/providers/:provider`, `/:tier/providers/:provider/instances`, `/:tier/providers/:provider/instances/:instance`, `/:tier/providers/:provider/instances/:instance/models`, `/:tier/providers/:provider/models`. Returns tier availability, provider health, instance details, and model listings in a structured tree.
7
+ - `POST /api/llm/inference` now accepts `tier` parameter in request body, passed through to the routing pipeline via `Request.extra[:tier]`. Supports values: `local`, `fleet`, `openai_compat`, `cloud`, `frontier`.
8
+ - Request log for `/api/llm/inference` now includes `requested_tier` field.
9
+
10
+ ### Changed
11
+ - `GET /api/llm/offerings` response restructured from flat array to grouped hash: `tier → provider → instance → [offerings]`. Individual offering lookup (`GET /api/llm/offerings/:id`) unchanged.
12
+
13
+ ## [0.9.18] - 2026-05-11
14
+
15
+ ### Fixed
16
+ - `NativeResponseAdapter` now coerces tool_calls from the Hash-keyed-by-name format (returned by OpenAI-compatible providers via lex-llm) into a flat Array of Hashes, preventing TypeError crashes in `step_tool_calls`, `response_tool_calls`, and the native tool loop when streaming tool-use responses from vllm/ollama.
17
+ - `LexLLMAdapter#normalize_messages` merges enriched system content with an existing system message at index 0 instead of prepending a duplicate, preventing vllm "System message must be at the beginning" rejections during gaia narrator ticks.
18
+
19
+ ## [0.9.17] - 2026-05-11
20
+
21
+ ### Fixed
22
+ - `total_memory_mb` now fetched exactly once on first access and never re-fetched; hardware memory is static so repeated `sysctl` calls every 60s were wasteful. `refresh!` only clears the available memory cache; `reset!` still clears everything (for tests).
23
+ - `trivial_query?` now correctly identifies short/trivial messages: a query is trivial if it matches a known trivial pattern (exact normalized match), or if no custom patterns are configured and the query is short (at most `trivial_max_chars` characters) and at most a single word. Previously, an empty patterns list caused `.any?` to always return false, so nothing was ever trivial.
24
+ - Added `trivial_patterns` helper with configurable defaults (`ping`, `pong`, `ding`, `test`, `foobar`) readable via `rag.trivial_patterns` setting; when custom patterns are explicitly configured, the short-query heuristic is disabled so only listed patterns are treated as trivial.
25
+
26
+ ## [0.9.16] - 2026-05-11
27
+
28
+ ### Fixed
29
+ - Renamed `Metering#settings_value` to `extract_hash_value` to fix method shadowing with `Legion::Logging::Helper#settings_value`, which resolves a `wrong number of arguments (given 3, expected 2)` error raised from `instance_log_level` when metering is active.
30
+
3
31
  ## [0.9.15] - 2026-05-08
4
32
 
5
33
  ### Fixed
@@ -23,6 +23,7 @@ module Legion
23
23
  requested_tools = body[:requested_tools] || []
24
24
  model = body[:model]
25
25
  provider = body[:provider]
26
+ tier = body[:tier]
26
27
  caller_context = body[:caller]
27
28
  conversation_id = body[:conversation_id]
28
29
  request_id = body[:request_id] || SecureRandom.uuid
@@ -88,12 +89,16 @@ module Legion
88
89
  "[llm][api][inference] action=accepted request_id=#{request_id} " \
89
90
  "conversation_id=#{conversation_id || 'none'} caller=#{caller_summary} " \
90
91
  "messages=#{messages.size} client_tools=#{tools.size} requested_tools=#{Array(requested_tools).size} " \
91
- "requested_provider=#{provider || 'auto'} requested_model=#{model || 'auto'} stream=#{streaming}"
92
+ "requested_tier=#{tier || 'auto'} requested_provider=#{provider || 'auto'} " \
93
+ "requested_model=#{model || 'auto'} stream=#{streaming}"
92
94
  )
93
95
 
94
96
  require 'legion/llm/inference/request' unless defined?(Legion::LLM::Inference::Request)
95
97
  require 'legion/llm/inference/executor' unless defined?(Legion::LLM::Inference::Executor)
96
98
 
99
+ extra = {}
100
+ extra[:tier] = tier.to_sym if tier
101
+
97
102
  pipeline_request = Legion::LLM::Inference::Request.build(
98
103
  id: request_id,
99
104
  messages: messages,
@@ -104,7 +109,8 @@ module Legion
104
109
  conversation_id: conversation_id,
105
110
  metadata: { requested_tools: requested_tools },
106
111
  stream: streaming,
107
- cache: { strategy: :default, cacheable: true }
112
+ cache: { strategy: :default, cacheable: true },
113
+ extra: extra
108
114
  )
109
115
 
110
116
  setup_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - route_t0) * 1000).round
@@ -17,11 +17,12 @@ module Legion
17
17
  require_llm!
18
18
 
19
19
  filters = Legion::LLM::API::Native::Offerings.request_filters(params)
20
- offerings = Legion::LLM::Inventory.offerings(filters)
20
+ raw_offerings = Legion::LLM::Inventory.offerings(filters)
21
+ grouped = Legion::LLM::API::Native::Offerings.group_offerings(raw_offerings)
21
22
 
22
23
  json_response({
23
- offerings: offerings,
24
- summary: Legion::LLM::API::Native::Offerings.summary(offerings, filters)
24
+ offerings: grouped,
25
+ summary: Legion::LLM::API::Native::Offerings.summary(raw_offerings)
25
26
  })
26
27
  rescue StandardError => e
27
28
  handle_exception(e, level: :error, handled: true, operation: 'llm.api.offerings.list')
@@ -59,15 +60,46 @@ module Legion
59
60
  }
60
61
  end
61
62
 
62
- def self.summary(offerings, filters)
63
+ def self.group_offerings(offerings)
64
+ grouped = {}
65
+
66
+ offerings.each do |offering|
67
+ tier = (offering[:tier] || :unknown).to_s
68
+ provider = (offering[:provider_family] || :unknown).to_s
69
+ instance = (offering[:instance_id] || offering[:provider_instance] || :default).to_s
70
+
71
+ grouped[tier] ||= {}
72
+ grouped[tier][provider] ||= {}
73
+ grouped[tier][provider][instance] ||= []
74
+ grouped[tier][provider][instance] << compact_offering(offering)
75
+ end
76
+
77
+ grouped
78
+ end
79
+
80
+ def self.compact_offering(offering)
63
81
  {
64
- total: offerings.size,
65
- operation: filters[:type]&.to_s,
66
- models: offerings.map { |offering| offering[:model] }.uniq.size,
67
- providers: offerings.map { |offering| offering[:provider_family] }.uniq.size,
68
- instances: offerings.map { |offering| offering[:instance_id] }.uniq.size
82
+ id: offering[:offering_id] || offering[:id],
83
+ model: offering[:model].to_s,
84
+ type: offering[:type].to_s,
85
+ model_family: offering[:model_family]&.to_s,
86
+ capabilities: Array(offering[:capabilities]).map(&:to_s),
87
+ limits: offering[:limits] || {},
88
+ enabled: offering[:enabled] != false,
89
+ cost: offering[:cost] || {},
90
+ health: offering[:health] || {}
69
91
  }.compact
70
92
  end
93
+
94
+ def self.summary(offerings)
95
+ {
96
+ total: offerings.size,
97
+ tiers: offerings.map { |o| (o[:tier] || :unknown).to_s }.uniq.size,
98
+ providers: offerings.map { |o| (o[:provider_family] || :unknown).to_s }.uniq.size,
99
+ instances: offerings.map { |o| (o[:instance_id] || :default).to_s }.uniq.size,
100
+ models: offerings.map { |o| o[:model] }.uniq.size
101
+ }
102
+ end
71
103
  end
72
104
  end
73
105
  end
@@ -0,0 +1,242 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/logging/helper'
4
+
5
+ module Legion
6
+ module LLM
7
+ module API
8
+ module Native
9
+ module Tiers
10
+ extend Legion::Logging::Helper
11
+
12
+ def self.registered(app) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
13
+ log.debug('[llm][api][tiers] registering tier routes')
14
+
15
+ app.get '/api/llm/tiers' do
16
+ require_llm!
17
+
18
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
19
+ json_response({
20
+ tiers: tiers_data,
21
+ priority: Legion::LLM::API::Native::Tiers.tier_priority,
22
+ privacy_mode: Legion::LLM::API::Native::Tiers.privacy_mode?
23
+ })
24
+ rescue StandardError => e
25
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.list')
26
+ json_error('tiers_error', e.message, status_code: 500)
27
+ end
28
+
29
+ app.get '/api/llm/tiers/:tier' do
30
+ require_llm!
31
+
32
+ tier_name = params[:tier].to_s
33
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
34
+ tier = tiers_data[tier_name]
35
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
36
+
37
+ json_response({ tier: tier_name, **tier })
38
+ rescue StandardError => e
39
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.get')
40
+ json_error('tiers_error', e.message, status_code: 500)
41
+ end
42
+
43
+ app.get '/api/llm/tiers/:tier/providers' do
44
+ require_llm!
45
+
46
+ tier_name = params[:tier].to_s
47
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
48
+ tier = tiers_data[tier_name]
49
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
50
+
51
+ json_response({ tier: tier_name, providers: tier[:providers] })
52
+ rescue StandardError => e
53
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.providers')
54
+ json_error('tiers_error', e.message, status_code: 500)
55
+ end
56
+
57
+ app.get '/api/llm/tiers/:tier/providers/:provider' do
58
+ require_llm!
59
+
60
+ tier_name = params[:tier].to_s
61
+ provider_name = params[:provider].to_s
62
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
63
+ tier = tiers_data[tier_name]
64
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
65
+
66
+ provider = tier.dig(:providers, provider_name)
67
+ halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider
68
+
69
+ json_response({ tier: tier_name, provider: provider_name, **provider })
70
+ rescue StandardError => e
71
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.provider')
72
+ json_error('tiers_error', e.message, status_code: 500)
73
+ end
74
+
75
+ app.get '/api/llm/tiers/:tier/providers/:provider/instances' do
76
+ require_llm!
77
+
78
+ tier_name = params[:tier].to_s
79
+ provider_name = params[:provider].to_s
80
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
81
+ tier = tiers_data[tier_name]
82
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
83
+
84
+ provider = tier.dig(:providers, provider_name)
85
+ halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider
86
+
87
+ json_response({ tier: tier_name, provider: provider_name, instances: provider[:instances] })
88
+ rescue StandardError => e
89
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.instances')
90
+ json_error('tiers_error', e.message, status_code: 500)
91
+ end
92
+
93
+ app.get '/api/llm/tiers/:tier/providers/:provider/instances/:instance' do
94
+ require_llm!
95
+
96
+ tier_name = params[:tier].to_s
97
+ provider_name = params[:provider].to_s
98
+ instance_name = params[:instance].to_s
99
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
100
+ tier = tiers_data[tier_name]
101
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
102
+
103
+ provider = tier.dig(:providers, provider_name)
104
+ halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider
105
+
106
+ instance = provider.dig(:instances, instance_name)
107
+ halt json_error('instance_not_found', "Instance '#{instance_name}' not found", status_code: 404) unless instance
108
+
109
+ json_response({ tier: tier_name, provider: provider_name, instance: instance_name, **instance })
110
+ rescue StandardError => e
111
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.instance')
112
+ json_error('tiers_error', e.message, status_code: 500)
113
+ end
114
+
115
+ app.get '/api/llm/tiers/:tier/providers/:provider/instances/:instance/models' do
116
+ require_llm!
117
+
118
+ tier_name = params[:tier].to_s
119
+ provider_name = params[:provider].to_s
120
+ instance_name = params[:instance].to_s
121
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
122
+ tier = tiers_data[tier_name]
123
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
124
+
125
+ provider = tier.dig(:providers, provider_name)
126
+ halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider
127
+
128
+ instance = provider.dig(:instances, instance_name)
129
+ halt json_error('instance_not_found', "Instance '#{instance_name}' not found", status_code: 404) unless instance
130
+
131
+ json_response({ tier: tier_name, provider: provider_name, instance: instance_name, models: instance[:models] })
132
+ rescue StandardError => e
133
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.instance_models')
134
+ json_error('tiers_error', e.message, status_code: 500)
135
+ end
136
+
137
+ app.get '/api/llm/tiers/:tier/providers/:provider/models' do
138
+ require_llm!
139
+
140
+ tier_name = params[:tier].to_s
141
+ provider_name = params[:provider].to_s
142
+ tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
143
+ tier = tiers_data[tier_name]
144
+ halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier
145
+
146
+ provider = tier.dig(:providers, provider_name)
147
+ halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider
148
+
149
+ all_models = provider[:instances].values.flat_map { |inst| inst[:models] }
150
+ seen = {}
151
+ unique_models = all_models.select { |m| seen[m[:id]] ? false : (seen[m[:id]] = true) }
152
+
153
+ json_response({ tier: tier_name, provider: provider_name, models: unique_models })
154
+ rescue StandardError => e
155
+ handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.provider_models')
156
+ json_error('tiers_error', e.message, status_code: 500)
157
+ end
158
+
159
+ log.debug('[llm][api][tiers] tier routes registered')
160
+ end
161
+
162
+ def self.tier_priority
163
+ routing_config = Legion::LLM::Settings.value(:routing) || {}
164
+ Array(routing_config[:tier_priority] || %w[local fleet openai_compat cloud frontier])
165
+ end
166
+
167
+ def self.privacy_mode?
168
+ return false unless defined?(Legion::LLM::Router)
169
+
170
+ Legion::LLM::Router.respond_to?(:privacy_mode?) && Legion::LLM::Router.privacy_mode?
171
+ end
172
+
173
+ def self.tier_available?(tier_sym)
174
+ return true unless defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:tier_available?)
175
+
176
+ Legion::LLM::Router.tier_available?(tier_sym)
177
+ end
178
+
179
+ def self.build_tiers_tree
180
+ offerings = Legion::LLM::Inventory.offerings({})
181
+ grouped = {}
182
+
183
+ offerings.each do |offering|
184
+ tier_name = (offering[:tier] || :unknown).to_s
185
+ provider_name = (offering[:provider_family] || :unknown).to_s
186
+ instance_name = (offering[:instance_id] || offering[:provider_instance] || :default).to_s
187
+
188
+ grouped[tier_name] ||= { available: tier_available?(tier_name.to_sym), providers: {} }
189
+ grouped[tier_name][:providers][provider_name] ||= { instances: {} }
190
+ grouped[tier_name][:providers][provider_name][:instances][instance_name] ||= {
191
+ health: offering_instance_health(provider_name, instance_name),
192
+ capabilities: [],
193
+ models: []
194
+ }
195
+
196
+ inst = grouped[tier_name][:providers][provider_name][:instances][instance_name]
197
+ inst[:capabilities] = (inst[:capabilities] + Array(offering[:capabilities])).uniq.sort
198
+ inst[:models] << build_model_entry(offering)
199
+ end
200
+
201
+ # Sort tiers by priority order
202
+ priority = tier_priority
203
+ sorted = {}
204
+ priority.each { |t| sorted[t] = grouped.delete(t) if grouped.key?(t) }
205
+ grouped.each { |t, v| sorted[t] = v }
206
+
207
+ # Ensure all priority tiers appear even if empty
208
+ priority.each do |t|
209
+ sorted[t] ||= { available: tier_available?(t.to_sym), providers: {} }
210
+ end
211
+
212
+ sorted
213
+ end
214
+
215
+ def self.build_model_entry(offering)
216
+ {
217
+ id: offering[:model].to_s,
218
+ offering_id: offering[:offering_id] || offering[:id],
219
+ type: offering[:type].to_s,
220
+ capabilities: Array(offering[:capabilities]).map(&:to_s),
221
+ limits: offering[:limits] || {},
222
+ enabled: offering[:enabled] != false,
223
+ cost: offering[:cost] || {},
224
+ model_family: offering[:model_family]&.to_s
225
+ }.compact
226
+ end
227
+
228
+ def self.offering_instance_health(provider_name, instance_name)
229
+ return 'unknown' unless defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:health_tracker)
230
+
231
+ tracker = Legion::LLM::Router.health_tracker
232
+ return 'unknown' unless tracker
233
+
234
+ tracker.circuit_state(provider_name.to_sym, instance: instance_name.to_sym).to_s
235
+ rescue StandardError
236
+ 'unknown'
237
+ end
238
+ end
239
+ end
240
+ end
241
+ end
242
+ end
@@ -9,6 +9,7 @@ require_relative 'api/native/models'
9
9
  require_relative 'api/native/offerings'
10
10
  require_relative 'api/native/instances'
11
11
  require_relative 'api/native/routing'
12
+ require_relative 'api/native/tiers'
12
13
  require_relative 'api/translators/openai_request'
13
14
  require_relative 'api/translators/openai_response'
14
15
  require_relative 'api/openai/chat_completions'
@@ -36,6 +37,7 @@ module Legion
36
37
  Native::Offerings.registered(app)
37
38
  Native::Instances.registered(app)
38
39
  Native::Routing.registered(app)
40
+ Native::Tiers.registered(app)
39
41
  OpenAI::ChatCompletions.registered(app)
40
42
  OpenAI::Models.registered(app)
41
43
  OpenAI::Embeddings.registered(app)
@@ -2,11 +2,7 @@
2
2
 
3
3
  require 'legion/logging/helper'
4
4
 
5
- begin
6
- require 'legion/extensions/llm/responses/thinking_extractor'
7
- rescue LoadError
8
- nil
9
- end
5
+ require 'legion/extensions/llm/responses/thinking_extractor'
10
6
 
11
7
  module Legion
12
8
  module LLM
@@ -37,7 +33,7 @@ module Legion
37
33
  @content = extracted[:result].to_s
38
34
  @model = result_hash[:model]
39
35
  @metadata = extracted[:metadata] || {}
40
- @tool_calls = result_hash[:tool_calls] || []
36
+ @tool_calls = self.class.coerce_tool_calls(result_hash[:tool_calls])
41
37
  @stop_reason = result_hash[:stop_reason]
42
38
  @thinking = extracted[:thinking]
43
39
  usage = self.class.coerce_usage(result_hash[:usage])
@@ -73,7 +69,7 @@ module Legion
73
69
  cache_write_tokens: raw.respond_to?(:cache_creation_tokens) ? raw.cache_creation_tokens.to_i : 0
74
70
  ),
75
71
  metadata: raw.respond_to?(:metadata) && raw.metadata.is_a?(Hash) ? raw.metadata : {},
76
- tool_calls: raw.respond_to?(:tool_calls) ? raw.tool_calls : [],
72
+ tool_calls: raw.respond_to?(:tool_calls) ? coerce_tool_calls(raw.tool_calls) : [],
77
73
  stop_reason: raw.respond_to?(:stop_reason) ? raw.stop_reason : nil,
78
74
  thinking: raw.respond_to?(:thinking) ? raw.thinking : nil
79
75
  }.compact
@@ -107,6 +103,29 @@ module Legion
107
103
  )
108
104
  end
109
105
 
106
+ def self.coerce_tool_calls(raw)
107
+ return [] if raw.nil?
108
+ return raw if raw.is_a?(Array)
109
+
110
+ return raw.values.filter_map { |entry| coerce_single_tool_call(entry) } if raw.is_a?(Hash) && !single_tool_call_hash?(raw)
111
+
112
+ [coerce_single_tool_call(raw)].compact
113
+ end
114
+
115
+ def self.single_tool_call_hash?(hash)
116
+ hash.key?(:name) || hash.key?('name') || hash.key?(:function) || hash.key?('function')
117
+ end
118
+
119
+ def self.coerce_single_tool_call(entry)
120
+ if entry.respond_to?(:id) && entry.respond_to?(:name)
121
+ return { id: entry.id, name: entry.name, arguments: entry.respond_to?(:arguments) ? entry.arguments : {} }
122
+ end
123
+
124
+ return entry if entry.is_a?(Hash)
125
+
126
+ nil
127
+ end
128
+
110
129
  def self.merge_thinking_payloads(existing, extracted)
111
130
  return existing || extracted unless existing && extracted
112
131
 
@@ -150,7 +150,7 @@ module Legion
150
150
  def normalize_messages(messages, system: nil)
151
151
  message_class = lex_llm_namespace::Message
152
152
  raw_messages = Array(messages)
153
- raw_messages = [{ role: :system, content: system }] + raw_messages if present_system?(system)
153
+ raw_messages = prepend_or_merge_system(raw_messages, system) if present_system?(system)
154
154
 
155
155
  raw_messages.map do |message|
156
156
  next message if message.is_a?(message_class)
@@ -165,6 +165,22 @@ module Legion
165
165
  end
166
166
  end
167
167
 
168
+ def prepend_or_merge_system(raw_messages, system)
169
+ first = raw_messages.first
170
+ first_role = if first.is_a?(Hash)
171
+ first[:role] || first['role']
172
+ elsif first.respond_to?(:role)
173
+ first.role
174
+ end
175
+ if first_role.to_s == 'system'
176
+ existing_content = first.is_a?(Hash) ? (first[:content] || first['content']) : first.content
177
+ merged = { role: :system, content: "#{system}\n\n#{existing_content}" }
178
+ [merged] + raw_messages[1..]
179
+ else
180
+ [{ role: :system, content: system }] + raw_messages
181
+ end
182
+ end
183
+
168
184
  def present_system?(system)
169
185
  return false if system.nil?
170
186
  return false if system.respond_to?(:empty?) && system.empty?
@@ -247,11 +247,13 @@ module Legion
247
247
  def load_curated(conversation_id)
248
248
  return nil unless Inference::Conversation.conversation_exists?(conversation_id)
249
249
 
250
- raw = Inference::Conversation.messages(conversation_id)
250
+ # Use raw_messages so CURATED_ROLE entries are visible even though they
251
+ # are filtered out of the public-facing Conversation#messages array.
252
+ raw = Inference::Conversation.raw_messages(conversation_id)
251
253
  curated_entries = raw.select { |m| m[:role] == CURATED_KEY }
252
254
  return nil if curated_entries.empty?
253
255
 
254
- regular = raw.reject { |m| m[:role] == CURATED_KEY }
256
+ regular = raw.reject { |m| [CURATED_KEY, Inference::Conversation::METADATA_ROLE].include?(m[:role]) }
255
257
  summaries = normalized_curated_summaries(curated_entries)
256
258
  if summaries.empty?
257
259
  apply_curation_pipeline(regular)
@@ -31,9 +31,7 @@ module Legion
31
31
  end
32
32
 
33
33
  def refresh!
34
- @total_fetched_at = nil
35
34
  @available_fetched_at = nil
36
- @total_memory_mb = nil
37
35
  @available_memory_mb = nil
38
36
  @last_refreshed_at = Time.now
39
37
  end
@@ -57,7 +55,6 @@ module Legion
57
55
  private
58
56
 
59
57
  def ensure_total_fresh
60
- refresh! if stale?
61
58
  return unless @total_fetched_at.nil?
62
59
 
63
60
  fetch_total
@@ -11,6 +11,7 @@ module Legion
11
11
 
12
12
  MAX_CONVERSATIONS = 256
13
13
  METADATA_ROLE = :__metadata__
14
+ CURATED_ROLE = :__curated__
14
15
 
15
16
  class << self
16
17
  def append(conversation_id, role:, content:, parent_id: nil, sidechain: false,
@@ -38,29 +39,41 @@ module Legion
38
39
 
39
40
  # Returns flat ordered message array — backward-compatible.
40
41
  # Uses chain reconstruction when parent links exist; falls back to seq order.
42
+ # Internal-only roles (__metadata__, __curated__) are filtered out.
41
43
  def messages(conversation_id)
42
44
  if in_memory?(conversation_id)
43
45
  touch(conversation_id)
44
- raw = conversations[conversation_id][:messages].reject { |m| m[:role] == METADATA_ROLE }
46
+ raw = conversations[conversation_id][:messages].reject { |m| internal_role?(m[:role]) }
45
47
  chain_or_seq(raw)
46
48
  else
47
49
  load_from_db(conversation_id)
48
50
  end
49
51
  end
50
52
 
53
+ # Returns ALL messages including internal-role entries (__metadata__, __curated__).
54
+ # Use this when you need access to curation markers or metadata entries.
55
+ def raw_messages(conversation_id)
56
+ if in_memory?(conversation_id)
57
+ touch(conversation_id)
58
+ conversations[conversation_id][:messages].dup
59
+ else
60
+ load_all_from_db(conversation_id)
61
+ end
62
+ end
63
+
51
64
  # Build ordered chain from parent links.
52
65
  # Excludes sidechain messages by default.
53
66
  def build_chain(conversation_id, include_sidechains: false)
54
67
  raw = all_raw_messages(conversation_id)
55
68
  raw = raw.reject { |m| m[:sidechain] } unless include_sidechains
56
- raw = raw.reject { |m| m[:role] == METADATA_ROLE }
69
+ raw = raw.reject { |m| internal_role?(m[:role]) }
57
70
  reconstruct_chain(raw)
58
71
  end
59
72
 
60
73
  # Return sidechain messages; optionally filter by agent_id.
61
74
  def sidechain_messages(conversation_id, agent_id: nil)
62
75
  raw = all_raw_messages(conversation_id)
63
- result = raw.select { |m| m[:sidechain] && m[:role] != METADATA_ROLE }
76
+ result = raw.select { |m| m[:sidechain] && !internal_role?(m[:role]) }
64
77
  result = result.select { |m| m[:agent_id] == agent_id } unless agent_id.nil?
65
78
  result.sort_by { |m| m[:seq] }
66
79
  end
@@ -243,6 +256,12 @@ module Legion
243
256
 
244
257
  private
245
258
 
259
+ # Returns true for roles that are internal bookkeeping and should not
260
+ # appear in the public-facing message array returned by #messages.
261
+ def internal_role?(role)
262
+ [METADATA_ROLE, CURATED_ROLE].include?(role)
263
+ end
264
+
246
265
  def conversations
247
266
  @conversations ||= {}
248
267
  end
@@ -543,9 +562,22 @@ module Legion
543
562
  .where(conversation_id: conversation_id)
544
563
  .order(:seq)
545
564
  .map { |row| symbolize_message(row) }
565
+ .reject { |m| internal_role?(m[:role]) }
546
566
  chain_or_seq(rows)
547
567
  end
548
568
 
569
+ def load_all_from_db(conversation_id)
570
+ return [] unless db_available?
571
+
572
+ Legion::Data.connection[:conversation_messages]
573
+ .where(conversation_id: conversation_id)
574
+ .order(:seq)
575
+ .map { |row| symbolize_message(row) }
576
+ rescue StandardError => e
577
+ handle_exception(e, level: :debug)
578
+ []
579
+ end
580
+
549
581
  def db_conversation_record?(conversation_id)
550
582
  Legion::Data.connection[:conversations].where(id: conversation_id).any?
551
583
  end
@@ -134,12 +134,18 @@ module Legion
134
134
  def trivial_query?(query)
135
135
  query = content_text(query)
136
136
  max_chars = rag_setting(:trivial_max_chars, 20)
137
- patterns = rag_setting(:trivial_patterns, [])
138
-
139
- return false if query.length > max_chars
137
+ configured_patterns = rag_setting(:trivial_patterns)
140
138
 
141
139
  normalized = query.strip.downcase.gsub(/[^a-z0-9\s]/, '')
142
- patterns.any? { |p| normalized == p }
140
+ patterns = configured_patterns || trivial_patterns
141
+ return true if patterns.any? { |p| normalized == p }
142
+ return true if configured_patterns.nil? && query.length <= max_chars && normalized.split.length <= 1
143
+
144
+ false
145
+ end
146
+
147
+ def trivial_patterns
148
+ rag_setting(:trivial_patterns, %w[ping pong ding test foobar])
143
149
  end
144
150
 
145
151
  def apollo_available?
@@ -314,7 +320,8 @@ module Legion
314
320
  def positive_integer(value)
315
321
  integer = Integer(value)
316
322
  integer.positive? ? integer : nil
317
- rescue ArgumentError, TypeError
323
+ rescue ArgumentError, TypeError => e
324
+ handle_exception(e, level: :debug, handled: true, operation: 'llm.pipeline.steps.rag_context.positive_integer')
318
325
  nil
319
326
  end
320
327
  end
@@ -142,26 +142,26 @@ module Legion
142
142
  def extract_usage(response)
143
143
  return { input_tokens: 0, output_tokens: 0 } unless response.is_a?(Hash)
144
144
 
145
- usage = settings_value(response, :usage) || {}
145
+ usage = extract_hash_value(response, :usage) || {}
146
146
  {
147
- input_tokens: settings_value(usage, :input_tokens) || settings_value(usage, :prompt_tokens) || 0,
148
- output_tokens: settings_value(usage, :output_tokens) || settings_value(usage, :completion_tokens) || 0
147
+ input_tokens: extract_hash_value(usage, :input_tokens) || extract_hash_value(usage, :prompt_tokens) || 0,
148
+ output_tokens: extract_hash_value(usage, :output_tokens) || extract_hash_value(usage, :completion_tokens) || 0
149
149
  }
150
150
  end
151
151
 
152
152
  def extract_provider(response)
153
153
  return nil unless response.is_a?(Hash)
154
154
 
155
- settings_value(settings_value(response, :meta), :provider) || settings_value(response, :provider)
155
+ extract_hash_value(extract_hash_value(response, :meta), :provider) || extract_hash_value(response, :provider)
156
156
  end
157
157
 
158
158
  def extract_model(response)
159
159
  return nil unless response.is_a?(Hash)
160
160
 
161
- settings_value(settings_value(response, :meta), :model) || settings_value(response, :model)
161
+ extract_hash_value(extract_hash_value(response, :meta), :model) || extract_hash_value(response, :model)
162
162
  end
163
163
 
164
- def settings_value(hash, key)
164
+ def extract_hash_value(hash, key)
165
165
  return nil unless hash.respond_to?(:key?)
166
166
 
167
167
  string_key = key.to_s
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.15'
5
+ VERSION = '0.9.19'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.15
4
+ version: 0.9.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity
@@ -200,6 +200,7 @@ files:
200
200
  - lib/legion/llm/api/native/offerings.rb
201
201
  - lib/legion/llm/api/native/providers.rb
202
202
  - lib/legion/llm/api/native/routing.rb
203
+ - lib/legion/llm/api/native/tiers.rb
203
204
  - lib/legion/llm/api/openai/chat_completions.rb
204
205
  - lib/legion/llm/api/openai/embeddings.rb
205
206
  - lib/legion/llm/api/openai/models.rb