legion-llm 0.9.17 → 0.9.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -0
- data/lib/legion/llm/api/native/inference.rb +13 -2
- data/lib/legion/llm/api/native/offerings.rb +41 -9
- data/lib/legion/llm/api/native/tiers.rb +242 -0
- data/lib/legion/llm/api.rb +2 -0
- data/lib/legion/llm/call/dispatch.rb +34 -8
- data/lib/legion/llm/call/embeddings.rb +123 -10
- data/lib/legion/llm/call/lex_llm_adapter.rb +116 -25
- data/lib/legion/llm/inference/conversation.rb +17 -291
- data/lib/legion/llm/inference/executor.rb +44 -43
- data/lib/legion/llm/inference/native_tool_loop.rb +149 -0
- data/lib/legion/llm/inference/steps/gaia_advisory.rb +4 -0
- data/lib/legion/llm/inference/steps/rag_context.rb +2 -0
- data/lib/legion/llm/inference/steps/sticky_runners.rb +11 -1
- data/lib/legion/llm/inference/steps/tool_discovery.rb +2 -1
- data/lib/legion/llm/inference/steps/trigger_match.rb +85 -15
- data/lib/legion/llm/metering.rb +3 -45
- data/lib/legion/llm/settings.rb +8 -1
- data/lib/legion/llm/tools/confidence.rb +1 -25
- data/lib/legion/llm/tools/dispatcher.rb +8 -1
- data/lib/legion/llm/tools/interceptors/python_venv.rb +13 -5
- data/lib/legion/llm/tools/special.rb +325 -0
- data/lib/legion/llm/tools.rb +1 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +1 -0
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 428a14e141f5cbbb278e05f49fd198ef13f6e789727037c90154a855b76a8b34
|
|
4
|
+
data.tar.gz: 8dc2aea0cd776675aad1c8ff198b35f0eba573e4a37c6e2bcdc0b6dfbbb7210b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3b9f1b9fae5371eefcbfbc89262bfa422e23df0e2e52d56735c5f3af9912b7245883ae4864568b6d8828e2dfdc3ab8c3d9fd4f125f662d6f8ae51602976d9952
|
|
7
|
+
data.tar.gz: dcdbf11006d26b929779bdb0e2ae8a541225b3a62c820dd027ef6801198a5056eb1d9a93e7cd504846b11b2196c187d7a415e263592864c3eae5ace4153b31ee
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,55 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.22] - 2026-05-12
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Pin `legion_list_special_tools` before client and registry tools so models can inspect Legion special tools and the current `Legion::Settings::Extensions` inventory.
|
|
7
|
+
- Surface special Ruby runtime execution with current process/PATH environment metadata, and add Legion-managed Python and pip tools when `legionio setup python` is available.
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
- Route Python command interception through the same Legion Python runtime detection used by special tool injection.
|
|
11
|
+
- Replace ad hoc `/api/llm/inference` tool-payload debug prints with structured debug logging.
|
|
12
|
+
|
|
13
|
+
### Fixed
|
|
14
|
+
- Chunk Ollama embedding requests according to configured model context limits and aggregate chunk vectors so large Apollo knowledge-capture documents do not exceed provider context windows.
|
|
15
|
+
|
|
16
|
+
## [0.9.21] - 2026-05-12
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
- Route metering strictly through `legion-transport`, dropping events when transport is unavailable instead of writing metric events to `Legion::Data::Spool`.
|
|
20
|
+
- Keep override confidence database access read-only by removing `Legion::Data::Local` upserts from `legion-llm`.
|
|
21
|
+
- Stop conversation history and sticky state from writing directly to `Legion::Data` tables.
|
|
22
|
+
|
|
23
|
+
## [0.9.20] - 2026-05-12
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- Added `llm.gaia.advisory_enabled`, defaulting to `true`, so GAIA pre-request advisory shaping can be disabled without code changes.
|
|
27
|
+
|
|
28
|
+
### Fixed
|
|
29
|
+
- Preserve accumulated streamed native tool-call arguments from lex-llm provider responses instead of rebuilding final responses from partial stream chunks.
|
|
30
|
+
- Symbolize extension tool arguments before invoking runner keyword methods so JSON string keys such as `chat_id` satisfy Ruby keyword parameters.
|
|
31
|
+
- Match tool triggers from `Legion::Settings::Extensions` registry entries and keep registry tools injectable alongside client tools with better diagnostics.
|
|
32
|
+
- Skip trigger matching cleanly when `Legion::Settings::Extensions` is not loaded instead of warning through a rescued `NameError`.
|
|
33
|
+
- Accumulate only stream fallback state in the lex-llm adapter instead of retaining every streamed chunk when providers return final messages.
|
|
34
|
+
- Apply explicit vLLM tool-name forcing only on the first native tool-loop round, allowing follow-up automatic tool calls after the requested tool returns.
|
|
35
|
+
- Ignore absent GAIA advisory context-window limits when sizing RAG retrieval instead of routing nil through debug exception handling.
|
|
36
|
+
|
|
37
|
+
## [0.9.19] - 2026-05-11
|
|
38
|
+
|
|
39
|
+
### Added
|
|
40
|
+
- `GET /api/llm/tiers` — full RESTful tier hierarchy endpoint with sub-routes: `/:tier`, `/:tier/providers`, `/:tier/providers/:provider`, `/:tier/providers/:provider/instances`, `/:tier/providers/:provider/instances/:instance`, `/:tier/providers/:provider/instances/:instance/models`, `/:tier/providers/:provider/models`. Returns tier availability, provider health, instance details, and model listings in a structured tree.
|
|
41
|
+
- `POST /api/llm/inference` now accepts `tier` parameter in request body, passed through to the routing pipeline via `Request.extra[:tier]`. Supports values: `local`, `fleet`, `openai_compat`, `cloud`, `frontier`.
|
|
42
|
+
- Request log for `/api/llm/inference` now includes `requested_tier` field.
|
|
43
|
+
|
|
44
|
+
### Changed
|
|
45
|
+
- `GET /api/llm/offerings` response restructured from flat array to grouped hash: `tier → provider → instance → [offerings]`. Individual offering lookup (`GET /api/llm/offerings/:id`) unchanged.
|
|
46
|
+
|
|
47
|
+
## [0.9.18] - 2026-05-11
|
|
48
|
+
|
|
49
|
+
### Fixed
|
|
50
|
+
- `NativeResponseAdapter` now coerces tool_calls from the Hash-keyed-by-name format (returned by OpenAI-compatible providers via lex-llm) into a flat Array of Hashes, preventing TypeError crashes in `step_tool_calls`, `response_tool_calls`, and the native tool loop when streaming tool-use responses from vllm/ollama.
|
|
51
|
+
- `LexLLMAdapter#normalize_messages` merges enriched system content with an existing system message at index 0 instead of prepending a duplicate, preventing vllm "System message must be at the beginning" rejections during gaia narrator ticks.
|
|
52
|
+
|
|
3
53
|
## [0.9.17] - 2026-05-11
|
|
4
54
|
|
|
5
55
|
### Fixed
|
|
@@ -23,6 +23,7 @@ module Legion
|
|
|
23
23
|
requested_tools = body[:requested_tools] || []
|
|
24
24
|
model = body[:model]
|
|
25
25
|
provider = body[:provider]
|
|
26
|
+
tier = body[:tier]
|
|
26
27
|
caller_context = body[:caller]
|
|
27
28
|
conversation_id = body[:conversation_id]
|
|
28
29
|
request_id = body[:request_id] || SecureRandom.uuid
|
|
@@ -42,6 +43,11 @@ module Legion
|
|
|
42
43
|
|
|
43
44
|
tools = raw_tools || []
|
|
44
45
|
validate_tools!(tools) unless tools.empty?
|
|
46
|
+
raw_tool_count = raw_tools.is_a?(Array) ? raw_tools.size : 0
|
|
47
|
+
log.debug(
|
|
48
|
+
"[llm][api][tools] action=request_tools_received request_id=#{request_id} " \
|
|
49
|
+
"has_tools=#{body.key?(:tools)} raw_tools_class=#{raw_tools&.class} raw_tools_count=#{raw_tool_count}"
|
|
50
|
+
)
|
|
45
51
|
|
|
46
52
|
caller_identity = identity_canonical_name(env)
|
|
47
53
|
last_user = messages.select { |m| (m[:role] || m['role']).to_s == 'user' }.last
|
|
@@ -88,12 +94,16 @@ module Legion
|
|
|
88
94
|
"[llm][api][inference] action=accepted request_id=#{request_id} " \
|
|
89
95
|
"conversation_id=#{conversation_id || 'none'} caller=#{caller_summary} " \
|
|
90
96
|
"messages=#{messages.size} client_tools=#{tools.size} requested_tools=#{Array(requested_tools).size} " \
|
|
91
|
-
"
|
|
97
|
+
"requested_tier=#{tier || 'auto'} requested_provider=#{provider || 'auto'} " \
|
|
98
|
+
"requested_model=#{model || 'auto'} stream=#{streaming}"
|
|
92
99
|
)
|
|
93
100
|
|
|
94
101
|
require 'legion/llm/inference/request' unless defined?(Legion::LLM::Inference::Request)
|
|
95
102
|
require 'legion/llm/inference/executor' unless defined?(Legion::LLM::Inference::Executor)
|
|
96
103
|
|
|
104
|
+
extra = {}
|
|
105
|
+
extra[:tier] = tier.to_sym if tier
|
|
106
|
+
|
|
97
107
|
pipeline_request = Legion::LLM::Inference::Request.build(
|
|
98
108
|
id: request_id,
|
|
99
109
|
messages: messages,
|
|
@@ -104,7 +114,8 @@ module Legion
|
|
|
104
114
|
conversation_id: conversation_id,
|
|
105
115
|
metadata: { requested_tools: requested_tools },
|
|
106
116
|
stream: streaming,
|
|
107
|
-
cache: { strategy: :default, cacheable: true }
|
|
117
|
+
cache: { strategy: :default, cacheable: true },
|
|
118
|
+
extra: extra
|
|
108
119
|
)
|
|
109
120
|
|
|
110
121
|
setup_ms = ((::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - route_t0) * 1000).round
|
|
@@ -17,11 +17,12 @@ module Legion
|
|
|
17
17
|
require_llm!
|
|
18
18
|
|
|
19
19
|
filters = Legion::LLM::API::Native::Offerings.request_filters(params)
|
|
20
|
-
|
|
20
|
+
raw_offerings = Legion::LLM::Inventory.offerings(filters)
|
|
21
|
+
grouped = Legion::LLM::API::Native::Offerings.group_offerings(raw_offerings)
|
|
21
22
|
|
|
22
23
|
json_response({
|
|
23
|
-
offerings:
|
|
24
|
-
summary: Legion::LLM::API::Native::Offerings.summary(
|
|
24
|
+
offerings: grouped,
|
|
25
|
+
summary: Legion::LLM::API::Native::Offerings.summary(raw_offerings)
|
|
25
26
|
})
|
|
26
27
|
rescue StandardError => e
|
|
27
28
|
handle_exception(e, level: :error, handled: true, operation: 'llm.api.offerings.list')
|
|
@@ -59,15 +60,46 @@ module Legion
|
|
|
59
60
|
}
|
|
60
61
|
end
|
|
61
62
|
|
|
62
|
-
def self.
|
|
63
|
+
# Nests a flat list of offerings into a
# tier → provider → instance → [offerings] hash.
#
# Every grouping key is stringified. An offering without a tier or provider
# family is filed under "unknown"; the instance key is taken from
# `:instance_id`, then `:provider_instance`, then falls back to "default".
# Leaf entries are produced by `compact_offering`.
#
# @param offerings [Enumerable<Hash>] offering hashes, read with symbol keys
# @return [Hash{String => Hash{String => Hash{String => Array<Hash>}}}]
def self.group_offerings(offerings)
  offerings.each_with_object({}) do |entry, tree|
    tier_key     = (entry[:tier] || :unknown).to_s
    provider_key = (entry[:provider_family] || :unknown).to_s
    instance_key = (entry[:instance_id] || entry[:provider_instance] || :default).to_s

    by_provider = (tree[tier_key] ||= {})
    by_instance = (by_provider[provider_key] ||= {})
    (by_instance[instance_key] ||= []) << compact_offering(entry)
  end
end
|
|
79
|
+
|
|
80
|
+
# Builds the trimmed, serialisable view of a single offering.
#
# Model, type and capabilities are stringified; limits, cost and health
# default to empty hashes; `enabled` is true unless the offering explicitly
# sets it to `false`. Keys whose value ends up nil (a missing id or model
# family) are dropped from the result.
#
# @param offering [Hash] offering hash, read with symbol keys
# @return [Hash] compacted offering entry
def self.compact_offering(offering)
  entry = {}
  entry[:id]           = offering[:offering_id] || offering[:id]
  entry[:model]        = offering[:model].to_s
  entry[:type]         = offering[:type].to_s
  entry[:model_family] = offering[:model_family]&.to_s
  entry[:capabilities] = Array(offering[:capabilities]).map(&:to_s)
  entry[:limits]       = offering[:limits] || {}
  entry[:enabled]      = offering[:enabled] != false
  entry[:cost]         = offering[:cost] || {}
  entry[:health]       = offering[:health] || {}
  entry.compact
end
|
|
93
|
+
|
|
94
|
+
# Counts the distinct tiers, providers, instances and models in a flat
# offerings list.
#
# The keys are derived the same way `group_offerings` / `compact_offering`
# derive them, so the counts agree with the grouped payload:
# * the instance key falls back from `:instance_id` to `:provider_instance`
#   before defaulting to "default";
# * model names are stringified before de-duplication, so `:m1` and "m1"
#   count as one model.
#
# @param offerings [Array<Hash>] offering hashes, read with symbol keys
# @return [Hash{Symbol => Integer}] :total, :tiers, :providers, :instances, :models
def self.summary(offerings)
  distinct = ->(&key) { offerings.map(&key).uniq.size }

  {
    total: offerings.size,
    tiers: distinct.call { |o| (o[:tier] || :unknown).to_s },
    providers: distinct.call { |o| (o[:provider_family] || :unknown).to_s },
    instances: distinct.call { |o| (o[:instance_id] || o[:provider_instance] || :default).to_s },
    models: distinct.call { |o| o[:model].to_s }
  }
end
|
|
71
103
|
end
|
|
72
104
|
end
|
|
73
105
|
end
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/logging/helper'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module LLM
|
|
7
|
+
module API
|
|
8
|
+
module Native
|
|
9
|
+
module Tiers
|
|
10
|
+
extend Legion::Logging::Helper
|
|
11
|
+
|
|
12
|
+
# Registers the read-only `/api/llm/tiers` route family on +app+.
#
# Every route rebuilds the tier tree through `build_tiers_tree`, walks down
# to the requested tier / provider / instance, halts with a 404 JSON error
# as soon as one level is missing, and maps any StandardError to a 500
# `tiers_error` after reporting it via `handle_exception`.
#
# @param app [#get] object exposing `get(path, &block)`; presumably the
#   Sinatra-style API application (confirm against the caller). The route
#   blocks rely on `require_llm!`, `params`, `halt`, `json_response`,
#   `json_error` and `handle_exception` being available where they run.
def self.registered(app) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  log.debug('[llm][api][tiers] registering tier routes')

  # Full tree plus the routing priority and privacy flag.
  app.get '/api/llm/tiers' do
    require_llm!

    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    json_response({
                    tiers: tiers_data,
                    priority: Legion::LLM::API::Native::Tiers.tier_priority,
                    privacy_mode: Legion::LLM::API::Native::Tiers.privacy_mode?
                  })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.list')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # One tier, with its name merged into the tier payload.
  app.get '/api/llm/tiers/:tier' do
    require_llm!

    tier_name = params[:tier].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    json_response({ tier: tier_name, **tier })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.get')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # Providers of one tier.
  app.get '/api/llm/tiers/:tier/providers' do
    require_llm!

    tier_name = params[:tier].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    json_response({ tier: tier_name, providers: tier[:providers] })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.providers')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # One provider inside a tier.
  app.get '/api/llm/tiers/:tier/providers/:provider' do
    require_llm!

    tier_name = params[:tier].to_s
    provider_name = params[:provider].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    provider = tier.dig(:providers, provider_name)
    halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider

    json_response({ tier: tier_name, provider: provider_name, **provider })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.provider')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # Instances of one provider.
  app.get '/api/llm/tiers/:tier/providers/:provider/instances' do
    require_llm!

    tier_name = params[:tier].to_s
    provider_name = params[:provider].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    provider = tier.dig(:providers, provider_name)
    halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider

    json_response({ tier: tier_name, provider: provider_name, instances: provider[:instances] })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.instances')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # One instance of a provider.
  app.get '/api/llm/tiers/:tier/providers/:provider/instances/:instance' do
    require_llm!

    tier_name = params[:tier].to_s
    provider_name = params[:provider].to_s
    instance_name = params[:instance].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    provider = tier.dig(:providers, provider_name)
    halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider

    instance = provider.dig(:instances, instance_name)
    halt json_error('instance_not_found', "Instance '#{instance_name}' not found", status_code: 404) unless instance

    json_response({ tier: tier_name, provider: provider_name, instance: instance_name, **instance })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.instance')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # Models served by one instance.
  app.get '/api/llm/tiers/:tier/providers/:provider/instances/:instance/models' do
    require_llm!

    tier_name = params[:tier].to_s
    provider_name = params[:provider].to_s
    instance_name = params[:instance].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    provider = tier.dig(:providers, provider_name)
    halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider

    instance = provider.dig(:instances, instance_name)
    halt json_error('instance_not_found', "Instance '#{instance_name}' not found", status_code: 404) unless instance

    json_response({ tier: tier_name, provider: provider_name, instance: instance_name, models: instance[:models] })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.instance_models')
    json_error('tiers_error', e.message, status_code: 500)
  end

  # Models of a provider across all of its instances, de-duplicated by model id.
  app.get '/api/llm/tiers/:tier/providers/:provider/models' do
    require_llm!

    tier_name = params[:tier].to_s
    provider_name = params[:provider].to_s
    tiers_data = Legion::LLM::API::Native::Tiers.build_tiers_tree
    tier = tiers_data[tier_name]
    halt json_error('tier_not_found', "Tier '#{tier_name}' not found", status_code: 404) unless tier

    provider = tier.dig(:providers, provider_name)
    halt json_error('provider_not_found', "Provider '#{provider_name}' not found in tier '#{tier_name}'", status_code: 404) unless provider

    all_models = provider[:instances].values.flat_map { |inst| inst[:models] }
    # The first entry seen for each model id wins.
    unique_models = all_models.uniq { |m| m[:id] }

    json_response({ tier: tier_name, provider: provider_name, models: unique_models })
  rescue StandardError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.tiers.provider_models')
    json_error('tiers_error', e.message, status_code: 500)
  end

  log.debug('[llm][api][tiers] tier routes registered')
end
|
|
161
|
+
|
|
162
|
+
# Returns the configured tier routing order as an array of tier names.
#
# Reads `:tier_priority` from the `:routing` settings and falls back to the
# built-in order when it is not configured. Entries are normalised to
# strings because the tier tree is keyed by string tier names; a symbol
# entry would otherwise never match those keys.
#
# @return [Array<String>] tier names, highest priority first
def self.tier_priority
  routing_config = Legion::LLM::Settings.value(:routing) || {}
  configured = routing_config[:tier_priority] || %w[local fleet openai_compat cloud frontier]
  Array(configured).map(&:to_s)
end
|
|
166
|
+
|
|
167
|
+
# Reports the router's privacy-mode flag.
#
# Returns false when `Legion::LLM::Router` is not loaded or does not
# respond to `privacy_mode?`; otherwise returns whatever the router says.
#
# @return [Boolean, Object] the router's answer, or false when it cannot be asked
def self.privacy_mode?
  if defined?(Legion::LLM::Router)
    router = Legion::LLM::Router
    router.respond_to?(:privacy_mode?) && router.privacy_mode?
  else
    false
  end
end
|
|
172
|
+
|
|
173
|
+
# Asks the router whether a tier is currently available.
#
# When `Legion::LLM::Router` is not loaded, or does not respond to
# `tier_available?`, the tier is reported as available.
#
# @param tier_sym [Symbol] tier name, passed through to the router unchanged
# @return [Boolean, Object] the router's answer, or true when it cannot be asked
def self.tier_available?(tier_sym)
  router_can_answer = defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:tier_available?)
  router_can_answer ? Legion::LLM::Router.tier_available?(tier_sym) : true
end
|
|
178
|
+
|
|
179
|
+
# Builds the tier → provider → instance tree from the unfiltered offerings
# inventory.
#
# Shape of the result (all grouping keys are strings):
#
#   { "tier" => { available:, providers: {
#       "provider" => { instances: {
#         "instance" => { health:, capabilities: [String], models: [Hash] } } } } } }
#
# Ordering: tiers named in `tier_priority` come first, in priority order,
# and are included even when they have no offerings; any other tier found
# in the inventory follows in discovery order.
#
# Instance capabilities are the sorted union of the capabilities of all
# offerings on that instance. They are stringified first so that a mix of
# symbols and strings neither raises in `sort` nor yields duplicates.
#
# @return [Hash{String => Hash}] ordered tier tree
def self.build_tiers_tree
  offerings = Legion::LLM::Inventory.offerings({})
  grouped = {}

  offerings.each do |offering|
    tier_name = (offering[:tier] || :unknown).to_s
    provider_name = (offering[:provider_family] || :unknown).to_s
    instance_name = (offering[:instance_id] || offering[:provider_instance] || :default).to_s

    grouped[tier_name] ||= { available: tier_available?(tier_name.to_sym), providers: {} }
    grouped[tier_name][:providers][provider_name] ||= { instances: {} }
    grouped[tier_name][:providers][provider_name][:instances][instance_name] ||= {
      health: offering_instance_health(provider_name, instance_name),
      capabilities: [],
      models: []
    }

    inst = grouped[tier_name][:providers][provider_name][:instances][instance_name]
    offered = Array(offering[:capabilities]).map(&:to_s)
    inst[:capabilities] = (inst[:capabilities] + offered).uniq.sort
    inst[:models] << build_model_entry(offering)
  end

  # Priority tiers first (string-normalised to match the tree keys), with an
  # empty placeholder for any priority tier that has no offerings.
  sorted = {}
  tier_priority.map(&:to_s).each do |t|
    sorted[t] ||= grouped.delete(t) || { available: tier_available?(t.to_sym), providers: {} }
  end

  # Remaining non-priority tiers keep their discovery order.
  grouped.each { |t, v| sorted[t] = v }

  sorted
end
|
|
214
|
+
|
|
215
|
+
# Builds the per-model entry listed under an instance in the tier tree.
#
# The model name becomes `:id`; type and capabilities are stringified;
# limits and cost default to empty hashes; `enabled` is true unless the
# offering explicitly sets it to `false`. Fields whose value is nil (a
# missing offering id or model family) are left out.
#
# @param offering [Hash] offering hash, read with symbol keys
# @return [Hash] model entry
def self.build_model_entry(offering)
  fields = [
    [:id, offering[:model].to_s],
    [:offering_id, offering[:offering_id] || offering[:id]],
    [:type, offering[:type].to_s],
    [:capabilities, Array(offering[:capabilities]).map(&:to_s)],
    [:limits, offering[:limits] || {}],
    [:enabled, offering[:enabled] != false],
    [:cost, offering[:cost] || {}],
    [:model_family, offering[:model_family]&.to_s]
  ]

  fields.reject { |_key, value| value.nil? }.to_h
end
|
|
227
|
+
|
|
228
|
+
# Looks up the circuit state of a provider instance as a string.
#
# Returns 'unknown' when `Legion::LLM::Router` is not loaded, does not
# expose `health_tracker`, has no tracker, or when the lookup raises a
# StandardError (best-effort: health must never break the tree build).
#
# @param provider_name [String] provider family name, converted to a symbol
# @param instance_name [String] instance name, converted to a symbol
# @return [String] the tracker's circuit state, or 'unknown'
def self.offering_instance_health(provider_name, instance_name)
  health = (Legion::LLM::Router.health_tracker if defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:health_tracker))

  if health
    health.circuit_state(provider_name.to_sym, instance: instance_name.to_sym).to_s
  else
    'unknown'
  end
rescue StandardError
  'unknown'
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
end
|
data/lib/legion/llm/api.rb
CHANGED
|
@@ -9,6 +9,7 @@ require_relative 'api/native/models'
|
|
|
9
9
|
require_relative 'api/native/offerings'
|
|
10
10
|
require_relative 'api/native/instances'
|
|
11
11
|
require_relative 'api/native/routing'
|
|
12
|
+
require_relative 'api/native/tiers'
|
|
12
13
|
require_relative 'api/translators/openai_request'
|
|
13
14
|
require_relative 'api/translators/openai_response'
|
|
14
15
|
require_relative 'api/openai/chat_completions'
|
|
@@ -36,6 +37,7 @@ module Legion
|
|
|
36
37
|
Native::Offerings.registered(app)
|
|
37
38
|
Native::Instances.registered(app)
|
|
38
39
|
Native::Routing.registered(app)
|
|
40
|
+
Native::Tiers.registered(app)
|
|
39
41
|
OpenAI::ChatCompletions.registered(app)
|
|
40
42
|
OpenAI::Models.registered(app)
|
|
41
43
|
OpenAI::Embeddings.registered(app)
|
|
@@ -2,11 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require 'legion/logging/helper'
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
require 'legion/extensions/llm/responses/thinking_extractor'
|
|
7
|
-
rescue LoadError
|
|
8
|
-
nil
|
|
9
|
-
end
|
|
5
|
+
require 'legion/extensions/llm/responses/thinking_extractor'
|
|
10
6
|
|
|
11
7
|
module Legion
|
|
12
8
|
module LLM
|
|
@@ -37,7 +33,7 @@ module Legion
|
|
|
37
33
|
@content = extracted[:result].to_s
|
|
38
34
|
@model = result_hash[:model]
|
|
39
35
|
@metadata = extracted[:metadata] || {}
|
|
40
|
-
@tool_calls = result_hash[:tool_calls]
|
|
36
|
+
@tool_calls = self.class.coerce_tool_calls(result_hash[:tool_calls])
|
|
41
37
|
@stop_reason = result_hash[:stop_reason]
|
|
42
38
|
@thinking = extracted[:thinking]
|
|
43
39
|
usage = self.class.coerce_usage(result_hash[:usage])
|
|
@@ -73,7 +69,7 @@ module Legion
|
|
|
73
69
|
cache_write_tokens: raw.respond_to?(:cache_creation_tokens) ? raw.cache_creation_tokens.to_i : 0
|
|
74
70
|
),
|
|
75
71
|
metadata: raw.respond_to?(:metadata) && raw.metadata.is_a?(Hash) ? raw.metadata : {},
|
|
76
|
-
tool_calls: raw.respond_to?(:tool_calls) ? raw.tool_calls : [],
|
|
72
|
+
tool_calls: raw.respond_to?(:tool_calls) ? coerce_tool_calls(raw.tool_calls) : [],
|
|
77
73
|
stop_reason: raw.respond_to?(:stop_reason) ? raw.stop_reason : nil,
|
|
78
74
|
thinking: raw.respond_to?(:thinking) ? raw.thinking : nil
|
|
79
75
|
}.compact
|
|
@@ -107,6 +103,29 @@ module Legion
|
|
|
107
103
|
)
|
|
108
104
|
end
|
|
109
105
|
|
|
106
|
+
# Normalises a provider's tool-call payload into an Array.
#
# * nil             → empty array
# * Array           → returned as-is (elements are not touched)
# * Hash that is not itself a tool call (no name/function key)
#                   → treated as a collection; each value is coerced and
#                     values that cannot be coerced are dropped
# * anything else (a single tool-call Hash or object)
#                   → wrapped in a one-element array, or [] if not coercible
#
# @param raw [nil, Array, Hash, Object] tool calls as returned by a provider
# @return [Array] tool calls
def self.coerce_tool_calls(raw)
  case raw
  when nil
    []
  when Array
    raw
  when Hash
    if single_tool_call_hash?(raw)
      [coerce_single_tool_call(raw)].compact
    else
      raw.values.filter_map { |entry| coerce_single_tool_call(entry) }
    end
  else
    [coerce_single_tool_call(raw)].compact
  end
end
|
|
114
|
+
|
|
115
|
+
# Tells whether a Hash is one tool call rather than a collection of them.
#
# A hash counts as a single tool call when it has a `name` or `function`
# key, as either a symbol or a string.
#
# @param hash [Hash] candidate payload
# @return [Boolean]
def self.single_tool_call_hash?(hash)
  %i[name function].any? { |field| hash.key?(field) || hash.key?(field.to_s) }
end
|
|
118
|
+
|
|
119
|
+
# Converts one tool-call entry into Hash form.
#
# An object responding to both `id` and `name` is converted to
# `{ id:, name:, arguments: }` (arguments default to `{}` when the object
# has no `arguments` method). A Hash is returned unchanged. Anything else
# yields nil.
#
# @param entry [Object] tool-call object, Hash, or unrelated value
# @return [Hash, nil]
def self.coerce_single_tool_call(entry)
  object_like = entry.respond_to?(:id) && entry.respond_to?(:name)

  if object_like
    call = { id: entry.id, name: entry.name }
    call[:arguments] = entry.respond_to?(:arguments) ? entry.arguments : {}
    call
  elsif entry.is_a?(Hash)
    entry
  end
end
|
|
128
|
+
|
|
110
129
|
def self.merge_thinking_payloads(existing, extracted)
|
|
111
130
|
return existing || extracted unless existing && extracted
|
|
112
131
|
|
|
@@ -231,6 +250,14 @@ module Legion
|
|
|
231
250
|
ext = Registry.for(provider, instance: instance)
|
|
232
251
|
return ext if ext
|
|
233
252
|
|
|
253
|
+
if instance && instance.to_s != 'default'
|
|
254
|
+
ext = Registry.for(provider, instance: :default)
|
|
255
|
+
if ext
|
|
256
|
+
log.warn("[llm][native] instance_fallback provider=#{provider} requested=#{instance} using=default")
|
|
257
|
+
return ext
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
|
|
234
261
|
instance_suffix = instance ? "/#{instance}" : ''
|
|
235
262
|
log.error("[llm][native] provider_not_registered provider=#{provider}#{instance_suffix}")
|
|
236
263
|
raise Legion::LLM::ProviderError,
|
|
@@ -277,7 +304,6 @@ module Legion
|
|
|
277
304
|
|
|
278
305
|
tool_calls = normalize_tool_calls(raw[:tool_calls] || raw['tool_calls'] || raw[:tools] || raw['tools'] || result)
|
|
279
306
|
stop_reason = raw[:stop_reason] || raw['stop_reason'] || (tool_calls.any? ? :tool_use : nil)
|
|
280
|
-
|
|
281
307
|
{
|
|
282
308
|
result: result,
|
|
283
309
|
model: raw[:model] || raw['model'],
|