legion-llm 0.5.14 → 0.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/CLAUDE.md +2 -2
- data/README.md +2 -2
- data/lib/legion/llm/conversation_store.rb +8 -3
- data/lib/legion/llm/routes.rb +388 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +11 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 42d72ed366b2266f182b9a2e950d02c735956ff0b3ee51f712ed4686370b7274
|
|
4
|
+
data.tar.gz: 9af0c1f6f15ecea5d029868fde0a51305307a5de14391cf31e7c85307cccdee0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 844583a7565f8bbc167f12330b51c32ba57b74802b23e624158f1afbaa3020dffd25e7324ad4ec19fe2ad0eaf2dccdfb6b6d8c673f06805f0b12f2613ac5f6f7
|
|
7
|
+
data.tar.gz: 26ec627a507e4e8d14e8a9e6155bb852c44d36af9e44b60f02dc247e1d307801bffbdc486a54745894430453c1fde22902662b7253feb8d5c841b92971aa2326
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.15] - 2026-03-28
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM::Routes` Sinatra extension module (`lib/legion/llm/routes.rb`): contains all `/api/llm/*` route definitions (chat, inference, providers) extracted from `LegionIO/lib/legion/api/llm.rb`. Self-registers with `Legion::API.register_library_routes('llm', Legion::LLM::Routes)` at the end of `Legion::LLM.start`.
|
|
7
|
+
|
|
8
|
+
### Changed
|
|
9
|
+
- `Legion::LLM.start` now calls `register_routes` after setting `@started = true`, mounting routes onto the API if `Legion::API` is available.
|
|
10
|
+
|
|
3
11
|
## [0.5.14] - 2026-03-27
|
|
4
12
|
|
|
5
13
|
### Added
|
data/CLAUDE.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
Core LegionIO gem providing LLM capabilities to all extensions. Wraps ruby_llm to provide a consistent interface for chat, embeddings, tool use, and agents across multiple providers (Bedrock, Anthropic, OpenAI, Gemini, Ollama). Includes a dynamic weighted routing engine that dispatches requests across local, fleet, and cloud tiers based on caller intent, priority rules, time schedules, cost multipliers, and real-time provider health.
|
|
9
9
|
|
|
10
10
|
**GitHub**: https://github.com/LegionIO/legion-llm
|
|
11
|
-
**Version**: 0.5.14
|
|
11
|
+
**Version**: 0.5.15
|
|
12
12
|
**License**: Apache-2.0
|
|
13
13
|
|
|
14
14
|
## Architecture
|
|
@@ -325,7 +325,7 @@ In-memory signal consumer with pluggable handlers. Adjusts effective priorities
|
|
|
325
325
|
| `lib/legion/llm/structured_output.rb` | JSON schema enforcement with native response_format and prompt fallback |
|
|
326
326
|
| `lib/legion/llm/errors.rb` | Typed error hierarchy: LLMError base + AuthError, RateLimitError, ContextOverflow, ProviderError, ProviderDown, UnsupportedCapability, PipelineError |
|
|
327
327
|
| `lib/legion/llm/conversation_store.rb` | ConversationStore: in-memory LRU (256 slots) + optional Sequel DB persistence + spool fallback |
|
|
328
|
-
| `lib/legion/llm/version.rb` | Version constant
|
|
328
|
+
| `lib/legion/llm/version.rb` | Version constant |
|
|
329
329
|
| `lib/legion/llm/quality_checker.rb` | QualityChecker module with QualityResult struct |
|
|
330
330
|
| `lib/legion/llm/escalation_history.rb` | EscalationHistory mixin: `escalation_history`, `escalated?`, `final_resolution`, `escalation_chain` |
|
|
331
331
|
| `lib/legion/llm/router/escalation_chain.rb` | EscalationChain value object |
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
LLM integration for the [LegionIO](https://github.com/LegionIO/LegionIO) framework. Wraps [ruby_llm](https://github.com/crmne/ruby_llm) to provide chat, embeddings, tool use, and agent capabilities to any Legion extension.
|
|
4
4
|
|
|
5
|
-
**Version**: 0.5.14
|
|
5
|
+
**Version**: 0.5.15
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -652,7 +652,7 @@ bundle exec rspec
|
|
|
652
652
|
Tests use stubbed `Legion::Logging` and `Legion::Settings` modules (no need for the full LegionIO stack):
|
|
653
653
|
|
|
654
654
|
```bash
|
|
655
|
-
bundle exec rspec # Run all
|
|
655
|
+
bundle exec rspec # Run all tests
|
|
656
656
|
bundle exec rubocop # Lint (0 offenses)
|
|
657
657
|
bundle exec rspec spec/legion/llm_spec.rb # Run specific test file
|
|
658
658
|
bundle exec rspec spec/legion/llm/router_spec.rb # Router tests only
|
|
data/lib/legion/llm/conversation_store.rb
CHANGED
|
@@ -26,7 +26,7 @@ module Legion
|
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
# Registers a new, empty conversation under +conversation_id+.
#
# The entry starts with no messages, keeps the supplied +metadata+ and is
# stamped with a fresh LRU tick. After insertion the store is given a chance
# to evict an old entry, and the conversation is handed to
# +persist_conversation+.
#
# @param conversation_id [Object] key the conversation is stored under
# @param metadata [Hash] arbitrary keyword metadata kept with the entry
def create_conversation(conversation_id, **metadata)
  entry = { messages: [], metadata: metadata, lru_tick: next_tick }
  conversations[conversation_id] = entry
  evict_if_needed
  persist_conversation(conversation_id, metadata)
end
|
|
@@ -41,6 +41,7 @@ module Legion
|
|
|
41
41
|
|
|
42
42
|
# Drops every in-memory conversation and restarts the LRU counter at zero.
def reset!
  # Fresh hash rather than #clear, so existing references to the old hash
  # are left untouched.
  @conversations = {}
  @lru_counter = 0
end
|
|
45
46
|
|
|
46
47
|
private
|
|
@@ -49,6 +50,10 @@ module Legion
|
|
|
49
50
|
@conversations ||= {}
|
|
50
51
|
end
|
|
51
52
|
|
|
53
|
+
# Advances the LRU counter by one and returns the new value.
# An unset counter is treated as zero, so the first tick is 1.
#
# @return [Integer] the freshly incremented counter
def next_tick
  @lru_counter ||= 0
  @lru_counter += 1
end
|
|
56
|
+
|
|
52
57
|
def ensure_conversation(conversation_id)
|
|
53
58
|
return if in_memory?(conversation_id)
|
|
54
59
|
|
|
@@ -63,13 +68,13 @@ module Legion
|
|
|
63
68
|
# Marks a conversation as most recently used by giving it a fresh LRU tick.
# Does nothing (and returns nil) when the conversation is not held in memory.
#
# @param conversation_id [Object] key of the conversation to refresh
def touch(conversation_id)
  conversations[conversation_id][:lru_tick] = next_tick if in_memory?(conversation_id)
end
|
|
68
73
|
|
|
69
74
|
# Evicts least-recently-used conversations until the in-memory store is back
# within +MAX_CONVERSATIONS+.
#
# The entry with the smallest +:lru_tick+ is removed first. An entry without
# a tick is treated as tick 0 (oldest) instead of making the comparison raise.
# NOTE(review): whether any code path stores entries without :lru_tick is not
# visible in this hunk — confirm against ensure_conversation.
def evict_if_needed
  while conversations.size > self::MAX_CONVERSATIONS
    victim = conversations.min_by { |_, entry| entry[:lru_tick] || 0 }
    # Nothing left to evict (empty store); bail out rather than spin.
    break unless victim

    conversations.delete(victim.first)
  end
end
|
|
75
80
|
|
|
data/lib/legion/llm/routes.rb
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Self-registering route module for legion-llm.
|
|
4
|
+
# All routes previously defined in LegionIO/lib/legion/api/llm.rb now live here
|
|
5
|
+
# and are mounted via Legion::API.register_library_routes when legion-llm boots.
|
|
6
|
+
#
|
|
7
|
+
# LegionIO/lib/legion/api/llm.rb is preserved for backward compatibility but guards
|
|
8
|
+
# its registration with defined?(Legion::LLM::Routes) so double-registration is avoided.
|
|
9
|
+
|
|
10
|
+
require 'securerandom'
|
|
11
|
+
|
|
12
|
+
module Legion
  module LLM
    # Sinatra extension that defines the `/api/llm/*` routes (chat, inference,
    # providers). Register it on an application class; `registered` is the
    # hook that installs the fallback helpers and the routes.
    module Routes
      # Sinatra extension hook, called with the application class.
      #
      # Installs minimal fallback helpers (each only when the app does not
      # already define a method of that name) and then mounts the chat,
      # inference and provider routes.
      #
      # @param app [Class] the application class the routes are mounted on
      def self.registered(app) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize,Metrics/MethodLength
        app.helpers do # rubocop:disable Metrics/BlockLength
          # Minimal fallback implementations of shared API helpers.
          # These are used when Legion::LLM::Routes is mounted on a bare Sinatra app.
          # When mounted via Legion::API (the normal path), Legion::API::Helpers and
          # Legion::API::Validators provide full implementations that take precedence.

          # Parses the request body as a JSON object and symbolizes its
          # top-level keys. Returns {} for an empty body; halts with 400 when
          # the body is not valid JSON or is not an object.
          unless method_defined?(:parse_request_body)
            define_method(:parse_request_body) do
              io = request.body
              # The stream may already have been consumed upstream; start from
              # the beginning when the IO supports it.
              io.rewind if io.respond_to?(:rewind)
              raw = io&.read
              return {} if raw.nil? || raw.empty?

              begin
                parsed = Legion::JSON.load(raw)
              rescue StandardError
                halt 400, { 'Content-Type' => 'application/json' },
                     Legion::JSON.dump({ error: { code: 'invalid_json', message: 'request body is not valid JSON' } })
              end

              unless parsed.respond_to?(:transform_keys)
                halt 400, { 'Content-Type' => 'application/json' },
                     Legion::JSON.dump({ error: { code: 'invalid_request_body',
                                                  message: 'request body must be a JSON object' } })
              end

              parsed.transform_keys(&:to_sym)
            end
          end

          # Halts with 400 listing every key in +keys+ whose value in +body+
          # is nil or empty.
          unless method_defined?(:validate_required!)
            define_method(:validate_required!) do |body, *keys|
              missing = keys.select { |k| body[k].nil? || (body[k].respond_to?(:empty?) && body[k].empty?) }
              return if missing.empty?

              halt 400, { 'Content-Type' => 'application/json' },
                   Legion::JSON.dump({ error: { code: 'missing_fields',
                                                message: "required: #{missing.join(', ')}" } })
            end
          end

          # Sets the JSON content type and status, and returns +data+ wrapped
          # in a `data` envelope as a JSON string.
          unless method_defined?(:json_response)
            define_method(:json_response) do |data, status_code: 200|
              content_type :json
              status status_code
              Legion::JSON.dump({ data: data })
            end
          end

          # Sets the JSON content type and status, and returns an `error`
          # envelope carrying +code+ and +message+ as a JSON string.
          unless method_defined?(:json_error)
            define_method(:json_error) do |code, message, status_code: 400|
              content_type :json
              status status_code
              Legion::JSON.dump({ error: { code: code, message: message } })
            end
          end

          # Halts with 503 unless Legion::LLM reports that it has started.
          unless method_defined?(:require_llm!)
            define_method(:require_llm!) do
              return if defined?(Legion::LLM) &&
                        Legion::LLM.respond_to?(:started?) &&
                        Legion::LLM.started?

              halt 503, { 'Content-Type' => 'application/json' },
                   Legion::JSON.dump({ error: { code: 'llm_unavailable',
                                                message: 'LLM subsystem is not available' } })
            end
          end

          # Truthy when Legion::Cache is loaded and reports a live connection.
          unless method_defined?(:cache_available?)
            define_method(:cache_available?) do
              defined?(Legion::Cache) &&
                Legion::Cache.respond_to?(:connected?) &&
                Legion::Cache.connected?
            end
          end

          # Truthy when the gateway inference runner constant is loaded.
          unless method_defined?(:gateway_available?)
            define_method(:gateway_available?) do
              defined?(Legion::Extensions::LLM::Gateway::Runners::Inference)
            end
          end

          # Normalizes a gateway failure value into a [code, message] pair.
          # Hashes contribute their :code / :message (symbol or string keys);
          # anything else is stringified. A nil or blank error falls back to a
          # generic code instead of producing an empty one.
          unless method_defined?(:gateway_error_parts)
            define_method(:gateway_error_parts) do |err|
              if err.is_a?(Hash)
                [err[:code] || err['code'] || 'gateway_error',
                 err[:message] || err['message'] || err.to_s]
              else
                text = err.to_s
                text.empty? ? ['gateway_error', 'gateway request failed'] : [text, text]
              end
            end
          end

          # Halts with 400 unless +tool_list+ is an array of hash-like objects
          # that each carry a non-empty name.
          unless method_defined?(:validate_tools!)
            define_method(:validate_tools!) do |tool_list|
              unless tool_list.is_a?(Array) && tool_list.all? { |t| t.respond_to?(:transform_keys) }
                halt 400, { 'Content-Type' => 'application/json' },
                     Legion::JSON.dump({ error: { code: 'invalid_tools',
                                                  message: 'tools must be an array of objects' } })
              end

              invalid = tool_list.any? do |t|
                ts = t.transform_keys(&:to_sym)
                ts[:name].to_s.empty?
              end
              return unless invalid

              halt 400, { 'Content-Type' => 'application/json' },
                   Legion::JSON.dump({ error: { code: 'invalid_tools',
                                                message: 'each tool must have a non-empty name' } })
            end
          end

          # Halts with 400 unless every message is hash-like with a non-empty
          # role and non-empty content (symbol or string keys accepted).
          unless method_defined?(:validate_messages!)
            define_method(:validate_messages!) do |msg_list|
              valid = msg_list.all? do |m|
                next false unless m.respond_to?(:key?) && m.respond_to?(:[])

                role = m[:role] || m['role']
                content_value = m[:content] || m['content']

                !role.to_s.empty? &&
                  (m.key?(:content) || m.key?('content')) &&
                  !content_value.nil? &&
                  !(content_value.respond_to?(:empty?) && content_value.empty?)
              end
              return if valid

              halt 400, { 'Content-Type' => 'application/json' },
                   Legion::JSON.dump({ error: { code: 'invalid_messages',
                                                message: 'each message must be an object with non-empty role and content' } })
            end
          end
        end

        register_chat(app)
        register_providers(app)
      end

      # Mounts POST /api/llm/chat (and, first, the inference route).
      #
      # Request handling order: optional MCP tier-0 short circuit, then the
      # gateway runner when loaded, then an async cache-backed path (202) when
      # the cache is connected and the caller did not send
      # `X-Legion-Sync: true`, otherwise a synchronous chat (201).
      #
      # @param app [Class] the application class the route is mounted on
      def self.register_chat(app) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
        register_inference(app)

        app.post '/api/llm/chat' do # rubocop:disable Metrics/BlockLength
          Legion::Logging.debug "API: POST /api/llm/chat params=#{params.keys}" if defined?(Legion::Logging)
          require_llm!

          body = parse_request_body
          validate_required!(body, :message)

          message = body[:message]

          if defined?(Legion::MCP::TierRouter)
            tier_result = Legion::MCP::TierRouter.route(
              intent:  message,
              params:  body.except(:message, :model, :provider, :request_id),
              context: {}
            )
            # Tier 0 means the router answered by itself; no LLM call is made.
            if tier_result[:tier]&.zero?
              halt json_response({
                                   response:           tier_result[:response],
                                   tier:               0,
                                   latency_ms:         tier_result[:latency_ms],
                                   pattern_confidence: tier_result[:pattern_confidence]
                                 })
            end
          end

          request_id = body[:request_id] || SecureRandom.uuid
          model      = body[:model]
          provider   = body[:provider]

          if gateway_available?
            ingress_result = Legion::Ingress.run(
              payload:      { message: message, model: model, provider: provider,
                              request_id: request_id },
              runner_class: 'Legion::Extensions::LLM::Gateway::Runners::Inference',
              function:     'chat',
              source:       'api'
            )

            unless ingress_result[:success]
              Legion::Logging.error "[api/llm/chat] ingress failed: #{ingress_result}" if defined?(Legion::Logging)
              err_code, err_message = gateway_error_parts(ingress_result[:error] || ingress_result[:status])
              halt json_error(err_code, err_message, status_code: 502)
            end

            result = ingress_result[:result]

            if result.nil?
              Legion::Logging.warn "[api/llm/chat] runner returned nil (status=#{ingress_result[:status]})" if defined?(Legion::Logging)
              halt json_error('empty_result', 'Gateway runner returned no result', status_code: 502)
            end

            if result.is_a?(Hash) && result[:error]
              re_code, re_message = gateway_error_parts(result[:error])
              halt json_error(re_code, re_message, status_code: 502)
            end

            response_content = if result.respond_to?(:content)
                                 result.content
                               elsif result.is_a?(Hash)
                                 result[:response] || result[:content] || result.to_s
                               else
                                 result.to_s
                               end

            meta = { routed_via: 'gateway' }
            meta[:model]      = result.model.to_s if result.respond_to?(:model)
            meta[:tokens_in]  = result.input_tokens if result.respond_to?(:input_tokens)
            meta[:tokens_out] = result.output_tokens if result.respond_to?(:output_tokens)

            halt json_response({ response: response_content, meta: meta }, status_code: 201)
          end

          if cache_available? && env['HTTP_X_LEGION_SYNC'] != 'true'
            llm = Legion::LLM
            rc  = Legion::LLM::ResponseCache
            rc.init_request(request_id)

            # Run the chat off the request thread; the outcome (or failure) is
            # written to the response cache under request_id.
            Thread.new do
              session  = llm.chat_direct(model: model, provider: provider)
              response = session.ask(message)
              rc.complete(
                request_id,
                response: response.content,
                meta:     {
                  model:      session.model.to_s,
                  tokens_in:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
                  tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
                }
              )
            rescue StandardError => e
              Legion::Logging.error "API POST /api/llm/chat async: #{e.class} — #{e.message}" if defined?(Legion::Logging)
              rc.fail_request(request_id, code: 'llm_error', message: e.message)
            end

            Legion::Logging.info "API: LLM chat request #{request_id} queued async" if defined?(Legion::Logging)
            json_response({ request_id: request_id, poll_key: "llm:#{request_id}:status" },
                          status_code: 202)
          else
            session = Legion::LLM.chat(model: model, provider: provider,
                                       caller: { source: 'api', path: request.path })
            response = session.ask(message)
            Legion::Logging.info "API: LLM chat request #{request_id} completed sync model=#{session.model}" if defined?(Legion::Logging)
            json_response(
              {
                response: response.content,
                meta:     {
                  model:      session.model.to_s,
                  tokens_in:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
                  tokens_out: response.respond_to?(:output_tokens) ? response.output_tokens : nil
                }
              },
              status_code: 201
            )
          end
        end
      end

      # Mounts POST /api/llm/inference.
      #
      # Validates `messages` (and optional `tools`), opens a chat session,
      # declares the tools as client-side-only stubs, replays every message
      # except the last user message into the session, then asks that last
      # user message as the prompt. Any StandardError is reported as a 500
      # `inference_error`.
      #
      # @param app [Class] the application class the route is mounted on
      def self.register_inference(app) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
        app.post '/api/llm/inference' do # rubocop:disable Metrics/BlockLength
          require_llm!
          body = parse_request_body
          validate_required!(body, :messages)

          messages  = body[:messages]
          raw_tools = body[:tools]
          model     = body[:model]
          provider  = body[:provider]

          unless messages.is_a?(Array)
            halt 400, { 'Content-Type' => 'application/json' },
                 Legion::JSON.dump({ error: { code: 'invalid_messages', message: 'messages must be an array' } })
          end

          validate_messages!(messages)

          unless raw_tools.nil? || raw_tools.is_a?(Array)
            halt 400, { 'Content-Type' => 'application/json' },
                 Legion::JSON.dump({ error: { code: 'invalid_tools', message: 'tools must be an array' } })
          end

          tools = raw_tools || []
          # Reject malformed tool definitions before a session is opened.
          validate_tools!(tools) unless tools.empty?

          session = Legion::LLM.chat(
            model:    model,
            provider: provider,
            caller:   { source: 'api', path: request.path }
          )

          unless tools.empty?
            tool_declarations = tools.map do |t|
              ts = t.transform_keys(&:to_sym)
              tname = ts[:name].to_s
              tdesc = ts[:description].to_s
              tparams = ts[:parameters] || {}
              # Declaration-only tool: execution happens on the client, so
              # calling it server-side is an error.
              Class.new do
                define_singleton_method(:tool_name) { tname }
                define_singleton_method(:description) { tdesc }
                define_singleton_method(:parameters) { tparams }
                define_method(:call) { |**_| raise NotImplementedError, "#{tname} executes client-side only" }
              end
            end
            session.with_tools(*tool_declarations)
          end

          # Position of the last user message; it becomes the prompt and is
          # left out of the replayed history.
          last_user_idx = messages.rindex { |m| (m[:role] || m['role']).to_s == 'user' }
          last_user = last_user_idx ? messages[last_user_idx] : {}
          prior_messages = messages.each_with_index.reject { |_, i| i == last_user_idx }.map(&:first)
          prior_messages.each { |m| session.add_message(m) }

          prompt = last_user[:content] || last_user['content'] || ''
          response = session.ask(prompt)

          tc_list = if response.respond_to?(:tool_calls) && response.tool_calls
                      Array(response.tool_calls).map do |tc|
                        {
                          id:        tc.respond_to?(:id) ? tc.id : nil,
                          name:      tc.respond_to?(:name) ? tc.name : tc.to_s,
                          arguments: tc.respond_to?(:arguments) ? tc.arguments : {}
                        }
                      end
                    end

          json_response({
                          content:       response.content,
                          tool_calls:    tc_list,
                          stop_reason:   response.respond_to?(:stop_reason) ? response.stop_reason : nil,
                          model:         session.model.to_s,
                          input_tokens:  response.respond_to?(:input_tokens) ? response.input_tokens : nil,
                          output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : nil
                        }, status_code: 200)
        rescue StandardError => e
          Legion::Logging.error "[api/llm/inference] #{e.class}: #{e.message}" if defined?(Legion::Logging)
          json_error('inference_error', e.message, status_code: 500)
        end
      end

      # Mounts GET /api/llm/providers and GET /api/llm/providers/:name.
      # Both answer 503 `gateway_unavailable` unless the gateway inference
      # runner and its ProviderStats runner are loaded.
      #
      # @param app [Class] the application class the routes are mounted on
      def self.register_providers(app)
        app.get '/api/llm/providers' do
          require_llm!
          unless gateway_available? && defined?(Legion::Extensions::LLM::Gateway::Runners::ProviderStats)
            halt json_error('gateway_unavailable', 'LLM gateway is not loaded', status_code: 503)
          end

          stats = Legion::Extensions::LLM::Gateway::Runners::ProviderStats
          json_response({
                          providers: stats.health_report,
                          summary:   stats.circuit_summary
                        })
        end

        app.get '/api/llm/providers/:name' do
          require_llm!
          unless gateway_available? && defined?(Legion::Extensions::LLM::Gateway::Runners::ProviderStats)
            halt json_error('gateway_unavailable', 'LLM gateway is not loaded', status_code: 503)
          end

          stats = Legion::Extensions::LLM::Gateway::Runners::ProviderStats
          detail = stats.provider_detail(provider: params[:name])
          json_response(detail)
        end
      end

      class << self
        private :register_chat, :register_inference, :register_providers
      end
    end
  end
end
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -24,6 +24,7 @@ require_relative 'llm/off_peak'
|
|
|
24
24
|
require_relative 'llm/cost_tracker'
|
|
25
25
|
require_relative 'llm/tool_registry'
|
|
26
26
|
require_relative 'llm/override_confidence'
|
|
27
|
+
require_relative 'llm/routes'
|
|
27
28
|
|
|
28
29
|
module Legion
|
|
29
30
|
module LLM
|
|
@@ -51,6 +52,7 @@ module Legion
|
|
|
51
52
|
@started = true
|
|
52
53
|
Legion::Settings[:llm][:connected] = true
|
|
53
54
|
Legion::Logging.info 'Legion::LLM started'
|
|
55
|
+
register_routes
|
|
54
56
|
ping_provider
|
|
55
57
|
end
|
|
56
58
|
|
|
@@ -658,6 +660,15 @@ module Legion
|
|
|
658
660
|
Legion::Logging.warn "LLM ping failed for #{provider}/#{model}: #{e.message}"
|
|
659
661
|
end
|
|
660
662
|
|
|
663
|
+
# Mounts Legion::LLM::Routes on the API under the 'llm' library name.
#
# A no-op when Legion::API is not loaded or does not expose
# +register_library_routes+. Registration failures are logged as a warning
# and never propagate to the caller.
def register_routes
  api_ready = defined?(Legion::API) && Legion::API.respond_to?(:register_library_routes)
  return unless api_ready

  Legion::API.register_library_routes('llm', Legion::LLM::Routes)
  Legion::Logging.debug 'Legion::LLM routes registered with API'
rescue StandardError => e
  Legion::Logging.warn "Legion::LLM route registration failed: #{e.message}" if defined?(Legion::Logging)
end
|
|
671
|
+
|
|
661
672
|
def auto_configure_defaults
|
|
662
673
|
settings[:providers].each do |provider, config|
|
|
663
674
|
next unless config&.dig(:enabled)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.14
|
|
4
|
+
version: 0.5.15
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -270,6 +270,7 @@ files:
|
|
|
270
270
|
- lib/legion/llm/router/health_tracker.rb
|
|
271
271
|
- lib/legion/llm/router/resolution.rb
|
|
272
272
|
- lib/legion/llm/router/rule.rb
|
|
273
|
+
- lib/legion/llm/routes.rb
|
|
273
274
|
- lib/legion/llm/scheduling.rb
|
|
274
275
|
- lib/legion/llm/settings.rb
|
|
275
276
|
- lib/legion/llm/shadow_eval.rb
|