legion-llm 0.9.52 → 0.9.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/lib/legion/llm/api/auth.rb +7 -3
- data/lib/legion/llm/api/openai/chat_completions.rb +19 -6
- data/lib/legion/llm/api/openai/embeddings.rb +11 -4
- data/lib/legion/llm/api/openai/models.rb +9 -4
- data/lib/legion/llm/api/translators/openai_response.rb +4 -2
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz: '
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '09bf7eb9fe4c93ccba0bb574864c66a518225327399b47a30dd08aa148ac3b74'
|
|
4
|
+
data.tar.gz: a54854addf081e387d94ed9fd3ba67f826df9b64ec3b042b2767a7e5a441d715
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 69fa173952297d7da6410c101c9b9e13548514db85075188fdef153d0f9340f50c314e3f633432e5481c5a4d979468ea84dc3b24edff84d4d728a6d7df7f94c0
|
|
7
|
+
data.tar.gz: 52852f542515ec121bf1d9851506c3866e70cae5a551f034777f15d9c68fc7886888eb6d9c5cb9a9f58e18016d84d92d4dd010a58cc7c312b2c683bfb477815d
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.54] - 2026-05-29
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- API: OpenAI-compatible streaming responses now include `usage` (`prompt_tokens`, `completion_tokens`, `total_tokens`) in the final done-chunk, allowing the Vercel AI SDK `step-finish` event to propagate token counts to clients (fixes empty Tokens display in Kai's response info popup)
|
|
7
|
+
|
|
8
|
+
## [0.9.53] - 2026-05-29
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- API: OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`, `/v1/responses`) are now also available under the `/api/llm/inference/v1/` prefix, allowing Mastra `openai-compatible` providers to use `http://127.0.0.1:4567/api/llm/inference` as the base URL — consistent with the Claude and Codex client routing patches in legion-interlink
|
|
12
|
+
- API: auth `before` filter extended to cover `/api/llm/inference/v1/*` in addition to `/v1/*`
|
|
13
|
+
|
|
3
14
|
## [0.9.52] - 2026-05-27
|
|
4
15
|
|
|
5
16
|
### Fixed
|
data/lib/legion/llm/api/auth.rb
CHANGED
|
@@ -9,9 +9,9 @@ module Legion
|
|
|
9
9
|
extend Legion::Logging::Helper
|
|
10
10
|
|
|
11
11
|
def self.registered(app)
|
|
12
|
-
log.debug('[llm][api][auth] registering /v1/* before
|
|
12
|
+
log.debug('[llm][api][auth] registering /v1/* and /api/llm/inference/v1/* before filters')
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
auth_check = proc do
|
|
15
15
|
log.debug("[llm][api][auth] before filter action=check path=#{request.path_info}")
|
|
16
16
|
next unless auth_enabled?
|
|
17
17
|
|
|
@@ -27,6 +27,10 @@ module Legion
|
|
|
27
27
|
log.debug("[llm][api][auth] action=authorized path=#{request.path_info}")
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
app.before('/api/llm/inference/v1/*', &auth_check)
|
|
31
|
+
|
|
32
|
+
app.before('/v1/*', &auth_check)
|
|
33
|
+
|
|
30
34
|
app.helpers do
|
|
31
35
|
define_method(:auth_enabled?) do
|
|
32
36
|
Legion::LLM::Settings.value(:api, :auth, :enabled) == true
|
|
@@ -53,7 +57,7 @@ module Legion
|
|
|
53
57
|
end
|
|
54
58
|
end
|
|
55
59
|
|
|
56
|
-
log.debug('[llm][api][auth] /v1/* before
|
|
60
|
+
log.debug('[llm][api][auth] /v1/* and /api/llm/inference/v1/* before filters registered')
|
|
57
61
|
rescue StandardError => e
|
|
58
62
|
handle_exception(e, level: :error, handled: false, operation: 'llm.api.auth.register')
|
|
59
63
|
end
|
data/lib/legion/llm/api/openai/chat_completions.rb
CHANGED
|
@@ -11,10 +11,19 @@ module Legion
|
|
|
11
11
|
module ChatCompletions
|
|
12
12
|
extend Legion::Logging::Helper
|
|
13
13
|
|
|
14
|
-
def self.registered(app)
|
|
15
|
-
log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions')
|
|
14
|
+
def self.registered(app)
|
|
15
|
+
log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions + /api/llm/inference/v1/chat/completions')
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
handler = build_handler
|
|
18
|
+
|
|
19
|
+
app.post('/v1/chat/completions') { instance_exec(&handler) }
|
|
20
|
+
app.post('/api/llm/inference/v1/chat/completions') { instance_exec(&handler) }
|
|
21
|
+
|
|
22
|
+
log.debug('[llm][api][openai][chat_completions] routes registered')
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def self.build_handler # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
|
26
|
+
proc do # rubocop:disable Metrics/BlockLength
|
|
18
27
|
require_llm!
|
|
19
28
|
body = parse_request_body
|
|
20
29
|
|
|
@@ -82,7 +91,13 @@ module Legion
|
|
|
82
91
|
nil,
|
|
83
92
|
model: final_model,
|
|
84
93
|
request_id: request_id,
|
|
85
|
-
finish_reason: tool_calls.empty? ? 'stop' : 'tool_calls'
|
|
94
|
+
finish_reason: tool_calls.empty? ? 'stop' : 'tool_calls',
|
|
95
|
+
usage: {
|
|
96
|
+
prompt_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :input),
|
|
97
|
+
completion_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :output),
|
|
98
|
+
total_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :input).to_i +
|
|
99
|
+
Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :output).to_i
|
|
100
|
+
}
|
|
86
101
|
)
|
|
87
102
|
out << "data: #{Legion::JSON.dump(done_chunk)}\n\n"
|
|
88
103
|
out << "data: [DONE]\n\n"
|
|
@@ -121,8 +136,6 @@ module Legion
|
|
|
121
136
|
halt 500, { 'Content-Type' => 'application/json' },
|
|
122
137
|
Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
|
|
123
138
|
end
|
|
124
|
-
|
|
125
|
-
log.debug('[llm][api][openai][chat_completions] POST /v1/chat/completions registered')
|
|
126
139
|
end
|
|
127
140
|
|
|
128
141
|
def self.build_openai_tool_classes(tools)
|
data/lib/legion/llm/api/openai/embeddings.rb
CHANGED
|
@@ -10,9 +10,18 @@ module Legion
|
|
|
10
10
|
extend Legion::Logging::Helper
|
|
11
11
|
|
|
12
12
|
def self.registered(app)
|
|
13
|
-
log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings')
|
|
13
|
+
log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings + /api/llm/inference/v1/embeddings')
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
handler = build_handler
|
|
16
|
+
|
|
17
|
+
app.post('/v1/embeddings') { instance_exec(&handler) }
|
|
18
|
+
app.post('/api/llm/inference/v1/embeddings') { instance_exec(&handler) }
|
|
19
|
+
|
|
20
|
+
log.debug('[llm][api][openai][embeddings] routes registered')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def self.build_handler
|
|
24
|
+
proc do
|
|
16
25
|
require_llm!
|
|
17
26
|
body = parse_request_body
|
|
18
27
|
|
|
@@ -57,8 +66,6 @@ module Legion
|
|
|
57
66
|
halt 500, { 'Content-Type' => 'application/json' },
|
|
58
67
|
Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
|
|
59
68
|
end
|
|
60
|
-
|
|
61
|
-
log.debug('[llm][api][openai][embeddings] POST /v1/embeddings registered')
|
|
62
69
|
end
|
|
63
70
|
end
|
|
64
71
|
end
|
data/lib/legion/llm/api/openai/models.rb
CHANGED
|
@@ -11,9 +11,9 @@ module Legion
|
|
|
11
11
|
extend Legion::Logging::Helper
|
|
12
12
|
|
|
13
13
|
def self.registered(app)
|
|
14
|
-
log.debug('[llm][api][openai][models] registering GET /v1/models
|
|
14
|
+
log.debug('[llm][api][openai][models] registering GET /v1/models + /api/llm/inference/v1/models routes')
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
list_handler = proc do
|
|
17
17
|
log.debug('[llm][api][openai][models] action=list')
|
|
18
18
|
require_llm!
|
|
19
19
|
|
|
@@ -28,7 +28,7 @@ module Legion
|
|
|
28
28
|
Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
get_handler = proc do
|
|
32
32
|
model_id = params[:id]
|
|
33
33
|
log.debug("[llm][api][openai][models] action=get id=#{model_id}")
|
|
34
34
|
require_llm!
|
|
@@ -52,7 +52,12 @@ module Legion
|
|
|
52
52
|
Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
|
|
55
|
+
app.get('/v1/models') { instance_exec(&list_handler) }
|
|
56
|
+
app.get('/api/llm/inference/v1/models') { instance_exec(&list_handler) }
|
|
57
|
+
app.get('/v1/models/:id') { instance_exec(&get_handler) }
|
|
58
|
+
app.get('/api/llm/inference/v1/models/:id') { instance_exec(&get_handler) }
|
|
59
|
+
|
|
60
|
+
log.debug('[llm][api][openai][models] routes registered')
|
|
56
61
|
end
|
|
57
62
|
|
|
58
63
|
def self.build_model_list
|
data/lib/legion/llm/api/translators/openai_response.rb
CHANGED
|
@@ -57,17 +57,19 @@ module Legion
|
|
|
57
57
|
}
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
-
def format_stream_chunk(delta_text, model:, request_id:, finish_reason: nil)
|
|
60
|
+
def format_stream_chunk(delta_text, model:, request_id:, finish_reason: nil, usage: nil)
|
|
61
61
|
choice = { index: 0, delta: {}, finish_reason: finish_reason }
|
|
62
62
|
choice[:delta][:content] = delta_text if delta_text && !delta_text.empty?
|
|
63
63
|
|
|
64
|
-
{
|
|
64
|
+
chunk = {
|
|
65
65
|
id: "chatcmpl-#{request_id.delete('-')}",
|
|
66
66
|
object: 'chat.completion.chunk',
|
|
67
67
|
created: Time.now.to_i,
|
|
68
68
|
model: model.to_s,
|
|
69
69
|
choices: [choice]
|
|
70
70
|
}
|
|
71
|
+
chunk[:usage] = usage if usage
|
|
72
|
+
chunk
|
|
71
73
|
end
|
|
72
74
|
|
|
73
75
|
def format_stream_tool_call_chunk(tool_call, model:, request_id:, index:)
|
data/lib/legion/llm/version.rb
CHANGED