legion-llm 0.9.52 → 0.9.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08d2a4d13f38bdf0c305da0965e688c317deffdad792796daf66f0168bddd37a'
4
- data.tar.gz: 9c36b44103590f45b9947fee8c3dfaf9651fcec57c193b7c7b4dbfe839e57aed
3
+ metadata.gz: '09bf7eb9fe4c93ccba0bb574864c66a518225327399b47a30dd08aa148ac3b74'
4
+ data.tar.gz: a54854addf081e387d94ed9fd3ba67f826df9b64ec3b042b2767a7e5a441d715
5
5
  SHA512:
6
- metadata.gz: 5fec699d874dff0e91c83735becbfb825657c1d9da13e7034c8e6a843dba481e85a30c7975c959282ca8da5e9140eeeff92489a3e32fdad164685d33f83f9e42
7
- data.tar.gz: 3e149f5061993af9a16f4644b83385a0c3a60fcaa394a2454c88cdecd99890d4fd083c08491fe22715739507875bb544b61747947d2805cc198ed72f9cabde3c
6
+ metadata.gz: 69fa173952297d7da6410c101c9b9e13548514db85075188fdef153d0f9340f50c314e3f633432e5481c5a4d979468ea84dc3b24edff84d4d728a6d7df7f94c0
7
+ data.tar.gz: 52852f542515ec121bf1d9851506c3866e70cae5a551f034777f15d9c68fc7886888eb6d9c5cb9a9f58e18016d84d92d4dd010a58cc7c312b2c683bfb477815d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.9.54] - 2026-05-29
4
+
5
+ ### Fixed
6
+ - API: OpenAI-compatible streaming responses now include `usage` (`prompt_tokens`, `completion_tokens`, `total_tokens`) in the final done-chunk, allowing the Vercel AI SDK `step-finish` event to propagate token counts to clients (fixes empty Tokens display in Kai's response info popup)
7
+
8
+ ## [0.9.53] - 2026-05-29
9
+
10
+ ### Added
11
+ - API: OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`, `/v1/responses`) are now also available under the `/api/llm/inference/v1/` prefix, allowing Mastra `openai-compatible` providers to use `http://127.0.0.1:4567/api/llm/inference` as the base URL — consistent with the Claude and Codex client routing patches in legion-interlink
12
+ - API: auth `before` filter extended to cover `/api/llm/inference/v1/*` in addition to `/v1/*`
13
+
3
14
  ## [0.9.52] - 2026-05-27
4
15
 
5
16
  ### Fixed
@@ -9,9 +9,9 @@ module Legion
9
9
  extend Legion::Logging::Helper
10
10
 
11
11
  def self.registered(app)
12
- log.debug('[llm][api][auth] registering /v1/* before filter')
12
+ log.debug('[llm][api][auth] registering /v1/* and /api/llm/inference/v1/* before filters')
13
13
 
14
- app.before '/v1/*' do
14
+ auth_check = proc do
15
15
  log.debug("[llm][api][auth] before filter action=check path=#{request.path_info}")
16
16
  next unless auth_enabled?
17
17
 
@@ -27,6 +27,10 @@ module Legion
27
27
  log.debug("[llm][api][auth] action=authorized path=#{request.path_info}")
28
28
  end
29
29
 
30
+ app.before('/api/llm/inference/v1/*', &auth_check)
31
+
32
+ app.before('/v1/*', &auth_check)
33
+
30
34
  app.helpers do
31
35
  define_method(:auth_enabled?) do
32
36
  Legion::LLM::Settings.value(:api, :auth, :enabled) == true
@@ -53,7 +57,7 @@ module Legion
53
57
  end
54
58
  end
55
59
 
56
- log.debug('[llm][api][auth] /v1/* before filter registered')
60
+ log.debug('[llm][api][auth] /v1/* and /api/llm/inference/v1/* before filters registered')
57
61
  rescue StandardError => e
58
62
  handle_exception(e, level: :error, handled: false, operation: 'llm.api.auth.register')
59
63
  end
@@ -11,10 +11,19 @@ module Legion
11
11
  module ChatCompletions
12
12
  extend Legion::Logging::Helper
13
13
 
14
- def self.registered(app) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
15
- log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions')
14
+ def self.registered(app)
15
+ log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions + /api/llm/inference/v1/chat/completions')
16
16
 
17
- app.post '/v1/chat/completions' do # rubocop:disable Metrics/BlockLength
17
+ handler = build_handler
18
+
19
+ app.post('/v1/chat/completions') { instance_exec(&handler) }
20
+ app.post('/api/llm/inference/v1/chat/completions') { instance_exec(&handler) }
21
+
22
+ log.debug('[llm][api][openai][chat_completions] routes registered')
23
+ end
24
+
25
+ def self.build_handler # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
26
+ proc do # rubocop:disable Metrics/BlockLength
18
27
  require_llm!
19
28
  body = parse_request_body
20
29
 
@@ -82,7 +91,13 @@ module Legion
82
91
  nil,
83
92
  model: final_model,
84
93
  request_id: request_id,
85
- finish_reason: tool_calls.empty? ? 'stop' : 'tool_calls'
94
+ finish_reason: tool_calls.empty? ? 'stop' : 'tool_calls',
95
+ usage: {
96
+ prompt_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :input),
97
+ completion_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :output),
98
+ total_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :input).to_i +
99
+ Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :output).to_i
100
+ }
86
101
  )
87
102
  out << "data: #{Legion::JSON.dump(done_chunk)}\n\n"
88
103
  out << "data: [DONE]\n\n"
@@ -121,8 +136,6 @@ module Legion
121
136
  halt 500, { 'Content-Type' => 'application/json' },
122
137
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
123
138
  end
124
-
125
- log.debug('[llm][api][openai][chat_completions] POST /v1/chat/completions registered')
126
139
  end
127
140
 
128
141
  def self.build_openai_tool_classes(tools)
@@ -10,9 +10,18 @@ module Legion
10
10
  extend Legion::Logging::Helper
11
11
 
12
12
  def self.registered(app)
13
- log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings')
13
+ log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings + /api/llm/inference/v1/embeddings')
14
14
 
15
- app.post '/v1/embeddings' do
15
+ handler = build_handler
16
+
17
+ app.post('/v1/embeddings') { instance_exec(&handler) }
18
+ app.post('/api/llm/inference/v1/embeddings') { instance_exec(&handler) }
19
+
20
+ log.debug('[llm][api][openai][embeddings] routes registered')
21
+ end
22
+
23
+ def self.build_handler
24
+ proc do
16
25
  require_llm!
17
26
  body = parse_request_body
18
27
 
@@ -57,8 +66,6 @@ module Legion
57
66
  halt 500, { 'Content-Type' => 'application/json' },
58
67
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
59
68
  end
60
-
61
- log.debug('[llm][api][openai][embeddings] POST /v1/embeddings registered')
62
69
  end
63
70
  end
64
71
  end
@@ -11,9 +11,9 @@ module Legion
11
11
  extend Legion::Logging::Helper
12
12
 
13
13
  def self.registered(app)
14
- log.debug('[llm][api][openai][models] registering GET /v1/models and GET /v1/models/:id')
14
+ log.debug('[llm][api][openai][models] registering GET /v1/models + /api/llm/inference/v1/models routes')
15
15
 
16
- app.get '/v1/models' do
16
+ list_handler = proc do
17
17
  log.debug('[llm][api][openai][models] action=list')
18
18
  require_llm!
19
19
 
@@ -28,7 +28,7 @@ module Legion
28
28
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
29
29
  end
30
30
 
31
- app.get '/v1/models/:id' do
31
+ get_handler = proc do
32
32
  model_id = params[:id]
33
33
  log.debug("[llm][api][openai][models] action=get id=#{model_id}")
34
34
  require_llm!
@@ -52,7 +52,12 @@ module Legion
52
52
  Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
53
53
  end
54
54
 
55
- log.debug('[llm][api][openai][models] GET /v1/models routes registered')
55
+ app.get('/v1/models') { instance_exec(&list_handler) }
56
+ app.get('/api/llm/inference/v1/models') { instance_exec(&list_handler) }
57
+ app.get('/v1/models/:id') { instance_exec(&get_handler) }
58
+ app.get('/api/llm/inference/v1/models/:id') { instance_exec(&get_handler) }
59
+
60
+ log.debug('[llm][api][openai][models] routes registered')
56
61
  end
57
62
 
58
63
  def self.build_model_list
@@ -57,17 +57,19 @@ module Legion
57
57
  }
58
58
  end
59
59
 
60
- def format_stream_chunk(delta_text, model:, request_id:, finish_reason: nil)
60
+ def format_stream_chunk(delta_text, model:, request_id:, finish_reason: nil, usage: nil)
61
61
  choice = { index: 0, delta: {}, finish_reason: finish_reason }
62
62
  choice[:delta][:content] = delta_text if delta_text && !delta_text.empty?
63
63
 
64
- {
64
+ chunk = {
65
65
  id: "chatcmpl-#{request_id.delete('-')}",
66
66
  object: 'chat.completion.chunk',
67
67
  created: Time.now.to_i,
68
68
  model: model.to_s,
69
69
  choices: [choice]
70
70
  }
71
+ chunk[:usage] = usage if usage
72
+ chunk
71
73
  end
72
74
 
73
75
  def format_stream_tool_call_chunk(tool_call, model:, request_id:, index:)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.9.52'
5
+ VERSION = '0.9.54'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.52
4
+ version: 0.9.54
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity