RubyGems - legion-llm - Versions diffs - 0.9.52 → 0.9.54 - Mend

legion-llm 0.9.52 → 0.9.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/CHANGELOG.md +11 -0
data/lib/legion/llm/api/auth.rb +7 -3
data/lib/legion/llm/api/openai/chat_completions.rb +19 -6
data/lib/legion/llm/api/openai/embeddings.rb +11 -4
data/lib/legion/llm/api/openai/models.rb +9 -4
data/lib/legion/llm/api/translators/openai_response.rb +4 -2
data/lib/legion/llm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: '08d2a4d13f38bdf0c305da0965e688c317deffdad792796daf66f0168bddd37a'
-  data.tar.gz: 9c36b44103590f45b9947fee8c3dfaf9651fcec57c193b7c7b4dbfe839e57aed
+  metadata.gz: '09bf7eb9fe4c93ccba0bb574864c66a518225327399b47a30dd08aa148ac3b74'
+  data.tar.gz: a54854addf081e387d94ed9fd3ba67f826df9b64ec3b042b2767a7e5a441d715
 SHA512:
-  metadata.gz: 5fec699d874dff0e91c83735becbfb825657c1d9da13e7034c8e6a843dba481e85a30c7975c959282ca8da5e9140eeeff92489a3e32fdad164685d33f83f9e42
-  data.tar.gz: 3e149f5061993af9a16f4644b83385a0c3a60fcaa394a2454c88cdecd99890d4fd083c08491fe22715739507875bb544b61747947d2805cc198ed72f9cabde3c
+  metadata.gz: 69fa173952297d7da6410c101c9b9e13548514db85075188fdef153d0f9340f50c314e3f633432e5481c5a4d979468ea84dc3b24edff84d4d728a6d7df7f94c0
+  data.tar.gz: 52852f542515ec121bf1d9851506c3866e70cae5a551f034777f15d9c68fc7886888eb6d9c5cb9a9f58e18016d84d92d4dd010a58cc7c312b2c683bfb477815d

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,16 @@
 # Legion LLM Changelog
+## [0.9.54] - 2026-05-29
+### Fixed
+- API: OpenAI-compatible streaming responses now include `usage` (`prompt_tokens`, `completion_tokens`, `total_tokens`) in the final done-chunk, allowing the Vercel AI SDK `step-finish` event to propagate token counts to clients (fixes empty Tokens display in Kai's response info popup)
+## [0.9.53] - 2026-05-29
+### Added
+- API: OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`, `/v1/responses`) are now also available under the `/api/llm/inference/v1/` prefix, allowing Mastra `openai-compatible` providers to use `http://127.0.0.1:4567/api/llm/inference` as the base URL — consistent with the Claude and Codex client routing patches in legion-interlink
+- API: auth `before` filter extended to cover `/api/llm/inference/v1/*` in addition to `/v1/*`
 ## [0.9.52] - 2026-05-27
 ### Fixed

data/lib/legion/llm/api/auth.rb CHANGED Viewed

@@ -9,9 +9,9 @@ module Legion
         extend Legion::Logging::Helper
         def self.registered(app)
-          log.debug('[llm][api][auth] registering /v1/* before filter')
+          log.debug('[llm][api][auth] registering /v1/* and /api/llm/inference/v1/* before filters')
-          app.before '/v1/*' do
+          auth_check = proc do
             log.debug("[llm][api][auth] before filter action=check path=#{request.path_info}")
             next unless auth_enabled?
@@ -27,6 +27,10 @@ module Legion
             log.debug("[llm][api][auth] action=authorized path=#{request.path_info}")
           end
+          app.before('/api/llm/inference/v1/*', &auth_check)
+          app.before('/v1/*', &auth_check)
           app.helpers do
             define_method(:auth_enabled?) do
               Legion::LLM::Settings.value(:api, :auth, :enabled) == true
@@ -53,7 +57,7 @@ module Legion
             end
           end
-          log.debug('[llm][api][auth] /v1/* before filter registered')
+          log.debug('[llm][api][auth] /v1/* and /api/llm/inference/v1/* before filters registered')
         rescue StandardError => e
           handle_exception(e, level: :error, handled: false, operation: 'llm.api.auth.register')
         end

data/lib/legion/llm/api/openai/chat_completions.rb CHANGED Viewed

@@ -11,10 +11,19 @@ module Legion
         module ChatCompletions
           extend Legion::Logging::Helper
-          def self.registered(app) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
-            log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions')
+          def self.registered(app)
+            log.debug('[llm][api][openai][chat_completions] registering POST /v1/chat/completions + /api/llm/inference/v1/chat/completions')
-            app.post '/v1/chat/completions' do # rubocop:disable Metrics/BlockLength
+            handler = build_handler
+            app.post('/v1/chat/completions') { instance_exec(&handler) }
+            app.post('/api/llm/inference/v1/chat/completions') { instance_exec(&handler) }
+            log.debug('[llm][api][openai][chat_completions] routes registered')
+          end
+          def self.build_handler # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
+            proc do # rubocop:disable Metrics/BlockLength
               require_llm!
               body = parse_request_body
@@ -82,7 +91,13 @@ module Legion
                     nil,
                     model:         final_model,
                     request_id:    request_id,
-                    finish_reason: tool_calls.empty? ? 'stop' : 'tool_calls'
+                    finish_reason: tool_calls.empty? ? 'stop' : 'tool_calls',
+                    usage:         {
+                      prompt_tokens:     Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :input),
+                      completion_tokens: Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :output),
+                      total_tokens:      Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :input).to_i +
+                                         Legion::LLM::API::Translators::OpenAIResponse.extract_token_count(pipeline_response.tokens, :output).to_i
+                    }
                   )
                   out << "data: #{Legion::JSON.dump(done_chunk)}\n\n"
                   out << "data: [DONE]\n\n"
@@ -121,8 +136,6 @@ module Legion
               halt 500, { 'Content-Type' => 'application/json' },
                    Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
             end
-            log.debug('[llm][api][openai][chat_completions] POST /v1/chat/completions registered')
           end
           def self.build_openai_tool_classes(tools)

data/lib/legion/llm/api/openai/embeddings.rb CHANGED Viewed

@@ -10,9 +10,18 @@ module Legion
           extend Legion::Logging::Helper
           def self.registered(app)
-            log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings')
+            log.debug('[llm][api][openai][embeddings] registering POST /v1/embeddings + /api/llm/inference/v1/embeddings')
-            app.post '/v1/embeddings' do
+            handler = build_handler
+            app.post('/v1/embeddings') { instance_exec(&handler) }
+            app.post('/api/llm/inference/v1/embeddings') { instance_exec(&handler) }
+            log.debug('[llm][api][openai][embeddings] routes registered')
+          end
+          def self.build_handler
+            proc do
               require_llm!
               body = parse_request_body
@@ -57,8 +66,6 @@ module Legion
               halt 500, { 'Content-Type' => 'application/json' },
                    Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
             end
-            log.debug('[llm][api][openai][embeddings] POST /v1/embeddings registered')
           end
         end
       end

data/lib/legion/llm/api/openai/models.rb CHANGED Viewed

@@ -11,9 +11,9 @@ module Legion
           extend Legion::Logging::Helper
           def self.registered(app)
-            log.debug('[llm][api][openai][models] registering GET /v1/models and GET /v1/models/:id')
+            log.debug('[llm][api][openai][models] registering GET /v1/models + /api/llm/inference/v1/models routes')
-            app.get '/v1/models' do
+            list_handler = proc do
               log.debug('[llm][api][openai][models] action=list')
               require_llm!
@@ -28,7 +28,7 @@ module Legion
                    Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
             end
-            app.get '/v1/models/:id' do
+            get_handler = proc do
               model_id = params[:id]
               log.debug("[llm][api][openai][models] action=get id=#{model_id}")
               require_llm!
@@ -52,7 +52,12 @@ module Legion
                    Legion::JSON.dump({ error: { message: e.message, type: 'server_error' } })
             end
-            log.debug('[llm][api][openai][models] GET /v1/models routes registered')
+            app.get('/v1/models') { instance_exec(&list_handler) }
+            app.get('/api/llm/inference/v1/models') { instance_exec(&list_handler) }
+            app.get('/v1/models/:id') { instance_exec(&get_handler) }
+            app.get('/api/llm/inference/v1/models/:id') { instance_exec(&get_handler) }
+            log.debug('[llm][api][openai][models] routes registered')
           end
           def self.build_model_list

data/lib/legion/llm/api/translators/openai_response.rb CHANGED Viewed

@@ -57,17 +57,19 @@ module Legion
             }
           end
-          def format_stream_chunk(delta_text, model:, request_id:, finish_reason: nil)
+          def format_stream_chunk(delta_text, model:, request_id:, finish_reason: nil, usage: nil)
             choice = { index: 0, delta: {}, finish_reason: finish_reason }
             choice[:delta][:content] = delta_text if delta_text && !delta_text.empty?
-            {
+            chunk = {
               id:      "chatcmpl-#{request_id.delete('-')}",
               object:  'chat.completion.chunk',
               created: Time.now.to_i,
               model:   model.to_s,
               choices: [choice]
             }
+            chunk[:usage] = usage if usage
+            chunk
           end
           def format_stream_tool_call_chunk(tool_call, model:, request_id:, index:)

data/lib/legion/llm/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Legion
   module LLM
-    VERSION = '0.9.52'
+    VERSION = '0.9.54'
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legion-llm
 version: !ruby/object:Gem::Version
-  version: 0.9.52
+  version: 0.9.54
 platform: ruby
 authors:
 - Esity