RubyGems - lex-llm - Versions diffs - 0.1.6 → 0.1.7 - Mend

lex-llm 0.1.6 → 0.1.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +7 -0
data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +54 -3
data/lib/legion/extensions/llm/stream_accumulator.rb +29 -2
data/lib/legion/extensions/llm/streaming.rb +11 -8
data/lib/legion/extensions/llm/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d7d400d2739542ca417b189fba9d20f468d32ca6b4c1d4864fcd884a21d31577
-  data.tar.gz: 1c0ffee1ed602d77d2a295d2f4e7904abcef1ac284754553c3c5f883a78fa023
+  metadata.gz: c5b58678c0d7021662b2ef38d80932bd373e3c462b9d05e1004dfc880b1e6d6f
+  data.tar.gz: 49a88cc742e128df1bd93882585df89e595c9761194da354af6be93bd4bd4c2e
 SHA512:
-  metadata.gz: 07ea1df46e8469e493b89855d983ef1416d38e6907404eae1502340f37f271a43c2de442825b48dbca538907042e520d85f95316803f8a87b08633edf849685a
-  data.tar.gz: 8ee001e548224a71f050c3224d140d33e652603a9308d6301e539a8f984d8aed922e7c8f8ff3313df4a42b5a693acd2dc896914a71c23e47c42eae90f4a62c9d
+  metadata.gz: 273c724d3b7b2945dea092184c8df80952d6ec0c8c38aefbba966a28de91c43c2862be91ac9e918943a7e2e42b5dde4c7b98826852598c7e82c4dd10bbca26e8
+  data.tar.gz: 68b38d28e88ad07c333ca0f7e94885a3006b1f3fc727f3c2b7206cba5497b42f848927b2d555db5382abac05123b76074572c7ca6834da0de312b2f30fdd3a03

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 # Changelog
+## 0.1.7 - 2026-04-30
+- Add thinking extraction from OpenAI-compatible streaming chunks (reasoning_content, reasoning, think tags)
+- Add stream_usage_supported? opt-in for streaming token usage reporting
+- Add filtered_chunk method to StreamAccumulator for clean thinking/content separation
+- Wrap streaming callback through accumulator filter for proper SSE event routing
 ## 0.1.6 - 2026-04-28
 - Add provider-neutral registry event envelopes for future `llm.registry` offering availability, unavailability, degraded, and heartbeat publishing without persistence.

data/lib/legion/extensions/llm/provider/open_ai_compatible.rb CHANGED Viewed

@@ -6,6 +6,7 @@ module Legion
       class Provider
         # Shared OpenAI-compatible HTTP payload and response adapter.
         module OpenAICompatible
+          def stream_usage_supported? = false
           def completion_url = '/v1/chat/completions'
           def stream_url = completion_url
           def models_url = '/v1/models'
@@ -20,7 +21,7 @@ module Legion
           private
           def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
-            {
+            payload = {
               model: model.id,
               messages: format_openai_messages(messages),
               temperature: temperature,
@@ -30,6 +31,8 @@ module Legion
               response_format: openai_response_format(schema),
               reasoning_effort: openai_reasoning_effort(thinking)
             }.compact
+            payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
+            payload
           end
           def format_openai_messages(messages)
@@ -116,12 +119,14 @@ module Legion
             choice = Array(body['choices']).first || {}
             message = choice['message'] || {}
             usage = body['usage'] || {}
+            content, thinking = extract_thinking_from_completion(message)
             Legion::Extensions::Llm::Message.new(
               role: :assistant,
-              content: message['content'],
+              content: content,
               model_id: body['model'],
               tool_calls: parse_tool_calls(message['tool_calls']),
+              thinking: thinking,
               input_tokens: usage['prompt_tokens'],
               output_tokens: usage['completion_tokens'],
               reasoning_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
@@ -129,22 +134,68 @@ module Legion
             )
           end
+          def extract_thinking_from_completion(message)
+            reasoning = message['reasoning_content'] || message['reasoning']
+            content = message['content']
+            if reasoning
+              [content, Thinking.build(text: reasoning)]
+            elsif content.is_a?(String) && content.include?('<think>')
+              think_text = content[%r{<think>(.*?)</think>}m, 1]
+              clean = content.gsub(%r{<think>.*?</think>}m, '').strip
+              [clean, Thinking.build(text: think_text)]
+            else
+              [content, nil]
+            end
+          end
           def build_chunk(data)
             choice = Array(data['choices']).first || {}
             delta = choice['delta'] || {}
             usage = data['usage'] || {}
+            content, thinking = extract_thinking_from_chunk(delta)
             Legion::Extensions::Llm::Chunk.new(
               role: :assistant,
-              content: delta['content'],
+              content: content,
               model_id: data['model'],
               tool_calls: parse_tool_calls(delta['tool_calls']),
+              thinking: thinking,
               input_tokens: usage['prompt_tokens'],
               output_tokens: usage['completion_tokens'],
               raw: data
             )
           end
+          def extract_thinking_from_chunk(delta)
+            reasoning = delta['reasoning_content'] || delta['reasoning']
+            content = delta['content']
+            if reasoning
+              [content, Thinking.build(text: reasoning)]
+            elsif content.is_a?(String) && content.include?('<think>')
+              clean, think_text = split_think_tags(content)
+              [clean, Thinking.build(text: think_text)]
+            else
+              [content, nil]
+            end
+          end
+          def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
+            if text.match?(%r{<think>.*</think>}m)
+              thinking = text[%r{<think>(.*?)</think>}m, 1]
+              clean = text.gsub(%r{<think>.*?</think>}m, '').strip
+              [clean.empty? ? nil : clean, thinking]
+            elsif text.start_with?('<think>')
+              [nil, text.delete_prefix('<think>')]
+            elsif text.include?('</think>')
+              parts = text.split('</think>', 2)
+              [parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
+            else
+              [text, nil]
+            end
+          end
           def parse_tool_calls(tool_calls)
             return nil unless tool_calls&.any?

data/lib/legion/extensions/llm/stream_accumulator.rb CHANGED Viewed

@@ -26,12 +26,32 @@ module Legion
           Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
           @model_id ||= chunk.model_id
+          @last_content_delta = +''
+          @last_thinking_delta = +''
           handle_chunk_content(chunk)
           append_thinking_from_chunk(chunk)
           count_tokens chunk
           Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
         end
+        def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
+          has_content = !@last_content_delta.empty?
+          has_thinking = !@last_thinking_delta.empty?
+          has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
+          return nil unless has_content || has_thinking || chunk.tool_call? || has_tokens
+          Chunk.new(
+            role: :assistant,
+            content: has_content ? @last_content_delta : nil,
+            thinking: has_thinking ? Thinking.build(text: @last_thinking_delta) : chunk.thinking,
+            model_id: chunk.model_id,
+            tool_calls: chunk.tool_calls,
+            input_tokens: chunk.input_tokens,
+            output_tokens: chunk.output_tokens,
+            raw: chunk.raw
+          )
+        end
         def to_message(response)
           Message.new(
             role: :assistant,
@@ -137,14 +157,21 @@ module Legion
         def append_text_with_thinking(text)
           content_chunk, thinking_chunk = extract_think_tags(text)
           @content << content_chunk
-          @thinking_text << thinking_chunk if thinking_chunk
+          @last_content_delta << content_chunk
+          return unless thinking_chunk
+          @thinking_text << thinking_chunk
+          @last_thinking_delta << thinking_chunk
         end
         def append_thinking_from_chunk(chunk)
           thinking = chunk.thinking
           return unless thinking
-          @thinking_text << thinking.text.to_s if thinking.text
+          if thinking.text
+            @thinking_text << thinking.text.to_s
+            @last_thinking_delta << thinking.text.to_s
+          end
           @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
         end

data/lib/legion/extensions/llm/streaming.rb CHANGED Viewed

@@ -12,16 +12,11 @@ module Legion
           response = connection.post stream_url, payload do |req|
             req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
+            on_chunk = build_stream_callback(accumulator, block)
             if faraday_1?
-              req.options[:on_data] = handle_stream do |chunk|
-                accumulator.add chunk
-                block.call chunk
-              end
+              req.options[:on_data] = handle_stream(&on_chunk)
             else
-              req.options.on_data = handle_stream do |chunk|
-                accumulator.add chunk
-                block.call chunk
-              end
+              req.options.on_data = handle_stream(&on_chunk)
             end
           end
@@ -30,6 +25,14 @@ module Legion
           message
         end
+        def build_stream_callback(accumulator, block)
+          proc do |chunk|
+            accumulator.add chunk
+            filtered = accumulator.filtered_chunk(chunk)
+            block.call(filtered) if filtered
+          end
+        end
         def handle_stream(&block)
           build_on_data_handler do |data|
             block.call(build_chunk(data)) if data.is_a?(Hash)

data/lib/legion/extensions/llm/version.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Legion
   module Extensions
     module Llm
-      VERSION = '0.1.6'
+      VERSION = '0.1.7'
     end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm
 version: !ruby/object:Gem::Version
-  version: 0.1.6
+  version: 0.1.7
 platform: ruby
 authors:
 - LegionIO