RubyGems - lex-llm - Versions diffs - 0.1.5 → 0.1.7 - Mend

lex-llm 0.1.5 → 0.1.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/CHANGELOG.md +12 -0
data/README.md +31 -1
data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +54 -3
data/lib/legion/extensions/llm/routing/registry_event.rb +167 -0
data/lib/legion/extensions/llm/stream_accumulator.rb +29 -2
data/lib/legion/extensions/llm/streaming.rb +11 -8
data/lib/legion/extensions/llm/version.rb +1 -1
data/lib/legion/extensions/llm.rb +2 -0
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: aae636ae2e90a5bbbf5b11ba40ae3bd21ab628fa7754e0b9c78539f2535d03fc
-  data.tar.gz: 79a95d21375a4da155f768f8696d408917a8c37391ec5c986afce9dde2033f08
+  metadata.gz: c5b58678c0d7021662b2ef38d80932bd373e3c462b9d05e1004dfc880b1e6d6f
+  data.tar.gz: 49a88cc742e128df1bd93882585df89e595c9761194da354af6be93bd4bd4c2e
 SHA512:
-  metadata.gz: 88bd2debf160491c93dbd275d332a4563f7d607142105d631114702b68cab98da103ce8b4e1b5a70f0da1aef3bc1727b384e2a1b1340bac3fe3079939e85377f
-  data.tar.gz: 49055e0945460d46444536b0fee9e1fabcba640a97f57b84486e75e5c014d765aaa1c7c33e6f67ae455fb43143b32a3fec80e0c507e20da5b29f34e714cd6d82
+  metadata.gz: 273c724d3b7b2945dea092184c8df80952d6ec0c8c38aefbba966a28de91c43c2862be91ac9e918943a7e2e42b5dde4c7b98826852598c7e82c4dd10bbca26e8
+  data.tar.gz: 68b38d28e88ad07c333ca0f7e94885a3006b1f3fc727f3c2b7206cba5497b42f848927b2d555db5382abac05123b76074572c7ca6834da0de312b2f30fdd3a03

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Changelog
+## 0.1.7 - 2026-04-30
+- Add thinking extraction from OpenAI-compatible streaming chunks (reasoning_content, reasoning, think tags)
+- Add stream_usage_supported? opt-in for streaming token usage reporting
+- Add filtered_chunk method to StreamAccumulator for clean thinking/content separation
+- Wrap streaming callback through accumulator filter for proper SSE event routing
+## 0.1.6 - 2026-04-28
+- Add provider-neutral registry event envelopes for future `llm.registry` offering availability, unavailability, degraded, and heartbeat publishing without persistence.
+- Sanitize registry offering payloads and reject sensitive runtime, capacity, health, lane, and metadata keys before publication.
 ## 0.1.5 - 2026-04-28
 - Add the expanded provider-neutral model offering contract with offering IDs, provider instances, canonical model aliases, model families, and routing metadata.

data/README.md CHANGED Viewed

@@ -48,7 +48,7 @@ gem 'lex-llm'
 Provider extensions should declare `lex-llm` as a gemspec dependency:
 ```ruby
-spec.add_dependency 'lex-llm', '>= 0.1.5'
+spec.add_dependency 'lex-llm', '>= 0.1.6'
 ```
 For local development across LegionIO repos, prefer a local path override in the app or test `Gemfile`, not a permanent git dependency in the gemspec.
@@ -171,6 +171,36 @@ registry.filter(
 )
 ```
+## Registry Events
+`Legion::Extensions::Llm::Routing::RegistryEvent` builds dependency-light envelopes for future `llm.registry` publishing. It does not persist registry state or publish messages by itself.
+```ruby
+event = Legion::Extensions::Llm::Routing::RegistryEvent.available(
+  offering,
+  runtime: { host_id: 'macbook-m4-max', process: { pid: 12_345 } },
+  capacity: { concurrency: 4, queued: 0 },
+  health: { ready: true, latency_ms: 180 },
+  lane: offering.lane_key,
+  metadata: { observed_by: :lex_llm_ollama }
+)
+event.to_h
+# => {
+#      event_id: "...",
+#      event_type: :offering_available,
+#      occurred_at: "2026-04-28T14:30:15.123456Z",
+#      offering: { ... },
+#      runtime: { host_id: "macbook-m4-max", process: { pid: 12345 } },
+#      capacity: { concurrency: 4, queued: 0 },
+#      health: { ready: true, latency_ms: 180 },
+#      lane: "llm.fleet.inference.qwen3-6-27b-q4-k-m.ctx32768",
+#      metadata: { observed_by: :lex_llm_ollama }
+#    }
+```
+Supported event types are `:offering_available`, `:offering_unavailable`, `:offering_degraded`, and `:offering_heartbeat`. Event offerings are derived from `ModelOffering#to_h`, with sensitive offering fields removed. Optional `runtime`, `capacity`, `health`, `lane`, and `metadata` values are intended for non-secret operational context and reject sensitive keys such as credentials, tokens, secrets, URLs, endpoint paths, prompts, and reply queues.
 ## Fleet Lanes
 Fleet routing uses shared work lanes derived from model offerings. A lane describes the work required, not the worker that happens to do it.

data/lib/legion/extensions/llm/provider/open_ai_compatible.rb CHANGED Viewed

@@ -6,6 +6,7 @@ module Legion
       class Provider
         # Shared OpenAI-compatible HTTP payload and response adapter.
         module OpenAICompatible
+          def stream_usage_supported? = false
           def completion_url = '/v1/chat/completions'
           def stream_url = completion_url
           def models_url = '/v1/models'
@@ -20,7 +21,7 @@ module Legion
           private
           def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
-            {
+            payload = {
               model: model.id,
               messages: format_openai_messages(messages),
               temperature: temperature,
@@ -30,6 +31,8 @@ module Legion
               response_format: openai_response_format(schema),
               reasoning_effort: openai_reasoning_effort(thinking)
             }.compact
+            payload[:stream_options] = { include_usage: true } if stream && stream_usage_supported?
+            payload
           end
           def format_openai_messages(messages)
@@ -116,12 +119,14 @@ module Legion
             choice = Array(body['choices']).first || {}
             message = choice['message'] || {}
             usage = body['usage'] || {}
+            content, thinking = extract_thinking_from_completion(message)
             Legion::Extensions::Llm::Message.new(
               role: :assistant,
-              content: message['content'],
+              content: content,
               model_id: body['model'],
               tool_calls: parse_tool_calls(message['tool_calls']),
+              thinking: thinking,
               input_tokens: usage['prompt_tokens'],
               output_tokens: usage['completion_tokens'],
               reasoning_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens'),
@@ -129,22 +134,68 @@ module Legion
             )
           end
+          def extract_thinking_from_completion(message)
+            reasoning = message['reasoning_content'] || message['reasoning']
+            content = message['content']
+            if reasoning
+              [content, Thinking.build(text: reasoning)]
+            elsif content.is_a?(String) && content.include?('<think>')
+              think_text = content[%r{<think>(.*?)</think>}m, 1]
+              clean = content.gsub(%r{<think>.*?</think>}m, '').strip
+              [clean, Thinking.build(text: think_text)]
+            else
+              [content, nil]
+            end
+          end
           def build_chunk(data)
             choice = Array(data['choices']).first || {}
             delta = choice['delta'] || {}
             usage = data['usage'] || {}
+            content, thinking = extract_thinking_from_chunk(delta)
             Legion::Extensions::Llm::Chunk.new(
               role: :assistant,
-              content: delta['content'],
+              content: content,
               model_id: data['model'],
               tool_calls: parse_tool_calls(delta['tool_calls']),
+              thinking: thinking,
               input_tokens: usage['prompt_tokens'],
               output_tokens: usage['completion_tokens'],
               raw: data
             )
           end
+          def extract_thinking_from_chunk(delta)
+            reasoning = delta['reasoning_content'] || delta['reasoning']
+            content = delta['content']
+            if reasoning
+              [content, Thinking.build(text: reasoning)]
+            elsif content.is_a?(String) && content.include?('<think>')
+              clean, think_text = split_think_tags(content)
+              [clean, Thinking.build(text: think_text)]
+            else
+              [content, nil]
+            end
+          end
+          def split_think_tags(text) # rubocop:disable Metrics/PerceivedComplexity
+            if text.match?(%r{<think>.*</think>}m)
+              thinking = text[%r{<think>(.*?)</think>}m, 1]
+              clean = text.gsub(%r{<think>.*?</think>}m, '').strip
+              [clean.empty? ? nil : clean, thinking]
+            elsif text.start_with?('<think>')
+              [nil, text.delete_prefix('<think>')]
+            elsif text.include?('</think>')
+              parts = text.split('</think>', 2)
+              [parts[1]&.strip.then { |s| s&.empty? ? nil : s }, parts[0]]
+            else
+              [text, nil]
+            end
+          end
           def parse_tool_calls(tool_calls)
             return nil unless tool_calls&.any?

data/lib/legion/extensions/llm/routing/registry_event.rb ADDED Viewed

@@ -0,0 +1,167 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      module Routing
+        # Serializable provider-neutral envelope for future llm.registry publishing.
+        class RegistryEvent
+          EVENT_TYPES = %i[
+            offering_available
+            offering_unavailable
+            offering_degraded
+            offering_heartbeat
+          ].freeze
+          SENSITIVE_KEYS = %i[
+            access_key
+            api_key
+            authorization
+            bearer
+            client_secret
+            credential
+            credentials
+            endpoint
+            endpoint_url
+            password
+            path
+            private_key
+            prompt
+            reply_to
+            secret
+            secrets
+            token
+            url
+          ].freeze
+          attr_reader :event_id, :event_type, :occurred_at, :offering, :runtime, :capacity, :health, :lane, :metadata
+          class << self
+            def available(offering, **attributes)
+              new(event_type: :offering_available, offering:, **attributes)
+            end
+            def unavailable(offering, **attributes)
+              new(event_type: :offering_unavailable, offering:, **attributes)
+            end
+            def degraded(offering, **attributes)
+              new(event_type: :offering_degraded, offering:, **attributes)
+            end
+            def heartbeat(offering, **attributes)
+              new(event_type: :offering_heartbeat, offering:, **attributes)
+            end
+          end
+          def initialize(event_type:, offering:, **attributes)
+            @event_id = normalize_event_id(attributes.fetch(:event_id, SecureRandom.uuid))
+            @event_type = normalize_event_type(event_type)
+            @occurred_at = normalize_time(attributes.fetch(:occurred_at, Time.now.utc))
+            @offering = normalize_offering(offering)
+            @runtime = sanitize_optional_hash(attributes[:runtime], :runtime)
+            @capacity = sanitize_optional_hash(attributes[:capacity], :capacity)
+            @health = sanitize_optional_hash(attributes[:health], :health)
+            @lane = sanitize_optional_value(attributes[:lane], :lane)
+            @metadata = sanitize_optional_hash(attributes[:metadata], :metadata)
+          end
+          def to_h
+            {
+              event_id: event_id,
+              event_type: event_type,
+              occurred_at: occurred_at.utc.iso8601(6),
+              offering: sanitized_offering_hash,
+              runtime: runtime,
+              capacity: capacity,
+              health: health,
+              lane: lane,
+              metadata: metadata
+            }.compact
+          end
+          private
+          def normalize_event_id(value)
+            normalized = value.to_s.strip
+            raise ArgumentError, 'event_id is required' if normalized.empty?
+            normalized
+          end
+          def normalize_event_type(value)
+            normalized = value.to_sym
+            raise ArgumentError, "unsupported registry event type: #{value}" unless EVENT_TYPES.include?(normalized)
+            normalized
+          end
+          def normalize_time(value)
+            return value.utc if value.respond_to?(:utc)
+            Time.parse(value.to_s).utc
+          end
+          def normalize_offering(value)
+            return value if value.is_a?(ModelOffering)
+            ModelOffering.new(value)
+          end
+          def sanitized_offering_hash
+            sanitize_hash(offering.to_h, on_sensitive: :drop)
+          end
+          def sanitize_optional_hash(value, label)
+            return nil if value.nil?
+            sanitize_hash(value.to_h, label:)
+          end
+          def sanitize_optional_value(value, label)
+            return nil if value.nil?
+            return sanitize_hash(value.to_h, label:) if value.respond_to?(:to_h)
+            return value unless value.is_a?(Array)
+            sanitize_array(value, label:, path: [])
+          end
+          def sanitize_hash(hash, label: nil, path: [], on_sensitive: :raise)
+            hash.each_with_object({}) do |(key, value), sanitized|
+              normalized_key = key.to_sym
+              key_path = path + [normalized_key]
+              if sensitive_key?(normalized_key)
+                raise_sensitive_key!(label, key_path) if on_sensitive == :raise
+                next
+              end
+              sanitized[normalized_key] = sanitize_value(value, label:, path: key_path, on_sensitive:)
+            end
+          end
+          def sanitize_array(array, label:, path:, on_sensitive: :raise)
+            array.map { |value| sanitize_value(value, label:, path:, on_sensitive:) }
+          end
+          def sanitize_value(value, label:, path:, on_sensitive:)
+            return sanitize_hash(value, label:, path:, on_sensitive:) if value.is_a?(Hash)
+            return sanitize_array(value, label:, path:, on_sensitive:) if value.is_a?(Array)
+            value
+          end
+          def sensitive_key?(key)
+            normalized = key.to_s.downcase.gsub(/[^a-z0-9]+/, '_').to_sym
+            SENSITIVE_KEYS.include?(normalized) ||
+              normalized.to_s.end_with?('_key', '_secret', '_token', '_password')
+          end
+          def raise_sensitive_key!(label, path)
+            prefix = label ? "#{label} contains" : 'registry event contains'
+            raise ArgumentError, "#{prefix} sensitive key: #{path.join('.')}"
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/stream_accumulator.rb CHANGED Viewed

@@ -26,12 +26,32 @@ module Legion
           Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
           @model_id ||= chunk.model_id
+          @last_content_delta = +''
+          @last_thinking_delta = +''
           handle_chunk_content(chunk)
           append_thinking_from_chunk(chunk)
           count_tokens chunk
           Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
         end
+        def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
+          has_content = !@last_content_delta.empty?
+          has_thinking = !@last_thinking_delta.empty?
+          has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
+          return nil unless has_content || has_thinking || chunk.tool_call? || has_tokens
+          Chunk.new(
+            role: :assistant,
+            content: has_content ? @last_content_delta : nil,
+            thinking: has_thinking ? Thinking.build(text: @last_thinking_delta) : chunk.thinking,
+            model_id: chunk.model_id,
+            tool_calls: chunk.tool_calls,
+            input_tokens: chunk.input_tokens,
+            output_tokens: chunk.output_tokens,
+            raw: chunk.raw
+          )
+        end
         def to_message(response)
           Message.new(
             role: :assistant,
@@ -137,14 +157,21 @@ module Legion
         def append_text_with_thinking(text)
           content_chunk, thinking_chunk = extract_think_tags(text)
           @content << content_chunk
-          @thinking_text << thinking_chunk if thinking_chunk
+          @last_content_delta << content_chunk
+          return unless thinking_chunk
+          @thinking_text << thinking_chunk
+          @last_thinking_delta << thinking_chunk
         end
         def append_thinking_from_chunk(chunk)
           thinking = chunk.thinking
           return unless thinking
-          @thinking_text << thinking.text.to_s if thinking.text
+          if thinking.text
+            @thinking_text << thinking.text.to_s
+            @last_thinking_delta << thinking.text.to_s
+          end
           @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
         end

data/lib/legion/extensions/llm/streaming.rb CHANGED Viewed

@@ -12,16 +12,11 @@ module Legion
           response = connection.post stream_url, payload do |req|
             req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
+            on_chunk = build_stream_callback(accumulator, block)
             if faraday_1?
-              req.options[:on_data] = handle_stream do |chunk|
-                accumulator.add chunk
-                block.call chunk
-              end
+              req.options[:on_data] = handle_stream(&on_chunk)
             else
-              req.options.on_data = handle_stream do |chunk|
-                accumulator.add chunk
-                block.call chunk
-              end
+              req.options.on_data = handle_stream(&on_chunk)
             end
           end
@@ -30,6 +25,14 @@ module Legion
           message
         end
+        def build_stream_callback(accumulator, block)
+          proc do |chunk|
+            accumulator.add chunk
+            filtered = accumulator.filtered_chunk(chunk)
+            block.call(filtered) if filtered
+          end
+        end
         def handle_stream(&block)
           build_on_data_handler do |data|
             block.call(build_chunk(data)) if data.is_a?(Hash)

data/lib/legion/extensions/llm/version.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Legion
   module Extensions
     module Llm
-      VERSION = '0.1.5'
+      VERSION = '0.1.7'
     end
   end
 end

data/lib/legion/extensions/llm.rb CHANGED Viewed

@@ -39,6 +39,7 @@ module Legion
       module Types
         ModelOffering = Routing::ModelOffering unless const_defined?(:ModelOffering, false)
         OfferingRegistry = Routing::OfferingRegistry unless const_defined?(:OfferingRegistry, false)
+        RegistryEvent = Routing::RegistryEvent unless const_defined?(:RegistryEvent, false)
       end
       # Shared routing helpers exposed under the Legion extension namespace.
@@ -46,6 +47,7 @@ module Legion
         LaneKey = ::Legion::Extensions::Llm::Routing::LaneKey unless const_defined?(:LaneKey, false)
         OfferingRegistry = ::Legion::Extensions::Llm::Routing::OfferingRegistry unless const_defined?(:OfferingRegistry,
                                                                                                       false)
+        RegistryEvent = ::Legion::Extensions::Llm::Routing::RegistryEvent unless const_defined?(:RegistryEvent, false)
       end
       class << self

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.7
 platform: ruby
 authors:
 - LegionIO
@@ -229,6 +229,7 @@ files:
 - lib/legion/extensions/llm/routing/lane_key.rb
 - lib/legion/extensions/llm/routing/model_offering.rb
 - lib/legion/extensions/llm/routing/offering_registry.rb
+- lib/legion/extensions/llm/routing/registry_event.rb
 - lib/legion/extensions/llm/stream_accumulator.rb
 - lib/legion/extensions/llm/streaming.rb
 - lib/legion/extensions/llm/thinking.rb