RubyGems - lex-llm - Versions diffs - 0.1.2 → 0.1.3 - Mend

lex-llm 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165) hide show

data/lib/legion/extensions/llm/routing/model_offering.rb ADDED Viewed

@@ -0,0 +1,173 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      module Routing
+        # Describes one concrete model made available by one provider instance.
+        #
+        # Built from a Hash whose keys may be Strings or Symbols. Symbol-like fields are
+        # normalized with #to_sym and hash-like fields get their top-level keys symbolized.
+        class ModelOffering
+          attr_reader :provider_family, :instance_id, :transport, :tier, :model, :usage_type, :capabilities, :limits,
+                      :credentials, :health, :cost, :policy_tags, :metadata
+          # @param data [Hash] offering attributes; when a key is present in both String and
+          #   Symbol form the String form wins. Anything that does not respond to #key? is
+          #   treated as empty, so every field falls back to its default.
+          def initialize(data)
+            @provider_family = normalize_symbol(fetch_value(data, :provider_family, fetch_value(data, :provider)))
+            @instance_id = normalize_symbol(fetch_value(data, :instance_id, @provider_family))
+            @transport = normalize_symbol(fetch_value(data, :transport, :http))
+            # default_tier reads @transport, so this must stay after the assignment above.
+            @tier = normalize_symbol(fetch_value(data, :tier, default_tier))
+            @model = fetch_value(data, :model).to_s
+            @usage_type = normalize_usage_type(resolve_usage_type(data))
+            @capabilities = normalize_array(fetch_value(data, :capabilities))
+            @limits = normalize_hash(fetch_value(data, :limits))
+            @credentials = fetch_value(data, :credentials)
+            @health = normalize_hash(fetch_value(data, :health))
+            @cost = normalize_hash(fetch_value(data, :cost))
+            # normalize_array already symbolizes every entry.
+            @policy_tags = normalize_array(fetch_value(data, :policy_tags))
+            @metadata = normalize_hash(fetch_value(data, :metadata))
+          end
+          # @return [Boolean] true unless metadata[:enabled] is exactly false
+          def enabled?
+            metadata[:enabled] != false
+          end
+          # @return [Boolean] whether the normalized usage type is :embedding
+          def embedding?
+            usage_type == :embedding
+          end
+          # @return [Boolean] whether the usage type is one of :chat, :inference or :completion
+          def inference?
+            %i[chat inference completion].include?(usage_type)
+          end
+          # @return [Integer, nil] limits[:context_window], else limits[:max_input_tokens]
+          def context_window
+            integer_limit(:context_window) || integer_limit(:max_input_tokens)
+          end
+          # @return [Integer, nil] limits[:max_output_tokens] as an Integer
+          def max_output_tokens
+            integer_limit(:max_output_tokens)
+          end
+          # @param capability [#to_sym]
+          # @return [Boolean] whether the capability is listed for this offering
+          def supports?(capability)
+            capabilities.include?(capability.to_sym)
+          end
+          # Checks every supplied constraint; constraints left at their defaults always pass.
+          #
+          # @param usage_type [#to_sym, nil] required usage type (normalized before comparing)
+          # @param required_capabilities [Array<#to_sym>] capabilities that must all be present
+          # @param min_context_window [#to_i, nil] smallest acceptable context window
+          # @param policy_tags [Array<#to_sym>] policy tags that must all be present
+          # @return [Boolean]
+          def eligible_for?(usage_type: nil, required_capabilities: [], min_context_window: nil, policy_tags: [])
+            return false unless enabled?
+            return false unless usage_type_matches?(usage_type)
+            return false unless capabilities_match?(required_capabilities)
+            return false unless context_window_matches?(min_context_window)
+            return false unless policy_tags_match?(policy_tags)
+            true
+          end
+          # Delegates to LaneKey.for with this offering and the given options.
+          def lane_key(prefix: 'llm.fleet', include_context: true, include_fingerprint: false)
+            LaneKey.for(self, prefix:, include_context:, include_fingerprint:)
+          end
+          # Delegates to LaneKey.eligibility_fingerprint with this offering.
+          def eligibility_fingerprint
+            LaneKey.eligibility_fingerprint(self)
+          end
+          # @return [Hash{Symbol => Object}] every reader value keyed by reader name
+          def to_h
+            {
+              provider_family: provider_family,
+              instance_id: instance_id,
+              transport: transport,
+              tier: tier,
+              model: model,
+              usage_type: usage_type,
+              capabilities: capabilities,
+              limits: limits,
+              credentials: credentials,
+              health: health,
+              cost: cost,
+              policy_tags: policy_tags,
+              metadata: metadata
+            }
+          end
+          private
+          # Tier implied by the transport when the caller supplies none.
+          def default_tier
+            case @transport
+            when :local
+              :local
+            when :rabbitmq
+              :fleet
+            else
+              :private
+            end
+          end
+          # Picks the first non-nil of usage_type, type and kind, else infers from capabilities.
+          # A usage_type key that is present but nil is treated as absent rather than being
+          # passed on to #to_sym.
+          def resolve_usage_type(data)
+            fetch_value(data, :usage_type) ||
+              fetch_value(data, :type) ||
+              fetch_value(data, :kind) ||
+              infer_usage_type(data)
+          end
+          # :embedding when the capabilities mention :embedding or :embed, otherwise :inference.
+          def infer_usage_type(data)
+            declared = normalize_array(fetch_value(data, :capabilities))
+            return :embedding if declared.include?(:embedding) || declared.include?(:embed)
+            :inference
+          end
+          # Collapses usage-type aliases; unrecognized values pass through as Symbols.
+          def normalize_usage_type(value)
+            case value.to_sym
+            when :embed, :embeddings
+              :embedding
+            when :completion, :text, :chat
+              :inference
+            else
+              value.to_sym
+            end
+          end
+          def normalize_symbol(value)
+            return nil if value.nil?
+            value.to_sym
+          end
+          # Wraps scalars, drops nils and symbolizes every entry.
+          def normalize_array(value)
+            Array(value).compact.map(&:to_sym)
+          end
+          # nil becomes {}; only top-level keys are symbolized.
+          def normalize_hash(value)
+            (value || {}).to_h.transform_keys(&:to_sym)
+          end
+          # Reads key from hash, trying the String form first and then the Symbol form.
+          # The default is returned only when neither key exists; a stored nil is returned as nil.
+          def fetch_value(hash, key, default = nil)
+            return default unless hash.respond_to?(:key?)
+            string_key = key.to_s
+            return hash[string_key] if hash.key?(string_key)
+            hash.key?(key) ? hash[key] : default
+          end
+          def usage_type_matches?(expected)
+            expected.nil? || normalize_usage_type(expected) == usage_type
+          end
+          def capabilities_match?(required)
+            Array(required).all? { |capability| supports?(capability) }
+          end
+          # An offering with no known context window fails any explicit minimum.
+          def context_window_matches?(minimum)
+            minimum.nil? || (!!context_window && context_window >= minimum.to_i)
+          end
+          def policy_tags_match?(required)
+            Array(required).all? { |tag| policy_tags.include?(tag.to_sym) }
+          end
+          # Integer form of limits[key], or nil when the value is missing or not convertible
+          # (including NaN and Infinity, which would otherwise raise FloatDomainError).
+          def integer_limit(key)
+            Integer(limits[key], exception: false)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/routing.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      # Provider-neutral routing metadata used by Legion LLM provider gems.
+      #
+      # This file only declares the namespace; the module body here is empty and adds no
+      # members of its own.
+      module Routing
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/stream_accumulator.rb ADDED Viewed

@@ -0,0 +1,209 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      # Assembles streaming responses from LLMs into complete messages.
+      #
+      # Feed each streamed chunk to #add, then call #to_message once the stream has ended.
+      # Text wrapped in <think>...</think> is diverted from the content into the thinking
+      # text, including when a tag is split across two chunks.
+      class StreamAccumulator
+        attr_reader :content, :model_id, :tool_calls
+        def initialize
+          @content = +''
+          @thinking_text = +''
+          @thinking_signature = nil
+          @tool_calls = {}
+          @input_tokens = nil
+          @output_tokens = nil
+          @cached_tokens = nil
+          @cache_creation_tokens = nil
+          @thinking_tokens = nil
+          @inside_think_tag = false
+          # Holds a trailing partial "<think>" / "</think>" until the next chunk arrives.
+          @pending_think_tag = +''
+          @latest_tool_call_id = nil
+        end
+        # Folds one streamed chunk into the accumulated state.
+        #
+        # @param chunk [#model_id, #content, #thinking, #tool_calls, #tool_call?] streamed chunk;
+        #   it must also answer the token readers used by #count_tokens
+        def add(chunk)
+          Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
+          # The first chunk that carries a model id wins.
+          @model_id ||= chunk.model_id
+          handle_chunk_content(chunk)
+          append_thinking_from_chunk(chunk)
+          count_tokens chunk
+          Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
+        end
+        # Builds the final assistant Message from everything accumulated so far.
+        #
+        # @param response [Object] stored unchanged as the message's raw value
+        # @return [Message]
+        def to_message(response)
+          Message.new(
+            role: :assistant,
+            content: content.empty? ? nil : content,
+            thinking: Thinking.build(
+              text: @thinking_text.empty? ? nil : @thinking_text,
+              signature: @thinking_signature
+            ),
+            tokens: Tokens.build(
+              input: @input_tokens,
+              output: @output_tokens,
+              cached: @cached_tokens,
+              cache_creation: @cache_creation_tokens,
+              thinking: @thinking_tokens
+            ),
+            model_id: model_id,
+            tool_calls: tool_calls_from_stream,
+            raw: response
+          )
+        end
+        private
+        # Rebuilds each accumulated tool call with its argument string parsed as JSON.
+        # An empty argument string becomes {}; non-String arguments are passed through.
+        def tool_calls_from_stream
+          tool_calls.transform_values do |tc|
+            arguments = if tc.arguments.is_a?(String) && !tc.arguments.empty?
+                          Legion::JSON.parse(tc.arguments, symbolize_names: false)
+                        elsif tc.arguments.is_a?(String)
+                          {}
+                        else
+                          tc.arguments
+                        end
+            ToolCall.new(
+              id: tc.id,
+              name: tc.name,
+              arguments: arguments,
+              thought_signature: tc.thought_signature
+            )
+          end
+        end
+        # Merges streamed tool-call deltas. A delta with an id starts a new call; a delta
+        # without one appends its argument fragment to the most recently started call.
+        def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
+          if Legion::Extensions::Llm.config.log_stream_debug
+            Legion::Extensions::Llm.logger.debug { "Accumulating tool calls: #{new_tool_calls}" }
+          end
+          new_tool_calls.each_value do |tool_call|
+            if tool_call.id
+              tool_call_id = tool_call.id.empty? ? SecureRandom.uuid : tool_call.id
+              tool_call_arguments = tool_call.arguments
+              # Start from a mutable String so later fragments can be appended with <<.
+              if tool_call_arguments.nil? || (tool_call_arguments.respond_to?(:empty?) && tool_call_arguments.empty?)
+                tool_call_arguments = +''
+              end
+              # NOTE(review): the hash key is the raw (possibly empty) id while the stored
+              # ToolCall carries the generated one — confirm callers expect that.
+              @tool_calls[tool_call.id] = ToolCall.new(
+                id: tool_call_id,
+                name: tool_call.name,
+                arguments: tool_call_arguments,
+                thought_signature: tool_call.thought_signature
+              )
+              @latest_tool_call_id = tool_call.id
+            else
+              existing = @tool_calls[@latest_tool_call_id]
+              if existing
+                fragment = tool_call.arguments
+                fragment = '' if fragment.nil?
+                existing.arguments << fragment
+                if tool_call.thought_signature && existing.thought_signature.nil?
+                  existing.thought_signature = tool_call.thought_signature
+                end
+              end
+            end
+          end
+        end
+        # Looks up an accumulated tool call. A nil id resolves to the most recently started
+        # call; a non-nil id also becomes the new "latest" id.
+        def find_tool_call(tool_call_id)
+          if tool_call_id.nil?
+            # Must read the same ivar that accumulate_tool_calls and the branch below write.
+            @tool_calls[@latest_tool_call_id]
+          else
+            @latest_tool_call_id = tool_call_id
+            @tool_calls[tool_call_id]
+          end
+        end
+        # Keeps the latest non-nil value of each token counter (values replace, not add).
+        def count_tokens(chunk)
+          @input_tokens = chunk.input_tokens if chunk.input_tokens
+          @output_tokens = chunk.output_tokens if chunk.output_tokens
+          @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+          @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
+          @thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
+        end
+        # Routes a chunk to tool-call accumulation or to text accumulation.
+        def handle_chunk_content(chunk)
+          return accumulate_tool_calls(chunk.tool_calls) if chunk.tool_call?
+          content_text = chunk.content || ''
+          if content_text.is_a?(String)
+            append_text_with_thinking(content_text)
+          else
+            # Non-String content skips think-tag extraction and is appended via #to_s.
+            @content << content_text.to_s
+          end
+        end
+        def append_text_with_thinking(text)
+          content_chunk, thinking_chunk = extract_think_tags(text)
+          @content << content_chunk
+          @thinking_text << thinking_chunk if thinking_chunk
+        end
+        # Appends thinking text carried on the chunk itself; the first signature seen is kept.
+        def append_thinking_from_chunk(chunk)
+          thinking = chunk.thinking
+          return unless thinking
+          @thinking_text << thinking.text.to_s if thinking.text
+          @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
+        end
+        # Splits text into visible output and thinking text, carrying the inside/outside
+        # state and any partial tag over to the next call.
+        #
+        # @return [Array(String, String), Array(String, nil)] output and thinking (nil when empty)
+        def extract_think_tags(text)
+          start_tag = '<think>'
+          end_tag = '</think>'
+          remaining = @pending_think_tag + text
+          @pending_think_tag = +''
+          output = +''
+          thinking = +''
+          until remaining.empty?
+            remaining = if @inside_think_tag
+                          consume_think_content(remaining, end_tag, thinking)
+                        else
+                          consume_non_think_content(remaining, start_tag, output)
+                        end
+          end
+          [output, thinking.empty? ? nil : thinking]
+        end
+        # Inside a think block: collects text up to the end tag and returns what follows it.
+        # Without an end tag, a trailing partial tag is parked in @pending_think_tag.
+        def consume_think_content(remaining, end_tag, thinking)
+          end_index = remaining.index(end_tag)
+          if end_index
+            thinking << remaining.slice(0, end_index)
+            @inside_think_tag = false
+            remaining.slice((end_index + end_tag.length)..) || +''
+          else
+            suffix_len = longest_suffix_prefix(remaining, end_tag)
+            thinking << remaining.slice(0, remaining.length - suffix_len)
+            @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+            +''
+          end
+        end
+        # Outside a think block: collects text up to the start tag and returns what follows it.
+        # Without a start tag, a trailing partial tag is parked in @pending_think_tag.
+        def consume_non_think_content(remaining, start_tag, output)
+          start_index = remaining.index(start_tag)
+          if start_index
+            output << remaining.slice(0, start_index)
+            @inside_think_tag = true
+            remaining.slice((start_index + start_tag.length)..) || +''
+          else
+            suffix_len = longest_suffix_prefix(remaining, start_tag)
+            output << remaining.slice(0, remaining.length - suffix_len)
+            @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+            +''
+          end
+        end
+        # Length of the longest proper prefix of tag that text ends with (0 when none).
+        # Capped at tag.length - 1 because callers only get here when no full tag was found.
+        def longest_suffix_prefix(text, tag)
+          max = [text.length, tag.length - 1].min
+          max.downto(1) do |len|
+            return len if text.end_with?(tag[0, len])
+          end
+          0
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/streaming.rb ADDED Viewed

@@ -0,0 +1,181 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      # Handles streaming responses from AI providers.
+      #
+      # Calls stream_url and build_chunk, neither of which is defined in this module —
+      # presumably they are supplied by the provider that mixes it in (confirm against callers).
+      module Streaming
+        module_function
+        # Posts payload to stream_url, feeding every parsed chunk to both the accumulator
+        # and the caller's block, and returns the assembled message.
+        #
+        # @param connection [#post] connection whose request options accept an on_data callback
+        # @param payload [Object] request body passed through to connection.post
+        # @param additional_headers [Hash] merged under the request's own headers
+        # @yieldparam chunk [Object] each chunk produced by build_chunk
+        # @return [Object] the result of StreamAccumulator#to_message
+        def stream_response(connection, payload, additional_headers = {}, &block)
+          accumulator = StreamAccumulator.new
+          response = connection.post stream_url, payload do |req|
+            # Headers already on the request take precedence over additional_headers.
+            req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
+            on_data = handle_stream do |chunk|
+              accumulator.add chunk
+              block.call chunk
+            end
+            # Faraday 1 request options are written with []=, later versions with a setter.
+            if faraday_1?
+              req.options[:on_data] = on_data
+            else
+              req.options.on_data = on_data
+            end
+          end
+          message = accumulator.to_message(response)
+          Legion::Extensions::Llm.logger.debug { "Stream completed: #{message.content}" }
+          message
+        end
+        # Builds the on_data callback; only Hash payloads are turned into chunks and yielded.
+        def handle_stream(&block)
+          build_on_data_handler do |data|
+            block.call(build_chunk(data)) if data.is_a?(Hash)
+          end
+        end
+        private
+        def faraday_1?
+          Faraday::VERSION.start_with?('1')
+        end
+        # Wires one SSE parser and one error buffer into a version-specific on_data proc.
+        def build_on_data_handler(&)
+          buffer = +''
+          parser = EventStreamParser::Parser.new
+          FaradayHandlers.build(
+            faraday_v1: faraday_1?,
+            on_chunk: ->(chunk, env) { process_stream_chunk(chunk, parser, env, &) },
+            on_failed_response: ->(chunk, env) { handle_failed_response(chunk, buffer, env) }
+          )
+        end
+        # Dispatches a raw chunk: SSE error event, bare JSON error body, or regular SSE data.
+        def process_stream_chunk(chunk, parser, env, &)
+          if Legion::Extensions::Llm.config.log_stream_debug
+            Legion::Extensions::Llm.logger.debug { "Received chunk: #{chunk}" }
+          end
+          if error_chunk?(chunk)
+            handle_error_chunk(chunk, env)
+          elsif json_error_payload?(chunk)
+            handle_json_error_chunk(chunk, env)
+          else
+            yield handle_sse(chunk, parser, env, &)
+          end
+        end
+        def error_chunk?(chunk)
+          chunk.start_with?('event: error')
+        end
+        def json_error_payload?(chunk)
+          chunk.lstrip.start_with?('{') && chunk.include?('"error"')
+        end
+        def handle_json_error_chunk(chunk, env)
+          parse_error_from_json(chunk, env, 'Failed to parse JSON error chunk')
+        end
+        # Extracts the JSON payload from the chunk's "data:" line and reports it as an error.
+        # A chunk without any data line is logged and ignored instead of raising
+        # NoMethodError on nil.
+        #
+        # @return [Object, nil] nil when the chunk carries no data line
+        def handle_error_chunk(chunk, env)
+          data_line = chunk.each_line(chomp: true).find { |line| line.start_with?('data:') }
+          if data_line.nil?
+            Legion::Extensions::Llm.logger.debug { 'Failed to parse error chunk: no data line present' }
+            return
+          end
+          parse_error_from_json(data_line.delete_prefix('data:').lstrip, env, 'Failed to parse error chunk')
+        end
+        # Accumulates the body of a non-200 response until it parses as JSON, then reports it.
+        def handle_failed_response(chunk, buffer, env)
+          buffer << chunk
+          error_data = Legion::JSON.parse(buffer, symbolize_names: false)
+          handle_parsed_error(error_data, env)
+        rescue Legion::JSON::ParseError
+          Legion::Extensions::Llm.logger.debug { "Accumulating error chunk: #{chunk}" }
+        end
+        # Feeds the chunk to the SSE parser; error events are reported, the "[DONE]"
+        # sentinel is skipped and every other event's parsed data is yielded.
+        def handle_sse(chunk, parser, env, &)
+          parser.feed(chunk) do |type, data|
+            case type.to_sym
+            when :error
+              handle_error_event(data, env)
+            else
+              yield handle_data(data, env, &) unless data == '[DONE]'
+            end
+          end
+        end
+        # Parses one event's data; a Hash containing an "error" key is reported as an error.
+        def handle_data(data, env)
+          parsed = Legion::JSON.parse(data, symbolize_names: false)
+          return parsed unless parsed.is_a?(Hash) && parsed.key?('error')
+          handle_parsed_error(parsed, env)
+        rescue Legion::JSON::ParseError => e
+          Legion::Extensions::Llm.logger.debug { "Failed to parse data chunk: #{e.message}" }
+        end
+        def handle_error_event(data, env)
+          parse_error_from_json(data, env, 'Failed to parse error event')
+        end
+        # @return [Array(Integer, String)] always status 500 plus the best available message
+        def parse_streaming_error(data)
+          error_data = Legion::JSON.parse(data, symbolize_names: false)
+          [500, error_data['message'] || 'Unknown streaming error']
+        rescue Legion::JSON::ParseError => e
+          Legion::Extensions::Llm.logger.debug { "Failed to parse streaming error: #{e.message}" }
+          [500, "Failed to parse error: #{data}"]
+        end
+        # Wraps the parsed error in a response-like object and hands it to ErrorMiddleware.
+        def handle_parsed_error(parsed_data, env)
+          status, _message = parse_streaming_error(parsed_data.to_json)
+          error_response = build_stream_error_response(parsed_data, env, status)
+          ErrorMiddleware.parse_error(provider: self, response: error_response)
+        end
+        def parse_error_from_json(data, env, error_message)
+          parsed_data = Legion::JSON.parse(data, symbolize_names: false)
+          handle_parsed_error(parsed_data, env)
+        rescue Legion::JSON::ParseError => e
+          Legion::Extensions::Llm.logger.debug { "#{error_message}: #{e.message}" }
+        end
+        # Builds an object answering #body and #status for the error middleware.
+        # Without an env to merge into (always the case on Faraday 1, and possible on later
+        # versions since env is treated as nilable here) a plain Struct stands in.
+        def build_stream_error_response(parsed_data, env, status)
+          error_status = status || env&.status || 500
+          if faraday_1? || env.nil?
+            Struct.new(:body, :status).new(parsed_data, error_status)
+          else
+            env.merge(body: parsed_data, status: error_status)
+          end
+        end
+        # Builds Faraday on_data handlers for different major versions.
+        module FaradayHandlers
+          module_function
+          # @param faraday_v1 [Boolean] selects the two-argument (v1) callback shape
+          # @param on_chunk [#call] receives (chunk, env) for streamed data
+          # @param on_failed_response [#call] receives (chunk, env) when the status is not 200
+          # @return [Proc] callback to install as the request's on_data option
+          def build(faraday_v1:, on_chunk:, on_failed_response:)
+            if faraday_v1
+              v1_on_data(on_chunk)
+            else
+              v2_on_data(on_chunk, on_failed_response)
+            end
+          end
+          # The v1 callback receives no env, so nil is passed along in its place.
+          def v1_on_data(on_chunk)
+            proc do |chunk, _size|
+              on_chunk.call(chunk, nil)
+            end
+          end
+          # Anything other than a 200 status (including a missing env) is routed to the
+          # failed-response handler.
+          def v2_on_data(on_chunk, on_failed_response)
+            proc do |chunk, _bytes, env|
+              if env&.status == 200
+                on_chunk.call(chunk, env)
+              else
+                on_failed_response.call(chunk, env)
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/thinking.rb ADDED Viewed

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      # Represents provider thinking output.
+      class Thinking
+        # Normalized config for thinking across providers.
+        class Config
+          attr_reader :effort, :budget
+          # A Symbol effort is stored as its String form; any other value is kept as given.
+          def initialize(effort: nil, budget: nil)
+            @budget = budget
+            @effort = case effort
+                      when Symbol then effort.to_s
+                      else effort
+                      end
+          end
+          # @return [Boolean] true when an effort or a budget has been set
+          def enabled?
+            !(effort.nil? && budget.nil?)
+          end
+        end
+        attr_reader :text, :signature
+        class << self
+          # Builds an instance, treating empty Strings as absent.
+          #
+          # @return [Thinking, nil] nil when neither text nor signature remains
+          def build(text: nil, signature: nil)
+            text, signature = [text, signature].map do |value|
+              value.is_a?(String) && value.empty? ? nil : value
+            end
+            return if text.nil? && signature.nil?
+            new(text: text, signature: signature)
+          end
+        end
+        def initialize(text: nil, signature: nil)
+          @text = text
+          @signature = signature
+        end
+        # Pretty-printer hook: prints the text but only a placeholder for the signature.
+        def pretty_print(printer)
+          shown_signature = ('[REDACTED]' if signature)
+          printer.object_group(self) do
+            printer.breakable
+            printer.text('text=')
+            printer.pp(text)
+            printer.comma_breakable
+            printer.text('signature=')
+            printer.pp(shown_signature)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/tokens.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      # Represents token usage for a response.
+      class Tokens
+        attr_reader :input, :output, :cached, :cache_creation, :thinking
+        # rubocop:disable Metrics/ParameterLists
+        # `reasoning` is accepted as a fallback for `thinking`; `thinking` wins when both are set.
+        def initialize(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
+          @input, @output = input, output
+          @cached, @cache_creation = cached, cache_creation
+          @thinking = thinking || reasoning
+        end
+        # Builds an instance, or returns nil when every count is nil.
+        #
+        # @return [Tokens, nil]
+        def self.build(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
+          counts = {
+            input: input,
+            output: output,
+            cached: cached,
+            cache_creation: cache_creation,
+            thinking: thinking,
+            reasoning: reasoning
+          }
+          return if counts.each_value.all?(&:nil?)
+          new(**counts)
+        end
+        # rubocop:enable Metrics/ParameterLists
+        # @return [Hash{Symbol => Object}] the counts under their *_tokens names, nil entries omitted
+        def to_h
+          named = {
+            input_tokens: input,
+            output_tokens: output,
+            cached_tokens: cached,
+            cache_creation_tokens: cache_creation,
+            thinking_tokens: thinking
+          }
+          named.reject { |_name, count| count.nil? }
+        end
+        # Same value as #thinking, under its other name.
+        def reasoning
+          thinking
+        end
+      end
+    end
+  end
+end