RubyGems - lex-llm - Versions diffs - 0.4.18 → 0.5.1 - Mend

lex-llm 0.4.18 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125)

data/lib/legion/extensions/llm/canonical/usage.rb ADDED Viewed

@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+# -- from_hash normalization is intentional
+module Legion
+  module Extensions
+    module Llm
+      module Canonical
+        # rubocop:disable Lint/ConstantDefinitionInBlock -- required for Data.define block scope
+        # Canonical usage/metering data for a response: a Data value with five token counters plus a `units` member.
+        # Ports field vocabulary from lex-llm Tokens and legion-llm Types.
+        # Includes non-token units extension point per G20b.
+        Usage = ::Data.define(
+          :input_tokens, :output_tokens, :cache_read_tokens, :cache_write_tokens,
+          :thinking_tokens, :units
+        ) do
+          USAGE_KNOWN_KEYS = %i[input_tokens output_tokens cache_read_tokens cache_write_tokens
+                                thinking_tokens units].freeze # same names as the members above; not used by the methods below
+          # Build a Usage from a Hash (raw provider response or deserialized wire payload); returns nil for a nil or empty source.
+          # Accepts canonical key names and legacy provider spellings, with String or Symbol keys; unmapped keys are ignored.
+          def self.from_hash(source)
+            return nil if source.nil? || source.empty?
+            h = source.transform_keys(&:to_sym) # symbol-keyed copy; the caller's Hash itself is not modified
+            # Normalize legacy key names: a canonical key that already holds a value wins, otherwise the first alias holding a value is used
+            h[:input_tokens] ||= h.delete(:input) || h.delete(:prompt_tokens)
+            h[:output_tokens] ||= h.delete(:output) || h.delete(:completion_tokens)
+            h[:cache_read_tokens] ||= h.delete(:cached) || h.delete(:cache_read)
+            h[:cache_write_tokens] ||= h.delete(:cache_creation) || h.delete(:cache_write)
+            h[:thinking_tokens] ||= h.delete(:thinking) || h.delete(:reasoning)
+            # Extract nested details (OpenAI prompt_tokens_details / input_tokens_details); only consulted when no flat key supplied a value
+            h[:cache_read_tokens] ||= dig_nested(h, :prompt_tokens_details, :cached_tokens) ||
+                                      dig_nested(h, :input_tokens_details, :cached_tokens)
+            h[:thinking_tokens] ||= dig_nested(h, :completion_tokens_details, :reasoning_tokens) ||
+                                    dig_nested(h, :output_tokens_details, :reasoning_tokens)
+            # Extract units (non-token extension point — G20b); falls back to an empty Hash when absent
+            units = h.delete(:units) || {}
+            new(
+              input_tokens: h[:input_tokens],
+              output_tokens: h[:output_tokens],
+              cache_read_tokens: h[:cache_read_tokens],
+              cache_write_tokens: h[:cache_write_tokens],
+              thinking_tokens: h[:thinking_tokens],
+              units: units
+            )
+          end
+          def self.dig_nested(hash, details_key, value_key) # helper for from_hash: reads value_key out of the nested Hash stored under details_key
+            details = hash[details_key]
+            return nil unless details.is_a?(Hash) # missing or non-Hash details yield nil
+            details[value_key] || details[value_key.to_s] # nested keys are not symbolized, so try the key as given, then its String form
+          end
+          # Serialize to a Hash for AMQP/fleet/wire transport; members whose value is nil are dropped.
+          def to_h
+            super.compact
+          end
+          # Total tokens across all categories: sum of the five token counters, skipping nil ones; `units` is not included.
+          def total_tokens
+            [input_tokens, output_tokens, cache_read_tokens, cache_write_tokens,
+             thinking_tokens].compact.sum # NOTE(review): if a provider reports cached/reasoning tokens as a subset of input/output, this double-counts — confirm
+          end
+        end
+        # rubocop:enable Lint/ConstantDefinitionInBlock
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/canonical.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+require_relative 'canonical/thinking'
+require_relative 'canonical/usage'
+require_relative 'canonical/params'
+require_relative 'canonical/content_block'
+require_relative 'canonical/tool_definition'
+require_relative 'canonical/tool_schema'
+require_relative 'canonical/tool_call'
+require_relative 'canonical/message'
+require_relative 'canonical/request'
+require_relative 'canonical/response'
+require_relative 'canonical/chunk'
+module Legion
+  module Extensions
+    module Llm
+      # Canonical types for the N×N client→provider routing architecture.
+      #
+      # These Data.define structs form the single contract between client translators
+      # and provider translators. Per Amendment A: immutable, strict factories,
+      # enum validation, unknown keys → metadata.
+      #
+      # Contract version: incremented on any breaking change to the canonical shape.
+      # Provider registration refuses gems built against a mismatched version (G7).
+      module Canonical
+        CONTRACT_VERSION = '1.0.0'
+        # Constant names (Symbols) of the available canonical types. NOTE(review): canonical/tool_schema is required above but ToolSchema is not listed — presumably a helper, not a type; confirm.
+        TYPES = %i[
+          Thinking Usage Params ContentBlock
+          ToolDefinition ToolCall Message
+          Request Response Chunk
+        ].freeze
+        class << self
+          # List all canonical type classes, resolved from TYPES with const_get and returned in TYPES order.
+          def types
+            TYPES.map { |name| const_get(name) }
+          end
+          # Check if a given constant name (String or Symbol) is listed in TYPES; membership only, the constant is not looked up.
+          def type?(name)
+            TYPES.include?(name.to_sym)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/chat.rb CHANGED Viewed

@@ -11,9 +11,7 @@ module Legion
         attr_reader :model, :messages, :tools, :tool_prefs, :params, :headers, :schema
         def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil)
-          if assume_model_exists && !provider
-            raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
-          end
+          raise ArgumentError, 'Provider must be specified if assume_model_exists is true' if assume_model_exists && !provider
           @context = context
           @config = context&.config || Legion::Extensions::Llm.config
@@ -139,7 +137,7 @@ module Legion
           messages.each(&)
         end
-        def complete(&) # rubocop:disable Metrics/PerceivedComplexity
+        def complete(&)
           response = @provider.complete(
             messages,
             tools: @tools,
@@ -234,7 +232,7 @@ module Legion
           end
         end
-        def handle_tool_calls(response, &) # rubocop:disable Metrics/PerceivedComplexity
+        def handle_tool_calls(response, &)
           halt_result = nil
           response.tool_calls.each_value do |tool_call|

data/lib/legion/extensions/llm/connection.rb CHANGED Viewed

@@ -77,9 +77,13 @@ module Legion
         def setup_logging(faraday)
           logger = faraday_logger
+          # Enable request body logging when the logger is at DEBUG level,
+          # or when explicitly enabled via fleet request_payload setting.
+          request_payload = Legion::Extensions::Llm.default_settings.dig(:fleet, :request, :logger, :request_payload)
+          bodies_enabled = request_payload == true || debug_logger?(logger)
           faraday.response :logger,
                            logger,
-                           bodies: debug_logger?(logger),
+                           bodies: bodies_enabled,
                            errors: false,
                            headers: false,
                            log_level: :debug do |logger|

data/lib/legion/extensions/llm/error.rb CHANGED Viewed

@@ -54,6 +54,8 @@ module Legion
       # Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
       class ErrorMiddleware < Faraday::Middleware
+        extend Legion::Logging::Helper
         STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
         def initialize(app, options = {})
@@ -80,7 +82,7 @@ module Legion
             /reduce the length of messages/i
           ].freeze
-          def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplexity
+          def parse_error(provider:, response:)
             response = response_with_stream_error_body(response)
             message = provider&.parse_error(response)
@@ -88,9 +90,7 @@ module Legion
             when 200..399
               message
             when 400
-              if context_length_exceeded?(message)
-                raise ContextLengthExceededError.new(response, message || 'Context length exceeded')
-              end
+              raise ContextLengthExceededError.new(response, message || 'Context length exceeded') if context_length_exceeded?(message)
               raise BadRequestError.new(response, message || 'Invalid request - please check your input')
             when 401
@@ -101,9 +101,7 @@ module Legion
               raise ForbiddenError.new(response,
                                        message || 'Forbidden - you do not have permission to access this resource')
             when 429
-              if context_length_exceeded?(message)
-                raise ContextLengthExceededError.new(response, message || 'Context length exceeded')
-              end
+              raise ContextLengthExceededError.new(response, message || 'Context length exceeded') if context_length_exceeded?(message)
               raise RateLimitError.new(response, message || 'Rate limit exceeded - please wait a moment')
             when 500

data/lib/legion/extensions/llm/fleet/envelope_validation.rb CHANGED Viewed

@@ -14,9 +14,7 @@ module Legion
           def reject_legacy_options!
             LEGACY_OPTIONS.each do |key|
-              if @options.key?(key) || @options.key?(key.to_s)
-                raise ArgumentError, "#{key} is not supported by fleet protocol v2"
-              end
+              raise ArgumentError, "#{key} is not supported by fleet protocol v2" if @options.key?(key) || @options.key?(key.to_s)
             end
           end

data/lib/legion/extensions/llm/fleet/provider_responder.rb CHANGED Viewed

@@ -119,9 +119,7 @@ module Legion
               raise ConfigurationError,
                     "fleet provider instance is not configured: #{instance_id}"
             end
-            unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
-              raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}"
-            end
+            raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}" unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
             provider_class.new(deep_symbolize(instance_settings))
           end

data/lib/legion/extensions/llm/fleet/token_validator.rb CHANGED Viewed

@@ -186,9 +186,7 @@ module Legion
           end
           def signing_key
-            if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
-              return ::Legion::Crypt.cluster_secret
-            end
+            return ::Legion::Crypt.cluster_secret if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
             raise TokenError, 'no signing key available - Legion::Crypt not initialized'
           rescue TokenError

data/lib/legion/extensions/llm/model/info.rb CHANGED Viewed

@@ -19,7 +19,7 @@ module Legion
           :parameter_size, :quantization, :size_bytes,
           :modalities_input, :modalities_output, :metadata
         ) do
-          # rubocop:disable Metrics/ParameterLists, Metrics/PerceivedComplexity
+          # rubocop:disable Metrics/ParameterLists
           def initialize(
             id:, name: nil, provider: nil, instance: :default,
             family: nil, capabilities: [], context_length: nil,
@@ -46,7 +46,7 @@ module Legion
               metadata: metadata.is_a?(Hash) ? metadata : {}
             )
           end
-          # rubocop:enable Metrics/ParameterLists, Metrics/PerceivedComplexity
+          # rubocop:enable Metrics/ParameterLists
           # ── Capability predicates ─────────────────────────────────────
@@ -206,11 +206,9 @@ module Legion
           class << self
             private
-            def extract_modalities(data) # rubocop:disable Metrics/PerceivedComplexity
+            def extract_modalities(data)
               # New-style keys take priority (round-trip from to_h)
-              if data.key?(:modalities_input) || data.key?(:modalities_output)
-                return [Array(data[:modalities_input]), Array(data[:modalities_output])]
-              end
+              return [Array(data[:modalities_input]), Array(data[:modalities_output])] if data.key?(:modalities_input) || data.key?(:modalities_output)
               # Legacy: modalities is a hash or Modalities object
               modalities_data = data[:modalities]

data/lib/legion/extensions/llm/models.rb CHANGED Viewed

@@ -123,7 +123,7 @@ module Legion
             fetch_provider_models(remote_only: remote_only)[:models]
           end
-          def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
+          def resolve(model_id, provider: nil, assume_exists: false, config: nil)
             config ||= Legion::Extensions::Llm.config
             provider_class = provider ? resolve_provider_class(provider) : nil
@@ -168,7 +168,7 @@ module Legion
             instance.respond_to?(method, include_private) || super
           end
-          def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
+          def fetch_models_dev_models(existing_models)
             log.info 'Fetching models from models.dev API...'
             connection = Connection.basic do |f|
@@ -300,7 +300,7 @@ module Legion
             end
           end
-          def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
+          def add_provider_metadata(models_dev_model, provider_model)
             data = models_dev_model.to_h
             data[:name] = provider_model.name if blank_value?(data[:name])
             data[:family] = provider_model.family if blank_value?(data[:family])

data/lib/legion/extensions/llm/provider/open_ai_compatible.rb CHANGED Viewed

@@ -76,7 +76,12 @@ module Legion
           def format_openai_tool_calls(tool_calls)
             return nil unless tool_calls&.any?
-            tool_calls.values.map do |tool_call|
+            # Array is the canonical shape (per canonical/message.rb); Hash
+            # is the legacy lex-llm shape (id => ToolCall). Both flow through
+            # this renderer depending on caller.
+            calls = tool_calls.is_a?(Hash) ? tool_calls.values : Array(tool_calls)
+            calls.map do |tool_call|
               {
                 id: tool_call.id,
                 type: 'function',
@@ -95,9 +100,9 @@ module Legion
               {
                 type: 'function',
                 function: {
-                  name: tool.name,
-                  description: tool.description,
-                  parameters: tool.params_schema || { type: 'object', properties: {} }
+                  name: Canonical::ToolSchema.tool_name(tool),
+                  description: Canonical::ToolSchema.tool_description(tool),
+                  parameters: Canonical::ToolSchema.extract(tool)
                 }
               }
             end

data/lib/legion/extensions/llm/provider.rb CHANGED Viewed

@@ -137,7 +137,7 @@ module Legion
           parse_list_models_response response, slug, capabilities
         end
-        def discover_offerings(live: false, **filters)
+        def discover_offerings(live: false, raise_on_unreachable: false, **filters)
           return filter_cached_offerings(Array(@cached_offerings), filters) unless live
           provider_health = health(live:)
@@ -148,8 +148,10 @@ module Legion
             offering_from_model(model, health: provider_health)
           end
           @cached_offerings
-        rescue Faraday::ConnectionFailed => e
+        rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
           log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
+          raise if raise_on_unreachable
           []
         end
@@ -224,9 +226,16 @@ module Legion
         end
         def cache_enabled?
-          return false unless config.respond_to?(:llm_cache_enabled)
+          explicit = config.llm_cache_enabled if config.respond_to?(:llm_cache_enabled)
+          unless explicit.nil?
+            log.debug { "[#{slug}] cache_enabled? source=per_provider value=#{explicit}" }
+            return explicit == true
+          end
-          config.llm_cache_enabled == true
+          global = global_prompt_caching_enabled?
+          log.debug { "[#{slug}] cache_enabled? source=global value=#{global}" }
+          global
         end
         def cache_control_prefix_tokens
@@ -528,6 +537,14 @@ module Legion
         private
+        def global_prompt_caching_enabled? # true only when Legion::Settings holds llm.prompt_caching.enabled == true
+          return false unless defined?(Legion::Settings) # settings constant not loaded: treat caching as disabled
+          Legion::Settings.dig(:llm, :prompt_caching, :enabled) == true
+        rescue StandardError # any failure during the settings lookup also counts as disabled
+          false
+        end
         def model_detail_cache_key(model_name)
           tier = offering_tier
           instance_key = cache_instance_key

data/lib/legion/extensions/llm/provider_contract.rb CHANGED Viewed

@@ -11,10 +11,19 @@ module Legion
           embed: [%i[keyreq text], %i[keyreq model]],
           image: [%i[keyreq prompt], %i[keyreq model]],
           list_models: [%i[key live], %i[keyrest filters]],
-          discover_offerings: [%i[key live], %i[keyrest filters]],
+          discover_offerings: [%i[key live], %i[key raise_on_unreachable], %i[keyrest filters]],
           health: [%i[key live]],
           count_tokens: [%i[keyreq messages], %i[keyreq model], %i[key params]]
         }.freeze
+        # Tools passed to chat/stream_chat must support Canonical::ToolDefinition objects.
+        # Providers must not crash on Data.define instances (not Hashes).
+        TOOL_SUPPORT_CONTRACT = <<~DOC
+          - chat and stream_chat accept keyword `tools:` (Hash<name, tool_object>)
+          - tools may be Canonical::ToolDefinition, Hash, or legacy Lex::Llm::Tool
+          - Renderers must use Canonical::ToolSchema.extract(tool) for schema access
+          - discover_offerings(live: true, raise_on_unreachable: true) raises on transport failure
+        DOC
       end
     end
   end

data/lib/legion/extensions/llm/routing/lane_key.rb CHANGED Viewed

@@ -10,9 +10,7 @@ module Legion
           def for(offering, prefix: 'llm.fleet', include_context: true, include_fingerprint: false)
             parts = [prefix, lane_kind(offering), model_slug(lane_model(offering))]
-            if include_context && offering.inference? && offering.context_window
-              parts << "ctx#{offering.context_window}"
-            end
+            parts << "ctx#{offering.context_window}" if include_context && offering.inference? && offering.context_window
             parts.push('elig', eligibility_fingerprint(offering)) if include_fingerprint
             parts.join('.')
           end

data/lib/legion/extensions/llm/stream_accumulator.rb CHANGED Viewed

@@ -39,7 +39,7 @@ module Legion
           log.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
         end
-        def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
+        def filtered_chunk(chunk)
           has_content = !@last_content_delta.empty?
           has_thinking = !@last_thinking_delta.empty?
           has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
@@ -57,6 +57,27 @@ module Legion
           )
         end
+        # Flush any text still held in the untagged-preamble buffer as a final
+        # streamed chunk. Without this, short responses that match the
+        # untagged-reasoning heuristic (e.g. starting with "I", "The", "Let me")
+        # and never hit a double newline are buffered for the entire stream and
+        # the caller's block never receives a single delta. Returns a Chunk, or nil when nothing was flushed.
+        def flush_pending_chunk
+          return nil if @untagged_preamble_buffer.empty? # nothing buffered, so there is no final chunk
+          @last_content_delta = +'' # start from empty per-chunk deltas so the chunk carries only the flushed text
+          @last_thinking_delta = +''
+          flush_pending_untagged_preamble_into_deltas
+          return nil if @last_content_delta.empty? && @last_thinking_delta.empty? # the flush produced no text
+          Chunk.new(
+            role: :assistant,
+            content: @last_content_delta.empty? ? nil : @last_content_delta,
+            thinking: @last_thinking_delta.empty? ? nil : Thinking.build(text: @last_thinking_delta),
+            model_id: model_id
+          )
+        end
         def to_message(response)
           flush_pending_untagged_preamble
@@ -233,6 +254,24 @@ module Legion
           @untagged_preamble_pending = false
         end
+        # Same as flush_pending_untagged_preamble, but also records the flushed
+        # text in the per-chunk delta accumulators so flush_pending_chunk can
+        # surface it to the streaming block. Always leaves the buffer empty and the pending flag false.
+        def flush_pending_untagged_preamble_into_deltas
+          content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(@untagged_preamble_buffer)
+          if thinking # the extractor returned a thinking part: record both parts in totals and per-chunk deltas
+            @content << content
+            @last_content_delta << content
+            @thinking_text << thinking
+            @last_thinking_delta << thinking
+          else # no thinking part: the whole buffer is appended as plain content
+            @content << @untagged_preamble_buffer
+            @last_content_delta << @untagged_preamble_buffer
+          end
+          @untagged_preamble_buffer = +'' # buffer consumed; replace it with a fresh unfrozen String
+          @untagged_preamble_pending = false
+        end
         def append_thinking_from_chunk(chunk)
           thinking = chunk.thinking
           return unless thinking

data/lib/legion/extensions/llm/streaming.rb CHANGED Viewed

@@ -16,9 +16,7 @@ module Legion
           response = connection.post stream_url, payload do |req|
             req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
             on_chunk = build_stream_callback(accumulator, block)
-            if Legion::Extensions::Llm.config.log_stream_debug
-              log.debug { "Stream callback prepared: #{on_chunk.inspect}" }
-            end
+            log.debug { "Stream callback prepared: #{on_chunk.inspect}" } if Legion::Extensions::Llm.config.log_stream_debug
             if faraday_1?
               req.options[:on_data] = handle_stream(&on_chunk)
             else
@@ -26,6 +24,11 @@ module Legion
             end
           end
+          # Release any text held by the untagged-preamble heuristic so short
+          # responses still stream at least one delta to the caller.
+          final_chunk = accumulator.flush_pending_chunk
+          block&.call(final_chunk) if final_chunk
           message = accumulator.to_message(response)
           log.debug { "Stream completed: #{message.content}" }
           message
@@ -33,6 +36,8 @@ module Legion
         def build_stream_callback(accumulator, block)
           proc do |chunk|
+            next unless chunk
             accumulator.add chunk
             filtered = accumulator.filtered_chunk(chunk)
             block.call(filtered) if filtered
@@ -41,7 +46,10 @@ module Legion
         def handle_stream(&block)
           build_on_data_handler do |data|
-            block.call(build_chunk(data)) if data.is_a?(Hash)
+            next unless data.is_a?(Hash)
+            chunk = build_chunk(data)
+            block.call(chunk) if chunk
           end
         end
@@ -185,7 +193,7 @@ module Legion
         def build_stream_error_response(parsed_data, env, status)
           error_status = status || env&.status || 500
-          if faraday_1?
+          if faraday_1? || env.nil?
             Struct.new(:body, :status).new(parsed_data, error_status)
           else
             env.merge(body: parsed_data, status: error_status)

data/lib/legion/extensions/llm/tool.rb CHANGED Viewed

@@ -235,9 +235,7 @@ module Legion
           def resolve_direct_schema(schema)
             return extract_schema(schema.to_json_schema) if schema.respond_to?(:to_json_schema)
             return Legion::Extensions::Llm::Utils.deep_dup(schema) if schema.is_a?(Hash)
-            if schema.is_a?(Class) && schema.method_defined?(:to_json_schema)
-              return extract_schema(schema.new.to_json_schema)
-            end
+            return extract_schema(schema.new.to_json_schema) if schema.is_a?(Class) && schema.method_defined?(:to_json_schema)
             nil
           end

data/lib/legion/extensions/llm/version.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Legion
   module Extensions
     module Llm
-      VERSION = '0.4.18'
+      VERSION = '0.5.1'
     end
   end
 end