RubyGems - lex-llm - Versions diffs - 0.5.1 → 0.5.4 - Mend

lex-llm 0.5.1 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +18 -0
data/lib/legion/extensions/llm/capability_policy.rb +107 -0
data/lib/legion/extensions/llm/configuration.rb +4 -0
data/lib/legion/extensions/llm/error.rb +15 -0
data/lib/legion/extensions/llm/provider.rb +68 -3
data/lib/legion/extensions/llm/routing/model_offering.rb +14 -2
data/lib/legion/extensions/llm/streaming.rb +24 -1
data/lib/legion/extensions/llm/version.rb +1 -1
data/lib/legion/extensions/llm.rb +3 -0
data/spec/legion/extensions/llm/capability_policy_spec.rb +192 -0
data/spec/legion/extensions/llm/configuration_spec.rb +40 -0
data/spec/legion/extensions/llm/provider_spec.rb +87 -0
data/spec/legion/extensions/llm/routing/model_offering_spec.rb +58 -0
data/spec/legion/extensions/llm/streaming_spec.rb +9 -0
metadata +3 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2aef21677943bb1b621d6defad35f76e3ca8195ea971b172e7924c7b83774f62
-  data.tar.gz: 8a46fdb9ead39ffb08335e019e4623844ad91de4af1cb30960802a39ecf75ab0
+  metadata.gz: a67345a318fe016e8b7c302f08cc335bf25f1e4605a2b16fd9d95a9c9d6ccd04
+  data.tar.gz: a0d2f7b5998b3a70754cb538515e581cb9a17ae7bc38b72de305159cc486edd5
 SHA512:
-  metadata.gz: 6909cc6018428bde9983ab3cf001d31f87cd9a90c7c58571e6daa3041dbe25962f610ea37b79a1abf1094385feea37e6f2d466c2131ed41e939e7c4da8ac3980
-  data.tar.gz: 5c2b52d64916c8b169a49dc3459ca1aac9c4af6f148865ae190368d364754dce3e35865bf3cfa525ce431cd0bfe0f940240c989b20f5a4be0f26c9cba4574471
+  metadata.gz: 0e1d43f8bfc296cc15e1389f153adc134baf7dfa051d5604617126bfbfc558ce02416caf24509521d43448fc05bc43b278c8cf4fb6a197465de10af36489ada5
+  data.tar.gz: 0163f3ab169203405a2081c79712343ad5e36ae76bafaa1c703ac20e30ec46324d584bb69da0ebed064a85f9eafcb054ed8f4cb4395789b516607993e3fee53e

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,23 @@
 # Changelog
+## 0.5.4 - 2026-06-17
+### Fixed
+- **Model policy enforced at dispatch (compliance)** — `model_whitelist` / `model_blacklist` were only applied when *listing* models (`discover_offerings`); inference dispatch never checked them, so a denied model could still be invoked directly. Added `enforce_model_allowed!`, called at every dispatch entry point (`complete` — which backs `chat`/`stream_chat` — plus `embed`, `moderate`, `paint`), raising the new `ModelNotAllowedError` *before* any provider API call. Fail-closed, no exceptions. `ModelNotAllowedError` is a distinct, non-HTTP error so callers can treat it as a terminal policy outcome (non-retryable, non-escalatable) rather than a provider failure.
+## 0.5.3 - 2026-06-16
+### Fixed
+- **Streaming error classification** — Partial non-2xx streaming responses now raise status-specific errors (`UnauthorizedError`, `ForbiddenError`, `RateLimitError`, `ServiceUnavailableError`, etc.) instead of always raising `ServerError`. This preserves auth failures for downstream escalation and circuit handling.
+## 0.5.2 - 2026-06-15
+### Added
+- **CapabilityPolicy module** — Shared capability resolution with 7-layer precedence chain (model_override > instance_override > provider_override > model_metadata > provider_catalog > probe > provider_envelope > default_false). All optional capabilities default false.
+- **Boolean aliases** — `enable_thinking`, `tools_flag`, `embedding_flag`, etc. map to canonical capability keys at any settings level.
+- **ModelOffering#capability_sources** — Per-capability source metadata preserved through offering serialization.
+- **Provider#offering_from_model** — Base class now generates `:model_metadata` source tags for capabilities from provider API responses.
 ## 0.5.1 - 2026-06-12
 ### Fixed

data/lib/legion/extensions/llm/capability_policy.rb ADDED Viewed

@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+module Legion
+  module Extensions
+    module Llm
+      # Resolves capability truth from multiple sources with explicit precedence.
+      # Returns both a flat capability list and per-capability source metadata.
+      module CapabilityPolicy
+        OPTIONAL_CAPABILITIES = %i[
+          streaming tools vision embeddings thinking structured_output image audio_transcription audio_speech
+        ].freeze
+        BOOLEAN_ALIASES = {
+          enable_streaming: :streaming,
+          enable_tools: :tools,
+          enable_thinking: :thinking,
+          enable_vision: :vision,
+          enable_embeddings: :embeddings,
+          enable_images: :image,
+          streaming_flag: :streaming,
+          tool_flag: :tools,
+          tools_flag: :tools,
+          thinking_flag: :thinking,
+          vision_flag: :vision,
+          embedding_flag: :embeddings,
+          embeddings_flag: :embeddings,
+          image_flag: :image,
+          images_flag: :image
+        }.freeze
+        module_function
+        def resolve(real:, provider_catalog:, probe:, provider_envelope:, provider_config:, instance_config:, model_config:) # rubocop:disable Metrics/ParameterLists
+          sources = {}
+          OPTIONAL_CAPABILITIES.each do |capability|
+            sources[capability] = resolve_one(
+              capability,
+              real:, provider_catalog:, probe:, provider_envelope:,
+              provider_config:, instance_config:, model_config:
+            )
+          end
+          {
+            capabilities: sources.filter_map { |capability, data| capability if data[:value] == true },
+            sources: sources
+          }
+        end
+        def resolve_one(capability, real:, provider_catalog:, probe:, provider_envelope:, provider_config:, instance_config:, model_config:) # rubocop:disable Metrics/ParameterLists
+          model_overrides = normalized_overrides(model_config)
+          return { value: model_overrides[capability], source: :model_override } if model_overrides.key?(capability)
+          instance_overrides = normalized_overrides(instance_config)
+          return { value: instance_overrides[capability], source: :instance_override } if instance_overrides.key?(capability)
+          provider_overrides = normalized_overrides(provider_config)
+          return { value: provider_overrides[capability], source: :provider_override } if provider_overrides.key?(capability)
+          real_caps = normalized_booleans(real)
+          return { value: real_caps[capability], source: :model_metadata } if real_caps.key?(capability)
+          catalog_caps = normalized_booleans(provider_catalog)
+          return { value: catalog_caps[capability], source: :provider_catalog } if catalog_caps.key?(capability)
+          probe_caps = normalized_booleans(probe)
+          return { value: probe_caps[capability], source: :probe } if probe_caps.key?(capability)
+          provider_caps = normalized_booleans(provider_envelope)
+          return { value: provider_caps[capability], source: :provider_envelope } if provider_caps.key?(capability)
+          { value: false, source: :default_false }
+        end
+        def normalized_overrides(config)
+          config = normalize_hash(config)
+          caps_key = config.key?(:capabilities) ? :capabilities : 'capabilities'
+          overrides = normalized_booleans(config[caps_key])
+          BOOLEAN_ALIASES.each do |key, capability|
+            value = config[key]
+            value = config[key.to_s] if value.nil?
+            next unless [true, false].include?(value)
+            next if overrides.key?(capability)
+            overrides[capability] = value
+          end
+          overrides
+        end
+        def normalized_booleans(value)
+          normalize_hash(value).each_with_object({}) do |(key, raw), result|
+            capability = key.to_s.downcase.tr('-', '_').to_sym
+            next unless OPTIONAL_CAPABILITIES.include?(capability)
+            next unless [true, false].include?(raw)
+            result[capability] = raw
+          end
+        end
+        def normalize_hash(value)
+          return {} unless value.respond_to?(:to_h)
+          value.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
+        end
+      end
+    end
+  end
+end

data/lib/legion/extensions/llm/configuration.rb CHANGED Viewed

@@ -22,6 +22,10 @@ module Legion
             option_keys.dup
           end
+          def register_provider_options(keys)
+            Array(keys).each { |key| option(key.to_sym) }
+          end
           private
           def option_keys = @option_keys ||= []

data/lib/legion/extensions/llm/error.rb CHANGED Viewed

@@ -27,6 +27,21 @@ module Legion
       class ModelNotFoundError < StandardError; end
       class UnsupportedAttachmentError < StandardError; end
+      # Raised when a request targets a model excluded by the configured
+      # model_whitelist / model_blacklist. This is a compliance guard enforced at
+      # the provider dispatch boundary (the last line before the model API call),
+      # so a denied model can never be reached regardless of caller. Non-retryable:
+      # retrying the same denied model must never succeed.
+      class ModelNotAllowedError < StandardError
+        attr_reader :model, :provider
+        def initialize(message = nil, model: nil, provider: nil)
+          @model = model
+          @provider = provider
+          super(message || "model #{model.inspect} is not permitted by the configured model policy for provider #{provider.inspect}")
+        end
+      end
       # Backward-compatible unsupported-capability error alias.
       class UnsupportedCapabilityError < Errors::UnsupportedCapability
         def initialize(message = nil, provider: nil, capability: nil, model: nil)

data/lib/legion/extensions/llm/provider.rb CHANGED Viewed

@@ -30,6 +30,8 @@ module Legion
         include Legion::Logging::Helper
         include Legion::Cache::Helper
+        MODEL_DETAIL_CACHE_SCHEMA_VERSION = 2
         attr_reader :config, :connection
         def initialize(config)
@@ -94,6 +96,7 @@ module Legion
         def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
                      tool_prefs: nil, &)
+          enforce_model_allowed!(model)
           normalized_temperature = maybe_normalize_temperature(temperature, model)
           log_provider_request(
             messages: messages,
@@ -182,6 +185,7 @@ module Legion
         end
         def embed(text:, model:, dimensions: nil, params: {}, headers: {})
+          enforce_model_allowed!(model)
           payload = Utils.deep_merge(render_embedding_payload(text, model:, dimensions:), params)
           response = @connection.post(embedding_url(model:), payload) do |req|
             req.headers = headers.merge(req.headers) unless headers.empty?
@@ -190,12 +194,14 @@ module Legion
         end
         def moderate(input, model:)
+          enforce_model_allowed!(model)
           payload = render_moderation_payload(input, model:)
           response = @connection.post moderation_url, payload
           parse_moderation_response(response, model:)
         end
         def paint(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
+          enforce_model_allowed!(model)
           validate_paint_inputs!(with:, mask:)
           payload = render_image_payload(prompt, model:, size:, with:, mask:, params:)
           response = @connection.post images_url(with:, mask:), payload
@@ -362,9 +368,19 @@ module Legion
         end
         def model_allowed?(model_name)
+          self.class.policy_allows?(model_name, whitelist: model_whitelist, blacklist: model_blacklist)
+        end
+        # Single source of truth for model-policy matching, usable both at runtime
+        # (instance #model_allowed?) and at instance-config build time (provider
+        # extensions choosing a default_model that does not violate the policy).
+        # Substring, case-insensitive: a whitelist permits models containing any
+        # pattern; a blacklist denies models containing any pattern; whitelist is
+        # applied before blacklist. Empty list = no restriction from that side.
+        def self.policy_allows?(model_name, whitelist: [], blacklist: [])
           name = model_name.to_s.downcase
-          wl = model_whitelist
-          bl = model_blacklist
+          wl = Array(whitelist).map { |p| p.to_s.downcase }
+          bl = Array(blacklist).map { |p| p.to_s.downcase }
           return false if wl.any? && wl.none? { |p| name.include?(p) }
           return false if bl.any? && bl.any? { |p| name.include?(p) }
@@ -372,6 +388,46 @@ module Legion
           true
         end
+        # Effective whitelist/blacklist for an instance config: per-instance config
+        # first, then the provider-level setting (mirrors instance #model_whitelist
+        # resolution order). Used by provider extensions when picking a default_model.
+        def self.model_policy(config, provider_family)
+          cfg = config.is_a?(Hash) ? config : {}
+          provider_conf = CredentialSources.setting(:extensions, :llm, provider_family)
+          provider_conf = {} unless provider_conf.is_a?(Hash)
+          {
+            whitelist: cfg[:model_whitelist] || provider_conf[:model_whitelist] || provider_conf['model_whitelist'],
+            blacklist: cfg[:model_blacklist] || provider_conf[:model_blacklist] || provider_conf['model_blacklist']
+          }
+        end
+        # Choose a default_model that never violates the model policy: prefer an
+        # explicitly-configured default when permitted; else a provider fallback when
+        # permitted; else nil, so routing resolves an allowed discovered model rather
+        # than forcing a policy-forbidden default. Keeps a whitelist/blacklist
+        # authoritative over any hardcoded provider default.
+        def self.policy_safe_default_model(configured:, fallback:, whitelist: [], blacklist: [])
+          return configured if configured && !configured.to_s.empty? &&
+                               policy_allows?(configured, whitelist:, blacklist:)
+          return fallback if fallback && !fallback.to_s.empty? &&
+                             policy_allows?(fallback, whitelist:, blacklist:)
+          nil
+        end
+        # Compliance guard: refuse to dispatch any request for a model excluded by
+        # the configured model_whitelist / model_blacklist. Invoked at every
+        # dispatch entry point (the last line before the model API call) so a
+        # denied model can never reach a provider API, regardless of caller. Fail
+        # closed — raises rather than silently routing elsewhere.
+        def enforce_model_allowed!(model_name)
+          return if model_allowed?(model_name)
+          log.warn("[#{slug}] action=model_denied model=#{model_name} instance=#{provider_instance_id} " \
+                   'reason=model_whitelist_or_blacklist')
+          raise ModelNotAllowedError.new(model: model_name, provider: slug)
+        end
         # ── Offering defaults ─────────────────────────────────────────────
         def offering_transport
@@ -549,7 +605,11 @@ module Legion
           tier = offering_tier
           instance_key = cache_instance_key
           cred_fp = credential_cache_fragment
-          key_parts = ['model_info', tier, slug, instance_key, cred_fp, model_name].compact
+          key_parts = [
+            'model_info',
+            "schema#{MODEL_DETAIL_CACHE_SCHEMA_VERSION}",
+            tier, slug, instance_key, cred_fp, model_name
+          ].compact
           key_parts.join('.')
         end
@@ -572,6 +632,10 @@ module Legion
         end
         def offering_from_model(model, health: {})
+          capability_sources = Array(model.capabilities).to_h do |cap|
+            [cap.to_sym, { value: true, source: :model_metadata }]
+          end
           Routing::ModelOffering.new(
             provider_family: slug.to_sym,
             provider_instance: model.instance || provider_instance_id,
@@ -582,6 +646,7 @@ module Legion
             model_family: model.family,
             usage_type: offering_usage_type(model),
             capabilities: model.capabilities,
+            capability_sources: capability_sources,
             limits: offering_limits(model),
             health:,
             metadata: offering_metadata(model)

data/lib/legion/extensions/llm/routing/model_offering.rb CHANGED Viewed

@@ -16,8 +16,8 @@ module Legion
           }.freeze
           attr_reader :offering_id, :provider_family, :model_family, :provider_instance, :instance_id, :transport,
-                      :tier, :model, :canonical_model_alias, :routing_metadata, :usage_type, :capabilities, :limits,
-                      :credentials, :health, :cost, :policy_tags, :metadata
+                      :tier, :model, :canonical_model_alias, :routing_metadata, :usage_type, :capabilities,
+                      :capability_sources, :limits, :credentials, :health, :cost, :policy_tags, :metadata
           def initialize(data)
             @metadata = normalize_hash(fetch_value(data, :metadata))
@@ -37,6 +37,7 @@ module Legion
                                                            fetch_value(data, :kind) ||
                                                            infer_usage_type(data)))
             @capabilities = normalize_capabilities(fetch_value(data, :capabilities))
+            @capability_sources = normalize_capability_sources(fetch_value(data, :capability_sources))
             @limits = normalize_hash(fetch_value(data, :limits))
             @credentials = fetch_value(data, :credentials)
             @health = normalize_hash(fetch_value(data, :health))
@@ -106,6 +107,7 @@ module Legion
               routing_metadata: routing_metadata,
               usage_type: usage_type,
               capabilities: capabilities,
+              capability_sources: capability_sources,
               limits: limits,
               credentials: credentials,
               health: health,
@@ -167,6 +169,16 @@ module Legion
             end.uniq
           end
+          def normalize_capability_sources(value)
+            normalize_hash(value).to_h do |capability, source_data|
+              normalized_source = normalize_hash(source_data)
+              [
+                capability.to_s.downcase.tr('-', '_').to_sym,
+                { value: normalized_source[:value], source: normalized_source[:source]&.to_sym }.compact
+              ]
+            end
+          end
           def normalize_hash(value)
             (value || {}).to_h.transform_keys(&:to_sym)
           end

data/lib/legion/extensions/llm/streaming.rb CHANGED Viewed

@@ -142,7 +142,30 @@ module Legion
                     end
           log.warn "[llm][streaming] action=handle_failed_response status=#{status} " \
                    "partial_body=#{buffer.length}b msg=#{partial.inspect}"
-          raise Legion::Extensions::Llm::ServerError, msg
+          raise_streaming_status_error(status, msg)
+        end
+        def raise_streaming_status_error(status, message)
+          response = Struct.new(:body, :status).new({ 'error' => { 'message' => message } }, status)
+          case status
+          when 400
+            raise Legion::Extensions::Llm::BadRequestError.new(response, message)
+          when 401
+            raise Legion::Extensions::Llm::UnauthorizedError.new(response, message)
+          when 403
+            raise Legion::Extensions::Llm::ForbiddenError.new(response, message)
+          when 429
+            raise Legion::Extensions::Llm::RateLimitError.new(response, message)
+          when 500
+            raise Legion::Extensions::Llm::ServerError.new(response, message)
+          when 502..504
+            raise Legion::Extensions::Llm::ServiceUnavailableError.new(response, message)
+          when 529
+            raise Legion::Extensions::Llm::OverloadedError.new(response, message)
+          else
+            provider = respond_to?(:parse_error) ? self : nil
+            Legion::Extensions::Llm::ErrorMiddleware.parse_error(provider: provider, response: response)
+          end
         end
         def handle_sse(chunk, parser, env, &)

data/lib/legion/extensions/llm/version.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Legion
   module Extensions
     module Llm
-      VERSION = '0.5.1'
+      VERSION = '0.5.4'
     end
   end
 end

data/lib/legion/extensions/llm.rb CHANGED Viewed

@@ -36,6 +36,9 @@ module Legion
       #  unqualified constant lookups resolve via Ruby scope.               #
       # ------------------------------------------------------------------ #
+      # --- Capability resolution policy (no internal deps) ---
+      require_relative 'llm/capability_policy'
       # --- Base value objects (no internal deps) ---
       require_relative 'llm/mime_type'
       require_relative 'llm/model/info'

data/spec/legion/extensions/llm/capability_policy_spec.rb ADDED Viewed

@@ -0,0 +1,192 @@
+# frozen_string_literal: true
+require 'spec_helper'
+RSpec.describe Legion::Extensions::Llm::CapabilityPolicy do
+  let(:empty_sources) do
+    { real: {}, provider_catalog: {}, probe: {}, provider_envelope: {}, provider_config: {}, instance_config: {},
+      model_config: {} }
+  end
+  describe '.resolve' do
+    context 'with no data at all' do
+      it 'defaults all optional capabilities to false' do
+        policy = described_class.resolve(**empty_sources)
+        expect(policy[:capabilities]).to eq([])
+        expect(policy[:sources][:embeddings]).to eq(value: false, source: :default_false)
+        expect(policy[:sources][:thinking]).to eq(value: false, source: :default_false)
+        expect(policy[:sources][:streaming]).to eq(value: false, source: :default_false)
+        expect(policy[:sources][:tools]).to eq(value: false, source: :default_false)
+        expect(policy[:sources][:vision]).to eq(value: false, source: :default_false)
+      end
+    end
+    context 'with instance override' do
+      it 'resolves capabilities from instance config' do
+        policy = described_class.resolve(
+          real: {},
+          provider_catalog: {},
+          probe: {},
+          provider_envelope: {},
+          provider_config: {
+            capabilities: { embeddings: false },
+            tools_flag: false
+          },
+          instance_config: {
+            capabilities: { streaming: true, tools: true },
+            enable_thinking: true
+          },
+          model_config: {}
+        )
+        expect(policy[:capabilities]).to contain_exactly(:streaming, :tools, :thinking)
+        expect(policy[:sources][:thinking]).to eq(value: true, source: :instance_override)
+        expect(policy[:sources][:embeddings]).to eq(value: false, source: :provider_override)
+        expect(policy[:sources][:tools]).to eq(value: true, source: :instance_override)
+      end
+    end
+    context 'with provider-level override' do
+      it 'resolves capabilities from provider config' do
+        policy = described_class.resolve(
+          real: {},
+          provider_catalog: {},
+          probe: {},
+          provider_envelope: {},
+          provider_config: {
+            capabilities: { streaming: true },
+            embedding_flag: false,
+            thinking_flag: true
+          },
+          instance_config: {},
+          model_config: {}
+        )
+        expect(policy[:capabilities]).to contain_exactly(:streaming, :thinking)
+        expect(policy[:sources][:streaming]).to eq(value: true, source: :provider_override)
+        expect(policy[:sources][:embeddings]).to eq(value: false, source: :provider_override)
+        expect(policy[:sources][:thinking]).to eq(value: true, source: :provider_override)
+      end
+    end
+    context 'with full precedence chain' do
+      it 'resolves each capability from the highest-priority source' do
+        policy = described_class.resolve(
+          real: { tools: false, vision: true },
+          provider_catalog: { structured_output: true },
+          probe: { embeddings: true },
+          provider_envelope: { streaming: true, tools: true },
+          provider_config: { capabilities: { tools: true, vision: false } },
+          instance_config: { capabilities: { tools: false } },
+          model_config: { capabilities: { tools: true } }
+        )
+        expect(policy[:capabilities]).to include(:tools, :embeddings, :streaming, :structured_output)
+        expect(policy[:capabilities]).not_to include(:vision)
+        expect(policy[:sources][:tools]).to eq(value: true, source: :model_override)
+        expect(policy[:sources][:vision]).to eq(value: false, source: :provider_override)
+        expect(policy[:sources][:embeddings]).to eq(value: true, source: :probe)
+        expect(policy[:sources][:structured_output]).to eq(value: true, source: :provider_catalog)
+        expect(policy[:sources][:streaming]).to eq(value: true, source: :provider_envelope)
+      end
+    end
+    context 'with boolean aliases' do
+      it 'resolves enable_* and *_flag aliases' do
+        policy = described_class.resolve(
+          real: {},
+          provider_catalog: {},
+          probe: {},
+          provider_envelope: {},
+          provider_config: {},
+          instance_config: { enable_thinking: true, streaming_flag: true, tools_flag: false },
+          model_config: {}
+        )
+        expect(policy[:capabilities]).to contain_exactly(:streaming, :thinking)
+        expect(policy[:sources][:thinking]).to eq(value: true, source: :instance_override)
+        expect(policy[:sources][:streaming]).to eq(value: true, source: :instance_override)
+        expect(policy[:sources][:tools]).to eq(value: false, source: :instance_override)
+      end
+    end
+    context 'when capabilities hash wins over alias at same level' do
+      it 'prefers capabilities nested key over boolean alias' do
+        policy = described_class.resolve(
+          real: {},
+          provider_catalog: {},
+          probe: {},
+          provider_envelope: {},
+          provider_config: {},
+          instance_config: { capabilities: { tools: true }, tools_flag: false },
+          model_config: {}
+        )
+        expect(policy[:capabilities]).to include(:tools)
+        expect(policy[:sources][:tools]).to eq(value: true, source: :instance_override)
+      end
+    end
+    context 'with model override' do
+      it 'model override beats instance and provider' do
+        policy = described_class.resolve(
+          real: {},
+          provider_catalog: {},
+          probe: {},
+          provider_envelope: {},
+          provider_config: { capabilities: { thinking: false } },
+          instance_config: { capabilities: { thinking: false } },
+          model_config: { thinking_flag: true }
+        )
+        expect(policy[:capabilities]).to include(:thinking)
+        expect(policy[:sources][:thinking]).to eq(value: true, source: :model_override)
+      end
+    end
+    context 'with provider envelope' do
+      it 'uses provider envelope when no overrides exist' do
+        policy = described_class.resolve(
+          real: {},
+          provider_catalog: {},
+          probe: {},
+          provider_envelope: { streaming: true },
+          provider_config: {},
+          instance_config: {},
+          model_config: {}
+        )
+        expect(policy[:capabilities]).to contain_exactly(:streaming)
+        expect(policy[:sources][:streaming]).to eq(value: true, source: :provider_envelope)
+      end
+    end
+  end
+  describe '.normalized_overrides' do
+    it 'handles string keys in capabilities hash' do
+      result = described_class.normalized_overrides({ 'capabilities' => { 'streaming' => true } })
+      expect(result[:streaming]).to be(true)
+    end
+    it 'handles symbol keys in capabilities hash' do
+      result = described_class.normalized_overrides({ capabilities: { streaming: true } })
+      expect(result[:streaming]).to be(true)
+    end
+    it 'ignores non-boolean values' do
+      result = described_class.normalized_overrides({ capabilities: { streaming: 'yes' } })
+      expect(result).not_to have_key(:streaming)
+    end
+  end
+  describe '.normalize_hash' do
+    it 'returns empty hash for nil' do
+      expect(described_class.normalize_hash(nil)).to eq({})
+    end
+    it 'symbolizes keys' do
+      expect(described_class.normalize_hash({ 'foo' => 1 })).to eq({ foo: 1 })
+    end
+  end
+end

data/spec/legion/extensions/llm/configuration_spec.rb CHANGED Viewed

@@ -35,4 +35,44 @@ RSpec.describe Legion::Extensions::Llm::Configuration do
       expect(config.cache_control_prefix_tokens).to eq(4)
     end
   end
+  describe '.register_provider_options' do
+    after do
+      # Clean up test options to avoid polluting other specs
+      %i[test_api_key test_api_base].each do |key|
+        described_class.send(:option_keys).delete(key)
+        described_class.send(:defaults).delete(key)
+        described_class.send(:remove_method, key) if described_class.method_defined?(key)
+        described_class.send(:remove_method, :"#{key}=") if described_class.method_defined?(:"#{key}=")
+      end
+    end
+    it 'registers new options that become accessible on instances' do
+      described_class.register_provider_options(%i[test_api_key test_api_base])
+      config = described_class.new
+      expect(config).to respond_to(:test_api_key)
+      expect(config).to respond_to(:test_api_base)
+    end
+    it 'adds registered options to the options list' do
+      described_class.register_provider_options(%i[test_api_key test_api_base])
+      expect(described_class.options).to include(:test_api_key, :test_api_base)
+    end
+    it 'is idempotent — duplicate registrations do not add duplicates' do
+      described_class.register_provider_options(%i[test_api_key])
+      described_class.register_provider_options(%i[test_api_key])
+      count = described_class.options.count(:test_api_key)
+      expect(count).to eq(1)
+    end
+    it 'accepts string keys and normalizes them to symbols' do
+      described_class.register_provider_options(%w[test_api_key])
+      expect(described_class.options).to include(:test_api_key)
+    end
+  end
 end

data/spec/legion/extensions/llm/provider_spec.rb CHANGED Viewed

@@ -357,6 +357,62 @@ RSpec.describe Legion::Extensions::Llm::Provider do
     end
   end
+  describe '#enforce_model_allowed! (dispatch compliance guard)' do
+    let(:provider_class) do
+      Class.new(described_class) do
+        attr_writer :settings
+        def api_base = 'https://test.invalid'
+        def settings = @settings || {}
+        def slug = :test
+        def provider_instance_id = :default
+      end
+    end
+    let(:provider) { provider_class.new(Legion::Extensions::Llm.config) }
+    context 'when a model is excluded by the whitelist' do
+      before { provider.settings = { model_whitelist: %w[haiku] } }
+      it 'raises ModelNotAllowedError carrying the model and provider' do
+        expect { provider.send(:enforce_model_allowed!, 'gpt-5') }
+          .to raise_error(Legion::Extensions::Llm::ModelNotAllowedError) do |error|
+            expect(error.model).to eq('gpt-5')
+            expect(error.provider).to eq(:test)
+          end
+      end
+      it 'permits a whitelisted model' do
+        expect { provider.send(:enforce_model_allowed!, 'claude-haiku-4-5-20251001') }.not_to raise_error
+      end
+      it 'fails closed in #complete before any provider call' do
+        expect { provider.complete([], tools: [], temperature: nil, model: 'gpt-5') }
+          .to raise_error(Legion::Extensions::Llm::ModelNotAllowedError)
+      end
+      it 'fails closed in #embed before any provider call' do
+        expect { provider.embed(text: 'hello', model: 'text-embedding-3-large') }
+          .to raise_error(Legion::Extensions::Llm::ModelNotAllowedError)
+      end
+    end
+    context 'when a model is excluded by the blacklist' do
+      before { provider.settings = { model_blacklist: %w[sonnet] } }
+      it 'fails closed in #complete for a blacklisted model' do
+        expect { provider.complete([], tools: [], temperature: nil, model: 'claude-sonnet-4-6') }
+          .to raise_error(Legion::Extensions::Llm::ModelNotAllowedError)
+      end
+    end
+    context 'with no policy configured' do
+      it 'does not raise for any model' do
+        expect { provider.send(:enforce_model_allowed!, 'anything-goes') }.not_to raise_error
+      end
+    end
+  end
   describe 'multi-host URL resolution' do
     let(:provider_class) do
       Class.new(described_class) do
@@ -610,4 +666,35 @@ RSpec.describe Legion::Extensions::Llm::Provider do
       end
     end
   end
+  describe '#model_detail_cache_key' do
+    let(:provider_class) do
+      Class.new(described_class) do
+        def api_base = 'https://test.invalid'
+        def self.slug = 'testprov'
+      end
+    end
+    let(:provider) do
+      provider_class.new({ request_timeout: 30, max_retries: 0, retry_interval: 0,
+                           retry_backoff_factor: 0, retry_interval_randomness: 0 })
+    end
+    it 'includes the model-detail cache schema version in the key' do
+      key = provider.send(:model_detail_cache_key, 'test-model')
+      expect(key).to include("schema#{described_class::MODEL_DETAIL_CACHE_SCHEMA_VERSION}")
+      expect(key).to include('schema2')
+    end
+    it 'changes the key when the schema version constant changes' do
+      key_v2 = provider.send(:model_detail_cache_key, 'test-model')
+      stub_const("#{described_class}::MODEL_DETAIL_CACHE_SCHEMA_VERSION", 3)
+      key_v3 = provider.send(:model_detail_cache_key, 'test-model')
+      expect(key_v3).not_to eq(key_v2)
+      expect(key_v3).to include('schema3')
+    end
+  end
 end

data/spec/legion/extensions/llm/routing/model_offering_spec.rb CHANGED Viewed

@@ -219,4 +219,62 @@ RSpec.describe Legion::Extensions::Llm::Routing::ModelOffering do
       limits: { context_window: 32_768, max_output_tokens: 8192 }
     )
   end
+  describe 'capability_sources' do
+    it 'accepts and exposes capability source metadata' do
+      sourced = described_class.new(
+        provider_family: :vllm,
+        provider_instance: :apollo,
+        model: 'gemma-4-12b-it',
+        capabilities: %i[streaming tools],
+        capability_sources: {
+          streaming: { value: true, source: :provider_envelope },
+          tools: { value: true, source: :instance_override },
+          embeddings: { value: false, source: :default_false }
+        }
+      )
+      expect(sourced.capabilities).to include(:streaming, :tools)
+      expect(sourced.capability_sources[:tools]).to eq(value: true, source: :instance_override)
+      expect(sourced.capability_sources[:embeddings]).to eq(value: false, source: :default_false)
+    end
+    it 'includes capability_sources in to_h' do
+      sourced = described_class.new(
+        provider_family: :vllm,
+        provider_instance: :apollo,
+        model: 'gemma-4-12b-it',
+        capabilities: %i[streaming],
+        capability_sources: {
+          streaming: { value: true, source: :provider_envelope }
+        }
+      )
+      expect(sourced.to_h[:capability_sources]).to eq(
+        streaming: { value: true, source: :provider_envelope }
+      )
+    end
+    it 'normalizes string-keyed capability sources' do
+      sourced = described_class.new(
+        provider_family: :vllm,
+        model: 'test',
+        capability_sources: {
+          'streaming' => { 'value' => true, 'source' => 'provider_envelope' }
+        }
+      )
+      expect(sourced.capability_sources[:streaming]).to eq(value: true, source: :provider_envelope)
+    end
+    it 'defaults to empty hash when not provided' do
+      plain = described_class.new(
+        provider_family: :ollama,
+        model: 'test',
+        capabilities: %i[streaming]
+      )
+      expect(plain.capability_sources).to eq({})
+    end
+  end
 end

data/spec/legion/extensions/llm/streaming_spec.rb CHANGED Viewed

@@ -98,6 +98,15 @@ RSpec.describe Legion::Extensions::Llm::Streaming do
         .to raise_error(Legion::Extensions::Llm::ServerError, /Provider error.*The model is currently overloaded/)
     end
+    it 'raises UnauthorizedError for partial 401 streaming responses' do
+      buffer = +''
+      auth_env = Struct.new(:status).new(401)
+      truncated_chunk = '{"error":{"message":"Unauthorized'
+      expect { test_obj.send(:handle_failed_response, truncated_chunk, buffer, auth_env) }
+        .to raise_error(Legion::Extensions::Llm::UnauthorizedError, /Unauthorized/)
+    end
     it 'raises ServerError with generic message when no partial message is extractable and env cannot buffer' do
       buffer = +''
       partial_chunk = '{"error":{'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.4
 platform: ruby
 authors:
 - LegionIO
@@ -272,6 +272,7 @@ files:
 - lib/legion/extensions/llm/canonical/tool_definition.rb
 - lib/legion/extensions/llm/canonical/tool_schema.rb
 - lib/legion/extensions/llm/canonical/usage.rb
+- lib/legion/extensions/llm/capability_policy.rb
 - lib/legion/extensions/llm/chat.rb
 - lib/legion/extensions/llm/chunk.rb
 - lib/legion/extensions/llm/configuration.rb
@@ -356,6 +357,7 @@ files:
 - spec/legion/extensions/llm/canonical/tool_definition_spec.rb
 - spec/legion/extensions/llm/canonical/tool_schema_spec.rb
 - spec/legion/extensions/llm/canonical/usage_spec.rb
+- spec/legion/extensions/llm/capability_policy_spec.rb
 - spec/legion/extensions/llm/configuration_spec.rb
 - spec/legion/extensions/llm/conformance/client_translator_examples.rb
 - spec/legion/extensions/llm/conformance/conformance.rb