RubyGems - ruby-skill-bench - Versions diffs - 1.1.0 → 1.2.0 - Mend

ruby-skill-bench 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

checksums.yaml +4 -4
data/README.md +166 -35
data/docs/architecture.md +3 -1
data/docs/first-eval-guide.md +7 -7
data/docs/testing-guide.md +1 -1
data/lib/skill_bench/agent/react_agent/loop_runner.rb +44 -9
data/lib/skill_bench/agent/react_agent/step.rb +7 -1
data/lib/skill_bench/cli/batch_result_printer.rb +45 -0
data/lib/skill_bench/cli/eval/eval_options.rb +4 -0
data/lib/skill_bench/cli/help_printer.rb +10 -2
data/lib/skill_bench/cli/init_command.rb +2 -1
data/lib/skill_bench/cli/result_printer.rb +1 -1
data/lib/skill_bench/cli/run_command.rb +47 -9
data/lib/skill_bench/cli/validate_command.rb +242 -0
data/lib/skill_bench/cli.rb +3 -0
data/lib/skill_bench/client.rb +43 -1
data/lib/skill_bench/clients/all.rb +2 -0
data/lib/skill_bench/clients/base_client.rb +12 -1
data/lib/skill_bench/clients/base_url_validator.rb +105 -0
data/lib/skill_bench/clients/provider_config.rb +34 -1
data/lib/skill_bench/clients/provider_schemas.rb +4 -0
data/lib/skill_bench/clients/providers/mistral.rb +47 -0
data/lib/skill_bench/commands/init.rb +5 -0
data/lib/skill_bench/commands/skill_new.rb +3 -1
data/lib/skill_bench/config/applier.rb +2 -0
data/lib/skill_bench/config/defaults.rb +2 -0
data/lib/skill_bench/config/facade_readers.rb +7 -0
data/lib/skill_bench/config/facade_writers.rb +17 -0
data/lib/skill_bench/config/json_loader.rb +1 -1
data/lib/skill_bench/config/store.rb +29 -0
data/lib/skill_bench/config.rb +18 -0
data/lib/skill_bench/evaluation/runner.rb +20 -3
data/lib/skill_bench/execution/context_hydrator.rb +52 -11
data/lib/skill_bench/execution/sandbox.rb +58 -11
data/lib/skill_bench/judge/judge.rb +4 -0
data/lib/skill_bench/judge/prompt.rb +42 -6
data/lib/skill_bench/models/config.rb +32 -0
data/lib/skill_bench/output_formatter.rb +60 -1
data/lib/skill_bench/package_verifier.rb +1 -1
data/lib/skill_bench/rails/skill_templates.rb +19 -5
data/lib/skill_bench/services/agent_spawner_service.rb +7 -3
data/lib/skill_bench/services/batch_runner_service.rb +111 -0
data/lib/skill_bench/services/compare_option_parser.rb +1 -0
data/lib/skill_bench/services/cost_calculator.rb +91 -0
data/lib/skill_bench/services/html_formatter.rb +289 -0
data/lib/skill_bench/services/json_formatter.rb +19 -1
data/lib/skill_bench/services/junit_formatter.rb +74 -24
data/lib/skill_bench/services/provider_resolver.rb +5 -2
data/lib/skill_bench/services/response_cache.rb +130 -0
data/lib/skill_bench/services/runner_service.rb +88 -4
data/lib/skill_bench/services/summary_formatter.rb +90 -0
data/lib/skill_bench/services/template_registry.rb +43 -9
data/lib/skill_bench/services/trend_recorder_service.rb +29 -2
data/lib/skill_bench/tools/registry.rb +29 -3
data/lib/skill_bench/tools/run_command.rb +171 -19
data/lib/skill_bench/trend_tracker/persistence.rb +27 -10
data/lib/skill_bench/trend_tracker.rb +5 -5
data/lib/skill_bench/version.rb +1 -1
data/lib/skill_bench.rb +2 -3
metadata +17 -36

data/lib/skill_bench/cli/eval/eval_options.rb CHANGED Viewed

@@ -9,6 +9,7 @@ module SkillBench
       class BaseEvalOptions
         attr_reader :options, :parser
+        # Initializes the option set and the OptionParser used to parse the command's arguments.
         def initialize
           @options = default_options
           @parser = create_parser
@@ -39,10 +40,12 @@ module SkillBench
       class NewEvalOptions < BaseEvalOptions
         protected
+        # @return [Hash] default options for the `eval new` command, with the runtime defaulting to "ruby"
         def default_options
           { runtime: 'ruby' }
         end
+        # @return [OptionParser] parser for the `eval new` command, handling --runtime and --help
         def create_parser
           OptionParser.new do |opts|
             opts.banner = 'Usage: skill-bench eval new <name> [options]'
@@ -59,6 +62,7 @@ module SkillBench
       class GenerateEvalOptions < BaseEvalOptions
         protected
+        # @return [OptionParser] parser for the `eval generate` command, handling --name and --help
         def create_parser
           OptionParser.new do |opts|
             opts.banner = 'Usage: skill-bench eval generate <skill-name> [options]'

data/lib/skill_bench/cli/help_printer.rb CHANGED Viewed

@@ -20,11 +20,14 @@ module SkillBench
               --force    Overwrite existing config file
             run <eval> --skill <name> [--skill <name>] [--format FORMAT] [--pack NAME]
-              Run an evaluation
+              Run an evaluation (single eval, or a whole directory with --all)
               --skill    Skill to use (can be specified multiple times)
               --pack     Pack context for registry-based skill resolution
               --registry-manifest PATH  Path to registry.json manifest
-              --format   Output format: human, json, junit (default: human)
+              --format   Output format: human, json, junit, html (default: human)
+              --all      Run every eval under evals/ (batch mode)
+              --evals-dir DIR  Run every eval under DIR (batch mode)
+              --summary  Emit a JSON summary gate for a batch run (batch mode)
             compare <skill-name> --variant-a SPEC --variant-b SPEC --eval PATH
               Compare the same skill across two pack variants
@@ -45,6 +48,11 @@ module SkillBench
               Auto-generate an eval from a skill
               --name     Name for the generated eval (optional)
+            validate (alias: doctor) [--criteria PATH] [--config PATH]
+              Run read-only pre-flight checks (no eval, no network)
+              --criteria  Criteria JSON to validate (default: criteria.json)
+              --config    Config file to validate (default: skill-bench.json)
           Global Options:
             -h, --help        Show this help message
         USAGE

data/lib/skill_bench/cli/init_command.rb CHANGED Viewed

@@ -45,6 +45,7 @@ module SkillBench
         OptionParser.new do |opts|
           opts.banner = 'Usage: skill-bench init --<provider> [options]'
           register_provider_options(opts, options)
+          opts.on('--mock', 'Generate offline mock config (no API key required)') { options[:provider] = :mock }
           opts.on('--force', 'Overwrite existing config file') { options[:force] = true }
           opts.on('-h', '--help', 'Prints this help') do
             puts opts
@@ -60,7 +61,7 @@ module SkillBench
       end
       def error_missing_provider
-        providers = SkillBench::Clients::ProviderSchemas.names.map { |provider_name| "--#{provider_name}" }.join(', ')
+        providers = (SkillBench::Clients::ProviderSchemas.names.map { |provider_name| "--#{provider_name}" } + ['--mock']).join(', ')
         warn "Error: provider is required. Use one of: #{providers}"
         1
       end

data/lib/skill_bench/cli/result_printer.rb CHANGED Viewed

@@ -9,7 +9,7 @@ module SkillBench
       # Prints the result and returns the appropriate exit code.
       #
       # @param result [Hash] Result from ScoringService
-      # @param format [Symbol] Output format (:human, :json, :junit)
+      # @param format [Symbol] Output format (:human, :json, :junit, :html)
       # @return [Integer] Exit code (0 for pass, 1 for fail)
       def self.call(result, format: :human)
         puts OutputFormatter.format(result, format: format)

data/lib/skill_bench/cli/run_command.rb CHANGED Viewed

@@ -19,7 +19,7 @@ module SkillBench
         @argv = argv
       end
-      # Parses options and runs the eval.
+      # Parses options and runs the eval(s).
       #
       # @return [Integer] Exit code
       def call
@@ -27,14 +27,9 @@ module SkillBench
         parser = build_parser(options)
         parser.parse!(@argv)
-        eval_name = @argv.shift
-        return error_missing_eval unless eval_name
-        return error_missing_skill if options[:skill_names].empty? && !options[:pack]
+        return run_batch(options) if batch_requested?(options)
-        options[:eval_name] = eval_name
-        exec_options = options.reject { |key| key == :format }
-        result = Commands::Run.run(**exec_options)
-        ResultPrinter.call(result, format: options[:format] || :human)
+        run_single(options)
       rescue HelpRequested
         0
       rescue StandardError => e
@@ -44,13 +39,56 @@ module SkillBench
       private
+      # Whether a whole-directory batch run was requested.
+      #
+      # @param options [Hash] Parsed options
+      # @return [Boolean] true when --all or --evals-dir was given
+      def batch_requested?(options)
+        options[:all] || options[:evals_dir]
+      end
+      # Runs a single eval (the original `run <eval> --skill ...` path).
+      #
+      # @param options [Hash] Parsed options
+      # @return [Integer] Exit code
+      def run_single(options)
+        eval_name = @argv.shift
+        return error_missing_eval unless eval_name
+        return error_missing_skill if options[:skill_names].empty? && !options[:pack]
+        options[:eval_name] = eval_name
+        exec_options = options.reject { |key| %i[format summary all evals_dir].include?(key) }
+        result = Commands::Run.run(**exec_options)
+        ResultPrinter.call(result, format: options[:format] || :human)
+      end
+      # Runs every eval under the target directory and prints an aggregate.
+      #
+      # @param options [Hash] Parsed options
+      # @return [Integer] Exit code
+      def run_batch(options)
+        return error_missing_skill if options[:skill_names].empty? && !options[:pack]
+        aggregate = Services::BatchRunnerService.call(
+          evals_dir: options[:evals_dir] || Services::BatchRunnerService::DEFAULT_EVALS_DIR,
+          skill_names: options[:skill_names],
+          pack: options[:pack],
+          registry_manifest: options[:registry_manifest]
+        )
+        BatchResultPrinter.call(aggregate, format: options[:format], summary: options[:summary])
+      end
       def build_parser(options)
         OptionParser.new do |opts|
           opts.banner = 'Usage: skill-bench run <eval> [options]'
           opts.on('--skill NAME', 'Skill to use (can be specified multiple times)') { |v| options[:skill_names] << v }
           opts.on('--pack NAME', 'Pack context for skill resolution') { |v| options[:pack] = v }
           opts.on('--registry-manifest PATH', 'Path to registry.json manifest') { |v| options[:registry_manifest] = v }
-          opts.on('--format FORMAT', 'Output format (human, json, junit)') { |v| options[:format] = v.to_sym }
+          opts.on('--format FORMAT', 'Output format (human, json, junit, html)') { |v| options[:format] = v.to_sym }
+          opts.on('--all', 'Run every eval under the default evals/ directory') { options[:all] = true }
+          opts.on('--evals-dir DIR', 'Run every eval under DIR') { |v| options[:evals_dir] = v }
+          opts.on('--summary', 'Emit a JSON summary gate for a batch run') { options[:summary] = true }
+          opts.on('--cache', 'Enable content-addressed response caching') { ENV['SKILL_BENCH_CACHE'] = '1' }
           opts.on('-h', '--help', 'Prints this help') do
             puts opts
             raise SkillBench::HelpRequested

data/lib/skill_bench/cli/validate_command.rb ADDED Viewed

@@ -0,0 +1,242 @@
# frozen_string_literal: true

require 'json'
require 'optparse'

module SkillBench
  module Cli
    # Handles the `skill-bench validate` / `doctor` subcommand.
    #
    # Runs read-only pre-flight checks and prints a PASS/FAIL report:
    #   1. Criteria JSON structure (via {Models::CriteriaValidator}).
    #   2. skill-bench.json shape (hand-rolled, lightweight schema check).
    #   3. Provider credentials for the configured non-mock provider.
    #
    # It never runs an eval and never makes a network call.
    class ValidateCommand
      # Default criteria file validated when --criteria is not given.
      DEFAULT_CRITERIA = 'criteria.json'

      # @param argv [Array<String>] Raw CLI arguments
      # @return [Integer] Exit code
      def self.call(argv)
        new(argv).call
      end

      # @param argv [Array<String>] Raw CLI arguments
      def initialize(argv)
        @argv = argv
      end

      # Parses options, runs the pre-flight checks, and prints the report.
      #
      # @return [Integer] Exit code (0 when all checks pass, 1 otherwise)
      def call
        options = parse_options
        config_path = options[:config] || SkillBench::Config::CONFIG_FILENAME
        config_data = load_config_data(config_path)
        results = [
          check_criteria(options),
          check_config(config_path, config_data),
          check_provider_key(config_data)
        ]
        print_report(results)
        results.any? { |result| result[:status] == :fail } ? 1 : 0
      rescue HelpRequested
        0
      rescue StandardError => e
        warn "Error: #{e.message}"
        1
      end

      private

      # Parses @argv (destructively, via OptionParser#parse!) into an options hash.
      #
      # @return [Hash] Parsed options (:criteria and/or :config when given)
      def parse_options
        options = {}
        build_parser(options).parse!(@argv)
        options
      end

      # @param options [Hash] Hash the parser callbacks write into
      # @return [OptionParser] Parser handling --criteria, --config and --help
      def build_parser(options)
        OptionParser.new do |opts|
          opts.banner = 'Usage: skill-bench validate [options]'
          opts.on('--criteria PATH', 'Criteria JSON file to validate (default: criteria.json)') { |v| options[:criteria] = v }
          opts.on('--config PATH', 'Config file to validate (default: skill-bench.json)') { |v| options[:config] = v }
          opts.on('-h', '--help', 'Prints this help') do
            puts opts
            raise SkillBench::HelpRequested
          end
        end
      end

      # --- Check (a): criteria ------------------------------------------------

      # Validates the criteria file. A missing file is a FAIL only when the user
      # named it explicitly with --criteria; a missing default file is a SKIP.
      #
      # @param options [Hash] Parsed options
      # @return [Hash] Check result (:name, :status, :message)
      def check_criteria(options)
        path = options[:criteria] || DEFAULT_CRITERIA
        unless File.exist?(path)
          return fail_result('criteria', "criteria file not found: #{path}") if options[:criteria]

          return skip_result('criteria', "no #{DEFAULT_CRITERIA} found (skipped)")
        end

        result = Models::CriteriaValidator.call(path:)
        return pass_result('criteria', "#{path} is valid") if result[:success]

        fail_result('criteria', "#{path}: #{criteria_error(result)}")
      end

      # @param result [Hash] Result returned by the criteria validator
      # @return [String] The nested error message, or a generic fallback
      def criteria_error(result)
        result.dig(:response, :error, :message) || 'invalid criteria'
      end

      # --- Check (b): config shape -------------------------------------------

      # Turns the outcome of {#load_config_data} into a check result.
      #
      # @param path [String] Config file path (used in messages)
      # @param config_data [Hash] Result of {#load_config_data}
      # @return [Hash] Check result (:name, :status, :message)
      def check_config(path, config_data)
        case config_data[:status]
        when :missing
          fail_result('config', "#{path} not found")
        when :invalid_json
          fail_result('config', "#{path} is not valid JSON: #{config_data[:message]}")
        when :unreadable
          fail_result('config', "#{path} could not be read: #{config_data[:message]}")
        else
          validate_config_shape(path, config_data[:data])
        end
      end

      # @param path [String] Config file path (used in messages)
      # @param data [Object] Parsed JSON document
      # @return [Hash] Check result (:name, :status, :message)
      def validate_config_shape(path, data)
        return fail_result('config', "#{path} must contain a JSON object") unless data.is_a?(Hash)

        errors = config_shape_errors(data)
        return fail_result('config', errors.join('; ')) if errors.any?

        pass_result('config', "#{path} matches the expected shape")
      end

      # Collects every shape violation so they can be reported together.
      #
      # @param data [Hash] Parsed config with symbol keys
      # @return [Array<String>] Error messages (empty when the shape is valid)
      def config_shape_errors(data)
        errors = provider_errors(data[:provider])
        errors.concat(max_execution_time_errors(data[:max_execution_time]))
        errors << "'config' must be an object" if data.key?(:config) && !data[:config].is_a?(Hash)
        errors
      end

      # @param provider [Object] Value of the config's :provider key
      # @return [Array<String>] Error messages for the provider field
      def provider_errors(provider)
        return ["'provider' is required"] if provider.nil?
        return ["'provider' must be a string"] unless provider.is_a?(String)

        allowed = Models::Provider::ALLOWED_PROVIDERS
        return [] if allowed.include?(provider)

        ["'provider' '#{provider}' is not one of: #{allowed.join(', ')}"]
      end

      # @param value [Object] Value of the config's :max_execution_time key
      # @return [Array<String>] Error messages (the field is optional)
      def max_execution_time_errors(value)
        return [] if value.nil?
        return [] if value.is_a?(Integer) && value.positive?

        ["'max_execution_time' must be a positive integer"]
      end

      # --- Check (c): provider key -------------------------------------------

      # Checks that the configured provider has the credentials it needs.
      # Skipped when the config could not be loaded or names no valid provider.
      #
      # @param config_data [Hash] Result of {#load_config_data}
      # @return [Hash] Check result (:name, :status, :message)
      def check_provider_key(config_data)
        return skip_result('provider key', 'skipped (no usable config)') unless config_data[:status] == :ok

        provider = config_provider(config_data[:data])
        return skip_result('provider key', 'skipped (provider invalid)') unless provider
        return pass_result('provider key', 'mock provider requires no API key') if provider == 'mock'

        missing = missing_provider_keys(provider, config_data[:data][:config])
        return pass_result('provider key', "#{provider} credentials present") if missing.empty?

        fail_result('provider key', "#{provider} is missing: #{missing.join(', ')}")
      rescue StandardError => e
        # Building the client can raise on unrelated config (e.g. base_url
        # validation); surface that as a structured FAIL rather than crashing.
        fail_result('provider key', "#{provider} config is invalid: #{e.message}")
      end

      # @param data [Object] Parsed config document
      # @return [String, nil] The provider name when it is an allowed string, else nil
      def config_provider(data)
        return nil unless data.is_a?(Hash)

        provider = data[:provider]
        return nil unless provider.is_a?(String) && Models::Provider::ALLOWED_PROVIDERS.include?(provider)

        provider
      end

      # Instantiates the provider client and asks it which config keys are missing.
      #
      # @param provider [String] Validated provider name
      # @param provider_config [Hash, nil] The config's :config section
      # @return [Array] Missing keys; empty when the client exposes no such check
      def missing_provider_keys(provider, provider_config)
        provider_sym = provider.to_sym
        options = provider_client_options(provider_sym, provider_config)
        client = Clients::ProviderRegistry.for(provider_sym).new(options)
        # `true` includes private methods; `missing_config_keys` is invoked via send.
        return [] unless client.respond_to?(:missing_config_keys, true)

        client.send(:missing_config_keys)
      end

      # Merges environment overrides on top of a copy of the file-based provider config.
      #
      # @param provider_sym [Symbol] Provider name
      # @param provider_config [Hash, nil] The config's :config section
      # @return [Hash] Options to construct the provider client with
      def provider_client_options(provider_sym, provider_config)
        options = provider_config.is_a?(Hash) ? provider_config.dup : {}
        Models::Provider::ENV_OVERRIDABLE_SETTINGS.each do |setting|
          value = env_setting(provider_sym, setting)
          options[setting] = value unless value.nil?
        end
        options
      end

      # Looks up a setting in the environment, trying SKILL_BENCH_<PROVIDER>_<SETTING>
      # first and then <PROVIDER>_<SETTING>. Empty values are treated as unset.
      #
      # @param provider_sym [Symbol] Provider name
      # @param setting [Symbol, String] Setting name
      # @return [String, nil] First non-empty value found, or nil
      def env_setting(provider_sym, setting)
        provider = provider_sym.to_s.upcase
        name = setting.to_s.upcase
        ["SKILL_BENCH_#{provider}_#{name}", "#{provider}_#{name}"].each do |var|
          value = ENV.fetch(var, nil)
          return value if value && !value.empty?
        end
        nil
      end

      # --- Config loading ----------------------------------------------------

      # Reads and parses the config file without raising.
      #
      # @param path [String] Config file path
      # @return [Hash] `{ status: :ok, data: }` on success, `{ status: :missing }` when
      #   the path does not exist, or `{ status: :invalid_json | :unreadable, message: }`
      def load_config_data(path)
        return { status: :missing } unless File.exist?(path)

        { status: :ok, data: JSON.parse(File.read(path), symbolize_names: true) }
      rescue JSON::ParserError => e
        { status: :invalid_json, message: e.message }
      rescue SystemCallError => e
        # The path exists but cannot be read (a directory, permission denied, ...).
        # Report it as a config FAIL so the full report is still printed.
        { status: :unreadable, message: e.message }
      end

      # --- Reporting ---------------------------------------------------------

      # Prints the title, one line per check, and the summary line to stdout.
      #
      # @param results [Array<Hash>] Check results
      # @return [void]
      def print_report(results)
        puts 'skill-bench validate'
        puts
        results.each { |result| puts format_result(result) }
        puts
        puts summary_line(results)
      end

      # @param result [Hash] Check result
      # @return [String] Line of the form "[STATUS] name          message"
      def format_result(result)
        "[#{label(result[:status])}] #{result[:name].ljust(13)} #{result[:message]}"
      end

      # @param status [Symbol] :pass, :fail or :skip
      # @return [String] Upper-case label
      # @raise [KeyError] when the status is not one of the three known values
      def label(status)
        { pass: 'PASS', fail: 'FAIL', skip: 'SKIP' }.fetch(status)
      end

      # @param results [Array<Hash>] Check results
      # @return [String] Failure count, or the all-clear message
      def summary_line(results)
        failed = results.count { |result| result[:status] == :fail }
        return "#{failed} check(s) failed." if failed.positive?

        'All checks passed.'
      end

      # @return [Hash] A passing check result
      def pass_result(name, message)
        { name:, status: :pass, message: }
      end

      # @return [Hash] A failing check result
      def fail_result(name, message)
        { name:, status: :fail, message: }
      end

      # @return [Hash] A skipped check result
      def skip_result(name, message)
        { name:, status: :skip, message: }
      end
    end
  end
end

data/lib/skill_bench/cli.rb CHANGED Viewed

@@ -5,8 +5,10 @@ require_relative 'cli/run_command'
 require_relative 'cli/compare_command'
 require_relative 'cli/skill_command'
 require_relative 'cli/eval_command'
+require_relative 'cli/validate_command'
 require_relative 'cli/help_printer'
 require_relative 'cli/result_printer'
+require_relative 'cli/batch_result_printer'
 module SkillBench
   # Raised when -h/--help is passed to abort OptionParser and return exit code 0.
@@ -42,6 +44,7 @@ module SkillBench
       when 'compare' then Cli::CompareCommand.call(@argv)
       when 'skill'   then Cli::SkillCommand.call(@argv)
       when 'eval'    then Cli::EvalCommand.call(@argv)
+      when 'validate', 'doctor' then Cli::ValidateCommand.call(@argv)
       when '-h', '--help', 'help'
         help.call
       else

data/lib/skill_bench/client.rb CHANGED Viewed

@@ -1,13 +1,27 @@
 # frozen_string_literal: true
 require_relative 'clients/all'
+require_relative 'services/response_cache'
 module SkillBench
   # Facade for calling LLM clients.
   # Delegates to the configured provider.
   class Client
+    # Provider clients that must never be cached: their results either signal a
+    # configuration error (NullClient) or are cheap, deterministic test doubles
+    # (Mock). Caching them would provide no benefit and could mask errors.
+    UNCACHEABLE_CLIENTS = [
+      Clients::Providers::NullClient,
+      Clients::Providers::Mock
+    ].freeze
     # Calls the configured LLM provider with the given parameters.
     #
+    # When response caching is enabled (see {Services::ResponseCache.enabled?})
+    # and the resolved provider is cacheable, identical requests reuse a cached
+    # response instead of calling the provider again. When caching is disabled
+    # (the default), the provider is always invoked, leaving behavior unchanged.
+    #
     # @param system_prompt [String] System prompt for the LLM
     # @param messages [Array<Hash>] Conversation messages
     # @param provider [Symbol, nil] Override the configured LLM provider (e.g., :deepseek, :openai)
@@ -17,7 +31,35 @@ module SkillBench
       resolved = provider || Config.current_llm_provider || :openai
       client_class = Clients::ProviderRegistry.for(resolved)
       warn "WARNING: LLM provider '#{resolved}' is not configured. Falling back to null client." if client_class == Clients::Providers::NullClient
-      client_class.call(system_prompt: system_prompt, messages: messages, **options)
+      invoke = -> { client_class.call(system_prompt: system_prompt, messages: messages, **options) }
+      return invoke.call unless cache_eligible?(client_class)
+      cache_key = Services::ResponseCache.key(
+        provider: resolved,
+        model: options[:model],
+        system_prompt: system_prompt,
+        messages: messages,
+        tools: options[:tools],
+        temperature: options[:temperature],
+        provider_config: options.slice(:base_url, :request_path, :endpoint, :location, :project_id, :api_version)
+      )
+      Services::ResponseCache.fetch(cache_key, &invoke)
+    end
+    # Whether a resolved provider client may be served from the cache.
+    #
+    # Requires caching to be enabled and the client to not be one of the
+    # {UNCACHEABLE_CLIENTS} (null/mock), so disabling the cache restores the
+    # original, uncached behavior exactly.
+    #
+    # @param client_class [Class] The resolved provider client class
+    # @return [Boolean] true when the call should go through the cache
+    def self.cache_eligible?(client_class)
+      return false unless Services::ResponseCache.enabled?
+      !UNCACHEABLE_CLIENTS.include?(client_class)
     end
+    private_class_method :cache_eligible?
   end
 end

data/lib/skill_bench/clients/all.rb CHANGED Viewed

@@ -5,6 +5,7 @@ require_relative 'response_error_handler'
 require_relative 'response_builder'
 require_relative 'request_builder'
 require_relative 'retry_handler'
+require_relative 'base_url_validator'
 require_relative 'base_client'
 require_relative 'provider_config'
 require_relative 'provider_registry'
@@ -17,5 +18,6 @@ require_relative 'providers/azure_openai'
 require_relative 'providers/opencode'
 require_relative 'providers/groq'
 require_relative 'providers/deepseek'
+require_relative 'providers/mistral'
 require_relative 'providers/openrouter'
 require_relative 'providers/mock'

data/lib/skill_bench/clients/base_client.rb CHANGED Viewed

@@ -159,11 +159,22 @@ module SkillBench
       def execute_request
         RetryHandler.call do
-          connection = RequestBuilder.build_connection(base_url)
           RequestBuilder.execute(connection, request_path, headers: request_headers, body: request_body)
         end
       end
+      # Lazily builds and memoizes the Faraday connection for this client instance.
+      #
+      # Reusing one connection across the instance's sequential requests and retry
+      # attempts enables HTTP keep-alive, avoiding a fresh TCP + TLS handshake per turn.
+      # Memoization is intentionally per-instance (never global/shared) so concurrent
+      # agent and judge clients each own a connection, keeping net/http thread-safe.
+      #
+      # @return [Faraday::Connection] the reused connection for this instance.
+      def connection
+        @connection ||= RequestBuilder.build_connection(base_url)
+      end
       def handle_response(response)
         parsed = ResponseParser.parse_body(response)
         return failure_response(response, parsed) unless response.success?

data/lib/skill_bench/clients/base_url_validator.rb ADDED Viewed

@@ -0,0 +1,105 @@
# frozen_string_literal: true

require 'uri'

module SkillBench
  module Clients
    # Validates a provider `base_url` before it is used to build an HTTP
    # connection that may carry an API key / bearer token.
    #
    # Security rationale: `base_url` is taken verbatim from config/env input and
    # the authenticated request attaches a credential to whatever host it names.
    # Left unchecked this is an SSRF surface, and an `http://` URL would transmit
    # the credential in cleartext. This service enforces:
    #
    # - the URL must be an absolute `http`/`https` URL with a host (empty/relative
    #   /garbage values are rejected);
    # - when a credential will be attached, non-loopback hosts MUST use `https`;
    #   loopback hosts (`localhost`, `127.0.0.1`, `::1`) MAY use `http` — the
    #   legitimate self-hosted/Ollama case — and an explicit opt-in
    #   (`allow_insecure_base_url`) can permit cleartext for non-loopback hosts.
    #
    # A blank (`nil`/empty) `base_url` is allowed so providers may supply their
    # own (https) default downstream. Error messages describe only the transport
    # and never include the credential.
    class BaseUrlValidator
      # Hosts permitted to use cleartext `http` even with a credential attached.
      # Entries are lowercase; hostnames are compared case-insensitively.
      LOOPBACK_HOSTS = %w[localhost 127.0.0.1 ::1].freeze

      # Raised when a base URL is structurally invalid or would leak a credential
      # over cleartext transport. The message never contains the credential.
      class InvalidBaseURLError < StandardError; end

      # Validates a base URL and returns it unchanged when valid.
      #
      # @param base_url [String, nil] the URL to validate; blank values are
      #   returned as-is so a provider default can be applied later.
      # @param has_credential [Boolean] whether a credential (api key/bearer
      #   token) will be attached to requests sent to this URL.
      # @param allow_insecure [Boolean] explicit opt-in that permits cleartext
      #   `http` to a non-loopback host even when a credential is attached.
      # @raise [InvalidBaseURLError] when the URL is invalid or insecure.
      # @return [String, nil] the validated URL (blank input returned unchanged).
      def self.call(base_url:, has_credential: false, allow_insecure: false)
        new(base_url, has_credential, allow_insecure).call
      end

      # @param base_url [String, nil] the URL to validate.
      # @param has_credential [Boolean] whether a credential will be attached.
      # @param allow_insecure [Boolean] opt-in permitting cleartext non-loopback.
      def initialize(base_url, has_credential, allow_insecure)
        @base_url = base_url
        @has_credential = has_credential
        @allow_insecure = allow_insecure
      end

      # Runs the validation.
      #
      # @raise [InvalidBaseURLError] when the URL is invalid or insecure.
      # @return [String, nil] the validated URL.
      def call
        return @base_url if blank?(@base_url)

        validate_absolute_http_url!
        validate_secure_transport!
        @base_url
      end

      private

      # @param value [Object] value to test
      # @return [Boolean] true for nil, empty and whitespace-only values
      def blank?(value)
        value.to_s.strip.empty?
      end

      # Parses the base URL once. A parse failure is memoized as nil so that
      # repeated calls do not re-run the parser.
      #
      # @return [URI::Generic, nil] the parsed URL, or nil when it cannot be parsed
      def uri
        return @uri if defined?(@uri)

        @uri = begin
          URI.parse(@base_url.to_s)
        rescue URI::InvalidURIError
          nil
        end
      end

      # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes.
      #
      # @raise [InvalidBaseURLError] unless the URL is absolute http(s) with a host
      # @return [void]
      def validate_absolute_http_url!
        return if uri.is_a?(URI::HTTP) && !blank?(uri.hostname)

        raise InvalidBaseURLError,
              "Invalid provider base_url #{@base_url.inspect}: " \
              'must be an absolute http(s) URL with a host.'
      end

      # @raise [InvalidBaseURLError] when a credential would travel over cleartext
      #   http to a non-loopback host without the explicit opt-in
      # @return [void]
      def validate_secure_transport!
        return unless @has_credential
        return if uri.scheme == 'https'
        return if loopback?
        return if @allow_insecure

        raise InvalidBaseURLError,
              'Insecure provider base_url: refusing to send a credential over cleartext http ' \
              "to non-loopback host #{uri.hostname.inspect}. Use https, target a loopback host, " \
              'or set allow_insecure_base_url: true to override.'
      end

      # Hostnames are case-insensitive, but URI.parse keeps the host as written,
      # so it is downcased before the lookup (`LOCALHOST` is still loopback).
      #
      # @return [Boolean] true when the URL's host is one of LOOPBACK_HOSTS
      def loopback?
        LOOPBACK_HOSTS.include?(uri.hostname.to_s.downcase)
      end
    end
  end
end