RubyGems - agent-harness - Versions diffs - 0.5.4 → 0.5.6 - Mend

agent-harness 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +16 -0
data/lib/agent_harness/configuration.rb +3 -1
data/lib/agent_harness/error_taxonomy.rb +5 -0
data/lib/agent_harness/provider_runtime.rb +115 -0
data/lib/agent_harness/providers/adapter.rb +53 -0
data/lib/agent_harness/providers/aider.rb +37 -0
data/lib/agent_harness/providers/anthropic.rb +30 -4
data/lib/agent_harness/providers/base.rb +99 -3
data/lib/agent_harness/providers/codex.rb +96 -26
data/lib/agent_harness/providers/cursor.rb +44 -2
data/lib/agent_harness/providers/gemini.rb +32 -0
data/lib/agent_harness/providers/github_copilot.rb +21 -4
data/lib/agent_harness/providers/kilocode.rb +17 -0
data/lib/agent_harness/providers/mistral_vibe.rb +17 -0
data/lib/agent_harness/providers/opencode.rb +40 -0
data/lib/agent_harness/response.rb +6 -2
data/lib/agent_harness/version.rb +1 -1
data/lib/agent_harness.rb +1 -0
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0ac511d4448bf777f9389cdba34c165a3b97ac607cfca1db90f0fd5c9de0a4af
-  data.tar.gz: 946efb4b7f13e36da7b4c3cc8c3efc3722421aa83ccaf9789efdaa4e776d4bc6
+  metadata.gz: 946d7c425aff8c96536bc30def7e0abdba7ff7d82020f4941674a8c93be63526
+  data.tar.gz: ffc9d707f89ab60bf9cc59b4e5ccbcd2570a3aaa07e97c5a10bfb731f5dc07a0
 SHA512:
-  metadata.gz: 7c3afe5167530f2cd4f8b435b5098732a12b3630f4324cbfa002f3983d760e3a41488ae0d565a0a0694d9b08704d74f010845fb39251a8f92782c6c6d01a572e
-  data.tar.gz: 8a9d9706b997b2c8543a45a43cdf476a089eaded7283fe6b3dade1394242f9786c7458b68a2953bcb866611d21a74a360a35d5b1ae3ade40ebe222c819d3c7ce
+  metadata.gz: c7b0dcef83c7a31be09a87884211a8ba03c0c93fb845e666423cd17eb70a358dd122db7e57ce04271fa5e114013adbc0007394211286c23b03cfa6a2a600c68f
+  data.tar.gz: a8f14eb24039afd0a93ec1eb064b59ebbe6d03aafd61ab1859c64729d2253140afebacc5c8ee761578337c671c074425784c33676808534cdf7b8ad809a2df32

data/.release-please-manifest.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-  ".": "0.5.4"
+  ".": "0.5.6"
 }

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,21 @@
 ## [Unreleased]
+## [0.5.6](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.5...agent-harness/v0.5.6) (2026-03-30)
+### Bug Fixes
+* 53: Expose provider configuration capabilities for app-driven provider setup UIs ([#57](https://github.com/viamin/agent-harness/issues/57)) ([6aa6a02](https://github.com/viamin/agent-harness/commit/6aa6a02da14feefcad8761302d5fa8b5642a57fe))
+* 54: Add per-request provider runtime overrides for CLI-backed providers ([#55](https://github.com/viamin/agent-harness/issues/55)) ([407467a](https://github.com/viamin/agent-harness/commit/407467a6965a01494e2c4590680b2bb9ddac6dce))
+## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
+### Bug Fixes
+* 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
+* 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
 ## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)

data/lib/agent_harness/configuration.rb CHANGED Viewed

@@ -233,7 +233,8 @@ module AgentHarness
   # Provider-specific configuration
   class ProviderConfig
-    attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
+    attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
+      :externally_sandboxed
     attr_reader :name
@@ -246,6 +247,7 @@ module AgentHarness
       @default_flags = []
       @timeout = nil
       @model = nil
+      @externally_sandboxed = false
     end
     # Merge options into this configuration

data/lib/agent_harness/error_taxonomy.rb CHANGED Viewed

@@ -39,6 +39,11 @@ module AgentHarness
         action: :retry_with_backoff,
         retryable: true
       },
+      sandbox_failure: {
+        description: "Sandbox setup failed",
+        action: :escalate,
+        retryable: false
+      },
       unknown: {
         description: "Unknown error",
         action: :retry_with_backoff,

data/lib/agent_harness/provider_runtime.rb ADDED Viewed

@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+module AgentHarness
+  # Normalized runtime configuration for per-request provider overrides.
+  #
+  # ProviderRuntime lets callers pass a single, provider-agnostic payload
+  # into +send_message+ that each provider materializes into CLI args, env
+  # vars, or config files as needed.
+  #
+  # @example Routing OpenCode through OpenRouter with a specific model
+  #   runtime = AgentHarness::ProviderRuntime.new(
+  #     model: "anthropic/claude-opus-4.1",
+  #     base_url: "https://openrouter.ai/api/v1",
+  #     api_provider: "openrouter",
+  #     env: { "OPENROUTER_API_KEY" => "sk-..." }
+  #   )
+  #   provider.send_message(prompt: "Hello", provider_runtime: runtime)
+  #
+  # @example Passing a Hash (auto-coerced by Base#send_message)
+  #   provider.send_message(
+  #     prompt: "Hello",
+  #     provider_runtime: {
+  #       model: "openai/gpt-5.3-codex",
+  #       base_url: "https://openrouter.ai/api/v1"
+  #     }
+  #   )
+  class ProviderRuntime
+    attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata
+    # @param model [String, nil] model identifier override
+    # @param base_url [String, nil] upstream API base URL override
+    # @param api_provider [String, nil] API-compatible backend name
+    # @param env [Hash<String,String>] extra environment variables for the subprocess
+    # @param flags [Array<String>] extra CLI flags to append
+    # @param metadata [Hash] arbitrary provider-specific data
+    def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], metadata: {})
+      @model = model
+      @base_url = base_url
+      @api_provider = api_provider
+      env_hash = env || {}
+      unless env_hash.is_a?(Hash)
+        raise ArgumentError, "env must be a Hash (got #{env_hash.class})"
+      end
+      normalized_env = env_hash.each_with_object({}) do |(key, value), acc|
+        string_key = key.to_s
+        unless value.is_a?(String)
+          raise ArgumentError, "env value for #{string_key.inspect} must be a String (got #{value.class})"
+        end
+        acc[string_key] = value
+      end
+      @env = normalized_env.freeze
+      normalized_flags = flags || []
+      unless normalized_flags.is_a?(Array)
+        raise ArgumentError, "flags must be an Array (got #{normalized_flags.class})"
+      end
+      normalized_flags = normalized_flags.dup
+      normalized_flags.each_with_index do |flag, index|
+        unless flag.is_a?(String)
+          raise ArgumentError,
+            "flags must be an Array of Strings; invalid element at index #{index}: #{flag.inspect} (#{flag.class})"
+        end
+      end
+      @flags = normalized_flags.freeze
+      metadata_hash = metadata || {}
+      unless metadata_hash.is_a?(Hash)
+        raise ArgumentError, "metadata must be a Hash (got #{metadata_hash.class})"
+      end
+      @metadata = metadata_hash.dup.freeze
+      freeze
+    end
+    # Build a ProviderRuntime from a Hash.
+    #
+    # @param hash [Hash] runtime attributes
+    # @return [ProviderRuntime]
+    def self.from_hash(hash)
+      raise ArgumentError, "expected a Hash, got #{hash.class}" unless hash.is_a?(Hash)
+      new(
+        model: hash[:model] || hash["model"],
+        base_url: hash[:base_url] || hash["base_url"],
+        api_provider: hash[:api_provider] || hash["api_provider"],
+        env: hash[:env] || hash["env"] || {},
+        flags: hash[:flags] || hash["flags"] || [],
+        metadata: hash[:metadata] || hash["metadata"] || {}
+      )
+    end
+    # Coerce a value into a ProviderRuntime.
+    #
+    # @param value [ProviderRuntime, Hash, nil] input
+    # @return [ProviderRuntime, nil]
+    def self.wrap(value)
+      case value
+      when ProviderRuntime then value
+      when Hash then from_hash(value)
+      when nil then nil
+      else
+        raise ArgumentError, "Cannot coerce #{value.class} into ProviderRuntime"
+      end
+    end
+    # Whether any meaningful overrides are present.
+    #
+    # @return [Boolean]
+    def empty?
+      model.nil? && base_url.nil? && api_provider.nil? &&
+        env.empty? && flags.empty? && metadata.empty?
+    end
+  end
+end

data/lib/agent_harness/providers/adapter.rb CHANGED Viewed

@@ -75,11 +75,32 @@ module AgentHarness
       # @option options [Integer] :timeout timeout in seconds
       # @option options [String] :session session identifier
       # @option options [Boolean] :dangerous_mode skip permission checks
+      # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
+      #   runtime overrides (model, base_url, api_provider, env, flags, metadata).
+      #   For providers that delegate to Providers::Base#send_message, a plain Hash
+      #   is automatically coerced into a ProviderRuntime. Providers that override
+      #   #send_message directly are responsible for handling this option.
       # @return [Response] response object with output and metadata
       def send_message(prompt:, **options)
         raise NotImplementedError, "#{self.class} must implement #send_message"
       end
+      # Provider configuration schema for app-driven setup UIs
+      #
+      # Returns metadata describing the configurable fields, supported
+      # authentication modes, and backend compatibility for this provider.
+      # Applications use this to build generic provider-entry forms without
+      # hardcoding provider-specific knowledge.
+      #
+      # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [auth_type],
+          openai_compatible: false
+        }
+      end
       # Provider capabilities
       #
       # @return [Hash] capability flags
@@ -218,6 +239,38 @@ module AgentHarness
       def health_status
         {healthy: true, message: "OK"}
       end
+      # Execution semantics for this provider
+      #
+      # Returns a hash describing provider-specific execution behavior so
+      # downstream apps do not need to hardcode CLI quirks. This metadata
+      # can be used to select the right flags and interpret output.
+      #
+      # @return [Hash] execution semantics
+      def execution_semantics
+        {
+          prompt_delivery: :arg,       # :arg, :stdin, or :flag
+          output_format: :text,        # :text or :json
+          sandbox_aware: false,        # adjusts behavior inside containers
+          uses_subcommand: false,      # e.g. "codex exec", "opencode run"
+          non_interactive_flag: nil,   # flag to suppress interactive prompts
+          legitimate_exit_codes: [0],  # exit codes that are NOT errors
+          stderr_is_diagnostic: true,  # stderr may contain non-error output
+          parses_rate_limit_reset: false # can extract Retry-After from output
+        }
+      end
+      # Parse a rate-limit reset time from provider output
+      #
+      # Providers that include rate-limit reset information in their error
+      # output can override this to extract it, so the orchestration layer
+      # can schedule retries accurately.
+      #
+      # @param output [String] combined stdout+stderr from the CLI
+      # @return [Time, nil] when the rate limit resets, or nil if unknown
+      def parse_rate_limit_reset(output)
+        nil
+      end
     end
   end
 end

data/lib/agent_harness/providers/aider.rb CHANGED Viewed

@@ -59,6 +59,23 @@ module AgentHarness
         "Aider"
       end
+      def configuration_schema
+        {
+          fields: [
+            {
+              name: :model,
+              type: :string,
+              label: "Model",
+              required: false,
+              hint: "Model identifier (supports OpenAI, Anthropic, and other model names)",
+              accepts_arbitrary: true
+            }
+          ],
+          auth_modes: [:api_key],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: true,
@@ -71,6 +88,26 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS.merge(
+          auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
+          transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
+        )
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: "--yes",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def supports_sessions?
         true
       end

data/lib/agent_harness/providers/anthropic.rb CHANGED Viewed

@@ -160,6 +160,23 @@ module AgentHarness
         "Anthropic Claude CLI"
       end
+      def configuration_schema
+        {
+          fields: [
+            {
+              name: :model,
+              type: :string,
+              label: "Model",
+              required: false,
+              hint: "Claude model to use (e.g. claude-3-5-sonnet-20241022)",
+              accepts_arbitrary: false
+            }
+          ],
+          auth_modes: [:oauth],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: true,
@@ -193,10 +210,6 @@ module AgentHarness
         ["--mcp-config", config_path]
       end
-      def supports_dangerous_mode?
-        true
-      end
       def dangerous_mode_flags
         ["--dangerously-skip-permissions"]
       end
@@ -205,6 +218,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :json,
+          sandbox_aware: true,
+          uses_subcommand: false,
+          non_interactive_flag: "--print",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [

data/lib/agent_harness/providers/base.rb CHANGED Viewed

@@ -32,6 +32,34 @@ module AgentHarness
     class Base
       include Adapter
+      # Common error patterns shared across providers that use standard
+      # HTTP-style error responses. Providers with unique patterns (e.g.
+      # Anthropic, GitHub Copilot) override error_patterns entirely.
+      COMMON_ERROR_PATTERNS = {
+        rate_limited: [
+          /rate.?limit/i,
+          /too.?many.?requests/i,
+          /429/
+        ],
+        auth_expired: [
+          /invalid.*api.*key/i,
+          /unauthorized/i,
+          /authentication/i
+        ],
+        quota_exceeded: [
+          /quota.*exceeded/i,
+          /insufficient.*quota/i,
+          /billing/i
+        ],
+        transient: [
+          /timeout/i,
+          /connection.*error/i,
+          /service.*unavailable/i,
+          /503/,
+          /502/
+        ]
+      }.tap { |patterns| patterns.each_value(&:freeze) }.freeze
       attr_reader :config, :logger
       attr_accessor :executor
@@ -59,10 +87,16 @@ module AgentHarness
       #
       # @param prompt [String] the prompt to send
       # @param options [Hash] additional options
+      # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
+      #   runtime overrides (model, base_url, api_provider, env, flags, metadata).
+      #   A plain Hash is automatically coerced into a ProviderRuntime.
       # @return [Response] the response
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Coerce provider_runtime from Hash if needed
+        options = normalize_provider_runtime(options)
         # Normalize and validate MCP servers
         options = normalize_mcp_servers(options)
         validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
@@ -80,6 +114,23 @@ module AgentHarness
         # Parse response
         response = parse_response(result, duration: duration)
+        runtime = options[:provider_runtime]
+        # Runtime model is a per-request override and always takes precedence
+        # over both the config-level model and whatever parse_response returned.
+        # This is intentional: callers use runtime overrides to route a single
+        # provider instance through different backends on each request.
+        if runtime&.model
+          response = Response.new(
+            output: response.output,
+            exit_code: response.exit_code,
+            duration: response.duration,
+            provider: response.provider,
+            model: runtime.model,
+            tokens: response.tokens,
+            metadata: response.metadata,
+            error: response.error
+          )
+        end
         # Track tokens
         track_tokens(response) if response.tokens
@@ -107,6 +158,16 @@ module AgentHarness
         name.capitalize
       end
+      # Whether the provider is running inside a sandboxed (Docker) environment
+      #
+      # Providers can use this to adjust execution flags, e.g. skipping
+      # nested sandboxing when already inside a container.
+      #
+      # @return [Boolean] true when the executor is a DockerCommandExecutor
+      def sandboxed_environment?
+        @executor.is_a?(DockerCommandExecutor)
+      end
       protected
       # Build CLI command - override in subclasses
@@ -120,25 +181,53 @@ module AgentHarness
       # Build environment variables - override in subclasses
       #
+      # Provider subclasses should call +super+ and merge their own env vars
+      # so that ProviderRuntime env overrides are always included.
+      #
       # @param options [Hash] options
       # @return [Hash] environment variables
       def build_env(options)
-        {}
+        runtime = options[:provider_runtime]
+        return {} unless runtime
+        runtime.env.dup
       end
       # Parse CLI output into Response - override in subclasses
       #
+      # Combines stdout and stderr for error classification so that
+      # provider-specific error messages are captured regardless of
+      # which stream they appear on.
+      #
       # @param result [CommandExecutor::Result] execution result
       # @param duration [Float] execution duration
       # @return [Response] parsed response
       def parse_response(result, duration:)
+        error = nil
+        # Use execution_semantics[:legitimate_exit_codes] so providers can
+        # declare additional non-error exit codes beyond zero.
+        legitimate = execution_semantics[:legitimate_exit_codes] || [0]
+        unless legitimate.include?(result.exit_code)
+          # Concatenate non-empty streams so error patterns can match
+          # regardless of which stream the provider writes to.
+          combined = [result.stderr, result.stdout]
+            .map { |s| s.to_s.strip }
+            .reject(&:empty?)
+            .join("\n")
+          error = combined unless combined.empty?
+        end
         Response.new(
           output: result.stdout,
           exit_code: result.exit_code,
           duration: duration,
           provider: self.class.provider_name,
           model: @config.model,
-          error: result.failed? ? result.stderr : nil
+          error: error,
+          metadata: {
+            legitimate_exit_codes: legitimate
+          }
         )
       end
@@ -151,6 +240,13 @@ module AgentHarness
       private
+      def normalize_provider_runtime(options)
+        raw = options[:provider_runtime]
+        return options if raw.nil? || raw.is_a?(ProviderRuntime)
+        options.merge(provider_runtime: ProviderRuntime.wrap(raw))
+      end
       def normalize_mcp_servers(options)
         servers = options[:mcp_servers]
         return options if servers.nil?
@@ -192,7 +288,7 @@ module AgentHarness
         AgentHarness.token_tracker.record(
           provider: self.class.provider_name,
-          model: @config.model,
+          model: response.model || @config.model,
           input_tokens: response.tokens[:input] || 0,
           output_tokens: response.tokens[:output] || 0,
           total_tokens: response.tokens[:total]

data/lib/agent_harness/providers/codex.rb CHANGED Viewed

@@ -59,6 +59,14 @@ module AgentHarness
         "OpenAI Codex CLI"
       end
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [:api_key],
+          openai_compatible: true
+        }
+      end
       def capabilities
         {
           streaming: false,
@@ -67,7 +75,24 @@ module AgentHarness
           tool_use: true,
           json_mode: false,
           mcp: false,
-          dangerous_mode: false
+          dangerous_mode: true
+        }
+      end
+      def dangerous_mode_flags
+        ["--full-auto"]
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: true,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
         }
       end
@@ -81,32 +106,15 @@ module AgentHarness
       end
       def error_patterns
-        {
-          rate_limited: [
-            /rate.?limit/i,
-            /too.?many.?requests/i,
-            /429/
-          ],
-          auth_expired: [
-            /invalid.*api.*key/i,
-            /unauthorized/i,
-            /authentication/i,
-            /401/,
-            /incorrect.*api.*key/i
-          ],
-          quota_exceeded: [
-            /quota.*exceeded/i,
-            /insufficient.*quota/i,
-            /billing/i
-          ],
-          transient: [
-            /timeout/i,
-            /connection.*reset/i,
-            /service.*unavailable/i,
-            /503/,
-            /502/
+        COMMON_ERROR_PATTERNS.merge(
+          auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
+          transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
+          sandbox_failure: [
+            /bwrap.*no permissions/i,
+            /no permissions to create a new namespace/i,
+            /unprivileged.*namespace/i
           ]
-        }
+        )
       end
       def auth_status
@@ -167,9 +175,34 @@ module AgentHarness
       protected
+      def parse_response(result, duration:)
+        response = super
+        if response.success? && sandbox_failure_detected?(result.stderr)
+          return Response.new(
+            output: result.stdout,
+            exit_code: 1,
+            duration: duration,
+            provider: self.class.provider_name,
+            model: @config.model,
+            error: "Sandbox failure detected: #{result.stderr.strip}"
+          )
+        end
+        response
+      end
       def build_command(prompt, options)
         cmd = [self.class.binary_name, "exec"]
+        # When running inside an already-sandboxed Docker container, Codex's
+        # own sandboxing conflicts with the outer sandbox. Use --full-auto to
+        # skip nested sandboxing while keeping full tool access.
+        # Also applies when dangerous_mode is explicitly requested.
+        if sandboxed_environment? || options[:dangerous_mode]
+          cmd += dangerous_mode_flags
+        end
         flags = @config.default_flags
         if flags
           unless flags.is_a?(Array)
@@ -178,21 +211,58 @@ module AgentHarness
           cmd += flags if flags.any?
         end
+        if externally_sandboxed?(options)
+          cmd += sandbox_bypass_flags
+        end
         if options[:session]
           cmd += session_flags(options[:session])
         end
+        runtime = options[:provider_runtime]
+        if runtime
+          cmd += ["--model", runtime.model] if runtime.model
+          cmd += runtime.flags unless runtime.flags.empty?
+        end
         cmd << prompt
         cmd
       end
+      def build_env(options)
+        env = super
+        runtime = options[:provider_runtime]
+        return env unless runtime
+        env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
+        env
+      end
       def default_timeout
         300
       end
       private
+      def externally_sandboxed?(options)
+        if options.key?(:externally_sandboxed)
+          !!options[:externally_sandboxed]
+        else
+          !!@config.externally_sandboxed
+        end
+      end
+      def sandbox_failure_detected?(stderr)
+        return false if stderr.nil? || stderr.empty?
+        error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
+      end
+      def sandbox_bypass_flags
+        ["--sandbox", "none"]
+      end
       def read_codex_credentials
         path = codex_config_path
         return nil unless File.exist?(path)

data/lib/agent_harness/providers/cursor.rb CHANGED Viewed

@@ -93,6 +93,14 @@ module AgentHarness
         "Cursor AI"
       end
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [:oauth],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: false,
@@ -126,6 +134,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :stdin,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: "-p",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [
@@ -150,23 +171,44 @@ module AgentHarness
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Coerce provider_runtime from Hash if needed (same as Base#send_message)
+        options = normalize_provider_runtime(options)
+        runtime = options[:provider_runtime]
         # Normalize and validate MCP servers (same as Base#send_message)
         options = normalize_mcp_servers(options)
         validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
         # Build command (without prompt in args - we send via stdin)
         command = [self.class.binary_name, "-p"]
+        command.concat(runtime.flags) if runtime&.flags&.any?
         # Calculate timeout
         timeout = options[:timeout] || @config.timeout || default_timeout
         # Execute command with prompt on stdin
+        env = build_env(options)
         start_time = Time.now
-        result = @executor.execute(command, timeout: timeout, stdin_data: prompt)
+        result = @executor.execute(command, timeout: timeout, stdin_data: prompt, env: env)
         duration = Time.now - start_time
         # Parse response
         response = parse_response(result, duration: duration)
+        # Runtime model is a per-request override and always takes precedence
+        # over both the config-level model and whatever parse_response returned.
+        # See Base#send_message for rationale.
+        if runtime&.model
+          response = Response.new(
+            output: response.output,
+            exit_code: response.exit_code,
+            duration: response.duration,
+            provider: response.provider,
+            model: runtime.model,
+            tokens: response.tokens,
+            metadata: response.metadata,
+            error: response.error
+          )
+        end
         # Track tokens
         track_tokens(response) if response.tokens
@@ -188,7 +230,7 @@ module AgentHarness
       end
       def build_env(options)
-        {}
+        super
       end
       def default_timeout

data/lib/agent_harness/providers/gemini.rb CHANGED Viewed

@@ -83,6 +83,25 @@ module AgentHarness
         "Google Gemini"
       end
+      def configuration_schema
+        {
+          fields: [
+            {
+              name: :model,
+              type: :string,
+              label: "Model",
+              required: false,
+              hint: "Gemini model to use (e.g. gemini-2.5-pro, gemini-2.0-flash)",
+              # accepts_arbitrary is true because supports_model_family? accepts
+              # any string starting with "gemini-", not just discovered models.
+              accepts_arbitrary: true
+            }
+          ],
+          auth_modes: [:api_key, :oauth],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: true,
@@ -99,6 +118,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [

data/lib/agent_harness/providers/github_copilot.rb CHANGED Viewed

@@ -77,6 +77,14 @@ module AgentHarness
         "GitHub Copilot CLI"
       end
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [:oauth],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: false,
@@ -89,10 +97,6 @@ module AgentHarness
         }
       end
-      def supports_dangerous_mode?
-        true
-      end
       def dangerous_mode_flags
         ["--allow-all-tools"]
       end
@@ -110,6 +114,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           auth_expired: [

data/lib/agent_harness/providers/kilocode.rb CHANGED Viewed

@@ -57,6 +57,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/providers/mistral_vibe.rb CHANGED Viewed

@@ -59,6 +59,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/providers/opencode.rb CHANGED Viewed

@@ -47,6 +47,14 @@ module AgentHarness
         "OpenCode CLI"
       end
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [:api_key],
+          openai_compatible: true
+        }
+      end
       def capabilities
         {
           streaming: false,
@@ -59,14 +67,46 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)
         cmd = [self.class.binary_name, "run"]
+        runtime = options[:provider_runtime]
+        if runtime
+          cmd += runtime.flags unless runtime.flags.empty?
+        end
         cmd << prompt
         cmd
       end
+      def build_env(options)
+        env = super
+        runtime = options[:provider_runtime]
+        return env unless runtime
+        env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
+        env
+      end
       def default_timeout
         300
       end

data/lib/agent_harness/response.rb CHANGED Viewed

@@ -40,9 +40,13 @@ module AgentHarness
     # Check if the response indicates success
     #
-    # @return [Boolean] true if exit_code is 0 and no error
+    # A response is successful when its exit code is among the provider's
+    # legitimate exit codes (defaults to [0]) and no error was detected.
+    #
+    # @return [Boolean] true if exit_code is legitimate and no error
     def success?
-      @exit_code == 0 && @error.nil?
+      legitimate = @metadata[:legitimate_exit_codes] || [0]
+      legitimate.include?(@exit_code) && @error.nil?
     end
     # Check if the response indicates failure

data/lib/agent_harness/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module AgentHarness
-  VERSION = "0.5.4"
+  VERSION = "0.5.6"
 end

data/lib/agent_harness.rb CHANGED Viewed

@@ -138,6 +138,7 @@ end
 # Core components
 require_relative "agent_harness/errors"
 require_relative "agent_harness/mcp_server"
+require_relative "agent_harness/provider_runtime"
 require_relative "agent_harness/configuration"
 require_relative "agent_harness/command_executor"
 require_relative "agent_harness/docker_command_executor"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: agent-harness
 version: !ruby/object:Gem::Version
-  version: 0.5.4
+  version: 0.5.6
 platform: ruby
 authors:
 - Bart Agapinan
@@ -92,6 +92,7 @@ files:
 - lib/agent_harness/orchestration/provider_manager.rb
 - lib/agent_harness/orchestration/rate_limiter.rb
 - lib/agent_harness/provider_health_check.rb
+- lib/agent_harness/provider_runtime.rb
 - lib/agent_harness/providers/adapter.rb
 - lib/agent_harness/providers/aider.rb
 - lib/agent_harness/providers/anthropic.rb