RubyGems - agent-harness - Versions diffs - 0.5.3 → 0.5.5 - Mend

agent-harness 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +15 -0
data/lib/agent_harness/configuration.rb +3 -1
data/lib/agent_harness/error_taxonomy.rb +5 -0
data/lib/agent_harness/errors.rb +14 -0
data/lib/agent_harness/mcp_server.rb +157 -0
data/lib/agent_harness/providers/adapter.rb +84 -0
data/lib/agent_harness/providers/aider.rb +20 -0
data/lib/agent_harness/providers/anthropic.rb +114 -2
data/lib/agent_harness/providers/base.rb +99 -1
data/lib/agent_harness/providers/codex.rb +73 -26
data/lib/agent_harness/providers/cursor.rb +27 -0
data/lib/agent_harness/providers/gemini.rb +13 -0
data/lib/agent_harness/providers/github_copilot.rb +13 -4
data/lib/agent_harness/providers/kilocode.rb +17 -0
data/lib/agent_harness/providers/mistral_vibe.rb +17 -0
data/lib/agent_harness/providers/opencode.rb +17 -0
data/lib/agent_harness/response.rb +6 -2
data/lib/agent_harness/version.rb +1 -1
data/lib/agent_harness.rb +1 -0
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3dfde368468f5c0c037ec1cb2fefd705e813d192e3723fd70126757cd851cf14
-  data.tar.gz: f68f11de99ea61807ab1bb7a94a28aca4aa37e3346469b195a77019f6ceeceec
+  metadata.gz: dc338c5fc81d4175149d405d494936b68a261a637deda4fc0e4fb7b18944bf67
+  data.tar.gz: aed5c92bc22dadab8826b919e8eabf606bcd7f6bfe0e1d02631c83461056a888
 SHA512:
-  metadata.gz: 4cf0d1807fee47eb2ef1aecc63ab8c10ce71a681d6c98ca2cb860ae7eb9ba7b1a8d88e4382e9ff1f5f2f25acd9380a6cf4c3d8e61f5cf008d920b4d2bbf7b4bc
-  data.tar.gz: cd6ac3ae08acf302369a28cdda0a3e332994f679ad37258c0f4dd869f99f18396ccb1cb52d9a6d5bcbb3c6ac3c776ba87a5338edb8280b46785edce254eb8258
+  metadata.gz: 1d662f4ae796d88a1a2c2eabce4604a38c4b53b545640b51c16a4b8e370ddf59f40ff57b19dfb46ba96e50b85399b846c9d2379bdc9806bd40aa78b1f18c1f66
+  data.tar.gz: 913df22acc91cd6db4ff2788867dc8337a2f1e94c0a2b2cce483e0af4ec73d2a946fcbb80270968d82c49c55aa375da86c386f4c8472ad2d42664d2bc1242ee6

data/.release-please-manifest.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-  ".": "0.5.3"
+  ".": "0.5.5"
 }

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,20 @@
 ## [Unreleased]
+## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
+### Bug Fixes
+* 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
+* 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
+## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
+### Bug Fixes
+* 44: feat(mcp): add first-class MCP server configuration to request execution ([#45](https://github.com/viamin/agent-harness/issues/45)) ([454cd9b](https://github.com/viamin/agent-harness/commit/454cd9be1c4bcd2eb92a4ca6f81cc012d4ce1f8c))
 ## [0.5.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.2...agent-harness/v0.5.3) (2026-03-27)

data/lib/agent_harness/configuration.rb CHANGED Viewed

@@ -233,7 +233,8 @@ module AgentHarness
   # Provider-specific configuration
   class ProviderConfig
-    attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
+    attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
+      :externally_sandboxed
     attr_reader :name
@@ -246,6 +247,7 @@ module AgentHarness
       @default_flags = []
       @timeout = nil
       @model = nil
+      @externally_sandboxed = false
     end
     # Merge options into this configuration

data/lib/agent_harness/error_taxonomy.rb CHANGED Viewed

@@ -39,6 +39,11 @@ module AgentHarness
         action: :retry_with_backoff,
         retryable: true
       },
+      sandbox_failure: {
+        description: "Sandbox setup failed",
+        action: :escalate,
+        retryable: false
+      },
       unknown: {
         description: "Unknown error",
         action: :retry_with_backoff,

data/lib/agent_harness/errors.rb CHANGED Viewed

@@ -57,6 +57,20 @@ module AgentHarness
   # Configuration errors
   class ConfigurationError < Error; end
+  # MCP-specific errors
+  class McpConfigurationError < ConfigurationError; end
+  class McpUnsupportedError < ProviderError
+    attr_reader :provider
+    def initialize(message = nil, provider: nil, **kwargs)
+      @provider = provider
+      super(message, **kwargs)
+    end
+  end
+  class McpTransportUnsupportedError < McpUnsupportedError; end
   # Orchestration errors
   class NoProvidersAvailableError < Error
     attr_reader :attempted_providers, :errors

data/lib/agent_harness/mcp_server.rb ADDED Viewed

@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+module AgentHarness
+  # Canonical representation of an MCP server for request-time execution.
+  #
+  # Provider-agnostic value object that can be translated by each provider
+  # adapter into its CLI-specific configuration.
+  #
+  # @example stdio server
+  #   McpServer.new(
+  #     name: "filesystem",
+  #     transport: "stdio",
+  #     command: ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/workspace"],
+  #     env: { "DEBUG" => "0" }
+  #   )
+  #
+  # @example HTTP/URL server
+  #   McpServer.new(
+  #     name: "playwright",
+  #     transport: "http",
+  #     url: "http://mcp-playwright:3000/mcp"
+  #   )
+  class McpServer
+    VALID_TRANSPORTS = %w[stdio http sse].freeze
+    attr_reader :name, :transport, :command, :args, :env, :url
+    # @param name [String] unique name for this MCP server
+    # @param transport [String] one of "stdio", "http", "sse"
+    # @param command [Array<String>, nil] command to launch (stdio only)
+    # @param args [Array<String>, nil] additional args for the command
+    # @param env [Hash<String,String>, nil] environment variables for the server process
+    # @param url [String, nil] URL for HTTP/SSE transport
+    def initialize(name:, transport:, command: nil, args: nil, env: nil, url: nil)
+      @name = name
+      @transport = transport.to_s
+      @command = command
+      @args = args || []
+      @env = env || {}
+      @url = url
+      validate!
+    end
+    # Build from a plain Hash (e.g. from user input or serialized config)
+    #
+    # @param hash [Hash] server definition
+    # @return [McpServer]
+    def self.from_hash(hash)
+      unless hash.is_a?(Hash)
+        raise McpConfigurationError, "MCP server definition must be a Hash, got #{hash.class}"
+      end
+      begin
+        hash = hash.transform_keys(&:to_sym)
+      rescue NoMethodError, TypeError => e
+        raise McpConfigurationError, "MCP server hash contains invalid keys: #{e.message}"
+      end
+      new(
+        name: hash[:name],
+        transport: hash[:transport],
+        command: hash[:command],
+        args: hash[:args],
+        env: hash[:env],
+        url: hash[:url]
+      )
+    end
+    def stdio?
+      @transport == "stdio"
+    end
+    def http?
+      %w[http sse].include?(@transport)
+    end
+    def to_h
+      h = {name: @name, transport: @transport}
+      if stdio?
+        h[:command] = @command
+        h[:args] = @args unless @args.empty?
+      else
+        h[:url] = @url
+      end
+      h[:env] = @env unless @env.empty?
+      h
+    end
+    private
+    def validate!
+      raise McpConfigurationError, "MCP server name is required" if @name.nil? || @name.to_s.strip.empty?
+      unless VALID_TRANSPORTS.include?(@transport)
+        raise McpConfigurationError,
+          "Invalid MCP transport '#{@transport}' for server '#{@name}'. Valid transports: #{VALID_TRANSPORTS.join(", ")}"
+      end
+      validate_args!
+      validate_env!
+      validate_stdio! if stdio?
+      validate_http! if http?
+      validate_no_stdio_only_fields_on_http! if http?
+    end
+    def validate_args!
+      return if @args.is_a?(Array) && @args.all? { |a| a.is_a?(String) }
+      raise McpConfigurationError,
+        "MCP server '#{@name}' args must be an Array of Strings"
+    end
+    def validate_env!
+      return if @env.is_a?(Hash) && @env.keys.all? { |k| k.is_a?(String) } && @env.values.all? { |v| v.is_a?(String) }
+      raise McpConfigurationError,
+        "MCP server '#{@name}' env must be a Hash with String keys and values"
+    end
+    def validate_stdio!
+      if @command.nil? || !@command.is_a?(Array) || @command.empty?
+        raise McpConfigurationError,
+          "MCP server '#{@name}' with stdio transport requires a non-empty command array"
+      end
+      unless @command.all? { |c| c.is_a?(String) }
+        raise McpConfigurationError,
+          "MCP server '#{@name}' command must contain only strings"
+      end
+      return if @url.nil?
+      raise McpConfigurationError,
+        "MCP server '#{@name}' with stdio transport should not have a url"
+    end
+    def validate_http!
+      if @url.nil? || @url.to_s.strip.empty?
+        raise McpConfigurationError,
+          "MCP server '#{@name}' with #{@transport} transport requires a url"
+      end
+      return if @command.nil?
+      raise McpConfigurationError,
+        "MCP server '#{@name}' with #{@transport} transport should not have a command"
+    end
+    def validate_no_stdio_only_fields_on_http!
+      return if @args.empty?
+      raise McpConfigurationError,
+        "MCP server '#{@name}' with #{@transport} transport should not have args (args are only valid for stdio)"
+    end
+  end
+end

data/lib/agent_harness/providers/adapter.rb CHANGED Viewed

@@ -124,6 +124,58 @@ module AgentHarness
         []
       end
+      # Supported MCP transport types for this provider
+      #
+      # @return [Array<String>] supported transports (e.g. ["stdio", "http"])
+      def supported_mcp_transports
+        []
+      end
+      # Build provider-specific MCP flags/arguments for CLI invocation
+      #
+      # @param mcp_servers [Array<McpServer>] MCP server definitions
+      # @param working_dir [String, nil] working directory for temp files
+      # @return [Array<String>] CLI flags to append to the command
+      def build_mcp_flags(mcp_servers, working_dir: nil)
+        []
+      end
+      # Validate that this provider can handle the given MCP servers
+      #
+      # @param mcp_servers [Array<McpServer>] MCP server definitions
+      # @raise [McpUnsupportedError] if MCP is not supported
+      # @raise [McpTransportUnsupportedError] if a transport is not supported
+      def validate_mcp_servers!(mcp_servers)
+        return if mcp_servers.nil? || mcp_servers.empty?
+        unless supports_mcp?
+          raise McpUnsupportedError.new(
+            "Provider '#{self.class.provider_name}' does not support MCP servers",
+            provider: self.class.provider_name
+          )
+        end
+        supported = supported_mcp_transports
+        if supported.empty?
+          raise McpUnsupportedError.new(
+            "Provider '#{self.class.provider_name}' does not support request-time MCP servers",
+            provider: self.class.provider_name
+          )
+        end
+        mcp_servers.each do |server|
+          next if supported.include?(server.transport)
+          raise McpTransportUnsupportedError.new(
+            "Provider '#{self.class.provider_name}' does not support MCP transport " \
+            "'#{server.transport}' (server: '#{server.name}'). " \
+            "Supported transports: #{supported.join(", ")}",
+            provider: self.class.provider_name
+          )
+        end
+      end
       # Check if provider supports dangerous mode
       #
       # @return [Boolean] true if dangerous mode is supported
@@ -166,6 +218,38 @@ module AgentHarness
       def health_status
         {healthy: true, message: "OK"}
       end
+      # Execution semantics for this provider
+      #
+      # Returns a hash describing provider-specific execution behavior so
+      # downstream apps do not need to hardcode CLI quirks. This metadata
+      # can be used to select the right flags and interpret output.
+      #
+      # @return [Hash] execution semantics
+      def execution_semantics
+        {
+          prompt_delivery: :arg,       # :arg, :stdin, or :flag
+          output_format: :text,        # :text or :json
+          sandbox_aware: false,        # adjusts behavior inside containers
+          uses_subcommand: false,      # e.g. "codex exec", "opencode run"
+          non_interactive_flag: nil,   # flag to suppress interactive prompts
+          legitimate_exit_codes: [0],  # exit codes that are NOT errors
+          stderr_is_diagnostic: true,  # stderr may contain non-error output
+          parses_rate_limit_reset: false # can extract Retry-After from output
+        }
+      end
+      # Parse a rate-limit reset time from provider output
+      #
+      # Providers that include rate-limit reset information in their error
+      # output can override this to extract it, so the orchestration layer
+      # can schedule retries accurately.
+      #
+      # @param output [String] combined stdout+stderr from the CLI
+      # @return [Time, nil] when the rate limit resets, or nil if unknown
+      def parse_rate_limit_reset(output)
+        nil
+      end
     end
   end
 end

data/lib/agent_harness/providers/aider.rb CHANGED Viewed

@@ -71,6 +71,26 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS.merge(
+          auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
+          transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
+        )
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: "--yes",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def supports_sessions?
         true
       end

data/lib/agent_harness/providers/anthropic.rb CHANGED Viewed

@@ -172,12 +172,25 @@ module AgentHarness
         }
       end
+      def send_message(prompt:, **options)
+        super
+      ensure
+        cleanup_mcp_tempfiles!
+      end
       def supports_mcp?
         true
       end
-      def supports_dangerous_mode?
-        true
+      def supported_mcp_transports
+        %w[stdio http sse]
+      end
+      def build_mcp_flags(mcp_servers, working_dir: nil)
+        return [] if mcp_servers.empty?
+        config_path = write_mcp_config_file(mcp_servers, working_dir: working_dir)
+        ["--mcp-config", config_path]
       end
       def dangerous_mode_flags
@@ -188,6 +201,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :json,
+          sandbox_aware: true,
+          uses_subcommand: false,
+          non_interactive_flag: "--print",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [
@@ -266,6 +292,11 @@ module AgentHarness
           cmd += dangerous_mode_flags
         end
+        # Add MCP server flags (validated/normalized by Base#send_message)
+        if options[:mcp_servers]&.any?
+          cmd += build_mcp_flags(options[:mcp_servers])
+        end
         # Add custom flags from config
         cmd += @config.default_flags if @config.default_flags&.any?
@@ -376,6 +407,87 @@ module AgentHarness
         servers
       end
+      def write_mcp_config_file(mcp_servers, working_dir: nil)
+        require "tempfile"
+        require "tmpdir"
+        require "securerandom"
+        config = build_claude_mcp_config(mcp_servers)
+        config_json = JSON.generate(config)
+        if @executor.is_a?(DockerCommandExecutor)
+          # When running inside a Docker container, write the config file
+          # inside the container so the CLI process can read it.
+          # Track the path so cleanup_mcp_tempfiles! can remove it after execution.
+          container_path = "/tmp/agent_harness_mcp_#{SecureRandom.hex(8)}.json"
+          result = @executor.execute(
+            ["sh", "-c", "cat > #{container_path}"],
+            stdin_data: config_json,
+            timeout: 5
+          )
+          unless result.success?
+            raise McpConfigurationError,
+              "Failed to write MCP config inside container: #{result.stderr}"
+          end
+          @mcp_docker_config_paths ||= []
+          @mcp_docker_config_paths << container_path
+          container_path
+        else
+          dir = working_dir || Dir.tmpdir
+          file = Tempfile.new(["agent_harness_mcp_", ".json"], dir)
+          file.write(config_json)
+          file.close
+          # Hold a reference so the Tempfile is not garbage-collected (and
+          # therefore deleted) before the CLI process reads it.
+          # Cleaned up by cleanup_mcp_tempfiles! after execution.
+          @mcp_config_tempfiles ||= []
+          @mcp_config_tempfiles << file
+          file.path
+        end
+      end
+      def build_claude_mcp_config(mcp_servers)
+        servers = {}
+        mcp_servers.each do |server|
+          h = if server.stdio?
+            entry = {command: server.command.first}
+            remaining_args = server.command[1..] + server.args
+            entry[:args] = remaining_args unless remaining_args.empty?
+            entry
+          else
+            {url: server.url}
+          end
+          h[:env] = server.env unless server.env.empty?
+          servers[server.name] = h
+        end
+        {mcpServers: servers}
+      end
+      def cleanup_mcp_tempfiles!
+        if @mcp_config_tempfiles
+          @mcp_config_tempfiles.each do |file|
+            file.close unless file.closed?
+            file.unlink
+          rescue
+            nil
+          end
+          @mcp_config_tempfiles = nil
+        end
+        if @mcp_docker_config_paths
+          @mcp_docker_config_paths.each do |path|
+            @executor.execute(["rm", "-f", path], timeout: 5)
+          rescue
+            nil
+          end
+          @mcp_docker_config_paths = nil
+        end
+      end
       def log_debug(action, **context)
         @logger&.debug("[AgentHarness::Anthropic] #{action}: #{context.inspect}")
       end

data/lib/agent_harness/providers/base.rb CHANGED Viewed

@@ -32,6 +32,34 @@ module AgentHarness
     class Base
       include Adapter
+      # Common error patterns shared across providers that use standard
+      # HTTP-style error responses. Providers with unique patterns (e.g.
+      # Anthropic, GitHub Copilot) override error_patterns entirely.
+      COMMON_ERROR_PATTERNS = {
+        rate_limited: [
+          /rate.?limit/i,
+          /too.?many.?requests/i,
+          /429/
+        ],
+        auth_expired: [
+          /invalid.*api.*key/i,
+          /unauthorized/i,
+          /authentication/i
+        ],
+        quota_exceeded: [
+          /quota.*exceeded/i,
+          /insufficient.*quota/i,
+          /billing/i
+        ],
+        transient: [
+          /timeout/i,
+          /connection.*error/i,
+          /service.*unavailable/i,
+          /503/,
+          /502/
+        ]
+      }.tap { |patterns| patterns.each_value(&:freeze) }.freeze
       attr_reader :config, :logger
       attr_accessor :executor
@@ -63,6 +91,10 @@ module AgentHarness
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Normalize and validate MCP servers
+        options = normalize_mcp_servers(options)
+        validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
         # Build command
         command = build_command(prompt, options)
@@ -83,6 +115,8 @@ module AgentHarness
         log_debug("send_message_complete", duration: duration, tokens: response.tokens)
         response
+      rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
+        raise
       rescue => e
         handle_error(e, prompt: prompt, options: options)
       end
@@ -101,6 +135,16 @@ module AgentHarness
         name.capitalize
       end
+      # Whether the provider is running inside a sandboxed (Docker) environment
+      #
+      # Providers can use this to adjust execution flags, e.g. skipping
+      # nested sandboxing when already inside a container.
+      #
+      # @return [Boolean] true when the executor is a DockerCommandExecutor
+      def sandboxed_environment?
+        @executor.is_a?(DockerCommandExecutor)
+      end
       protected
       # Build CLI command - override in subclasses
@@ -122,17 +166,39 @@ module AgentHarness
       # Parse CLI output into Response - override in subclasses
       #
+      # Combines stdout and stderr for error classification so that
+      # provider-specific error messages are captured regardless of
+      # which stream they appear on.
+      #
       # @param result [CommandExecutor::Result] execution result
       # @param duration [Float] execution duration
       # @return [Response] parsed response
       def parse_response(result, duration:)
+        error = nil
+        # Use execution_semantics[:legitimate_exit_codes] so providers can
+        # declare additional non-error exit codes beyond zero.
+        legitimate = execution_semantics[:legitimate_exit_codes] || [0]
+        unless legitimate.include?(result.exit_code)
+          # Concatenate non-empty streams so error patterns can match
+          # regardless of which stream the provider writes to.
+          combined = [result.stderr, result.stdout]
+            .map { |s| s.to_s.strip }
+            .reject(&:empty?)
+            .join("\n")
+          error = combined unless combined.empty?
+        end
         Response.new(
           output: result.stdout,
           exit_code: result.exit_code,
           duration: duration,
           provider: self.class.provider_name,
           model: @config.model,
-          error: result.failed? ? result.stderr : nil
+          error: error,
+          metadata: {
+            legitimate_exit_codes: legitimate
+          }
         )
       end
@@ -145,6 +211,38 @@ module AgentHarness
       private
+      def normalize_mcp_servers(options)
+        servers = options[:mcp_servers]
+        return options if servers.nil?
+        unless servers.is_a?(Array)
+          raise McpConfigurationError,
+            "mcp_servers must be an Array of Hash or McpServer, got #{servers.class}"
+        end
+        return options if servers.empty?
+        normalized = servers.map do |server|
+          if server.is_a?(McpServer)
+            server
+          elsif server.is_a?(Hash)
+            McpServer.from_hash(server)
+          else
+            raise McpConfigurationError, "MCP server must be a Hash or McpServer, got #{server.class}"
+          end
+        end
+        # Ensure MCP server names are unique to avoid silent overwrites downstream
+        names = normalized.map(&:name)
+        duplicate_names = names.group_by { |n| n }.select { |_, v| v.size > 1 }.keys
+        unless duplicate_names.empty?
+          raise McpConfigurationError,
+            "Duplicate MCP server names detected: #{duplicate_names.join(", ")}"
+        end
+        options.merge(mcp_servers: normalized)
+      end
       def execute_with_timeout(command, timeout:, env:)
         @executor.execute(command, timeout: timeout, env: env)
       end

data/lib/agent_harness/providers/codex.rb CHANGED Viewed

@@ -67,7 +67,24 @@ module AgentHarness
           tool_use: true,
           json_mode: false,
           mcp: false,
-          dangerous_mode: false
+          dangerous_mode: true
+        }
+      end
+      def dangerous_mode_flags
+        ["--full-auto"]
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: true,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
         }
       end
@@ -81,32 +98,15 @@ module AgentHarness
       end
       def error_patterns
-        {
-          rate_limited: [
-            /rate.?limit/i,
-            /too.?many.?requests/i,
-            /429/
-          ],
-          auth_expired: [
-            /invalid.*api.*key/i,
-            /unauthorized/i,
-            /authentication/i,
-            /401/,
-            /incorrect.*api.*key/i
-          ],
-          quota_exceeded: [
-            /quota.*exceeded/i,
-            /insufficient.*quota/i,
-            /billing/i
-          ],
-          transient: [
-            /timeout/i,
-            /connection.*reset/i,
-            /service.*unavailable/i,
-            /503/,
-            /502/
+        COMMON_ERROR_PATTERNS.merge(
+          auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
+          transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
+          sandbox_failure: [
+            /bwrap.*no permissions/i,
+            /no permissions to create a new namespace/i,
+            /unprivileged.*namespace/i
           ]
-        }
+        )
       end
       def auth_status
@@ -167,9 +167,34 @@ module AgentHarness
       protected
+      def parse_response(result, duration:)
+        response = super
+        if response.success? && sandbox_failure_detected?(result.stderr)
+          return Response.new(
+            output: result.stdout,
+            exit_code: 1,
+            duration: duration,
+            provider: self.class.provider_name,
+            model: @config.model,
+            error: "Sandbox failure detected: #{result.stderr.strip}"
+          )
+        end
+        response
+      end
       def build_command(prompt, options)
         cmd = [self.class.binary_name, "exec"]
+        # When running inside an already-sandboxed Docker container, Codex's
+        # own sandboxing conflicts with the outer sandbox. Use --full-auto to
+        # skip nested sandboxing while keeping full tool access.
+        # Also applies when dangerous_mode is explicitly requested.
+        if sandboxed_environment? || options[:dangerous_mode]
+          cmd += dangerous_mode_flags
+        end
         flags = @config.default_flags
         if flags
           unless flags.is_a?(Array)
@@ -178,6 +203,10 @@ module AgentHarness
           cmd += flags if flags.any?
         end
+        if externally_sandboxed?(options)
+          cmd += sandbox_bypass_flags
+        end
         if options[:session]
           cmd += session_flags(options[:session])
         end
@@ -193,6 +222,24 @@ module AgentHarness
       private
+      def externally_sandboxed?(options)
+        if options.key?(:externally_sandboxed)
+          !!options[:externally_sandboxed]
+        else
+          !!@config.externally_sandboxed
+        end
+      end
+      def sandbox_failure_detected?(stderr)
+        return false if stderr.nil? || stderr.empty?
+        error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
+      end
+      def sandbox_bypass_flags
+        ["--sandbox", "none"]
+      end
       def read_codex_credentials
         path = codex_config_path
         return nil unless File.exist?(path)

data/lib/agent_harness/providers/cursor.rb CHANGED Viewed

@@ -109,6 +109,14 @@ module AgentHarness
         true
       end
+      # Cursor supports MCP for fetching existing server configurations (via
+      # fetch_mcp_servers) but does not support injecting request-time MCP
+      # servers into CLI invocations. Returning an empty list causes
+      # validate_mcp_servers! to raise McpUnsupportedError with a clear message.
+      def supported_mcp_transports
+        []
+      end
       def fetch_mcp_servers
         # Try CLI first, then config file
         fetch_mcp_servers_cli || fetch_mcp_servers_config
@@ -118,6 +126,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :stdin,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: "-p",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [
@@ -142,6 +163,10 @@ module AgentHarness
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Normalize and validate MCP servers (same as Base#send_message)
+        options = normalize_mcp_servers(options)
+        validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
         # Build command (without prompt in args - we send via stdin)
         command = [self.class.binary_name, "-p"]
@@ -162,6 +187,8 @@ module AgentHarness
         log_debug("send_message_complete", duration: duration)
         response
+      rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
+        raise
       rescue => e
         handle_error(e, prompt: prompt, options: options)
       end

data/lib/agent_harness/providers/gemini.rb CHANGED Viewed

@@ -99,6 +99,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [

data/lib/agent_harness/providers/github_copilot.rb CHANGED Viewed

@@ -89,10 +89,6 @@ module AgentHarness
         }
       end
-      def supports_dangerous_mode?
-        true
-      end
       def dangerous_mode_flags
         ["--allow-all-tools"]
       end
@@ -110,6 +106,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           auth_expired: [

data/lib/agent_harness/providers/kilocode.rb CHANGED Viewed

@@ -57,6 +57,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/providers/mistral_vibe.rb CHANGED Viewed

@@ -59,6 +59,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/providers/opencode.rb CHANGED Viewed

@@ -59,6 +59,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/response.rb CHANGED Viewed

@@ -40,9 +40,13 @@ module AgentHarness
     # Check if the response indicates success
     #
-    # @return [Boolean] true if exit_code is 0 and no error
+    # A response is successful when its exit code is among the provider's
+    # legitimate exit codes (defaults to [0]) and no error was detected.
+    #
+    # @return [Boolean] true if exit_code is legitimate and no error
     def success?
-      @exit_code == 0 && @error.nil?
+      legitimate = @metadata[:legitimate_exit_codes] || [0]
+      legitimate.include?(@exit_code) && @error.nil?
     end
     # Check if the response indicates failure

data/lib/agent_harness/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module AgentHarness
-  VERSION = "0.5.3"
+  VERSION = "0.5.5"
 end

data/lib/agent_harness.rb CHANGED Viewed

@@ -137,6 +137,7 @@ end
 # Core components
 require_relative "agent_harness/errors"
+require_relative "agent_harness/mcp_server"
 require_relative "agent_harness/configuration"
 require_relative "agent_harness/command_executor"
 require_relative "agent_harness/docker_command_executor"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: agent-harness
 version: !ruby/object:Gem::Version
-  version: 0.5.3
+  version: 0.5.5
 platform: ruby
 authors:
 - Bart Agapinan
@@ -84,6 +84,7 @@ files:
 - lib/agent_harness/docker_command_executor.rb
 - lib/agent_harness/error_taxonomy.rb
 - lib/agent_harness/errors.rb
+- lib/agent_harness/mcp_server.rb
 - lib/agent_harness/orchestration/circuit_breaker.rb
 - lib/agent_harness/orchestration/conductor.rb
 - lib/agent_harness/orchestration/health_monitor.rb