RubyGems - agent-harness - Versions diffs - 0.5.4 → 0.5.5 - Mend

agent-harness 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +8 -0
data/lib/agent_harness/configuration.rb +3 -1
data/lib/agent_harness/error_taxonomy.rb +5 -0
data/lib/agent_harness/providers/adapter.rb +32 -0
data/lib/agent_harness/providers/aider.rb +20 -0
data/lib/agent_harness/providers/anthropic.rb +13 -4
data/lib/agent_harness/providers/base.rb +61 -1
data/lib/agent_harness/providers/codex.rb +73 -26
data/lib/agent_harness/providers/cursor.rb +13 -0
data/lib/agent_harness/providers/gemini.rb +13 -0
data/lib/agent_harness/providers/github_copilot.rb +13 -4
data/lib/agent_harness/providers/kilocode.rb +17 -0
data/lib/agent_harness/providers/mistral_vibe.rb +17 -0
data/lib/agent_harness/providers/opencode.rb +17 -0
data/lib/agent_harness/response.rb +6 -2
data/lib/agent_harness/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0ac511d4448bf777f9389cdba34c165a3b97ac607cfca1db90f0fd5c9de0a4af
-  data.tar.gz: 946efb4b7f13e36da7b4c3cc8c3efc3722421aa83ccaf9789efdaa4e776d4bc6
+  metadata.gz: dc338c5fc81d4175149d405d494936b68a261a637deda4fc0e4fb7b18944bf67
+  data.tar.gz: aed5c92bc22dadab8826b919e8eabf606bcd7f6bfe0e1d02631c83461056a888
 SHA512:
-  metadata.gz: 7c3afe5167530f2cd4f8b435b5098732a12b3630f4324cbfa002f3983d760e3a41488ae0d565a0a0694d9b08704d74f010845fb39251a8f92782c6c6d01a572e
-  data.tar.gz: 8a9d9706b997b2c8543a45a43cdf476a089eaded7283fe6b3dade1394242f9786c7458b68a2953bcb866611d21a74a360a35d5b1ae3ade40ebe222c819d3c7ce
+  metadata.gz: 1d662f4ae796d88a1a2c2eabce4604a38c4b53b545640b51c16a4b8e370ddf59f40ff57b19dfb46ba96e50b85399b846c9d2379bdc9806bd40aa78b1f18c1f66
+  data.tar.gz: 913df22acc91cd6db4ff2788867dc8337a2f1e94c0a2b2cce483e0af4ec73d2a946fcbb80270968d82c49c55aa375da86c386f4c8472ad2d42664d2bc1242ee6

data/.release-please-manifest.json CHANGED Viewed

@@ -1,3 +1,3 @@
 {
-  ".": "0.5.4"
+  ".": "0.5.5"
 }

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,13 @@
 ## [Unreleased]
+## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
+### Bug Fixes
+* 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
+* 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
 ## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)

data/lib/agent_harness/configuration.rb CHANGED Viewed

@@ -233,7 +233,8 @@ module AgentHarness
   # Provider-specific configuration
   class ProviderConfig
-    attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
+    attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
+      :externally_sandboxed
     attr_reader :name
@@ -246,6 +247,7 @@ module AgentHarness
       @default_flags = []
       @timeout = nil
       @model = nil
+      @externally_sandboxed = false
     end
     # Merge options into this configuration

data/lib/agent_harness/error_taxonomy.rb CHANGED Viewed

@@ -39,6 +39,11 @@ module AgentHarness
         action: :retry_with_backoff,
         retryable: true
       },
+      sandbox_failure: {
+        description: "Sandbox setup failed",
+        action: :escalate,
+        retryable: false
+      },
       unknown: {
         description: "Unknown error",
         action: :retry_with_backoff,

data/lib/agent_harness/providers/adapter.rb CHANGED Viewed

@@ -218,6 +218,38 @@ module AgentHarness
       def health_status
         {healthy: true, message: "OK"}
       end
+      # Execution semantics for this provider
+      #
+      # Returns a hash describing provider-specific execution behavior so
+      # downstream apps do not need to hardcode CLI quirks. This metadata
+      # can be used to select the right flags and interpret output.
+      #
+      # @return [Hash] execution semantics
+      def execution_semantics
+        {
+          prompt_delivery: :arg,       # :arg, :stdin, or :flag
+          output_format: :text,        # :text or :json
+          sandbox_aware: false,        # adjusts behavior inside containers
+          uses_subcommand: false,      # e.g. "codex exec", "opencode run"
+          non_interactive_flag: nil,   # flag to suppress interactive prompts
+          legitimate_exit_codes: [0],  # exit codes that are NOT errors
+          stderr_is_diagnostic: true,  # stderr may contain non-error output
+          parses_rate_limit_reset: false # can extract Retry-After from output
+        }
+      end
+      # Parse a rate-limit reset time from provider output
+      #
+      # Providers that include rate-limit reset information in their error
+      # output can override this to extract it, so the orchestration layer
+      # can schedule retries accurately.
+      #
+      # @param output [String] combined stdout+stderr from the CLI
+      # @return [Time, nil] when the rate limit resets, or nil if unknown
+      def parse_rate_limit_reset(output)
+        nil
+      end
     end
   end
 end

data/lib/agent_harness/providers/aider.rb CHANGED Viewed

@@ -71,6 +71,26 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS.merge(
+          auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
+          transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
+        )
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: "--yes",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def supports_sessions?
         true
       end

data/lib/agent_harness/providers/anthropic.rb CHANGED Viewed

@@ -193,10 +193,6 @@ module AgentHarness
         ["--mcp-config", config_path]
       end
-      def supports_dangerous_mode?
-        true
-      end
       def dangerous_mode_flags
         ["--dangerously-skip-permissions"]
       end
@@ -205,6 +201,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :json,
+          sandbox_aware: true,
+          uses_subcommand: false,
+          non_interactive_flag: "--print",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [

data/lib/agent_harness/providers/base.rb CHANGED Viewed

@@ -32,6 +32,34 @@ module AgentHarness
     class Base
       include Adapter
+      # Common error patterns shared across providers that use standard
+      # HTTP-style error responses. Providers with unique patterns (e.g.
+      # Anthropic, GitHub Copilot) override error_patterns entirely.
+      COMMON_ERROR_PATTERNS = {
+        rate_limited: [
+          /rate.?limit/i,
+          /too.?many.?requests/i,
+          /429/
+        ],
+        auth_expired: [
+          /invalid.*api.*key/i,
+          /unauthorized/i,
+          /authentication/i
+        ],
+        quota_exceeded: [
+          /quota.*exceeded/i,
+          /insufficient.*quota/i,
+          /billing/i
+        ],
+        transient: [
+          /timeout/i,
+          /connection.*error/i,
+          /service.*unavailable/i,
+          /503/,
+          /502/
+        ]
+      }.tap { |patterns| patterns.each_value(&:freeze) }.freeze
       attr_reader :config, :logger
       attr_accessor :executor
@@ -107,6 +135,16 @@ module AgentHarness
         name.capitalize
       end
+      # Whether the provider is running inside a sandboxed (Docker) environment
+      #
+      # Providers can use this to adjust execution flags, e.g. skipping
+      # nested sandboxing when already inside a container.
+      #
+      # @return [Boolean] true when the executor is a DockerCommandExecutor
+      def sandboxed_environment?
+        @executor.is_a?(DockerCommandExecutor)
+      end
       protected
       # Build CLI command - override in subclasses
@@ -128,17 +166,39 @@ module AgentHarness
       # Parse CLI output into Response - override in subclasses
       #
+      # Combines stdout and stderr for error classification so that
+      # provider-specific error messages are captured regardless of
+      # which stream they appear on.
+      #
       # @param result [CommandExecutor::Result] execution result
       # @param duration [Float] execution duration
       # @return [Response] parsed response
       def parse_response(result, duration:)
+        error = nil
+        # Use execution_semantics[:legitimate_exit_codes] so providers can
+        # declare additional non-error exit codes beyond zero.
+        legitimate = execution_semantics[:legitimate_exit_codes] || [0]
+        unless legitimate.include?(result.exit_code)
+          # Concatenate non-empty streams so error patterns can match
+          # regardless of which stream the provider writes to.
+          combined = [result.stderr, result.stdout]
+            .map { |s| s.to_s.strip }
+            .reject(&:empty?)
+            .join("\n")
+          error = combined unless combined.empty?
+        end
         Response.new(
           output: result.stdout,
           exit_code: result.exit_code,
           duration: duration,
           provider: self.class.provider_name,
           model: @config.model,
-          error: result.failed? ? result.stderr : nil
+          error: error,
+          metadata: {
+            legitimate_exit_codes: legitimate
+          }
         )
       end

data/lib/agent_harness/providers/codex.rb CHANGED Viewed

@@ -67,7 +67,24 @@ module AgentHarness
           tool_use: true,
           json_mode: false,
           mcp: false,
-          dangerous_mode: false
+          dangerous_mode: true
+        }
+      end
+      def dangerous_mode_flags
+        ["--full-auto"]
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: true,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
         }
       end
@@ -81,32 +98,15 @@ module AgentHarness
       end
       def error_patterns
-        {
-          rate_limited: [
-            /rate.?limit/i,
-            /too.?many.?requests/i,
-            /429/
-          ],
-          auth_expired: [
-            /invalid.*api.*key/i,
-            /unauthorized/i,
-            /authentication/i,
-            /401/,
-            /incorrect.*api.*key/i
-          ],
-          quota_exceeded: [
-            /quota.*exceeded/i,
-            /insufficient.*quota/i,
-            /billing/i
-          ],
-          transient: [
-            /timeout/i,
-            /connection.*reset/i,
-            /service.*unavailable/i,
-            /503/,
-            /502/
+        COMMON_ERROR_PATTERNS.merge(
+          auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
+          transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
+          sandbox_failure: [
+            /bwrap.*no permissions/i,
+            /no permissions to create a new namespace/i,
+            /unprivileged.*namespace/i
           ]
-        }
+        )
       end
       def auth_status
@@ -167,9 +167,34 @@ module AgentHarness
       protected
+      def parse_response(result, duration:)
+        response = super
+        if response.success? && sandbox_failure_detected?(result.stderr)
+          return Response.new(
+            output: result.stdout,
+            exit_code: 1,
+            duration: duration,
+            provider: self.class.provider_name,
+            model: @config.model,
+            error: "Sandbox failure detected: #{result.stderr.strip}"
+          )
+        end
+        response
+      end
       def build_command(prompt, options)
         cmd = [self.class.binary_name, "exec"]
+        # When running inside an already-sandboxed Docker container, Codex's
+        # own sandboxing conflicts with the outer sandbox. Use --full-auto to
+        # skip nested sandboxing while keeping full tool access.
+        # Also applies when dangerous_mode is explicitly requested.
+        if sandboxed_environment? || options[:dangerous_mode]
+          cmd += dangerous_mode_flags
+        end
         flags = @config.default_flags
         if flags
           unless flags.is_a?(Array)
@@ -178,6 +203,10 @@ module AgentHarness
           cmd += flags if flags.any?
         end
+        if externally_sandboxed?(options)
+          cmd += sandbox_bypass_flags
+        end
         if options[:session]
           cmd += session_flags(options[:session])
         end
@@ -193,6 +222,24 @@ module AgentHarness
       private
+      def externally_sandboxed?(options)
+        if options.key?(:externally_sandboxed)
+          !!options[:externally_sandboxed]
+        else
+          !!@config.externally_sandboxed
+        end
+      end
+      def sandbox_failure_detected?(stderr)
+        return false if stderr.nil? || stderr.empty?
+        error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
+      end
+      def sandbox_bypass_flags
+        ["--sandbox", "none"]
+      end
       def read_codex_credentials
         path = codex_config_path
         return nil unless File.exist?(path)

data/lib/agent_harness/providers/cursor.rb CHANGED Viewed

@@ -126,6 +126,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :stdin,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: "-p",
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [

data/lib/agent_harness/providers/gemini.rb CHANGED Viewed

@@ -99,6 +99,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           rate_limited: [

data/lib/agent_harness/providers/github_copilot.rb CHANGED Viewed

@@ -89,10 +89,6 @@ module AgentHarness
         }
       end
-      def supports_dangerous_mode?
-        true
-      end
       def dangerous_mode_flags
         ["--allow-all-tools"]
       end
@@ -110,6 +106,19 @@ module AgentHarness
         :oauth
       end
+      def execution_semantics
+        {
+          prompt_delivery: :flag,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: false,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       def error_patterns
         {
           auth_expired: [

data/lib/agent_harness/providers/kilocode.rb CHANGED Viewed

@@ -57,6 +57,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/providers/mistral_vibe.rb CHANGED Viewed

@@ -59,6 +59,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/providers/opencode.rb CHANGED Viewed

@@ -59,6 +59,23 @@ module AgentHarness
         }
       end
+      def error_patterns
+        COMMON_ERROR_PATTERNS
+      end
+      def execution_semantics
+        {
+          prompt_delivery: :arg,
+          output_format: :text,
+          sandbox_aware: false,
+          uses_subcommand: true,
+          non_interactive_flag: nil,
+          legitimate_exit_codes: [0],
+          stderr_is_diagnostic: true,
+          parses_rate_limit_reset: false
+        }
+      end
       protected
       def build_command(prompt, options)

data/lib/agent_harness/response.rb CHANGED Viewed

@@ -40,9 +40,13 @@ module AgentHarness
     # Check if the response indicates success
     #
-    # @return [Boolean] true if exit_code is 0 and no error
+    # A response is successful when its exit code is among the provider's
+    # legitimate exit codes (defaults to [0]) and no error was detected.
+    #
+    # @return [Boolean] true if exit_code is legitimate and no error
     def success?
-      @exit_code == 0 && @error.nil?
+      legitimate = @metadata[:legitimate_exit_codes] || [0]
+      legitimate.include?(@exit_code) && @error.nil?
     end
     # Check if the response indicates failure

data/lib/agent_harness/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module AgentHarness
-  VERSION = "0.5.4"
+  VERSION = "0.5.5"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: agent-harness
 version: !ruby/object:Gem::Version
-  version: 0.5.4
+  version: 0.5.5
 platform: ruby
 authors:
 - Bart Agapinan