agent-harness 0.5.4 → 0.5.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ac511d4448bf777f9389cdba34c165a3b97ac607cfca1db90f0fd5c9de0a4af
4
- data.tar.gz: 946efb4b7f13e36da7b4c3cc8c3efc3722421aa83ccaf9789efdaa4e776d4bc6
3
+ metadata.gz: dc338c5fc81d4175149d405d494936b68a261a637deda4fc0e4fb7b18944bf67
4
+ data.tar.gz: aed5c92bc22dadab8826b919e8eabf606bcd7f6bfe0e1d02631c83461056a888
5
5
  SHA512:
6
- metadata.gz: 7c3afe5167530f2cd4f8b435b5098732a12b3630f4324cbfa002f3983d760e3a41488ae0d565a0a0694d9b08704d74f010845fb39251a8f92782c6c6d01a572e
7
- data.tar.gz: 8a9d9706b997b2c8543a45a43cdf476a089eaded7283fe6b3dade1394242f9786c7458b68a2953bcb866611d21a74a360a35d5b1ae3ade40ebe222c819d3c7ce
6
+ metadata.gz: 1d662f4ae796d88a1a2c2eabce4604a38c4b53b545640b51c16a4b8e370ddf59f40ff57b19dfb46ba96e50b85399b846c9d2379bdc9806bd40aa78b1f18c1f66
7
+ data.tar.gz: 913df22acc91cd6db4ff2788867dc8337a2f1e94c0a2b2cce483e0af4ec73d2a946fcbb80270968d82c49c55aa375da86c386f4c8472ad2d42664d2bc1242ee6
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.5.4"
2
+ ".": "0.5.5"
3
3
  }
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
9
+ * 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
10
+
3
11
  ## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
4
12
 
5
13
 
@@ -233,7 +233,8 @@ module AgentHarness
233
233
 
234
234
  # Provider-specific configuration
235
235
  class ProviderConfig
236
- attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
236
+ attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
237
+ :externally_sandboxed
237
238
 
238
239
  attr_reader :name
239
240
 
@@ -246,6 +247,7 @@ module AgentHarness
246
247
  @default_flags = []
247
248
  @timeout = nil
248
249
  @model = nil
250
+ @externally_sandboxed = false
249
251
  end
250
252
 
251
253
  # Merge options into this configuration
@@ -39,6 +39,11 @@ module AgentHarness
39
39
  action: :retry_with_backoff,
40
40
  retryable: true
41
41
  },
42
+ sandbox_failure: {
43
+ description: "Sandbox setup failed",
44
+ action: :escalate,
45
+ retryable: false
46
+ },
42
47
  unknown: {
43
48
  description: "Unknown error",
44
49
  action: :retry_with_backoff,
@@ -218,6 +218,38 @@ module AgentHarness
218
218
  def health_status
219
219
  {healthy: true, message: "OK"}
220
220
  end
221
+
222
+ # Execution semantics for this provider
223
+ #
224
+ # Returns a hash describing provider-specific execution behavior so
225
+ # downstream apps do not need to hardcode CLI quirks. This metadata
226
+ # can be used to select the right flags and interpret output.
227
+ #
228
+ # @return [Hash] execution semantics
229
+ def execution_semantics
230
+ {
231
+ prompt_delivery: :arg, # :arg, :stdin, or :flag
232
+ output_format: :text, # :text or :json
233
+ sandbox_aware: false, # adjusts behavior inside containers
234
+ uses_subcommand: false, # e.g. "codex exec", "opencode run"
235
+ non_interactive_flag: nil, # flag to suppress interactive prompts
236
+ legitimate_exit_codes: [0], # exit codes that are NOT errors
237
+ stderr_is_diagnostic: true, # stderr may contain non-error output
238
+ parses_rate_limit_reset: false # can extract Retry-After from output
239
+ }
240
+ end
241
+
242
+ # Parse a rate-limit reset time from provider output
243
+ #
244
+ # Providers that include rate-limit reset information in their error
245
+ # output can override this to extract it, so the orchestration layer
246
+ # can schedule retries accurately.
247
+ #
248
+ # @param output [String] combined stdout+stderr from the CLI
249
+ # @return [Time, nil] when the rate limit resets, or nil if unknown
250
+ def parse_rate_limit_reset(output)
251
+ nil
252
+ end
221
253
  end
222
254
  end
223
255
  end
@@ -71,6 +71,26 @@ module AgentHarness
71
71
  }
72
72
  end
73
73
 
74
+ def error_patterns
75
+ COMMON_ERROR_PATTERNS.merge(
76
+ auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
77
+ transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
78
+ )
79
+ end
80
+
81
+ def execution_semantics
82
+ {
83
+ prompt_delivery: :flag,
84
+ output_format: :text,
85
+ sandbox_aware: false,
86
+ uses_subcommand: false,
87
+ non_interactive_flag: "--yes",
88
+ legitimate_exit_codes: [0],
89
+ stderr_is_diagnostic: true,
90
+ parses_rate_limit_reset: false
91
+ }
92
+ end
93
+
74
94
  def supports_sessions?
75
95
  true
76
96
  end
@@ -193,10 +193,6 @@ module AgentHarness
193
193
  ["--mcp-config", config_path]
194
194
  end
195
195
 
196
- def supports_dangerous_mode?
197
- true
198
- end
199
-
200
196
  def dangerous_mode_flags
201
197
  ["--dangerously-skip-permissions"]
202
198
  end
@@ -205,6 +201,19 @@ module AgentHarness
205
201
  :oauth
206
202
  end
207
203
 
204
+ def execution_semantics
205
+ {
206
+ prompt_delivery: :arg,
207
+ output_format: :json,
208
+ sandbox_aware: true,
209
+ uses_subcommand: false,
210
+ non_interactive_flag: "--print",
211
+ legitimate_exit_codes: [0],
212
+ stderr_is_diagnostic: true,
213
+ parses_rate_limit_reset: false
214
+ }
215
+ end
216
+
208
217
  def error_patterns
209
218
  {
210
219
  rate_limited: [
@@ -32,6 +32,34 @@ module AgentHarness
32
32
  class Base
33
33
  include Adapter
34
34
 
35
+ # Common error patterns shared across providers that use standard
36
+ # HTTP-style error responses. Providers with unique patterns (e.g.
37
+ # Anthropic, GitHub Copilot) override error_patterns entirely.
38
+ COMMON_ERROR_PATTERNS = {
39
+ rate_limited: [
40
+ /rate.?limit/i,
41
+ /too.?many.?requests/i,
42
+ /429/
43
+ ],
44
+ auth_expired: [
45
+ /invalid.*api.*key/i,
46
+ /unauthorized/i,
47
+ /authentication/i
48
+ ],
49
+ quota_exceeded: [
50
+ /quota.*exceeded/i,
51
+ /insufficient.*quota/i,
52
+ /billing/i
53
+ ],
54
+ transient: [
55
+ /timeout/i,
56
+ /connection.*error/i,
57
+ /service.*unavailable/i,
58
+ /503/,
59
+ /502/
60
+ ]
61
+ }.tap { |patterns| patterns.each_value(&:freeze) }.freeze
62
+
35
63
  attr_reader :config, :logger
36
64
  attr_accessor :executor
37
65
 
@@ -107,6 +135,16 @@ module AgentHarness
107
135
  name.capitalize
108
136
  end
109
137
 
138
+ # Whether the provider is running inside a sandboxed (Docker) environment
139
+ #
140
+ # Providers can use this to adjust execution flags, e.g. skipping
141
+ # nested sandboxing when already inside a container.
142
+ #
143
+ # @return [Boolean] true when the executor is a DockerCommandExecutor
144
+ def sandboxed_environment?
145
+ @executor.is_a?(DockerCommandExecutor)
146
+ end
147
+
110
148
  protected
111
149
 
112
150
  # Build CLI command - override in subclasses
@@ -128,17 +166,39 @@ module AgentHarness
128
166
 
129
167
  # Parse CLI output into Response - override in subclasses
130
168
  #
169
+ # Combines stdout and stderr for error classification so that
170
+ # provider-specific error messages are captured regardless of
171
+ # which stream they appear on.
172
+ #
131
173
  # @param result [CommandExecutor::Result] execution result
132
174
  # @param duration [Float] execution duration
133
175
  # @return [Response] parsed response
134
176
  def parse_response(result, duration:)
177
+ error = nil
178
+ # Use execution_semantics[:legitimate_exit_codes] so providers can
179
+ # declare additional non-error exit codes beyond zero.
180
+ legitimate = execution_semantics[:legitimate_exit_codes] || [0]
181
+ unless legitimate.include?(result.exit_code)
182
+ # Concatenate non-empty streams so error patterns can match
183
+ # regardless of which stream the provider writes to.
184
+ combined = [result.stderr, result.stdout]
185
+ .map { |s| s.to_s.strip }
186
+ .reject(&:empty?)
187
+ .join("\n")
188
+
189
+ error = combined unless combined.empty?
190
+ end
191
+
135
192
  Response.new(
136
193
  output: result.stdout,
137
194
  exit_code: result.exit_code,
138
195
  duration: duration,
139
196
  provider: self.class.provider_name,
140
197
  model: @config.model,
141
- error: result.failed? ? result.stderr : nil
198
+ error: error,
199
+ metadata: {
200
+ legitimate_exit_codes: legitimate
201
+ }
142
202
  )
143
203
  end
144
204
 
@@ -67,7 +67,24 @@ module AgentHarness
67
67
  tool_use: true,
68
68
  json_mode: false,
69
69
  mcp: false,
70
- dangerous_mode: false
70
+ dangerous_mode: true
71
+ }
72
+ end
73
+
74
+ def dangerous_mode_flags
75
+ ["--full-auto"]
76
+ end
77
+
78
+ def execution_semantics
79
+ {
80
+ prompt_delivery: :arg,
81
+ output_format: :text,
82
+ sandbox_aware: true,
83
+ uses_subcommand: true,
84
+ non_interactive_flag: nil,
85
+ legitimate_exit_codes: [0],
86
+ stderr_is_diagnostic: true,
87
+ parses_rate_limit_reset: false
71
88
  }
72
89
  end
73
90
 
@@ -81,32 +98,15 @@ module AgentHarness
81
98
  end
82
99
 
83
100
  def error_patterns
84
- {
85
- rate_limited: [
86
- /rate.?limit/i,
87
- /too.?many.?requests/i,
88
- /429/
89
- ],
90
- auth_expired: [
91
- /invalid.*api.*key/i,
92
- /unauthorized/i,
93
- /authentication/i,
94
- /401/,
95
- /incorrect.*api.*key/i
96
- ],
97
- quota_exceeded: [
98
- /quota.*exceeded/i,
99
- /insufficient.*quota/i,
100
- /billing/i
101
- ],
102
- transient: [
103
- /timeout/i,
104
- /connection.*reset/i,
105
- /service.*unavailable/i,
106
- /503/,
107
- /502/
101
+ COMMON_ERROR_PATTERNS.merge(
102
+ auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
103
+ transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
104
+ sandbox_failure: [
105
+ /bwrap.*no permissions/i,
106
+ /no permissions to create a new namespace/i,
107
+ /unprivileged.*namespace/i
108
108
  ]
109
- }
109
+ )
110
110
  end
111
111
 
112
112
  def auth_status
@@ -167,9 +167,34 @@ module AgentHarness
167
167
 
168
168
  protected
169
169
 
170
+ def parse_response(result, duration:)
171
+ response = super
172
+
173
+ if response.success? && sandbox_failure_detected?(result.stderr)
174
+ return Response.new(
175
+ output: result.stdout,
176
+ exit_code: 1,
177
+ duration: duration,
178
+ provider: self.class.provider_name,
179
+ model: @config.model,
180
+ error: "Sandbox failure detected: #{result.stderr.strip}"
181
+ )
182
+ end
183
+
184
+ response
185
+ end
186
+
170
187
  def build_command(prompt, options)
171
188
  cmd = [self.class.binary_name, "exec"]
172
189
 
190
+ # When running inside an already-sandboxed Docker container, Codex's
191
+ # own sandboxing conflicts with the outer sandbox. Use --full-auto to
192
+ # skip nested sandboxing while keeping full tool access.
193
+ # Also applies when dangerous_mode is explicitly requested.
194
+ if sandboxed_environment? || options[:dangerous_mode]
195
+ cmd += dangerous_mode_flags
196
+ end
197
+
173
198
  flags = @config.default_flags
174
199
  if flags
175
200
  unless flags.is_a?(Array)
@@ -178,6 +203,10 @@ module AgentHarness
178
203
  cmd += flags if flags.any?
179
204
  end
180
205
 
206
+ if externally_sandboxed?(options)
207
+ cmd += sandbox_bypass_flags
208
+ end
209
+
181
210
  if options[:session]
182
211
  cmd += session_flags(options[:session])
183
212
  end
@@ -193,6 +222,24 @@ module AgentHarness
193
222
 
194
223
  private
195
224
 
225
+ def externally_sandboxed?(options)
226
+ if options.key?(:externally_sandboxed)
227
+ !!options[:externally_sandboxed]
228
+ else
229
+ !!@config.externally_sandboxed
230
+ end
231
+ end
232
+
233
+ def sandbox_failure_detected?(stderr)
234
+ return false if stderr.nil? || stderr.empty?
235
+
236
+ error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
237
+ end
238
+
239
+ def sandbox_bypass_flags
240
+ ["--sandbox", "none"]
241
+ end
242
+
196
243
  def read_codex_credentials
197
244
  path = codex_config_path
198
245
  return nil unless File.exist?(path)
@@ -126,6 +126,19 @@ module AgentHarness
126
126
  :oauth
127
127
  end
128
128
 
129
+ def execution_semantics
130
+ {
131
+ prompt_delivery: :stdin,
132
+ output_format: :text,
133
+ sandbox_aware: false,
134
+ uses_subcommand: false,
135
+ non_interactive_flag: "-p",
136
+ legitimate_exit_codes: [0],
137
+ stderr_is_diagnostic: true,
138
+ parses_rate_limit_reset: false
139
+ }
140
+ end
141
+
129
142
  def error_patterns
130
143
  {
131
144
  rate_limited: [
@@ -99,6 +99,19 @@ module AgentHarness
99
99
  :oauth
100
100
  end
101
101
 
102
+ def execution_semantics
103
+ {
104
+ prompt_delivery: :flag,
105
+ output_format: :text,
106
+ sandbox_aware: false,
107
+ uses_subcommand: false,
108
+ non_interactive_flag: nil,
109
+ legitimate_exit_codes: [0],
110
+ stderr_is_diagnostic: true,
111
+ parses_rate_limit_reset: false
112
+ }
113
+ end
114
+
102
115
  def error_patterns
103
116
  {
104
117
  rate_limited: [
@@ -89,10 +89,6 @@ module AgentHarness
89
89
  }
90
90
  end
91
91
 
92
- def supports_dangerous_mode?
93
- true
94
- end
95
-
96
92
  def dangerous_mode_flags
97
93
  ["--allow-all-tools"]
98
94
  end
@@ -110,6 +106,19 @@ module AgentHarness
110
106
  :oauth
111
107
  end
112
108
 
109
+ def execution_semantics
110
+ {
111
+ prompt_delivery: :flag,
112
+ output_format: :text,
113
+ sandbox_aware: false,
114
+ uses_subcommand: false,
115
+ non_interactive_flag: nil,
116
+ legitimate_exit_codes: [0],
117
+ stderr_is_diagnostic: true,
118
+ parses_rate_limit_reset: false
119
+ }
120
+ end
121
+
113
122
  def error_patterns
114
123
  {
115
124
  auth_expired: [
@@ -57,6 +57,23 @@ module AgentHarness
57
57
  }
58
58
  end
59
59
 
60
+ def error_patterns
61
+ COMMON_ERROR_PATTERNS
62
+ end
63
+
64
+ def execution_semantics
65
+ {
66
+ prompt_delivery: :arg,
67
+ output_format: :text,
68
+ sandbox_aware: false,
69
+ uses_subcommand: true,
70
+ non_interactive_flag: nil,
71
+ legitimate_exit_codes: [0],
72
+ stderr_is_diagnostic: true,
73
+ parses_rate_limit_reset: false
74
+ }
75
+ end
76
+
60
77
  protected
61
78
 
62
79
  def build_command(prompt, options)
@@ -59,6 +59,23 @@ module AgentHarness
59
59
  }
60
60
  end
61
61
 
62
+ def error_patterns
63
+ COMMON_ERROR_PATTERNS
64
+ end
65
+
66
+ def execution_semantics
67
+ {
68
+ prompt_delivery: :arg,
69
+ output_format: :text,
70
+ sandbox_aware: false,
71
+ uses_subcommand: true,
72
+ non_interactive_flag: nil,
73
+ legitimate_exit_codes: [0],
74
+ stderr_is_diagnostic: true,
75
+ parses_rate_limit_reset: false
76
+ }
77
+ end
78
+
62
79
  protected
63
80
 
64
81
  def build_command(prompt, options)
@@ -59,6 +59,23 @@ module AgentHarness
59
59
  }
60
60
  end
61
61
 
62
+ def error_patterns
63
+ COMMON_ERROR_PATTERNS
64
+ end
65
+
66
+ def execution_semantics
67
+ {
68
+ prompt_delivery: :arg,
69
+ output_format: :text,
70
+ sandbox_aware: false,
71
+ uses_subcommand: true,
72
+ non_interactive_flag: nil,
73
+ legitimate_exit_codes: [0],
74
+ stderr_is_diagnostic: true,
75
+ parses_rate_limit_reset: false
76
+ }
77
+ end
78
+
62
79
  protected
63
80
 
64
81
  def build_command(prompt, options)
@@ -40,9 +40,13 @@ module AgentHarness
40
40
 
41
41
  # Check if the response indicates success
42
42
  #
43
- # @return [Boolean] true if exit_code is 0 and no error
43
+ # A response is successful when its exit code is among the provider's
44
+ # legitimate exit codes (defaults to [0]) and no error was detected.
45
+ #
46
+ # @return [Boolean] true if exit_code is legitimate and no error
44
47
  def success?
45
- @exit_code == 0 && @error.nil?
48
+ legitimate = @metadata[:legitimate_exit_codes] || [0]
49
+ legitimate.include?(@exit_code) && @error.nil?
46
50
  end
47
51
 
48
52
  # Check if the response indicates failure
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AgentHarness
4
- VERSION = "0.5.4"
4
+ VERSION = "0.5.5"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agent-harness
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bart Agapinan