agent-harness 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ac511d4448bf777f9389cdba34c165a3b97ac607cfca1db90f0fd5c9de0a4af
4
- data.tar.gz: 946efb4b7f13e36da7b4c3cc8c3efc3722421aa83ccaf9789efdaa4e776d4bc6
3
+ metadata.gz: 946d7c425aff8c96536bc30def7e0abdba7ff7d82020f4941674a8c93be63526
4
+ data.tar.gz: ffc9d707f89ab60bf9cc59b4e5ccbcd2570a3aaa07e97c5a10bfb731f5dc07a0
5
5
  SHA512:
6
- metadata.gz: 7c3afe5167530f2cd4f8b435b5098732a12b3630f4324cbfa002f3983d760e3a41488ae0d565a0a0694d9b08704d74f010845fb39251a8f92782c6c6d01a572e
7
- data.tar.gz: 8a9d9706b997b2c8543a45a43cdf476a089eaded7283fe6b3dade1394242f9786c7458b68a2953bcb866611d21a74a360a35d5b1ae3ade40ebe222c819d3c7ce
6
+ metadata.gz: c7b0dcef83c7a31be09a87884211a8ba03c0c93fb845e666423cd17eb70a358dd122db7e57ce04271fa5e114013adbc0007394211286c23b03cfa6a2a600c68f
7
+ data.tar.gz: a8f14eb24039afd0a93ec1eb064b59ebbe6d03aafd61ab1859c64729d2253140afebacc5c8ee761578337c671c074425784c33676808534cdf7b8ad809a2df32
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.5.4"
2
+ ".": "0.5.6"
3
3
  }
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.5.6](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.5...agent-harness/v0.5.6) (2026-03-30)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * 53: Expose provider configuration capabilities for app-driven provider setup UIs ([#57](https://github.com/viamin/agent-harness/issues/57)) ([6aa6a02](https://github.com/viamin/agent-harness/commit/6aa6a02da14feefcad8761302d5fa8b5642a57fe))
9
+ * 54: Add per-request provider runtime overrides for CLI-backed providers ([#55](https://github.com/viamin/agent-harness/issues/55)) ([407467a](https://github.com/viamin/agent-harness/commit/407467a6965a01494e2c4590680b2bb9ddac6dce))
10
+
11
+ ## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
12
+
13
+
14
+ ### Bug Fixes
15
+
16
+ * 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
17
+ * 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
18
+
3
19
  ## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
4
20
 
5
21
 
@@ -233,7 +233,8 @@ module AgentHarness
233
233
 
234
234
  # Provider-specific configuration
235
235
  class ProviderConfig
236
- attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
236
+ attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
237
+ :externally_sandboxed
237
238
 
238
239
  attr_reader :name
239
240
 
@@ -246,6 +247,7 @@ module AgentHarness
246
247
  @default_flags = []
247
248
  @timeout = nil
248
249
  @model = nil
250
+ @externally_sandboxed = false
249
251
  end
250
252
 
251
253
  # Merge options into this configuration
@@ -39,6 +39,11 @@ module AgentHarness
39
39
  action: :retry_with_backoff,
40
40
  retryable: true
41
41
  },
42
+ sandbox_failure: {
43
+ description: "Sandbox setup failed",
44
+ action: :escalate,
45
+ retryable: false
46
+ },
42
47
  unknown: {
43
48
  description: "Unknown error",
44
49
  action: :retry_with_backoff,
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
module AgentHarness
  # Normalized, immutable runtime configuration for per-request provider
  # overrides.
  #
  # ProviderRuntime lets callers pass a single, provider-agnostic payload
  # into +send_message+ that each provider materializes into CLI args, env
  # vars, or config files as needed.
  #
  # Instances are frozen on construction. The +env+ and +flags+ containers
  # and every String they hold (as well as +model+, +base_url+ and
  # +api_provider+ when they are Strings) are stored as frozen private
  # copies, so mutating an object that was passed in does not change the
  # runtime afterwards. +metadata+ is only shallow-copied: the Hash itself
  # is frozen, but the values inside it remain the caller's objects.
  #
  # @example Routing OpenCode through OpenRouter with a specific model
  #   runtime = AgentHarness::ProviderRuntime.new(
  #     model: "anthropic/claude-opus-4.1",
  #     base_url: "https://openrouter.ai/api/v1",
  #     api_provider: "openrouter",
  #     env: { "OPENROUTER_API_KEY" => "sk-..." }
  #   )
  #   provider.send_message(prompt: "Hello", provider_runtime: runtime)
  #
  # @example Passing a Hash (auto-coerced by Base#send_message)
  #   provider.send_message(
  #     prompt: "Hello",
  #     provider_runtime: {
  #       model: "openai/gpt-5.3-codex",
  #       base_url: "https://openrouter.ai/api/v1"
  #     }
  #   )
  class ProviderRuntime
    attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata

    # @param model [String, nil] model identifier override
    # @param base_url [String, nil] upstream API base URL override
    # @param api_provider [String, nil] API-compatible backend name
    # @param env [Hash<String,String>, nil] extra environment variables for
    #   the subprocess; keys are converted with +to_s+, values must be Strings
    # @param flags [Array<String>, nil] extra CLI flags to append
    # @param metadata [Hash, nil] arbitrary provider-specific data
    # @raise [ArgumentError] if +env+ is not a Hash or holds a non-String
    #   value, if +flags+ is not an Array of Strings, or if +metadata+ is
    #   not a Hash
    def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], metadata: {})
      @model = snapshot(model)
      @base_url = snapshot(base_url)
      @api_provider = snapshot(api_provider)

      # Validation order (env, flags, metadata) determines which error a
      # caller sees first when several arguments are invalid.
      @env = normalize_env(env)
      @flags = normalize_flags(flags)
      @metadata = normalize_metadata(metadata)

      freeze
    end

    # Build a ProviderRuntime from a Hash.
    #
    # Each attribute is looked up under its Symbol key first and its String
    # key second. Keys other than the six known attributes are ignored.
    #
    # @param hash [Hash] runtime attributes
    # @return [ProviderRuntime]
    # @raise [ArgumentError] if +hash+ is not a Hash
    def self.from_hash(hash)
      raise ArgumentError, "expected a Hash, got #{hash.class}" unless hash.is_a?(Hash)

      new(
        model: hash[:model] || hash["model"],
        base_url: hash[:base_url] || hash["base_url"],
        api_provider: hash[:api_provider] || hash["api_provider"],
        env: hash[:env] || hash["env"] || {},
        flags: hash[:flags] || hash["flags"] || [],
        metadata: hash[:metadata] || hash["metadata"] || {}
      )
    end

    # Coerce a value into a ProviderRuntime.
    #
    # @param value [ProviderRuntime, Hash, nil] input
    # @return [ProviderRuntime, nil] +value+ itself when it already is a
    #   ProviderRuntime, a new instance for a Hash, +nil+ for +nil+
    # @raise [ArgumentError] for any other input type
    def self.wrap(value)
      case value
      when ProviderRuntime then value
      when Hash then from_hash(value)
      when nil then nil
      else
        raise ArgumentError, "Cannot coerce #{value.class} into ProviderRuntime"
      end
    end

    # Whether any meaningful overrides are present.
    #
    # @return [Boolean] true when every attribute is nil or empty
    def empty?
      model.nil? && base_url.nil? && api_provider.nil? &&
        env.empty? && flags.empty? && metadata.empty?
    end

    private

    # Return a frozen private copy of +value+ when it is a String so later
    # mutation of the caller's object cannot leak into this frozen runtime.
    # Non-String values (including nil) are returned untouched.
    def snapshot(value)
      return value unless value.is_a?(String)

      value.frozen? ? value : value.dup.freeze
    end

    # Validate +env+ and return a frozen Hash with String keys and frozen
    # String values.
    def normalize_env(env)
      env_hash = env || {}
      unless env_hash.is_a?(Hash)
        raise ArgumentError, "env must be a Hash (got #{env_hash.class})"
      end

      normalized = env_hash.each_with_object({}) do |(key, value), acc|
        string_key = key.to_s
        unless value.is_a?(String)
          raise ArgumentError, "env value for #{string_key.inspect} must be a String (got #{value.class})"
        end
        # Hash#[]= already stores a frozen copy of an unfrozen String key.
        acc[string_key] = snapshot(value)
      end
      normalized.freeze
    end

    # Validate +flags+ and return a frozen Array of frozen Strings.
    def normalize_flags(flags)
      flag_list = flags || []
      unless flag_list.is_a?(Array)
        raise ArgumentError, "flags must be an Array (got #{flag_list.class})"
      end

      normalized = flag_list.each_with_index.map do |flag, index|
        unless flag.is_a?(String)
          raise ArgumentError,
            "flags must be an Array of Strings; invalid element at index #{index}: #{flag.inspect} (#{flag.class})"
        end
        snapshot(flag)
      end
      normalized.freeze
    end

    # Validate +metadata+ and return a frozen shallow copy of it.
    def normalize_metadata(metadata)
      metadata_hash = metadata || {}
      unless metadata_hash.is_a?(Hash)
        raise ArgumentError, "metadata must be a Hash (got #{metadata_hash.class})"
      end

      metadata_hash.dup.freeze
    end
  end
end
@@ -75,11 +75,32 @@ module AgentHarness
75
75
  # @option options [Integer] :timeout timeout in seconds
76
76
  # @option options [String] :session session identifier
77
77
  # @option options [Boolean] :dangerous_mode skip permission checks
78
+ # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
79
+ # runtime overrides (model, base_url, api_provider, env, flags, metadata).
80
+ # For providers that delegate to Providers::Base#send_message, a plain Hash
81
+ # is automatically coerced into a ProviderRuntime. Providers that override
82
+ # #send_message directly are responsible for handling this option.
78
83
  # @return [Response] response object with output and metadata
79
84
  def send_message(prompt:, **options)
80
85
  raise NotImplementedError, "#{self.class} must implement #send_message"
81
86
  end
82
87
 
88
+ # Provider configuration schema for app-driven setup UIs
89
+ #
90
+ # Returns metadata describing the configurable fields, supported
91
+ # authentication modes, and backend compatibility for this provider.
92
+ # Applications use this to build generic provider-entry forms without
93
+ # hardcoding provider-specific knowledge.
94
+ #
95
+ # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
96
+ def configuration_schema
97
+ {
98
+ fields: [],
99
+ auth_modes: [auth_type],
100
+ openai_compatible: false
101
+ }
102
+ end
103
+
83
104
  # Provider capabilities
84
105
  #
85
106
  # @return [Hash] capability flags
@@ -218,6 +239,38 @@ module AgentHarness
218
239
  def health_status
219
240
  {healthy: true, message: "OK"}
220
241
  end
242
+
243
+ # Execution semantics for this provider
244
+ #
245
+ # Returns a hash describing provider-specific execution behavior so
246
+ # downstream apps do not need to hardcode CLI quirks. This metadata
247
+ # can be used to select the right flags and interpret output.
248
+ #
249
+ # @return [Hash] execution semantics
250
+ def execution_semantics
251
+ {
252
+ prompt_delivery: :arg, # :arg, :stdin, or :flag
253
+ output_format: :text, # :text or :json
254
+ sandbox_aware: false, # adjusts behavior inside containers
255
+ uses_subcommand: false, # e.g. "codex exec", "opencode run"
256
+ non_interactive_flag: nil, # flag to suppress interactive prompts
257
+ legitimate_exit_codes: [0], # exit codes that are NOT errors
258
+ stderr_is_diagnostic: true, # stderr may contain non-error output
259
+ parses_rate_limit_reset: false # can extract Retry-After from output
260
+ }
261
+ end
262
+
263
+ # Parse a rate-limit reset time from provider output
264
+ #
265
+ # Providers that include rate-limit reset information in their error
266
+ # output can override this to extract it, so the orchestration layer
267
+ # can schedule retries accurately.
268
+ #
269
+ # @param output [String] combined stdout+stderr from the CLI
270
+ # @return [Time, nil] when the rate limit resets, or nil if unknown
271
+ def parse_rate_limit_reset(output)
272
+ nil
273
+ end
221
274
  end
222
275
  end
223
276
  end
@@ -59,6 +59,23 @@ module AgentHarness
59
59
  "Aider"
60
60
  end
61
61
 
62
+ def configuration_schema
63
+ {
64
+ fields: [
65
+ {
66
+ name: :model,
67
+ type: :string,
68
+ label: "Model",
69
+ required: false,
70
+ hint: "Model identifier (supports OpenAI, Anthropic, and other model names)",
71
+ accepts_arbitrary: true
72
+ }
73
+ ],
74
+ auth_modes: [:api_key],
75
+ openai_compatible: false
76
+ }
77
+ end
78
+
62
79
  def capabilities
63
80
  {
64
81
  streaming: true,
@@ -71,6 +88,26 @@ module AgentHarness
71
88
  }
72
89
  end
73
90
 
91
+ def error_patterns
92
+ COMMON_ERROR_PATTERNS.merge(
93
+ auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
94
+ transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
95
+ )
96
+ end
97
+
98
+ def execution_semantics
99
+ {
100
+ prompt_delivery: :flag,
101
+ output_format: :text,
102
+ sandbox_aware: false,
103
+ uses_subcommand: false,
104
+ non_interactive_flag: "--yes",
105
+ legitimate_exit_codes: [0],
106
+ stderr_is_diagnostic: true,
107
+ parses_rate_limit_reset: false
108
+ }
109
+ end
110
+
74
111
  def supports_sessions?
75
112
  true
76
113
  end
@@ -160,6 +160,23 @@ module AgentHarness
160
160
  "Anthropic Claude CLI"
161
161
  end
162
162
 
163
+ def configuration_schema
164
+ {
165
+ fields: [
166
+ {
167
+ name: :model,
168
+ type: :string,
169
+ label: "Model",
170
+ required: false,
171
+ hint: "Claude model to use (e.g. claude-3-5-sonnet-20241022)",
172
+ accepts_arbitrary: false
173
+ }
174
+ ],
175
+ auth_modes: [:oauth],
176
+ openai_compatible: false
177
+ }
178
+ end
179
+
163
180
  def capabilities
164
181
  {
165
182
  streaming: true,
@@ -193,10 +210,6 @@ module AgentHarness
193
210
  ["--mcp-config", config_path]
194
211
  end
195
212
 
196
- def supports_dangerous_mode?
197
- true
198
- end
199
-
200
213
  def dangerous_mode_flags
201
214
  ["--dangerously-skip-permissions"]
202
215
  end
@@ -205,6 +218,19 @@ module AgentHarness
205
218
  :oauth
206
219
  end
207
220
 
221
+ def execution_semantics
222
+ {
223
+ prompt_delivery: :arg,
224
+ output_format: :json,
225
+ sandbox_aware: true,
226
+ uses_subcommand: false,
227
+ non_interactive_flag: "--print",
228
+ legitimate_exit_codes: [0],
229
+ stderr_is_diagnostic: true,
230
+ parses_rate_limit_reset: false
231
+ }
232
+ end
233
+
208
234
  def error_patterns
209
235
  {
210
236
  rate_limited: [
@@ -32,6 +32,34 @@ module AgentHarness
32
32
  class Base
33
33
  include Adapter
34
34
 
35
+ # Common error patterns shared across providers that use standard
36
+ # HTTP-style error responses. Providers with unique patterns (e.g.
37
+ # Anthropic, GitHub Copilot) override error_patterns entirely.
38
+ COMMON_ERROR_PATTERNS = {
39
+ rate_limited: [
40
+ /rate.?limit/i,
41
+ /too.?many.?requests/i,
42
+ /429/
43
+ ],
44
+ auth_expired: [
45
+ /invalid.*api.*key/i,
46
+ /unauthorized/i,
47
+ /authentication/i
48
+ ],
49
+ quota_exceeded: [
50
+ /quota.*exceeded/i,
51
+ /insufficient.*quota/i,
52
+ /billing/i
53
+ ],
54
+ transient: [
55
+ /timeout/i,
56
+ /connection.*error/i,
57
+ /service.*unavailable/i,
58
+ /503/,
59
+ /502/
60
+ ]
61
+ }.tap { |patterns| patterns.each_value(&:freeze) }.freeze
62
+
35
63
  attr_reader :config, :logger
36
64
  attr_accessor :executor
37
65
 
@@ -59,10 +87,16 @@ module AgentHarness
59
87
  #
60
88
  # @param prompt [String] the prompt to send
61
89
  # @param options [Hash] additional options
90
+ # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
91
+ # runtime overrides (model, base_url, api_provider, env, flags, metadata).
92
+ # A plain Hash is automatically coerced into a ProviderRuntime.
62
93
  # @return [Response] the response
63
94
  def send_message(prompt:, **options)
64
95
  log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
65
96
 
97
+ # Coerce provider_runtime from Hash if needed
98
+ options = normalize_provider_runtime(options)
99
+
66
100
  # Normalize and validate MCP servers
67
101
  options = normalize_mcp_servers(options)
68
102
  validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
@@ -80,6 +114,23 @@ module AgentHarness
80
114
 
81
115
  # Parse response
82
116
  response = parse_response(result, duration: duration)
117
+ runtime = options[:provider_runtime]
118
+ # Runtime model is a per-request override and always takes precedence
119
+ # over both the config-level model and whatever parse_response returned.
120
+ # This is intentional: callers use runtime overrides to route a single
121
+ # provider instance through different backends on each request.
122
+ if runtime&.model
123
+ response = Response.new(
124
+ output: response.output,
125
+ exit_code: response.exit_code,
126
+ duration: response.duration,
127
+ provider: response.provider,
128
+ model: runtime.model,
129
+ tokens: response.tokens,
130
+ metadata: response.metadata,
131
+ error: response.error
132
+ )
133
+ end
83
134
 
84
135
  # Track tokens
85
136
  track_tokens(response) if response.tokens
@@ -107,6 +158,16 @@ module AgentHarness
107
158
  name.capitalize
108
159
  end
109
160
 
161
+ # Whether the provider is running inside a sandboxed (Docker) environment
162
+ #
163
+ # Providers can use this to adjust execution flags, e.g. skipping
164
+ # nested sandboxing when already inside a container.
165
+ #
166
+ # @return [Boolean] true when the executor is a DockerCommandExecutor
167
+ def sandboxed_environment?
168
+ @executor.is_a?(DockerCommandExecutor)
169
+ end
170
+
110
171
  protected
111
172
 
112
173
  # Build CLI command - override in subclasses
@@ -120,25 +181,53 @@ module AgentHarness
120
181
 
121
182
  # Build environment variables - override in subclasses
122
183
  #
184
+ # Provider subclasses should call +super+ and merge their own env vars
185
+ # so that ProviderRuntime env overrides are always included.
186
+ #
123
187
  # @param options [Hash] options
124
188
  # @return [Hash] environment variables
125
189
  def build_env(options)
126
- {}
190
+ runtime = options[:provider_runtime]
191
+ return {} unless runtime
192
+
193
+ runtime.env.dup
127
194
  end
128
195
 
129
196
  # Parse CLI output into Response - override in subclasses
130
197
  #
198
+ # Combines stdout and stderr for error classification so that
199
+ # provider-specific error messages are captured regardless of
200
+ # which stream they appear on.
201
+ #
131
202
  # @param result [CommandExecutor::Result] execution result
132
203
  # @param duration [Float] execution duration
133
204
  # @return [Response] parsed response
134
205
  def parse_response(result, duration:)
206
+ error = nil
207
+ # Use execution_semantics[:legitimate_exit_codes] so providers can
208
+ # declare additional non-error exit codes beyond zero.
209
+ legitimate = execution_semantics[:legitimate_exit_codes] || [0]
210
+ unless legitimate.include?(result.exit_code)
211
+ # Concatenate non-empty streams so error patterns can match
212
+ # regardless of which stream the provider writes to.
213
+ combined = [result.stderr, result.stdout]
214
+ .map { |s| s.to_s.strip }
215
+ .reject(&:empty?)
216
+ .join("\n")
217
+
218
+ error = combined unless combined.empty?
219
+ end
220
+
135
221
  Response.new(
136
222
  output: result.stdout,
137
223
  exit_code: result.exit_code,
138
224
  duration: duration,
139
225
  provider: self.class.provider_name,
140
226
  model: @config.model,
141
- error: result.failed? ? result.stderr : nil
227
+ error: error,
228
+ metadata: {
229
+ legitimate_exit_codes: legitimate
230
+ }
142
231
  )
143
232
  end
144
233
 
@@ -151,6 +240,13 @@ module AgentHarness
151
240
 
152
241
  private
153
242
 
243
+ def normalize_provider_runtime(options)
244
+ raw = options[:provider_runtime]
245
+ return options if raw.nil? || raw.is_a?(ProviderRuntime)
246
+
247
+ options.merge(provider_runtime: ProviderRuntime.wrap(raw))
248
+ end
249
+
154
250
  def normalize_mcp_servers(options)
155
251
  servers = options[:mcp_servers]
156
252
  return options if servers.nil?
@@ -192,7 +288,7 @@ module AgentHarness
192
288
 
193
289
  AgentHarness.token_tracker.record(
194
290
  provider: self.class.provider_name,
195
- model: @config.model,
291
+ model: response.model || @config.model,
196
292
  input_tokens: response.tokens[:input] || 0,
197
293
  output_tokens: response.tokens[:output] || 0,
198
294
  total_tokens: response.tokens[:total]
@@ -59,6 +59,14 @@ module AgentHarness
59
59
  "OpenAI Codex CLI"
60
60
  end
61
61
 
62
+ def configuration_schema
63
+ {
64
+ fields: [],
65
+ auth_modes: [:api_key],
66
+ openai_compatible: true
67
+ }
68
+ end
69
+
62
70
  def capabilities
63
71
  {
64
72
  streaming: false,
@@ -67,7 +75,24 @@ module AgentHarness
67
75
  tool_use: true,
68
76
  json_mode: false,
69
77
  mcp: false,
70
- dangerous_mode: false
78
+ dangerous_mode: true
79
+ }
80
+ end
81
+
82
+ def dangerous_mode_flags
83
+ ["--full-auto"]
84
+ end
85
+
86
+ def execution_semantics
87
+ {
88
+ prompt_delivery: :arg,
89
+ output_format: :text,
90
+ sandbox_aware: true,
91
+ uses_subcommand: true,
92
+ non_interactive_flag: nil,
93
+ legitimate_exit_codes: [0],
94
+ stderr_is_diagnostic: true,
95
+ parses_rate_limit_reset: false
71
96
  }
72
97
  end
73
98
 
@@ -81,32 +106,15 @@ module AgentHarness
81
106
  end
82
107
 
83
108
  def error_patterns
84
- {
85
- rate_limited: [
86
- /rate.?limit/i,
87
- /too.?many.?requests/i,
88
- /429/
89
- ],
90
- auth_expired: [
91
- /invalid.*api.*key/i,
92
- /unauthorized/i,
93
- /authentication/i,
94
- /401/,
95
- /incorrect.*api.*key/i
96
- ],
97
- quota_exceeded: [
98
- /quota.*exceeded/i,
99
- /insufficient.*quota/i,
100
- /billing/i
101
- ],
102
- transient: [
103
- /timeout/i,
104
- /connection.*reset/i,
105
- /service.*unavailable/i,
106
- /503/,
107
- /502/
109
+ COMMON_ERROR_PATTERNS.merge(
110
+ auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
111
+ transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
112
+ sandbox_failure: [
113
+ /bwrap.*no permissions/i,
114
+ /no permissions to create a new namespace/i,
115
+ /unprivileged.*namespace/i
108
116
  ]
109
- }
117
+ )
110
118
  end
111
119
 
112
120
  def auth_status
@@ -167,9 +175,34 @@ module AgentHarness
167
175
 
168
176
  protected
169
177
 
178
+ def parse_response(result, duration:)
179
+ response = super
180
+
181
+ if response.success? && sandbox_failure_detected?(result.stderr)
182
+ return Response.new(
183
+ output: result.stdout,
184
+ exit_code: 1,
185
+ duration: duration,
186
+ provider: self.class.provider_name,
187
+ model: @config.model,
188
+ error: "Sandbox failure detected: #{result.stderr.strip}"
189
+ )
190
+ end
191
+
192
+ response
193
+ end
194
+
170
195
  def build_command(prompt, options)
171
196
  cmd = [self.class.binary_name, "exec"]
172
197
 
198
+ # When running inside an already-sandboxed Docker container, Codex's
199
+ # own sandboxing conflicts with the outer sandbox. Use --full-auto to
200
+ # skip nested sandboxing while keeping full tool access.
201
+ # Also applies when dangerous_mode is explicitly requested.
202
+ if sandboxed_environment? || options[:dangerous_mode]
203
+ cmd += dangerous_mode_flags
204
+ end
205
+
173
206
  flags = @config.default_flags
174
207
  if flags
175
208
  unless flags.is_a?(Array)
@@ -178,21 +211,58 @@ module AgentHarness
178
211
  cmd += flags if flags.any?
179
212
  end
180
213
 
214
+ if externally_sandboxed?(options)
215
+ cmd += sandbox_bypass_flags
216
+ end
217
+
181
218
  if options[:session]
182
219
  cmd += session_flags(options[:session])
183
220
  end
184
221
 
222
+ runtime = options[:provider_runtime]
223
+ if runtime
224
+ cmd += ["--model", runtime.model] if runtime.model
225
+ cmd += runtime.flags unless runtime.flags.empty?
226
+ end
227
+
185
228
  cmd << prompt
186
229
 
187
230
  cmd
188
231
  end
189
232
 
233
+ def build_env(options)
234
+ env = super
235
+ runtime = options[:provider_runtime]
236
+ return env unless runtime
237
+
238
+ env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
239
+ env
240
+ end
241
+
190
242
  def default_timeout
191
243
  300
192
244
  end
193
245
 
194
246
  private
195
247
 
248
+ def externally_sandboxed?(options)
249
+ if options.key?(:externally_sandboxed)
250
+ !!options[:externally_sandboxed]
251
+ else
252
+ !!@config.externally_sandboxed
253
+ end
254
+ end
255
+
256
+ def sandbox_failure_detected?(stderr)
257
+ return false if stderr.nil? || stderr.empty?
258
+
259
+ error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
260
+ end
261
+
262
+ def sandbox_bypass_flags
263
+ ["--sandbox", "none"]
264
+ end
265
+
196
266
  def read_codex_credentials
197
267
  path = codex_config_path
198
268
  return nil unless File.exist?(path)
@@ -93,6 +93,14 @@ module AgentHarness
93
93
  "Cursor AI"
94
94
  end
95
95
 
96
+ def configuration_schema
97
+ {
98
+ fields: [],
99
+ auth_modes: [:oauth],
100
+ openai_compatible: false
101
+ }
102
+ end
103
+
96
104
  def capabilities
97
105
  {
98
106
  streaming: false,
@@ -126,6 +134,19 @@ module AgentHarness
126
134
  :oauth
127
135
  end
128
136
 
137
+ def execution_semantics
138
+ {
139
+ prompt_delivery: :stdin,
140
+ output_format: :text,
141
+ sandbox_aware: false,
142
+ uses_subcommand: false,
143
+ non_interactive_flag: "-p",
144
+ legitimate_exit_codes: [0],
145
+ stderr_is_diagnostic: true,
146
+ parses_rate_limit_reset: false
147
+ }
148
+ end
149
+
129
150
  def error_patterns
130
151
  {
131
152
  rate_limited: [
@@ -150,23 +171,44 @@ module AgentHarness
150
171
  def send_message(prompt:, **options)
151
172
  log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
152
173
 
174
+ # Coerce provider_runtime from Hash if needed (same as Base#send_message)
175
+ options = normalize_provider_runtime(options)
176
+ runtime = options[:provider_runtime]
177
+
153
178
  # Normalize and validate MCP servers (same as Base#send_message)
154
179
  options = normalize_mcp_servers(options)
155
180
  validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
156
181
 
157
182
  # Build command (without prompt in args - we send via stdin)
158
183
  command = [self.class.binary_name, "-p"]
184
+ command.concat(runtime.flags) if runtime&.flags&.any?
159
185
 
160
186
  # Calculate timeout
161
187
  timeout = options[:timeout] || @config.timeout || default_timeout
162
188
 
163
189
  # Execute command with prompt on stdin
190
+ env = build_env(options)
164
191
  start_time = Time.now
165
- result = @executor.execute(command, timeout: timeout, stdin_data: prompt)
192
+ result = @executor.execute(command, timeout: timeout, stdin_data: prompt, env: env)
166
193
  duration = Time.now - start_time
167
194
 
168
195
  # Parse response
169
196
  response = parse_response(result, duration: duration)
197
+ # Runtime model is a per-request override and always takes precedence
198
+ # over both the config-level model and whatever parse_response returned.
199
+ # See Base#send_message for rationale.
200
+ if runtime&.model
201
+ response = Response.new(
202
+ output: response.output,
203
+ exit_code: response.exit_code,
204
+ duration: response.duration,
205
+ provider: response.provider,
206
+ model: runtime.model,
207
+ tokens: response.tokens,
208
+ metadata: response.metadata,
209
+ error: response.error
210
+ )
211
+ end
170
212
 
171
213
  # Track tokens
172
214
  track_tokens(response) if response.tokens
@@ -188,7 +230,7 @@ module AgentHarness
188
230
  end
189
231
 
190
232
  def build_env(options)
191
- {}
233
+ super
192
234
  end
193
235
 
194
236
  def default_timeout
@@ -83,6 +83,25 @@ module AgentHarness
83
83
  "Google Gemini"
84
84
  end
85
85
 
86
+ def configuration_schema
87
+ {
88
+ fields: [
89
+ {
90
+ name: :model,
91
+ type: :string,
92
+ label: "Model",
93
+ required: false,
94
+ hint: "Gemini model to use (e.g. gemini-2.5-pro, gemini-2.0-flash)",
95
+ # accepts_arbitrary is true because supports_model_family? accepts
96
+ # any string starting with "gemini-", not just discovered models.
97
+ accepts_arbitrary: true
98
+ }
99
+ ],
100
+ auth_modes: [:api_key, :oauth],
101
+ openai_compatible: false
102
+ }
103
+ end
104
+
86
105
  def capabilities
87
106
  {
88
107
  streaming: true,
@@ -99,6 +118,19 @@ module AgentHarness
99
118
  :oauth
100
119
  end
101
120
 
121
+ def execution_semantics
122
+ {
123
+ prompt_delivery: :flag,
124
+ output_format: :text,
125
+ sandbox_aware: false,
126
+ uses_subcommand: false,
127
+ non_interactive_flag: nil,
128
+ legitimate_exit_codes: [0],
129
+ stderr_is_diagnostic: true,
130
+ parses_rate_limit_reset: false
131
+ }
132
+ end
133
+
102
134
  def error_patterns
103
135
  {
104
136
  rate_limited: [
@@ -77,6 +77,14 @@ module AgentHarness
77
77
  "GitHub Copilot CLI"
78
78
  end
79
79
 
80
+ def configuration_schema
81
+ {
82
+ fields: [],
83
+ auth_modes: [:oauth],
84
+ openai_compatible: false
85
+ }
86
+ end
87
+
80
88
  def capabilities
81
89
  {
82
90
  streaming: false,
@@ -89,10 +97,6 @@ module AgentHarness
89
97
  }
90
98
  end
91
99
 
92
- def supports_dangerous_mode?
93
- true
94
- end
95
-
96
100
  def dangerous_mode_flags
97
101
  ["--allow-all-tools"]
98
102
  end
@@ -110,6 +114,19 @@ module AgentHarness
110
114
  :oauth
111
115
  end
112
116
 
117
+ def execution_semantics
118
+ {
119
+ prompt_delivery: :flag,
120
+ output_format: :text,
121
+ sandbox_aware: false,
122
+ uses_subcommand: false,
123
+ non_interactive_flag: nil,
124
+ legitimate_exit_codes: [0],
125
+ stderr_is_diagnostic: true,
126
+ parses_rate_limit_reset: false
127
+ }
128
+ end
129
+
113
130
  def error_patterns
114
131
  {
115
132
  auth_expired: [
@@ -57,6 +57,23 @@ module AgentHarness
57
57
  }
58
58
  end
59
59
 
60
+ def error_patterns
61
+ COMMON_ERROR_PATTERNS
62
+ end
63
+
64
+ def execution_semantics
65
+ {
66
+ prompt_delivery: :arg,
67
+ output_format: :text,
68
+ sandbox_aware: false,
69
+ uses_subcommand: true,
70
+ non_interactive_flag: nil,
71
+ legitimate_exit_codes: [0],
72
+ stderr_is_diagnostic: true,
73
+ parses_rate_limit_reset: false
74
+ }
75
+ end
76
+
60
77
  protected
61
78
 
62
79
  def build_command(prompt, options)
@@ -59,6 +59,23 @@ module AgentHarness
59
59
  }
60
60
  end
61
61
 
62
+ def error_patterns
63
+ COMMON_ERROR_PATTERNS
64
+ end
65
+
66
+ def execution_semantics
67
+ {
68
+ prompt_delivery: :arg,
69
+ output_format: :text,
70
+ sandbox_aware: false,
71
+ uses_subcommand: true,
72
+ non_interactive_flag: nil,
73
+ legitimate_exit_codes: [0],
74
+ stderr_is_diagnostic: true,
75
+ parses_rate_limit_reset: false
76
+ }
77
+ end
78
+
62
79
  protected
63
80
 
64
81
  def build_command(prompt, options)
@@ -47,6 +47,14 @@ module AgentHarness
47
47
  "OpenCode CLI"
48
48
  end
49
49
 
50
+ def configuration_schema
51
+ {
52
+ fields: [],
53
+ auth_modes: [:api_key],
54
+ openai_compatible: true
55
+ }
56
+ end
57
+
50
58
  def capabilities
51
59
  {
52
60
  streaming: false,
@@ -59,14 +67,46 @@ module AgentHarness
59
67
  }
60
68
  end
61
69
 
70
+ def error_patterns
71
+ COMMON_ERROR_PATTERNS
72
+ end
73
+
74
+ def execution_semantics
75
+ {
76
+ prompt_delivery: :arg,
77
+ output_format: :text,
78
+ sandbox_aware: false,
79
+ uses_subcommand: true,
80
+ non_interactive_flag: nil,
81
+ legitimate_exit_codes: [0],
82
+ stderr_is_diagnostic: true,
83
+ parses_rate_limit_reset: false
84
+ }
85
+ end
86
+
62
87
  protected
63
88
 
64
89
  def build_command(prompt, options)
65
90
  cmd = [self.class.binary_name, "run"]
91
+
92
+ runtime = options[:provider_runtime]
93
+ if runtime
94
+ cmd += runtime.flags unless runtime.flags.empty?
95
+ end
96
+
66
97
  cmd << prompt
67
98
  cmd
68
99
  end
69
100
 
101
+ def build_env(options)
102
+ env = super
103
+ runtime = options[:provider_runtime]
104
+ return env unless runtime
105
+
106
+ env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
107
+ env
108
+ end
109
+
70
110
  def default_timeout
71
111
  300
72
112
  end
@@ -40,9 +40,13 @@ module AgentHarness
40
40
 
41
41
  # Check if the response indicates success
42
42
  #
43
- # @return [Boolean] true if exit_code is 0 and no error
43
+ # A response is successful when its exit code is among the provider's
44
+ # legitimate exit codes (defaults to [0]) and no error was detected.
45
+ #
46
+ # @return [Boolean] true if exit_code is legitimate and no error
44
47
  def success?
45
- @exit_code == 0 && @error.nil?
48
+ legitimate = @metadata[:legitimate_exit_codes] || [0]
49
+ legitimate.include?(@exit_code) && @error.nil?
46
50
  end
47
51
 
48
52
  # Check if the response indicates failure
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AgentHarness
4
- VERSION = "0.5.4"
4
+ VERSION = "0.5.6"
5
5
  end
data/lib/agent_harness.rb CHANGED
@@ -138,6 +138,7 @@ end
138
138
  # Core components
139
139
  require_relative "agent_harness/errors"
140
140
  require_relative "agent_harness/mcp_server"
141
+ require_relative "agent_harness/provider_runtime"
141
142
  require_relative "agent_harness/configuration"
142
143
  require_relative "agent_harness/command_executor"
143
144
  require_relative "agent_harness/docker_command_executor"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agent-harness
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bart Agapinan
@@ -92,6 +92,7 @@ files:
92
92
  - lib/agent_harness/orchestration/provider_manager.rb
93
93
  - lib/agent_harness/orchestration/rate_limiter.rb
94
94
  - lib/agent_harness/provider_health_check.rb
95
+ - lib/agent_harness/provider_runtime.rb
95
96
  - lib/agent_harness/providers/adapter.rb
96
97
  - lib/agent_harness/providers/aider.rb
97
98
  - lib/agent_harness/providers/anthropic.rb