agent-harness 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3dfde368468f5c0c037ec1cb2fefd705e813d192e3723fd70126757cd851cf14
4
- data.tar.gz: f68f11de99ea61807ab1bb7a94a28aca4aa37e3346469b195a77019f6ceeceec
3
+ metadata.gz: dc338c5fc81d4175149d405d494936b68a261a637deda4fc0e4fb7b18944bf67
4
+ data.tar.gz: aed5c92bc22dadab8826b919e8eabf606bcd7f6bfe0e1d02631c83461056a888
5
5
  SHA512:
6
- metadata.gz: 4cf0d1807fee47eb2ef1aecc63ab8c10ce71a681d6c98ca2cb860ae7eb9ba7b1a8d88e4382e9ff1f5f2f25acd9380a6cf4c3d8e61f5cf008d920b4d2bbf7b4bc
7
- data.tar.gz: cd6ac3ae08acf302369a28cdda0a3e332994f679ad37258c0f4dd869f99f18396ccb1cb52d9a6d5bcbb3c6ac3c776ba87a5338edb8280b46785edce254eb8258
6
+ metadata.gz: 1d662f4ae796d88a1a2c2eabce4604a38c4b53b545640b51c16a4b8e370ddf59f40ff57b19dfb46ba96e50b85399b846c9d2379bdc9806bd40aa78b1f18c1f66
7
+ data.tar.gz: 913df22acc91cd6db4ff2788867dc8337a2f1e94c0a2b2cce483e0af4ec73d2a946fcbb80270968d82c49c55aa375da86c386f4c8472ad2d42664d2bc1242ee6
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.5.3"
2
+ ".": "0.5.5"
3
3
  }
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
9
+ * 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
10
+
11
+ ## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
12
+
13
+
14
+ ### Bug Fixes
15
+
16
+ * 44: feat(mcp): add first-class MCP server configuration to request execution ([#45](https://github.com/viamin/agent-harness/issues/45)) ([454cd9b](https://github.com/viamin/agent-harness/commit/454cd9be1c4bcd2eb92a4ca6f81cc012d4ce1f8c))
17
+
3
18
  ## [0.5.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.2...agent-harness/v0.5.3) (2026-03-27)
4
19
 
5
20
 
@@ -233,7 +233,8 @@ module AgentHarness
233
233
 
234
234
  # Provider-specific configuration
235
235
  class ProviderConfig
236
- attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
236
+ attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
237
+ :externally_sandboxed
237
238
 
238
239
  attr_reader :name
239
240
 
@@ -246,6 +247,7 @@ module AgentHarness
246
247
  @default_flags = []
247
248
  @timeout = nil
248
249
  @model = nil
250
+ @externally_sandboxed = false
249
251
  end
250
252
 
251
253
  # Merge options into this configuration
@@ -39,6 +39,11 @@ module AgentHarness
39
39
  action: :retry_with_backoff,
40
40
  retryable: true
41
41
  },
42
+ sandbox_failure: {
43
+ description: "Sandbox setup failed",
44
+ action: :escalate,
45
+ retryable: false
46
+ },
42
47
  unknown: {
43
48
  description: "Unknown error",
44
49
  action: :retry_with_backoff,
@@ -57,6 +57,20 @@ module AgentHarness
57
57
  # Configuration errors
58
58
  class ConfigurationError < Error; end
59
59
 
60
+ # MCP-specific errors
61
+ class McpConfigurationError < ConfigurationError; end
62
+
63
+ class McpUnsupportedError < ProviderError
64
+ attr_reader :provider
65
+
66
+ def initialize(message = nil, provider: nil, **kwargs)
67
+ @provider = provider
68
+ super(message, **kwargs)
69
+ end
70
+ end
71
+
72
+ class McpTransportUnsupportedError < McpUnsupportedError; end
73
+
60
74
  # Orchestration errors
61
75
  class NoProvidersAvailableError < Error
62
76
  attr_reader :attempted_providers, :errors
@@ -0,0 +1,157 @@
1
# frozen_string_literal: true

module AgentHarness
  # Canonical representation of an MCP server for request-time execution.
  #
  # Provider-agnostic value object that can be translated by each provider
  # adapter into its CLI-specific configuration. The +command+, +args+ and
  # +env+ collections are shallow-copied and frozen at construction time, so
  # an instance cannot drift away from the state that was validated (neither
  # through the caller's original objects nor through the readers).
  #
  # @example stdio server
  #   McpServer.new(
  #     name: "filesystem",
  #     transport: "stdio",
  #     command: ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/workspace"],
  #     env: { "DEBUG" => "0" }
  #   )
  #
  # @example HTTP/URL server
  #   McpServer.new(
  #     name: "playwright",
  #     transport: "http",
  #     url: "http://mcp-playwright:3000/mcp"
  #   )
  class McpServer
    VALID_TRANSPORTS = %w[stdio http sse].freeze

    attr_reader :name, :transport, :command, :args, :env, :url

    # @param name [String] unique name for this MCP server
    # @param transport [String] one of "stdio", "http", "sse"
    # @param command [Array<String>, nil] command to launch (stdio only)
    # @param args [Array<String>, nil] additional args for the command
    # @param env [Hash<String,String>, nil] environment variables for the server process
    # @param url [String, nil] URL for HTTP/SSE transport
    # @raise [McpConfigurationError] if the definition is invalid
    def initialize(name:, transport:, command: nil, args: nil, env: nil, url: nil)
      @name = name
      @transport = transport.to_s
      @command = frozen_copy(command)
      @args = frozen_copy(args || [])
      @env = frozen_copy(env || {})
      @url = url

      validate!
    end

    # Build from a plain Hash (e.g. from user input or serialized config)
    #
    # Keys may be Strings or Symbols; they are symbolized before lookup.
    #
    # @param hash [Hash] server definition
    # @return [McpServer]
    # @raise [McpConfigurationError] if +hash+ is not a Hash, has keys that
    #   cannot be symbolized, or describes an invalid server
    def self.from_hash(hash)
      unless hash.is_a?(Hash)
        raise McpConfigurationError, "MCP server definition must be a Hash, got #{hash.class}"
      end

      begin
        hash = hash.transform_keys(&:to_sym)
      rescue NoMethodError, TypeError => e
        raise McpConfigurationError, "MCP server hash contains invalid keys: #{e.message}"
      end

      new(
        name: hash[:name],
        transport: hash[:transport],
        command: hash[:command],
        args: hash[:args],
        env: hash[:env],
        url: hash[:url]
      )
    end

    # @return [Boolean] true when the server is launched as a local process
    def stdio?
      @transport == "stdio"
    end

    # @return [Boolean] true for URL-based transports ("http" and "sse")
    def http?
      %w[http sse].include?(@transport)
    end

    # Serialize to a Hash, omitting fields that do not apply to the
    # transport and collections that are empty.
    #
    # @return [Hash]
    def to_h
      h = {name: @name, transport: @transport}
      if stdio?
        h[:command] = @command
        h[:args] = @args unless @args.empty?
      else
        h[:url] = @url
      end
      h[:env] = @env unless @env.empty?
      h
    end

    private

    # Shallow-copy and freeze Array/Hash inputs. Anything else is returned
    # untouched so the validators can reject it with their usual message.
    def frozen_copy(value)
      return value unless value.is_a?(Array) || value.is_a?(Hash)

      value.dup.freeze
    end

    def validate!
      raise McpConfigurationError, "MCP server name is required" if @name.nil? || @name.to_s.strip.empty?

      unless VALID_TRANSPORTS.include?(@transport)
        raise McpConfigurationError,
          "Invalid MCP transport '#{@transport}' for server '#{@name}'. Valid transports: #{VALID_TRANSPORTS.join(", ")}"
      end

      validate_args!
      validate_env!
      validate_stdio! if stdio?
      validate_http! if http?
      validate_no_stdio_only_fields_on_http! if http?
    end

    def validate_args!
      return if @args.is_a?(Array) && @args.all? { |a| a.is_a?(String) }

      raise McpConfigurationError,
        "MCP server '#{@name}' args must be an Array of Strings"
    end

    def validate_env!
      return if @env.is_a?(Hash) && @env.keys.all? { |k| k.is_a?(String) } && @env.values.all? { |v| v.is_a?(String) }

      raise McpConfigurationError,
        "MCP server '#{@name}' env must be a Hash with String keys and values"
    end

    def validate_stdio!
      if @command.nil? || !@command.is_a?(Array) || @command.empty?
        raise McpConfigurationError,
          "MCP server '#{@name}' with stdio transport requires a non-empty command array"
      end

      unless @command.all? { |c| c.is_a?(String) }
        raise McpConfigurationError,
          "MCP server '#{@name}' command must contain only strings"
      end

      return if @url.nil?

      raise McpConfigurationError,
        "MCP server '#{@name}' with stdio transport should not have a url"
    end

    def validate_http!
      if @url.nil? || @url.to_s.strip.empty?
        raise McpConfigurationError,
          "MCP server '#{@name}' with #{@transport} transport requires a url"
      end

      return if @command.nil?

      raise McpConfigurationError,
        "MCP server '#{@name}' with #{@transport} transport should not have a command"
    end

    def validate_no_stdio_only_fields_on_http!
      return if @args.empty?

      raise McpConfigurationError,
        "MCP server '#{@name}' with #{@transport} transport should not have args (args are only valid for stdio)"
    end
  end
end
@@ -124,6 +124,58 @@ module AgentHarness
124
124
  []
125
125
  end
126
126
 
127
+ # Supported MCP transport types for this provider
128
+ #
129
+ # @return [Array<String>] supported transports (e.g. ["stdio", "http"])
130
+ def supported_mcp_transports
131
+ []
132
+ end
133
+
134
+ # Build provider-specific MCP flags/arguments for CLI invocation
135
+ #
136
+ # @param mcp_servers [Array<McpServer>] MCP server definitions
137
+ # @param working_dir [String, nil] working directory for temp files
138
+ # @return [Array<String>] CLI flags to append to the command
139
+ def build_mcp_flags(mcp_servers, working_dir: nil)
140
+ []
141
+ end
142
+
143
+ # Validate that this provider can handle the given MCP servers
144
+ #
145
+ # @param mcp_servers [Array<McpServer>] MCP server definitions
146
+ # @raise [McpUnsupportedError] if MCP is not supported
147
+ # @raise [McpTransportUnsupportedError] if a transport is not supported
148
+ def validate_mcp_servers!(mcp_servers)
149
+ return if mcp_servers.nil? || mcp_servers.empty?
150
+
151
+ unless supports_mcp?
152
+ raise McpUnsupportedError.new(
153
+ "Provider '#{self.class.provider_name}' does not support MCP servers",
154
+ provider: self.class.provider_name
155
+ )
156
+ end
157
+
158
+ supported = supported_mcp_transports
159
+
160
+ if supported.empty?
161
+ raise McpUnsupportedError.new(
162
+ "Provider '#{self.class.provider_name}' does not support request-time MCP servers",
163
+ provider: self.class.provider_name
164
+ )
165
+ end
166
+
167
+ mcp_servers.each do |server|
168
+ next if supported.include?(server.transport)
169
+
170
+ raise McpTransportUnsupportedError.new(
171
+ "Provider '#{self.class.provider_name}' does not support MCP transport " \
172
+ "'#{server.transport}' (server: '#{server.name}'). " \
173
+ "Supported transports: #{supported.join(", ")}",
174
+ provider: self.class.provider_name
175
+ )
176
+ end
177
+ end
178
+
127
179
  # Check if provider supports dangerous mode
128
180
  #
129
181
  # @return [Boolean] true if dangerous mode is supported
@@ -166,6 +218,38 @@ module AgentHarness
166
218
  def health_status
167
219
  {healthy: true, message: "OK"}
168
220
  end
221
+
222
+ # Execution semantics for this provider
223
+ #
224
+ # Returns a hash describing provider-specific execution behavior so
225
+ # downstream apps do not need to hardcode CLI quirks. This metadata
226
+ # can be used to select the right flags and interpret output.
227
+ #
228
+ # @return [Hash] execution semantics
229
+ def execution_semantics
230
+ {
231
+ prompt_delivery: :arg, # :arg, :stdin, or :flag
232
+ output_format: :text, # :text or :json
233
+ sandbox_aware: false, # adjusts behavior inside containers
234
+ uses_subcommand: false, # e.g. "codex exec", "opencode run"
235
+ non_interactive_flag: nil, # flag to suppress interactive prompts
236
+ legitimate_exit_codes: [0], # exit codes that are NOT errors
237
+ stderr_is_diagnostic: true, # stderr may contain non-error output
238
+ parses_rate_limit_reset: false # can extract Retry-After from output
239
+ }
240
+ end
241
+
242
+ # Parse a rate-limit reset time from provider output
243
+ #
244
+ # Providers that include rate-limit reset information in their error
245
+ # output can override this to extract it, so the orchestration layer
246
+ # can schedule retries accurately.
247
+ #
248
+ # @param output [String] combined stdout+stderr from the CLI
249
+ # @return [Time, nil] when the rate limit resets, or nil if unknown
250
+ def parse_rate_limit_reset(output)
251
+ nil
252
+ end
169
253
  end
170
254
  end
171
255
  end
@@ -71,6 +71,26 @@ module AgentHarness
71
71
  }
72
72
  end
73
73
 
74
+ def error_patterns
75
+ COMMON_ERROR_PATTERNS.merge(
76
+ auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
77
+ transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
78
+ )
79
+ end
80
+
81
+ def execution_semantics
82
+ {
83
+ prompt_delivery: :flag,
84
+ output_format: :text,
85
+ sandbox_aware: false,
86
+ uses_subcommand: false,
87
+ non_interactive_flag: "--yes",
88
+ legitimate_exit_codes: [0],
89
+ stderr_is_diagnostic: true,
90
+ parses_rate_limit_reset: false
91
+ }
92
+ end
93
+
74
94
  def supports_sessions?
75
95
  true
76
96
  end
@@ -172,12 +172,25 @@ module AgentHarness
172
172
  }
173
173
  end
174
174
 
175
+ def send_message(prompt:, **options)
176
+ super
177
+ ensure
178
+ cleanup_mcp_tempfiles!
179
+ end
180
+
175
181
  def supports_mcp?
176
182
  true
177
183
  end
178
184
 
179
- def supports_dangerous_mode?
180
- true
185
+ def supported_mcp_transports
186
+ %w[stdio http sse]
187
+ end
188
+
189
+ def build_mcp_flags(mcp_servers, working_dir: nil)
190
+ return [] if mcp_servers.empty?
191
+
192
+ config_path = write_mcp_config_file(mcp_servers, working_dir: working_dir)
193
+ ["--mcp-config", config_path]
181
194
  end
182
195
 
183
196
  def dangerous_mode_flags
@@ -188,6 +201,19 @@ module AgentHarness
188
201
  :oauth
189
202
  end
190
203
 
204
+ def execution_semantics
205
+ {
206
+ prompt_delivery: :arg,
207
+ output_format: :json,
208
+ sandbox_aware: true,
209
+ uses_subcommand: false,
210
+ non_interactive_flag: "--print",
211
+ legitimate_exit_codes: [0],
212
+ stderr_is_diagnostic: true,
213
+ parses_rate_limit_reset: false
214
+ }
215
+ end
216
+
191
217
  def error_patterns
192
218
  {
193
219
  rate_limited: [
@@ -266,6 +292,11 @@ module AgentHarness
266
292
  cmd += dangerous_mode_flags
267
293
  end
268
294
 
295
+ # Add MCP server flags (validated/normalized by Base#send_message)
296
+ if options[:mcp_servers]&.any?
297
+ cmd += build_mcp_flags(options[:mcp_servers])
298
+ end
299
+
269
300
  # Add custom flags from config
270
301
  cmd += @config.default_flags if @config.default_flags&.any?
271
302
 
@@ -376,6 +407,87 @@ module AgentHarness
376
407
  servers
377
408
  end
378
409
 
410
# Serialize the given MCP servers into a Claude MCP config JSON file and
# return the path the CLI should be pointed at.
#
# When the executor is a DockerCommandExecutor the file is written inside
# the container (via `cat` fed from stdin) so the CLI process can read it;
# otherwise a Tempfile is created under +working_dir+ (or Dir.tmpdir).
#
# Every artifact is registered for cleanup_mcp_tempfiles! *before* it is
# written, so a failed or partial write is still removed after execution.
#
# @param mcp_servers [Array<McpServer>] MCP server definitions
# @param working_dir [String, nil] directory for the local temp file
# @return [String] path of the written config file
# @raise [McpConfigurationError] if the in-container write fails
def write_mcp_config_file(mcp_servers, working_dir: nil)
  # Loaded lazily: only needed when MCP servers are actually requested.
  require "tempfile"
  require "tmpdir"
  require "securerandom"

  config_json = JSON.generate(build_claude_mcp_config(mcp_servers))

  if @executor.is_a?(DockerCommandExecutor)
    container_path = "/tmp/agent_harness_mcp_#{SecureRandom.hex(8)}.json"

    # Track the path up front; `rm -f` on a file that was never created is
    # harmless, whereas an untracked partial file would be leaked.
    @mcp_docker_config_paths ||= []
    @mcp_docker_config_paths << container_path

    result = @executor.execute(
      ["sh", "-c", "cat > #{container_path}"],
      stdin_data: config_json,
      timeout: 5
    )
    unless result.success?
      raise McpConfigurationError,
        "Failed to write MCP config inside container: #{result.stderr}"
    end

    container_path
  else
    file = Tempfile.new(["agent_harness_mcp_", ".json"], working_dir || Dir.tmpdir)

    # Hold a reference so the Tempfile is not garbage-collected (and
    # therefore deleted) before the CLI process reads it, and so that it is
    # cleaned up even if the write below raises.
    @mcp_config_tempfiles ||= []
    @mcp_config_tempfiles << file

    begin
      file.write(config_json)
    ensure
      file.close
    end

    file.path
  end
end
452
+
453
# Translate canonical MCP server objects into the structure expected in a
# Claude MCP config file (`{mcpServers: {name => entry}}`).
#
# stdio servers become `{command:, args:}` where the first element of the
# server's command array is the executable and the rest, followed by the
# server's extra args, become `args` (omitted when empty).
#
# URL-based servers become `{type:, url:}`. The `type` carries the
# transport ("http" or "sse"); without it the entry is indistinguishable
# from a stdio definition that is missing its command.
#
# `env` is attached to either kind of entry when it is non-empty.
#
# @param mcp_servers [Array<McpServer>] MCP server definitions
# @return [Hash] config ready to be serialized as JSON
def build_claude_mcp_config(mcp_servers)
  servers = mcp_servers.each_with_object({}) do |server, acc|
    entry =
      if server.stdio?
        executable, *leading_args = server.command
        all_args = leading_args + server.args
        stdio_entry = {command: executable}
        stdio_entry[:args] = all_args unless all_args.empty?
        stdio_entry
      else
        {type: server.transport, url: server.url}
      end

    entry[:env] = server.env unless server.env.empty?
    acc[server.name] = entry
  end

  {mcpServers: servers}
end
469
+
470
+ def cleanup_mcp_tempfiles!
471
+ if @mcp_config_tempfiles
472
+ @mcp_config_tempfiles.each do |file|
473
+ file.close unless file.closed?
474
+ file.unlink
475
+ rescue
476
+ nil
477
+ end
478
+ @mcp_config_tempfiles = nil
479
+ end
480
+
481
+ if @mcp_docker_config_paths
482
+ @mcp_docker_config_paths.each do |path|
483
+ @executor.execute(["rm", "-f", path], timeout: 5)
484
+ rescue
485
+ nil
486
+ end
487
+ @mcp_docker_config_paths = nil
488
+ end
489
+ end
490
+
379
491
  def log_debug(action, **context)
380
492
  @logger&.debug("[AgentHarness::Anthropic] #{action}: #{context.inspect}")
381
493
  end
@@ -32,6 +32,34 @@ module AgentHarness
32
32
  class Base
33
33
  include Adapter
34
34
 
35
+ # Common error patterns shared across providers that use standard
36
+ # HTTP-style error responses. Providers with unique patterns (e.g.
37
+ # Anthropic, GitHub Copilot) override error_patterns entirely.
38
+ COMMON_ERROR_PATTERNS = {
39
+ rate_limited: [
40
+ /rate.?limit/i,
41
+ /too.?many.?requests/i,
42
+ /429/
43
+ ],
44
+ auth_expired: [
45
+ /invalid.*api.*key/i,
46
+ /unauthorized/i,
47
+ /authentication/i
48
+ ],
49
+ quota_exceeded: [
50
+ /quota.*exceeded/i,
51
+ /insufficient.*quota/i,
52
+ /billing/i
53
+ ],
54
+ transient: [
55
+ /timeout/i,
56
+ /connection.*error/i,
57
+ /service.*unavailable/i,
58
+ /503/,
59
+ /502/
60
+ ]
61
+ }.tap { |patterns| patterns.each_value(&:freeze) }.freeze
62
+
35
63
  attr_reader :config, :logger
36
64
  attr_accessor :executor
37
65
 
@@ -63,6 +91,10 @@ module AgentHarness
63
91
  def send_message(prompt:, **options)
64
92
  log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
65
93
 
94
+ # Normalize and validate MCP servers
95
+ options = normalize_mcp_servers(options)
96
+ validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
97
+
66
98
  # Build command
67
99
  command = build_command(prompt, options)
68
100
 
@@ -83,6 +115,8 @@ module AgentHarness
83
115
  log_debug("send_message_complete", duration: duration, tokens: response.tokens)
84
116
 
85
117
  response
118
+ rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
119
+ raise
86
120
  rescue => e
87
121
  handle_error(e, prompt: prompt, options: options)
88
122
  end
@@ -101,6 +135,16 @@ module AgentHarness
101
135
  name.capitalize
102
136
  end
103
137
 
138
+ # Whether the provider is running inside a sandboxed (Docker) environment
139
+ #
140
+ # Providers can use this to adjust execution flags, e.g. skipping
141
+ # nested sandboxing when already inside a container.
142
+ #
143
+ # @return [Boolean] true when the executor is a DockerCommandExecutor
144
+ def sandboxed_environment?
145
+ @executor.is_a?(DockerCommandExecutor)
146
+ end
147
+
104
148
  protected
105
149
 
106
150
  # Build CLI command - override in subclasses
@@ -122,17 +166,39 @@ module AgentHarness
122
166
 
123
167
  # Parse CLI output into Response - override in subclasses
124
168
  #
169
+ # Combines stdout and stderr for error classification so that
170
+ # provider-specific error messages are captured regardless of
171
+ # which stream they appear on.
172
+ #
125
173
  # @param result [CommandExecutor::Result] execution result
126
174
  # @param duration [Float] execution duration
127
175
  # @return [Response] parsed response
128
176
  def parse_response(result, duration:)
177
+ error = nil
178
+ # Use execution_semantics[:legitimate_exit_codes] so providers can
179
+ # declare additional non-error exit codes beyond zero.
180
+ legitimate = execution_semantics[:legitimate_exit_codes] || [0]
181
+ unless legitimate.include?(result.exit_code)
182
+ # Concatenate non-empty streams so error patterns can match
183
+ # regardless of which stream the provider writes to.
184
+ combined = [result.stderr, result.stdout]
185
+ .map { |s| s.to_s.strip }
186
+ .reject(&:empty?)
187
+ .join("\n")
188
+
189
+ error = combined unless combined.empty?
190
+ end
191
+
129
192
  Response.new(
130
193
  output: result.stdout,
131
194
  exit_code: result.exit_code,
132
195
  duration: duration,
133
196
  provider: self.class.provider_name,
134
197
  model: @config.model,
135
- error: result.failed? ? result.stderr : nil
198
+ error: error,
199
+ metadata: {
200
+ legitimate_exit_codes: legitimate
201
+ }
136
202
  )
137
203
  end
138
204
 
@@ -145,6 +211,38 @@ module AgentHarness
145
211
 
146
212
  private
147
213
 
214
# Coerce options[:mcp_servers] into an Array of McpServer objects.
#
# Hash entries are converted with McpServer.from_hash; McpServer entries
# are passed through. A nil or empty list leaves +options+ untouched.
#
# Names are compared by their String form when checking for duplicates, so
# "fs" and :fs are treated as the same server: both would end up under the
# same key once the provider config is serialized.
#
# @param options [Hash] send_message options
# @return [Hash] +options+ itself, or a copy with :mcp_servers normalized
# @raise [McpConfigurationError] if :mcp_servers is not an Array, contains
#   an unsupported entry type, or contains duplicate server names
def normalize_mcp_servers(options)
  servers = options[:mcp_servers]
  return options if servers.nil?

  unless servers.is_a?(Array)
    raise McpConfigurationError,
      "mcp_servers must be an Array of Hash or McpServer, got #{servers.class}"
  end

  return options if servers.empty?

  normalized = servers.map do |server|
    case server
    when McpServer then server
    when Hash then McpServer.from_hash(server)
    else
      raise McpConfigurationError, "MCP server must be a Hash or McpServer, got #{server.class}"
    end
  end

  # Ensure MCP server names are unique to avoid silent overwrites downstream
  duplicate_names = normalized
    .group_by { |server| server.name.to_s }
    .select { |_, group| group.size > 1 }
    .keys
  unless duplicate_names.empty?
    raise McpConfigurationError,
      "Duplicate MCP server names detected: #{duplicate_names.join(", ")}"
  end

  options.merge(mcp_servers: normalized)
end
245
+
148
246
  def execute_with_timeout(command, timeout:, env:)
149
247
  @executor.execute(command, timeout: timeout, env: env)
150
248
  end
@@ -67,7 +67,24 @@ module AgentHarness
67
67
  tool_use: true,
68
68
  json_mode: false,
69
69
  mcp: false,
70
- dangerous_mode: false
70
+ dangerous_mode: true
71
+ }
72
+ end
73
+
74
+ def dangerous_mode_flags
75
+ ["--full-auto"]
76
+ end
77
+
78
+ def execution_semantics
79
+ {
80
+ prompt_delivery: :arg,
81
+ output_format: :text,
82
+ sandbox_aware: true,
83
+ uses_subcommand: true,
84
+ non_interactive_flag: nil,
85
+ legitimate_exit_codes: [0],
86
+ stderr_is_diagnostic: true,
87
+ parses_rate_limit_reset: false
71
88
  }
72
89
  end
73
90
 
@@ -81,32 +98,15 @@ module AgentHarness
81
98
  end
82
99
 
83
100
  def error_patterns
84
- {
85
- rate_limited: [
86
- /rate.?limit/i,
87
- /too.?many.?requests/i,
88
- /429/
89
- ],
90
- auth_expired: [
91
- /invalid.*api.*key/i,
92
- /unauthorized/i,
93
- /authentication/i,
94
- /401/,
95
- /incorrect.*api.*key/i
96
- ],
97
- quota_exceeded: [
98
- /quota.*exceeded/i,
99
- /insufficient.*quota/i,
100
- /billing/i
101
- ],
102
- transient: [
103
- /timeout/i,
104
- /connection.*reset/i,
105
- /service.*unavailable/i,
106
- /503/,
107
- /502/
101
+ COMMON_ERROR_PATTERNS.merge(
102
+ auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
103
+ transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
104
+ sandbox_failure: [
105
+ /bwrap.*no permissions/i,
106
+ /no permissions to create a new namespace/i,
107
+ /unprivileged.*namespace/i
108
108
  ]
109
- }
109
+ )
110
110
  end
111
111
 
112
112
  def auth_status
@@ -167,9 +167,34 @@ module AgentHarness
167
167
 
168
168
  protected
169
169
 
170
+ def parse_response(result, duration:)
171
+ response = super
172
+
173
+ if response.success? && sandbox_failure_detected?(result.stderr)
174
+ return Response.new(
175
+ output: result.stdout,
176
+ exit_code: 1,
177
+ duration: duration,
178
+ provider: self.class.provider_name,
179
+ model: @config.model,
180
+ error: "Sandbox failure detected: #{result.stderr.strip}"
181
+ )
182
+ end
183
+
184
+ response
185
+ end
186
+
170
187
  def build_command(prompt, options)
171
188
  cmd = [self.class.binary_name, "exec"]
172
189
 
190
+ # When running inside an already-sandboxed Docker container, Codex's
191
+ # own sandboxing conflicts with the outer sandbox. Use --full-auto to
192
+ # skip nested sandboxing while keeping full tool access.
193
+ # Also applies when dangerous_mode is explicitly requested.
194
+ if sandboxed_environment? || options[:dangerous_mode]
195
+ cmd += dangerous_mode_flags
196
+ end
197
+
173
198
  flags = @config.default_flags
174
199
  if flags
175
200
  unless flags.is_a?(Array)
@@ -178,6 +203,10 @@ module AgentHarness
178
203
  cmd += flags if flags.any?
179
204
  end
180
205
 
206
+ if externally_sandboxed?(options)
207
+ cmd += sandbox_bypass_flags
208
+ end
209
+
181
210
  if options[:session]
182
211
  cmd += session_flags(options[:session])
183
212
  end
@@ -193,6 +222,24 @@ module AgentHarness
193
222
 
194
223
  private
195
224
 
225
+ def externally_sandboxed?(options)
226
+ if options.key?(:externally_sandboxed)
227
+ !!options[:externally_sandboxed]
228
+ else
229
+ !!@config.externally_sandboxed
230
+ end
231
+ end
232
+
233
+ def sandbox_failure_detected?(stderr)
234
+ return false if stderr.nil? || stderr.empty?
235
+
236
+ error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
237
+ end
238
+
239
# CLI flags that turn off Codex's own sandbox, appended by build_command
# when execution is marked as externally sandboxed.
#
# NOTE(review): the Codex CLI's `--sandbox` option is understood to accept
# `read-only`, `workspace-write` and `danger-full-access`; `none` is not
# among them, so the unsandboxed policy is requested by its actual name.
# Confirm against the Codex CLI version in use.
#
# @return [Array<String>] flags to append to the command
def sandbox_bypass_flags
  ["--sandbox", "danger-full-access"]
end
242
+
196
243
  def read_codex_credentials
197
244
  path = codex_config_path
198
245
  return nil unless File.exist?(path)
@@ -109,6 +109,14 @@ module AgentHarness
109
109
  true
110
110
  end
111
111
 
112
+ # Cursor supports MCP for fetching existing server configurations (via
113
+ # fetch_mcp_servers) but does not support injecting request-time MCP
114
+ # servers into CLI invocations. Returning an empty list causes
115
+ # validate_mcp_servers! to raise McpUnsupportedError with a clear message.
116
+ def supported_mcp_transports
117
+ []
118
+ end
119
+
112
120
  def fetch_mcp_servers
113
121
  # Try CLI first, then config file
114
122
  fetch_mcp_servers_cli || fetch_mcp_servers_config
@@ -118,6 +126,19 @@ module AgentHarness
118
126
  :oauth
119
127
  end
120
128
 
129
+ def execution_semantics
130
+ {
131
+ prompt_delivery: :stdin,
132
+ output_format: :text,
133
+ sandbox_aware: false,
134
+ uses_subcommand: false,
135
+ non_interactive_flag: "-p",
136
+ legitimate_exit_codes: [0],
137
+ stderr_is_diagnostic: true,
138
+ parses_rate_limit_reset: false
139
+ }
140
+ end
141
+
121
142
  def error_patterns
122
143
  {
123
144
  rate_limited: [
@@ -142,6 +163,10 @@ module AgentHarness
142
163
  def send_message(prompt:, **options)
143
164
  log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
144
165
 
166
+ # Normalize and validate MCP servers (same as Base#send_message)
167
+ options = normalize_mcp_servers(options)
168
+ validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
169
+
145
170
  # Build command (without prompt in args - we send via stdin)
146
171
  command = [self.class.binary_name, "-p"]
147
172
 
@@ -162,6 +187,8 @@ module AgentHarness
162
187
  log_debug("send_message_complete", duration: duration)
163
188
 
164
189
  response
190
+ rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
191
+ raise
165
192
  rescue => e
166
193
  handle_error(e, prompt: prompt, options: options)
167
194
  end
@@ -99,6 +99,19 @@ module AgentHarness
99
99
  :oauth
100
100
  end
101
101
 
102
+ def execution_semantics
103
+ {
104
+ prompt_delivery: :flag,
105
+ output_format: :text,
106
+ sandbox_aware: false,
107
+ uses_subcommand: false,
108
+ non_interactive_flag: nil,
109
+ legitimate_exit_codes: [0],
110
+ stderr_is_diagnostic: true,
111
+ parses_rate_limit_reset: false
112
+ }
113
+ end
114
+
102
115
  def error_patterns
103
116
  {
104
117
  rate_limited: [
@@ -89,10 +89,6 @@ module AgentHarness
89
89
  }
90
90
  end
91
91
 
92
- def supports_dangerous_mode?
93
- true
94
- end
95
-
96
92
  def dangerous_mode_flags
97
93
  ["--allow-all-tools"]
98
94
  end
@@ -110,6 +106,19 @@ module AgentHarness
110
106
  :oauth
111
107
  end
112
108
 
109
+ def execution_semantics
110
+ {
111
+ prompt_delivery: :flag,
112
+ output_format: :text,
113
+ sandbox_aware: false,
114
+ uses_subcommand: false,
115
+ non_interactive_flag: nil,
116
+ legitimate_exit_codes: [0],
117
+ stderr_is_diagnostic: true,
118
+ parses_rate_limit_reset: false
119
+ }
120
+ end
121
+
113
122
  def error_patterns
114
123
  {
115
124
  auth_expired: [
@@ -57,6 +57,23 @@ module AgentHarness
57
57
  }
58
58
  end
59
59
 
60
+ def error_patterns
61
+ COMMON_ERROR_PATTERNS
62
+ end
63
+
64
+ def execution_semantics
65
+ {
66
+ prompt_delivery: :arg,
67
+ output_format: :text,
68
+ sandbox_aware: false,
69
+ uses_subcommand: true,
70
+ non_interactive_flag: nil,
71
+ legitimate_exit_codes: [0],
72
+ stderr_is_diagnostic: true,
73
+ parses_rate_limit_reset: false
74
+ }
75
+ end
76
+
60
77
  protected
61
78
 
62
79
  def build_command(prompt, options)
@@ -59,6 +59,23 @@ module AgentHarness
59
59
  }
60
60
  end
61
61
 
62
+ def error_patterns
63
+ COMMON_ERROR_PATTERNS
64
+ end
65
+
66
+ def execution_semantics
67
+ {
68
+ prompt_delivery: :arg,
69
+ output_format: :text,
70
+ sandbox_aware: false,
71
+ uses_subcommand: true,
72
+ non_interactive_flag: nil,
73
+ legitimate_exit_codes: [0],
74
+ stderr_is_diagnostic: true,
75
+ parses_rate_limit_reset: false
76
+ }
77
+ end
78
+
62
79
  protected
63
80
 
64
81
  def build_command(prompt, options)
@@ -59,6 +59,23 @@ module AgentHarness
59
59
  }
60
60
  end
61
61
 
62
+ def error_patterns
63
+ COMMON_ERROR_PATTERNS
64
+ end
65
+
66
+ def execution_semantics
67
+ {
68
+ prompt_delivery: :arg,
69
+ output_format: :text,
70
+ sandbox_aware: false,
71
+ uses_subcommand: true,
72
+ non_interactive_flag: nil,
73
+ legitimate_exit_codes: [0],
74
+ stderr_is_diagnostic: true,
75
+ parses_rate_limit_reset: false
76
+ }
77
+ end
78
+
62
79
  protected
63
80
 
64
81
  def build_command(prompt, options)
@@ -40,9 +40,13 @@ module AgentHarness
40
40
 
41
41
  # Check if the response indicates success
42
42
  #
43
- # @return [Boolean] true if exit_code is 0 and no error
43
+ # A response is successful when its exit code is among the provider's
44
+ # legitimate exit codes (defaults to [0]) and no error was detected.
45
+ #
46
+ # @return [Boolean] true if exit_code is legitimate and no error
44
47
  def success?
45
- @exit_code == 0 && @error.nil?
48
+ legitimate = @metadata[:legitimate_exit_codes] || [0]
49
+ legitimate.include?(@exit_code) && @error.nil?
46
50
  end
47
51
 
48
52
  # Check if the response indicates failure
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AgentHarness
4
- VERSION = "0.5.3"
4
+ VERSION = "0.5.5"
5
5
  end
data/lib/agent_harness.rb CHANGED
@@ -137,6 +137,7 @@ end
137
137
 
138
138
  # Core components
139
139
  require_relative "agent_harness/errors"
140
+ require_relative "agent_harness/mcp_server"
140
141
  require_relative "agent_harness/configuration"
141
142
  require_relative "agent_harness/command_executor"
142
143
  require_relative "agent_harness/docker_command_executor"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agent-harness
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bart Agapinan
@@ -84,6 +84,7 @@ files:
84
84
  - lib/agent_harness/docker_command_executor.rb
85
85
  - lib/agent_harness/error_taxonomy.rb
86
86
  - lib/agent_harness/errors.rb
87
+ - lib/agent_harness/mcp_server.rb
87
88
  - lib/agent_harness/orchestration/circuit_breaker.rb
88
89
  - lib/agent_harness/orchestration/conductor.rb
89
90
  - lib/agent_harness/orchestration/health_monitor.rb