agent-harness 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/CHANGELOG.md +15 -0
- data/lib/agent_harness/configuration.rb +3 -1
- data/lib/agent_harness/error_taxonomy.rb +5 -0
- data/lib/agent_harness/errors.rb +14 -0
- data/lib/agent_harness/mcp_server.rb +157 -0
- data/lib/agent_harness/providers/adapter.rb +84 -0
- data/lib/agent_harness/providers/aider.rb +20 -0
- data/lib/agent_harness/providers/anthropic.rb +114 -2
- data/lib/agent_harness/providers/base.rb +99 -1
- data/lib/agent_harness/providers/codex.rb +73 -26
- data/lib/agent_harness/providers/cursor.rb +27 -0
- data/lib/agent_harness/providers/gemini.rb +13 -0
- data/lib/agent_harness/providers/github_copilot.rb +13 -4
- data/lib/agent_harness/providers/kilocode.rb +17 -0
- data/lib/agent_harness/providers/mistral_vibe.rb +17 -0
- data/lib/agent_harness/providers/opencode.rb +17 -0
- data/lib/agent_harness/response.rb +6 -2
- data/lib/agent_harness/version.rb +1 -1
- data/lib/agent_harness.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dc338c5fc81d4175149d405d494936b68a261a637deda4fc0e4fb7b18944bf67
|
|
4
|
+
data.tar.gz: aed5c92bc22dadab8826b919e8eabf606bcd7f6bfe0e1d02631c83461056a888
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1d662f4ae796d88a1a2c2eabce4604a38c4b53b545640b51c16a4b8e370ddf59f40ff57b19dfb46ba96e50b85399b846c9d2379bdc9806bd40aa78b1f18c1f66
|
|
7
|
+
data.tar.gz: 913df22acc91cd6db4ff2788867dc8337a2f1e94c0a2b2cce483e0af4ec73d2a946fcbb80270968d82c49c55aa375da86c386f4c8472ad2d42664d2bc1242ee6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
|
|
9
|
+
* 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
|
|
10
|
+
|
|
11
|
+
## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Bug Fixes
|
|
15
|
+
|
|
16
|
+
* 44: feat(mcp): add first-class MCP server configuration to request execution ([#45](https://github.com/viamin/agent-harness/issues/45)) ([454cd9b](https://github.com/viamin/agent-harness/commit/454cd9be1c4bcd2eb92a4ca6f81cc012d4ce1f8c))
|
|
17
|
+
|
|
3
18
|
## [0.5.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.2...agent-harness/v0.5.3) (2026-03-27)
|
|
4
19
|
|
|
5
20
|
|
|
data/lib/agent_harness/configuration.rb
CHANGED
|
@@ -233,7 +233,8 @@ module AgentHarness
|
|
|
233
233
|
|
|
234
234
|
# Provider-specific configuration
|
|
235
235
|
class ProviderConfig
|
|
236
|
-
attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
|
|
236
|
+
attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
|
|
237
|
+
:externally_sandboxed
|
|
237
238
|
|
|
238
239
|
attr_reader :name
|
|
239
240
|
|
|
@@ -246,6 +247,7 @@ module AgentHarness
|
|
|
246
247
|
@default_flags = []
|
|
247
248
|
@timeout = nil
|
|
248
249
|
@model = nil
|
|
250
|
+
@externally_sandboxed = false
|
|
249
251
|
end
|
|
250
252
|
|
|
251
253
|
# Merge options into this configuration
|
|
data/lib/agent_harness/error_taxonomy.rb
CHANGED
|
@@ -39,6 +39,11 @@ module AgentHarness
|
|
|
39
39
|
action: :retry_with_backoff,
|
|
40
40
|
retryable: true
|
|
41
41
|
},
|
|
42
|
+
sandbox_failure: {
|
|
43
|
+
description: "Sandbox setup failed",
|
|
44
|
+
action: :escalate,
|
|
45
|
+
retryable: false
|
|
46
|
+
},
|
|
42
47
|
unknown: {
|
|
43
48
|
description: "Unknown error",
|
|
44
49
|
action: :retry_with_backoff,
|
data/lib/agent_harness/errors.rb
CHANGED
|
@@ -57,6 +57,20 @@ module AgentHarness
|
|
|
57
57
|
# Configuration errors
|
|
58
58
|
class ConfigurationError < Error; end
|
|
59
59
|
|
|
60
|
+
# MCP-specific errors
|
|
61
|
+
class McpConfigurationError < ConfigurationError; end
|
|
62
|
+
|
|
63
|
+
class McpUnsupportedError < ProviderError
|
|
64
|
+
attr_reader :provider
|
|
65
|
+
|
|
66
|
+
def initialize(message = nil, provider: nil, **kwargs)
|
|
67
|
+
@provider = provider
|
|
68
|
+
super(message, **kwargs)
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
class McpTransportUnsupportedError < McpUnsupportedError; end
|
|
73
|
+
|
|
60
74
|
# Orchestration errors
|
|
61
75
|
class NoProvidersAvailableError < Error
|
|
62
76
|
attr_reader :attempted_providers, :errors
|
|
data/lib/agent_harness/mcp_server.rb
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentHarness
|
|
4
|
+
# Canonical representation of an MCP server for request-time execution.
|
|
5
|
+
#
|
|
6
|
+
# Provider-agnostic value object that can be translated by each provider
|
|
7
|
+
# adapter into its CLI-specific configuration.
|
|
8
|
+
#
|
|
9
|
+
# @example stdio server
|
|
10
|
+
# McpServer.new(
|
|
11
|
+
# name: "filesystem",
|
|
12
|
+
# transport: "stdio",
|
|
13
|
+
# command: ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/workspace"],
|
|
14
|
+
# env: { "DEBUG" => "0" }
|
|
15
|
+
# )
|
|
16
|
+
#
|
|
17
|
+
# @example HTTP/URL server
|
|
18
|
+
# McpServer.new(
|
|
19
|
+
# name: "playwright",
|
|
20
|
+
# transport: "http",
|
|
21
|
+
# url: "http://mcp-playwright:3000/mcp"
|
|
22
|
+
# )
|
|
23
|
+
class McpServer
|
|
24
|
+
VALID_TRANSPORTS = %w[stdio http sse].freeze
|
|
25
|
+
|
|
26
|
+
attr_reader :name, :transport, :command, :args, :env, :url
|
|
27
|
+
|
|
28
|
+
# @param name [String] unique name for this MCP server
|
|
29
|
+
# @param transport [String] one of "stdio", "http", "sse"
|
|
30
|
+
# @param command [Array<String>, nil] command to launch (stdio only)
|
|
31
|
+
# @param args [Array<String>, nil] additional args for the command
|
|
32
|
+
# @param env [Hash<String,String>, nil] environment variables for the server process
|
|
33
|
+
# @param url [String, nil] URL for HTTP/SSE transport
|
|
34
|
+
def initialize(name:, transport:, command: nil, args: nil, env: nil, url: nil)
|
|
35
|
+
@name = name
|
|
36
|
+
@transport = transport.to_s
|
|
37
|
+
@command = command
|
|
38
|
+
@args = args || []
|
|
39
|
+
@env = env || {}
|
|
40
|
+
@url = url
|
|
41
|
+
|
|
42
|
+
validate!
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Build from a plain Hash (e.g. from user input or serialized config)
|
|
46
|
+
#
|
|
47
|
+
# @param hash [Hash] server definition
|
|
48
|
+
# @return [McpServer]
|
|
49
|
+
def self.from_hash(hash)
|
|
50
|
+
unless hash.is_a?(Hash)
|
|
51
|
+
raise McpConfigurationError, "MCP server definition must be a Hash, got #{hash.class}"
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
begin
|
|
55
|
+
hash = hash.transform_keys(&:to_sym)
|
|
56
|
+
rescue NoMethodError, TypeError => e
|
|
57
|
+
raise McpConfigurationError, "MCP server hash contains invalid keys: #{e.message}"
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
new(
|
|
61
|
+
name: hash[:name],
|
|
62
|
+
transport: hash[:transport],
|
|
63
|
+
command: hash[:command],
|
|
64
|
+
args: hash[:args],
|
|
65
|
+
env: hash[:env],
|
|
66
|
+
url: hash[:url]
|
|
67
|
+
)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def stdio?
|
|
71
|
+
@transport == "stdio"
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def http?
|
|
75
|
+
%w[http sse].include?(@transport)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def to_h
|
|
79
|
+
h = {name: @name, transport: @transport}
|
|
80
|
+
if stdio?
|
|
81
|
+
h[:command] = @command
|
|
82
|
+
h[:args] = @args unless @args.empty?
|
|
83
|
+
else
|
|
84
|
+
h[:url] = @url
|
|
85
|
+
end
|
|
86
|
+
h[:env] = @env unless @env.empty?
|
|
87
|
+
h
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
def validate!
|
|
93
|
+
raise McpConfigurationError, "MCP server name is required" if @name.nil? || @name.to_s.strip.empty?
|
|
94
|
+
|
|
95
|
+
unless VALID_TRANSPORTS.include?(@transport)
|
|
96
|
+
raise McpConfigurationError,
|
|
97
|
+
"Invalid MCP transport '#{@transport}' for server '#{@name}'. Valid transports: #{VALID_TRANSPORTS.join(", ")}"
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
validate_args!
|
|
101
|
+
validate_env!
|
|
102
|
+
validate_stdio! if stdio?
|
|
103
|
+
validate_http! if http?
|
|
104
|
+
validate_no_stdio_only_fields_on_http! if http?
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def validate_args!
|
|
108
|
+
return if @args.is_a?(Array) && @args.all? { |a| a.is_a?(String) }
|
|
109
|
+
|
|
110
|
+
raise McpConfigurationError,
|
|
111
|
+
"MCP server '#{@name}' args must be an Array of Strings"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def validate_env!
|
|
115
|
+
return if @env.is_a?(Hash) && @env.keys.all? { |k| k.is_a?(String) } && @env.values.all? { |v| v.is_a?(String) }
|
|
116
|
+
|
|
117
|
+
raise McpConfigurationError,
|
|
118
|
+
"MCP server '#{@name}' env must be a Hash with String keys and values"
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def validate_stdio!
|
|
122
|
+
if @command.nil? || !@command.is_a?(Array) || @command.empty?
|
|
123
|
+
raise McpConfigurationError,
|
|
124
|
+
"MCP server '#{@name}' with stdio transport requires a non-empty command array"
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
unless @command.all? { |c| c.is_a?(String) }
|
|
128
|
+
raise McpConfigurationError,
|
|
129
|
+
"MCP server '#{@name}' command must contain only strings"
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
return if @url.nil?
|
|
133
|
+
|
|
134
|
+
raise McpConfigurationError,
|
|
135
|
+
"MCP server '#{@name}' with stdio transport should not have a url"
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def validate_http!
|
|
139
|
+
if @url.nil? || @url.to_s.strip.empty?
|
|
140
|
+
raise McpConfigurationError,
|
|
141
|
+
"MCP server '#{@name}' with #{@transport} transport requires a url"
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
return if @command.nil?
|
|
145
|
+
|
|
146
|
+
raise McpConfigurationError,
|
|
147
|
+
"MCP server '#{@name}' with #{@transport} transport should not have a command"
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def validate_no_stdio_only_fields_on_http!
|
|
151
|
+
return if @args.empty?
|
|
152
|
+
|
|
153
|
+
raise McpConfigurationError,
|
|
154
|
+
"MCP server '#{@name}' with #{@transport} transport should not have args (args are only valid for stdio)"
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
end
|
|
data/lib/agent_harness/providers/adapter.rb
CHANGED
|
@@ -124,6 +124,58 @@ module AgentHarness
|
|
|
124
124
|
[]
|
|
125
125
|
end
|
|
126
126
|
|
|
127
|
+
# Supported MCP transport types for this provider
|
|
128
|
+
#
|
|
129
|
+
# @return [Array<String>] supported transports (e.g. ["stdio", "http"])
|
|
130
|
+
def supported_mcp_transports
|
|
131
|
+
[]
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Build provider-specific MCP flags/arguments for CLI invocation
|
|
135
|
+
#
|
|
136
|
+
# @param mcp_servers [Array<McpServer>] MCP server definitions
|
|
137
|
+
# @param working_dir [String, nil] working directory for temp files
|
|
138
|
+
# @return [Array<String>] CLI flags to append to the command
|
|
139
|
+
def build_mcp_flags(mcp_servers, working_dir: nil)
|
|
140
|
+
[]
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Validate that this provider can handle the given MCP servers
|
|
144
|
+
#
|
|
145
|
+
# @param mcp_servers [Array<McpServer>] MCP server definitions
|
|
146
|
+
# @raise [McpUnsupportedError] if MCP is not supported
|
|
147
|
+
# @raise [McpTransportUnsupportedError] if a transport is not supported
|
|
148
|
+
def validate_mcp_servers!(mcp_servers)
|
|
149
|
+
return if mcp_servers.nil? || mcp_servers.empty?
|
|
150
|
+
|
|
151
|
+
unless supports_mcp?
|
|
152
|
+
raise McpUnsupportedError.new(
|
|
153
|
+
"Provider '#{self.class.provider_name}' does not support MCP servers",
|
|
154
|
+
provider: self.class.provider_name
|
|
155
|
+
)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
supported = supported_mcp_transports
|
|
159
|
+
|
|
160
|
+
if supported.empty?
|
|
161
|
+
raise McpUnsupportedError.new(
|
|
162
|
+
"Provider '#{self.class.provider_name}' does not support request-time MCP servers",
|
|
163
|
+
provider: self.class.provider_name
|
|
164
|
+
)
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
mcp_servers.each do |server|
|
|
168
|
+
next if supported.include?(server.transport)
|
|
169
|
+
|
|
170
|
+
raise McpTransportUnsupportedError.new(
|
|
171
|
+
"Provider '#{self.class.provider_name}' does not support MCP transport " \
|
|
172
|
+
"'#{server.transport}' (server: '#{server.name}'). " \
|
|
173
|
+
"Supported transports: #{supported.join(", ")}",
|
|
174
|
+
provider: self.class.provider_name
|
|
175
|
+
)
|
|
176
|
+
end
|
|
177
|
+
end
|
|
178
|
+
|
|
127
179
|
# Check if provider supports dangerous mode
|
|
128
180
|
#
|
|
129
181
|
# @return [Boolean] true if dangerous mode is supported
|
|
@@ -166,6 +218,38 @@ module AgentHarness
|
|
|
166
218
|
def health_status
|
|
167
219
|
{healthy: true, message: "OK"}
|
|
168
220
|
end
|
|
221
|
+
|
|
222
|
+
# Execution semantics for this provider
|
|
223
|
+
#
|
|
224
|
+
# Returns a hash describing provider-specific execution behavior so
|
|
225
|
+
# downstream apps do not need to hardcode CLI quirks. This metadata
|
|
226
|
+
# can be used to select the right flags and interpret output.
|
|
227
|
+
#
|
|
228
|
+
# @return [Hash] execution semantics
|
|
229
|
+
def execution_semantics
|
|
230
|
+
{
|
|
231
|
+
prompt_delivery: :arg, # :arg, :stdin, or :flag
|
|
232
|
+
output_format: :text, # :text or :json
|
|
233
|
+
sandbox_aware: false, # adjusts behavior inside containers
|
|
234
|
+
uses_subcommand: false, # e.g. "codex exec", "opencode run"
|
|
235
|
+
non_interactive_flag: nil, # flag to suppress interactive prompts
|
|
236
|
+
legitimate_exit_codes: [0], # exit codes that are NOT errors
|
|
237
|
+
stderr_is_diagnostic: true, # stderr may contain non-error output
|
|
238
|
+
parses_rate_limit_reset: false # can extract Retry-After from output
|
|
239
|
+
}
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Parse a rate-limit reset time from provider output
|
|
243
|
+
#
|
|
244
|
+
# Providers that include rate-limit reset information in their error
|
|
245
|
+
# output can override this to extract it, so the orchestration layer
|
|
246
|
+
# can schedule retries accurately.
|
|
247
|
+
#
|
|
248
|
+
# @param output [String] combined stdout+stderr from the CLI
|
|
249
|
+
# @return [Time, nil] when the rate limit resets, or nil if unknown
|
|
250
|
+
def parse_rate_limit_reset(output)
|
|
251
|
+
nil
|
|
252
|
+
end
|
|
169
253
|
end
|
|
170
254
|
end
|
|
171
255
|
end
|
|
data/lib/agent_harness/providers/aider.rb
CHANGED
|
@@ -71,6 +71,26 @@ module AgentHarness
|
|
|
71
71
|
}
|
|
72
72
|
end
|
|
73
73
|
|
|
74
|
+
def error_patterns
|
|
75
|
+
COMMON_ERROR_PATTERNS.merge(
|
|
76
|
+
auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
|
|
77
|
+
transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
|
|
78
|
+
)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def execution_semantics
|
|
82
|
+
{
|
|
83
|
+
prompt_delivery: :flag,
|
|
84
|
+
output_format: :text,
|
|
85
|
+
sandbox_aware: false,
|
|
86
|
+
uses_subcommand: false,
|
|
87
|
+
non_interactive_flag: "--yes",
|
|
88
|
+
legitimate_exit_codes: [0],
|
|
89
|
+
stderr_is_diagnostic: true,
|
|
90
|
+
parses_rate_limit_reset: false
|
|
91
|
+
}
|
|
92
|
+
end
|
|
93
|
+
|
|
74
94
|
def supports_sessions?
|
|
75
95
|
true
|
|
76
96
|
end
|
|
data/lib/agent_harness/providers/anthropic.rb
CHANGED
|
@@ -172,12 +172,25 @@ module AgentHarness
|
|
|
172
172
|
}
|
|
173
173
|
end
|
|
174
174
|
|
|
175
|
+
def send_message(prompt:, **options)
|
|
176
|
+
super
|
|
177
|
+
ensure
|
|
178
|
+
cleanup_mcp_tempfiles!
|
|
179
|
+
end
|
|
180
|
+
|
|
175
181
|
def supports_mcp?
|
|
176
182
|
true
|
|
177
183
|
end
|
|
178
184
|
|
|
179
|
-
def
|
|
180
|
-
|
|
185
|
+
def supported_mcp_transports
|
|
186
|
+
%w[stdio http sse]
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def build_mcp_flags(mcp_servers, working_dir: nil)
|
|
190
|
+
return [] if mcp_servers.empty?
|
|
191
|
+
|
|
192
|
+
config_path = write_mcp_config_file(mcp_servers, working_dir: working_dir)
|
|
193
|
+
["--mcp-config", config_path]
|
|
181
194
|
end
|
|
182
195
|
|
|
183
196
|
def dangerous_mode_flags
|
|
@@ -188,6 +201,19 @@ module AgentHarness
|
|
|
188
201
|
:oauth
|
|
189
202
|
end
|
|
190
203
|
|
|
204
|
+
def execution_semantics
|
|
205
|
+
{
|
|
206
|
+
prompt_delivery: :arg,
|
|
207
|
+
output_format: :json,
|
|
208
|
+
sandbox_aware: true,
|
|
209
|
+
uses_subcommand: false,
|
|
210
|
+
non_interactive_flag: "--print",
|
|
211
|
+
legitimate_exit_codes: [0],
|
|
212
|
+
stderr_is_diagnostic: true,
|
|
213
|
+
parses_rate_limit_reset: false
|
|
214
|
+
}
|
|
215
|
+
end
|
|
216
|
+
|
|
191
217
|
def error_patterns
|
|
192
218
|
{
|
|
193
219
|
rate_limited: [
|
|
@@ -266,6 +292,11 @@ module AgentHarness
|
|
|
266
292
|
cmd += dangerous_mode_flags
|
|
267
293
|
end
|
|
268
294
|
|
|
295
|
+
# Add MCP server flags (validated/normalized by Base#send_message)
|
|
296
|
+
if options[:mcp_servers]&.any?
|
|
297
|
+
cmd += build_mcp_flags(options[:mcp_servers])
|
|
298
|
+
end
|
|
299
|
+
|
|
269
300
|
# Add custom flags from config
|
|
270
301
|
cmd += @config.default_flags if @config.default_flags&.any?
|
|
271
302
|
|
|
@@ -376,6 +407,87 @@ module AgentHarness
|
|
|
376
407
|
servers
|
|
377
408
|
end
|
|
378
409
|
|
|
410
|
+
def write_mcp_config_file(mcp_servers, working_dir: nil)
|
|
411
|
+
require "tempfile"
|
|
412
|
+
require "tmpdir"
|
|
413
|
+
require "securerandom"
|
|
414
|
+
|
|
415
|
+
config = build_claude_mcp_config(mcp_servers)
|
|
416
|
+
config_json = JSON.generate(config)
|
|
417
|
+
|
|
418
|
+
if @executor.is_a?(DockerCommandExecutor)
|
|
419
|
+
# When running inside a Docker container, write the config file
|
|
420
|
+
# inside the container so the CLI process can read it.
|
|
421
|
+
# Track the path so cleanup_mcp_tempfiles! can remove it after execution.
|
|
422
|
+
container_path = "/tmp/agent_harness_mcp_#{SecureRandom.hex(8)}.json"
|
|
423
|
+
result = @executor.execute(
|
|
424
|
+
["sh", "-c", "cat > #{container_path}"],
|
|
425
|
+
stdin_data: config_json,
|
|
426
|
+
timeout: 5
|
|
427
|
+
)
|
|
428
|
+
unless result.success?
|
|
429
|
+
raise McpConfigurationError,
|
|
430
|
+
"Failed to write MCP config inside container: #{result.stderr}"
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
@mcp_docker_config_paths ||= []
|
|
434
|
+
@mcp_docker_config_paths << container_path
|
|
435
|
+
|
|
436
|
+
container_path
|
|
437
|
+
else
|
|
438
|
+
dir = working_dir || Dir.tmpdir
|
|
439
|
+
file = Tempfile.new(["agent_harness_mcp_", ".json"], dir)
|
|
440
|
+
file.write(config_json)
|
|
441
|
+
file.close
|
|
442
|
+
|
|
443
|
+
# Hold a reference so the Tempfile is not garbage-collected (and
|
|
444
|
+
# therefore deleted) before the CLI process reads it.
|
|
445
|
+
# Cleaned up by cleanup_mcp_tempfiles! after execution.
|
|
446
|
+
@mcp_config_tempfiles ||= []
|
|
447
|
+
@mcp_config_tempfiles << file
|
|
448
|
+
|
|
449
|
+
file.path
|
|
450
|
+
end
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
def build_claude_mcp_config(mcp_servers)
|
|
454
|
+
servers = {}
|
|
455
|
+
mcp_servers.each do |server|
|
|
456
|
+
h = if server.stdio?
|
|
457
|
+
entry = {command: server.command.first}
|
|
458
|
+
remaining_args = server.command[1..] + server.args
|
|
459
|
+
entry[:args] = remaining_args unless remaining_args.empty?
|
|
460
|
+
entry
|
|
461
|
+
else
|
|
462
|
+
{url: server.url}
|
|
463
|
+
end
|
|
464
|
+
h[:env] = server.env unless server.env.empty?
|
|
465
|
+
servers[server.name] = h
|
|
466
|
+
end
|
|
467
|
+
{mcpServers: servers}
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
def cleanup_mcp_tempfiles!
|
|
471
|
+
if @mcp_config_tempfiles
|
|
472
|
+
@mcp_config_tempfiles.each do |file|
|
|
473
|
+
file.close unless file.closed?
|
|
474
|
+
file.unlink
|
|
475
|
+
rescue
|
|
476
|
+
nil
|
|
477
|
+
end
|
|
478
|
+
@mcp_config_tempfiles = nil
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
if @mcp_docker_config_paths
|
|
482
|
+
@mcp_docker_config_paths.each do |path|
|
|
483
|
+
@executor.execute(["rm", "-f", path], timeout: 5)
|
|
484
|
+
rescue
|
|
485
|
+
nil
|
|
486
|
+
end
|
|
487
|
+
@mcp_docker_config_paths = nil
|
|
488
|
+
end
|
|
489
|
+
end
|
|
490
|
+
|
|
379
491
|
def log_debug(action, **context)
|
|
380
492
|
@logger&.debug("[AgentHarness::Anthropic] #{action}: #{context.inspect}")
|
|
381
493
|
end
|
|
data/lib/agent_harness/providers/base.rb
CHANGED
|
@@ -32,6 +32,34 @@ module AgentHarness
|
|
|
32
32
|
class Base
|
|
33
33
|
include Adapter
|
|
34
34
|
|
|
35
|
+
# Common error patterns shared across providers that use standard
|
|
36
|
+
# HTTP-style error responses. Providers with unique patterns (e.g.
|
|
37
|
+
# Anthropic, GitHub Copilot) override error_patterns entirely.
|
|
38
|
+
COMMON_ERROR_PATTERNS = {
|
|
39
|
+
rate_limited: [
|
|
40
|
+
/rate.?limit/i,
|
|
41
|
+
/too.?many.?requests/i,
|
|
42
|
+
/429/
|
|
43
|
+
],
|
|
44
|
+
auth_expired: [
|
|
45
|
+
/invalid.*api.*key/i,
|
|
46
|
+
/unauthorized/i,
|
|
47
|
+
/authentication/i
|
|
48
|
+
],
|
|
49
|
+
quota_exceeded: [
|
|
50
|
+
/quota.*exceeded/i,
|
|
51
|
+
/insufficient.*quota/i,
|
|
52
|
+
/billing/i
|
|
53
|
+
],
|
|
54
|
+
transient: [
|
|
55
|
+
/timeout/i,
|
|
56
|
+
/connection.*error/i,
|
|
57
|
+
/service.*unavailable/i,
|
|
58
|
+
/503/,
|
|
59
|
+
/502/
|
|
60
|
+
]
|
|
61
|
+
}.tap { |patterns| patterns.each_value(&:freeze) }.freeze
|
|
62
|
+
|
|
35
63
|
attr_reader :config, :logger
|
|
36
64
|
attr_accessor :executor
|
|
37
65
|
|
|
@@ -63,6 +91,10 @@ module AgentHarness
|
|
|
63
91
|
def send_message(prompt:, **options)
|
|
64
92
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
65
93
|
|
|
94
|
+
# Normalize and validate MCP servers
|
|
95
|
+
options = normalize_mcp_servers(options)
|
|
96
|
+
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
97
|
+
|
|
66
98
|
# Build command
|
|
67
99
|
command = build_command(prompt, options)
|
|
68
100
|
|
|
@@ -83,6 +115,8 @@ module AgentHarness
|
|
|
83
115
|
log_debug("send_message_complete", duration: duration, tokens: response.tokens)
|
|
84
116
|
|
|
85
117
|
response
|
|
118
|
+
rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
|
|
119
|
+
raise
|
|
86
120
|
rescue => e
|
|
87
121
|
handle_error(e, prompt: prompt, options: options)
|
|
88
122
|
end
|
|
@@ -101,6 +135,16 @@ module AgentHarness
|
|
|
101
135
|
name.capitalize
|
|
102
136
|
end
|
|
103
137
|
|
|
138
|
+
# Whether the provider is running inside a sandboxed (Docker) environment
|
|
139
|
+
#
|
|
140
|
+
# Providers can use this to adjust execution flags, e.g. skipping
|
|
141
|
+
# nested sandboxing when already inside a container.
|
|
142
|
+
#
|
|
143
|
+
# @return [Boolean] true when the executor is a DockerCommandExecutor
|
|
144
|
+
def sandboxed_environment?
|
|
145
|
+
@executor.is_a?(DockerCommandExecutor)
|
|
146
|
+
end
|
|
147
|
+
|
|
104
148
|
protected
|
|
105
149
|
|
|
106
150
|
# Build CLI command - override in subclasses
|
|
@@ -122,17 +166,39 @@ module AgentHarness
|
|
|
122
166
|
|
|
123
167
|
# Parse CLI output into Response - override in subclasses
|
|
124
168
|
#
|
|
169
|
+
# Combines stdout and stderr for error classification so that
|
|
170
|
+
# provider-specific error messages are captured regardless of
|
|
171
|
+
# which stream they appear on.
|
|
172
|
+
#
|
|
125
173
|
# @param result [CommandExecutor::Result] execution result
|
|
126
174
|
# @param duration [Float] execution duration
|
|
127
175
|
# @return [Response] parsed response
|
|
128
176
|
def parse_response(result, duration:)
|
|
177
|
+
error = nil
|
|
178
|
+
# Use execution_semantics[:legitimate_exit_codes] so providers can
|
|
179
|
+
# declare additional non-error exit codes beyond zero.
|
|
180
|
+
legitimate = execution_semantics[:legitimate_exit_codes] || [0]
|
|
181
|
+
unless legitimate.include?(result.exit_code)
|
|
182
|
+
# Concatenate non-empty streams so error patterns can match
|
|
183
|
+
# regardless of which stream the provider writes to.
|
|
184
|
+
combined = [result.stderr, result.stdout]
|
|
185
|
+
.map { |s| s.to_s.strip }
|
|
186
|
+
.reject(&:empty?)
|
|
187
|
+
.join("\n")
|
|
188
|
+
|
|
189
|
+
error = combined unless combined.empty?
|
|
190
|
+
end
|
|
191
|
+
|
|
129
192
|
Response.new(
|
|
130
193
|
output: result.stdout,
|
|
131
194
|
exit_code: result.exit_code,
|
|
132
195
|
duration: duration,
|
|
133
196
|
provider: self.class.provider_name,
|
|
134
197
|
model: @config.model,
|
|
135
|
-
error:
|
|
198
|
+
error: error,
|
|
199
|
+
metadata: {
|
|
200
|
+
legitimate_exit_codes: legitimate
|
|
201
|
+
}
|
|
136
202
|
)
|
|
137
203
|
end
|
|
138
204
|
|
|
@@ -145,6 +211,38 @@ module AgentHarness
|
|
|
145
211
|
|
|
146
212
|
private
|
|
147
213
|
|
|
214
|
+
def normalize_mcp_servers(options)
|
|
215
|
+
servers = options[:mcp_servers]
|
|
216
|
+
return options if servers.nil?
|
|
217
|
+
|
|
218
|
+
unless servers.is_a?(Array)
|
|
219
|
+
raise McpConfigurationError,
|
|
220
|
+
"mcp_servers must be an Array of Hash or McpServer, got #{servers.class}"
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
return options if servers.empty?
|
|
224
|
+
|
|
225
|
+
normalized = servers.map do |server|
|
|
226
|
+
if server.is_a?(McpServer)
|
|
227
|
+
server
|
|
228
|
+
elsif server.is_a?(Hash)
|
|
229
|
+
McpServer.from_hash(server)
|
|
230
|
+
else
|
|
231
|
+
raise McpConfigurationError, "MCP server must be a Hash or McpServer, got #{server.class}"
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
# Ensure MCP server names are unique to avoid silent overwrites downstream
|
|
236
|
+
names = normalized.map(&:name)
|
|
237
|
+
duplicate_names = names.group_by { |n| n }.select { |_, v| v.size > 1 }.keys
|
|
238
|
+
unless duplicate_names.empty?
|
|
239
|
+
raise McpConfigurationError,
|
|
240
|
+
"Duplicate MCP server names detected: #{duplicate_names.join(", ")}"
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
options.merge(mcp_servers: normalized)
|
|
244
|
+
end
|
|
245
|
+
|
|
148
246
|
def execute_with_timeout(command, timeout:, env:)
|
|
149
247
|
@executor.execute(command, timeout: timeout, env: env)
|
|
150
248
|
end
|
|
data/lib/agent_harness/providers/codex.rb
CHANGED
|
@@ -67,7 +67,24 @@ module AgentHarness
|
|
|
67
67
|
tool_use: true,
|
|
68
68
|
json_mode: false,
|
|
69
69
|
mcp: false,
|
|
70
|
-
dangerous_mode:
|
|
70
|
+
dangerous_mode: true
|
|
71
|
+
}
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def dangerous_mode_flags
|
|
75
|
+
["--full-auto"]
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def execution_semantics
|
|
79
|
+
{
|
|
80
|
+
prompt_delivery: :arg,
|
|
81
|
+
output_format: :text,
|
|
82
|
+
sandbox_aware: true,
|
|
83
|
+
uses_subcommand: true,
|
|
84
|
+
non_interactive_flag: nil,
|
|
85
|
+
legitimate_exit_codes: [0],
|
|
86
|
+
stderr_is_diagnostic: true,
|
|
87
|
+
parses_rate_limit_reset: false
|
|
71
88
|
}
|
|
72
89
|
end
|
|
73
90
|
|
|
@@ -81,32 +98,15 @@ module AgentHarness
|
|
|
81
98
|
end
|
|
82
99
|
|
|
83
100
|
def error_patterns
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
/
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
/invalid.*api.*key/i,
|
|
92
|
-
/unauthorized/i,
|
|
93
|
-
/authentication/i,
|
|
94
|
-
/401/,
|
|
95
|
-
/incorrect.*api.*key/i
|
|
96
|
-
],
|
|
97
|
-
quota_exceeded: [
|
|
98
|
-
/quota.*exceeded/i,
|
|
99
|
-
/insufficient.*quota/i,
|
|
100
|
-
/billing/i
|
|
101
|
-
],
|
|
102
|
-
transient: [
|
|
103
|
-
/timeout/i,
|
|
104
|
-
/connection.*reset/i,
|
|
105
|
-
/service.*unavailable/i,
|
|
106
|
-
/503/,
|
|
107
|
-
/502/
|
|
101
|
+
COMMON_ERROR_PATTERNS.merge(
|
|
102
|
+
auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
|
|
103
|
+
transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
|
|
104
|
+
sandbox_failure: [
|
|
105
|
+
/bwrap.*no permissions/i,
|
|
106
|
+
/no permissions to create a new namespace/i,
|
|
107
|
+
/unprivileged.*namespace/i
|
|
108
108
|
]
|
|
109
|
-
|
|
109
|
+
)
|
|
110
110
|
end
|
|
111
111
|
|
|
112
112
|
def auth_status
|
|
@@ -167,9 +167,34 @@ module AgentHarness
|
|
|
167
167
|
|
|
168
168
|
protected
|
|
169
169
|
|
|
170
|
+
def parse_response(result, duration:)
|
|
171
|
+
response = super
|
|
172
|
+
|
|
173
|
+
if response.success? && sandbox_failure_detected?(result.stderr)
|
|
174
|
+
return Response.new(
|
|
175
|
+
output: result.stdout,
|
|
176
|
+
exit_code: 1,
|
|
177
|
+
duration: duration,
|
|
178
|
+
provider: self.class.provider_name,
|
|
179
|
+
model: @config.model,
|
|
180
|
+
error: "Sandbox failure detected: #{result.stderr.strip}"
|
|
181
|
+
)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
response
|
|
185
|
+
end
|
|
186
|
+
|
|
170
187
|
def build_command(prompt, options)
|
|
171
188
|
cmd = [self.class.binary_name, "exec"]
|
|
172
189
|
|
|
190
|
+
# When running inside an already-sandboxed Docker container, Codex's
|
|
191
|
+
# own sandboxing conflicts with the outer sandbox. Use --full-auto to
|
|
192
|
+
# skip nested sandboxing while keeping full tool access.
|
|
193
|
+
# Also applies when dangerous_mode is explicitly requested.
|
|
194
|
+
if sandboxed_environment? || options[:dangerous_mode]
|
|
195
|
+
cmd += dangerous_mode_flags
|
|
196
|
+
end
|
|
197
|
+
|
|
173
198
|
flags = @config.default_flags
|
|
174
199
|
if flags
|
|
175
200
|
unless flags.is_a?(Array)
|
|
@@ -178,6 +203,10 @@ module AgentHarness
|
|
|
178
203
|
cmd += flags if flags.any?
|
|
179
204
|
end
|
|
180
205
|
|
|
206
|
+
if externally_sandboxed?(options)
|
|
207
|
+
cmd += sandbox_bypass_flags
|
|
208
|
+
end
|
|
209
|
+
|
|
181
210
|
if options[:session]
|
|
182
211
|
cmd += session_flags(options[:session])
|
|
183
212
|
end
|
|
@@ -193,6 +222,24 @@ module AgentHarness
|
|
|
193
222
|
|
|
194
223
|
private
|
|
195
224
|
|
|
225
|
+
def externally_sandboxed?(options)
|
|
226
|
+
if options.key?(:externally_sandboxed)
|
|
227
|
+
!!options[:externally_sandboxed]
|
|
228
|
+
else
|
|
229
|
+
!!@config.externally_sandboxed
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
def sandbox_failure_detected?(stderr)
|
|
234
|
+
return false if stderr.nil? || stderr.empty?
|
|
235
|
+
|
|
236
|
+
error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
def sandbox_bypass_flags
|
|
240
|
+
["--sandbox", "none"]
|
|
241
|
+
end
|
|
242
|
+
|
|
196
243
|
def read_codex_credentials
|
|
197
244
|
path = codex_config_path
|
|
198
245
|
return nil unless File.exist?(path)
|
|
data/lib/agent_harness/providers/cursor.rb
CHANGED
|
@@ -109,6 +109,14 @@ module AgentHarness
|
|
|
109
109
|
true
|
|
110
110
|
end
|
|
111
111
|
|
|
112
|
+
# Cursor supports MCP for fetching existing server configurations (via
|
|
113
|
+
# fetch_mcp_servers) but does not support injecting request-time MCP
|
|
114
|
+
# servers into CLI invocations. Returning an empty list causes
|
|
115
|
+
# validate_mcp_servers! to raise McpUnsupportedError with a clear message.
|
|
116
|
+
def supported_mcp_transports
|
|
117
|
+
[]
|
|
118
|
+
end
|
|
119
|
+
|
|
112
120
|
def fetch_mcp_servers
|
|
113
121
|
# Try CLI first, then config file
|
|
114
122
|
fetch_mcp_servers_cli || fetch_mcp_servers_config
|
|
@@ -118,6 +126,19 @@ module AgentHarness
|
|
|
118
126
|
:oauth
|
|
119
127
|
end
|
|
120
128
|
|
|
129
|
+
def execution_semantics
|
|
130
|
+
{
|
|
131
|
+
prompt_delivery: :stdin,
|
|
132
|
+
output_format: :text,
|
|
133
|
+
sandbox_aware: false,
|
|
134
|
+
uses_subcommand: false,
|
|
135
|
+
non_interactive_flag: "-p",
|
|
136
|
+
legitimate_exit_codes: [0],
|
|
137
|
+
stderr_is_diagnostic: true,
|
|
138
|
+
parses_rate_limit_reset: false
|
|
139
|
+
}
|
|
140
|
+
end
|
|
141
|
+
|
|
121
142
|
def error_patterns
|
|
122
143
|
{
|
|
123
144
|
rate_limited: [
|
|
@@ -142,6 +163,10 @@ module AgentHarness
|
|
|
142
163
|
def send_message(prompt:, **options)
|
|
143
164
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
144
165
|
|
|
166
|
+
# Normalize and validate MCP servers (same as Base#send_message)
|
|
167
|
+
options = normalize_mcp_servers(options)
|
|
168
|
+
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
169
|
+
|
|
145
170
|
# Build command (without prompt in args - we send via stdin)
|
|
146
171
|
command = [self.class.binary_name, "-p"]
|
|
147
172
|
|
|
@@ -162,6 +187,8 @@ module AgentHarness
|
|
|
162
187
|
log_debug("send_message_complete", duration: duration)
|
|
163
188
|
|
|
164
189
|
response
|
|
190
|
+
rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
|
|
191
|
+
raise
|
|
165
192
|
rescue => e
|
|
166
193
|
handle_error(e, prompt: prompt, options: options)
|
|
167
194
|
end
|
|
@@ -99,6 +99,19 @@ module AgentHarness
|
|
|
99
99
|
:oauth
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
+
def execution_semantics
|
|
103
|
+
{
|
|
104
|
+
prompt_delivery: :flag,
|
|
105
|
+
output_format: :text,
|
|
106
|
+
sandbox_aware: false,
|
|
107
|
+
uses_subcommand: false,
|
|
108
|
+
non_interactive_flag: nil,
|
|
109
|
+
legitimate_exit_codes: [0],
|
|
110
|
+
stderr_is_diagnostic: true,
|
|
111
|
+
parses_rate_limit_reset: false
|
|
112
|
+
}
|
|
113
|
+
end
|
|
114
|
+
|
|
102
115
|
def error_patterns
|
|
103
116
|
{
|
|
104
117
|
rate_limited: [
|
|
@@ -89,10 +89,6 @@ module AgentHarness
|
|
|
89
89
|
}
|
|
90
90
|
end
|
|
91
91
|
|
|
92
|
-
def supports_dangerous_mode?
|
|
93
|
-
true
|
|
94
|
-
end
|
|
95
|
-
|
|
96
92
|
def dangerous_mode_flags
|
|
97
93
|
["--allow-all-tools"]
|
|
98
94
|
end
|
|
@@ -110,6 +106,19 @@ module AgentHarness
|
|
|
110
106
|
:oauth
|
|
111
107
|
end
|
|
112
108
|
|
|
109
|
+
def execution_semantics
|
|
110
|
+
{
|
|
111
|
+
prompt_delivery: :flag,
|
|
112
|
+
output_format: :text,
|
|
113
|
+
sandbox_aware: false,
|
|
114
|
+
uses_subcommand: false,
|
|
115
|
+
non_interactive_flag: nil,
|
|
116
|
+
legitimate_exit_codes: [0],
|
|
117
|
+
stderr_is_diagnostic: true,
|
|
118
|
+
parses_rate_limit_reset: false
|
|
119
|
+
}
|
|
120
|
+
end
|
|
121
|
+
|
|
113
122
|
def error_patterns
|
|
114
123
|
{
|
|
115
124
|
auth_expired: [
|
|
@@ -57,6 +57,23 @@ module AgentHarness
|
|
|
57
57
|
}
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
def error_patterns
|
|
61
|
+
COMMON_ERROR_PATTERNS
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def execution_semantics
|
|
65
|
+
{
|
|
66
|
+
prompt_delivery: :arg,
|
|
67
|
+
output_format: :text,
|
|
68
|
+
sandbox_aware: false,
|
|
69
|
+
uses_subcommand: true,
|
|
70
|
+
non_interactive_flag: nil,
|
|
71
|
+
legitimate_exit_codes: [0],
|
|
72
|
+
stderr_is_diagnostic: true,
|
|
73
|
+
parses_rate_limit_reset: false
|
|
74
|
+
}
|
|
75
|
+
end
|
|
76
|
+
|
|
60
77
|
protected
|
|
61
78
|
|
|
62
79
|
def build_command(prompt, options)
|
|
@@ -59,6 +59,23 @@ module AgentHarness
|
|
|
59
59
|
}
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def error_patterns
|
|
63
|
+
COMMON_ERROR_PATTERNS
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def execution_semantics
|
|
67
|
+
{
|
|
68
|
+
prompt_delivery: :arg,
|
|
69
|
+
output_format: :text,
|
|
70
|
+
sandbox_aware: false,
|
|
71
|
+
uses_subcommand: true,
|
|
72
|
+
non_interactive_flag: nil,
|
|
73
|
+
legitimate_exit_codes: [0],
|
|
74
|
+
stderr_is_diagnostic: true,
|
|
75
|
+
parses_rate_limit_reset: false
|
|
76
|
+
}
|
|
77
|
+
end
|
|
78
|
+
|
|
62
79
|
protected
|
|
63
80
|
|
|
64
81
|
def build_command(prompt, options)
|
|
@@ -59,6 +59,23 @@ module AgentHarness
|
|
|
59
59
|
}
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def error_patterns
|
|
63
|
+
COMMON_ERROR_PATTERNS
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def execution_semantics
|
|
67
|
+
{
|
|
68
|
+
prompt_delivery: :arg,
|
|
69
|
+
output_format: :text,
|
|
70
|
+
sandbox_aware: false,
|
|
71
|
+
uses_subcommand: true,
|
|
72
|
+
non_interactive_flag: nil,
|
|
73
|
+
legitimate_exit_codes: [0],
|
|
74
|
+
stderr_is_diagnostic: true,
|
|
75
|
+
parses_rate_limit_reset: false
|
|
76
|
+
}
|
|
77
|
+
end
|
|
78
|
+
|
|
62
79
|
protected
|
|
63
80
|
|
|
64
81
|
def build_command(prompt, options)
|
|
@@ -40,9 +40,13 @@ module AgentHarness
|
|
|
40
40
|
|
|
41
41
|
# Check if the response indicates success
|
|
42
42
|
#
|
|
43
|
-
#
|
|
43
|
+
# A response is successful when its exit code is among the provider's
|
|
44
|
+
# legitimate exit codes (defaults to [0]) and no error was detected.
|
|
45
|
+
#
|
|
46
|
+
# @return [Boolean] true if exit_code is legitimate and no error
|
|
44
47
|
def success?
|
|
45
|
-
@
|
|
48
|
+
legitimate = @metadata[:legitimate_exit_codes] || [0]
|
|
49
|
+
legitimate.include?(@exit_code) && @error.nil?
|
|
46
50
|
end
|
|
47
51
|
|
|
48
52
|
# Check if the response indicates failure
|
data/lib/agent_harness.rb
CHANGED
|
@@ -137,6 +137,7 @@ end
|
|
|
137
137
|
|
|
138
138
|
# Core components
|
|
139
139
|
require_relative "agent_harness/errors"
|
|
140
|
+
require_relative "agent_harness/mcp_server"
|
|
140
141
|
require_relative "agent_harness/configuration"
|
|
141
142
|
require_relative "agent_harness/command_executor"
|
|
142
143
|
require_relative "agent_harness/docker_command_executor"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: agent-harness
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.3
|
|
4
|
+
version: 0.5.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Bart Agapinan
|
|
@@ -84,6 +84,7 @@ files:
|
|
|
84
84
|
- lib/agent_harness/docker_command_executor.rb
|
|
85
85
|
- lib/agent_harness/error_taxonomy.rb
|
|
86
86
|
- lib/agent_harness/errors.rb
|
|
87
|
+
- lib/agent_harness/mcp_server.rb
|
|
87
88
|
- lib/agent_harness/orchestration/circuit_breaker.rb
|
|
88
89
|
- lib/agent_harness/orchestration/conductor.rb
|
|
89
90
|
- lib/agent_harness/orchestration/health_monitor.rb
|