agent-harness 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/CHANGELOG.md +8 -0
- data/lib/agent_harness/configuration.rb +3 -1
- data/lib/agent_harness/error_taxonomy.rb +5 -0
- data/lib/agent_harness/providers/adapter.rb +32 -0
- data/lib/agent_harness/providers/aider.rb +20 -0
- data/lib/agent_harness/providers/anthropic.rb +13 -4
- data/lib/agent_harness/providers/base.rb +61 -1
- data/lib/agent_harness/providers/codex.rb +73 -26
- data/lib/agent_harness/providers/cursor.rb +13 -0
- data/lib/agent_harness/providers/gemini.rb +13 -0
- data/lib/agent_harness/providers/github_copilot.rb +13 -4
- data/lib/agent_harness/providers/kilocode.rb +17 -0
- data/lib/agent_harness/providers/mistral_vibe.rb +17 -0
- data/lib/agent_harness/providers/opencode.rb +17 -0
- data/lib/agent_harness/response.rb +6 -2
- data/lib/agent_harness/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dc338c5fc81d4175149d405d494936b68a261a637deda4fc0e4fb7b18944bf67
|
|
4
|
+
data.tar.gz: aed5c92bc22dadab8826b919e8eabf606bcd7f6bfe0e1d02631c83461056a888
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1d662f4ae796d88a1a2c2eabce4604a38c4b53b545640b51c16a4b8e370ddf59f40ff57b19dfb46ba96e50b85399b846c9d2379bdc9806bd40aa78b1f18c1f66
|
|
7
|
+
data.tar.gz: 913df22acc91cd6db4ff2788867dc8337a2f1e94c0a2b2cce483e0af4ec73d2a946fcbb80270968d82c49c55aa375da86c386f4c8472ad2d42664d2bc1242ee6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
|
|
9
|
+
* 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
|
|
10
|
+
|
|
3
11
|
## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
|
|
4
12
|
|
|
5
13
|
|
|
data/lib/agent_harness/configuration.rb
CHANGED
|
@@ -233,7 +233,8 @@ module AgentHarness
|
|
|
233
233
|
|
|
234
234
|
# Provider-specific configuration
|
|
235
235
|
class ProviderConfig
|
|
236
|
-
attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
|
|
236
|
+
attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
|
|
237
|
+
:externally_sandboxed
|
|
237
238
|
|
|
238
239
|
attr_reader :name
|
|
239
240
|
|
|
@@ -246,6 +247,7 @@ module AgentHarness
|
|
|
246
247
|
@default_flags = []
|
|
247
248
|
@timeout = nil
|
|
248
249
|
@model = nil
|
|
250
|
+
@externally_sandboxed = false
|
|
249
251
|
end
|
|
250
252
|
|
|
251
253
|
# Merge options into this configuration
|
|
data/lib/agent_harness/error_taxonomy.rb
CHANGED
|
@@ -39,6 +39,11 @@ module AgentHarness
|
|
|
39
39
|
action: :retry_with_backoff,
|
|
40
40
|
retryable: true
|
|
41
41
|
},
|
|
42
|
+
sandbox_failure: {
|
|
43
|
+
description: "Sandbox setup failed",
|
|
44
|
+
action: :escalate,
|
|
45
|
+
retryable: false
|
|
46
|
+
},
|
|
42
47
|
unknown: {
|
|
43
48
|
description: "Unknown error",
|
|
44
49
|
action: :retry_with_backoff,
|
|
data/lib/agent_harness/providers/adapter.rb
CHANGED
|
@@ -218,6 +218,38 @@ module AgentHarness
|
|
|
218
218
|
def health_status
|
|
219
219
|
{healthy: true, message: "OK"}
|
|
220
220
|
end
|
|
221
|
+
|
|
222
|
+
# Execution semantics for this provider
|
|
223
|
+
#
|
|
224
|
+
# Returns a hash describing provider-specific execution behavior so
|
|
225
|
+
# downstream apps do not need to hardcode CLI quirks. This metadata
|
|
226
|
+
# can be used to select the right flags and interpret output.
|
|
227
|
+
#
|
|
228
|
+
# @return [Hash] execution semantics
|
|
229
|
+
def execution_semantics
|
|
230
|
+
{
|
|
231
|
+
prompt_delivery: :arg, # :arg, :stdin, or :flag
|
|
232
|
+
output_format: :text, # :text or :json
|
|
233
|
+
sandbox_aware: false, # adjusts behavior inside containers
|
|
234
|
+
uses_subcommand: false, # e.g. "codex exec", "opencode run"
|
|
235
|
+
non_interactive_flag: nil, # flag to suppress interactive prompts
|
|
236
|
+
legitimate_exit_codes: [0], # exit codes that are NOT errors
|
|
237
|
+
stderr_is_diagnostic: true, # stderr may contain non-error output
|
|
238
|
+
parses_rate_limit_reset: false # can extract Retry-After from output
|
|
239
|
+
}
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Parse a rate-limit reset time from provider output
|
|
243
|
+
#
|
|
244
|
+
# Providers that include rate-limit reset information in their error
|
|
245
|
+
# output can override this to extract it, so the orchestration layer
|
|
246
|
+
# can schedule retries accurately.
|
|
247
|
+
#
|
|
248
|
+
# @param output [String] combined stdout+stderr from the CLI
|
|
249
|
+
# @return [Time, nil] when the rate limit resets, or nil if unknown
|
|
250
|
+
def parse_rate_limit_reset(output)
|
|
251
|
+
nil
|
|
252
|
+
end
|
|
221
253
|
end
|
|
222
254
|
end
|
|
223
255
|
end
|
|
data/lib/agent_harness/providers/aider.rb
CHANGED
|
@@ -71,6 +71,26 @@ module AgentHarness
|
|
|
71
71
|
}
|
|
72
72
|
end
|
|
73
73
|
|
|
74
|
+
def error_patterns
|
|
75
|
+
COMMON_ERROR_PATTERNS.merge(
|
|
76
|
+
auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
|
|
77
|
+
transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
|
|
78
|
+
)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def execution_semantics
|
|
82
|
+
{
|
|
83
|
+
prompt_delivery: :flag,
|
|
84
|
+
output_format: :text,
|
|
85
|
+
sandbox_aware: false,
|
|
86
|
+
uses_subcommand: false,
|
|
87
|
+
non_interactive_flag: "--yes",
|
|
88
|
+
legitimate_exit_codes: [0],
|
|
89
|
+
stderr_is_diagnostic: true,
|
|
90
|
+
parses_rate_limit_reset: false
|
|
91
|
+
}
|
|
92
|
+
end
|
|
93
|
+
|
|
74
94
|
def supports_sessions?
|
|
75
95
|
true
|
|
76
96
|
end
|
|
data/lib/agent_harness/providers/anthropic.rb
CHANGED
|
@@ -193,10 +193,6 @@ module AgentHarness
|
|
|
193
193
|
["--mcp-config", config_path]
|
|
194
194
|
end
|
|
195
195
|
|
|
196
|
-
def supports_dangerous_mode?
|
|
197
|
-
true
|
|
198
|
-
end
|
|
199
|
-
|
|
200
196
|
def dangerous_mode_flags
|
|
201
197
|
["--dangerously-skip-permissions"]
|
|
202
198
|
end
|
|
@@ -205,6 +201,19 @@ module AgentHarness
|
|
|
205
201
|
:oauth
|
|
206
202
|
end
|
|
207
203
|
|
|
204
|
+
def execution_semantics
|
|
205
|
+
{
|
|
206
|
+
prompt_delivery: :arg,
|
|
207
|
+
output_format: :json,
|
|
208
|
+
sandbox_aware: true,
|
|
209
|
+
uses_subcommand: false,
|
|
210
|
+
non_interactive_flag: "--print",
|
|
211
|
+
legitimate_exit_codes: [0],
|
|
212
|
+
stderr_is_diagnostic: true,
|
|
213
|
+
parses_rate_limit_reset: false
|
|
214
|
+
}
|
|
215
|
+
end
|
|
216
|
+
|
|
208
217
|
def error_patterns
|
|
209
218
|
{
|
|
210
219
|
rate_limited: [
|
|
data/lib/agent_harness/providers/base.rb
CHANGED
|
@@ -32,6 +32,34 @@ module AgentHarness
|
|
|
32
32
|
class Base
|
|
33
33
|
include Adapter
|
|
34
34
|
|
|
35
|
+
# Common error patterns shared across providers that use standard
|
|
36
|
+
# HTTP-style error responses. Providers with unique patterns (e.g.
|
|
37
|
+
# Anthropic, GitHub Copilot) override error_patterns entirely.
|
|
38
|
+
COMMON_ERROR_PATTERNS = {
|
|
39
|
+
rate_limited: [
|
|
40
|
+
/rate.?limit/i,
|
|
41
|
+
/too.?many.?requests/i,
|
|
42
|
+
/429/
|
|
43
|
+
],
|
|
44
|
+
auth_expired: [
|
|
45
|
+
/invalid.*api.*key/i,
|
|
46
|
+
/unauthorized/i,
|
|
47
|
+
/authentication/i
|
|
48
|
+
],
|
|
49
|
+
quota_exceeded: [
|
|
50
|
+
/quota.*exceeded/i,
|
|
51
|
+
/insufficient.*quota/i,
|
|
52
|
+
/billing/i
|
|
53
|
+
],
|
|
54
|
+
transient: [
|
|
55
|
+
/timeout/i,
|
|
56
|
+
/connection.*error/i,
|
|
57
|
+
/service.*unavailable/i,
|
|
58
|
+
/503/,
|
|
59
|
+
/502/
|
|
60
|
+
]
|
|
61
|
+
}.tap { |patterns| patterns.each_value(&:freeze) }.freeze
|
|
62
|
+
|
|
35
63
|
attr_reader :config, :logger
|
|
36
64
|
attr_accessor :executor
|
|
37
65
|
|
|
@@ -107,6 +135,16 @@ module AgentHarness
|
|
|
107
135
|
name.capitalize
|
|
108
136
|
end
|
|
109
137
|
|
|
138
|
+
# Whether the provider is running inside a sandboxed (Docker) environment
|
|
139
|
+
#
|
|
140
|
+
# Providers can use this to adjust execution flags, e.g. skipping
|
|
141
|
+
# nested sandboxing when already inside a container.
|
|
142
|
+
#
|
|
143
|
+
# @return [Boolean] true when the executor is a DockerCommandExecutor
|
|
144
|
+
def sandboxed_environment?
|
|
145
|
+
@executor.is_a?(DockerCommandExecutor)
|
|
146
|
+
end
|
|
147
|
+
|
|
110
148
|
protected
|
|
111
149
|
|
|
112
150
|
# Build CLI command - override in subclasses
|
|
@@ -128,17 +166,39 @@ module AgentHarness
|
|
|
128
166
|
|
|
129
167
|
# Parse CLI output into Response - override in subclasses
|
|
130
168
|
#
|
|
169
|
+
# Combines stdout and stderr for error classification so that
|
|
170
|
+
# provider-specific error messages are captured regardless of
|
|
171
|
+
# which stream they appear on.
|
|
172
|
+
#
|
|
131
173
|
# @param result [CommandExecutor::Result] execution result
|
|
132
174
|
# @param duration [Float] execution duration
|
|
133
175
|
# @return [Response] parsed response
|
|
134
176
|
def parse_response(result, duration:)
|
|
177
|
+
error = nil
|
|
178
|
+
# Use execution_semantics[:legitimate_exit_codes] so providers can
|
|
179
|
+
# declare additional non-error exit codes beyond zero.
|
|
180
|
+
legitimate = execution_semantics[:legitimate_exit_codes] || [0]
|
|
181
|
+
unless legitimate.include?(result.exit_code)
|
|
182
|
+
# Concatenate non-empty streams so error patterns can match
|
|
183
|
+
# regardless of which stream the provider writes to.
|
|
184
|
+
combined = [result.stderr, result.stdout]
|
|
185
|
+
.map { |s| s.to_s.strip }
|
|
186
|
+
.reject(&:empty?)
|
|
187
|
+
.join("\n")
|
|
188
|
+
|
|
189
|
+
error = combined unless combined.empty?
|
|
190
|
+
end
|
|
191
|
+
|
|
135
192
|
Response.new(
|
|
136
193
|
output: result.stdout,
|
|
137
194
|
exit_code: result.exit_code,
|
|
138
195
|
duration: duration,
|
|
139
196
|
provider: self.class.provider_name,
|
|
140
197
|
model: @config.model,
|
|
141
|
-
error:
|
|
198
|
+
error: error,
|
|
199
|
+
metadata: {
|
|
200
|
+
legitimate_exit_codes: legitimate
|
|
201
|
+
}
|
|
142
202
|
)
|
|
143
203
|
end
|
|
144
204
|
|
|
data/lib/agent_harness/providers/codex.rb
CHANGED
|
@@ -67,7 +67,24 @@ module AgentHarness
|
|
|
67
67
|
tool_use: true,
|
|
68
68
|
json_mode: false,
|
|
69
69
|
mcp: false,
|
|
70
|
-
dangerous_mode:
|
|
70
|
+
dangerous_mode: true
|
|
71
|
+
}
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def dangerous_mode_flags
|
|
75
|
+
["--full-auto"]
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def execution_semantics
|
|
79
|
+
{
|
|
80
|
+
prompt_delivery: :arg,
|
|
81
|
+
output_format: :text,
|
|
82
|
+
sandbox_aware: true,
|
|
83
|
+
uses_subcommand: true,
|
|
84
|
+
non_interactive_flag: nil,
|
|
85
|
+
legitimate_exit_codes: [0],
|
|
86
|
+
stderr_is_diagnostic: true,
|
|
87
|
+
parses_rate_limit_reset: false
|
|
71
88
|
}
|
|
72
89
|
end
|
|
73
90
|
|
|
@@ -81,32 +98,15 @@ module AgentHarness
|
|
|
81
98
|
end
|
|
82
99
|
|
|
83
100
|
def error_patterns
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
/
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
/invalid.*api.*key/i,
|
|
92
|
-
/unauthorized/i,
|
|
93
|
-
/authentication/i,
|
|
94
|
-
/401/,
|
|
95
|
-
/incorrect.*api.*key/i
|
|
96
|
-
],
|
|
97
|
-
quota_exceeded: [
|
|
98
|
-
/quota.*exceeded/i,
|
|
99
|
-
/insufficient.*quota/i,
|
|
100
|
-
/billing/i
|
|
101
|
-
],
|
|
102
|
-
transient: [
|
|
103
|
-
/timeout/i,
|
|
104
|
-
/connection.*reset/i,
|
|
105
|
-
/service.*unavailable/i,
|
|
106
|
-
/503/,
|
|
107
|
-
/502/
|
|
101
|
+
COMMON_ERROR_PATTERNS.merge(
|
|
102
|
+
auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
|
|
103
|
+
transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
|
|
104
|
+
sandbox_failure: [
|
|
105
|
+
/bwrap.*no permissions/i,
|
|
106
|
+
/no permissions to create a new namespace/i,
|
|
107
|
+
/unprivileged.*namespace/i
|
|
108
108
|
]
|
|
109
|
-
|
|
109
|
+
)
|
|
110
110
|
end
|
|
111
111
|
|
|
112
112
|
def auth_status
|
|
@@ -167,9 +167,34 @@ module AgentHarness
|
|
|
167
167
|
|
|
168
168
|
protected
|
|
169
169
|
|
|
170
|
+
def parse_response(result, duration:)
|
|
171
|
+
response = super
|
|
172
|
+
|
|
173
|
+
if response.success? && sandbox_failure_detected?(result.stderr)
|
|
174
|
+
return Response.new(
|
|
175
|
+
output: result.stdout,
|
|
176
|
+
exit_code: 1,
|
|
177
|
+
duration: duration,
|
|
178
|
+
provider: self.class.provider_name,
|
|
179
|
+
model: @config.model,
|
|
180
|
+
error: "Sandbox failure detected: #{result.stderr.strip}"
|
|
181
|
+
)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
response
|
|
185
|
+
end
|
|
186
|
+
|
|
170
187
|
def build_command(prompt, options)
|
|
171
188
|
cmd = [self.class.binary_name, "exec"]
|
|
172
189
|
|
|
190
|
+
# When running inside an already-sandboxed Docker container, Codex's
|
|
191
|
+
# own sandboxing conflicts with the outer sandbox. Use --full-auto to
|
|
192
|
+
# skip nested sandboxing while keeping full tool access.
|
|
193
|
+
# Also applies when dangerous_mode is explicitly requested.
|
|
194
|
+
if sandboxed_environment? || options[:dangerous_mode]
|
|
195
|
+
cmd += dangerous_mode_flags
|
|
196
|
+
end
|
|
197
|
+
|
|
173
198
|
flags = @config.default_flags
|
|
174
199
|
if flags
|
|
175
200
|
unless flags.is_a?(Array)
|
|
@@ -178,6 +203,10 @@ module AgentHarness
|
|
|
178
203
|
cmd += flags if flags.any?
|
|
179
204
|
end
|
|
180
205
|
|
|
206
|
+
if externally_sandboxed?(options)
|
|
207
|
+
cmd += sandbox_bypass_flags
|
|
208
|
+
end
|
|
209
|
+
|
|
181
210
|
if options[:session]
|
|
182
211
|
cmd += session_flags(options[:session])
|
|
183
212
|
end
|
|
@@ -193,6 +222,24 @@ module AgentHarness
|
|
|
193
222
|
|
|
194
223
|
private
|
|
195
224
|
|
|
225
|
+
def externally_sandboxed?(options)
|
|
226
|
+
if options.key?(:externally_sandboxed)
|
|
227
|
+
!!options[:externally_sandboxed]
|
|
228
|
+
else
|
|
229
|
+
!!@config.externally_sandboxed
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
def sandbox_failure_detected?(stderr)
|
|
234
|
+
return false if stderr.nil? || stderr.empty?
|
|
235
|
+
|
|
236
|
+
error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
def sandbox_bypass_flags
|
|
240
|
+
["--sandbox", "none"]
|
|
241
|
+
end
|
|
242
|
+
|
|
196
243
|
def read_codex_credentials
|
|
197
244
|
path = codex_config_path
|
|
198
245
|
return nil unless File.exist?(path)
|
|
data/lib/agent_harness/providers/cursor.rb
CHANGED
|
@@ -126,6 +126,19 @@ module AgentHarness
|
|
|
126
126
|
:oauth
|
|
127
127
|
end
|
|
128
128
|
|
|
129
|
+
def execution_semantics
|
|
130
|
+
{
|
|
131
|
+
prompt_delivery: :stdin,
|
|
132
|
+
output_format: :text,
|
|
133
|
+
sandbox_aware: false,
|
|
134
|
+
uses_subcommand: false,
|
|
135
|
+
non_interactive_flag: "-p",
|
|
136
|
+
legitimate_exit_codes: [0],
|
|
137
|
+
stderr_is_diagnostic: true,
|
|
138
|
+
parses_rate_limit_reset: false
|
|
139
|
+
}
|
|
140
|
+
end
|
|
141
|
+
|
|
129
142
|
def error_patterns
|
|
130
143
|
{
|
|
131
144
|
rate_limited: [
|
|
data/lib/agent_harness/providers/gemini.rb
CHANGED
|
@@ -99,6 +99,19 @@ module AgentHarness
|
|
|
99
99
|
:oauth
|
|
100
100
|
end
|
|
101
101
|
|
|
102
|
+
def execution_semantics
|
|
103
|
+
{
|
|
104
|
+
prompt_delivery: :flag,
|
|
105
|
+
output_format: :text,
|
|
106
|
+
sandbox_aware: false,
|
|
107
|
+
uses_subcommand: false,
|
|
108
|
+
non_interactive_flag: nil,
|
|
109
|
+
legitimate_exit_codes: [0],
|
|
110
|
+
stderr_is_diagnostic: true,
|
|
111
|
+
parses_rate_limit_reset: false
|
|
112
|
+
}
|
|
113
|
+
end
|
|
114
|
+
|
|
102
115
|
def error_patterns
|
|
103
116
|
{
|
|
104
117
|
rate_limited: [
|
|
data/lib/agent_harness/providers/github_copilot.rb
CHANGED
|
@@ -89,10 +89,6 @@ module AgentHarness
|
|
|
89
89
|
}
|
|
90
90
|
end
|
|
91
91
|
|
|
92
|
-
def supports_dangerous_mode?
|
|
93
|
-
true
|
|
94
|
-
end
|
|
95
|
-
|
|
96
92
|
def dangerous_mode_flags
|
|
97
93
|
["--allow-all-tools"]
|
|
98
94
|
end
|
|
@@ -110,6 +106,19 @@ module AgentHarness
|
|
|
110
106
|
:oauth
|
|
111
107
|
end
|
|
112
108
|
|
|
109
|
+
def execution_semantics
|
|
110
|
+
{
|
|
111
|
+
prompt_delivery: :flag,
|
|
112
|
+
output_format: :text,
|
|
113
|
+
sandbox_aware: false,
|
|
114
|
+
uses_subcommand: false,
|
|
115
|
+
non_interactive_flag: nil,
|
|
116
|
+
legitimate_exit_codes: [0],
|
|
117
|
+
stderr_is_diagnostic: true,
|
|
118
|
+
parses_rate_limit_reset: false
|
|
119
|
+
}
|
|
120
|
+
end
|
|
121
|
+
|
|
113
122
|
def error_patterns
|
|
114
123
|
{
|
|
115
124
|
auth_expired: [
|
|
data/lib/agent_harness/providers/kilocode.rb
CHANGED
|
@@ -57,6 +57,23 @@ module AgentHarness
|
|
|
57
57
|
}
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
def error_patterns
|
|
61
|
+
COMMON_ERROR_PATTERNS
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def execution_semantics
|
|
65
|
+
{
|
|
66
|
+
prompt_delivery: :arg,
|
|
67
|
+
output_format: :text,
|
|
68
|
+
sandbox_aware: false,
|
|
69
|
+
uses_subcommand: true,
|
|
70
|
+
non_interactive_flag: nil,
|
|
71
|
+
legitimate_exit_codes: [0],
|
|
72
|
+
stderr_is_diagnostic: true,
|
|
73
|
+
parses_rate_limit_reset: false
|
|
74
|
+
}
|
|
75
|
+
end
|
|
76
|
+
|
|
60
77
|
protected
|
|
61
78
|
|
|
62
79
|
def build_command(prompt, options)
|
|
data/lib/agent_harness/providers/mistral_vibe.rb
CHANGED
|
@@ -59,6 +59,23 @@ module AgentHarness
|
|
|
59
59
|
}
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def error_patterns
|
|
63
|
+
COMMON_ERROR_PATTERNS
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def execution_semantics
|
|
67
|
+
{
|
|
68
|
+
prompt_delivery: :arg,
|
|
69
|
+
output_format: :text,
|
|
70
|
+
sandbox_aware: false,
|
|
71
|
+
uses_subcommand: true,
|
|
72
|
+
non_interactive_flag: nil,
|
|
73
|
+
legitimate_exit_codes: [0],
|
|
74
|
+
stderr_is_diagnostic: true,
|
|
75
|
+
parses_rate_limit_reset: false
|
|
76
|
+
}
|
|
77
|
+
end
|
|
78
|
+
|
|
62
79
|
protected
|
|
63
80
|
|
|
64
81
|
def build_command(prompt, options)
|
|
data/lib/agent_harness/providers/opencode.rb
CHANGED
|
@@ -59,6 +59,23 @@ module AgentHarness
|
|
|
59
59
|
}
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def error_patterns
|
|
63
|
+
COMMON_ERROR_PATTERNS
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def execution_semantics
|
|
67
|
+
{
|
|
68
|
+
prompt_delivery: :arg,
|
|
69
|
+
output_format: :text,
|
|
70
|
+
sandbox_aware: false,
|
|
71
|
+
uses_subcommand: true,
|
|
72
|
+
non_interactive_flag: nil,
|
|
73
|
+
legitimate_exit_codes: [0],
|
|
74
|
+
stderr_is_diagnostic: true,
|
|
75
|
+
parses_rate_limit_reset: false
|
|
76
|
+
}
|
|
77
|
+
end
|
|
78
|
+
|
|
62
79
|
protected
|
|
63
80
|
|
|
64
81
|
def build_command(prompt, options)
|
|
data/lib/agent_harness/response.rb
CHANGED
|
@@ -40,9 +40,13 @@ module AgentHarness
|
|
|
40
40
|
|
|
41
41
|
# Check if the response indicates success
|
|
42
42
|
#
|
|
43
|
-
#
|
|
43
|
+
# A response is successful when its exit code is among the provider's
|
|
44
|
+
# legitimate exit codes (defaults to [0]) and no error was detected.
|
|
45
|
+
#
|
|
46
|
+
# @return [Boolean] true if exit_code is legitimate and no error
|
|
44
47
|
def success?
|
|
45
|
-
@
|
|
48
|
+
legitimate = @metadata[:legitimate_exit_codes] || [0]
|
|
49
|
+
legitimate.include?(@exit_code) && @error.nil?
|
|
46
50
|
end
|
|
47
51
|
|
|
48
52
|
# Check if the response indicates failure
|