agent-harness 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/CHANGELOG.md +16 -0
- data/lib/agent_harness/configuration.rb +3 -1
- data/lib/agent_harness/error_taxonomy.rb +5 -0
- data/lib/agent_harness/provider_runtime.rb +115 -0
- data/lib/agent_harness/providers/adapter.rb +53 -0
- data/lib/agent_harness/providers/aider.rb +37 -0
- data/lib/agent_harness/providers/anthropic.rb +30 -4
- data/lib/agent_harness/providers/base.rb +99 -3
- data/lib/agent_harness/providers/codex.rb +96 -26
- data/lib/agent_harness/providers/cursor.rb +44 -2
- data/lib/agent_harness/providers/gemini.rb +32 -0
- data/lib/agent_harness/providers/github_copilot.rb +21 -4
- data/lib/agent_harness/providers/kilocode.rb +17 -0
- data/lib/agent_harness/providers/mistral_vibe.rb +17 -0
- data/lib/agent_harness/providers/opencode.rb +40 -0
- data/lib/agent_harness/response.rb +6 -2
- data/lib/agent_harness/version.rb +1 -1
- data/lib/agent_harness.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 946d7c425aff8c96536bc30def7e0abdba7ff7d82020f4941674a8c93be63526
|
|
4
|
+
data.tar.gz: ffc9d707f89ab60bf9cc59b4e5ccbcd2570a3aaa07e97c5a10bfb731f5dc07a0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c7b0dcef83c7a31be09a87884211a8ba03c0c93fb845e666423cd17eb70a358dd122db7e57ce04271fa5e114013adbc0007394211286c23b03cfa6a2a600c68f
|
|
7
|
+
data.tar.gz: a8f14eb24039afd0a93ec1eb064b59ebbe6d03aafd61ab1859c64729d2253140afebacc5c8ee761578337c671c074425784c33676808534cdf7b8ad809a2df32
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.5.6](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.5...agent-harness/v0.5.6) (2026-03-30)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* 53: Expose provider configuration capabilities for app-driven provider setup UIs ([#57](https://github.com/viamin/agent-harness/issues/57)) ([6aa6a02](https://github.com/viamin/agent-harness/commit/6aa6a02da14feefcad8761302d5fa8b5642a57fe))
|
|
9
|
+
* 54: Add per-request provider runtime overrides for CLI-backed providers ([#55](https://github.com/viamin/agent-harness/issues/55)) ([407467a](https://github.com/viamin/agent-harness/commit/407467a6965a01494e2c4590680b2bb9ddac6dce))
|
|
10
|
+
|
|
11
|
+
## [0.5.5](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.4...agent-harness/v0.5.5) (2026-03-29)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Bug Fixes
|
|
15
|
+
|
|
16
|
+
* 47: Audit provider-specific execution semantics so downstream apps do not hardcode CLI quirks ([#50](https://github.com/viamin/agent-harness/issues/50)) ([2d9a972](https://github.com/viamin/agent-harness/commit/2d9a972a78273901535ae44998c32292899b82ec))
|
|
17
|
+
* 48: Handle Codex sandbox mode for externally sandboxed container execution ([#49](https://github.com/viamin/agent-harness/issues/49)) ([5b6ba3f](https://github.com/viamin/agent-harness/commit/5b6ba3f9f517bb027670ead384feddd2c0f99edb))
|
|
18
|
+
|
|
3
19
|
## [0.5.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.3...agent-harness/v0.5.4) (2026-03-27)
|
|
4
20
|
|
|
5
21
|
|
|
data/lib/agent_harness/configuration.rb
CHANGED
|
@@ -233,7 +233,8 @@ module AgentHarness
|
|
|
233
233
|
|
|
234
234
|
# Provider-specific configuration
|
|
235
235
|
class ProviderConfig
|
|
236
|
-
attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model
|
|
236
|
+
attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model,
|
|
237
|
+
:externally_sandboxed
|
|
237
238
|
|
|
238
239
|
attr_reader :name
|
|
239
240
|
|
|
@@ -246,6 +247,7 @@ module AgentHarness
|
|
|
246
247
|
@default_flags = []
|
|
247
248
|
@timeout = nil
|
|
248
249
|
@model = nil
|
|
250
|
+
@externally_sandboxed = false
|
|
249
251
|
end
|
|
250
252
|
|
|
251
253
|
# Merge options into this configuration
|
|
data/lib/agent_harness/error_taxonomy.rb
CHANGED
|
@@ -39,6 +39,11 @@ module AgentHarness
|
|
|
39
39
|
action: :retry_with_backoff,
|
|
40
40
|
retryable: true
|
|
41
41
|
},
|
|
42
|
+
sandbox_failure: {
|
|
43
|
+
description: "Sandbox setup failed",
|
|
44
|
+
action: :escalate,
|
|
45
|
+
retryable: false
|
|
46
|
+
},
|
|
42
47
|
unknown: {
|
|
43
48
|
description: "Unknown error",
|
|
44
49
|
action: :retry_with_backoff,
|
|
data/lib/agent_harness/provider_runtime.rb
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentHarness
|
|
4
|
+
# Normalized runtime configuration for per-request provider overrides.
|
|
5
|
+
#
|
|
6
|
+
# ProviderRuntime lets callers pass a single, provider-agnostic payload
|
|
7
|
+
# into +send_message+ that each provider materializes into CLI args, env
|
|
8
|
+
# vars, or config files as needed.
|
|
9
|
+
#
|
|
10
|
+
# @example Routing OpenCode through OpenRouter with a specific model
|
|
11
|
+
# runtime = AgentHarness::ProviderRuntime.new(
|
|
12
|
+
# model: "anthropic/claude-opus-4.1",
|
|
13
|
+
# base_url: "https://openrouter.ai/api/v1",
|
|
14
|
+
# api_provider: "openrouter",
|
|
15
|
+
# env: { "OPENROUTER_API_KEY" => "sk-..." }
|
|
16
|
+
# )
|
|
17
|
+
# provider.send_message(prompt: "Hello", provider_runtime: runtime)
|
|
18
|
+
#
|
|
19
|
+
# @example Passing a Hash (auto-coerced by Base#send_message)
|
|
20
|
+
# provider.send_message(
|
|
21
|
+
# prompt: "Hello",
|
|
22
|
+
# provider_runtime: {
|
|
23
|
+
# model: "openai/gpt-5.3-codex",
|
|
24
|
+
# base_url: "https://openrouter.ai/api/v1"
|
|
25
|
+
# }
|
|
26
|
+
# )
|
|
27
|
+
class ProviderRuntime
|
|
28
|
+
attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata
|
|
29
|
+
|
|
30
|
+
# @param model [String, nil] model identifier override
|
|
31
|
+
# @param base_url [String, nil] upstream API base URL override
|
|
32
|
+
# @param api_provider [String, nil] API-compatible backend name
|
|
33
|
+
# @param env [Hash<String,String>] extra environment variables for the subprocess
|
|
34
|
+
# @param flags [Array<String>] extra CLI flags to append
|
|
35
|
+
# @param metadata [Hash] arbitrary provider-specific data
|
|
36
|
+
def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], metadata: {})
|
|
37
|
+
@model = model
|
|
38
|
+
@base_url = base_url
|
|
39
|
+
@api_provider = api_provider
|
|
40
|
+
|
|
41
|
+
env_hash = env || {}
|
|
42
|
+
unless env_hash.is_a?(Hash)
|
|
43
|
+
raise ArgumentError, "env must be a Hash (got #{env_hash.class})"
|
|
44
|
+
end
|
|
45
|
+
normalized_env = env_hash.each_with_object({}) do |(key, value), acc|
|
|
46
|
+
string_key = key.to_s
|
|
47
|
+
unless value.is_a?(String)
|
|
48
|
+
raise ArgumentError, "env value for #{string_key.inspect} must be a String (got #{value.class})"
|
|
49
|
+
end
|
|
50
|
+
acc[string_key] = value
|
|
51
|
+
end
|
|
52
|
+
@env = normalized_env.freeze
|
|
53
|
+
|
|
54
|
+
normalized_flags = flags || []
|
|
55
|
+
unless normalized_flags.is_a?(Array)
|
|
56
|
+
raise ArgumentError, "flags must be an Array (got #{normalized_flags.class})"
|
|
57
|
+
end
|
|
58
|
+
normalized_flags = normalized_flags.dup
|
|
59
|
+
normalized_flags.each_with_index do |flag, index|
|
|
60
|
+
unless flag.is_a?(String)
|
|
61
|
+
raise ArgumentError,
|
|
62
|
+
"flags must be an Array of Strings; invalid element at index #{index}: #{flag.inspect} (#{flag.class})"
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
@flags = normalized_flags.freeze
|
|
66
|
+
|
|
67
|
+
metadata_hash = metadata || {}
|
|
68
|
+
unless metadata_hash.is_a?(Hash)
|
|
69
|
+
raise ArgumentError, "metadata must be a Hash (got #{metadata_hash.class})"
|
|
70
|
+
end
|
|
71
|
+
@metadata = metadata_hash.dup.freeze
|
|
72
|
+
|
|
73
|
+
freeze
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Build a ProviderRuntime from a Hash.
|
|
77
|
+
#
|
|
78
|
+
# @param hash [Hash] runtime attributes
|
|
79
|
+
# @return [ProviderRuntime]
|
|
80
|
+
def self.from_hash(hash)
|
|
81
|
+
raise ArgumentError, "expected a Hash, got #{hash.class}" unless hash.is_a?(Hash)
|
|
82
|
+
|
|
83
|
+
new(
|
|
84
|
+
model: hash[:model] || hash["model"],
|
|
85
|
+
base_url: hash[:base_url] || hash["base_url"],
|
|
86
|
+
api_provider: hash[:api_provider] || hash["api_provider"],
|
|
87
|
+
env: hash[:env] || hash["env"] || {},
|
|
88
|
+
flags: hash[:flags] || hash["flags"] || [],
|
|
89
|
+
metadata: hash[:metadata] || hash["metadata"] || {}
|
|
90
|
+
)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Coerce a value into a ProviderRuntime.
|
|
94
|
+
#
|
|
95
|
+
# @param value [ProviderRuntime, Hash, nil] input
|
|
96
|
+
# @return [ProviderRuntime, nil]
|
|
97
|
+
def self.wrap(value)
|
|
98
|
+
case value
|
|
99
|
+
when ProviderRuntime then value
|
|
100
|
+
when Hash then from_hash(value)
|
|
101
|
+
when nil then nil
|
|
102
|
+
else
|
|
103
|
+
raise ArgumentError, "Cannot coerce #{value.class} into ProviderRuntime"
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
# Whether any meaningful overrides are present.
|
|
108
|
+
#
|
|
109
|
+
# @return [Boolean]
|
|
110
|
+
def empty?
|
|
111
|
+
model.nil? && base_url.nil? && api_provider.nil? &&
|
|
112
|
+
env.empty? && flags.empty? && metadata.empty?
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
data/lib/agent_harness/providers/adapter.rb
CHANGED
|
@@ -75,11 +75,32 @@ module AgentHarness
|
|
|
75
75
|
# @option options [Integer] :timeout timeout in seconds
|
|
76
76
|
# @option options [String] :session session identifier
|
|
77
77
|
# @option options [Boolean] :dangerous_mode skip permission checks
|
|
78
|
+
# @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
|
|
79
|
+
# runtime overrides (model, base_url, api_provider, env, flags, metadata).
|
|
80
|
+
# For providers that delegate to Providers::Base#send_message, a plain Hash
|
|
81
|
+
# is automatically coerced into a ProviderRuntime. Providers that override
|
|
82
|
+
# #send_message directly are responsible for handling this option.
|
|
78
83
|
# @return [Response] response object with output and metadata
|
|
79
84
|
def send_message(prompt:, **options)
|
|
80
85
|
raise NotImplementedError, "#{self.class} must implement #send_message"
|
|
81
86
|
end
|
|
82
87
|
|
|
88
|
+
# Provider configuration schema for app-driven setup UIs
|
|
89
|
+
#
|
|
90
|
+
# Returns metadata describing the configurable fields, supported
|
|
91
|
+
# authentication modes, and backend compatibility for this provider.
|
|
92
|
+
# Applications use this to build generic provider-entry forms without
|
|
93
|
+
# hardcoding provider-specific knowledge.
|
|
94
|
+
#
|
|
95
|
+
# @return [Hash] with :fields, :auth_modes, :openai_compatible keys
|
|
96
|
+
def configuration_schema
|
|
97
|
+
{
|
|
98
|
+
fields: [],
|
|
99
|
+
auth_modes: [auth_type],
|
|
100
|
+
openai_compatible: false
|
|
101
|
+
}
|
|
102
|
+
end
|
|
103
|
+
|
|
83
104
|
# Provider capabilities
|
|
84
105
|
#
|
|
85
106
|
# @return [Hash] capability flags
|
|
@@ -218,6 +239,38 @@ module AgentHarness
|
|
|
218
239
|
def health_status
|
|
219
240
|
{healthy: true, message: "OK"}
|
|
220
241
|
end
|
|
242
|
+
|
|
243
|
+
# Execution semantics for this provider
|
|
244
|
+
#
|
|
245
|
+
# Returns a hash describing provider-specific execution behavior so
|
|
246
|
+
# downstream apps do not need to hardcode CLI quirks. This metadata
|
|
247
|
+
# can be used to select the right flags and interpret output.
|
|
248
|
+
#
|
|
249
|
+
# @return [Hash] execution semantics
|
|
250
|
+
def execution_semantics
|
|
251
|
+
{
|
|
252
|
+
prompt_delivery: :arg, # :arg, :stdin, or :flag
|
|
253
|
+
output_format: :text, # :text or :json
|
|
254
|
+
sandbox_aware: false, # adjusts behavior inside containers
|
|
255
|
+
uses_subcommand: false, # e.g. "codex exec", "opencode run"
|
|
256
|
+
non_interactive_flag: nil, # flag to suppress interactive prompts
|
|
257
|
+
legitimate_exit_codes: [0], # exit codes that are NOT errors
|
|
258
|
+
stderr_is_diagnostic: true, # stderr may contain non-error output
|
|
259
|
+
parses_rate_limit_reset: false # can extract Retry-After from output
|
|
260
|
+
}
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
# Parse a rate-limit reset time from provider output
|
|
264
|
+
#
|
|
265
|
+
# Providers that include rate-limit reset information in their error
|
|
266
|
+
# output can override this to extract it, so the orchestration layer
|
|
267
|
+
# can schedule retries accurately.
|
|
268
|
+
#
|
|
269
|
+
# @param output [String] combined stdout+stderr from the CLI
|
|
270
|
+
# @return [Time, nil] when the rate limit resets, or nil if unknown
|
|
271
|
+
def parse_rate_limit_reset(output)
|
|
272
|
+
nil
|
|
273
|
+
end
|
|
221
274
|
end
|
|
222
275
|
end
|
|
223
276
|
end
|
|
data/lib/agent_harness/providers/aider.rb
CHANGED
|
@@ -59,6 +59,23 @@ module AgentHarness
|
|
|
59
59
|
"Aider"
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def configuration_schema
|
|
63
|
+
{
|
|
64
|
+
fields: [
|
|
65
|
+
{
|
|
66
|
+
name: :model,
|
|
67
|
+
type: :string,
|
|
68
|
+
label: "Model",
|
|
69
|
+
required: false,
|
|
70
|
+
hint: "Model identifier (supports OpenAI, Anthropic, and other model names)",
|
|
71
|
+
accepts_arbitrary: true
|
|
72
|
+
}
|
|
73
|
+
],
|
|
74
|
+
auth_modes: [:api_key],
|
|
75
|
+
openai_compatible: false
|
|
76
|
+
}
|
|
77
|
+
end
|
|
78
|
+
|
|
62
79
|
def capabilities
|
|
63
80
|
{
|
|
64
81
|
streaming: true,
|
|
@@ -71,6 +88,26 @@ module AgentHarness
|
|
|
71
88
|
}
|
|
72
89
|
end
|
|
73
90
|
|
|
91
|
+
def error_patterns
|
|
92
|
+
COMMON_ERROR_PATTERNS.merge(
|
|
93
|
+
auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/incorrect.*api.*key/i],
|
|
94
|
+
transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i]
|
|
95
|
+
)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def execution_semantics
|
|
99
|
+
{
|
|
100
|
+
prompt_delivery: :flag,
|
|
101
|
+
output_format: :text,
|
|
102
|
+
sandbox_aware: false,
|
|
103
|
+
uses_subcommand: false,
|
|
104
|
+
non_interactive_flag: "--yes",
|
|
105
|
+
legitimate_exit_codes: [0],
|
|
106
|
+
stderr_is_diagnostic: true,
|
|
107
|
+
parses_rate_limit_reset: false
|
|
108
|
+
}
|
|
109
|
+
end
|
|
110
|
+
|
|
74
111
|
def supports_sessions?
|
|
75
112
|
true
|
|
76
113
|
end
|
|
data/lib/agent_harness/providers/anthropic.rb
CHANGED
|
@@ -160,6 +160,23 @@ module AgentHarness
|
|
|
160
160
|
"Anthropic Claude CLI"
|
|
161
161
|
end
|
|
162
162
|
|
|
163
|
+
def configuration_schema
|
|
164
|
+
{
|
|
165
|
+
fields: [
|
|
166
|
+
{
|
|
167
|
+
name: :model,
|
|
168
|
+
type: :string,
|
|
169
|
+
label: "Model",
|
|
170
|
+
required: false,
|
|
171
|
+
hint: "Claude model to use (e.g. claude-3-5-sonnet-20241022)",
|
|
172
|
+
accepts_arbitrary: false
|
|
173
|
+
}
|
|
174
|
+
],
|
|
175
|
+
auth_modes: [:oauth],
|
|
176
|
+
openai_compatible: false
|
|
177
|
+
}
|
|
178
|
+
end
|
|
179
|
+
|
|
163
180
|
def capabilities
|
|
164
181
|
{
|
|
165
182
|
streaming: true,
|
|
@@ -193,10 +210,6 @@ module AgentHarness
|
|
|
193
210
|
["--mcp-config", config_path]
|
|
194
211
|
end
|
|
195
212
|
|
|
196
|
-
def supports_dangerous_mode?
|
|
197
|
-
true
|
|
198
|
-
end
|
|
199
|
-
|
|
200
213
|
def dangerous_mode_flags
|
|
201
214
|
["--dangerously-skip-permissions"]
|
|
202
215
|
end
|
|
@@ -205,6 +218,19 @@ module AgentHarness
|
|
|
205
218
|
:oauth
|
|
206
219
|
end
|
|
207
220
|
|
|
221
|
+
def execution_semantics
|
|
222
|
+
{
|
|
223
|
+
prompt_delivery: :arg,
|
|
224
|
+
output_format: :json,
|
|
225
|
+
sandbox_aware: true,
|
|
226
|
+
uses_subcommand: false,
|
|
227
|
+
non_interactive_flag: "--print",
|
|
228
|
+
legitimate_exit_codes: [0],
|
|
229
|
+
stderr_is_diagnostic: true,
|
|
230
|
+
parses_rate_limit_reset: false
|
|
231
|
+
}
|
|
232
|
+
end
|
|
233
|
+
|
|
208
234
|
def error_patterns
|
|
209
235
|
{
|
|
210
236
|
rate_limited: [
|
|
data/lib/agent_harness/providers/base.rb
CHANGED
|
@@ -32,6 +32,34 @@ module AgentHarness
|
|
|
32
32
|
class Base
|
|
33
33
|
include Adapter
|
|
34
34
|
|
|
35
|
+
# Common error patterns shared across providers that use standard
|
|
36
|
+
# HTTP-style error responses. Providers with unique patterns (e.g.
|
|
37
|
+
# Anthropic, GitHub Copilot) override error_patterns entirely.
|
|
38
|
+
COMMON_ERROR_PATTERNS = {
|
|
39
|
+
rate_limited: [
|
|
40
|
+
/rate.?limit/i,
|
|
41
|
+
/too.?many.?requests/i,
|
|
42
|
+
/429/
|
|
43
|
+
],
|
|
44
|
+
auth_expired: [
|
|
45
|
+
/invalid.*api.*key/i,
|
|
46
|
+
/unauthorized/i,
|
|
47
|
+
/authentication/i
|
|
48
|
+
],
|
|
49
|
+
quota_exceeded: [
|
|
50
|
+
/quota.*exceeded/i,
|
|
51
|
+
/insufficient.*quota/i,
|
|
52
|
+
/billing/i
|
|
53
|
+
],
|
|
54
|
+
transient: [
|
|
55
|
+
/timeout/i,
|
|
56
|
+
/connection.*error/i,
|
|
57
|
+
/service.*unavailable/i,
|
|
58
|
+
/503/,
|
|
59
|
+
/502/
|
|
60
|
+
]
|
|
61
|
+
}.tap { |patterns| patterns.each_value(&:freeze) }.freeze
|
|
62
|
+
|
|
35
63
|
attr_reader :config, :logger
|
|
36
64
|
attr_accessor :executor
|
|
37
65
|
|
|
@@ -59,10 +87,16 @@ module AgentHarness
|
|
|
59
87
|
#
|
|
60
88
|
# @param prompt [String] the prompt to send
|
|
61
89
|
# @param options [Hash] additional options
|
|
90
|
+
# @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
|
|
91
|
+
# runtime overrides (model, base_url, api_provider, env, flags, metadata).
|
|
92
|
+
# A plain Hash is automatically coerced into a ProviderRuntime.
|
|
62
93
|
# @return [Response] the response
|
|
63
94
|
def send_message(prompt:, **options)
|
|
64
95
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
65
96
|
|
|
97
|
+
# Coerce provider_runtime from Hash if needed
|
|
98
|
+
options = normalize_provider_runtime(options)
|
|
99
|
+
|
|
66
100
|
# Normalize and validate MCP servers
|
|
67
101
|
options = normalize_mcp_servers(options)
|
|
68
102
|
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
@@ -80,6 +114,23 @@ module AgentHarness
|
|
|
80
114
|
|
|
81
115
|
# Parse response
|
|
82
116
|
response = parse_response(result, duration: duration)
|
|
117
|
+
runtime = options[:provider_runtime]
|
|
118
|
+
# Runtime model is a per-request override and always takes precedence
|
|
119
|
+
# over both the config-level model and whatever parse_response returned.
|
|
120
|
+
# This is intentional: callers use runtime overrides to route a single
|
|
121
|
+
# provider instance through different backends on each request.
|
|
122
|
+
if runtime&.model
|
|
123
|
+
response = Response.new(
|
|
124
|
+
output: response.output,
|
|
125
|
+
exit_code: response.exit_code,
|
|
126
|
+
duration: response.duration,
|
|
127
|
+
provider: response.provider,
|
|
128
|
+
model: runtime.model,
|
|
129
|
+
tokens: response.tokens,
|
|
130
|
+
metadata: response.metadata,
|
|
131
|
+
error: response.error
|
|
132
|
+
)
|
|
133
|
+
end
|
|
83
134
|
|
|
84
135
|
# Track tokens
|
|
85
136
|
track_tokens(response) if response.tokens
|
|
@@ -107,6 +158,16 @@ module AgentHarness
|
|
|
107
158
|
name.capitalize
|
|
108
159
|
end
|
|
109
160
|
|
|
161
|
+
# Whether the provider is running inside a sandboxed (Docker) environment
|
|
162
|
+
#
|
|
163
|
+
# Providers can use this to adjust execution flags, e.g. skipping
|
|
164
|
+
# nested sandboxing when already inside a container.
|
|
165
|
+
#
|
|
166
|
+
# @return [Boolean] true when the executor is a DockerCommandExecutor
|
|
167
|
+
def sandboxed_environment?
|
|
168
|
+
@executor.is_a?(DockerCommandExecutor)
|
|
169
|
+
end
|
|
170
|
+
|
|
110
171
|
protected
|
|
111
172
|
|
|
112
173
|
# Build CLI command - override in subclasses
|
|
@@ -120,25 +181,53 @@ module AgentHarness
|
|
|
120
181
|
|
|
121
182
|
# Build environment variables - override in subclasses
|
|
122
183
|
#
|
|
184
|
+
# Provider subclasses should call +super+ and merge their own env vars
|
|
185
|
+
# so that ProviderRuntime env overrides are always included.
|
|
186
|
+
#
|
|
123
187
|
# @param options [Hash] options
|
|
124
188
|
# @return [Hash] environment variables
|
|
125
189
|
def build_env(options)
|
|
126
|
-
|
|
190
|
+
runtime = options[:provider_runtime]
|
|
191
|
+
return {} unless runtime
|
|
192
|
+
|
|
193
|
+
runtime.env.dup
|
|
127
194
|
end
|
|
128
195
|
|
|
129
196
|
# Parse CLI output into Response - override in subclasses
|
|
130
197
|
#
|
|
198
|
+
# Combines stdout and stderr for error classification so that
|
|
199
|
+
# provider-specific error messages are captured regardless of
|
|
200
|
+
# which stream they appear on.
|
|
201
|
+
#
|
|
131
202
|
# @param result [CommandExecutor::Result] execution result
|
|
132
203
|
# @param duration [Float] execution duration
|
|
133
204
|
# @return [Response] parsed response
|
|
134
205
|
def parse_response(result, duration:)
|
|
206
|
+
error = nil
|
|
207
|
+
# Use execution_semantics[:legitimate_exit_codes] so providers can
|
|
208
|
+
# declare additional non-error exit codes beyond zero.
|
|
209
|
+
legitimate = execution_semantics[:legitimate_exit_codes] || [0]
|
|
210
|
+
unless legitimate.include?(result.exit_code)
|
|
211
|
+
# Concatenate non-empty streams so error patterns can match
|
|
212
|
+
# regardless of which stream the provider writes to.
|
|
213
|
+
combined = [result.stderr, result.stdout]
|
|
214
|
+
.map { |s| s.to_s.strip }
|
|
215
|
+
.reject(&:empty?)
|
|
216
|
+
.join("\n")
|
|
217
|
+
|
|
218
|
+
error = combined unless combined.empty?
|
|
219
|
+
end
|
|
220
|
+
|
|
135
221
|
Response.new(
|
|
136
222
|
output: result.stdout,
|
|
137
223
|
exit_code: result.exit_code,
|
|
138
224
|
duration: duration,
|
|
139
225
|
provider: self.class.provider_name,
|
|
140
226
|
model: @config.model,
|
|
141
|
-
error:
|
|
227
|
+
error: error,
|
|
228
|
+
metadata: {
|
|
229
|
+
legitimate_exit_codes: legitimate
|
|
230
|
+
}
|
|
142
231
|
)
|
|
143
232
|
end
|
|
144
233
|
|
|
@@ -151,6 +240,13 @@ module AgentHarness
|
|
|
151
240
|
|
|
152
241
|
private
|
|
153
242
|
|
|
243
|
+
def normalize_provider_runtime(options)
|
|
244
|
+
raw = options[:provider_runtime]
|
|
245
|
+
return options if raw.nil? || raw.is_a?(ProviderRuntime)
|
|
246
|
+
|
|
247
|
+
options.merge(provider_runtime: ProviderRuntime.wrap(raw))
|
|
248
|
+
end
|
|
249
|
+
|
|
154
250
|
def normalize_mcp_servers(options)
|
|
155
251
|
servers = options[:mcp_servers]
|
|
156
252
|
return options if servers.nil?
|
|
@@ -192,7 +288,7 @@ module AgentHarness
|
|
|
192
288
|
|
|
193
289
|
AgentHarness.token_tracker.record(
|
|
194
290
|
provider: self.class.provider_name,
|
|
195
|
-
model: @config.model,
|
|
291
|
+
model: response.model || @config.model,
|
|
196
292
|
input_tokens: response.tokens[:input] || 0,
|
|
197
293
|
output_tokens: response.tokens[:output] || 0,
|
|
198
294
|
total_tokens: response.tokens[:total]
|
|
data/lib/agent_harness/providers/codex.rb
CHANGED
|
@@ -59,6 +59,14 @@ module AgentHarness
|
|
|
59
59
|
"OpenAI Codex CLI"
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def configuration_schema
|
|
63
|
+
{
|
|
64
|
+
fields: [],
|
|
65
|
+
auth_modes: [:api_key],
|
|
66
|
+
openai_compatible: true
|
|
67
|
+
}
|
|
68
|
+
end
|
|
69
|
+
|
|
62
70
|
def capabilities
|
|
63
71
|
{
|
|
64
72
|
streaming: false,
|
|
@@ -67,7 +75,24 @@ module AgentHarness
|
|
|
67
75
|
tool_use: true,
|
|
68
76
|
json_mode: false,
|
|
69
77
|
mcp: false,
|
|
70
|
-
dangerous_mode:
|
|
78
|
+
dangerous_mode: true
|
|
79
|
+
}
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def dangerous_mode_flags
|
|
83
|
+
["--full-auto"]
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def execution_semantics
|
|
87
|
+
{
|
|
88
|
+
prompt_delivery: :arg,
|
|
89
|
+
output_format: :text,
|
|
90
|
+
sandbox_aware: true,
|
|
91
|
+
uses_subcommand: true,
|
|
92
|
+
non_interactive_flag: nil,
|
|
93
|
+
legitimate_exit_codes: [0],
|
|
94
|
+
stderr_is_diagnostic: true,
|
|
95
|
+
parses_rate_limit_reset: false
|
|
71
96
|
}
|
|
72
97
|
end
|
|
73
98
|
|
|
@@ -81,32 +106,15 @@ module AgentHarness
|
|
|
81
106
|
end
|
|
82
107
|
|
|
83
108
|
def error_patterns
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
/
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
/invalid.*api.*key/i,
|
|
92
|
-
/unauthorized/i,
|
|
93
|
-
/authentication/i,
|
|
94
|
-
/401/,
|
|
95
|
-
/incorrect.*api.*key/i
|
|
96
|
-
],
|
|
97
|
-
quota_exceeded: [
|
|
98
|
-
/quota.*exceeded/i,
|
|
99
|
-
/insufficient.*quota/i,
|
|
100
|
-
/billing/i
|
|
101
|
-
],
|
|
102
|
-
transient: [
|
|
103
|
-
/timeout/i,
|
|
104
|
-
/connection.*reset/i,
|
|
105
|
-
/service.*unavailable/i,
|
|
106
|
-
/503/,
|
|
107
|
-
/502/
|
|
109
|
+
COMMON_ERROR_PATTERNS.merge(
|
|
110
|
+
auth_expired: COMMON_ERROR_PATTERNS[:auth_expired] + [/401/, /incorrect.*api.*key/i],
|
|
111
|
+
transient: COMMON_ERROR_PATTERNS[:transient] + [/connection.*reset/i],
|
|
112
|
+
sandbox_failure: [
|
|
113
|
+
/bwrap.*no permissions/i,
|
|
114
|
+
/no permissions to create a new namespace/i,
|
|
115
|
+
/unprivileged.*namespace/i
|
|
108
116
|
]
|
|
109
|
-
|
|
117
|
+
)
|
|
110
118
|
end
|
|
111
119
|
|
|
112
120
|
def auth_status
|
|
@@ -167,9 +175,34 @@ module AgentHarness
|
|
|
167
175
|
|
|
168
176
|
protected
|
|
169
177
|
|
|
178
|
+
def parse_response(result, duration:)
|
|
179
|
+
response = super
|
|
180
|
+
|
|
181
|
+
if response.success? && sandbox_failure_detected?(result.stderr)
|
|
182
|
+
return Response.new(
|
|
183
|
+
output: result.stdout,
|
|
184
|
+
exit_code: 1,
|
|
185
|
+
duration: duration,
|
|
186
|
+
provider: self.class.provider_name,
|
|
187
|
+
model: @config.model,
|
|
188
|
+
error: "Sandbox failure detected: #{result.stderr.strip}"
|
|
189
|
+
)
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
response
|
|
193
|
+
end
|
|
194
|
+
|
|
170
195
|
def build_command(prompt, options)
|
|
171
196
|
cmd = [self.class.binary_name, "exec"]
|
|
172
197
|
|
|
198
|
+
# When running inside an already-sandboxed Docker container, Codex's
|
|
199
|
+
# own sandboxing conflicts with the outer sandbox. Use --full-auto to
|
|
200
|
+
# skip nested sandboxing while keeping full tool access.
|
|
201
|
+
# Also applies when dangerous_mode is explicitly requested.
|
|
202
|
+
if sandboxed_environment? || options[:dangerous_mode]
|
|
203
|
+
cmd += dangerous_mode_flags
|
|
204
|
+
end
|
|
205
|
+
|
|
173
206
|
flags = @config.default_flags
|
|
174
207
|
if flags
|
|
175
208
|
unless flags.is_a?(Array)
|
|
@@ -178,21 +211,58 @@ module AgentHarness
|
|
|
178
211
|
cmd += flags if flags.any?
|
|
179
212
|
end
|
|
180
213
|
|
|
214
|
+
if externally_sandboxed?(options)
|
|
215
|
+
cmd += sandbox_bypass_flags
|
|
216
|
+
end
|
|
217
|
+
|
|
181
218
|
if options[:session]
|
|
182
219
|
cmd += session_flags(options[:session])
|
|
183
220
|
end
|
|
184
221
|
|
|
222
|
+
runtime = options[:provider_runtime]
|
|
223
|
+
if runtime
|
|
224
|
+
cmd += ["--model", runtime.model] if runtime.model
|
|
225
|
+
cmd += runtime.flags unless runtime.flags.empty?
|
|
226
|
+
end
|
|
227
|
+
|
|
185
228
|
cmd << prompt
|
|
186
229
|
|
|
187
230
|
cmd
|
|
188
231
|
end
|
|
189
232
|
|
|
233
|
+
def build_env(options)
|
|
234
|
+
env = super
|
|
235
|
+
runtime = options[:provider_runtime]
|
|
236
|
+
return env unless runtime
|
|
237
|
+
|
|
238
|
+
env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
|
|
239
|
+
env
|
|
240
|
+
end
|
|
241
|
+
|
|
190
242
|
def default_timeout
|
|
191
243
|
300
|
|
192
244
|
end
|
|
193
245
|
|
|
194
246
|
private
|
|
195
247
|
|
|
248
|
+
def externally_sandboxed?(options)
|
|
249
|
+
if options.key?(:externally_sandboxed)
|
|
250
|
+
!!options[:externally_sandboxed]
|
|
251
|
+
else
|
|
252
|
+
!!@config.externally_sandboxed
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def sandbox_failure_detected?(stderr)
|
|
257
|
+
return false if stderr.nil? || stderr.empty?
|
|
258
|
+
|
|
259
|
+
error_patterns[:sandbox_failure].any? { |pattern| stderr.match?(pattern) }
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
def sandbox_bypass_flags
|
|
263
|
+
["--sandbox", "none"]
|
|
264
|
+
end
|
|
265
|
+
|
|
196
266
|
def read_codex_credentials
|
|
197
267
|
path = codex_config_path
|
|
198
268
|
return nil unless File.exist?(path)
|
|
data/lib/agent_harness/providers/cursor.rb
CHANGED
|
@@ -93,6 +93,14 @@ module AgentHarness
|
|
|
93
93
|
"Cursor AI"
|
|
94
94
|
end
|
|
95
95
|
|
|
96
|
+
def configuration_schema
|
|
97
|
+
{
|
|
98
|
+
fields: [],
|
|
99
|
+
auth_modes: [:oauth],
|
|
100
|
+
openai_compatible: false
|
|
101
|
+
}
|
|
102
|
+
end
|
|
103
|
+
|
|
96
104
|
def capabilities
|
|
97
105
|
{
|
|
98
106
|
streaming: false,
|
|
@@ -126,6 +134,19 @@ module AgentHarness
|
|
|
126
134
|
:oauth
|
|
127
135
|
end
|
|
128
136
|
|
|
137
|
+
def execution_semantics
|
|
138
|
+
{
|
|
139
|
+
prompt_delivery: :stdin,
|
|
140
|
+
output_format: :text,
|
|
141
|
+
sandbox_aware: false,
|
|
142
|
+
uses_subcommand: false,
|
|
143
|
+
non_interactive_flag: "-p",
|
|
144
|
+
legitimate_exit_codes: [0],
|
|
145
|
+
stderr_is_diagnostic: true,
|
|
146
|
+
parses_rate_limit_reset: false
|
|
147
|
+
}
|
|
148
|
+
end
|
|
149
|
+
|
|
129
150
|
def error_patterns
|
|
130
151
|
{
|
|
131
152
|
rate_limited: [
|
|
@@ -150,23 +171,44 @@ module AgentHarness
|
|
|
150
171
|
def send_message(prompt:, **options)
|
|
151
172
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
152
173
|
|
|
174
|
+
# Coerce provider_runtime from Hash if needed (same as Base#send_message)
|
|
175
|
+
options = normalize_provider_runtime(options)
|
|
176
|
+
runtime = options[:provider_runtime]
|
|
177
|
+
|
|
153
178
|
# Normalize and validate MCP servers (same as Base#send_message)
|
|
154
179
|
options = normalize_mcp_servers(options)
|
|
155
180
|
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
156
181
|
|
|
157
182
|
# Build command (without prompt in args - we send via stdin)
|
|
158
183
|
command = [self.class.binary_name, "-p"]
|
|
184
|
+
command.concat(runtime.flags) if runtime&.flags&.any?
|
|
159
185
|
|
|
160
186
|
# Calculate timeout
|
|
161
187
|
timeout = options[:timeout] || @config.timeout || default_timeout
|
|
162
188
|
|
|
163
189
|
# Execute command with prompt on stdin
|
|
190
|
+
env = build_env(options)
|
|
164
191
|
start_time = Time.now
|
|
165
|
-
result = @executor.execute(command, timeout: timeout, stdin_data: prompt)
|
|
192
|
+
result = @executor.execute(command, timeout: timeout, stdin_data: prompt, env: env)
|
|
166
193
|
duration = Time.now - start_time
|
|
167
194
|
|
|
168
195
|
# Parse response
|
|
169
196
|
response = parse_response(result, duration: duration)
|
|
197
|
+
# Runtime model is a per-request override and always takes precedence
|
|
198
|
+
# over both the config-level model and whatever parse_response returned.
|
|
199
|
+
# See Base#send_message for rationale.
|
|
200
|
+
if runtime&.model
|
|
201
|
+
response = Response.new(
|
|
202
|
+
output: response.output,
|
|
203
|
+
exit_code: response.exit_code,
|
|
204
|
+
duration: response.duration,
|
|
205
|
+
provider: response.provider,
|
|
206
|
+
model: runtime.model,
|
|
207
|
+
tokens: response.tokens,
|
|
208
|
+
metadata: response.metadata,
|
|
209
|
+
error: response.error
|
|
210
|
+
)
|
|
211
|
+
end
|
|
170
212
|
|
|
171
213
|
# Track tokens
|
|
172
214
|
track_tokens(response) if response.tokens
|
|
@@ -188,7 +230,7 @@ module AgentHarness
|
|
|
188
230
|
end
|
|
189
231
|
|
|
190
232
|
def build_env(options)
|
|
191
|
-
|
|
233
|
+
super
|
|
192
234
|
end
|
|
193
235
|
|
|
194
236
|
def default_timeout
|
|
data/lib/agent_harness/providers/gemini.rb
CHANGED
|
@@ -83,6 +83,25 @@ module AgentHarness
|
|
|
83
83
|
"Google Gemini"
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
def configuration_schema
|
|
87
|
+
{
|
|
88
|
+
fields: [
|
|
89
|
+
{
|
|
90
|
+
name: :model,
|
|
91
|
+
type: :string,
|
|
92
|
+
label: "Model",
|
|
93
|
+
required: false,
|
|
94
|
+
hint: "Gemini model to use (e.g. gemini-2.5-pro, gemini-2.0-flash)",
|
|
95
|
+
# accepts_arbitrary is true because supports_model_family? accepts
|
|
96
|
+
# any string starting with "gemini-", not just discovered models.
|
|
97
|
+
accepts_arbitrary: true
|
|
98
|
+
}
|
|
99
|
+
],
|
|
100
|
+
auth_modes: [:api_key, :oauth],
|
|
101
|
+
openai_compatible: false
|
|
102
|
+
}
|
|
103
|
+
end
|
|
104
|
+
|
|
86
105
|
def capabilities
|
|
87
106
|
{
|
|
88
107
|
streaming: true,
|
|
@@ -99,6 +118,19 @@ module AgentHarness
|
|
|
99
118
|
:oauth
|
|
100
119
|
end
|
|
101
120
|
|
|
121
|
+
def execution_semantics
|
|
122
|
+
{
|
|
123
|
+
prompt_delivery: :flag,
|
|
124
|
+
output_format: :text,
|
|
125
|
+
sandbox_aware: false,
|
|
126
|
+
uses_subcommand: false,
|
|
127
|
+
non_interactive_flag: nil,
|
|
128
|
+
legitimate_exit_codes: [0],
|
|
129
|
+
stderr_is_diagnostic: true,
|
|
130
|
+
parses_rate_limit_reset: false
|
|
131
|
+
}
|
|
132
|
+
end
|
|
133
|
+
|
|
102
134
|
def error_patterns
|
|
103
135
|
{
|
|
104
136
|
rate_limited: [
|
|
@@ -77,6 +77,14 @@ module AgentHarness
|
|
|
77
77
|
"GitHub Copilot CLI"
|
|
78
78
|
end
|
|
79
79
|
|
|
80
|
+
def configuration_schema
|
|
81
|
+
{
|
|
82
|
+
fields: [],
|
|
83
|
+
auth_modes: [:oauth],
|
|
84
|
+
openai_compatible: false
|
|
85
|
+
}
|
|
86
|
+
end
|
|
87
|
+
|
|
80
88
|
def capabilities
|
|
81
89
|
{
|
|
82
90
|
streaming: false,
|
|
@@ -89,10 +97,6 @@ module AgentHarness
|
|
|
89
97
|
}
|
|
90
98
|
end
|
|
91
99
|
|
|
92
|
-
def supports_dangerous_mode?
|
|
93
|
-
true
|
|
94
|
-
end
|
|
95
|
-
|
|
96
100
|
def dangerous_mode_flags
|
|
97
101
|
["--allow-all-tools"]
|
|
98
102
|
end
|
|
@@ -110,6 +114,19 @@ module AgentHarness
|
|
|
110
114
|
:oauth
|
|
111
115
|
end
|
|
112
116
|
|
|
117
|
+
def execution_semantics
|
|
118
|
+
{
|
|
119
|
+
prompt_delivery: :flag,
|
|
120
|
+
output_format: :text,
|
|
121
|
+
sandbox_aware: false,
|
|
122
|
+
uses_subcommand: false,
|
|
123
|
+
non_interactive_flag: nil,
|
|
124
|
+
legitimate_exit_codes: [0],
|
|
125
|
+
stderr_is_diagnostic: true,
|
|
126
|
+
parses_rate_limit_reset: false
|
|
127
|
+
}
|
|
128
|
+
end
|
|
129
|
+
|
|
113
130
|
def error_patterns
|
|
114
131
|
{
|
|
115
132
|
auth_expired: [
|
|
@@ -57,6 +57,23 @@ module AgentHarness
|
|
|
57
57
|
}
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
def error_patterns
|
|
61
|
+
COMMON_ERROR_PATTERNS
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def execution_semantics
|
|
65
|
+
{
|
|
66
|
+
prompt_delivery: :arg,
|
|
67
|
+
output_format: :text,
|
|
68
|
+
sandbox_aware: false,
|
|
69
|
+
uses_subcommand: true,
|
|
70
|
+
non_interactive_flag: nil,
|
|
71
|
+
legitimate_exit_codes: [0],
|
|
72
|
+
stderr_is_diagnostic: true,
|
|
73
|
+
parses_rate_limit_reset: false
|
|
74
|
+
}
|
|
75
|
+
end
|
|
76
|
+
|
|
60
77
|
protected
|
|
61
78
|
|
|
62
79
|
def build_command(prompt, options)
|
|
@@ -59,6 +59,23 @@ module AgentHarness
|
|
|
59
59
|
}
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def error_patterns
|
|
63
|
+
COMMON_ERROR_PATTERNS
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def execution_semantics
|
|
67
|
+
{
|
|
68
|
+
prompt_delivery: :arg,
|
|
69
|
+
output_format: :text,
|
|
70
|
+
sandbox_aware: false,
|
|
71
|
+
uses_subcommand: true,
|
|
72
|
+
non_interactive_flag: nil,
|
|
73
|
+
legitimate_exit_codes: [0],
|
|
74
|
+
stderr_is_diagnostic: true,
|
|
75
|
+
parses_rate_limit_reset: false
|
|
76
|
+
}
|
|
77
|
+
end
|
|
78
|
+
|
|
62
79
|
protected
|
|
63
80
|
|
|
64
81
|
def build_command(prompt, options)
|
|
@@ -47,6 +47,14 @@ module AgentHarness
|
|
|
47
47
|
"OpenCode CLI"
|
|
48
48
|
end
|
|
49
49
|
|
|
50
|
+
def configuration_schema
|
|
51
|
+
{
|
|
52
|
+
fields: [],
|
|
53
|
+
auth_modes: [:api_key],
|
|
54
|
+
openai_compatible: true
|
|
55
|
+
}
|
|
56
|
+
end
|
|
57
|
+
|
|
50
58
|
def capabilities
|
|
51
59
|
{
|
|
52
60
|
streaming: false,
|
|
@@ -59,14 +67,46 @@ module AgentHarness
|
|
|
59
67
|
}
|
|
60
68
|
end
|
|
61
69
|
|
|
70
|
+
def error_patterns
|
|
71
|
+
COMMON_ERROR_PATTERNS
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def execution_semantics
|
|
75
|
+
{
|
|
76
|
+
prompt_delivery: :arg,
|
|
77
|
+
output_format: :text,
|
|
78
|
+
sandbox_aware: false,
|
|
79
|
+
uses_subcommand: true,
|
|
80
|
+
non_interactive_flag: nil,
|
|
81
|
+
legitimate_exit_codes: [0],
|
|
82
|
+
stderr_is_diagnostic: true,
|
|
83
|
+
parses_rate_limit_reset: false
|
|
84
|
+
}
|
|
85
|
+
end
|
|
86
|
+
|
|
62
87
|
protected
|
|
63
88
|
|
|
64
89
|
def build_command(prompt, options)
|
|
65
90
|
cmd = [self.class.binary_name, "run"]
|
|
91
|
+
|
|
92
|
+
runtime = options[:provider_runtime]
|
|
93
|
+
if runtime
|
|
94
|
+
cmd += runtime.flags unless runtime.flags.empty?
|
|
95
|
+
end
|
|
96
|
+
|
|
66
97
|
cmd << prompt
|
|
67
98
|
cmd
|
|
68
99
|
end
|
|
69
100
|
|
|
101
|
+
def build_env(options)
|
|
102
|
+
env = super
|
|
103
|
+
runtime = options[:provider_runtime]
|
|
104
|
+
return env unless runtime
|
|
105
|
+
|
|
106
|
+
env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
|
|
107
|
+
env
|
|
108
|
+
end
|
|
109
|
+
|
|
70
110
|
def default_timeout
|
|
71
111
|
300
|
|
72
112
|
end
|
|
@@ -40,9 +40,13 @@ module AgentHarness
|
|
|
40
40
|
|
|
41
41
|
# Check if the response indicates success
|
|
42
42
|
#
|
|
43
|
-
#
|
|
43
|
+
# A response is successful when its exit code is among the provider's
|
|
44
|
+
# legitimate exit codes (defaults to [0]) and no error was detected.
|
|
45
|
+
#
|
|
46
|
+
# @return [Boolean] true if exit_code is legitimate and no error
|
|
44
47
|
def success?
|
|
45
|
-
@
|
|
48
|
+
legitimate = @metadata[:legitimate_exit_codes] || [0]
|
|
49
|
+
legitimate.include?(@exit_code) && @error.nil?
|
|
46
50
|
end
|
|
47
51
|
|
|
48
52
|
# Check if the response indicates failure
|
data/lib/agent_harness.rb
CHANGED
|
@@ -138,6 +138,7 @@ end
|
|
|
138
138
|
# Core components
|
|
139
139
|
require_relative "agent_harness/errors"
|
|
140
140
|
require_relative "agent_harness/mcp_server"
|
|
141
|
+
require_relative "agent_harness/provider_runtime"
|
|
141
142
|
require_relative "agent_harness/configuration"
|
|
142
143
|
require_relative "agent_harness/command_executor"
|
|
143
144
|
require_relative "agent_harness/docker_command_executor"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: agent-harness
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.4
|
|
4
|
+
version: 0.5.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Bart Agapinan
|
|
@@ -92,6 +92,7 @@ files:
|
|
|
92
92
|
- lib/agent_harness/orchestration/provider_manager.rb
|
|
93
93
|
- lib/agent_harness/orchestration/rate_limiter.rb
|
|
94
94
|
- lib/agent_harness/provider_health_check.rb
|
|
95
|
+
- lib/agent_harness/provider_runtime.rb
|
|
95
96
|
- lib/agent_harness/providers/adapter.rb
|
|
96
97
|
- lib/agent_harness/providers/aider.rb
|
|
97
98
|
- lib/agent_harness/providers/anthropic.rb
|