agent-harness 0.5.5 → 0.5.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/CHANGELOG.md +24 -0
- data/README.md +76 -1
- data/lib/agent_harness/command_executor.rb +453 -32
- data/lib/agent_harness/docker_command_executor.rb +23 -3
- data/lib/agent_harness/error_taxonomy.rb +10 -0
- data/lib/agent_harness/errors.rb +5 -0
- data/lib/agent_harness/orchestration/conductor.rb +40 -16
- data/lib/agent_harness/orchestration/provider_manager.rb +21 -13
- data/lib/agent_harness/provider_health_check.rb +216 -58
- data/lib/agent_harness/provider_runtime.rb +132 -0
- data/lib/agent_harness/providers/adapter.rb +157 -0
- data/lib/agent_harness/providers/aider.rb +21 -0
- data/lib/agent_harness/providers/anthropic.rb +21 -0
- data/lib/agent_harness/providers/base.rb +83 -11
- data/lib/agent_harness/providers/codex.rb +75 -8
- data/lib/agent_harness/providers/cursor.rb +47 -2
- data/lib/agent_harness/providers/gemini.rb +53 -0
- data/lib/agent_harness/providers/github_copilot.rb +34 -6
- data/lib/agent_harness/providers/kilocode.rb +39 -0
- data/lib/agent_harness/providers/mistral_vibe.rb +4 -0
- data/lib/agent_harness/providers/opencode.rb +91 -1
- data/lib/agent_harness/providers/registry.rb +54 -0
- data/lib/agent_harness/version.rb +1 -1
- data/lib/agent_harness.rb +78 -6
- metadata +22 -1
|
@@ -43,6 +43,17 @@ module AgentHarness
|
|
|
43
43
|
raise NotImplementedError, "#{self} must implement .binary_name"
|
|
44
44
|
end
|
|
45
45
|
|
|
46
|
+
# Installation contract for the provider CLI.
|
|
47
|
+
#
|
|
48
|
+
# Downstream applications can use this metadata to install a provider's
|
|
49
|
+
# supported CLI without hardcoding package names, install flags, or
|
|
50
|
+
# version pins outside AgentHarness.
|
|
51
|
+
#
|
|
52
|
+
# @return [Hash, nil] installation metadata or nil when not provided
|
|
53
|
+
def install_contract(version: nil)
|
|
54
|
+
nil
|
|
55
|
+
end
|
|
56
|
+
|
|
46
57
|
# Required domains for firewall configuration
|
|
47
58
|
#
|
|
48
59
|
# @return [Hash] with :domains and :ip_ranges arrays
|
|
@@ -63,6 +74,49 @@ module AgentHarness
|
|
|
63
74
|
def discover_models
|
|
64
75
|
[]
|
|
65
76
|
end
|
|
77
|
+
|
|
78
|
+
# Installation contract for this provider's CLI.
|
|
79
|
+
#
|
|
80
|
+
# Downstream apps can use this metadata to provision the provider CLI
|
|
81
|
+
# without hardcoding package names, versions, or binary expectations
|
|
82
|
+
# outside agent-harness.
|
|
83
|
+
#
|
|
84
|
+
# @return [Hash, nil] install metadata, or nil when no first-class
|
|
85
|
+
# installation contract is defined for the provider
|
|
86
|
+
def installation_contract(**options)
|
|
87
|
+
return install_contract unless options.key?(:version)
|
|
88
|
+
|
|
89
|
+
install_contract(version: options[:version])
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Build the install command from the provider installation contract.
|
|
93
|
+
#
|
|
94
|
+
# @param version [String, nil] optional explicit version override
|
|
95
|
+
# @return [Array<String>, nil] install command argv or nil when the
|
|
96
|
+
# provider has no install contract
|
|
97
|
+
def install_command(version: nil)
|
|
98
|
+
contract = installation_contract
|
|
99
|
+
return nil unless contract
|
|
100
|
+
|
|
101
|
+
return contract[:install_command] unless version
|
|
102
|
+
|
|
103
|
+
package_name = contract[:package_name]
|
|
104
|
+
unless package_name
|
|
105
|
+
raise ArgumentError, "installation_contract must define :package_name when overriding version"
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
Array(contract[:install_command_prefix]) + ["#{package_name}@#{version}"]
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Canonical smoke-test contract for this provider.
|
|
112
|
+
#
|
|
113
|
+
# CLI-backed providers should expose a minimal real-execution prompt so
|
|
114
|
+
# downstream apps can reuse a stable provider-owned health check.
|
|
115
|
+
#
|
|
116
|
+
# @return [Hash, nil] smoke-test metadata or nil when not provided
|
|
117
|
+
def smoke_test_contract
|
|
118
|
+
nil
|
|
119
|
+
end
|
|
66
120
|
end
|
|
67
121
|
|
|
68
122
|
# Instance methods
|
|
@@ -75,11 +129,32 @@ module AgentHarness
|
|
|
75
129
|
# @option options [Integer] :timeout timeout in seconds
|
|
76
130
|
# @option options [String] :session session identifier
|
|
77
131
|
# @option options [Boolean] :dangerous_mode skip permission checks
|
|
132
|
+
# @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
|
|
133
|
+
# runtime overrides (model, base_url, api_provider, env, flags, metadata).
|
|
134
|
+
# For providers that delegate to Providers::Base#send_message, a plain Hash
|
|
135
|
+
# is automatically coerced into a ProviderRuntime. Providers that override
|
|
136
|
+
# #send_message directly are responsible for handling this option.
|
|
78
137
|
# @return [Response] response object with output and metadata
|
|
79
138
|
def send_message(prompt:, **options)
|
|
80
139
|
raise NotImplementedError, "#{self.class} must implement #send_message"
|
|
81
140
|
end
|
|
82
141
|
|
|
142
|
+
# Provider configuration schema for app-driven setup UIs
|
|
143
|
+
#
|
|
144
|
+
# Returns metadata describing the configurable fields, supported
|
|
145
|
+
# authentication modes, and backend compatibility for this provider.
|
|
146
|
+
# Applications use this to build generic provider-entry forms without
|
|
147
|
+
# hardcoding provider-specific knowledge.
|
|
148
|
+
#
|
|
149
|
+
# @return [Hash] with :fields, :auth_modes, :openai_compatible keys
|
|
150
|
+
def configuration_schema
|
|
151
|
+
{
|
|
152
|
+
fields: [],
|
|
153
|
+
auth_modes: [auth_type],
|
|
154
|
+
openai_compatible: false
|
|
155
|
+
}
|
|
156
|
+
end
|
|
157
|
+
|
|
83
158
|
# Provider capabilities
|
|
84
159
|
#
|
|
85
160
|
# @return [Hash] capability flags
|
|
@@ -219,6 +294,71 @@ module AgentHarness
|
|
|
219
294
|
{healthy: true, message: "OK"}
|
|
220
295
|
end
|
|
221
296
|
|
|
297
|
+
# Canonical smoke-test contract for this provider instance.
|
|
298
|
+
#
|
|
299
|
+
# @return [Hash, nil] smoke-test metadata, or nil when the provider class defines none
|
|
300
|
+
def smoke_test_contract
|
|
301
|
+
self.class.smoke_test_contract if self.class.respond_to?(:smoke_test_contract)
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
# Execute a minimal provider-owned smoke test via the configured executor.
|
|
305
|
+
#
|
|
306
|
+
# @param timeout [Integer, nil] timeout override in seconds
|
|
307
|
+
# @param provider_runtime [ProviderRuntime, Hash, nil] runtime overrides
|
|
308
|
+
# @return [Hash] normalized smoke-test result with :ok, :status, :message, :error_category, :output, and :exit_code keys
|
|
309
|
+
def smoke_test(timeout: nil, provider_runtime: nil)
|
|
310
|
+
contract = smoke_test_contract
|
|
311
|
+
raise NotImplementedError, "#{self.class} does not implement #smoke_test_contract" unless contract
|
|
312
|
+
|
|
313
|
+
prompt = contract[:prompt]
|
|
314
|
+
if !prompt.is_a?(String) || prompt.strip.empty?
|
|
315
|
+
raise ConfigurationError, "#{self.class}.smoke_test_contract must define a non-empty :prompt"
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
response = send_message(
|
|
319
|
+
prompt: prompt,
|
|
320
|
+
timeout: timeout || contract[:timeout],
|
|
321
|
+
provider_runtime: provider_runtime
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
output = response.output.to_s.strip
|
|
325
|
+
expected_output = contract[:expected_output]&.strip
|
|
326
|
+
success = response.success? && (!contract.fetch(:require_output, true) || !output.empty?)
|
|
327
|
+
success &&= expected_output.nil? || output == expected_output
|
|
328
|
+
|
|
329
|
+
if success
|
|
330
|
+
return {
|
|
331
|
+
ok: true,
|
|
332
|
+
status: "ok",
|
|
333
|
+
message: contract[:success_message] || "Smoke test passed",
|
|
334
|
+
error_category: nil,
|
|
335
|
+
output: output,
|
|
336
|
+
exit_code: response.exit_code
|
|
337
|
+
}
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
message = response.error.to_s.strip
|
|
341
|
+
message = output if message.empty?
|
|
342
|
+
message = "Smoke test failed with exit code #{response.exit_code}" if message.empty?
|
|
343
|
+
|
|
344
|
+
{
|
|
345
|
+
ok: false,
|
|
346
|
+
status: "error",
|
|
347
|
+
message: message,
|
|
348
|
+
error_category: classify_smoke_test_message(message),
|
|
349
|
+
output: output,
|
|
350
|
+
exit_code: response.exit_code
|
|
351
|
+
}
|
|
352
|
+
rescue TimeoutError => e
|
|
353
|
+
failure_smoke_test_result(e.message, :timeout)
|
|
354
|
+
rescue AuthenticationError => e
|
|
355
|
+
failure_smoke_test_result(e.message, :auth_expired)
|
|
356
|
+
rescue RateLimitError => e
|
|
357
|
+
failure_smoke_test_result(e.message, :rate_limited)
|
|
358
|
+
rescue ProviderError => e
|
|
359
|
+
failure_smoke_test_result(e.message, classify_smoke_test_message(e.message))
|
|
360
|
+
end
|
|
361
|
+
|
|
222
362
|
# Execution semantics for this provider
|
|
223
363
|
#
|
|
224
364
|
# Returns a hash describing provider-specific execution behavior so
|
|
@@ -250,6 +390,23 @@ module AgentHarness
|
|
|
250
390
|
def parse_rate_limit_reset(output)
|
|
251
391
|
nil
|
|
252
392
|
end
|
|
393
|
+
|
|
394
|
+
private
|
|
395
|
+
|
|
396
|
+
def classify_smoke_test_message(message)
|
|
397
|
+
ErrorTaxonomy.classify(StandardError.new(message.to_s), error_patterns)
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
def failure_smoke_test_result(message, error_category)
|
|
401
|
+
{
|
|
402
|
+
ok: false,
|
|
403
|
+
status: "error",
|
|
404
|
+
message: message,
|
|
405
|
+
error_category: error_category,
|
|
406
|
+
output: nil,
|
|
407
|
+
exit_code: nil
|
|
408
|
+
}
|
|
409
|
+
end
|
|
253
410
|
end
|
|
254
411
|
end
|
|
255
412
|
end
|
|
@@ -49,6 +49,10 @@ module AgentHarness
|
|
|
49
49
|
{name: "claude-3-5-sonnet", family: "claude-3-5-sonnet", tier: "standard", provider: "aider"}
|
|
50
50
|
]
|
|
51
51
|
end
|
|
52
|
+
|
|
53
|
+
def smoke_test_contract
|
|
54
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
55
|
+
end
|
|
52
56
|
end
|
|
53
57
|
|
|
54
58
|
def name
|
|
@@ -59,6 +63,23 @@ module AgentHarness
|
|
|
59
63
|
"Aider"
|
|
60
64
|
end
|
|
61
65
|
|
|
66
|
+
def configuration_schema
|
|
67
|
+
{
|
|
68
|
+
fields: [
|
|
69
|
+
{
|
|
70
|
+
name: :model,
|
|
71
|
+
type: :string,
|
|
72
|
+
label: "Model",
|
|
73
|
+
required: false,
|
|
74
|
+
hint: "Model identifier (supports OpenAI, Anthropic, and other model names)",
|
|
75
|
+
accepts_arbitrary: true
|
|
76
|
+
}
|
|
77
|
+
],
|
|
78
|
+
auth_modes: [:api_key],
|
|
79
|
+
openai_compatible: false
|
|
80
|
+
}
|
|
81
|
+
end
|
|
82
|
+
|
|
62
83
|
def capabilities
|
|
63
84
|
{
|
|
64
85
|
streaming: true,
|
|
@@ -81,6 +81,10 @@ module AgentHarness
|
|
|
81
81
|
MODEL_PATTERN.match?(family_name)
|
|
82
82
|
end
|
|
83
83
|
|
|
84
|
+
def smoke_test_contract
|
|
85
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
86
|
+
end
|
|
87
|
+
|
|
84
88
|
private
|
|
85
89
|
|
|
86
90
|
def parse_models_list(output)
|
|
@@ -160,6 +164,23 @@ module AgentHarness
|
|
|
160
164
|
"Anthropic Claude CLI"
|
|
161
165
|
end
|
|
162
166
|
|
|
167
|
+
def configuration_schema
|
|
168
|
+
{
|
|
169
|
+
fields: [
|
|
170
|
+
{
|
|
171
|
+
name: :model,
|
|
172
|
+
type: :string,
|
|
173
|
+
label: "Model",
|
|
174
|
+
required: false,
|
|
175
|
+
hint: "Claude model to use (e.g. claude-3-5-sonnet-20241022)",
|
|
176
|
+
accepts_arbitrary: false
|
|
177
|
+
}
|
|
178
|
+
],
|
|
179
|
+
auth_modes: [:oauth],
|
|
180
|
+
openai_compatible: false
|
|
181
|
+
}
|
|
182
|
+
end
|
|
183
|
+
|
|
163
184
|
def capabilities
|
|
164
185
|
{
|
|
165
186
|
streaming: true,
|
|
@@ -22,16 +22,18 @@ module AgentHarness
|
|
|
22
22
|
# system("which my-cli > /dev/null 2>&1")
|
|
23
23
|
# end
|
|
24
24
|
# end
|
|
25
|
-
#
|
|
26
|
-
# protected
|
|
27
|
-
#
|
|
28
|
-
# def build_command(prompt, options)
|
|
29
|
-
# [self.class.binary_name, "--prompt", prompt]
|
|
30
|
-
# end
|
|
31
25
|
# end
|
|
32
26
|
class Base
|
|
33
27
|
include Adapter
|
|
34
28
|
|
|
29
|
+
DEFAULT_SMOKE_TEST_CONTRACT = {
|
|
30
|
+
prompt: "Reply with exactly OK.",
|
|
31
|
+
expected_output: "OK",
|
|
32
|
+
timeout: 30,
|
|
33
|
+
require_output: true,
|
|
34
|
+
success_message: "Smoke test passed"
|
|
35
|
+
}.freeze
|
|
36
|
+
|
|
35
37
|
# Common error patterns shared across providers that use standard
|
|
36
38
|
# HTTP-style error responses. Providers with unique patterns (e.g.
|
|
37
39
|
# Anthropic, GitHub Copilot) override error_patterns entirely.
|
|
@@ -63,6 +65,12 @@ module AgentHarness
|
|
|
63
65
|
attr_reader :config, :logger
|
|
64
66
|
attr_accessor :executor
|
|
65
67
|
|
|
68
|
+
class << self
|
|
69
|
+
def smoke_test_contract
|
|
70
|
+
nil
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
66
74
|
# Initialize the provider
|
|
67
75
|
#
|
|
68
76
|
# @param config [ProviderConfig, nil] provider configuration
|
|
@@ -87,10 +95,16 @@ module AgentHarness
|
|
|
87
95
|
#
|
|
88
96
|
# @param prompt [String] the prompt to send
|
|
89
97
|
# @param options [Hash] additional options
|
|
98
|
+
# @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
|
|
99
|
+
# runtime overrides (model, base_url, api_provider, env, flags, metadata).
|
|
100
|
+
# A plain Hash is automatically coerced into a ProviderRuntime.
|
|
90
101
|
# @return [Response] the response
|
|
91
102
|
def send_message(prompt:, **options)
|
|
92
103
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
93
104
|
|
|
105
|
+
# Coerce provider_runtime from Hash if needed
|
|
106
|
+
options = normalize_provider_runtime(options)
|
|
107
|
+
|
|
94
108
|
# Normalize and validate MCP servers
|
|
95
109
|
options = normalize_mcp_servers(options)
|
|
96
110
|
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
@@ -103,11 +117,33 @@ module AgentHarness
|
|
|
103
117
|
|
|
104
118
|
# Execute command
|
|
105
119
|
start_time = Time.now
|
|
106
|
-
result = execute_with_timeout(
|
|
120
|
+
result = execute_with_timeout(
|
|
121
|
+
command,
|
|
122
|
+
timeout: timeout,
|
|
123
|
+
env: build_env(options),
|
|
124
|
+
**command_execution_options(options)
|
|
125
|
+
)
|
|
107
126
|
duration = Time.now - start_time
|
|
108
127
|
|
|
109
128
|
# Parse response
|
|
110
129
|
response = parse_response(result, duration: duration)
|
|
130
|
+
runtime = options[:provider_runtime]
|
|
131
|
+
# Runtime model is a per-request override and always takes precedence
|
|
132
|
+
# over both the config-level model and whatever parse_response returned.
|
|
133
|
+
# This is intentional: callers use runtime overrides to route a single
|
|
134
|
+
# provider instance through different backends on each request.
|
|
135
|
+
if runtime&.model
|
|
136
|
+
response = Response.new(
|
|
137
|
+
output: response.output,
|
|
138
|
+
exit_code: response.exit_code,
|
|
139
|
+
duration: response.duration,
|
|
140
|
+
provider: response.provider,
|
|
141
|
+
model: runtime.model,
|
|
142
|
+
tokens: response.tokens,
|
|
143
|
+
metadata: response.metadata,
|
|
144
|
+
error: response.error
|
|
145
|
+
)
|
|
146
|
+
end
|
|
111
147
|
|
|
112
148
|
# Track tokens
|
|
113
149
|
track_tokens(response) if response.tokens
|
|
@@ -158,10 +194,20 @@ module AgentHarness
|
|
|
158
194
|
|
|
159
195
|
# Build environment variables - override in subclasses
|
|
160
196
|
#
|
|
197
|
+
# Provider subclasses should call +super+ and merge their own env vars
|
|
198
|
+
# so that ProviderRuntime env overrides are always included.
|
|
199
|
+
#
|
|
161
200
|
# @param options [Hash] options
|
|
162
201
|
# @return [Hash] environment variable overrides; a nil value unsets that variable in the child process
|
|
163
202
|
def build_env(options)
|
|
164
|
-
|
|
203
|
+
runtime = options[:provider_runtime]
|
|
204
|
+
return {} unless runtime
|
|
205
|
+
|
|
206
|
+
# Return overrides only. Ruby subprocess spawning treats nil values as
|
|
207
|
+
# explicit unsets in the child process, while omitted keys are inherited.
|
|
208
|
+
env = runtime.env.dup
|
|
209
|
+
runtime.unset_env.each { |key| env[key] = nil }
|
|
210
|
+
env
|
|
165
211
|
end
|
|
166
212
|
|
|
167
213
|
# Parse CLI output into Response - override in subclasses
|
|
@@ -211,6 +257,13 @@ module AgentHarness
|
|
|
211
257
|
|
|
212
258
|
private
|
|
213
259
|
|
|
260
|
+
def normalize_provider_runtime(options)
|
|
261
|
+
raw = options[:provider_runtime]
|
|
262
|
+
return options if raw.nil? || raw.is_a?(ProviderRuntime)
|
|
263
|
+
|
|
264
|
+
options.merge(provider_runtime: ProviderRuntime.wrap(raw))
|
|
265
|
+
end
|
|
266
|
+
|
|
214
267
|
def normalize_mcp_servers(options)
|
|
215
268
|
servers = options[:mcp_servers]
|
|
216
269
|
return options if servers.nil?
|
|
@@ -243,8 +296,21 @@ module AgentHarness
|
|
|
243
296
|
options.merge(mcp_servers: normalized)
|
|
244
297
|
end
|
|
245
298
|
|
|
246
|
-
def
|
|
247
|
-
|
|
299
|
+
def command_execution_options(options)
|
|
300
|
+
execution_options = {
|
|
301
|
+
idle_timeout: options[:idle_timeout],
|
|
302
|
+
on_stdout_chunk: options[:on_stdout_chunk],
|
|
303
|
+
on_stderr_chunk: options[:on_stderr_chunk],
|
|
304
|
+
on_heartbeat: options[:on_heartbeat],
|
|
305
|
+
observer: options[:execution_observer] || options[:observer]
|
|
306
|
+
}.reject { |_, value| value.nil? }
|
|
307
|
+
|
|
308
|
+
execution_options[:heartbeat_interval] = options[:heartbeat_interval] if options.key?(:heartbeat_interval)
|
|
309
|
+
execution_options
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
def execute_with_timeout(command, timeout:, env:, stdin_data: nil, **execution_options)
|
|
313
|
+
@executor.execute(command, timeout: timeout, env: env, stdin_data: stdin_data, **execution_options)
|
|
248
314
|
end
|
|
249
315
|
|
|
250
316
|
def track_tokens(response)
|
|
@@ -252,7 +318,7 @@ module AgentHarness
|
|
|
252
318
|
|
|
253
319
|
AgentHarness.token_tracker.record(
|
|
254
320
|
provider: self.class.provider_name,
|
|
255
|
-
model: @config.model,
|
|
321
|
+
model: response.model || @config.model,
|
|
256
322
|
input_tokens: response.tokens[:input] || 0,
|
|
257
323
|
output_tokens: response.tokens[:output] || 0,
|
|
258
324
|
total_tokens: response.tokens[:total]
|
|
@@ -283,7 +349,13 @@ module AgentHarness
|
|
|
283
349
|
original_error: original_error
|
|
284
350
|
)
|
|
285
351
|
when :timeout
|
|
352
|
+
return original_error if original_error.is_a?(TimeoutError)
|
|
353
|
+
|
|
286
354
|
TimeoutError.new(original_error.message, original_error: original_error)
|
|
355
|
+
when :idle_timeout
|
|
356
|
+
return original_error if original_error.is_a?(IdleTimeoutError)
|
|
357
|
+
|
|
358
|
+
IdleTimeoutError.new(original_error.message, original_error: original_error)
|
|
287
359
|
else
|
|
288
360
|
ProviderError.new(original_error.message, original_error: original_error)
|
|
289
361
|
end
|
|
@@ -8,6 +8,9 @@ module AgentHarness
|
|
|
8
8
|
#
|
|
9
9
|
# Provides integration with the OpenAI Codex CLI tool.
|
|
10
10
|
class Codex < Base
|
|
11
|
+
SUPPORTED_CLI_VERSION = "0.116.0"
|
|
12
|
+
SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.117.0").freeze
|
|
13
|
+
|
|
11
14
|
class << self
|
|
12
15
|
def provider_name
|
|
13
16
|
:codex
|
|
@@ -49,6 +52,37 @@ module AgentHarness
|
|
|
49
52
|
{name: "codex", family: "codex", tier: "standard", provider: "codex"}
|
|
50
53
|
]
|
|
51
54
|
end
|
|
55
|
+
|
|
56
|
+
def installation_contract
|
|
57
|
+
default_package = "@openai/codex@#{SUPPORTED_CLI_VERSION}".freeze
|
|
58
|
+
install_command_prefix = ["npm", "install", "-g", "--ignore-scripts"].freeze
|
|
59
|
+
install_command = (install_command_prefix + [default_package]).freeze
|
|
60
|
+
supported_versions = [SUPPORTED_CLI_VERSION].freeze
|
|
61
|
+
version_requirement = SUPPORTED_CLI_REQUIREMENT.requirements
|
|
62
|
+
.map { |op, ver| "#{op} #{ver}".freeze }
|
|
63
|
+
.freeze
|
|
64
|
+
|
|
65
|
+
contract = {
|
|
66
|
+
source: :npm,
|
|
67
|
+
package: default_package,
|
|
68
|
+
package_name: "@openai/codex",
|
|
69
|
+
version: SUPPORTED_CLI_VERSION,
|
|
70
|
+
version_requirement: version_requirement,
|
|
71
|
+
binary_name: binary_name,
|
|
72
|
+
install_command_prefix: install_command_prefix,
|
|
73
|
+
install_command: install_command,
|
|
74
|
+
supported_versions: supported_versions
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
contract.each_value do |value|
|
|
78
|
+
value.freeze if value.is_a?(String)
|
|
79
|
+
end
|
|
80
|
+
contract.freeze
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def smoke_test_contract
|
|
84
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
85
|
+
end
|
|
52
86
|
end
|
|
53
87
|
|
|
54
88
|
def name
|
|
@@ -59,6 +93,14 @@ module AgentHarness
|
|
|
59
93
|
"OpenAI Codex CLI"
|
|
60
94
|
end
|
|
61
95
|
|
|
96
|
+
def configuration_schema
|
|
97
|
+
{
|
|
98
|
+
fields: [],
|
|
99
|
+
auth_modes: [:api_key],
|
|
100
|
+
openai_compatible: true
|
|
101
|
+
}
|
|
102
|
+
end
|
|
103
|
+
|
|
62
104
|
def capabilities
|
|
63
105
|
{
|
|
64
106
|
streaming: false,
|
|
@@ -186,12 +228,16 @@ module AgentHarness
|
|
|
186
228
|
|
|
187
229
|
def build_command(prompt, options)
|
|
188
230
|
cmd = [self.class.binary_name, "exec"]
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
#
|
|
192
|
-
#
|
|
193
|
-
#
|
|
194
|
-
|
|
231
|
+
externally_sandboxed = externally_sandboxed?(options)
|
|
232
|
+
|
|
233
|
+
# When externally_sandboxed is set, use --dangerously-bypass-approvals-and-sandbox
|
|
234
|
+
# instead of --full-auto. In the Codex CLI, full_auto is checked first and
|
|
235
|
+
# selects workspace-write sandbox mode, which overrides the bypass flag.
|
|
236
|
+
# Passing both would leave the run in the wrong sandbox mode.
|
|
237
|
+
#
|
|
238
|
+
# When NOT externally sandboxed: use --full-auto for Docker containers
|
|
239
|
+
# (to skip nested sandboxing) or when dangerous_mode is explicitly requested.
|
|
240
|
+
if !externally_sandboxed && (sandboxed_environment? || options[:dangerous_mode])
|
|
195
241
|
cmd += dangerous_mode_flags
|
|
196
242
|
end
|
|
197
243
|
|
|
@@ -200,10 +246,13 @@ module AgentHarness
|
|
|
200
246
|
unless flags.is_a?(Array)
|
|
201
247
|
raise ArgumentError, "Codex configuration error: default_flags must be an array of strings"
|
|
202
248
|
end
|
|
249
|
+
# Strip --full-auto from defaults when externally sandboxed to avoid
|
|
250
|
+
# conflicting with --dangerously-bypass-approvals-and-sandbox.
|
|
251
|
+
flags -= dangerous_mode_flags if externally_sandboxed
|
|
203
252
|
cmd += flags if flags.any?
|
|
204
253
|
end
|
|
205
254
|
|
|
206
|
-
if externally_sandboxed
|
|
255
|
+
if externally_sandboxed
|
|
207
256
|
cmd += sandbox_bypass_flags
|
|
208
257
|
end
|
|
209
258
|
|
|
@@ -211,11 +260,29 @@ module AgentHarness
|
|
|
211
260
|
cmd += session_flags(options[:session])
|
|
212
261
|
end
|
|
213
262
|
|
|
263
|
+
runtime = options[:provider_runtime]
|
|
264
|
+
if runtime
|
|
265
|
+
cmd += ["--model", runtime.model] if runtime.model
|
|
266
|
+
runtime_flags = runtime.flags
|
|
267
|
+
# Strip --full-auto from runtime flags when externally sandboxed.
|
|
268
|
+
runtime_flags -= dangerous_mode_flags if externally_sandboxed
|
|
269
|
+
cmd += runtime_flags unless runtime_flags.empty?
|
|
270
|
+
end
|
|
271
|
+
|
|
214
272
|
cmd << prompt
|
|
215
273
|
|
|
216
274
|
cmd
|
|
217
275
|
end
|
|
218
276
|
|
|
277
|
+
def build_env(options)
|
|
278
|
+
env = super
|
|
279
|
+
runtime = options[:provider_runtime]
|
|
280
|
+
return env unless runtime
|
|
281
|
+
|
|
282
|
+
env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
|
|
283
|
+
env
|
|
284
|
+
end
|
|
285
|
+
|
|
219
286
|
def default_timeout
|
|
220
287
|
300
|
|
221
288
|
end
|
|
@@ -237,7 +304,7 @@ module AgentHarness
|
|
|
237
304
|
end
|
|
238
305
|
|
|
239
306
|
def sandbox_bypass_flags
|
|
240
|
-
["--sandbox"
|
|
307
|
+
["--dangerously-bypass-approvals-and-sandbox"]
|
|
241
308
|
end
|
|
242
309
|
|
|
243
310
|
def read_codex_credentials
|
|
@@ -83,6 +83,10 @@ module AgentHarness
|
|
|
83
83
|
def supports_model_family?(family_name)
|
|
84
84
|
family_name.match?(/^(claude|gpt|cursor)-/)
|
|
85
85
|
end
|
|
86
|
+
|
|
87
|
+
def smoke_test_contract
|
|
88
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
89
|
+
end
|
|
86
90
|
end
|
|
87
91
|
|
|
88
92
|
def name
|
|
@@ -93,6 +97,14 @@ module AgentHarness
|
|
|
93
97
|
"Cursor AI"
|
|
94
98
|
end
|
|
95
99
|
|
|
100
|
+
def configuration_schema
|
|
101
|
+
{
|
|
102
|
+
fields: [],
|
|
103
|
+
auth_modes: [:oauth],
|
|
104
|
+
openai_compatible: false
|
|
105
|
+
}
|
|
106
|
+
end
|
|
107
|
+
|
|
96
108
|
def capabilities
|
|
97
109
|
{
|
|
98
110
|
streaming: false,
|
|
@@ -163,23 +175,50 @@ module AgentHarness
|
|
|
163
175
|
def send_message(prompt:, **options)
|
|
164
176
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
165
177
|
|
|
178
|
+
# Coerce provider_runtime from Hash if needed (same as Base#send_message)
|
|
179
|
+
options = normalize_provider_runtime(options)
|
|
180
|
+
runtime = options[:provider_runtime]
|
|
181
|
+
|
|
166
182
|
# Normalize and validate MCP servers (same as Base#send_message)
|
|
167
183
|
options = normalize_mcp_servers(options)
|
|
168
184
|
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
169
185
|
|
|
170
186
|
# Build command (without prompt in args - we send via stdin)
|
|
171
187
|
command = [self.class.binary_name, "-p"]
|
|
188
|
+
command.concat(runtime.flags) if runtime&.flags&.any?
|
|
172
189
|
|
|
173
190
|
# Calculate timeout
|
|
174
191
|
timeout = options[:timeout] || @config.timeout || default_timeout
|
|
175
192
|
|
|
176
193
|
# Execute command with prompt on stdin
|
|
194
|
+
env = build_env(options)
|
|
177
195
|
start_time = Time.now
|
|
178
|
-
result =
|
|
196
|
+
result = execute_with_timeout(
|
|
197
|
+
command,
|
|
198
|
+
timeout: timeout,
|
|
199
|
+
env: env,
|
|
200
|
+
stdin_data: prompt,
|
|
201
|
+
**command_execution_options(options)
|
|
202
|
+
)
|
|
179
203
|
duration = Time.now - start_time
|
|
180
204
|
|
|
181
205
|
# Parse response
|
|
182
206
|
response = parse_response(result, duration: duration)
|
|
207
|
+
# Runtime model is a per-request override and always takes precedence
|
|
208
|
+
# over both the config-level model and whatever parse_response returned.
|
|
209
|
+
# See Base#send_message for rationale.
|
|
210
|
+
if runtime&.model
|
|
211
|
+
response = Response.new(
|
|
212
|
+
output: response.output,
|
|
213
|
+
exit_code: response.exit_code,
|
|
214
|
+
duration: response.duration,
|
|
215
|
+
provider: response.provider,
|
|
216
|
+
model: runtime.model,
|
|
217
|
+
tokens: response.tokens,
|
|
218
|
+
metadata: response.metadata,
|
|
219
|
+
error: response.error
|
|
220
|
+
)
|
|
221
|
+
end
|
|
183
222
|
|
|
184
223
|
# Track tokens
|
|
185
224
|
track_tokens(response) if response.tokens
|
|
@@ -201,7 +240,7 @@ module AgentHarness
|
|
|
201
240
|
end
|
|
202
241
|
|
|
203
242
|
def build_env(options)
|
|
204
|
-
|
|
243
|
+
super
|
|
205
244
|
end
|
|
206
245
|
|
|
207
246
|
def default_timeout
|
|
@@ -298,7 +337,13 @@ module AgentHarness
|
|
|
298
337
|
when :auth_expired
|
|
299
338
|
raise AuthenticationError.new(error.message, provider: self.class.provider_name, original_error: error)
|
|
300
339
|
when :timeout
|
|
340
|
+
raise error if error.is_a?(TimeoutError)
|
|
341
|
+
|
|
301
342
|
raise TimeoutError.new(error.message, original_error: error)
|
|
343
|
+
when :idle_timeout
|
|
344
|
+
raise error if error.is_a?(IdleTimeoutError)
|
|
345
|
+
|
|
346
|
+
raise IdleTimeoutError.new(error.message, original_error: error)
|
|
302
347
|
else
|
|
303
348
|
raise ProviderError.new(error.message, original_error: error)
|
|
304
349
|
end
|