agent-harness 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/CHANGELOG.md +26 -0
- data/README.md +216 -3
- data/lib/agent_harness/authentication.rb +28 -9
- data/lib/agent_harness/command_executor.rb +453 -32
- data/lib/agent_harness/docker_command_executor.rb +23 -3
- data/lib/agent_harness/error_taxonomy.rb +10 -0
- data/lib/agent_harness/errors.rb +5 -0
- data/lib/agent_harness/orchestration/conductor.rb +40 -16
- data/lib/agent_harness/orchestration/provider_manager.rb +46 -18
- data/lib/agent_harness/provider_health_check.rb +243 -63
- data/lib/agent_harness/provider_runtime.rb +20 -3
- data/lib/agent_harness/providers/adapter.rb +717 -0
- data/lib/agent_harness/providers/aider.rb +59 -0
- data/lib/agent_harness/providers/anthropic.rb +98 -0
- data/lib/agent_harness/providers/base.rb +46 -10
- data/lib/agent_harness/providers/codex.rb +68 -9
- data/lib/agent_harness/providers/cursor.rb +90 -2
- data/lib/agent_harness/providers/gemini.rb +43 -0
- data/lib/agent_harness/providers/github_copilot.rb +38 -6
- data/lib/agent_harness/providers/kilocode.rb +39 -0
- data/lib/agent_harness/providers/mistral_vibe.rb +13 -0
- data/lib/agent_harness/providers/opencode.rb +77 -1
- data/lib/agent_harness/providers/registry.rb +446 -18
- data/lib/agent_harness/version.rb +1 -1
- data/lib/agent_harness.rb +105 -6
- metadata +21 -1
|
@@ -6,6 +6,17 @@ module AgentHarness
|
|
|
6
6
|
#
|
|
7
7
|
# Provides integration with the Aider CLI tool.
|
|
8
8
|
class Aider < Base
|
|
9
|
+
UV_VERSION = "0.8.17"
|
|
10
|
+
SUPPORTED_CLI_VERSION = "0.86.2"
|
|
11
|
+
SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.87.0").freeze
|
|
12
|
+
PYTHON_VERSION = "python3.12"
|
|
13
|
+
BINARY_PATH = "/usr/local/bin/aider"
|
|
14
|
+
UV_TOOL_ENV = {
|
|
15
|
+
"UV_TOOL_BIN_DIR" => "/usr/local/bin",
|
|
16
|
+
"UV_TOOL_DIR" => "/opt/uv/tools",
|
|
17
|
+
"UV_PYTHON_INSTALL_DIR" => "/opt/uv/python"
|
|
18
|
+
}.freeze
|
|
19
|
+
|
|
9
20
|
class << self
|
|
10
21
|
def provider_name
|
|
11
22
|
:aider
|
|
@@ -49,6 +60,54 @@ module AgentHarness
|
|
|
49
60
|
{name: "claude-3-5-sonnet", family: "claude-3-5-sonnet", tier: "standard", provider: "aider"}
|
|
50
61
|
]
|
|
51
62
|
end
|
|
63
|
+
|
|
64
|
+
def installation_contract(version: SUPPORTED_CLI_VERSION)
|
|
65
|
+
unless SUPPORTED_CLI_REQUIREMENT.satisfied_by?(Gem::Version.new(version))
|
|
66
|
+
raise ArgumentError,
|
|
67
|
+
"Unsupported Aider CLI version #{version.inspect}; " \
|
|
68
|
+
"supported versions must satisfy #{SUPPORTED_CLI_REQUIREMENT}"
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
default_package = "aider-chat==#{version}".freeze
|
|
72
|
+
bootstrap_command = [
|
|
73
|
+
"python3", "-m", "pip", "install", "--no-cache-dir", "--break-system-packages", "uv==#{UV_VERSION}"
|
|
74
|
+
].freeze
|
|
75
|
+
install_command_prefix = [
|
|
76
|
+
"uv", "tool", "install", "--force", "--python", PYTHON_VERSION, "--with", "pip"
|
|
77
|
+
].freeze
|
|
78
|
+
install_command = (install_command_prefix + [default_package]).freeze
|
|
79
|
+
supported_versions = [version].freeze
|
|
80
|
+
version_requirement = SUPPORTED_CLI_REQUIREMENT.requirements
|
|
81
|
+
.map { |op, ver| "#{op} #{ver}".freeze }
|
|
82
|
+
.freeze
|
|
83
|
+
|
|
84
|
+
contract = {
|
|
85
|
+
source: :uv_tool,
|
|
86
|
+
bootstrap_source: :pip,
|
|
87
|
+
bootstrap_package: "uv==#{UV_VERSION}",
|
|
88
|
+
bootstrap_commands: [bootstrap_command].freeze,
|
|
89
|
+
install_environment: UV_TOOL_ENV,
|
|
90
|
+
package: default_package,
|
|
91
|
+
package_name: "aider-chat",
|
|
92
|
+
version: version,
|
|
93
|
+
version_format: "%{package_name}==%{version}",
|
|
94
|
+
version_requirement: version_requirement,
|
|
95
|
+
binary_name: binary_name,
|
|
96
|
+
binary_path: BINARY_PATH,
|
|
97
|
+
install_command_prefix: install_command_prefix,
|
|
98
|
+
install_command: install_command,
|
|
99
|
+
supported_versions: supported_versions
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
contract.each_value do |value|
|
|
103
|
+
value.freeze if value.is_a?(String)
|
|
104
|
+
end
|
|
105
|
+
contract.freeze
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def smoke_test_contract
|
|
109
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
110
|
+
end
|
|
52
111
|
end
|
|
53
112
|
|
|
54
113
|
def name
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "shellwords"
|
|
4
5
|
|
|
5
6
|
module AgentHarness
|
|
6
7
|
module Providers
|
|
@@ -15,6 +16,12 @@ module AgentHarness
|
|
|
15
16
|
class Anthropic < Base
|
|
16
17
|
# Model name pattern for Anthropic Claude models
|
|
17
18
|
MODEL_PATTERN = /^claude-[\d.-]+-(?:opus|sonnet|haiku)(?:-\d{8})?$/i
|
|
19
|
+
SUPPORTED_CLI_VERSION = "2.1.92"
|
|
20
|
+
SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 2.2.0").freeze
|
|
21
|
+
|
|
22
|
+
# Matches semver (e.g. "2.1.92"), optional pre-release (e.g. "2.1.92-beta.1"),
|
|
23
|
+
# or channel tokens (e.g. "latest", "stable").
|
|
24
|
+
VALID_VERSION_PATTERN = /\A(?:\d+\.\d+\.\d+(?:-[a-zA-Z0-9.]+)?|latest|stable)\z/
|
|
18
25
|
|
|
19
26
|
class << self
|
|
20
27
|
def provider_name
|
|
@@ -25,11 +32,79 @@ module AgentHarness
|
|
|
25
32
|
"claude"
|
|
26
33
|
end
|
|
27
34
|
|
|
35
|
+
def install_contract(version: nil)
|
|
36
|
+
target_version = version || SUPPORTED_CLI_VERSION
|
|
37
|
+
validate_version!(target_version)
|
|
38
|
+
version_requirement = SUPPORTED_CLI_REQUIREMENT.requirements
|
|
39
|
+
.map { |op, ver| "#{op} #{ver}" }
|
|
40
|
+
.join(", ")
|
|
41
|
+
channel_token = %w[latest stable].include?(target_version.to_s)
|
|
42
|
+
|
|
43
|
+
warning = "Review the downloaded installer before execution and verify any published checksum or signature metadata when available."
|
|
44
|
+
if channel_token
|
|
45
|
+
warning += " Channel '#{target_version}' is not pinned; the resolved version may fall " \
|
|
46
|
+
"outside the supported range (#{version_requirement}). Verify the installed version " \
|
|
47
|
+
"after installation."
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
{
|
|
51
|
+
provider: provider_name,
|
|
52
|
+
binary_name: binary_name,
|
|
53
|
+
binary_paths: [
|
|
54
|
+
"$HOME/.local/bin/claude",
|
|
55
|
+
binary_name
|
|
56
|
+
],
|
|
57
|
+
install: {
|
|
58
|
+
strategy: :shell,
|
|
59
|
+
source: "official",
|
|
60
|
+
command: "tmp_script=$(mktemp) && trap 'rm -f \"$tmp_script\"' EXIT && curl -fsSL https://claude.ai/install.sh -o \"$tmp_script\" && bash \"$tmp_script\" #{Shellwords.shellescape(target_version)}",
|
|
61
|
+
warning: warning,
|
|
62
|
+
post_install_binary_path: "$HOME/.local/bin/claude",
|
|
63
|
+
# When a channel token is used, include the requirement so
|
|
64
|
+
# consumers can validate the installed version post-install.
|
|
65
|
+
version_not_pinned: channel_token
|
|
66
|
+
},
|
|
67
|
+
supported_versions: {
|
|
68
|
+
default: SUPPORTED_CLI_VERSION,
|
|
69
|
+
requirement: version_requirement,
|
|
70
|
+
channel: "stable"
|
|
71
|
+
},
|
|
72
|
+
runtime_contract: {
|
|
73
|
+
available_via: binary_name,
|
|
74
|
+
build_command: [
|
|
75
|
+
binary_name,
|
|
76
|
+
"--print",
|
|
77
|
+
"--output-format=json"
|
|
78
|
+
],
|
|
79
|
+
required_features: [
|
|
80
|
+
"print_mode",
|
|
81
|
+
"json_output",
|
|
82
|
+
"mcp_config",
|
|
83
|
+
"mcp_list",
|
|
84
|
+
"dangerously_skip_permissions",
|
|
85
|
+
"models_list"
|
|
86
|
+
]
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
end
|
|
90
|
+
|
|
28
91
|
def available?
|
|
29
92
|
executor = AgentHarness.configuration.command_executor
|
|
30
93
|
!!executor.which(binary_name)
|
|
31
94
|
end
|
|
32
95
|
|
|
96
|
+
def provider_metadata_overrides
|
|
97
|
+
{
|
|
98
|
+
auth: {
|
|
99
|
+
service: :anthropic,
|
|
100
|
+
api_family: :anthropic
|
|
101
|
+
},
|
|
102
|
+
identity: {
|
|
103
|
+
bot_usernames: %w[claude anthropic]
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
end
|
|
107
|
+
|
|
33
108
|
def firewall_requirements
|
|
34
109
|
{
|
|
35
110
|
domains: [
|
|
@@ -81,8 +156,31 @@ module AgentHarness
|
|
|
81
156
|
MODEL_PATTERN.match?(family_name)
|
|
82
157
|
end
|
|
83
158
|
|
|
159
|
+
def smoke_test_contract
|
|
160
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
161
|
+
end
|
|
162
|
+
|
|
84
163
|
private
|
|
85
164
|
|
|
165
|
+
def validate_version!(version)
|
|
166
|
+
version_str = version.to_s
|
|
167
|
+
|
|
168
|
+
unless VALID_VERSION_PATTERN.match?(version_str)
|
|
169
|
+
raise ArgumentError, "Invalid version: #{version.inspect}. " \
|
|
170
|
+
"Must be a semver string (e.g. '2.1.92'), optional pre-release suffix, or a channel token ('latest', 'stable')."
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Channel tokens are not concrete versions; skip requirement check.
|
|
174
|
+
return if %w[latest stable].include?(version_str)
|
|
175
|
+
|
|
176
|
+
# Validate concrete versions against the supported range.
|
|
177
|
+
gem_version = Gem::Version.new(version_str)
|
|
178
|
+
return if SUPPORTED_CLI_REQUIREMENT.satisfied_by?(gem_version)
|
|
179
|
+
|
|
180
|
+
raise ArgumentError, "Version #{version.inspect} is outside the supported range " \
|
|
181
|
+
"(#{SUPPORTED_CLI_REQUIREMENT}). Update SUPPORTED_CLI_REQUIREMENT before targeting this version."
|
|
182
|
+
end
|
|
183
|
+
|
|
86
184
|
def parse_models_list(output)
|
|
87
185
|
return [] if output.nil? || output.empty?
|
|
88
186
|
|
|
@@ -22,16 +22,18 @@ module AgentHarness
|
|
|
22
22
|
# system("which my-cli > /dev/null 2>&1")
|
|
23
23
|
# end
|
|
24
24
|
# end
|
|
25
|
-
#
|
|
26
|
-
# protected
|
|
27
|
-
#
|
|
28
|
-
# def build_command(prompt, options)
|
|
29
|
-
# [self.class.binary_name, "--prompt", prompt]
|
|
30
|
-
# end
|
|
31
25
|
# end
|
|
32
26
|
class Base
|
|
33
27
|
include Adapter
|
|
34
28
|
|
|
29
|
+
DEFAULT_SMOKE_TEST_CONTRACT = {
|
|
30
|
+
prompt: "Reply with exactly OK.",
|
|
31
|
+
expected_output: "OK",
|
|
32
|
+
timeout: 30,
|
|
33
|
+
require_output: true,
|
|
34
|
+
success_message: "Smoke test passed"
|
|
35
|
+
}.freeze
|
|
36
|
+
|
|
35
37
|
# Common error patterns shared across providers that use standard
|
|
36
38
|
# HTTP-style error responses. Providers with unique patterns (e.g.
|
|
37
39
|
# Anthropic, GitHub Copilot) override error_patterns entirely.
|
|
@@ -63,6 +65,12 @@ module AgentHarness
|
|
|
63
65
|
attr_reader :config, :logger
|
|
64
66
|
attr_accessor :executor
|
|
65
67
|
|
|
68
|
+
class << self
|
|
69
|
+
def smoke_test_contract
|
|
70
|
+
nil
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
66
74
|
# Initialize the provider
|
|
67
75
|
#
|
|
68
76
|
# @param config [ProviderConfig, nil] provider configuration
|
|
@@ -109,7 +117,12 @@ module AgentHarness
|
|
|
109
117
|
|
|
110
118
|
# Execute command
|
|
111
119
|
start_time = Time.now
|
|
112
|
-
result = execute_with_timeout(
|
|
120
|
+
result = execute_with_timeout(
|
|
121
|
+
command,
|
|
122
|
+
timeout: timeout,
|
|
123
|
+
env: build_env(options),
|
|
124
|
+
**command_execution_options(options)
|
|
125
|
+
)
|
|
113
126
|
duration = Time.now - start_time
|
|
114
127
|
|
|
115
128
|
# Parse response
|
|
@@ -190,7 +203,11 @@ module AgentHarness
|
|
|
190
203
|
runtime = options[:provider_runtime]
|
|
191
204
|
return {} unless runtime
|
|
192
205
|
|
|
193
|
-
|
|
206
|
+
# Return overrides only. Ruby subprocess spawning treats nil values as
|
|
207
|
+
# explicit unsets in the child process, while omitted keys are inherited.
|
|
208
|
+
env = runtime.env.dup
|
|
209
|
+
runtime.unset_env.each { |key| env[key] = nil }
|
|
210
|
+
env
|
|
194
211
|
end
|
|
195
212
|
|
|
196
213
|
# Parse CLI output into Response - override in subclasses
|
|
@@ -279,8 +296,21 @@ module AgentHarness
|
|
|
279
296
|
options.merge(mcp_servers: normalized)
|
|
280
297
|
end
|
|
281
298
|
|
|
282
|
-
def
|
|
283
|
-
|
|
299
|
+
def command_execution_options(options)
|
|
300
|
+
execution_options = {
|
|
301
|
+
idle_timeout: options[:idle_timeout],
|
|
302
|
+
on_stdout_chunk: options[:on_stdout_chunk],
|
|
303
|
+
on_stderr_chunk: options[:on_stderr_chunk],
|
|
304
|
+
on_heartbeat: options[:on_heartbeat],
|
|
305
|
+
observer: options[:execution_observer] || options[:observer]
|
|
306
|
+
}.reject { |_, value| value.nil? }
|
|
307
|
+
|
|
308
|
+
execution_options[:heartbeat_interval] = options[:heartbeat_interval] if options.key?(:heartbeat_interval)
|
|
309
|
+
execution_options
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
def execute_with_timeout(command, timeout:, env:, stdin_data: nil, **execution_options)
|
|
313
|
+
@executor.execute(command, timeout: timeout, env: env, stdin_data: stdin_data, **execution_options)
|
|
284
314
|
end
|
|
285
315
|
|
|
286
316
|
def track_tokens(response)
|
|
@@ -319,7 +349,13 @@ module AgentHarness
|
|
|
319
349
|
original_error: original_error
|
|
320
350
|
)
|
|
321
351
|
when :timeout
|
|
352
|
+
return original_error if original_error.is_a?(TimeoutError)
|
|
353
|
+
|
|
322
354
|
TimeoutError.new(original_error.message, original_error: original_error)
|
|
355
|
+
when :idle_timeout
|
|
356
|
+
return original_error if original_error.is_a?(IdleTimeoutError)
|
|
357
|
+
|
|
358
|
+
IdleTimeoutError.new(original_error.message, original_error: original_error)
|
|
323
359
|
else
|
|
324
360
|
ProviderError.new(original_error.message, original_error: original_error)
|
|
325
361
|
end
|
|
@@ -8,6 +8,9 @@ module AgentHarness
|
|
|
8
8
|
#
|
|
9
9
|
# Provides integration with the OpenAI Codex CLI tool.
|
|
10
10
|
class Codex < Base
|
|
11
|
+
SUPPORTED_CLI_VERSION = "0.116.0"
|
|
12
|
+
SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.117.0").freeze
|
|
13
|
+
|
|
11
14
|
class << self
|
|
12
15
|
def provider_name
|
|
13
16
|
:codex
|
|
@@ -22,6 +25,15 @@ module AgentHarness
|
|
|
22
25
|
!!executor.which(binary_name)
|
|
23
26
|
end
|
|
24
27
|
|
|
28
|
+
def provider_metadata_overrides
|
|
29
|
+
{
|
|
30
|
+
auth: {
|
|
31
|
+
service: :openai,
|
|
32
|
+
api_family: :openai
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
end
|
|
36
|
+
|
|
25
37
|
def firewall_requirements
|
|
26
38
|
{
|
|
27
39
|
domains: [
|
|
@@ -49,6 +61,43 @@ module AgentHarness
|
|
|
49
61
|
{name: "codex", family: "codex", tier: "standard", provider: "codex"}
|
|
50
62
|
]
|
|
51
63
|
end
|
|
64
|
+
|
|
65
|
+
def installation_contract(version: SUPPORTED_CLI_VERSION)
|
|
66
|
+
unless SUPPORTED_CLI_REQUIREMENT.satisfied_by?(Gem::Version.new(version))
|
|
67
|
+
raise ArgumentError,
|
|
68
|
+
"Unsupported Codex CLI version #{version.inspect}; " \
|
|
69
|
+
"supported versions must satisfy #{SUPPORTED_CLI_REQUIREMENT}"
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
default_package = "@openai/codex@#{version}".freeze
|
|
73
|
+
install_command_prefix = ["npm", "install", "-g", "--ignore-scripts"].freeze
|
|
74
|
+
install_command = (install_command_prefix + [default_package]).freeze
|
|
75
|
+
supported_versions = [version].freeze
|
|
76
|
+
version_requirement = SUPPORTED_CLI_REQUIREMENT.requirements
|
|
77
|
+
.map { |op, ver| "#{op} #{ver}".freeze }
|
|
78
|
+
.freeze
|
|
79
|
+
|
|
80
|
+
contract = {
|
|
81
|
+
source: :npm,
|
|
82
|
+
package: default_package,
|
|
83
|
+
package_name: "@openai/codex",
|
|
84
|
+
version: version,
|
|
85
|
+
version_requirement: version_requirement,
|
|
86
|
+
binary_name: binary_name,
|
|
87
|
+
install_command_prefix: install_command_prefix,
|
|
88
|
+
install_command: install_command,
|
|
89
|
+
supported_versions: supported_versions
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
contract.each_value do |value|
|
|
93
|
+
value.freeze if value.is_a?(String)
|
|
94
|
+
end
|
|
95
|
+
contract.freeze
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def smoke_test_contract
|
|
99
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
100
|
+
end
|
|
52
101
|
end
|
|
53
102
|
|
|
54
103
|
def name
|
|
@@ -194,12 +243,16 @@ module AgentHarness
|
|
|
194
243
|
|
|
195
244
|
def build_command(prompt, options)
|
|
196
245
|
cmd = [self.class.binary_name, "exec"]
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
#
|
|
200
|
-
#
|
|
201
|
-
#
|
|
202
|
-
|
|
246
|
+
externally_sandboxed = externally_sandboxed?(options)
|
|
247
|
+
|
|
248
|
+
# When externally_sandboxed is set, use --dangerously-bypass-approvals-and-sandbox
|
|
249
|
+
# instead of --full-auto. In the Codex CLI, full_auto is checked first and
|
|
250
|
+
# selects workspace-write sandbox mode, which overrides the bypass flag.
|
|
251
|
+
# Passing both would leave the run in the wrong sandbox mode.
|
|
252
|
+
#
|
|
253
|
+
# When NOT externally sandboxed: use --full-auto for Docker containers
|
|
254
|
+
# (to skip nested sandboxing) or when dangerous_mode is explicitly requested.
|
|
255
|
+
if !externally_sandboxed && (sandboxed_environment? || options[:dangerous_mode])
|
|
203
256
|
cmd += dangerous_mode_flags
|
|
204
257
|
end
|
|
205
258
|
|
|
@@ -208,10 +261,13 @@ module AgentHarness
|
|
|
208
261
|
unless flags.is_a?(Array)
|
|
209
262
|
raise ArgumentError, "Codex configuration error: default_flags must be an array of strings"
|
|
210
263
|
end
|
|
264
|
+
# Strip --full-auto from defaults when externally sandboxed to avoid
|
|
265
|
+
# conflicting with --dangerously-bypass-approvals-and-sandbox.
|
|
266
|
+
flags -= dangerous_mode_flags if externally_sandboxed
|
|
211
267
|
cmd += flags if flags.any?
|
|
212
268
|
end
|
|
213
269
|
|
|
214
|
-
if externally_sandboxed
|
|
270
|
+
if externally_sandboxed
|
|
215
271
|
cmd += sandbox_bypass_flags
|
|
216
272
|
end
|
|
217
273
|
|
|
@@ -222,7 +278,10 @@ module AgentHarness
|
|
|
222
278
|
runtime = options[:provider_runtime]
|
|
223
279
|
if runtime
|
|
224
280
|
cmd += ["--model", runtime.model] if runtime.model
|
|
225
|
-
|
|
281
|
+
runtime_flags = runtime.flags
|
|
282
|
+
# Strip --full-auto from runtime flags when externally sandboxed.
|
|
283
|
+
runtime_flags -= dangerous_mode_flags if externally_sandboxed
|
|
284
|
+
cmd += runtime_flags unless runtime_flags.empty?
|
|
226
285
|
end
|
|
227
286
|
|
|
228
287
|
cmd << prompt
|
|
@@ -260,7 +319,7 @@ module AgentHarness
|
|
|
260
319
|
end
|
|
261
320
|
|
|
262
321
|
def sandbox_bypass_flags
|
|
263
|
-
["--sandbox"
|
|
322
|
+
["--dangerously-bypass-approvals-and-sandbox"]
|
|
264
323
|
end
|
|
265
324
|
|
|
266
325
|
def read_codex_credentials
|
|
@@ -12,6 +12,12 @@ module AgentHarness
|
|
|
12
12
|
# provider = AgentHarness::Providers::Cursor.new
|
|
13
13
|
# response = provider.send_message(prompt: "Hello!")
|
|
14
14
|
class Cursor < Base
|
|
15
|
+
INSTALL_SCRIPT_URL = "https://cursor.com/install"
|
|
16
|
+
INSTALL_TARGET_LATEST = "latest"
|
|
17
|
+
INSTALL_BUILD = "2026.03.30-a5d3e17"
|
|
18
|
+
INSTALL_SCRIPT_SHA256 = "8371988b483abec13c07c10e95cccc839da81ebf9596e430d3c90835a227cbad"
|
|
19
|
+
INSTALL_LINUX_X64_PACKAGE_SHA256 = "e0d4b611db111d2dbe76474386271bff3e1dbb2cc6ddf527f9d5d5801b2ce2a0"
|
|
20
|
+
|
|
15
21
|
class << self
|
|
16
22
|
def provider_name
|
|
17
23
|
:cursor
|
|
@@ -26,6 +32,15 @@ module AgentHarness
|
|
|
26
32
|
!!executor.which(binary_name)
|
|
27
33
|
end
|
|
28
34
|
|
|
35
|
+
def provider_metadata_overrides
|
|
36
|
+
{
|
|
37
|
+
auth: {
|
|
38
|
+
service: :cursor,
|
|
39
|
+
api_family: :cursor
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
end
|
|
43
|
+
|
|
29
44
|
def firewall_requirements
|
|
30
45
|
{
|
|
31
46
|
domains: [
|
|
@@ -83,6 +98,67 @@ module AgentHarness
|
|
|
83
98
|
def supports_model_family?(family_name)
|
|
84
99
|
family_name.match?(/^(claude|gpt|cursor)-/)
|
|
85
100
|
end
|
|
101
|
+
|
|
102
|
+
def install_metadata(version: nil)
|
|
103
|
+
install_target = normalize_install_target(version)
|
|
104
|
+
linux_x64_package_url = package_url_for(os: "linux", arch: "x64")
|
|
105
|
+
|
|
106
|
+
{
|
|
107
|
+
source: {
|
|
108
|
+
type: :shell_script,
|
|
109
|
+
url: INSTALL_SCRIPT_URL,
|
|
110
|
+
resolved_version: INSTALL_BUILD,
|
|
111
|
+
default_artifact_url: linux_x64_package_url
|
|
112
|
+
},
|
|
113
|
+
checksum: {
|
|
114
|
+
strategy: :sha256,
|
|
115
|
+
targets: {
|
|
116
|
+
script: {
|
|
117
|
+
url: INSTALL_SCRIPT_URL,
|
|
118
|
+
value: INSTALL_SCRIPT_SHA256
|
|
119
|
+
},
|
|
120
|
+
artifacts: {
|
|
121
|
+
"linux/x64" => {
|
|
122
|
+
url: linux_x64_package_url,
|
|
123
|
+
value: INSTALL_LINUX_X64_PACKAGE_SHA256
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
},
|
|
128
|
+
binary: {
|
|
129
|
+
name: binary_name,
|
|
130
|
+
path: "$HOME/.local/bin/#{binary_name}",
|
|
131
|
+
suggested_global_path: "/usr/local/bin/#{binary_name}"
|
|
132
|
+
},
|
|
133
|
+
version: {
|
|
134
|
+
default: INSTALL_TARGET_LATEST,
|
|
135
|
+
supported: install_target,
|
|
136
|
+
command: [binary_name, "--version"]
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def smoke_test_contract
|
|
142
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
private
|
|
146
|
+
|
|
147
|
+
def package_url_for(os:, arch:)
|
|
148
|
+
format(
|
|
149
|
+
"https://downloads.cursor.com/lab/%<build>s/%<os>s/%<arch>s/agent-cli-package.tar.gz",
|
|
150
|
+
build: INSTALL_BUILD,
|
|
151
|
+
os: os,
|
|
152
|
+
arch: arch
|
|
153
|
+
)
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def normalize_install_target(version)
|
|
157
|
+
target = version.nil? ? INSTALL_TARGET_LATEST : version.to_s
|
|
158
|
+
return target if target == INSTALL_TARGET_LATEST
|
|
159
|
+
|
|
160
|
+
raise ArgumentError, "Unsupported Cursor install target: #{version.inspect}"
|
|
161
|
+
end
|
|
86
162
|
end
|
|
87
163
|
|
|
88
164
|
def name
|
|
@@ -189,7 +265,13 @@ module AgentHarness
|
|
|
189
265
|
# Execute command with prompt on stdin
|
|
190
266
|
env = build_env(options)
|
|
191
267
|
start_time = Time.now
|
|
192
|
-
result =
|
|
268
|
+
result = execute_with_timeout(
|
|
269
|
+
command,
|
|
270
|
+
timeout: timeout,
|
|
271
|
+
env: env,
|
|
272
|
+
stdin_data: prompt,
|
|
273
|
+
**command_execution_options(options)
|
|
274
|
+
)
|
|
193
275
|
duration = Time.now - start_time
|
|
194
276
|
|
|
195
277
|
# Parse response
|
|
@@ -243,7 +325,7 @@ module AgentHarness
|
|
|
243
325
|
return nil unless self.class.available?
|
|
244
326
|
|
|
245
327
|
begin
|
|
246
|
-
result = @executor.execute([
|
|
328
|
+
result = @executor.execute([self.class.binary_name, "mcp", "list"], timeout: 5)
|
|
247
329
|
return nil unless result.success?
|
|
248
330
|
|
|
249
331
|
parse_mcp_servers_output(result.stdout)
|
|
@@ -327,7 +409,13 @@ module AgentHarness
|
|
|
327
409
|
when :auth_expired
|
|
328
410
|
raise AuthenticationError.new(error.message, provider: self.class.provider_name, original_error: error)
|
|
329
411
|
when :timeout
|
|
412
|
+
raise error if error.is_a?(TimeoutError)
|
|
413
|
+
|
|
330
414
|
raise TimeoutError.new(error.message, original_error: error)
|
|
415
|
+
when :idle_timeout
|
|
416
|
+
raise error if error.is_a?(IdleTimeoutError)
|
|
417
|
+
|
|
418
|
+
raise IdleTimeoutError.new(error.message, original_error: error)
|
|
331
419
|
else
|
|
332
420
|
raise ProviderError.new(error.message, original_error: error)
|
|
333
421
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "rubygems/requirement"
|
|
4
5
|
require "time"
|
|
5
6
|
|
|
6
7
|
module AgentHarness
|
|
@@ -11,6 +12,9 @@ module AgentHarness
|
|
|
11
12
|
class Gemini < Base
|
|
12
13
|
# Model name pattern for Gemini models
|
|
13
14
|
MODEL_PATTERN = /^gemini-[\d.]+-(?:pro|flash|ultra)(?:-\d+)?$/i
|
|
15
|
+
CLI_PACKAGE = "@google/gemini-cli"
|
|
16
|
+
SUPPORTED_CLI_VERSION = "0.35.3"
|
|
17
|
+
SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new("= #{SUPPORTED_CLI_VERSION}").freeze
|
|
14
18
|
|
|
15
19
|
class << self
|
|
16
20
|
def provider_name
|
|
@@ -26,6 +30,41 @@ module AgentHarness
|
|
|
26
30
|
!!executor.which(binary_name)
|
|
27
31
|
end
|
|
28
32
|
|
|
33
|
+
def provider_metadata_overrides
|
|
34
|
+
{
|
|
35
|
+
auth: {
|
|
36
|
+
service: :google,
|
|
37
|
+
api_family: :gemini
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def install_contract(version: SUPPORTED_CLI_VERSION)
|
|
43
|
+
parsed_version = begin
|
|
44
|
+
Gem::Version.new(version)
|
|
45
|
+
rescue ArgumentError
|
|
46
|
+
raise ArgumentError, "Unsupported Gemini CLI version #{version.inspect}. Supported requirement: #{SUPPORTED_CLI_REQUIREMENT}"
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
unless SUPPORTED_CLI_REQUIREMENT.satisfied_by?(parsed_version)
|
|
50
|
+
raise ArgumentError, "Unsupported Gemini CLI version #{version.inspect}. Supported requirement: #{SUPPORTED_CLI_REQUIREMENT}"
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
package_spec = "#{CLI_PACKAGE}@#{version}"
|
|
54
|
+
|
|
55
|
+
{
|
|
56
|
+
provider: provider_name,
|
|
57
|
+
source_type: :npm,
|
|
58
|
+
package_name: CLI_PACKAGE,
|
|
59
|
+
supported_version_requirement: SUPPORTED_CLI_REQUIREMENT,
|
|
60
|
+
default_version: SUPPORTED_CLI_VERSION,
|
|
61
|
+
resolved_version: version,
|
|
62
|
+
binary_name: binary_name,
|
|
63
|
+
install_command: ["npm", "install", "-g", "--ignore-scripts", package_spec],
|
|
64
|
+
install_command_string: "npm install -g --ignore-scripts #{package_spec}"
|
|
65
|
+
}
|
|
66
|
+
end
|
|
67
|
+
|
|
29
68
|
def firewall_requirements
|
|
30
69
|
{
|
|
31
70
|
domains: [
|
|
@@ -73,6 +112,10 @@ module AgentHarness
|
|
|
73
112
|
def supports_model_family?(family_name)
|
|
74
113
|
MODEL_PATTERN.match?(family_name) || family_name.start_with?("gemini-")
|
|
75
114
|
end
|
|
115
|
+
|
|
116
|
+
def smoke_test_contract
|
|
117
|
+
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
118
|
+
end
|
|
76
119
|
end
|
|
77
120
|
|
|
78
121
|
def name
|