agent-harness 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.markdownlint.yml +6 -0
  3. data/.markdownlintignore +8 -0
  4. data/.release-please-manifest.json +3 -0
  5. data/.rspec +3 -0
  6. data/.simplecov +26 -0
  7. data/.tool-versions +1 -0
  8. data/CHANGELOG.md +27 -0
  9. data/CODE_OF_CONDUCT.md +10 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +274 -0
  12. data/Rakefile +103 -0
  13. data/bin/console +11 -0
  14. data/bin/setup +8 -0
  15. data/lib/agent_harness/command_executor.rb +146 -0
  16. data/lib/agent_harness/configuration.rb +299 -0
  17. data/lib/agent_harness/error_taxonomy.rb +128 -0
  18. data/lib/agent_harness/errors.rb +63 -0
  19. data/lib/agent_harness/orchestration/circuit_breaker.rb +169 -0
  20. data/lib/agent_harness/orchestration/conductor.rb +179 -0
  21. data/lib/agent_harness/orchestration/health_monitor.rb +170 -0
  22. data/lib/agent_harness/orchestration/metrics.rb +167 -0
  23. data/lib/agent_harness/orchestration/provider_manager.rb +240 -0
  24. data/lib/agent_harness/orchestration/rate_limiter.rb +113 -0
  25. data/lib/agent_harness/providers/adapter.rb +163 -0
  26. data/lib/agent_harness/providers/aider.rb +109 -0
  27. data/lib/agent_harness/providers/anthropic.rb +345 -0
  28. data/lib/agent_harness/providers/base.rb +198 -0
  29. data/lib/agent_harness/providers/codex.rb +100 -0
  30. data/lib/agent_harness/providers/cursor.rb +281 -0
  31. data/lib/agent_harness/providers/gemini.rb +136 -0
  32. data/lib/agent_harness/providers/github_copilot.rb +155 -0
  33. data/lib/agent_harness/providers/kilocode.rb +73 -0
  34. data/lib/agent_harness/providers/opencode.rb +75 -0
  35. data/lib/agent_harness/providers/registry.rb +137 -0
  36. data/lib/agent_harness/response.rb +100 -0
  37. data/lib/agent_harness/token_tracker.rb +170 -0
  38. data/lib/agent_harness/version.rb +5 -0
  39. data/lib/agent_harness.rb +115 -0
  40. data/release-please-config.json +63 -0
  41. metadata +129 -0
@@ -0,0 +1,345 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module AgentHarness
6
+ module Providers
7
+ # Anthropic Claude Code CLI provider
8
+ #
9
+ # Provides integration with the Claude Code CLI tool for AI-powered
10
+ # coding assistance.
11
+ #
12
+ # @example Basic usage
13
+ # provider = AgentHarness::Providers::Anthropic.new
14
+ # response = provider.send_message(prompt: "Hello!")
15
class Anthropic < Base
  # Model name pattern for Anthropic Claude models.
  # Anchored with \A and \z (whole string) rather than ^ and $ (per line),
  # so a multi-line value cannot pass just because one of its lines matches.
  MODEL_PATTERN = /\Aclaude-[\d.-]+-(?:opus|sonnet|haiku)(?:-\d{8})?\z/i

  class << self
    # @return [Symbol] provider identifier
    def provider_name
      :claude
    end

    # @return [String] name of the CLI executable
    def binary_name
      "claude"
    end

    # Asks the configured command executor whether the CLI binary can be found.
    #
    # @return [Boolean]
    def available?
      executor = AgentHarness.configuration.command_executor
      !!executor.which(binary_name)
    end

    # @return [Hash] :domains (Array<String>) and :ip_ranges (Array) for this provider
    def firewall_requirements
      {
        domains: [
          "api.anthropic.com",
          "claude.ai",
          "console.anthropic.com"
        ],
        ip_ranges: []
      }
    end

    # @return [Array<Hash>] entries with :path, :description and :symlink keys
    def instruction_file_paths
      [
        {
          path: "CLAUDE.md",
          description: "Claude Code CLI agent instructions",
          symlink: true
        }
      ]
    end

    # Lists models by running `<binary> models list` and parsing its stdout.
    #
    # The command goes through the configured command executor (the same
    # path #fetch_mcp_servers uses) because it accepts a timeout. The
    # previous Open3.capture3 call was handed a `timeout` option, which
    # Process.spawn rejects with ArgumentError, so discovery always fell
    # into the rescue below and returned [].
    #
    # @return [Array<Hash>] model descriptions; [] when the CLI is
    #   unavailable, the command fails, or any error is raised
    def discover_models
      return [] unless available?

      begin
        executor = AgentHarness.configuration.command_executor
        result = executor.execute([binary_name, "models", "list"], timeout: 10)
        return [] unless result.success?

        parse_models_list(result.stdout)
      rescue => e
        # Best effort: discovery failures are logged at debug level, not raised.
        AgentHarness.logger&.debug("[AgentHarness::Anthropic] Model discovery failed: #{e.message}")
        []
      end
    end

    # Normalize a provider-specific model name to its model family by
    # dropping a trailing "-YYYYMMDD" style (eight digit) suffix.
    #
    # @param provider_model_name [String]
    # @return [String]
    def model_family(provider_model_name)
      provider_model_name.sub(/-\d{8}$/, "")
    end

    # Convert a model family name to the provider's preferred model name.
    # The family name is returned unchanged.
    #
    # @param family_name [String]
    # @return [String]
    def provider_model_name(family_name)
      family_name
    end

    # Check if this provider supports a given model family.
    #
    # @param family_name [String]
    # @return [Boolean] true when the name matches MODEL_PATTERN
    def supports_model_family?(family_name)
      MODEL_PATTERN.match?(family_name)
    end

    private

    # Turns the CLI's model listing into model hashes, ignoring blank
    # lines, separator rules ("---", "===") and header rows that start
    # with "Model" or "Name" (case-insensitive).
    #
    # @param output [String, nil] raw stdout
    # @return [Array<Hash>]
    def parse_models_list(output)
      return [] if output.nil? || output.empty?

      candidates = output.lines.map(&:strip).reject do |line|
        line.empty? || line.match?(/^[-=]+$/) || line.match?(/^(Model|Name)/i)
      end

      candidates.map { |line| parse_model_line(line) }.compact
    end

    # Parses one listing line.
    #
    # @param line [String] stripped line
    # @return [Hash, nil] model info, or nil when the line names no model
    def parse_model_line(line)
      # Format 1: simple list, the model name is the first token.
      return build_model_info(line.split.first) if line.match?(/^claude-\d/)

      # Format 2: table with columns separated by two or more spaces.
      columns = line.split(/\s{2,}/)
      first_column = columns.first
      return nil unless first_column&.match?(/^claude/)

      model_name = first_column
      # An eight digit second column is appended to the name as a suffix.
      if columns.size > 1 && columns[1].match?(/^\d{8}$/)
        model_name = "#{model_name}-#{columns[1]}"
      end

      build_model_info(model_name)
    end

    # @param model_name [String]
    # @return [Hash] :name, :family, :tier, :capabilities, :context_window, :provider
    def build_model_info(model_name)
      family = model_family(model_name)

      {
        name: model_name,
        family: family,
        tier: classify_tier(model_name),
        capabilities: extract_capabilities(model_name),
        context_window: infer_context_window(family),
        provider: "anthropic"
      }
    end

    # @param model_name [String]
    # @return [String] "advanced" for opus, "mini" for haiku, otherwise "standard"
    def classify_tier(model_name)
      normalized = model_name.downcase

      if normalized.include?("opus")
        "advanced"
      elsif normalized.include?("haiku")
        "mini"
      else
        "standard"
      end
    end

    # @param model_name [String]
    # @return [Array<String>] "chat" and "code", plus "vision" unless the
    #   name contains "haiku"
    def extract_capabilities(model_name)
      capabilities = ["chat", "code"]
      capabilities << "vision" unless model_name.downcase.include?("haiku")
      capabilities
    end

    # @param family [String]
    # @return [Integer, nil] 200_000 when the family contains "claude-3", else nil
    def infer_context_window(family)
      200_000 if family.match?(/claude-3/)
    end
  end

  # @return [String]
  def name
    "anthropic"
  end

  # @return [String]
  def display_name
    "Anthropic Claude CLI"
  end

  # @return [Hash{Symbol => Boolean}] capability flags declared for this provider
  def capabilities
    {
      streaming: true,
      file_upload: true,
      vision: true,
      tool_use: true,
      json_mode: true,
      mcp: true,
      dangerous_mode: true
    }
  end

  # @return [Boolean]
  def supports_mcp?
    true
  end

  # @return [Boolean]
  def supports_dangerous_mode?
    true
  end

  # @return [Array<String>] CLI flags appended when dangerous mode is requested
  def dangerous_mode_flags
    ["--dangerously-skip-permissions"]
  end

  # @return [Hash{Symbol => Array<Regexp>}] message patterns grouped by error category
  def error_patterns
    {
      rate_limited: [
        /rate.?limit/i,
        /too.?many.?requests/i,
        /429/,
        /overloaded/i,
        /session.?limit/i
      ],
      auth_expired: [
        /oauth.*token.*expired/i,
        /authentication.*error/i,
        /invalid.*api.*key/i,
        /unauthorized/i,
        /401/
      ],
      quota_exceeded: [
        /quota.*exceeded/i,
        /usage.*limit/i,
        /credit.*exhausted/i
      ],
      transient: [
        /timeout/i,
        /connection.*reset/i,
        /temporary.*error/i,
        /service.*unavailable/i,
        /503/,
        /502/,
        /504/
      ],
      permanent: [
        /invalid.*model/i,
        /unsupported.*operation/i,
        /not.*found/i,
        /404/,
        /bad.*request/i,
        /400/,
        /model.*deprecated/i,
        /end-of-life/i
      ]
    }
  end

  # Runs `<binary> mcp list` through the executor and parses the result.
  #
  # @return [Array<Hash>] parsed servers; [] when the CLI is unavailable,
  #   the command fails, or any error is raised
  def fetch_mcp_servers
    return [] unless self.class.available?

    begin
      result = @executor.execute([self.class.binary_name, "mcp", "list"], timeout: 5)
      return [] unless result.success?

      parse_claude_mcp_output(result.stdout)
    rescue => e
      # Best effort: failures are logged at debug level, not raised.
      log_debug("fetch_mcp_servers_failed", error: e.message)
      []
    end
  end

  protected

  # Assembles the CLI invocation for a prompt.
  #
  # @param prompt [String] the prompt
  # @param options [Hash] per-call options; :dangerous_mode is read here
  # @return [Array<String>] command array
  def build_command(prompt, options)
    cmd = [self.class.binary_name, "--print", "--output-format=text"]

    # Add model if specified
    model = @config.model
    cmd += ["--model", model] if model && !model.empty?

    # Add dangerous mode if requested
    cmd += dangerous_mode_flags if options[:dangerous_mode] && supports_dangerous_mode?

    # Add custom flags from config
    cmd += @config.default_flags if @config.default_flags&.any?

    # NOTE(review): confirm the installed CLI accepts a --prompt flag
    # (verify against `claude --help`); behavior is left unchanged here.
    cmd + ["--prompt", prompt]
  end

  # Builds a Response from the command result. On failure the error is a
  # summary derived from stdout and stderr combined.
  #
  # @param result [CommandExecutor::Result] execution result
  # @param duration [Float] execution duration
  # @return [Response]
  def parse_response(result, duration:)
    error = nil

    if result.failed?
      combined = [result.stdout, result.stderr].compact.join("\n")
      error = classify_error_message(combined)
    end

    Response.new(
      output: result.stdout,
      exit_code: result.exit_code,
      duration: duration,
      provider: self.class.provider_name,
      model: @config.model,
      error: error
    )
  end

  # @return [Integer] timeout in seconds
  def default_timeout
    300
  end

  private

  # Collapses well-known failure texts into a short label. Matching is
  # case-insensitive; unrecognized messages are returned unchanged.
  #
  # @param message [String]
  # @return [String]
  def classify_error_message(message)
    text = message.downcase

    return "Rate limit exceeded" if text.include?("rate limit") || text.include?("session limit")
    return "Model deprecated" if text.include?("deprecat") || text.include?("end-of-life")
    return "Authentication error" if text.include?("oauth token") || text.include?("authentication")

    message
  end

  # Parses `mcp list` output. Lines containing "checking mcp server health"
  # (case-insensitive), blank lines, and lines that do not match the
  # expected shape are skipped.
  #
  # @param output [String, nil] raw stdout
  # @return [Array<Hash>] entries with :name, :status, :description,
  #   :enabled, :error and :source keys
  def parse_claude_mcp_output(output)
    servers = []
    return servers unless output

    output.lines.each do |raw_line|
      next if /checking mcp server health/i.match?(raw_line)

      line = raw_line.strip
      next if line.empty?

      # Parse format: "name: command - ✓ Connected"
      match = /^([^:]+):\s*(.+?)\s*-\s*(✓|✗)\s*(.+)$/.match(line)
      next unless match

      status_symbol = match[3]
      connected = status_symbol == "✓"

      servers << {
        name: match[1].strip,
        status: connected ? "connected" : "error",
        description: match[2].strip,
        enabled: connected,
        error: (status_symbol == "✗") ? match[4].strip : nil,
        source: "claude_cli"
      }
    end

    servers
  end

  # Writes a debug line tagged with this provider, if a logger is set.
  def log_debug(action, **context)
    @logger&.debug("[AgentHarness::Anthropic] #{action}: #{context.inspect}")
  end
end
344
+ end
345
+ end
@@ -0,0 +1,198 @@
1
+ # frozen_string_literal: true
2
+
3
+ module AgentHarness
4
+ module Providers
5
+ # Base class for all providers
6
+ #
7
+ # Provides common functionality for provider implementations including
8
+ # command execution, error handling, and response parsing.
9
+ #
10
+ # @example Implementing a provider
11
+ # class MyProvider < AgentHarness::Providers::Base
12
+ # class << self
13
+ # def provider_name
14
+ # :my_provider
15
+ # end
16
+ #
17
+ # def binary_name
18
+ # "my-cli"
19
+ # end
20
+ #
21
+ # def available?
22
+ # system("which my-cli > /dev/null 2>&1")
23
+ # end
24
+ # end
25
+ #
26
+ # protected
27
+ #
28
+ # def build_command(prompt, options)
29
+ # [self.class.binary_name, "--prompt", prompt]
30
+ # end
31
+ # end
32
class Base
  include Adapter

  attr_reader :config, :executor, :logger

  # Initialize the provider
  #
  # Each argument falls back to a default when nil: a fresh ProviderConfig
  # for this provider, the globally configured command executor, and the
  # AgentHarness logger.
  #
  # @param config [ProviderConfig, nil] provider configuration
  # @param executor [CommandExecutor, nil] command executor
  # @param logger [Logger, nil] logger instance
  def initialize(config: nil, executor: nil, logger: nil)
    @config = config || ProviderConfig.new(self.class.provider_name)
    @executor = executor || AgentHarness.configuration.command_executor
    @logger = logger || AgentHarness.logger
  end

  # Configure the provider instance
  #
  # @param options [Hash] configuration options merged into the config
  # @return [self]
  def configure(options = {})
    @config.merge!(options)
    self
  end

  # Main send_message implementation
  #
  # Builds the command, runs it with a timeout, parses the result and
  # records token usage when the response carries any. Any StandardError
  # raised along the way is classified and re-raised as a wrapped error.
  #
  # @param prompt [String] the prompt to send
  # @param options [Hash] additional options; :timeout overrides the
  #   configured and default timeouts
  # @return [Response] the response
  def send_message(prompt:, **options)
    log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)

    command = build_command(prompt, options)
    timeout = options[:timeout] || @config.timeout || default_timeout

    # Use the monotonic clock for elapsed time: unlike Time.now it is not
    # affected by wall-clock adjustments while the command is running.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    result = execute_with_timeout(command, timeout: timeout, env: build_env(options))
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    response = parse_response(result, duration: duration)
    track_tokens(response) if response.tokens

    log_debug("send_message_complete", duration: duration, tokens: response.tokens)

    response
  rescue => e
    handle_error(e, prompt: prompt, options: options)
  end

  # Provider name for display
  #
  # @return [String] the class-level provider name as a string
  def name
    self.class.provider_name.to_s
  end

  # Human-friendly display name
  #
  # @return [String] capitalized #name
  def display_name
    name.capitalize
  end

  protected

  # Build CLI command - override in subclasses
  #
  # @param prompt [String] the prompt
  # @param options [Hash] options
  # @return [Array<String>] command array
  # @raise [NotImplementedError] always, unless overridden
  def build_command(prompt, options)
    raise NotImplementedError, "#{self.class} must implement #build_command"
  end

  # Build environment variables - override in subclasses
  #
  # @param options [Hash] options
  # @return [Hash] environment variables (empty by default)
  def build_env(options)
    {}
  end

  # Parse CLI output into Response - override in subclasses
  #
  # The default uses stdout as the output and, when the result failed,
  # stderr as the error.
  #
  # @param result [CommandExecutor::Result] execution result
  # @param duration [Float] execution duration
  # @return [Response] parsed response
  def parse_response(result, duration:)
    Response.new(
      output: result.stdout,
      exit_code: result.exit_code,
      duration: duration,
      provider: self.class.provider_name,
      model: @config.model,
      error: result.failed? ? result.stderr : nil
    )
  end

  # Default timeout
  #
  # @return [Integer] timeout in seconds
  def default_timeout
    300
  end

  private

  # Delegates to the executor, forwarding the timeout and environment.
  def execute_with_timeout(command, timeout:, env:)
    @executor.execute(command, timeout: timeout, env: env)
  end

  # Records the response's token counts with the shared token tracker.
  # Missing :input / :output counts are recorded as 0; :total is passed
  # through as-is.
  def track_tokens(response)
    usage = response.tokens
    return unless usage

    AgentHarness.token_tracker.record(
      provider: self.class.provider_name,
      model: @config.model,
      input_tokens: usage[:input] || 0,
      output_tokens: usage[:output] || 0,
      total_tokens: usage[:total]
    )
  end

  # Classifies the error against this provider's patterns, logs it, and
  # raises the corresponding wrapped error. Never returns normally.
  def handle_error(error, prompt:, options:)
    classification = ErrorTaxonomy.classify(error, error_patterns)

    log_error("send_message_error",
      error: error.class.name,
      message: error.message,
      classification: classification)

    raise map_to_error_class(classification, error)
  end

  # Picks the error class for a classification; anything other than
  # :rate_limited, :auth_expired or :timeout becomes a ProviderError.
  # The original exception is kept on the wrapper.
  def map_to_error_class(classification, original_error)
    error_class =
      case classification
      when :rate_limited then RateLimitError
      when :auth_expired then AuthenticationError
      when :timeout then TimeoutError
      else ProviderError
      end

    error_class.new(original_error.message, original_error: original_error)
  end

  # Writes a debug line tagged with the provider name, if a logger is set.
  def log_debug(action, **context)
    @logger&.debug("[AgentHarness::#{self.class.provider_name}] #{action}: #{context.inspect}")
  end

  # Writes an error line tagged with the provider name, if a logger is set.
  def log_error(action, **context)
    @logger&.error("[AgentHarness::#{self.class.provider_name}] #{action}: #{context.inspect}")
  end
end
197
+ end
198
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module AgentHarness
4
+ module Providers
5
+ # OpenAI Codex CLI provider
6
+ #
7
+ # Provides integration with the OpenAI Codex CLI tool.
8
class Codex < Base
  class << self
    # @return [Symbol] provider identifier
    def provider_name
      :codex
    end

    # @return [String] name of the CLI executable
    def binary_name
      "codex"
    end

    # Asks the configured command executor whether the CLI binary can be found.
    #
    # @return [Boolean]
    def available?
      located = AgentHarness.configuration.command_executor.which(binary_name)
      located ? true : false
    end

    # @return [Hash] :domains (Array<String>) and :ip_ranges (Array) for this provider
    def firewall_requirements
      {
        domains: ["api.openai.com", "openai.com"],
        ip_ranges: []
      }
    end

    # @return [Array<Hash>] entries with :path, :description and :symlink keys
    def instruction_file_paths
      [
        {path: "AGENTS.md", description: "OpenAI Codex agent instructions", symlink: false}
      ]
    end

    # Returns a single fixed model entry when the CLI is available.
    #
    # @return [Array<Hash>] [] when the CLI is unavailable
    def discover_models
      if available?
        [
          {name: "codex", family: "codex", tier: "standard", provider: "codex"}
        ]
      else
        []
      end
    end
  end

  # @return [String]
  def name
    "codex"
  end

  # @return [String]
  def display_name
    "OpenAI Codex CLI"
  end

  # @return [Hash{Symbol => Boolean}] capability flags declared for this provider
  def capabilities
    {
      streaming: false,
      file_upload: false,
      vision: false,
      tool_use: true,
      json_mode: false,
      mcp: false,
      dangerous_mode: false
    }
  end

  # @return [Boolean]
  def supports_sessions?
    true
  end

  # Flags that attach a session to the command.
  #
  # @param session_id [String, nil]
  # @return [Array<String>] [] when the id is missing or empty
  def session_flags(session_id)
    return [] if !session_id || session_id.empty?

    ["--session", session_id]
  end

  protected

  # Assembles the CLI invocation: binary, optional session flags, then the prompt.
  #
  # @param prompt [String] the prompt
  # @param options [Hash] per-call options; :session is read here
  # @return [Array<String>] command array
  def build_command(prompt, options)
    session = options[:session]

    command = [self.class.binary_name]
    command += session_flags(session) if session
    command + ["--prompt", prompt]
  end

  # @return [Integer] timeout in seconds
  def default_timeout
    300
  end
end
99
+ end
100
+ end