agent-harness 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. checksums.yaml +7 -0
  2. data/.markdownlint.yml +6 -0
  3. data/.markdownlintignore +8 -0
  4. data/.release-please-manifest.json +3 -0
  5. data/.rspec +3 -0
  6. data/.simplecov +26 -0
  7. data/.tool-versions +1 -0
  8. data/CHANGELOG.md +27 -0
  9. data/CODE_OF_CONDUCT.md +10 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +274 -0
  12. data/Rakefile +103 -0
  13. data/bin/console +11 -0
  14. data/bin/setup +8 -0
  15. data/lib/agent_harness/command_executor.rb +146 -0
  16. data/lib/agent_harness/configuration.rb +299 -0
  17. data/lib/agent_harness/error_taxonomy.rb +128 -0
  18. data/lib/agent_harness/errors.rb +63 -0
  19. data/lib/agent_harness/orchestration/circuit_breaker.rb +169 -0
  20. data/lib/agent_harness/orchestration/conductor.rb +179 -0
  21. data/lib/agent_harness/orchestration/health_monitor.rb +170 -0
  22. data/lib/agent_harness/orchestration/metrics.rb +167 -0
  23. data/lib/agent_harness/orchestration/provider_manager.rb +240 -0
  24. data/lib/agent_harness/orchestration/rate_limiter.rb +113 -0
  25. data/lib/agent_harness/providers/adapter.rb +163 -0
  26. data/lib/agent_harness/providers/aider.rb +109 -0
  27. data/lib/agent_harness/providers/anthropic.rb +345 -0
  28. data/lib/agent_harness/providers/base.rb +198 -0
  29. data/lib/agent_harness/providers/codex.rb +100 -0
  30. data/lib/agent_harness/providers/cursor.rb +281 -0
  31. data/lib/agent_harness/providers/gemini.rb +136 -0
  32. data/lib/agent_harness/providers/github_copilot.rb +155 -0
  33. data/lib/agent_harness/providers/kilocode.rb +73 -0
  34. data/lib/agent_harness/providers/opencode.rb +75 -0
  35. data/lib/agent_harness/providers/registry.rb +137 -0
  36. data/lib/agent_harness/response.rb +100 -0
  37. data/lib/agent_harness/token_tracker.rb +170 -0
  38. data/lib/agent_harness/version.rb +5 -0
  39. data/lib/agent_harness.rb +115 -0
  40. data/release-please-config.json +63 -0
  41. metadata +129 -0
@@ -0,0 +1,179 @@
# frozen_string_literal: true

module AgentHarness
  module Orchestration
    # Main orchestration entry point
    #
    # Provides a simple interface for sending messages while managing
    # provider selection, fallback, retries, and error handling internally.
    #
    # @example Basic usage
    #   conductor = AgentHarness::Orchestration::Conductor.new
    #   response = conductor.send_message("Hello, world!")
    #
    # @example With explicit provider
    #   response = conductor.send_message("Hello", provider: :gemini)
    class Conductor
      attr_reader :provider_manager, :metrics

      # Create a new conductor
      #
      # @param config [Configuration, nil] configuration object; falls back to
      #   AgentHarness.configuration when nil
      def initialize(config: nil)
        @config = config || AgentHarness.configuration
        @provider_manager = ProviderManager.new(@config)
        @metrics = Metrics.new
      end

      # Send a message with full orchestration
      #
      # Selects a provider through the provider manager, records metrics for
      # every attempt, and retries according to the retry configuration.
      #
      # @param prompt [String] the prompt to send
      # @param provider [Symbol, nil] preferred provider (defaults to the
      #   configured default provider)
      # @param model [String, nil] model to use
      # @param options [Hash] additional options forwarded to the provider
      # @return [Response] the response
      # @raise [NoProvidersAvailableError] propagated unchanged
      # @raise [RateLimitError, CircuitOpenError, TimeoutError, ProviderError]
      #   when the failure persists after the retry budget is spent; unknown
      #   errors are wrapped in ProviderError
      def send_message(prompt, provider: nil, model: nil, **options)
        provider_name = provider || @config.default_provider

        with_orchestration(provider_name, model, options) do |selected_provider|
          selected_provider.send_message(prompt: prompt, model: model, **options)
        end
      end

      # Execute with explicit provider (bypass orchestration)
      #
      # No metrics are recorded and no retry or switching happens here.
      #
      # @param prompt [String] the prompt to send
      # @param provider [Symbol] the provider to use
      # @param options [Hash] additional options
      # @return [Response] the response
      def execute_direct(prompt, provider:, **options)
        provider_instance = @provider_manager.get_provider(provider)
        provider_instance.send_message(prompt: prompt, **options)
      end

      # Get current orchestration status
      #
      # @return [Hash] status information
      def status
        {
          current_provider: @provider_manager.current_provider,
          available_providers: @provider_manager.available_providers,
          health: @provider_manager.health_status,
          metrics: @metrics.summary
        }
      end

      # Reset all orchestration state
      #
      # @return [void]
      def reset!
        @provider_manager.reset!
        @metrics.reset!
      end

      private

      # Runs the given block against a selected provider, recording metrics
      # and retrying on failure.
      #
      # NOTE(review): +model+ and +options+ are accepted but not used here.
      # NOTE(review): a retry re-selects using the name of the provider that
      # just failed; any switch is expected to be honoured by the provider
      # manager's own state - confirm against ProviderManager#select_provider.
      #
      # @param provider_name [Symbol] preferred provider
      # @yieldparam provider [Object] the selected provider instance
      # @return [Object] whatever the block returns
      def with_orchestration(provider_name, model, options)
        retries = 0
        max_retries = @config.orchestration_config.retry_config.max_attempts

        # An explicit begin block is required: `retry` must restart only this
        # section so the retry counter above is preserved.
        begin
          # Select provider (may return different provider based on health)
          provider = @provider_manager.select_provider(provider_name)
          provider_name = provider.class.provider_name

          @metrics.record_attempt(provider_name)

          started_at = monotonic_now
          response = yield(provider)
          duration = monotonic_now - started_at

          @metrics.record_success(provider_name, duration)
          @provider_manager.record_success(provider_name)

          response
        rescue NoProvidersAvailableError
          # Re-raise as-is, don't wrap. Listed first so a broader clause below
          # cannot shadow it should it descend from ProviderError.
          raise
        rescue RateLimitError => e
          @provider_manager.mark_rate_limited(provider_name, reset_at: e.reset_time)
          handle_provider_failure(e, provider_name, :switch)
          retry if should_retry?(retries += 1, max_retries)
          raise
        rescue CircuitOpenError => e
          handle_provider_failure(e, provider_name, :switch)
          retry if should_retry?(retries += 1, max_retries)
          raise
        rescue TimeoutError, ProviderError => e
          @provider_manager.record_failure(provider_name)
          handle_provider_failure(e, provider_name, :retry)
          retry if should_retry?(retries += 1, max_retries)
          raise
        rescue => e
          # handle_provider_failure records the failure metric; recording it
          # here as well would count every unknown error twice.
          @provider_manager.record_failure(provider_name)

          # Try switching for unknown errors
          handle_provider_failure(e, provider_name, :switch)
          retry if should_retry?(retries += 1, max_retries)
          raise ProviderError.new(e.message, original_error: e)
        end
      end

      # @param current_retries [Integer] failures seen so far (including the
      #   one being handled)
      # @param max_retries [Integer] configured max_attempts
      # @return [Boolean] true when another attempt is allowed
      def should_retry?(current_retries, max_retries)
        return false unless @config.orchestration_config.retry_config.enabled
        current_retries < max_retries
      end

      # Records the failure metric, then applies the recovery strategy.
      #
      # @param error [Exception] the error that occurred
      # @param provider_name [Symbol] provider that failed
      # @param strategy [Symbol] :switch to try another provider, :retry to
      #   wait for the retry delay; any other value only records the failure
      # @return [void]
      def handle_provider_failure(error, provider_name, strategy)
        @metrics.record_failure(provider_name, error)

        case strategy
        when :switch
          switch_after_failure(error, provider_name)
        when :retry
          delay = calculate_retry_delay
          sleep(delay) if delay > 0
        end
      end

      # Asks the provider manager to switch providers when auto-switching is
      # enabled, and records the switch if one happened.
      #
      # @return [void]
      def switch_after_failure(error, provider_name)
        return unless @config.orchestration_config.auto_switch_on_error

        replacement = begin
          @provider_manager.switch_provider(
            reason: error.class.name,
            context: {error: error.message}
          )
        rescue NoProvidersAvailableError
          # Nothing to switch to; the caller decides whether to retry or raise.
          nil
        end
        return unless replacement

        @metrics.record_switch(provider_name, replacement.class.provider_name, error.class.name)
      end

      # Delay before a retry: the configured base delay, plus up to 50% random
      # jitter when enabled, capped at the configured max delay.
      #
      # @return [Numeric] seconds to wait (0 when retries are disabled)
      def calculate_retry_delay
        retry_config = @config.orchestration_config.retry_config
        return 0 unless retry_config.enabled

        delay = retry_config.base_delay
        delay += rand * delay * 0.5 if retry_config.jitter

        [delay, retry_config.max_delay].min
      end

      # Monotonic timestamp in seconds; unaffected by wall-clock adjustments,
      # so elapsed-time differences can never be negative.
      #
      # @return [Float]
      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end
    end
  end
end
@@ -0,0 +1,170 @@
# frozen_string_literal: true

module AgentHarness
  module Orchestration
    # Monitors provider health based on success/failure metrics
    #
    # Tracks success and failure rates to determine provider health status.
    # Uses a sliding window approach to focus on recent performance.
    #
    # All access to the per-provider table goes through a mutex, and reads
    # never create entries: asking about a provider that has recorded nothing
    # reports it as healthy without adding it to the table.
    #
    # @example
    #   monitor = HealthMonitor.new
    #   monitor.record_success(:claude)
    #   monitor.healthy?(:claude) # => true
    class HealthMonitor
      DEFAULT_WINDOW_SIZE = 100
      DEFAULT_HEALTH_THRESHOLD = 0.5

      # Create a new health monitor
      #
      # @param config [HealthCheckConfig, nil] configuration object; when nil
      #   the monitor is enabled with a failure threshold of 3
      # @param window_size [Integer] number of events to track
      # @param health_threshold [Float] minimum success rate for healthy
      def initialize(config = nil, window_size: nil, health_threshold: nil)
        if config
          @enabled = config.enabled
          @failure_threshold = config.failure_threshold
        else
          @enabled = true
          @failure_threshold = 3
        end
        # NOTE(review): @failure_threshold is stored but not read anywhere in
        # this class.

        @window_size = window_size || DEFAULT_WINDOW_SIZE
        @health_threshold = health_threshold || DEFAULT_HEALTH_THRESHOLD
        # Plain hash on purpose: a default block would insert on every read.
        @provider_metrics = {}
        @mutex = Mutex.new
      end

      # Record a successful call for a provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [void]
      def record_success(provider_name)
        @mutex.synchronize { tracked(provider_name).record_success }
      end

      # Record a failed call for a provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [void]
      def record_failure(provider_name)
        @mutex.synchronize { tracked(provider_name).record_failure }
      end

      # Check if a provider is healthy
      #
      # Always true when monitoring is disabled or the provider has no
      # recorded calls.
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [Boolean] true if healthy
      def healthy?(provider_name)
        return true unless @enabled

        @mutex.synchronize { healthy_metrics?(@provider_metrics[provider_name.to_sym]) }
      end

      # Get health metrics for a provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [Hash] health metrics, including a :healthy flag
      def metrics_for(provider_name)
        @mutex.synchronize do
          metrics = @provider_metrics[provider_name.to_sym]
          # Health is evaluated inline: calling #healthy? here would try to
          # re-acquire the (non-reentrant) mutex.
          snapshot(metrics).merge(healthy: !@enabled || healthy_metrics?(metrics))
        end
      end

      # Get health status for all tracked providers
      #
      # Only providers with at least one recorded call appear.
      #
      # @return [Hash<Symbol, Hash>] health status by provider
      def all_metrics
        @mutex.synchronize do
          @provider_metrics.transform_values { |metrics| snapshot(metrics) }
        end
      end

      # Reset all health metrics
      #
      # @return [void]
      def reset!
        @mutex.synchronize { @provider_metrics.clear }
      end

      # Reset metrics for a specific provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [void]
      def reset_provider!(provider_name)
        @mutex.synchronize { @provider_metrics.delete(provider_name.to_sym) }
      end

      private

      # Fetches or creates the metrics entry. Caller must hold the mutex.
      def tracked(provider_name)
        @provider_metrics[provider_name.to_sym] ||= ProviderHealthMetrics.new(@window_size)
      end

      # Health rule for one entry (nil means "never recorded").
      def healthy_metrics?(metrics)
        return true if metrics.nil? || metrics.total_calls == 0

        metrics.success_rate >= @health_threshold
      end

      # Plain-hash view of one entry; untracked providers report a clean slate.
      def snapshot(metrics)
        return {success_rate: 1.0, total_calls: 0, recent_successes: 0, recent_failures: 0} if metrics.nil?

        {
          success_rate: metrics.success_rate,
          total_calls: metrics.total_calls,
          recent_successes: metrics.recent_successes,
          recent_failures: metrics.recent_failures
        }
      end
    end

    # Internal class for tracking per-provider metrics
    #
    # Keeps the most recent events in a bounded list together with running
    # success/failure counts for that window, plus a lifetime call count.
    class ProviderHealthMetrics
      attr_reader :total_calls, :recent_successes, :recent_failures

      # @param window_size [Integer] maximum number of events retained
      def initialize(window_size)
        @window_size = window_size
        @events = []
        @total_calls = 0
        @recent_successes = 0
        @recent_failures = 0
      end

      # @return [void]
      def record_success
        add_event(:success)
      end

      # @return [void]
      def record_failure
        add_event(:failure)
      end

      # Share of successes among the events currently in the window.
      #
      # @return [Float] 1.0 when no events have been recorded
      def success_rate
        return 1.0 if @events.empty?
        @recent_successes.to_f / @events.size
      end

      private

      def add_event(type)
        @total_calls += 1

        # Remove oldest event if at capacity
        evict_oldest if @events.size >= @window_size

        @events << type
        adjust(type, 1)
      end

      # Drops the oldest event and its contribution to the window counters.
      # With a non-positive window size the list can be empty here; in that
      # case nothing was evicted and the counters must not be touched.
      def evict_oldest
        evicted = @events.shift
        adjust(evicted, -1) unless evicted.nil?
      end

      def adjust(type, delta)
        if type == :success
          @recent_successes += delta
        else
          @recent_failures += delta
        end
      end
    end
  end
end
@@ -0,0 +1,167 @@
# frozen_string_literal: true

module AgentHarness
  module Orchestration
    # Collects and aggregates orchestration metrics
    #
    # Tracks attempts, successes, failures, and timing information
    # for provider orchestration. All state is guarded by a single mutex.
    #
    # Memory use is bounded: durations are kept as a running total and count
    # per provider, and only the most recent switches are retained (the
    # summary never exposes more than that).
    class Metrics
      # Number of provider switches kept for #summary's :recent_switches.
      MAX_RECENT_SWITCHES = 10

      def initialize
        @mutex = Mutex.new
        reset!
      end

      # Record an attempt for a provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [void]
      def record_attempt(provider_name)
        @mutex.synchronize do
          @attempts[provider_name.to_sym] += 1
          @total_attempts += 1
        end
      end

      # Record a success for a provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @param duration [Float] request duration in seconds
      # @return [void]
      def record_success(provider_name, duration)
        @mutex.synchronize do
          provider = provider_name.to_sym
          @successes[provider] += 1
          @total_successes += 1
          @duration_totals[provider] += duration
          @duration_counts[provider] += 1
          @last_success_time = Time.now
        end
      end

      # Record a failure for a provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @param error [Exception] the error that occurred
      # @return [void]
      def record_failure(provider_name, error)
        @mutex.synchronize do
          @failures[provider_name.to_sym] += 1
          @total_failures += 1
          @error_counts[error.class.name] += 1
          @last_failure_time = Time.now
        end
      end

      # Record a provider switch
      #
      # @param from_provider [Symbol, String] the original provider
      # @param to_provider [Symbol, String] the new provider
      # @param reason [String] reason for switch
      # @return [void]
      def record_switch(from_provider, to_provider, reason)
        @mutex.synchronize do
          @switches << {
            from: from_provider.to_sym,
            to: to_provider.to_sym,
            reason: reason,
            timestamp: Time.now
          }
          # Keep only what #summary can report so the history cannot grow
          # without bound; @total_switches still counts every switch.
          @switches.shift while @switches.size > MAX_RECENT_SWITCHES
          @total_switches += 1
        end
      end

      # Get metrics summary
      #
      # @return [Hash] totals, overall success rate, per-provider breakdown,
      #   error counts by class name, last success/failure times, and the
      #   most recent switches
      def summary
        @mutex.synchronize do
          {
            total_attempts: @total_attempts,
            total_successes: @total_successes,
            total_failures: @total_failures,
            total_switches: @total_switches,
            success_rate: success_rate,
            by_provider: provider_summary,
            error_counts: @error_counts.dup,
            last_success_time: @last_success_time,
            last_failure_time: @last_failure_time,
            recent_switches: @switches.last(MAX_RECENT_SWITCHES)
          }
        end
      end

      # Get metrics for a specific provider
      #
      # @param provider_name [Symbol, String] the provider name
      # @return [Hash] provider metrics
      def provider_metrics(provider_name)
        provider = provider_name.to_sym
        @mutex.synchronize { provider_entry(provider) }
      end

      # Reset all metrics
      #
      # @return [void]
      def reset!
        @mutex.synchronize do
          @attempts = Hash.new(0)
          @successes = Hash.new(0)
          @failures = Hash.new(0)
          @duration_totals = Hash.new(0.0)
          @duration_counts = Hash.new(0)
          @error_counts = Hash.new(0)
          @switches = []

          @total_attempts = 0
          @total_successes = 0
          @total_failures = 0
          @total_switches = 0

          @last_success_time = nil
          @last_failure_time = nil
        end
      end

      private

      # The helpers below assume the caller already holds @mutex.

      # Overall successes / attempts; 1.0 before any attempt.
      def success_rate
        return 1.0 if @total_attempts == 0
        @total_successes.to_f / @total_attempts
      end

      # Per-provider successes / attempts; 1.0 before any attempt.
      def provider_success_rate(provider)
        attempts = @attempts[provider]
        return 1.0 if attempts == 0
        @successes[provider].to_f / attempts
      end

      # Mean duration of recorded successes; 0.0 when there are none.
      # The total is a Float, so Integer durations are not truncated.
      def average_duration(provider)
        count = @duration_counts[provider]
        return 0.0 if count == 0
        @duration_totals[provider] / count
      end

      def provider_entry(provider)
        {
          attempts: @attempts[provider],
          successes: @successes[provider],
          failures: @failures[provider],
          success_rate: provider_success_rate(provider),
          average_duration: average_duration(provider)
        }
      end

      def provider_summary
        providers = (@attempts.keys + @successes.keys + @failures.keys).uniq
        providers.to_h { |provider| [provider, provider_entry(provider)] }
      end
    end
  end
end