agent-harness 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,391 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module AgentHarness
8
+ # OpenAI-compatible HTTP transport for multi-turn chat completions.
9
+ #
10
+ # Supports any endpoint that implements the OpenAI chat completions API,
11
+ # including OpenAI, GitHub Models, OpenRouter, and other compatible services.
12
+ #
13
+ # @example Non-streaming
14
+ # transport = AgentHarness::OpenAICompatibleTransport.new(
15
+ # base_url: "https://api.openai.com/v1",
16
+ # api_key: "sk-...",
17
+ # model: "gpt-4o"
18
+ # )
19
+ # response = transport.chat(messages: [{ role: "user", content: "Hello" }])
20
+ #
21
+ # @example Streaming
22
+ # transport.chat(messages: msgs, stream: true) do |chunk|
23
+ # case chunk[:type]
24
+ # when :text then print chunk[:content]
25
+ # when :usage then puts "\nTokens: #{chunk[:input_tokens]}+#{chunk[:output_tokens]}"
26
+ # when :done then puts "Stream complete"
27
+ # end
28
+ # end
29
+ class OpenAICompatibleTransport
30
+ DEFAULT_TIMEOUT = 300
31
+ DEFAULT_MAX_TOKENS = 4096
32
+ USER_AGENT = "AgentHarness/1.0"
33
+
34
# Builds a transport bound to one endpoint, one credential and a default model.
#
# @param base_url [String] API root (e.g. "https://api.openai.com/v1"); a single trailing "/" is dropped
# @param api_key [String] token sent as the Bearer credential on every request
# @param model [String] model used when a request does not name one
# @param logger [Logger, nil] receives debug/warn output when given
def initialize(base_url:, api_key:, model:, logger: nil)
  @logger = logger
  @model = model
  @api_key = api_key
  @base_url = base_url.chomp("/")
end
44
+
45
# Send a chat completion request.
#
# Streaming events can be delivered to a block, to an +on_chat_chunk+
# proc, or to an observer that responds to +on_chat_chunk+. When several
# receivers are supplied, each one receives every event. If +stream+ is
# true but no receiver is supplied, the request is sent non-streaming.
#
# @param messages [Array<Hash>] conversation messages
# @param tools [Array<Hash>, nil] tool/function definitions
# @param stream [Boolean] whether to stream the response
# @param max_tokens [Integer, nil] maximum tokens in the response (defaults to DEFAULT_MAX_TOKENS)
# @param temperature [Float, nil] sampling temperature
# @param model [String, nil] model override for this request
# @param on_chat_chunk [Proc, nil] callback for structured streaming events
# @param observer [#on_chat_chunk, nil] observer receiving streaming events
# @yield [Hash] streaming chunks when stream: true
# @return [Response] the response
# @raise [AuthenticationError] on 401/403 responses
# @raise [RateLimitError] on 429 responses
# @raise [TimeoutError] on network timeouts
# @raise [ProviderError] on other HTTP errors
def chat(messages:, tools: nil, stream: false, max_tokens: nil, temperature: nil,
  model: nil, on_chat_chunk: nil, observer: nil, &on_chunk)
  max_tokens ||= DEFAULT_MAX_TOKENS
  model ||= @model
  uri = URI("#{@base_url}/chat/completions")

  has_stream_receiver = on_chunk || on_chat_chunk || observer_responds_to?(observer, :on_chat_chunk)
  request_stream = stream && has_stream_receiver

  body = build_request_body(
    messages: messages, tools: tools, stream: request_stream,
    max_tokens: max_tokens, temperature: temperature, model: model
  )

  # The monotonic clock keeps the measured duration correct (and never
  # negative) even if the wall clock is adjusted while the request runs.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  if request_stream
    combined = build_chat_chunk_callback(on_chunk, on_chat_chunk, observer)
    result = make_streaming_request(uri, body, &combined)
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    build_streaming_response(result, duration: duration, model: model)
  else
    http_response = make_request(uri, body)
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    parse_response(http_response, duration: duration, model: model)
  end
end
92
+
93
+ private
94
+
95
# Assembles the JSON-serialisable payload for a chat completion call.
#
# Optional keys (+temperature+, +tools+) are only included when a truthy
# value was given; the streaming keys are only included when +stream+ is
# truthy.
#
# @param messages [Array<Hash>] conversation messages
# @param tools [Array<Hash>, nil] tool definitions
# @param stream [Boolean, nil] whether streaming was requested
# @param max_tokens [Integer] response token limit
# @param temperature [Float, nil] sampling temperature
# @param model [String, nil] model name; falls back to the instance default
# @return [Hash] request payload with symbol keys
def build_request_body(messages:, tools:, stream:, max_tokens:, temperature:, model: nil)
  payload = {model: model || @model, max_tokens: max_tokens, messages: messages}
  payload[:temperature] = temperature if temperature
  payload[:tools] = tools if tools
  payload.merge!(stream: true, stream_options: {include_usage: true}) if stream
  payload
end
107
+
108
# Performs a blocking (non-streaming) POST and returns the raw HTTP response.
#
# Low-level network failures are translated into AgentHarness errors so
# callers never see Net::HTTP or Errno exceptions directly.
#
# @param uri [URI] fully-qualified endpoint
# @param body [Hash] request payload (serialised by build_post_request)
# @return [Net::HTTPResponse] whatever the server answered, including non-200 responses
# @raise [TimeoutError] when opening, reading or writing the connection times out
# @raise [ProviderError] on connection-level failures
def make_request(uri, body)
  http = build_http(uri)
  request = build_post_request(uri, body)

  @logger&.debug("[AgentHarness::OpenAICompatibleTransport] POST #{uri} model=#{body[:model]}")

  http.request(request)
rescue Net::OpenTimeout, Net::ReadTimeout, Net::WriteTimeout, Errno::ETIMEDOUT => e
  # Write timeouts and OS-level connect timeouts are timeouts too.
  raise TimeoutError.new(e.message, original_error: e)
rescue SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EHOSTUNREACH,
  Errno::ENETUNREACH, Errno::EPIPE, IOError => e
  raise ProviderError.new("HTTP connection error: #{e.message}", original_error: e)
end
120
+
121
# Performs a streaming POST, feeding server-sent events to +on_chunk+ while
# collecting the full result.
#
# A non-200 status is turned into an error before any stream parsing
# happens (handle_error_response_raw always raises).
#
# @param uri [URI] fully-qualified endpoint
# @param body [Hash] request payload (serialised by build_post_request)
# @yield [Hash] structured streaming events
# @return [Hash] accumulator with :content, :tool_calls, :model and :usage
# @raise [TimeoutError] when opening, reading or writing the connection times out
# @raise [ProviderError] on connection-level failures or HTTP errors
def make_streaming_request(uri, body, &on_chunk)
  http = build_http(uri)
  request = build_post_request(uri, body)

  @logger&.debug("[AgentHarness::OpenAICompatibleTransport] POST #{uri} model=#{body[:model]} stream=true")

  accumulated = {content: +"", tool_calls: [], model: nil, usage: nil}

  http.request(request) do |http_response|
    status_code = http_response.code.to_i
    unless status_code == 200
      response_body = http_response.read_body
      handle_error_response_raw(response_body, status_code)
    end

    parse_sse_stream(http_response, accumulated, &on_chunk)
  end

  accumulated
rescue Net::OpenTimeout, Net::ReadTimeout, Net::WriteTimeout, Errno::ETIMEDOUT => e
  # Write timeouts and OS-level connect timeouts are timeouts too.
  raise TimeoutError.new(e.message, original_error: e)
rescue SocketError, Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EHOSTUNREACH,
  Errno::ENETUNREACH, Errno::EPIPE, IOError => e
  raise ProviderError.new("HTTP connection error: #{e.message}", original_error: e)
end
145
+
146
# Reads a server-sent-events body incrementally and dispatches every JSON
# event to process_stream_event.
#
# Lines are split on "\n" across chunk boundaries. Only "data" fields are
# considered; the space after "data:" is optional. Malformed JSON payloads
# are logged and skipped. A final line that is not newline-terminated is
# still processed once the body ends.
#
# When the "[DONE]" sentinel arrives and no usage event has been recorded
# in +accumulated+, a {type: :done} event is emitted here, so receivers get
# a completion signal from endpoints that do not report usage. (When usage
# was recorded, process_stream_event has already emitted :done.)
#
# @param http_response [#read_body] response yielding body chunks
# @param accumulated [Hash] mutable accumulator shared with process_stream_event
# @yield [Hash] structured streaming events
def parse_sse_stream(http_response, accumulated, &on_chunk)
  buffer = +""
  done_emitted = false

  handle_line = lambda do |raw_line|
    line = raw_line.strip
    next unless line.start_with?("data:")

    data = line[5..].lstrip
    next if data.empty?

    if data == "[DONE]"
      unless done_emitted || accumulated[:usage]
        done_emitted = true
        on_chunk&.call({type: :done})
      end
      next
    end

    begin
      event = JSON.parse(data)
    rescue JSON::ParserError => e
      @logger&.warn("[AgentHarness::OpenAICompatibleTransport] Skipping malformed SSE event: #{e.message}")
      next
    end
    process_stream_event(event, accumulated, &on_chunk)
  end

  http_response.read_body do |chunk|
    buffer << chunk
    while (newline_at = buffer.index("\n"))
      handle_line.call(buffer.slice!(0, newline_at + 1))
    end
  end

  # Flush a trailing event the server did not terminate with a newline.
  handle_line.call(buffer) unless buffer.empty?
end
169
+
170
# Applies one decoded streaming event to the accumulator and notifies the
# receiver.
#
# Choice data (text and tool-call deltas) is handled first and usage
# second, so a chunk that carries both does not lose its content. For a
# usage-only chunk (empty +choices+) the result is a :usage event followed
# by :done.
#
# @param event [Hash] decoded JSON payload of one "data:" line
# @param accumulated [Hash] mutable accumulator (:content, :tool_calls, :model, :usage)
# @yield [Hash] structured streaming events
def process_stream_event(event, accumulated, &on_chunk)
  # Ignore payloads that are valid JSON but not an object (e.g. a bare string).
  return unless event.is_a?(Hash)

  accumulated[:model] ||= event["model"]

  choice = event.dig("choices", 0)
  if choice
    delta = choice["delta"] || {}
    text = delta["content"]

    if text
      accumulated[:content] << text
      on_chunk.call({type: :text, content: text})
    end

    process_tool_call_delta(delta, accumulated, &on_chunk)
    emit_tool_call_completions(choice, accumulated, &on_chunk)
  end

  return unless event["usage"]

  usage = extract_usage(event)
  accumulated[:usage] = usage
  on_chunk.call({type: :usage, input_tokens: usage[:input], output_tokens: usage[:output]})
  on_chunk.call({type: :done})
end
195
+
196
# Merges streamed tool-call fragments into the accumulator and reports them.
#
# A fragment starts a new tool call only when its id differs from the call
# already stored at that index. Endpoints that repeat the id on every
# fragment therefore keep appending to the same call instead of resetting
# its arguments. Fragments for an index with no started call are ignored.
#
# Events emitted:
# - :tool_call_start once per call (id and name)
# - :tool_call_delta for each argument fragment, including a non-empty
#   fragment that arrives on the starting chunk
#
# @param delta [Hash] the "delta" object of a streamed choice
# @param accumulated [Hash] accumulator whose :tool_calls array is indexed by tool-call index
# @yield [Hash] structured streaming events
def process_tool_call_delta(delta, accumulated, &on_chunk)
  fragments = delta["tool_calls"]
  return unless fragments

  fragments.each do |tc_delta|
    index = tc_delta["index"] || 0
    call_id = tc_delta["id"]
    name = tc_delta.dig("function", "name")
    arguments = tc_delta.dig("function", "arguments")

    current = accumulated[:tool_calls][index]
    starting = call_id && (current.nil? || current[:id] != call_id)

    if starting
      current = {id: call_id, name: name || "", arguments: +""}
      accumulated[:tool_calls][index] = current
      on_chunk.call({type: :tool_call_start, id: call_id, name: current[:name]})
    end

    next unless current

    # Keep a function name that only shows up after the call was started.
    current[:name] = name if current[:name].empty? && name && !name.empty?

    next unless arguments

    current[:arguments] << arguments
    # The starting chunk usually carries an empty arguments string; do not
    # report that as a delta.
    next if starting && arguments.empty?

    on_chunk.call({type: :tool_call_delta, id: current[:id], arguments: arguments})
  end
end
232
+
233
# Announces every accumulated tool call once the model signals that it
# stopped in order to call tools (finish_reason "tool_calls").
#
# @param choice [Hash] the streamed choice object
# @param accumulated [Hash] accumulator holding the collected :tool_calls
# @yield [Hash] one :tool_call_complete event per collected call
def emit_tool_call_completions(choice, accumulated, &on_chunk)
  finished_for_tools = choice["finish_reason"] == "tool_calls"
  return unless finished_for_tools

  accumulated[:tool_calls].each do |call|
    # Indexes that never received a start fragment are left empty.
    next unless call

    completion = {type: :tool_call_complete, id: call[:id], name: call[:name], arguments: call[:arguments]}
    on_chunk.call(completion)
  end
end
247
+
248
# Creates an unopened Net::HTTP client for the endpoint's host and port.
#
# TLS is enabled for "https" URIs. The connect timeout is capped at 30
# seconds; the read timeout uses DEFAULT_TIMEOUT.
#
# @param uri [URI] endpoint the client will talk to
# @return [Net::HTTP] configured client
def build_http(uri)
  Net::HTTP.new(uri.host, uri.port).tap do |client|
    client.use_ssl = uri.scheme == "https"
    client.open_timeout = [DEFAULT_TIMEOUT, 30].min
    client.read_timeout = DEFAULT_TIMEOUT
  end
end
255
+
256
# Builds the JSON POST request, including the bearer credential and the
# User-Agent header.
#
# @param uri [URI] endpoint to post to
# @param body [Hash] payload, serialised with JSON.generate
# @return [Net::HTTP::Post] request ready to be sent
def build_post_request(uri, body)
  headers = {
    "Content-Type" => "application/json",
    "Authorization" => "Bearer #{@api_key}",
    "User-Agent" => USER_AGENT
  }

  Net::HTTP::Post.new(uri).tap do |post|
    headers.each { |field, value| post[field] = value }
    post.body = JSON.generate(body)
  end
end
264
+
265
# Turns a non-streaming HTTP response into a Response.
#
# Any status other than 200 is delegated to handle_error_response, which
# raises. A 200 body that is missing, not valid JSON, or not a JSON object
# is reported as a ProviderError rather than surfacing a TypeError or
# NoMethodError from the parsing helpers.
#
# @param http_response [#code, #body] HTTP response
# @param duration [Float] elapsed seconds for the request
# @param model [String] requested model, used when the body names none
# @return [Response] parsed result (tool calls, if any, are under metadata[:tool_calls])
# @raise [ProviderError] when the body cannot be interpreted
def parse_response(http_response, duration:, model:)
  status_code = http_response.code.to_i
  handle_error_response(http_response, status_code) unless status_code == 200

  # to_s turns a nil body into "", which JSON.parse rejects with ParserError.
  body = JSON.parse(http_response.body.to_s)
  unless body.is_a?(Hash)
    raise ProviderError.new("Unexpected API response: expected a JSON object, got #{body.class}")
  end

  output = extract_content(body)
  tokens = extract_usage(body)
  tool_calls = extract_tool_calls(body)

  metadata = {transport: :http, stream: false}
  metadata[:tool_calls] = tool_calls if tool_calls

  Response.new(
    output: output,
    exit_code: 0,
    duration: duration,
    provider: :openai_compatible,
    model: body["model"] || model,
    tokens: tokens,
    metadata: metadata
  )
rescue JSON::ParserError => e
  raise ProviderError.new(
    "Invalid JSON in API response: #{e.message}",
    original_error: e
  )
end
295
+
296
# Wraps the data collected during a streamed exchange in a Response.
#
# Tool calls are attached under metadata[:tool_calls] only when at least
# one was collected; empty slots in the accumulator are dropped.
#
# @param accumulated [Hash] accumulator with :content, :tool_calls, :model, :usage
# @param duration [Float] elapsed seconds for the request
# @param model [String] requested model, used when the stream named none
# @return [Response]
def build_streaming_response(accumulated, duration:, model:)
  collected_calls = accumulated[:tool_calls].compact
  extras = collected_calls.empty? ? {} : {tool_calls: collected_calls}

  Response.new(
    output: accumulated[:content],
    exit_code: 0,
    duration: duration,
    provider: :openai_compatible,
    model: accumulated[:model] || model,
    tokens: accumulated[:usage],
    metadata: {transport: :http, stream: true}.merge(extras)
  )
end
311
+
312
# Pulls the assistant text out of the first choice of a completion body.
#
# @param body [Hash] decoded completion response
# @return [String, Object] the message content, or "" when there is no choice or no content
def extract_content(body)
  first_choice = body.dig("choices", 0)
  text = first_choice ? first_choice.dig("message", "content") : nil
  text || ""
end
318
+
319
# Normalises the "usage" section of a response into token counts.
#
# Missing counters are treated as 0; the total is the sum of the prompt
# and completion counters.
#
# @param body [Hash] decoded response or stream event
# @return [Hash{Symbol=>Integer}, nil] {input:, output:, total:}, or nil without usage data
def extract_usage(body)
  counters = body["usage"]
  return nil unless counters

  prompt_count = counters["prompt_tokens"] || 0
  completion_count = counters["completion_tokens"] || 0

  {input: prompt_count, output: completion_count, total: prompt_count + completion_count}
end
328
+
329
# Collects the tool calls requested in the first choice of a completion body.
#
# @param body [Hash] decoded completion response
# @return [Array<Hash>, nil] one {id:, name:, arguments:} hash per call, or nil when there are none
def extract_tool_calls(body)
  requested = body.dig("choices", 0, "message", "tool_calls")
  return nil unless requested&.any?

  requested.map do |call|
    fn_name, fn_arguments = %w[name arguments].map { |field| call.dig("function", field) }
    {id: call["id"], name: fn_name, arguments: fn_arguments}
  end
end
341
+
342
# Combines the possible streaming receivers into a single proc.
#
# When called with a chunk, the proc forwards it to the block receiver,
# then to the +on_chat_chunk+ proc, then to the observer (only if the
# observer responds to +on_chat_chunk+). Absent receivers are skipped.
#
# @param on_chunk [Proc, nil] block passed to chat
# @param on_chat_chunk [Proc, nil] explicit callback
# @param observer [#on_chat_chunk, nil] observer object
# @return [Proc] fan-out callback
def build_chat_chunk_callback(on_chunk, on_chat_chunk, observer)
  proc do |chunk|
    [on_chunk, on_chat_chunk].each { |receiver| receiver&.call(chunk) }
    observer.on_chat_chunk(chunk) if observer_responds_to?(observer, :on_chat_chunk)
  end
end
349
+
350
# Nil-tolerant respond_to? check for an optional observer.
#
# @param observer [Object, nil] candidate observer
# @param method_name [Symbol] method the observer should implement
# @return [Boolean, nil] nil when there is no observer, otherwise the respond_to? result
def observer_responds_to?(observer, method_name)
  return nil if observer.nil?

  observer.respond_to?(method_name)
end
353
+
354
# Raises the error matching a failed non-streaming response by handing its
# body and status to handle_error_response_raw.
#
# @param http_response [#body] failed HTTP response
# @param status_code [Integer] HTTP status of the response
def handle_error_response(http_response, status_code)
  raw_body = http_response.body
  handle_error_response_raw(raw_body, status_code)
end
357
+
358
# Converts a failed HTTP exchange into the matching AgentHarness error.
#
# The error text comes from the JSON body when it has the
# {"error": {"message": ...}} shape (falling back to "type"), from a
# plain-string "error" field when the server sends that instead, and
# otherwise from the raw body. Bodies that are nil, not JSON, or not a
# JSON object never cause a secondary exception here; the raw text (or an
# empty string) is used as the message.
#
# @param body_string [String, nil] raw response body
# @param status_code [Integer] HTTP status of the response
# @raise [AuthenticationError] for 401 and 403
# @raise [RateLimitError] for 429
# @raise [ProviderError] for every other status
def handle_error_response_raw(body_string, status_code)
  raw = body_string.to_s

  message = begin
    parsed = JSON.parse(raw)
    detail = parsed.is_a?(Hash) ? parsed["error"] : nil

    case detail
    when Hash then detail["message"] || detail["type"] || raw
    when String then detail.empty? ? raw : detail
    else raw
    end
  rescue JSON::ParserError
    raw
  end

  case status_code
  when 401
    raise AuthenticationError.new(
      "API authentication failed: #{message}",
      provider: :openai_compatible
    )
  when 403
    raise AuthenticationError.new(
      "API access forbidden: #{message}",
      provider: :openai_compatible
    )
  when 429
    raise RateLimitError.new(
      "API rate limit exceeded: #{message}",
      provider: :openai_compatible
    )
  when 400
    raise ProviderError.new("Bad request: #{message}")
  when 500, 502, 503
    raise ProviderError.new("Server error (#{status_code}): #{message}")
  else
    raise ProviderError.new("HTTP #{status_code}: #{message}")
  end
end
390
+ end
391
+ end
@@ -25,7 +25,8 @@ module AgentHarness
25
25
  # }
26
26
  # )
27
27
  class ProviderRuntime
28
- attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata, :unset_env
28
+ attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata, :unset_env,
29
+ :chat_base_url, :chat_model, :chat_api_key, :chat_max_tokens, :chat_tools
29
30
 
30
31
  # @param model [String, nil] model identifier override
31
32
  # @param base_url [String, nil] upstream API base URL override
@@ -34,7 +35,13 @@ module AgentHarness
34
35
  # @param flags [Array<String>] extra CLI flags to append
35
36
  # @param unset_env [Array<String>] environment variable names to remove from inherited env
36
37
  # @param metadata [Hash] arbitrary provider-specific data
37
- def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], unset_env: [], metadata: {})
38
+ # @param chat_base_url [String, nil] override transport base URL for chat
39
+ # @param chat_model [String, nil] override model for chat
40
+ # @param chat_api_key [String, nil] override API key for chat
41
+ # @param chat_max_tokens [Integer, nil] max tokens for chat response
42
+ # @param chat_tools [Array<Hash>, nil] default tool definitions for chat requests
43
+ def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], unset_env: [], metadata: {},
44
+ chat_base_url: nil, chat_model: nil, chat_api_key: nil, chat_max_tokens: nil, chat_tools: nil)
38
45
  validate_optional_string!(:model, model)
39
46
  validate_optional_string!(:base_url, base_url)
40
47
  validate_optional_string!(:api_provider, api_provider)
@@ -90,6 +97,28 @@ module AgentHarness
90
97
  end
91
98
  @unset_env = normalized_unset_env.freeze
92
99
 
100
+ validate_optional_string!(:chat_base_url, chat_base_url)
101
+ validate_optional_string!(:chat_model, chat_model)
102
+ validate_optional_string!(:chat_api_key, chat_api_key)
103
+ unless chat_max_tokens.nil? || chat_max_tokens.is_a?(Integer)
104
+ raise ArgumentError, "chat_max_tokens must be an Integer or nil (got #{chat_max_tokens.class})"
105
+ end
106
+ unless chat_tools.nil? || chat_tools.is_a?(Array)
107
+ raise ArgumentError, "chat_tools must be an Array or nil (got #{chat_tools.class})"
108
+ end
109
+ normalized_chat_tools = chat_tools&.dup
110
+ normalized_chat_tools&.each_with_index do |tool, index|
111
+ unless tool.is_a?(Hash)
112
+ raise ArgumentError,
113
+ "chat_tools must be an Array of Hashes; invalid element at index #{index}: #{tool.inspect} (#{tool.class})"
114
+ end
115
+ end
116
+ @chat_base_url = chat_base_url
117
+ @chat_model = chat_model
118
+ @chat_api_key = chat_api_key
119
+ @chat_max_tokens = chat_max_tokens
120
+ @chat_tools = normalized_chat_tools&.freeze
121
+
93
122
  freeze
94
123
  end
95
124
 
@@ -112,7 +141,12 @@ module AgentHarness
112
141
  env: env_val.nil? ? {} : env_val,
113
142
  flags: flags_val.nil? ? [] : flags_val,
114
143
  unset_env: unset_env_val.nil? ? [] : unset_env_val,
115
- metadata: metadata_val.nil? ? {} : metadata_val
144
+ metadata: metadata_val.nil? ? {} : metadata_val,
145
+ chat_base_url: hash_value(hash, :chat_base_url),
146
+ chat_model: hash_value(hash, :chat_model),
147
+ chat_api_key: hash_value(hash, :chat_api_key),
148
+ chat_max_tokens: hash_value(hash, :chat_max_tokens),
149
+ chat_tools: hash_value(hash, :chat_tools)
116
150
  )
117
151
  end
118
152
 
@@ -135,7 +169,9 @@ module AgentHarness
135
169
  # @return [Boolean]
136
170
  def empty?
137
171
  model.nil? && base_url.nil? && api_provider.nil? &&
138
- env.empty? && flags.empty? && metadata.empty? && unset_env.empty?
172
+ env.empty? && flags.empty? && metadata.empty? && unset_env.empty? &&
173
+ chat_base_url.nil? && chat_model.nil? && chat_api_key.nil? && chat_max_tokens.nil? &&
174
+ chat_tools.nil?
139
175
  end
140
176
 
141
177
  private_class_method def self.hash_value(hash, key)
@@ -290,7 +290,8 @@ module AgentHarness
290
290
  canonical_name: canonical_provider_name,
291
291
  aliases: normalized_aliases
292
292
  )
293
- }
293
+ },
294
+ chat: build_chat_metadata(provider)
294
295
  }
295
296
 
296
297
  deep_merge_metadata(metadata, sanitized_provider_metadata_overrides)
@@ -536,6 +537,32 @@ module AgentHarness
536
537
  default
537
538
  end
538
539
 
540
# Describes a provider's chat capability for the metadata payload.
#
# Returns {supported: false} when the provider does not report chat
# support or when collecting the details raises. Otherwise the hash has
# supported: true, the provider's model list and first model (when
# +chat_models+ returns an Array), and the symbolic transport type.
#
# @param provider [Object] provider instance being described
# @return [Hash] chat metadata
def build_chat_metadata(provider)
  return {supported: false} unless provider_metadata_value(provider, :supports_chat?, default: false)

  chat_meta = {supported: true}

  if provider.respond_to?(:chat_models, true)
    # respond_to? is asked to include private methods, so the call must go
    # through send; a plain call on a private chat_models would raise and
    # the rescue below would report the provider as unsupported.
    models = provider.send(:chat_models)
    if models.is_a?(Array)
      chat_meta[:models] = models
      chat_meta[:default_model] = models.first unless models.empty?
    end
  end

  # Use lightweight chat_transport_type to avoid instantiating the
  # transport (which may trigger API key resolution / authentication
  # as a side effect).
  chat_meta[:transport] = provider_metadata_value(provider, :chat_transport_type, default: nil)

  chat_meta
rescue => e
  AgentHarness.logger&.debug(
    "[AgentHarness::Providers::Adapter] chat metadata failed for #{provider_name}: #{e.class}"
  )
  {supported: false}
end
565
+
539
566
  def provider_display_name(provider, canonical_name: provider_name)
540
567
  if provider&.respond_to?(:display_name) &&
541
568
  provider.method(:display_name).owner != AgentHarness::Providers::Base
@@ -599,7 +626,7 @@ module AgentHarness
599
626
  end
600
627
 
601
628
  def default_supported_mcp_transports
602
- []
629
+ %w[stdio]
603
630
  end
604
631
 
605
632
  def default_supports_sessions
@@ -849,9 +876,12 @@ module AgentHarness
849
876
 
850
877
  # Supported MCP transport types for this provider
851
878
  #
879
+ # Defaults to ["stdio"]. Providers that support HTTP/SSE transports
880
+ # should override this to include those transports.
881
+ #
852
882
  # @return [Array<String>] supported transports (e.g. ["stdio", "http"])
853
883
  def supported_mcp_transports
854
- []
884
+ %w[stdio]
855
885
  end
856
886
 
857
887
  # Build provider-specific MCP flags/arguments for CLI invocation
@@ -917,6 +947,35 @@ module AgentHarness
917
947
  false
918
948
  end
919
949
 
950
# Whether this provider offers multi-turn chat mode.
#
# A provider that answers +true+ accepts conversation history and returns
# streaming multi-turn responses via +send_chat_message+. The default is
# +false+.
#
# @return [Boolean] true if the provider supports chat
def supports_chat?
  false
end
959
+
960
# Transport object used for chat mode.
#
# The default is +nil+; chat-capable providers override this to return a
# suitable transport (e.g. OpenAICompatibleTransport or TextTransport).
#
# @return [Object, nil] transport instance or nil if unsupported
def chat_transport
  nil
end
969
+
970
# Symbolic name of the chat transport, available without building the
# transport object itself. Metadata collection uses this so that it does
# not trigger API key resolution or other authentication side effects.
#
# @return [Symbol, nil] :openai_compatible, :anthropic, or nil
def chat_transport_type
  nil
end
978
+
920
979
  # Check if provider supports dangerous mode
921
980
  #
922
981
  # @return [Boolean] true if dangerous mode is supported
@@ -159,6 +159,10 @@ module AgentHarness
159
159
  MODEL_PATTERN.match?(family_name)
160
160
  end
161
161
 
162
# Declares that this provider supports multi-turn chat mode.
#
# @return [Boolean] always true
def supports_chat?
  true
end
165
+
162
166
  def smoke_test_contract
163
167
  Base::DEFAULT_SMOKE_TEST_CONTRACT
164
168
  end
@@ -410,6 +414,32 @@ module AgentHarness
410
414
  true
411
415
  end
412
416
 
417
# Model identifiers returned by chat_models.
CHAT_MODELS = %w[
  claude-sonnet-4-20250514
  claude-haiku-4-20250414
  claude-opus-4-20250514
].freeze
418
+
419
# Declares that this provider supports multi-turn chat mode.
#
# @return [Boolean] always true
def supports_chat?
  true
end
422
+
423
# Models this provider offers for chat.
#
# @return [Array<String>] the frozen CHAT_MODELS list
def chat_models
  CHAT_MODELS
end
426
+
427
# Memoised TextTransport for chat, built on first use with the API key
# from resolve_text_mode_api_key and this provider's logger.
#
# @return [TextTransport]
def chat_transport
  return @chat_transport if @chat_transport

  @chat_transport = TextTransport.new(api_key: resolve_text_mode_api_key, logger: @logger)
end
430
+
431
# Builds a fresh TextTransport honouring per-request runtime overrides.
#
# The runtime's chat_base_url and chat_api_key take precedence; otherwise
# TextTransport::ANTHROPIC_API_URL and the key from
# resolve_text_mode_api_key are used. The result is not memoised.
#
# @param runtime [#chat_base_url, #chat_api_key] runtime overrides
# @return [TextTransport]
def build_runtime_chat_transport(runtime)
  endpoint = runtime.chat_base_url || TextTransport::ANTHROPIC_API_URL
  # The fallback key is only resolved when the runtime supplies none.
  credential = runtime.chat_api_key || resolve_text_mode_api_key

  TextTransport.new(base_url: endpoint, api_key: credential, logger: @logger)
end
438
+
439
# Symbolic chat transport type for this provider.
#
# @return [Symbol] :anthropic
def chat_transport_type
  :anthropic
end
442
+
413
443
  def dangerous_mode_flags
414
444
  ["--dangerously-skip-permissions"]
415
445
  end