agent-harness 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.release-please-manifest.json +1 -1
- data/CHANGELOG.md +15 -0
- data/lib/agent_harness/errors.rb +7 -0
- data/lib/agent_harness/providers/adapter.rb +27 -0
- data/lib/agent_harness/providers/aider.rb +132 -14
- data/lib/agent_harness/providers/anthropic.rb +163 -23
- data/lib/agent_harness/providers/base.rb +9 -0
- data/lib/agent_harness/providers/github_copilot.rb +435 -494
- data/lib/agent_harness/providers/registry.rb +1 -0
- data/lib/agent_harness/providers/token_usage_parsing.rb +118 -0
- data/lib/agent_harness/text_transport.rb +168 -0
- data/lib/agent_harness/version.rb +1 -1
- data/lib/agent_harness.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 278785d86727fd759e55bcd8fd4fb4124a13c8f6ae818a40b2ae49bcbbb3b18f
|
|
4
|
+
data.tar.gz: 717338d556ef335ebf3d4e2f0fbdb4a9d92bbbe52bbb1739fcb8afaba7b0c1ac
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 76cd57c3875f38271390f3f7ebe29153d40924988315807d79fd85d37fdedde109e7c465a6eeeb889c858a6da53faac5cd48dc8a62862fed5d6843e73b4036a7
|
|
7
|
+
data.tar.gz: 6d74d1ac89feb72339a87bb08b413ee6996b0dc1b0b7cb7ff2446ac3ec12539434a00f57187e1632ec889b5fba5bab10ce20d2b91a7aaee5b21e39c837101b04
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.7.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.3...agent-harness/v0.7.4) (2026-04-18)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* 119: Claude provider leaks raw --output-format json envelope as response.output ([#120](https://github.com/viamin/agent-harness/issues/120)) ([602a5f9](https://github.com/viamin/agent-harness/commit/602a5f97e009ac59c798c7b1d7342cd43e2e8d4f))
|
|
9
|
+
|
|
10
|
+
## [0.7.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.2...agent-harness/v0.7.3) (2026-04-15)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Bug Fixes
|
|
14
|
+
|
|
15
|
+
* 114: feat: add text-only transport that bypasses the CLI ([a6be68a](https://github.com/viamin/agent-harness/commit/a6be68aa03b0202492caeb24233104cd1b814d88))
|
|
16
|
+
* 98: feat: add token usage extraction for remaining providers (cursor, gemini, aider, opencode, copilot, mistral_vibe) ([#105](https://github.com/viamin/agent-harness/issues/105)) ([b090748](https://github.com/viamin/agent-harness/commit/b090748b5d528ab864e94754c0992bc060669540))
|
|
17
|
+
|
|
3
18
|
## [0.7.2](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.1...agent-harness/v0.7.2) (2026-04-15)
|
|
4
19
|
|
|
5
20
|
|
data/lib/agent_harness/errors.rb
CHANGED
|
@@ -59,6 +59,13 @@ module AgentHarness
|
|
|
59
59
|
end
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
# Auth mismatch errors — raised when the requested transport mode
|
|
63
|
+
# requires credentials that differ from the caller's current auth mode.
|
|
64
|
+
# For example, requesting HTTP text mode with only OAuth/subscription
|
|
65
|
+
# credentials (no API key) would silently shift billing from
|
|
66
|
+
# subscription to API-metered usage.
|
|
67
|
+
class AuthMismatchError < AuthenticationError; end
|
|
68
|
+
|
|
62
69
|
# Configuration errors
|
|
63
70
|
class ConfigurationError < Error; end
|
|
64
71
|
|
data/lib/agent_harness/providers/adapter.rb
CHANGED
|
@@ -257,6 +257,11 @@ module AgentHarness
|
|
|
257
257
|
:supported_mcp_transports,
|
|
258
258
|
default: default_supported_mcp_transports
|
|
259
259
|
),
|
|
260
|
+
supports_token_counting: provider_metadata_value(
|
|
261
|
+
provider,
|
|
262
|
+
:supports_token_counting?,
|
|
263
|
+
default: default_supports_token_counting
|
|
264
|
+
),
|
|
260
265
|
supports_sessions: provider_metadata_value(
|
|
261
266
|
provider,
|
|
262
267
|
:supports_sessions?,
|
|
@@ -601,6 +606,10 @@ module AgentHarness
|
|
|
601
606
|
false
|
|
602
607
|
end
|
|
603
608
|
|
|
609
|
+
def default_supports_token_counting
|
|
610
|
+
false
|
|
611
|
+
end
|
|
612
|
+
|
|
604
613
|
def default_supports_dangerous_mode
|
|
605
614
|
false
|
|
606
615
|
end
|
|
@@ -853,6 +862,17 @@ module AgentHarness
|
|
|
853
862
|
false
|
|
854
863
|
end
|
|
855
864
|
|
|
865
|
+
# Check if provider supports text-only mode via direct HTTP transport.
|
|
866
|
+
#
|
|
867
|
+
# Providers that return +true+ will route +mode: :text+ requests
|
|
868
|
+
# through their REST API instead of the CLI. Providers that return
|
|
869
|
+
# +false+ fall back to the CLI path with tools forcibly disabled.
|
|
870
|
+
#
|
|
871
|
+
# @return [Boolean] true if the provider has an HTTP text transport
|
|
872
|
+
def supports_text_mode?
|
|
873
|
+
false
|
|
874
|
+
end
|
|
875
|
+
|
|
856
876
|
# Check if provider supports dangerous mode
|
|
857
877
|
#
|
|
858
878
|
# @return [Boolean] true if dangerous mode is supported
|
|
@@ -882,6 +902,13 @@ module AgentHarness
|
|
|
882
902
|
[]
|
|
883
903
|
end
|
|
884
904
|
|
|
905
|
+
# Whether this provider can extract token usage from CLI output
|
|
906
|
+
#
|
|
907
|
+
# @return [Boolean] true if the provider returns token counts
|
|
908
|
+
def supports_token_counting?
|
|
909
|
+
false
|
|
910
|
+
end
|
|
911
|
+
|
|
885
912
|
# Validate provider configuration
|
|
886
913
|
#
|
|
887
914
|
# @return [Hash] with :valid, :errors keys
|
data/lib/agent_harness/providers/aider.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "json"
|
|
3
4
|
require "securerandom"
|
|
4
5
|
require "shellwords"
|
|
5
6
|
require "tmpdir"
|
|
@@ -10,6 +11,8 @@ module AgentHarness
|
|
|
10
11
|
#
|
|
11
12
|
# Provides integration with the Aider CLI tool.
|
|
12
13
|
class Aider < Base
|
|
14
|
+
include TokenUsageParsing
|
|
15
|
+
|
|
13
16
|
UV_VERSION = "0.8.17"
|
|
14
17
|
SUPPORTED_CLI_VERSION = "0.86.2"
|
|
15
18
|
SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.87.0").freeze
|
|
@@ -196,6 +199,10 @@ module AgentHarness
|
|
|
196
199
|
["--restore-chat-history", session_id]
|
|
197
200
|
end
|
|
198
201
|
|
|
202
|
+
def supports_token_counting?
|
|
203
|
+
true
|
|
204
|
+
end
|
|
205
|
+
|
|
199
206
|
def send_message(prompt:, **options)
|
|
200
207
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
201
208
|
|
|
@@ -205,15 +212,19 @@ module AgentHarness
|
|
|
205
212
|
options = normalize_mcp_servers(options)
|
|
206
213
|
validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
|
|
207
214
|
|
|
208
|
-
llm_history_path = generate_llm_history_path
|
|
209
|
-
command = build_command(prompt, options.merge(llm_history_path: llm_history_path))
|
|
210
|
-
preparation = build_execution_preparation(options)
|
|
211
215
|
timeout = options[:timeout] || @config.timeout || default_timeout
|
|
216
|
+
raise TimeoutError, "Command timed out before execution started" if timeout <= 0
|
|
212
217
|
|
|
213
218
|
start_time = Time.now
|
|
219
|
+
llm_history_path = prepare_llm_history_file!
|
|
220
|
+
command = build_command(prompt, options.merge(llm_history_path: llm_history_path))
|
|
221
|
+
preparation = build_execution_preparation(options)
|
|
222
|
+
remaining_timeout = timeout - (Time.now - start_time)
|
|
223
|
+
raise TimeoutError, "Command timed out before execution started" if remaining_timeout <= 0
|
|
224
|
+
|
|
214
225
|
result = execute_with_timeout(
|
|
215
226
|
command,
|
|
216
|
-
timeout:
|
|
227
|
+
timeout: remaining_timeout,
|
|
217
228
|
env: build_env(options),
|
|
218
229
|
preparation: preparation,
|
|
219
230
|
**command_execution_options(options)
|
|
@@ -221,13 +232,14 @@ module AgentHarness
|
|
|
221
232
|
duration = Time.now - start_time
|
|
222
233
|
|
|
223
234
|
response = parse_response(result, duration: duration, llm_history_path: llm_history_path)
|
|
224
|
-
|
|
235
|
+
effective_runtime_model = normalized_model_name(runtime&.model)
|
|
236
|
+
if effective_runtime_model
|
|
225
237
|
response = Response.new(
|
|
226
238
|
output: response.output,
|
|
227
239
|
exit_code: response.exit_code,
|
|
228
240
|
duration: response.duration,
|
|
229
241
|
provider: response.provider,
|
|
230
|
-
model:
|
|
242
|
+
model: effective_runtime_model,
|
|
231
243
|
tokens: response.tokens,
|
|
232
244
|
metadata: response.metadata,
|
|
233
245
|
error: response.error
|
|
@@ -259,10 +271,8 @@ module AgentHarness
|
|
|
259
271
|
cmd += ["--llm-history-file", options[:llm_history_path]]
|
|
260
272
|
end
|
|
261
273
|
|
|
262
|
-
model = runtime
|
|
263
|
-
|
|
264
|
-
cmd += ["--model", model]
|
|
265
|
-
end
|
|
274
|
+
model = effective_model_name(runtime)
|
|
275
|
+
cmd += ["--model", model] if model
|
|
266
276
|
|
|
267
277
|
if options[:session]
|
|
268
278
|
cmd += session_flags(options[:session])
|
|
@@ -316,11 +326,11 @@ module AgentHarness
|
|
|
316
326
|
COMMON_SHELL_COMMAND_PATTERN =
|
|
317
327
|
/\A(?:git|bundle|ruby|python\d*(?:\.\d+)?|uv|npm|yarn|pnpm|node|bash|sh|zsh|make|rake|rspec|rails|go|pytest|bin\/[\w.-]+|sed|rg|grep|find|ls|cat|cp|mv|rm|mkdir|touch|chmod|chown|docker|kubectl)\z/
|
|
318
328
|
EXECUTOR_LLM_HISTORY_TIMEOUT = 10
|
|
319
|
-
|
|
329
|
+
HistoryFileHandle = Struct.new(:path)
|
|
320
330
|
def generate_llm_history_path
|
|
321
|
-
return "/tmp/aider_llm_history_#{
|
|
331
|
+
return "/tmp/aider_llm_history_#{SecureRandom.hex(8)}.json" if sandboxed_environment?
|
|
322
332
|
|
|
323
|
-
File.join(Dir.tmpdir, "aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}")
|
|
333
|
+
File.join(Dir.tmpdir, "aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}.json")
|
|
324
334
|
end
|
|
325
335
|
|
|
326
336
|
def parse_token_usage(result, llm_history_path:)
|
|
@@ -328,11 +338,18 @@ module AgentHarness
|
|
|
328
338
|
# Prefer the request-local history file when it includes a token report,
|
|
329
339
|
# but fall back to captured command output because the usage summary is
|
|
330
340
|
# printed there during normal runs.
|
|
331
|
-
|
|
341
|
+
parse_token_usage_history_content(safe_read_llm_history(llm_history_path)) ||
|
|
332
342
|
parse_token_usage_text(result.stdout, source: :output) ||
|
|
333
343
|
parse_token_usage_text(result.stderr, source: :output)
|
|
334
344
|
end
|
|
335
345
|
|
|
346
|
+
def parse_token_usage_history_content(content)
|
|
347
|
+
return nil if content.nil? || content.strip.empty?
|
|
348
|
+
|
|
349
|
+
aggregate_token_counts(parse_history_entries(content)) ||
|
|
350
|
+
parse_token_usage_text(content, source: :history)
|
|
351
|
+
end
|
|
352
|
+
|
|
336
353
|
def read_llm_history(path)
|
|
337
354
|
return read_executor_llm_history(path) if sandboxed_environment?
|
|
338
355
|
return nil unless path && File.exist?(path) && !File.zero?(path)
|
|
@@ -362,10 +379,67 @@ module AgentHarness
|
|
|
362
379
|
|
|
363
380
|
input = parse_token_count(match[:input])
|
|
364
381
|
output = parse_token_count(match[:output])
|
|
382
|
+
return nil if input.negative? || output.negative?
|
|
365
383
|
|
|
366
384
|
{input: input, output: output, total: input + output}
|
|
367
385
|
end
|
|
368
386
|
|
|
387
|
+
def parse_history_entries(content)
|
|
388
|
+
parsed = JSON.parse(content)
|
|
389
|
+
case parsed
|
|
390
|
+
when Array
|
|
391
|
+
parsed
|
|
392
|
+
when Hash
|
|
393
|
+
[parsed]
|
|
394
|
+
end
|
|
395
|
+
rescue JSON::ParserError
|
|
396
|
+
parsed_lines = []
|
|
397
|
+
|
|
398
|
+
content.each_line do |line|
|
|
399
|
+
next if line.strip.empty?
|
|
400
|
+
|
|
401
|
+
parsed_lines << JSON.parse(line)
|
|
402
|
+
rescue JSON::ParserError
|
|
403
|
+
return nil
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
parsed_lines.empty? ? nil : parsed_lines
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
def aggregate_token_counts(entries)
|
|
410
|
+
return nil unless entries&.any?
|
|
411
|
+
|
|
412
|
+
total_input = 0
|
|
413
|
+
total_output = 0
|
|
414
|
+
found = false
|
|
415
|
+
|
|
416
|
+
entries.each do |entry|
|
|
417
|
+
usage = find_usage_in_entry(entry)
|
|
418
|
+
next unless usage
|
|
419
|
+
|
|
420
|
+
input = token_count_for(usage, "prompt_tokens", "input_tokens", "promptTokens", "inputTokens")
|
|
421
|
+
output = token_count_for(usage, "completion_tokens", "output_tokens", "completionTokens", "outputTokens")
|
|
422
|
+
next if input.nil? && output.nil?
|
|
423
|
+
|
|
424
|
+
total_input += input || 0
|
|
425
|
+
total_output += output || 0
|
|
426
|
+
found = true
|
|
427
|
+
end
|
|
428
|
+
|
|
429
|
+
return nil unless found
|
|
430
|
+
|
|
431
|
+
{input: total_input, output: total_output, total: total_input + total_output}
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
def find_usage_in_entry(entry)
|
|
435
|
+
return nil unless entry.is_a?(Hash)
|
|
436
|
+
|
|
437
|
+
select_best_usage_payload([
|
|
438
|
+
entry["usage"],
|
|
439
|
+
nested_hash_value(entry, "response", "usage")
|
|
440
|
+
])
|
|
441
|
+
end
|
|
442
|
+
|
|
369
443
|
def extract_history_token_usage_match(content)
|
|
370
444
|
lines = content.lines
|
|
371
445
|
|
|
@@ -513,6 +587,16 @@ module AgentHarness
|
|
|
513
587
|
(normalized.to_f * multiplier).round
|
|
514
588
|
end
|
|
515
589
|
|
|
590
|
+
def prepare_llm_history_file!
|
|
591
|
+
if sandboxed_environment?
|
|
592
|
+
@aider_history_path = generate_llm_history_path
|
|
593
|
+
else
|
|
594
|
+
path = reserve_local_llm_history_path
|
|
595
|
+
@aider_history_tempfile = HistoryFileHandle.new(path)
|
|
596
|
+
path
|
|
597
|
+
end
|
|
598
|
+
end
|
|
599
|
+
|
|
516
600
|
def cleanup_llm_history_file!(path)
|
|
517
601
|
return unless path
|
|
518
602
|
|
|
@@ -522,6 +606,9 @@ module AgentHarness
|
|
|
522
606
|
rescue => e
|
|
523
607
|
log_debug("llm_history_cleanup_error", error: e.message)
|
|
524
608
|
nil
|
|
609
|
+
ensure
|
|
610
|
+
clear_local_history_handle!(path)
|
|
611
|
+
clear_executor_history_path!(path)
|
|
525
612
|
end
|
|
526
613
|
|
|
527
614
|
def validate_runtime_flags!(flags)
|
|
@@ -573,6 +660,37 @@ module AgentHarness
|
|
|
573
660
|
log_debug("llm_history_cleanup_error", error: e.message)
|
|
574
661
|
nil
|
|
575
662
|
end
|
|
663
|
+
|
|
664
|
+
MAX_HISTORY_PATH_ATTEMPTS = 10
|
|
665
|
+
|
|
666
|
+
def reserve_local_llm_history_path
|
|
667
|
+
MAX_HISTORY_PATH_ATTEMPTS.times do
|
|
668
|
+
path = generate_llm_history_path
|
|
669
|
+
|
|
670
|
+
begin
|
|
671
|
+
File.open(path, File::WRONLY | File::CREAT | File::EXCL, 0o600, &:close)
|
|
672
|
+
return path
|
|
673
|
+
rescue Errno::EEXIST
|
|
674
|
+
next
|
|
675
|
+
end
|
|
676
|
+
end
|
|
677
|
+
|
|
678
|
+
raise "failed to reserve unique LLM history path after #{MAX_HISTORY_PATH_ATTEMPTS} attempts"
|
|
679
|
+
end
|
|
680
|
+
|
|
681
|
+
def clear_local_history_handle!(path)
|
|
682
|
+
return unless defined?(@aider_history_tempfile)
|
|
683
|
+
return unless @aider_history_tempfile&.path == path
|
|
684
|
+
|
|
685
|
+
@aider_history_tempfile = nil
|
|
686
|
+
end
|
|
687
|
+
|
|
688
|
+
def clear_executor_history_path!(path)
|
|
689
|
+
return unless defined?(@aider_history_path)
|
|
690
|
+
return unless @aider_history_path == path
|
|
691
|
+
|
|
692
|
+
@aider_history_path = nil
|
|
693
|
+
end
|
|
576
694
|
end
|
|
577
695
|
end
|
|
578
696
|
end
|
data/lib/agent_harness/providers/anthropic.rb
CHANGED
|
@@ -161,8 +161,81 @@ module AgentHarness
|
|
|
161
161
|
Base::DEFAULT_SMOKE_TEST_CONTRACT
|
|
162
162
|
end
|
|
163
163
|
|
|
164
|
+
# Parse a raw Claude CLI --output-format=json envelope into its components.
|
|
165
|
+
#
|
|
166
|
+
# Downstream callers that capture Claude CLI stdout directly (e.g. container
|
|
167
|
+
# execution plans) can use this to extract the assistant text, error state,
|
|
168
|
+
# token usage, and structured metadata without re-implementing the parsing.
|
|
169
|
+
#
|
|
170
|
+
# @param json_string [String] raw JSON envelope from Claude CLI stdout
|
|
171
|
+
# @return [Hash, nil] parsed components or nil if not a valid envelope
|
|
172
|
+
# - :output [String] the assistant's final text (the "result" field)
|
|
173
|
+
# - :error [String, nil] error message if is_error was true
|
|
174
|
+
# - :tokens [Hash, nil] {input:, output:, total:} token counts
|
|
175
|
+
# - :metadata [Hash] structured metadata (cost_usd, session_id, etc.)
|
|
176
|
+
def parse_cli_json_envelope(json_string)
|
|
177
|
+
return nil if json_string.nil? || json_string.empty?
|
|
178
|
+
|
|
179
|
+
parsed = JSON.parse(json_string)
|
|
180
|
+
return nil unless parsed.is_a?(Hash) && parsed.key?("result")
|
|
181
|
+
|
|
182
|
+
output = parsed["result"]
|
|
183
|
+
error = nil
|
|
184
|
+
|
|
185
|
+
if parsed["is_error"]
|
|
186
|
+
error = classify_error_message(output || "Unknown Claude CLI error")
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
tokens = extract_tokens(parsed)
|
|
190
|
+
metadata = extract_envelope_metadata(parsed)
|
|
191
|
+
|
|
192
|
+
{output: output, error: error, tokens: tokens, metadata: metadata}
|
|
193
|
+
rescue JSON::ParserError
|
|
194
|
+
nil
|
|
195
|
+
end
|
|
196
|
+
|
|
164
197
|
private
|
|
165
198
|
|
|
199
|
+
def classify_error_message(message)
|
|
200
|
+
msg_lower = message.downcase
|
|
201
|
+
|
|
202
|
+
if msg_lower.include?("rate limit") || msg_lower.include?("session limit")
|
|
203
|
+
"Rate limit exceeded"
|
|
204
|
+
elsif msg_lower.include?("deprecat") || msg_lower.include?("end-of-life")
|
|
205
|
+
"Model deprecated"
|
|
206
|
+
elsif msg_lower.include?("oauth token") || msg_lower.include?("authentication")
|
|
207
|
+
"Authentication error"
|
|
208
|
+
else
|
|
209
|
+
message
|
|
210
|
+
end
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
def extract_tokens(parsed)
|
|
214
|
+
usage = parsed["usage"]
|
|
215
|
+
return nil unless usage
|
|
216
|
+
|
|
217
|
+
input = usage["input_tokens"]
|
|
218
|
+
output = usage["output_tokens"]
|
|
219
|
+
return nil unless input || output
|
|
220
|
+
|
|
221
|
+
input ||= 0
|
|
222
|
+
output ||= 0
|
|
223
|
+
|
|
224
|
+
{input: input, output: output, total: input + output}
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
def extract_envelope_metadata(parsed)
|
|
228
|
+
meta = {}
|
|
229
|
+
meta[:cost_usd] = parsed["total_cost_usd"] if parsed.key?("total_cost_usd")
|
|
230
|
+
meta[:session_id] = parsed["session_id"] if parsed.key?("session_id")
|
|
231
|
+
meta[:stop_reason] = parsed["stop_reason"] if parsed.key?("stop_reason")
|
|
232
|
+
meta[:terminal_reason] = parsed["terminal_reason"] if parsed.key?("terminal_reason")
|
|
233
|
+
meta[:num_turns] = parsed["num_turns"] if parsed.key?("num_turns")
|
|
234
|
+
meta[:duration_ms] = parsed["duration_ms"] if parsed.key?("duration_ms")
|
|
235
|
+
meta[:duration_api_ms] = parsed["duration_api_ms"] if parsed.key?("duration_api_ms")
|
|
236
|
+
meta
|
|
237
|
+
end
|
|
238
|
+
|
|
166
239
|
def validate_version!(version)
|
|
167
240
|
unless version.is_a?(String) && !version.strip.empty?
|
|
168
241
|
raise ArgumentError, "Invalid version: #{version.inspect}. " \
|
|
@@ -297,6 +370,10 @@ module AgentHarness
|
|
|
297
370
|
end
|
|
298
371
|
|
|
299
372
|
def send_message(prompt:, **options)
|
|
373
|
+
if options[:mode] == :text
|
|
374
|
+
return send_text_message(prompt, **options.except(:mode))
|
|
375
|
+
end
|
|
376
|
+
|
|
300
377
|
super
|
|
301
378
|
ensure
|
|
302
379
|
cleanup_mcp_tempfiles!
|
|
@@ -321,6 +398,10 @@ module AgentHarness
|
|
|
321
398
|
true
|
|
322
399
|
end
|
|
323
400
|
|
|
401
|
+
def supports_text_mode?
|
|
402
|
+
true
|
|
403
|
+
end
|
|
404
|
+
|
|
324
405
|
def dangerous_mode_flags
|
|
325
406
|
["--dangerously-skip-permissions"]
|
|
326
407
|
end
|
|
@@ -329,6 +410,10 @@ module AgentHarness
|
|
|
329
410
|
:oauth
|
|
330
411
|
end
|
|
331
412
|
|
|
413
|
+
def supports_token_counting?
|
|
414
|
+
true
|
|
415
|
+
end
|
|
416
|
+
|
|
332
417
|
def execution_semantics
|
|
333
418
|
{
|
|
334
419
|
prompt_delivery: :arg,
|
|
@@ -461,17 +546,24 @@ module AgentHarness
|
|
|
461
546
|
output = result.stdout
|
|
462
547
|
error = nil
|
|
463
548
|
tokens = nil
|
|
549
|
+
metadata = {}
|
|
464
550
|
|
|
465
551
|
if result.failed?
|
|
466
552
|
combined = [result.stdout, result.stderr].compact.join("\n")
|
|
467
553
|
error = classify_error_message(combined)
|
|
468
554
|
end
|
|
469
555
|
|
|
470
|
-
# Parse JSON output to extract result text
|
|
556
|
+
# Parse JSON output to extract result text, token usage, and metadata
|
|
471
557
|
parsed = parse_json_output(output)
|
|
472
558
|
if parsed
|
|
559
|
+
# Handle is_error envelopes as provider errors
|
|
560
|
+
if parsed["is_error"]
|
|
561
|
+
error ||= classify_error_message(parsed["result"] || "Unknown Claude CLI error")
|
|
562
|
+
end
|
|
563
|
+
|
|
473
564
|
output = parsed["result"] || output
|
|
474
565
|
tokens = extract_tokens(parsed)
|
|
566
|
+
metadata = extract_envelope_metadata(parsed)
|
|
475
567
|
end
|
|
476
568
|
|
|
477
569
|
Response.new(
|
|
@@ -481,6 +573,7 @@ module AgentHarness
|
|
|
481
573
|
provider: self.class.provider_name,
|
|
482
574
|
model: @config.model,
|
|
483
575
|
tokens: tokens,
|
|
576
|
+
metadata: metadata,
|
|
484
577
|
error: error
|
|
485
578
|
)
|
|
486
579
|
end
|
|
@@ -491,6 +584,67 @@ module AgentHarness
|
|
|
491
584
|
|
|
492
585
|
private
|
|
493
586
|
|
|
587
|
+
def send_text_message(prompt, **options)
|
|
588
|
+
api_key = resolve_text_mode_api_key
|
|
589
|
+
model = options[:model] || @config.model
|
|
590
|
+
timeout = options[:timeout] || @config.timeout || default_timeout
|
|
591
|
+
max_tokens = options[:max_tokens]
|
|
592
|
+
|
|
593
|
+
transport = TextTransport.new(api_key: api_key, logger: @logger)
|
|
594
|
+
|
|
595
|
+
kwargs = {model: model, timeout: timeout}
|
|
596
|
+
kwargs[:max_tokens] = max_tokens if max_tokens
|
|
597
|
+
|
|
598
|
+
response = transport.send_message(prompt, **kwargs)
|
|
599
|
+
|
|
600
|
+
# Apply runtime model override if present
|
|
601
|
+
runtime = options[:provider_runtime]
|
|
602
|
+
runtime = ProviderRuntime.wrap(runtime) if runtime.is_a?(Hash)
|
|
603
|
+
if runtime&.model
|
|
604
|
+
response = Response.new(
|
|
605
|
+
output: response.output,
|
|
606
|
+
exit_code: response.exit_code,
|
|
607
|
+
duration: response.duration,
|
|
608
|
+
provider: response.provider,
|
|
609
|
+
model: runtime.model,
|
|
610
|
+
tokens: response.tokens,
|
|
611
|
+
metadata: response.metadata,
|
|
612
|
+
error: response.error
|
|
613
|
+
)
|
|
614
|
+
end
|
|
615
|
+
|
|
616
|
+
track_tokens(response) if response.tokens
|
|
617
|
+
|
|
618
|
+
log_debug("send_text_message_complete",
|
|
619
|
+
duration: response.duration,
|
|
620
|
+
tokens: response.tokens,
|
|
621
|
+
transport: :http)
|
|
622
|
+
|
|
623
|
+
response
|
|
624
|
+
end
|
|
625
|
+
|
|
626
|
+
# Resolve the API key for text mode, validating that the caller's
|
|
627
|
+
# credentials support direct API access without silently shifting
|
|
628
|
+
# billing from subscription to API-metered usage.
|
|
629
|
+
#
|
|
630
|
+
# @return [String] the API key
|
|
631
|
+
# @raise [AuthMismatchError] if no API key is available
|
|
632
|
+
def resolve_text_mode_api_key
|
|
633
|
+
api_key = ENV["ANTHROPIC_API_KEY"]
|
|
634
|
+
|
|
635
|
+
if api_key.nil? || api_key.strip.empty?
|
|
636
|
+
raise AuthMismatchError.new(
|
|
637
|
+
"Text mode requires an ANTHROPIC_API_KEY for direct API access. " \
|
|
638
|
+
"OAuth/subscription credentials cannot be used for HTTP transport " \
|
|
639
|
+
"because it would silently shift billing to API-metered usage. " \
|
|
640
|
+
"Set ANTHROPIC_API_KEY or use the default CLI mode instead.",
|
|
641
|
+
provider: :claude
|
|
642
|
+
)
|
|
643
|
+
end
|
|
644
|
+
|
|
645
|
+
api_key.strip
|
|
646
|
+
end
|
|
647
|
+
|
|
494
648
|
def parse_json_output(output)
|
|
495
649
|
return nil if output.nil? || output.empty?
|
|
496
650
|
|
|
@@ -499,32 +653,18 @@ module AgentHarness
|
|
|
499
653
|
nil
|
|
500
654
|
end
|
|
501
655
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
output = usage["output_tokens"]
|
|
508
|
-
return nil unless input || output
|
|
509
|
-
|
|
510
|
-
input ||= 0
|
|
511
|
-
output ||= 0
|
|
656
|
+
# Delegate to class-level implementations so both instance and class
|
|
657
|
+
# methods share a single definition.
|
|
658
|
+
def extract_envelope_metadata(parsed)
|
|
659
|
+
self.class.send(:extract_envelope_metadata, parsed)
|
|
660
|
+
end
|
|
512
661
|
|
|
513
|
-
|
|
662
|
+
def extract_tokens(parsed)
|
|
663
|
+
self.class.send(:extract_tokens, parsed)
|
|
514
664
|
end
|
|
515
665
|
|
|
516
666
|
def classify_error_message(message)
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
if msg_lower.include?("rate limit") || msg_lower.include?("session limit")
|
|
520
|
-
"Rate limit exceeded"
|
|
521
|
-
elsif msg_lower.include?("deprecat") || msg_lower.include?("end-of-life")
|
|
522
|
-
"Model deprecated"
|
|
523
|
-
elsif msg_lower.include?("oauth token") || msg_lower.include?("authentication")
|
|
524
|
-
"Authentication error"
|
|
525
|
-
else
|
|
526
|
-
message
|
|
527
|
-
end
|
|
667
|
+
self.class.send(:classify_error_message, message)
|
|
528
668
|
end
|
|
529
669
|
|
|
530
670
|
def parse_claude_mcp_output(output)
|
data/lib/agent_harness/providers/base.rb
CHANGED
|
@@ -104,6 +104,15 @@ module AgentHarness
|
|
|
104
104
|
def send_message(prompt:, **options)
|
|
105
105
|
log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
|
|
106
106
|
|
|
107
|
+
# Text mode: fall back to CLI with tools disabled when the provider
|
|
108
|
+
# does not have an HTTP text transport. Providers that support text
|
|
109
|
+
# mode (e.g. Anthropic) override send_message to intercept this
|
|
110
|
+
# before reaching Base.
|
|
111
|
+
if options[:mode] == :text && !supports_text_mode?
|
|
112
|
+
log_debug("text_mode_cli_fallback", provider: self.class.provider_name)
|
|
113
|
+
options = options.except(:mode).merge(tools: :none)
|
|
114
|
+
end
|
|
115
|
+
|
|
107
116
|
# Warn when tools option is passed to a provider that doesn't support it
|
|
108
117
|
if options[:tools] && !supports_tool_control?
|
|
109
118
|
log_debug("tools_option_unsupported",
|