agent-harness 0.7.1 → 0.7.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 840999010c09f5e1b70d3dd0a1631cf76e15a13738954cb2f259149f7e0df9c3
4
- data.tar.gz: 79f321a55d661a7f1a018372b8fea6b1c0f55ce659bfeb62a4937c2fd5852976
3
+ metadata.gz: 3e879475ab73c89cd1dd1a107ce769e355426cee936e95df377ec242312cec4b
4
+ data.tar.gz: 492ed111e0b70703f5f55d2a448259450132515a614bc597310ddecea775a313
5
5
  SHA512:
6
- metadata.gz: a2092406f7f5f75623eea7e3b4bc3c78c9fd7ffcd6f85b1e90a2e20bdc59f4b5f4bab5d7e9e8dee7fdf8772881e311c02cec7e555ce797c01b1ec7c3f482e023
7
- data.tar.gz: 3670198c4053fb94c3ec4e990cc4649b19977ccf5d7a5a6b3a95f1acb077f53c50055cd57098eda54b0fa1afaab0d1edff3429d4d0a55c3abf8137c4d846cda2
6
+ metadata.gz: 02c690080d6dc6c39275c5188493c6e6a7a29303af35d1435d249ef996234235fddedb767489d429f0d98283429ec57dc8a033367aea7ca89278596ddf34d452
7
+ data.tar.gz: 4a5be3565b1c35b73abc61abc2238b6a5d41416d736162624a0db14fe30e96e01029eb687e35916ecd81cefbbe2d0397113339a80ee3965dcb9b943415223745
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.7.1"
2
+ ".": "0.7.3"
3
3
  }
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.7.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.2...agent-harness/v0.7.3) (2026-04-15)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * 114: feat: add text-only transport that bypasses the CLI ([a6be68a](https://github.com/viamin/agent-harness/commit/a6be68aa03b0202492caeb24233104cd1b814d88))
9
+ * 98: feat: add token usage extraction for remaining providers (cursor, gemini, aider, opencode, copilot, mistral_vibe) ([#105](https://github.com/viamin/agent-harness/issues/105)) ([b090748](https://github.com/viamin/agent-harness/commit/b090748b5d528ab864e94754c0992bc060669540))
10
+
11
+ ## [0.7.2](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.1...agent-harness/v0.7.2) (2026-04-15)
12
+
13
+
14
+ ### Bug Fixes
15
+
16
+ * 113: [P1] feat: support disabling tools for text-only send_message calls ([#115](https://github.com/viamin/agent-harness/issues/115)) ([62bc66a](https://github.com/viamin/agent-harness/commit/62bc66a3d34a889de65ba7c4951b8bdb1f388fa9))
17
+
3
18
  ## [0.7.1](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.0...agent-harness/v0.7.1) (2026-04-15)
4
19
 
5
20
 
@@ -59,6 +59,13 @@ module AgentHarness
59
59
  end
60
60
  end
61
61
 
62
+ # Auth mismatch errors — raised when the requested transport mode
63
+ # requires credentials that differ from the caller's current auth mode.
64
+ # For example, HTTP text mode is refused when the caller has only
65
+ # OAuth/subscription credentials (no API key), since direct API access
66
+ # would move billing from the subscription to API-metered usage.
67
+ class AuthMismatchError < AuthenticationError; end
68
+
62
69
  # Configuration errors
63
70
  class ConfigurationError < Error; end
64
71
 
@@ -257,6 +257,11 @@ module AgentHarness
257
257
  :supported_mcp_transports,
258
258
  default: default_supported_mcp_transports
259
259
  ),
260
+ supports_token_counting: provider_metadata_value(
261
+ provider,
262
+ :supports_token_counting?,
263
+ default: default_supports_token_counting
264
+ ),
260
265
  supports_sessions: provider_metadata_value(
261
266
  provider,
262
267
  :supports_sessions?,
@@ -601,6 +606,10 @@ module AgentHarness
601
606
  false
602
607
  end
603
608
 
609
+ def default_supports_token_counting
610
+ false
611
+ end
612
+
604
613
  def default_supports_dangerous_mode
605
614
  false
606
615
  end
@@ -717,6 +726,13 @@ module AgentHarness
717
726
  # @option options [Integer] :timeout timeout in seconds
718
727
  # @option options [String] :session session identifier
719
728
  # @option options [Boolean] :dangerous_mode skip permission checks
729
+ # @option options [Symbol, Array<String>, nil] :tools tool access control.
730
+ # Pass +:none+ to disable all tool access (pure text-in/text-out mode).
731
+ # Pass an Array of tool name strings to selectively disable specific
732
+ # tools via the provider's disallowed-tools mechanism. Defaults to +nil+
733
+ # (tools enabled, provider default behavior).
734
+ # Providers that do not support tool control will emit a warning and
735
+ # ignore this option — it is never a hard failure.
720
736
  # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
721
737
  # runtime overrides (model, base_url, api_provider, env, flags, metadata).
722
738
  # For providers that delegate to Providers::Base#send_message, a plain Hash
@@ -839,6 +855,24 @@ module AgentHarness
839
855
  end
840
856
  end
841
857
 
858
+ # Check if provider supports tool access control (disabling tools)
859
+ #
860
+ # @return [Boolean] true if the provider supports the tools: option
861
+ def supports_tool_control?
862
+ false
863
+ end
864
+
865
+ # Check if provider supports text-only mode via direct HTTP transport.
866
+ #
867
+ # Providers that return +true+ will route +mode: :text+ requests
868
+ # through their REST API instead of the CLI. Providers that return
869
+ # +false+ fall back to the CLI path with tools forcibly disabled.
870
+ #
871
+ # @return [Boolean] true if the provider has an HTTP text transport
872
+ def supports_text_mode?
873
+ false
874
+ end
875
+
842
876
  # Check if provider supports dangerous mode
843
877
  #
844
878
  # @return [Boolean] true if dangerous mode is supported
@@ -868,6 +902,13 @@ module AgentHarness
868
902
  []
869
903
  end
870
904
 
905
+ # Whether this provider can extract token usage from CLI output
906
+ #
907
+ # @return [Boolean] true if the provider returns token counts
908
+ def supports_token_counting?
909
+ false
910
+ end
911
+
871
912
  # Validate provider configuration
872
913
  #
873
914
  # @return [Hash] with :valid, :errors keys
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
3
4
  require "securerandom"
4
5
  require "shellwords"
5
6
  require "tmpdir"
@@ -10,6 +11,8 @@ module AgentHarness
10
11
  #
11
12
  # Provides integration with the Aider CLI tool.
12
13
  class Aider < Base
14
+ include TokenUsageParsing
15
+
13
16
  UV_VERSION = "0.8.17"
14
17
  SUPPORTED_CLI_VERSION = "0.86.2"
15
18
  SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.87.0").freeze
@@ -196,6 +199,10 @@ module AgentHarness
196
199
  ["--restore-chat-history", session_id]
197
200
  end
198
201
 
202
+ def supports_token_counting?
203
+ true
204
+ end
205
+
199
206
  def send_message(prompt:, **options)
200
207
  log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
201
208
 
@@ -205,15 +212,19 @@ module AgentHarness
205
212
  options = normalize_mcp_servers(options)
206
213
  validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
207
214
 
208
- llm_history_path = generate_llm_history_path
209
- command = build_command(prompt, options.merge(llm_history_path: llm_history_path))
210
- preparation = build_execution_preparation(options)
211
215
  timeout = options[:timeout] || @config.timeout || default_timeout
216
+ raise TimeoutError, "Command timed out before execution started" if timeout <= 0
212
217
 
213
218
  start_time = Time.now
219
+ llm_history_path = prepare_llm_history_file!
220
+ command = build_command(prompt, options.merge(llm_history_path: llm_history_path))
221
+ preparation = build_execution_preparation(options)
222
+ remaining_timeout = timeout - (Time.now - start_time)
223
+ raise TimeoutError, "Command timed out before execution started" if remaining_timeout <= 0
224
+
214
225
  result = execute_with_timeout(
215
226
  command,
216
- timeout: timeout,
227
+ timeout: remaining_timeout,
217
228
  env: build_env(options),
218
229
  preparation: preparation,
219
230
  **command_execution_options(options)
@@ -221,13 +232,14 @@ module AgentHarness
221
232
  duration = Time.now - start_time
222
233
 
223
234
  response = parse_response(result, duration: duration, llm_history_path: llm_history_path)
224
- if runtime&.model
235
+ effective_runtime_model = normalized_model_name(runtime&.model)
236
+ if effective_runtime_model
225
237
  response = Response.new(
226
238
  output: response.output,
227
239
  exit_code: response.exit_code,
228
240
  duration: response.duration,
229
241
  provider: response.provider,
230
- model: runtime.model,
242
+ model: effective_runtime_model,
231
243
  tokens: response.tokens,
232
244
  metadata: response.metadata,
233
245
  error: response.error
@@ -259,10 +271,8 @@ module AgentHarness
259
271
  cmd += ["--llm-history-file", options[:llm_history_path]]
260
272
  end
261
273
 
262
- model = runtime&.model || @config.model
263
- if model && !model.empty?
264
- cmd += ["--model", model]
265
- end
274
+ model = effective_model_name(runtime)
275
+ cmd += ["--model", model] if model
266
276
 
267
277
  if options[:session]
268
278
  cmd += session_flags(options[:session])
@@ -316,11 +326,11 @@ module AgentHarness
316
326
  COMMON_SHELL_COMMAND_PATTERN =
317
327
  /\A(?:git|bundle|ruby|python\d*(?:\.\d+)?|uv|npm|yarn|pnpm|node|bash|sh|zsh|make|rake|rspec|rails|go|pytest|bin\/[\w.-]+|sed|rg|grep|find|ls|cat|cp|mv|rm|mkdir|touch|chmod|chown|docker|kubectl)\z/
318
328
  EXECUTOR_LLM_HISTORY_TIMEOUT = 10
319
-
329
+ HistoryFileHandle = Struct.new(:path)
320
330
  def generate_llm_history_path
321
- return "/tmp/aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}" if sandboxed_environment?
331
+ return "/tmp/aider_llm_history_#{SecureRandom.hex(8)}.json" if sandboxed_environment?
322
332
 
323
- File.join(Dir.tmpdir, "aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}")
333
+ File.join(Dir.tmpdir, "aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}.json")
324
334
  end
325
335
 
326
336
  def parse_token_usage(result, llm_history_path:)
@@ -328,11 +338,18 @@ module AgentHarness
328
338
  # Prefer the request-local history file when it includes a token report,
329
339
  # but fall back to captured command output because the usage summary is
330
340
  # printed there during normal runs.
331
- parse_token_usage_text(safe_read_llm_history(llm_history_path), source: :history) ||
341
+ parse_token_usage_history_content(safe_read_llm_history(llm_history_path)) ||
332
342
  parse_token_usage_text(result.stdout, source: :output) ||
333
343
  parse_token_usage_text(result.stderr, source: :output)
334
344
  end
335
345
 
346
+ def parse_token_usage_history_content(content)
347
+ return nil if content.nil? || content.strip.empty?
348
+
349
+ aggregate_token_counts(parse_history_entries(content)) ||
350
+ parse_token_usage_text(content, source: :history)
351
+ end
352
+
336
353
  def read_llm_history(path)
337
354
  return read_executor_llm_history(path) if sandboxed_environment?
338
355
  return nil unless path && File.exist?(path) && !File.zero?(path)
@@ -362,10 +379,67 @@ module AgentHarness
362
379
 
363
380
  input = parse_token_count(match[:input])
364
381
  output = parse_token_count(match[:output])
382
+ return nil if input.negative? || output.negative?
365
383
 
366
384
  {input: input, output: output, total: input + output}
367
385
  end
368
386
 
387
+ def parse_history_entries(content)
388
+ parsed = JSON.parse(content)
389
+ case parsed
390
+ when Array
391
+ parsed
392
+ when Hash
393
+ [parsed]
394
+ end
395
+ rescue JSON::ParserError
396
+ parsed_lines = []
397
+
398
+ content.each_line do |line|
399
+ next if line.strip.empty?
400
+
401
+ parsed_lines << JSON.parse(line)
402
+ rescue JSON::ParserError
403
+ return nil
404
+ end
405
+
406
+ parsed_lines.empty? ? nil : parsed_lines
407
+ end
408
+
409
+ def aggregate_token_counts(entries)
410
+ return nil unless entries&.any?
411
+
412
+ total_input = 0
413
+ total_output = 0
414
+ found = false
415
+
416
+ entries.each do |entry|
417
+ usage = find_usage_in_entry(entry)
418
+ next unless usage
419
+
420
+ input = token_count_for(usage, "prompt_tokens", "input_tokens", "promptTokens", "inputTokens")
421
+ output = token_count_for(usage, "completion_tokens", "output_tokens", "completionTokens", "outputTokens")
422
+ next if input.nil? && output.nil?
423
+
424
+ total_input += input || 0
425
+ total_output += output || 0
426
+ found = true
427
+ end
428
+
429
+ return nil unless found
430
+
431
+ {input: total_input, output: total_output, total: total_input + total_output}
432
+ end
433
+
434
+ def find_usage_in_entry(entry)
435
+ return nil unless entry.is_a?(Hash)
436
+
437
+ select_best_usage_payload([
438
+ entry["usage"],
439
+ nested_hash_value(entry, "response", "usage")
440
+ ])
441
+ end
442
+
369
443
  def extract_history_token_usage_match(content)
370
444
  lines = content.lines
371
445
 
@@ -513,6 +587,16 @@ module AgentHarness
513
587
  (normalized.to_f * multiplier).round
514
588
  end
515
589
 
590
+ def prepare_llm_history_file!
591
+ if sandboxed_environment?
592
+ @aider_history_path = generate_llm_history_path
593
+ else
594
+ path = reserve_local_llm_history_path
595
+ @aider_history_tempfile = HistoryFileHandle.new(path)
596
+ path
597
+ end
598
+ end
599
+
516
600
  def cleanup_llm_history_file!(path)
517
601
  return unless path
518
602
 
@@ -522,6 +606,9 @@ module AgentHarness
522
606
  rescue => e
523
607
  log_debug("llm_history_cleanup_error", error: e.message)
524
608
  nil
609
+ ensure
610
+ clear_local_history_handle!(path)
611
+ clear_executor_history_path!(path)
525
612
  end
526
613
 
527
614
  def validate_runtime_flags!(flags)
@@ -573,6 +660,37 @@ module AgentHarness
573
660
  log_debug("llm_history_cleanup_error", error: e.message)
574
661
  nil
575
662
  end
663
+
664
+ MAX_HISTORY_PATH_ATTEMPTS = 10
665
+
666
+ def reserve_local_llm_history_path
667
+ MAX_HISTORY_PATH_ATTEMPTS.times do
668
+ path = generate_llm_history_path
669
+
670
+ begin
671
+ File.open(path, File::WRONLY | File::CREAT | File::EXCL, 0o600, &:close)
672
+ return path
673
+ rescue Errno::EEXIST
674
+ next
675
+ end
676
+ end
677
+
678
+ raise "failed to reserve unique LLM history path after #{MAX_HISTORY_PATH_ATTEMPTS} attempts"
679
+ end
680
+
681
+ def clear_local_history_handle!(path)
682
+ return unless defined?(@aider_history_tempfile)
683
+ return unless @aider_history_tempfile&.path == path
684
+
685
+ @aider_history_tempfile = nil
686
+ end
687
+
688
+ def clear_executor_history_path!(path)
689
+ return unless defined?(@aider_history_path)
690
+ return unless @aider_history_path == path
691
+
692
+ @aider_history_path = nil
693
+ end
576
694
  end
577
695
  end
578
696
  end
@@ -297,6 +297,10 @@ module AgentHarness
297
297
  end
298
298
 
299
299
  def send_message(prompt:, **options)
300
+ if options[:mode] == :text
301
+ return send_text_message(prompt, **options.except(:mode))
302
+ end
303
+
300
304
  super
301
305
  ensure
302
306
  cleanup_mcp_tempfiles!
@@ -317,6 +321,14 @@ module AgentHarness
317
321
  ["--mcp-config", config_path]
318
322
  end
319
323
 
324
+ def supports_tool_control?
325
+ true
326
+ end
327
+
328
+ def supports_text_mode?
329
+ true
330
+ end
331
+
320
332
  def dangerous_mode_flags
321
333
  ["--dangerously-skip-permissions"]
322
334
  end
@@ -325,6 +337,10 @@ module AgentHarness
325
337
  :oauth
326
338
  end
327
339
 
340
+ def supports_token_counting?
341
+ true
342
+ end
343
+
328
344
  def execution_semantics
329
345
  {
330
346
  prompt_delivery: :arg,
@@ -401,6 +417,22 @@ module AgentHarness
401
417
 
402
418
  protected
403
419
 
420
+ # All tools the Claude CLI exposes by default.
421
+ # Used to build the --disallowedTools list when tools: :none is requested.
422
+ ALL_CLI_TOOLS = %w[
423
+ Agent
424
+ Bash
425
+ Read
426
+ Edit
427
+ Write
428
+ Grep
429
+ Glob
430
+ WebFetch
431
+ WebSearch
432
+ TodoWrite
433
+ NotebookEdit
434
+ ].freeze
435
+
404
436
  def build_command(prompt, options)
405
437
  cmd = [self.class.binary_name]
406
438
 
@@ -411,6 +443,14 @@ module AgentHarness
411
443
  cmd += ["--model", @config.model]
412
444
  end
413
445
 
446
+ # Add tool-control flags (--disallowedTools, plus a belt-and-suspenders permission mode)
447
+ if options[:tools]
448
+ # Skip --permission-mode plan when dangerous_mode is active, since
449
+ # --dangerously-skip-permissions would override it anyway.
450
+ # The --disallowedTools flags still provide the primary protection.
451
+ cmd += build_tool_control_flags(options[:tools], skip_permission_mode: options[:dangerous_mode])
452
+ end
453
+
414
454
  # Add dangerous mode if requested
415
455
  if options[:dangerous_mode] && supports_dangerous_mode?
416
456
  cmd += dangerous_mode_flags
@@ -463,6 +503,67 @@ module AgentHarness
463
503
 
464
504
  private
465
505
 
506
+ def send_text_message(prompt, **options)
507
+ api_key = resolve_text_mode_api_key
508
+ model = options[:model] || @config.model
509
+ timeout = options[:timeout] || @config.timeout || default_timeout
510
+ max_tokens = options[:max_tokens]
511
+
512
+ transport = TextTransport.new(api_key: api_key, logger: @logger)
513
+
514
+ kwargs = {model: model, timeout: timeout}
515
+ kwargs[:max_tokens] = max_tokens if max_tokens
516
+
517
+ response = transport.send_message(prompt, **kwargs)
518
+
519
+ # Apply runtime model override if present
520
+ runtime = options[:provider_runtime]
521
+ runtime = ProviderRuntime.wrap(runtime) if runtime.is_a?(Hash)
522
+ if runtime&.model
523
+ response = Response.new(
524
+ output: response.output,
525
+ exit_code: response.exit_code,
526
+ duration: response.duration,
527
+ provider: response.provider,
528
+ model: runtime.model,
529
+ tokens: response.tokens,
530
+ metadata: response.metadata,
531
+ error: response.error
532
+ )
533
+ end
534
+
535
+ track_tokens(response) if response.tokens
536
+
537
+ log_debug("send_text_message_complete",
538
+ duration: response.duration,
539
+ tokens: response.tokens,
540
+ transport: :http)
541
+
542
+ response
543
+ end
544
+
545
+ # Resolve the API key for text mode, validating that the caller's
546
+ # credentials support direct API access without silently shifting
547
+ # billing from subscription to API-metered usage.
548
+ #
549
+ # @return [String] the API key
550
+ # @raise [AuthMismatchError] if no API key is available
551
+ def resolve_text_mode_api_key
552
+ api_key = ENV["ANTHROPIC_API_KEY"]
553
+
554
+ if api_key.nil? || api_key.strip.empty?
555
+ raise AuthMismatchError.new(
556
+ "Text mode requires an ANTHROPIC_API_KEY for direct API access. " \
557
+ "OAuth/subscription credentials cannot be used for HTTP transport " \
558
+ "because it would silently shift billing to API-metered usage. " \
559
+ "Set ANTHROPIC_API_KEY or use the default CLI mode instead.",
560
+ provider: :claude
561
+ )
562
+ end
563
+
564
+ api_key.strip
565
+ end
566
+
466
567
  def parse_json_output(output)
467
568
  return nil if output.nil? || output.empty?
468
569
 
@@ -612,6 +713,23 @@ module AgentHarness
612
713
  end
613
714
  end
614
715
 
716
+ def build_tool_control_flags(tools_option, skip_permission_mode: false)
717
+ tool_names = case tools_option
718
+ when :none
719
+ ALL_CLI_TOOLS
720
+ when Array
721
+ tools_option
722
+ else
723
+ return []
724
+ end
725
+
726
+ return [] if tool_names.empty?
727
+
728
+ flags = tool_names.flat_map { |tool| ["--disallowedTools", tool] }
729
+ flags = ["--permission-mode", "plan"] + flags unless skip_permission_mode
730
+ flags
731
+ end
732
+
615
733
  def log_debug(action, **context)
616
734
  @logger&.debug("[AgentHarness::Anthropic] #{action}: #{context.inspect}")
617
735
  end
@@ -104,6 +104,26 @@ module AgentHarness
104
104
  def send_message(prompt:, **options)
105
105
  log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
106
106
 
107
+ # Text mode: fall back to CLI with tools disabled when the provider
108
+ # does not have an HTTP text transport. Providers that support text
109
+ # mode (e.g. Anthropic) override send_message to intercept this
110
+ # before reaching Base.
111
+ if options[:mode] == :text && !supports_text_mode?
112
+ log_debug("text_mode_cli_fallback", provider: self.class.provider_name)
113
+ options = options.except(:mode).merge(tools: :none)
114
+ end
115
+
116
+ # Warn when tools option is passed to a provider that doesn't support it
117
+ if options[:tools] && !supports_tool_control?
118
+ log_debug("tools_option_unsupported",
119
+ provider: self.class.provider_name,
120
+ tools: options[:tools])
121
+ @logger&.warn(
122
+ "[AgentHarness::#{self.class.provider_name}] tools option is not supported " \
123
+ "by this provider and will be ignored"
124
+ )
125
+ end
126
+
107
127
  # Coerce provider_runtime from Hash if needed
108
128
  options = normalize_provider_runtime(options)
109
129