RubyGems - agent-harness - Versions diffs - 0.7.2 → 0.7.4 - Mend

agent-harness 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +15 -0
data/lib/agent_harness/errors.rb +7 -0
data/lib/agent_harness/providers/adapter.rb +27 -0
data/lib/agent_harness/providers/aider.rb +132 -14
data/lib/agent_harness/providers/anthropic.rb +163 -23
data/lib/agent_harness/providers/base.rb +9 -0
data/lib/agent_harness/providers/github_copilot.rb +435 -494
data/lib/agent_harness/providers/registry.rb +1 -0
data/lib/agent_harness/providers/token_usage_parsing.rb +118 -0
data/lib/agent_harness/text_transport.rb +168 -0
data/lib/agent_harness/version.rb +1 -1
data/lib/agent_harness.rb +2 -0
metadata +3 -1

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c8c1873c6be023d4ab91659139b56b89552b8a0e89c101fed27ef0113c868434
-  data.tar.gz: 5bd2792791e5e7f1d8ee7cdd49b93bb9ee7c3d7e762914d2e3d4b4d8bfb014d2
+  metadata.gz: 278785d86727fd759e55bcd8fd4fb4124a13c8f6ae818a40b2ae49bcbbb3b18f
+  data.tar.gz: 717338d556ef335ebf3d4e2f0fbdb4a9d92bbbe52bbb1739fcb8afaba7b0c1ac
 SHA512:
-  metadata.gz: e8530d91fec6ebddae4d0c8cb101a75c18df480ee15ae5006957576c20596ac199f0546a72e8d128dbcf8223eb39c7f7af6e7abe7aba05e67e27bda68c6b0bd5
-  data.tar.gz: 4ee7d860aa222170d8e3edd9319fd31eae0d174e571a8da0fae540b1fb5f6094c329ca0431a879f6d4927df7396e6a28aa2dedbae4467fa3d4cd8ed744829f34
+  metadata.gz: 76cd57c3875f38271390f3f7ebe29153d40924988315807d79fd85d37fdedde109e7c465a6eeeb889c858a6da53faac5cd48dc8a62862fed5d6843e73b4036a7
+  data.tar.gz: 6d74d1ac89feb72339a87bb08b413ee6996b0dc1b0b7cb7ff2446ac3ec12539434a00f57187e1632ec889b5fba5bab10ce20d2b91a7aaee5b21e39c837101b04

data/.release-please-manifest.json CHANGED

@@ -1,3 +1,3 @@
 {
-  ".": "0.7.2"
+  ".": "0.7.4"
 }

data/CHANGELOG.md CHANGED

@@ -1,5 +1,20 @@
 ## [Unreleased]
+## [0.7.4](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.3...agent-harness/v0.7.4) (2026-04-18)
+### Bug Fixes
+* 119: Claude provider leaks raw --output-format json envelope as response.output ([#120](https://github.com/viamin/agent-harness/issues/120)) ([602a5f9](https://github.com/viamin/agent-harness/commit/602a5f97e009ac59c798c7b1d7342cd43e2e8d4f))
+## [0.7.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.2...agent-harness/v0.7.3) (2026-04-15)
+### Bug Fixes
+* 114: feat: add text-only transport that bypasses the CLI ([a6be68a](https://github.com/viamin/agent-harness/commit/a6be68aa03b0202492caeb24233104cd1b814d88))
+* 98: feat: add token usage extraction for remaining providers (cursor, gemini, aider, opencode, copilot, mistral_vibe) ([#105](https://github.com/viamin/agent-harness/issues/105)) ([b090748](https://github.com/viamin/agent-harness/commit/b090748b5d528ab864e94754c0992bc060669540))
 ## [0.7.2](https://github.com/viamin/agent-harness/compare/agent-harness/v0.7.1...agent-harness/v0.7.2) (2026-04-15)

data/lib/agent_harness/errors.rb CHANGED

@@ -59,6 +59,13 @@ module AgentHarness
     end
   end
+  # Auth mismatch errors — raised when the requested transport mode
+  # requires credentials that differ from the caller's current auth mode.
+  # For example, requesting HTTP text mode with only OAuth/subscription
+  # credentials (no API key) would silently shift billing from
+  # subscription to API-metered usage.
+  class AuthMismatchError < AuthenticationError; end
   # Configuration errors
   class ConfigurationError < Error; end

data/lib/agent_harness/providers/adapter.rb CHANGED

@@ -257,6 +257,11 @@ module AgentHarness
                 :supported_mcp_transports,
                 default: default_supported_mcp_transports
               ),
+              supports_token_counting: provider_metadata_value(
+                provider,
+                :supports_token_counting?,
+                default: default_supports_token_counting
+              ),
               supports_sessions: provider_metadata_value(
                 provider,
                 :supports_sessions?,
@@ -601,6 +606,10 @@ module AgentHarness
           false
         end
+        def default_supports_token_counting
+          false
+        end
         def default_supports_dangerous_mode
           false
         end
@@ -853,6 +862,17 @@ module AgentHarness
         false
       end
+      # Check if provider supports text-only mode via direct HTTP transport.
+      #
+      # Providers that return +true+ will route +mode: :text+ requests
+      # through their REST API instead of the CLI. Providers that return
+      # +false+ fall back to the CLI path with tools forcibly disabled.
+      #
+      # @return [Boolean] true if the provider has an HTTP text transport
+      def supports_text_mode?
+        false
+      end
       # Check if provider supports dangerous mode
       #
       # @return [Boolean] true if dangerous mode is supported
@@ -882,6 +902,13 @@ module AgentHarness
         []
       end
+      # Whether this provider can extract token usage from CLI output
+      #
+      # @return [Boolean] true if the provider returns token counts
+      def supports_token_counting?
+        false
+      end
       # Validate provider configuration
       #
       # @return [Hash] with :valid, :errors keys

data/lib/agent_harness/providers/aider.rb CHANGED

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require "json"
 require "securerandom"
 require "shellwords"
 require "tmpdir"
@@ -10,6 +11,8 @@ module AgentHarness
     #
     # Provides integration with the Aider CLI tool.
     class Aider < Base
+      include TokenUsageParsing
       UV_VERSION = "0.8.17"
       SUPPORTED_CLI_VERSION = "0.86.2"
       SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.87.0").freeze
@@ -196,6 +199,10 @@ module AgentHarness
         ["--restore-chat-history", session_id]
       end
+      def supports_token_counting?
+        true
+      end
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
@@ -205,15 +212,19 @@ module AgentHarness
         options = normalize_mcp_servers(options)
         validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
-        llm_history_path = generate_llm_history_path
-        command = build_command(prompt, options.merge(llm_history_path: llm_history_path))
-        preparation = build_execution_preparation(options)
         timeout = options[:timeout] || @config.timeout || default_timeout
+        raise TimeoutError, "Command timed out before execution started" if timeout <= 0
         start_time = Time.now
+        llm_history_path = prepare_llm_history_file!
+        command = build_command(prompt, options.merge(llm_history_path: llm_history_path))
+        preparation = build_execution_preparation(options)
+        remaining_timeout = timeout - (Time.now - start_time)
+        raise TimeoutError, "Command timed out before execution started" if remaining_timeout <= 0
         result = execute_with_timeout(
           command,
-          timeout: timeout,
+          timeout: remaining_timeout,
           env: build_env(options),
           preparation: preparation,
           **command_execution_options(options)
@@ -221,13 +232,14 @@ module AgentHarness
         duration = Time.now - start_time
         response = parse_response(result, duration: duration, llm_history_path: llm_history_path)
-        if runtime&.model
+        effective_runtime_model = normalized_model_name(runtime&.model)
+        if effective_runtime_model
           response = Response.new(
             output: response.output,
             exit_code: response.exit_code,
             duration: response.duration,
             provider: response.provider,
-            model: runtime.model,
+            model: effective_runtime_model,
             tokens: response.tokens,
             metadata: response.metadata,
             error: response.error
@@ -259,10 +271,8 @@ module AgentHarness
           cmd += ["--llm-history-file", options[:llm_history_path]]
         end
-        model = runtime&.model || @config.model
-        if model && !model.empty?
-          cmd += ["--model", model]
-        end
+        model = effective_model_name(runtime)
+        cmd += ["--model", model] if model
         if options[:session]
           cmd += session_flags(options[:session])
@@ -316,11 +326,11 @@ module AgentHarness
       COMMON_SHELL_COMMAND_PATTERN =
         /\A(?:git|bundle|ruby|python\d*(?:\.\d+)?|uv|npm|yarn|pnpm|node|bash|sh|zsh|make|rake|rspec|rails|go|pytest|bin\/[\w.-]+|sed|rg|grep|find|ls|cat|cp|mv|rm|mkdir|touch|chmod|chown|docker|kubectl)\z/
       EXECUTOR_LLM_HISTORY_TIMEOUT = 10
+      HistoryFileHandle = Struct.new(:path)
       def generate_llm_history_path
-        return "/tmp/aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}" if sandboxed_environment?
+        return "/tmp/aider_llm_history_#{SecureRandom.hex(8)}.json" if sandboxed_environment?
-        File.join(Dir.tmpdir, "aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}")
+        File.join(Dir.tmpdir, "aider_llm_history_#{Process.pid}_#{SecureRandom.hex(8)}.json")
       end
       def parse_token_usage(result, llm_history_path:)
@@ -328,11 +338,18 @@ module AgentHarness
         # Prefer the request-local history file when it includes a token report,
         # but fall back to captured command output because the usage summary is
         # printed there during normal runs.
-        parse_token_usage_text(safe_read_llm_history(llm_history_path), source: :history) ||
+        parse_token_usage_history_content(safe_read_llm_history(llm_history_path)) ||
           parse_token_usage_text(result.stdout, source: :output) ||
           parse_token_usage_text(result.stderr, source: :output)
       end
+      def parse_token_usage_history_content(content)
+        return nil if content.nil? || content.strip.empty?
+        aggregate_token_counts(parse_history_entries(content)) ||
+          parse_token_usage_text(content, source: :history)
+      end
       def read_llm_history(path)
         return read_executor_llm_history(path) if sandboxed_environment?
         return nil unless path && File.exist?(path) && !File.zero?(path)
@@ -362,10 +379,67 @@ module AgentHarness
         input = parse_token_count(match[:input])
         output = parse_token_count(match[:output])
+        return nil if input.negative? || output.negative?
         {input: input, output: output, total: input + output}
       end
+      def parse_history_entries(content)
+        parsed = JSON.parse(content)
+        case parsed
+        when Array
+          parsed
+        when Hash
+          [parsed]
+        end
+      rescue JSON::ParserError
+        parsed_lines = []
+        content.each_line do |line|
+          next if line.strip.empty?
+          parsed_lines << JSON.parse(line)
+        rescue JSON::ParserError
+          return nil
+        end
+        parsed_lines.empty? ? nil : parsed_lines
+      end
+      def aggregate_token_counts(entries)
+        return nil unless entries&.any?
+        total_input = 0
+        total_output = 0
+        found = false
+        entries.each do |entry|
+          usage = find_usage_in_entry(entry)
+          next unless usage
+          input = token_count_for(usage, "prompt_tokens", "input_tokens", "promptTokens", "inputTokens")
+          output = token_count_for(usage, "completion_tokens", "output_tokens", "completionTokens", "outputTokens")
+          next if input.nil? && output.nil?
+          total_input += input || 0
+          total_output += output || 0
+          found = true
+        end
+        return nil unless found
+        {input: total_input, output: total_output, total: total_input + total_output}
+      end
+      def find_usage_in_entry(entry)
+        return nil unless entry.is_a?(Hash)
+        select_best_usage_payload([
+          entry["usage"],
+          nested_hash_value(entry, "response", "usage")
+        ])
+      end
       def extract_history_token_usage_match(content)
         lines = content.lines
@@ -513,6 +587,16 @@ module AgentHarness
         (normalized.to_f * multiplier).round
       end
+      def prepare_llm_history_file!
+        if sandboxed_environment?
+          @aider_history_path = generate_llm_history_path
+        else
+          path = reserve_local_llm_history_path
+          @aider_history_tempfile = HistoryFileHandle.new(path)
+          path
+        end
+      end
       def cleanup_llm_history_file!(path)
         return unless path
@@ -522,6 +606,9 @@ module AgentHarness
       rescue => e
         log_debug("llm_history_cleanup_error", error: e.message)
         nil
+      ensure
+        clear_local_history_handle!(path)
+        clear_executor_history_path!(path)
       end
       def validate_runtime_flags!(flags)
@@ -573,6 +660,37 @@ module AgentHarness
         log_debug("llm_history_cleanup_error", error: e.message)
         nil
       end
+      MAX_HISTORY_PATH_ATTEMPTS = 10
+      def reserve_local_llm_history_path
+        MAX_HISTORY_PATH_ATTEMPTS.times do
+          path = generate_llm_history_path
+          begin
+            File.open(path, File::WRONLY | File::CREAT | File::EXCL, 0o600, &:close)
+            return path
+          rescue Errno::EEXIST
+            next
+          end
+        end
+        raise "failed to reserve unique LLM history path after #{MAX_HISTORY_PATH_ATTEMPTS} attempts"
+      end
+      def clear_local_history_handle!(path)
+        return unless defined?(@aider_history_tempfile)
+        return unless @aider_history_tempfile&.path == path
+        @aider_history_tempfile = nil
+      end
+      def clear_executor_history_path!(path)
+        return unless defined?(@aider_history_path)
+        return unless @aider_history_path == path
+        @aider_history_path = nil
+      end
     end
   end
 end

data/lib/agent_harness/providers/anthropic.rb CHANGED

@@ -161,8 +161,81 @@ module AgentHarness
           Base::DEFAULT_SMOKE_TEST_CONTRACT
         end
+        # Parse a raw Claude CLI --output-format=json envelope into its components.
+        #
+        # Downstream callers that capture Claude CLI stdout directly (e.g. container
+        # execution plans) can use this to extract the assistant text, error state,
+        # token usage, and structured metadata without re-implementing the parsing.
+        #
+        # @param json_string [String] raw JSON envelope from Claude CLI stdout
+        # @return [Hash, nil] parsed components or nil if not a valid envelope
+        #   - :output [String] the assistant's final text (the "result" field)
+        #   - :error [String, nil] error message if is_error was true
+        #   - :tokens [Hash, nil] {input:, output:, total:} token counts
+        #   - :metadata [Hash] structured metadata (cost_usd, session_id, etc.)
+        def parse_cli_json_envelope(json_string)
+          return nil if json_string.nil? || json_string.empty?
+          parsed = JSON.parse(json_string)
+          return nil unless parsed.is_a?(Hash) && parsed.key?("result")
+          output = parsed["result"]
+          error = nil
+          if parsed["is_error"]
+            error = classify_error_message(output || "Unknown Claude CLI error")
+          end
+          tokens = extract_tokens(parsed)
+          metadata = extract_envelope_metadata(parsed)
+          {output: output, error: error, tokens: tokens, metadata: metadata}
+        rescue JSON::ParserError
+          nil
+        end
         private
+        def classify_error_message(message)
+          msg_lower = message.downcase
+          if msg_lower.include?("rate limit") || msg_lower.include?("session limit")
+            "Rate limit exceeded"
+          elsif msg_lower.include?("deprecat") || msg_lower.include?("end-of-life")
+            "Model deprecated"
+          elsif msg_lower.include?("oauth token") || msg_lower.include?("authentication")
+            "Authentication error"
+          else
+            message
+          end
+        end
+        def extract_tokens(parsed)
+          usage = parsed["usage"]
+          return nil unless usage
+          input = usage["input_tokens"]
+          output = usage["output_tokens"]
+          return nil unless input || output
+          input ||= 0
+          output ||= 0
+          {input: input, output: output, total: input + output}
+        end
+        def extract_envelope_metadata(parsed)
+          meta = {}
+          meta[:cost_usd] = parsed["total_cost_usd"] if parsed.key?("total_cost_usd")
+          meta[:session_id] = parsed["session_id"] if parsed.key?("session_id")
+          meta[:stop_reason] = parsed["stop_reason"] if parsed.key?("stop_reason")
+          meta[:terminal_reason] = parsed["terminal_reason"] if parsed.key?("terminal_reason")
+          meta[:num_turns] = parsed["num_turns"] if parsed.key?("num_turns")
+          meta[:duration_ms] = parsed["duration_ms"] if parsed.key?("duration_ms")
+          meta[:duration_api_ms] = parsed["duration_api_ms"] if parsed.key?("duration_api_ms")
+          meta
+        end
         def validate_version!(version)
           unless version.is_a?(String) && !version.strip.empty?
             raise ArgumentError, "Invalid version: #{version.inspect}. " \
@@ -297,6 +370,10 @@ module AgentHarness
       end
       def send_message(prompt:, **options)
+        if options[:mode] == :text
+          return send_text_message(prompt, **options.except(:mode))
+        end
         super
       ensure
         cleanup_mcp_tempfiles!
@@ -321,6 +398,10 @@ module AgentHarness
         true
       end
+      def supports_text_mode?
+        true
+      end
       def dangerous_mode_flags
         ["--dangerously-skip-permissions"]
       end
@@ -329,6 +410,10 @@ module AgentHarness
         :oauth
       end
+      def supports_token_counting?
+        true
+      end
       def execution_semantics
         {
           prompt_delivery: :arg,
@@ -461,17 +546,24 @@ module AgentHarness
         output = result.stdout
         error = nil
         tokens = nil
+        metadata = {}
         if result.failed?
           combined = [result.stdout, result.stderr].compact.join("\n")
           error = classify_error_message(combined)
         end
-        # Parse JSON output to extract result text and token usage
+        # Parse JSON output to extract result text, token usage, and metadata
         parsed = parse_json_output(output)
         if parsed
+          # Handle is_error envelopes as provider errors
+          if parsed["is_error"]
+            error ||= classify_error_message(parsed["result"] || "Unknown Claude CLI error")
+          end
           output = parsed["result"] || output
           tokens = extract_tokens(parsed)
+          metadata = extract_envelope_metadata(parsed)
         end
         Response.new(
@@ -481,6 +573,7 @@ module AgentHarness
           provider: self.class.provider_name,
           model: @config.model,
           tokens: tokens,
+          metadata: metadata,
           error: error
         )
       end
@@ -491,6 +584,67 @@ module AgentHarness
       private
+      def send_text_message(prompt, **options)
+        api_key = resolve_text_mode_api_key
+        model = options[:model] || @config.model
+        timeout = options[:timeout] || @config.timeout || default_timeout
+        max_tokens = options[:max_tokens]
+        transport = TextTransport.new(api_key: api_key, logger: @logger)
+        kwargs = {model: model, timeout: timeout}
+        kwargs[:max_tokens] = max_tokens if max_tokens
+        response = transport.send_message(prompt, **kwargs)
+        # Apply runtime model override if present
+        runtime = options[:provider_runtime]
+        runtime = ProviderRuntime.wrap(runtime) if runtime.is_a?(Hash)
+        if runtime&.model
+          response = Response.new(
+            output: response.output,
+            exit_code: response.exit_code,
+            duration: response.duration,
+            provider: response.provider,
+            model: runtime.model,
+            tokens: response.tokens,
+            metadata: response.metadata,
+            error: response.error
+          )
+        end
+        track_tokens(response) if response.tokens
+        log_debug("send_text_message_complete",
+          duration: response.duration,
+          tokens: response.tokens,
+          transport: :http)
+        response
+      end
+      # Resolve the API key for text mode, validating that the caller's
+      # credentials support direct API access without silently shifting
+      # billing from subscription to API-metered usage.
+      #
+      # @return [String] the API key
+      # @raise [AuthMismatchError] if no API key is available
+      def resolve_text_mode_api_key
+        api_key = ENV["ANTHROPIC_API_KEY"]
+        if api_key.nil? || api_key.strip.empty?
+          raise AuthMismatchError.new(
+            "Text mode requires an ANTHROPIC_API_KEY for direct API access. " \
+            "OAuth/subscription credentials cannot be used for HTTP transport " \
+            "because it would silently shift billing to API-metered usage. " \
+            "Set ANTHROPIC_API_KEY or use the default CLI mode instead.",
+            provider: :claude
+          )
+        end
+        api_key.strip
+      end
       def parse_json_output(output)
         return nil if output.nil? || output.empty?
@@ -499,32 +653,18 @@ module AgentHarness
         nil
       end
-      def extract_tokens(parsed)
-        usage = parsed["usage"]
-        return nil unless usage
-        input = usage["input_tokens"]
-        output = usage["output_tokens"]
-        return nil unless input || output
-        input ||= 0
-        output ||= 0
+      # Delegate to class-level implementations so both instance and class
+      # methods share a single definition.
+      def extract_envelope_metadata(parsed)
+        self.class.send(:extract_envelope_metadata, parsed)
+      end
-        {input: input, output: output, total: input + output}
+      def extract_tokens(parsed)
+        self.class.send(:extract_tokens, parsed)
       end
       def classify_error_message(message)
-        msg_lower = message.downcase
-        if msg_lower.include?("rate limit") || msg_lower.include?("session limit")
-          "Rate limit exceeded"
-        elsif msg_lower.include?("deprecat") || msg_lower.include?("end-of-life")
-          "Model deprecated"
-        elsif msg_lower.include?("oauth token") || msg_lower.include?("authentication")
-          "Authentication error"
-        else
-          message
-        end
+        self.class.send(:classify_error_message, message)
       end
       def parse_claude_mcp_output(output)

data/lib/agent_harness/providers/base.rb CHANGED

@@ -104,6 +104,15 @@ module AgentHarness
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Text mode: fall back to CLI with tools disabled when the provider
+        # does not have an HTTP text transport.  Providers that support text
+        # mode (e.g. Anthropic) override send_message to intercept this
+        # before reaching Base.
+        if options[:mode] == :text && !supports_text_mode?
+          log_debug("text_mode_cli_fallback", provider: self.class.provider_name)
+          options = options.except(:mode).merge(tools: :none)
+        end
         # Warn when tools option is passed to a provider that doesn't support it
         if options[:tools] && !supports_tool_control?
           log_debug("tools_option_unsupported",