RubyGems - agent-harness - Versions diffs - 0.5.5 → 0.5.7 - Mend

agent-harness 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +24 -0
data/README.md +76 -1
data/lib/agent_harness/command_executor.rb +453 -32
data/lib/agent_harness/docker_command_executor.rb +23 -3
data/lib/agent_harness/error_taxonomy.rb +10 -0
data/lib/agent_harness/errors.rb +5 -0
data/lib/agent_harness/orchestration/conductor.rb +40 -16
data/lib/agent_harness/orchestration/provider_manager.rb +21 -13
data/lib/agent_harness/provider_health_check.rb +216 -58
data/lib/agent_harness/provider_runtime.rb +132 -0
data/lib/agent_harness/providers/adapter.rb +157 -0
data/lib/agent_harness/providers/aider.rb +21 -0
data/lib/agent_harness/providers/anthropic.rb +21 -0
data/lib/agent_harness/providers/base.rb +83 -11
data/lib/agent_harness/providers/codex.rb +75 -8
data/lib/agent_harness/providers/cursor.rb +47 -2
data/lib/agent_harness/providers/gemini.rb +53 -0
data/lib/agent_harness/providers/github_copilot.rb +34 -6
data/lib/agent_harness/providers/kilocode.rb +39 -0
data/lib/agent_harness/providers/mistral_vibe.rb +4 -0
data/lib/agent_harness/providers/opencode.rb +91 -1
data/lib/agent_harness/providers/registry.rb +54 -0
data/lib/agent_harness/version.rb +1 -1
data/lib/agent_harness.rb +78 -6
metadata +22 -1

data/lib/agent_harness/providers/adapter.rb CHANGED Viewed

@@ -43,6 +43,17 @@ module AgentHarness
           raise NotImplementedError, "#{self} must implement .binary_name"
         end
+        # Installation contract for the provider CLI.
+        #
+        # Downstream applications can use this metadata to install a provider's
+        # supported CLI without hardcoding package names, install flags, or
+        # version pins outside AgentHarness.
+        #
+        # This default implementation ignores +version+ and always returns nil,
+        # i.e. "no install contract defined".
+        #
+        # @param version [String, nil] optional version the contract is requested for
+        # @return [Hash, nil] installation metadata or nil when not provided
+        def install_contract(version: nil)
+          nil
+        end
         # Required domains for firewall configuration
         #
         # @return [Hash] with :domains and :ip_ranges arrays
@@ -63,6 +74,49 @@ module AgentHarness
         def discover_models
           []
         end
+        # Installation contract for this provider's CLI.
+        #
+        # Downstream apps can use this metadata to provision the provider CLI
+        # without hardcoding package names, versions, or binary expectations
+        # outside agent-harness.
+        #
+        # @return [Hash, nil] install metadata, or nil when no first-class
+        #   installation contract is defined for the provider
+        def installation_contract(**options)
+          return install_contract unless options.key?(:version)
+          install_contract(version: options[:version])
+        end
+        # Build the install command from the provider installation contract.
+        #
+        # @param version [String, nil] optional explicit version override
+        # @return [Array<String>, nil] install command argv or nil when the
+        #   provider has no install contract
+        def install_command(version: nil)
+          contract = installation_contract
+          return nil unless contract
+          return contract[:install_command] unless version
+          package_name = contract[:package_name]
+          unless package_name
+            raise ArgumentError, "installation_contract must define :package_name when overriding version"
+          end
+          Array(contract[:install_command_prefix]) + ["#{package_name}@#{version}"]
+        end
+        # Canonical smoke-test contract for this provider.
+        #
+        # CLI-backed providers should expose a minimal real-execution prompt so
+        # downstream apps can reuse a stable provider-owned health check.
+        #
+        # The instance-level #smoke_test reads these keys from the contract:
+        # :prompt, :timeout, :expected_output, :require_output and
+        # :success_message. This default returns nil, meaning the provider
+        # defines no smoke test.
+        #
+        # @return [Hash, nil] smoke-test metadata or nil when not provided
+        def smoke_test_contract
+          nil
+        end
       end
       # Instance methods
@@ -75,11 +129,32 @@ module AgentHarness
       # @option options [Integer] :timeout timeout in seconds
       # @option options [String] :session session identifier
       # @option options [Boolean] :dangerous_mode skip permission checks
+      # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
+      #   runtime overrides (model, base_url, api_provider, env, flags, metadata).
+      #   For providers that delegate to Providers::Base#send_message, a plain Hash
+      #   is automatically coerced into a ProviderRuntime. Providers that override
+      #   #send_message directly are responsible for handling this option.
       # @return [Response] response object with output and metadata
       def send_message(prompt:, **options)
         raise NotImplementedError, "#{self.class} must implement #send_message"
       end
+      # Provider configuration schema for app-driven setup UIs
+      #
+      # Returns metadata describing the configurable fields, supported
+      # authentication modes, and backend compatibility for this provider.
+      # Applications use this to build generic provider-entry forms without
+      # hardcoding provider-specific knowledge.
+      #
+      # This default declares no configurable fields, advertises the
+      # provider's own #auth_type as the single auth mode, and reports the
+      # provider as not OpenAI-compatible.
+      #
+      # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [auth_type],
+          openai_compatible: false
+        }
+      end
       # Provider capabilities
       #
       # @return [Hash] capability flags
@@ -219,6 +294,71 @@ module AgentHarness
         {healthy: true, message: "OK"}
       end
+      # Canonical smoke-test contract for this provider instance.
+      #
+      # @return [Hash, nil] smoke-test metadata
+      def smoke_test_contract
+        self.class.smoke_test_contract if self.class.respond_to?(:smoke_test_contract)
+      end
+      # Execute a minimal provider-owned smoke test by sending the contract's
+      # prompt through #send_message and normalizing the outcome.
+      #
+      # The result hash always carries the keys :ok, :status, :message,
+      # :error_category, :output and :exit_code.
+      #
+      # @param timeout [Integer, nil] timeout override in seconds; falls back to
+      #   the contract's :timeout when nil
+      # @param provider_runtime [ProviderRuntime, Hash, nil] runtime overrides
+      # @return [Hash] normalized smoke-test result
+      # @raise [NotImplementedError] when no smoke-test contract is defined
+      # @raise [ConfigurationError] when the contract's :prompt is not a
+      #   non-empty String
+      def smoke_test(timeout: nil, provider_runtime: nil)
+        contract = smoke_test_contract
+        raise NotImplementedError, "#{self.class} does not implement #smoke_test_contract" unless contract
+        prompt = contract[:prompt]
+        if !prompt.is_a?(String) || prompt.strip.empty?
+          raise ConfigurationError, "#{self.class}.smoke_test_contract must define a non-empty :prompt"
+        end
+        response = send_message(
+          prompt: prompt,
+          timeout: timeout || contract[:timeout],
+          provider_runtime: provider_runtime
+        )
+        # Compare on whitespace-stripped text so trailing newlines from the CLI
+        # do not fail an otherwise exact match.
+        output = response.output.to_s.strip
+        expected_output = contract[:expected_output]&.strip
+        # Success needs a successful response and, unless :require_output is
+        # explicitly false, a non-empty output ...
+        success = response.success? && (!contract.fetch(:require_output, true) || !output.empty?)
+        # ... and, when the contract pins :expected_output, an exact match.
+        success &&= expected_output.nil? || output == expected_output
+        if success
+          return {
+            ok: true,
+            status: "ok",
+            message: contract[:success_message] || "Smoke test passed",
+            error_category: nil,
+            output: output,
+            exit_code: response.exit_code
+          }
+        end
+        # Failure message preference: the response error, then the raw output,
+        # then a generic exit-code message.
+        message = response.error.to_s.strip
+        message = output if message.empty?
+        message = "Smoke test failed with exit code #{response.exit_code}" if message.empty?
+        {
+          ok: false,
+          status: "error",
+          message: message,
+          error_category: classify_smoke_test_message(message),
+          output: output,
+          exit_code: response.exit_code
+        }
+      # Errors raised while sending are converted into failure results (with nil
+      # :output and :exit_code) instead of propagating to the caller.
+      rescue TimeoutError => e
+        failure_smoke_test_result(e.message, :timeout)
+      rescue AuthenticationError => e
+        failure_smoke_test_result(e.message, :auth_expired)
+      rescue RateLimitError => e
+        failure_smoke_test_result(e.message, :rate_limited)
+      rescue ProviderError => e
+        failure_smoke_test_result(e.message, classify_smoke_test_message(e.message))
+      end
       # Execution semantics for this provider
       #
       # Returns a hash describing provider-specific execution behavior so
@@ -250,6 +390,23 @@ module AgentHarness
       def parse_rate_limit_reset(output)
         nil
       end
+      private
+      # Map a smoke-test failure message to an error category.
+      #
+      # Wraps the message in a StandardError and hands it, together with this
+      # provider's #error_patterns, to ErrorTaxonomy.classify.
+      #
+      # @param message [#to_s] failure message to classify
+      # @return [Object] whatever ErrorTaxonomy.classify returns for the message
+      def classify_smoke_test_message(message)
+        ErrorTaxonomy.classify(StandardError.new(message.to_s), error_patterns)
+      end
+      # Build the normalized failure result used when the smoke test raised
+      # before a response was available; :output and :exit_code are nil.
+      #
+      # @param message [String] failure message
+      # @param error_category [Object] category to report under :error_category
+      # @return [Hash] result with :ok false and :status "error"
+      def failure_smoke_test_result(message, error_category)
+        {
+          ok: false,
+          status: "error",
+          message: message,
+          error_category: error_category,
+          output: nil,
+          exit_code: nil
+        }
+      end
     end
   end
 end

data/lib/agent_harness/providers/aider.rb CHANGED Viewed

@@ -49,6 +49,10 @@ module AgentHarness
             {name: "claude-3-5-sonnet", family: "claude-3-5-sonnet", tier: "standard", provider: "aider"}
           ]
         end
+        # Smoke-test contract for Aider: reuses the shared
+        # Base::DEFAULT_SMOKE_TEST_CONTRACT without customization.
+        #
+        # @return [Hash] the shared default smoke-test contract
+        def smoke_test_contract
+          Base::DEFAULT_SMOKE_TEST_CONTRACT
+        end
       end
       def name
@@ -59,6 +63,23 @@ module AgentHarness
         "Aider"
       end
+      # Configuration schema for Aider setup forms.
+      #
+      # Declares one optional :model string field that accepts arbitrary
+      # values, :api_key as the only auth mode, and marks the provider as not
+      # OpenAI-compatible.
+      #
+      # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
+      def configuration_schema
+        {
+          fields: [
+            {
+              name: :model,
+              type: :string,
+              label: "Model",
+              required: false,
+              hint: "Model identifier (supports OpenAI, Anthropic, and other model names)",
+              accepts_arbitrary: true
+            }
+          ],
+          auth_modes: [:api_key],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: true,

data/lib/agent_harness/providers/anthropic.rb CHANGED Viewed

@@ -81,6 +81,10 @@ module AgentHarness
           MODEL_PATTERN.match?(family_name)
         end
+        # Smoke-test contract for the Anthropic provider: reuses the shared
+        # Base::DEFAULT_SMOKE_TEST_CONTRACT without customization.
+        #
+        # @return [Hash] the shared default smoke-test contract
+        def smoke_test_contract
+          Base::DEFAULT_SMOKE_TEST_CONTRACT
+        end
         private
         def parse_models_list(output)
@@ -160,6 +164,23 @@ module AgentHarness
         "Anthropic Claude CLI"
       end
+      # Configuration schema for Anthropic setup forms.
+      #
+      # Declares one optional :model string field that does not accept
+      # arbitrary values (accepts_arbitrary: false), :oauth as the only auth
+      # mode, and marks the provider as not OpenAI-compatible.
+      #
+      # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
+      def configuration_schema
+        {
+          fields: [
+            {
+              name: :model,
+              type: :string,
+              label: "Model",
+              required: false,
+              hint: "Claude model to use (e.g. claude-3-5-sonnet-20241022)",
+              accepts_arbitrary: false
+            }
+          ],
+          auth_modes: [:oauth],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: true,

data/lib/agent_harness/providers/base.rb CHANGED Viewed

@@ -22,16 +22,18 @@ module AgentHarness
     #         system("which my-cli > /dev/null 2>&1")
     #       end
     #     end
-    #
-    #     protected
-    #
-    #     def build_command(prompt, options)
-    #       [self.class.binary_name, "--prompt", prompt]
-    #     end
     #   end
     class Base
       include Adapter
+      # Default smoke-test contract that provider classes can return from
+      # .smoke_test_contract. Keys, as read by Adapter#smoke_test:
+      #   :prompt           - prompt sent to the provider
+      #   :expected_output  - output must equal this after whitespace stripping
+      #   :timeout          - timeout used when the caller passes none
+      #   :require_output   - when true, an empty output counts as a failure
+      #   :success_message  - message reported on success
+      DEFAULT_SMOKE_TEST_CONTRACT = {
+        prompt: "Reply with exactly OK.",
+        expected_output: "OK",
+        timeout: 30,
+        require_output: true,
+        success_message: "Smoke test passed"
+      }.freeze
       # Common error patterns shared across providers that use standard
       # HTTP-style error responses. Providers with unique patterns (e.g.
       # Anthropic, GitHub Copilot) override error_patterns entirely.
@@ -63,6 +65,12 @@ module AgentHarness
       attr_reader :config, :logger
       attr_accessor :executor
+      class << self
+        # Base providers define no smoke test by default; provider subclasses
+        # opt in by overriding this (e.g. returning DEFAULT_SMOKE_TEST_CONTRACT).
+        #
+        # @return [Hash, nil] smoke-test metadata, nil in this default
+        def smoke_test_contract
+          nil
+        end
+      end
       # Initialize the provider
       #
       # @param config [ProviderConfig, nil] provider configuration
@@ -87,10 +95,16 @@ module AgentHarness
       #
       # @param prompt [String] the prompt to send
       # @param options [Hash] additional options
+      # @option options [ProviderRuntime, Hash, nil] :provider_runtime per-request
+      #   runtime overrides (model, base_url, api_provider, env, flags, metadata).
+      #   A plain Hash is automatically coerced into a ProviderRuntime.
       # @return [Response] the response
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Coerce provider_runtime from Hash if needed
+        options = normalize_provider_runtime(options)
         # Normalize and validate MCP servers
         options = normalize_mcp_servers(options)
         validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
@@ -103,11 +117,33 @@ module AgentHarness
         # Execute command
         start_time = Time.now
-        result = execute_with_timeout(command, timeout: timeout, env: build_env(options))
+        result = execute_with_timeout(
+          command,
+          timeout: timeout,
+          env: build_env(options),
+          **command_execution_options(options)
+        )
         duration = Time.now - start_time
         # Parse response
         response = parse_response(result, duration: duration)
+        runtime = options[:provider_runtime]
+        # Runtime model is a per-request override and always takes precedence
+        # over both the config-level model and whatever parse_response returned.
+        # This is intentional: callers use runtime overrides to route a single
+        # provider instance through different backends on each request.
+        if runtime&.model
+          response = Response.new(
+            output: response.output,
+            exit_code: response.exit_code,
+            duration: response.duration,
+            provider: response.provider,
+            model: runtime.model,
+            tokens: response.tokens,
+            metadata: response.metadata,
+            error: response.error
+          )
+        end
         # Track tokens
         track_tokens(response) if response.tokens
@@ -158,10 +194,20 @@ module AgentHarness
       # Build environment variables - override in subclasses
       #
+      # Provider subclasses should call +super+ and merge their own env vars
+      # so that ProviderRuntime env overrides are always included.
+      #
       # @param options [Hash] options
       # @return [Hash] environment variables
       def build_env(options)
-        {}
+        runtime = options[:provider_runtime]
+        return {} unless runtime
+        # Return overrides only. Ruby subprocess spawning treats nil values as
+        # explicit unsets in the child process, while omitted keys are inherited.
+        env = runtime.env.dup
+        runtime.unset_env.each { |key| env[key] = nil }
+        env
       end
       # Parse CLI output into Response - override in subclasses
@@ -211,6 +257,13 @@ module AgentHarness
       private
+      def normalize_provider_runtime(options)
+        raw = options[:provider_runtime]
+        return options if raw.nil? || raw.is_a?(ProviderRuntime)
+        options.merge(provider_runtime: ProviderRuntime.wrap(raw))
+      end
       def normalize_mcp_servers(options)
         servers = options[:mcp_servers]
         return options if servers.nil?
@@ -243,8 +296,21 @@ module AgentHarness
         options.merge(mcp_servers: normalized)
       end
-      def execute_with_timeout(command, timeout:, env:)
-        @executor.execute(command, timeout: timeout, env: env)
+      def command_execution_options(options)
+        execution_options = {
+          idle_timeout: options[:idle_timeout],
+          on_stdout_chunk: options[:on_stdout_chunk],
+          on_stderr_chunk: options[:on_stderr_chunk],
+          on_heartbeat: options[:on_heartbeat],
+          observer: options[:execution_observer] || options[:observer]
+        }.reject { |_, value| value.nil? }
+        execution_options[:heartbeat_interval] = options[:heartbeat_interval] if options.key?(:heartbeat_interval)
+        execution_options
+      end
+      def execute_with_timeout(command, timeout:, env:, stdin_data: nil, **execution_options)
+        @executor.execute(command, timeout: timeout, env: env, stdin_data: stdin_data, **execution_options)
       end
       def track_tokens(response)
@@ -252,7 +318,7 @@ module AgentHarness
         AgentHarness.token_tracker.record(
           provider: self.class.provider_name,
-          model: @config.model,
+          model: response.model || @config.model,
           input_tokens: response.tokens[:input] || 0,
           output_tokens: response.tokens[:output] || 0,
           total_tokens: response.tokens[:total]
@@ -283,7 +349,13 @@ module AgentHarness
             original_error: original_error
           )
         when :timeout
+          return original_error if original_error.is_a?(TimeoutError)
           TimeoutError.new(original_error.message, original_error: original_error)
+        when :idle_timeout
+          return original_error if original_error.is_a?(IdleTimeoutError)
+          IdleTimeoutError.new(original_error.message, original_error: original_error)
         else
           ProviderError.new(original_error.message, original_error: original_error)
         end

data/lib/agent_harness/providers/codex.rb CHANGED Viewed

@@ -8,6 +8,9 @@ module AgentHarness
     #
     # Provides integration with the OpenAI Codex CLI tool.
     class Codex < Base
+      SUPPORTED_CLI_VERSION = "0.116.0"
+      SUPPORTED_CLI_REQUIREMENT = Gem::Requirement.new(">= #{SUPPORTED_CLI_VERSION}", "< 0.117.0").freeze
       class << self
         def provider_name
           :codex
@@ -49,6 +52,37 @@ module AgentHarness
             {name: "codex", family: "codex", tier: "standard", provider: "codex"}
           ]
         end
+        def installation_contract
+          default_package = "@openai/codex@#{SUPPORTED_CLI_VERSION}".freeze
+          install_command_prefix = ["npm", "install", "-g", "--ignore-scripts"].freeze
+          install_command = (install_command_prefix + [default_package]).freeze
+          supported_versions = [SUPPORTED_CLI_VERSION].freeze
+          version_requirement = SUPPORTED_CLI_REQUIREMENT.requirements
+            .map { |op, ver| "#{op} #{ver}".freeze }
+            .freeze
+          contract = {
+            source: :npm,
+            package: default_package,
+            package_name: "@openai/codex",
+            version: SUPPORTED_CLI_VERSION,
+            version_requirement: version_requirement,
+            binary_name: binary_name,
+            install_command_prefix: install_command_prefix,
+            install_command: install_command,
+            supported_versions: supported_versions
+          }
+          contract.each_value do |value|
+            value.freeze if value.is_a?(String)
+          end
+          contract.freeze
+        end
+        # Smoke-test contract for Codex: reuses the shared
+        # Base::DEFAULT_SMOKE_TEST_CONTRACT without customization.
+        #
+        # @return [Hash] the shared default smoke-test contract
+        def smoke_test_contract
+          Base::DEFAULT_SMOKE_TEST_CONTRACT
+        end
       end
       def name
@@ -59,6 +93,14 @@ module AgentHarness
         "OpenAI Codex CLI"
       end
+      # Configuration schema for Codex setup forms.
+      #
+      # Declares no configurable fields, :api_key as the only auth mode, and
+      # marks the provider as OpenAI-compatible.
+      #
+      # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [:api_key],
+          openai_compatible: true
+        }
+      end
       def capabilities
         {
           streaming: false,
@@ -186,12 +228,16 @@ module AgentHarness
       def build_command(prompt, options)
         cmd = [self.class.binary_name, "exec"]
-        # When running inside an already-sandboxed Docker container, Codex's
-        # own sandboxing conflicts with the outer sandbox. Use --full-auto to
-        # skip nested sandboxing while keeping full tool access.
-        # Also applies when dangerous_mode is explicitly requested.
-        if sandboxed_environment? || options[:dangerous_mode]
+        externally_sandboxed = externally_sandboxed?(options)
+        # When externally_sandboxed is set, use --dangerously-bypass-approvals-and-sandbox
+        # instead of --full-auto. In the Codex CLI, full_auto is checked first and
+        # selects workspace-write sandbox mode, which overrides the bypass flag.
+        # Passing both would leave the run in the wrong sandbox mode.
+        #
+        # When NOT externally sandboxed: use --full-auto for Docker containers
+        # (to skip nested sandboxing) or when dangerous_mode is explicitly requested.
+        if !externally_sandboxed && (sandboxed_environment? || options[:dangerous_mode])
           cmd += dangerous_mode_flags
         end
@@ -200,10 +246,13 @@ module AgentHarness
           unless flags.is_a?(Array)
             raise ArgumentError, "Codex configuration error: default_flags must be an array of strings"
           end
+          # Strip --full-auto from defaults when externally sandboxed to avoid
+          # conflicting with --dangerously-bypass-approvals-and-sandbox.
+          flags -= dangerous_mode_flags if externally_sandboxed
           cmd += flags if flags.any?
         end
-        if externally_sandboxed?(options)
+        if externally_sandboxed
           cmd += sandbox_bypass_flags
         end
@@ -211,11 +260,29 @@ module AgentHarness
           cmd += session_flags(options[:session])
         end
+        runtime = options[:provider_runtime]
+        if runtime
+          cmd += ["--model", runtime.model] if runtime.model
+          runtime_flags = runtime.flags
+          # Strip --full-auto from runtime flags when externally sandboxed.
+          runtime_flags -= dangerous_mode_flags if externally_sandboxed
+          cmd += runtime_flags unless runtime_flags.empty?
+        end
         cmd << prompt
         cmd
       end
+      def build_env(options)
+        env = super
+        runtime = options[:provider_runtime]
+        return env unless runtime
+        env["OPENAI_BASE_URL"] = runtime.base_url if runtime.base_url
+        env
+      end
       def default_timeout
         300
       end
@@ -237,7 +304,7 @@ module AgentHarness
       end
       def sandbox_bypass_flags
-        ["--sandbox", "none"]
+        ["--dangerously-bypass-approvals-and-sandbox"]
       end
       def read_codex_credentials

data/lib/agent_harness/providers/cursor.rb CHANGED Viewed

@@ -83,6 +83,10 @@ module AgentHarness
         def supports_model_family?(family_name)
           family_name.match?(/^(claude|gpt|cursor)-/)
         end
+        # Smoke-test contract for Cursor: reuses the shared
+        # Base::DEFAULT_SMOKE_TEST_CONTRACT without customization.
+        #
+        # @return [Hash] the shared default smoke-test contract
+        def smoke_test_contract
+          Base::DEFAULT_SMOKE_TEST_CONTRACT
+        end
       end
       def name
@@ -93,6 +97,14 @@ module AgentHarness
         "Cursor AI"
       end
+      # Configuration schema for Cursor setup forms.
+      #
+      # Declares no configurable fields, :oauth as the only auth mode, and
+      # marks the provider as not OpenAI-compatible.
+      #
+      # @return [Hash] with :fields, :auth_modes, :openai_compatible keys
+      def configuration_schema
+        {
+          fields: [],
+          auth_modes: [:oauth],
+          openai_compatible: false
+        }
+      end
       def capabilities
         {
           streaming: false,
@@ -163,23 +175,50 @@ module AgentHarness
       def send_message(prompt:, **options)
         log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
+        # Coerce provider_runtime from Hash if needed (same as Base#send_message)
+        options = normalize_provider_runtime(options)
+        runtime = options[:provider_runtime]
         # Normalize and validate MCP servers (same as Base#send_message)
         options = normalize_mcp_servers(options)
         validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
         # Build command (without prompt in args - we send via stdin)
         command = [self.class.binary_name, "-p"]
+        command.concat(runtime.flags) if runtime&.flags&.any?
         # Calculate timeout
         timeout = options[:timeout] || @config.timeout || default_timeout
         # Execute command with prompt on stdin
+        env = build_env(options)
         start_time = Time.now
-        result = @executor.execute(command, timeout: timeout, stdin_data: prompt)
+        result = execute_with_timeout(
+          command,
+          timeout: timeout,
+          env: env,
+          stdin_data: prompt,
+          **command_execution_options(options)
+        )
         duration = Time.now - start_time
         # Parse response
         response = parse_response(result, duration: duration)
+        # Runtime model is a per-request override and always takes precedence
+        # over both the config-level model and whatever parse_response returned.
+        # See Base#send_message for rationale.
+        if runtime&.model
+          response = Response.new(
+            output: response.output,
+            exit_code: response.exit_code,
+            duration: response.duration,
+            provider: response.provider,
+            model: runtime.model,
+            tokens: response.tokens,
+            metadata: response.metadata,
+            error: response.error
+          )
+        end
         # Track tokens
         track_tokens(response) if response.tokens
@@ -201,7 +240,7 @@ module AgentHarness
       end
       def build_env(options)
-        {}
+        super
       end
       def default_timeout
@@ -298,7 +337,13 @@ module AgentHarness
         when :auth_expired
           raise AuthenticationError.new(error.message, provider: self.class.provider_name, original_error: error)
         when :timeout
+          raise error if error.is_a?(TimeoutError)
           raise TimeoutError.new(error.message, original_error: error)
+        when :idle_timeout
+          raise error if error.is_a?(IdleTimeoutError)
+          raise IdleTimeoutError.new(error.message, original_error: error)
         else
           raise ProviderError.new(error.message, original_error: error)
         end