RubyGems - agent-harness - Versions diffs - 0.5.6 → 0.5.7 - Mend

agent-harness 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +16 -0
data/README.md +76 -1
data/lib/agent_harness/command_executor.rb +453 -32
data/lib/agent_harness/docker_command_executor.rb +23 -3
data/lib/agent_harness/error_taxonomy.rb +10 -0
data/lib/agent_harness/errors.rb +5 -0
data/lib/agent_harness/orchestration/conductor.rb +40 -16
data/lib/agent_harness/orchestration/provider_manager.rb +21 -13
data/lib/agent_harness/provider_health_check.rb +216 -58
data/lib/agent_harness/provider_runtime.rb +20 -3
data/lib/agent_harness/providers/adapter.rb +136 -0
data/lib/agent_harness/providers/aider.rb +4 -0
data/lib/agent_harness/providers/anthropic.rb +4 -0
data/lib/agent_harness/providers/base.rb +46 -10
data/lib/agent_harness/providers/codex.rb +53 -9
data/lib/agent_harness/providers/cursor.rb +17 -1
data/lib/agent_harness/providers/gemini.rb +34 -0
data/lib/agent_harness/providers/github_copilot.rb +26 -6
data/lib/agent_harness/providers/kilocode.rb +39 -0
data/lib/agent_harness/providers/mistral_vibe.rb +4 -0
data/lib/agent_harness/providers/opencode.rb +68 -1
data/lib/agent_harness/providers/registry.rb +54 -0
data/lib/agent_harness/version.rb +1 -1
data/lib/agent_harness.rb +77 -6
metadata +21 -1

data/lib/agent_harness/provider_health_check.rb CHANGED Viewed

@@ -25,14 +25,18 @@ module AgentHarness
       #
       # @param timeout [Integer] timeout in seconds for each check
       # @return [Array<Hash>] health status for each provider
-      def check_all(timeout: configured_timeout)
+      def check_all(timeout: configured_timeout, executor: nil, provider_runtime: nil)
+        raise ArgumentError, "provider_runtime is only supported for single-provider health checks" unless provider_runtime.nil?
         provider_names = if AgentHarness.configuration.providers.empty?
           Providers::Registry.instance.all
         else
           enabled_provider_names
         end
-        provider_names.map { |name| check(name, timeout: timeout) }
+        provider_names.map do |name|
+          check(name, timeout: timeout, executor: executor, provider_runtime: provider_runtime)
+        end
       end
       # Check health of a single provider
@@ -40,32 +44,47 @@ module AgentHarness
       # @param provider_name [Symbol, String] the provider name
       # @param timeout [Integer] timeout in seconds
       # @return [Hash] health status with :name, :status, :message, :latency_ms keys
-      def check(provider_name, timeout: configured_timeout)
+      def check(provider_name, timeout: configured_timeout, executor: nil, provider_runtime: nil)
         name = normalize_name(provider_name)
         start_time = monotonic_now
         timeout = validate_timeout(timeout)
-        Timeout.timeout(timeout) do
-          perform_check(name, start_time)
+        # Honor the provider smoke-test contract timeout when it exceeds
+        # the health-check timeout, so real CLI round trips are not
+        # falsely reported as timeouts.
+        outer_timeout = effective_check_timeout(name, timeout)
+        Timeout.timeout(outer_timeout) do
+          perform_check(
+            name,
+            start_time,
+            timeout: timeout,
+            executor: executor,
+            provider_runtime: provider_runtime
+          )
         end
       rescue Timeout::Error
         build_result(
           name: name,
           status: "error",
-          message: "Health check timed out after #{timeout}s",
-          start_time: start_time || monotonic_now
+          message: "Health check timed out after #{outer_timeout || timeout}s",
+          start_time: start_time || monotonic_now,
+          error_category: :timeout,
+          check: :timeout
         )
-      rescue NotImplementedError => e
+      rescue NotImplementedError, ConfigurationError => e
         # NotImplementedError inherits from ScriptError, not StandardError,
         # so it must be rescued explicitly. Its messages are safe internal
-        # setup errors (e.g., missing provider methods) that help users
-        # diagnose configuration problems.
+        # setup errors (e.g., missing provider methods or malformed provider
+        # contracts) that help users diagnose configuration problems.
         AgentHarness.logger&.error("ProviderHealthCheck error for #{name}: #{e.class}")
         build_result(
           name: name,
           status: "error",
           message: "Health check failed: #{e.class}: #{e.message}",
-          start_time: start_time || monotonic_now
+          start_time: start_time || monotonic_now,
+          error_category: :configuration,
+          check: :provider_health
         )
       rescue => e
         # Return a generic message to avoid leaking sensitive details
@@ -76,7 +95,9 @@ module AgentHarness
           name: name,
           status: "error",
           message: "Health check failed: #{e.class}",
-          start_time: start_time || monotonic_now
+          start_time: start_time || monotonic_now,
+          error_category: :unknown,
+          check: :provider_health
         )
       end
@@ -148,7 +169,7 @@ module AgentHarness
         :unknown
       end
-      def perform_check(provider_name, start_time)
+      def perform_check(provider_name, start_time, timeout:, executor:, provider_runtime:)
         # Step 1: Check provider is registered
         registry = Providers::Registry.instance
         unless registry.registered?(provider_name)
@@ -156,53 +177,83 @@ module AgentHarness
             name: provider_name,
             status: "error",
             message: "Provider not registered",
-            start_time: start_time
+            start_time: start_time,
+            error_category: :installation,
+            check: :registration
           )
         end
-        # Step 2: Check CLI availability
         klass = registry.get(provider_name)
-        unless klass.available?
-          return build_result(
-            name: provider_name,
-            status: "error",
-            message: "CLI '#{klass.binary_name}' not found in PATH",
-            start_time: start_time
-          )
-        end
+        provider_instance = build_provider(provider_name, klass, executor: executor)
+        host_preflight_allowed = host_preflight_allowed?(executor: executor, provider_runtime: provider_runtime)
-        # Step 3: Check authentication
-        # Treat "not implemented" auth status as degraded rather than error,
-        # since most built-in providers don't implement auth_status hooks.
-        # In either case, continue to steps 4/5 so health and config issues
-        # are still surfaced for providers that lack an auth_status hook.
-        auth = Authentication.auth_status(provider_name)
         auth_degraded = false
-        unless auth[:valid]
-          unless auth_not_implemented?(auth)
+        if host_preflight_allowed
+          # Step 2a: Honor the provider's `.available?` contract when running
+          # against the default host executor. Custom providers may enforce
+          # version or feature checks beyond simple PATH presence, so this
+          # catches cases where the binary exists but the provider considers
+          # itself unavailable. We skip this when a custom executor is
+          # supplied because `.available?` always queries the global
+          # executor, which may not reflect the caller's execution context.
+          if executor.nil? && !klass.available?
             return build_result(
               name: provider_name,
               status: "error",
-              message: auth[:error] || "Authentication failed",
-              start_time: start_time
+              message: "Provider '#{klass.binary_name}' is not available (#{klass}.available? returned false)",
+              start_time: start_time,
+              error_category: :installation,
+              check: :availability
             )
           end
-          auth_degraded = true
-        end
-        # Step 4: Check provider-level health (e.g., endpoint reachability)
-        # The Adapter default always returns {healthy: true}, so providers
-        # that haven't implemented a real health check are reported as ok
-        # with a note that the check is not implemented.
-        provider_instance = build_provider(provider_name, klass)
-        health = provider_instance.health_status
-        unless health[:healthy]
-          return build_result(
-            name: provider_name,
-            status: "degraded",
-            message: health[:message] || "Provider health check failed",
-            start_time: start_time
-          )
+          # Step 2b: Verify the binary is findable by the effective executor.
+          unless provider_instance.executor.which(klass.binary_name)
+            return build_result(
+              name: provider_name,
+              status: "error",
+              message: "CLI '#{klass.binary_name}' not found in PATH",
+              start_time: start_time,
+              error_category: :installation,
+              check: :availability
+            )
+          end
+          # Step 3: Check authentication
+          # Treat "not implemented" auth status as degraded rather than error,
+          # since most built-in providers don't implement auth_status hooks.
+          # In either case, continue to steps 4/5 so health and config issues
+          # are still surfaced for providers that lack an auth_status hook.
+          auth = Authentication.auth_status(provider_name)
+          unless auth[:valid]
+            unless auth_not_implemented?(auth)
+              return build_result(
+                name: provider_name,
+                status: "error",
+                message: auth[:error] || "Authentication failed",
+                start_time: start_time,
+                error_category: :authentication,
+                check: :authentication
+              )
+            end
+            auth_degraded = true
+          end
+          # Step 4: Check provider-level health (e.g., endpoint reachability)
+          # The Adapter default always returns {healthy: true}, so providers
+          # that haven't implemented a real health check are reported as ok
+          # with a note that the check is not implemented.
+          health = provider_instance.health_status
+          unless health[:healthy]
+            return build_result(
+              name: provider_name,
+              status: "degraded",
+              message: health[:message] || "Provider health check failed",
+              start_time: start_time,
+              error_category: :transient,
+              check: :provider_health
+            )
+          end
         end
         # Step 5: Validate provider config
@@ -216,7 +267,52 @@ module AgentHarness
             name: provider_name,
             status: "degraded",
             message: "Configuration issues: #{errors_msg}",
-            start_time: start_time
+            start_time: start_time,
+            error_category: :configuration,
+            check: :configuration
+          )
+        end
+        smoke_contract = provider_instance.smoke_test_contract
+        # Explicitly handle missing smoke-test contract when no custom smoke_test implementation
+        if smoke_contract.nil? && !provider_overrides_method?(provider_instance, :smoke_test)
+          message = if host_preflight_allowed && auth_degraded
+            "Auth status check not implemented; health and config checks passed (smoke test unavailable)"
+          elsif host_preflight_allowed && (provider_overrides_method?(provider_instance, :health_status) ||
+            provider_overrides_method?(provider_instance, :validate_config))
+            "Health and config checks passed (smoke test unavailable)"
+          elsif host_preflight_allowed
+            "Registered and authenticated; health/config checks use defaults and smoke test is unavailable"
+          elsif provider_overrides_method?(provider_instance, :validate_config)
+            "Configuration checks passed, but smoke test is unavailable for the supplied execution context"
+          else
+            "Smoke test is unavailable for the supplied execution context"
+          end
+          return build_result(
+            name: provider_name,
+            status: "degraded",
+            message: message,
+            start_time: start_time,
+            error_category: :configuration,
+            check: :smoke_test
+          )
+        end
+        # When a contract exists, pass nil so the adapter falls through to
+        # contract[:timeout]. When the provider overrides #smoke_test without
+        # publishing a contract, forward the validated health-check timeout so
+        # the override can honour it instead of running without any limit.
+        smoke_timeout = smoke_contract ? nil : timeout
+        smoke = provider_instance.smoke_test(timeout: smoke_timeout, provider_runtime: provider_runtime)
+        unless smoke[:ok]
+          return build_result(
+            name: provider_name,
+            status: smoke[:status] || "error",
+            message: smoke[:message] || "Smoke test failed",
+            start_time: start_time,
+            error_category: normalize_smoke_error_category(smoke[:error_category], smoke[:message]),
+            check: :smoke_test
           )
         end
@@ -225,23 +321,30 @@ module AgentHarness
           return build_result(
             name: provider_name,
             status: "degraded",
-            message: "Auth status check not implemented; health and config checks passed",
-            start_time: start_time
+            message: "Auth status check not implemented; health, config, and smoke tests passed",
+            start_time: start_time,
+            error_category: :authentication,
+            check: :authentication
           )
         end
-        message = if provider_overrides_method?(provider_instance, :health_status) ||
+        message = if !host_preflight_allowed && provider_overrides_method?(provider_instance, :validate_config)
+          "Configuration and smoke test passed using the supplied execution context"
+        elsif !host_preflight_allowed
+          "Smoke test passed using the supplied execution context"
+        elsif provider_overrides_method?(provider_instance, :health_status) ||
             provider_overrides_method?(provider_instance, :validate_config)
           "All checks passed"
         else
-          "Registered and authenticated (health/config checks use defaults)"
+          "Registered, authenticated, and smoke test passed (health/config checks use defaults)"
         end
         build_result(
           name: provider_name,
           status: "ok",
           message: message,
-          start_time: start_time
+          start_time: start_time,
+          check: :smoke_test
         )
       end
@@ -258,25 +361,80 @@ module AgentHarness
         error.include?("not implemented")
       end
+      def host_preflight_allowed?(executor:, provider_runtime: nil)
+        effective_executor = executor || AgentHarness.configuration.command_executor
+        # Skip host preflight only when provider runtime has environment/config overrides
+        # that could conflict with host-level checks (env, base_url, api_provider, unset_env)
+        if provider_runtime
+          runtime = ProviderRuntime.wrap(provider_runtime)
+          return false if runtime && (!runtime.env.empty? || !runtime.unset_env.empty? || runtime.base_url || runtime.api_provider)
+        end
+        effective_executor.is_a?(CommandExecutor) && !effective_executor.is_a?(DockerCommandExecutor)
+      end
+      def effective_check_timeout(provider_name, base_timeout)
+        registry = Providers::Registry.instance
+        return base_timeout unless registry.registered?(provider_name)
+        contract = registry.smoke_test_contract(provider_name)
+        contract_timeout = contract&.dig(:timeout)
+        return base_timeout unless contract_timeout.is_a?(Numeric) && contract_timeout.positive?
+        [base_timeout, contract_timeout].max
+      end
+      def normalize_smoke_error_category(category, message)
+        normalized = if installation_failure_message?(message)
+          :installation
+        else
+          category || ErrorTaxonomy.classify_message(message)
+        end
+        case normalized&.to_sym
+        when :installation
+          :installation
+        when :auth_expired, :authentication
+          :authentication
+        when :rate_limited, :rate_limit
+          :rate_limit
+        when :quota_exceeded, :quota
+          :quota
+        when :timeout
+          :timeout
+        when :transient
+          :transient
+        when :sandbox_failure, :configuration, :permanent
+          :configuration
+        else
+          :unknown
+        end
+      end
+      def installation_failure_message?(message)
+        message.to_s.match?(/(not found in PATH|command not found|No such file or directory|is not installed)/i)
+      end
       def provider_overrides_method?(provider_instance, method_name)
         provider_instance.method(method_name).owner != Providers::Adapter
       end
-      def build_result(name:, status:, message:, start_time:)
+      def build_result(name:, status:, message:, start_time:, error_category: nil, check: nil)
         latency = ((monotonic_now - start_time) * 1000).round
         {
           name: name,
           status: status,
           message: message,
-          latency_ms: latency
+          latency_ms: latency,
+          error_category: error_category,
+          check: check
         }
       end
-      def build_provider(provider_name, klass)
+      def build_provider(provider_name, klass, executor:)
         config = AgentHarness.configuration.providers[provider_name]
         klass.new(
           config: config,
-          executor: AgentHarness.configuration.command_executor,
+          executor: executor || AgentHarness.configuration.command_executor,
           logger: AgentHarness.logger
         )
       end

data/lib/agent_harness/provider_runtime.rb CHANGED Viewed

@@ -25,15 +25,16 @@ module AgentHarness
   #     }
   #   )
   class ProviderRuntime
-    attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata
+    attr_reader :model, :base_url, :api_provider, :env, :flags, :metadata, :unset_env
     # @param model [String, nil] model identifier override
     # @param base_url [String, nil] upstream API base URL override
     # @param api_provider [String, nil] API-compatible backend name
     # @param env [Hash<String,String>] extra environment variables for the subprocess
     # @param flags [Array<String>] extra CLI flags to append
+    # @param unset_env [Array<String>] environment variable names to remove from inherited env
     # @param metadata [Hash] arbitrary provider-specific data
-    def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], metadata: {})
+    def initialize(model: nil, base_url: nil, api_provider: nil, env: {}, flags: [], unset_env: [], metadata: {})
       @model = model
       @base_url = base_url
       @api_provider = api_provider
@@ -70,6 +71,21 @@ module AgentHarness
       end
       @metadata = metadata_hash.dup.freeze
+      # Unset environment variables for the request. These are variable names that
+      # should be removed from the inherited environment before the provider
+      # command runs.
+      unset_array = unset_env || []
+      unless unset_array.is_a?(Array)
+        raise ArgumentError, "unset_env must be an Array (got #{unset_array.class})"
+      end
+      normalized_unset_env = unset_array.map.with_index do |key, index|
+        key.to_s
+      rescue NoMethodError
+        raise ArgumentError,
+          "unset_env must contain values convertible to String; invalid element at index #{index}: #{key.inspect} (#{key.class})"
+      end
+      @unset_env = normalized_unset_env.freeze
       freeze
     end
@@ -86,6 +102,7 @@ module AgentHarness
         api_provider: hash[:api_provider] || hash["api_provider"],
         env: hash[:env] || hash["env"] || {},
         flags: hash[:flags] || hash["flags"] || [],
+        unset_env: hash[:unset_env] || hash["unset_env"] || [],
         metadata: hash[:metadata] || hash["metadata"] || {}
       )
     end
@@ -109,7 +126,7 @@ module AgentHarness
     # @return [Boolean]
     def empty?
       model.nil? && base_url.nil? && api_provider.nil? &&
-        env.empty? && flags.empty? && metadata.empty?
+        env.empty? && flags.empty? && metadata.empty? && unset_env.empty?
     end
   end
 end

data/lib/agent_harness/providers/adapter.rb CHANGED Viewed

@@ -43,6 +43,17 @@ module AgentHarness
           raise NotImplementedError, "#{self} must implement .binary_name"
         end
+        # Installation contract for the provider CLI.
+        #
+        # Downstream applications can use this metadata to install a provider's
+        # supported CLI without hardcoding package names, install flags, or
+        # version pins outside AgentHarness.
+        #
+        # @return [Hash, nil] installation metadata or nil when not provided
+        def install_contract(version: nil)
+          nil
+        end
         # Required domains for firewall configuration
         #
         # @return [Hash] with :domains and :ip_ranges arrays
@@ -63,6 +74,49 @@ module AgentHarness
         def discover_models
           []
         end
+        # Installation contract for this provider's CLI.
+        #
+        # Downstream apps can use this metadata to provision the provider CLI
+        # without hardcoding package names, versions, or binary expectations
+        # outside agent-harness.
+        #
+        # @return [Hash, nil] install metadata, or nil when no first-class
+        #   installation contract is defined for the provider
+        def installation_contract(**options)
+          return install_contract unless options.key?(:version)
+          install_contract(version: options[:version])
+        end
+        # Build the install command from the provider installation contract.
+        #
+        # @param version [String, nil] optional explicit version override
+        # @return [Array<String>, nil] install command argv or nil when the
+        #   provider has no install contract
+        def install_command(version: nil)
+          contract = installation_contract
+          return nil unless contract
+          return contract[:install_command] unless version
+          package_name = contract[:package_name]
+          unless package_name
+            raise ArgumentError, "installation_contract must define :package_name when overriding version"
+          end
+          Array(contract[:install_command_prefix]) + ["#{package_name}@#{version}"]
+        end
+        # Canonical smoke-test contract for this provider.
+        #
+        # CLI-backed providers should expose a minimal real-execution prompt so
+        # downstream apps can reuse a stable provider-owned health check.
+        #
+        # @return [Hash, nil] smoke-test metadata or nil when not provided
+        def smoke_test_contract
+          nil
+        end
       end
       # Instance methods
@@ -240,6 +294,71 @@ module AgentHarness
         {healthy: true, message: "OK"}
       end
+      # Canonical smoke-test contract for this provider instance.
+      #
+      # @return [Hash, nil] smoke-test metadata
+      def smoke_test_contract
+        self.class.smoke_test_contract if self.class.respond_to?(:smoke_test_contract)
+      end
+      # Execute a minimal provider-owned smoke test via the configured executor.
+      #
+      # @param timeout [Integer, nil] timeout override in seconds
+      # @param provider_runtime [ProviderRuntime, Hash, nil] runtime overrides
+      # @return [Hash] normalized smoke-test result
+      def smoke_test(timeout: nil, provider_runtime: nil)
+        contract = smoke_test_contract
+        raise NotImplementedError, "#{self.class} does not implement #smoke_test_contract" unless contract
+        prompt = contract[:prompt]
+        if !prompt.is_a?(String) || prompt.strip.empty?
+          raise ConfigurationError, "#{self.class}.smoke_test_contract must define a non-empty :prompt"
+        end
+        response = send_message(
+          prompt: prompt,
+          timeout: timeout || contract[:timeout],
+          provider_runtime: provider_runtime
+        )
+        output = response.output.to_s.strip
+        expected_output = contract[:expected_output]&.strip
+        success = response.success? && (!contract.fetch(:require_output, true) || !output.empty?)
+        success &&= expected_output.nil? || output == expected_output
+        if success
+          return {
+            ok: true,
+            status: "ok",
+            message: contract[:success_message] || "Smoke test passed",
+            error_category: nil,
+            output: output,
+            exit_code: response.exit_code
+          }
+        end
+        message = response.error.to_s.strip
+        message = output if message.empty?
+        message = "Smoke test failed with exit code #{response.exit_code}" if message.empty?
+        {
+          ok: false,
+          status: "error",
+          message: message,
+          error_category: classify_smoke_test_message(message),
+          output: output,
+          exit_code: response.exit_code
+        }
+      rescue TimeoutError => e
+        failure_smoke_test_result(e.message, :timeout)
+      rescue AuthenticationError => e
+        failure_smoke_test_result(e.message, :auth_expired)
+      rescue RateLimitError => e
+        failure_smoke_test_result(e.message, :rate_limited)
+      rescue ProviderError => e
+        failure_smoke_test_result(e.message, classify_smoke_test_message(e.message))
+      end
       # Execution semantics for this provider
       #
       # Returns a hash describing provider-specific execution behavior so
@@ -271,6 +390,23 @@ module AgentHarness
       def parse_rate_limit_reset(output)
         nil
       end
+      private
+      def classify_smoke_test_message(message)
+        ErrorTaxonomy.classify(StandardError.new(message.to_s), error_patterns)
+      end
+      def failure_smoke_test_result(message, error_category)
+        {
+          ok: false,
+          status: "error",
+          message: message,
+          error_category: error_category,
+          output: nil,
+          exit_code: nil
+        }
+      end
     end
   end
 end

data/lib/agent_harness/providers/aider.rb CHANGED Viewed

@@ -49,6 +49,10 @@ module AgentHarness
             {name: "claude-3-5-sonnet", family: "claude-3-5-sonnet", tier: "standard", provider: "aider"}
           ]
         end
+        def smoke_test_contract
+          Base::DEFAULT_SMOKE_TEST_CONTRACT
+        end
       end
       def name

data/lib/agent_harness/providers/anthropic.rb CHANGED Viewed

@@ -81,6 +81,10 @@ module AgentHarness
           MODEL_PATTERN.match?(family_name)
         end
+        def smoke_test_contract
+          Base::DEFAULT_SMOKE_TEST_CONTRACT
+        end
         private
         def parse_models_list(output)