RubyGems - ace-test-runner-e2e - Versions diffs - 0.29.8 → 0.40.1 - Mend

ace-test-runner-e2e 0.29.8 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb CHANGED Viewed

@@ -6,16 +6,18 @@ module Ace
   module Test
     module EndToEndRunner
       module Molecules
-        # Discovers E2E test scenario directories (TS-*/scenario.yml) in packages
+        # Discovers deterministic preflight tests and agent E2E scenarios in packages
         #
         # Searches these locations inside each package (preflight tests and TS-format scenarios):
+        #   {package}/test/feat/**/*_test.rb
         #   {package}/test/e2e/TS-*/scenario.yml
         #
         # Note: This is a Molecule (not an Atom) because it performs filesystem
         # I/O via Dir.glob.
         class TestDiscoverer
-          TEST_DIR = "test/e2e"
+          TEST_DIRS = ["test/e2e"].freeze
           SCENARIO_FILE = "scenario.yml"
+          DEFAULT_PREFLIGHT_GLOBS = ["test/feat/**/*_test.rb"].freeze
           SCENARIO_DIR_PATTERN = "TS-*"
           # Find E2E test scenario files matching criteria
@@ -47,6 +49,17 @@ module Ace
             ).map(&:file_path).sort
           end
+          # @return [Array<String>] Sorted list of matching deterministic preflight test files
           # @param package [String] Package directory name, joined onto base_dir
           # @param base_dir [String] Base directory to search from (defaults to Dir.pwd)
+          def find_integration_tests(package:, base_dir: Dir.pwd)
+            package_path = File.join(base_dir, package)
             # Globs from preflight_globs are tried in order; the first glob that
             # matches at least one file wins and later globs are not consulted.
+            preflight_globs.each do |glob|
+              files = Dir.glob(File.join(package_path, glob)).sort
+              return files unless files.empty?
+            end
             # No glob matched any file.
+            []
+          end
           # Find TS-format scenario directories and load them as TestScenario models
           #
           # @param package [String] Package name
@@ -56,9 +69,11 @@ module Ace
           # @param base_dir [String] Base directory to search from
           # @return [Array<Models::TestScenario>] Loaded scenario models with test_cases
           def find_scenarios(package:, test_id: nil, tags: nil, exclude_tags: nil, base_dir: Dir.pwd)
-            test_dir = File.join(base_dir, package, TEST_DIR)
-            pattern = File.join(test_dir, SCENARIO_DIR_PATTERN, SCENARIO_FILE)
-            scenario_files = Dir.glob(pattern).sort
+            patterns = TEST_DIRS.map do |test_dir_name|
+              test_dir = File.join(base_dir, package, test_dir_name)
+              File.join(test_dir, SCENARIO_DIR_PATTERN, SCENARIO_FILE)
+            end
+            scenario_files = Dir.glob(patterns).sort
             loader = ScenarioLoader.new
             scenarios = scenario_files.map do |yml_path|
@@ -82,11 +97,13 @@ module Ace
           # @param base_dir [String] Base directory to search from
           # @return [Array<String>] Sorted list of package names
           def list_packages(base_dir: Dir.pwd)
-            pattern = File.join(base_dir, "*/#{TEST_DIR}/#{SCENARIO_DIR_PATTERN}/#{SCENARIO_FILE}")
+            patterns = TEST_DIRS.map do |test_dir_name|
+              File.join(base_dir, "*/#{test_dir_name}/#{SCENARIO_DIR_PATTERN}/#{SCENARIO_FILE}")
+            end
             base = Pathname.new(base_dir)
-            Dir.glob(pattern)
+            Dir.glob(patterns)
               .map { |f| Pathname.new(f).relative_path_from(base).each_filename.first }
               .uniq
               .sort
@@ -96,12 +113,14 @@ module Ace
           # Build glob patterns (one per TEST_DIRS entry) for finding TS-format scenario.yml files
           def build_scenario_pattern(package, test_id, base_dir)
-            test_dir = File.join(base_dir, package, TEST_DIR)
-            if test_id
-              File.join(test_dir, "*#{test_id}*", SCENARIO_FILE)
-            else
-              File.join(test_dir, SCENARIO_DIR_PATTERN, SCENARIO_FILE)
+            TEST_DIRS.map do |test_dir_name|
+              test_dir = File.join(base_dir, package, test_dir_name)
+              if test_id
+                File.join(test_dir, "*#{test_id}*", SCENARIO_FILE)
+              else
+                File.join(test_dir, SCENARIO_DIR_PATTERN, SCENARIO_FILE)
+              end
             end
           end
@@ -129,6 +148,12 @@ module Ace
             filtered
           end
+          def preflight_globs
             # Builds the ordered list of preflight globs: the value configured under
             # patterns.preflight (when present) first, then DEFAULT_PREFLIGHT_GLOBS.
             # nil entries, duplicates, and empty values are dropped.
+            configured = Molecules::ConfigLoader.load.dig("patterns", "preflight")
             # NOTE(review): the configured value is not splatted, so an Array here
             # would be kept as one nested element — presumably the config holds a
             # single String; confirm against the config schema.
+            globs = [configured, *DEFAULT_PREFLIGHT_GLOBS].compact.uniq
+            globs.reject(&:empty?)
+          end
         end
       end
     end

data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb CHANGED Viewed

@@ -16,12 +16,18 @@ module Ace
           # @param provider [String] LLM provider:model string
           # @param timeout [Integer] Request timeout in seconds
           # @param config [Hash] Configuration hash (string keys) from ConfigLoader
-          def initialize(provider: nil, timeout: nil, config: nil)
+          def initialize(provider: nil, timeout: nil, config: nil, sandbox_backend_factory: nil)
             config ||= Molecules::ConfigLoader.load
-            @provider = provider || config.dig("execution", "provider") || "claude:sonnet"
+            @provider = provider || config.dig("execution", "runner_provider") ||
+              config.dig("execution", "provider") || "claude:sonnet"
+            @verifier_provider = config.dig("execution", "verifier_provider") ||
+              config.dig("execution", "provider") || @provider
             @timeout = timeout || config.dig("execution", "timeout") || 300
             @prompt_builder = Atoms::PromptBuilder.new
             @cli_provider_adapter = Atoms::CliProviderAdapter.new(config)
+            @sandbox_backend_factory = sandbox_backend_factory || lambda { |sandbox_path, source_root: nil|
+              Molecules::BwrapSandboxBackend.new(sandbox_root: sandbox_path, source_root: source_root)
+            }
           end
           # Execute a single test scenario via LLM
@@ -192,9 +198,10 @@ module Ace
           # Execute TC via skill invocation for CLI providers
           def execute_tc_via_skill(test_case, sandbox_path, scenario, cli_args: nil, run_id: nil, env_vars: nil)
             with_tc_error_handling(scenario) do |started_at|
+              sandbox_backend, prepared_env = prepared_env_for(sandbox_path, env_vars)
               prompt = @cli_provider_adapter.build_tc_skill_prompt(
                 test_case: test_case, scenario: scenario,
-                sandbox_path: sandbox_path, run_id: run_id, env_vars: env_vars
+                sandbox_path: sandbox_path, run_id: run_id, env_vars: prepared_env
               )
               response = Ace::LLM::QueryInterface.query(
@@ -202,7 +209,8 @@ module Ace
                 system: nil, cli_args: cli_args,
                 timeout: @timeout, fallback: false,
                 working_dir: sandbox_path,
-                subprocess_env: env_vars
+                subprocess_env: prepared_env,
+                subprocess_command_prefix: sandbox_backend.command_prefix(chdir: sandbox_path, env: prepared_env)
               )
               invocation_error = detect_skill_invocation_error(response[:text])
@@ -322,9 +330,23 @@ module Ace
             @pipeline_executors ||= {}
             @pipeline_executors[timeout] ||= Molecules::PipelineExecutor.new(
               provider: @provider,
-              timeout: timeout
+              verifier_provider: @verifier_provider,
+              timeout: timeout,
+              sandbox_backend_factory: @sandbox_backend_factory
             )
           end
+          def build_sandbox_backend(sandbox_path, env_vars)
             # Creates a sandbox backend for sandbox_path through the injected factory.
             # The source root is read from env_vars under ACE_E2E_SOURCE_ROOT, trying
             # the String key first and then the Symbol key; it is nil when neither
             # key yields a value or env_vars itself is nil.
+            @sandbox_backend_factory.call(
+              sandbox_path,
+              source_root: env_vars&.dig("ACE_E2E_SOURCE_ROOT") || env_vars&.dig(:ACE_E2E_SOURCE_ROOT)
+            )
+          end
+          def prepared_env_for(sandbox_path, env_vars)
             # Returns a two-element Array [sandbox_backend, prepared_env], where
             # prepared_env is whatever the backend's #prepared_env returns for env_vars.
             # A nil env_vars is treated as an empty Hash in both calls.
+            sandbox_backend = build_sandbox_backend(sandbox_path, env_vars || {})
+            [sandbox_backend, sandbox_backend.prepared_env(env_vars || {})]
+          end
         end
       end
     end

data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 require "open3"
 require "fileutils"
 require "yaml"
+require "set"
 require "ace/b36ts"
 module Ace
@@ -30,7 +31,8 @@ module Ace
           # @param timestamp_generator Timestamp generator (injectable)
           def initialize(max_parallel: 4, base_dir: nil, discoverer: nil, affected_detector: nil,
             failure_finder: nil, output: $stdout, use_color: nil, progress: false,
-            suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil)
+            suite_report_writer: nil, scenario_loader: nil, timestamp_generator: nil,
+            runtime_builder: nil, shared_runtime_cache_root: nil)
             @max_parallel = max_parallel
             @base_dir = base_dir || Dir.pwd
             @discoverer = discoverer || Molecules::TestDiscoverer.new
@@ -43,6 +45,14 @@ module Ace
             @suite_report_writer = suite_report_writer || Molecules::SuiteReportWriter.new(config: config)
             @loader = scenario_loader || Molecules::ScenarioLoader.new
             @timestamp_generator = timestamp_generator || method(:default_timestamp)
+            @runtime_builder = runtime_builder || Molecules::SandboxRuntimeBuilder.new(
+              source_root: @base_dir,
+              ruby_version: config.dig("sandbox", "ruby_version") || Molecules::ConfigLoader.default_sandbox_ruby_version
+            )
+            @shared_runtime_cache_root = File.expand_path(
+              shared_runtime_cache_root || File.join(@base_dir, Molecules::SandboxRuntimeBuilder::DEFAULT_SHARED_RUNTIME_CACHE_ROOT)
+            )
+            @shared_runtime_root = nil
           end
           # Run E2E tests across all packages
@@ -57,6 +67,7 @@ module Ace
           # @option options [Integer] :timeout Timeout per test in seconds
           # @return [Hash] Summary of results
           def run(options = {})
+            pre_run_worktree = git_status_snapshot
             packages = @discoverer.list_packages(base_dir: @base_dir)
             if packages.empty?
@@ -122,6 +133,7 @@ module Ace
             total_tests = package_tests.values.flatten.size
             pkg_count = package_tests.keys.size
+            prepare_shared_runtime_cache if total_tests > 0
             # Pre-compute column widths for aligned output
             compute_column_widths(package_tests)
@@ -135,9 +147,9 @@ module Ace
             # Execute tests
             if options[:parallel]
-              run_parallel(package_tests, options)
+              run_parallel(package_tests, options, pre_run_worktree)
             else
-              run_sequential(package_tests, options)
+              run_sequential(package_tests, options, pre_run_worktree)
             end
           end
@@ -210,7 +222,7 @@ module Ace
           # @param package_tests [Hash] Package to tests mapping
           # @param options [Hash] Execution options
           # @return [Hash] Summary of results
-          def run_sequential(package_tests, options)
+          def run_sequential(package_tests, options, pre_run_worktree)
             results = {total: 0, passed: 0, failed: 0, errors: 0, total_cases: 0, passed_cases: 0, packages: {}}
             start_time = Time.now
@@ -265,7 +277,7 @@ module Ace
             done = true
             refresh_thread&.join
-            finalize_run(results, package_tests, start_time)
+            finalize_run(results, package_tests, start_time, pre_run_worktree)
           end
           # Run tests in parallel using subprocesses
@@ -273,7 +285,7 @@ module Ace
           # @param package_tests [Hash] Package to tests mapping
           # @param options [Hash] Execution options
           # @return [Hash] Summary of results
-          def run_parallel(package_tests, options)
+          def run_parallel(package_tests, options, pre_run_worktree)
             results = {total: 0, passed: 0, failed: 0, errors: 0, total_cases: 0, passed_cases: 0, packages: {}}
             queue = build_test_queue(package_tests)
             run_ids = generate_run_ids(queue.size)
@@ -297,7 +309,7 @@ module Ace
               check_running_processes(running, results)
             end
-            finalize_run(results, package_tests, start_time)
+            finalize_run(results, package_tests, start_time, pre_run_worktree)
           end
           # Build a flat queue of test items
@@ -328,7 +340,7 @@ module Ace
             cmd_array = build_test_command(package, test_file, options, run_id: run_id)
             # Spawn process with array form (no shell invocation)
-            stdin, stdout, stderr, thread = Open3.popen3(*cmd_array, chdir: @base_dir)
+            stdin, stdout, stderr, thread = Open3.popen3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
             {pid: thread.pid, thread: thread, stdout: stdout, stderr: stderr,
              stdin: stdin, package: package, test_file: test_file, output: String.new}
@@ -393,6 +405,18 @@ module Ace
             File.executable?(local) ? local : "ace-test-e2e"
           end
+          def prepare_shared_runtime_cache
             # Asks the runtime builder to prepare the shared runtime under
             # @shared_runtime_cache_root and stores the builder's return value in
             # @shared_runtime_root, which suite_subprocess_env reads.
+            @shared_runtime_root = @runtime_builder.prepare_shared_runtime(
+              cache_root: @shared_runtime_cache_root
+            )
+          end
+          def suite_subprocess_env
             # Environment Hash passed as the first argument to Open3 when test
             # subprocesses are started. It is empty while @shared_runtime_root is nil
             # or an empty string; otherwise it maps
             # SandboxRuntimeBuilder::SHARED_RUNTIME_ENV_KEY to that root.
+            return {} if @shared_runtime_root.to_s.empty?
+            {Molecules::SandboxRuntimeBuilder::SHARED_RUNTIME_ENV_KEY => @shared_runtime_root}
+          end
           # Extract test ID from file path
           #
           # @param test_file [String] Path to scenario.yml file
@@ -497,6 +521,7 @@ module Ace
           # @return [Hash] Parsed result with :passed_cases and :total_cases
           def parse_subprocess_result(process)
             result = parse_test_output(process[:output], process[:thread].value.exitstatus, extract_test_name(process[:test_file]))
+            result[:report_dir] = normalize_report_dir(result[:report_dir], result[:test_name])
             result[:raw_output] = process[:output]
             # For non-pass results, check agent-written metadata as authoritative source
@@ -510,6 +535,34 @@ module Ace
             {status: "error", error: "Failed to parse result: #{e.message}"}
           end
+          def normalize_report_dir(report_dir, test_name)
             # Returns report_dir unchanged when it is nil/empty, already a directory,
             # or not an existing regular file. Only when it points at a file is it
             # resolved through resolve_report_dir_from_suite_report, using the
             # canonical test id derived from test_name; the original value is kept
             # when that lookup returns nil.
+            return report_dir if report_dir.nil? || report_dir.empty?
+            return report_dir if File.directory?(report_dir)
+            return report_dir unless File.file?(report_dir)
+            resolved = resolve_report_dir_from_suite_report(report_dir, canonical_test_id(test_name))
+            resolved || report_dir
+          rescue
             # Best effort: any StandardError raised above keeps the original value.
+            report_dir
+          end
+          def resolve_report_dir_from_suite_report(report_path, test_id)
             # Reads the file at report_path and looks for a table row of the form
             #   | <test_id> | `<path>` |
             # returning <path> expanded relative to the directory of report_path.
             # Returns nil when report_path does not end in ".md", when test_id is
             # nil/empty, or when no such row is found.
+            return nil unless report_path.end_with?(".md")
+            return nil if test_id.nil? || test_id.empty?
+            content = File.read(report_path)
             # Escape the id so it is matched literally inside the pattern.
+            escaped = Regexp.escape(test_id)
+            table_match = content.match(/^\|\s*#{escaped}\s*\|\s*`([^`]+)`\s*\|$/m)
+            return nil unless table_match
+            File.expand_path(table_match[1], File.dirname(report_path))
+          end
+          def canonical_test_id(test_name)
             # Extracts a leading "TS-<letters/digits>-<digits><optional letters>"
             # prefix (matched case-insensitively) from test_name and returns it
             # upcased, e.g. "ts-lint-001a-foo" gives "TS-LINT-001A". When the name
             # has no such prefix, test_name is returned as given.
+            match = test_name.to_s.match(/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i)
+            match ? match[1].upcase : test_name
+          end
           # Override result from agent-written metadata.yml when subprocess exit code is misleading
           #
           # @param result [Hash] Parsed result with :report_dir
@@ -576,7 +629,9 @@ module Ace
               error_msg ||= "Test execution returned ERROR status"
               base.merge(status: "error", error: error_msg)
             else
-              summary = output.match(/(\d+)\/(\d+) passed/)&.captures&.join("/") || "Test failed"
+              summary = output.lines.filter_map { |line| line[/^(Preflight failed: .+?)\s*$/, 1] }.last
+              summary ||= output.match(/(\d+)\/(\d+) passed/)&.captures&.join("/")
+              summary ||= "Test failed"
               base.merge(status: "fail", summary: summary)
             end
           rescue => e
@@ -589,8 +644,9 @@ module Ace
           # @param package_tests [Hash] Package to test files mapping
           # @param start_time [Time] When the run started
           # @return [Hash] Results with optional :report_path
-          def finalize_run(results, package_tests, start_time)
+          def finalize_run(results, package_tests, start_time, pre_run_worktree)
             write_failure_stubs(results, package_tests)
+            results[:suite_diagnostics] = build_suite_diagnostics(pre_run_worktree)
             @display.show_summary(results, Time.now - start_time)
             warn_on_lingering_claude_processes
@@ -641,6 +697,7 @@ module Ace
                   "status" => result[:status]
                 }
                 File.write(File.join(stub_dir, "metadata.yml"), YAML.dump(stub_data))
+                result[:report_dir] = stub_dir
                 if result[:raw_output] && !result[:raw_output].empty?
                   File.write(File.join(stub_dir, "subprocess_output.log"), result[:raw_output])
@@ -709,7 +766,9 @@ module Ace
               all_results, all_scenarios,
               package: "suite",
               timestamp: timestamp,
-              base_dir: @base_dir
+              base_dir: @base_dir,
+              report_kind: :suite,
+              diagnostics: results[:suite_diagnostics]
             )
           rescue => e
             warn "Warning: Suite report generation failed (#{e.class}: #{e.message})"
@@ -726,19 +785,40 @@ module Ace
             total = result_hash[:total_cases] || 0
             failed = [total - passed, 0].max
-            test_cases = []
-            passed.times { |i| test_cases << {id: "TC-#{format("%03d", i + 1)}", description: "", status: "pass"} }
-            failed.times { |i| test_cases << {id: "TC-#{format("%03d", passed + i + 1)}", description: "", status: "fail"} }
             Models::TestResult.new(
               test_id: result_hash[:test_name] || "unknown",
               status: result_hash[:status] || "error",
-              test_cases: test_cases,
+              test_cases: [],
               summary: result_hash[:summary] || result_hash[:error] || "",
-              report_dir: result_hash[:report_dir]
+              report_dir: result_hash[:report_dir],
+              metadata: {"tcs-passed" => passed, "tcs-total" => total, "tcs-failed" => failed}
             )
           end
+          def git_status_snapshot
             # Runs `git status --short` in @base_dir and returns its output lines
             # with trailing whitespace removed. Returns nil when git exits with a
             # non-zero status or when any StandardError is raised while running it.
+            stdout, _stderr, status = Open3.capture3("git", "status", "--short", chdir: @base_dir)
+            return nil unless status.success?
+            stdout.lines.map(&:rstrip)
+          rescue
+            nil
+          end
+          def build_suite_diagnostics(pre_run_worktree)
             # Compares a fresh git status snapshot with pre_run_worktree.
             # Returns {} when either snapshot is unavailable (nil) or when every
             # new status line is an untracked ("?? ") entry; otherwise returns a Hash
             # with dirty_worktree: true and new_tracked_entries: the new status lines.
+            post_run_worktree = git_status_snapshot
+            return {} unless pre_run_worktree && post_run_worktree
             # Status lines already present before the run are not reported.
+            before = pre_run_worktree.to_set
+            new_entries = post_run_worktree.reject { |line| before.include?(line) }
             # Lines starting with "?? " (untracked files in short status) are ignored.
+            new_tracked_entries = new_entries.reject { |line| line.start_with?("?? ") }
+            return {} if new_tracked_entries.empty?
+            {
+              dirty_worktree: true,
+              new_tracked_entries: new_tracked_entries
+            }
+          end
           # Load a scenario from file into a Models::TestScenario, with fallback
           #
           # @param package [String] Package name
@@ -806,7 +886,7 @@ module Ace
           # @return [Hash] Test result
           def run_single_test(package, test_file, options, run_id: nil)
             cmd_array = build_test_command(package, test_file, options, run_id: run_id)
-            output, stderr, status = Open3.capture3(*cmd_array, chdir: @base_dir)
+            output, stderr, status = Open3.capture3(suite_subprocess_env, *cmd_array, chdir: @base_dir)
             # Combine stdout and stderr for parsing
             combined_output = output + stderr