RubyGems - ace-test-runner-e2e - Versions diffs - 0.29.8 → 0.40.1 - Mend

ace-test-runner-e2e 0.29.8 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb CHANGED

@@ -35,7 +35,7 @@ module Ace
           #
           # Resolves role: references to their concrete provider before checking.
           #
-          # @param provider_string [String] Provider:model string (e.g., "claude:sonnet", "role:e2e-executor")
+          # @param provider_string [String] Provider:model string (e.g., "claude:sonnet", "role:e2e-runner")
           # @return [Boolean]
           def cli_provider?(provider_string)
             resolved = resolve_provider_name(provider_string)
@@ -44,9 +44,9 @@ module Ace
           def build_execution_prompt(command:, tc_mode:)
             return_contract = if tc_mode
-              "- **Test ID**: ...\n- **TC ID**: ...\n- **Status**: pass | fail\n- **Report Paths**: ...\n- **Issues**: ..."
+              "- **Test ID**: ...\n- **TC ID**: ...\n- **Status**: pass | fail\n- **Report Paths**: ...\n- **Observations**: ...\n- **Issues**: ... (optional legacy alias)"
             else
-              "- **Test ID**: ...\n- **Status**: pass | fail | partial\n- **Passed**: ...\n- **Failed**: ...\n- **Total**: ...\n- **Report Paths**: ...\n- **Issues**: ..."
+              "- **Test ID**: ...\n- **Status**: pass | fail | partial\n- **Passed**: ...\n- **Failed**: ...\n- **Total**: ...\n- **Report Paths**: ...\n- **Observations**: ...\n- **Issues**: ... (optional legacy alias)"
             end
             <<~PROMPT.strip
@@ -55,8 +55,9 @@ module Ace
               Execution requirements:
               - Do not run `/ace-...` inside a shell command.
-              - If slash commands are unavailable, stop and report that limitation in `Issues`.
+              - If slash commands are unavailable, stop and report that limitation in `Observations`.
               - Write reports under `.ace-local/test-e2e/*-reports/`.
+              - `Observations` is required and must be a concise factual summary of actions, outcomes, and blockers without verdict language.
               - Return only this structured summary:
               #{return_contract}
             PROMPT
@@ -122,6 +123,7 @@ module Ace
               Verification requirements:
               - Inspect sandbox artifacts and scenario files directly.
+              - Judge from sandbox state first, then runner observations, then raw debug captures only when needed.
               - Evaluate each test case using `TC-*.verify.md` criteria when present.
               - Classify each failed test case with one category:
                 `test-spec-error`, `tool-bug`, `runner-error`, or `infrastructure-error`.
@@ -145,7 +147,7 @@ module Ace
           # Resolve the bare provider name from a provider string.
           # For role: references, resolves via ProviderModelParser to find the
-          # concrete provider (e.g. "role:e2e-executor" → "claude").
+          # concrete provider (e.g. "role:e2e-runner" → "claude").
           def resolve_provider_name(provider_string)
             name = self.class.provider_name(provider_string)
             return name unless name == "role"

data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb CHANGED

@@ -13,6 +13,7 @@ module Ace
         #   - **Failed**: 0
         #   - **Total**: 8
         #   - **Report Paths**: 8p5jo2-lint-ts001-reports/*
+        #   - **Observations**: None
         #   - **Issues**: None
         #
         # Falls back to ResultParser.parse() for JSON responses.
@@ -45,6 +46,7 @@ module Ace
             fields[:failed] = extract_field(text, "Failed")
             fields[:total] = extract_field(text, "Total")
             fields[:report_paths] = extract_field(text, "Report Paths")
+            fields[:observations] = extract_field(text, "Observations")
             fields[:issues] = extract_field(text, "Issues")
             # Need at least test_id and status for a valid parse
@@ -69,8 +71,7 @@ module Ace
             passed.times { |i| test_cases << {id: "TC-#{format("%03d", i + 1)}", description: "", status: "pass", actual: "", notes: ""} }
             failed.times { |i| test_cases << {id: "TC-#{format("%03d", passed + i + 1)}", description: "", status: "fail", actual: "", notes: ""} }
-            issues = parsed[:issues]
-            observations = (issues && issues.downcase != "none") ? issues : ""
+            observations = normalize_observations(parsed[:observations], parsed[:issues])
             {
               test_id: parsed[:test_id],
@@ -131,8 +132,8 @@ module Ace
             fields[:failed_tcs] = extract_field(text, "Failed TCs")
             fields[:issues] = extract_field(text, "Issues")
-            return parse(text) unless fields[:test_id] && fields[:status] &&
-              fields[:tcs_passed] && fields[:tcs_failed] && fields[:tcs_total]
+            return parse_minimal_verifier(text) unless fields[:test_id] && fields[:status]
+            return parse(text) unless fields[:tcs_passed] && fields[:tcs_failed] && fields[:tcs_total]
             passed = fields[:tcs_passed].to_i
             failed = fields[:tcs_failed].to_i
@@ -180,6 +181,58 @@ module Ace
             }
           end
+          def self.parse_minimal_verifier(text)
+            compact = text.to_s.strip
+            results_match = compact.match(/Results:\s*(\d+)\s*\/\s*(\d+)\s*passed/i)
+            if results_match
+              passed = results_match[1].to_i
+              total = results_match[2].to_i
+              status = if total.zero?
+                "fail"
+              elsif passed == total
+                "pass"
+              elsif passed.zero?
+                "fail"
+              else
+                "partial"
+              end
+              failed = [total - passed, 0].max
+              test_cases = []
+              passed.times { |i| test_cases << {id: "TC-#{format("%03d", i + 1)}", description: "", status: "pass", actual: "", notes: ""} }
+              failed.times { |i| test_cases << {id: "TC-#{format("%03d", passed + i + 1)}", description: "", status: "fail", actual: "", notes: "", category: "unknown"} }
+              return {
+                test_id: "",
+                status: status,
+                test_cases: test_cases,
+                summary: "#{passed}/#{total} passed",
+                observations: compact
+              }
+            end
+            status_match = compact.match(/\b(PASS|FAIL|PARTIAL|ERROR)\b/i)
+            return parse(text) unless status_match
+            status = normalize_status(status_match[1])
+            evidence = compact.sub(/^.*?\b#{Regexp.escape(status_match[1])}\b[:\-\s]*/i, "").strip
+            tc_status = (status == "pass") ? "pass" : "fail"
+            {
+              test_id: "",
+              status: status,
+              test_cases: [{
+                id: "TC-001",
+                description: "",
+                status: tc_status,
+                actual: "",
+                notes: evidence,
+                category: ((tc_status == "fail") ? "unknown" : nil)
+              }],
+              summary: evidence.empty? ? status : evidence,
+              observations: evidence
+            }
+          end
           # Parse TC-level markdown return contract
           def self.parse_tc_markdown(text)
             fields = {}
@@ -188,6 +241,7 @@ module Ace
             fields[:tc_id] = extract_field(text, "TC ID")
             fields[:status] = extract_field(text, "Status")
             fields[:report_paths] = extract_field(text, "Report Paths")
+            fields[:observations] = extract_field(text, "Observations")
             fields[:issues] = extract_field(text, "Issues")
             # Need test_id, tc_id, and status for a valid TC parse
@@ -200,8 +254,7 @@ module Ace
           def self.to_tc_normalized(parsed)
             parsed[:status] = normalize_status(parsed[:status])
-            issues = parsed[:issues]
-            observations = (issues && issues.downcase != "none") ? issues : ""
+            observations = normalize_observations(parsed[:observations], parsed[:issues])
             {
               test_id: parsed[:test_id],
@@ -234,9 +287,22 @@ module Ace
             end
           end
+          def self.normalize_observations(primary, fallback = nil)
+            [primary, fallback].each do |value|
+              next if value.nil?
+              normalized = value.to_s.strip
+              next if normalized.empty? || normalized.casecmp("none").zero?
+              return normalized
+            end
+            ""
+          end
           private_class_method :parse_markdown, :to_normalized, :extract_field,
             :parse_tc_markdown, :to_tc_normalized, :normalize_status,
-            :parse_failed_tcs
+            :parse_failed_tcs, :parse_minimal_verifier, :normalize_observations
         end
       end
     end

data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb CHANGED

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require "ace/b36ts"
 require "ace/support/cli"
 require "stringio"
 require "ace/support/cli"
@@ -23,6 +24,7 @@ module Ace
               in the monorepo. Tests run sequentially by default or in parallel
               with --parallel flag. Use --affected to only test changed packages.
               Use --only-failures to re-run only previously failed scenarios.
+              Full unfiltered suite runs retry failed scenarios once by default.
               Optionally filter to specific packages with a comma-separated list.
               Output:
@@ -40,6 +42,8 @@ module Ace
               "--affected --parallel 8       # Parallel affected tests only",
               "--only-failures               # Re-run failed scenarios from cache",
               "--affected --only-failures    # Re-run failed scenarios in affected packages",
+              "--no-retry-failures-once      # Disable default retry for a full suite run",
+              "--prune-artifacts             # Remove stale .ace-local/test-e2e artifacts before running",
               "--tags smoke,happy-path       # Include scenarios by tag",
               "--exclude-tags deep           # Exclude scenarios by tag",
               "--cli-args dangerously-skip-permissions  # Pass args to provider"
@@ -50,6 +54,8 @@ module Ace
             option :affected, type: :boolean, desc: "Only test affected packages"
             option :only_failures, type: :boolean,
               desc: "Re-run only previously failed scenarios"
+            option :retry_failures_once, type: :boolean,
+              desc: "Retry failed scenarios once after a full unfiltered suite run"
             option :cli_args, type: :string,
               desc: "Extra args for CLI-based LLM providers"
             option :provider, type: :string, default: Molecules::ConfigLoader.default_provider,
@@ -61,6 +67,8 @@ module Ace
             option :progress, type: :boolean, desc: "Enable live animated display"
             option :verify, type: :boolean,
               desc: "Run independent verifier pass for each scenario"
+            option :prune_artifacts, type: :boolean,
+              desc: "Remove stale .ace-local/test-e2e artifacts before running (preserves suite reports and runtime-cache)"
             option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
             option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
             option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
@@ -69,21 +77,36 @@ module Ace
               options = coerce_types(options, parallel: :integer, timeout: :integer)
               parallel = options[:parallel]
-              affected = options[:affected]
-              only_failures = options[:only_failures]
+              affected = !!options[:affected]
+              only_failures = !!options[:only_failures]
+              prune_artifacts = !!options[:prune_artifacts]
               tags = parse_csv_list(options[:tags])
               exclude_tags = parse_csv_list(options[:exclude_tags])
+              if only_failures && prune_artifacts
+                raise Ace::Support::Cli::Error.new(
+                  "--prune-artifacts cannot be used with --only-failures"
+                )
+              end
+              retry_failures_once = resolve_retry_failures_once(
+                requested: options[:retry_failures_once],
+                packages: packages,
+                affected: affected,
+                only_failures: only_failures,
+                tags: tags,
+                exclude_tags: exclude_tags
+              )
               output = quiet?(options) ? StringIO.new : $stdout
               progress = options[:progress] && !quiet?(options)
+              prune_artifacts_if_requested(output: output, prune_artifacts: prune_artifacts, quiet: quiet?(options))
-              orchestrator = Organisms::SuiteOrchestrator.new(
+              orchestrator = build_orchestrator(
                 max_parallel: [parallel, 1].max,
                 output: output,
                 progress: progress
               )
-              results = orchestrator.run(
+              run_options = {
                 parallel: parallel > 0,
                 affected: affected,
                 only_failures: only_failures,
@@ -94,6 +117,13 @@ module Ace
                 tags: tags,
                 exclude_tags: exclude_tags,
                 verify: options[:verify]
+              }
+              results = run_suite_with_retry(
+                orchestrator,
+                run_options: run_options,
+                output: output,
+                retry_failures_once: retry_failures_once
               )
               if results[:total].zero?
@@ -110,13 +140,173 @@ module Ace
               if results[:failed] > 0 || results[:errors] > 0
                 failed_count = results[:failed] + results[:errors]
                 raise Ace::Support::Cli::Error.new(
-                  "#{failed_count} test(s) failed or errored"
+                  results[:retry_attempted] ? "#{failed_count} test(s) failed or errored after retry" : "#{failed_count} test(s) failed or errored"
                 )
               end
+              results
             end
             private
+            def build_orchestrator(max_parallel:, output:, progress:)
+              Organisms::SuiteOrchestrator.new(
+                max_parallel: max_parallel,
+                output: output,
+                progress: progress
+              )
+            end
+            def build_retry_report_writer
+              Molecules::SuiteReportWriter.new(config: Molecules::ConfigLoader.load)
+            end
+            def build_artifact_pruner
+              Molecules::ArtifactPruner.new
+            end
+            def prune_artifacts_if_requested(output:, prune_artifacts:, quiet:)
+              return unless prune_artifacts
+              result = build_artifact_pruner.prune(base_dir: Dir.pwd)
+              return if quiet
+              output.puts(
+                "Pruned #{result[:deleted_count]} artifact(s) from #{result[:root_display]} (preserved suite reports and runtime-cache)"
+              )
+            end
+            def run_suite_with_retry(orchestrator, run_options:, output:, retry_failures_once:)
+              initial_results = orchestrator.run(run_options)
+              annotated = annotate_results(
+                initial_results,
+                retry_attempted: false,
+                attempts: 1,
+                flaky_scenarios: [],
+                remaining_failures: failure_scenarios(initial_results),
+                initial_report_path: initial_results[:report_path],
+                retry_report_path: nil,
+                report_path: initial_results[:report_path]
+              )
+              return annotated unless retry_failures_once && suite_failed?(initial_results)
+              output.puts "Retrying failed scenarios once..."
+              retry_results = orchestrator.run(run_options.merge(only_failures: true))
+              if retry_results[:total].zero?
+                raise Ace::Support::Cli::Error.new(
+                  "Retry pass found no failed test scenarios from attempt 1; aborting instead of silently passing"
+                )
+              end
+              flaky_scenarios = recovered_flaky_scenarios(initial_results, retry_results)
+              remaining_failures = failure_scenarios(retry_results)
+              final_report_path = write_retry_summary_report(initial_results, retry_results)
+              output.puts "Final Report: #{final_report_path}" if final_report_path
+              if remaining_failures.empty?
+                output.puts "#{flaky_scenarios.length} scenario(s) recovered on retry and were marked flaky"
+              else
+                output.puts "#{remaining_failures.length} scenario(s) still failing after retry"
+              end
+              annotate_results(
+                retry_results,
+                retry_attempted: true,
+                attempts: 2,
+                flaky_scenarios: flaky_scenarios,
+                remaining_failures: remaining_failures,
+                initial_report_path: initial_results[:report_path],
+                retry_report_path: retry_results[:report_path],
+                report_path: final_report_path || retry_results[:report_path]
+              )
+            end
+            def write_retry_summary_report(initial_results, retry_results)
+              build_retry_report_writer.write_retry_summary(
+                initial_results: initial_results,
+                retry_results: retry_results,
+                timestamp: Ace::B36ts.encode(Time.now.utc, format: :"50ms"),
+                base_dir: Dir.pwd
+              )
+            rescue => e
+              warn "Warning: Failed to write retry summary report: #{e.message}" if ENV["DEBUG"]
+              nil
+            end
+            def annotate_results(results, **extra)
+              results.merge(extra)
+            end
+            def suite_failed?(results)
+              results[:failed].to_i > 0 || results[:errors].to_i > 0
+            end
+            def failure_scenarios(results)
+              scenario_result_index(results)
+                .values
+                .select { |result| result[:status] != "pass" }
+                .map { |result| result[:test_id] }
+                .sort
+            end
+            def recovered_flaky_scenarios(initial_results, retry_results)
+              initial_by_test = scenario_result_index(initial_results)
+              retry_by_test = scenario_result_index(retry_results)
+              initial_by_test.each_with_object([]) do |(test_id, initial), flaky|
+                next if initial[:status] == "pass"
+                retry_result = retry_by_test[test_id]
+                next unless retry_result && retry_result[:status] == "pass"
+                flaky << {
+                  "test_id" => test_id,
+                  "initial_status" => initial[:status],
+                  "retry_status" => retry_result[:status]
+                }
+              end.sort_by { |entry| entry["test_id"] }
+            end
+            def scenario_result_index(results)
+              results.fetch(:packages, {}).values.flatten.each_with_object({}) do |result, index|
+                test_name = result[:test_name] || result[:test_id] || ""
+                test_id = test_name[/\A(TS-[A-Z0-9]+-\d+[a-z]*)/i, 1]&.upcase || test_name
+                next if test_id.empty?
+                index[test_id] = {
+                  test_id: test_id,
+                  status: result[:status],
+                  summary: result[:summary],
+                  error: result[:error]
+                }
+              end
+            end
+            def resolve_retry_failures_once(requested:, packages:, affected:, only_failures:, tags:, exclude_tags:)
+              scoped = scoped_suite_run?(
+                packages: packages,
+                affected: affected,
+                only_failures: only_failures,
+                tags: tags,
+                exclude_tags: exclude_tags
+              )
+              if requested == true && scoped
+                raise Ace::Support::Cli::Error.new(
+                  "--retry-failures-once is only supported for full unfiltered suite runs"
+                )
+              end
+              return requested unless requested.nil?
+              !scoped
+            end
+            def scoped_suite_run?(packages:, affected:, only_failures:, tags:, exclude_tags:)
+              [packages, affected, only_failures].any? ||
+                !tags.empty? ||
+                !exclude_tags.empty?
+            end
             def parse_csv_list(raw)
               return [] if raw.nil? || raw.strip.empty?

data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb CHANGED

@@ -20,9 +20,9 @@ module Ace
             desc <<~DESC.strip
               Run E2E tests via LLM execution
-              Discovers and executes TS-* test scenarios in a package's test/e2e/ directory.
-              Tests are sent to an LLM provider which executes the test steps and returns
-              structured results.
+              Discovers and executes deterministic preflight tests from test/feat
+              before TS-* agent scenarios from test/e2e. Tests are sent to an LLM
+              provider which executes the scenario steps and returns structured results.
               Output:
                 Exit codes: 0 (all pass), 1 (any fail/error)
@@ -35,7 +35,8 @@ module Ace
               "ace-lint --provider gemini:flash  # Use specific provider",
               "ace-lint --provider glite     # Use API provider (predict mode)",
               "ace-lint --tags smoke         # Run only smoke-tagged scenarios",
-              "ace-lint TS-LINT-003 --dry-run  # Preview scenarios that would run"
+              "ace-lint --prune-artifacts    # Remove stale .ace-local/test-e2e artifacts before running",
+              "ace-lint TS-LINT-003 --dry-run  # Preview preflight and scenario phases"
             ]
             argument :package, required: true, desc: "Package name (e.g., ace-lint)"
@@ -55,11 +56,13 @@ module Ace
             option :report_dir, type: :string,
               desc: "Explicit report directory path (overrides computed path)"
             option :dry_run, type: :boolean,
-              desc: "Preview which scenarios would run without executing"
+              desc: "Preview which preflight tests and scenarios would run without executing"
             option :tags, type: :string,
               desc: "Comma-separated scenario tags to include"
             option :verify, type: :boolean,
               desc: "Run independent verifier pass after runner execution"
+            option :prune_artifacts, type: :boolean,
+              desc: "Remove stale .ace-local/test-e2e artifacts before running (preserves final reports and runtime-cache)"
             option :quiet, type: :boolean, aliases: %w[-q], desc: "Suppress non-essential output"
             option :verbose, type: :boolean, aliases: %w[-v], desc: "Show verbose output"
             option :debug, type: :boolean, aliases: %w[-d], desc: "Show debug output"
@@ -67,13 +70,22 @@ module Ace
             def call(package:, test_id: nil, **options)
               options = coerce_types(options, timeout: :integer, parallel: :integer)
               output = quiet?(options) ? StringIO.new : $stdout
+              prune_artifacts = !!options[:prune_artifacts]
+              if options[:dry_run] && prune_artifacts
+                raise Ace::Support::Cli::Error.new(
+                  "--prune-artifacts cannot be used with --dry-run"
+                )
+              end
+              prune_artifacts_if_requested(output: output, prune_artifacts: prune_artifacts, quiet: quiet?(options))
               # Handle dry-run mode
               if options[:dry_run]
                 return handle_dry_run(package, test_id, output, tags: parse_tags(options[:tags]))
               end
-              orchestrator = Organisms::TestOrchestrator.new(
+              orchestrator = build_orchestrator(
                 provider: options[:provider],
                 timeout: options[:timeout],
                 parallel: options[:parallel],
@@ -110,7 +122,31 @@ module Ace
             private
-            # Handle dry-run mode: preview which scenarios would run
+            def build_orchestrator(provider:, timeout:, parallel:, progress:)
+              Organisms::TestOrchestrator.new(
+                provider: provider,
+                timeout: timeout,
+                parallel: parallel,
+                progress: progress
+              )
+            end
+            def build_artifact_pruner
+              Molecules::ArtifactPruner.new
+            end
+            def prune_artifacts_if_requested(output:, prune_artifacts:, quiet:)
+              return unless prune_artifacts
+              result = build_artifact_pruner.prune(base_dir: Dir.pwd)
+              return if quiet
+              output.puts(
+                "Pruned #{result[:deleted_count]} artifact(s) from #{result[:root_display]} (preserved final reports and runtime-cache)"
+              )
+            end
+            # Handle dry-run mode: preview which preflight tests and scenarios would run
             #
             # @param package [String] Package name
             # @param test_id [String, nil] Test ID
@@ -125,15 +161,28 @@ module Ace
                 tags: tags,
                 base_dir: Dir.pwd
               )
-              if files.empty?
+              preflight_files = discoverer.find_integration_tests(package: package, base_dir: Dir.pwd)
+              if files.empty? && preflight_files.empty?
                 raise Ace::Support::Cli::Error.new(
                   "No tests found for package '#{package}'" +
                   (test_id ? " with ID '#{test_id}'" : "")
                 )
               end
-              output.puts "Dry run: preview of scenarios to execute"
+              output.puts "Dry run: preview of execution phases"
+              output.puts ""
+              output.puts "Phase 1: deterministic preflight"
+              if preflight_files.empty?
+                output.puts "  (none)"
+              else
+                preflight_files.each do |file|
+                  output.puts "  [preflight] #{file}"
+                end
+              end
               output.puts ""
+              output.puts "Phase 2: scenarios"
+              output.puts "  (none)" if files.empty?
+              output.puts "" unless files.empty?
               files.each do |file|
                 scenario = loader.load(File.dirname(file))

data/lib/ace/test/end_to_end_runner/models/test_case.rb CHANGED

@@ -9,7 +9,8 @@ module Ace
         # Contains parsed frontmatter metadata and the full markdown body
         # from an independent test case file within a scenario directory.
         class TestCase
-          attr_reader :tc_id, :title, :content, :file_path, :pending, :goal_format
+          attr_reader :tc_id, :title, :content, :file_path, :pending, :goal_format,
+            :declared_artifacts, :optional_artifacts
           # @param tc_id [String] Test case identifier (e.g., "TC-001")
           # @param title [String] Test case title from frontmatter
@@ -17,13 +18,18 @@ module Ace
           # @param file_path [String] Absolute path to the source test file
           # @param pending [String, nil] Pending reason (presence = pending, value = reason)
           # @param goal_format [String, nil] Test case source format ("standalone")
-          def initialize(tc_id:, title:, content:, file_path:, pending: nil, goal_format: nil)
+          # @param declared_artifacts [Array<String>] Required artifact paths under results/tc/*
+          # @param optional_artifacts [Array<String>] Optional artifact paths under results/tc/*
+          def initialize(tc_id:, title:, content:, file_path:, pending: nil, goal_format: nil,
+            declared_artifacts: [], optional_artifacts: [])
             @tc_id = tc_id
             @title = title
             @content = content
             @file_path = file_path
             @pending = pending
             @goal_format = goal_format
+            @declared_artifacts = declared_artifacts
+            @optional_artifacts = optional_artifacts
           end
           # Whether this test case is pending (should be skipped)

data/lib/ace/test/end_to_end_runner/models/test_result.rb CHANGED

@@ -10,7 +10,7 @@ module Ace
         # from executing a test scenario via LLM.
         class TestResult
           attr_reader :test_id, :status, :test_cases, :summary,
-            :started_at, :completed_at, :report_dir, :error
+            :started_at, :completed_at, :report_dir, :error, :metadata, :observations
           # @param test_id [String] Test identifier
           # @param status [String] Overall status: "pass", "fail", "partial", "error"
@@ -20,8 +20,10 @@ module Ace
           # @param completed_at [Time] When execution completed
           # @param report_dir [String, nil] Path to the reports directory
           # @param error [String, nil] Error message if execution failed
+          # @param observations [String] Runner/verifier observations for report context
+          # @param metadata [Hash] Additional structured phase/report metadata
           def initialize(test_id:, status:, test_cases: [], summary: "",
-            started_at: nil, completed_at: nil, report_dir: nil, error: nil)
+            started_at: nil, completed_at: nil, report_dir: nil, error: nil, observations: "", metadata: {})
             @test_id = test_id
             @status = status
             @test_cases = test_cases
@@ -30,6 +32,8 @@ module Ace
             @completed_at = completed_at || Time.now
             @report_dir = report_dir
             @error = error
+            @observations = observations.to_s
+            @metadata = metadata
           end
           # Check if the test passed
@@ -94,7 +98,9 @@ module Ace
               started_at: started_at,
               completed_at: completed_at,
               report_dir: dir,
-              error: error
+              error: error,
+              observations: observations,
+              metadata: metadata
             )
           end