ace-test-runner-e2e 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/e2e-runner/config.yml +70 -0
- data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
- data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
- data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
- data/CHANGELOG.md +1166 -0
- data/LICENSE +21 -0
- data/README.md +42 -0
- data/Rakefile +15 -0
- data/exe/ace-test-e2e +15 -0
- data/exe/ace-test-e2e-sh +67 -0
- data/exe/ace-test-e2e-suite +13 -0
- data/handbook/guides/e2e-testing.g.md +124 -0
- data/handbook/guides/scenario-yml-reference.g.md +182 -0
- data/handbook/guides/tc-authoring.g.md +131 -0
- data/handbook/skills/as-e2e-create/SKILL.md +30 -0
- data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
- data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
- data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
- data/handbook/skills/as-e2e-review/SKILL.md +35 -0
- data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
- data/handbook/skills/as-e2e-run/SKILL.md +48 -0
- data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
- data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
- data/handbook/templates/agent-experience-report.template.md +89 -0
- data/handbook/templates/metadata.template.yml +49 -0
- data/handbook/templates/scenario.yml.template.yml +60 -0
- data/handbook/templates/tc-file.template.md +45 -0
- data/handbook/templates/test-report.template.md +94 -0
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
- data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
- data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
- data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
- data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
- data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
- data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
- data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
- data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
- data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
- data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
- data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
- data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
- data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
- data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
- data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
- data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
- data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
- data/lib/ace/test/end_to_end_runner/version.rb +9 -0
- data/lib/ace/test/end_to_end_runner.rb +71 -0
- metadata +220 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ace
|
|
6
|
+
module Test
|
|
7
|
+
module EndToEndRunner
|
|
8
|
+
module Molecules
|
|
9
|
+
# Discovers E2E test scenario directories (TS-*/scenario.yml) in packages
|
|
10
|
+
#
|
|
11
|
+
# Finds test scenarios in the TS-format directory structure:
|
|
12
|
+
# {package}/test/e2e/TS-*/scenario.yml
|
|
13
|
+
#
|
|
14
|
+
# Note: This is a Molecule (not an Atom) because it performs filesystem
|
|
15
|
+
# I/O via Dir.glob.
|
|
16
|
+
class TestDiscoverer
  TEST_DIR = "test/e2e"
  SCENARIO_FILE = "scenario.yml"
  SCENARIO_DIR_PATTERN = "TS-*"

  # Find E2E test scenario files matching criteria
  #
  # @param package [String] Package name (e.g., "ace-lint")
  # @param test_id [String, Array<String>, nil] Optional test ID(s), comma-separated
  #   or as an array (e.g., "TS-LINT-001,TS-LINT-002"). Each ID is matched as a
  #   substring of the scenario directory name. Blank segments are ignored; a
  #   non-nil value with no usable ID matches nothing.
  # @param tags [Array<String>, String, nil] Scenario tags to include (OR semantics)
  # @param exclude_tags [Array<String>, String, nil] Scenario tags to exclude (OR semantics)
  # @param base_dir [String] Base directory to search from (default: current dir)
  # @return [Array<String>] Sorted list of matching scenario.yml file paths
  def find_tests(package:, test_id: nil, tags: nil, exclude_tags: nil, base_dir: Dir.pwd)
    test_dir = File.join(base_dir, package, TEST_DIR)
    ids = requested_test_ids(test_id)
    dir_patterns = ids ? ids.map { |id| "*#{id}*" } : [SCENARIO_DIR_PATTERN]

    scenario_files = dir_patterns
      .flat_map { |dir_pattern| glob_scenario_files(test_dir, dir_pattern) }
      .uniq
      .sort

    include_tags = normalize_tags(tags)
    skip_tags = normalize_tags(exclude_tags)
    # Without tag filters there is no need to load any scenario.
    return scenario_files if include_tags.empty? && skip_tags.empty?

    loader = ScenarioLoader.new
    scenarios = scenario_files.map { |yml_path| loader.load(File.dirname(yml_path)) }

    filter_scenarios(scenarios, tags: include_tags, exclude_tags: skip_tags)
      .map(&:file_path)
      .sort
  end

  # Find TS-format scenario directories and load them as TestScenario models
  #
  # @param package [String] Package name
  # @param test_id [String, Array<String>, nil] Optional test ID(s) to keep,
  #   comma-separated or as an array. Compared for equality with each loaded
  #   scenario's test_id. Blank segments are ignored; a non-nil value with no
  #   usable ID matches nothing.
  # @param tags [Array<String>, String, nil] Scenario tags to include (OR semantics)
  # @param exclude_tags [Array<String>, String, nil] Scenario tags to exclude (OR semantics)
  # @param base_dir [String] Base directory to search from
  # @return [Array<Models::TestScenario>] Loaded scenario models with test_cases
  def find_scenarios(package:, test_id: nil, tags: nil, exclude_tags: nil, base_dir: Dir.pwd)
    test_dir = File.join(base_dir, package, TEST_DIR)
    scenario_files = glob_scenario_files(test_dir, SCENARIO_DIR_PATTERN).sort

    loader = ScenarioLoader.new
    scenarios = scenario_files.map { |yml_path| loader.load(File.dirname(yml_path)) }

    ids = requested_test_ids(test_id)
    scenarios = scenarios.select { |scenario| ids.include?(scenario.test_id) } if ids

    filter_scenarios(
      scenarios,
      tags: normalize_tags(tags),
      exclude_tags: normalize_tags(exclude_tags)
    )
  end

  # List all packages that have E2E tests
  #
  # @param base_dir [String] Base directory to search from
  # @return [Array<String>] Sorted list of package names
  def list_packages(base_dir: Dir.pwd)
    pattern = File.join("*", TEST_DIR, SCENARIO_DIR_PATTERN, SCENARIO_FILE)

    # Results are relative to base_dir, so the first path segment is the package.
    Dir.glob(pattern, base: base_dir)
      .map { |relative| relative.split("/").first }
      .uniq
      .sort
  end

  private

  # Glob scenario.yml files below test_dir for one directory pattern.
  #
  # test_dir is passed as the glob base rather than spliced into the pattern,
  # so glob metacharacters in base_dir or the package name are taken literally.
  #
  # @param test_dir [String] The package's E2E test directory
  # @param dir_pattern [String] Glob pattern for the scenario directory name
  # @return [Array<String>] Matching paths, each prefixed with test_dir
  def glob_scenario_files(test_dir, dir_pattern)
    Dir.glob(File.join(dir_pattern, SCENARIO_FILE), base: test_dir)
      .map { |relative| File.join(test_dir, relative) }
  end

  # Split a test_id argument into individual IDs.
  #
  # @param test_id [String, Array<String>, nil]
  # @return [Array<String>, nil] nil when no ID filter was requested; otherwise
  #   the stripped, non-blank, de-duplicated IDs (possibly empty)
  def requested_test_ids(test_id)
    return nil if test_id.nil?

    Array(test_id)
      .flat_map { |value| value.to_s.split(",") }
      .map(&:strip)
      .reject(&:empty?)
      .uniq
  end

  # Normalize tags to stripped, lowercase, non-blank strings.
  #
  # @param raw [Array, String, nil] Tags as an array and/or comma-separated text
  # @return [Array<String>]
  def normalize_tags(raw)
    return [] if raw.nil?

    Array(raw)
      .flat_map { |value| value.to_s.split(",") }
      .map { |tag| tag.strip.downcase }
      .reject(&:empty?)
  end

  # Keep scenarios carrying at least one of +tags+ (when any are given) and
  # none of +exclude_tags+. Scenario tags are normalized the same way as the
  # requested tags, so matching is case-insensitive and nil tags are tolerated.
  #
  # @param scenarios [Array] Objects responding to #tags
  # @param tags [Array<String>] Normalized tags to include
  # @param exclude_tags [Array<String>] Normalized tags to exclude
  # @return [Array] The scenarios passing both filters, in input order
  def filter_scenarios(scenarios, tags:, exclude_tags:)
    scenarios.select do |scenario|
      own_tags = normalize_tags(scenario.tags)
      included = tags.empty? || !(own_tags & tags).empty?
      included && (own_tags & exclude_tags).empty?
    end
  end
end
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
end
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ace/llm"
|
|
4
|
+
require "ace/llm/query_interface"
|
|
5
|
+
|
|
6
|
+
module Ace
|
|
7
|
+
module Test
|
|
8
|
+
module EndToEndRunner
|
|
9
|
+
module Molecules
|
|
10
|
+
# Executes a single E2E test scenario via LLM
|
|
11
|
+
#
|
|
12
|
+
# Routes execution through two paths based on provider type:
|
|
13
|
+
# - CLI providers (claude, gemini, codex): deterministic standalone pipeline
|
|
14
|
+
# - API providers (google, anthropic): prompt-based prediction mode
|
|
15
|
+
class TestExecutor
|
|
16
|
+
# Builds an executor bound to one provider and one default timeout.
#
# @param provider [String, nil] LLM provider:model string; when nil, falls back to
#   config["execution"]["provider"] and then to "claude:sonnet"
# @param timeout [Integer, nil] Request timeout in seconds; when nil, falls back to
#   config["execution"]["timeout"] and then to 300
# @param config [Hash, nil] Configuration hash (string keys); obtained from
#   Molecules::ConfigLoader.load when nil
def initialize(provider: nil, timeout: nil, config: nil)
  settings = config || Molecules::ConfigLoader.load

  # Explicit argument first, then configuration, then the built-in default.
  @provider = provider
  @provider ||= settings.dig("execution", "provider")
  @provider ||= "claude:sonnet"

  @timeout = timeout
  @timeout ||= settings.dig("execution", "timeout")
  @timeout ||= 300

  @prompt_builder = Atoms::PromptBuilder.new
  @cli_provider_adapter = Atoms::CliProviderAdapter.new(settings)
end
|
|
26
|
+
|
|
27
|
+
# Execute a single test scenario via LLM
#
# Providers recognised by Atoms::CliProviderAdapter.cli_provider? run through
# execute_via_pipeline; every other provider runs through execute_via_prompt,
# which only receives cli_args, test_cases and the timeout.
#
# @param scenario [Models::TestScenario] The test scenario to execute
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Pre-generated run ID for deterministic report paths
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param sandbox_path [String, nil] Path to pre-populated sandbox (skips LLM setup)
# @param env_vars [Hash, nil] Environment variables from setup execution
# @param report_dir [String, nil] Explicit report directory path (overrides computed path)
# @param timeout [Integer, nil] Timeout for this call; the executor's default is used when nil
# @param verify [Boolean] Accepted for callers that pass it; not read by this method
# @return [Models::TestResult] Test execution result
def execute(scenario, cli_args: nil, run_id: nil, test_cases: nil, sandbox_path: nil,
            env_vars: nil, report_dir: nil, timeout: nil, verify: false)
  effective_timeout = timeout || @timeout

  unless Atoms::CliProviderAdapter.cli_provider?(@provider)
    return execute_via_prompt(scenario, cli_args: cli_args, test_cases: test_cases, timeout: effective_timeout)
  end

  execute_via_pipeline(
    scenario,
    cli_args: cli_args,
    run_id: run_id,
    test_cases: test_cases,
    sandbox_path: sandbox_path,
    env_vars: env_vars,
    report_dir: report_dir,
    timeout: effective_timeout
  )
end
|
|
55
|
+
|
|
56
|
+
# Execute a single test case via LLM in a pre-populated sandbox
#
# CLI providers are driven through execute_tc_via_skill; all other providers
# go through execute_tc_via_prompt, which does not receive run_id or env_vars.
#
# @param test_case [Models::TestCase] The single test case to execute
# @param sandbox_path [String] Path to the pre-populated sandbox
# @param scenario [Models::TestScenario] The parent scenario for metadata
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Pre-generated run ID
# @param env_vars [Hash, nil] Environment variables from setup execution
# @return [Models::TestResult] Test execution result
def execute_tc(test_case:, sandbox_path:, scenario:, cli_args: nil, run_id: nil, env_vars: nil)
  unless Atoms::CliProviderAdapter.cli_provider?(@provider)
    return execute_tc_via_prompt(test_case, sandbox_path, scenario, cli_args: cli_args)
  end

  execute_tc_via_skill(test_case, sandbox_path, scenario, cli_args: cli_args, run_id: run_id, env_vars: env_vars)
end
|
|
72
|
+
|
|
73
|
+
private
|
|
74
|
+
|
|
75
|
+
# Execute standalone scenarios with the deterministic pipeline.
#
# The report directory is the explicit report_dir or, failing that, the one
# derived from run_id. The sandbox is the explicit sandbox_path or, failing
# that, the one derived from the report directory. Blank ("" or nil) explicit
# paths count as "not given", so they can neither reach the pipeline nor
# bypass the unresolved-path guard below.
#
# @param scenario [Models::TestScenario] The test scenario to execute
# @param cli_args [String, nil] Extra args for CLI providers
# @param run_id [String, nil] Run ID used to derive the default report directory
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param sandbox_path [String, nil] Explicit sandbox path
# @param env_vars [Hash, nil] Environment variables from setup execution
# @param report_dir [String, nil] Explicit report directory path
# @param timeout [Integer, nil] Timeout handed to the pipeline executor
# @return [Models::TestResult] Pipeline result, or an "error" result when no
#   report/sandbox path could be resolved
def execute_via_pipeline(scenario, cli_args: nil, run_id: nil, test_cases: nil, sandbox_path: nil,
                         env_vars: nil, report_dir: nil, timeout: nil)
  started_at = Time.now

  explicit_report_dir = report_dir unless report_dir.to_s.empty?
  resolved_report_dir = explicit_report_dir || default_report_dir_for(scenario, run_id)

  explicit_sandbox_path = sandbox_path unless sandbox_path.to_s.empty?
  resolved_sandbox_path = explicit_sandbox_path || resolve_sandbox_path(nil, resolved_report_dir)

  if resolved_report_dir.nil? || resolved_sandbox_path.nil?
    return Models::TestResult.new(
      test_id: scenario.test_id,
      status: "error",
      summary: "Execution pipeline requires run_id/report_dir",
      error: "Could not resolve deterministic sandbox/report paths",
      started_at: started_at,
      completed_at: Time.now
    )
  end

  pipeline_executor(timeout: timeout).execute(
    scenario: scenario,
    cli_args: cli_args,
    sandbox_path: resolved_sandbox_path,
    report_dir: resolved_report_dir,
    env_vars: env_vars,
    test_cases: test_cases
  )
end
|
|
102
|
+
|
|
103
|
+
# Pick the sandbox directory for a run.
#
# A non-blank sandbox_path is returned unchanged. Otherwise the sandbox is
# derived from report_dir by dropping a trailing "-reports". Blankness is
# judged on the string form, so "" counts as missing for both arguments.
#
# @param sandbox_path [String, nil] Explicit sandbox path, if any
# @param report_dir [String, nil] Report directory to derive the sandbox from
# @return [String, nil] The sandbox path, or nil when neither argument is usable
def resolve_sandbox_path(sandbox_path, report_dir)
  return sandbox_path unless sandbox_path.to_s.empty?
  return nil if report_dir.to_s.empty?

  report_dir.to_s.sub(/-reports\z/, "")
end
|
|
109
|
+
|
|
110
|
+
# Compute the default report directory for a run:
# <cwd>/.ace-local/test-e2e/<scenario.dir_name(run_id)>-reports
#
# @param scenario [#dir_name] Scenario supplying the directory name for run_id
# @param run_id [String, nil] Run identifier
# @return [String, nil] The report directory, or nil when run_id is missing or blank
def default_report_dir_for(scenario, run_id)
  return nil if !run_id || run_id.to_s.empty?

  reports_root = File.join(Dir.pwd, ".ace-local", "test-e2e")
  File.join(reports_root, "#{scenario.dir_name(run_id)}-reports")
end
|
|
115
|
+
|
|
116
|
+
# Execute a scenario in prompt mode (used for non-CLI providers).
#
# Builds the scenario prompt, queries the provider with fallback disabled,
# parses the reply and then runs the TC fidelity check. When the validator
# returns a value, its :error text becomes both summary and error of an
# "error" result. Every failure is converted into an "error" result rather
# than raised.
#
# @param scenario [Models::TestScenario] The test scenario
# @param cli_args [String, nil] Extra args
# @param test_cases [Array<String>, nil] Optional test case IDs to filter
# @param timeout [Integer, nil] Timeout for the query; the executor's default when nil
# @return [Models::TestResult]
def execute_via_prompt(scenario, cli_args: nil, test_cases: nil, timeout: nil)
  started_at = Time.now

  scenario_prompt = @prompt_builder.build(scenario, test_cases: test_cases)

  llm_reply = Ace::LLM::QueryInterface.query(
    @provider, scenario_prompt,
    system: Atoms::PromptBuilder::SYSTEM_PROMPT,
    cli_args: cli_args,
    timeout: timeout || @timeout,
    fallback: false
  )

  outcome = Atoms::ResultParser.parse(llm_reply[:text])
  finished_at = Time.now

  # Validate TC fidelity: ensure agent executed the expected test cases
  fidelity_issue = Atoms::TcFidelityValidator.validate(outcome, scenario, filtered_tc_ids: test_cases)

  if fidelity_issue
    Models::TestResult.new(
      test_id: scenario.test_id,
      status: "error",
      test_cases: outcome[:test_cases],
      summary: fidelity_issue[:error],
      error: fidelity_issue[:error],
      started_at: started_at,
      completed_at: finished_at
    )
  else
    Models::TestResult.new(
      test_id: scenario.test_id,
      status: outcome[:status],
      test_cases: outcome[:test_cases],
      summary: outcome[:summary],
      started_at: started_at,
      completed_at: finished_at
    )
  end
rescue Atoms::ResultParser::ParseError => parse_error
  Models::TestResult.new(
    test_id: scenario.test_id,
    status: "error",
    summary: "Failed to parse LLM response",
    error: parse_error.message,
    started_at: started_at,
    completed_at: Time.now
  )
rescue Ace::LLM::Error => llm_error
  Models::TestResult.new(
    test_id: scenario.test_id,
    status: "error",
    summary: "LLM execution failed",
    error: llm_error.message,
    started_at: started_at || Time.now,
    completed_at: Time.now
  )
rescue => unexpected
  Models::TestResult.new(
    test_id: scenario.test_id,
    status: "error",
    summary: "Unexpected execution error",
    error: "#{unexpected.class}: #{unexpected.message}",
    started_at: started_at || Time.now,
    completed_at: Time.now
  )
end
|
|
191
|
+
|
|
192
|
+
# Execute one TC for CLI providers through a skill prompt.
#
# The prompt comes from the CLI provider adapter and the query runs with the
# sandbox as working directory and env_vars as subprocess environment. A reply
# that detect_skill_invocation_error flags becomes an "error" result with no
# test cases; otherwise the reply is parsed with SkillResultParser.parse_tc.
# Exceptions are turned into results by with_tc_error_handling.
#
# @return [Models::TestResult]
def execute_tc_via_skill(test_case, sandbox_path, scenario, cli_args: nil, run_id: nil, env_vars: nil)
  with_tc_error_handling(scenario) do |started_at|
    skill_prompt = @cli_provider_adapter.build_tc_skill_prompt(
      test_case: test_case,
      scenario: scenario,
      sandbox_path: sandbox_path,
      run_id: run_id,
      env_vars: env_vars
    )

    llm_reply = Ace::LLM::QueryInterface.query(
      @provider, skill_prompt,
      system: nil,
      cli_args: cli_args,
      timeout: @timeout,
      fallback: false,
      working_dir: sandbox_path,
      subprocess_env: env_vars
    )

    skill_failure = detect_skill_invocation_error(llm_reply[:text])

    if skill_failure
      Models::TestResult.new(
        test_id: scenario.test_id,
        status: "error",
        test_cases: [],
        summary: "TC skill invocation failed before test execution",
        error: skill_failure,
        started_at: started_at,
        completed_at: Time.now
      )
    else
      outcome = Atoms::SkillResultParser.parse_tc(llm_reply[:text])
      finished_at = Time.now

      Models::TestResult.new(
        test_id: scenario.test_id,
        status: outcome[:status],
        test_cases: outcome[:test_cases],
        summary: outcome[:summary],
        started_at: started_at,
        completed_at: finished_at
      )
    end
  end
end
|
|
234
|
+
|
|
235
|
+
# Execute one TC for non-CLI providers in prompt mode.
#
# Builds the TC prompt, queries the provider with the TC system prompt and
# the executor's default timeout, and maps the parsed reply onto a result.
# Exceptions are turned into results by with_tc_error_handling.
#
# @return [Models::TestResult]
def execute_tc_via_prompt(test_case, sandbox_path, scenario, cli_args: nil)
  with_tc_error_handling(scenario) do |started_at|
    tc_prompt = @prompt_builder.build_tc(
      test_case: test_case,
      scenario: scenario,
      sandbox_path: sandbox_path
    )

    llm_reply = Ace::LLM::QueryInterface.query(
      @provider, tc_prompt,
      system: Atoms::PromptBuilder::TC_SYSTEM_PROMPT,
      cli_args: cli_args,
      timeout: @timeout,
      fallback: false
    )

    outcome = Atoms::ResultParser.parse_tc(llm_reply[:text])
    finished_at = Time.now

    Models::TestResult.new(
      test_id: scenario.test_id,
      status: outcome[:status],
      test_cases: outcome[:test_cases],
      summary: outcome[:summary],
      started_at: started_at,
      completed_at: finished_at
    )
  end
end
|
|
261
|
+
|
|
262
|
+
# Shared error handling for TC execution methods.
#
# Yields the start time to the block and returns the block's value. Parse
# errors, LLM errors and any other StandardError are converted into an
# "error" Models::TestResult for the scenario instead of propagating.
#
# @param scenario [Models::TestScenario] Scenario whose test_id labels error results
# @yieldparam started_at [Time] Time at which execution began
# @return [Object] The block's value, or an error result
def with_tc_error_handling(scenario)
  started_at = Time.now

  # Every failure result has the same shape; only summary and detail vary.
  failure = lambda do |summary, detail|
    Models::TestResult.new(
      test_id: scenario.test_id,
      status: "error",
      summary: summary,
      error: detail,
      started_at: started_at,
      completed_at: Time.now
    )
  end

  yield started_at
rescue Atoms::ResultParser::ParseError => parse_error
  failure.call("Failed to parse TC response", parse_error.message)
rescue Ace::LLM::Error => llm_error
  failure.call("TC execution failed", llm_error.message)
rescue => unexpected
  failure.call("Unexpected TC execution error", "#{unexpected.class}: #{unexpected.message}")
end
|
|
286
|
+
|
|
287
|
+
# Scan a reply for known signs that the /as-e2e-run skill was not executed
# correctly. Checks run in order and the first matching pattern wins.
#
# @param text [String, nil] Raw LLM response text
# @return [String, nil] The message for the first matching check followed by
#   " Detected output: " and an excerpt from extract_matching_line; nil when
#   text is nil/blank or no check matches
def detect_skill_invocation_error(text)
  return nil if text.nil? || text.strip.empty?

  checks = [
    [/\/as-e2e-run.*command not found/i, "The slash command was executed in a shell instead of chat."],
    [/exit code 127.*\/as-e2e-run|\/as-e2e-run.*exit code 127/im, "The slash command failed with shell exit code 127."],
    [/No tests found for package/i, "The test command ran in the wrong context or with invalid arguments."],
    [/\bace-test\s+e2e\b/i, "An invalid command (`ace-test e2e`) was attempted instead of `ace-test-e2e`."],
    [/slash commands are unavailable/i, "The agent reported slash commands are unavailable in this environment."]
  ]

  hit = checks.find { |pattern, _message| text.match?(pattern) }
  return nil unless hit

  pattern, message = hit
  "#{message} Detected output: #{extract_matching_line(text, pattern)}"
end
|
|
312
|
+
|
|
313
|
+
# Produce a short excerpt showing where +pattern+ matched in +text+.
#
# Returns the first single line matching the pattern, stripped. When no
# usable line exists (for example a multi-line pattern that only matches
# across line breaks), the excerpt is taken from the matched span itself, so
# the reported output actually contains the match. If there is no match at
# all, it is taken from the start of the text. Fallback excerpts are
# whitespace-collapsed and limited to 30 words.
#
# @param text [String, nil] Raw LLM response text
# @param pattern [Regexp] Pattern that triggered the detection
# @return [String] The excerpt ("" for nil/blank text)
def extract_matching_line(text, pattern)
  source = text.to_s

  line = source.each_line.find { |candidate| candidate.match?(pattern) }
  stripped = line&.strip
  return stripped unless stripped.nil? || stripped.empty?

  # String#[] with a pattern returns the matched portion, or nil without a match.
  matched = source[pattern].to_s
  excerpt_source = matched.strip.empty? ? source : matched
  excerpt_source.strip.split(/\s+/).first(30).join(" ")
end
|
|
319
|
+
|
|
320
|
+
# Return the pipeline executor for a timeout, creating it on first use.
# Executors are cached per timeout value and reused on later calls.
#
# @param timeout [Integer, nil] Timeout in seconds; the executor's default when nil
# @return [Molecules::PipelineExecutor]
def pipeline_executor(timeout: nil)
  effective_timeout = timeout || @timeout
  @pipeline_executors ||= {}

  @pipeline_executors.fetch(effective_timeout) do
    @pipeline_executors[effective_timeout] = Molecules::PipelineExecutor.new(
      provider: @provider,
      timeout: effective_timeout
    )
  end
end
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
end
|
|
332
|
+
end
|