ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,259 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "yaml"
5
+
6
+ module Ace
7
+ module Test
8
+ module EndToEndRunner
9
+ module Molecules
10
# Writes E2E test reports to disk
#
# Generates summary, experience, and metadata reports following
# the standard report path contract. The three files (summary.r.md,
# experience.r.md, metadata.yml) are written into one report directory.
class ReportWriter
  # strftime pattern used for every timestamp emitted in the reports.
  TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

  # Write all reports for a test result
  #
  # @param result [Models::TestResult] The test result
  # @param scenario [Models::TestScenario] The test scenario
  # @param report_dir [String] Directory to write reports to (created if missing)
  # @param test_case [Models::TestCase, nil] Optional single test case for TC-level reports
  # @return [Hash] Paths to written report files (:summary, :experience, :metadata)
  def write(result, scenario, report_dir:, test_case: nil)
    FileUtils.mkdir_p(report_dir)

    summary_path = write_summary(result, scenario, report_dir, test_case)
    experience_path = write_experience(result, scenario, report_dir, test_case)
    metadata_path = write_metadata(result, scenario, report_dir, test_case)

    {
      summary: summary_path,
      experience: experience_path,
      metadata: metadata_path
    }
  end

  private

  # Write summary report (YAML frontmatter followed by markdown tables)
  #
  # @param result [Models::TestResult] The test result
  # @param scenario [Models::TestScenario] The test scenario
  # @param report_dir [String] Directory to write into
  # @param test_case [Models::TestCase, nil] Adds tc-id/scenario-id when given
  # @return [String] Path to written file
  def write_summary(result, scenario, report_dir, test_case = nil)
    path = File.join(report_dir, "summary.r.md")
    executed = format_timestamp(result.completed_at)

    # to_s keeps a test case without a status from aborting the whole report
    tc_rows = result.test_cases.map do |tc|
      "| #{tc[:id]} | #{tc[:description]} | #{tc[:status].to_s.capitalize} |"
    end.join("\n")

    goal_criteria_sections = build_goal_criteria_sections(result.test_cases)

    frontmatter_hash = {
      "test-id" => result.test_id
    }
    if test_case
      frontmatter_hash["tc-id"] = test_case.tc_id
      frontmatter_hash["scenario-id"] = scenario.test_id
    end
    frontmatter_hash.merge!(
      "package" => scenario.package,
      "agent" => "ace-test-e2e",
      "executed" => executed,
      "status" => result.status,
      "tcs-passed" => result.passed_count,
      "tcs-failed" => result.failed_count,
      "tcs-total" => result.total_count,
      "score" => compute_score(result),
      "verdict" => compute_verdict(result),
      "failed" => failed_entries(result.test_cases)
    )
    # Drop the document start/end markers YAML.dump emits; the report adds its own fences
    frontmatter_yaml = YAML.dump(frontmatter_hash).sub(/\A---\s*\n/, "").sub(/\.\.\.\s*\n\z/, "")

    tc_info_rows = if test_case
      "| TC ID | #{test_case.tc_id} |\n| TC Title | #{test_case.title} |\n"
    else
      ""
    end

    content = <<~REPORT
      ---
      #{frontmatter_yaml.rstrip}
      ---

      # E2E Test Report: #{result.test_id}

      ## Test Information

      | Field | Value |
      |-------|-------|
      | Test ID | #{result.test_id} |
      #{tc_info_rows}| Title | #{scenario.title} |
      | Package | #{scenario.package} |
      | Agent | ace-test-e2e |
      | Executed | #{executed} |
      | Duration | #{result.duration_display} |

      ## Results Summary

      | Test Case | Description | Status |
      |-----------|-------------|--------|
      #{tc_rows}

      ## Overall Status: #{result.status.upcase}

      #{goal_criteria_sections}

      #{result.summary}
      #{"## Error\n\n#{result.error}" if result.error}
    REPORT

    File.write(path, content)
    path
  end

  # Write experience report
  #
  # @param result [Models::TestResult] The test result
  # @param scenario [Models::TestScenario] The test scenario
  # @param report_dir [String] Directory to write into
  # @param test_case [Models::TestCase, nil] Adds tc-id and a title suffix when given
  # @return [String] Path to written file
  def write_experience(result, scenario, report_dir, test_case = nil)
    path = File.join(report_dir, "experience.r.md")

    tc_title_suffix = test_case ? " / #{test_case.tc_id}" : ""

    exp_frontmatter_lines = [
      "test-id: #{result.test_id}"
    ]
    exp_frontmatter_lines << "tc-id: #{test_case.tc_id}" if test_case
    exp_frontmatter_lines.concat([
      "test-title: #{scenario.title}",
      "package: #{scenario.package}",
      "agent: ace-test-e2e",
      "executed: #{format_timestamp(result.completed_at)}",
      "status: #{(result.status == "pass") ? "complete" : "incomplete"}"
    ])

    content = <<~REPORT
      ---
      #{exp_frontmatter_lines.join("\n")}
      ---

      # Agent Experience Report: #{result.test_id}#{tc_title_suffix}

      ## Summary

      Executed via ace-test-e2e CLI using LLM provider.
      #{(result.status == "pass") ? "No significant friction encountered." : "Test execution completed with issues noted below."}

      ## Friction Points

      ### Documentation Gaps

      - Automated execution via LLM - no documentation gaps observed

      ### Tool Behavior Issues

      - #{result.error || "None observed"}

      ## Positive Observations

      - Automated test execution completed successfully via LLM
    REPORT

    File.write(path, content)
    path
  end

  # Write metadata file
  #
  # The run-id is derived from the report directory name with a trailing
  # "-reports" removed.
  #
  # @param result [Models::TestResult] The test result
  # @param scenario [Models::TestScenario] The test scenario
  # @param report_dir [String] Directory to write into
  # @param test_case [Models::TestCase, nil] Adds scenario-id/tc-id when given
  # @return [String] Path to written file
  def write_metadata(result, scenario, report_dir, test_case = nil)
    path = File.join(report_dir, "metadata.yml")

    metadata = {
      "run-id" => File.basename(report_dir).sub(/-reports\z/, ""),
      "test-id" => result.test_id,
      "package" => scenario.package,
      "agent" => "ace-test-e2e",
      "started" => format_timestamp(result.started_at),
      "completed" => format_timestamp(result.completed_at),
      "duration" => "#{result.duration.round(0)}s",
      "status" => result.status,
      "score" => compute_score(result),
      "verdict" => compute_verdict(result),
      "tcs-passed" => result.passed_count,
      "tcs-failed" => result.failed_count,
      "tcs-total" => result.total_count,
      "results" => {
        "passed" => result.passed_count,
        "failed" => result.failed_count,
        "total" => result.total_count
      },
      "failed" => failed_entries(result.test_cases),
      "failed_test_cases" => result.failed_test_case_ids
    }

    if test_case
      metadata["scenario-id"] = scenario.test_id
      metadata["tc-id"] = test_case.tc_id
    end

    File.write(path, YAML.dump(metadata))
    path
  end

  # Format a time as a UTC ISO-8601 timestamp string.
  #
  # @param time [Time] Time to format
  # @return [String] e.g. "2024-01-31T12:00:00Z"
  def format_timestamp(time)
    time.utc.strftime(TIMESTAMP_FORMAT)
  end

  # Fraction of passed test cases, rounded to three decimals.
  #
  # @param result [Models::TestResult] The test result
  # @return [Float] 0.0 when there are no test cases
  def compute_score(result)
    return 0.0 if result.total_count.zero?

    (result.passed_count.to_f / result.total_count).round(3)
  end

  # Single source of truth for the verdict so the summary and the
  # metadata file can never disagree about the same run.
  #
  # @param result [Models::TestResult] The test result
  # @return [String] "fail" for errored runs or runs with no passes,
  #   "pass" when nothing failed, otherwise "partial"
  def compute_verdict(result)
    return "fail" if result.status == "error"
    return "pass" if result.failed_count.zero?

    result.passed_count.zero? ? "fail" : "partial"
  end

  # Build the "failed" list shared by the summary frontmatter and metadata.
  #
  # @param test_cases [Array<Hash>] Test case result hashes
  # @return [Array<Hash>] One entry (tc, category, evidence) per failed case
  def failed_entries(test_cases)
    test_cases
      .select { |tc| tc[:status] == "fail" }
      .map do |tc|
        {
          "tc" => tc[:id],
          "category" => tc[:category] || "runner-error",
          "evidence" => tc[:notes].to_s
        }
      end
  end

  # Render the "Goal Evaluation" markdown for test cases that carry criteria.
  #
  # @param test_cases [Array<Hash>] Test case result hashes
  # @return [String] Markdown section, or "" when no test case has criteria
  def build_goal_criteria_sections(test_cases)
    sections = test_cases.filter_map do |tc|
      criteria = tc[:criteria]
      next if criteria.nil? || criteria.empty?

      rows = criteria.map do |criterion|
        # Fall back to the criterion id when no description was provided
        desc = criterion[:description].to_s.empty? ? criterion[:id] : criterion[:description]
        "| #{desc} | #{criterion[:status].to_s.upcase} | #{criterion[:evidence]} |"
      end.join("\n")

      <<~SECTION
        ### Goal Criteria: #{tc[:id]}

        | Criterion | Status | Evidence |
        |-----------|--------|----------|
        #{rows}
      SECTION
    end

    return "" if sections.empty?

    "## Goal Evaluation\n\n#{sections.join("\n")}"
  end
end
256
+ end
257
+ end
258
+ end
259
+ end
@@ -0,0 +1,254 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "yaml"
5
+
6
+ module Ace
7
+ module Test
8
+ module EndToEndRunner
9
+ module Molecules
10
# Loads a TS-format scenario directory into TestScenario + TestCase models.
#
# Supported test case format is standalone TC pairs only:
# - `TC-*.runner.md`
# - `TC-*.verify.md`
class ScenarioLoader
  # Frontmatter keys from the old inline format; their presence is rejected.
  LEGACY_FIELDS = %w[mode execution-model].freeze

  # Load a scenario directory
  #
  # @param scenario_dir [String] Path to the TS-* scenario directory
  # @return [Models::TestScenario] Populated test scenario
  # @raise [ArgumentError] If scenario.yml is missing, invalid, or has missing required fields
  def load(scenario_dir)
    yml_path = File.join(scenario_dir, "scenario.yml")
    raise ArgumentError, "scenario.yml not found: #{yml_path}" unless File.exist?(yml_path)

    frontmatter = parse_scenario_yml(yml_path)
    validate_scenario!(frontmatter, yml_path)

    test_cases = discover_test_cases(scenario_dir)
    fixture_path = detect_fixture_path(scenario_dir)

    Models::TestScenario.new(
      test_id: frontmatter["test-id"],
      title: frontmatter["title"],
      area: frontmatter["area"],
      package: frontmatter["package"] || infer_package(scenario_dir),
      priority: frontmatter["priority"] || "medium",
      duration: frontmatter["duration"] || "~5min",
      timeout: parse_timeout(frontmatter["timeout"], yml_path),
      requires: frontmatter["requires"] || {},
      file_path: File.expand_path(yml_path),
      content: File.read(yml_path),
      setup_steps: frontmatter["setup"] || [],
      dir_path: File.expand_path(scenario_dir),
      fixture_path: fixture_path,
      test_cases: test_cases,
      tags: parse_tags(frontmatter["tags"]),
      tool_under_test: frontmatter["tool-under-test"],
      sandbox_layout: frontmatter["sandbox-layout"] || {}
    )
  end

  private

  # Parse an optional per-scenario timeout in seconds.
  #
  # Accepts integers, other finite numerics (truncated), and strings made
  # only of digits (surrounding whitespace ignored). A nil or blank value
  # means "no timeout configured".
  #
  # @param raw_timeout [Object] Raw YAML timeout value
  # @param source_path [String] Source file path for errors
  # @return [Integer, nil] Timeout in seconds
  # @raise [ArgumentError] If timeout is present and invalid
  def parse_timeout(raw_timeout, source_path)
    return nil if raw_timeout.nil?

    value =
      case raw_timeout
      when Integer
        raw_timeout
      when Float
        # .inf / .nan cannot be truncated; report them like any other bad value
        raise ArgumentError, "Invalid timeout in #{source_path}: #{raw_timeout.inspect}" unless raw_timeout.finite?
        raw_timeout.to_i
      when Numeric
        raw_timeout.to_i
      when String
        stripped = raw_timeout.strip
        return nil if stripped.empty?
        # Whole-string digit check (\A ... \z), so "5m" or "1e3" are rejected
        raise ArgumentError, "Invalid timeout in #{source_path}: #{raw_timeout.inspect}" unless stripped.match?(/\A\d+\z/)
        stripped.to_i
      else
        raise ArgumentError, "Invalid timeout in #{source_path}: #{raw_timeout.inspect}"
      end

    raise ArgumentError, "Invalid timeout in #{source_path}: must be greater than 0" if value <= 0
    value
  end

  # Parse scenario.yml with safe YAML loading
  #
  # @param path [String] Path to scenario.yml
  # @return [Hash] Parsed YAML frontmatter
  # @raise [ArgumentError] If YAML is invalid, empty, not a mapping, or
  #   uses tags/classes that safe loading does not permit
  def parse_scenario_yml(path)
    content = File.read(path)
    result = YAML.safe_load(content, permitted_classes: [Date])
    # A list or scalar document would otherwise fail later with NoMethodError
    raise ArgumentError, "Empty or invalid YAML in #{path}" unless result.is_a?(Hash)

    result
  rescue Psych::Exception => e
    # Covers syntax errors as well as disallowed classes and bad aliases
    raise ArgumentError, "Invalid YAML in #{path}: #{e.message}"
  end

  # Validate required fields in scenario frontmatter
  #
  # @param frontmatter [Hash] Parsed scenario.yml
  # @param path [String] File path for error messages
  # @raise [ArgumentError] If required fields are missing or legacy fields are present
  def validate_scenario!(frontmatter, path)
    required = %w[test-id title area]
    missing = required.reject { |field| frontmatter&.key?(field) }
    unless missing.empty?
      raise ArgumentError, "Missing required fields in #{path}: #{missing.join(", ")}"
    end

    legacy = LEGACY_FIELDS.select { |field| frontmatter.key?(field) }
    return if legacy.empty?

    raise ArgumentError,
      "Legacy field(s) not supported in #{path}: #{legacy.join(", ")}. " \
      "Remove these fields; standalone runner/verify scenarios are the only supported format."
  end

  # Discover and parse standalone TC files in the scenario directory.
  #
  # @param scenario_dir [String] Path to the scenario directory
  # @return [Array<Models::TestCase>] Parsed test case models, sorted by TC ID
  # @raise [ArgumentError] If files are unpaired, misnamed, or use the inline format
  def discover_test_cases(scenario_dir)
    runner_files = Dir.glob(File.join(scenario_dir, "TC-*.runner.md")).sort
    verify_files = Dir.glob(File.join(scenario_dir, "TC-*.verify.md")).sort

    if runner_files.empty? && verify_files.empty?
      reject_inline_tc_files!(scenario_dir)
      return []
    end

    validate_standalone_files!(scenario_dir, runner_files, verify_files)

    runner_by_id = runner_files.to_h { |f| [extract_tc_id_from_standalone_name(f), f] }
    verify_by_id = verify_files.to_h { |f| [extract_tc_id_from_standalone_name(f), f] }

    runner_by_id.keys.sort.map do |tc_id|
      runner_file = runner_by_id.fetch(tc_id)
      verify_file = verify_by_id.fetch(tc_id)
      parse_standalone_test_case(tc_id, runner_file, verify_file)
    end
  end

  # Fail loudly when only old-style inline TC files are present.
  #
  # @param scenario_dir [String] Path to the scenario directory
  # @raise [ArgumentError] If any `TC-*.tc.md` file exists
  def reject_inline_tc_files!(scenario_dir)
    inline_files = Dir.glob(File.join(scenario_dir, "TC-*.tc.md")).sort
    return if inline_files.empty?

    raise ArgumentError,
      "Inline TC files are no longer supported in #{scenario_dir}. " \
      "Replace #{inline_files.map { |f| File.basename(f) }.join(", ")} with standalone " \
      "TC-*.runner.md and TC-*.verify.md pairs."
  end

  # Build a TestCase model from a runner/verify file pair.
  #
  # @param tc_id [String] Test case ID (e.g. "TC-001")
  # @param runner_file [String] Path to the runner markdown file
  # @param verify_file [String] Path to the verify markdown file
  # @return [Models::TestCase] Test case titled by the runner's first heading, or the ID
  def parse_standalone_test_case(tc_id, runner_file, verify_file)
    runner_content = File.read(runner_file)
    verify_content = File.read(verify_file)

    Models::TestCase.new(
      tc_id: tc_id,
      title: extract_title_from_markdown(runner_content) || tc_id,
      content: build_standalone_content(runner_content, verify_content),
      file_path: File.expand_path(runner_file),
      pending: nil,
      goal_format: "standalone"
    )
  end

  # Extract the upper-cased TC ID from a standalone file name.
  #
  # @param file_path [String] Path like ".../TC-001-slug.runner.md"
  # @return [String] TC ID such as "TC-001" or "TC-001A"
  # @raise [ArgumentError] If the basename does not follow the naming pattern
  def extract_tc_id_from_standalone_name(file_path)
    basename = File.basename(file_path)
    match = basename.match(/\A(TC-\d+[a-z]*)(?:-[^.]+)?\.(?:runner|verify)\.md\z/i)
    return match[1].upcase if match

    raise ArgumentError, "Invalid standalone test case filename: #{basename}"
  end

  # Return the text of the first markdown heading line.
  #
  # @param markdown [String] Markdown document
  # @return [String, nil] Heading text without leading "#" markers, or nil if none
  def extract_title_from_markdown(markdown)
    line = markdown.each_line.find { |l| l.strip.start_with?("#") }
    return nil unless line

    # Strip first so an indented heading also loses its "#" markers
    line.strip.sub(/\A#+\s*/, "").strip
  end

  # Combine runner and verifier markdown into one document.
  #
  # @param runner_content [String] Runner file content
  # @param verify_content [String] Verify file content
  # @return [String] Content under "## Runner" and "## Verifier" headings
  def build_standalone_content(runner_content, verify_content)
    <<~CONTENT
      ## Runner

      #{runner_content.rstrip}

      ## Verifier

      #{verify_content.rstrip}
    CONTENT
  end

  # Ensure every TC has both files and the shared runner/verifier files exist.
  #
  # @param scenario_dir [String] Path to the scenario directory
  # @param runner_files [Array<String>] Discovered runner file paths
  # @param verify_files [Array<String>] Discovered verify file paths
  # @raise [ArgumentError] If a pair is incomplete or a shared file is missing
  def validate_standalone_files!(scenario_dir, runner_files, verify_files)
    runner_ids = runner_files.map { |f| extract_tc_id_from_standalone_name(f) }.uniq
    verify_ids = verify_files.map { |f| extract_tc_id_from_standalone_name(f) }.uniq

    missing_runner_ids = verify_ids - runner_ids
    missing_verify_ids = runner_ids - verify_ids

    unless missing_runner_ids.empty?
      raise ArgumentError,
        "Missing standalone runner file(s) for: #{missing_runner_ids.join(", ")} in #{scenario_dir}"
    end

    unless missing_verify_ids.empty?
      raise ArgumentError,
        "Missing standalone verify file(s) for: #{missing_verify_ids.join(", ")} in #{scenario_dir}"
    end

    runner_yml = File.join(scenario_dir, "runner.yml.md")
    verifier_yml = File.join(scenario_dir, "verifier.yml.md")

    raise ArgumentError, "Missing standalone file: #{runner_yml}" unless File.exist?(runner_yml)
    raise ArgumentError, "Missing standalone file: #{verifier_yml}" unless File.exist?(verifier_yml)
  end

  # Normalize the tags field to a list of lower-cased, non-empty strings.
  #
  # @param raw_tags [Array, Object, nil] Raw YAML tags value (list or single value)
  # @return [Array<String>] Normalized tags
  def parse_tags(raw_tags)
    return [] unless raw_tags

    tags = raw_tags.is_a?(Array) ? raw_tags : [raw_tags]
    tags.map(&:to_s).map(&:strip).reject(&:empty?).map(&:downcase)
  end

  # Detect fixtures directory if it exists
  #
  # @param scenario_dir [String] Path to the scenario directory
  # @return [String, nil] Absolute path to fixtures/ or nil
  def detect_fixture_path(scenario_dir)
    path = File.join(scenario_dir, "fixtures")
    Dir.exist?(path) ? File.expand_path(path) : nil
  end

  # Infer package name from scenario directory path
  #
  # @param scenario_dir [String] Path to scenario directory
  # @return [String] Inferred package name, or "unknown" when the path
  #   has no "test/e2e" segment
  def infer_package(scenario_dir)
    # Expected path: {package}/test/e2e/TS-{AREA}-{NNN}-{slug}/
    parts = File.expand_path(scenario_dir).split("/")
    parts.each_with_index do |part, idx|
      next unless part == "test" && idx > 0 && parts[idx + 1] == "e2e"

      return parts[idx - 1]
    end

    "unknown"
  end
end
251
+ end
252
+ end
253
+ end
254
+ end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "open3"
5
+ require "shellwords"
6
+
7
+ module Ace
8
+ module Test
9
+ module EndToEndRunner
10
+ module Molecules
11
# Builds a populated sandbox by running scenario setup steps in order.
#
# Each entry of the scenario.yml setup array is dispatched to a handler
# that uses plain Ruby / system calls (no LLM involved). Supported
# actions: git-init, copy-fixtures, run, write-file, agent-env, and
# tmux-session.
#
# Note: This is a Molecule because it performs filesystem I/O and
# system calls via Open3 and FileUtils.
class SetupExecutor
  # Run every setup step inside the sandbox directory.
  #
  # Any error raised by a step stops processing and is reported in the
  # returned hash instead of propagating.
  #
  # @param setup_steps [Array] Setup steps from scenario.yml
  # @param sandbox_dir [String] Path to the sandbox directory (created if missing)
  # @param fixture_source [String, nil] Path to the fixtures/ directory
  # @param scenario_name [String, nil] Test ID for tmux session naming (e.g., "TS-OVERSEER-001")
  # @param run_id [String, nil] Unique run ID for deterministic tmux session naming
  # @return [Hash] Result with :success, :steps_completed, :error, :env, :tmux_session keys
  def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil)
    FileUtils.mkdir_p(sandbox_dir)
    step_env = {}
    finished = 0
    @tmux_session = nil
    @scenario_name = scenario_name
    @run_id = run_id

    setup_steps.each do |step|
      execute_step(step, sandbox_dir, step_env, fixture_source)
      finished += 1
    end

    outcome(true, finished, nil, step_env)
  rescue => e
    outcome(false, finished, e.message, step_env)
  end

  # Release resources created during setup; currently kills the tmux
  # session started by a tmux-session step, if any.
  def teardown
    session = @tmux_session
    return unless session

    system("tmux", "kill-session", "-t", session, out: File::NULL, err: File::NULL)
    @tmux_session = nil
  end

  private

  # Assemble the hash returned by #execute.
  #
  # @param success [Boolean] Whether all steps ran
  # @param steps_completed [Integer, nil] Number of steps that finished
  # @param error [String, nil] Error message on failure
  # @param env [Hash, nil] Environment accumulated by the steps
  # @return [Hash] Result hash including the current tmux session name
  def outcome(success, steps_completed, error, env)
    {
      success: success,
      steps_completed: steps_completed,
      error: error,
      env: env,
      tmux_session: @tmux_session
    }
  end

  # Route one step to its handler.
  #
  # @param step [String, Hash] Step definition
  # @param sandbox_dir [String] Sandbox path
  # @param env [Hash] Environment variables
  # @param fixture_source [String, nil] Fixtures path
  # @raise [ArgumentError] If the step is not a known name or a Hash
  def execute_step(step, sandbox_dir, env, fixture_source)
    return execute_hash_step(step, sandbox_dir, env) if step.is_a?(Hash)

    case step
    when "git-init" then handle_git_init(sandbox_dir, env)
    when "copy-fixtures" then handle_copy_fixtures(sandbox_dir, fixture_source)
    when "tmux-session" then handle_tmux_session(env)
    else raise ArgumentError, "Unknown setup step: #{step.inspect}"
    end
  end

  # Route a hash-style step; only the first key/value pair is considered.
  #
  # @raise [ArgumentError] If the key is not a known step type
  def execute_hash_step(step, sandbox_dir, env)
    kind, payload = step.first

    case kind
    when "run"
      handle_run(payload, sandbox_dir, env)
    when "write-file"
      handle_write_file(payload["path"], payload["content"], sandbox_dir)
    when "agent-env"
      handle_env(payload, env)
    when "tmux-session"
      handle_tmux_session(env, payload)
    else
      raise ArgumentError, "Unknown setup step type: #{kind.inspect}"
    end
  end

  # Start a detached tmux session and publish its name via
  # ACE_TMUX_SESSION in the step environment.
  #
  # @raise [RuntimeError] If tmux cannot create the session
  def handle_tmux_session(env, config = nil)
    session_name = tmux_session_name(config)
    _out, err, status = Open3.capture3("tmux", "new-session", "-d", "-s", session_name)
    raise "Failed to create tmux session '#{session_name}': #{err.strip}" unless status.success?

    @tmux_session = session_name
    env["ACE_TMUX_SESSION"] = session_name
  end

  # Choose the tmux session name: the run ID when the step asks for
  # name-source "run-id" and a non-empty run ID is available, otherwise
  # a scenario-based name, otherwise a timestamped fallback.
  def tmux_session_name(config)
    wants_run_id = config.is_a?(Hash) && config["name-source"] == "run-id"
    return @run_id if wants_run_id && @run_id && !@run_id.to_s.empty?
    return "#{@scenario_name}-e2e" if @scenario_name

    "ace-e2e-#{Time.now.to_i}"
  end

  # Create a git repository on branch main with a test identity configured.
  def handle_git_init(sandbox_dir, env)
    [
      ["git", "init", "-b", "main"],
      ["git", "config", "user.name", "Test User"],
      ["git", "config", "user.email", "test@example.com"]
    ].each do |argv|
      run_command(*argv, chdir: sandbox_dir, env: env)
    end
  end

  # Copy the scenario fixtures into the sandbox.
  #
  # @raise [ArgumentError] If no fixture source was given
  def handle_copy_fixtures(sandbox_dir, fixture_source)
    if fixture_source.nil?
      raise ArgumentError, "No fixture source provided for copy-fixtures step"
    end

    FixtureCopier.new.copy(source_dir: fixture_source, target_dir: sandbox_dir)
  end

  # Run a shell command inside the sandbox and return its stdout.
  # NOTE: Goes through a shell (bash -lc) on purpose so that setup steps
  # in scenario.yml can use operators such as &&, | and >. The commands
  # come from committed scenario.yml files, not user input, so shell
  # injection risk is mitigated.
  #
  # @raise [RuntimeError] If the command exits non-zero
  def handle_run(command, sandbox_dir, env)
    process_env = merged_environment(env)
    # Re-export env vars after profile sourcing to protect against
    # mise's shell hook clobbering (mise manages PROJECT_ROOT_PATH).
    exported = env.dup
    %w[PROJECT_ROOT_PATH ACE_TASKFLOW_PATH].each do |name|
      inherited = ENV[name]
      exported[name] ||= inherited if inherited
    end
    export_clause = exported
      .map { |name, val| "export #{name}=#{Shellwords.shellescape(val.to_s)}" }
      .join("; ")
    script = export_clause.empty? ? command : "#{export_clause}; #{command}"
    out, err, status = Open3.capture3(process_env, "bash", "-lc", script, chdir: sandbox_dir)

    return out if status.success?

    raise "Setup step 'run' failed (exit #{status.exitstatus}): #{command}\n#{err}"
  end

  # Write inline content to a sandbox-relative path, creating parent directories.
  def handle_write_file(path, content, sandbox_dir)
    target = File.join(sandbox_dir, path)
    FileUtils.mkdir_p(File.dirname(target))
    File.write(target, content)
  end

  # Add variables (stringified keys and values) to the environment used by later steps.
  def handle_env(vars, env)
    vars.each do |name, val|
      env[name.to_s] = val.to_s
    end
  end

  # Overlay custom variables on the current process environment.
  #
  # @param env [Hash] Custom environment variables
  # @return [Hash] Merged environment
  def merged_environment(env)
    base = ENV.to_h
    env.empty? ? base : base.merge(env.transform_keys(&:to_s))
  end

  # Run a command without a shell and raise when it exits non-zero.
  #
  # @raise [RuntimeError] With the exit status, command line and stderr
  def run_command(*args, chdir:, env: {})
    _out, err, status = Open3.capture3(merged_environment(env), *args, chdir: chdir)
    return if status.success?

    raise "Command failed (exit #{status.exitstatus}): #{args.join(" ")}\n#{err}"
  end
end
179
+ end
180
+ end
181
+ end