ace-test-runner-e2e 0.29.8 → 0.40.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ace-defaults/e2e-runner/config.yml +14 -2
- data/CHANGELOG.md +233 -0
- data/README.md +2 -2
- data/exe/ace-test-e2e-sh +9 -4
- data/handbook/guides/e2e-testing.g.md +75 -9
- data/handbook/guides/scenario-yml-reference.g.md +21 -8
- data/handbook/guides/tc-authoring.g.md +23 -5
- data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
- data/handbook/skills/as-e2e-review/SKILL.md +2 -2
- data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
- data/handbook/templates/agent-experience-report.template.md +3 -2
- data/handbook/templates/scenario.yml.template.yml +7 -2
- data/handbook/templates/tc-file.template.md +16 -4
- data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
- data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
- data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
- data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
- data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
- data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
- data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
- data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
- data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
- data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
- data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
- data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
- data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
- data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
- data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
- data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
- data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
- data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
- data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
- data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
- data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
- data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
- data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
- data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
- data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
- data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
- data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
- data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
- data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
- data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
- data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
- data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
- data/lib/ace/test/end_to_end_runner/version.rb +1 -1
- data/lib/ace/test/end_to_end_runner.rb +4 -0
- metadata +21 -2
|
@@ -17,6 +17,20 @@ module Ace
|
|
|
17
17
|
# Note: This is a Molecule because it performs filesystem I/O and
|
|
18
18
|
# system calls via Open3 and FileUtils.
|
|
19
19
|
class SetupExecutor
|
|
20
|
+
AMBIENT_TMUX_ENV_VARS = %w[TMUX TMUX_PANE].freeze
|
|
21
|
+
BUNDLER_ENV_PREFIXES = %w[BUNDLE BUNDLER].freeze
|
|
22
|
+
STRIPPED_ENV_KEYS = %w[RUBYOPT RUBYLIB].freeze
|
|
23
|
+
RESERVED_ENV_KEYS = Molecules::SandboxRuntimeBuilder::RESERVED_ENV_KEYS + %w[
|
|
24
|
+
PATH HOME TMPDIR XDG_RUNTIME_DIR TMUX_TMPDIR ACE_TMUX_SESSION
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
def initialize(command_runner: nil, system_runner: nil, time_source: nil, sandbox_backend: nil)
|
|
28
|
+
@command_runner = command_runner || method(:capture3)
|
|
29
|
+
@system_runner = system_runner || method(:system)
|
|
30
|
+
@time_source = time_source || -> { Time.now.to_i }
|
|
31
|
+
@sandbox_backend = sandbox_backend
|
|
32
|
+
end
|
|
33
|
+
|
|
20
34
|
# Execute all setup steps in a sandbox directory
|
|
21
35
|
#
|
|
22
36
|
# @param setup_steps [Array] Setup steps from scenario.yml
|
|
@@ -25,29 +39,56 @@ module Ace
|
|
|
25
39
|
# @param scenario_name [String, nil] Test ID for tmux session naming (e.g., "TS-OVERSEER-001")
|
|
26
40
|
# @param run_id [String, nil] Unique run ID for deterministic tmux session naming
|
|
27
41
|
# @return [Hash] Result with :success, :steps_completed, :error, :env, :tmux_session keys
|
|
28
|
-
def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil)
|
|
42
|
+
def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil, initial_env: {},
|
|
43
|
+
git_excludes: [])
|
|
29
44
|
FileUtils.mkdir_p(sandbox_dir)
|
|
30
|
-
env = {}
|
|
45
|
+
env = if @sandbox_backend
|
|
46
|
+
@sandbox_backend.prepared_env(initial_env.dup)
|
|
47
|
+
else
|
|
48
|
+
initial_env.dup
|
|
49
|
+
end
|
|
50
|
+
@git_excludes = normalize_git_excludes(git_excludes)
|
|
31
51
|
steps_completed = 0
|
|
32
52
|
@tmux_session = nil
|
|
33
53
|
@scenario_name = scenario_name
|
|
34
54
|
@run_id = run_id
|
|
55
|
+
@teardown_env = nil
|
|
35
56
|
|
|
36
57
|
setup_steps.each do |step|
|
|
37
58
|
execute_step(step, sandbox_dir, env, fixture_source)
|
|
38
59
|
steps_completed += 1
|
|
39
60
|
end
|
|
40
61
|
|
|
41
|
-
{
|
|
62
|
+
{
|
|
63
|
+
success: true,
|
|
64
|
+
steps_completed: steps_completed,
|
|
65
|
+
error: nil,
|
|
66
|
+
env: merged_environment(env),
|
|
67
|
+
tmux_session: @tmux_session
|
|
68
|
+
}
|
|
42
69
|
rescue => e
|
|
43
|
-
{
|
|
70
|
+
{
|
|
71
|
+
success: false,
|
|
72
|
+
steps_completed: steps_completed,
|
|
73
|
+
error: e.message,
|
|
74
|
+
env: merged_environment(env),
|
|
75
|
+
tmux_session: @tmux_session
|
|
76
|
+
}
|
|
44
77
|
end
|
|
45
78
|
|
|
46
79
|
# Clean up resources created during setup (e.g. tmux session)
|
|
47
80
|
def teardown
|
|
48
81
|
return unless @tmux_session
|
|
49
82
|
|
|
50
|
-
|
|
83
|
+
if @sandbox_backend
|
|
84
|
+
@sandbox_backend.capture3(
|
|
85
|
+
["tmux", "kill-session", "-t", @tmux_session],
|
|
86
|
+
chdir: @teardown_env&.fetch("PROJECT_ROOT_PATH", Dir.pwd) || Dir.pwd,
|
|
87
|
+
env: @teardown_env || {}
|
|
88
|
+
)
|
|
89
|
+
else
|
|
90
|
+
@system_runner.call("tmux", "kill-session", "-t", @tmux_session, out: File::NULL, err: File::NULL)
|
|
91
|
+
end
|
|
51
92
|
@tmux_session = nil
|
|
52
93
|
end
|
|
53
94
|
|
|
@@ -99,13 +140,23 @@ module Ace
|
|
|
99
140
|
session_name = if name_source == "run-id" && @run_id && !@run_id.to_s.empty?
|
|
100
141
|
@run_id
|
|
101
142
|
else
|
|
102
|
-
@scenario_name ? "#{@scenario_name}-e2e" : "ace-e2e-#{Time.now.to_i}"
|
|
143
|
+
@scenario_name ? "#{@scenario_name}-e2e" : "ace-e2e-#{@time_source.call}"
|
|
144
|
+
end
|
|
145
|
+
tmux_env = merged_environment(env).merge("TMUX_TMPDIR" => env["TMUX_TMPDIR"].to_s.empty? ? nil : env["TMUX_TMPDIR"])
|
|
146
|
+
if @sandbox_backend
|
|
147
|
+
_stdout, stderr, status = @sandbox_backend.capture3(
|
|
148
|
+
["tmux", "new-session", "-d", "-s", session_name],
|
|
149
|
+
chdir: env["PROJECT_ROOT_PATH"] || Dir.pwd,
|
|
150
|
+
env: tmux_env
|
|
151
|
+
)
|
|
152
|
+
else
|
|
153
|
+
_stdout, stderr, status = @command_runner.call(tmux_env, "tmux", "new-session", "-d", "-s", session_name)
|
|
103
154
|
end
|
|
104
|
-
_stdout, stderr, status = Open3.capture3("tmux", "new-session", "-d", "-s", session_name)
|
|
105
155
|
raise "Failed to create tmux session '#{session_name}': #{stderr.strip}" unless status.success?
|
|
106
156
|
|
|
107
157
|
@tmux_session = session_name
|
|
108
158
|
env["ACE_TMUX_SESSION"] = session_name
|
|
159
|
+
@teardown_env = merged_environment(env)
|
|
109
160
|
end
|
|
110
161
|
|
|
111
162
|
# Initialize a git repo with test user config
|
|
@@ -113,6 +164,7 @@ module Ace
|
|
|
113
164
|
run_command("git", "init", "-b", "main", chdir: sandbox_dir, env: env)
|
|
114
165
|
run_command("git", "config", "user.name", "Test User", chdir: sandbox_dir, env: env)
|
|
115
166
|
run_command("git", "config", "user.email", "test@example.com", chdir: sandbox_dir, env: env)
|
|
167
|
+
seed_git_excludes(sandbox_dir)
|
|
116
168
|
end
|
|
117
169
|
|
|
118
170
|
# Copy fixture files into sandbox
|
|
@@ -123,20 +175,25 @@ module Ace
|
|
|
123
175
|
end
|
|
124
176
|
|
|
125
177
|
# Execute a shell command in the sandbox
|
|
126
|
-
# NOTE: Uses shell invocation
|
|
127
|
-
#
|
|
178
|
+
# NOTE: Uses shell invocation intentionally to support shell operators
|
|
179
|
+
# (&&, |, >) in scenario.yml setup steps. Commands originate from
|
|
128
180
|
# committed scenario.yml files, not user input, so shell injection risk is mitigated.
|
|
181
|
+
# We explicitly disable profile/rc loading to keep sandbox env authoritative.
|
|
129
182
|
def handle_run(command, sandbox_dir, env)
|
|
130
183
|
full_env = merged_environment(env)
|
|
131
|
-
# Re-export env vars
|
|
132
|
-
#
|
|
184
|
+
# Re-export env vars inside the command to keep explicit sandbox
|
|
185
|
+
# values authoritative across compound shell expressions.
|
|
133
186
|
export_vars = env.dup
|
|
134
187
|
%w[PROJECT_ROOT_PATH].each do |key|
|
|
135
188
|
export_vars[key] ||= ENV[key] if ENV[key]
|
|
136
189
|
end
|
|
137
190
|
exports = export_vars.map { |k, v| "export #{k}=#{Shellwords.shellescape(v.to_s)}" }.join("; ")
|
|
138
191
|
wrapped = exports.empty? ? command : "#{exports}; #{command}"
|
|
139
|
-
stdout, stderr, status =
|
|
192
|
+
stdout, stderr, status = if @sandbox_backend
|
|
193
|
+
@sandbox_backend.capture3(["bash", "--noprofile", "--norc", "-c", wrapped], chdir: sandbox_dir, env: full_env)
|
|
194
|
+
else
|
|
195
|
+
Open3.capture3(full_env, "bash", "--noprofile", "--norc", "-c", wrapped, chdir: sandbox_dir)
|
|
196
|
+
end
|
|
140
197
|
|
|
141
198
|
unless status.success?
|
|
142
199
|
raise "Setup step 'run' failed (exit #{status.exitstatus}): #{command}\n#{stderr}"
|
|
@@ -154,7 +211,12 @@ module Ace
|
|
|
154
211
|
|
|
155
212
|
# Merge environment variables for subsequent steps
|
|
156
213
|
def handle_env(vars, env)
|
|
157
|
-
vars.each
|
|
214
|
+
vars.each do |k, v|
|
|
215
|
+
key = k.to_s
|
|
216
|
+
next if RESERVED_ENV_KEYS.include?(key)
|
|
217
|
+
|
|
218
|
+
env[key] = v.to_s
|
|
219
|
+
end
|
|
158
220
|
end
|
|
159
221
|
|
|
160
222
|
# Merge custom env vars with the process environment
|
|
@@ -162,18 +224,61 @@ module Ace
|
|
|
162
224
|
# @param env [Hash] Custom environment variables
|
|
163
225
|
# @return [Hash] Merged environment
|
|
164
226
|
def merged_environment(env)
|
|
165
|
-
|
|
166
|
-
|
|
227
|
+
base_env = sanitized_process_environment
|
|
228
|
+
return base_env if env.empty?
|
|
229
|
+
|
|
230
|
+
base_env.merge(env.transform_keys(&:to_s))
|
|
167
231
|
end
|
|
168
232
|
|
|
169
233
|
# Run a command and raise on failure
|
|
170
234
|
def run_command(*args, chdir:, env: {})
|
|
171
|
-
|
|
235
|
+
merged_env = merged_environment(env)
|
|
236
|
+
_stdout, stderr, status = if @sandbox_backend
|
|
237
|
+
@sandbox_backend.capture3(args, chdir: chdir, env: merged_env)
|
|
238
|
+
else
|
|
239
|
+
@command_runner.call(merged_env, *args, chdir: chdir)
|
|
240
|
+
end
|
|
172
241
|
|
|
173
242
|
unless status.success?
|
|
174
243
|
raise "Command failed (exit #{status.exitstatus}): #{args.join(" ")}\n#{stderr}"
|
|
175
244
|
end
|
|
176
245
|
end
|
|
246
|
+
|
|
247
|
+
def capture3(*args, **kwargs)
|
|
248
|
+
Open3.capture3(*args, **kwargs)
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def seed_git_excludes(sandbox_dir)
|
|
252
|
+
patterns = (default_git_excludes + @git_excludes).uniq
|
|
253
|
+
return if patterns.empty?
|
|
254
|
+
|
|
255
|
+
exclude_path = File.join(sandbox_dir, ".git", "info", "exclude")
|
|
256
|
+
existing = File.exist?(exclude_path) ? File.readlines(exclude_path, chomp: true) : []
|
|
257
|
+
additions = patterns.reject { |pattern| existing.include?(pattern) }
|
|
258
|
+
return if additions.empty?
|
|
259
|
+
|
|
260
|
+
File.write(exclude_path, (existing + additions).join("\n") + "\n")
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
def normalize_git_excludes(git_excludes)
|
|
264
|
+
Array(git_excludes).map(&:to_s).map(&:strip).reject(&:empty?).uniq
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def default_git_excludes
|
|
268
|
+
[".ace-local/", "reports/", "results/"]
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def sanitized_process_environment
|
|
272
|
+
ENV.to_h.each_with_object({}) do |(key, value), env|
|
|
273
|
+
if AMBIENT_TMUX_ENV_VARS.include?(key) || STRIPPED_ENV_KEYS.include?(key) ||
|
|
274
|
+
BUNDLER_ENV_PREFIXES.any? { |prefix| key.start_with?(prefix) }
|
|
275
|
+
env[key] = nil
|
|
276
|
+
next
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
env[key] = value
|
|
280
|
+
end
|
|
281
|
+
end
|
|
177
282
|
end
|
|
178
283
|
end
|
|
179
284
|
end
|