ace-test-runner-e2e 0.29.8 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.ace-defaults/e2e-runner/config.yml +14 -2
  3. data/CHANGELOG.md +233 -0
  4. data/README.md +2 -2
  5. data/exe/ace-test-e2e-sh +9 -4
  6. data/handbook/guides/e2e-testing.g.md +75 -9
  7. data/handbook/guides/scenario-yml-reference.g.md +21 -8
  8. data/handbook/guides/tc-authoring.g.md +23 -5
  9. data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
  10. data/handbook/skills/as-e2e-review/SKILL.md +2 -2
  11. data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
  12. data/handbook/templates/agent-experience-report.template.md +3 -2
  13. data/handbook/templates/scenario.yml.template.yml +7 -2
  14. data/handbook/templates/tc-file.template.md +16 -4
  15. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
  16. data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
  17. data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
  18. data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
  19. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
  20. data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
  21. data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
  22. data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
  23. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
  24. data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
  25. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
  26. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
  27. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
  28. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
  29. data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
  30. data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
  31. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
  32. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
  33. data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
  34. data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
  35. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
  36. data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
  37. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
  38. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
  39. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
  40. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
  41. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
  42. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
  43. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
  44. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
  45. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
  46. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
  47. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
  48. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
  49. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
  50. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  51. data/lib/ace/test/end_to_end_runner.rb +4 -0
  52. metadata +21 -2
@@ -17,6 +17,20 @@ module Ace
17
17
  # Note: This is a Molecule because it performs filesystem I/O and
18
18
  # system calls via Open3 and FileUtils.
19
19
  class SetupExecutor
20
+ AMBIENT_TMUX_ENV_VARS = %w[TMUX TMUX_PANE].freeze
21
+ BUNDLER_ENV_PREFIXES = %w[BUNDLE BUNDLER].freeze
22
+ STRIPPED_ENV_KEYS = %w[RUBYOPT RUBYLIB].freeze
23
+ RESERVED_ENV_KEYS = Molecules::SandboxRuntimeBuilder::RESERVED_ENV_KEYS + %w[
24
+ PATH HOME TMPDIR XDG_RUNTIME_DIR TMUX_TMPDIR ACE_TMUX_SESSION
25
+ ]
26
+
27
+ def initialize(command_runner: nil, system_runner: nil, time_source: nil, sandbox_backend: nil)
28
+ @command_runner = command_runner || method(:capture3)
29
+ @system_runner = system_runner || method(:system)
30
+ @time_source = time_source || -> { Time.now.to_i }
31
+ @sandbox_backend = sandbox_backend
32
+ end
33
+
20
34
  # Execute all setup steps in a sandbox directory
21
35
  #
22
36
  # @param setup_steps [Array] Setup steps from scenario.yml
@@ -25,29 +39,56 @@ module Ace
25
39
  # @param scenario_name [String, nil] Test ID for tmux session naming (e.g., "TS-OVERSEER-001")
26
40
  # @param run_id [String, nil] Unique run ID for deterministic tmux session naming
27
41
  # @return [Hash] Result with :success, :steps_completed, :error, :env, :tmux_session keys
28
- def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil)
42
+ def execute(setup_steps:, sandbox_dir:, fixture_source: nil, scenario_name: nil, run_id: nil, initial_env: {},
43
+ git_excludes: [])
29
44
  FileUtils.mkdir_p(sandbox_dir)
30
- env = {}
45
+ env = if @sandbox_backend
46
+ @sandbox_backend.prepared_env(initial_env.dup)
47
+ else
48
+ initial_env.dup
49
+ end
50
+ @git_excludes = normalize_git_excludes(git_excludes)
31
51
  steps_completed = 0
32
52
  @tmux_session = nil
33
53
  @scenario_name = scenario_name
34
54
  @run_id = run_id
55
+ @teardown_env = nil
35
56
 
36
57
  setup_steps.each do |step|
37
58
  execute_step(step, sandbox_dir, env, fixture_source)
38
59
  steps_completed += 1
39
60
  end
40
61
 
41
- {success: true, steps_completed: steps_completed, error: nil, env: env, tmux_session: @tmux_session}
62
+ {
63
+ success: true,
64
+ steps_completed: steps_completed,
65
+ error: nil,
66
+ env: merged_environment(env),
67
+ tmux_session: @tmux_session
68
+ }
42
69
  rescue => e
43
- {success: false, steps_completed: steps_completed, error: e.message, env: env, tmux_session: @tmux_session}
70
+ {
71
+ success: false,
72
+ steps_completed: steps_completed,
73
+ error: e.message,
74
+ env: merged_environment(env),
75
+ tmux_session: @tmux_session
76
+ }
44
77
  end
45
78
 
46
79
  # Clean up resources created during setup (e.g. tmux session)
47
80
  def teardown
48
81
  return unless @tmux_session
49
82
 
50
- system("tmux", "kill-session", "-t", @tmux_session, out: File::NULL, err: File::NULL)
83
+ if @sandbox_backend
84
+ @sandbox_backend.capture3(
85
+ ["tmux", "kill-session", "-t", @tmux_session],
86
+ chdir: @teardown_env&.fetch("PROJECT_ROOT_PATH", Dir.pwd) || Dir.pwd,
87
+ env: @teardown_env || {}
88
+ )
89
+ else
90
+ @system_runner.call("tmux", "kill-session", "-t", @tmux_session, out: File::NULL, err: File::NULL)
91
+ end
51
92
  @tmux_session = nil
52
93
  end
53
94
 
@@ -99,13 +140,23 @@ module Ace
99
140
  session_name = if name_source == "run-id" && @run_id && !@run_id.to_s.empty?
100
141
  @run_id
101
142
  else
102
- @scenario_name ? "#{@scenario_name}-e2e" : "ace-e2e-#{Time.now.to_i}"
143
+ @scenario_name ? "#{@scenario_name}-e2e" : "ace-e2e-#{@time_source.call}"
144
+ end
145
+ tmux_env = merged_environment(env).merge("TMUX_TMPDIR" => env["TMUX_TMPDIR"].to_s.empty? ? nil : env["TMUX_TMPDIR"])
146
+ if @sandbox_backend
147
+ _stdout, stderr, status = @sandbox_backend.capture3(
148
+ ["tmux", "new-session", "-d", "-s", session_name],
149
+ chdir: env["PROJECT_ROOT_PATH"] || Dir.pwd,
150
+ env: tmux_env
151
+ )
152
+ else
153
+ _stdout, stderr, status = @command_runner.call(tmux_env, "tmux", "new-session", "-d", "-s", session_name)
103
154
  end
104
- _stdout, stderr, status = Open3.capture3("tmux", "new-session", "-d", "-s", session_name)
105
155
  raise "Failed to create tmux session '#{session_name}': #{stderr.strip}" unless status.success?
106
156
 
107
157
  @tmux_session = session_name
108
158
  env["ACE_TMUX_SESSION"] = session_name
159
+ @teardown_env = merged_environment(env)
109
160
  end
110
161
 
111
162
  # Initialize a git repo with test user config
@@ -113,6 +164,7 @@ module Ace
113
164
  run_command("git", "init", "-b", "main", chdir: sandbox_dir, env: env)
114
165
  run_command("git", "config", "user.name", "Test User", chdir: sandbox_dir, env: env)
115
166
  run_command("git", "config", "user.email", "test@example.com", chdir: sandbox_dir, env: env)
167
+ seed_git_excludes(sandbox_dir)
116
168
  end
117
169
 
118
170
  # Copy fixture files into sandbox
@@ -123,20 +175,25 @@ module Ace
123
175
  end
124
176
 
125
177
  # Execute a shell command in the sandbox
126
- # NOTE: Uses shell invocation (bash -lc) intentionally to support
127
- # shell operators (&&, |, >) in scenario.yml setup steps. Commands originate from
178
+ # NOTE: Uses shell invocation intentionally to support shell operators
179
+ # (&&, |, >) in scenario.yml setup steps. Commands originate from
128
180
  # committed scenario.yml files, not user input, so shell injection risk is mitigated.
181
+ # We explicitly disable profile/rc loading to keep sandbox env authoritative.
129
182
  def handle_run(command, sandbox_dir, env)
130
183
  full_env = merged_environment(env)
131
- # Re-export env vars after profile sourcing to protect against
132
- # mise's shell hook clobbering.
184
+ # Re-export env vars inside the command to keep explicit sandbox
185
+ # values authoritative across compound shell expressions.
133
186
  export_vars = env.dup
134
187
  %w[PROJECT_ROOT_PATH].each do |key|
135
188
  export_vars[key] ||= ENV[key] if ENV[key]
136
189
  end
137
190
  exports = export_vars.map { |k, v| "export #{k}=#{Shellwords.shellescape(v.to_s)}" }.join("; ")
138
191
  wrapped = exports.empty? ? command : "#{exports}; #{command}"
139
- stdout, stderr, status = Open3.capture3(full_env, "bash", "-lc", wrapped, chdir: sandbox_dir)
192
+ stdout, stderr, status = if @sandbox_backend
193
+ @sandbox_backend.capture3(["bash", "--noprofile", "--norc", "-c", wrapped], chdir: sandbox_dir, env: full_env)
194
+ else
195
+ Open3.capture3(full_env, "bash", "--noprofile", "--norc", "-c", wrapped, chdir: sandbox_dir)
196
+ end
140
197
 
141
198
  unless status.success?
142
199
  raise "Setup step 'run' failed (exit #{status.exitstatus}): #{command}\n#{stderr}"
@@ -154,7 +211,12 @@ module Ace
154
211
 
155
212
  # Merge environment variables for subsequent steps
156
213
  def handle_env(vars, env)
157
- vars.each { |k, v| env[k.to_s] = v.to_s }
214
+ vars.each do |k, v|
215
+ key = k.to_s
216
+ next if RESERVED_ENV_KEYS.include?(key)
217
+
218
+ env[key] = v.to_s
219
+ end
158
220
  end
159
221
 
160
222
  # Merge custom env vars with the process environment
@@ -162,18 +224,61 @@ module Ace
162
224
  # @param env [Hash] Custom environment variables
163
225
  # @return [Hash] Merged environment
164
226
  def merged_environment(env)
165
- return ENV.to_h if env.empty?
166
- ENV.to_h.merge(env.transform_keys(&:to_s))
227
+ base_env = sanitized_process_environment
228
+ return base_env if env.empty?
229
+
230
+ base_env.merge(env.transform_keys(&:to_s))
167
231
  end
168
232
 
169
233
  # Run a command and raise on failure
170
234
  def run_command(*args, chdir:, env: {})
171
- _stdout, stderr, status = Open3.capture3(merged_environment(env), *args, chdir: chdir)
235
+ merged_env = merged_environment(env)
236
+ _stdout, stderr, status = if @sandbox_backend
237
+ @sandbox_backend.capture3(args, chdir: chdir, env: merged_env)
238
+ else
239
+ @command_runner.call(merged_env, *args, chdir: chdir)
240
+ end
172
241
 
173
242
  unless status.success?
174
243
  raise "Command failed (exit #{status.exitstatus}): #{args.join(" ")}\n#{stderr}"
175
244
  end
176
245
  end
246
+
247
+ def capture3(*args, **kwargs)
248
+ Open3.capture3(*args, **kwargs)
249
+ end
250
+
251
+ def seed_git_excludes(sandbox_dir)
252
+ patterns = (default_git_excludes + @git_excludes).uniq
253
+ return if patterns.empty?
254
+
255
+ exclude_path = File.join(sandbox_dir, ".git", "info", "exclude")
256
+ existing = File.exist?(exclude_path) ? File.readlines(exclude_path, chomp: true) : []
257
+ additions = patterns.reject { |pattern| existing.include?(pattern) }
258
+ return if additions.empty?
259
+
260
+ File.write(exclude_path, (existing + additions).join("\n") + "\n")
261
+ end
262
+
263
+ def normalize_git_excludes(git_excludes)
264
+ Array(git_excludes).map(&:to_s).map(&:strip).reject(&:empty?).uniq
265
+ end
266
+
267
+ def default_git_excludes
268
+ [".ace-local/", "reports/", "results/"]
269
+ end
270
+
271
+ def sanitized_process_environment
272
+ ENV.to_h.each_with_object({}) do |(key, value), env|
273
+ if AMBIENT_TMUX_ENV_VARS.include?(key) || STRIPPED_ENV_KEYS.include?(key) ||
274
+ BUNDLER_ENV_PREFIXES.any? { |prefix| key.start_with?(prefix) }
275
+ env[key] = nil
276
+ next
277
+ end
278
+
279
+ env[key] = value
280
+ end
281
+ end
177
282
  end
178
283
  end
179
284
  end