ace-test-runner-e2e 0.29.8 → 0.40.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.ace-defaults/e2e-runner/config.yml +14 -2
  3. data/CHANGELOG.md +233 -0
  4. data/README.md +2 -2
  5. data/exe/ace-test-e2e-sh +9 -4
  6. data/handbook/guides/e2e-testing.g.md +75 -9
  7. data/handbook/guides/scenario-yml-reference.g.md +21 -8
  8. data/handbook/guides/tc-authoring.g.md +23 -5
  9. data/handbook/skills/as-e2e-fix/SKILL.md +2 -2
  10. data/handbook/skills/as-e2e-review/SKILL.md +2 -2
  11. data/handbook/templates/ace-taskflow-fixture.template.md +17 -17
  12. data/handbook/templates/agent-experience-report.template.md +3 -2
  13. data/handbook/templates/scenario.yml.template.yml +7 -2
  14. data/handbook/templates/tc-file.template.md +16 -4
  15. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +53 -6
  16. data/handbook/workflow-instructions/e2e/create.wf.md +128 -25
  17. data/handbook/workflow-instructions/e2e/execute.wf.md +11 -7
  18. data/handbook/workflow-instructions/e2e/fix.wf.md +84 -15
  19. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +33 -1
  20. data/handbook/workflow-instructions/e2e/review.wf.md +40 -25
  21. data/handbook/workflow-instructions/e2e/rewrite.wf.md +22 -8
  22. data/handbook/workflow-instructions/e2e/run.wf.md +50 -26
  23. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +4 -4
  24. data/lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb +138 -0
  25. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +7 -5
  26. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +73 -7
  27. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +195 -5
  28. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +58 -9
  29. data/lib/ace/test/end_to_end_runner/models/test_case.rb +8 -2
  30. data/lib/ace/test/end_to_end_runner/models/test_result.rb +9 -3
  31. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +4 -2
  32. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +7 -2
  33. data/lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb +61 -0
  34. data/lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb +271 -0
  35. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +28 -1
  36. data/lib/ace/test/end_to_end_runner/molecules/integration_runner.rb +122 -0
  37. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +235 -18
  38. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +164 -13
  39. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +91 -19
  40. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +121 -18
  41. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +15 -12
  42. data/lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb +374 -0
  43. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +83 -5
  44. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +121 -16
  45. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +422 -97
  46. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +38 -13
  47. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +27 -5
  48. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +98 -18
  49. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +159 -19
  50. data/lib/ace/test/end_to_end_runner/version.rb +1 -1
  51. data/lib/ace/test/end_to_end_runner.rb +4 -0
  52. metadata +21 -2
@@ -4,6 +4,8 @@ require "fileutils"
4
4
  require "date"
5
5
  require "yaml"
6
6
  require "ace/b36ts"
7
+ require "ace/test_support/sandbox_package_copy"
8
+ require "ace/test/end_to_end_runner/molecules/integration_runner"
7
9
 
8
10
  module Ace
9
11
  module Test
@@ -28,19 +30,29 @@ module Ace
28
30
  # @param timestamp_generator [#call] Callable that returns a timestamp string
29
31
  # @param executor [#execute] Injectable test executor (for testing)
30
32
  # @param progress [Boolean] Enable animated progress display
31
- def initialize(provider: nil, timeout: nil, parallel: nil, base_dir: nil, timestamp_generator: nil, executor: nil, progress: false)
33
+ def initialize(provider: nil, timeout: nil, parallel: nil, base_dir: nil, timestamp_generator: nil,
34
+ executor: nil, progress: false, discoverer: nil, integration_runner: nil,
35
+ scenario_loader: nil, report_writer: nil, suite_report_writer: nil,
36
+ setup_executor_factory: nil, runtime_builder: nil)
32
37
  config = Molecules::ConfigLoader.load
33
- @provider = provider || config.dig("execution", "provider") || "claude:sonnet"
38
+ @provider = provider || config.dig("execution", "runner_provider") ||
39
+ config.dig("execution", "provider") || "claude:sonnet"
34
40
  @timeout = timeout || config.dig("execution", "timeout") || 300
35
41
  @parallel = parallel || config.dig("execution", "parallel") || 3
36
42
  @base_dir = base_dir || Dir.pwd
37
43
  @timestamp_generator = timestamp_generator || method(:default_timestamp)
38
44
  @progress = progress
39
- @discoverer = Molecules::TestDiscoverer.new
40
- @loader = Molecules::ScenarioLoader.new
45
+ @discoverer = discoverer || Molecules::TestDiscoverer.new
46
+ @integration_runner = integration_runner || Molecules::IntegrationRunner.new(base_dir: @base_dir)
47
+ @loader = scenario_loader || Molecules::ScenarioLoader.new
41
48
  @executor = executor || Molecules::TestExecutor.new(provider: @provider, timeout: @timeout, config: config)
42
- @report_writer = Molecules::ReportWriter.new
43
- @suite_report_writer = Molecules::SuiteReportWriter.new(config: config)
49
+ @report_writer = report_writer || Molecules::ReportWriter.new
50
+ @suite_report_writer = suite_report_writer || Molecules::SuiteReportWriter.new(config: config)
51
+ @setup_executor_factory = setup_executor_factory || ->(sandbox_backend: nil) { Molecules::SetupExecutor.new(sandbox_backend: sandbox_backend) }
52
+ @runtime_builder = runtime_builder || Molecules::SandboxRuntimeBuilder.new(
53
+ source_root: @base_dir,
54
+ ruby_version: config.dig("sandbox", "ruby_version") || Molecules::ConfigLoader.default_sandbox_ruby_version
55
+ )
44
56
  end
45
57
 
46
58
  # Run E2E tests for a package, optionally filtering by test ID
@@ -54,6 +66,11 @@ module Ace
54
66
  # @return [Array<Models::TestResult>] List of test results
55
67
  def run(package:, test_id: nil, test_cases: nil, verify: false, tags: nil,
56
68
  cli_args: nil, run_id: nil, report_dir: nil, output: $stdout)
69
+ integration_files = @discoverer.find_integration_tests(
70
+ package: package,
71
+ base_dir: @base_dir
72
+ )
73
+
57
74
  # Discover tests
58
75
  files = @discoverer.find_tests(
59
76
  package: package,
@@ -62,7 +79,7 @@ module Ace
62
79
  base_dir: @base_dir
63
80
  )
64
81
 
65
- if files.empty?
82
+ if files.empty? && integration_files.empty?
66
83
  output.puts "No E2E tests found in #{package}" +
67
84
  (test_id ? " matching #{test_id}" : "")
68
85
  return []
@@ -71,7 +88,7 @@ module Ace
71
88
  # Generate timestamp for this run (use external run_id when provided)
72
89
  timestamp = run_id || generate_timestamp
73
90
 
74
- if files.size == 1
91
+ if files.size == 1 && integration_files.empty?
75
92
  run_single_test(
76
93
  files.first,
77
94
  timestamp,
@@ -82,7 +99,16 @@ module Ace
82
99
  report_dir: report_dir
83
100
  )
84
101
  else
85
- run_package_tests(files, package, timestamp, cli_args, output, test_cases: test_cases, verify: verify)
102
+ run_package_tests(
103
+ files,
104
+ package,
105
+ timestamp,
106
+ cli_args,
107
+ output,
108
+ test_cases: test_cases,
109
+ verify: verify,
110
+ integration_files: integration_files
111
+ )
86
112
  end
87
113
  end
88
114
 
@@ -107,13 +133,50 @@ module Ace
107
133
  return [nil, nil, nil] unless cli_provider? && scenario.setup_steps.any?
108
134
 
109
135
  sandbox_dir = File.join(@base_dir, ".ace-local", "test-e2e", scenario.dir_name(timestamp))
110
- setup_executor = Molecules::SetupExecutor.new
136
+ setup_steps = effective_setup_steps_for(scenario)
137
+ package_copy = Ace::TestSupport::SandboxPackageCopy.new(source_root: @base_dir)
138
+ package_source = File.join(@base_dir, scenario.package.to_s)
139
+ package_copy_result = if File.directory?(package_source)
140
+ package_copy.prepare(
141
+ package_name: scenario.package,
142
+ sandbox_root: sandbox_dir
143
+ )
144
+ else
145
+ {
146
+ env: {
147
+ "PROJECT_ROOT_PATH" => File.expand_path(sandbox_dir),
148
+ "ACE_E2E_SOURCE_ROOT" => File.expand_path(@base_dir)
149
+ }
150
+ }
151
+ end
152
+ sandbox_builder = Molecules::PipelineSandboxBuilder.new(config_root: @base_dir)
153
+ protocol_packages = sandbox_builder.sync_protocol_sources_into(sandbox_dir)
154
+ runtime_result = @runtime_builder.prepare(
155
+ sandbox_root: sandbox_dir,
156
+ env: package_copy_result[:env],
157
+ tool_names: scenario.requires.fetch("tools", [])
158
+ )
159
+ sandbox_backend = Molecules::BwrapSandboxBackend.new(
160
+ sandbox_root: sandbox_dir,
161
+ source_root: runtime_result.dig(:env, "ACE_E2E_SOURCE_ROOT")
162
+ )
163
+ setup_executor = if @setup_executor_factory.arity.zero?
164
+ @setup_executor_factory.call
165
+ else
166
+ @setup_executor_factory.call(sandbox_backend: sandbox_backend)
167
+ end
111
168
  result = setup_executor.execute(
112
- setup_steps: scenario.setup_steps,
169
+ setup_steps: setup_steps,
113
170
  sandbox_dir: sandbox_dir,
114
171
  fixture_source: scenario.fixture_path,
115
172
  scenario_name: scenario.test_id,
116
- run_id: timestamp
173
+ run_id: timestamp,
174
+ initial_env: runtime_result[:env],
175
+ git_excludes: sandbox_support_git_excludes(
176
+ scenario,
177
+ protocol_packages,
178
+ setup_steps: setup_steps
179
+ )
117
180
  )
118
181
 
119
182
  unless result[:success]
@@ -130,6 +193,61 @@ module Ace
130
193
  [File.expand_path(sandbox_dir), env, setup_executor]
131
194
  end
132
195
 
196
+ def effective_setup_steps_for(scenario)
197
+ steps = Array(scenario.setup_steps)
198
+ return steps unless scenario.sandbox_profile == "ace-default"
199
+
200
+ has_config_sync = setup_contains_command?(steps, "ace-config sync")
201
+ has_handbook_sync = setup_contains_command?(steps, "ace-handbook sync")
202
+ bootstrap = []
203
+ bootstrap << {"run" => "ace-config sync ace-llm-providers-cli"} unless has_config_sync
204
+ bootstrap << {"run" => "ace-handbook sync"} unless has_handbook_sync
205
+ return steps if bootstrap.empty?
206
+
207
+ insert_after = steps.index("git-init")
208
+ return bootstrap + steps unless insert_after
209
+
210
+ steps.dup.insert(insert_after + 1, *bootstrap)
211
+ end
212
+
213
+ def setup_contains_command?(steps, fragment)
214
+ steps.any? do |step|
215
+ step.is_a?(Hash) && step["run"].to_s.include?(fragment)
216
+ end
217
+ end
218
+
219
+ def sandbox_support_git_excludes(scenario, protocol_packages, setup_steps:)
220
+ package_paths = ([scenario.package] + Array(protocol_packages))
221
+ .map(&:to_s)
222
+ .map(&:strip)
223
+ .reject(&:empty?)
224
+ .uniq
225
+ .sort
226
+ .map { |name| "#{name}/" }
227
+
228
+ # Most scenarios need the copied package tree to stay visible to tools
229
+ # like rg/fd. Only fixture-commit setup flows should hide the copied
230
+ # support trees from git staging.
231
+ return [] unless setup_contains_initial_commit?(setup_steps)
232
+
233
+ support_paths = [".ace-handbook/", ".ace/git/", ".ace/llm/providers/"]
234
+ (package_paths + support_paths).uniq
235
+ end
236
+
237
+ def setup_contains_initial_commit?(steps)
238
+ staged_changes = false
239
+
240
+ Array(steps).any? do |step|
241
+ next false unless step.is_a?(Hash)
242
+
243
+ command = step["run"].to_s
244
+ staged_changes ||= command.match?(/\bgit\s+add\b/m)
245
+
246
+ command.match?(/\bgit\s+commit\b/m) &&
247
+ (staged_changes || command.match?(/\bgit\s+commit\b(?=.*(?:\s--all\b|\s-[A-Za-z]*a[A-Za-z]*\b))/m))
248
+ end
249
+ end
250
+
133
251
  # Run a single test
134
252
  # @param test_cases [Array<String>, nil] Optional test case IDs to filter
135
253
  # @param report_dir [String, nil] Explicit report directory path (overrides computed path)
@@ -192,7 +310,23 @@ module Ace
192
310
  # Run all tests in a package
193
311
  # @param test_cases [Array<String>, nil] Optional test case IDs to filter
194
312
  # @return [Array<Models::TestResult>] Results for all tests
195
- def run_package_tests(files, package, timestamp, cli_args, output, test_cases: nil, verify: false)
313
+ def run_package_tests(files, package, timestamp, cli_args, output, test_cases: nil, verify: false,
314
+ integration_files: [])
315
+ integration_result = @integration_runner.run(
316
+ package: package,
317
+ files: integration_files,
318
+ timestamp: timestamp,
319
+ output: output
320
+ )
321
+ if integration_result && %w[fail error].include?(integration_result.status)
322
+ output.puts integration_result.summary
323
+ return [integration_result]
324
+ end
325
+
326
+ if files.empty?
327
+ return integration_result ? [integration_result] : []
328
+ end
329
+
196
330
  # Load scenarios upfront for titles and report generation
197
331
  scenarios = files.map { |f| @loader.load(File.dirname(f)) }
198
332
 
@@ -291,15 +425,17 @@ module Ace
291
425
  done = true
292
426
  refresh_thread&.join
293
427
 
428
+ combined_results = integration_result ? [integration_result] + results : results
429
+
294
430
  # Write suite report
295
431
  report_path = @suite_report_writer.write(
296
- results, scenarios,
297
- package: package, timestamp: timestamp, base_dir: @base_dir
432
+ combined_results, scenarios,
433
+ package: package, timestamp: timestamp, base_dir: @base_dir, report_kind: :package
298
434
  )
299
435
 
300
- display.show_summary(results, report_path)
436
+ display.show_summary(combined_results, report_path)
301
437
 
302
- results
438
+ combined_results
303
439
  end
304
440
 
305
441
  # Build the appropriate display manager for this run
@@ -332,12 +468,16 @@ module Ace
332
468
  # Uses Ace::B36ts library to encode unique IDs with 50ms precision,
333
469
  # ensuring distinct timestamps for parallel test runs.
334
470
  #
471
+ # Offset uses 0.1 (100ms) instead of 0.05 to avoid collisions with
472
+ # the 50ms encoder's approximate bucket size.
473
+ #
335
474
  # @param count [Integer] Number of unique timestamps needed
336
475
  # @return [Array<String>] Array of unique timestamp strings
337
476
  def generate_timestamps(count)
477
+ base_time = Time.now.utc
478
+
338
479
  count.times.map do |i|
339
- time = Time.now.utc + (i * 0.05) # 50ms offset per ID
340
- Ace::B36ts.encode(time, format: :"50ms")
480
+ Ace::B36ts.encode(base_time + (i * 0.1), format: :"50ms")
341
481
  end
342
482
  end
343
483
 
@@ -3,7 +3,7 @@
3
3
  module Ace
4
4
  module Test
5
5
  module EndToEndRunner
6
- VERSION = '0.29.8'
6
+ VERSION = '0.40.1'
7
7
  end
8
8
  end
9
9
  end
@@ -13,6 +13,7 @@ require_relative "end_to_end_runner/atoms/result_parser"
13
13
  require_relative "end_to_end_runner/atoms/skill_prompt_builder"
14
14
  require_relative "end_to_end_runner/atoms/skill_result_parser"
15
15
  require_relative "end_to_end_runner/atoms/suite_report_prompt_builder"
16
+ require_relative "end_to_end_runner/atoms/artifact_contract_validator"
16
17
  require_relative "end_to_end_runner/atoms/test_case_parser"
17
18
  require_relative "end_to_end_runner/atoms/tc_fidelity_validator"
18
19
  require_relative "end_to_end_runner/atoms/display_helpers"
@@ -20,6 +21,8 @@ require_relative "end_to_end_runner/atoms/display_helpers"
20
21
  # Molecules
21
22
  require_relative "end_to_end_runner/molecules/fixture_copier"
22
23
  require_relative "end_to_end_runner/molecules/scenario_loader"
24
+ require_relative "end_to_end_runner/molecules/bwrap_sandbox_backend"
25
+ require_relative "end_to_end_runner/molecules/sandbox_runtime_builder"
23
26
  require_relative "end_to_end_runner/molecules/setup_executor"
24
27
  require_relative "end_to_end_runner/molecules/config_loader"
25
28
  require_relative "end_to_end_runner/molecules/test_discoverer"
@@ -30,6 +33,7 @@ require_relative "end_to_end_runner/molecules/pipeline_report_generator"
30
33
  require_relative "end_to_end_runner/molecules/pipeline_executor"
31
34
  require_relative "end_to_end_runner/molecules/report_writer"
32
35
  require_relative "end_to_end_runner/molecules/suite_report_writer"
36
+ require_relative "end_to_end_runner/molecules/artifact_pruner"
33
37
  require_relative "end_to_end_runner/molecules/simple_display_manager"
34
38
  require_relative "end_to_end_runner/molecules/progress_display_manager"
35
39
  require_relative "end_to_end_runner/molecules/suite_simple_display_manager"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ace-test-runner-e2e
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.29.8
4
+ version: 0.40.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michal Czyz
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2026-04-05 00:00:00.000000000 Z
10
+ date: 2026-04-27 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: ace-support-cli
@@ -51,6 +51,20 @@ dependencies:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
53
  version: '0.9'
54
+ - !ruby/object:Gem::Dependency
55
+ name: ace-support-test-helpers
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '0.14'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '0.14'
54
68
  - !ruby/object:Gem::Dependency
55
69
  name: ace-llm
56
70
  requirement: !ruby/object:Gem::Requirement
@@ -158,6 +172,7 @@ files:
158
172
  - handbook/workflow-instructions/e2e/run.wf.md
159
173
  - handbook/workflow-instructions/e2e/setup-sandbox.wf.md
160
174
  - lib/ace/test/end_to_end_runner.rb
175
+ - lib/ace/test/end_to_end_runner/atoms/artifact_contract_validator.rb
161
176
  - lib/ace/test/end_to_end_runner/atoms/display_helpers.rb
162
177
  - lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb
163
178
  - lib/ace/test/end_to_end_runner/atoms/result_parser.rb
@@ -172,15 +187,19 @@ files:
172
187
  - lib/ace/test/end_to_end_runner/models/test_result.rb
173
188
  - lib/ace/test/end_to_end_runner/models/test_scenario.rb
174
189
  - lib/ace/test/end_to_end_runner/molecules/affected_detector.rb
190
+ - lib/ace/test/end_to_end_runner/molecules/artifact_pruner.rb
191
+ - lib/ace/test/end_to_end_runner/molecules/bwrap_sandbox_backend.rb
175
192
  - lib/ace/test/end_to_end_runner/molecules/config_loader.rb
176
193
  - lib/ace/test/end_to_end_runner/molecules/failure_finder.rb
177
194
  - lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb
195
+ - lib/ace/test/end_to_end_runner/molecules/integration_runner.rb
178
196
  - lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb
179
197
  - lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb
180
198
  - lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb
181
199
  - lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb
182
200
  - lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb
183
201
  - lib/ace/test/end_to_end_runner/molecules/report_writer.rb
202
+ - lib/ace/test/end_to_end_runner/molecules/sandbox_runtime_builder.rb
184
203
  - lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb
185
204
  - lib/ace/test/end_to_end_runner/molecules/setup_executor.rb
186
205
  - lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb