ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.ace-defaults/e2e-runner/config.yml +70 -0
  3. data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
  4. data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
  5. data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
  6. data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
  7. data/CHANGELOG.md +1166 -0
  8. data/LICENSE +21 -0
  9. data/README.md +42 -0
  10. data/Rakefile +15 -0
  11. data/exe/ace-test-e2e +15 -0
  12. data/exe/ace-test-e2e-sh +67 -0
  13. data/exe/ace-test-e2e-suite +13 -0
  14. data/handbook/guides/e2e-testing.g.md +124 -0
  15. data/handbook/guides/scenario-yml-reference.g.md +182 -0
  16. data/handbook/guides/tc-authoring.g.md +131 -0
  17. data/handbook/skills/as-e2e-create/SKILL.md +30 -0
  18. data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
  19. data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
  20. data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
  21. data/handbook/skills/as-e2e-review/SKILL.md +35 -0
  22. data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
  23. data/handbook/skills/as-e2e-run/SKILL.md +48 -0
  24. data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
  25. data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
  26. data/handbook/templates/agent-experience-report.template.md +89 -0
  27. data/handbook/templates/metadata.template.yml +49 -0
  28. data/handbook/templates/scenario.yml.template.yml +60 -0
  29. data/handbook/templates/tc-file.template.md +45 -0
  30. data/handbook/templates/test-report.template.md +94 -0
  31. data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
  32. data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
  33. data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
  34. data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
  35. data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
  36. data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
  37. data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
  38. data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
  39. data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
  40. data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
  41. data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
  42. data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
  43. data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
  44. data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
  45. data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
  46. data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
  47. data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
  48. data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
  49. data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
  50. data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
  51. data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
  52. data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
  53. data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
  54. data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
  55. data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
  56. data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
  57. data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
  58. data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
  59. data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
  60. data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
  61. data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
  62. data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
  63. data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
  64. data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
  65. data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
  66. data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
  67. data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
  68. data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
  69. data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
  70. data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
  71. data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
  72. data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
  73. data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
  74. data/lib/ace/test/end_to_end_runner/version.rb +9 -0
  75. data/lib/ace/test/end_to_end_runner.rb +71 -0
  76. metadata +220 -0
@@ -0,0 +1,60 @@
1
+ # E2E Test Scenario Configuration
2
+ # Standalone runner/verifier pair format
3
+ # See: handbook/guides/scenario-yml-reference.g.md
4
+
5
+ # Required: Unique test identifier
6
+ test-id: TS-{AREA}-{NNN}
7
+ title: {Descriptive Title}
8
+ area: {area-name}
9
+ package: {package-name}
10
+
11
+ # Optional: Priority level (high|medium|low)
12
+ priority: high
13
+
14
+ # Optional: Estimated duration
15
+ duration: ~{X}min
16
+
17
+ # Optional: Cost profile (smoke|happy-path|deep)
18
+ cost-tier: smoke
19
+
20
+ # Optional: Tags for discovery-time filtering (--tags/--exclude-tags, OR semantics)
21
+ tags: [{cost-tier}, "use-case:{area}"]
22
+
23
+ # Optional: Why this scenario must be E2E (not unit-only)
24
+ e2e-justification: "{Requires real CLI/tools/filesystem behavior}"
25
+
26
+ # Optional: Unit test files reviewed during Value Gate analysis
27
+ unit-coverage-reviewed:
28
+ - test/{layer}/{file}_test.rb
29
+
30
+ # Optional: Primary command under test
31
+ tool-under-test: {ace-tool}
32
+
33
+ # Optional: Declared sandbox artifact layout
34
+ sandbox-layout:
35
+ results/tc/01/: "Goal 1 artifacts"
36
+
37
+ # Optional: Prerequisites
38
+ requires:
39
+ tools: [{tool1}, {tool2}]
40
+ ruby: ">= 3.0"
41
+
42
+ # Setup directives (executed before test cases)
43
+ # Available: git-init, run:, copy-fixtures, write-file:, agent-env:, tmux-session
44
+ # Rules:
45
+ # - setup is fail-fast (do not use `|| true`)
46
+ # - fixtures/setup own sandbox preparation; runner TCs do not re-implement setup
47
+ setup:
48
+ - git-init
49
+ # Optional detached tmux session for test isolation (uses unique run ID)
50
+ # - tmux-session:
51
+ # name-source: run-id
52
+ - run: "cp $PROJECT_ROOT_PATH/mise.toml mise.toml && mise trust mise.toml"
53
+ # Uncomment as needed:
54
+ # - copy-fixtures # Copy fixtures/ directory to sandbox
55
+ # - agent-env: # Environment variables passed to runner/verifier agent subprocess
56
+ # PROJECT_ROOT_PATH: "."
57
+
58
+ # Optional: Verification tracking
59
+ # last-verified: YYYY-MM-DD
60
+ # verified-by: {agent-name}
@@ -0,0 +1,45 @@
1
+ ---
2
+ doc-type: template
3
+ title: Goal {N} - {Goal Title}
4
+ purpose: Documentation for ace-test-runner-e2e/handbook/templates/tc-file.template.md
5
+ ace-docs:
6
+ last-updated: 2026-02-25
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # Goal {N} - {Goal Title}
11
+
12
+ ## Goal
13
+
14
+ {Outcome to achieve}
15
+
16
+ ## Workspace
17
+
18
+ - Working directory: {sandbox-root}
19
+ - Output directory: `results/tc/{NN}/`
20
+
21
+ ## Constraints
22
+
23
+ - Use only declared scenario tools (`ace-*` and explicit exceptions)
24
+ - Keep artifacts under `results/tc/{NN}/`
25
+ - Do not write outside sandbox
26
+ - Execute actions only; do not assign PASS/FAIL in the runner file (verdicts belong in the companion verifier file)
27
+
28
+ <!--
29
+ Companion verifier file (`TC-{NNN}-{slug}.verify.md`) example:
30
+
31
+ # Goal {N} - {Goal Title}
32
+
33
+ ## Expectations
34
+
35
+ - Impact Checks:
36
+ - {Sandbox/project impact expectation}
37
+ - Artifact Checks:
38
+ - {Artifact expectation}
39
+ - Debug Fallback:
40
+ - {Optional stdout/stderr/exit evidence when needed}
41
+
42
+ ## Verdict
43
+
44
+ - Pass when impact and artifact checks are satisfied from sandbox evidence.
45
+ -->
@@ -0,0 +1,94 @@
1
+ ---
2
+ doc-type: template
3
+ title: "E2E Test Report: {test-id}"
4
+ purpose: Documentation for ace-test-runner-e2e/handbook/templates/test-report.template.md
5
+ ace-docs:
6
+ last-updated: 2026-02-24
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # E2E Test Report: {test-id}
11
+
12
+ ## Test Information
13
+
14
+ | Field | Value |
15
+ |-------|-------|
16
+ | Test ID | {test-id} |
17
+ | Title | {test-title} |
18
+ | Package | {package} |
19
+ | Agent | {agent-name} |
20
+ | Executed | {timestamp} |
21
+ | Duration | {duration} | <!-- Format: "1m 23s" or "45s" -->
22
+
23
+ ## Results Summary
24
+
25
+ | Test Case | Description | Status |
26
+ |-----------|-------------|--------|
27
+ | TC-001 | {description} | Pass/Fail |
28
+ | TC-002 | {description} | Pass/Fail |
29
+ | TC-003 | {description} | Pass/Fail |
30
+
31
+ ## Overall Status: {PASS/FAIL/PARTIAL}
32
+
33
+ **Passed:** {count} | **Failed:** {count} | **Total:** {count}
34
+
35
+ ## Failed Test Details
36
+
37
+ ```yaml
38
+ failed:
39
+ - tc: TC-{NNN}
40
+ category: tool-bug|runner-error|test-spec-error|infrastructure-error
41
+ evidence: "{Brief description of failure and supporting artifacts}"
42
+ ```
43
+
44
+ ### TC-{NNN}: {Test Case Name}
45
+
46
+ **Objective:** {What this test case was verifying}
47
+
48
+ **Expected:**
49
+ - {Expected result 1}
50
+ - {Expected result 2}
51
+
52
+ **Actual:**
53
+ - {Actual result 1}
54
+ - {Actual result 2}
55
+
56
+ **Error Output:**
57
+ ```
58
+ {Captured error output if any}
59
+ ```
60
+
61
+ **Analysis:** {Brief analysis of why the test failed}
62
+
63
+ ## Test Environment
64
+
65
+ {Record environment details relevant to test execution.}
66
+
67
+ | Component | Version/Value |
68
+ |-----------|---------------|
69
+ | Ruby | {version} |
70
+ | Tool 1 | {version} |
71
+ | Tool 2 | {version} |
72
+
73
+ ## Observations
74
+
75
+ {Any observations, edge cases, or issues discovered during test execution that aren't failures but are worth noting.}
76
+
77
+ - {Observation 1}
78
+ - {Observation 2}
79
+
80
+ ## Artifacts
81
+
82
+ {List any artifacts created during test execution.}
83
+
84
+ | Artifact | Path | Description |
85
+ |----------|------|-------------|
86
+ | Test data | `artifacts/` | Test input files |
87
+ | Logs | `artifacts/output.log` | Command output logs |
88
+
89
+ ## Next Steps
90
+
91
+ {Recommendations based on test results.}
92
+
93
+ - [ ] {Action item if tests failed}
94
+ - [ ] {Follow-up investigation if needed}
@@ -0,0 +1,126 @@
1
+ ---
2
+ doc-type: workflow
3
+ title: Analyze E2E Failures Workflow
4
+ purpose: Classify failed E2E test cases by root cause and produce an analysis report before any fix is applied
5
+ ace-docs:
6
+ last-updated: 2026-03-04
7
+ last-checked: 2026-03-21
8
+ ---
9
+
10
+ # Analyze E2E Failures Workflow
11
+
12
+ ## Goal
13
+
14
+ Analyze failing E2E scenarios and classify each failed test case before any fix is applied.
15
+
16
+ This workflow determines whether each failure is caused by:
17
+ - application/tool code
18
+ - E2E test definition/spec
19
+ - E2E runner/infrastructure
20
+
21
+ ## Hard Rule
22
+
23
+ - Do not edit package code, scenario files, or runner code in this workflow.
24
+ - Do not run rewrite/fix actions here.
25
+ - This workflow ends with an analysis report only.
26
+ - Do not ask the user where/how to fix during this workflow; decide from evidence.
27
+
28
+ ## Prerequisites
29
+
30
+ - E2E tests have already run and produced cache artifacts
31
+ - Reports are available under `.ace-local/test-e2e/*-reports/`
32
+
33
+ ## Project Context Loading
34
+
35
+ - Read and follow: `ace-bundle wfi://bundle`
36
+ - Read E2E guide: `ace-bundle guide://e2e-testing`
37
+ - Check recent changes: `git log --oneline -10`
38
+
39
+ ## Classification Categories
40
+
41
+ Use exactly one category per failed TC:
42
+
43
+ 1. `code-issue`
44
+ - Tool behavior is incorrect relative to expected product behavior
45
+
46
+ 2. `test-issue`
47
+ - Scenario/TC expectation, fixture, or steps are stale/incorrect
48
+
49
+ 3. `runner-infrastructure-issue`
50
+ - Sandbox/setup/provider/parsing/orchestration issue
51
+
52
+ ## Required Evidence Sources
53
+
54
+ Use these files as primary evidence:
55
+ - `summary.r.md`
56
+ - `experience.r.md`
57
+ - `metadata.yml`
58
+ - Relevant artifacts in `results/tc/{NN}/`
59
+
60
+ ## Analysis Procedure
61
+
62
+ 1. Locate latest failing report directories
63
+ ```bash
64
+ ls -lt .ace-local/test-e2e/*-reports/ 2>/dev/null | head -20
65
+ ```
66
+
67
+ 2. For each failing scenario, extract:
68
+ - failed TC IDs
69
+ - reported category/evidence from metadata
70
+ - corroborating artifact evidence
71
+
72
+ 3. Reclassify each failed TC if needed
73
+ - Use `code-issue`, `test-issue`, or `runner-infrastructure-issue`
74
+ - Add confidence: `high|medium|low`
75
+ - Add one disconfirming check per TC (a check whose result would show the chosen category is wrong)
76
+ - If confidence is `medium` or `low`, run at least one additional diagnostic read/search before final decision
77
+
78
+ 4. Recommend rerun scope (cost-aware)
79
+ - `scenario` (default)
80
+ - `package`
81
+ - `suite`
82
+ State an explicit rationale for the chosen scope.
83
+
84
+ 5. Choose autonomous fix decision per failed TC
85
+ - Select a single primary fix action
86
+ - Provide concrete file targets in priority order
87
+ - Define explicit no-touch boundaries
88
+ - Do not emit option lists that require user selection
89
+
90
+ ## Required Output Contract
91
+
92
+ Produce this section before exiting:
93
+
94
+ ```markdown
95
+ ## E2E Failure Analysis Report
96
+
97
+ | Scenario / TC | Category | Evidence | Fix Target | Fix Target Layer | Primary Candidate Files | Fallback Candidate Files | Do-Not-Touch Boundaries | Confidence | Disconfirming Check | Rerun Scope |
98
+ |---|---|---|---|---|---|---|---|---|---|---|
99
+ | TS-FOO-001 / TC-003 | test-issue | summary + artifact mismatch details | scenario files | test-scenario-runner | TC-003-foo.runner.md | TC-003-foo.verify.md | lib/** | high | re-run scenario after spec adjustment | scenario |
100
+ ```
101
+
102
+ Then include:
103
+
104
+ ```markdown
105
+ ## Fix Decisions
106
+ - First item to fix: ...
107
+ - Chosen fix decision: ...
108
+ - Why this target first (unblocks most): ...
109
+
110
+ ### Execution Plan Input
111
+ - First item to fix: ...
112
+ - Why first (unblocks most): ...
113
+ - Required verification commands: ...
114
+ - Expected pass criteria per command: ...
115
+ ```
116
+
117
+ ## Success Criteria
118
+
119
+ - Every failed TC has a category and evidence
120
+ - Category is traceable to report/artifact facts
121
+ - Fix target is explicit per failed TC
122
+ - Fix target files are explicit per failed TC (primary + fallback)
123
+ - No-touch boundaries are explicit per failed TC
124
+ - A single autonomous chosen fix decision is present per failed TC
125
+ - Rerun scope recommendation is cost-aware
126
+ - No code/scenario/runner edits were made in this workflow