RubyGems - ace-test-runner-e2e - Versions diffs - 0.29.0 - Mend

ace-test-runner-e2e 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

checksums.yaml +7 -0
data/.ace-defaults/e2e-runner/config.yml +70 -0
data/.ace-defaults/nav/protocols/guide-sources/ace-test-runner-e2e.yml +11 -0
data/.ace-defaults/nav/protocols/skill-sources/ace-test-runner-e2e.yml +19 -0
data/.ace-defaults/nav/protocols/tmpl-sources/ace-test-runner-e2e.yml +12 -0
data/.ace-defaults/nav/protocols/wfi-sources/ace-test-runner-e2e.yml +11 -0
data/CHANGELOG.md +1166 -0
data/LICENSE +21 -0
data/README.md +42 -0
data/Rakefile +15 -0
data/exe/ace-test-e2e +15 -0
data/exe/ace-test-e2e-sh +67 -0
data/exe/ace-test-e2e-suite +13 -0
data/handbook/guides/e2e-testing.g.md +124 -0
data/handbook/guides/scenario-yml-reference.g.md +182 -0
data/handbook/guides/tc-authoring.g.md +131 -0
data/handbook/skills/as-e2e-create/SKILL.md +30 -0
data/handbook/skills/as-e2e-fix/SKILL.md +35 -0
data/handbook/skills/as-e2e-manage/SKILL.md +31 -0
data/handbook/skills/as-e2e-plan-changes/SKILL.md +30 -0
data/handbook/skills/as-e2e-review/SKILL.md +35 -0
data/handbook/skills/as-e2e-rewrite/SKILL.md +31 -0
data/handbook/skills/as-e2e-run/SKILL.md +48 -0
data/handbook/skills/as-e2e-setup-sandbox/SKILL.md +34 -0
data/handbook/templates/ace-taskflow-fixture.template.md +322 -0
data/handbook/templates/agent-experience-report.template.md +89 -0
data/handbook/templates/metadata.template.yml +49 -0
data/handbook/templates/scenario.yml.template.yml +60 -0
data/handbook/templates/tc-file.template.md +45 -0
data/handbook/templates/test-report.template.md +94 -0
data/handbook/workflow-instructions/e2e/analyze-failures.wf.md +126 -0
data/handbook/workflow-instructions/e2e/create.wf.md +395 -0
data/handbook/workflow-instructions/e2e/execute.wf.md +253 -0
data/handbook/workflow-instructions/e2e/fix.wf.md +166 -0
data/handbook/workflow-instructions/e2e/manage.wf.md +179 -0
data/handbook/workflow-instructions/e2e/plan-changes.wf.md +255 -0
data/handbook/workflow-instructions/e2e/review.wf.md +286 -0
data/handbook/workflow-instructions/e2e/rewrite.wf.md +281 -0
data/handbook/workflow-instructions/e2e/run.wf.md +355 -0
data/handbook/workflow-instructions/e2e/setup-sandbox.wf.md +461 -0
data/lib/ace/test/end_to_end_runner/atoms/display_helpers.rb +234 -0
data/lib/ace/test/end_to_end_runner/atoms/prompt_builder.rb +199 -0
data/lib/ace/test/end_to_end_runner/atoms/result_parser.rb +166 -0
data/lib/ace/test/end_to_end_runner/atoms/skill_prompt_builder.rb +166 -0
data/lib/ace/test/end_to_end_runner/atoms/skill_result_parser.rb +244 -0
data/lib/ace/test/end_to_end_runner/atoms/suite_report_prompt_builder.rb +103 -0
data/lib/ace/test/end_to_end_runner/atoms/tc_fidelity_validator.rb +39 -0
data/lib/ace/test/end_to_end_runner/atoms/test_case_parser.rb +108 -0
data/lib/ace/test/end_to_end_runner/cli/commands/run_suite.rb +130 -0
data/lib/ace/test/end_to_end_runner/cli/commands/run_test.rb +156 -0
data/lib/ace/test/end_to_end_runner/models/test_case.rb +47 -0
data/lib/ace/test/end_to_end_runner/models/test_result.rb +115 -0
data/lib/ace/test/end_to_end_runner/models/test_scenario.rb +90 -0
data/lib/ace/test/end_to_end_runner/molecules/affected_detector.rb +92 -0
data/lib/ace/test/end_to_end_runner/molecules/config_loader.rb +75 -0
data/lib/ace/test/end_to_end_runner/molecules/failure_finder.rb +203 -0
data/lib/ace/test/end_to_end_runner/molecules/fixture_copier.rb +35 -0
data/lib/ace/test/end_to_end_runner/molecules/pipeline_executor.rb +121 -0
data/lib/ace/test/end_to_end_runner/molecules/pipeline_prompt_bundler.rb +182 -0
data/lib/ace/test/end_to_end_runner/molecules/pipeline_report_generator.rb +321 -0
data/lib/ace/test/end_to_end_runner/molecules/pipeline_sandbox_builder.rb +131 -0
data/lib/ace/test/end_to_end_runner/molecules/progress_display_manager.rb +172 -0
data/lib/ace/test/end_to_end_runner/molecules/report_writer.rb +259 -0
data/lib/ace/test/end_to_end_runner/molecules/scenario_loader.rb +254 -0
data/lib/ace/test/end_to_end_runner/molecules/setup_executor.rb +181 -0
data/lib/ace/test/end_to_end_runner/molecules/simple_display_manager.rb +72 -0
data/lib/ace/test/end_to_end_runner/molecules/suite_progress_display_manager.rb +223 -0
data/lib/ace/test/end_to_end_runner/molecules/suite_report_writer.rb +277 -0
data/lib/ace/test/end_to_end_runner/molecules/suite_simple_display_manager.rb +116 -0
data/lib/ace/test/end_to_end_runner/molecules/test_discoverer.rb +136 -0
data/lib/ace/test/end_to_end_runner/molecules/test_executor.rb +332 -0
data/lib/ace/test/end_to_end_runner/organisms/suite_orchestrator.rb +830 -0
data/lib/ace/test/end_to_end_runner/organisms/test_orchestrator.rb +442 -0
data/lib/ace/test/end_to_end_runner/version.rb +9 -0
data/lib/ace/test/end_to_end_runner.rb +71 -0
metadata +220 -0

data/handbook/skills/as-e2e-manage/SKILL.md ADDED Viewed

@@ -0,0 +1,31 @@
+---
+name: as-e2e-manage
+description: Orchestrate the 3-stage E2E test lifecycle pipeline (review → plan → rewrite)
+# bundle: wfi://e2e/manage
+# agent: general-purpose
+user-invocable: true
+allowed-tools:
+  - Bash(ace-bundle:*)
+  - Read
+  - Write
+  - Glob
+  - Grep
+  - Skill
+argument-hint: "<package> [--dry-run] [--run-tests]"
+last_modified: 2026-02-11
+source: ace-test-runner-e2e
+integration:
+  targets:
+    - claude
+    - codex
+    - gemini
+    - opencode
+    - pi
+  providers: {}
+skill:
+  kind: workflow
+  execution:
+    workflow: wfi://e2e/manage
+---
+Load and run `ace-bundle wfi://e2e/manage` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.

data/handbook/skills/as-e2e-plan-changes/SKILL.md ADDED Viewed

@@ -0,0 +1,30 @@
+---
+name: as-e2e-plan-changes
+description: Analyze coverage matrix and produce a concrete E2E test change plan
+# bundle: wfi://e2e/plan-changes
+# agent: general-purpose
+user-invocable: true
+allowed-tools:
+  - Bash(ace-bundle:*)
+  - Read
+  - Glob
+  - Grep
+  - Skill
+argument-hint: "<package> [--review-report <path>] [--scope <scenario-id>]"
+last_modified: 2026-02-11
+source: ace-test-runner-e2e
+integration:
+  targets:
+    - claude
+    - codex
+    - gemini
+    - opencode
+    - pi
+  providers: {}
+skill:
+  kind: workflow
+  execution:
+    workflow: wfi://e2e/plan-changes
+---
+Load and run `ace-bundle wfi://e2e/plan-changes` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.

data/handbook/skills/as-e2e-review/SKILL.md ADDED Viewed

@@ -0,0 +1,35 @@
+---
+name: as-e2e-review
+description: Deep exploration producing a coverage matrix of functionality, unit tests, and E2E tests
+# bundle: wfi://e2e/review
+# agent: general-purpose
+user-invocable: true
+allowed-tools:
+  - Bash(ace-bundle:*)
+  - Read
+  - Glob
+  - Grep
+argument-hint: "<package> [--scope <scenario-id>]"
+last_modified: 2026-02-11
+source: ace-test-runner-e2e
+integration:
+  targets:
+    - claude
+    - codex
+    - gemini
+    - opencode
+    - pi
+  providers: {}
+assign:
+  source: wfi://e2e/review
+  steps:
+    - name: verify-e2e
+      description: Review E2E coverage for modified packages and run targeted scenarios
+      tags: [testing, e2e, verification]
+skill:
+  kind: workflow
+  execution:
+    workflow: wfi://e2e/review
+---
+Load and run `ace-bundle wfi://e2e/review` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.

data/handbook/skills/as-e2e-rewrite/SKILL.md ADDED Viewed

@@ -0,0 +1,31 @@
+---
+name: as-e2e-rewrite
+description: Execute a change plan — delete, create, modify, and consolidate E2E tests
+# bundle: wfi://e2e/rewrite
+# agent: general-purpose
+user-invocable: true
+allowed-tools:
+  - Bash(ace-bundle:*)
+  - Read
+  - Write
+  - Glob
+  - Grep
+  - Skill
+argument-hint: "<package> [--plan <path>] [--dry-run]"
+last_modified: 2026-02-11
+source: ace-test-runner-e2e
+integration:
+  targets:
+    - claude
+    - codex
+    - gemini
+    - opencode
+    - pi
+  providers: {}
+skill:
+  kind: workflow
+  execution:
+    workflow: wfi://e2e/rewrite
+---
+Load and run `ace-bundle wfi://e2e/rewrite` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.

data/handbook/skills/as-e2e-run/SKILL.md ADDED Viewed

@@ -0,0 +1,48 @@
+---
+name: as-e2e-run
+description: Execute an E2E test scenario
+# bundle: wfi://e2e/run
+# agent: general-purpose
+user-invocable: true
+allowed-tools:
+  - Bash(ace-bundle:*)
+  - Read
+  - Write
+  - Glob
+  - Grep
+argument-hint: "[package] [test-id] [--run-id ID] [--sandbox PATH] [--env K=V]"
+last_modified: 2026-02-11
+source: ace-test-runner-e2e
+skill:
+  kind: workflow
+  execution:
+    workflow: wfi://e2e/run
+---
+<!-- Route to the appropriate workflow based on arguments -->
+<!-- --sandbox present → focused execution workflow (pre-populated sandbox) -->
+<!-- --sandbox absent  → full workflow (locate, setup, execute) -->
+If `$ARGUMENTS` contains `--sandbox`:
+  read and run `ace-bundle wfi://e2e/execute`
+Otherwise:
+  read and run `ace-bundle wfi://e2e/run`
+ARGUMENTS: $ARGUMENTS
+## Execution Context
+- `/as-e2e-run ...` is a chat slash command, not a shell command.
+- Do **not** run `/as-...` slash commands in bash (this causes `command not found` and no reports).
+- If slash commands are unavailable in the current environment, report that limitation explicitly in `Issues`.
+## Subagent Return Contract
+When invoked as a subagent (via a batch orchestrator such as `/as-assign-run-in-batches`), return a structured summary instead of verbose output:
+Return `Test ID`, `Status`, `Passed`, `Failed`, `Total`, `Report Paths`, and `Issues`.
+Do NOT include full report contents. Reports are written to disk; return only paths and summary counts for aggregation by the orchestrator.

data/handbook/skills/as-e2e-setup-sandbox/SKILL.md ADDED Viewed

@@ -0,0 +1,34 @@
+---
+name: as-e2e-setup-sandbox
+description: Standardized sandbox setup for safe E2E tests with external APIs
+# bundle: wfi://e2e/setup-sandbox
+# context: no-fork
+# agent: general-purpose
+user-invocable: true
+allowed-tools:
+  - Bash(ace-bundle:*)
+  - Bash(ace-test:*)
+  - Bash(ace-nav:*)
+  - Read
+  - Write
+  - Edit
+  - Grep
+  - Glob
+argument-hint: "[test-id | scenario]"
+last_modified: 2026-02-01
+source: ace-test-runner-e2e
+integration:
+  targets:
+    - claude
+    - codex
+    - gemini
+    - opencode
+    - pi
+  providers: {}
+skill:
+  kind: workflow
+  execution:
+    workflow: wfi://e2e/setup-sandbox
+---
+Load and run `ace-bundle wfi://e2e/setup-sandbox` in the current project, then follow the loaded workflow as the source of truth and execute it end-to-end instead of only summarizing it.

data/handbook/templates/ace-taskflow-fixture.template.md ADDED Viewed

@@ -0,0 +1,322 @@
+---
+doc-type: template
+title: ACE Taskflow Test Fixture Template
+purpose: Documentation for ace-test-runner-e2e/handbook/templates/ace-taskflow-fixture.template.md
+ace-docs:
+  last-updated: 2026-02-25
+  last-checked: 2026-03-21
+---
+# ACE Taskflow Test Fixture Template
+This template provides scaffolding for E2E tests that need valid ace-taskflow structures.
+## Basic Task Fixture
+Create a minimal valid taskflow structure:
+```bash
+# Create release directory structure
+mkdir -p "$REPO_DIR/.ace-taskflow/v.test/tasks/001-feature"
+# Create a valid task file
+cat > "$REPO_DIR/.ace-taskflow/v.test/tasks/001-feature/001-test-task.s.md" << 'EOF'
+---
+id: v.test+task.001
+status: pending
+priority: medium
+estimate: 1h
+dependencies: []
+---
+# Test Task Title
+## Objective
+This is a test task for E2E testing purposes.
+## Implementation Plan
+### Execution Steps
+- [ ] Step 1: First action
+- [ ] Step 2: Second action
+## Acceptance Criteria
+- [ ] Criterion 1
+- [ ] Criterion 2
+EOF
+```
+## Task with Worktree Metadata
+For tests involving ace-git-worktree:
+```bash
+cat > "$REPO_DIR/.ace-taskflow/v.test/tasks/001-feature/001-test-task.s.md" << 'EOF'
+---
+id: v.test+task.001
+status: in-progress
+priority: medium
+estimate: 1h
+dependencies: []
+worktree:
+  branch: 001-test-task
+  path: "../project-task.001"
+  created_at: '2026-01-15 10:00:00'
+  updated_at: '2026-01-15 10:00:00'
+  target_branch: main
+---
+# Test Task with Worktree
+## Objective
+Task with worktree metadata for worktree-related E2E tests.
+## Implementation Plan
+### Execution Steps
+- [ ] Step 1: Work in worktree
+## Acceptance Criteria
+- [ ] Worktree created
+EOF
+```
+## Parent Task with Subtasks (Orchestrator)
+For tests involving task hierarchies:
+```bash
+# Create parent task directory
+mkdir -p "$REPO_DIR/.ace-taskflow/v.test/tasks/100-parent-feature"
+# Create orchestrator task
+cat > "$REPO_DIR/.ace-taskflow/v.test/tasks/100-parent-feature/100-orchestrator.s.md" << 'EOF'
+---
+id: v.test+task.100
+status: pending
+priority: high
+estimate: 8h
+dependencies: []
+subtasks:
+  - 100.01
+  - 100.02
+---
+# Parent Feature Task
+## Objective
+Orchestrator task that coordinates subtasks.
+## Subtasks
+- 100.01: First subtask
+- 100.02: Second subtask
+## Acceptance Criteria
+- [ ] All subtasks completed
+EOF
+# Create first subtask
+cat > "$REPO_DIR/.ace-taskflow/v.test/tasks/100-parent-feature/100.01-first-subtask.s.md" << 'EOF'
+---
+id: v.test+task.100.01
+status: pending
+priority: medium
+estimate: 2h
+dependencies: []
+parent: 100
+---
+# First Subtask
+## Objective
+First part of the parent feature.
+## Implementation Plan
+- [ ] Implement first component
+## Acceptance Criteria
+- [ ] First component done
+EOF
+# Create second subtask
+cat > "$REPO_DIR/.ace-taskflow/v.test/tasks/100-parent-feature/100.02-second-subtask.s.md" << 'EOF'
+---
+id: v.test+task.100.02
+status: pending
+priority: medium
+estimate: 2h
+dependencies:
+  - 100.01
+parent: 100
+---
+# Second Subtask
+## Objective
+Second part of the parent feature, depends on first.
+## Implementation Plan
+- [ ] Implement second component
+## Acceptance Criteria
+- [ ] Second component done
+EOF
+```
+## Release Configuration
+For tests that need a complete release setup:
+```bash
+# Create release.yml
+cat > "$REPO_DIR/.ace-taskflow/v.test/release.yml" << 'EOF'
+id: v.test
+title: Test Release
+status: active
+started: 2026-01-01
+EOF
+```
+## Complete Test Setup Example
+Combining all patterns:
+```bash
+# Create isolated git repository
+REPO_DIR="$TEST_DIR/test-repo"
+mkdir -p "$REPO_DIR"
+cd "$REPO_DIR"
+git init --quiet .
+git config user.email "test@example.com"
+git config user.name "Test User"
+# Create taskflow structure
+mkdir -p .ace-taskflow/v.test/tasks/001-feature
+# Create release configuration
+cat > .ace-taskflow/v.test/release.yml << 'EOF'
+id: v.test
+title: Test Release
+status: active
+started: 2026-01-01
+EOF
+# Create task
+cat > .ace-taskflow/v.test/tasks/001-feature/001-test-task.s.md << 'EOF'
+---
+id: v.test+task.001
+status: pending
+priority: medium
+estimate: 1h
+dependencies: []
+---
+# Test Task
+## Objective
+Test task for E2E testing.
+## Implementation Plan
+- [ ] Do something
+## Acceptance Criteria
+- [ ] Done
+EOF
+# Commit the structure
+git add .ace-taskflow/
+git commit -m "Add taskflow structure" --quiet
+# Set PROJECT_ROOT_PATH for isolated testing
+export PROJECT_ROOT_PATH="$REPO_DIR"
+# Now ace-taskflow commands will use this isolated structure
+# ace-task show 001  # Should find the test task
+```
+## YAML Frontmatter Reference
+### Required Fields
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | string | Task ID in format `v.{version}+task.{number}` |
+| `status` | enum | One of: `pending`, `in-progress`, `done`, `blocked` |
+| `priority` | enum | One of: `high`, `medium`, `low` |
+### Optional Fields
+| Field | Type | Description |
+|-------|------|-------------|
+| `estimate` | string | Time estimate (e.g., `2h`, `1d`) |
+| `dependencies` | array | Task IDs this task depends on |
+| `parent` | string | Parent task number for subtasks |
+| `subtasks` | array | Child task numbers for orchestrators |
+| `worktree` | object | Worktree metadata if task uses worktree |
+### Status Values
+- `pending` - Not started
+- `in-progress` - Currently being worked on
+- `done` - Completed
+- `blocked` - Cannot proceed due to external blocker
+## Common Patterns
+### Testing Task Selection
+```bash
+# Verify ace-taskflow can find the task
+ace-task show 001
+# Should output task details
+# Verify task file path
+ace-task show 001 --path
+# Should output: .ace-taskflow/v.test/tasks/001-feature/001-test-task.s.md
+```
+### Testing Status Updates
+```bash
+# Create task in pending state
+# ... create task fixture ...
+# Mark as in-progress
+ace-task start 001
+# Verify status changed
+# Mark as done
+ace-task done 001
+# Verify status changed
+```
+### Testing Worktree Integration
+```bash
+# Create task with worktree metadata
+# ... create task fixture with worktree ...
+# Create corresponding worktree
+git worktree add "../project-task.001" -b 001-test-task
+# Verify worktree detection
+ace-git-worktree list
+```

data/handbook/templates/agent-experience-report.template.md ADDED Viewed

@@ -0,0 +1,89 @@
+---
+doc-type: template
+title: "Agent Experience Report: {test-id}"
+purpose: Documentation for ace-test-runner-e2e/handbook/templates/agent-experience-report.template.md
+ace-docs:
+  last-updated: 2026-02-14
+  last-checked: 2026-03-21
+---
+# Agent Experience Report: {test-id}
+## Summary
+{Brief 2-3 sentence summary of the test execution experience. Note overall friction level: smooth, minor issues, significant friction, or blocking issues. If no friction was encountered, state "No significant friction encountered" and briefly note what worked well.}
+## Friction Points
+| Severity | Category | Issue | Suggested Fix |
+|----------|----------|-------|---------------|
+| high/medium/low | docs/tool/cli | {Brief description} | {Actionable fix} |
+### Documentation Gaps
+{List any documentation that was missing, unclear, incomplete, or outdated. Include specific files/sections where improvements would help.}
+- {Documentation gap 1}
+- {Documentation gap 2}
+### Tool Behavior Issues
+{Describe any unexpected tool behavior, confusing error messages, or surprising results. Include the command and what was unexpected.}
+- {Tool issue 1}
+- {Tool issue 2}
+### API/CLI Friction
+{Note any API or CLI friction: inconsistent flags, missing options, awkward workflows, or verbose output.}
+- {CLI friction 1}
+- {CLI friction 2}
+## Root Cause Analysis
+{For any failures or significant friction, analyze WHY the issue occurred, not just WHAT happened. Consider: missing validation, unclear error messages, documentation gaps, design decisions, edge cases.}
+### {Issue 1 Title}
+**What happened:** {Description}
+**Why it happened:** {Root cause analysis}
+**Impact:** {How it affected test execution}
+## Improvement Suggestions
+{Concrete, actionable suggestions for improving the developer/agent experience. Prioritize by impact.}
+### High Priority
+- [ ] {Suggestion 1}
+- [ ] {Suggestion 2}
+### Medium Priority
+- [ ] {Suggestion 3}
+### Low Priority
+- [ ] {Suggestion 4}
+## Workarounds Used
+{Document any workarounds the agent had to employ to complete the test. These indicate areas needing improvement.}
+- **Issue:** {What required a workaround}
+  **Workaround:** {What was done instead}
+## Positive Observations
+{Note what worked well, was well-documented, or provided good DX. This helps identify patterns to replicate.}
+- {Positive observation 1}
+- {Positive observation 2}
+## Recommendations for Test Scenario
+{Suggestions for improving this specific test scenario based on execution experience.}
+- {Recommendation 1}
+- {Recommendation 2}

data/handbook/templates/metadata.template.yml ADDED Viewed

@@ -0,0 +1,49 @@
+# E2E Test Run Metadata
+# Generated by ace-test-runner-e2e workflow
+# Run identification
+run-id: "{run-id}"
+test-id: "{test-id}"
+package: "{package}"
+# Per-TC identification (TS-format)
+scenario_id: "{scenario-id}"      # TS-{AREA}-{NNN} for TS-format, null for MT-format
+tc_id: "{tc-id}"                  # TC-{NNN} for per-TC execution, null for full scenario
+tc_independent: false             # true if TC can run in isolation without prior state
+setup_method: "agent"             # "agent" (agent-driven) or "deterministic" (Ruby SetupExecutor)
+# Execution details
+agent: "{agent-name}"
+started: "{start-timestamp}"  # ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ
+completed: "{end-timestamp}"  # ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ
+duration: "{duration-seconds}s"
+# Test results
+status: "pass|fail|partial|incomplete"
+results:
+  passed: 0
+  failed: 0
+  total: 0
+# Git context
+git:
+  branch: "{branch-name}"
+  commit: "{commit-sha}"
+  dirty: false
+# Tool versions
+tools:
+  ruby: "{ruby-version}"
+  # Add other relevant tool versions
+  # standardrb: "{version}"
+  # rubocop: "{version}"
+# Environment
+environment:
+  os: "{os-name}"
+  arch: "{architecture}"
+# Cleanup status
+cleanup:
+  enabled: false
+  completed: false