npm - @haystackeditor/cli - Versions diffs - 0.4.0 → 0.6.0 - Mend

@haystackeditor/cli 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +2 -2
package/dist/commands/config.d.ts +19 -0
package/dist/commands/config.js +133 -0
package/dist/commands/init.d.ts +1 -1
package/dist/commands/init.js +34 -19
package/dist/commands/status.d.ts +1 -1
package/dist/commands/status.js +2 -2
package/dist/index.d.ts +1 -1
package/dist/index.js +24 -4
package/dist/types.d.ts +69 -2
package/dist/types.js +1 -1
package/dist/utils/config.d.ts +1 -1
package/dist/utils/config.js +4 -8
package/dist/utils/detect.d.ts +35 -2
package/dist/utils/detect.js +139 -1
package/dist/utils/secrets.d.ts +1 -1
package/dist/utils/secrets.js +1 -1
package/dist/utils/skill.d.ts +2 -2
package/dist/utils/skill.js +655 -446
package/package.json +1 -1

package/dist/utils/skill.js CHANGED

@@ -6,236 +6,487 @@ import * as fs from 'fs/promises';
 import * as path from 'path';
 /**
  * Claude Code slash command - invoked with /haystack
- * This is the "one command" entry point for users.
- * Uses task decomposition - complete one step, validate, then next step.
  */
 const CLAUDE_COMMAND_CONTENT = `# Set Up Haystack Verification
-You are setting up Haystack PR verification. Complete each step IN ORDER. Do NOT skip ahead.
+Follow .agents/skills/setup-haystack.md to set up Haystack verification for this repo.
+`;
+const SKILL_CONTENT = `# Haystack Verification Setup
+**Your job**: Help the verification system understand this app so it can visually verify PRs work correctly.
+The verification system has two parts:
+1. **Planner** - An AI that explores the codebase to figure out what to test
+2. **Executor** - Takes screenshots based on the plan
+Your \`.haystack.json\` flows feed into the **corpus** - they're hints for the Planner about what routes exist, what selectors to use, and what the core user journey looks like.
 ---
-## STEP 1: Initialize config
+## Step 1: Generate Base Config
 \`\`\`bash
 npx @haystackeditor/cli init --yes
 \`\`\`
-✅ **Checkpoint**: \`.haystack.yml\` exists with dev_server config.
+This detects your dev server command, port, and service dependencies.
+---
+## Step 2: Understand the App's Core Feature
+Before writing any flows, answer this:
+**What is the ONE main thing users do in this app?**
+Read the codebase to understand the core user journey:
+- E-commerce: browse products → add to cart → checkout
+- Dashboard: view metrics → filter data → export
+- Editor: create document → edit → save
+- Social: view feed → create post → interact
+- SaaS: sign up → configure → use feature
+Your flows should describe THIS journey, not just "pages load".
 ---
-## STEP 2: Discover all routes
+## Step 3: Assess Data Needs
-Find every route in the app:
 \`\`\`bash
-grep -r "path=\\|Route\\|<Link" src/ --include="*.tsx" | head -30
-ls src/pages/ src/app/ 2>/dev/null
+# Find API calls
+grep -r "fetch(\\|useQuery\\|useSWR\\|axios" src/ --include="*.tsx" --include="*.ts" | wc -l
+# Find dynamic routes
+grep -r "useParams\\|router.query\\|\\[.*\\]" src/ --include="*.tsx" | head -10
+# Find external domains
+grep -r "https://" src/ --include="*.ts" --include="*.tsx" | grep -v node_modules | head -10
 \`\`\`
-Add a flow for EACH route to \`.haystack.yml\`. Use \`trigger: always\` for main pages, \`trigger: on_change\` with \`watch_patterns\` for others.
+---
-✅ **Checkpoint**: Count your flows. You should have one for every route.
+## STOP - Ask About Test Data Strategy
+**You MUST ask the user before proceeding:**
+> I analyzed the codebase and found:
+> - X API fetch calls
+> - Y dynamic routes with parameters
+> - These external domains: [list them]
+>
+> **How should I handle test data for verification?**
+>
+> 1. **Passthrough** - Let API calls through to real servers (add domains to \`network.allow\`)
+> 2. **Staging URL** - Point flows at your staging/demo environment
+> 3. **Local fixtures** - JSON files in \`fixtures/\` directory
+> 4. **Skip data pages** - Only test static pages that don't need API data
+**Wait for the user's response before continuing.**
 ---
-## STEP 3: Fix ALL selectors (CRITICAL)
+## Step 4: Write Flows
-⛔ **STOP**: Look at every \`wait_for\` selector in your flows.
+Flows tell the Planner about your app's routes, UI elements, and user journeys.
-If ANY selector is \`#root\`, \`div\`, or \`h1\`, you MUST fix it now:
-\`\`\`bash
-# Find specific selectors in the codebase
-grep -r "data-testid\\|className=" src/components/ --include="*.tsx" | head -20
+### Structure
+\`\`\`yaml
+flows:
+  - name: "Descriptive name of what this tests"
+    trigger: always  # or on_change with watch_patterns
+    steps:
+      - action: navigate
+        url: "/"
+      - action: wait_for
+        selector: "[data-testid='specific-element']"
+      - action: click
+        selector: "[data-testid='button']"
+      - action: screenshot
+        name: "result"
 \`\`\`
-Replace generic selectors with specific ones:
-- \`[data-testid='dashboard']\`
-- \`.dashboard-content\`
-- \`[role='main']\`
+### What to Include
-✅ **Checkpoint**: Run \`grep "wait_for" .haystack.yml\` - NONE should have \`#root\`.
+**Core journey flow** (trigger: always):
+- The main thing users do in your app
+- Multiple steps with interactions (click, type)
+- Waits for meaningful state changes
----
+**Route coverage flows**:
+- One flow per major route
+- Uses specific selectors the Planner can learn from
+- \`watch_patterns\` to only run when relevant files change
-## STEP 4: Add 3+ interactive flows (REQUIRED)
+### Finding Good Selectors
-Find interactive elements:
 \`\`\`bash
-grep -r "onClick\\|Modal\\|Dialog\\|toggle\\|Switch" src/ --include="*.tsx" | head -20
+# Find data-testid attributes
+grep -r "data-testid" src/ --include="*.tsx" | head -20
+# Find aria-labels
+grep -r "aria-label" src/ --include="*.tsx" | head -20
+# Find component class names
+grep -r "className=" src/components/ --include="*.tsx" | head -20
 \`\`\`
-Add AT LEAST 3 flows with \`click\` or \`type\` actions:
+Use specific selectors like:
+- \`[data-testid='dashboard-chart']\`
+- \`[aria-label='Submit form']\`
+- \`.pricing-table\`
+- \`button[type='submit']\`
+If good selectors don't exist, add \`data-testid\` to key components.
+### watch_patterns
+For flows that only matter when certain files change:
 \`\`\`yaml
-- name: "Theme toggle works"
+- name: "Settings page works"
+  trigger: on_change
+  watch_patterns:
+    - "src/pages/settings/**"
+    - "src/components/settings/**"
   steps:
     - action: navigate
-      url: "/"
-    - action: click
-      selector: "[data-testid='theme-toggle']"
-    - action: screenshot
-      name: "after-toggle"
+      url: "/settings"
+    # ...
+\`\`\`
-- name: "Modal opens"
-  steps:
-    - action: navigate
-      url: "/dashboard"
-    - action: click
-      selector: "button[aria-label='Settings']"
-    - action: wait_for
-      selector: "[role='dialog']"
-    - action: screenshot
-      name: "modal-open"
+---
+## Step 5: Configure Fixtures (if needed)
+Based on user's answer in Step 3:
+**Passthrough**:
+\`\`\`yaml
+network:
+  allow:
+    - "api.example.com"
+    - "cdn.example.com"
+\`\`\`
+**Staging URL**:
+\`\`\`yaml
+flows:
+  - name: "Dashboard with real data"
+    steps:
+      - action: navigate
+        url: "https://staging.example.com/dashboard"
 \`\`\`
-✅ **Checkpoint**: Run \`grep -c "action: click" .haystack.yml\` - must be ≥3.
+**Local fixtures**:
+\`\`\`yaml
+fixtures:
+  - pattern: "/api/user"
+    source: "file://fixtures/user.json"
+  - pattern: "/api/data/*"
+    source: "file://fixtures/data.json"
+\`\`\`
 ---
-## STEP 5: Add fixtures for data-dependent pages
+## Step 6: Commit
-Find pages that fetch data:
 \`\`\`bash
-grep -r "useParams\\|fetch(\\|useQuery" src/ --include="*.tsx" | head -10
+git add .haystack.json fixtures/
+git commit -m "Add Haystack verification"
 \`\`\`
-For each data-dependent page, add fixtures:
+Done! The Planner will use your flows to understand the app and create verification plans for PRs.
+`;
+const REFERENCE_CONTENT = `# Haystack Reference
+Reference material for \`.haystack.json\` configuration. Only consult when needed for a specific step.
+---
+## Fixture Patterns
+### Option 1: Passthrough (Recommended - Easiest!)
 \`\`\`yaml
+# Let API calls through to real servers - no mocking needed
 fixtures:
-  - pattern: "/api/user/*"
-    source: "https://staging.example.com/api/user/demo"
-  - pattern: "/api/pr/*"
-    source: "file://fixtures/sample-pr.json"
+  - pattern: "/api/*"
+    source: passthrough
+network:
+  allow:
+    - "api.example.com"
+    - "cdn.example.com"
 \`\`\`
-✅ **Checkpoint**: Every page with \`:id\` or API calls has a fixture.
+**This is the easiest option.** Just allow the domains your app calls and let real APIs handle requests. No fixtures to maintain.
----
+### Option 2: Staging/Demo URL
+\`\`\`yaml
+# Point flows directly at staging - no fixtures needed
+flows:
+  - name: "Dashboard loads"
+    url: "https://staging.example.com/dashboard"
+    wait_for_selector: ".dashboard-content"
-## STEP 6: Final validation
+network:
+  allow:
+    - "staging.example.com"
+\`\`\`
-Count and verify:
-\`\`\`bash
-echo "=== Selector check (should be 0 #root) ==="
-grep "#root" .haystack.yml | wc -l
+### Option 3: Pre-signed URLs (S3/Cloud Storage)
+\`\`\`yaml
+fixtures:
+  - pattern: "/api/data"
+    source: "$FIXTURE_DATA_URL"      # Pre-signed URL from CI
+  - pattern: "/api/users"
+    source: "$FIXTURE_USERS_URL"
+secrets:
+  - FIXTURE_DATA_URL
+  - FIXTURE_USERS_URL
+\`\`\`
-echo "=== Interactive flows (should be ≥3) ==="
-grep -c "action: click\\|action: type" .haystack.yml
+See "Pre-signed URL Setup" below for CI configuration.
-echo "=== Fixtures (should be >0 for data pages) ==="
-grep -c "pattern:" .haystack.yml
+### Option 4: Local Fixtures (Simple Apps Only)
+\`\`\`yaml
+fixtures:
+  - pattern: "/api/user"
+    source: "file://fixtures/user.json"
+  - pattern: "/api/settings"
+    source: "file://fixtures/settings.json"
 \`\`\`
-If any check fails, GO BACK to that step and fix it.
+Create matching JSON files in \`fixtures/\` directory. Only use for small, stable data.
+### Option 5: HTTP Endpoint (Self-hosted)
+\`\`\`yaml
+fixtures:
+  - pattern: "/api/*"
+    source: "https://fixtures.yourcompany.com/api"
+    headers:
+      Authorization: "Bearer $FIXTURES_TOKEN"
+network:
+  allow:
+    - "fixtures.yourcompany.com"
+\`\`\`
 ---
-## STEP 7: Commit
+## Pre-signed URL Setup
+Generate temporary URLs in CI, pass to Haystack. No cloud credentials in sandbox.
+**Note:** URLs valid for 24 hours. Run on push to main to keep URLs fresh.
+### AWS S3
 \`\`\`bash
-git add .haystack.yml .agents/ .claude/ fixtures/
-git commit -m "Add Haystack verification"
+# Generate pre-signed URL (valid 24 hours)
+aws s3 presign s3://my-bucket/fixtures/data.json --expires-in 86400
 \`\`\`
-🎉 Done!
-`;
-const SKILL_CONTENT = `# Haystack Verification
+### Google Cloud Storage
+\`\`\`bash
+# Generate signed URL (valid 24 hours)
+gcloud storage sign-url gs://my-bucket/fixtures/data.json --duration=24h
+\`\`\`
-## What is Haystack?
+### Azure Blob Storage
+\`\`\`bash
+# Generate SAS URL (valid 24 hours)
+az storage blob generate-sas --account-name myaccount --container fixtures \\
+  --name data.json --permissions r --expiry $(date -u -d '+1 day' +%Y-%m-%dT%H:%MZ) \\
+  --full-uri
+\`\`\`
-Haystack provides **automated PR verification**. When a PR is opened:
+### Cloudflare R2 / DigitalOcean Spaces / MinIO
+\`\`\`bash
+# S3-compatible - use aws cli with custom endpoint
+aws s3 presign s3://my-bucket/data.json --expires-in 86400 \\
+  --endpoint-url https://your-r2-endpoint.com
+\`\`\`
-1. A sandbox spins up with your app running (dev server + any backend services)
-2. An AI agent reads the "flows" in \`.haystack.yml\`
-3. The agent executes each flow to verify changes work correctly
-4. Results (screenshots, API responses, errors) are posted to the PR
+---
-**Frontend flows**: The agent uses a browser to navigate pages, click buttons, fill forms, and take screenshots. This catches visual regressions and broken interactions.
+### GitHub Actions Example
+\`\`\`yaml
+# Run on push to main to keep URLs fresh
+on:
+  push:
+    branches: [main]
+jobs:
+  update-fixture-urls:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write  # OIDC - no long-lived secrets
+    steps:
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::123456789:role/haystack-fixtures
+          aws-region: us-east-1
+      - name: Generate pre-signed URLs (valid 24 hours)
+        run: |
+          URL=$(aws s3 presign s3://my-bucket/fixtures/data.json --expires-in 86400)
+          haystack secrets set FIXTURE_DATA_URL "$URL"
+\`\`\`
-**Backend flows**: The agent makes HTTP requests to API endpoints and verifies responses. This catches broken endpoints and API contract changes.
+### General Approach
+1. Use OIDC to get temporary cloud credentials (no long-lived secrets)
+2. Generate pre-signed/signed URL for each fixture file (24 hour expiry)
+3. Store with \`haystack secrets set FIXTURE_URL "$URL"\`
+4. Run on push to main to keep URLs fresh
-**Without flows, Haystack has nothing to verify.** The config's \`dev_server\` settings just tell it how to start your app - the flows tell it what to actually test.
+---
-## Setup Workflow
+## Flow Examples
-### Step 1: Generate base config
-\`\`\`bash
-npx @haystackeditor/cli init
+### Basic Page Flow
+\`\`\`yaml
+flows:
+  - name: "Dashboard loads"
+    description: "Verify dashboard renders correctly"
+    trigger: always
+    watch_patterns:
+      - "src/components/dashboard/**"
+    steps:
+      - action: navigate
+        url: "/dashboard"
+      - action: wait_for
+        selector: "[data-testid='dashboard-content']"
+      - action: assert_no_errors
+      - action: screenshot
+        name: "dashboard"
 \`\`\`
-This auto-detects framework, ports, package manager and creates \`.haystack.yml\`.
-### Step 2: Review and customize
-After init, review the generated config and customize based on the app:
+### Interactive Flow (click)
+\`\`\`yaml
+  - name: "Modal opens"
+    trigger: on_change
+    watch_patterns:
+      - "src/components/settings/**"
+    steps:
+      - action: navigate
+        url: "/settings"
+      - action: wait_for
+        selector: ".settings-page"
+      - action: click
+        selector: "button[aria-label='Settings']"
+      - action: wait_for
+        selector: "[role='dialog']"
+      - action: screenshot
+        name: "settings-modal"
+\`\`\`
-| If the app has... | Add this |
-|-------------------|----------|
-| Login/authentication | Auth bypass env var (see Auth Bypass section) |
-| Key user journeys | Flows describing what to verify (see Flows section) |
-| API calls needing auth | Fixtures to mock responses (see Fixtures section) |
+### Form Flow (type)
+\`\`\`yaml
+  - name: "Contact form works"
+    trigger: on_change
+    watch_patterns:
+      - "src/components/contact/**"
+    steps:
+      - action: navigate
+        url: "/contact"
+      - action: wait_for
+        selector: "form"
+      - action: type
+        selector: "input[name='email']"
+        value: "test@example.com"
+      - action: click
+        selector: "button[type='submit']"
+      - action: wait_for
+        selector: ".success-message"
+\`\`\`
-**Minimum viable config**: Just \`dev_server\` settings. Flows and fixtures can be added later as needed.
+### Backend API Flow
+\`\`\`yaml
+  - name: "API health check"
+    trigger: always
+    steps:
+      - action: http_request
+        method: GET
+        url: "http://localhost:3001/health"
+      - action: assert_status
+        status: 200
+\`\`\`
+---
+## Finding Good Selectors
-### Step 3: Commit
+**Priority order:**
+1. \`[data-testid='feature-name']\` - Best
+2. \`[role='main']\`, \`[aria-label='...']\` - Semantic
+3. \`.specific-class-name\` - Component-specific
+4. Avoid: \`#root\`, \`div\`, \`h1\` - Too generic
+**How to find:**
 \`\`\`bash
-git add .haystack.yml .agents/
-git commit -m "Add Haystack verification"
+# Find data-testid attributes
+grep -r "data-testid" src/ --include="*.tsx"
+# Find class names
+grep -r "className=" src/components/Dashboard.tsx
+# Find semantic roles
+grep -r "role=\\|aria-label=" src/ --include="*.tsx"
 \`\`\`
-## Config Reference
+---
+## Config Structure
+⚠️ **IMPORTANT**: \`flows\` must be at TOP LEVEL, not nested under \`verification\`!
 \`\`\`yaml
 version: "1"
 name: my-app
-# Dev server configuration
 dev_server:
   command: pnpm dev
   port: 3000
-  ready_pattern: "Local:"    # Text in stdout when server is ready
-  env:
-    SKIP_AUTH: "true"        # Auth bypass for testing
+  ready_pattern: "ready|Local:|started"  # Regex for server ready
-# Verification commands (run in PR checks)
+# Verification commands - MUST include build
 verification:
   commands:
     - name: build
-      run: pnpm build
+      run: pnpm build       # ← REQUIRED
     - name: lint
       run: pnpm lint
     - name: typecheck
       run: pnpm tsc --noEmit
-# Flows: Advisory descriptions for AI verification agent
-# The agent reads these to understand WHAT to verify, then navigates autonomously
+network:
+  allow:
+    - "api.example.com"
+# ⚠️ flows at TOP LEVEL - NOT under verification!
 flows:
-  - name: "Landing page loads"
-    description: "Verify the landing page renders without errors"
+  - name: "Page loads"
     trigger: always
     steps:
       - action: navigate
         url: "/"
       - action: wait_for
-        selector: "[data-testid='landing']"
+        selector: "[data-testid='main']"
       - action: screenshot
-        name: "landing"
+        name: "home"
-  - name: "Dashboard loads with data"
-    description: "Verify dashboard shows user data correctly"
-    trigger: on_change
-    watch_patterns:
-      - "src/components/dashboard/**"
-    steps:
-      - action: navigate
-        url: "/dashboard"
-      - action: wait_for
-        selector: ".dashboard-content"
-      - action: assert_no_errors
+secrets:
+  - API_TOKEN
+fixtures:
+  - pattern: "/api/*"
+    source: passthrough
 \`\`\`
-## Monorepo Configuration
+---
-For monorepos with multiple services:
+## Monorepo Configuration
 \`\`\`yaml
 version: "1"
@@ -247,8 +498,6 @@ services:
     command: pnpm dev
     port: 3000
     ready_pattern: "Local:"
-    env:
-      VITE_SKIP_AUTH: "true"
   api:
     root: packages/api
@@ -260,410 +509,370 @@ services:
     root: infra/worker
     command: pnpm wrangler dev
     port: 8787
-  # Batch jobs (run once, don't stay running)
-  analysis:
-    root: packages/analysis
-    type: batch
-    command: pnpm start
-verification:
-  commands:
-    - name: build
-      run: pnpm build
-    - name: lint
-      run: pnpm lint
 \`\`\`
-## Multi-Repo Configuration
-When services live in separate git repositories (not a monorepo), each repo gets its own \`.haystack.yml\`:
-**Frontend repo** - Mock the API it depends on:
-\`\`\`yaml
-version: "1"
-name: frontend
-dev_server:
-  command: pnpm dev
-  port: 3000
-  env:
-    VITE_API_URL: "http://localhost:8080"  # Will be mocked
+---
-# Mock the API from the other repo
-fixtures:
-  - pattern: "/api/*"
-    source: "file://fixtures/api-responses.json"
-\`\`\`
+## Flow Triggers
-**API repo** - Standalone verification:
-\`\`\`yaml
-version: "1"
-name: api
+| Trigger | When it runs |
+|---------|--------------|
+| \`always\` | Every PR |
+| \`on_change\` | Only when \`watch_patterns\` match changed files |
+`;
+const PREPARE_VERIFICATION_CONTENT = `# Prepare Codebase for Verification
-dev_server:
-  command: pnpm dev
-  port: 8080
+**Your job**: Make this codebase easy to verify by adding semantic identifiers that the verification system can target.
-verification:
-  commands:
-    - name: test
-      run: pnpm test
-\`\`\`
+The verification Planner needs to find UI elements by selectors. Generic selectors like \`div\` or \`.flex\` are useless. Your job is to add meaningful identifiers throughout the codebase.
-Each repo is verified independently. Use fixtures to mock dependencies on other services.
+---
-## Fixtures (API Mocking)
+## What to Add
-Mock API responses so verification doesn't need real credentials:
+### 1. \`aria-label\` on Interactive Elements
-\`\`\`yaml
-fixtures:
-  # Local file (small data, commit to repo)
-  - pattern: "/api/user"
-    source: "file://fixtures/user.json"
-  # From staging server
-  - pattern: "/api/dashboard"
-    source: "https://staging.example.com/api/dashboard"
-    headers:
-      Authorization: "Bearer $STAGING_TOKEN"
+Every clickable/interactive element should have an aria-label describing what it does:
-  # Large data from S3
-  - pattern: "/api/analytics"
-    source: "s3://my-bucket/fixtures/analytics.json"
+\`\`\`tsx
+// Before
+<button onClick={onSave}>💾</button>
+<button onClick={() => setOpen(true)}>
+  <MenuIcon />
+</button>
-  # Use real API (no mocking)
-  - pattern: "/api/public/*"
-    source: passthrough
+// After
+<button onClick={onSave} aria-label="Save document">💾</button>
+<button onClick={() => setOpen(true)} aria-label="Open menu">
+  <MenuIcon />
+</button>
 \`\`\`
-## Understanding Flows
-**Flows are advisory, not mechanical scripts.**
-When a PR is opened, an AI agent (running in a Modal sandbox with browser access):
-1. Reads the flows to understand what to verify
-2. Navigates the app autonomously
-3. Determines if things work based on flow descriptions
-4. Captures screenshots and evidence
-5. Reports results
-The flow steps are hints like "check the landing page loads" - the AI figures out how to verify that. It can adapt if the UI changes slightly.
-### Flow Triggers
-| Trigger | When it runs |
-|---------|--------------|
-| \`always\` | Every PR |
-| \`on_change\` | Only when \`watch_patterns\` match changed files |
-### Flow Actions (Advisory)
-These describe what the agent should verify:
-**Browser:**
-- \`navigate\` - Go to URL
-- \`wait_for\` - Wait for element
-- \`click\` - Click element
-- \`type\` - Enter text
-- \`screenshot\` - Capture screenshot
-- \`assert_no_errors\` - Check for error states
+**Target elements:**
+- Buttons (especially icon-only buttons)
+- Links without descriptive text
+- Toggle switches
+- Dropdown triggers
+- Modal open/close buttons
+- Form submit buttons
+### 2. \`data-testid\` on Key Sections
+Major UI sections should have data-testid for easy targeting:
+\`\`\`tsx
+// Before
+<div className="flex flex-col p-4">
+  <h1>Dashboard</h1>
+  {/* content */}
+</div>
+// After
+<div className="flex flex-col p-4" data-testid="dashboard-container">
+  <h1>Dashboard</h1>
+  {/* content */}
+</div>
+\`\`\`
-**API:**
-- \`http_request\` - Make API call
-- \`assert_status\` - Check response code
-- \`websocket_connect\` - Test WebSocket
+**Target sections:**
+- Page containers (dashboard, settings, profile)
+- Navigation bars/sidebars
+- Modal/dialog content
+- Form containers
+- Data tables/lists
+- Card components
+- Loading states
+- Error states
+- Empty states
+### 3. \`role\` Attributes for Semantic Structure
+Add ARIA roles where HTML semantics aren't clear:
+\`\`\`tsx
+// Before
+<div className="modal-overlay">
+  <div className="modal-content">
+// After
+<div className="modal-overlay" role="presentation">
+  <div className="modal-content" role="dialog" aria-modal="true">
+\`\`\`
-## Auth Bypass
+**Common roles:**
+- \`role="dialog"\` - Modals/dialogs
+- \`role="navigation"\` - Nav sections
+- \`role="main"\` - Main content area
+- \`role="alert"\` - Error/success messages
+- \`role="status"\` - Loading indicators
+- \`role="tablist"\`, \`role="tab"\`, \`role="tabpanel"\` - Tab interfaces
+### 4. State Indicators
+Add attributes that indicate UI state:
+\`\`\`tsx
+// Before
+<button onClick={toggle}>
+  {isOpen ? 'Close' : 'Open'}
+</button>
+// After
+<button
+  onClick={toggle}
+  aria-expanded={isOpen}
+  aria-label={isOpen ? 'Close panel' : 'Open panel'}
+>
+  {isOpen ? 'Close' : 'Open'}
+</button>
+\`\`\`
-Most apps need auth bypassed for testing. Common patterns:
+**State attributes:**
+- \`aria-expanded\` - Collapsible sections
+- \`aria-selected\` - Selected items in lists
+- \`aria-checked\` - Checkboxes/toggles
+- \`aria-disabled\` - Disabled elements
+- \`aria-busy\` - Loading states
+- \`data-state="loading|error|success"\` - Custom states
+### 5. Form Accessibility
+Forms should have proper labels and descriptions:
+\`\`\`tsx
+// Before
+<input type="email" placeholder="Email" />
+<span className="text-red-500">{error}</span>
+// After
+<input
+  type="email"
+  placeholder="Email"
+  aria-label="Email address"
+  aria-describedby={error ? "email-error" : undefined}
+  aria-invalid={!!error}
+/>
+{error && <span id="email-error" role="alert" className="text-red-500">{error}</span>}
+\`\`\`
-| Framework | Env Var |
-|-----------|---------|
-| Vite/React | \`VITE_SKIP_AUTH=true\` |
-| Next.js | \`NEXT_PUBLIC_SKIP_AUTH=true\` |
-| Express | \`SKIP_AUTH=true\` |
-| Rails | \`SKIP_AUTH=true\` |
+---
-Add to \`dev_server.env\` or \`services.*.env\` in your config.
+## Step 1: Scan for Missing Identifiers
-## Codebase Discovery Guide
+\`\`\`bash
+# Find buttons without aria-label
+grep -rn "<button" src/ --include="*.tsx" | grep -v "aria-label" | head -20
-**Follow these steps to create comprehensive verification flows.**
+# Find icon-only buttons (likely missing labels)
+grep -rn "<button.*Icon\\|<button.*>.*</.*Icon>" src/ --include="*.tsx" | head -20
-### ⚠️ REQUIRED CHECKLIST - Complete ALL items before finishing:
+# Find modals/dialogs without role
+grep -rn "modal\\|dialog\\|Modal\\|Dialog" src/ --include="*.tsx" | grep -v "role=" | head -20
-1. [ ] **Page flows**: Every route in the app has a flow
-2. [ ] **Specific selectors**: Using \`[data-testid='x']\` or \`.specific-class\`, NOT \`#root\` or \`div\`
-3. [ ] **Interactive flows**: At least 3 flows that click buttons, open modals, or submit forms
-4. [ ] **Fixtures**: Pages with \`:id\` params or API fetches have fixtures (staging URL or local file)
-5. [ ] **Backend API flows**: If app has API endpoints, add http_request flows to test them
-6. [ ] **Watch patterns**: Each flow's \`watch_patterns\` matches the component file paths
+# Find forms without proper labeling
+grep -rn "<input\\|<select\\|<textarea" src/ --include="*.tsx" | grep -v "aria-label\\|id=" | head -20
-**You are NOT done until all 6 items are checked.**
+# Find major components (likely need data-testid)
+ls src/components/ src/pages/ 2>/dev/null
+\`\`\`
 ---
-### Step 1: Trace the Component Tree
+## Step 2: Prioritize by Impact
-Start from the entry point and trace imports to discover ALL features:
+Focus on elements the verification system is most likely to need:
-\`\`\`bash
-# Find the entry point
-cat src/main.tsx  # or src/index.tsx, pages/_app.tsx, etc.
+**High Priority (do first):**
+1. Navigation elements (header, sidebar, menu buttons)
+2. Primary actions (submit buttons, save buttons, CTAs)
+3. Modal triggers and dialogs
+4. Form inputs and submit buttons
+5. Page-level containers
-# Trace the router to find all routes
-grep -r "Route\|path=" src/ --include="*.tsx"
+**Medium Priority:**
+1. Toggle switches and checkboxes
+2. Dropdown menus
+3. Tab interfaces
+4. Cards and list items
+5. Loading/error states
-# Find all page/feature components
-ls src/pages/ src/components/ src/features/
-\`\`\`
+**Lower Priority:**
+1. Decorative elements
+2. Static content sections
+3. Footer links
-### Step 2: Find Good Selectors
+---
-**DON'T use generic selectors like \`#root\`.** The agent needs specific selectors to know the page loaded correctly.
+## Step 3: Add Identifiers Systematically
-Priority order for selectors:
-1. \`[data-testid='feature-name']\` - Best, explicit test hooks
-2. \`[role='main']\`, \`[role='navigation']\` - Semantic roles
-3. \`.feature-specific-class\` - Component-specific classes
-4. \`h1\`, \`.page-title\` - Unique page identifiers
+Go component by component. For each component:
-**How to find selectors:**
-\`\`\`bash
-# Search for data-testid attributes
-grep -r "data-testid" src/ --include="*.tsx"
+1. **Check the component's purpose** - What does it DO?
+2. **Add aria-label** to interactive elements describing the ACTION
+3. **Add data-testid** to the container if it's a major section
+4. **Add role** if the semantic HTML isn't clear
+5. **Add state attributes** if the component has dynamic states
-# Search for unique classNames in a component
-grep -r "className=" src/components/Dashboard.tsx
+### Naming Conventions
-# Look for page-specific elements
-grep -r "<h1\|<header\|role=" src/pages/
-\`\`\`
+**aria-label**: Describe the action, not the element
+- ✅ \`aria-label="Close modal"\`
+- ✅ \`aria-label="Submit contact form"\`
+- ❌ \`aria-label="Button"\`
+- ❌ \`aria-label="Click here"\`
-**Example - BAD vs GOOD:**
-\`\`\`yaml
-# BAD - too generic, every page has #root
-- action: wait_for
-  selector: "#root"
+**data-testid**: Use kebab-case, describe the section
+- ✅ \`data-testid="user-profile-card"\`
+- ✅ \`data-testid="search-results-list"\`
+- ❌ \`data-testid="div1"\`
+- ❌ \`data-testid="container"\`
-# GOOD - specific to this feature
-- action: wait_for
-  selector: "[data-testid='dashboard-content']"
-# or
-- action: wait_for
-  selector: ".dashboard-stats-grid"
-# or
-- action: wait_for
-  selector: "h1:has-text('Dashboard')"
-\`\`\`
+---
-### Step 3: Add Interactive Flows
+## Step 4: Verify Coverage
-Don't just screenshot static pages. Verify that interactions work:
+After adding identifiers, check coverage:
-**Look for interactive elements:**
 \`\`\`bash
-# Find buttons and clickable elements
-grep -r "onClick\|button\|Button" src/ --include="*.tsx"
+# Count aria-labels added
+grep -r "aria-label" src/ --include="*.tsx" | wc -l
-# Find modals and dialogs
-grep -r "Modal\|Dialog\|Drawer" src/ --include="*.tsx"
+# Count data-testid added
+grep -r "data-testid" src/ --include="*.tsx" | wc -l
-# Find forms
-grep -r "<form\|onSubmit\|handleSubmit" src/ --include="*.tsx"
+# Count role attributes
+grep -r "role=" src/ --include="*.tsx" | wc -l
-# Find toggles and switches
-grep -r "toggle\|Switch\|theme" src/ --include="*.tsx"
+# List all data-testid values (check for meaningful names)
+grep -oh 'data-testid="[^"]*"' src/ -r --include="*.tsx" | sort -u
 \`\`\`
-**Example interactive flows:**
-\`\`\`yaml
-# Theme toggle
-- name: "Theme toggle works"
-  steps:
-    - action: navigate
-      url: "/"
-    - action: click
-      selector: "[data-testid='theme-toggle']"
-    - action: screenshot
-      name: "dark-mode"
-    - action: click
-      selector: "[data-testid='theme-toggle']"
-    - action: screenshot
-      name: "light-mode"
-# Modal open/close
-- name: "Settings modal opens"
-  steps:
-    - action: navigate
-      url: "/dashboard"
-    - action: click
-      selector: "[aria-label='Settings']"
-    - action: wait_for
-      selector: "[role='dialog']"
-    - action: screenshot
-      name: "settings-modal"
-# Form submission
-- name: "Contact form submits"
-  steps:
-    - action: navigate
-      url: "/contact"
-    - action: type
-      selector: "input[name='email']"
-      value: "test@example.com"
-    - action: click
-      selector: "button[type='submit']"
-    - action: wait_for
-      selector: ".success-message"
-\`\`\`
+---
-### Step 4: Handle Data-Dependent Pages
+## Step 5: Commit
-**How to identify pages that need fixtures:**
 \`\`\`bash
-# Find components that fetch data
-grep -r "useQuery\|useSWR\|fetch(\|axios\|useEffect.*fetch" src/ --include="*.tsx"
+git add src/
+git commit -m "Add accessibility attributes for verification
-# Find API route parameters (these pages need data)
-grep -r "useParams\|router.query\|\[.*\]" src/pages/ src/app/ --include="*.tsx"
+- Added aria-labels to interactive elements
+- Added data-testid to major sections
+- Added ARIA roles for semantic structure
+- Added state indicators (aria-expanded, etc.)"
 \`\`\`
-If a page has \`:id\`, \`:slug\`, or fetches from \`/api/*\`, it needs fixtures.
-**Option A: Pull from staging (recommended for large/dynamic data)**
-\`\`\`yaml
-fixtures:
-  # Pull real data from staging API
-  - pattern: "/api/pr/*"
-    source: "https://staging.example.com/api/pr/sample"
-    headers:
-      Authorization: "Bearer $STAGING_TOKEN"
+---
-  # Or from S3 bucket
-  - pattern: "/api/analytics/*"
-    source: "s3://my-fixtures-bucket/analytics-sample.json"
-\`\`\`
+## Quick Reference: Common Patterns
-**Option B: Commit small fixture files**
-For small, stable data only:
-\`\`\`bash
-mkdir -p fixtures
-cat > fixtures/user.json << 'EOF'
-{"id": 1, "name": "Test User", "email": "test@example.com"}
-EOF
+### Icon Button
+\`\`\`tsx
+<button onClick={onAction} aria-label="Descriptive action name">
+  <Icon />
+</button>
 \`\`\`
-\`\`\`yaml
-fixtures:
-  - pattern: "/api/user"
-    source: "file://fixtures/user.json"
+### Modal
+\`\`\`tsx
+<div role="dialog" aria-modal="true" aria-labelledby="modal-title" data-testid="settings-modal">
+  <h2 id="modal-title">Settings</h2>
+  <button onClick={onClose} aria-label="Close settings">×</button>
+</div>
 \`\`\`
-**When to use each:**
-| Data Type | Use |
-|-----------|-----|
-| User profiles, settings | Local file (small, stable) |
-| PR data, analytics, lists | Staging API or S3 (large, dynamic) |
-| Auth tokens, sessions | Passthrough or mock inline |
-**Option B: Use demo/example routes**
-\`\`\`bash
-# Look for demo or example routes in the router
-grep -r "demo\|example\|sample" src/ --include="*.tsx"
+### Navigation
+\`\`\`tsx
+<nav aria-label="Main navigation" data-testid="main-nav">
+  <a href="/dashboard" aria-current={isActive ? "page" : undefined}>Dashboard</a>
+</nav>
 \`\`\`
-**Option C: Use real test data**
-If the app has seeded test data, use those identifiers:
-\`\`\`yaml
-- name: "PR review page loads"
-  steps:
-    - action: navigate
-      url: "/review/test-org/test-repo/1"  # Known test PR
-    - action: wait_for
-      selector: "[data-testid='pr-diff']"
+### Toggle
+\`\`\`tsx
+<button
+  onClick={toggle}
+  aria-pressed={isOn}
+  aria-label={\`\${isOn ? 'Disable' : 'Enable'} notifications\`}
+>
+  {isOn ? 'On' : 'Off'}
+</button>
 \`\`\`
-### Step 5: Add Backend API Flows (if applicable)
-If the app has API endpoints, test them directly:
-\`\`\`bash
-# Find API routes
-ls src/api/ app/api/ pages/api/ 2>/dev/null
-grep -r "app.get\|app.post\|router.get" --include="*.ts"
+### Loading State
+\`\`\`tsx
+<div data-testid="content-area" aria-busy={isLoading}>
+  {isLoading ? (
+    <div role="status" aria-label="Loading content">
+      <Spinner />
+    </div>
+  ) : (
+    content
+  )}
+</div>
 \`\`\`
-**Example API flows:**
-\`\`\`yaml
-flows:
-  - name: "API health check"
-    description: "Verify API server responds"
-    trigger: always
-    steps:
-      - action: http_request
-        method: GET
-        url: "http://localhost:3001/health"
-      - action: assert_status
-        status: 200
-  - name: "API returns valid data"
-    trigger: on_change
-    watch_patterns:
-      - "src/api/**"
-    steps:
-      - action: http_request
-        method: GET
-        url: "http://localhost:3001/api/users"
-      - action: assert_status
-        status: 200
+### Form Field
+\`\`\`tsx
+<div data-testid="email-field">
+  <label htmlFor="email">Email</label>
+  <input
+    id="email"
+    type="email"
+    aria-describedby={error ? "email-error" : "email-hint"}
+    aria-invalid={!!error}
+  />
+  <span id="email-hint">We'll never share your email</span>
+  {error && <span id="email-error" role="alert">{error}</span>}
+</div>
 \`\`\`
-### Step 6: Verify Your Flows
-After adding flows, validate the config:
-\`\`\`bash
-# Check YAML syntax
-npx @haystackeditor/cli validate
-# Or manually check
-cat .haystack.yml | head -50
+### Expandable Section
+\`\`\`tsx
+<div data-testid="faq-section">
+  <button
+    onClick={() => setExpanded(!expanded)}
+    aria-expanded={expanded}
+    aria-controls="faq-content"
+  >
+    FAQ
+  </button>
+  <div id="faq-content" hidden={!expanded}>
+    {content}
+  </div>
+</div>
 \`\`\`
+`;
+const PREPARE_HAYSTACK_COMMAND = `# Prepare Codebase for Verification
-### ✅ Final Checklist (ALL required):
-Before you finish, verify:
-1. [ ] **Page flows**: Every route has a flow
-2. [ ] **Specific selectors**: All use \`[data-testid='x']\` or \`.class-name\`, NOT \`#root\`/\`div\`/\`h1\`
-3. [ ] **Interactive flows**: At least 3 flows with \`click\` or \`type\` actions
-4. [ ] **Fixtures configured**: Data-dependent pages have fixtures (staging URL preferred, or local JSON)
-5. [ ] **Backend API flows**: API endpoints have \`http_request\` flows (if app has backend)
-6. [ ] **Watch patterns**: Each \`watch_patterns\` matches component file paths
+Follow .agents/skills/prepare-haystack.md to add accessibility attributes that make verification easier.
-⚠️ **If you have 0 interactive flows or 0 fixtures for data pages, you are not done.**
+Run this BEFORE /setup-haystack to ensure your codebase has good selectors.
 `;
 /**
  * Write the Haystack skill files into <cwd>/.agents/skills.
  *
  * In the new version three files are written: setup-haystack.md
  * (SKILL_CONTENT), haystack-reference.md (REFERENCE_CONTENT) and
  * prepare-haystack.md (PREPARE_VERIFICATION_CONTENT).
  * NOTE(review): REFERENCE_CONTENT and PREPARE_VERIFICATION_CONTENT are not
  * defined in this hunk; presumably they are declared earlier in the file.
  *
  * @returns {Promise<string>} Path of setup-haystack.md; the reference and
  *   prepare paths are not returned.
  */
 export async function createSkillFile() {
     const skillDir = path.join(process.cwd(), '.agents', 'skills');
-    const skillPath = path.join(skillDir, 'haystack.md');
+    const setupPath = path.join(skillDir, 'setup-haystack.md');
+    const refPath = path.join(skillDir, 'haystack-reference.md');
+    const prepPath = path.join(skillDir, 'prepare-haystack.md');
     // Create the directory (and missing parents); with recursive: true an existing directory is not an error
     await fs.mkdir(skillDir, { recursive: true });
-    // Write skill file
-    await fs.writeFile(skillPath, SKILL_CONTENT, 'utf-8');
-    return skillPath;
+    // Write the three skill files one after another; only the setup file's path is returned
+    await fs.writeFile(setupPath, SKILL_CONTENT, 'utf-8');
+    await fs.writeFile(refPath, REFERENCE_CONTENT, 'utf-8');
+    await fs.writeFile(prepPath, PREPARE_VERIFICATION_CONTENT, 'utf-8');
+    return setupPath;
 }
 /**
- * Create the .claude/commands/haystack.md file for Claude Code slash command
- * Users can invoke with /haystack to start the setup wizard
+ * Create the .claude/commands/ files for Claude Code slash commands.
+ * Users can invoke them with /setup-haystack or /prepare-haystack.
+ *
+ * Writes setup-haystack.md (CLAUDE_COMMAND_CONTENT) and prepare-haystack.md
+ * (PREPARE_HAYSTACK_COMMAND) under <cwd>/.claude/commands, creating the
+ * directory first if it is missing.
+ *
+ * @returns {Promise<string>} Path of setup-haystack.md; the prepare command
+ *   path is not returned.
  */
 export async function createClaudeCommand() {
     const commandDir = path.join(process.cwd(), '.claude', 'commands');
-    const commandPath = path.join(commandDir, 'haystack.md');
+    const setupPath = path.join(commandDir, 'setup-haystack.md');
+    const prepPath = path.join(commandDir, 'prepare-haystack.md');
     // Create the directory (and missing parents); with recursive: true an existing directory is not an error
     await fs.mkdir(commandDir, { recursive: true });
-    // Write command file
-    await fs.writeFile(commandPath, CLAUDE_COMMAND_CONTENT, 'utf-8');
-    return commandPath;
+    // Write both command files one after another; only the setup command's path is returned
+    await fs.writeFile(setupPath, CLAUDE_COMMAND_CONTENT, 'utf-8');
+    await fs.writeFile(prepPath, PREPARE_HAYSTACK_COMMAND, 'utf-8');
+    return setupPath;
 }