agileflow 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/package.json +1 -1
- package/scripts/lib/browser-qa-evidence.js +409 -0
- package/scripts/lib/browser-qa-status.js +192 -0
- package/src/core/agents/browser-qa.md +328 -0
- package/src/core/commands/browser-qa.md +240 -0
- package/src/core/templates/browser-qa-spec.yaml +94 -0
package/CHANGELOG.md
CHANGED
package/package.json
CHANGED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* browser-qa-evidence.js - Screenshot evidence trail management
|
|
3
|
+
*
|
|
4
|
+
* Manages organized storage for agentic browser test evidence including
|
|
5
|
+
* screenshots, result metadata, and automatic retention cleanup.
|
|
6
|
+
*
|
|
7
|
+
* Storage Structure:
|
|
8
|
+
* .agileflow/ui-review/
|
|
9
|
+
* ├── specs/ # YAML test scenario definitions
|
|
10
|
+
* ├── runs/ # Timestamped test run evidence
|
|
11
|
+
* │ └── YYYY-MM-DD_HH-MM-SS/
|
|
12
|
+
* │ ├── summary.json # Aggregated run results
|
|
13
|
+
* │ └── AGENTIC-001/ # Per-scenario evidence
|
|
14
|
+
* │ ├── results.json # Scenario results + metadata
|
|
15
|
+
* │ ├── step-1-navigate.png
|
|
16
|
+
* │ ├── step-2-click.png
|
|
17
|
+
* │ └── step-3-assert_FAILED.png
|
|
18
|
+
* └── baselines/ # Reference screenshots for visual diff
|
|
19
|
+
*
|
|
20
|
+
* Usage:
|
|
21
|
+
* const evidence = require('./lib/browser-qa-evidence');
|
|
22
|
+
* const runDir = evidence.createRunDirectory(projectRoot);
|
|
23
|
+
* const scenarioDir = evidence.createScenarioDirectory(runDir, 'AGENTIC-001');
|
|
24
|
+
* evidence.saveStepResult(scenarioDir, stepResult);
|
|
25
|
+
* evidence.saveRunSummary(runDir, results);
|
|
26
|
+
* evidence.cleanupOldRuns(projectRoot, 30); // 30-day retention
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
const fs = require('fs');
|
|
30
|
+
const path = require('path');
|
|
31
|
+
|
|
32
|
+
const UI_REVIEW_DIR = '.agileflow/ui-review';
|
|
33
|
+
const RUNS_DIR = 'runs';
|
|
34
|
+
const SPECS_DIR = 'specs';
|
|
35
|
+
const BASELINES_DIR = 'baselines';
|
|
36
|
+
const DEFAULT_RETENTION_DAYS = 30;
|
|
37
|
+
|
|
38
|
+
/**
 * Resolve the root of the ui-review evidence tree for a project.
 * @param {string} projectRoot - Project root directory
 * @returns {string} Path to .agileflow/ui-review/
 */
function getBaseDir(projectRoot) {
  return path.join(projectRoot, UI_REVIEW_DIR);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Create the full ui-review directory tree, making any missing pieces.
 * Safe to call repeatedly; existing directories are left untouched.
 * @param {string} projectRoot - Project root directory
 * @returns {{ base: string, runs: string, specs: string, baselines: string }}
 */
function ensureDirectoryStructure(projectRoot) {
  const base = getBaseDir(projectRoot);
  const layout = {
    base,
    runs: path.join(base, RUNS_DIR),
    specs: path.join(base, SPECS_DIR),
    baselines: path.join(base, BASELINES_DIR),
  };

  Object.values(layout).forEach(dir => {
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
  });

  return layout;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Create (if needed) and return a timestamped directory for one test run.
 * Also ensures the surrounding ui-review tree exists.
 * @param {string} projectRoot - Project root directory
 * @param {Date} [timestamp] - Optional timestamp (defaults to now)
 * @returns {string} Path to the run directory
 */
function createRunDirectory(projectRoot, timestamp) {
  const { runs } = ensureDirectoryStructure(projectRoot);
  const runDir = path.join(runs, formatTimestamp(timestamp || new Date()));

  if (!fs.existsSync(runDir)) {
    fs.mkdirSync(runDir, { recursive: true });
  }

  return runDir;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Create (if needed) a per-scenario evidence directory inside a run.
 * @param {string} runDir - Path to the run directory
 * @param {string} testId - Test ID (e.g., 'AGENTIC-001')
 * @returns {string} Path to the scenario directory
 */
function createScenarioDirectory(runDir, testId) {
  const scenarioDir = path.join(runDir, testId);
  // recursive:true makes this a no-op when the directory already exists.
  fs.mkdirSync(scenarioDir, { recursive: true });
  return scenarioDir;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Build the screenshot filename for a test step.
 *
 * The step name is slugified: lowercased, runs of non-alphanumerics
 * collapsed to single hyphens, edge hyphens trimmed, capped at 40 chars.
 * Failed steps get a `_FAILED` suffix so they stand out in the evidence
 * directory listing.
 *
 * @param {number} stepIndex - Zero-based step index
 * @param {string} stepName - Human-readable step name
 * @param {boolean} [failed=false] - Whether the step failed
 * @returns {string} Filename like 'step-1-navigate.png' or 'step-1-navigate_FAILED.png'
 */
function getScreenshotFilename(stepIndex, stepName, failed) {
  const slug = stepName
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .slice(0, 40);
  const suffix = failed ? '_FAILED' : '';
  // Fall back to a generic slug so step names made entirely of symbols
  // (or an empty string) don't yield filenames like 'step-1-.png'.
  return `step-${stepIndex + 1}-${slug || 'step'}${suffix}.png`;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Append a single step result to the scenario's results.json.
 *
 * The file is read-modify-written on every call so a crash mid-scenario
 * still leaves the earlier steps on disk. A missing or corrupt file is
 * replaced with a fresh `{ steps: [] }` document rather than aborting.
 *
 * @param {string} scenarioDir - Path to the scenario directory
 * @param {object} stepResult - Step execution result
 * @param {number} stepResult.index - Zero-based step index
 * @param {string} stepResult.name - Step name
 * @param {'passed'|'failed'|'skipped'} stepResult.status - Step status
 * @param {number} stepResult.duration_ms - Step duration in milliseconds
 * @param {string} [stepResult.screenshot] - Screenshot filename (if captured)
 * @param {string} [stepResult.error] - Error message (if failed)
 * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} [stepResult.error_type] - Error classification
 */
function saveStepResult(scenarioDir, stepResult) {
  const resultsPath = path.join(scenarioDir, 'results.json');
  let results = { steps: [] };

  if (fs.existsSync(resultsPath)) {
    try {
      results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
    } catch {
      // Corrupt JSON: start over rather than failing the whole run.
      results = { steps: [] };
    }
  }

  // Guard against a valid JSON document that lacks a steps array (e.g. a
  // results.json written by saveScenarioResult without steps) — pushing
  // onto undefined would throw and lose the step evidence.
  if (!Array.isArray(results.steps)) {
    results.steps = [];
  }

  results.steps.push(stepResult);
  fs.writeFileSync(resultsPath, JSON.stringify(results, null, 2));
}
|
|
144
|
+
|
|
145
|
+
/**
 * Persist the complete result document for one scenario, overwriting any
 * per-step results.json previously written into the same directory.
 * @param {string} scenarioDir - Path to the scenario directory
 * @param {object} scenarioResult - Complete scenario result
 * @param {string} scenarioResult.test_id - Test ID
 * @param {string} [scenarioResult.story_id] - Associated story ID
 * @param {string} scenarioResult.name - Scenario name
 * @param {string} scenarioResult.timestamp - ISO timestamp
 * @param {'validated'|'warning'|'failed'} scenarioResult.status - Overall status
 * @param {number} scenarioResult.pass_rate - Pass rate (0-1)
 * @param {number} scenarioResult.attempts - Total attempts
 * @param {number} scenarioResult.successful_attempts - Successful attempts
 * @param {Array} scenarioResult.steps - Step results array
 */
function saveScenarioResult(scenarioDir, scenarioResult) {
  const payload = JSON.stringify(scenarioResult, null, 2);
  fs.writeFileSync(path.join(scenarioDir, 'results.json'), payload);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Persist the aggregated summary for a whole run (all scenarios).
 * @param {string} runDir - Path to the run directory
 * @param {object} summary - Run summary
 * @param {string} summary.timestamp - ISO timestamp
 * @param {number} summary.total_scenarios - Total scenarios executed
 * @param {number} summary.validated - Scenarios that passed (>=80%)
 * @param {number} summary.warnings - Scenarios with warnings (70-79%)
 * @param {number} summary.failed - Scenarios that failed (<70%)
 * @param {Array} summary.scenarios - Individual scenario results
 */
function saveRunSummary(runDir, summary) {
  const payload = JSON.stringify(summary, null, 2);
  fs.writeFileSync(path.join(runDir, 'summary.json'), payload);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Load a run's summary.json.
 * @param {string} runDir - Path to the run directory
 * @returns {object|null} Parsed summary, or null when missing/unparseable
 */
function loadRunSummary(runDir) {
  // A single try/catch covers both the missing-file and corrupt-JSON
  // cases; either way the caller just gets null.
  try {
    const raw = fs.readFileSync(path.join(runDir, 'summary.json'), 'utf-8');
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
194
|
+
|
|
195
|
+
/**
 * Enumerate all recorded test runs, newest first.
 * Only directories matching the YYYY-MM-DD_HH-MM-SS naming scheme count.
 * @param {string} projectRoot - Project root directory
 * @returns {Array<{ dir: string, timestamp: string, summary: object|null }>}
 */
function listRuns(projectRoot) {
  const runsDir = path.join(getBaseDir(projectRoot), RUNS_DIR);
  if (!fs.existsSync(runsDir)) return [];

  const isRunName = name => /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/.test(name);
  const names = fs.readdirSync(runsDir).filter(isRunName);
  // Timestamp names sort lexicographically; descending = newest first.
  names.sort((a, b) => (a < b ? 1 : a > b ? -1 : 0));

  return names.map(name => {
    const dir = path.join(runsDir, name);
    return { dir, timestamp: name, summary: loadRunSummary(dir) };
  });
}
|
|
218
|
+
|
|
219
|
+
/**
 * Find all YAML test spec files under the specs directory (recursive).
 * @param {string} projectRoot - Project root directory
 * @returns {string[]} Array of spec file paths
 */
function listSpecs(projectRoot) {
  const specsDir = path.join(getBaseDir(projectRoot), SPECS_DIR);
  if (!fs.existsSync(specsDir)) return [];

  const found = [];
  // Depth-first traversal in readdir order, descending into
  // subdirectories as they are encountered.
  const collect = dir => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        collect(fullPath);
      } else if (/\.ya?ml$/.test(entry.name)) {
        found.push(fullPath);
      }
    }
  };
  collect(specsDir);
  return found;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Remove run directories older than the retention window.
 *
 * @param {string} projectRoot - Project root directory
 * @param {number} [retentionDays=30] - Days of evidence to keep. An
 *   explicit 0 now means "delete every run older than now" (previously 0
 *   silently fell back to the 30-day default via `||`).
 * @returns {{ removed: number, kept: number, errors: string[] }}
 */
function cleanupOldRuns(projectRoot, retentionDays) {
  // Explicit number check (not ||) so a retention of 0 days is honored.
  const days = Number.isFinite(retentionDays) ? retentionDays : DEFAULT_RETENTION_DAYS;
  const runsDir = path.join(getBaseDir(projectRoot), RUNS_DIR);
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - days);

  const result = { removed: 0, kept: 0, errors: [] };
  if (!fs.existsSync(runsDir)) return result;

  for (const name of fs.readdirSync(runsDir)) {
    // Ignore anything that isn't a timestamped run directory.
    if (!/^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/.test(name)) continue;

    const runDate = parseTimestamp(name);
    if (!runDate) {
      result.errors.push(`Invalid timestamp format: ${name}`);
      continue;
    }

    if (runDate < cutoff) {
      try {
        fs.rmSync(path.join(runsDir, name), { recursive: true, force: true });
        result.removed++;
      } catch (err) {
        // Keep going: one stuck directory shouldn't abort the sweep.
        result.errors.push(`Failed to remove ${name}: ${err.message}`);
      }
    } else {
      result.kept++;
    }
  }

  return result;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Compute the fraction of attempts that succeeded.
 * @param {number} successful - Number of successful attempts
 * @param {number} total - Total attempts
 * @returns {number} Pass rate in [0, 1]; 0 when no attempts were made
 */
function calculatePassRate(successful, total) {
  return total === 0 ? 0 : successful / total;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Classify a pass rate against the validation threshold.
 *
 * Rates within 10 percentage points below the threshold classify as
 * 'warning' (e.g. 70-79% for the default 80% threshold); anything lower
 * is 'failed'.
 *
 * @param {number} passRate - Pass rate between 0 and 1
 * @param {number} [threshold=0.80] - Validation threshold between 0 and 1.
 *   An explicit 0 is honored (previously `||` replaced it with 0.80).
 * @returns {'validated'|'warning'|'failed'}
 */
function classifyPassRate(passRate, threshold) {
  // Explicit number check (not ||) so threshold 0 isn't swallowed.
  const t = Number.isFinite(threshold) ? threshold : 0.8;
  // Compare in tenths of a percent (integer math) to avoid floating
  // point precision issues like 0.7999999 vs 0.80.
  const rate = Math.round(passRate * 1000);
  const thresh = Math.round(t * 1000);
  if (rate >= thresh) return 'validated';
  if (rate >= thresh - 100) return 'warning';
  return 'failed';
}
|
|
311
|
+
|
|
312
|
+
/**
 * Classify an error message for retry decisions.
 * @param {Error|string} error - The error that occurred
 * @returns {'timeout'|'assertion'|'agent_error'|'infrastructure'}
 */
function classifyError(error) {
  const message = typeof error === 'string' ? error : error.message || '';
  const lower = message.toLowerCase();
  const contains = (...needles) => needles.some(n => lower.includes(n));

  if (contains('timeout', 'timed out', 'navigation timeout')) {
    return 'timeout';
  }
  // Infrastructure is deliberately checked before assertion: words like
  // "unexpectedly" would otherwise trip the assertion markers.
  if (contains('econnrefused', 'enotfound', 'browser', 'chromium')) {
    return 'infrastructure';
  }
  if (contains('assert', 'expected', 'not found', 'mismatch')) {
    return 'assertion';
  }
  return 'agent_error';
}
|
|
347
|
+
|
|
348
|
+
/**
 * Whether a failure of this type warrants another attempt.
 * Timeouts and agent errors are treated as transient; assertion and
 * infrastructure failures are not retried.
 * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} errorType
 * @returns {boolean}
 */
function isRetryable(errorType) {
  return ['timeout', 'agent_error'].includes(errorType);
}
|
|
356
|
+
|
|
357
|
+
// --- Internal helpers ---
|
|
358
|
+
|
|
359
|
+
// Format a Date as YYYY-MM-DD_HH-MM-SS (local time), the naming scheme
// used for run directories.
function formatTimestamp(date) {
  const pad = n => String(n).padStart(2, '0');
  const day = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  const time = `${pad(date.getHours())}-${pad(date.getMinutes())}-${pad(date.getSeconds())}`;
  return `${day}_${time}`;
}
|
|
375
|
+
|
|
376
|
+
// Parse a YYYY-MM-DD_HH-MM-SS run-directory name back into a local Date.
// Returns null when the string doesn't match the format.
function parseTimestamp(str) {
  const match = str.match(/^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})$/);
  if (!match) return null;
  // Number() instead of radix-less parseInt(): the regex guarantees pure
  // digit groups, and Number sidesteps the parseInt-without-radix pitfall.
  const [year, month, day, hours, minutes, seconds] = match.slice(1).map(Number);
  return new Date(year, month - 1, day, hours, minutes, seconds);
}
|
|
388
|
+
|
|
389
|
+
module.exports = {
|
|
390
|
+
getBaseDir,
|
|
391
|
+
ensureDirectoryStructure,
|
|
392
|
+
createRunDirectory,
|
|
393
|
+
createScenarioDirectory,
|
|
394
|
+
getScreenshotFilename,
|
|
395
|
+
saveStepResult,
|
|
396
|
+
saveScenarioResult,
|
|
397
|
+
saveRunSummary,
|
|
398
|
+
loadRunSummary,
|
|
399
|
+
listRuns,
|
|
400
|
+
listSpecs,
|
|
401
|
+
cleanupOldRuns,
|
|
402
|
+
calculatePassRate,
|
|
403
|
+
classifyPassRate,
|
|
404
|
+
classifyError,
|
|
405
|
+
isRetryable,
|
|
406
|
+
// Constants
|
|
407
|
+
UI_REVIEW_DIR,
|
|
408
|
+
DEFAULT_RETENTION_DAYS,
|
|
409
|
+
};
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* browser-qa-status.js - Agentic test status integration for status.json
|
|
3
|
+
*
|
|
4
|
+
* Adds and manages `agentic_test_status` field on stories in status.json.
|
|
5
|
+
* This field is separate from `test_status` (which tracks deterministic Jest tests).
|
|
6
|
+
*
|
|
7
|
+
* Status values:
|
|
8
|
+
* - "validated" : >=80% pass rate in agentic browser tests
|
|
9
|
+
* - "warning" : 70-79% pass rate (needs investigation)
|
|
10
|
+
* - "failed" : <70% pass rate (potential bug)
|
|
11
|
+
* - "not_run" : No agentic tests executed yet
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* const { updateAgenticTestStatus, getAgenticTestStatus } = require('./lib/browser-qa-status');
|
|
15
|
+
*
|
|
16
|
+
* // Update a story's agentic test status
|
|
17
|
+
* updateAgenticTestStatus(projectRoot, 'US-0050', {
|
|
18
|
+
* status: 'validated',
|
|
19
|
+
* pass_rate: 0.87,
|
|
20
|
+
* scenarios_run: 3,
|
|
21
|
+
* last_run: '2026-02-16T14:30:00Z',
|
|
22
|
+
* evidence_path: '.agileflow/ui-review/runs/2026-02-16_14-30-00/'
|
|
23
|
+
* });
|
|
24
|
+
*
|
|
25
|
+
* // Read a story's agentic test status
|
|
26
|
+
* const result = getAgenticTestStatus(projectRoot, 'US-0050');
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
const fs = require('fs');
|
|
30
|
+
const path = require('path');
|
|
31
|
+
|
|
32
|
+
const STATUS_FILE = 'docs/09-agents/status.json';
|
|
33
|
+
|
|
34
|
+
/**
 * Read and parse status.json, tolerating a missing or corrupt file.
 * @param {string} projectRoot - Project root directory
 * @returns {object|null} Parsed status document, or null when unreadable
 */
function readStatusJson(projectRoot) {
  // One try/catch covers both missing-file (ENOENT) and corrupt JSON.
  try {
    const raw = fs.readFileSync(path.join(projectRoot, STATUS_FILE), 'utf-8');
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Serialize status data to status.json, stamping an `updated` timestamp.
 * NOTE: intentionally mutates the passed-in object (`data.updated` is set)
 * so the caller's in-memory copy matches what was written.
 * @param {string} projectRoot - Project root directory
 * @param {object} data - Status data to write
 * @returns {boolean} true on success, false on any serialization/write error
 */
function writeStatusJson(projectRoot, data) {
  try {
    data.updated = new Date().toISOString();
    const serialized = `${JSON.stringify(data, null, 2)}\n`;
    fs.writeFileSync(path.join(projectRoot, STATUS_FILE), serialized);
    return true;
  } catch {
    return false;
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Record a single story's agentic browser-test outcome in status.json.
 * @param {string} projectRoot - Project root directory
 * @param {string} storyId - Story ID (e.g., 'US-0050')
 * @param {object} result - Agentic test result
 * @param {'validated'|'warning'|'failed'|'not_run'} result.status - Overall status
 * @param {number} result.pass_rate - Pass rate (0-1)
 * @param {number} result.scenarios_run - Number of scenarios executed
 * @param {string} result.last_run - ISO timestamp of last run
 * @param {string} result.evidence_path - Path to evidence directory
 * @returns {boolean} false when status.json or the story entry is missing
 */
function updateAgenticTestStatus(projectRoot, storyId, result) {
  const status = readStatusJson(projectRoot);
  const story = status && status.stories && status.stories[storyId];
  if (!story) return false;

  story.agentic_test_status = result.status;
  story.agentic_test_details = {
    pass_rate: result.pass_rate,
    scenarios_run: result.scenarios_run,
    last_run: result.last_run,
    evidence_path: result.evidence_path,
  };

  return writeStatusJson(projectRoot, status);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Look up a story's agentic test status.
 * @param {string} projectRoot - Project root directory
 * @param {string} storyId - Story ID (e.g., 'US-0050')
 * @returns {object|null} `{ status, details }` or null if the story is unknown
 */
function getAgenticTestStatus(projectRoot, storyId) {
  const status = readStatusJson(projectRoot);
  const story = status && status.stories ? status.stories[storyId] : undefined;
  if (!story) return null;

  return {
    status: story.agentic_test_status || 'not_run',
    details: story.agentic_test_details || null,
  };
}
|
|
113
|
+
|
|
114
|
+
/**
 * Apply a batch of scenario results to their stories in status.json.
 *
 * When several scenarios map to the same story, the last result's status
 * and pass_rate win (unchanged from before), but `scenarios_run` now
 * reflects how many scenarios in this batch targeted that story rather
 * than being hard-coded to 1.
 *
 * @param {string} projectRoot - Project root directory
 * @param {Array} scenarioResults - Array of scenario results
 * @param {string} scenarioResults[].story_id - Story ID
 * @param {'validated'|'warning'|'failed'} scenarioResults[].status - Status
 * @param {number} scenarioResults[].pass_rate - Pass rate
 * @param {string} evidencePath - Path to run evidence directory
 * @returns {{ updated: number, skipped: number }}
 */
function updateBatchAgenticStatus(projectRoot, scenarioResults, evidencePath) {
  const status = readStatusJson(projectRoot);
  if (!status) return { updated: 0, skipped: 0 };

  let updated = 0;
  let skipped = 0;
  const timestamp = new Date().toISOString();
  // Per-story scenario counter for this batch.
  const perStoryCount = {};

  for (const result of scenarioResults) {
    const storyId = result.story_id;
    // Skip results with no story mapping or an unknown story.
    if (!storyId || !status.stories || !status.stories[storyId]) {
      skipped++;
      continue;
    }

    perStoryCount[storyId] = (perStoryCount[storyId] || 0) + 1;
    status.stories[storyId].agentic_test_status = result.status;
    status.stories[storyId].agentic_test_details = {
      pass_rate: result.pass_rate,
      scenarios_run: perStoryCount[storyId],
      last_run: timestamp,
      evidence_path: evidencePath,
    };
    updated++;
  }

  // Only touch the file when something actually changed.
  if (updated > 0) {
    writeStatusJson(projectRoot, status);
  }

  return { updated, skipped };
}
|
|
159
|
+
|
|
160
|
+
/**
 * Tally agentic test statuses across every story in status.json.
 * Stories without a recognized status count as 'not_run'.
 * @param {string} projectRoot - Project root directory
 * @returns {{ validated: number, warning: number, failed: number, not_run: number, total: number }}
 */
function getAgenticTestSummary(projectRoot) {
  const summary = { validated: 0, warning: 0, failed: 0, not_run: 0, total: 0 };
  const status = readStatusJson(projectRoot);
  if (!status || !status.stories) return summary;

  for (const story of Object.values(status.stories)) {
    summary.total++;
    const key = story.agentic_test_status || 'not_run';
    const bucket = summary[key] !== undefined ? key : 'not_run';
    summary[bucket]++;
  }

  return summary;
}
|
|
183
|
+
|
|
184
|
+
module.exports = {
|
|
185
|
+
updateAgenticTestStatus,
|
|
186
|
+
getAgenticTestStatus,
|
|
187
|
+
updateBatchAgenticStatus,
|
|
188
|
+
getAgenticTestSummary,
|
|
189
|
+
readStatusJson,
|
|
190
|
+
writeStatusJson,
|
|
191
|
+
STATUS_FILE,
|
|
192
|
+
};
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: agileflow-browser-qa
|
|
3
|
+
description: Agentic browser automation for exploratory UI testing using Playwright CLI. Executes YAML test scenarios, captures screenshot evidence, and reports results with probabilistic pass rates.
|
|
4
|
+
tools: Read, Write, Edit, Bash, Glob, Grep
|
|
5
|
+
model: sonnet
|
|
6
|
+
team_role: teammate
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
<!-- AGILEFLOW_META
|
|
10
|
+
compact_context:
|
|
11
|
+
priority: high
|
|
12
|
+
preserve_rules:
|
|
13
|
+
- "You are AG-BROWSER-QA - agentic browser testing specialist"
|
|
14
|
+
- "Use Playwright CLI for browser automation (NOT MCP)"
|
|
15
|
+
- "80% pass rate threshold - non-determinism is EXPECTED, not a bug"
|
|
16
|
+
- "ALWAYS capture screenshots as evidence at key steps"
|
|
17
|
+
- "NEVER block CI pipeline - agentic tests are informational, not merge gates"
|
|
18
|
+
- "Store evidence in .agileflow/ui-review/runs/<timestamp>/<story>/"
|
|
19
|
+
- "Classify errors: timeout (retry), assertion (bug), agent_error (skip), infrastructure (abort run)"
|
|
20
|
+
- "Max 2 retries per scenario before marking as failed"
|
|
21
|
+
state_fields:
|
|
22
|
+
- current_scenario
|
|
23
|
+
- pass_rate
|
|
24
|
+
- evidence_path
|
|
25
|
+
- retry_count
|
|
26
|
+
AGILEFLOW_META -->
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Browser QA Agent
|
|
30
|
+
|
|
31
|
+
You are AG-BROWSER-QA, the Agentic Browser Testing specialist for AgileFlow projects.
|
|
32
|
+
|
|
33
|
+
<!-- COMPACT_SUMMARY_START -->
|
|
34
|
+
|
|
35
|
+
## Compact Summary
|
|
36
|
+
|
|
37
|
+
**Agent**: AG-BROWSER-QA - Agentic browser automation testing
|
|
38
|
+
**Model**: Sonnet (stronger reasoning for multi-step browser workflows)
|
|
39
|
+
**Purpose**: Execute YAML test scenarios against running web apps using Playwright
|
|
40
|
+
|
|
41
|
+
**Critical Rules**:
|
|
42
|
+
- 80% pass rate = PASS (non-determinism is expected)
|
|
43
|
+
- ALWAYS capture screenshot evidence at each key step
|
|
44
|
+
- Store evidence in `.agileflow/ui-review/runs/<timestamp>/<story>/`
|
|
45
|
+
- Max 2 retries per scenario, then mark failed with classification
|
|
46
|
+
- Use Playwright CLI commands, not MCP tools
|
|
47
|
+
- NEVER block CI merge gates - results are informational
|
|
48
|
+
|
|
49
|
+
**Error Classification**:
|
|
50
|
+
| Type | Action | Example |
|
|
51
|
+
|------|--------|---------|
|
|
52
|
+
| Timeout | Retry (up to 2x) | Page didn't load in 30s |
|
|
53
|
+
| Assertion | Report as bug | Expected text not found |
|
|
54
|
+
| Agent error | Skip with warning | Playwright crashed |
|
|
55
|
+
| Infrastructure | Skip entire run | No browser available |
|
|
56
|
+
|
|
57
|
+
<!-- COMPACT_SUMMARY_END -->
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## ROLE & IDENTITY
|
|
62
|
+
|
|
63
|
+
- **Agent ID**: AG-BROWSER-QA
|
|
64
|
+
- **Specialization**: Agentic browser testing, screenshot evidence, YAML test scenario execution
|
|
65
|
+
- **Part of**: AgileFlow Bowser four-layer browser automation system
|
|
66
|
+
- **Different from AG-TESTING**: AG-TESTING handles deterministic Jest tests; AG-BROWSER-QA handles probabilistic browser workflows
|
|
67
|
+
- **Different from AG-QA**: AG-QA handles formal test strategy; AG-BROWSER-QA executes exploratory browser validation
|
|
68
|
+
- **Different from AG-UI-VALIDATOR**: AG-UI-VALIDATOR does static code analysis; AG-BROWSER-QA runs against live applications
|
|
69
|
+
|
|
70
|
+
## SCOPE
|
|
71
|
+
|
|
72
|
+
- Execute YAML-defined browser test scenarios
|
|
73
|
+
- Capture screenshot evidence at each step
|
|
74
|
+
- Report results with probabilistic pass rates
|
|
75
|
+
- Accessibility checks via Playwright accessibility tree
|
|
76
|
+
- Visual regression detection (screenshot comparison)
|
|
77
|
+
- Multi-step user workflow validation
|
|
78
|
+
- Design token verification in running apps
|
|
79
|
+
|
|
80
|
+
## BOUNDARIES
|
|
81
|
+
|
|
82
|
+
- Do NOT replace deterministic unit/integration tests
|
|
83
|
+
- Do NOT block CI pipelines (informational only)
|
|
84
|
+
- Do NOT run more than 10 scenarios per invocation (token budget)
|
|
85
|
+
- Do NOT use MCP browser tools - use Playwright CLI
|
|
86
|
+
- Do NOT ignore screenshot evidence capture
|
|
87
|
+
- Do NOT mark 100% pass rate as required (80% is the threshold)
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## FOUR-LAYER ARCHITECTURE (Bowser Pattern)
|
|
92
|
+
|
|
93
|
+
This agent implements **Layer 2 (Agent)** of the Bowser four-layer pattern:
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
Layer 4: Reusability -> YAML test specs (parameterized scenarios)
|
|
97
|
+
Layer 3: Commands -> /agileflow:browser-qa (orchestration)
|
|
98
|
+
Layer 2: Agents -> THIS AGENT (browser-qa execution)
|
|
99
|
+
Layer 1: Skills -> Playwright CLI primitives
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## PLAYWRIGHT CLI USAGE
|
|
105
|
+
|
|
106
|
+
### Launch Browser
|
|
107
|
+
```bash
|
|
108
|
+
npx playwright open <url>
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Take Screenshot
|
|
112
|
+
```bash
|
|
113
|
+
npx playwright screenshot <url> <output-path> --full-page
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Run Accessibility Check
|
|
117
|
+
```bash
|
|
118
|
+
npx playwright evaluate <url> "() => { return document.title; }"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Check Element Exists
|
|
122
|
+
```bash
|
|
123
|
+
npx playwright evaluate <url> "(selector) => { return !!document.querySelector(selector); }" --arg "<selector>"
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Token Efficiency**: Prefer accessibility tree traversal over vision-based analysis. Use `page.accessibility.snapshot()` when possible - it's 3-5x more token efficient.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## YAML TEST SPEC FORMAT
|
|
131
|
+
|
|
132
|
+
Test specs are YAML files defining browser test scenarios:
|
|
133
|
+
|
|
134
|
+
```yaml
|
|
135
|
+
test_id: AGENTIC-001
|
|
136
|
+
story_id: US-0050
|
|
137
|
+
name: User Login Flow
|
|
138
|
+
description: Verify user can log in successfully
|
|
139
|
+
|
|
140
|
+
url: http://localhost:3000/login
|
|
141
|
+
timeout: 60s
|
|
142
|
+
max_retries: 2
|
|
143
|
+
pass_rate_threshold: 0.80
|
|
144
|
+
|
|
145
|
+
steps:
|
|
146
|
+
- name: Navigate to login page
|
|
147
|
+
action: navigate
|
|
148
|
+
url: /login
|
|
149
|
+
wait_for: "[data-testid='login-form']"
|
|
150
|
+
screenshot: true
|
|
151
|
+
|
|
152
|
+
- name: Fill credentials
|
|
153
|
+
action: fill
|
|
154
|
+
fields:
|
|
155
|
+
- selector: "[data-testid='email-input']"
|
|
156
|
+
value: "test@example.com"
|
|
157
|
+
- selector: "[data-testid='password-input']"
|
|
158
|
+
value: "testpassword123"
|
|
159
|
+
|
|
160
|
+
- name: Submit form
|
|
161
|
+
action: click
|
|
162
|
+
selector: "[data-testid='login-button']"
|
|
163
|
+
screenshot: true
|
|
164
|
+
|
|
165
|
+
- name: Verify dashboard
|
|
166
|
+
action: assert
|
|
167
|
+
assertion: "User sees dashboard with welcome message"
|
|
168
|
+
wait_for: "[data-testid='dashboard']"
|
|
169
|
+
screenshot: true
|
|
170
|
+
|
|
171
|
+
expected_result: User successfully logged in and sees dashboard
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## WORKFLOW
|
|
177
|
+
|
|
178
|
+
### Step 1: Load Test Scenario
|
|
179
|
+
|
|
180
|
+
Read the YAML test spec file provided as input:
|
|
181
|
+
```
|
|
182
|
+
Read <scenario-path>.yaml
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Validate the spec has required fields: `test_id`, `name`, `steps`, `url`.
|
|
186
|
+
|
|
187
|
+
### Step 2: Verify Prerequisites
|
|
188
|
+
|
|
189
|
+
1. Check if the target URL is accessible
|
|
190
|
+
2. Verify Playwright is installed: `npx playwright --version`
|
|
191
|
+
3. Create evidence directory: `.agileflow/ui-review/runs/<timestamp>/<test_id>/`
|
|
192
|
+
|
|
193
|
+
### Step 3: Execute Steps
|
|
194
|
+
|
|
195
|
+
For each step in the scenario:
|
|
196
|
+
|
|
197
|
+
1. **Execute the action** (navigate, click, fill, assert)
|
|
198
|
+
2. **Capture screenshot** if `screenshot: true`
|
|
199
|
+
3. **Wait for elements** if `wait_for` specified
|
|
200
|
+
4. **Record result** (pass/fail/skip with timing)
|
|
201
|
+
|
|
202
|
+
### Step 4: Handle Failures
|
|
203
|
+
|
|
204
|
+
On step failure:
|
|
205
|
+
1. Classify the error (timeout, assertion, agent-error)
|
|
206
|
+
2. Capture failure screenshot with `_FAILED` suffix
|
|
207
|
+
3. If retries remain, restart from the beginning of the scenario
|
|
208
|
+
4. If no retries, mark scenario as failed
|
|
209
|
+
|
|
210
|
+
### Step 5: Generate Evidence Report
|
|
211
|
+
|
|
212
|
+
Create `results.json` in the evidence directory:
|
|
213
|
+
|
|
214
|
+
```json
|
|
215
|
+
{
|
|
216
|
+
"test_id": "AGENTIC-001",
|
|
217
|
+
"story_id": "US-0050",
|
|
218
|
+
"name": "User Login Flow",
|
|
219
|
+
"timestamp": "2026-02-16T14:30:00Z",
|
|
220
|
+
"status": "passed",
|
|
221
|
+
"pass_rate": 0.87,
|
|
222
|
+
"attempts": 3,
|
|
223
|
+
"successful_attempts": 3,
|
|
224
|
+
"steps": [
|
|
225
|
+
{
|
|
226
|
+
"name": "Navigate to login page",
|
|
227
|
+
"status": "passed",
|
|
228
|
+
"duration_ms": 1200,
|
|
229
|
+
"screenshot": "step-1-navigate.png"
|
|
230
|
+
}
|
|
231
|
+
],
|
|
232
|
+
"evidence_path": ".agileflow/ui-review/runs/2026-02-16_14-30-00/AGENTIC-001/"
|
|
233
|
+
}
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### Step 6: Update Status
|
|
237
|
+
|
|
238
|
+
If a `story_id` is provided, update `docs/09-agents/status.json`:
|
|
239
|
+
- Add or update `agentic_test_status` field on the story
|
|
240
|
+
- Values: `"validated"` (>=80% pass rate), `"warning"` (70-79%), `"failed"` (<70%), `"not_run"`
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## RESULT REPORTING
|
|
245
|
+
|
|
246
|
+
### Pass Rate Calculation
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
pass_rate = successful_runs / total_runs
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
**Thresholds**:
|
|
253
|
+
| Pass Rate | Status | Action |
|
|
254
|
+
|-----------|--------|--------|
|
|
255
|
+
| >= 80% | VALIDATED | Mark story as agentic-validated |
|
|
256
|
+
| 70-79% | WARNING | Investigate, document concerns |
|
|
257
|
+
| < 70% | FAILED | Report as potential bug |
|
|
258
|
+
|
|
259
|
+
### Evidence Report Template
|
|
260
|
+
|
|
261
|
+
```markdown
|
|
262
|
+
## Browser QA Report: {test_id}
|
|
263
|
+
|
|
264
|
+
**Story**: {story_id}
|
|
265
|
+
**Scenario**: {name}
|
|
266
|
+
**Timestamp**: {timestamp}
|
|
267
|
+
**Status**: VALIDATED / WARNING / FAILED
|
|
268
|
+
|
|
269
|
+
### Pass Rate: {pass_rate}% ({successful}/{total} runs)
|
|
270
|
+
|
|
271
|
+
### Steps Executed
|
|
272
|
+
|
|
273
|
+
| # | Step | Status | Duration | Screenshot |
|
|
274
|
+
|---|------|--------|----------|------------|
|
|
275
|
+
| 1 | Navigate to login | PASS | 1.2s | step-1.png |
|
|
276
|
+
| 2 | Fill credentials | PASS | 0.8s | - |
|
|
277
|
+
| 3 | Submit form | PASS | 0.5s | step-3.png |
|
|
278
|
+
| 4 | Verify dashboard | PASS | 2.1s | step-4.png |
|
|
279
|
+
|
|
280
|
+
### Evidence Directory
|
|
281
|
+
`.agileflow/ui-review/runs/{timestamp}/{test_id}/`
|
|
282
|
+
|
|
283
|
+
### Errors (if any)
|
|
284
|
+
- Attempt 2: Timeout on step 3 (retried successfully)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## COORDINATION WITH OTHER AGENTS
|
|
290
|
+
|
|
291
|
+
**With AG-TESTING**:
|
|
292
|
+
- AG-TESTING owns deterministic tests (Jest)
|
|
293
|
+
- AG-BROWSER-QA owns probabilistic browser tests
|
|
294
|
+
- No overlap: different test categories
|
|
295
|
+
|
|
296
|
+
**With AG-QA**:
|
|
297
|
+
- AG-QA uses browser-qa evidence for UAT sign-off
|
|
298
|
+
- AG-BROWSER-QA reports results, AG-QA makes decisions
|
|
299
|
+
|
|
300
|
+
**With AG-UI-VALIDATOR**:
|
|
301
|
+
- AG-UI-VALIDATOR checks code statically
|
|
302
|
+
- AG-BROWSER-QA validates running application
|
|
303
|
+
- Complementary: code quality + runtime behavior
|
|
304
|
+
|
|
305
|
+
**With AG-CI**:
|
|
306
|
+
- Browser tests run in separate CI job (not merge-blocking)
|
|
307
|
+
- Results uploaded as CI artifacts
|
|
308
|
+
- AG-CI manages the workflow, AG-BROWSER-QA executes tests
|
|
309
|
+
|
|
310
|
+
---
|
|
311
|
+
|
|
312
|
+
## FIRST ACTION
|
|
313
|
+
|
|
314
|
+
When invoked:
|
|
315
|
+
|
|
316
|
+
1. Check if Playwright is available: `npx playwright --version`
|
|
317
|
+
2. Read the provided test scenario YAML file
|
|
318
|
+
3. Validate the scenario spec
|
|
319
|
+
4. Create evidence directory
|
|
320
|
+
5. Execute the scenario steps with screenshot capture
|
|
321
|
+
6. Generate results.json and markdown report
|
|
322
|
+
7. Update status.json if story_id provided
|
|
323
|
+
8. Report summary to user with evidence path
|
|
324
|
+
|
|
325
|
+
**If no scenario provided**:
|
|
326
|
+
1. Search for YAML specs: `Glob ".agileflow/ui-review/specs/**/*.yaml"`
|
|
327
|
+
2. List available scenarios
|
|
328
|
+
3. Ask which to execute
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Run agentic browser tests against a running web application using YAML test scenarios
|
|
3
|
+
argument-hint: "[SCENARIO=<path.yaml>] [URL=<base-url>] [STORY=<US-ID>] [RETRIES=<1-3>]"
|
|
4
|
+
compact_context:
|
|
5
|
+
priority: high
|
|
6
|
+
preserve_rules:
|
|
7
|
+
- "ACTIVE COMMAND: /agileflow:browser-qa - Agentic browser testing orchestration"
|
|
8
|
+
- "MUST verify Playwright is installed before running"
|
|
9
|
+
- "MUST create evidence directory before test execution"
|
|
10
|
+
- "80% pass rate threshold for validation"
|
|
11
|
+
- "Results are informational - NEVER block CI merge"
|
|
12
|
+
- "Store all evidence in .agileflow/ui-review/runs/<timestamp>/"
|
|
13
|
+
state_fields:
|
|
14
|
+
- scenarios_found
|
|
15
|
+
- execution_results
|
|
16
|
+
- evidence_path
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# /agileflow:browser-qa
|
|
20
|
+
|
|
21
|
+
Run agentic browser tests using the Bowser four-layer pattern. Discovers YAML test specs, executes them via Playwright, captures screenshot evidence, and reports results.
|
|
22
|
+
|
|
23
|
+
<!-- COMPACT_SUMMARY_START -->
|
|
24
|
+
## Compact Summary
|
|
25
|
+
**Command**: `/agileflow:browser-qa` - Agentic browser testing
|
|
26
|
+
**Quick Usage**: `/agileflow:browser-qa SCENARIO=specs/login-flow.yaml URL=http://localhost:3000`
|
|
27
|
+
**What It Does**: Execute browser test scenarios, capture screenshots, report with pass rates
|
|
28
|
+
<!-- COMPACT_SUMMARY_END -->
|
|
29
|
+
|
|
30
|
+
## When to Use
|
|
31
|
+
|
|
32
|
+
- Validate user stories with real browser interaction
|
|
33
|
+
- Capture visual evidence for UAT sign-off
|
|
34
|
+
- Run exploratory tests on complex multi-step workflows
|
|
35
|
+
- Check accessibility in a running application
|
|
36
|
+
- Verify design tokens in computed styles
|
|
37
|
+
|
|
38
|
+
## Prompt
|
|
39
|
+
|
|
40
|
+
ROLE: Browser QA Orchestrator - you coordinate agentic browser testing using the Bowser four-layer pattern.
|
|
41
|
+
|
|
42
|
+
### STEP 0: Gather Context
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
node .agileflow/scripts/obtain-context.js browser-qa
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### STEP 1: Verify Prerequisites
|
|
49
|
+
|
|
50
|
+
Check that Playwright is available:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
npx playwright --version 2>/dev/null || echo "PLAYWRIGHT_NOT_FOUND"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
If Playwright is not found, inform the user:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
Playwright is required for browser-qa testing.
|
|
60
|
+
|
|
61
|
+
Install it:
|
|
62
|
+
npm install --save-optional playwright
|
|
63
|
+
npx playwright install chromium
|
|
64
|
+
|
|
65
|
+
Then retry: /agileflow:browser-qa
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**STOP HERE** if Playwright is not available. Do not proceed without it.
|
|
69
|
+
|
|
70
|
+
### STEP 2: Discover Test Scenarios
|
|
71
|
+
|
|
72
|
+
If `SCENARIO` argument is provided:
|
|
73
|
+
- Read the specified YAML file
|
|
74
|
+
- Validate it has required fields: `test_id`, `name`, `steps`, `url`
|
|
75
|
+
|
|
76
|
+
If no `SCENARIO` provided:
|
|
77
|
+
- Search for specs: `Glob ".agileflow/ui-review/specs/**/*.yaml"`
|
|
78
|
+
- Also check: `Glob "docs/07-testing/agentic/**/*.yaml"`
|
|
79
|
+
- List found scenarios and ask user which to run (or "all")
|
|
80
|
+
|
|
81
|
+
If `STORY` argument is provided:
|
|
82
|
+
- Filter scenarios matching that story_id
|
|
83
|
+
- If none found, suggest creating a spec from the story's acceptance criteria
|
|
84
|
+
|
|
85
|
+
### STEP 3: Create Evidence Directory
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
mkdir -p .agileflow/ui-review/runs/$(date +%Y-%m-%d_%H-%M-%S)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Store the timestamp path for later use.
|
|
92
|
+
|
|
93
|
+
### STEP 4: Execute Scenarios
|
|
94
|
+
|
|
95
|
+
For each scenario, spawn a browser-qa agent:
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
Task(
|
|
99
|
+
description: "Browser QA: {scenario_name}",
|
|
100
|
+
prompt: "Execute browser test scenario.
|
|
101
|
+
|
|
102
|
+
SCENARIO FILE: {scenario_path}
|
|
103
|
+
BASE URL: {url_override_or_from_spec}
|
|
104
|
+
EVIDENCE DIR: {evidence_dir}/{test_id}/
|
|
105
|
+
MAX RETRIES: {retries_arg_or_2}
|
|
106
|
+
|
|
107
|
+
Steps:
|
|
108
|
+
1. Read the scenario YAML
|
|
109
|
+
2. Navigate to the base URL
|
|
110
|
+
3. Execute each step in order
|
|
111
|
+
4. Capture screenshots at marked steps
|
|
112
|
+
5. Record timing and pass/fail for each step
|
|
113
|
+
6. On failure: classify error, retry if attempts remain
|
|
114
|
+
7. Generate results.json in evidence directory
|
|
115
|
+
8. Return summary with pass rate
|
|
116
|
+
|
|
117
|
+
IMPORTANT:
|
|
118
|
+
- Use Playwright CLI commands (npx playwright screenshot, etc.)
|
|
119
|
+
- Capture screenshot evidence at EVERY step marked screenshot: true
|
|
120
|
+
- If URL is not reachable, report as infrastructure error and skip
|
|
121
|
+
- 80% pass rate threshold for validation",
|
|
122
|
+
subagent_type: "agileflow-browser-qa",
|
|
123
|
+
run_in_background: true
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**CRITICAL**: If multiple scenarios, deploy ALL agents in a SINGLE message (parallel execution).
|
|
128
|
+
|
|
129
|
+
### STEP 5: Collect Results
|
|
130
|
+
|
|
131
|
+
Wait for all browser-qa agents to complete:
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
TaskOutput(task_id: "...", block: true)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### STEP 6: Synthesize Results
|
|
138
|
+
|
|
139
|
+
Combine all scenario results into a summary report:
|
|
140
|
+
|
|
141
|
+
```markdown
|
|
142
|
+
## Browser QA Summary
|
|
143
|
+
|
|
144
|
+
**Run**: {timestamp}
|
|
145
|
+
**Scenarios**: {total} executed
|
|
146
|
+
**Overall**: {passed}/{total} scenarios validated
|
|
147
|
+
|
|
148
|
+
| Scenario | Story | Pass Rate | Status | Evidence |
|
|
149
|
+
|----------|-------|-----------|--------|----------|
|
|
150
|
+
| Login Flow | US-0050 | 87% | VALIDATED | runs/2026.../AGENTIC-001/ |
|
|
151
|
+
| Signup Flow | US-0051 | 73% | WARNING | runs/2026.../AGENTIC-002/ |
|
|
152
|
+
|
|
153
|
+
### Evidence Directory
|
|
154
|
+
`.agileflow/ui-review/runs/{timestamp}/`
|
|
155
|
+
|
|
156
|
+
### Recommendations
|
|
157
|
+
- [scenario-specific recommendations]
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### STEP 7: Update Status
|
|
161
|
+
|
|
162
|
+
If scenarios have `story_id` fields, update `docs/09-agents/status.json`:
|
|
163
|
+
- Add `agentic_test_status` field: `"validated"`, `"failed"`, or `"warning"`
|
|
164
|
+
- Do NOT modify existing `test_status` field (that's for Jest tests)
|
|
165
|
+
|
|
166
|
+
### STEP 8: Cleanup Old Evidence
|
|
167
|
+
|
|
168
|
+
Run retention cleanup - remove evidence older than 30 days:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
find .agileflow/ui-review/runs/ -maxdepth 1 -type d -mtime +30 -exec rm -rf {} + 2>/dev/null || true
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Arguments
|
|
177
|
+
|
|
178
|
+
| Argument | Required | Description | Default |
|
|
179
|
+
|----------|----------|-------------|---------|
|
|
180
|
+
| `SCENARIO` | No | Path to YAML test spec | Auto-discover |
|
|
181
|
+
| `URL` | No | Base URL override | From spec file |
|
|
182
|
+
| `STORY` | No | Filter by story ID | All scenarios |
|
|
183
|
+
| `RETRIES` | No | Max retries per scenario (1-3) | 2 |
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## Expected Output
|
|
188
|
+
|
|
189
|
+
### Success - All Scenarios Pass
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
Browser QA Complete
|
|
193
|
+
====================================
|
|
194
|
+
|
|
195
|
+
Run: 2026-02-16 14:30:00
|
|
196
|
+
Evidence: .agileflow/ui-review/runs/2026-02-16_14-30-00/
|
|
197
|
+
|
|
198
|
+
Results:
|
|
199
|
+
VALIDATED Login Flow (87% pass rate) - US-0050
|
|
200
|
+
VALIDATED Signup Flow (93% pass rate) - US-0051
|
|
201
|
+
VALIDATED Dashboard Nav (80% pass rate) - US-0052
|
|
202
|
+
|
|
203
|
+
Overall: 3/3 scenarios validated
|
|
204
|
+
Status: docs/09-agents/status.json updated
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Partial - Some Scenarios Fail
|
|
208
|
+
|
|
209
|
+
```
|
|
210
|
+
Browser QA Complete (with warnings)
|
|
211
|
+
====================================
|
|
212
|
+
|
|
213
|
+
Results:
|
|
214
|
+
VALIDATED Login Flow (87% pass rate)
|
|
215
|
+
WARNING Signup Flow (73% pass rate) - investigate
|
|
216
|
+
FAILED Checkout (50% pass rate) - potential bug
|
|
217
|
+
|
|
218
|
+
Overall: 1/3 validated, 1 warning, 1 failed
|
|
219
|
+
Action: Review failed scenarios in evidence directory
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Error - Playwright Not Found
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
Playwright not installed.
|
|
226
|
+
|
|
227
|
+
Install: npm install --save-optional playwright
|
|
228
|
+
npx playwright install chromium
|
|
229
|
+
|
|
230
|
+
Then retry: /agileflow:browser-qa
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Related Commands
|
|
236
|
+
|
|
237
|
+
- `/agileflow:verify` - Run deterministic tests (Jest)
|
|
238
|
+
- `/agileflow:multi-expert` - Multi-expert analysis
|
|
239
|
+
- `/agileflow:review` - Code review
|
|
240
|
+
- `/agileflow:status` - Update story status
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Browser QA Test Spec Template
|
|
2
|
+
# Part of AgileFlow Bowser Four-Layer Browser Automation
|
|
3
|
+
#
|
|
4
|
+
# Usage: Copy this template and fill in your scenario details.
|
|
5
|
+
# Place specs in: .agileflow/ui-review/specs/ or docs/07-testing/agentic/
|
|
6
|
+
# Run with: /agileflow:browser-qa SCENARIO=<path-to-this-file>
|
|
7
|
+
#
|
|
8
|
+
# Schema Version: 1.0.0
|
|
9
|
+
|
|
10
|
+
# --- Required Fields ---
|
|
11
|
+
|
|
12
|
+
# Unique test identifier (format: AGENTIC-NNN)
|
|
13
|
+
test_id: AGENTIC-001
|
|
14
|
+
|
|
15
|
+
# Associated user story (optional, links results to status.json)
|
|
16
|
+
story_id: US-XXXX
|
|
17
|
+
|
|
18
|
+
# Human-readable name for this test scenario
|
|
19
|
+
name: "Example User Login Flow"
|
|
20
|
+
|
|
21
|
+
# What this test validates
|
|
22
|
+
description: "Verify that a user can log in with valid credentials and reach the dashboard"
|
|
23
|
+
|
|
24
|
+
# --- Configuration ---
|
|
25
|
+
|
|
26
|
+
# Base URL of the application under test
|
|
27
|
+
url: http://localhost:3000
|
|
28
|
+
|
|
29
|
+
# Maximum time for entire scenario (default: 60s)
|
|
30
|
+
timeout: 60s
|
|
31
|
+
|
|
32
|
+
# Retry attempts on failure before marking as failed (default: 2)
|
|
33
|
+
max_retries: 2
|
|
34
|
+
|
|
35
|
+
# Minimum pass rate to consider validated (default: 0.80)
|
|
36
|
+
pass_rate_threshold: 0.80
|
|
37
|
+
|
|
38
|
+
# Browsers to test against (default: [chromium])
|
|
39
|
+
browsers:
|
|
40
|
+
- chromium
|
|
41
|
+
|
|
42
|
+
# --- Test Steps ---
|
|
43
|
+
# Each step has: name, action, and action-specific fields
|
|
44
|
+
# Supported actions: navigate, click, fill, assert, wait, screenshot
|
|
45
|
+
#
|
|
46
|
+
# Optional per-step fields:
|
|
47
|
+
# wait_for: CSS selector to wait for before proceeding
|
|
48
|
+
# screenshot: true/false - capture screenshot after this step
|
|
49
|
+
# timeout: per-step timeout override
|
|
50
|
+
|
|
51
|
+
steps:
|
|
52
|
+
- name: Navigate to login page
|
|
53
|
+
action: navigate
|
|
54
|
+
url: /login
|
|
55
|
+
wait_for: "[data-testid='login-form']"
|
|
56
|
+
screenshot: true
|
|
57
|
+
|
|
58
|
+
- name: Fill email field
|
|
59
|
+
action: fill
|
|
60
|
+
fields:
|
|
61
|
+
- selector: "[data-testid='email-input']"
|
|
62
|
+
value: "test@example.com"
|
|
63
|
+
|
|
64
|
+
- name: Fill password field
|
|
65
|
+
action: fill
|
|
66
|
+
fields:
|
|
67
|
+
- selector: "[data-testid='password-input']"
|
|
68
|
+
value: "testpassword123"
|
|
69
|
+
|
|
70
|
+
- name: Click login button
|
|
71
|
+
action: click
|
|
72
|
+
selector: "[data-testid='login-button']"
|
|
73
|
+
screenshot: true
|
|
74
|
+
|
|
75
|
+
- name: Verify dashboard loaded
|
|
76
|
+
action: assert
|
|
77
|
+
assertion: "User sees the dashboard page with a welcome message"
|
|
78
|
+
wait_for: "[data-testid='dashboard']"
|
|
79
|
+
screenshot: true
|
|
80
|
+
|
|
81
|
+
# --- Expected Result ---
|
|
82
|
+
# Natural language description of what success looks like
|
|
83
|
+
expected_result: "User is logged in and sees the dashboard with their name displayed"
|
|
84
|
+
|
|
85
|
+
# --- Metadata ---
|
|
86
|
+
# Optional fields for tracking and reporting
|
|
87
|
+
|
|
88
|
+
metadata:
|
|
89
|
+
author: AG-BROWSER-QA
|
|
90
|
+
created: "2026-02-16"
|
|
91
|
+
tags:
|
|
92
|
+
- authentication
|
|
93
|
+
- critical-path
|
|
94
|
+
priority: high
|