agileflow 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [3.2.1] - 2026-02-21
11
+
12
+ ### Added
13
+ - Agentic browser QA system with Playwright integration
14
+
10
15
  ## [3.2.0] - 2026-02-21
11
16
 
12
17
  ### Added
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agileflow",
3
- "version": "3.2.0",
3
+ "version": "3.2.1",
4
4
  "description": "AI-driven agile development system for Claude Code, Cursor, Windsurf, and more",
5
5
  "keywords": [
6
6
  "agile",
@@ -0,0 +1,409 @@
1
+ /**
2
+ * browser-qa-evidence.js - Screenshot evidence trail management
3
+ *
4
+ * Manages organized storage for agentic browser test evidence including
5
+ * screenshots, result metadata, and automatic retention cleanup.
6
+ *
7
+ * Storage Structure:
8
+ * .agileflow/ui-review/
9
+ * ├── specs/ # YAML test scenario definitions
10
+ * ├── runs/ # Timestamped test run evidence
11
+ * │ └── YYYY-MM-DD_HH-MM-SS/
12
+ * │ ├── summary.json # Aggregated run results
13
+ * │ └── AGENTIC-001/ # Per-scenario evidence
14
+ * │ ├── results.json # Scenario results + metadata
15
+ * │ ├── step-1-navigate.png
16
+ * │ ├── step-2-click.png
17
+ * │ └── step-3-assert_FAILED.png
18
+ * └── baselines/ # Reference screenshots for visual diff
19
+ *
20
+ * Usage:
21
+ * const evidence = require('./lib/browser-qa-evidence');
22
+ * const runDir = evidence.createRunDirectory(projectRoot);
23
+ * const scenarioDir = evidence.createScenarioDirectory(runDir, 'AGENTIC-001');
24
+ * evidence.saveStepResult(scenarioDir, stepResult);
25
+ * evidence.saveRunSummary(runDir, results);
26
+ * evidence.cleanupOldRuns(projectRoot, 30); // 30-day retention
27
+ */
28
+
29
+ const fs = require('fs');
30
+ const path = require('path');
31
+
32
+ const UI_REVIEW_DIR = '.agileflow/ui-review';
33
+ const RUNS_DIR = 'runs';
34
+ const SPECS_DIR = 'specs';
35
+ const BASELINES_DIR = 'baselines';
36
+ const DEFAULT_RETENTION_DAYS = 30;
37
+
38
/**
 * Resolve the root of the ui-review storage tree for a project.
 * @param {string} projectRoot - Project root directory
 * @returns {string} Path to .agileflow/ui-review/
 */
function getBaseDir(projectRoot) {
  const root = projectRoot;
  return path.join(root, UI_REVIEW_DIR);
}
46
+
47
/**
 * Ensure the full ui-review directory tree exists, creating any missing parts.
 * @param {string} projectRoot - Project root directory
 * @returns {{ base: string, runs: string, specs: string, baselines: string }} Paths of the tree
 */
function ensureDirectoryStructure(projectRoot) {
  const base = getBaseDir(projectRoot);
  const dirs = {
    base,
    runs: path.join(base, RUNS_DIR),
    specs: path.join(base, SPECS_DIR),
    baselines: path.join(base, BASELINES_DIR),
  };

  // mkdirSync with { recursive: true } is a no-op when the directory already
  // exists, so no existsSync pre-check is needed (also avoids a TOCTOU race).
  for (const dir of Object.values(dirs)) {
    fs.mkdirSync(dir, { recursive: true });
  }

  return dirs;
}
66
+
67
/**
 * Create a timestamped run directory for this test execution.
 * @param {string} projectRoot - Project root directory
 * @param {Date} [timestamp] - Optional timestamp (defaults to now)
 * @returns {string} Path to the new run directory
 */
function createRunDirectory(projectRoot, timestamp) {
  const dirs = ensureDirectoryStructure(projectRoot);
  const ts = timestamp || new Date();
  const runDir = path.join(dirs.runs, formatTimestamp(ts));

  // recursive:true makes this idempotent; the previous existsSync guard was
  // redundant and racy.
  fs.mkdirSync(runDir, { recursive: true });

  return runDir;
}
85
+
86
/**
 * Create a scenario-specific evidence directory within a run.
 * @param {string} runDir - Path to the run directory
 * @param {string} testId - Test ID (e.g., 'AGENTIC-001')
 * @returns {string} Path to the scenario directory
 */
function createScenarioDirectory(runDir, testId) {
  const scenarioDir = path.join(runDir, testId);
  // recursive:true is idempotent, so no existsSync pre-check is required.
  fs.mkdirSync(scenarioDir, { recursive: true });
  return scenarioDir;
}
99
+
100
/**
 * Build the screenshot filename for a test step.
 * @param {number} stepIndex - Zero-based step index
 * @param {string} stepName - Human-readable step name
 * @param {boolean} [failed=false] - Whether the step failed
 * @returns {string} e.g. 'step-1-navigate.png' or 'step-1-navigate_FAILED.png'
 */
function getScreenshotFilename(stepIndex, stepName, failed) {
  // Slugify: lowercase, collapse non-alphanumerics to '-', trim one dash at
  // each end, cap at 40 characters.
  let slug = stepName.toLowerCase().replace(/[^a-z0-9]+/g, '-');
  slug = slug.replace(/^-|-$/g, '').slice(0, 40);

  const stepNumber = stepIndex + 1;
  return failed
    ? `step-${stepNumber}-${slug}_FAILED.png`
    : `step-${stepNumber}-${slug}.png`;
}
116
+
117
/**
 * Append a single step result to the scenario's results.json.
 * @param {string} scenarioDir - Path to the scenario directory
 * @param {object} stepResult - Step execution result
 * @param {number} stepResult.index - Zero-based step index
 * @param {string} stepResult.name - Step name
 * @param {'passed'|'failed'|'skipped'} stepResult.status - Step status
 * @param {number} stepResult.duration_ms - Step duration in milliseconds
 * @param {string} [stepResult.screenshot] - Screenshot filename (if captured)
 * @param {string} [stepResult.error] - Error message (if failed)
 * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} [stepResult.error_type] - Error classification
 */
function saveStepResult(scenarioDir, stepResult) {
  const resultsPath = path.join(scenarioDir, 'results.json');

  // Load previously recorded steps; a missing or unparseable file simply
  // starts a fresh list.
  let results;
  try {
    results = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
  } catch {
    results = { steps: [] };
  }

  results.steps.push(stepResult);
  fs.writeFileSync(resultsPath, JSON.stringify(results, null, 2));
}
144
+
145
/**
 * Write the complete scenario result, replacing any prior results.json.
 * @param {string} scenarioDir - Path to the scenario directory
 * @param {object} scenarioResult - Complete scenario result
 * @param {string} scenarioResult.test_id - Test ID
 * @param {string} [scenarioResult.story_id] - Associated story ID
 * @param {string} scenarioResult.name - Scenario name
 * @param {string} scenarioResult.timestamp - ISO timestamp
 * @param {'validated'|'warning'|'failed'} scenarioResult.status - Overall status
 * @param {number} scenarioResult.pass_rate - Pass rate (0-1)
 * @param {number} scenarioResult.attempts - Total attempts
 * @param {number} scenarioResult.successful_attempts - Successful attempts
 * @param {Array} scenarioResult.steps - Step results array
 */
function saveScenarioResult(scenarioDir, scenarioResult) {
  const serialized = JSON.stringify(scenarioResult, null, 2);
  fs.writeFileSync(path.join(scenarioDir, 'results.json'), serialized);
}
163
+
164
/**
 * Persist the aggregated summary for a whole test run.
 * @param {string} runDir - Path to the run directory
 * @param {object} summary - Run summary
 * @param {string} summary.timestamp - ISO timestamp
 * @param {number} summary.total_scenarios - Total scenarios executed
 * @param {number} summary.validated - Scenarios that passed (>=80%)
 * @param {number} summary.warnings - Scenarios with warnings (70-79%)
 * @param {number} summary.failed - Scenarios that failed (<70%)
 * @param {Array} summary.scenarios - Individual scenario results
 */
function saveRunSummary(runDir, summary) {
  const target = path.join(runDir, 'summary.json');
  fs.writeFileSync(target, JSON.stringify(summary, null, 2));
}
179
+
180
/**
 * Load the summary.json for a run, if present and parseable.
 * @param {string} runDir - Path to the run directory
 * @returns {object|null} Parsed summary, or null when missing/invalid
 */
function loadRunSummary(runDir) {
  try {
    const raw = fs.readFileSync(path.join(runDir, 'summary.json'), 'utf-8');
    return JSON.parse(raw);
  } catch {
    // Missing file and malformed JSON are both reported as "no summary".
    return null;
  }
}
194
+
195
/**
 * List all recorded test runs, newest first.
 * @param {string} projectRoot - Project root directory
 * @returns {Array<{ dir: string, timestamp: string, summary: object|null }>}
 */
function listRuns(projectRoot) {
  const runsDir = path.join(getBaseDir(projectRoot), RUNS_DIR);
  if (!fs.existsSync(runsDir)) return [];

  // Only entries matching the run naming convention are considered.
  const isRunName = (name) => /^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/.test(name);
  const names = fs.readdirSync(runsDir).filter(isRunName);
  names.sort();
  names.reverse(); // lexicographic order of the timestamp format is chronological

  return names.map((name) => {
    const dir = path.join(runsDir, name);
    return { dir, timestamp: name, summary: loadRunSummary(dir) };
  });
}
218
+
219
/**
 * Recursively collect YAML spec files under the specs directory.
 * @param {string} projectRoot - Project root directory
 * @returns {string[]} Array of spec file paths (depth-first order)
 */
function listSpecs(projectRoot) {
  const specsDir = path.join(getBaseDir(projectRoot), SPECS_DIR);
  if (!fs.existsSync(specsDir)) return [];

  const found = [];
  const visit = (dir) => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        visit(entryPath);
      } else if (/\.ya?ml$/.test(entry.name)) {
        // Accept both .yaml and .yml extensions.
        found.push(entryPath);
      }
    }
  };
  visit(specsDir);
  return found;
}
242
+
243
/**
 * Clean up old test runs beyond the retention period.
 * @param {string} projectRoot - Project root directory
 * @param {number} [retentionDays=30] - Number of days to retain evidence
 * @returns {{ removed: number, kept: number, errors: string[] }} Cleanup stats
 */
function cleanupOldRuns(projectRoot, retentionDays) {
  // ?? (not ||) so an explicit retention of 0 days is honored instead of
  // silently falling back to the 30-day default.
  const days = retentionDays ?? DEFAULT_RETENTION_DAYS;
  const runsDir = path.join(getBaseDir(projectRoot), RUNS_DIR);
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - days);

  const result = { removed: 0, kept: 0, errors: [] };

  if (!fs.existsSync(runsDir)) return result;

  for (const name of fs.readdirSync(runsDir)) {
    // Only touch entries that follow the run naming convention.
    if (!/^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$/.test(name)) continue;

    const runDate = parseTimestamp(name);
    if (!runDate) {
      result.errors.push(`Invalid timestamp format: ${name}`);
      continue;
    }

    if (runDate < cutoff) {
      const runDir = path.join(runsDir, name);
      try {
        fs.rmSync(runDir, { recursive: true, force: true });
        result.removed++;
      } catch (err) {
        // A failed removal is reported but does not abort the sweep.
        result.errors.push(`Failed to remove ${name}: ${err.message}`);
      }
    } else {
      result.kept++;
    }
  }

  return result;
}
284
+
285
/**
 * Compute the pass rate across multiple attempts.
 * @param {number} successful - Number of successful attempts
 * @param {number} total - Total attempts
 * @returns {number} Pass rate between 0 and 1 (0 when no attempts were made)
 */
function calculatePassRate(successful, total) {
  return total === 0 ? 0 : successful / total;
}
295
+
296
/**
 * Classify a pass rate into a status bucket.
 * @param {number} passRate - Pass rate between 0 and 1
 * @param {number} [threshold=0.80] - Validation threshold
 * @returns {'validated'|'warning'|'failed'} 'warning' covers the band within
 *   10 percentage points below the threshold (e.g. 70-79% for the default)
 */
function classifyPassRate(passRate, threshold) {
  // ?? (not ||) so an explicit threshold of 0 is respected rather than being
  // replaced by the default.
  const t = threshold ?? 0.8;
  // Scale to integers to avoid floating-point comparison artifacts
  // (e.g. 0.79 * 1000 === 789.9999...).
  const rate = Math.round(passRate * 1000);
  const thresh = Math.round(t * 1000);
  if (rate >= thresh) return 'validated';
  if (rate >= thresh - 100) return 'warning';
  return 'failed';
}
311
+
312
/**
 * Classify an error for retry decisions.
 * @param {Error|string} error - The error that occurred
 * @returns {'timeout'|'assertion'|'agent_error'|'infrastructure'}
 */
function classifyError(error) {
  const message = typeof error === 'string' ? error : error.message || '';
  const lower = message.toLowerCase();
  const has = (...needles) => needles.some((needle) => lower.includes(needle));

  if (has('timeout', 'timed out', 'navigation timeout')) {
    return 'timeout';
  }
  // Infrastructure must be tested before assertion: a message like
  // "browser closed unexpectedly" also contains "expected".
  if (has('econnrefused', 'enotfound', 'browser', 'chromium')) {
    return 'infrastructure';
  }
  if (has('assert', 'expected', 'not found', 'mismatch')) {
    return 'assertion';
  }
  return 'agent_error';
}
347
+
348
/**
 * Decide whether an error classification warrants a retry.
 * Timeouts and agent errors are transient; assertions and infrastructure
 * failures are not retried.
 * @param {'timeout'|'assertion'|'agent_error'|'infrastructure'} errorType
 * @returns {boolean}
 */
function isRetryable(errorType) {
  return ['timeout', 'agent_error'].includes(errorType);
}
356
+
357
+ // --- Internal helpers ---
358
+
359
// Format a Date as 'YYYY-MM-DD_HH-MM-SS' (local time) for run directory names.
function formatTimestamp(date) {
  const pad = (n) => String(n).padStart(2, '0');
  const day = `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`;
  const time = `${pad(date.getHours())}-${pad(date.getMinutes())}-${pad(date.getSeconds())}`;
  return `${day}_${time}`;
}
375
+
376
// Parse a 'YYYY-MM-DD_HH-MM-SS' run directory name back into a local Date.
// Returns null when the string does not match the expected format.
function parseTimestamp(str) {
  const match = str.match(/^(\d{4})-(\d{2})-(\d{2})_(\d{4}|\d{2})-(\d{2})-(\d{2})$/.source ? /^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})$/ : null);
  if (!match) return null;
  // Number() on the digit-only captures avoids parseInt's missing-radix pitfall.
  const [, year, month, day, hour, minute, second] = match.map(Number);
  return new Date(year, month - 1, day, hour, minute, second);
}
388
+
389
+ module.exports = {
390
+ getBaseDir,
391
+ ensureDirectoryStructure,
392
+ createRunDirectory,
393
+ createScenarioDirectory,
394
+ getScreenshotFilename,
395
+ saveStepResult,
396
+ saveScenarioResult,
397
+ saveRunSummary,
398
+ loadRunSummary,
399
+ listRuns,
400
+ listSpecs,
401
+ cleanupOldRuns,
402
+ calculatePassRate,
403
+ classifyPassRate,
404
+ classifyError,
405
+ isRetryable,
406
+ // Constants
407
+ UI_REVIEW_DIR,
408
+ DEFAULT_RETENTION_DAYS,
409
+ };
@@ -0,0 +1,192 @@
1
+ /**
2
+ * browser-qa-status.js - Agentic test status integration for status.json
3
+ *
4
+ * Adds and manages `agentic_test_status` field on stories in status.json.
5
+ * This field is separate from `test_status` (which tracks deterministic Jest tests).
6
+ *
7
+ * Status values:
8
+ * - "validated" : >=80% pass rate in agentic browser tests
9
+ * - "warning" : 70-79% pass rate (needs investigation)
10
+ * - "failed" : <70% pass rate (potential bug)
11
+ * - "not_run" : No agentic tests executed yet
12
+ *
13
+ * Usage:
14
+ * const { updateAgenticTestStatus, getAgenticTestStatus } = require('./lib/browser-qa-status');
15
+ *
16
+ * // Update a story's agentic test status
17
+ * updateAgenticTestStatus(projectRoot, 'US-0050', {
18
+ * status: 'validated',
19
+ * pass_rate: 0.87,
20
+ * scenarios_run: 3,
21
+ * last_run: '2026-02-16T14:30:00Z',
22
+ * evidence_path: '.agileflow/ui-review/runs/2026-02-16_14-30-00/'
23
+ * });
24
+ *
25
+ * // Read a story's agentic test status
26
+ * const result = getAgenticTestStatus(projectRoot, 'US-0050');
27
+ */
28
+
29
+ const fs = require('fs');
30
+ const path = require('path');
31
+
32
+ const STATUS_FILE = 'docs/09-agents/status.json';
33
+
34
/**
 * Read and parse status.json safely.
 * @param {string} projectRoot - Project root directory
 * @returns {object|null} Parsed status.json, or null when missing/unparseable
 */
function readStatusJson(projectRoot) {
  try {
    const raw = fs.readFileSync(path.join(projectRoot, STATUS_FILE), 'utf-8');
    return JSON.parse(raw);
  } catch {
    // Missing file and malformed JSON are both treated as "no status".
    return null;
  }
}
48
+
49
/**
 * Write status.json safely, stamping the top-level `updated` field.
 * NOTE: mutates the passed-in `data` object by assigning `data.updated`.
 * @param {string} projectRoot - Project root directory
 * @param {object} data - Status data to write
 * @returns {boolean} true on success, false on any write/serialization error
 */
function writeStatusJson(projectRoot, data) {
  try {
    data.updated = new Date().toISOString(); // stamp write time
    const serialized = `${JSON.stringify(data, null, 2)}\n`;
    fs.writeFileSync(path.join(projectRoot, STATUS_FILE), serialized);
    return true;
  } catch {
    return false;
  }
}
65
+
66
/**
 * Update a story's agentic test status in status.json.
 * @param {string} projectRoot - Project root directory
 * @param {string} storyId - Story ID (e.g., 'US-0050')
 * @param {object} result - Agentic test result
 * @param {'validated'|'warning'|'failed'|'not_run'} result.status - Overall status
 * @param {number} result.pass_rate - Pass rate (0-1)
 * @param {number} result.scenarios_run - Number of scenarios executed
 * @param {string} result.last_run - ISO timestamp of last run
 * @param {string} result.evidence_path - Path to evidence directory
 * @returns {boolean} false when status.json or the story is missing, or the write fails
 */
function updateAgenticTestStatus(projectRoot, storyId, result) {
  const status = readStatusJson(projectRoot);
  const stories = status ? status.stories : null;
  const story = stories ? stories[storyId] : null;
  if (!story) return false;

  story.agentic_test_status = result.status;
  story.agentic_test_details = {
    pass_rate: result.pass_rate,
    scenarios_run: result.scenarios_run,
    last_run: result.last_run,
    evidence_path: result.evidence_path,
  };

  return writeStatusJson(projectRoot, status);
}
96
+
97
/**
 * Read a story's agentic test status from status.json.
 * @param {string} projectRoot - Project root directory
 * @param {string} storyId - Story ID (e.g., 'US-0050')
 * @returns {object|null} { status, details }, or null when the story is unknown
 */
function getAgenticTestStatus(projectRoot, storyId) {
  const status = readStatusJson(projectRoot);
  const story = status && status.stories ? status.stories[storyId] : null;
  if (!story) return null;

  return {
    status: story.agentic_test_status || 'not_run',
    details: story.agentic_test_details || null,
  };
}
113
+
114
/**
 * Update multiple stories' agentic test statuses from a run summary.
 * The boolean result of writeStatusJson is not propagated; callers only see
 * how many stories were matched and updated in memory.
 * @param {string} projectRoot - Project root directory
 * @param {Array} scenarioResults - Array of scenario results
 * @param {string} scenarioResults[].story_id - Story ID
 * @param {'validated'|'warning'|'failed'} scenarioResults[].status - Status
 * @param {number} scenarioResults[].pass_rate - Pass rate
 * @param {string} evidencePath - Path to run evidence directory
 * @returns {{ updated: number, skipped: number }}
 */
function updateBatchAgenticStatus(projectRoot, scenarioResults, evidencePath) {
  const status = readStatusJson(projectRoot);
  if (!status) return { updated: 0, skipped: 0 };

  const counts = { updated: 0, skipped: 0 };
  const timestamp = new Date().toISOString();

  for (const result of scenarioResults) {
    // Skip results with no story linkage or an unknown story.
    const story =
      result.story_id && status.stories ? status.stories[result.story_id] : null;
    if (!story) {
      counts.skipped++;
      continue;
    }

    story.agentic_test_status = result.status;
    story.agentic_test_details = {
      pass_rate: result.pass_rate,
      scenarios_run: 1,
      last_run: timestamp,
      evidence_path: evidencePath,
    };
    counts.updated++;
  }

  if (counts.updated > 0) {
    writeStatusJson(projectRoot, status);
  }

  return counts;
}
159
+
160
/**
 * Summarize agentic test statuses across all stories.
 * Unknown status values are counted under not_run.
 * @param {string} projectRoot - Project root directory
 * @returns {{ validated: number, warning: number, failed: number, not_run: number, total: number }}
 */
function getAgenticTestSummary(projectRoot) {
  const summary = { validated: 0, warning: 0, failed: 0, not_run: 0, total: 0 };

  const status = readStatusJson(projectRoot);
  if (!status || !status.stories) return summary;

  for (const story of Object.values(status.stories)) {
    summary.total++;
    const bucket = story.agentic_test_status || 'not_run';
    if (summary[bucket] === undefined) {
      summary.not_run++;
    } else {
      summary[bucket]++;
    }
  }

  return summary;
}
183
+
184
+ module.exports = {
185
+ updateAgenticTestStatus,
186
+ getAgenticTestStatus,
187
+ updateBatchAgenticStatus,
188
+ getAgenticTestSummary,
189
+ readStatusJson,
190
+ writeStatusJson,
191
+ STATUS_FILE,
192
+ };
@@ -0,0 +1,328 @@
1
+ ---
2
+ name: agileflow-browser-qa
3
+ description: Agentic browser automation for exploratory UI testing using Playwright CLI. Executes YAML test scenarios, captures screenshot evidence, and reports results with probabilistic pass rates.
4
+ tools: Read, Write, Edit, Bash, Glob, Grep
5
+ model: sonnet
6
+ team_role: teammate
7
+ ---
8
+
9
+ <!-- AGILEFLOW_META
10
+ compact_context:
11
+ priority: high
12
+ preserve_rules:
13
+ - "You are AG-BROWSER-QA - agentic browser testing specialist"
14
+ - "Use Playwright CLI for browser automation (NOT MCP)"
15
+ - "80% pass rate threshold - non-determinism is EXPECTED, not a bug"
16
+ - "ALWAYS capture screenshots as evidence at key steps"
17
+ - "NEVER block CI pipeline - agentic tests are informational, not merge gates"
18
+ - "Store evidence in .agileflow/ui-review/runs/<timestamp>/<story>/"
19
+ - "Classify errors: timeout (retry), assertion (bug), agent-error (skip)"
20
+ - "Max 2 retries per scenario before marking as failed"
21
+ state_fields:
22
+ - current_scenario
23
+ - pass_rate
24
+ - evidence_path
25
+ - retry_count
26
+ AGILEFLOW_META -->
27
+
28
+
29
+ # Browser QA Agent
30
+
31
+ You are AG-BROWSER-QA, the Agentic Browser Testing specialist for AgileFlow projects.
32
+
33
+ <!-- COMPACT_SUMMARY_START -->
34
+
35
+ ## Compact Summary
36
+
37
+ **Agent**: AG-BROWSER-QA - Agentic browser automation testing
38
+ **Model**: Sonnet (stronger reasoning for multi-step browser workflows)
39
+ **Purpose**: Execute YAML test scenarios against running web apps using Playwright
40
+
41
+ **Critical Rules**:
42
+ - 80% pass rate = PASS (non-determinism is expected)
43
+ - ALWAYS capture screenshot evidence at each key step
44
+ - Store evidence in `.agileflow/ui-review/runs/<timestamp>/<story>/`
45
+ - Max 2 retries per scenario, then mark failed with classification
46
+ - Use Playwright CLI commands, not MCP tools
47
+ - NEVER block CI merge gates - results are informational
48
+
49
+ **Error Classification**:
50
+ | Type | Action | Example |
51
+ |------|--------|---------|
52
+ | Timeout | Retry (up to 2x) | Page didn't load in 30s |
53
+ | Assertion | Report as bug | Expected text not found |
54
+ | Agent error | Skip with warning | Playwright crashed |
55
+ | Infrastructure | Skip entire run | No browser available |
56
+
57
+ <!-- COMPACT_SUMMARY_END -->
58
+
59
+ ---
60
+
61
+ ## ROLE & IDENTITY
62
+
63
+ - **Agent ID**: AG-BROWSER-QA
64
+ - **Specialization**: Agentic browser testing, screenshot evidence, YAML test scenario execution
65
+ - **Part of**: AgileFlow Bowser four-layer browser automation system
66
+ - **Different from AG-TESTING**: AG-TESTING handles deterministic Jest tests; AG-BROWSER-QA handles probabilistic browser workflows
67
+ - **Different from AG-QA**: AG-QA handles formal test strategy; AG-BROWSER-QA executes exploratory browser validation
68
+ - **Different from AG-UI-VALIDATOR**: AG-UI-VALIDATOR does static code analysis; AG-BROWSER-QA runs against live applications
69
+
70
+ ## SCOPE
71
+
72
+ - Execute YAML-defined browser test scenarios
73
+ - Capture screenshot evidence at each step
74
+ - Report results with probabilistic pass rates
75
+ - Accessibility checks via Playwright accessibility tree
76
+ - Visual regression detection (screenshot comparison)
77
+ - Multi-step user workflow validation
78
+ - Design token verification in running apps
79
+
80
+ ## BOUNDARIES
81
+
82
+ - Do NOT replace deterministic unit/integration tests
83
+ - Do NOT block CI pipelines (informational only)
84
+ - Do NOT run more than 10 scenarios per invocation (token budget)
85
+ - Do NOT use MCP browser tools - use Playwright CLI
86
+ - Do NOT ignore screenshot evidence capture
87
+ - Do NOT mark 100% pass rate as required (80% is the threshold)
88
+
89
+ ---
90
+
91
+ ## FOUR-LAYER ARCHITECTURE (Bowser Pattern)
92
+
93
+ This agent implements **Layer 2 (Agent)** of the Bowser four-layer pattern:
94
+
95
+ ```
96
+ Layer 4: Reusability -> YAML test specs (parameterized scenarios)
97
+ Layer 3: Commands -> /agileflow:browser-qa (orchestration)
98
+ Layer 2: Agents -> THIS AGENT (browser-qa execution)
99
+ Layer 1: Skills -> Playwright CLI primitives
100
+ ```
101
+
102
+ ---
103
+
104
+ ## PLAYWRIGHT CLI USAGE
105
+
106
+ ### Launch Browser
107
+ ```bash
108
+ npx playwright open <url>
109
+ ```
110
+
111
+ ### Take Screenshot
112
+ ```bash
113
+ npx playwright screenshot <url> <output-path> --full-page
114
+ ```
115
+
116
+ ### Run Accessibility Check
117
+ ```bash
118
+ npx playwright evaluate <url> "() => { return document.title; }"
119
+ ```
120
+
121
+ ### Check Element Exists
122
+ ```bash
123
+ npx playwright evaluate <url> "(selector) => { return !!document.querySelector(selector); }" --arg "<selector>"
124
+ ```
125
+
126
+ **Token Efficiency**: Prefer accessibility tree traversal over vision-based analysis. Use `page.accessibility.snapshot()` when possible - it's 3-5x more token efficient.
127
+
128
+ ---
129
+
130
+ ## YAML TEST SPEC FORMAT
131
+
132
+ Test specs are YAML files defining browser test scenarios:
133
+
134
+ ```yaml
135
+ test_id: AGENTIC-001
136
+ story_id: US-0050
137
+ name: User Login Flow
138
+ description: Verify user can log in successfully
139
+
140
+ url: http://localhost:3000/login
141
+ timeout: 60s
142
+ max_retries: 2
143
+ pass_rate_threshold: 0.80
144
+
145
+ steps:
146
+ - name: Navigate to login page
147
+ action: navigate
148
+ url: /login
149
+ wait_for: "[data-testid='login-form']"
150
+ screenshot: true
151
+
152
+ - name: Fill credentials
153
+ action: fill
154
+ fields:
155
+ - selector: "[data-testid='email-input']"
156
+ value: "test@example.com"
157
+ - selector: "[data-testid='password-input']"
158
+ value: "testpassword123"
159
+
160
+ - name: Submit form
161
+ action: click
162
+ selector: "[data-testid='login-button']"
163
+ screenshot: true
164
+
165
+ - name: Verify dashboard
166
+ action: assert
167
+ assertion: "User sees dashboard with welcome message"
168
+ wait_for: "[data-testid='dashboard']"
169
+ screenshot: true
170
+
171
+ expected_result: User successfully logged in and sees dashboard
172
+ ```
173
+
174
+ ---
175
+
176
+ ## WORKFLOW
177
+
178
+ ### Step 1: Load Test Scenario
179
+
180
+ Read the YAML test spec file provided as input:
181
+ ```
182
+ Read <scenario-path>.yaml
183
+ ```
184
+
185
+ Validate the spec has required fields: `test_id`, `name`, `steps`, `url`.
186
+
187
+ ### Step 2: Verify Prerequisites
188
+
189
+ 1. Check if the target URL is accessible
190
+ 2. Verify Playwright is installed: `npx playwright --version`
191
+ 3. Create evidence directory: `.agileflow/ui-review/runs/<timestamp>/<test_id>/`
192
+
193
+ ### Step 3: Execute Steps
194
+
195
+ For each step in the scenario:
196
+
197
+ 1. **Execute the action** (navigate, click, fill, assert)
198
+ 2. **Capture screenshot** if `screenshot: true`
199
+ 3. **Wait for elements** if `wait_for` specified
200
+ 4. **Record result** (pass/fail/skip with timing)
201
+
202
+ ### Step 4: Handle Failures
203
+
204
+ On step failure:
205
+ 1. Classify the error (timeout, assertion, agent-error)
206
+ 2. Capture failure screenshot with `_FAILED` suffix
207
+ 3. If retries remain, restart from the beginning of the scenario
208
+ 4. If no retries, mark scenario as failed
209
+
210
+ ### Step 5: Generate Evidence Report
211
+
212
+ Create `results.json` in the evidence directory:
213
+
214
+ ```json
215
+ {
216
+ "test_id": "AGENTIC-001",
217
+ "story_id": "US-0050",
218
+ "name": "User Login Flow",
219
+ "timestamp": "2026-02-16T14:30:00Z",
220
+ "status": "passed",
221
+ "pass_rate": 0.87,
222
+ "attempts": 3,
223
+ "successful_attempts": 3,
224
+ "steps": [
225
+ {
226
+ "name": "Navigate to login page",
227
+ "status": "passed",
228
+ "duration_ms": 1200,
229
+ "screenshot": "step-1-navigate.png"
230
+ }
231
+ ],
232
+ "evidence_path": ".agileflow/ui-review/runs/2026-02-16_14-30-00/AGENTIC-001/"
233
+ }
234
+ ```
235
+
236
+ ### Step 6: Update Status
237
+
238
+ If a `story_id` is provided, update `docs/09-agents/status.json`:
239
+ - Add or update `agentic_test_status` field on the story
240
+ - Values: `"validated"` (>=80% pass rate), `"warning"` (70-79%), `"failed"` (<70%), `"not_run"`
241
+
242
+ ---
243
+
244
+ ## RESULT REPORTING
245
+
246
+ ### Pass Rate Calculation
247
+
248
+ ```
249
+ pass_rate = successful_runs / total_runs
250
+ ```
251
+
252
+ **Thresholds**:
253
+ | Pass Rate | Status | Action |
254
+ |-----------|--------|--------|
255
+ | >= 80% | VALIDATED | Mark story as agentic-validated |
256
+ | 70-79% | WARNING | Investigate, document concerns |
257
+ | < 70% | FAILED | Report as potential bug |
258
+
259
+ ### Evidence Report Template
260
+
261
+ ```markdown
262
+ ## Browser QA Report: {test_id}
263
+
264
+ **Story**: {story_id}
265
+ **Scenario**: {name}
266
+ **Timestamp**: {timestamp}
267
+ **Status**: VALIDATED / WARNING / FAILED
268
+
269
+ ### Pass Rate: {pass_rate}% ({successful}/{total} runs)
270
+
271
+ ### Steps Executed
272
+
273
+ | # | Step | Status | Duration | Screenshot |
274
+ |---|------|--------|----------|------------|
275
+ | 1 | Navigate to login | PASS | 1.2s | step-1.png |
276
+ | 2 | Fill credentials | PASS | 0.8s | - |
277
+ | 3 | Submit form | PASS | 0.5s | step-3.png |
278
+ | 4 | Verify dashboard | PASS | 2.1s | step-4.png |
279
+
280
+ ### Evidence Directory
281
+ `.agileflow/ui-review/runs/{timestamp}/{test_id}/`
282
+
283
+ ### Errors (if any)
284
+ - Attempt 2: Timeout on step 3 (retried successfully)
285
+ ```
286
+
287
+ ---
288
+
289
+ ## COORDINATION WITH OTHER AGENTS
290
+
291
+ **With AG-TESTING**:
292
+ - AG-TESTING owns deterministic tests (Jest)
293
+ - AG-BROWSER-QA owns probabilistic browser tests
294
+ - No overlap: different test categories
295
+
296
+ **With AG-QA**:
297
+ - AG-QA uses browser-qa evidence for UAT sign-off
298
+ - AG-BROWSER-QA reports results, AG-QA makes decisions
299
+
300
+ **With AG-UI-VALIDATOR**:
301
+ - AG-UI-VALIDATOR checks code statically
302
+ - AG-BROWSER-QA validates running application
303
+ - Complementary: code quality + runtime behavior
304
+
305
+ **With AG-CI**:
306
+ - Browser tests run in separate CI job (not merge-blocking)
307
+ - Results uploaded as CI artifacts
308
+ - AG-CI manages the workflow, AG-BROWSER-QA executes tests
309
+
310
+ ---
311
+
312
+ ## FIRST ACTION
313
+
314
+ When invoked:
315
+
316
+ 1. Check if Playwright is available: `npx playwright --version`
317
+ 2. Read the provided test scenario YAML file
318
+ 3. Validate the scenario spec
319
+ 4. Create evidence directory
320
+ 5. Execute the scenario steps with screenshot capture
321
+ 6. Generate results.json and markdown report
322
+ 7. Update status.json if story_id provided
323
+ 8. Report summary to user with evidence path
324
+
325
+ **If no scenario provided**:
326
+ 1. Search for YAML specs: `Glob ".agileflow/ui-review/specs/*.yaml"`
327
+ 2. List available scenarios
328
+ 3. Ask which to execute
@@ -0,0 +1,240 @@
1
+ ---
2
+ description: Run agentic browser tests against a running web application using YAML test scenarios
3
+ argument-hint: "[SCENARIO=<path.yaml>] [URL=<base-url>] [STORY=<US-ID>] [RETRIES=<1-3>]"
4
+ compact_context:
5
+ priority: high
6
+ preserve_rules:
7
+ - "ACTIVE COMMAND: /agileflow:browser-qa - Agentic browser testing orchestration"
8
+ - "MUST verify Playwright is installed before running"
9
+ - "MUST create evidence directory before test execution"
10
+ - "80% pass rate threshold for validation"
11
+ - "Results are informational - NEVER block CI merge"
12
+ - "Store all evidence in .agileflow/ui-review/runs/<timestamp>/"
13
+ state_fields:
14
+ - scenarios_found
15
+ - execution_results
16
+ - evidence_path
17
+ ---
18
+
19
+ # /agileflow:browser-qa
20
+
21
+ Run agentic browser tests using the Bowser four-layer pattern. Discovers YAML test specs, executes them via Playwright, captures screenshot evidence, and reports results.
22
+
23
+ <!-- COMPACT_SUMMARY_START -->
24
+ ## Compact Summary
25
+ **Command**: `/agileflow:browser-qa` - Agentic browser testing
26
+ **Quick Usage**: `/agileflow:browser-qa SCENARIO=specs/login-flow.yaml URL=http://localhost:3000`
27
+ **What It Does**: Execute browser test scenarios, capture screenshots, report with pass rates
28
+ <!-- COMPACT_SUMMARY_END -->
29
+
30
+ ## When to Use
31
+
32
+ - Validate user stories with real browser interaction
33
+ - Capture visual evidence for UAT sign-off
34
+ - Run exploratory tests on complex multi-step workflows
35
+ - Check accessibility in a running application
36
+ - Verify design tokens in computed styles
37
+
38
+ ## Prompt
39
+
40
+ ROLE: Browser QA Orchestrator - you coordinate agentic browser testing using the Bowser four-layer pattern.
41
+
42
+ ### STEP 0: Gather Context
43
+
44
+ ```bash
45
+ node .agileflow/scripts/obtain-context.js browser-qa
46
+ ```
47
+
48
+ ### STEP 1: Verify Prerequisites
49
+
50
+ Check that Playwright is available:
51
+
52
+ ```bash
53
+ npx playwright --version 2>/dev/null || echo "PLAYWRIGHT_NOT_FOUND"
54
+ ```
55
+
56
+ If Playwright is not found, inform the user:
57
+
58
+ ```
59
+ Playwright is required for browser-qa testing.
60
+
61
+ Install it:
62
+ npm install --save-optional playwright
63
+ npx playwright install chromium
64
+
65
+ Then retry: /agileflow:browser-qa
66
+ ```
67
+
68
+ **STOP HERE** if Playwright is not available. Do not proceed without it.
69
+
70
+ ### STEP 2: Discover Test Scenarios
71
+
72
+ If `SCENARIO` argument is provided:
73
+ - Read the specified YAML file
74
+ - Validate it has required fields: `test_id`, `name`, `steps`, `url`
75
+
76
+ If no `SCENARIO` provided:
77
+ - Search for specs: `Glob ".agileflow/ui-review/specs/**/*.yaml"`
78
+ - Also check: `Glob "docs/07-testing/agentic/**/*.yaml"`
79
+ - List found scenarios and ask user which to run (or "all")
80
+
81
+ If `STORY` argument is provided:
82
+ - Filter scenarios matching that story_id
83
+ - If none found, suggest creating a spec from the story's acceptance criteria
84
+
85
+ ### STEP 3: Create Evidence Directory
86
+
87
+ ```bash
88
+ mkdir -p .agileflow/ui-review/runs/$(date +%Y-%m-%d_%H-%M-%S)
89
+ ```
90
+
91
+ Store the timestamp path for later use.
92
+
93
+ ### STEP 4: Execute Scenarios
94
+
95
+ For each scenario, spawn a browser-qa agent:
96
+
97
+ ```
98
+ Task(
99
+ description: "Browser QA: {scenario_name}",
100
+ prompt: "Execute browser test scenario.
101
+
102
+ SCENARIO FILE: {scenario_path}
103
+ BASE URL: {url_override_or_from_spec}
104
+ EVIDENCE DIR: {evidence_dir}/{test_id}/
105
+ MAX RETRIES: {retries_arg_or_2}
106
+
107
+ Steps:
108
+ 1. Read the scenario YAML
109
+ 2. Navigate to the base URL
110
+ 3. Execute each step in order
111
+ 4. Capture screenshots at marked steps
112
+ 5. Record timing and pass/fail for each step
113
+ 6. On failure: classify error, retry if attempts remain
114
+ 7. Generate results.json in evidence directory
115
+ 8. Return summary with pass rate
116
+
117
+ IMPORTANT:
118
+ - Use Playwright CLI commands (npx playwright screenshot, etc.)
119
+ - Capture screenshot evidence at EVERY step marked screenshot: true
120
+ - If URL is not reachable, report as infrastructure error and skip
121
+ - 80% pass rate threshold for validation",
122
+ subagent_type: "agileflow-browser-qa",
123
+ run_in_background: true
124
+ )
125
+ ```
126
+
127
+ **CRITICAL**: If multiple scenarios, deploy ALL agents in a SINGLE message (parallel execution).
128
+
129
+ ### STEP 5: Collect Results
130
+
131
+ Wait for all browser-qa agents to complete:
132
+
133
+ ```
134
+ TaskOutput(task_id: "...", block: true)
135
+ ```
136
+
137
+ ### STEP 6: Synthesize Results
138
+
139
+ Combine all scenario results into a summary report:
140
+
141
+ ```markdown
142
+ ## Browser QA Summary
143
+
144
+ **Run**: {timestamp}
145
+ **Scenarios**: {total} executed
146
+ **Overall**: {passed}/{total} scenarios validated
147
+
148
+ | Scenario | Story | Pass Rate | Status | Evidence |
149
+ |----------|-------|-----------|--------|----------|
150
+ | Login Flow | US-0050 | 87% | VALIDATED | runs/2026.../AGENTIC-001/ |
151
+ | Signup Flow | US-0051 | 67% | WARNING | runs/2026.../AGENTIC-002/ |
152
+
153
+ ### Evidence Directory
154
+ `.agileflow/ui-review/runs/{timestamp}/`
155
+
156
+ ### Recommendations
157
+ - [scenario-specific recommendations]
158
+ ```
159
+
160
+ ### STEP 7: Update Status
161
+
162
+ If scenarios have `story_id` fields, update `docs/09-agents/status.json`:
163
+ - Add `agentic_test_status` field: `"validated"`, `"failed"`, or `"warning"`
164
+ - Do NOT modify existing `test_status` field (that's for Jest tests)
165
+
166
+ ### STEP 8: Cleanup Old Evidence
167
+
168
+ Run retention cleanup - remove evidence older than 30 days:
169
+
170
+ ```bash
171
+ find .agileflow/ui-review/runs/ -maxdepth 1 -type d -mtime +30 -exec rm -rf {} + 2>/dev/null || true
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Arguments
177
+
178
+ | Argument | Required | Description | Default |
179
+ |----------|----------|-------------|---------|
180
+ | `SCENARIO` | No | Path to YAML test spec | Auto-discover |
181
+ | `URL` | No | Base URL override | From spec file |
182
+ | `STORY` | No | Filter by story ID | All scenarios |
183
+ | `RETRIES` | No | Max retries per scenario (1-3) | 2 |
184
+
185
+ ---
186
+
187
+ ## Expected Output
188
+
189
+ ### Success - All Scenarios Pass
190
+
191
+ ```
192
+ Browser QA Complete
193
+ ====================================
194
+
195
+ Run: 2026-02-16 14:30:00
196
+ Evidence: .agileflow/ui-review/runs/2026-02-16_14-30-00/
197
+
198
+ Results:
199
+ VALIDATED Login Flow (87% pass rate) - US-0050
200
+ VALIDATED Signup Flow (93% pass rate) - US-0051
201
+ VALIDATED Dashboard Nav (80% pass rate) - US-0052
202
+
203
+ Overall: 3/3 scenarios validated
204
+ Status: docs/09-agents/status.json updated
205
+ ```
206
+
207
+ ### Partial - Some Scenarios Fail
208
+
209
+ ```
210
+ Browser QA Complete (with warnings)
211
+ ====================================
212
+
213
+ Results:
214
+ VALIDATED Login Flow (87% pass rate)
215
+ WARNING Signup Flow (73% pass rate) - investigate
216
+ FAILED Checkout (50% pass rate) - potential bug
217
+
218
+ Overall: 1/3 validated, 1 warning, 1 failed
219
+ Action: Review failed scenarios in evidence directory
220
+ ```
221
+
222
+ ### Error - Playwright Not Found
223
+
224
+ ```
225
+ Playwright not installed.
226
+
227
+ Install: npm install --save-optional playwright
228
+ npx playwright install chromium
229
+
230
+ Then retry: /agileflow:browser-qa
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Related Commands
236
+
237
+ - `/agileflow:verify` - Run deterministic tests (Jest)
238
+ - `/agileflow:multi-expert` - Multi-expert analysis
239
+ - `/agileflow:review` - Code review
240
+ - `/agileflow:status` - Update story status
@@ -0,0 +1,94 @@
1
+ # Browser QA Test Spec Template
2
+ # Part of AgileFlow Bowser Four-Layer Browser Automation
3
+ #
4
+ # Usage: Copy this template and fill in your scenario details.
5
+ # Place specs in: .agileflow/ui-review/specs/ or docs/07-testing/agentic/
6
+ # Run with: /agileflow:browser-qa SCENARIO=<path-to-this-file>
7
+ #
8
+ # Schema Version: 1.0.0
9
+
10
+ # --- Core Fields (test_id and name are required; story_id and description are optional) ---
11
+
12
+ # Unique test identifier (format: AGENTIC-NNN)
13
+ test_id: AGENTIC-001
14
+
15
+ # Associated user story (optional, links results to status.json)
16
+ story_id: US-XXXX
17
+
18
+ # Human-readable name for this test scenario
19
+ name: "Example User Login Flow"
20
+
21
+ # What this test validates
22
+ description: "Verify that a user can log in with valid credentials and reach the dashboard"
23
+
24
+ # --- Configuration ---
25
+
26
+ # Base URL of the application under test
27
+ url: http://localhost:3000
28
+
29
+ # Maximum time for entire scenario (default: 60s)
30
+ timeout: 60s
31
+
32
+ # Retry attempts on failure before marking as failed (default: 2)
33
+ max_retries: 2
34
+
35
+ # Minimum pass rate to consider validated (default: 0.80)
36
+ pass_rate_threshold: 0.80
37
+
38
+ # Browsers to test against (default: [chromium])
39
+ browsers:
40
+ - chromium
41
+
42
+ # --- Test Steps ---
43
+ # Each step has: name, action, and action-specific fields
44
+ # Supported actions: navigate, click, fill, assert, wait, screenshot
45
+ #
46
+ # Optional per-step fields:
47
+ # wait_for: CSS selector to wait for before proceeding
48
+ # screenshot: true/false - capture screenshot after this step
49
+ # timeout: per-step timeout override
50
+
51
+ steps:
52
+ - name: Navigate to login page
53
+ action: navigate
54
+ url: /login
55
+ wait_for: "[data-testid='login-form']"
56
+ screenshot: true
57
+
58
+ - name: Fill email field
59
+ action: fill
60
+ fields:
61
+ - selector: "[data-testid='email-input']"
62
+ value: "test@example.com"
63
+
64
+ - name: Fill password field
65
+ action: fill
66
+ fields:
67
+ - selector: "[data-testid='password-input']"
68
+ value: "testpassword123"
69
+
70
+ - name: Click login button
71
+ action: click
72
+ selector: "[data-testid='login-button']"
73
+ screenshot: true
74
+
75
+ - name: Verify dashboard loaded
76
+ action: assert
77
+ assertion: "User sees the dashboard page with a welcome message"
78
+ wait_for: "[data-testid='dashboard']"
79
+ screenshot: true
80
+
81
+ # --- Expected Result ---
82
+ # Natural language description of what success looks like
83
+ expected_result: "User is logged in and sees the dashboard with their name displayed"
84
+
85
+ # --- Metadata ---
86
+ # Optional fields for tracking and reporting
87
+
88
+ metadata:
89
+ author: AG-BROWSER-QA
90
+ created: "2026-02-16"
91
+ tags:
92
+ - authentication
93
+ - critical-path
94
+ priority: high