start-vibing 4.3.4 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/package.json +2 -2
  2. package/template/.claude/agents/sd-audit.md +32 -0
  3. package/template/.claude/skills/e2e-audit/DESIGN.md +294 -0
  4. package/template/.claude/skills/e2e-audit/SKILL.md +660 -0
  5. package/template/.claude/skills/e2e-audit/e2e/fixtures/auth.setup.ts +70 -0
  6. package/template/.claude/skills/e2e-audit/e2e/fixtures/auth.ts +21 -0
  7. package/template/.claude/skills/e2e-audit/e2e/fixtures/base.ts +90 -0
  8. package/template/.claude/skills/e2e-audit/e2e/fixtures/storage/.gitkeep +0 -0
  9. package/template/.claude/skills/e2e-audit/e2e/fixtures/storage/admin.json +50 -0
  10. package/template/.claude/skills/e2e-audit/e2e/fixtures/storage/manager.json +50 -0
  11. package/template/.claude/skills/e2e-audit/e2e/fixtures/storage/member.json +50 -0
  12. package/template/.claude/skills/e2e-audit/e2e/fixtures/storage/owner.json +50 -0
  13. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-admin.page.ts +141 -0
  14. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-billing.page.ts +47 -0
  15. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-chat.page.ts +35 -0
  16. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-home.page.ts +134 -0
  17. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-integrations.page.ts +334 -0
  18. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-knowledge.page.ts +30 -0
  19. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-ontology.page.ts +71 -0
  20. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-profile.page.ts +38 -0
  21. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-teams.page.ts +123 -0
  22. package/template/.claude/skills/e2e-audit/e2e/pages/dashboard-transcripts.page.ts +109 -0
  23. package/template/.claude/skills/e2e-audit/e2e/specs/auth/login.spec.ts +59 -0
  24. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-admin.spec.ts +233 -0
  25. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-billing.spec.ts +44 -0
  26. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-chat.spec.ts +50 -0
  27. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-home.spec.ts +243 -0
  28. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-integrations.spec.ts +472 -0
  29. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-knowledge.spec.ts +57 -0
  30. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-ontology.spec.ts +72 -0
  31. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-profile.spec.ts +48 -0
  32. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-teams.spec.ts +247 -0
  33. package/template/.claude/skills/e2e-audit/e2e/specs/dashboard-transcripts.spec.ts +122 -0
  34. package/template/.claude/skills/e2e-audit/e2e/specs/security/headers.spec.ts +39 -0
  35. package/template/.claude/skills/e2e-audit/e2e/specs/security/rbac.spec.ts +92 -0
  36. package/template/.claude/skills/e2e-audit/e2e/specs/security/xss.spec.ts +74 -0
  37. package/template/.claude/skills/e2e-audit/e2e/utils/console-collector.ts +89 -0
  38. package/template/.claude/skills/e2e-audit/e2e/utils/security-helpers.ts +114 -0
  39. package/template/.claude/skills/e2e-audit/e2e/utils/test-data.ts +64 -0
  40. package/template/.claude/skills/e2e-audit/runbook.md +115 -0
  41. package/template/.claude/skills/super-design/SKILL.md +42 -4
  42. package/template/.claude/skills/super-design/scripts/discover-surfaces.sh +197 -0
  43. package/template/.claude/skills/super-design/scripts/extract-project-rules.sh +240 -0
  44. package/template/.claude/skills/super-design/scripts/verify-audit.sh +34 -1
@@ -0,0 +1,660 @@
1
+ ---
2
+ name: e2e-audit
3
+ description: Comprehensive E2E audit system - discovers all interactive elements on every page, generates page specs, creates fixed Playwright test files, validates UX/UI/security, detects dead code. Use when auditing pages, writing E2E tests, or checking test coverage.
4
+ ---
5
+
6
+ # E2E Audit Skill
7
+
8
+ ## Purpose
9
+
10
+ Systematically audit every page in the Hakutaku Dashboard, discovering all interactive elements and generating fixed Playwright test files that run standalone without AI.
11
+
12
+ ## Design Spec
13
+
14
+ Read `docs/e2e-audit/DESIGN.md` for the full architecture, page list, and patterns.
15
+
16
+ ## Pre-Audit: Server & Environment Setup
17
+
18
+ Before auditing ANY pages, set up monitoring:
19
+
20
+ ### 1. Start Dev Server with Log Capture
21
+
22
+ ```bash
23
+ # Start dev server in background, capture logs
24
+ bun run dev > /tmp/hakutaku-dev.log 2>&1 &
25
+ DEV_PID=$!
26
+
27
+ # Wait for server to be ready
28
+ until curl -s http://localhost:3000/api/health > /dev/null 2>&1; do sleep 2; done
29
+
30
+ # Verify tRPC returns JSON (not HTML error page)
31
+ curl -s "http://localhost:3000/api/v1/user.me?batch=1&input=%7B%7D" | head -1
32
+ # Should show JSON (even if UNAUTHORIZED), NOT "<!DOCTYPE html>"
33
+ ```
34
+
35
+ ### 2. Monitor Server Logs
36
+
37
+ Throughout the audit, periodically check the server logs for errors and crash signatures:
38
+
39
+ ```bash
40
+ # Check for server errors
41
+ tail -50 /tmp/hakutaku-dev.log | grep -i "error\|warn\|fail\|crash\|exception"
42
+
43
+ # Check for specific issues
44
+ tail -50 /tmp/hakutaku-dev.log | grep -i "jest worker\|out of memory\|SIGKILL\|ECONNREFUSED"
45
+ ```
46
+
47
+ **What to look for in server logs:**
48
+ - `Jest worker encountered X child process exceptions` → Dev server crashed, needs restart
49
+ - `ECONNREFUSED` on DB/Redis → Missing env vars or services down
50
+ - `PrismaClientKnownRequestError` → DB schema mismatch
51
+ - `UNAUTHORIZED` / `FORBIDDEN` → Auth issues (may be expected in some test scenarios)
52
+ - Unhandled promise rejections → Real bugs
53
+ - Memory warnings → Performance issue
54
+
55
+ ### 3. Monitor Console Errors (Playwright)
56
+
57
+ On EVERY page navigation, immediately check:
58
+
59
+ ```
60
+ 1. browser_console_messages(level: "error") — capture all errors
61
+ 2. Count error toasts visible in snapshot
62
+ 3. If errors found:
63
+ a. Check server logs (tail /tmp/hakutaku-dev.log) to correlate
64
+ b. Classify: ENV/CONFIG vs BUG vs EXPECTED
65
+ c. If server is returning HTML instead of JSON → restart dev server
66
+ d. Document findings in page spec "## Error Analysis" section
67
+ ```
68
+
69
+ ### 4. Visual Validation
70
+
71
+ On EVERY page, verify visually:
72
+ - No broken layouts (elements overlapping, overflowing)
73
+ - No missing images (alt text visible instead of image)
74
+ - No untranslated keys (raw `key.path.like.this` visible)
75
+ - No raw error messages shown to users (stack traces, parse errors)
76
+ - Toast messages are appropriate (not storms of identical errors)
77
+ - Loading states transition to content (not stuck loading)
78
+
79
+ **Flag any visual issues as FINDINGS** — these are UX bugs even when they are not security issues.
80
+
81
+ ---
82
+
83
+ ## Workflow: Auditing a Page
84
+
85
+ For each page, follow this exact sequence:
86
+
87
+ ### 1. RESEARCH
88
+
89
+ Before writing any tests, research:
90
+ - OWASP Top 10 issues relevant to this page type
91
+ - Common vulnerabilities for the page's features (e.g., file upload → unrestricted file type, XSS in filename)
92
+ - Library-specific edge cases (e.g., TanStack Query cache issues, shadcn modal focus traps)
93
+
94
+ ### 2. DISCOVER
95
+
96
+ Use Playwright MCP to navigate, snapshot, AND INTERACT with everything:
97
+
98
+ ```
99
+ 1. Navigate to the page URL
100
+ 2. Take a snapshot (browser_snapshot)
101
+ 3. Check server logs (tail /tmp/hakutaku-dev.log) for server-side errors on this route
102
+ 4. Check console errors (browser_console_messages level: "error")
103
+ 5. List ALL interactive elements from snapshot
104
+
105
+ 6. CLICK EVERYTHING — do NOT just snapshot:
106
+ a. Click every TAB → snapshot each tab panel, note URL changes
107
+ b. Click every BUTTON → see what happens (modal? toast? redirect?)
108
+ c. Open every DROPDOWN/COMBOBOX → list all options
109
+ d. Toggle SIDEBAR expand/collapse → note hidden/revealed elements
110
+ e. Click every LINK → verify redirect URL is correct
111
+ f. Open every MODAL → snapshot modal content, list its elements
112
+ g. Hover elements that look like they have TOOLTIPS → capture text
113
+ h. Fill INPUTS with test data → check validation, error messages
114
+ i. Submit FORMS → check success/error toasts, redirects
115
+
116
+ 7. For each interaction, document:
117
+ - What was clicked/triggered
118
+ - What happened (URL change, modal opened, toast appeared, etc.)
119
+ - Any errors produced (console, server log, visual)
120
+
121
+ 8. Check states: empty, loading, error, populated
122
+ 9. Check RBAC: which elements appear per role
123
+ ```
124
+
125
+ **CRITICAL**: A snapshot-only audit misses hidden elements (modal contents,
126
+ dropdown options, tab panels, hover states). You MUST interact to discover them.
127
+
128
+ ### 2.3. FUNCTIONAL TESTING (GUARANTEE IT WORKS)
129
+
130
+ Discovery is NOT enough. You MUST **verify that every feature actually works**.
131
+ Clicking a button and seeing it exists is NOT testing — you must confirm the OUTCOME.
132
+
133
+ **THE COMPLETENESS RULE**: For EVERY interactive element discovered in step 2,
134
+ you MUST perform at least ONE interaction AND verify the outcome. No exceptions.
135
+ If an element can't be tested (requires external service, auth, etc.), mark it
136
+ `[~]` with a reason — but you MUST attempt it first.
137
+
138
+ ```
139
+ 1. EVERY BUTTON — Click it. What happened?
140
+ a. Did it navigate? → Verify the destination URL is correct
141
+ b. Did it open a modal/dialog? → Snapshot and list all elements inside
142
+ c. Did it trigger a toast? → Read the toast message, verify it's correct
143
+ d. Did it change state? → Verify the state actually changed (UI + DB)
144
+ e. Did nothing happen? → That's a BUG — document it
145
+ f. Is it disabled? → Verify WHY (missing prereq? permission? loading?)
146
+
147
+ 2. EVERY COMBOBOX/DROPDOWN — Open it and list ALL options.
148
+ a. Select EACH option one by one
149
+ b. After each selection, verify the OUTCOME changed:
150
+ - List count before vs after
151
+ - Verify displayed items match the filter criteria
152
+ c. Test "clear/reset" returns to original state
153
+ d. Test combinations (filter A + filter B together)
154
+
155
+ 3. EVERY INPUT — Type into it.
156
+ a. Type a valid value → verify it's accepted
157
+ b. Type an invalid value → verify error message appears
158
+ c. Leave it empty and submit → verify required validation
159
+ d. Test boundary values (min/max length, special chars)
160
+ e. For search: type known match, partial match, and no-match
161
+
162
+ 4. EVERY TAB — Click it.
163
+ a. Verify the tab panel content changes
164
+ b. Verify the URL updates (if URL-driven tabs)
165
+ c. Snapshot the NEW content and list its elements
166
+ d. Repeat steps 1-3 for elements INSIDE each tab panel
167
+
168
+ 5. EVERY LINK — Click it.
169
+ a. Verify the destination URL is correct
170
+ b. Verify the destination page loads (not 404)
171
+ c. Verify back button returns to original page
172
+
173
+ 6. MULTI-STEP WIZARDS — Navigate EVERY step of EVERY path.
174
+ a. For each wizard variant (e.g., each integration type):
175
+ - Click the variant to select it
176
+ - Click "Next" to advance to step 2
177
+ - SNAPSHOT step 2, list ALL elements
178
+ - Fill required fields in step 2
179
+ - Click "Next" to advance to step 3
180
+ - SNAPSHOT step 3, list ALL elements
181
+ - Continue until you reach the LAST step or hit a blocker
182
+ (e.g., OAuth requires external service)
183
+ b. Test "Previous" button at every step (goes back correctly?)
184
+ c. Test "Cancel" at every step (exits wizard correctly?)
185
+ d. Test step indicator buttons (can you jump to completed steps?)
186
+ e. Test validation: try advancing without filling required fields
187
+ f. If a step requires an external service (OAuth, file upload):
188
+ - Document what the step UI looks like
189
+ - Mark it `[~]` with "requires dev.hakutaku.ai"
190
+ - Still verify the UI elements are present and correct
191
+
192
+ 7. CRUD OPERATIONS — Verify end-to-end.
193
+ a. CREATE: Complete the FULL creation flow. Verify the created item
194
+ appears in the list afterward WITHOUT page refresh.
195
+ b. EDIT: Modify a field, save, RELOAD the page, verify change persists
196
+ c. DELETE: Delete YOUR item, verify it disappears from list AND DB
197
+ d. Test validation: empty forms, invalid data, duplicate names
198
+
199
+ 8. PAGINATION — If pagination exists:
200
+ a. Verify count text matches actual items
201
+ b. Navigate pages if multiple exist
202
+ c. Verify items don't repeat across pages
203
+
204
+ 9. SUBMIT FLOWS — Verify the COMPLETE chain, not just the button click.
205
+ a. For EVERY form/search submit that navigates to another page:
206
+ - Fill inputs → click submit → wait for navigation
207
+ - Verify the destination URL is correct
208
+ - Verify the destination page LOADED FULLY (not blank/error)
209
+ - Verify the SUBMITTED DATA appears correctly on the destination page
210
+ - Verify any SIDE EFFECTS completed (e.g., AI response in chat,
211
+ item appears in list, email sent)
212
+ b. For search → chat creation flows specifically:
213
+ - Type query → submit → verify toast → verify navigation to chat page
214
+ - ON THE CHAT PAGE: verify user message appears AND AI response streams
215
+ - If AI does NOT respond → that is a BUG, document it immediately
216
+ c. For create flows (wizard → detail page):
217
+ - Complete wizard → verify redirect to detail/list page
218
+ - Verify the created item is VISIBLE on the destination page
219
+ - Verify the item's data matches what was entered in the wizard
220
+ d. NEVER mark a submit flow as ✅ if you only verified the button click
221
+ or the toast — you MUST verify the outcome on the destination page
222
+ ```
223
+
224
+ **CRITICAL**: Seeing a filter combobox open is NOT testing. You must select each
225
+ option and VERIFY the list changes correctly. Same for search, create, delete, etc.
226
+
227
+ **CRITICAL**: "I clicked Next and saw step 2" is NOT testing step 2. You must
228
+ interact with EVERY element in step 2 (fill inputs, open dropdowns, click buttons)
229
+ before advancing to step 3. Each step is its own mini-page that needs full audit.
230
+
231
+ **PROJECT-SPECIFIC NOTE (Hakutaku)**: For features requiring external services
232
+ (OAuth integrations, file uploads with S3), use `https://dev.hakutaku.ai` instead
233
+ of `localhost:3000` — Upstash and OAuth callbacks require the real domain. You can
234
+ still verify the UI flow steps on localhost, but actual sync/upload operations need
235
+ the tunnel running. When hitting an OAuth/external blocker, mark the step `[~]` and
236
+ document what UI elements are present — do NOT skip the entire wizard path.
237
+
238
+ **NEVER DELETE OR EDIT EXISTING DEV DATA.** When testing CRUD operations:
239
+ - Always CREATE new items for testing (new integration, new chat, new file, etc.)
240
+ - NEVER delete or modify data that already exists in the dev database — other
241
+ developers may be using it.
242
+ - Only delete/edit items YOU created during the current audit session.
243
+ - If you need specific data states (error, paused, etc.), create new items and
244
+ put them in that state — don't change existing ones.
245
+
246
+ **CLONE STRATEGY FOR OAUTH INTEGRATIONS**: When testing integration types that
247
+ require OAuth credentials you don't have (Google Drive, OneDrive, Notion, Slack,
248
+ GitHub), you can't create new ones through the wizard. Instead:
249
+ 1. Check if an existing integration of that type already exists in the org
250
+ 2. If it does, CLONE it via the tRPC API or database (create a copy with a
251
+ test name like "E2E Test Clone - {Type} - {Date}")
252
+ 3. Test the CLONE's detail page fully: all tabs (Files, History, Settings),
253
+ all action buttons (Sync, Force Sync, Pause, Configure, Delete), all
254
+ settings (access control, sync frequency if applicable)
255
+ 4. After testing, DELETE the clone you created
256
+ 5. This lets you test detail page functionality for OAuth types without
257
+ needing actual OAuth credentials
258
+ 6. If NO existing integration of that type exists, mark those detail-page
259
+ tests as `[~]` with "no existing integration to clone"
260
+
261
+ ### 2.5. ANALYZE ERRORS
262
+
263
+ After discovery, collect and classify ALL console errors and toast notifications:
264
+
265
+ ```
266
+ 1. Run browser_console_messages(level: "error") — capture all errors
267
+ 2. Check for error toasts visible in the snapshot
268
+ 3. Classify each error:
269
+
270
+ ENV/CONFIG — Missing env vars, DB connection, external service down
271
+ → Document but don't block. Note what env is needed.
272
+ Example: "500 on /api/billing/status — likely missing STRIPE_SECRET_KEY"
273
+
274
+ BUG — Real code issues that happen regardless of env
275
+ → Flag as HIGH priority finding. Create fix recommendation.
276
+ Example: "tRPC returns HTML instead of JSON on batch error — missing error boundary"
277
+
278
+ EXPECTED — Known warnings, dev-only messages, React strict mode
279
+ → Document and skip.
280
+ Example: "React DevTools extension warning"
281
+
282
+ 4. Check for:
283
+ - Stack traces exposed in UI (security issue)
284
+ - Sensitive data in error messages (API keys, DB URLs, user data)
285
+ - Error messages that leak implementation details
286
+ - Failed API calls that should have fallback/retry
287
+ - Toast storms (>3 error toasts = bad UX, should consolidate)
288
+
289
+ 5. Record ALL findings in the page spec under "## Error Analysis"
290
+ 6. Add actionable items to the audit report
291
+ ```
292
+
293
+ This phase is CRITICAL — errors found during discovery often reveal:
294
+ - Missing error boundaries
295
+ - Unhandled API failures
296
+ - Environment-dependent code without fallbacks
297
+ - UX issues (toast spam, unhelpful error messages)
298
+ - Security leaks (stack traces, internal paths in errors)
299
+
300
+ ### 2.7. TRIANGULATE (DB + Source Code + UI)
301
+
302
+ After discovery and error analysis, **cross-reference** what the UI shows against the
303
+ database and the component source code to verify correctness:
304
+
305
+ ```
306
+ 1. READ SOURCE CODE for the page's components:
307
+ - Find the page component in src/app/(app)/dashboard/{page}/
308
+ - Read _components/ subfolder for page-specific components
309
+ - Read src/components/{feature}/ for shared feature components
310
+ - Identify which tRPC procedures are called (trpc.{router}.{procedure})
311
+ - Understand expected data flow: tRPC query → component props → UI rendering
312
+
313
+ 2. CHECK DATABASE for test data:
314
+ - Query the database (via Prisma/tRPC or MCP) to see what data exists
315
+ - Verify the UI accurately reflects what's in the DB:
316
+ * Counts match (e.g., "10 integrations" in UI = 10 rows in DB)
317
+ * Names/labels match (no stale cache, no wrong field displayed)
318
+ * Statuses match (e.g., "active" in DB shown as active in UI)
319
+ * Dates/timestamps are formatted correctly
320
+ - If data is EMPTY or INSUFFICIENT for testing:
321
+ a. Check if a seed script exists (schema/seeders/) and enrich it
322
+ b. OR use the UI flow to CREATE test data (e.g., create an integration,
323
+ upload a file, start a chat) — this also audits the creation flow
324
+ c. Document what data was needed and how it was obtained
325
+
326
+ 3. VERIFY BEHAVIOR matches expectations:
327
+ - Does clicking "Delete" actually delete from DB? (check before/after)
328
+ - Does creating an item show it in the list without refresh?
329
+ - Do filters actually filter the correct data? (cross-check with DB query)
330
+ - Do pagination counts match total DB records?
331
+ - Does sorting work correctly? (verify order matches DB ORDER BY)
332
+ - Do role-based visibility rules match ZenStack policies?
333
+
334
+ 4. FLAG MISMATCHES as findings:
335
+ - UI shows data that doesn't exist in DB → stale cache or mock data
336
+ - DB has data not shown in UI → missing query, wrong filter, permission issue
337
+ - UI count != DB count → pagination bug, filter leak, or policy issue
338
+ - Component code expects field X but API returns field Y → type mismatch
339
+ ```
340
+
341
+ **Why this matters**: A UI can "look correct" while showing wrong data. Triangulating
342
+ DB ↔ Source Code ↔ UI catches data integrity bugs that snapshot-only testing misses.
343
+
344
+ ### 2.9. VALIDATE CHECKLIST (GATE — BLOCKS GENERATE)
345
+
346
+ Before writing ANY test code (POM or spec), the page spec checklist MUST be 100% validated.
347
+
348
+ **What "validated" means for each element type:**
349
+ - **Button**: You CLICKED it and documented what happened
350
+ - **Combobox/Dropdown**: You OPENED it, listed ALL options, SELECTED each one
351
+ - **Input**: You TYPED into it and verified validation
352
+ - **Tab**: You CLICKED it and snapshotted the panel content
353
+ - **Link**: You CLICKED it and verified the destination
354
+ - **Modal/Dialog**: You OPENED it and listed all elements inside
355
+ - **Wizard step**: You NAVIGATED to it, interacted with ALL its elements
356
+ - **Action**: You TRIGGERED it and verified the outcome
357
+
358
+ **TRIANGULATION REQUIREMENT**: For every page, the checklist MUST include a
359
+ dedicated `## Triangulation` section with THREE explicit sub-checks:
360
+
361
+ ```markdown
362
+ ## Triangulation
363
+
364
+ ### DB Verification
365
+ - [ ] Count in UI matches DB count (e.g., "11 integrations" = 11 rows in DB)
366
+ - [ ] Status values in UI match DB status column
367
+ - [ ] Action outcomes confirmed in DB (e.g., clicking Pause → status changed to PAUSED)
368
+ - [ ] Created items appear in DB with correct fields
369
+
370
+ ### Source Code Verification
371
+ - [ ] Component file identified and read (path noted)
372
+ - [ ] tRPC procedures identified (router.procedure names)
373
+ - [ ] Known bugs traced to source (file:line noted)
374
+
375
+ ### UI vs DB Mismatch Check
376
+ - [ ] No stale data displayed (UI reflects current DB state)
377
+ - [ ] Filter/search results match what DB query would return
378
+ - [ ] Pagination count matches total DB records for this org
379
+ ```
380
+
381
+ Use `bunx dotenvx run -f artifacts/.env.local -f artifacts/.env.development -- prisma studio`
382
+ to open Prisma Studio for DB inspection. For quick queries, write a temp script
383
+ and run with `bunx dotenvx run ... -- bun run tmp-query.ts`, then delete it.
384
+
385
+ ```
386
+ 1. Open the page spec at docs/e2e-audit/page-specs/{page-name}.md
387
+ 2. Scan for EVERY `[ ]` (unchecked) item
388
+ 3. If ANY unchecked items remain:
389
+ a. DO NOT proceed to step 3 (GENERATE)
390
+ b. Go back and interact with each unchecked element via Playwright
391
+ c. After verifying, update the checklist: `[ ]` → `[x]` with ✅
392
+ d. If an element cannot be verified (missing, broken), mark with ❌
393
+ and explain why (e.g., "BUG: button not rendered")
394
+ e. If an element requires external service, mark with `[~]` and explain
395
+ (e.g., "[~] requires dev.hakutaku.ai for OAuth")
396
+ 4. ONLY when the spec has ZERO `[ ]` items may you proceed to GENERATE
397
+ 5. Final counts MUST be documented at the top of the spec:
398
+ - Total elements: N
399
+ - Validated [x]: N
400
+ - Blocked [~]: N (with reasons)
401
+ - Failed [❌]: N (with bug reports)
402
+ ```
403
+
404
+ **CRITICAL**: Writing tests for elements you haven't actually clicked, opened,
405
+ or interacted with produces unreliable tests. The checklist IS the proof of work.
406
+ A page spec full of `[ ]` means the audit is NOT done — it's just a draft.
407
+
408
+ **SELF-CHECK before proceeding**: Read the page spec top to bottom. For each `[x]`
409
+ item, can you recall EXACTLY what happened when you interacted with it? If not,
410
+ you didn't actually test it — you just checked the box. Go back and test it.
411
+
412
+ ### 3. GENERATE
413
+
414
+ Produce three files per page (the page spec was started during discovery and is finalized here):
415
+
416
+ #### Page Spec (`docs/e2e-audit/page-specs/{page-name}.md`)
417
+ - Complete inventory of all PAGE-SPECIFIC elements found
418
+ - Checklist format for tracking test coverage
419
+ - Notes on states, permissions, edge cases
420
+
421
+ **IMPORTANT — Shared vs Page-Specific Elements:**
422
+ - **Sidebar navigation, user menu, toast region, floating action button (FAB)** are SHARED layout
423
+ elements. They appear on EVERY page and are NOT part of the page audit.
424
+ - Audit shared layout elements ONCE in a dedicated `shared-layout.md` spec.
425
+ - Each page spec should ONLY contain elements unique to THAT page's content
426
+ (inside `<main>`). Do NOT duplicate sidebar links in every page spec.
427
+ - If a shared element behaves DIFFERENTLY on a specific page (e.g., sidebar
428
+ highlights a different link), note the difference but don't re-audit the
429
+ entire sidebar.
430
+
431
+ #### Page Object Model (`tests/e2e/pages/{page-name}.page.ts`)
432
+ ```typescript
433
+ import { type Page, type Locator } from '@playwright/test'
434
+
435
+ export class DashboardHomePage {
436
+ readonly page: Page
437
+ readonly heading: Locator
438
+ readonly createButton: Locator
439
+
440
+ constructor(page: Page) {
441
+ this.page = page
442
+ this.heading = page.getByRole('heading', { name: 'Home' })
443
+ this.createButton = page.getByRole('button', { name: 'Create' })
444
+ }
445
+
446
+ async goto() {
447
+ await this.page.goto('/dashboard/home')
448
+ }
449
+
450
+ async waitForLoad() {
451
+ await this.heading.waitFor()
452
+ }
453
+ }
454
+ ```
455
+
456
+ #### Test Spec (`tests/e2e/specs/{page-name}.spec.ts`)
457
+ ```typescript
458
+ import { test, expect } from '../fixtures/base'
459
+ import { DashboardHomePage } from '../pages/dashboard-home.page'
460
+
461
+ test.describe('Dashboard Home @smoke', () => {
462
+ let home: DashboardHomePage
463
+
464
+ test.beforeEach(async ({ authenticatedPage, apiErrors: _apiErrors }) => {
465
+ home = new DashboardHomePage(authenticatedPage)
466
+ await home.goto()
467
+ await home.waitForLoad()
468
+ })
469
+
470
+ test('loads and displays heading', async ({ authenticatedPage }) => {
471
+ await expect(authenticatedPage, 'Should navigate to home page').toHaveURL(/dashboard\/home/)
472
+ await expect(home.heading, 'Home heading should be visible after the page loads').toBeVisible()
473
+ })
474
+
475
+ test('displays hero section', async () => {
476
+ await expect(home.heroTitle, 'Hero section should render after data loads').toBeVisible()
477
+ })
478
+ })
479
+ ```
480
+
481
+ ### 4. VALIDATE
482
+
483
+ Run the generated tests:
484
+ ```bash
485
+ bunx playwright test tests/e2e/specs/{page-name}.spec.ts
486
+ ```
487
+
488
+ Fix any failures before moving on.
489
+
490
+ ### 5. REPORT
491
+
492
+ Update `docs/e2e-audit/reports/master-audit.md` with:
493
+ - Elements found vs tested
494
+ - Security findings
495
+ - UX/UI issues
496
+ - Missing test-ids
497
+ - Accessibility gaps
498
+
499
+ ## API Error Tracking (MANDATORY)
500
+
501
+ Every test MUST use the `apiErrors` fixture to automatically detect and report API failures.
502
+ This replaces generic "TimeoutError: waiting for heading" with actionable messages like
503
+ `"API errors detected: GET /api/v1/integration.list → 500 Internal Server Error"`.
504
+
505
+ ### How It Works
506
+
507
+ The `apiErrors` fixture in `tests/e2e/fixtures/base.ts`:
508
+ 1. Listens to ALL network responses on the authenticated page
509
+ 2. Captures any 4xx/5xx responses from `/api/` or `/v1/` endpoints
510
+ 3. After the test completes, if any API errors were collected, it FAILS the test
511
+ with a detailed report including method, URL, status code, and status text
512
+ 4. Known env-dependent endpoints (billing, ontology) are excluded via `IGNORED_API_PATTERNS`
513
+
514
+ ### Usage Pattern
515
+
516
+ ```typescript
517
+ // REQUIRED: Destructure apiErrors in beforeEach (even if unused in test body)
518
+ test.beforeEach(async ({ authenticatedPage, apiErrors: _apiErrors }) => {
519
+ // apiErrors starts listening immediately — no setup needed
520
+ page = new SomePage(authenticatedPage)
521
+ await page.goto()
522
+ await page.waitForLoad()
523
+ })
524
+
525
+ // For standalone tests without beforeEach:
526
+ test('standalone test', async ({ authenticatedPage, apiErrors: _apiErrors }) => {
527
+ // apiErrors is active for this test
528
+ })
529
+ ```
530
+
531
+ ### Adding Ignored Endpoints
532
+
533
+ When an API returns errors because of missing env vars (not real bugs), add its URL pattern to `IGNORED_API_PATTERNS`:
534
+
535
+ ```typescript
536
+ // In tests/e2e/fixtures/base.ts
537
+ const IGNORED_API_PATTERNS = [
538
+ /\/api\/billing\//, // Missing Stripe key in dev
539
+ /ontology\.getGraph/, // Missing ontology service
540
+ ]
541
+ ```
542
+
543
+ ### Custom Assertion Messages (MANDATORY)
544
+
545
+ Every `expect()` call MUST include a descriptive second argument explaining what the
546
+ assertion checks and what might be wrong if it fails:
547
+
548
+ ```typescript
549
+ // BAD — gives useless error: "expected locator to be visible"
550
+ await expect(cards.first()).toBeVisible()
551
+
552
+ // GOOD — gives actionable error with debugging hint
553
+ await expect(cards.first(), 'At least 1 integration card should render (is integration.list returning data?)').toBeVisible()
554
+
555
+ // BAD — gives "expected 0 to be greater than 0"
556
+ expect(count).toBeGreaterThan(0)
557
+
558
+ // GOOD — explains what's missing
559
+ expect(count, 'Integration cards not found — check if tRPC integration.list returns data for this org').toBeGreaterThan(0)
560
+ ```
561
+
562
+ **Why this matters**: When a test fails in CI, the developer sees the custom message
563
+ immediately — no need to reproduce locally or dig through Playwright traces.
564
+
565
+ ## Test Categories per Page
566
+
567
+ Every page MUST have tests for:
568
+
569
+ ### Navigation
570
+ - All links resolve (no 404s)
571
+ - Breadcrumbs correct
572
+ - Back/forward browser navigation works
573
+
574
+ ### Interactions
575
+ - All buttons clickable and produce expected result
576
+ - All modals open/close correctly
577
+ - All forms submit with valid data
578
+ - All dropdowns open and select options
579
+
580
+ ### Validation
581
+ - Required fields show errors when empty
582
+ - Invalid input shows appropriate error
583
+ - Max length enforced
584
+ - Special characters handled
585
+
586
+ ### UX/UI
587
+ - Loading states display (skeletons/spinners)
588
+ - Empty states display when no data
589
+ - Error states display on failure
590
+ - Toasts appear with correct message and type
591
+ - Tooltips show on hover
592
+ - Focus management (modals trap focus, inputs auto-focus)
593
+
594
+ ### Security
595
+ - Console has no sensitive data leaks
596
+ - XSS payloads in inputs don't execute
597
+ - RBAC: unauthorized roles can't access/see restricted elements
598
+ - No stack traces in UI error messages
599
+
600
+ ## Locator Strategy
601
+
602
+ Priority order:
603
+ 1. `getByRole()` — semantic, accessible
604
+ 2. `getByText()` — visible text
605
+ 3. `getByLabel()` — form fields
606
+ 4. `getByPlaceholder()` — inputs
607
+ 5. `getByTestId()` — last resort (flag missing semantic labels)
608
+
609
+ **NEVER use CSS selectors** (`.class`, `#id`, `div > span`).
610
+
611
+ ## File Structure
612
+
613
+ ```
614
+ tests/e2e/
615
+ ├── fixtures/
616
+ │ ├── auth.ts # Storage state paths + UserRole type
617
+ │ ├── auth.setup.ts # Auth setup (preserves valid sessions)
618
+ │ ├── base.ts # Extended fixtures (authenticatedPage, apiErrors)
619
+ │ └── storage/ # Auth state files (gitignored)
620
+ │ ├── owner.json
621
+ │ ├── admin.json
622
+ │ ├── manager.json
623
+ │ └── member.json
624
+ ├── pages/ # Page Object Models
625
+ │ ├── dashboard-home.page.ts
626
+ │ ├── dashboard-integrations.page.ts
627
+ │ ├── dashboard-teams.page.ts
628
+ │ └── ...
629
+ ├── specs/ # Test specifications
630
+ │ ├── dashboard-home.spec.ts
631
+ │ ├── dashboard-integrations.spec.ts
632
+ │ ├── dashboard-teams.spec.ts
633
+ │ ├── security/
634
+ │ │ ├── rbac.spec.ts
635
+ │ │ └── headers.spec.ts
636
+ │ └── ...
637
+ └── utils/
638
+ ├── console-collector.ts # Console message interceptor + sensitive data scanner
639
+ ├── security-helpers.ts # XSS payloads, header checks
640
+ └── test-data.ts # Shared test data
641
+ ```
642
+
643
+ ## Running
644
+
645
+ ```bash
646
+ # All tests
647
+ bun run test:e2e
648
+
649
+ # Smoke tests only
650
+ bunx playwright test --grep @smoke
651
+
652
+ # Security tests only
653
+ bunx playwright test --grep @security
654
+
655
+ # Specific page
656
+ bunx playwright test tests/e2e/specs/dashboard-home.spec.ts
657
+
658
+ # UI mode (visual debugging)
659
+ bunx playwright test --ui
660
+ ```