panopticon-cli 0.5.8 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/README.md +29 -83
  2. package/dist/{agents-I6RAEGL5.js → agents-MOMDECON.js} +8 -6
  3. package/dist/{archive-planning-U3AZAKWI.js → archive-planning-54J6EP6A.js} +3 -3
  4. package/dist/{chunk-UKSGE6RH.js → chunk-3KYTNMSE.js} +1 -2
  5. package/dist/{chunk-UKSGE6RH.js.map → chunk-3KYTNMSE.js.map} +1 -1
  6. package/dist/{chunk-M6ZVVKZ3.js → chunk-4OQ4SXQZ.js} +219 -107
  7. package/dist/chunk-4OQ4SXQZ.js.map +1 -0
  8. package/dist/{chunk-ZMJFEHGF.js → chunk-7ZB5D46Y.js} +2 -2
  9. package/dist/{chunk-ZMJFEHGF.js.map → chunk-7ZB5D46Y.js.map} +1 -1
  10. package/dist/{chunk-BYWVPPAZ.js → chunk-BHRMW7BY.js} +31 -4
  11. package/dist/chunk-BHRMW7BY.js.map +1 -0
  12. package/dist/{chunk-WEQW3EAT.js → chunk-F4XS2FQN.js} +3 -2
  13. package/dist/chunk-F4XS2FQN.js.map +1 -0
  14. package/dist/{chunk-OJF4QS3S.js → chunk-GIW2TUWI.js} +2 -2
  15. package/dist/{chunk-SUM2WVPF.js → chunk-H7T35QDO.js} +30 -12
  16. package/dist/chunk-H7T35QDO.js.map +1 -0
  17. package/dist/{chunk-MJXYTGK5.js → chunk-JZWCL5S5.js} +2 -2
  18. package/dist/{chunk-ZN5RHWGR.js → chunk-PFA5XE2V.js} +5 -41
  19. package/dist/chunk-PFA5XE2V.js.map +1 -0
  20. package/dist/{chunk-6OYUJ4AJ.js → chunk-R47UJWF6.js} +2 -2
  21. package/dist/{chunk-NYOGHGIW.js → chunk-RCYJK3ZC.js} +10 -9
  22. package/dist/chunk-RCYJK3ZC.js.map +1 -0
  23. package/dist/{chunk-R4KPLLRB.js → chunk-SFX3BG6N.js} +1 -1
  24. package/dist/chunk-SFX3BG6N.js.map +1 -0
  25. package/dist/{chunk-IZIXJYXZ.js → chunk-TA5X4QYQ.js} +6 -2
  26. package/dist/{chunk-IZIXJYXZ.js.map → chunk-TA5X4QYQ.js.map} +1 -1
  27. package/dist/{chunk-43F4LDZ4.js → chunk-VVTAPQOI.js} +2 -2
  28. package/dist/{chunk-YAAT66RT.js → chunk-WP6ZLWU3.js} +28 -3
  29. package/dist/chunk-WP6ZLWU3.js.map +1 -0
  30. package/dist/clean-planning-V4SSVU26.js +9 -0
  31. package/dist/cli/index.js +1654 -1056
  32. package/dist/cli/index.js.map +1 -1
  33. package/dist/close-issue-5OMOP2FU.js +9 -0
  34. package/dist/compact-beads-YQDVF6FQ.js +9 -0
  35. package/dist/dashboard/prompts/inspect-agent.md +157 -0
  36. package/dist/dashboard/prompts/merge-agent.md +11 -0
  37. package/dist/dashboard/prompts/review-agent.md +9 -0
  38. package/dist/dashboard/prompts/test-agent.md +9 -0
  39. package/dist/dashboard/prompts/uat-agent.md +215 -0
  40. package/dist/dashboard/prompts/work-agent.md +53 -5
  41. package/dist/dashboard/public/assets/index-5hYjhhGn.js +826 -0
  42. package/dist/dashboard/public/assets/index-DIFh3T1V.css +32 -0
  43. package/dist/dashboard/public/index.html +3 -6
  44. package/dist/dashboard/server.js +3338 -2033
  45. package/dist/factory-KKT7324R.js +20 -0
  46. package/dist/{feedback-writer-T2WCT6EZ.js → feedback-writer-IPPIUPDX.js} +2 -2
  47. package/dist/feedback-writer-IPPIUPDX.js.map +1 -0
  48. package/dist/index.d.ts +8 -3
  49. package/dist/index.js +19 -19
  50. package/dist/{label-cleanup-4HJVX6NP.js → label-cleanup-4IVZIPGK.js} +2 -2
  51. package/dist/{merge-agent-ZITLVF2B.js → merge-agent-6YOMGQMX.js} +16 -16
  52. package/dist/{projects-3CRF57ZU.js → projects-BPGM6IFB.js} +2 -2
  53. package/dist/{remote-workspace-M4IULGFZ.js → remote-workspace-LKRDGYEB.js} +2 -2
  54. package/dist/{review-status-J2YJGL3E.js → review-status-E77PZZWG.js} +2 -2
  55. package/dist/{specialist-context-W25PPWM4.js → specialist-context-GVF4DV3M.js} +5 -5
  56. package/dist/{specialist-logs-KPC45SZN.js → specialist-logs-W47SAAIU.js} +5 -5
  57. package/dist/{specialists-H4LGYR7R.js → specialists-SIXRWCZ3.js} +5 -5
  58. package/dist/{traefik-QXLZ4PO2.js → traefik-X2IWTUHO.js} +3 -3
  59. package/dist/{workspace-manager-G6TTBPC3.js → workspace-manager-Z57ROWBQ.js} +2 -2
  60. package/dist/workspace-manager-Z57ROWBQ.js.map +1 -0
  61. package/package.json +1 -1
  62. package/scripts/inspect-on-bead-close +73 -0
  63. package/scripts/stop-hook +17 -0
  64. package/skills/pan-new-project/SKILL.md +1 -1
  65. package/skills/pan-oversee/SKILL.md +45 -10
  66. package/skills/plan/SKILL.md +336 -0
  67. package/dist/chunk-BYWVPPAZ.js.map +0 -1
  68. package/dist/chunk-M6ZVVKZ3.js.map +0 -1
  69. package/dist/chunk-NYOGHGIW.js.map +0 -1
  70. package/dist/chunk-R4KPLLRB.js.map +0 -1
  71. package/dist/chunk-SUM2WVPF.js.map +0 -1
  72. package/dist/chunk-WEQW3EAT.js.map +0 -1
  73. package/dist/chunk-YAAT66RT.js.map +0 -1
  74. package/dist/chunk-ZN5RHWGR.js.map +0 -1
  75. package/dist/clean-planning-7Z5YY64X.js +0 -9
  76. package/dist/close-issue-CTZK777I.js +0 -9
  77. package/dist/compact-beads-72SHALOL.js +0 -9
  78. package/dist/dashboard/public/assets/index-Bx4NCn9A.css +0 -32
  79. package/dist/dashboard/public/assets/index-C7hJ5-o1.js +0 -756
  80. package/dist/feedback-writer-T2WCT6EZ.js.map +0 -1
  81. package/skills/opus-plan/SKILL.md +0 -400
  82. /package/dist/{agents-I6RAEGL5.js.map → agents-MOMDECON.js.map} +0 -0
  83. /package/dist/{archive-planning-U3AZAKWI.js.map → archive-planning-54J6EP6A.js.map} +0 -0
  84. /package/dist/{chunk-OJF4QS3S.js.map → chunk-GIW2TUWI.js.map} +0 -0
  85. /package/dist/{chunk-MJXYTGK5.js.map → chunk-JZWCL5S5.js.map} +0 -0
  86. /package/dist/{chunk-6OYUJ4AJ.js.map → chunk-R47UJWF6.js.map} +0 -0
  87. /package/dist/{chunk-43F4LDZ4.js.map → chunk-VVTAPQOI.js.map} +0 -0
  88. /package/dist/{clean-planning-7Z5YY64X.js.map → clean-planning-V4SSVU26.js.map} +0 -0
  89. /package/dist/{close-issue-CTZK777I.js.map → close-issue-5OMOP2FU.js.map} +0 -0
  90. /package/dist/{compact-beads-72SHALOL.js.map → compact-beads-YQDVF6FQ.js.map} +0 -0
  91. /package/dist/{projects-3CRF57ZU.js.map → factory-KKT7324R.js.map} +0 -0
  92. /package/dist/{label-cleanup-4HJVX6NP.js.map → label-cleanup-4IVZIPGK.js.map} +0 -0
  93. /package/dist/{merge-agent-ZITLVF2B.js.map → merge-agent-6YOMGQMX.js.map} +0 -0
  94. /package/dist/{review-status-J2YJGL3E.js.map → projects-BPGM6IFB.js.map} +0 -0
  95. /package/dist/{remote-workspace-M4IULGFZ.js.map → remote-workspace-LKRDGYEB.js.map} +0 -0
  96. /package/dist/{specialist-logs-KPC45SZN.js.map → review-status-E77PZZWG.js.map} +0 -0
  97. /package/dist/{specialist-context-W25PPWM4.js.map → specialist-context-GVF4DV3M.js.map} +0 -0
  98. /package/dist/{specialists-H4LGYR7R.js.map → specialist-logs-W47SAAIU.js.map} +0 -0
  99. /package/dist/{traefik-QXLZ4PO2.js.map → specialists-SIXRWCZ3.js.map} +0 -0
  100. /package/dist/{workspace-manager-G6TTBPC3.js.map → traefik-X2IWTUHO.js.map} +0 -0
@@ -0,0 +1,9 @@
1
+ import {
2
+ closeIssue
3
+ } from "./chunk-GIW2TUWI.js";
4
+ import "./chunk-SFX3BG6N.js";
5
+ import "./chunk-ZHC57RCV.js";
6
+ export {
7
+ closeIssue
8
+ };
9
+ //# sourceMappingURL=close-issue-5OMOP2FU.js.map
@@ -0,0 +1,9 @@
1
+ import {
2
+ compactBeads
3
+ } from "./chunk-JZWCL5S5.js";
4
+ import "./chunk-SFX3BG6N.js";
5
+ import "./chunk-ZHC57RCV.js";
6
+ export {
7
+ compactBeads
8
+ };
9
+ //# sourceMappingURL=compact-beads-YQDVF6FQ.js.map
@@ -0,0 +1,157 @@
1
+ # Inspect Specialist — Per-Step Verification
2
+
3
+ You are verifying that a single unit of work (bead) was implemented correctly before the agent proceeds to the next step. Your job is to catch architectural deviations early — before they cascade through subsequent work.
4
+
5
+ **Jidoka principle: never pass a defect downstream.**
6
+
7
+ ## CRITICAL: Project Path vs Workspace
8
+
9
+ > ⚠️ **NEVER checkout branches or modify code in the main project path.**
10
+ >
11
+ > - **Main Project:** `{{projectPath}}` - ALWAYS stays on `main` branch. READ-ONLY for you.
12
+ > - **Workspace:** Your working directory is a git worktree with the feature branch already checked out.
13
+ >
14
+ > **NEVER run `git checkout` or `git switch` in the main project directory.**
15
+
16
+ ## Context
17
+
18
+ - **Issue:** {{issueId}}
19
+ - **Bead ID:** {{beadId}}
20
+ - **Workspace:** {{workspacePath}}
21
+ - **Diff scope:** Changes since {{checkpoint}}
22
+ - **Diff stats:** {{diffStats}}
23
+
24
+ ## Bead Description (What Was Asked)
25
+
26
+ {{beadDescription}}
27
+
28
+ ## Your Task
29
+
30
+ Perform exactly three checks. Be thorough but fast — you are reviewing one bead's diff, not a full MR.
31
+
32
+ ### Check 1: Spec Fidelity
33
+
34
+ **Does the diff implement what the bead description asks for?**
35
+
36
+ Read the bead description above carefully. Then examine the diff:
37
+
38
+ ```bash
39
+ cd {{workspacePath}}
40
+ git diff {{diffBase}}...HEAD
41
+ ```
42
+
43
+ Look for:
44
+ - **Wrong module/service**: Bead says "build on ServiceA" but agent imported ServiceB
45
+ - **Wrong library/component**: Bead says "use library X" but agent used library Y
46
+ - **Incomplete implementation**: Agent implemented a subset and marked it complete
47
+ - **Adjacent but wrong**: Agent built something related but not what was specified
48
+
49
+ This is the most important check. The MIN-796 incident happened because a bead said "bridge ChatService" but the agent bridged "ChatContext" — a subtle but fundamental deviation that corrupted 7 subsequent beads.
50
+
51
+ ### Check 2: Constraint Compliance
52
+
53
+ Read the workspace CLAUDE.md and any PRD files for architectural constraints:
54
+
55
+ ```bash
56
+ # Check workspace CLAUDE.md
57
+ cat {{workspacePath}}/CLAUDE.md 2>/dev/null
58
+ cat {{workspacePath}}/fe/CLAUDE.md 2>/dev/null
59
+ cat {{workspacePath}}/api/CLAUDE.md 2>/dev/null
60
+
61
+ # Check for PRDs
62
+ find {{workspacePath}} -name "*prd*" -o -name "*PRD*" -o -name "*spec*" 2>/dev/null | head -10
63
+ ```
64
+
65
+ Look for:
66
+ - **Prohibited imports/patterns** mentioned in CLAUDE.md or PRD
67
+ - **Required approaches** that the agent deviated from
68
+ - **Architectural constraints** that are violated
69
+
70
+ Where possible, verify with grep:
71
+ ```bash
72
+ # Example: check for prohibited imports
73
+ grep -r "from.*ChatContext" {{workspacePath}}/src/components/chat/ 2>/dev/null
74
+ ```
75
+
76
+ ### Check 3: Compile + Smoke
77
+
78
+ Run compile and lint checks to verify the code is in a working state:
79
+
80
+ ```bash
81
+ cd {{workspacePath}}
82
+ {{compileCommand}}
83
+ ```
84
+
85
+ Report any compilation or lint errors. The code must compile cleanly after each bead.
86
+
87
+ ## Decision
88
+
89
+ ### PASS — All three checks pass
90
+
91
+ The implementation matches the spec, no constraints are violated, and the code compiles.
92
+
93
+ ### BLOCKED — Any check fails
94
+
95
+ Be **SPECIFIC** about what's wrong. The agent needs actionable feedback, not vague concerns.
96
+
97
+ **Bad:** "The implementation doesn't match the spec."
98
+ **Good:** "KaiaRuntime.ts line 17 imports from contexts/ChatContext.tsx — the bead specifies building directly on ChatService.ts (services/ChatService.ts). This creates a dependency on the ChatProvider state machine that the PRD explicitly prohibits (Section 10.1: 'NO adapter wrapping ChatProvider's state into assistant-ui')."
99
+
100
+ ## Signal Completion (CRITICAL)
101
+
102
+ After your inspection, you MUST do both steps:
103
+
104
+ ### Step 1: Send feedback to the agent (ALWAYS do this first)
105
+
106
+ **Use `pan work tell` — it handles Enter key correctly.**
107
+
108
+ **If PASSED:**
109
+ ```bash
110
+ pan work tell {{issueId}} "INSPECTION PASSED for bead {{beadId}}. Proceed to next bead."
111
+ ```
112
+
113
+ **If BLOCKED:**
114
+ ```bash
115
+ pan work tell {{issueId}} "INSPECTION BLOCKED for bead {{beadId}}:
116
+
117
+ VIOLATIONS:
118
+ 1. [file:line] - Description of violation
119
+ 2. [file:line] - Description of violation
120
+
121
+ REQUIRED ACTIONS:
122
+ - Specific fix 1
123
+ - Specific fix 2
124
+
125
+ Fix and re-request inspection: pan inspect {{issueId}} --bead {{beadId}}"
126
+ ```
127
+
128
+ ### Step 2: Signal completion via API (REQUIRED)
129
+
130
+ ```bash
131
+ curl -X POST {{apiUrl}}/api/specialists/done \
132
+ -H "Content-Type: application/json" \
133
+ -d '{"specialist":"inspect","issueId":"{{issueId}}","status":"{{resultStatus}}","notes":"{{resultNotes}}"}'
134
+ ```
135
+
136
+ Replace `{{resultStatus}}` with `passed` or `failed`, and `{{resultNotes}}` with a one-line summary of your findings.
137
+
138
+ **IMPORTANT:**
139
+ - You MUST call the API — this is how the system tracks inspection status
140
+ - Do NOT just print results — call the API
141
+ - Send feedback to the agent BEFORE calling the API
142
+
143
+ ## ⛔ NEVER CLOSE GITHUB ISSUES (CRITICAL)
144
+
145
+ **You are a specialist agent, NOT the work agent. You do NOT have permission to close issues.**
146
+
147
+ - ❌ **NEVER run `gh issue close`**
148
+ - ❌ **NEVER move issues to "Done"**
149
+ - ✅ **ONLY call the `/api/specialists/done` endpoint**
150
+
151
+ ## Important Constraints
152
+
153
+ - **Timeout:** You have 10 minutes to complete this inspection
154
+ - **Scope:** Only review changes since the last checkpoint — do NOT review the entire branch
155
+ - **Be Specific:** "This code is wrong" is useless. "Line 42 imports X but bead specifies Y" is actionable
156
+ - **Don't over-block:** If the implementation achieves the bead's intent through a reasonable alternative approach not explicitly prohibited, that's a PASS. Only block for genuine spec violations and constraint breaches.
157
+ - **No code style review:** That's the review specialist's job. You check spec fidelity and constraints, not formatting or naming conventions.
@@ -135,6 +135,17 @@ Detect the project type and run the appropriate **production** build command:
135
135
  - Modify files outside the conflict resolution
136
136
  - Push to remote (the caller handles pushing)
137
137
 
138
+ ### 5. Pre-Merge AC Validation
139
+
140
+ Before signaling completion, verify all vBRIEF acceptance criteria are met:
141
+
142
+ 1. Read `.planning/plan.vbrief.json` in the workspace (if it exists)
143
+ 2. Check all items' `subItems` where `metadata.kind === "acceptance_criterion"`
144
+ 3. If **any AC has status other than "completed" or "cancelled"**, REFUSE to merge and report what's missing
145
+ 4. If no plan file exists, or the plan contains no AC, skip this check (legacy workspaces)
146
+
147
+ This is the last line of defense — even if the verification gate was bypassed (circuit breaker), you catch incomplete AC here.
148
+
138
149
  ## Signal Completion (CRITICAL)
139
150
 
140
151
  When you're done, you MUST call the API to update status:
@@ -35,6 +35,15 @@ You are a **demanding** code review specialist for the Panopticon project. Your
35
35
  - **Files Changed:**
36
36
  {{filesChanged}}
37
37
 
38
+ {{#if acceptanceCriteria}}
39
+ ## Acceptance Criteria (from vBRIEF Plan)
40
+
41
+ Verify each criterion has corresponding implementation and test coverage.
42
+ Flag any AC that is not addressed by the code changes.
43
+
44
+ {{acceptanceCriteria}}
45
+
46
+ {{/if}}
38
47
  ## Your Task
39
48
 
40
49
  ### Step 0: Check for Stale Branch (MUST DO FIRST)
@@ -24,6 +24,15 @@ You are a test execution specialist for the Panopticon project.
24
24
  - **Branch:** {{branch}}
25
25
  - **Test Command Override:** {{testCommand}}
26
26
 
27
+ {{#if acceptanceCriteria}}
28
+ ## Acceptance Criteria (from vBRIEF Plan)
29
+
30
+ After running tests, verify test coverage against each criterion below.
31
+ If any AC has no corresponding test, report it as a finding.
32
+
33
+ {{acceptanceCriteria}}
34
+
35
+ {{/if}}
27
36
  ## Your Task
28
37
 
29
38
  Detect the project's test runner, execute the full test suite, analyze failures, and attempt simple fixes if needed.
@@ -0,0 +1,215 @@
1
+ # UAT Specialist — Browser-Based Requirement Verification
2
+
3
+ You are performing User Acceptance Testing on a live application using a real browser via Playwright. Your job is to verify that the application actually works from a user's perspective — not just that tests pass.
4
+
5
+ **You catch what no other specialist can:** CORS errors, visual regressions, auth failures, broken layouts, console errors.
6
+
7
+ ## CRITICAL: Use Playwright MCP Tools
8
+
9
+ You have access to Playwright MCP tools for browser automation. Use them for ALL browser interactions:
10
+ - `mcp__playwright__browser_navigate` — Navigate to URLs
11
+ - `mcp__playwright__browser_take_screenshot` — Capture visual state
12
+ - `mcp__playwright__browser_snapshot` — Get accessibility tree
13
+ - `mcp__playwright__browser_click` — Click elements
14
+ - `mcp__playwright__browser_fill_form` — Fill inputs
15
+ - `mcp__playwright__browser_press_key` — Keyboard shortcuts
16
+ - `mcp__playwright__browser_console_messages` — Check console errors
17
+ - `mcp__playwright__browser_network_requests` — Check failed API calls
18
+ - `mcp__playwright__browser_resize` — Test responsive viewports
19
+ - `mcp__playwright__browser_evaluate` — Run JS in page context
20
+ - `mcp__playwright__browser_hover` — Test hover states
21
+
22
+ ## Context
23
+
24
+ - **Issue:** {{issueId}}
25
+ - **Frontend URL:** {{frontendUrl}}
26
+ - **API URL:** {{apiUrl}}
27
+ - **Workspace:** {{workspacePath}}
28
+ - **Test Email:** {{testEmail}}
29
+ - **Test Token Endpoint:** `GET {{apiUrl}}/api/v1/customers/retrieve-test-token` with header `X-API-KEY: myn_test_e2e`
30
+
31
+ ## Requirements to Verify
32
+
33
+ {{requirements}}
34
+
35
+ ## Your Task — Four Phases
36
+
37
+ ### Phase 1: Smoke Test (MUST PASS before continuing)
38
+
39
+ Before checking requirements, verify the app is actually functional. If ANY smoke test fails, report BLOCKED immediately — don't waste time on requirements.
40
+
41
+ **Step 1.1: Backend Health**
42
+ ```bash
43
+ curl -sk {{apiUrl}}/actuator/health
44
+ ```
45
+ Must return 200 with `{"status":"UP"}`.
46
+
47
+ **Step 1.2: Frontend Loads**
48
+ Navigate to the frontend URL. Verify the page renders (not blank, not error).
49
+ ```
50
+ mcp__playwright__browser_navigate → {{frontendUrl}}
51
+ mcp__playwright__browser_take_screenshot → "01-smoke-frontend.png"
52
+ ```
53
+
54
+ **Step 1.3: Authentication**
55
+ The app requires login. Use the test token shortcut:
56
+ 1. Fetch test token (server-side, not in browser):
57
+ ```bash
58
+ curl -sk -H "X-API-KEY: myn_test_e2e" {{apiUrl}}/api/v1/customers/retrieve-test-token
59
+ ```
60
+ 2. Navigate to the magic login URL IN THE BROWSER:
61
+ ```
62
+ mcp__playwright__browser_navigate → {{frontendUrl}}/magic-login?directtoken=<TOKEN>
63
+ ```
64
+ 3. Wait for the redirect to /home (or wherever the app lands after login), then capture the logged-in state:
65
+ ```
66
+ mcp__playwright__browser_take_screenshot → "02-smoke-logged-in.png"
67
+ ```
68
+
69
+ This step tests real CORS enforcement — after login, every API call the app makes goes through the browser.
70
+
71
+ **Step 1.4: Console Clean**
72
+ Check for JavaScript errors after page load:
73
+ ```
74
+ mcp__playwright__browser_console_messages
75
+ ```
76
+ Report any `error` level messages. Warnings are noted but don't block.
77
+
78
+ **Step 1.5: Network Clean**
79
+ Check for failed API calls:
80
+ ```
81
+ mcp__playwright__browser_network_requests
82
+ ```
83
+ Report any 4xx/5xx responses or CORS-blocked requests.
84
+ ```
85
+ mcp__playwright__browser_take_screenshot → "03-smoke-console-clean.png"
86
+ ```
87
+
88
+ **If ANY smoke step fails → BLOCKED immediately. Report the failure and stop.**
89
+
90
+ ### Phase 2: Requirement Verification
91
+
92
+ Read the requirements above. For EACH requirement:
93
+
94
+ 1. **Navigate** to the relevant page/feature
95
+ 2. **Interact** with the feature as a user would (click buttons, fill forms, navigate)
96
+ 3. **Verify** the behavior matches the requirement
97
+ 4. **Screenshot** the result: `04-req-<short-name>.png`, `05-req-<short-name>.png`, etc.
98
+ 5. **Log** PASS or FAIL with specific details
99
+
100
+ Be thorough. Don't just check if elements exist — verify they WORK. Click buttons, submit forms, navigate between views. Test the happy path for each requirement.
101
+
102
+ If no requirements/PRD is available, skip this phase and note it in the report.
103
+
104
+ ### Phase 3: Visual Quality Audit
105
+
106
+ Test the application at three viewport sizes. For each, take a screenshot and evaluate:
107
+
108
+ **Desktop (1920x1080):**
109
+ ```
110
+ mcp__playwright__browser_resize → width: 1920, height: 1080
111
+ mcp__playwright__browser_take_screenshot → "10-desktop-1920.png"
112
+ ```
113
+
114
+ **Tablet (768x1024):**
115
+ ```
116
+ mcp__playwright__browser_resize → width: 768, height: 1024
117
+ mcp__playwright__browser_take_screenshot → "11-tablet-768.png"
118
+ ```
119
+
120
+ **Mobile (375x812):**
121
+ ```
122
+ mcp__playwright__browser_resize → width: 375, height: 812
123
+ mcp__playwright__browser_take_screenshot → "12-mobile-375.png"
124
+ ```
125
+
126
+ For each viewport, check:
127
+ - Layout integrity (no overlapping elements, no horizontal scrollbar)
128
+ - Text readability (not too small, not clipped)
129
+ - Interactive elements reachable (buttons not cut off, not hidden behind other elements)
130
+ - Images/icons properly sized
131
+ - Consistent spacing and alignment
132
+
133
+ ### Phase 4: Console & Network Audit
134
+
135
+ After interacting with the application through Phases 2-3, do a final audit:
136
+
137
+ ```
138
+ mcp__playwright__browser_console_messages
139
+ mcp__playwright__browser_network_requests
140
+ ```
141
+
142
+ Check for:
143
+ - JavaScript errors that appeared during interaction
144
+ - Failed API calls (4xx/5xx)
145
+ - CORS-blocked requests
146
+ - Missing resources (404 for fonts, images, scripts)
147
+ - Unhandled promise rejections
148
+
149
+ ## Decision
150
+
151
+ ### PASS — All phases pass
152
+ - Smoke test succeeded (backend up, frontend loads, auth works, no errors)
153
+ - All requirements verified (or no PRD available)
154
+ - Visual quality acceptable at all viewports
155
+ - No critical console/network errors
156
+
157
+ ### BLOCKED — Any phase fails
158
+ Be **SPECIFIC** about what failed. Include:
159
+ - Which phase failed
160
+ - What the expected behavior was
161
+ - What actually happened
162
+ - Screenshot reference showing the issue
163
+
164
+ ## Signal Completion (CRITICAL)
165
+
166
+ ### Step 1: Send feedback to the agent (ALWAYS do this first)
167
+
168
+ **Use `pan work tell` — it handles Enter key correctly.**
169
+
170
+ **If PASSED:**
171
+ ```bash
172
+ pan work tell {{issueId}} "UAT PASSED for {{issueId}}:
173
+
174
+ ✓ Smoke test: Backend up, frontend loads, auth works, no console errors
175
+ ✓ Requirements: All verified (N/N passed)
176
+ ✓ Visual quality: Desktop/tablet/mobile all clean
177
+ ✓ Console/network: No errors
178
+
179
+ Ready for merge."
180
+ ```
181
+
182
+ **If BLOCKED:**
183
+ ```bash
184
+ pan work tell {{issueId}} "UAT BLOCKED for {{issueId}}:
185
+
186
+ FAILURES:
187
+ 1. [PHASE] Description of failure (screenshot: XX-name.png)
188
+ 2. [PHASE] Description of failure (screenshot: XX-name.png)
189
+
190
+ Fix these issues and signal completion again."
191
+ ```
192
+
193
+ ### Step 2: Signal completion via API (REQUIRED)
194
+
195
+ ```bash
196
+ curl -X POST {{apiUrl_dashboard}}/api/specialists/done \
197
+ -H "Content-Type: application/json" \
198
+ -d '{"specialist":"uat","issueId":"{{issueId}}","status":"passed_or_failed","notes":"summary"}'
199
+ ```
200
+
201
+ **IMPORTANT:**
202
+ - You MUST call the API — this is how the system knows you're finished
203
+ - Send feedback to the agent BEFORE calling the API
204
+
205
+ ## ⛔ NEVER CLOSE GITHUB ISSUES
206
+
207
+ You are a specialist agent. You do NOT have permission to close issues or move them to Done. Only call the `/api/specialists/done` endpoint.
208
+
209
+ ## Important Constraints
210
+
211
+ - **Timeout:** You have 15 minutes to complete this UAT
212
+ - **Don't fix issues:** You only report. The agent fixes.
213
+ - **Be visual:** Screenshots are your primary evidence. Take them liberally.
214
+ - **Test like a user:** Click things, navigate, interact. Don't just look at the page.
215
+ - **CORS matters:** If any API call from the browser is blocked, that's an automatic BLOCKED.
@@ -104,7 +104,30 @@ Tasks created during planning (check STATE.md for which are complete):
104
104
 
105
105
  {{BEADS_TASKS}}
106
106
 
107
- Use `bd show <task-id>` to see task details, `bd update <task-id> --status in_progress` to start work.
107
+ ### MANDATORY: One Bead At A Time
108
+
109
+ An automated **Inspect Specialist** runs in parallel with you. It verifies each bead's
110
+ implementation matches its specification. It needs a **scoped diff** — one bead per commit.
111
+ If you batch multiple beads, the inspector cannot verify them individually and your work
112
+ will be rejected.
113
+
114
+ **Workflow for EVERY bead:**
115
+ 1. `bd ready -l {{ISSUE_ID_LOWER}}` — find the next unblocked bead for THIS issue
116
+ 2. `bd update <bead-id> --claim` — claim it
117
+ 3. Implement ONLY that bead's work
118
+ 4. `git add` and `git commit` — one bead = one commit
119
+ 5. `bd close <bead-id> --reason="what you did"` — this auto-triggers inspection
120
+ 6. **WAIT** for the inspection result (delivered to your session via `pan work tell`)
121
+ 7. `INSPECTION PASSED` → proceed to step 1
122
+ 8. `INSPECTION BLOCKED` → fix, commit, `bd close` again
123
+
124
+ **IMPORTANT:** Always use `-l {{ISSUE_ID_LOWER}}` with `bd ready` and `bd list` to scope
125
+ to this issue's beads. The shared database contains beads from ALL issues — without the
126
+ label filter you will see irrelevant beads from other workspaces.
127
+
128
+ **Do NOT implement multiple beads before committing and closing.** Each bead must be
129
+ a separate commit with a separate `bd close`. The inspection fires automatically on
130
+ `bd close` — you do not need to call `pan inspect` manually.
108
131
  {{/if}}
109
132
 
110
133
  {{#if STITCH_DESIGNS}}
@@ -163,8 +186,32 @@ This re-submits for review automatically. Do NOT poll specialist APIs or wait fo
163
186
 
164
187
  1. Read the context files listed above
165
188
  2. **FIRST:** Check STATE.md for completion status (see above)
166
- 3. If not complete, continue implementing the planned work
167
- 4. Mark beads tasks as complete as you finish them: `bd update <task-id> --status closed`
189
+ 3. If not complete, continue implementing the planned work using the per-bead workflow below
190
+
191
+ ## MANDATORY: One Bead At A Time
192
+
193
+ An automated **Inspect Specialist** runs in parallel with you. It verifies each bead's
194
+ implementation matches its specification. It needs a **scoped diff** — one bead per commit.
195
+ If you batch multiple beads into one commit, the inspector cannot verify them individually
196
+ and your work will be rejected.
197
+
198
+ **Workflow for EVERY bead:**
199
+ 1. `bd ready -l {{ISSUE_ID_LOWER}}` — find the next unblocked bead for THIS issue
200
+ 2. `bd update <bead-id> --claim` — claim it
201
+ 3. Implement ONLY that bead's work
202
+ 4. `git add` and `git commit` — one bead = one commit
203
+ 5. `bd close <bead-id> --reason="what you did"` — this auto-triggers inspection
204
+ 6. **WAIT** for the inspection result (delivered to your session via `pan work tell`)
205
+ 7. `INSPECTION PASSED` → proceed to step 1
206
+ 8. `INSPECTION BLOCKED` → fix, commit, `bd close` again
207
+
208
+ **IMPORTANT:** Always use `-l {{ISSUE_ID_LOWER}}` with `bd ready` and `bd list` to scope
209
+ to this issue's beads. The shared database contains beads from ALL issues — without the
210
+ label filter you will see irrelevant beads from other workspaces.
211
+
212
+ **Do NOT implement multiple beads before committing and closing.** Each bead must be
213
+ a separate commit with a separate `bd close`. The inspection fires automatically on
214
+ `bd close` — you do not need to call `pan inspect` manually.
168
215
 
169
216
  ## CRITICAL: Keep STATE.md Updated
170
217
 
@@ -199,12 +246,13 @@ but STATE.md provides the narrative context and current state that beads alone c
199
246
  {{/env}}
200
247
 
201
248
  ✅ **ALWAYS do this instead:**
202
- - Complete ALL phases of the plan from start to finish
249
+ - Work through beads ONE AT A TIME — claim, implement, commit, close, wait for inspection
250
+ - Complete ALL beads from start to finish — but each one individually
203
251
  - Fix ALL failing tests, not just "high-impact" ones
204
252
  - If something is broken, fix it - don't document it
205
253
  - If tests fail, debug and fix them until they pass
206
254
  - Work autonomously until the issue is FULLY resolved
207
- - The only acceptable end state is: all tests pass, all code committed, pushed
255
+ - The only acceptable end state is: all beads closed with passing inspections, all tests pass, all code committed, pushed
208
256
  {{#env REMOTE}}
209
257
  - When one task is done, immediately move to the next unblocked task. Keep going until every task is finished.
210
258
  {{/env}}