iriai-build 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/bin/iriai-build.js +78 -0
  2. package/bridge-v3.js +98 -0
  3. package/cli/bootstrap.js +83 -0
  4. package/cli/commands/implementation.js +64 -0
  5. package/cli/commands/index.js +46 -0
  6. package/cli/commands/launch.js +153 -0
  7. package/cli/commands/plan.js +117 -0
  8. package/cli/commands/setup.js +80 -0
  9. package/cli/commands/slack.js +97 -0
  10. package/cli/commands/transfer.js +111 -0
  11. package/cli/config.js +92 -0
  12. package/cli/display.js +121 -0
  13. package/cli/terminal-input.js +666 -0
  14. package/cli/wait.js +82 -0
  15. package/index.js +1488 -0
  16. package/lib/agent-process.js +170 -0
  17. package/lib/bridge-state.js +126 -0
  18. package/lib/constants.js +137 -0
  19. package/lib/health-monitor.js +113 -0
  20. package/lib/prompt-builder.js +565 -0
  21. package/lib/signal-watcher.js +215 -0
  22. package/lib/slack-helpers.js +224 -0
  23. package/lib/state-machines/feature-lead.js +408 -0
  24. package/lib/state-machines/operator-agent.js +173 -0
  25. package/lib/state-machines/planning-role.js +161 -0
  26. package/lib/state-machines/role-agent.js +186 -0
  27. package/lib/state-machines/team-orchestrator.js +160 -0
  28. package/package.json +31 -0
  29. package/v3/.handover-html-evidence.md +35 -0
  30. package/v3/KICKOFF-HTML-EVIDENCE.md +98 -0
  31. package/v3/PLAN-HTML-EVIDENCE-HARDENING.md +603 -0
  32. package/v3/adapters/desktop-adapter.js +78 -0
  33. package/v3/adapters/interface.js +146 -0
  34. package/v3/adapters/slack-adapter.js +608 -0
  35. package/v3/adapters/slack-helpers.js +179 -0
  36. package/v3/adapters/terminal-adapter.js +249 -0
  37. package/v3/agent-supervisor.js +320 -0
  38. package/v3/artifact-portal.js +1184 -0
  39. package/v3/bridge.db +0 -0
  40. package/v3/constants.js +170 -0
  41. package/v3/db.js +76 -0
  42. package/v3/file-io.js +216 -0
  43. package/v3/helpers.js +174 -0
  44. package/v3/operator.js +364 -0
  45. package/v3/orchestrator.js +2886 -0
  46. package/v3/plan-compiler.js +440 -0
  47. package/v3/prompt-builder.js +849 -0
  48. package/v3/queries.js +461 -0
  49. package/v3/recovery.js +508 -0
  50. package/v3/review-sessions.js +360 -0
  51. package/v3/roles/accessibility-auditor/CLAUDE.md +50 -0
  52. package/v3/roles/analytics-engineer/CLAUDE.md +40 -0
  53. package/v3/roles/architect/CLAUDE.md +809 -0
  54. package/v3/roles/backend-implementer/CLAUDE.md +97 -0
  55. package/v3/roles/code-reviewer/CLAUDE.md +89 -0
  56. package/v3/roles/database-implementer/CLAUDE.md +97 -0
  57. package/v3/roles/deployer/CLAUDE.md +42 -0
  58. package/v3/roles/designer/CLAUDE.md +386 -0
  59. package/v3/roles/documentation/CLAUDE.md +40 -0
  60. package/v3/roles/feature-lead/CLAUDE.md +233 -0
  61. package/v3/roles/frontend-implementer/CLAUDE.md +97 -0
  62. package/v3/roles/implementer/CLAUDE.md +97 -0
  63. package/v3/roles/integration-tester/CLAUDE.md +174 -0
  64. package/v3/roles/observability-engineer/CLAUDE.md +40 -0
  65. package/v3/roles/operator/CLAUDE.md +322 -0
  66. package/v3/roles/orchestrator/CLAUDE.md +288 -0
  67. package/v3/roles/package-implementer/CLAUDE.md +47 -0
  68. package/v3/roles/performance-analyst/CLAUDE.md +49 -0
  69. package/v3/roles/plan-compiler/CLAUDE.md +163 -0
  70. package/v3/roles/planning-lead/CLAUDE.md +41 -0
  71. package/v3/roles/pm/CLAUDE.md +806 -0
  72. package/v3/roles/regression-tester/CLAUDE.md +135 -0
  73. package/v3/roles/release-manager/CLAUDE.md +43 -0
  74. package/v3/roles/security-auditor/CLAUDE.md +90 -0
  75. package/v3/roles/smoke-tester/CLAUDE.md +97 -0
  76. package/v3/roles/test-author/CLAUDE.md +42 -0
  77. package/v3/roles/verifier/CLAUDE.md +90 -0
  78. package/v3/schema.sql +134 -0
  79. package/v3/slack-adapter.js +510 -0
  80. package/v3/slack-helpers.js +346 -0
@@ -0,0 +1,603 @@
1
+ # Plan: HTML Evidence Document Hardening
2
+
3
+ ## Problem Statement
4
+
5
+ The current gate review process uses YAML-based `.output` files and `.gate-evidence.yaml` compiled into basic HTML. The orchestrator and feature lead make pass/fail decisions, but:
6
+
7
+ 1. **No structured gaps reporting** - Review agents report findings but don't explicitly call out what's MISSING in their domain
8
+ 2. **No deviation tracking** - Implementers diverge from the plan silently; reviewers have no visibility
9
+ 3. **No self-reported risks** - Implementers know where the bodies are buried but aren't asked
10
+ 4. **No coverage matrix** - No way to see "what from the plan is done vs not done" at a glance
11
+ 5. **Incomplete E2E coverage** - Integration-tester tests journeys but doesn't mandate error case GIFs
12
+ 6. **Context loss on long tasks** - Agents that span multiple context windows lose structured findings; handover is prose-based
13
+ 7. **HTML doc lacks reviewer commentary** - Orchestrator/FL decisions aren't captured in the evidence artifact
14
+ 8. **No visual distinction between complete and pending sections** - Can't tell what's filled vs awaiting
15
+
16
+ ## Solution Overview
17
+
18
+ ### Two-tier HTML evidence documents
19
+
20
+ - **Team gate HTML** - One per team per gate. Compiled by team orchestrator after QA agents finish. NO approve/reject buttons — this is evidence only, reviewed by the feature lead internally.
21
+ - **Feature gate HTML** - One per feature per gate. Compiled by feature lead. Links to team gate HTMLs. This is the ONLY doc that gets approve/reject buttons and is the ONLY doc posted to the Slack impl channel.
22
+
23
+ ### Planning channel thread communication policy
24
+
25
+ The planning channel thread (where `[FEATURE]` was posted) receives ONLY:
26
+ - **HTML evidence documents** — the feature gate HTML is posted as a thread update
27
+ - **Approve/reject buttons** — only on the feature gate HTML, one per gate
28
+
29
+ NO text-based status messages in the planning thread ("Pipeline started", "Phase approved", "Launching implementation", etc.). The HTML document IS the update. If you want to know the state, open the HTML.
30
+
31
+ The impl channel (`#impl-<slug>`) is **unchanged** — it still receives questions, operator relay messages, and gate decisions as before.
32
+
33
+ ### Incremental structured output (.output.partial)
34
+
35
+ Agents write append-only partial output as they complete each unit of work, preventing context loss.
36
+
37
+ ### Enriched output schemas
38
+
39
+ Implementers report deviations and risks. Review agents report gaps. Orchestrator/FL add comments.
40
+
41
+ ---
42
+
43
+ ## Detailed Changes
44
+
45
+ ### 1. Incremental Output Protocol (.output.partial)
46
+
47
+ **Applies to:** ALL agents that produce `.output` (implementers + review agents)
48
+
49
+ **Format:** Append-only multi-document YAML (separated by `---`)
50
+
51
+ ```yaml
52
+ ---
53
+ type: journey
54
+ name: auth-login
55
+ verdict: PASS
56
+ checks:
57
+ - criterion: "Login with valid credentials"
58
+ result: PASS
59
+ detail: "200 response, session cookie set"
60
+ gif_path: .recordings/gifs/auth-login.gif
61
+ completed_at: "2026-03-05T10:00:00Z"
62
+ ---
63
+ type: journey
64
+ name: auth-login-invalid-password
65
+ verdict: PASS
66
+ checks:
67
+ - criterion: "Login with wrong password"
68
+ result: PASS
69
+ detail: "401 response, error message shown"
70
+ gif_path: .recordings/gifs/auth-login-error.gif
71
+ completed_at: "2026-03-05T10:02:00Z"
72
+ ---
73
+ type: gap
74
+ category: error-handling
75
+ description: "Timeout scenario not tested"
76
+ severity: major
77
+ plan_reference: "journey-3, step 4"
78
+ ```
79
+
80
+ **Rules:**
81
+ - After completing each unit of work (journey, file review, acceptance criterion check), append a `---` separated YAML document to `.output.partial`
82
+ - Each entry is self-contained and independently parseable
83
+ - On restart, read `.output.partial` to know exactly what's done (structured, not prose)
84
+ - On completion, consolidate `.output.partial` into final `.output` with aggregated verdict, checks, gaps, issues
85
+ - Handover becomes lightweight: "read `.output.partial` for completed work, remaining: X, Y, Z"
86
+ - If agent hard-crashes without handover, `.output.partial` preserves all completed work on disk
87
+ - Parse with `yaml.loadAll()` — standard multi-document YAML
88
+ - Orchestrator can read `.output.partial` for progress visibility before agent finishes
89
+
90
+ **Entry types by agent:**
91
+
92
+ | Agent | Entry types |
93
+ |-------|-------------|
94
+ | Implementer | `file_complete` (per file modified), `deviation`, `risk` |
95
+ | Code reviewer | `file_review` (per file), `gap` |
96
+ | Security auditor | `endpoint_review` (per endpoint/flow), `gap` |
97
+ | Integration tester | `journey` (per journey), `gap` |
98
+ | Regression tester | `test_suite` (per suite), `gap` |
99
+ | Verifier | `criterion_check` (per acceptance criterion), `gap` |
100
+ | Smoke tester | `critical_path` (per path), `gap` |
101
+
102
+ ### 2. Implementer Output Schema Changes
103
+
104
+ **Files:** `roles/implementer/CLAUDE.md`, `roles/backend-implementer/CLAUDE.md`, `roles/frontend-implementer/CLAUDE.md`, `roles/database-implementer/CLAUDE.md`
105
+
106
+ **Add to Output section:**
107
+
108
+ ```yaml
109
+ task_id: [id]
110
+ role: implementer
111
+ summary_oneliner: "[one line]"
112
+ files_created: [list]
113
+ files_modified: [list]
114
+ deviations: # NEW
115
+ - plan_said: "[what the task specified]"
116
+ i_did: "[what was actually implemented]"
117
+ reason: "[why the deviation was necessary]"
118
+ self_reported_risks: # NEW
119
+ - description: "[what you're not confident about]"
120
+ severity: major|minor
121
+ file: "[path]"
122
+ ```
123
+
124
+ **Add to Process section:** After step 5 (verify), add:
125
+ - Step 6: Document any deviations from the task spec and why
126
+ - Step 7: Flag anything you're not confident about as a self-reported risk
127
+
128
+ **Add .output.partial protocol:** After each file is completed, append a `file_complete` entry.
129
+
130
+ ### 3. Review Agent Output Schema Changes
131
+
132
+ **Files:** `roles/code-reviewer/CLAUDE.md`, `roles/security-auditor/CLAUDE.md`, `roles/regression-tester/CLAUDE.md`, `roles/verifier/CLAUDE.md`, `roles/integration-tester/CLAUDE.md`
133
+
134
+ **Add `gaps` field to Output section (all 5 agents):**
135
+
136
+ ```yaml
137
+ task_id: [id]
138
+ role: [role]
139
+ verdict: PASS|FAIL|CONDITIONAL
140
+ summary_oneliner: "[counts]"
141
+ checks:
142
+ - criterion: "[review area]"
143
+ result: PASS|FAIL
144
+ detail: "[evidence]"
145
+ issues:
146
+ - severity: blocker|major|minor|nit
147
+ description: "[what's wrong]"
148
+ file: "[path]"
149
+ line: [number]
150
+ gaps: # NEW
151
+ - category: "[domain-specific]"
152
+ description: "[what's missing or not covered]"
153
+ severity: blocker|major|minor
154
+ plan_reference: "[task ID or acceptance criterion]"
155
+ duration_seconds: [elapsed]
156
+ ```
157
+
158
+ **Gap categories by agent:**
159
+
160
+ | Agent | Gap categories |
161
+ |-------|---------------|
162
+ | Code reviewer | error-handling, input-validation, pattern-compliance, edge-cases, test-coverage |
163
+ | Security auditor | auth, injection, rate-limiting, secrets, cors, csrf, data-exposure |
164
+ | Integration tester | untested-journey, missing-error-case, missing-edge-case, visual-gap |
165
+ | Regression tester | untested-regression, missing-backward-compat, skipped-test-suite |
166
+ | Verifier | unverified-criterion, insufficient-evidence, missing-acceptance-check |
167
+
168
+ **Add .output.partial protocol to all 5:** After each review item, append entry to `.output.partial`.
169
+
170
+ ### 4. Integration Tester: Comprehensive E2E Coverage Mandate
171
+
172
+ **File:** `roles/integration-tester/CLAUDE.md`
173
+
174
+ **Add new section "Comprehensive Journey Coverage - MANDATORY":**
175
+
176
+ ```
177
+ ## Comprehensive Journey Coverage - MANDATORY
178
+
179
+ For EVERY user journey defined in the plan:
180
+
181
+ ### Happy Path (Golden Path)
182
+ - Execute the full journey step by step
183
+ - Generate a GIF of the complete flow
184
+ - Every verify block must produce evidence
185
+
186
+ ### Error Cases (per journey)
187
+ For each journey, test ALL of the following error scenarios that apply:
188
+ - Invalid input (wrong types, missing fields, too long, empty)
189
+ - Authentication failures (expired token, wrong credentials, no token)
190
+ - Authorization failures (wrong role, insufficient permissions)
191
+ - Network/timeout scenarios (if applicable)
192
+ - Empty state (no data, first-time user)
193
+ - Boundary conditions (max items, zero items, concurrent access)
194
+
195
+ Each error case gets:
196
+ - Its own GIF showing the error flow and recovery/message
197
+ - A check entry in the output
198
+
199
+ ### Gap Reporting
200
+ After testing, cross-reference the plan's journey list against what you tested.
201
+ For any journey or error case NOT tested, write a gap entry with:
202
+ - Which journey/error case was skipped
203
+ - Why it was skipped (MCP unavailable, environment limitation, time constraint)
204
+ - Severity assessment (blocker if it's a critical path, major otherwise)
205
+ ```
206
+
207
+ ### 5. Orchestrator CLAUDE.md Updates
208
+
209
+ **File:** `roles/orchestrator/CLAUDE.md`
210
+
211
+ **Key change: Team orchestrator compiles team gate HTML but does NOT post it to Slack or attach approve/reject buttons.** The team gate HTML is written to disk for the feature lead to review internally. The orchestrator signals `.gate-ready` as before — the feature lead is the only one who presents evidence to the user.
212
+
213
+ **Changes to "Per-Phase Adversarial Review + Gate Evidence" section:**
214
+
215
+ Add after step 4 (QA roles complete), before visual review:
216
+
217
+ ```
218
+ 4b. **Review gaps from every review agent.** Read the `gaps` field in each QA agent's
219
+ `.output`. These are the primary inputs to your gate decision. A gap with severity
220
+ `blocker` means the phase cannot pass — re-dispatch the responsible agent.
221
+
222
+ 4c. **Aggregate implementer deviations and risks.** Read `deviations` and
223
+ `self_reported_risks` from each implementer's `.output`. Cross-reference deviations
224
+ against the plan — if a deviation contradicts a requirement, it's a blocker.
225
+
226
+ 4d. **Build coverage matrix.** For every task and acceptance criterion in the plan,
227
+ determine status:
228
+ - `implemented_verified` — implementer completed it AND a review agent verified it
229
+ - `implemented_unverified` — implementer completed it but no review agent checked it
230
+ - `not_implemented` — no implementer output references this item
231
+ Include the matrix in `.gate-evidence.yaml`.
232
+ ```
233
+
234
+ **Update gate evidence YAML schema in the example:**
235
+
236
+ ```yaml
237
+ gate: 1
238
+ feature: my-feature
239
+ recommendation:
240
+ verdict: APPROVE
241
+ reasoning: "All journeys pass with visual evidence verified"
242
+ pr:
243
+ url: https://github.com/org/repo/pull/123
244
+ branch: feature/my-feature
245
+ files_changed: 15
246
+ additions: 420
247
+ deletions: 50
248
+ summary: "Implemented auth flow with login, registration, and password reset."
249
+ coverage_matrix: # NEW
250
+ - plan_item: "task-1.1: Login endpoint"
251
+ status: implemented_verified
252
+ evidence_ref: "code-reviewer check 1, integration-tester journey auth-login"
253
+ - plan_item: "task-1.2: Rate limiting"
254
+ status: implemented_unverified
255
+ evidence_ref: "implementer output only"
256
+ - plan_item: "task-1.3: Password reset"
257
+ status: not_implemented
258
+ evidence_ref: null
259
+ deviations: # NEW (aggregated from implementers)
260
+ - source: backend-implementer
261
+ task_id: "1.1"
262
+ plan_said: "Use bcrypt for password hashing"
263
+ i_did: "Used argon2id"
264
+ reason: "argon2id is the current OWASP recommendation"
265
+ self_reported_risks: # NEW (aggregated from implementers)
266
+ - source: frontend-implementer
267
+ task_id: "1.2"
268
+ description: "Rate limit UI feedback relies on 429 status code; not tested with proxy"
269
+ severity: minor
270
+ file: "src/components/LoginForm.tsx"
271
+ reviewer_comments: # NEW
272
+ orchestrator:
273
+ verdict: convinced
274
+ reasoning: "All gaps are minor. Deviation on argon2id is an improvement. Coverage matrix shows 12/14 items verified."
275
+ concerns:
276
+ - "Rate limiting not visually verified — only unit tested"
277
+ journey_results:
278
+ - name: auth-login
279
+ verdict: PASS
280
+ type: happy-path
281
+ # ... existing fields ...
282
+ - name: auth-login-invalid-password # NEW: error case journeys
283
+ verdict: PASS
284
+ type: error-case
285
+ # ...
286
+ tasks:
287
+ - id: "1.1"
288
+ title: "Implement login endpoint"
289
+ role: backend-implementer
290
+ verdict: PASS
291
+ qa_verdicts:
292
+ - role: code-reviewer
293
+ verdict: PASS
294
+ issue_count: 0
295
+ gaps: # NEW
296
+ - category: test-coverage
297
+ description: "No unit tests for rate limiter middleware"
298
+ severity: major
299
+ - role: security-auditor
300
+ verdict: PASS
301
+ issue_count: 0
302
+ gaps: []
303
+ ```
304
+
305
+ ### 6. Feature Lead CLAUDE.md Updates
306
+
307
+ **File:** `roles/feature-lead/CLAUDE.md`
308
+
309
+ **Key change: The feature gate HTML is the ONLY artifact posted to the impl Slack channel, and it is the ONLY place approve/reject buttons appear.** One HTML per gate, one approval decision per gate. Team gate HTMLs are internal evidence reviewed by the FL — they are linked from the feature gate HTML but never posted to Slack independently.
310
+
311
+ **Changes to "Gate Evidence Document Protocol" section:**
312
+
313
+ After step 4 (wait for review agents), before step 5 (adversarial cross-check):
314
+
315
+ ```
316
+ 4b. **Review gaps across all levels.** Read `gaps` from:
317
+ - Each team orchestrator's `.gate-evidence.yaml` (team-level QA gaps)
318
+ - Each team's compiled `.gate-evidence.html` (review visually)
319
+ - Each feature-level review agent's `.output` (cross-team gaps)
320
+ Any blocker-severity gap that hasn't been addressed = REJECT.
321
+
322
+ 4c. **Build cross-team integration surface.** Document:
323
+ - APIs/contracts that Team A's work exposes and Team B consumes
324
+ - Shared database tables or state modified by multiple teams
325
+ - Any cross-team dependency that could break if one team's work changes
326
+
327
+ 4d. **Build feature-level coverage matrix.** Cross-reference the FULL plan
328
+ (all phases, all tasks, all acceptance criteria) against evidence from
329
+ all teams. This is the master view — status of every plan item.
330
+
331
+ 4e. **Add Feature Lead comments.** Write your assessment:
332
+ - verdict: convinced|not_convinced
333
+ - reasoning: reference specific gaps, deviations, and cross-team concerns
334
+ - concerns: list remaining items even if you're convinced overall
335
+ ```
336
+
337
+ **Update step 7 (compile HTML):**
338
+
339
+ ```
340
+ 7. **Compile feature gate HTML** — Call `compile_gate_evidence` MCP tool:
341
+ - `evidence_yaml_path`: your merged `.gate-evidence.yaml`
342
+ - `output_html_path`: `<featureDir>/.gate-evidence.html`
343
+ - `doc_type`: "feature"
344
+ - `team_html_paths`: list of `{ team_num, html_path }` entries for the team-level HTMLs to link to
345
+ - If tool returns ERROR -> re-dispatch affected role -> retry
346
+ ```
347
+
348
+ **Update step 8 (post to Slack):**
349
+
350
+ ```
351
+ 8. **Post feature gate HTML to impl channel** via `.agent-response`:
352
+ - Include `[evidence:<path to .gate-evidence.html>]` marker — HTML uploaded as attachment
353
+ - Include `[SLACK:decision]` block with approve/reject buttons
354
+ - This is the ONE approval point per gate — no per-team approvals
355
+ - The HTML links to team gate HTMLs for drill-down
356
+ - User reviews the HTML, then clicks approve/reject
357
+ ```
358
+
359
+ ### 7. Evidence Compiler HTML Template Overhaul
360
+
361
+ **File:** `tools/visual-verification-mcp/evidence-compiler.js`
362
+
363
+ #### New HTML sections (in order):
364
+
365
+ 1. **Header** (existing) — feature, gate, date, recommendation badge
366
+ 2. **Scoreboard** (existing) — tasks, journeys, regression, QA counts
367
+ 3. **Coverage Matrix** (NEW) — table:
368
+ - Column 1: Plan item (task ID + title)
369
+ - Column 2: Status badge
370
+ - Column 3: Evidence reference (link to check/journey)
371
+ - Styling:
372
+ - `implemented_verified`: normal text, green checkmark icon
373
+ - `implemented_unverified`: italic text, yellow/amber background
374
+ - `not_implemented`: strikethrough text, grey color
375
+ 4. **Deviations from Plan** (NEW) — cards showing plan_said vs i_did vs reason
376
+ 5. **Self-Reported Risks** (NEW) — cards with severity badges
377
+ 6. **Pull Request** (existing)
378
+ 7. **Summary** (existing)
379
+ 8. **Journey Evidence** (existing, enhanced):
380
+ - Happy path journeys grouped together
381
+ - Error case journeys grouped separately
382
+ - Each journey card shows embedded GIF
383
+ 9. **Tasks** (existing)
384
+ 10. **QA Verdicts** (existing, enhanced):
385
+ - Each QA agent card now has two subsections:
386
+ - **Findings** — existing checks + issues
387
+ - **Gaps** — new gaps field, rendered with warning/yellow background styling
388
+ - If a QA agent hasn't reported yet, render section as:
389
+ ```html
390
+ <div class="section-pending">
391
+ <h3>Security Audit</h3>
392
+ <p class="pending-label">Awaiting: security-auditor</p>
393
+ </div>
394
+ ```
395
+ 11. **Reviewer Comments** (NEW) — orchestrator and FL comments:
396
+ - Verdict badge (convinced/not_convinced)
397
+ - Reasoning text
398
+ - Concerns list
399
+ 12. **Cross-Team Integration Surface** (NEW, feature-level only):
400
+ - Shared APIs/contracts table
401
+ - Cross-team dependencies
402
+ 13. **Team Evidence Links** (NEW, feature-level only):
403
+ - Summary table: each team's section statuses (green check / grey dash)
404
+ - Links to team gate HTML files
405
+ 14. **Risks** (existing)
406
+ 15. **Deferred** (existing)
407
+ 16. **Recommendation** (existing)
408
+
409
+ #### Pending section styling (CSS):
410
+
411
+ ```css
412
+ .section-pending {
413
+ opacity: 0.5;
414
+ font-style: italic;
415
+ border: 1px dashed var(--border);
416
+ }
417
+ .section-pending .pending-label {
418
+ color: var(--muted);
419
+ font-style: italic;
420
+ }
421
+ .gap-item {
422
+ background: #fef3c7; /* yellow-100 */
423
+ border-left: 3px solid #f59e0b; /* amber-500 */
424
+ padding: 8px 12px;
425
+ margin: 4px 0;
426
+ }
427
+ .coverage-verified { color: var(--fg); }
428
+ .coverage-unverified { font-style: italic; background: #fef9c3; }
429
+ .coverage-missing { text-decoration: line-through; color: var(--muted); }
430
+ ```
431
+
432
+ #### compile_gate_evidence MCP tool changes:
433
+
434
+ Add optional parameters:
435
+ - `doc_type`: `"team"` (default) or `"feature"`
436
+ - `team_html_paths`: array of `{ team_num, html_path }` (feature-level only, for linking)
437
+
438
+ Update `validateEvidence()`:
439
+ - Warn (not error) on missing `gaps`, `deviations`, `coverage_matrix` — backwards compatible with in-flight features
440
+ - Error on missing `reviewer_comments` only when `recommendation` is present AND `doc_type` is `"feature"`
441
+
442
+ ### 8. prompt-builder.js Updates
443
+
444
+ **File:** `iriai-build/v3/prompt-builder.js`
445
+
446
+ **Changes to `buildGateReviewInstructions()`:**
447
+
448
+ Update step 6 (merge evidence) instruction to include new fields:
449
+
450
+ ```javascript
451
+ 6. **Merge evidence** — Combine all team YAMLs + feature-level review outputs into:
452
+ - Include: coverage_matrix, deviations, self_reported_risks (aggregated from all teams)
453
+ - Include: reviewer_comments with your FL assessment
454
+ - Include: cross_team_surface (APIs, contracts, shared state)
455
+ ```
456
+
457
+ Update step 7 (compile HTML) to pass doc_type and team paths.
458
+
459
+ Update step 8 (post to Slack): Remove text summary — the HTML file IS the message. Only include
460
+ `[evidence:path]` marker and `[SLACK:decision]` block.
461
+
462
+ ### 8b. orchestrator.js Updates — Suppress Text Status Messages in Planning Thread
463
+
464
+ **File:** `iriai-build/v3/orchestrator.js`
465
+
466
+ The planning channel thread should only receive HTML evidence docs (with approve/reject buttons).
467
+ Remove text-based status messages currently posted to the planning thread:
468
+
469
+ - `handlePlanApproval()`: Remove `"Plan approved! Launching implementation..."` and
470
+ `"Feature branches created. Launching agents..."` thread posts to planning channel.
471
+ - `handleGateApproval()`: Remove `"Gate approved! Feature Lead will advance..."` text post.
472
+ - `handleGateRejection()`: Remove `"Gate rejected..."` text post.
473
+ - `_requestPhaseReview()`: Remove the text summary post (`"PM phase complete. Output: ..."`).
474
+ The planning phase review gates (PM/Designer/Architect approve/reject buttons) still post — but
475
+ the accompanying text summary is replaced by the artifact upload only.
476
+ - `handlePhaseReviewApproval()`: Remove `"Phase approved. Starting X phase..."` text post.
477
+ - `handlePhaseReviewRejection()`: Remove `"X phase rejected. Re-dispatching..."` text post.
478
+
479
+ **What stays in the planning thread:**
480
+ - HTML evidence document uploads (feature gate HTML with buttons)
481
+ - Planning phase artifact uploads (PRD, design-decisions, plan.yaml) with approve/reject buttons
482
+ - Feature completion message
483
+
484
+ **Impl channel change:**
485
+ - Gate approval decisions in the impl channel MUST now include the feature gate HTML as an attachment.
486
+ The `[evidence:<path>]` marker + `[SLACK:decision]` block are posted together — the user reviews
487
+ the HTML before clicking approve/reject.
488
+ - Everything else in impl channel is unchanged (questions, operator relay, status updates).
489
+
490
+ ### 9. constants.js Update
491
+
492
+ **File:** `iriai-build/v3/constants.js`
493
+
494
+ Add new signal file name:
495
+
496
+ ```javascript
497
+ export const SIGNAL = {
498
+ // ... existing ...
499
+ OUTPUT_PARTIAL: ".output.partial", // NEW
500
+ };
501
+ ```
502
+
503
+ ### 10. Context Management Updates in Role CLAUDE.md files
504
+
505
+ **All agent CLAUDE.md files** — update the Context Management section:
506
+
507
+ Replace the handover instructions with:
508
+
509
+ ```
510
+ ## Context Management - MANDATORY
511
+
512
+ ### Incremental Output (.output.partial)
513
+ After completing each unit of work, append a `---` separated YAML entry to .output.partial:
514
+ ```bash
515
+ cat >> $SIGNAL_DIR/.output.partial << 'ENTRY_EOF'
516
+ ---
517
+ type: [entry_type]
518
+ [structured fields for this unit of work]
519
+ completed_at: "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
520
+ ENTRY_EOF
521
+ ```
522
+
523
+ This ensures completed work survives context exhaustion or crashes.
524
+
525
+ ### On Restart
526
+ Read .output.partial FIRST. It contains your completed work in structured form.
527
+ Do NOT redo any work that has an entry in .output.partial.
528
+
529
+ ### At 40% Context Remaining
530
+ 1. Ensure .output.partial is up to date (all completed work appended)
531
+ 2. Write .handover with: remaining items list only (completed work is in .output.partial)
532
+ 3. Signal: echo "context_threshold" > $SIGNAL_DIR/.needs-restart
533
+
534
+ ### Final Output
535
+ When all work is complete, consolidate .output.partial into .output:
536
+ 1. Read all entries from .output.partial
537
+ 2. Aggregate into final verdict, checks, gaps, issues
538
+ 3. Write consolidated .output
539
+ 4. Signal: echo DONE > .done
540
+ ```
541
+
542
+ ---
543
+
544
+ ## Files Modified (Summary)
545
+
546
+ | # | File | Change |
547
+ |---|------|--------|
548
+ | 1 | `v3/roles/implementer/CLAUDE.md` | Add deviations, self_reported_risks, .output.partial |
549
+ | 2 | `v3/roles/backend-implementer/CLAUDE.md` | Same as #1 |
550
+ | 3 | `v3/roles/frontend-implementer/CLAUDE.md` | Same as #1 |
551
+ | 4 | `v3/roles/database-implementer/CLAUDE.md` | Same as #1 |
552
+ | 5 | `v3/roles/code-reviewer/CLAUDE.md` | Add gaps, .output.partial |
553
+ | 6 | `v3/roles/security-auditor/CLAUDE.md` | Add gaps, .output.partial |
554
+ | 7 | `v3/roles/integration-tester/CLAUDE.md` | Add gaps, comprehensive E2E mandate, .output.partial |
555
+ | 8 | `v3/roles/regression-tester/CLAUDE.md` | Add gaps, .output.partial |
556
+ | 9 | `v3/roles/verifier/CLAUDE.md` | Add gaps, .output.partial |
557
+ | 10 | `v3/roles/orchestrator/CLAUDE.md` | Coverage matrix, deviation aggregation, reviewer comments, team HTML compilation (no buttons) |
558
+ | 11 | `v3/roles/feature-lead/CLAUDE.md` | Cross-team surface, FL comments, feature-level coverage matrix, sole Slack poster with buttons |
559
+ | 12 | `tools/visual-verification-mcp/evidence-compiler.js` | HTML template overhaul, new sections, pending styling |
560
+ | 13 | `iriai-build/v3/prompt-builder.js` | Gate review instructions update, remove text summaries |
561
+ | 14 | `iriai-build/v3/constants.js` | Add OUTPUT_PARTIAL signal |
562
+ | 15 | `iriai-build/v3/orchestrator.js` | Remove text status messages from the planning thread (plan, gate, and phase approval/rejection posts) |
563
+
564
+ ## Files NOT Modified
565
+
566
+ - `agent-supervisor.js` — No retry/backoff changes.
567
+ - `operator.js` — No relay changes.
568
+ - `schema.sql` — No DB schema changes.
569
+ - `db.js`, `queries.js` — No query changes.
570
+ - Planning pipeline roles (pm, designer, architect, plan-compiler) — Not in scope.
571
+ - `slack-adapter.js` — No Slack API changes.
572
+
573
+ ## Execution Order
574
+
575
+ ```
576
+ Phase 1 (parallel, no dependencies):
577
+ - Files 1-4: Implementer CLAUDE.md updates
578
+ - Files 5-9: Review agent CLAUDE.md updates
579
+ - File 14: constants.js (one line)
580
+
581
+ Phase 2 (depends on Phase 1):
582
+ - File 10: Orchestrator CLAUDE.md (references new output fields, team HTML compilation)
583
+ - File 11: Feature Lead CLAUDE.md (references new output fields, sole Slack poster)
584
+
585
+ Phase 3 (parallel with Phase 2):
586
+ - File 12: evidence-compiler.js (HTML template, can be built independently)
587
+ - File 13: prompt-builder.js (gate review instructions, remove text summaries)
588
+ - File 15: orchestrator.js (remove text status messages from the planning thread)
589
+ ```
590
+
591
+ ## Backwards Compatibility
592
+
593
+ - In-flight features won't have new YAML fields. The evidence compiler renders missing fields as "pending" sections, not errors.
594
+ - `.output.partial` is additive — existing agents that don't write it still work; they just lose the crash-recovery benefit until updated.
595
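+ - `validateEvidence()` warns (not errors) on missing `gaps`, `deviations`, and `coverage_matrix`; it errors on missing `reviewer_comments` only when `recommendation` is present AND `doc_type` is `"feature"`.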
596
+ - `doc_type` defaults to `"team"` — existing `compile_gate_evidence` calls work unchanged.
597
+
598
+ ## Risk Assessment
599
+
600
+ - **Integration-tester runtime increase**: Mandating error case GIFs per journey significantly increases work. This is intentional — comprehensive visual evidence is the goal.
601
+ - **Agent context cost**: New output fields add ~10-20 lines per agent. Minimal context impact.
602
+ - **HTML file size**: More embedded GIFs (error cases) means larger HTML. The existing 18MB cap and resize logic in evidence-compiler.js handles this.
603
+ - **.output.partial disk usage**: Append-only files grow over time. Each entry is small (~10-20 lines of YAML, a few hundred bytes), so even 50 entries stay in the tens of kilobytes. Not a concern.
@@ -0,0 +1,78 @@
1
+ // desktop-adapter.js — DesktopAdapter implementing InterfaceAdapter.
2
+ // For iriai-command Tauri desktop app.
3
+ // Stub — implementation deferred to Phase 7.
4
+ // Will use WebSocket or stdin/stdout JSON protocol as Tauri sidecar.
5
+
6
+ import { InterfaceAdapter } from "./interface.js";
7
+
8
/**
 * InterfaceAdapter implementation for the desktop client.
 *
 * Each operation is converted into a named event and handed to `_emit`.
 * No transport exists yet (see the TODOs), so `_emit` only logs the event.
 */
export class DesktopAdapter extends InterfaceAdapter {
  /**
   * @param {object} [options]
   * @param {number} [options.port] - Port reserved for the future transport.
   *   Any falsy value (including 0) falls back to 9721.
   */
  constructor({ port } = {}) {
    super();
    this.port = port || 9721;
    this._connections = new Set();
    // Last ref handed out; lets _nextRef() stay unique within one millisecond.
    this._lastRef = 0;
    // TODO: Start WebSocket server or stdin/stdout JSON protocol
  }

  /**
   * Announces a new feature and returns its channel identifier.
   *
   * @param {*} featureId
   * @param {string} slug
   * @returns {Promise<string>} `desktop-<slug>`
   */
  async createFeatureChannel(featureId, slug) {
    this._emit("feature:created", { featureId, slug });
    return `desktop-${slug}`;
  }

  /**
   * Emits a "message" event.
   *
   * @param {*} featureId
   * @param {string} text
   * @returns {Promise<{ref: string}>} Reference assigned to the message.
   */
  async postMessage(featureId, text) {
    const ref = this._nextRef();
    this._emit("message", { featureId, text, ref });
    return { ref };
  }

  /**
   * Emits a "thread-message" event. No reference is generated or returned.
   *
   * @param {*} featureId
   * @param {string} text
   * @returns {Promise<void>}
   */
  async postThreadMessage(featureId, text) {
    this._emit("thread-message", { featureId, text });
  }

  /**
   * Emits a "pipeline-message" event.
   *
   * @param {*} featureId
   * @param {string} text
   * @returns {Promise<{ref: string}>} Reference assigned to the message.
   */
  async postPipelineMessage(featureId, text) {
    const ref = this._nextRef();
    this._emit("pipeline-message", { featureId, text, ref });
    return { ref };
  }

  /**
   * Emits an "agent-response" event.
   *
   * @param {*} featureId
   * @param {string} agentLabel
   * @param {*} content
   * @returns {Promise<{ref: string}>} Reference assigned to the response.
   */
  async postAgentResponse(featureId, agentLabel, content) {
    const ref = this._nextRef();
    this._emit("agent-response", { featureId, agentLabel, content, ref });
    return { ref };
  }

  /**
   * Emits an "artifact" event carrying the file path and title.
   * The file itself is not read here.
   *
   * @param {*} featureId
   * @param {string} filePath
   * @param {string} title
   * @returns {Promise<void>}
   */
  async uploadArtifact(featureId, filePath, title) {
    this._emit("artifact", { featureId, filePath, title });
  }

  /**
   * Emits a "decision" event and returns immediately; it does not wait for
   * the user's answer.
   *
   * @param {*} featureId
   * @param {object} decision
   * @returns {Promise<{ref: string}>} Reference assigned to the decision.
   */
  async postDecision(featureId, decision) {
    const ref = this._nextRef();
    this._emit("decision", { featureId, decision, ref });
    // TODO: Wait for response event from Tauri client
    return { ref };
  }

  /**
   * Emits a "decision-resolved" event describing the outcome of a decision.
   *
   * `messageRef` is forwarded in the payload so the receiver can correlate
   * the resolution with the message it refers to.
   *
   * @param {*} featureId
   * @param {string} messageRef
   * @param {string} decisionId
   * @param {string} selectedOption
   * @param {string} selectedLabel
   * @param {string} resolvedBy
   * @param {string} [feedback]
   * @returns {Promise<void>}
   */
  async resolveDecisionMessage(featureId, messageRef, decisionId, selectedOption, selectedLabel, resolvedBy, feedback) {
    this._emit("decision-resolved", {
      featureId,
      messageRef,
      decisionId,
      selectedOption,
      selectedLabel,
      resolvedBy,
      feedback,
    });
  }

  /**
   * Posts the fixed "plan-approval" decision with approve/reject options.
   *
   * @param {*} featureId
   * @param {string} planDir - Accepted but currently unused by this adapter.
   * @returns {Promise<{ref: string}>} Result of `postDecision`.
   */
  async postPlanForApproval(featureId, planDir) {
    return this.postDecision(featureId, {
      id: "plan-approval",
      title: "Plan ready for approval",
      context: "All planning phases complete.",
      options: [
        { id: "approve", label: "Approve Plan", style: "primary" },
        { id: "reject", label: "Reject Plan", style: "danger" },
      ],
    });
  }

  /**
   * Emits a "feature-complete" event.
   *
   * @param {*} featureId
   * @returns {Promise<void>}
   */
  async postFeatureComplete(featureId) {
    this._emit("feature-complete", { featureId });
  }

  /**
   * Returns a reference string that is unique for this adapter instance.
   *
   * The value is the current epoch-millisecond timestamp, bumped by one when
   * the clock has not advanced since the previous ref, so two posts in the
   * same millisecond never share a ref.
   *
   * @returns {string}
   */
  _nextRef() {
    this._lastRef = Math.max(Date.now(), this._lastRef + 1);
    return this._lastRef.toString();
  }

  /**
   * Delivers an event. Currently it only logs the event name and the first
   * 200 characters of the JSON-serialized payload.
   *
   * @param {string} event
   * @param {object} data
   */
  _emit(event, data) {
    // TODO: Send over WebSocket / stdout
    console.log(`[desktop] ${event}:`, JSON.stringify(data).slice(0, 200));
  }
}