iriai-build 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/iriai-build.js +78 -0
- package/bridge-v3.js +98 -0
- package/cli/bootstrap.js +83 -0
- package/cli/commands/implementation.js +64 -0
- package/cli/commands/index.js +46 -0
- package/cli/commands/launch.js +153 -0
- package/cli/commands/plan.js +117 -0
- package/cli/commands/setup.js +80 -0
- package/cli/commands/slack.js +97 -0
- package/cli/commands/transfer.js +111 -0
- package/cli/config.js +92 -0
- package/cli/display.js +121 -0
- package/cli/terminal-input.js +666 -0
- package/cli/wait.js +82 -0
- package/index.js +1488 -0
- package/lib/agent-process.js +170 -0
- package/lib/bridge-state.js +126 -0
- package/lib/constants.js +137 -0
- package/lib/health-monitor.js +113 -0
- package/lib/prompt-builder.js +565 -0
- package/lib/signal-watcher.js +215 -0
- package/lib/slack-helpers.js +224 -0
- package/lib/state-machines/feature-lead.js +408 -0
- package/lib/state-machines/operator-agent.js +173 -0
- package/lib/state-machines/planning-role.js +161 -0
- package/lib/state-machines/role-agent.js +186 -0
- package/lib/state-machines/team-orchestrator.js +160 -0
- package/package.json +31 -0
- package/v3/.handover-html-evidence.md +35 -0
- package/v3/KICKOFF-HTML-EVIDENCE.md +98 -0
- package/v3/PLAN-HTML-EVIDENCE-HARDENING.md +603 -0
- package/v3/adapters/desktop-adapter.js +78 -0
- package/v3/adapters/interface.js +146 -0
- package/v3/adapters/slack-adapter.js +608 -0
- package/v3/adapters/slack-helpers.js +179 -0
- package/v3/adapters/terminal-adapter.js +249 -0
- package/v3/agent-supervisor.js +320 -0
- package/v3/artifact-portal.js +1184 -0
- package/v3/bridge.db +0 -0
- package/v3/constants.js +170 -0
- package/v3/db.js +76 -0
- package/v3/file-io.js +216 -0
- package/v3/helpers.js +174 -0
- package/v3/operator.js +364 -0
- package/v3/orchestrator.js +2886 -0
- package/v3/plan-compiler.js +440 -0
- package/v3/prompt-builder.js +849 -0
- package/v3/queries.js +461 -0
- package/v3/recovery.js +508 -0
- package/v3/review-sessions.js +360 -0
- package/v3/roles/accessibility-auditor/CLAUDE.md +50 -0
- package/v3/roles/analytics-engineer/CLAUDE.md +40 -0
- package/v3/roles/architect/CLAUDE.md +809 -0
- package/v3/roles/backend-implementer/CLAUDE.md +97 -0
- package/v3/roles/code-reviewer/CLAUDE.md +89 -0
- package/v3/roles/database-implementer/CLAUDE.md +97 -0
- package/v3/roles/deployer/CLAUDE.md +42 -0
- package/v3/roles/designer/CLAUDE.md +386 -0
- package/v3/roles/documentation/CLAUDE.md +40 -0
- package/v3/roles/feature-lead/CLAUDE.md +233 -0
- package/v3/roles/frontend-implementer/CLAUDE.md +97 -0
- package/v3/roles/implementer/CLAUDE.md +97 -0
- package/v3/roles/integration-tester/CLAUDE.md +174 -0
- package/v3/roles/observability-engineer/CLAUDE.md +40 -0
- package/v3/roles/operator/CLAUDE.md +322 -0
- package/v3/roles/orchestrator/CLAUDE.md +288 -0
- package/v3/roles/package-implementer/CLAUDE.md +47 -0
- package/v3/roles/performance-analyst/CLAUDE.md +49 -0
- package/v3/roles/plan-compiler/CLAUDE.md +163 -0
- package/v3/roles/planning-lead/CLAUDE.md +41 -0
- package/v3/roles/pm/CLAUDE.md +806 -0
- package/v3/roles/regression-tester/CLAUDE.md +135 -0
- package/v3/roles/release-manager/CLAUDE.md +43 -0
- package/v3/roles/security-auditor/CLAUDE.md +90 -0
- package/v3/roles/smoke-tester/CLAUDE.md +97 -0
- package/v3/roles/test-author/CLAUDE.md +42 -0
- package/v3/roles/verifier/CLAUDE.md +90 -0
- package/v3/schema.sql +134 -0
- package/v3/slack-adapter.js +510 -0
- package/v3/slack-helpers.js +346 -0
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
# Plan: HTML Evidence Document Hardening
|
|
2
|
+
|
|
3
|
+
## Problem Statement
|
|
4
|
+
|
|
5
|
+
The current gate review process uses YAML-based `.output` files and `.gate-evidence.yaml` compiled into basic HTML. The orchestrator and feature lead make pass/fail decisions, but:
|
|
6
|
+
|
|
7
|
+
1. **No structured gaps reporting** - Review agents report findings but don't explicitly call out what's MISSING in their domain
|
|
8
|
+
2. **No deviation tracking** - Implementers diverge from the plan silently; reviewers have no visibility
|
|
9
|
+
3. **No self-reported risks** - Implementers know where the bodies are buried but aren't asked
|
|
10
|
+
4. **No coverage matrix** - No way to see "what from the plan is done vs not done" at a glance
|
|
11
|
+
5. **Incomplete E2E coverage** - Integration-tester tests journeys but doesn't mandate error case GIFs
|
|
12
|
+
6. **Context loss on long tasks** - Agents that span multiple context windows lose structured findings; handover is prose-based
|
|
13
|
+
7. **HTML doc lacks reviewer commentary** - Orchestrator/FL decisions aren't captured in the evidence artifact
|
|
14
|
+
8. **No visual distinction between complete and pending sections** - Can't tell what's filled vs awaiting
|
|
15
|
+
|
|
16
|
+
## Solution Overview
|
|
17
|
+
|
|
18
|
+
### Two-tier HTML evidence documents
|
|
19
|
+
|
|
20
|
+
- **Team gate HTML** - One per team per gate. Compiled by team orchestrator after QA agents finish. NO approve/reject buttons — this is evidence only, reviewed by the feature lead internally.
|
|
21
|
+
- **Feature gate HTML** - One per feature per gate. Compiled by feature lead. Links to team gate HTMLs. This is the ONLY doc that gets approve/reject buttons and is the ONLY doc posted to the Slack impl channel.
|
|
22
|
+
|
|
23
|
+
### Planning channel thread communication policy
|
|
24
|
+
|
|
25
|
+
The planning channel thread (where `[FEATURE]` was posted) receives ONLY:
|
|
26
|
+
- **HTML evidence documents** — the feature gate HTML is posted as a thread update
|
|
27
|
+
- **Approve/reject buttons** — only on the feature gate HTML, one per gate
|
|
28
|
+
|
|
29
|
+
NO text-based status messages in the planning thread ("Pipeline started", "Phase approved", "Launching implementation", etc.). The HTML document IS the update. If you want to know the state, open the HTML.
|
|
30
|
+
|
|
31
|
+
The impl channel (`#impl-<slug>`) is **unchanged** — it still receives questions, operator relay messages, and gate decisions as before.
|
|
32
|
+
|
|
33
|
+
### Incremental structured output (.output.partial)
|
|
34
|
+
|
|
35
|
+
Agents write append-only partial output as they complete each unit of work, preventing context loss.
|
|
36
|
+
|
|
37
|
+
### Enriched output schemas
|
|
38
|
+
|
|
39
|
+
Implementers report deviations and risks. Review agents report gaps. Orchestrator/FL add comments.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Detailed Changes
|
|
44
|
+
|
|
45
|
+
### 1. Incremental Output Protocol (.output.partial)
|
|
46
|
+
|
|
47
|
+
**Applies to:** ALL agents that produce `.output` (implementers + review agents)
|
|
48
|
+
|
|
49
|
+
**Format:** Append-only multi-document YAML (separated by `---`)
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
---
|
|
53
|
+
type: journey
|
|
54
|
+
name: auth-login
|
|
55
|
+
verdict: PASS
|
|
56
|
+
checks:
|
|
57
|
+
- criterion: "Login with valid credentials"
|
|
58
|
+
result: PASS
|
|
59
|
+
detail: "200 response, session cookie set"
|
|
60
|
+
gif_path: .recordings/gifs/auth-login.gif
|
|
61
|
+
completed_at: "2026-03-05T10:00:00Z"
|
|
62
|
+
---
|
|
63
|
+
type: journey
|
|
64
|
+
name: auth-login-invalid-password
|
|
65
|
+
verdict: PASS
|
|
66
|
+
checks:
|
|
67
|
+
- criterion: "Login with wrong password"
|
|
68
|
+
result: PASS
|
|
69
|
+
detail: "401 response, error message shown"
|
|
70
|
+
gif_path: .recordings/gifs/auth-login-error.gif
|
|
71
|
+
completed_at: "2026-03-05T10:02:00Z"
|
|
72
|
+
---
|
|
73
|
+
type: gap
|
|
74
|
+
category: error-handling
|
|
75
|
+
description: "Timeout scenario not tested"
|
|
76
|
+
severity: major
|
|
77
|
+
plan_reference: "journey-3, step 4"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Rules:**
|
|
81
|
+
- After completing each unit of work (journey, file review, acceptance criterion check), append a `---` separated YAML document to `.output.partial`
|
|
82
|
+
- Each entry is self-contained and independently parseable
|
|
83
|
+
- On restart, read `.output.partial` to know exactly what's done (structured, not prose)
|
|
84
|
+
- On completion, consolidate `.output.partial` into final `.output` with aggregated verdict, checks, gaps, issues
|
|
85
|
+
- Handover becomes lightweight: "read `.output.partial` for completed work, remaining: X, Y, Z"
|
|
86
|
+
- If agent hard-crashes without handover, `.output.partial` preserves all completed work on disk
|
|
87
|
+
- Parse with `yaml.loadAll()` — standard multi-document YAML
|
|
88
|
+
- Orchestrator can read `.output.partial` for progress visibility before agent finishes
|
|
89
|
+
|
|
90
|
+
**Entry types by agent:**
|
|
91
|
+
|
|
92
|
+
| Agent | Entry types |
|
|
93
|
+
|-------|-------------|
|
|
94
|
+
| Implementer | `file_complete` (per file modified), `deviation`, `risk` |
|
|
95
|
+
| Code reviewer | `file_review` (per file), `gap` |
|
|
96
|
+
| Security auditor | `endpoint_review` (per endpoint/flow), `gap` |
|
|
97
|
+
| Integration tester | `journey` (per journey), `gap` |
|
|
98
|
+
| Regression tester | `test_suite` (per suite), `gap` |
|
|
99
|
+
| Verifier | `criterion_check` (per acceptance criterion), `gap` |
|
|
100
|
+
| Smoke tester | `critical_path` (per path), `gap` |
|
|
101
|
+
|
|
102
|
+
### 2. Implementer Output Schema Changes
|
|
103
|
+
|
|
104
|
+
**Files:** `roles/implementer/CLAUDE.md`, `roles/backend-implementer/CLAUDE.md`, `roles/frontend-implementer/CLAUDE.md`, `roles/database-implementer/CLAUDE.md`
|
|
105
|
+
|
|
106
|
+
**Add to Output section:**
|
|
107
|
+
|
|
108
|
+
```yaml
|
|
109
|
+
task_id: [id]
|
|
110
|
+
role: implementer
|
|
111
|
+
summary_oneliner: "[one line]"
|
|
112
|
+
files_created: [list]
|
|
113
|
+
files_modified: [list]
|
|
114
|
+
deviations: # NEW
|
|
115
|
+
- plan_said: "[what the task specified]"
|
|
116
|
+
i_did: "[what was actually implemented]"
|
|
117
|
+
reason: "[why the deviation was necessary]"
|
|
118
|
+
self_reported_risks: # NEW
|
|
119
|
+
- description: "[what you're not confident about]"
|
|
120
|
+
severity: major|minor
|
|
121
|
+
file: "[path]"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Add to Process section:** After step 5 (verify), add:
|
|
125
|
+
- Step 6: Document any deviations from the task spec and why
|
|
126
|
+
- Step 7: Flag anything you're not confident about as a self-reported risk
|
|
127
|
+
|
|
128
|
+
**Add .output.partial protocol:** After each file is completed, append a `file_complete` entry.
|
|
129
|
+
|
|
130
|
+
### 3. Review Agent Output Schema Changes
|
|
131
|
+
|
|
132
|
+
**Files:** `roles/code-reviewer/CLAUDE.md`, `roles/security-auditor/CLAUDE.md`, `roles/regression-tester/CLAUDE.md`, `roles/verifier/CLAUDE.md`, `roles/integration-tester/CLAUDE.md`
|
|
133
|
+
|
|
134
|
+
**Add `gaps` field to Output section (all 5 agents):**
|
|
135
|
+
|
|
136
|
+
```yaml
|
|
137
|
+
task_id: [id]
|
|
138
|
+
role: [role]
|
|
139
|
+
verdict: PASS|FAIL|CONDITIONAL
|
|
140
|
+
summary_oneliner: "[counts]"
|
|
141
|
+
checks:
|
|
142
|
+
- criterion: "[review area]"
|
|
143
|
+
result: PASS|FAIL
|
|
144
|
+
detail: "[evidence]"
|
|
145
|
+
issues:
|
|
146
|
+
- severity: blocker|major|minor|nit
|
|
147
|
+
description: "[what's wrong]"
|
|
148
|
+
file: "[path]"
|
|
149
|
+
line: [number]
|
|
150
|
+
gaps: # NEW
|
|
151
|
+
- category: "[domain-specific]"
|
|
152
|
+
description: "[what's missing or not covered]"
|
|
153
|
+
severity: blocker|major|minor
|
|
154
|
+
plan_reference: "[task ID or acceptance criterion]"
|
|
155
|
+
duration_seconds: [elapsed]
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**Gap categories by agent:**
|
|
159
|
+
|
|
160
|
+
| Agent | Gap categories |
|
|
161
|
+
|-------|---------------|
|
|
162
|
+
| Code reviewer | error-handling, input-validation, pattern-compliance, edge-cases, test-coverage |
|
|
163
|
+
| Security auditor | auth, injection, rate-limiting, secrets, cors, csrf, data-exposure |
|
|
164
|
+
| Integration tester | untested-journey, missing-error-case, missing-edge-case, visual-gap |
|
|
165
|
+
| Regression tester | untested-regression, missing-backward-compat, skipped-test-suite |
|
|
166
|
+
| Verifier | unverified-criterion, insufficient-evidence, missing-acceptance-check |
|
|
167
|
+
|
|
168
|
+
**Add .output.partial protocol to all 5:** After each review item, append entry to `.output.partial`.
|
|
169
|
+
|
|
170
|
+
### 4. Integration Tester: Comprehensive E2E Coverage Mandate
|
|
171
|
+
|
|
172
|
+
**File:** `roles/integration-tester/CLAUDE.md`
|
|
173
|
+
|
|
174
|
+
**Add new section "Comprehensive Journey Coverage - MANDATORY":**
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
## Comprehensive Journey Coverage - MANDATORY
|
|
178
|
+
|
|
179
|
+
For EVERY user journey defined in the plan:
|
|
180
|
+
|
|
181
|
+
### Happy Path (Golden Path)
|
|
182
|
+
- Execute the full journey step by step
|
|
183
|
+
- Generate a GIF of the complete flow
|
|
184
|
+
- Every verify block must produce evidence
|
|
185
|
+
|
|
186
|
+
### Error Cases (per journey)
|
|
187
|
+
For each journey, test ALL of the following error scenarios that apply:
|
|
188
|
+
- Invalid input (wrong types, missing fields, too long, empty)
|
|
189
|
+
- Authentication failures (expired token, wrong credentials, no token)
|
|
190
|
+
- Authorization failures (wrong role, insufficient permissions)
|
|
191
|
+
- Network/timeout scenarios (if applicable)
|
|
192
|
+
- Empty state (no data, first-time user)
|
|
193
|
+
- Boundary conditions (max items, zero items, concurrent access)
|
|
194
|
+
|
|
195
|
+
Each error case gets:
|
|
196
|
+
- Its own GIF showing the error flow and recovery/message
|
|
197
|
+
- A check entry in the output
|
|
198
|
+
|
|
199
|
+
### Gap Reporting
|
|
200
|
+
After testing, cross-reference the plan's journey list against what you tested.
|
|
201
|
+
For any journey or error case NOT tested, write a gap entry with:
|
|
202
|
+
- Which journey/error case was skipped
|
|
203
|
+
- Why it was skipped (MCP unavailable, environment limitation, time constraint)
|
|
204
|
+
- Severity assessment (blocker if it's a critical path, major otherwise)
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### 5. Orchestrator CLAUDE.md Updates
|
|
208
|
+
|
|
209
|
+
**File:** `roles/orchestrator/CLAUDE.md`
|
|
210
|
+
|
|
211
|
+
**Key change: Team orchestrator compiles team gate HTML but does NOT post it to Slack or attach approve/reject buttons.** The team gate HTML is written to disk for the feature lead to review internally. The orchestrator signals `.gate-ready` as before — the feature lead is the only one who presents evidence to the user.
|
|
212
|
+
|
|
213
|
+
**Changes to "Per-Phase Adversarial Review + Gate Evidence" section:**
|
|
214
|
+
|
|
215
|
+
Add after step 4 (QA roles complete), before visual review:
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
4b. **Review gaps from every review agent.** Read the `gaps` field in each QA agent's
|
|
219
|
+
`.output`. These are the primary inputs to your gate decision. A gap with severity
|
|
220
|
+
`blocker` means the phase cannot pass — re-dispatch the responsible agent.
|
|
221
|
+
|
|
222
|
+
4c. **Aggregate implementer deviations and risks.** Read `deviations` and
|
|
223
|
+
`self_reported_risks` from each implementer's `.output`. Cross-reference deviations
|
|
224
|
+
against the plan — if a deviation contradicts a requirement, it's a blocker.
|
|
225
|
+
|
|
226
|
+
4d. **Build coverage matrix.** For every task and acceptance criterion in the plan,
|
|
227
|
+
determine status:
|
|
228
|
+
- `implemented_verified` — implementer completed it AND a review agent verified it
|
|
229
|
+
- `implemented_unverified` — implementer completed it but no review agent checked it
|
|
230
|
+
- `not_implemented` — no implementer output references this item
|
|
231
|
+
Include the matrix in `.gate-evidence.yaml`.
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**Update gate evidence YAML schema in the example:**
|
|
235
|
+
|
|
236
|
+
```yaml
|
|
237
|
+
gate: 1
|
|
238
|
+
feature: my-feature
|
|
239
|
+
recommendation:
|
|
240
|
+
verdict: APPROVE
|
|
241
|
+
reasoning: "All journeys pass with visual evidence verified"
|
|
242
|
+
pr:
|
|
243
|
+
url: https://github.com/org/repo/pull/123
|
|
244
|
+
branch: feature/my-feature
|
|
245
|
+
files_changed: 15
|
|
246
|
+
additions: 420
|
|
247
|
+
deletions: 50
|
|
248
|
+
summary: "Implemented auth flow with login, registration, and password reset."
|
|
249
|
+
coverage_matrix: # NEW
|
|
250
|
+
- plan_item: "task-1.1: Login endpoint"
|
|
251
|
+
status: implemented_verified
|
|
252
|
+
evidence_ref: "code-reviewer check 1, integration-tester journey auth-login"
|
|
253
|
+
- plan_item: "task-1.2: Rate limiting"
|
|
254
|
+
status: implemented_unverified
|
|
255
|
+
evidence_ref: "implementer output only"
|
|
256
|
+
- plan_item: "task-1.3: Password reset"
|
|
257
|
+
status: not_implemented
|
|
258
|
+
evidence_ref: null
|
|
259
|
+
deviations: # NEW (aggregated from implementers)
|
|
260
|
+
- source: backend-implementer
|
|
261
|
+
task_id: "1.1"
|
|
262
|
+
plan_said: "Use bcrypt for password hashing"
|
|
263
|
+
i_did: "Used argon2id"
|
|
264
|
+
reason: "argon2id is the current OWASP recommendation"
|
|
265
|
+
self_reported_risks: # NEW (aggregated from implementers)
|
|
266
|
+
- source: frontend-implementer
|
|
267
|
+
task_id: "1.2"
|
|
268
|
+
description: "Rate limit UI feedback relies on 429 status code; not tested with proxy"
|
|
269
|
+
severity: minor
|
|
270
|
+
file: "src/components/LoginForm.tsx"
|
|
271
|
+
reviewer_comments: # NEW
|
|
272
|
+
orchestrator:
|
|
273
|
+
verdict: convinced
|
|
274
|
+
reasoning: "All gaps are minor. Deviation on argon2id is an improvement. Coverage matrix shows 12/14 items verified."
|
|
275
|
+
concerns:
|
|
276
|
+
- "Rate limiting not visually verified — only unit tested"
|
|
277
|
+
journey_results:
|
|
278
|
+
- name: auth-login
|
|
279
|
+
verdict: PASS
|
|
280
|
+
type: happy-path
|
|
281
|
+
# ... existing fields ...
|
|
282
|
+
- name: auth-login-invalid-password # NEW: error case journeys
|
|
283
|
+
verdict: PASS
|
|
284
|
+
type: error-case
|
|
285
|
+
# ...
|
|
286
|
+
tasks:
|
|
287
|
+
- id: "1.1"
|
|
288
|
+
title: "Implement login endpoint"
|
|
289
|
+
role: backend-implementer
|
|
290
|
+
verdict: PASS
|
|
291
|
+
qa_verdicts:
|
|
292
|
+
- role: code-reviewer
|
|
293
|
+
verdict: PASS
|
|
294
|
+
issue_count: 0
|
|
295
|
+
gaps: # NEW
|
|
296
|
+
- category: test-coverage
|
|
297
|
+
description: "No unit tests for rate limiter middleware"
|
|
298
|
+
severity: major
|
|
299
|
+
- role: security-auditor
|
|
300
|
+
verdict: PASS
|
|
301
|
+
issue_count: 0
|
|
302
|
+
gaps: []
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### 6. Feature Lead CLAUDE.md Updates
|
|
306
|
+
|
|
307
|
+
**File:** `roles/feature-lead/CLAUDE.md`
|
|
308
|
+
|
|
309
|
+
**Key change: The feature gate HTML is the ONLY artifact posted to the impl Slack channel, and it is the ONLY place approve/reject buttons appear.** One HTML per gate, one approval decision per gate. Team gate HTMLs are internal evidence reviewed by the FL — they are linked from the feature gate HTML but never posted to Slack independently.
|
|
310
|
+
|
|
311
|
+
**Changes to "Gate Evidence Document Protocol" section:**
|
|
312
|
+
|
|
313
|
+
After step 4 (wait for review agents), before step 5 (adversarial cross-check):
|
|
314
|
+
|
|
315
|
+
```
|
|
316
|
+
4b. **Review gaps across all levels.** Read `gaps` from:
|
|
317
|
+
- Each team orchestrator's `.gate-evidence.yaml` (team-level QA gaps)
|
|
318
|
+
- Each team's compiled `.gate-evidence.html` (review visually)
|
|
319
|
+
- Each feature-level review agent's `.output` (cross-team gaps)
|
|
320
|
+
Any blocker-severity gap that hasn't been addressed = REJECT.
|
|
321
|
+
|
|
322
|
+
4c. **Build cross-team integration surface.** Document:
|
|
323
|
+
- APIs/contracts that Team A's work exposes and Team B consumes
|
|
324
|
+
- Shared database tables or state modified by multiple teams
|
|
325
|
+
- Any cross-team dependency that could break if one team's work changes
|
|
326
|
+
|
|
327
|
+
4d. **Build feature-level coverage matrix.** Cross-reference the FULL plan
|
|
328
|
+
(all phases, all tasks, all acceptance criteria) against evidence from
|
|
329
|
+
all teams. This is the master view — status of every plan item.
|
|
330
|
+
|
|
331
|
+
4e. **Add Feature Lead comments.** Write your assessment:
|
|
332
|
+
- verdict: convinced|not_convinced
|
|
333
|
+
- reasoning: reference specific gaps, deviations, and cross-team concerns
|
|
334
|
+
- concerns: list remaining items even if you're convinced overall
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
**Update step 7 (compile HTML):**
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
7. **Compile feature gate HTML** — Call `compile_gate_evidence` MCP tool:
|
|
341
|
+
- `evidence_yaml_path`: your merged `.gate-evidence.yaml`
|
|
342
|
+
- `output_html_path`: `<featureDir>/.gate-evidence.html`
|
|
343
|
+
- `doc_type`: "feature"
|
|
344
|
+
- `team_html_paths`: list of `{ team_num, html_path }` entries, one per team-level HTML to link to
|
|
345
|
+
- If tool returns ERROR -> re-dispatch affected role -> retry
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
**Update step 8 (post to Slack):**
|
|
349
|
+
|
|
350
|
+
```
|
|
351
|
+
8. **Post feature gate HTML to impl channel** via `.agent-response`:
|
|
352
|
+
- Include `[evidence:<path to .gate-evidence.html>]` marker — HTML uploaded as attachment
|
|
353
|
+
- Include `[SLACK:decision]` block with approve/reject buttons
|
|
354
|
+
- This is the ONE approval point per gate — no per-team approvals
|
|
355
|
+
- The HTML links to team gate HTMLs for drill-down
|
|
356
|
+
- User reviews the HTML, then clicks approve/reject
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### 7. Evidence Compiler HTML Template Overhaul
|
|
360
|
+
|
|
361
|
+
**File:** `tools/visual-verification-mcp/evidence-compiler.js`
|
|
362
|
+
|
|
363
|
+
#### New HTML sections (in order):
|
|
364
|
+
|
|
365
|
+
1. **Header** (existing) — feature, gate, date, recommendation badge
|
|
366
|
+
2. **Scoreboard** (existing) — tasks, journeys, regression, QA counts
|
|
367
|
+
3. **Coverage Matrix** (NEW) — table:
|
|
368
|
+
- Column 1: Plan item (task ID + title)
|
|
369
|
+
- Column 2: Status badge
|
|
370
|
+
- Column 3: Evidence reference (link to check/journey)
|
|
371
|
+
- Styling:
|
|
372
|
+
- `implemented_verified`: normal text, green checkmark icon
|
|
373
|
+
- `implemented_unverified`: italic text, yellow/amber background
|
|
374
|
+
- `not_implemented`: strikethrough text, grey color
|
|
375
|
+
4. **Deviations from Plan** (NEW) — cards showing plan_said vs i_did vs reason
|
|
376
|
+
5. **Self-Reported Risks** (NEW) — cards with severity badges
|
|
377
|
+
6. **Pull Request** (existing)
|
|
378
|
+
7. **Summary** (existing)
|
|
379
|
+
8. **Journey Evidence** (existing, enhanced):
|
|
380
|
+
- Happy path journeys grouped together
|
|
381
|
+
- Error case journeys grouped separately
|
|
382
|
+
- Each journey card shows embedded GIF
|
|
383
|
+
9. **Tasks** (existing)
|
|
384
|
+
10. **QA Verdicts** (existing, enhanced):
|
|
385
|
+
- Each QA agent card now has two subsections:
|
|
386
|
+
- **Findings** — existing checks + issues
|
|
387
|
+
- **Gaps** — new gaps field, rendered with warning/yellow background styling
|
|
388
|
+
- If a QA agent hasn't reported yet, render section as:
|
|
389
|
+
```html
|
|
390
|
+
<div class="section-pending">
|
|
391
|
+
<h3>Security Audit</h3>
|
|
392
|
+
<p class="pending-label">Awaiting: security-auditor</p>
|
|
393
|
+
</div>
|
|
394
|
+
```
|
|
395
|
+
11. **Reviewer Comments** (NEW) — orchestrator and FL comments:
|
|
396
|
+
- Verdict badge (convinced/not_convinced)
|
|
397
|
+
- Reasoning text
|
|
398
|
+
- Concerns list
|
|
399
|
+
12. **Cross-Team Integration Surface** (NEW, feature-level only):
|
|
400
|
+
- Shared APIs/contracts table
|
|
401
|
+
- Cross-team dependencies
|
|
402
|
+
13. **Team Evidence Links** (NEW, feature-level only):
|
|
403
|
+
- Summary table: each team's section statuses (green check / grey dash)
|
|
404
|
+
- Links to team gate HTML files
|
|
405
|
+
14. **Risks** (existing)
|
|
406
|
+
15. **Deferred** (existing)
|
|
407
|
+
16. **Recommendation** (existing)
|
|
408
|
+
|
|
409
|
+
#### Pending section styling (CSS):
|
|
410
|
+
|
|
411
|
+
```css
|
|
412
|
+
.section-pending {
|
|
413
|
+
opacity: 0.5;
|
|
414
|
+
font-style: italic;
|
|
415
|
+
border: 1px dashed var(--border);
|
|
416
|
+
}
|
|
417
|
+
.section-pending .pending-label {
|
|
418
|
+
color: var(--muted);
|
|
419
|
+
font-style: italic;
|
|
420
|
+
}
|
|
421
|
+
.gap-item {
|
|
422
|
+
background: #fef3c7; /* yellow-100 */
|
|
423
|
+
border-left: 3px solid #f59e0b; /* amber-500 */
|
|
424
|
+
padding: 8px 12px;
|
|
425
|
+
margin: 4px 0;
|
|
426
|
+
}
|
|
427
|
+
.coverage-verified { color: var(--fg); }
|
|
428
|
+
.coverage-unverified { font-style: italic; background: #fef9c3; }
|
|
429
|
+
.coverage-missing { text-decoration: line-through; color: var(--muted); }
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
#### compile_gate_evidence MCP tool changes:
|
|
433
|
+
|
|
434
|
+
Add optional parameters:
|
|
435
|
+
- `doc_type`: `"team"` (default) or `"feature"`
|
|
436
|
+
- `team_html_paths`: array of `{ team_num, html_path }` (feature-level only, for linking)
|
|
437
|
+
|
|
438
|
+
Update `validateEvidence()`:
|
|
439
|
+
- Warn (not error) on missing `gaps`, `deviations`, `coverage_matrix` — backwards compatible with in-flight features
|
|
440
|
+
- Error on missing `reviewer_comments` only when `recommendation` is present AND `doc_type` is `"feature"`
|
|
441
|
+
|
|
442
|
+
### 8. prompt-builder.js Updates
|
|
443
|
+
|
|
444
|
+
**File:** `iriai-build/v3/prompt-builder.js`
|
|
445
|
+
|
|
446
|
+
**Changes to `buildGateReviewInstructions()`:**
|
|
447
|
+
|
|
448
|
+
Update step 6 (merge evidence) instruction to include new fields:
|
|
449
|
+
|
|
450
|
+
```javascript
|
|
451
|
+
6. **Merge evidence** — Combine all team YAMLs + feature-level review outputs into a single merged `.gate-evidence.yaml`:
|
|
452
|
+
- Include: coverage_matrix, deviations, self_reported_risks (aggregated from all teams)
|
|
453
|
+
- Include: reviewer_comments with your FL assessment
|
|
454
|
+
- Include: cross_team_surface (APIs, contracts, shared state)
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
Update step 7 (compile HTML) to pass doc_type and team paths.
|
|
458
|
+
|
|
459
|
+
Update step 8 (post to Slack): Remove text summary — the HTML file IS the message. Only include
|
|
460
|
+
`[evidence:path]` marker and `[SLACK:decision]` block.
|
|
461
|
+
|
|
462
|
+
### 8b. orchestrator.js Updates — Suppress Text Status Messages in Planning Thread
|
|
463
|
+
|
|
464
|
+
**File:** `iriai-build/v3/orchestrator.js`
|
|
465
|
+
|
|
466
|
+
The planning channel thread should only receive HTML evidence docs (with approve/reject buttons).
|
|
467
|
+
Remove text-based status messages currently posted to the planning thread:
|
|
468
|
+
|
|
469
|
+
- `handlePlanApproval()`: Remove `"Plan approved! Launching implementation..."` and
|
|
470
|
+
`"Feature branches created. Launching agents..."` thread posts to planning channel.
|
|
471
|
+
- `handleGateApproval()`: Remove `"Gate approved! Feature Lead will advance..."` text post.
|
|
472
|
+
- `handleGateRejection()`: Remove `"Gate rejected..."` text post.
|
|
473
|
+
- `_requestPhaseReview()`: Remove the text summary post (`"PM phase complete. Output: ..."`).
|
|
474
|
+
The planning phase review gates (PM/Designer/Architect approve/reject buttons) still post — but
|
|
475
|
+
the accompanying text summary is replaced by the artifact upload only.
|
|
476
|
+
- `handlePhaseReviewApproval()`: Remove `"Phase approved. Starting X phase..."` text post.
|
|
477
|
+
- `handlePhaseReviewRejection()`: Remove `"X phase rejected. Re-dispatching..."` text post.
|
|
478
|
+
|
|
479
|
+
**What stays in the planning thread:**
|
|
480
|
+
- HTML evidence document uploads (feature gate HTML with buttons)
|
|
481
|
+
- Planning phase artifact uploads (PRD, design-decisions, plan.yaml) with approve/reject buttons
|
|
482
|
+
- Feature completion message
|
|
483
|
+
|
|
484
|
+
**Impl channel change:**
|
|
485
|
+
- Gate approval decisions in the impl channel MUST now include the feature gate HTML as an attachment.
|
|
486
|
+
The `[evidence:<path>]` marker + `[SLACK:decision]` block are posted together — the user reviews
|
|
487
|
+
the HTML before clicking approve/reject.
|
|
488
|
+
- Everything else in impl channel is unchanged (questions, operator relay, status updates).
|
|
489
|
+
|
|
490
|
+
### 9. constants.js Update
|
|
491
|
+
|
|
492
|
+
**File:** `iriai-build/v3/constants.js`
|
|
493
|
+
|
|
494
|
+
Add new signal file name:
|
|
495
|
+
|
|
496
|
+
```javascript
|
|
497
|
+
export const SIGNAL = {
|
|
498
|
+
// ... existing ...
|
|
499
|
+
OUTPUT_PARTIAL: ".output.partial", // NEW
|
|
500
|
+
};
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
### 10. Context Management Updates in Role CLAUDE.md files
|
|
504
|
+
|
|
505
|
+
**All agent CLAUDE.md files** — update the Context Management section:
|
|
506
|
+
|
|
507
|
+
Replace the handover instructions with:
|
|
508
|
+
|
|
509
|
+
```
|
|
510
|
+
## Context Management - MANDATORY
|
|
511
|
+
|
|
512
|
+
### Incremental Output (.output.partial)
|
|
513
|
+
After completing each unit of work, append a `---` separated YAML entry to .output.partial:
|
|
514
|
+
```bash
|
|
515
|
+
cat >> $SIGNAL_DIR/.output.partial << ENTRY_EOF
|
|
516
|
+
---
|
|
517
|
+
type: [entry_type]
|
|
518
|
+
[structured fields for this unit of work]
|
|
519
|
+
completed_at: "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
520
|
+
ENTRY_EOF
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
This ensures completed work survives context exhaustion or crashes.
|
|
524
|
+
|
|
525
|
+
### On Restart
|
|
526
|
+
Read .output.partial FIRST. It contains your completed work in structured form.
|
|
527
|
+
Do NOT redo any work that has an entry in .output.partial.
|
|
528
|
+
|
|
529
|
+
### At 40% Context Remaining
|
|
530
|
+
1. Ensure .output.partial is up to date (all completed work appended)
|
|
531
|
+
2. Write .handover with: remaining items list only (completed work is in .output.partial)
|
|
532
|
+
3. Signal: echo "context_threshold" > $SIGNAL_DIR/.needs-restart
|
|
533
|
+
|
|
534
|
+
### Final Output
|
|
535
|
+
When all work is complete, consolidate .output.partial into .output:
|
|
536
|
+
1. Read all entries from .output.partial
|
|
537
|
+
2. Aggregate into final verdict, checks, gaps, issues
|
|
538
|
+
3. Write consolidated .output
|
|
539
|
+
4. Signal: echo DONE > .done
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
---
|
|
543
|
+
|
|
544
|
+
## Files Modified (Summary)
|
|
545
|
+
|
|
546
|
+
| # | File | Change |
|
|
547
|
+
|---|------|--------|
|
|
548
|
+
| 1 | `v3/roles/implementer/CLAUDE.md` | Add deviations, self_reported_risks, .output.partial |
|
|
549
|
+
| 2 | `v3/roles/backend-implementer/CLAUDE.md` | Same as #1 |
|
|
550
|
+
| 3 | `v3/roles/frontend-implementer/CLAUDE.md` | Same as #1 |
|
|
551
|
+
| 4 | `v3/roles/database-implementer/CLAUDE.md` | Same as #1 |
|
|
552
|
+
| 5 | `v3/roles/code-reviewer/CLAUDE.md` | Add gaps, .output.partial |
|
|
553
|
+
| 6 | `v3/roles/security-auditor/CLAUDE.md` | Add gaps, .output.partial |
|
|
554
|
+
| 7 | `v3/roles/integration-tester/CLAUDE.md` | Add gaps, comprehensive E2E mandate, .output.partial |
|
|
555
|
+
| 8 | `v3/roles/regression-tester/CLAUDE.md` | Add gaps, .output.partial |
|
|
556
|
+
| 9 | `v3/roles/verifier/CLAUDE.md` | Add gaps, .output.partial |
|
|
557
|
+
| 10 | `v3/roles/orchestrator/CLAUDE.md` | Coverage matrix, deviation aggregation, reviewer comments, team HTML compilation (no buttons) |
|
|
558
|
+
| 11 | `v3/roles/feature-lead/CLAUDE.md` | Cross-team surface, FL comments, feature-level coverage matrix, sole Slack poster with buttons |
|
|
559
|
+
| 12 | `tools/visual-verification-mcp/evidence-compiler.js` | HTML template overhaul, new sections, pending styling |
|
|
560
|
+
| 13 | `iriai-build/v3/prompt-builder.js` | Gate review instructions update, remove text summaries |
|
|
561
|
+
| 14 | `iriai-build/v3/constants.js` | Add OUTPUT_PARTIAL signal |
|
|
562
|
+
| 15 | `iriai-build/v3/orchestrator.js` | Remove text status messages from the planning thread (plan/gate/phase approved/rejected posts); impl channel unchanged |
|
|
563
|
+
|
|
564
|
+
## Files NOT Modified
|
|
565
|
+
|
|
566
|
+
- `agent-supervisor.js` — No retry/backoff changes.
|
|
567
|
+
- `operator.js` — No relay changes.
|
|
568
|
+
- `schema.sql` — No DB schema changes.
|
|
569
|
+
- `db.js`, `queries.js` — No query changes.
|
|
570
|
+
- Planning pipeline roles (pm, designer, architect, plan-compiler) — Not in scope.
|
|
571
|
+
- `slack-adapter.js` — No Slack API changes.
|
|
572
|
+
|
|
573
|
+
## Execution Order
|
|
574
|
+
|
|
575
|
+
```
|
|
576
|
+
Phase 1 (parallel, no dependencies):
|
|
577
|
+
- Files 1-4: Implementer CLAUDE.md updates
|
|
578
|
+
- Files 5-9: Review agent CLAUDE.md updates
|
|
579
|
+
- File 14: constants.js (one line)
|
|
580
|
+
|
|
581
|
+
Phase 2 (depends on Phase 1):
|
|
582
|
+
- File 10: Orchestrator CLAUDE.md (references new output fields, team HTML compilation)
|
|
583
|
+
- File 11: Feature Lead CLAUDE.md (references new output fields, sole Slack poster)
|
|
584
|
+
|
|
585
|
+
Phase 3 (parallel with Phase 2):
|
|
586
|
+
- File 12: evidence-compiler.js (HTML template, can be built independently)
|
|
587
|
+
- File 13: prompt-builder.js (gate review instructions, remove text summaries)
|
|
588
|
+
- File 15: orchestrator.js (remove text status messages from the planning thread)
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
## Backwards Compatibility
|
|
592
|
+
|
|
593
|
+
- In-flight features won't have new YAML fields. The evidence compiler renders missing fields as "pending" sections, not errors.
|
|
594
|
+
- `.output.partial` is additive — existing agents that don't write it still work; they just lose the crash-recovery benefit until updated.
|
|
595
|
+
- `validateEvidence()` warns (not errors) on missing new fields.
|
|
596
|
+
- `doc_type` defaults to `"team"` — existing `compile_gate_evidence` calls work unchanged.
|
|
597
|
+
|
|
598
|
+
## Risk Assessment
|
|
599
|
+
|
|
600
|
+
- **Integration-tester runtime increase**: Mandating error case GIFs per journey significantly increases work. This is intentional — comprehensive visual evidence is the goal.
|
|
601
|
+
- **Agent context cost**: New output fields add ~10-20 lines per agent. Minimal context impact.
|
|
602
|
+
- **HTML file size**: More embedded GIFs (error cases) mean a larger HTML file. The existing 18MB cap and resize logic in evidence-compiler.js handle this.
|
|
603
|
+
- **.output.partial disk usage**: Append-only files grow over time. Each entry is small (~20 lines of YAML, well under 1KB). Even 50 entries total only a few tens of KB. Not a concern.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// desktop-adapter.js — DesktopAdapter implementing InterfaceAdapter.
|
|
2
|
+
// For iriai-command Tauri desktop app.
|
|
3
|
+
// Stub — implementation deferred to Phase 7.
|
|
4
|
+
// Will use WebSocket or stdin/stdout JSON protocol as Tauri sidecar.
|
|
5
|
+
|
|
6
|
+
import { InterfaceAdapter } from "./interface.js";
|
|
7
|
+
|
|
8
|
+
/**
 * DesktopAdapter — InterfaceAdapter stub for the iriai-command Tauri desktop app.
 *
 * No transport is wired up yet: every method funnels its payload through
 * `_emit`, which currently only logs a truncated preview to the console.
 * Methods that return a `{ ref }` hand back an opaque, digits-only string
 * that is unique per adapter instance.
 */
export class DesktopAdapter extends InterfaceAdapter {
  /**
   * @param {object} [options]
   * @param {number} [options.port] - Port reserved for the future transport;
   *   any falsy value falls back to 9721.
   */
  constructor({ port } = {}) {
    super();
    this.port = port || 9721;
    this._connections = new Set();
    // Last millisecond value handed out as a ref; see _nextRef().
    this._lastRefMs = 0;
    // TODO: Start WebSocket server or stdin/stdout JSON protocol
  }

  /**
   * Announce a new feature and return its channel identifier.
   * @param {string} featureId
   * @param {string} slug
   * @returns {Promise<string>} `desktop-<slug>`
   */
  async createFeatureChannel(featureId, slug) {
    this._emit("feature:created", { featureId, slug });
    return `desktop-${slug}`;
  }

  /**
   * Emit a top-level message for a feature.
   * @param {string} featureId
   * @param {string} text
   * @returns {Promise<{ref: string}>} reference to the emitted message
   */
  async postMessage(featureId, text) {
    const ref = this._nextRef();
    this._emit("message", { featureId, text, ref });
    return { ref };
  }

  /**
   * Emit a thread message for a feature. No reference is returned.
   * @param {string} featureId
   * @param {string} text
   * @returns {Promise<void>}
   */
  async postThreadMessage(featureId, text) {
    this._emit("thread-message", { featureId, text });
  }

  /**
   * Emit a pipeline message for a feature.
   * @param {string} featureId
   * @param {string} text
   * @returns {Promise<{ref: string}>} reference to the emitted message
   */
  async postPipelineMessage(featureId, text) {
    const ref = this._nextRef();
    this._emit("pipeline-message", { featureId, text, ref });
    return { ref };
  }

  /**
   * Emit an agent's response for a feature.
   * @param {string} featureId
   * @param {string} agentLabel
   * @param {*} content
   * @returns {Promise<{ref: string}>} reference to the emitted response
   */
  async postAgentResponse(featureId, agentLabel, content) {
    const ref = this._nextRef();
    this._emit("agent-response", { featureId, agentLabel, content, ref });
    return { ref };
  }

  /**
   * Emit an artifact notification. Only the path and title are forwarded;
   * the file itself is not read here.
   * @param {string} featureId
   * @param {string} filePath
   * @param {string} title
   * @returns {Promise<void>}
   */
  async uploadArtifact(featureId, filePath, title) {
    this._emit("artifact", { featureId, filePath, title });
  }

  /**
   * Emit a decision request. Resolves immediately with the message
   * reference; it does not yet wait for the user's choice.
   * @param {string} featureId
   * @param {object} decision
   * @returns {Promise<{ref: string}>} reference to the emitted decision
   */
  async postDecision(featureId, decision) {
    const ref = this._nextRef();
    this._emit("decision", { featureId, decision, ref });
    // TODO: Wait for response event from Tauri client
    return { ref };
  }

  /**
   * Emit the resolution of a previously posted decision.
   * `messageRef` is accepted for interface compatibility but is not
   * currently included in the emitted payload.
   * @returns {Promise<void>}
   */
  async resolveDecisionMessage(featureId, messageRef, decisionId, selectedOption, selectedLabel, resolvedBy, feedback) {
    this._emit("decision-resolved", { featureId, decisionId, selectedOption, selectedLabel, resolvedBy, feedback });
  }

  /**
   * Post the fixed approve/reject decision for a completed plan.
   * `planDir` is accepted but currently unused.
   * @param {string} featureId
   * @param {string} planDir
   * @returns {Promise<{ref: string}>} reference to the emitted decision
   */
  async postPlanForApproval(featureId, planDir) {
    return this.postDecision(featureId, {
      id: "plan-approval",
      title: "Plan ready for approval",
      context: "All planning phases complete.",
      options: [
        { id: "approve", label: "Approve Plan", style: "primary" },
        { id: "reject", label: "Reject Plan", style: "danger" },
      ],
    });
  }

  /**
   * Emit a feature-complete notification.
   * @param {string} featureId
   * @returns {Promise<void>}
   */
  async postFeatureComplete(featureId) {
    this._emit("feature-complete", { featureId });
  }

  /**
   * Produce the next message reference for this adapter instance.
   *
   * A bare `Date.now()` repeats whenever two posts land in the same
   * millisecond, so the value is bumped past the previous one when needed.
   * The result is still a digits-only millisecond string.
   * @returns {string}
   */
  _nextRef() {
    const next = Math.max(Date.now(), this._lastRefMs + 1);
    this._lastRefMs = next;
    return next.toString();
  }

  /**
   * Log an event and a preview of its payload (first 200 characters).
   * Serialization is guarded so that a payload JSON cannot represent
   * (circular reference, BigInt, undefined) does not make the calling
   * post method fail.
   * @param {string} event
   * @param {*} data
   */
  _emit(event, data) {
    // TODO: Send over WebSocket / stdout
    let preview;
    try {
      const json = JSON.stringify(data);
      // JSON.stringify returns undefined (not a string) for undefined input.
      preview = json === undefined ? "undefined" : json;
    } catch (err) {
      preview = `[unserializable payload: ${err.message}]`;
    }
    console.log(`[desktop] ${event}:`, preview.slice(0, 200));
  }
}
|