@exaudeus/workrail 3.39.0 → 3.40.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/cli-worktrain.js +50 -26
  2. package/dist/console-ui/assets/{index-3oXZ_A9m.js → index-CXWCAonr.js} +1 -1
  3. package/dist/console-ui/index.html +1 -1
  4. package/dist/coordinators/pr-review.d.ts +6 -1
  5. package/dist/coordinators/pr-review.js +60 -5
  6. package/dist/daemon/workflow-runner.d.ts +3 -2
  7. package/dist/daemon/workflow-runner.js +6 -3
  8. package/dist/manifest.json +56 -40
  9. package/dist/mcp/output-schemas.d.ts +10 -10
  10. package/dist/mcp/tools.d.ts +12 -12
  11. package/dist/trigger/trigger-router.js +9 -2
  12. package/dist/types/workflow-source.d.ts +0 -1
  13. package/dist/types/workflow-source.js +3 -6
  14. package/dist/types/workflow.d.ts +1 -1
  15. package/dist/types/workflow.js +1 -2
  16. package/dist/v2/durable-core/domain/artifact-contract-validator.js +66 -0
  17. package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.d.ts +25 -0
  18. package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.js +31 -0
  19. package/dist/v2/durable-core/schemas/artifacts/index.d.ts +3 -1
  20. package/dist/v2/durable-core/schemas/artifacts/index.js +14 -1
  21. package/dist/v2/durable-core/schemas/artifacts/review-verdict.d.ts +41 -0
  22. package/dist/v2/durable-core/schemas/artifacts/review-verdict.js +30 -0
  23. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +236 -236
  24. package/dist/v2/durable-core/schemas/session/events.d.ts +50 -50
  25. package/dist/v2/durable-core/schemas/session/gaps.d.ts +2 -2
  26. package/dist/v2/durable-core/schemas/session/manifest.d.ts +4 -4
  27. package/dist/v2/durable-core/schemas/session/outputs.d.ts +8 -8
  28. package/dist/v2/usecases/console-routes.js +178 -0
  29. package/docs/design/coordinator-artifact-protocol-design-candidates.md +155 -0
  30. package/docs/design/coordinator-artifact-protocol-design-review.md +103 -0
  31. package/docs/design/coordinator-artifact-protocol-implementation-plan.md +259 -0
  32. package/docs/ideas/backlog.md +158 -100
  33. package/package.json +1 -1
  34. package/workflows/mr-review-workflow.agentic.v2.json +5 -1
@@ -90,8 +90,6 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
90
90
  content?: unknown;
91
91
  }>]>;
92
92
  }, "strip", z.ZodTypeAny, {
93
- outputId: string;
94
- outputChannel: "recap" | "artifact";
95
93
  payload: {
96
94
  payloadKind: "notes";
97
95
  notesMarkdown: string;
@@ -102,10 +100,10 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
102
100
  byteLength: number;
103
101
  content?: unknown;
104
102
  };
105
- supersedesOutputId?: string | undefined;
106
- }, {
107
103
  outputId: string;
108
104
  outputChannel: "recap" | "artifact";
105
+ supersedesOutputId?: string | undefined;
106
+ }, {
109
107
  payload: {
110
108
  payloadKind: "notes";
111
109
  notesMarkdown: string;
@@ -116,10 +114,10 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
116
114
  byteLength: number;
117
115
  content?: unknown;
118
116
  };
119
- supersedesOutputId?: string | undefined;
120
- }>, {
121
117
  outputId: string;
122
118
  outputChannel: "recap" | "artifact";
119
+ supersedesOutputId?: string | undefined;
120
+ }>, {
123
121
  payload: {
124
122
  payloadKind: "notes";
125
123
  notesMarkdown: string;
@@ -130,10 +128,10 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
130
128
  byteLength: number;
131
129
  content?: unknown;
132
130
  };
133
- supersedesOutputId?: string | undefined;
134
- }, {
135
131
  outputId: string;
136
132
  outputChannel: "recap" | "artifact";
133
+ supersedesOutputId?: string | undefined;
134
+ }, {
137
135
  payload: {
138
136
  payloadKind: "notes";
139
137
  notesMarkdown: string;
@@ -144,5 +142,7 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
144
142
  byteLength: number;
145
143
  content?: unknown;
146
144
  };
145
+ outputId: string;
146
+ outputChannel: "recap" | "artifact";
147
147
  supersedesOutputId?: string | undefined;
148
148
  }>;
@@ -40,6 +40,7 @@ exports.mountConsoleRoutes = mountConsoleRoutes;
40
40
  const express_1 = __importDefault(require("express"));
41
41
  const path_1 = __importDefault(require("path"));
42
42
  const fs_1 = __importDefault(require("fs"));
43
+ const os_1 = __importDefault(require("os"));
43
44
  const worktree_service_js_1 = require("./worktree-service.js");
44
45
  const workflow_js_1 = require("../../types/workflow.js");
45
46
  const dev_mode_js_1 = require("../../mcp/dev-mode.js");
@@ -135,6 +136,183 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
135
136
  req.on('close', () => { sseClients.delete(res); });
136
137
  res.on('close', () => { sseClients.delete(res); });
137
138
  });
139
+ const daemonEventsDir = path_1.default.join(process.env['HOME'] ?? os_1.default.homedir(), '.workrail', 'events', 'daemon');
140
+ async function tailDaemonEvents(filePath, prevSize) {
141
+ try {
142
+ const stat = await fs_1.default.promises.stat(filePath);
143
+ if (stat.size <= prevSize)
144
+ return [];
145
+ const fd = await fs_1.default.promises.open(filePath, 'r');
146
+ const length = stat.size - prevSize;
147
+ const buf = Buffer.alloc(length);
148
+ try {
149
+ await fd.read(buf, 0, length, prevSize);
150
+ }
151
+ finally {
152
+ await fd.close();
153
+ }
154
+ const chunk = buf.toString('utf8');
155
+ return chunk
156
+ .split('\n')
157
+ .filter(Boolean)
158
+ .flatMap((line) => {
159
+ try {
160
+ return [JSON.parse(line)];
161
+ }
162
+ catch {
163
+ return [];
164
+ }
165
+ });
166
+ }
167
+ catch {
168
+ return [];
169
+ }
170
+ }
171
+ const SESSION_SSE_EVENT_KINDS = new Set([
172
+ 'tool_called',
173
+ 'tool_call_started',
174
+ 'tool_call_completed',
175
+ 'tool_call_failed',
176
+ 'tool_error',
177
+ 'step_advanced',
178
+ 'session_completed',
179
+ 'issue_reported',
180
+ 'agent_stuck',
181
+ 'llm_turn_started',
182
+ 'llm_turn_completed',
183
+ 'signal_emitted',
184
+ ]);
185
+ app.get('/api/v2/sessions/:sessionId/events', async (req, res) => {
186
+ const { sessionId } = req.params;
187
+ const sessionResult = await consoleService.getSessionDetail(sessionId);
188
+ if (sessionResult.isErr()) {
189
+ const status = sessionResult.error.code === 'SESSION_LOAD_FAILED' ? 404 : 500;
190
+ res.status(status).json({ success: false, error: sessionResult.error.message });
191
+ return;
192
+ }
193
+ const sessionDetail = sessionResult.value;
194
+ if (!sessionDetail || !sessionDetail.runs || sessionDetail.runs.length === 0) {
195
+ res.status(404).json({ success: false, error: `Session not found: ${sessionId}` });
196
+ return;
197
+ }
198
+ res.setHeader('Content-Type', 'text/event-stream');
199
+ res.setHeader('Cache-Control', 'no-cache');
200
+ res.setHeader('Connection', 'keep-alive');
201
+ res.setHeader('X-Accel-Buffering', 'no');
202
+ res.flushHeaders();
203
+ res.write(`data: ${JSON.stringify({ kind: 'connected', sessionId })}\n\n`);
204
+ let currentLogDate = new Date().toISOString().slice(0, 10);
205
+ let currentLogPath = path_1.default.join(daemonEventsDir, `${currentLogDate}.jsonl`);
206
+ let fileOffset = 0;
207
+ try {
208
+ const stat = await fs_1.default.promises.stat(currentLogPath);
209
+ fileOffset = stat.size;
210
+ }
211
+ catch {
212
+ }
213
+ let isClosed = false;
214
+ let isProcessing = false;
215
+ let watcher = null;
216
+ const cleanup = () => {
217
+ if (isClosed)
218
+ return;
219
+ isClosed = true;
220
+ try {
221
+ watcher?.close();
222
+ }
223
+ catch { }
224
+ try {
225
+ if (!res.writableEnded)
226
+ res.end();
227
+ }
228
+ catch { }
229
+ };
230
+ const processNewEvents = async () => {
231
+ if (isClosed || isProcessing)
232
+ return;
233
+ isProcessing = true;
234
+ const todayDate = new Date().toISOString().slice(0, 10);
235
+ if (todayDate !== currentLogDate) {
236
+ currentLogDate = todayDate;
237
+ currentLogPath = path_1.default.join(daemonEventsDir, `${currentLogDate}.jsonl`);
238
+ fileOffset = 0;
239
+ }
240
+ const newEvents = await tailDaemonEvents(currentLogPath, fileOffset);
241
+ for (const event of newEvents) {
242
+ if (isClosed)
243
+ break;
244
+ const kind = typeof event['kind'] === 'string' ? event['kind'] : null;
245
+ const evtSessionId = typeof event['workrailSessionId'] === 'string'
246
+ ? event['workrailSessionId']
247
+ : null;
248
+ if (!kind || !SESSION_SSE_EVENT_KINDS.has(kind))
249
+ continue;
250
+ if (evtSessionId !== sessionId)
251
+ continue;
252
+ try {
253
+ res.write(`data: ${JSON.stringify(event)}\n\n`);
254
+ }
255
+ catch {
256
+ cleanup();
257
+ return;
258
+ }
259
+ if (kind === 'session_completed') {
260
+ cleanup();
261
+ return;
262
+ }
263
+ }
264
+ try {
265
+ const stat = await fs_1.default.promises.stat(currentLogPath);
266
+ fileOffset = stat.size;
267
+ }
268
+ catch {
269
+ fileOffset = 0;
270
+ }
271
+ isProcessing = false;
272
+ };
273
+ try {
274
+ fs_1.default.mkdirSync(daemonEventsDir, { recursive: true });
275
+ }
276
+ catch { }
277
+ try {
278
+ watcher = fs_1.default.watch(daemonEventsDir, { recursive: false }, (_eventType, filename) => {
279
+ if (filename !== null && filename.endsWith('.jsonl')) {
280
+ void processNewEvents();
281
+ }
282
+ });
283
+ watcher.on('error', cleanup);
284
+ }
285
+ catch {
286
+ }
287
+ const keepaliveInterval = setInterval(() => {
288
+ if (isClosed) {
289
+ clearInterval(keepaliveInterval);
290
+ return;
291
+ }
292
+ try {
293
+ res.write(': keepalive\n\n');
294
+ }
295
+ catch {
296
+ clearInterval(keepaliveInterval);
297
+ cleanup();
298
+ }
299
+ }, 30000);
300
+ const maxConnectionTimeout = setTimeout(() => {
301
+ clearInterval(keepaliveInterval);
302
+ cleanup();
303
+ }, 4 * 60 * 60 * 1000);
304
+ req.on('close', () => {
305
+ clearInterval(keepaliveInterval);
306
+ clearTimeout(maxConnectionTimeout);
307
+ cleanup();
308
+ });
309
+ res.on('close', () => {
310
+ clearInterval(keepaliveInterval);
311
+ clearTimeout(maxConnectionTimeout);
312
+ cleanup();
313
+ });
314
+ void processNewEvents();
315
+ });
138
316
  const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
139
317
  const PERF_FILE_READ_LIMIT_BYTES = 5 * 1024 * 1024;
140
318
  async function readDiskEntries(perfFile) {
@@ -0,0 +1,155 @@
1
+ # Design Candidates: Coordinator Artifact Protocol
2
+
3
+ **Status:** Candidate analysis complete
4
+ **Date:** 2026-04-18
5
+ **Task:** Implement wr.review_verdict schema, fix onComplete callback, update mr-review workflow to emit it, update coordinator to read artifacts before keyword-scanning
6
+
7
+ ---
8
+
9
+ ## Problem Understanding
10
+
11
+ ### Core Tensions
12
+
13
+ **T1: Breaking interface vs. backward compatibility**
14
+ `CoordinatorDeps.getAgentResult` returns `Promise<string | null>` today. Changing it to `Promise<{ recapMarkdown: string | null; artifacts: readonly unknown[] }>` is a compile-time breaking change. All call sites (2 in coordinator, 2 in test fakes, 1 real implementation) must change simultaneously. TypeScript catches this at build, so risk is low -- but the change must be complete.
15
+
16
+ **T2: N+1 HTTP calls vs. tip-node-only simplicity**
17
+ ALL-node aggregation requires walking `runs[0].nodes` and fetching each node's detail individually. For a 6-phase workflow, that's 6 HTTP calls to localhost per session. The simple approach (tip node only) would miss a verdict artifact from any non-final step.
18
+
19
+ **T3: `required: false` vs. engine enforcement**
20
+ `outputContract` with `required: false` means the engine won't block if the artifact is absent. This is the correct transition strategy but means the coordinator must maintain two code paths (artifact + keyword-scan fallback) until the graduation criterion (10+ consecutive sessions with 0 fallback warnings) is met.
21
+
22
+ **T4: Schema strictness vs. forward compatibility**
23
+ `.strict()` rejects unknown fields (forward-incompatible). `.strip()` strips them silently (forward-compatible). The task spec says `.strict()`, which matches the `loop-control.ts` precedent. The design doc recommends `.strip()` for forward-compat. **Task spec wins** -- use `.strict()` to be consistent with existing schema patterns.
24
+
25
+ ### Likely Seam
26
+
27
+ `CoordinatorDeps.getAgentResult` is the real boundary. It is already the I/O abstraction layer where the coordinator interacts with sessions. Changing the return type here forces all consumers to acknowledge the new shape without touching coordinator routing logic.
28
+
29
+ ### What Makes This Hard
30
+
31
+ 1. **Three separate `onComplete` sites:** `makeCompleteStepTool` (line 1249), `makeContinueWorkflowTool` (line 1046), and the closure definition (line 2096). TypeScript will catch signature mismatches on the closure but not at the two call sites if the closure's new parameter is optional.
32
+ 2. **Exhaustiveness in the switch:** `artifact-contract-validator.ts` switch currently handles only `LOOP_CONTROL_CONTRACT_REF`. Adding `'wr.contracts.review_verdict'` to `ARTIFACT_CONTRACT_REFS` without adding a switch case causes `validateArtifactContract()` to hit the default `UNKNOWN_CONTRACT_REF` error for any step declaring this contract.
33
+ 3. **`source?` field on ReviewFindings:** Adding `source` as required breaks 4 existing test literals. Making it optional (`source?`) is a minor type weakness but preserves backward compat.
34
+
35
+ ---
36
+
37
+ ## Philosophy Constraints
38
+
39
+ From CLAUDE.md:
40
+ - **Make illegal states unrepresentable:** `verdict: 'clean'|'minor'|'blocking'` not `string`. `source: 'artifact'|'keyword_scan'` not `string`.
41
+ - **Validate at boundaries:** Zod parse at coordinator read time + engine validation at advance time.
42
+ - **Errors are data:** `readVerdictArtifact()` returns `ReviewFindings | null`, not throws.
43
+ - **Functional/declarative:** `readVerdictArtifact()` is a pure function, composable with `parseFindingsFromNotes()`.
44
+ - **Prefer fakes over mocks:** The `makeFakeDeps()` pattern in tests is the established style.
45
+
46
+ **Conflict:** `required: false` during transition temporarily violates 'make illegal states unrepresentable' at the coordinator level. Accepted per design doc -- the fallback is explicit and time-boxed.
47
+
48
+ ---
49
+
50
+ ## Impact Surface
51
+
52
+ Files that must change:
53
+ - `src/v2/durable-core/schemas/artifacts/review-verdict.ts` (new)
54
+ - `src/v2/durable-core/schemas/artifacts/index.ts` (ARTIFACT_CONTRACT_REFS)
55
+ - `src/v2/durable-core/domain/artifact-contract-validator.ts` (switch case)
56
+ - `src/daemon/workflow-runner.ts` (onComplete signature, WorkflowRunSuccess, final return)
57
+ - `src/cli-worktrain.ts` (getAgentResult implementation + return type)
58
+ - `src/coordinators/pr-review.ts` (CoordinatorDeps, ReviewFindings, readVerdictArtifact, call sites)
59
+ - `workflows/mr-review-workflow.agentic.v2.json` (phase-6 outputContract + prompt)
60
+ - `tests/unit/coordinator-pr-review.test.ts` (new tests + updated fakes)
61
+
62
+ Must remain consistent:
63
+ - `ConsoleNodeDetail.artifacts` -- no change needed, already returns artifacts
64
+ - `projectArtifactsV2()` -- no change needed, already projects artifacts
65
+ - `delivery-action.ts` -- reads `lastStepNotes`, not artifacts; no change needed
66
+ - `makeSpawnAgentTool()` -- returns `{ notes: string }` only; `lastStepArtifacts` gap acknowledged, post-MVP
67
+
68
+ ---
69
+
70
+ ## Candidates
71
+
72
+ ### Candidate A: Exact task spec implementation (RECOMMENDED)
73
+
74
+ **Summary:** Implement all three changes exactly as specified: fix `onComplete` to forward `params.artifacts`, add `wr.review_verdict` schema with `.strict()`, update `getAgentResult` to aggregate ALL-node artifacts, add `readVerdictArtifact()` pure function with keyword-scan fallback.
75
+
76
+ **Tensions resolved:**
77
+ - T1: TypeScript compile-time catch ensures completeness
78
+ - T3: `required: false` + keyword-scan fallback avoids session blocking
79
+
80
+ **Tensions accepted:**
81
+ - T2: N+1 calls (accepted -- localhost, negligible latency)
82
+ - T4: `.strict()` over `.strip()` (follows existing precedent)
83
+
84
+ **Boundary:** `CoordinatorDeps.getAgentResult` return type change. Best-fit because it is already the established abstraction boundary for coordinator-to-session I/O. All consumers must acknowledge the change at this single point.
85
+
86
+ **Failure mode:** Missing the `makeContinueWorkflowTool` `onComplete` call site (line 1046) when updating `makeCompleteStepTool` (line 1249). Both tools call `onComplete` but are in separate functions. TypeScript will not catch this if `artifacts?` is optional in the signature -- the closure will be called with `undefined` for `artifacts` from `continue_workflow`, and `lastStepArtifacts` will be silently empty.
87
+
88
+ **Repo-pattern relationship:** Follows `loop-control.ts` schema pattern exactly. Follows `WorkflowRunSuccess.lastStepNotes` conditional spread pattern. Follows `makeFakeDeps()` fake deps testing pattern. No new patterns introduced.
89
+
90
+ **Gains:**
91
+ - Coordinator reads typed data for sessions that emit the artifact
92
+ - Additive: all existing sessions continue to work via fallback
93
+ - Zero new infrastructure: 7 file changes + 1 new file
94
+ - Artifact visible in console (`hasArtifacts: true` on phase-6 node)
95
+ - Observability: `source: 'artifact'|'keyword_scan'` + logging enables emission rate tracking
96
+
97
+ **Losses:**
98
+ - N+1 HTTP calls per session for artifact aggregation
99
+ - Two coordinator code paths until graduation
100
+
101
+ **Scope:** Best-fit. Minimal delta, highest backward compatibility, clear graduation path.
102
+
103
+ **Philosophy:** Honors validate-at-boundaries, functional/declarative, prefer-fakes, exhaustiveness (closed enum `source`). Minor tension: `source?` optional field vs. type-safety-first. Temporary conflict with 'make illegal states unrepresentable' (accepted).
104
+
105
+ ---
106
+
107
+ ### Candidate B: Tip-node only (simpler, misses design intent)
108
+
109
+ **Summary:** Only read tip node's artifacts -- matching the existing `preferredTipNodeId` pattern in `getAgentResult` today. Avoids N+1 calls.
110
+
111
+ **Tensions resolved:**
112
+ - T2: 1 HTTP call vs. N+1
113
+
114
+ **Tensions accepted:**
115
+ - Violates task spec 'CRITICAL: must aggregate artifacts across ALL session nodes'
116
+ - If a verdict artifact is on step N-1 and the workflow gains a post-synthesis confirmation step N, coordinator silently gets zero artifacts
117
+
118
+ **Failure mode:** Silent data loss when artifact is on a non-final node. This is the ORANGE-1 constraint from the design doc.
119
+
120
+ **Scope:** Too narrow -- explicitly contradicts task requirement.
121
+
122
+ **Why rejected:** The task spec uses 'CRITICAL' emphasis for ALL-node aggregation. Disqualified.
123
+
124
+ ---
125
+
126
+ ## Comparison and Recommendation
127
+
128
+ **Recommendation: Candidate A.** No contest -- Candidate B is disqualified by the task spec.
129
+
130
+ | Criterion | A | B |
131
+ |-----------|---|---|
132
+ | ALL-node aggregation (task spec) | Correct | WRONG |
133
+ | N+1 calls | Accepted | Avoided |
134
+ | Backward compat | Full | Same |
135
+ | Schema precedent | Follows exactly | N/A |
136
+ | Philosophy fit | Best | N/A |
137
+
138
+ ---
139
+
140
+ ## Self-Critique
141
+
142
+ **Strongest counter-argument:** N+1 calls add latency. For a 6-step session, that's 6 additional HTTP calls. This is acceptable on localhost (~50-100ms) but could be optimized with a `/api/v2/sessions/:id/artifacts` aggregation endpoint (Candidate C from the design doc). Evidence required before building that endpoint: a second coordinator that needs it, or performance data showing the N+1 calls are a problem.
143
+
144
+ **Narrower option that almost works:** Tip-node only. Loses for the explicit task-spec reason.
145
+
146
+ **Broader option:** Add `/api/v2/sessions/:id/artifacts` server-side endpoint. Right long-term direction, premature now.
147
+
148
+ **Assumption that would invalidate this design:** The design assumes every object in `runs[0].nodes` of the session detail response carries a `nodeId` field; it would be invalidated if any node lacked one. Confirmed from the `ConsoleDagNode` type that `nodeId: string` is always present.
149
+
150
+ ---
151
+
152
+ ## Open Questions for the Main Agent
153
+
154
+ 1. Should `source?` be optional or required on `ReviewFindings`? Optional breaks fewer existing tests but weakens the type. The 4 existing `ReviewFindings` literals in tests would need `source` added if required.
155
+ 2. Should `readVerdictArtifact()` log a divergence warning when both artifact severity and keyword-scan severity are available but disagree? The design doc recommends this (ORANGE finding). Adds ~10 LOC but improves observability.
@@ -0,0 +1,103 @@
1
+ # Design Review Findings: Coordinator Artifact Protocol
2
+
3
+ **Status:** Review complete
4
+ **Date:** 2026-04-18
5
+ **Design reviewed:** Candidate A from coordinator-artifact-protocol-design-candidates.md
6
+
7
+ ---
8
+
9
+ ## Tradeoff Review
10
+
11
+ | Tradeoff | Acceptable? | When it stops being acceptable |
12
+ |----------|-------------|-------------------------------|
13
+ | N+1 HTTP calls for all-node aggregation | Yes (localhost, ~50-100ms) | If coordinator is called for sessions with 50+ nodes |
14
+ | `source?` optional on `ReviewFindings` | Yes (observability only, not routing) | If future code switches exhaustively on `source` |
15
+ | `.strict()` schema | Yes (follows existing precedent) | If LLM consistently emits extra fields causing Zod failures |
16
+ | `required: false` in outputContract | Yes (transition strategy) | Once 10+ consecutive sessions confirm 100% artifact emission |
17
+
18
+ ---
19
+
20
+ ## Failure Mode Review
21
+
22
+ | Failure Mode | Severity | Handling | Missing Mitigation |
23
+ |-------------|----------|----------|--------------------|
24
+ | Missing `makeContinueWorkflowTool` onComplete update | LOW | TypeScript won't catch (optional param) -- manual verification required | Code comment at both call sites |
25
+ | Per-node HTTP fetch failure during aggregation | LOW | Graceful fallback to keyword scan | Per-node try/catch + WARN logging |
26
+ | Agent emits malformed artifact (wrong enum, missing field) | MEDIUM | `safeParse` fails silently without logging | `[WARN coord:reason=artifact_parse_failed]` logging REQUIRED |
27
+ | `runs[0].nodes` undefined for empty sessions | NONE | Null check + empty-array fallback | None |
28
+ | `required: false` default behavior | NONE | Engine correctly reads `required: false` and skips validation | None |
29
+
30
+ ---
31
+
32
+ ## Runner-Up / Simpler Alternative Review
33
+
34
+ **Runner-up (tip-node only):** Disqualified by task spec 'CRITICAL: must aggregate artifacts across ALL session nodes'. No elements worth incorporating.
35
+
36
+ **Simpler variant (skip `lastStepArtifacts`):** The pr-review coordinator reads via HTTP, not via `WorkflowRunSuccess`. Skipping would satisfy the coordinator use case. Rejected because the task spec explicitly requires it, and it's the foundation for `spawn_agent` artifact surfacing (post-MVP).
37
+
38
+ **Simpler variant (skip `onComplete` change):** Would leave `WorkflowRunSuccess.lastStepArtifacts` always undefined. Rejected -- inconsistent state.
39
+
40
+ ---
41
+
42
+ ## Philosophy Alignment
43
+
44
+ **Satisfied:** validate-at-boundaries, errors-as-data, functional/declarative, prefer-fakes, exhaustiveness (closed enums), immutability.
45
+
46
+ **Under tension (accepted):**
47
+ - `source?` optional vs. type-safety-first: minor, observability-only field
48
+ - `required: false` vs. make-illegal-states-unrepresentable: time-boxed transition strategy
49
+
50
+ ---
51
+
52
+ ## Findings
53
+
54
+ ### RED (must fix before shipping)
55
+
56
+ **R1: `readVerdictArtifact()` must log on malformed artifact**
57
+ If the agent emits an artifact with `kind: 'wr.review_verdict'` but the wrong schema, `safeParse` fails silently. Without logging, FM3 (the malformed-artifact row in the Failure Mode Review table) is invisible, which makes it impossible to monitor the artifact emission rate.
58
+
59
+ Required: `process.stderr.write('[WARN coord:reason=artifact_parse_failed ...]')` when `safeParse` fails AND the artifact has `kind === 'wr.review_verdict'`.
60
+
61
+ **R2: Per-node fetch errors must be caught individually**
62
+ The current outer `try/catch` in `getAgentResult` covers the entire function. The new implementation walks multiple nodes -- if one node fetch throws, the outer catch aborts the entire aggregation. Each per-node fetch must be wrapped individually so one failure doesn't discard all other nodes' artifacts.
63
+
64
+ ---
65
+
66
+ ### ORANGE (fix before C1 -> C2 graduation)
67
+
68
+ **O1: Log when keyword scan fires on a session that had artifacts**
69
+ The coordinator cannot distinguish 'artifact never emitted' from 'artifact emitted but invalid' without checking the artifact count. Add a log entry when `readVerdictArtifact` returns null but `artifacts.length > 0`. This enables the graduation metric (10+ consecutive sessions with 0 fallback warnings).
70
+
71
+ Required log: `[INFO coord:source=keyword_scan reason=no_valid_artifact artifactCount=N]`
72
+
73
+ **O2: Divergence detection warning**
74
+ If both artifact severity (from `readVerdictArtifact`) and keyword-scan severity (from `parseFindingsFromNotes`) are available and disagree, log at WARN. Design doc recommends this (ORANGE finding). Protects against semantic inconsistency between notes and artifact.
75
+
76
+ ---
77
+
78
+ ### YELLOW (future consideration)
79
+
80
+ **Y1: `source?` optional on `ReviewFindings`**
81
+ Making `source` required would improve type safety. Currently deferred to avoid breaking 4 existing test literals. When those tests are updated for other reasons, upgrade `source` to required.
82
+
83
+ **Y2: Post-graduation: remove keyword scan fallback**
84
+ Once the graduation criterion is met, `parseFindingsFromNotes` callers can be removed from the coordinator routing logic. The `unknown` severity variant can also be removed from `ReviewSeverity`.
85
+
86
+ ---
87
+
88
+ ## Recommended Revisions
89
+
90
+ 1. **R1:** In `readVerdictArtifact()`, check if `raw` object has `kind === 'wr.review_verdict'` before `safeParse`. If kind matches but safeParse fails, log WARN.
91
+ 2. **R2:** In `getAgentResult()` implementation, wrap each per-node HTTP fetch in its own try/catch. Failed nodes are skipped with a WARN log; successful nodes contribute their artifacts.
92
+ 3. **O1:** After the artifact/keyword-scan decision in the coordinator, log `source` with the artifact count context.
93
+ 4. **O2:** Add divergence check: run keyword scan on `recapMarkdown` when an artifact is found; if severities disagree, log WARN.
94
+
95
+ ---
96
+
97
+ ## Residual Concerns
98
+
99
+ 1. **`continue_workflow` onComplete call site:** `makeContinueWorkflowTool` is marked DEPRECATED for daemon sessions, but it still calls `onComplete`. The new `artifacts?` parameter must be passed from `params.artifacts` at line 1046. Must be verified manually -- TypeScript won't catch a missing optional parameter.
100
+
101
+ 2. **`.strict()` vs. LLM reliability:** If the LLM adds extra fields (e.g., `rationale`, `notes`) to the artifact, `.strict()` causes Zod failure. With `required: false`, this just triggers the keyword-scan fallback. Acceptable during transition. If the failure rate is high in production, consider switching to `.strip()`.
102
+
103
+ 3. **Convention only:** `V1` suffix on `ReviewVerdictArtifactV1Schema` is a convention, not enforced. No migration path exists for schema changes. Future schema evolution must use a new type (`ReviewVerdictArtifactV2Schema`) in parallel until old sessions are retired.