@exaudeus/workrail 3.38.0 → 3.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-worktrain.js +231 -0
- package/dist/console-ui/assets/{index-BtOJj6Xy.js → index-CXWCAonr.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/coordinators/pr-review.d.ts +62 -0
- package/dist/coordinators/pr-review.js +575 -0
- package/dist/daemon/workflow-runner.d.ts +3 -2
- package/dist/daemon/workflow-runner.js +6 -3
- package/dist/manifest.json +58 -34
- package/dist/mcp/output-schemas.d.ts +10 -10
- package/dist/mcp/tools.d.ts +12 -12
- package/dist/trigger/trigger-router.js +9 -2
- package/dist/types/workflow-source.d.ts +0 -1
- package/dist/types/workflow-source.js +3 -6
- package/dist/types/workflow.d.ts +1 -1
- package/dist/types/workflow.js +1 -2
- package/dist/v2/durable-core/domain/artifact-contract-validator.js +66 -0
- package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.d.ts +25 -0
- package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.js +31 -0
- package/dist/v2/durable-core/schemas/artifacts/index.d.ts +3 -1
- package/dist/v2/durable-core/schemas/artifacts/index.js +14 -1
- package/dist/v2/durable-core/schemas/artifacts/review-verdict.d.ts +41 -0
- package/dist/v2/durable-core/schemas/artifacts/review-verdict.js +30 -0
- package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +236 -236
- package/dist/v2/durable-core/schemas/session/events.d.ts +50 -50
- package/dist/v2/durable-core/schemas/session/gaps.d.ts +2 -2
- package/dist/v2/durable-core/schemas/session/manifest.d.ts +4 -4
- package/dist/v2/durable-core/schemas/session/outputs.d.ts +8 -8
- package/dist/v2/usecases/console-routes.js +178 -0
- package/docs/design/coordinator-artifact-protocol-design-candidates.md +155 -0
- package/docs/design/coordinator-artifact-protocol-design-review.md +103 -0
- package/docs/design/coordinator-artifact-protocol-implementation-plan.md +259 -0
- package/docs/discovery/coordinator-design-review.md +73 -0
- package/docs/discovery/coordinator-script-design.md +96 -679
- package/docs/discovery/hypothesis-challenge-report.md +44 -0
- package/docs/discovery/simulation-report.md +85 -0
- package/docs/ideas/backlog.md +158 -100
- package/package.json +1 -1
- package/workflows/mr-review-workflow.agentic.v2.json +5 -1
|
@@ -90,8 +90,6 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
|
|
|
90
90
|
content?: unknown;
|
|
91
91
|
}>]>;
|
|
92
92
|
}, "strip", z.ZodTypeAny, {
|
|
93
|
-
outputId: string;
|
|
94
|
-
outputChannel: "recap" | "artifact";
|
|
95
93
|
payload: {
|
|
96
94
|
payloadKind: "notes";
|
|
97
95
|
notesMarkdown: string;
|
|
@@ -102,10 +100,10 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
|
|
|
102
100
|
byteLength: number;
|
|
103
101
|
content?: unknown;
|
|
104
102
|
};
|
|
105
|
-
supersedesOutputId?: string | undefined;
|
|
106
|
-
}, {
|
|
107
103
|
outputId: string;
|
|
108
104
|
outputChannel: "recap" | "artifact";
|
|
105
|
+
supersedesOutputId?: string | undefined;
|
|
106
|
+
}, {
|
|
109
107
|
payload: {
|
|
110
108
|
payloadKind: "notes";
|
|
111
109
|
notesMarkdown: string;
|
|
@@ -116,10 +114,10 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
|
|
|
116
114
|
byteLength: number;
|
|
117
115
|
content?: unknown;
|
|
118
116
|
};
|
|
119
|
-
supersedesOutputId?: string | undefined;
|
|
120
|
-
}>, {
|
|
121
117
|
outputId: string;
|
|
122
118
|
outputChannel: "recap" | "artifact";
|
|
119
|
+
supersedesOutputId?: string | undefined;
|
|
120
|
+
}>, {
|
|
123
121
|
payload: {
|
|
124
122
|
payloadKind: "notes";
|
|
125
123
|
notesMarkdown: string;
|
|
@@ -130,10 +128,10 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
|
|
|
130
128
|
byteLength: number;
|
|
131
129
|
content?: unknown;
|
|
132
130
|
};
|
|
133
|
-
supersedesOutputId?: string | undefined;
|
|
134
|
-
}, {
|
|
135
131
|
outputId: string;
|
|
136
132
|
outputChannel: "recap" | "artifact";
|
|
133
|
+
supersedesOutputId?: string | undefined;
|
|
134
|
+
}, {
|
|
137
135
|
payload: {
|
|
138
136
|
payloadKind: "notes";
|
|
139
137
|
notesMarkdown: string;
|
|
@@ -144,5 +142,7 @@ export declare const NodeOutputAppendedDataV1Schema: z.ZodEffects<z.ZodObject<{
|
|
|
144
142
|
byteLength: number;
|
|
145
143
|
content?: unknown;
|
|
146
144
|
};
|
|
145
|
+
outputId: string;
|
|
146
|
+
outputChannel: "recap" | "artifact";
|
|
147
147
|
supersedesOutputId?: string | undefined;
|
|
148
148
|
}>;
|
|
@@ -40,6 +40,7 @@ exports.mountConsoleRoutes = mountConsoleRoutes;
|
|
|
40
40
|
const express_1 = __importDefault(require("express"));
|
|
41
41
|
const path_1 = __importDefault(require("path"));
|
|
42
42
|
const fs_1 = __importDefault(require("fs"));
|
|
43
|
+
const os_1 = __importDefault(require("os"));
|
|
43
44
|
const worktree_service_js_1 = require("./worktree-service.js");
|
|
44
45
|
const workflow_js_1 = require("../../types/workflow.js");
|
|
45
46
|
const dev_mode_js_1 = require("../../mcp/dev-mode.js");
|
|
@@ -135,6 +136,183 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
|
|
|
135
136
|
req.on('close', () => { sseClients.delete(res); });
|
|
136
137
|
res.on('close', () => { sseClients.delete(res); });
|
|
137
138
|
});
|
|
139
|
+
const daemonEventsDir = path_1.default.join(process.env['HOME'] ?? os_1.default.homedir(), '.workrail', 'events', 'daemon');
|
|
140
|
+
async function tailDaemonEvents(filePath, prevSize) {
|
|
141
|
+
try {
|
|
142
|
+
const stat = await fs_1.default.promises.stat(filePath);
|
|
143
|
+
if (stat.size <= prevSize)
|
|
144
|
+
return [];
|
|
145
|
+
const fd = await fs_1.default.promises.open(filePath, 'r');
|
|
146
|
+
const length = stat.size - prevSize;
|
|
147
|
+
const buf = Buffer.alloc(length);
|
|
148
|
+
try {
|
|
149
|
+
await fd.read(buf, 0, length, prevSize);
|
|
150
|
+
}
|
|
151
|
+
finally {
|
|
152
|
+
await fd.close();
|
|
153
|
+
}
|
|
154
|
+
const chunk = buf.toString('utf8');
|
|
155
|
+
return chunk
|
|
156
|
+
.split('\n')
|
|
157
|
+
.filter(Boolean)
|
|
158
|
+
.flatMap((line) => {
|
|
159
|
+
try {
|
|
160
|
+
return [JSON.parse(line)];
|
|
161
|
+
}
|
|
162
|
+
catch {
|
|
163
|
+
return [];
|
|
164
|
+
}
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
catch {
|
|
168
|
+
return [];
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
const SESSION_SSE_EVENT_KINDS = new Set([
|
|
172
|
+
'tool_called',
|
|
173
|
+
'tool_call_started',
|
|
174
|
+
'tool_call_completed',
|
|
175
|
+
'tool_call_failed',
|
|
176
|
+
'tool_error',
|
|
177
|
+
'step_advanced',
|
|
178
|
+
'session_completed',
|
|
179
|
+
'issue_reported',
|
|
180
|
+
'agent_stuck',
|
|
181
|
+
'llm_turn_started',
|
|
182
|
+
'llm_turn_completed',
|
|
183
|
+
'signal_emitted',
|
|
184
|
+
]);
|
|
185
|
+
app.get('/api/v2/sessions/:sessionId/events', async (req, res) => {
|
|
186
|
+
const { sessionId } = req.params;
|
|
187
|
+
const sessionResult = await consoleService.getSessionDetail(sessionId);
|
|
188
|
+
if (sessionResult.isErr()) {
|
|
189
|
+
const status = sessionResult.error.code === 'SESSION_LOAD_FAILED' ? 404 : 500;
|
|
190
|
+
res.status(status).json({ success: false, error: sessionResult.error.message });
|
|
191
|
+
return;
|
|
192
|
+
}
|
|
193
|
+
const sessionDetail = sessionResult.value;
|
|
194
|
+
if (!sessionDetail || !sessionDetail.runs || sessionDetail.runs.length === 0) {
|
|
195
|
+
res.status(404).json({ success: false, error: `Session not found: ${sessionId}` });
|
|
196
|
+
return;
|
|
197
|
+
}
|
|
198
|
+
res.setHeader('Content-Type', 'text/event-stream');
|
|
199
|
+
res.setHeader('Cache-Control', 'no-cache');
|
|
200
|
+
res.setHeader('Connection', 'keep-alive');
|
|
201
|
+
res.setHeader('X-Accel-Buffering', 'no');
|
|
202
|
+
res.flushHeaders();
|
|
203
|
+
res.write(`data: ${JSON.stringify({ kind: 'connected', sessionId })}\n\n`);
|
|
204
|
+
let currentLogDate = new Date().toISOString().slice(0, 10);
|
|
205
|
+
let currentLogPath = path_1.default.join(daemonEventsDir, `${currentLogDate}.jsonl`);
|
|
206
|
+
let fileOffset = 0;
|
|
207
|
+
try {
|
|
208
|
+
const stat = await fs_1.default.promises.stat(currentLogPath);
|
|
209
|
+
fileOffset = stat.size;
|
|
210
|
+
}
|
|
211
|
+
catch {
|
|
212
|
+
}
|
|
213
|
+
let isClosed = false;
|
|
214
|
+
let isProcessing = false;
|
|
215
|
+
let watcher = null;
|
|
216
|
+
const cleanup = () => {
|
|
217
|
+
if (isClosed)
|
|
218
|
+
return;
|
|
219
|
+
isClosed = true;
|
|
220
|
+
try {
|
|
221
|
+
watcher?.close();
|
|
222
|
+
}
|
|
223
|
+
catch { }
|
|
224
|
+
try {
|
|
225
|
+
if (!res.writableEnded)
|
|
226
|
+
res.end();
|
|
227
|
+
}
|
|
228
|
+
catch { }
|
|
229
|
+
};
|
|
230
|
+
const processNewEvents = async () => {
|
|
231
|
+
if (isClosed || isProcessing)
|
|
232
|
+
return;
|
|
233
|
+
isProcessing = true;
|
|
234
|
+
const todayDate = new Date().toISOString().slice(0, 10);
|
|
235
|
+
if (todayDate !== currentLogDate) {
|
|
236
|
+
currentLogDate = todayDate;
|
|
237
|
+
currentLogPath = path_1.default.join(daemonEventsDir, `${currentLogDate}.jsonl`);
|
|
238
|
+
fileOffset = 0;
|
|
239
|
+
}
|
|
240
|
+
const newEvents = await tailDaemonEvents(currentLogPath, fileOffset);
|
|
241
|
+
for (const event of newEvents) {
|
|
242
|
+
if (isClosed)
|
|
243
|
+
break;
|
|
244
|
+
const kind = typeof event['kind'] === 'string' ? event['kind'] : null;
|
|
245
|
+
const evtSessionId = typeof event['workrailSessionId'] === 'string'
|
|
246
|
+
? event['workrailSessionId']
|
|
247
|
+
: null;
|
|
248
|
+
if (!kind || !SESSION_SSE_EVENT_KINDS.has(kind))
|
|
249
|
+
continue;
|
|
250
|
+
if (evtSessionId !== sessionId)
|
|
251
|
+
continue;
|
|
252
|
+
try {
|
|
253
|
+
res.write(`data: ${JSON.stringify(event)}\n\n`);
|
|
254
|
+
}
|
|
255
|
+
catch {
|
|
256
|
+
cleanup();
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
if (kind === 'session_completed') {
|
|
260
|
+
cleanup();
|
|
261
|
+
return;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
try {
|
|
265
|
+
const stat = await fs_1.default.promises.stat(currentLogPath);
|
|
266
|
+
fileOffset = stat.size;
|
|
267
|
+
}
|
|
268
|
+
catch {
|
|
269
|
+
fileOffset = 0;
|
|
270
|
+
}
|
|
271
|
+
isProcessing = false;
|
|
272
|
+
};
|
|
273
|
+
try {
|
|
274
|
+
fs_1.default.mkdirSync(daemonEventsDir, { recursive: true });
|
|
275
|
+
}
|
|
276
|
+
catch { }
|
|
277
|
+
try {
|
|
278
|
+
watcher = fs_1.default.watch(daemonEventsDir, { recursive: false }, (_eventType, filename) => {
|
|
279
|
+
if (filename !== null && filename.endsWith('.jsonl')) {
|
|
280
|
+
void processNewEvents();
|
|
281
|
+
}
|
|
282
|
+
});
|
|
283
|
+
watcher.on('error', cleanup);
|
|
284
|
+
}
|
|
285
|
+
catch {
|
|
286
|
+
}
|
|
287
|
+
const keepaliveInterval = setInterval(() => {
|
|
288
|
+
if (isClosed) {
|
|
289
|
+
clearInterval(keepaliveInterval);
|
|
290
|
+
return;
|
|
291
|
+
}
|
|
292
|
+
try {
|
|
293
|
+
res.write(': keepalive\n\n');
|
|
294
|
+
}
|
|
295
|
+
catch {
|
|
296
|
+
clearInterval(keepaliveInterval);
|
|
297
|
+
cleanup();
|
|
298
|
+
}
|
|
299
|
+
}, 30000);
|
|
300
|
+
const maxConnectionTimeout = setTimeout(() => {
|
|
301
|
+
clearInterval(keepaliveInterval);
|
|
302
|
+
cleanup();
|
|
303
|
+
}, 4 * 60 * 60 * 1000);
|
|
304
|
+
req.on('close', () => {
|
|
305
|
+
clearInterval(keepaliveInterval);
|
|
306
|
+
clearTimeout(maxConnectionTimeout);
|
|
307
|
+
cleanup();
|
|
308
|
+
});
|
|
309
|
+
res.on('close', () => {
|
|
310
|
+
clearInterval(keepaliveInterval);
|
|
311
|
+
clearTimeout(maxConnectionTimeout);
|
|
312
|
+
cleanup();
|
|
313
|
+
});
|
|
314
|
+
void processNewEvents();
|
|
315
|
+
});
|
|
138
316
|
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
|
|
139
317
|
const PERF_FILE_READ_LIMIT_BYTES = 5 * 1024 * 1024;
|
|
140
318
|
async function readDiskEntries(perfFile) {
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# Design Candidates: Coordinator Artifact Protocol
|
|
2
|
+
|
|
3
|
+
**Status:** Candidate analysis complete
|
|
4
|
+
**Date:** 2026-04-18
|
|
5
|
+
**Task:** Implement wr.review_verdict schema, fix onComplete callback, update mr-review workflow to emit it, update coordinator to read artifacts before keyword-scanning
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Problem Understanding
|
|
10
|
+
|
|
11
|
+
### Core Tensions
|
|
12
|
+
|
|
13
|
+
**T1: Breaking interface vs. backward compatibility**
|
|
14
|
+
`CoordinatorDeps.getAgentResult` returns `Promise<string | null>` today. Changing it to `Promise<{ recapMarkdown: string | null; artifacts: readonly unknown[] }>` is a compile-time breaking change. All call sites (2 in coordinator, 2 in test fakes, 1 real implementation) must change simultaneously. TypeScript catches this at build, so risk is low -- but the change must be complete.
|
|
15
|
+
|
|
16
|
+
**T2: N+1 HTTP calls vs. tip-node-only simplicity**
|
|
17
|
+
ALL-node aggregation requires walking `runs[0].nodes` and fetching each node's detail individually. For a 6-phase workflow, that's 6 HTTP calls to localhost per session. The simple approach (tip node only) would miss a verdict artifact from any non-final step.
|
|
18
|
+
|
|
19
|
+
**T3: `required: false` vs. engine enforcement**
|
|
20
|
+
`outputContract` with `required: false` means the engine won't block if the artifact is absent. This is the correct transition strategy but means the coordinator must maintain two code paths (artifact + keyword-scan fallback) until the graduation criterion (10+ consecutive sessions with 0 fallback warnings) is met.
|
|
21
|
+
|
|
22
|
+
**T4: Schema strictness vs. forward compatibility**
|
|
23
|
+
`.strict()` rejects unknown fields (forward-incompatible). `.strip()` strips them silently (forward-compatible). The task spec says `.strict()`, which matches the `loop-control.ts` precedent. The design doc recommends `.strip()` for forward-compat. **Task spec wins** -- use `.strict()` to be consistent with existing schema patterns.
|
|
24
|
+
|
|
25
|
+
### Likely Seam
|
|
26
|
+
|
|
27
|
+
`CoordinatorDeps.getAgentResult` is the real boundary. It is already the I/O abstraction layer where the coordinator interacts with sessions. Changing the return type here forces all consumers to acknowledge the new shape without touching coordinator routing logic.
|
|
28
|
+
|
|
29
|
+
### What Makes This Hard
|
|
30
|
+
|
|
31
|
+
1. **Three separate `onComplete` sites:** `makeCompleteStepTool` (line 1249), `makeContinueWorkflowTool` (line 1046), and the closure definition (line 2096). TypeScript will catch signature mismatches on the closure but not at the two call sites if the closure's new parameter is optional.
|
|
32
|
+
2. **Exhaustiveness in the switch:** `artifact-contract-validator.ts` switch currently handles only `LOOP_CONTROL_CONTRACT_REF`. Adding `'wr.contracts.review_verdict'` to `ARTIFACT_CONTRACT_REFS` without adding a switch case causes `validateArtifactContract()` to hit the default `UNKNOWN_CONTRACT_REF` error for any step declaring this contract.
|
|
33
|
+
3. **`source?` field on ReviewFindings:** Adding `source` as required breaks 4 existing test literals. Making it optional (`source?`) is a minor type weakness but preserves backward compat.
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Philosophy Constraints
|
|
38
|
+
|
|
39
|
+
From CLAUDE.md:
|
|
40
|
+
- **Make illegal states unrepresentable:** `verdict: 'clean'|'minor'|'blocking'` not `string`. `source: 'artifact'|'keyword_scan'` not `string`.
|
|
41
|
+
- **Validate at boundaries:** Zod parse at coordinator read time + engine validation at advance time.
|
|
42
|
+
- **Errors are data:** `readVerdictArtifact()` returns `ReviewFindings | null`, not throws.
|
|
43
|
+
- **Functional/declarative:** `readVerdictArtifact()` is a pure function, composable with `parseFindingsFromNotes()`.
|
|
44
|
+
- **Prefer fakes over mocks:** The `makeFakeDeps()` pattern in tests is the established style.
|
|
45
|
+
|
|
46
|
+
**Conflict:** `required: false` during transition temporarily violates 'make illegal states unrepresentable' at the coordinator level. Accepted per design doc -- the fallback is explicit and time-boxed.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Impact Surface
|
|
51
|
+
|
|
52
|
+
Files that must change:
|
|
53
|
+
- `src/v2/durable-core/schemas/artifacts/review-verdict.ts` (new)
|
|
54
|
+
- `src/v2/durable-core/schemas/artifacts/index.ts` (ARTIFACT_CONTRACT_REFS)
|
|
55
|
+
- `src/v2/durable-core/domain/artifact-contract-validator.ts` (switch case)
|
|
56
|
+
- `src/daemon/workflow-runner.ts` (onComplete signature, WorkflowRunSuccess, final return)
|
|
57
|
+
- `src/cli-worktrain.ts` (getAgentResult implementation + return type)
|
|
58
|
+
- `src/coordinators/pr-review.ts` (CoordinatorDeps, ReviewFindings, readVerdictArtifact, call sites)
|
|
59
|
+
- `workflows/mr-review-workflow.agentic.v2.json` (phase-6 outputContract + prompt)
|
|
60
|
+
- `tests/unit/coordinator-pr-review.test.ts` (new tests + updated fakes)
|
|
61
|
+
|
|
62
|
+
Must remain consistent:
|
|
63
|
+
- `ConsoleNodeDetail.artifacts` -- no change needed, already returns artifacts
|
|
64
|
+
- `projectArtifactsV2()` -- no change needed, already projects artifacts
|
|
65
|
+
- `delivery-action.ts` -- reads `lastStepNotes`, not artifacts; no change needed
|
|
66
|
+
- `makeSpawnAgentTool()` -- returns `{ notes: string }` only; `lastStepArtifacts` gap acknowledged, post-MVP
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Candidates
|
|
71
|
+
|
|
72
|
+
### Candidate A: Exact task spec implementation (RECOMMENDED)
|
|
73
|
+
|
|
74
|
+
**Summary:** Implement all three changes exactly as specified: fix `onComplete` to forward `params.artifacts`, add `wr.review_verdict` schema with `.strict()`, update `getAgentResult` to aggregate ALL-node artifacts, add `readVerdictArtifact()` pure function with keyword-scan fallback.
|
|
75
|
+
|
|
76
|
+
**Tensions resolved:**
|
|
77
|
+
- T1: TypeScript's compile-time errors ensure the `getAgentResult` return-type change is applied at every call site
|
|
78
|
+
- T3: `required: false` + keyword-scan fallback avoids session blocking
|
|
79
|
+
|
|
80
|
+
**Tensions accepted:**
|
|
81
|
+
- T2: N+1 calls (accepted -- localhost, negligible latency)
|
|
82
|
+
- T4: `.strict()` over `.strip()` (follows existing precedent)
|
|
83
|
+
|
|
84
|
+
**Boundary:** `CoordinatorDeps.getAgentResult` return type change. Best-fit because it is already the established abstraction boundary for coordinator-to-session I/O. All consumers must acknowledge the change at this single point.
|
|
85
|
+
|
|
86
|
+
**Failure mode:** Missing the `makeContinueWorkflowTool` `onComplete` call site (line 1046) when updating `makeCompleteStepTool` (line 1249). Both tools call `onComplete` but are in separate functions. TypeScript will not catch this if `artifacts?` is optional in the signature -- the closure will be called with `undefined` for `artifacts` from `continue_workflow`, and `lastStepArtifacts` will be silently empty.
|
|
87
|
+
|
|
88
|
+
**Repo-pattern relationship:** Follows `loop-control.ts` schema pattern exactly. Follows `WorkflowRunSuccess.lastStepNotes` conditional spread pattern. Follows `makeFakeDeps()` fake deps testing pattern. No new patterns introduced.
|
|
89
|
+
|
|
90
|
+
**Gains:**
|
|
91
|
+
- Coordinator reads typed data for sessions that emit the artifact
|
|
92
|
+
- Additive: all existing sessions continue to work via fallback
|
|
93
|
+
- Zero new infrastructure: 7 file changes + 1 new file
|
|
94
|
+
- Artifact visible in console (`hasArtifacts: true` on phase-6 node)
|
|
95
|
+
- Observability: `source: 'artifact'|'keyword_scan'` + logging enables emission rate tracking
|
|
96
|
+
|
|
97
|
+
**Losses:**
|
|
98
|
+
- N+1 HTTP calls per session for artifact aggregation
|
|
99
|
+
- Two coordinator code paths until graduation
|
|
100
|
+
|
|
101
|
+
**Scope:** Best-fit. Minimal delta, highest backward compatibility, clear graduation path.
|
|
102
|
+
|
|
103
|
+
**Philosophy:** Honors validate-at-boundaries, functional/declarative, prefer-fakes, exhaustiveness (closed enum `source`). Minor tension: `source?` optional field vs. type-safety-first. Temporary conflict with 'make illegal states unrepresentable' (accepted).
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
### Candidate B: Tip-node only (simpler, misses design intent)
|
|
108
|
+
|
|
109
|
+
**Summary:** Only read tip node's artifacts -- matching the existing `preferredTipNodeId` pattern in `getAgentResult` today. Avoids N+1 calls.
|
|
110
|
+
|
|
111
|
+
**Tensions resolved:**
|
|
112
|
+
- T2: 1 HTTP call vs. N+1
|
|
113
|
+
|
|
114
|
+
**Tensions accepted:**
|
|
115
|
+
- Violates task spec 'CRITICAL: must aggregate artifacts across ALL session nodes'
|
|
116
|
+
- If a verdict artifact is on step N-1 and the workflow gains a post-synthesis confirmation step N, coordinator silently gets zero artifacts
|
|
117
|
+
|
|
118
|
+
**Failure mode:** Silent data loss when artifact is on a non-final node. This is the ORANGE-1 constraint from the design doc.
|
|
119
|
+
|
|
120
|
+
**Scope:** Too narrow -- explicitly contradicts task requirement.
|
|
121
|
+
|
|
122
|
+
**Why rejected:** The task spec uses 'CRITICAL' emphasis for ALL-node aggregation. Disqualified.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Comparison and Recommendation
|
|
127
|
+
|
|
128
|
+
**Recommendation: Candidate A.** No contest -- Candidate B is disqualified by the task spec.
|
|
129
|
+
|
|
130
|
+
| Criterion | A | B |
|
|
131
|
+
|-----------|---|---|
|
|
132
|
+
| ALL-node aggregation (task spec) | Correct | WRONG |
|
|
133
|
+
| N+1 calls | Accepted | Avoided |
|
|
134
|
+
| Backward compat | Full | Same |
|
|
135
|
+
| Schema precedent | Follows exactly | N/A |
|
|
136
|
+
| Philosophy fit | Best | N/A |
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Self-Critique
|
|
141
|
+
|
|
142
|
+
**Strongest counter-argument:** N+1 calls add latency. For a 6-step session, that's 6 additional HTTP calls. Acceptable on localhost (~50-100ms) but could be optimized with a `/api/v2/sessions/:id/artifacts` aggregation endpoint (Candidate C from the design doc). Evidence required: a second coordinator that needs this, or performance data showing N+1 calls are a problem.
|
|
143
|
+
|
|
144
|
+
**Narrower option that almost works:** Tip-node only. Loses for the explicit task-spec reason.
|
|
145
|
+
|
|
146
|
+
**Broader option:** Add `/api/v2/sessions/:id/artifacts` server-side endpoint. Right long-term direction, premature now.
|
|
147
|
+
|
|
148
|
+
**Assumption that would invalidate this design:** `runs[0].nodes` in the session detail response returning objects without `nodeId` fields. This has been ruled out: the `ConsoleDagNode` type confirms that `nodeId: string` is always present.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Open Questions for the Main Agent
|
|
153
|
+
|
|
154
|
+
1. Should `source?` be optional or required on `ReviewFindings`? Optional breaks fewer existing tests but weakens the type. The 4 existing `ReviewFindings` literals in tests would need `source` added if required.
|
|
155
|
+
2. Should `readVerdictArtifact()` log a divergence warning when both artifact severity and keyword-scan severity are available but disagree? The design doc recommends this (ORANGE finding). Adds ~10 LOC but improves observability.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Design Review Findings: Coordinator Artifact Protocol
|
|
2
|
+
|
|
3
|
+
**Status:** Review complete
|
|
4
|
+
**Date:** 2026-04-18
|
|
5
|
+
**Design reviewed:** Candidate A from coordinator-artifact-protocol-design-candidates.md
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Tradeoff Review
|
|
10
|
+
|
|
11
|
+
| Tradeoff | Acceptable? | When it stops being acceptable |
|
|
12
|
+
|----------|-------------|-------------------------------|
|
|
13
|
+
| N+1 HTTP calls for all-node aggregation | Yes (localhost, ~50-100ms) | If coordinator is called for sessions with 50+ nodes |
|
|
14
|
+
| `source?` optional on `ReviewFindings` | Yes (observability only, not routing) | If future code switches exhaustively on `source` |
|
|
15
|
+
| `.strict()` schema | Yes (follows existing precedent) | If LLM consistently emits extra fields causing Zod failures |
|
|
16
|
+
| `required: false` in outputContract | Yes (transition strategy) | Once 10+ consecutive sessions confirm 100% artifact emission |
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Failure Mode Review
|
|
21
|
+
|
|
22
|
+
| Failure Mode | Severity | Handling | Missing Mitigation |
|
|
23
|
+
|-------------|----------|----------|--------------------|
|
|
24
|
+
| Missing `makeContinueWorkflowTool` onComplete update | LOW | TypeScript won't catch (optional param) -- manual verification required | Code comment at both call sites |
|
|
25
|
+
| Per-node HTTP fetch failure during aggregation | LOW | Graceful fallback to keyword scan | Per-node try/catch + WARN logging |
|
|
26
|
+
| Agent emits malformed artifact (wrong enum, missing field) | MEDIUM | `safeParse` fails silently without logging | `[WARN coord:reason=artifact_parse_failed]` logging REQUIRED |
|
|
27
|
+
| `runs[0].nodes` undefined for empty sessions | NONE | Null check + empty-array fallback | None |
|
|
28
|
+
| `required: false` default behavior | NONE | Engine correctly reads `required: false` and skips validation | None |
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Runner-Up / Simpler Alternative Review
|
|
33
|
+
|
|
34
|
+
**Runner-up (tip-node only):** Disqualified by task spec 'CRITICAL: must aggregate artifacts across ALL session nodes'. No elements worth incorporating.
|
|
35
|
+
|
|
36
|
+
**Simpler variant (skip `lastStepArtifacts`):** The pr-review coordinator reads via HTTP, not via `WorkflowRunSuccess`, so skipping `lastStepArtifacts` would still satisfy the coordinator use case. Rejected because the task spec explicitly requires it, and it's the foundation for `spawn_agent` artifact surfacing (post-MVP).
|
|
37
|
+
|
|
38
|
+
**Simpler variant (skip `onComplete` change):** Would leave `WorkflowRunSuccess.lastStepArtifacts` always undefined. Rejected -- inconsistent state.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Philosophy Alignment
|
|
43
|
+
|
|
44
|
+
**Satisfied:** validate-at-boundaries, errors-as-data, functional/declarative, prefer-fakes, exhaustiveness (closed enums), immutability.
|
|
45
|
+
|
|
46
|
+
**Under tension (accepted):**
|
|
47
|
+
- `source?` optional vs. type-safety-first: minor, observability-only field
|
|
48
|
+
- `required: false` vs. make-illegal-states-unrepresentable: time-boxed transition strategy
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Findings
|
|
53
|
+
|
|
54
|
+
### RED (must fix before shipping)
|
|
55
|
+
|
|
56
|
+
**R1: `readVerdictArtifact()` must log on malformed artifact**
|
|
57
|
+
If the agent emits an artifact with `kind: 'wr.review_verdict'` that does not match the schema, `safeParse` fails silently. Without logging, FM3 (the malformed-artifact row of the Failure Mode Review table above) is invisible, which prevents monitoring of the artifact emission rate.
|
|
58
|
+
|
|
59
|
+
Required: `process.stderr.write('[WARN coord:reason=artifact_parse_failed ...]')` when `safeParse` fails AND the artifact has `kind === 'wr.review_verdict'`.
|
|
60
|
+
|
|
61
|
+
**R2: Per-node fetch errors must be caught individually**
|
|
62
|
+
The current outer `try/catch` in `getAgentResult` covers the entire function. The new implementation walks multiple nodes -- if one node fetch throws, the outer catch aborts the entire aggregation. Each per-node fetch must be wrapped individually so one failure doesn't discard all other nodes' artifacts.
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
### ORANGE (fix before C1 -> C2 graduation)
|
|
67
|
+
|
|
68
|
+
**O1: Log when keyword scan fires on a session that had artifacts**
|
|
69
|
+
The coordinator cannot distinguish 'artifact never emitted' from 'artifact emitted but invalid' without checking. Add a log entry when `readVerdictArtifact` returns null but `artifacts.length > 0`. This enables the graduation metric (10+ consecutive sessions with 0 fallback warnings).
|
|
70
|
+
|
|
71
|
+
Required log: `[INFO coord:source=keyword_scan reason=no_valid_artifact artifactCount=N]`
|
|
72
|
+
|
|
73
|
+
**O2: Divergence detection warning**
|
|
74
|
+
If both artifact severity (from `readVerdictArtifact`) and keyword-scan severity (from `parseFindingsFromNotes`) are available and disagree, log at WARN. Design doc recommends this (ORANGE finding). Protects against semantic inconsistency between notes and artifact.
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
### YELLOW (future consideration)
|
|
79
|
+
|
|
80
|
+
**Y1: `source?` optional on `ReviewFindings`**
|
|
81
|
+
Making `source` required would improve type safety. Currently deferred to avoid breaking 4 existing test literals. When those tests are updated for other reasons, upgrade `source` to required.
|
|
82
|
+
|
|
83
|
+
**Y2: Post-graduation: remove keyword scan fallback**
|
|
84
|
+
Once the graduation criterion is met, `parseFindingsFromNotes` callers can be removed from the coordinator routing logic. The `unknown` severity variant can also be removed from `ReviewSeverity`.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Recommended Revisions
|
|
89
|
+
|
|
90
|
+
1. **R1:** In `readVerdictArtifact()`, check if `raw` object has `kind === 'wr.review_verdict'` before `safeParse`. If kind matches but safeParse fails, log WARN.
|
|
91
|
+
2. **R2:** In `getAgentResult()` implementation, wrap each per-node HTTP fetch in its own try/catch. Failed nodes are skipped with a WARN log; successful nodes contribute their artifacts.
|
|
92
|
+
3. **O1:** After the artifact/keyword-scan decision in the coordinator, log `source` with the artifact count context.
|
|
93
|
+
4. **O2:** Add divergence check: run keyword scan on `recapMarkdown` when an artifact is found; if severities disagree, log WARN.
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Residual Concerns
|
|
98
|
+
|
|
99
|
+
1. **`continue_workflow` onComplete call site:** `makeContinueWorkflowTool` is marked DEPRECATED for daemon sessions, but it still calls `onComplete`. The new `artifacts?` parameter must be passed from `params.artifacts` at line 1046. Must be verified manually -- TypeScript won't catch a missing optional parameter.
|
|
100
|
+
|
|
101
|
+
2. **`.strict()` vs. LLM reliability:** If the LLM adds extra fields (e.g., `rationale`, `notes`) to the artifact, `.strict()` causes Zod failure. With `required: false`, this just triggers the keyword-scan fallback. Acceptable during transition. If the failure rate is high in production, consider switching to `.strip()`.
|
|
102
|
+
|
|
103
|
+
3. **Convention only:** `V1` suffix on `ReviewVerdictArtifactV1Schema` is a convention, not enforced. No migration path exists for schema changes. Future schema evolution must use a new type (`ReviewVerdictArtifactV2Schema`) in parallel until old sessions are retired.
|