@exaudeus/workrail 3.39.0 → 3.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/dist/cli/commands/init.js +0 -3
  2. package/dist/cli-worktrain.js +58 -26
  3. package/dist/cli.js +0 -18
  4. package/dist/config/app-config.d.ts +0 -16
  5. package/dist/config/app-config.js +0 -14
  6. package/dist/config/config-file.js +0 -3
  7. package/dist/console-ui/assets/index-CQt4UhPB.js +28 -0
  8. package/dist/console-ui/assets/index-DGj8EsFR.css +1 -0
  9. package/dist/console-ui/index.html +2 -2
  10. package/dist/coordinators/pr-review.d.ts +23 -1
  11. package/dist/coordinators/pr-review.js +224 -5
  12. package/dist/daemon/daemon-events.d.ts +9 -1
  13. package/dist/daemon/soul-template.d.ts +2 -2
  14. package/dist/daemon/soul-template.js +11 -1
  15. package/dist/daemon/workflow-runner.d.ts +17 -3
  16. package/dist/daemon/workflow-runner.js +401 -28
  17. package/dist/di/container.js +1 -25
  18. package/dist/di/tokens.d.ts +0 -3
  19. package/dist/di/tokens.js +0 -3
  20. package/dist/engine/engine-factory.js +0 -1
  21. package/dist/infrastructure/console-defaults.d.ts +1 -0
  22. package/dist/infrastructure/console-defaults.js +4 -0
  23. package/dist/infrastructure/session/index.d.ts +0 -1
  24. package/dist/infrastructure/session/index.js +1 -3
  25. package/dist/manifest.json +124 -124
  26. package/dist/mcp/handlers/session.d.ts +1 -0
  27. package/dist/mcp/handlers/session.js +61 -13
  28. package/dist/mcp/output-schemas.d.ts +10 -10
  29. package/dist/mcp/server.js +1 -18
  30. package/dist/mcp/tools.d.ts +12 -12
  31. package/dist/mcp/transports/http-entry.js +0 -2
  32. package/dist/mcp/transports/stdio-entry.js +1 -2
  33. package/dist/mcp/types.d.ts +0 -2
  34. package/dist/trigger/daemon-console.d.ts +2 -0
  35. package/dist/trigger/daemon-console.js +1 -1
  36. package/dist/trigger/trigger-listener.d.ts +2 -0
  37. package/dist/trigger/trigger-listener.js +3 -1
  38. package/dist/trigger/trigger-router.d.ts +4 -3
  39. package/dist/trigger/trigger-router.js +13 -5
  40. package/dist/trigger/trigger-store.js +17 -4
  41. package/dist/types/workflow-source.d.ts +0 -1
  42. package/dist/types/workflow-source.js +3 -6
  43. package/dist/types/workflow.d.ts +1 -1
  44. package/dist/types/workflow.js +1 -2
  45. package/dist/v2/durable-core/domain/artifact-contract-validator.js +66 -0
  46. package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.d.ts +25 -0
  47. package/dist/v2/durable-core/schemas/artifacts/coordinator-signal.js +31 -0
  48. package/dist/v2/durable-core/schemas/artifacts/index.d.ts +3 -1
  49. package/dist/v2/durable-core/schemas/artifacts/index.js +14 -1
  50. package/dist/v2/durable-core/schemas/artifacts/review-verdict.d.ts +41 -0
  51. package/dist/v2/durable-core/schemas/artifacts/review-verdict.js +30 -0
  52. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +236 -236
  53. package/dist/v2/durable-core/schemas/session/events.d.ts +50 -50
  54. package/dist/v2/durable-core/schemas/session/gaps.d.ts +2 -2
  55. package/dist/v2/durable-core/schemas/session/manifest.d.ts +4 -4
  56. package/dist/v2/durable-core/schemas/session/outputs.d.ts +8 -8
  57. package/dist/v2/usecases/console-routes.d.ts +2 -1
  58. package/dist/v2/usecases/console-routes.js +207 -5
  59. package/dist/v2/usecases/console-service.js +14 -0
  60. package/dist/v2/usecases/console-types.d.ts +1 -0
  61. package/docs/authoring.md +16 -16
  62. package/docs/design/coordinator-artifact-protocol-design-candidates.md +155 -0
  63. package/docs/design/coordinator-artifact-protocol-design-review.md +103 -0
  64. package/docs/design/coordinator-artifact-protocol-implementation-plan.md +259 -0
  65. package/docs/design/coordinator-message-queue-drain-plan.md +241 -0
  66. package/docs/design/coordinator-message-queue-drain-review.md +120 -0
  67. package/docs/design/coordinator-message-queue-drain.md +289 -0
  68. package/docs/design/shaping-workflow-external-research.md +119 -0
  69. package/docs/discovery/late-bound-goals-impl-plan.md +147 -0
  70. package/docs/discovery/late-bound-goals-review.md +82 -0
  71. package/docs/discovery/late-bound-goals.md +118 -0
  72. package/docs/discovery/steer-endpoint-design-candidates.md +288 -0
  73. package/docs/discovery/steer-endpoint-design-review-findings.md +104 -0
  74. package/docs/discovery/steer-endpoint-implementation-plan.md +284 -0
  75. package/docs/ideas/backlog.md +447 -97
  76. package/docs/ideas/design-candidates-console-session-tree-impl.md +64 -0
  77. package/docs/ideas/design-candidates-session-tree-view.md +196 -0
  78. package/docs/ideas/design-review-findings-console-session-tree-impl.md +75 -0
  79. package/docs/ideas/design-review-findings-session-tree-view.md +88 -0
  80. package/docs/ideas/implementation_plan_session_tree_view.md +238 -0
  81. package/package.json +2 -1
  82. package/spec/authoring-spec.json +16 -16
  83. package/spec/shape.schema.json +178 -0
  84. package/spec/workflow-tags.json +232 -47
  85. package/workflows/coding-task-workflow-agentic.json +491 -480
  86. package/workflows/mr-review-workflow.agentic.v2.json +5 -1
  87. package/workflows/wr.shaping.json +182 -0
  88. package/dist/console-ui/assets/index-3oXZ_A9m.js +0 -28
  89. package/dist/console-ui/assets/index-8dh0Psu-.css +0 -1
  90. package/dist/infrastructure/session/DashboardHeartbeat.d.ts +0 -8
  91. package/dist/infrastructure/session/DashboardHeartbeat.js +0 -39
  92. package/dist/infrastructure/session/DashboardLockRelease.d.ts +0 -2
  93. package/dist/infrastructure/session/DashboardLockRelease.js +0 -29
  94. package/dist/infrastructure/session/HttpServer.d.ts +0 -60
  95. package/dist/infrastructure/session/HttpServer.js +0 -912
  96. package/workflows/coding-task-workflow-agentic.lean.v2.json +0 -648
  97. package/workflows/coding-task-workflow-agentic.v2.json +0 -324
@@ -0,0 +1,103 @@
1
+ # Design Review Findings: Coordinator Artifact Protocol
2
+
3
+ **Status:** Review complete
4
+ **Date:** 2026-04-18
5
+ **Design reviewed:** Candidate A from coordinator-artifact-protocol-design-candidates.md
6
+
7
+ ---
8
+
9
+ ## Tradeoff Review
10
+
11
+ | Tradeoff | Acceptable? | When it stops being acceptable |
12
+ |----------|-------------|-------------------------------|
13
+ | N+1 HTTP calls for all-node aggregation | Yes (localhost, ~50-100ms) | If coordinator is called for sessions with 50+ nodes |
14
+ | `source?` optional on `ReviewFindings` | Yes (observability only, not routing) | If future code switches exhaustively on `source` |
15
+ | `.strict()` schema | Yes (follows existing precedent) | If LLM consistently emits extra fields causing Zod failures |
16
+ | `required: false` in outputContract | Yes (transition strategy) | Once 10+ consecutive sessions confirm 100% artifact emission |
17
+
18
+ ---
19
+
20
+ ## Failure Mode Review
21
+
22
+ | Failure Mode | Severity | Handling | Missing Mitigation |
23
+ |-------------|----------|----------|--------------------|
24
+ | Missing `makeContinueWorkflowTool` onComplete update | LOW | TypeScript won't catch this (the parameter is optional) -- manual verification required | Code comment at both call sites |
25
+ | Per-node HTTP fetch failure during aggregation | LOW | Graceful fallback to keyword scan | Per-node try/catch + WARN logging |
26
+ | Agent emits malformed artifact (wrong enum, missing field) | MEDIUM | `safeParse` fails silently without logging | `[WARN coord:reason=artifact_parse_failed]` logging REQUIRED |
27
+ | `runs[0].nodes` undefined for empty sessions | NONE | Null check + empty-array fallback | None |
28
+ | `required: false` default behavior | NONE | Engine correctly reads `required: false` and skips validation | None |
29
+
30
+ ---
31
+
32
+ ## Runner-Up / Simpler Alternative Review
33
+
34
+ **Runner-up (tip-node only):** Disqualified by task spec 'CRITICAL: must aggregate artifacts across ALL session nodes'. No elements worth incorporating.
35
+
36
+ **Simpler variant (skip `lastStepArtifacts`):** The pr-review coordinator reads via HTTP, not via `WorkflowRunSuccess`, so skipping `lastStepArtifacts` would still satisfy the coordinator use case. Rejected because the task spec explicitly requires it, and it's the foundation for `spawn_agent` artifact surfacing (post-MVP).
37
+
38
+ **Simpler variant (skip `onComplete` change):** Would leave `WorkflowRunSuccess.lastStepArtifacts` always undefined. Rejected -- inconsistent state.
39
+
40
+ ---
41
+
42
+ ## Philosophy Alignment
43
+
44
+ **Satisfied:** validate-at-boundaries, errors-as-data, functional/declarative, prefer-fakes, exhaustiveness (closed enums), immutability.
45
+
46
+ **Under tension (accepted):**
47
+ - `source?` optional vs. type-safety-first: minor, observability-only field
48
+ - `required: false` vs. make-illegal-states-unrepresentable: time-boxed transition strategy
49
+
50
+ ---
51
+
52
+ ## Findings
53
+
54
+ ### RED (must fix before shipping)
55
+
56
+ **R1: `readVerdictArtifact()` must log on malformed artifact**
57
+ If the agent emits an artifact with `kind: 'wr.review_verdict'` but the wrong schema, `safeParse` fails silently. Without logging, FM3 (the malformed-artifact failure mode, third row of the Failure Mode Review table) is invisible, which makes it impossible to monitor the artifact emission rate.
58
+
59
+ Required: `process.stderr.write('[WARN coord:reason=artifact_parse_failed ...]')` when `safeParse` fails AND the artifact has `kind === 'wr.review_verdict'`.
60
+
61
+ **R2: Per-node fetch errors must be caught individually**
62
+ The current outer `try/catch` in `getAgentResult` covers the entire function. The new implementation walks multiple nodes -- if one node fetch throws, the outer catch aborts the entire aggregation. Each per-node fetch must be wrapped individually so one failure doesn't discard all other nodes' artifacts.
63
+
64
+ ---
65
+
66
+ ### ORANGE (fix before C1 -> C2 graduation)
67
+
68
+ **O1: Log when keyword scan fires on a session that had artifacts**
69
+ The coordinator cannot distinguish 'artifact never emitted' from 'artifact emitted but invalid' without checking. Add a log entry when `readVerdictArtifact` returns null but `artifacts.length > 0`. This enables the graduation metric (10+ sessions with 0 fallback warnings).
70
+
71
+ Required log: `[INFO coord:source=keyword_scan reason=no_valid_artifact artifactCount=N]`
72
+
73
+ **O2: Divergence detection warning**
74
+ If both artifact severity (from `readVerdictArtifact`) and keyword-scan severity (from `parseFindingsFromNotes`) are available and disagree, log at WARN. The design doc recommends this (as an ORANGE finding). It protects against semantic inconsistency between the notes and the artifact.
75
+
76
+ ---
77
+
78
+ ### YELLOW (future consideration)
79
+
80
+ **Y1: `source?` optional on `ReviewFindings`**
81
+ Making `source` required would improve type safety. Currently deferred to avoid breaking 4 existing test literals. When those tests are updated for other reasons, upgrade `source` to required.
82
+
83
+ **Y2: Post-graduation: remove keyword scan fallback**
84
+ Once the graduation criterion is met, `parseFindingsFromNotes` callers can be removed from the coordinator routing logic. The `unknown` severity variant can also be removed from `ReviewSeverity`.
85
+
86
+ ---
87
+
88
+ ## Recommended Revisions
89
+
90
+ 1. **R1:** In `readVerdictArtifact()`, check if `raw` object has `kind === 'wr.review_verdict'` before `safeParse`. If kind matches but safeParse fails, log WARN.
91
+ 2. **R2:** In `getAgentResult()` implementation, wrap each per-node HTTP fetch in its own try/catch. Failed nodes are skipped with a WARN log; successful nodes contribute their artifacts.
92
+ 3. **O1:** After the artifact/keyword-scan decision in the coordinator, log `source` with the artifact count context.
93
+ 4. **O2:** Add divergence check: run keyword scan on `recapMarkdown` when an artifact is found; if severities disagree, log WARN.
94
+
95
+ ---
96
+
97
+ ## Residual Concerns
98
+
99
+ 1. **`continue_workflow` onComplete call site:** `makeContinueWorkflowTool` is marked DEPRECATED for daemon sessions, but it still calls `onComplete`. The new `artifacts?` parameter must be passed from `params.artifacts` at line 1046. Must be verified manually -- TypeScript won't catch a missing optional parameter.
100
+
101
+ 2. **`.strict()` vs. LLM reliability:** If the LLM adds extra fields (e.g., `rationale`, `notes`) to the artifact, `.strict()` causes Zod failure. With `required: false`, this just triggers the keyword-scan fallback. Acceptable during transition. If the failure rate is high in production, consider switching to `.strip()`.
102
+
103
+ 3. **Convention only:** The `V1` suffix on `ReviewVerdictArtifactV1Schema` is a convention, not enforced. No migration path exists for schema changes. Future schema evolution must use a new type (`ReviewVerdictArtifactV2Schema`) in parallel until old sessions are retired.
@@ -0,0 +1,259 @@
1
+ # Implementation Plan: Coordinator Artifact Protocol
2
+
3
+ **Date:** 2026-04-18
4
+ **Branch:** `feat/coordinator-artifact-protocol`
5
+
6
+ ---
7
+
8
+ ## Problem Statement
9
+
10
+ The PR review coordinator (`src/coordinators/pr-review.ts`) extracts review severity from completed review sessions by running a keyword scan on free-form step notes. The coordinator ignores the `artifacts[]` field that `GET /api/v2/sessions/:id/nodes/:nodeId` already returns. This makes severity extraction brittle and unmeasurable.
11
+
12
+ The fix: define a `wr.review_verdict` artifact schema, update the final handoff step to emit it, update `getAgentResult()` to return artifacts alongside notes, and update the coordinator to try the artifact path before the keyword scan.
13
+
14
+ ---
15
+
16
+ ## Acceptance Criteria
17
+
18
+ 1. `npm run build` completes with 0 TypeScript errors
19
+ 2. `tests/unit/coordinator-pr-review.test.ts` passes (all existing tests + new `readVerdictArtifact` tests)
20
+ 3. `readVerdictArtifact([{ kind: 'wr.review_verdict', verdict: 'clean', ... }])` returns `{ severity: 'clean', source: 'artifact', ... }`
21
+ 4. `readVerdictArtifact([])` returns `null`
22
+ 5. `readVerdictArtifact([{ kind: 'wr.review_verdict', verdict: 'INVALID' }])` returns `null` and logs WARN
23
+ 6. `CoordinatorDeps.getAgentResult` return type is `Promise<{ recapMarkdown: string | null; artifacts: readonly unknown[] }>`
24
+ 7. `WorkflowRunSuccess` has optional field `lastStepArtifacts?: readonly unknown[]`
25
+ 8. `mr-review-workflow.agentic.v2.json` phase-6-final-handoff has `outputContract: { contractRef: 'wr.contracts.review_verdict', required: false }`
26
+ 9. `isValidContractRef('wr.contracts.review_verdict')` returns `true`
27
+ 10. `validateArtifactContract([{ kind: 'wr.review_verdict', verdict: 'clean', ... }], { contractRef: 'wr.contracts.review_verdict' })` returns `{ valid: true, artifact: ... }`
28
+
29
+ ---
30
+
31
+ ## Non-Goals
32
+
33
+ - Do NOT add a `/api/v2/sessions/:id/artifacts` server-side aggregation endpoint
34
+ - Do NOT change `required: false` to `required: true` (post-graduation decision)
35
+ - Do NOT remove the keyword-scan fallback from `parseFindingsFromNotes`
36
+ - Do NOT add a `coordinatorProtocol` field to the workflow JSON (deferred)
37
+ - Do NOT add artifacts to `spawn_agent` return value (post-MVP)
38
+ - Do NOT make `source` required on `ReviewFindings` (breaking change deferred)
39
+
40
+ ---
41
+
42
+ ## Philosophy Constraints
43
+
44
+ - **Make illegal states unrepresentable:** `verdict`, `source`, `confidence` use closed enums
45
+ - **Validate at boundaries:** Zod `safeParse` in `readVerdictArtifact()`; engine validation via `validateArtifactContract()`
46
+ - **Errors are data:** `readVerdictArtifact()` returns `ReviewFindings | null`, not throws
47
+ - **Functional/declarative:** `readVerdictArtifact()` is a pure function of its input, apart from the diagnostic stderr logging required by Invariant 5
48
+ - **Prefer fakes over mocks:** New tests use `makeFakeDeps()` pattern
49
+
50
+ ---
51
+
52
+ ## Invariants
53
+
54
+ 1. `required: false` in outputContract -- never block sessions during transition
55
+ 2. Schema registration (`ARTIFACT_CONTRACT_REFS`) MUST be done before workflow JSON update (compiler validates at load time via `isValidContractRef()`)
56
+ 3. Keyword-scan fallback MUST remain live in `parseFindingsFromNotes`
57
+ 4. All call sites of `CoordinatorDeps.getAgentResult` MUST handle `{ recapMarkdown, artifacts }` shape
58
+ 5. `readVerdictArtifact()` MUST log `[WARN coord:reason=artifact_parse_failed]` when kind matches but safeParse fails
59
+ 6. Per-node HTTP fetch failures MUST be caught individually (not by outer try/catch)
60
+ 7. `makeContinueWorkflowTool` AND `makeCompleteStepTool` MUST both pass artifacts to `onComplete`
61
+
62
+ ---
63
+
64
+ ## Selected Approach
65
+
66
+ **Candidate A:** Three ordered changes, all additive, following existing repo patterns exactly.
67
+
68
+ **Rationale:** Zero new infrastructure; follows `loop-control.ts` schema pattern; follows `WorkflowRunSuccess.lastStepNotes` conditional spread pattern; follows `makeFakeDeps()` testing pattern; backward compatible via `required: false` + keyword-scan fallback.
69
+
70
+ **Runner-up:** Tip-node only artifact read. Disqualified by task spec 'CRITICAL: must aggregate artifacts across ALL session nodes'.
71
+
72
+ ---
73
+
74
+ ## Slices
75
+
76
+ ### Slice 1: Schema registration (prerequisite for all other changes)
77
+
78
+ **Files:**
79
+ - `src/v2/durable-core/schemas/artifacts/review-verdict.ts` (NEW)
80
+ - `src/v2/durable-core/schemas/artifacts/index.ts` (update)
81
+ - `src/v2/durable-core/domain/artifact-contract-validator.ts` (update)
82
+
83
+ **Work:**
84
+ 1. Create `review-verdict.ts` following `loop-control.ts` pattern:
85
+ - `REVIEW_VERDICT_CONTRACT_REF = 'wr.contracts.review_verdict' as const`
86
+ - `ReviewVerdictArtifactV1Schema = z.object({ kind: z.literal('wr.review_verdict'), verdict: z.enum(['clean', 'minor', 'blocking']), confidence: z.enum(['high', 'medium', 'low']), findings: z.array(z.object({ severity: z.enum(['critical', 'major', 'minor', 'nit']), summary: z.string().min(1) }).strict()), summary: z.string().min(1) }).strict()`
87
+ - `isReviewVerdictArtifact()` type guard
88
+ - `parseReviewVerdictArtifact()` convenience function
89
+ 2. Update `index.ts`: export all new symbols, add `'wr.contracts.review_verdict'` to `ARTIFACT_CONTRACT_REFS`
90
+ 3. Update `artifact-contract-validator.ts`: import new symbols, add `case REVIEW_VERDICT_CONTRACT_REF:` to switch with `validateReviewVerdictContract()` helper
91
+
92
+ **Done when:** `isValidContractRef('wr.contracts.review_verdict')` returns `true`; `validateArtifactContract([{ kind: 'wr.review_verdict', ... }], { contractRef: 'wr.contracts.review_verdict' })` returns `{ valid: true, artifact: ... }`.
93
+
94
+ ---
95
+
96
+ ### Slice 2: Fix onComplete callback signature
97
+
98
+ **Files:**
99
+ - `src/daemon/workflow-runner.ts`
100
+
101
+ **Work:**
102
+ 1. Change `onComplete` closure definition (line 2096) from `(notes: string | undefined): void` to `(notes: string | undefined, artifacts?: readonly unknown[]): void`
103
+ 2. Add `let lastStepArtifacts: readonly unknown[] | undefined;` near `let lastStepNotes`
104
+ 3. Update `onComplete` body to set `lastStepArtifacts = artifacts`
105
+ 4. Add `lastStepArtifacts?: readonly unknown[]` to `WorkflowRunSuccess` interface
106
+ 5. In `makeCompleteStepTool`, change the call `onComplete(notes)` to `onComplete(notes, params.artifacts as readonly unknown[] | undefined)` (line 1249)
107
+ 6. In `makeContinueWorkflowTool`, change the call `onComplete(params.notesMarkdown)` to `onComplete(params.notesMarkdown, params.artifacts as readonly unknown[] | undefined)` (line 1046)
108
+ 7. Update the final `return` in `runWorkflow()` (line 2622) to spread `lastStepArtifacts` conditionally
109
+
110
+ **Done when:** `WorkflowRunSuccess` has `lastStepArtifacts` field; both tool factory call sites pass artifacts; `npm run build` passes.
111
+
112
+ ---
113
+
114
+ ### Slice 3: Update getAgentResult to return artifacts
115
+
116
+ **Files:**
117
+ - `src/cli-worktrain.ts`
118
+
119
+ **Work:**
120
+ 1. Change `getAgentResult: async (sessionHandle: string): Promise<string | null>` -> `Promise<{ recapMarkdown: string | null; artifacts: readonly unknown[] }>`
121
+ 2. In the implementation body:
122
+ - After reading `runs[0]`, read `runs[0].nodes` as `Array<{ nodeId: string; [key: string]: unknown }>` (with null check)
123
+ - Walk all nodes, fetch each node detail with individual `try/catch`:
124
+ ```
125
+ for (const node of nodes) {
126
+ try {
127
+ const nodeRes = await fetch(nodeUrl + '/' + node.nodeId)
128
+ // collect artifacts from nodeData['artifacts']
129
+ } catch { /* log WARN, continue */ }
130
+ }
131
+ ```
132
+ - Return `{ recapMarkdown: recap, artifacts: collectedArtifacts }` (or `{ recapMarkdown: null, artifacts: [] }` on failure)
133
+ 3. Early-return failures must also return `{ recapMarkdown: null, artifacts: [] }` instead of `null`
134
+
135
+ **Done when:** Return type is `Promise<{ recapMarkdown: string | null; artifacts: readonly unknown[] }>`; TypeScript compile-time errors at call sites force updates.
136
+
137
+ ---
138
+
139
+ ### Slice 4: Update coordinator to use artifact path
140
+
141
+ **Files:**
142
+ - `src/coordinators/pr-review.ts`
143
+
144
+ **Work:**
145
+ 1. Import `ReviewVerdictArtifactV1Schema` from artifacts schema
146
+ 2. Update `CoordinatorDeps.getAgentResult` return type to match new shape
147
+ 3. Add `source?: 'artifact' | 'keyword_scan'` to `ReviewFindings` interface
148
+ 4. Add `readVerdictArtifact(artifacts: readonly unknown[]): ReviewFindings | null` pure function:
149
+ - Walk artifacts array
150
+ - For each, check `(raw as any).kind === 'wr.review_verdict'`
151
+ - If kind matches, call `ReviewVerdictArtifactV1Schema.safeParse(raw)`
152
+ - On success: return `{ severity: v.verdict, findingSummaries: v.findings.map(f => f.summary), raw: JSON.stringify(v), source: 'artifact' }`
153
+ - On failure: log `[WARN coord:reason=artifact_parse_failed]`, continue to next artifact
154
+ - If no valid artifact found and artifacts.length > 0: log `[INFO coord:source=keyword_scan reason=no_valid_artifact artifactCount=N]`
155
+ - Return `null`
156
+ 5. Update both call sites in `runPrReviewCoordinator()`:
157
+ - `const { recapMarkdown: notes, artifacts } = await deps.getAgentResult(handle);`
158
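+ - `const artifactFindings = readVerdictArtifact(artifacts); const findingsResult = artifactFindings ? ok(artifactFindings) : parseFindingsFromNotes(notes);` (call `readVerdictArtifact` only once so its WARN/INFO log lines are not emitted twice)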
159
+ - Log `[INFO coord:source=artifact]` or `[INFO coord:source=keyword_scan]`
160
+ 6. Add divergence check (O2): if artifact verdict and keyword-scan severity disagree, log WARN
161
+ 7. Update traceability JSON block to include `source` field
162
+
163
+ **Done when:** Coordinator tries artifact path first; keyword-scan fallback works; logging emits; `npm run build` passes.
164
+
165
+ ---
166
+
167
+ ### Slice 5: Update mr-review workflow
168
+
169
+ **Files:**
170
+ - `workflows/mr-review-workflow.agentic.v2.json`
171
+
172
+ **Work:**
173
+ 1. In `phase-6-final-handoff` step, add `outputContract: { "contractRef": "wr.contracts.review_verdict", "required": false }`
174
+ 2. Append to the step `prompt` field the artifact emission instruction:
175
+ ```
176
+ \n\nAfter completing your notes, emit a structured verdict via complete_step artifacts[] parameter. Use exactly this schema:\n{ "kind": "wr.review_verdict", "verdict": "clean|minor|blocking", "confidence": "high|medium|low", "findings": [{ "severity": "critical|major|minor|nit", "summary": "one-line description" }], "summary": "one-line overall summary" }\nFor a clean review with no findings, use findings: [].
177
+ ```
178
+
179
+ **Done when:** Workflow JSON validates via `npm run build`; `isValidContractRef('wr.contracts.review_verdict')` returns `true` (prerequisite: Slice 1 must be done first).
180
+
181
+ ---
182
+
183
+ ### Slice 6: Tests
184
+
185
+ **Files:**
186
+ - `tests/unit/coordinator-pr-review.test.ts`
187
+
188
+ **Work:**
189
+ 1. Update `makeFakeDeps()` to return `{ recapMarkdown: string | null; artifacts: readonly unknown[] }` from `getAgentResult` (change return type from `string | null`)
190
+ 2. Update `ReviewFindings` literal objects in `buildFixGoal` tests to add `source: 'artifact'` or `source: 'keyword_scan'` (or leave them unchanged -- because `source?` is optional, no update is strictly needed)
191
+ 3. Add new `describe('readVerdictArtifact')` block:
192
+ - `it('returns ReviewFindings with source artifact for valid artifact')`
193
+ - `it('returns null for invalid schema (wrong verdict enum)')`
194
+ - `it('returns null for empty artifacts array')`
195
+ - `it('returns null for artifact with different kind')`
196
+ - `it('returns first valid artifact when multiple present')`
197
+ 4. Import `readVerdictArtifact` from `pr-review.js`
198
+
199
+ **Done when:** All existing tests pass; 5 new `readVerdictArtifact` tests pass.
200
+
201
+ ---
202
+
203
+ ## Test Design
204
+
205
+ **Unit tests (pure function):**
206
+ - `readVerdictArtifact` with valid `wr.review_verdict` artifact -> returns `ReviewFindings` with `severity` mapped from `verdict`, `source: 'artifact'`
207
+ - `readVerdictArtifact` with invalid schema (wrong enum) -> returns `null`
208
+ - `readVerdictArtifact` with empty array -> returns `null`
209
+ - `readVerdictArtifact` with artifact of different `kind` -> returns `null` (no false positives)
210
+ - `readVerdictArtifact` with valid + invalid artifacts -> returns valid one (first match wins)
211
+
212
+ **Integration tests (fake deps):**
213
+ - Existing `runPrReviewCoordinator` tests must pass with updated `getAgentResult` return type
214
+ - The fake `getAgentResult` returns `{ recapMarkdown: 'APPROVE ...', artifacts: [] }` by default
215
+
216
+ ---
217
+
218
+ ## Risk Register
219
+
220
+ | Risk | Likelihood | Impact | Mitigation |
221
+ |------|-----------|--------|------------|
222
+ | Missing `makeContinueWorkflowTool` onComplete update | Low | Silent -- artifacts not forwarded from continue_workflow path | Manual verification; code comment at both call sites |
223
+ | Per-node HTTP fetch error aborting aggregation | Low | Graceful fallback to keyword scan | Per-node try/catch (Slice 3 R2) |
224
+ | LLM emits extra fields in artifact (`.strict()` reject) | Medium | Zod fail -> WARN log -> keyword scan fallback | Acceptable during `required: false` transition |
225
+ | `runs[0].nodes` undefined or empty | Low | Empty artifact array -> keyword scan fallback | Null check in Slice 3 |
226
+
227
+ ---
228
+
229
+ ## PR Packaging Strategy
230
+
231
+ Single PR: `feat/coordinator-artifact-protocol`
232
+
233
+ All 6 slices in one PR. Changes are tightly coupled (schema + validator + coordinator must be consistent). Breaking the PR into multiple PRs would require interface stubs that add noise.
234
+
235
+ **PR description structure:**
236
+ 1. Summary: what was done and why
237
+ 2. Change 1 (schema), Change 2 (onComplete), Change 3 (coordinator + workflow)
238
+ 3. Test plan: `npm run build`, `npx vitest run tests/unit/coordinator-pr-review.test.ts`
239
+
240
+ ---
241
+
242
+ ## Philosophy Alignment
243
+
244
+ | Slice | Principle | Status |
245
+ |-------|-----------|--------|
246
+ | 1 (schema) | Make illegal states unrepresentable | Satisfied -- closed enums, kind literal |
247
+ | 1 (schema) | Validate at boundaries | Satisfied -- Zod strict schema |
248
+ | 2 (onComplete) | Immutability by default | Satisfied -- `readonly unknown[]` |
249
+ | 3 (getAgentResult) | Errors are data | Satisfied -- returns `{ recapMarkdown: null, artifacts: [] }` not null |
250
+ | 4 (coordinator) | Functional/declarative | Satisfied -- `readVerdictArtifact()` is pure |
251
+ | 4 (coordinator) | Make illegal states unrepresentable | Tension -- `source?` optional; accepted tradeoff |
252
+ | 6 (tests) | Prefer fakes over mocks | Satisfied -- `makeFakeDeps()` pattern |
253
+
254
+ ---
255
+
256
+ ## planConfidenceBand: High
257
+
258
+ - unresolvedUnknownCount: 0
259
+ - followUpTickets: Y1 (make source required post-graduation), Y2 (remove keyword scan post-graduation), spawn_agent artifacts gap (post-MVP)
@@ -0,0 +1,241 @@
1
+ # Implementation Plan: Coordinator Message Queue Drain
2
+
3
+ ## 1. Problem Statement
4
+
5
+ `worktrain tell "<message>"` appends to `~/.workrail/message-queue.jsonl` but the PR review
6
+ coordinator (`runPrReviewCoordinator`) never reads this file. Messages sent from a phone,
7
+ terminal, or automation (e.g., "stop", "skip-pr 42") are silently ignored. The coordinator
8
+ must drain this queue at the start of each cycle and act on actionable messages before spawning
9
+ any agent.
10
+
11
+ ## 2. Acceptance Criteria
12
+
13
+ AC1. When `stop` appears as the first meaningful word in a queued message (matched by
14
+ `/^\s*stop\b/i`), the coordinator exits cleanly without reviewing any PR, and appends an
15
+ outbox notification that includes the full triggering message text and timestamp.
16
+
17
+ AC2. When `skip-pr N` appears in a queued message (matched by `/\bskip[- ]pr[\s#]+(\d+)/i`),
18
+ PR #N is removed from the list before Stage 1 review dispatch. An outbox notification is
19
+ appended confirming the skip.
20
+
21
+ AC3. When `add-pr N` appears in a queued message (matched by `/\badd[- ]pr[\s#]+(\d+)/i`),
22
+ PR #N is added to the list (with Set dedup to prevent duplicates). An outbox notification
23
+ is appended confirming the addition.
24
+
25
+ AC4. Messages that match no recognized pattern are skipped silently (treated as notes).
26
+
27
+ AC5. After draining, the cursor in `~/.workrail/message-queue-cursor.json` is updated so
28
+ processed messages are not re-processed on the next coordinator invocation.
29
+
30
+ AC6. If `~/.workrail/message-queue.jsonl` does not exist (ENOENT), the drain returns a no-op
31
+ result and the coordinator proceeds normally.
32
+
33
+ AC7. Malformed JSONL lines (unparseable JSON) are skipped without crashing the coordinator.
34
+ A stderr warning is emitted for each skipped malformed line.
35
+
36
+ AC8. All drain I/O (readFile, appendFile, homedir, joinPath, now, generateId) is injected via
37
+ `CoordinatorDeps`. No direct `fs` imports are added to `pr-review.ts`.
38
+
39
+ AC9. Unit tests for `drainMessageQueue()` use fake deps (in-memory file map). No real filesystem
40
+ access in tests.
41
+
42
+ ## 3. Non-Goals
43
+
44
+ - No `reprioritize` message kind in this PR
45
+ - No workspace routing (workspaceHint matching) -- all messages are consumed regardless of hint
46
+ - No structured `kind` field on `QueuedMessage` (Candidate C) -- that is a follow-up issue
47
+ - No truncation or compaction of consumed messages (queue remains append-only)
48
+ - No real-time / `--watch` mode
49
+ - No multi-coordinator fan-out (single coordinator consumes the queue)
50
+ - No integration test (unit tests with fakes are sufficient)
51
+
52
+ ## 4. Philosophy-Driven Constraints
53
+
54
+ - Errors as data: `drainMessageQueue` returns `DrainResult`, never throws
55
+ - All I/O injected: `CoordinatorDeps` gains `readFile` and `appendFile`; zero direct fs imports
56
+ - Immutability: `DrainResult` and all new interfaces are fully readonly
57
+ - Prefer fakes over mocks: tests use in-memory fake deps
58
+ - Validate at boundaries: JSONL parsing, ENOENT, cursor desync handled at the read boundary
59
+ - Document WHY: function header explains the cursor pattern and text-matching tradeoff
60
+
61
+ ## 5. Invariants
62
+
63
+ I1. `message-queue.jsonl` is never written or truncated by the coordinator (the coordinator only reads it; the queue is append-only and is appended to by `worktrain tell`)
64
+ I2. The coordinator drains the queue BEFORE Stage 1 (PR discovery) -- never mid-agent-run
65
+ I3. `stop: true` in `DrainResult` takes absolute precedence; coordinator must check stop before
66
+ acting on `skipPrNumbers` or `addPrNumbers`
67
+ I4. The cursor advances only AFTER successful outbox writes (best-effort; cursor write failure
68
+ does not block drain -- same pattern as worktrain-inbox.ts)
69
+ I5. ENOENT on message-queue.jsonl = no messages = coordinator proceeds normally (not an error)
70
+ I6. Cursor desync guard: if `cursor > totalLines`, reset to 0 (queue was wiped)
71
+
72
+ ## 6. Selected Approach & Rationale
73
+
74
+ **Selected: Candidate B** -- `drainMessageQueue()` pure function with cursor + text parsing.
75
+
76
+ **Rationale:** Direct adaptation of the `worktrain-inbox.ts` cursor pattern (already tested, same
77
+ `InboxCursor` shape `{ lastReadCount: number }`). Additive to `CoordinatorDeps`. Text parsing is
78
+ narrow (`^\\s*stop\\b`) and consistent with how `parseFindingsFromNotes()` works in the same file.
79
+
80
+ **Runner-up: Candidate C** (structured `kind` field on `QueuedMessage`). Loses because it
81
+ requires a schema change to the public CLI interface (`worktrain tell`), which is out of scope.
82
+ Filed as a follow-up.
83
+
84
+ ## 7. Vertical Slices
85
+
86
+ ### Slice 1: Extend `CoordinatorDeps` and add `DrainResult` type
87
+
88
+ **Files:** `src/coordinators/pr-review.ts`
89
+
90
+ **Work:**
91
+ - Add `readFile: (path: string) => Promise<string>` to `CoordinatorDeps`
92
+ - Add `appendFile: (path: string, content: string) => Promise<void>` to `CoordinatorDeps`
93
+ - Add `mkdir: (path: string, options: { recursive: boolean }) => Promise<string | undefined>` to `CoordinatorDeps`
94
+ - Define `DrainResult` interface (readonly: stop, stopReason, skipPrNumbers, addPrNumbers, messagesProcessed)
95
+
96
+ **Done when:** TypeScript compiles with new interface fields. No runtime behavior change yet.
97
+
98
+ **Note:** Updating fake deps in `coordinator-pr-review.test.ts` is part of this slice (compile-
99
+ time requirement).
100
+
101
+ ---
102
+
103
+ ### Slice 2: Implement `drainMessageQueue()`
104
+
105
+ **Files:** `src/coordinators/pr-review.ts`
106
+
107
+ **Work:**
108
+ - New exported function `drainMessageQueue(deps, workrailDir)` -- deps is the coordinator deps
109
+ subset; workrailDir defaults to `deps.joinPath(deps.homedir(), '.workrail')`
110
+ - Reads `message-queue.jsonl` (ENOENT -> return empty result)
111
+ - Reads cursor from `message-queue-cursor.json` (missing/corrupt -> 0)
112
+ - Applies cursor desync guard (cursor > totalLines -> reset to 0)
113
+ - Parses new lines (slice from cursor), skips malformed with stderr warning
114
+ - For each parsed `QueuedMessage`:
115
+ - `/^\\s*stop\\b/i` match -> set stop=true, record stopReason=message.message
116
+ - `/\\bskip[- ]pr[\\s#]+([0-9]+)/i` match -> add to skipSet
117
+ - `/\\badd[- ]pr[\\s#]+([0-9]+)/i` match -> add to addSet
118
+ - Otherwise: skip (informational note)
119
+ - After processing all new messages:
120
+ - For each actionable message: appendFile to outbox.jsonl with confirmation text
121
+ - Append stderr `[INFO coord:drain kind=... message="..." ts=...]` per actionable message
122
+ - Update cursor file (non-fatal on failure)
123
+ - Return `DrainResult`
124
+
125
+ **Done when:** Function exists, TypeScript compiles, unit tests pass.
126
+
127
+ ---
128
+
129
+ ### Slice 3: Integrate drain into `runPrReviewCoordinator()`
130
+
131
+ **Files:** `src/coordinators/pr-review.ts`
132
+
133
+ **Work:**
134
+ - Call `drainMessageQueue(deps)` at the top of `runPrReviewCoordinator()` (before Stage 1 log)
135
+ - Check `drainResult.stop` immediately:
136
+ - If true: log stop reason, write report (empty/aborted), return early with all zeros
137
+ - Apply `drainResult.skipPrNumbers` to remove PRs from the discovered list (after Stage 1)
138
+ - Apply `drainResult.addPrNumbers` to add PRs to the list (with Set dedup, before Stage 1)
139
+ - Log drain activity: `[drain] processed N messages, skip=[...], add=[...]` if messagesProcessed > 0
140
+
141
+ **Done when:** Existing coordinator unit tests still pass, plus a new fake-deps unit test covering the drain integration in `runPrReviewCoordinator()`.
142
+
143
+ ---
144
+
145
+ ### Slice 4: Wire new deps in `cli-worktrain.ts`
146
+
147
+ **Files:** `src/cli-worktrain.ts`
148
+
149
+ **Work:**
150
+ - Add `readFile: (p: string) => fs.promises.readFile(p, 'utf-8')` to CoordinatorDeps wiring
151
+ - Add `appendFile: (p: string, content: string) => fs.promises.appendFile(p, content, 'utf-8')`
152
+ to CoordinatorDeps wiring
153
+ - Add `mkdir: (p: string, opts: { recursive: boolean }) => fs.promises.mkdir(p, opts)` to
154
+ CoordinatorDeps wiring
155
+
156
+ **Done when:** `worktrain run pr-review --dry-run` compiles and runs without error.
157
+
158
+ ---
159
+
160
+ ### Slice 5: Unit tests for `drainMessageQueue()`
161
+
162
+ **Files:** `tests/unit/coordinator-pr-review.test.ts`
163
+
164
+ **Work:**
165
+ - Add `readFile`, `appendFile`, and `mkdir` to the existing fake CoordinatorDeps helper
166
+ - New `describe('drainMessageQueue')` block covering:
167
+ - ENOENT -> returns empty DrainResult (messagesProcessed=0, stop=false)
168
+ - Stop message at start of message text -> stop=true, stopReason set
169
+ - Stop NOT triggered when 'stop' appears mid-sentence ("please stop overthinking" -- note: this
170
+ does NOT fire with `^\\s*stop` because 'stop' doesn't start the message; test confirms this is the
171
+ designed behavior)
172
+ - skip-pr with PR number -> skipPrNumbers contains the number
173
+ - add-pr with PR number -> addPrNumbers contains the number
174
+ - Malformed JSONL lines skipped, messagesProcessed counts only valid lines
175
+ - Cursor advances after drain
176
+ - Cursor desync guard resets to 0 when cursor > totalLines
177
+ - Multiple messages: stop takes precedence regardless of order in queue
178
+ - Note-only messages: no action, cursor advances, messagesProcessed = N
179
+
180
+ **Done when:** All new tests pass; no existing tests broken.
181
+
182
+ ## 8. Test Design
183
+
184
+ **Strategy:** Fake deps only (in-memory Map for files, Set for dirs). No real filesystem.
185
+
186
+ **Key test helpers:**
187
+ ```ts
188
+ interface FakeDrainFs {
189
+ files: Map<string, string>;
190
+ }
191
+
192
+ function makeDrainDeps(fs: FakeDrainFs): Pick<CoordinatorDeps, 'readFile' | 'appendFile' | 'mkdir' | 'homedir' | 'joinPath' | 'now' | 'generateId' | 'stderr'>
193
+ ```
194
+
195
+ **Critical test cases:**
196
+ - `stop` as sole message: stop=true, outbox has triggering text
197
+ - `skip-pr 42` after a note: skipPrNumbers=[42], messagesProcessed=2
198
+ - Two `skip-pr` for same PR: deduplicated in Set (skipPrNumbers=[42] not [42, 42])
199
+ - Cursor = 5, file has 5 lines: messagesProcessed=0 (all previously read)
200
+ - Cursor = 10, file has 5 lines: cursor reset to 0, all 5 processed
201
+
202
+ ## 9. Risk Register
203
+
204
+ | Risk | Likelihood | Impact | Mitigation |
205
+ |---|---|---|---|
206
+ | `stop` false positive on note message | Low | Medium | `^\\s*stop\\b` anchor; outbox shows triggering text |
207
+ | Cursor file write failure | Very Low | Low | Non-fatal; next run re-reads from the last persisted cursor (or from 0 if no cursor file exists), so some messages may be re-processed |
208
+ | Outbox write failure during stop | Very Low | Low | Non-fatal; stderr log is backup |
209
+ | `readFile`/`appendFile` not wired in cli-worktrain.ts | Low | High | Slice 4 is explicit; TypeScript will catch missing fields at compile time |
210
+
211
+ ## 10. PR Packaging Strategy
212
+
213
+ Single PR on branch `feat/coordinator-message-queue`. All 5 slices in one PR -- they are
214
+ tightly coupled (type change -> function -> integration -> wiring -> tests). Separating them
215
+ would create a non-compiling intermediate state.
216
+
217
+ ## 11. Philosophy Alignment Per Slice
218
+
219
+ | Slice | Principle | Status |
220
+ |---|---|---|
221
+ | 1 | Immutability by default | Satisfied -- all new fields are readonly |
222
+ | 1 | Explicit domain types | Tension -- DrainResult uses boolean stop not a discriminated union; documented |
223
+ | 2 | Errors are data | Satisfied -- DrainResult is a value; ENOENT returns empty result |
224
+ | 2 | Dependency injection | Satisfied -- all I/O via injected deps |
225
+ | 2 | Validate at boundaries | Satisfied -- malformed JSONL skipped at parse boundary |
226
+ | 3 | Determinism over cleverness | Satisfied -- same queue + cursor = same result |
227
+ | 4 | Compose with small pure functions | Satisfied -- drainMessageQueue is pure at logic level |
228
+ | 5 | Prefer fakes over mocks | Satisfied -- fake deps, no vi.mock() |
229
+
230
+ ## 12. Follow-Up Tickets
231
+
232
+ 1. **Add `kind` field to `QueuedMessage` for structured dispatch** (Candidate C) -- unblocks
233
+ automated tooling writing to the message queue without text fragility.
234
+ 2. **`worktrain tell --help` should list recognized coordinator command patterns** -- discovery
235
+ for users who don't know what command words the coordinator recognizes.
236
+
237
+ ## Summary
238
+
239
+ - `estimatedPRCount`: 1
240
+ - `unresolvedUnknownCount`: 0
241
+ - `planConfidenceBand`: High