@exaudeus/workrail 3.71.1 → 3.72.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-worktrain.js +4 -6
- package/dist/console-ui/assets/{index-CsX-nVV7.js → index-Yj9NHqbR.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/daemon/active-sessions.d.ts +17 -0
- package/dist/daemon/active-sessions.js +55 -0
- package/dist/daemon/context-loader.d.ts +32 -0
- package/dist/daemon/context-loader.js +34 -0
- package/dist/daemon/session-scope.d.ts +28 -0
- package/dist/daemon/session-scope.js +21 -0
- package/dist/daemon/tools/_shared.d.ts +38 -0
- package/dist/daemon/tools/_shared.js +101 -0
- package/dist/daemon/tools/bash.d.ts +3 -0
- package/dist/daemon/tools/bash.js +57 -0
- package/dist/daemon/tools/continue-workflow.d.ts +6 -0
- package/dist/daemon/tools/continue-workflow.js +208 -0
- package/dist/daemon/tools/file-tools.d.ts +6 -0
- package/dist/daemon/tools/file-tools.js +195 -0
- package/dist/daemon/tools/glob-grep.d.ts +4 -0
- package/dist/daemon/tools/glob-grep.js +172 -0
- package/dist/daemon/tools/report-issue.d.ts +3 -0
- package/dist/daemon/tools/report-issue.js +129 -0
- package/dist/daemon/tools/signal-coordinator.d.ts +4 -0
- package/dist/daemon/tools/signal-coordinator.js +105 -0
- package/dist/daemon/tools/spawn-agent.d.ts +6 -0
- package/dist/daemon/tools/spawn-agent.js +135 -0
- package/dist/daemon/turn-end/conversation-flusher.d.ts +4 -0
- package/dist/daemon/turn-end/conversation-flusher.js +8 -0
- package/dist/daemon/turn-end/detect-stuck.d.ts +2 -0
- package/dist/daemon/turn-end/detect-stuck.js +5 -0
- package/dist/daemon/turn-end/step-injector.d.ts +8 -0
- package/dist/daemon/turn-end/step-injector.js +10 -0
- package/dist/daemon/workflow-runner.d.ts +54 -29
- package/dist/daemon/workflow-runner.js +175 -989
- package/dist/infrastructure/storage/workflow-resolution.js +5 -6
- package/dist/manifest.json +161 -25
- package/dist/mcp/handlers/shared/request-workflow-reader.js +14 -0
- package/dist/trigger/coordinator-deps.d.ts +15 -0
- package/dist/trigger/coordinator-deps.js +322 -0
- package/dist/trigger/delivery-pipeline.d.ts +18 -0
- package/dist/trigger/delivery-pipeline.js +148 -0
- package/dist/trigger/dispatch-deduplicator.d.ts +6 -0
- package/dist/trigger/dispatch-deduplicator.js +24 -0
- package/dist/trigger/trigger-listener.d.ts +2 -3
- package/dist/trigger/trigger-listener.js +9 -276
- package/dist/trigger/trigger-router.d.ts +8 -7
- package/dist/trigger/trigger-router.js +19 -97
- package/dist/v2/usecases/console-routes.js +10 -2
- package/docs/ideas/backlog.md +82 -48
- package/package.json +1 -1
- package/workflows/wr.research.json +158 -0
|
@@ -36,16 +36,14 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
36
36
|
exports.TriggerRouter = void 0;
|
|
37
37
|
exports.interpolateGoalTemplate = interpolateGoalTemplate;
|
|
38
38
|
const crypto = __importStar(require("node:crypto"));
|
|
39
|
-
const fs = __importStar(require("node:fs/promises"));
|
|
40
|
-
const path = __importStar(require("node:path"));
|
|
41
39
|
const node_child_process_1 = require("node:child_process");
|
|
42
40
|
const node_util_1 = require("node:util");
|
|
43
|
-
const workflow_runner_js_1 = require("../daemon/workflow-runner.js");
|
|
44
41
|
const assert_never_js_1 = require("../runtime/assert-never.js");
|
|
45
42
|
const index_js_1 = require("../v2/infra/in-memory/keyed-async-queue/index.js");
|
|
46
43
|
const delivery_client_js_1 = require("./delivery-client.js");
|
|
47
|
-
const
|
|
44
|
+
const delivery_pipeline_js_1 = require("./delivery-pipeline.js");
|
|
48
45
|
const adaptive_pipeline_js_1 = require("../coordinators/adaptive-pipeline.js");
|
|
46
|
+
const dispatch_deduplicator_js_1 = require("./dispatch-deduplicator.js");
|
|
49
47
|
const execFileAsync = (0, node_util_1.promisify)(node_child_process_1.execFile);
|
|
50
48
|
function interpolateGoalTemplate(template, staticGoal, payload, triggerId) {
|
|
51
49
|
const TOKEN_RE = /\{\{([^}]+)\}\}/g;
|
|
@@ -123,68 +121,17 @@ function validateHmac(rawBody, secret, headerValue) {
|
|
|
123
121
|
async function maybeRunDelivery(triggerId, trigger, result, execFn) {
|
|
124
122
|
if (result._tag !== 'success')
|
|
125
123
|
return;
|
|
126
|
-
if (trigger.autoCommit !== true) {
|
|
127
|
-
console.log(`[TriggerRouter] Delivery skipped: triggerId=${triggerId} -- autoCommit not set for this trigger.`);
|
|
128
|
-
return;
|
|
129
|
-
}
|
|
130
124
|
if (result.lastStepNotes === undefined) {
|
|
131
|
-
|
|
132
|
-
`
|
|
133
|
-
|
|
125
|
+
if (trigger.autoCommit === true) {
|
|
126
|
+
console.warn(`[TriggerRouter] Delivery skipped: triggerId=${triggerId} -- ` +
|
|
127
|
+
`lastStepNotes is absent (agent did not provide notes on the final step). ` +
|
|
128
|
+
`Ensure the workflow produces a JSON handoff block in its final step notes.`);
|
|
129
|
+
}
|
|
134
130
|
return;
|
|
135
131
|
}
|
|
136
|
-
|
|
137
|
-
if (parseResult.kind === 'err') {
|
|
138
|
-
console.warn(`[TriggerRouter] Delivery skipped: triggerId=${triggerId} -- ` +
|
|
139
|
-
`handoff artifact not parseable: ${parseResult.error}. ` +
|
|
140
|
-
`Ensure the workflow's final step produces a JSON block with commitType, filesChanged, etc.`);
|
|
132
|
+
if (trigger.autoCommit !== true)
|
|
141
133
|
return;
|
|
142
|
-
|
|
143
|
-
const deliveryCwd = result.sessionWorkspacePath ?? trigger.workspacePath;
|
|
144
|
-
const deliveryResult = await (0, delivery_action_js_1.runDelivery)(parseResult.value, deliveryCwd, {
|
|
145
|
-
autoCommit: trigger.autoCommit,
|
|
146
|
-
autoOpenPR: trigger.autoOpenPR,
|
|
147
|
-
secretScan: trigger.secretScan ?? true,
|
|
148
|
-
triggerId,
|
|
149
|
-
workflowId: trigger.workflowId,
|
|
150
|
-
...(result.botIdentity !== undefined ? { botIdentity: result.botIdentity } : {}),
|
|
151
|
-
...(trigger.branchStrategy === 'worktree' && result.sessionWorkspacePath
|
|
152
|
-
? {
|
|
153
|
-
sessionId: result.sessionId ?? '',
|
|
154
|
-
branchPrefix: trigger.branchPrefix ?? 'worktrain/',
|
|
155
|
-
}
|
|
156
|
-
: {}),
|
|
157
|
-
}, execFn);
|
|
158
|
-
switch (deliveryResult._tag) {
|
|
159
|
-
case 'committed':
|
|
160
|
-
console.log(`[TriggerRouter] Delivery committed: triggerId=${triggerId} sha=${deliveryResult.sha}`);
|
|
161
|
-
break;
|
|
162
|
-
case 'pr_opened':
|
|
163
|
-
console.log(`[TriggerRouter] Delivery PR opened: triggerId=${triggerId} url=${deliveryResult.url}`);
|
|
164
|
-
break;
|
|
165
|
-
case 'skipped':
|
|
166
|
-
console.log(`[TriggerRouter] Delivery skipped: triggerId=${triggerId} reason=${deliveryResult.reason}`);
|
|
167
|
-
break;
|
|
168
|
-
case 'error':
|
|
169
|
-
console.warn(`[TriggerRouter] Delivery error: triggerId=${triggerId} phase=${deliveryResult.phase} ` +
|
|
170
|
-
`details=${deliveryResult.details}`);
|
|
171
|
-
break;
|
|
172
|
-
}
|
|
173
|
-
if (trigger.branchStrategy === 'worktree' && result.sessionWorkspacePath) {
|
|
174
|
-
try {
|
|
175
|
-
await execFn('git', ['-C', trigger.workspacePath, 'worktree', 'remove', '--force', result.sessionWorkspacePath], { cwd: trigger.workspacePath, timeout: 60000 });
|
|
176
|
-
console.log(`[TriggerRouter] Worktree removed: triggerId=${triggerId} path=${result.sessionWorkspacePath}`);
|
|
177
|
-
}
|
|
178
|
-
catch (err) {
|
|
179
|
-
console.warn(`[TriggerRouter] Could not remove worktree: triggerId=${triggerId} ` +
|
|
180
|
-
`path=${result.sessionWorkspacePath}: ${err instanceof Error ? err.message : String(err)}`);
|
|
181
|
-
}
|
|
182
|
-
if (result.sessionId !== undefined) {
|
|
183
|
-
await fs.unlink(path.join(workflow_runner_js_1.DAEMON_SESSIONS_DIR, `${result.sessionId}.json`)).catch(() => { });
|
|
184
|
-
await fs.unlink(path.join(workflow_runner_js_1.DAEMON_SESSIONS_DIR, `${result.sessionId}-conversation.jsonl`)).catch(() => { });
|
|
185
|
-
console.log(`[TriggerRouter] Session sidecar removed: triggerId=${triggerId} sessionId=${result.sessionId}`);
|
|
186
|
-
}
|
|
187
|
-
}
|
|
134
|
+
await (0, delivery_pipeline_js_1.runDeliveryPipeline)(delivery_pipeline_js_1.DEFAULT_DELIVERY_PIPELINE, result, trigger, execFn, triggerId);
|
|
188
135
|
}
|
|
189
136
|
class Semaphore {
|
|
190
137
|
constructor(max) {
|
|
@@ -216,20 +163,19 @@ class Semaphore {
|
|
|
216
163
|
}
|
|
217
164
|
const DEFAULT_MAX_CONCURRENT_SESSIONS = 3;
|
|
218
165
|
class TriggerRouter {
|
|
219
|
-
constructor(index, ctx, apiKey, runWorkflowFn, execFn, maxConcurrentSessions, emitter, notificationService,
|
|
166
|
+
constructor(index, ctx, apiKey, runWorkflowFn, execFn, maxConcurrentSessions, emitter, notificationService, activeSessionSet, coordinatorDeps, modeExecutors, deduplicator) {
|
|
220
167
|
this.index = index;
|
|
221
168
|
this.ctx = ctx;
|
|
222
169
|
this.apiKey = apiKey;
|
|
223
170
|
this.runWorkflowFn = runWorkflowFn;
|
|
224
171
|
this.queue = new index_js_1.KeyedAsyncQueue();
|
|
225
|
-
this._recentAdaptiveDispatches = new Map();
|
|
226
172
|
this.execFn = execFn ?? execFileAsync;
|
|
227
173
|
this.emitter = emitter;
|
|
228
174
|
this.notificationService = notificationService;
|
|
229
|
-
this.
|
|
230
|
-
this.abortRegistry = abortRegistry;
|
|
175
|
+
this._activeSessionSet = activeSessionSet;
|
|
231
176
|
this._coordinatorDeps = coordinatorDeps;
|
|
232
177
|
this._modeExecutors = modeExecutors;
|
|
178
|
+
this._deduplicator = deduplicator ?? new dispatch_deduplicator_js_1.DispatchDeduplicator(TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS);
|
|
233
179
|
const requested = maxConcurrentSessions ?? DEFAULT_MAX_CONCURRENT_SESSIONS;
|
|
234
180
|
const cap = Number.isNaN(requested) ? DEFAULT_MAX_CONCURRENT_SESSIONS : requested;
|
|
235
181
|
if (cap < 1) {
|
|
@@ -324,18 +270,10 @@ class TriggerRouter {
|
|
|
324
270
|
};
|
|
325
271
|
{
|
|
326
272
|
const dedupeKey = `${workflowTrigger.workflowId}::${workflowTrigger.goal}::${workflowTrigger.workspacePath}`;
|
|
327
|
-
|
|
328
|
-
for (const [key, ts] of this._recentAdaptiveDispatches) {
|
|
329
|
-
if (now - ts >= TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS) {
|
|
330
|
-
this._recentAdaptiveDispatches.delete(key);
|
|
331
|
-
}
|
|
332
|
-
}
|
|
333
|
-
const lastDispatch = this._recentAdaptiveDispatches.get(dedupeKey);
|
|
334
|
-
if (lastDispatch !== undefined && now - lastDispatch < TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS) {
|
|
273
|
+
if (this._deduplicator.checkAndRecord(dedupeKey)) {
|
|
335
274
|
console.log(`[TriggerRouter] Skipping duplicate route dispatch: workflowId=${workflowTrigger.workflowId} goal="${workflowTrigger.goal.slice(0, 60)}" (already dispatched within 30s)`);
|
|
336
275
|
return { _tag: 'enqueued', triggerId: trigger.id };
|
|
337
276
|
}
|
|
338
|
-
this._recentAdaptiveDispatches.set(dedupeKey, now);
|
|
339
277
|
}
|
|
340
278
|
this.emitter?.emit({ kind: 'trigger_fired', triggerId: trigger.id, workflowId: trigger.workflowId });
|
|
341
279
|
const queueKey = trigger.concurrencyMode === 'parallel'
|
|
@@ -351,7 +289,7 @@ class TriggerRouter {
|
|
|
351
289
|
await this.semaphore.acquire();
|
|
352
290
|
let result;
|
|
353
291
|
try {
|
|
354
|
-
result = await this.runWorkflowFn(workflowTrigger, this.ctx, this.apiKey, undefined, this.emitter, this.
|
|
292
|
+
result = await this.runWorkflowFn(workflowTrigger, this.ctx, this.apiKey, undefined, this.emitter, this._activeSessionSet);
|
|
355
293
|
}
|
|
356
294
|
finally {
|
|
357
295
|
this.semaphore.release();
|
|
@@ -406,21 +344,13 @@ class TriggerRouter {
|
|
|
406
344
|
});
|
|
407
345
|
return { _tag: 'enqueued', triggerId: trigger.id };
|
|
408
346
|
}
|
|
409
|
-
dispatch(workflowTrigger) {
|
|
410
|
-
if (
|
|
347
|
+
dispatch(workflowTrigger, source) {
|
|
348
|
+
if (source?.kind !== 'pre_allocated') {
|
|
411
349
|
const dedupeKey = `${workflowTrigger.workflowId}::${workflowTrigger.goal}::${workflowTrigger.workspacePath}`;
|
|
412
|
-
|
|
413
|
-
for (const [key, ts] of this._recentAdaptiveDispatches) {
|
|
414
|
-
if (now - ts >= TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS) {
|
|
415
|
-
this._recentAdaptiveDispatches.delete(key);
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
const lastDispatch = this._recentAdaptiveDispatches.get(dedupeKey);
|
|
419
|
-
if (lastDispatch !== undefined && now - lastDispatch < TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS) {
|
|
350
|
+
if (this._deduplicator.checkAndRecord(dedupeKey)) {
|
|
420
351
|
console.log(`[TriggerRouter] Skipping duplicate dispatch: workflowId=${workflowTrigger.workflowId} goal="${workflowTrigger.goal.slice(0, 60)}" (already dispatched within 30s)`);
|
|
421
352
|
return workflowTrigger.workflowId;
|
|
422
353
|
}
|
|
423
|
-
this._recentAdaptiveDispatches.set(dedupeKey, now);
|
|
424
354
|
}
|
|
425
355
|
else {
|
|
426
356
|
console.log(`[TriggerRouter] Pre-allocated session dispatched: workflowId=${workflowTrigger.workflowId} goal="${workflowTrigger.goal.slice(0, 60)}"`);
|
|
@@ -434,7 +364,7 @@ class TriggerRouter {
|
|
|
434
364
|
await this.semaphore.acquire();
|
|
435
365
|
let result;
|
|
436
366
|
try {
|
|
437
|
-
result = await this.runWorkflowFn(workflowTrigger, this.ctx, this.apiKey, undefined, this.emitter, this.
|
|
367
|
+
result = await this.runWorkflowFn(workflowTrigger, this.ctx, this.apiKey, undefined, this.emitter, this._activeSessionSet, undefined, undefined, source);
|
|
438
368
|
}
|
|
439
369
|
finally {
|
|
440
370
|
this.semaphore.release();
|
|
@@ -485,14 +415,7 @@ class TriggerRouter {
|
|
|
485
415
|
};
|
|
486
416
|
}
|
|
487
417
|
const dedupeKey = `${goal}::${workspace}`;
|
|
488
|
-
|
|
489
|
-
for (const [key, ts] of this._recentAdaptiveDispatches) {
|
|
490
|
-
if (now - ts >= TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS) {
|
|
491
|
-
this._recentAdaptiveDispatches.delete(key);
|
|
492
|
-
}
|
|
493
|
-
}
|
|
494
|
-
const lastDispatch = this._recentAdaptiveDispatches.get(dedupeKey);
|
|
495
|
-
if (lastDispatch !== undefined && now - lastDispatch < TriggerRouter.ADAPTIVE_DEDUPE_TTL_MS) {
|
|
418
|
+
if (this._deduplicator.checkAndRecord(dedupeKey)) {
|
|
496
419
|
console.log(`[TriggerRouter] Skipping duplicate adaptive dispatch: goal="${goal.slice(0, 60)}" ` +
|
|
497
420
|
`(already dispatched within 30s)`);
|
|
498
421
|
return {
|
|
@@ -503,7 +426,6 @@ class TriggerRouter {
|
|
|
503
426
|
},
|
|
504
427
|
};
|
|
505
428
|
}
|
|
506
|
-
this._recentAdaptiveDispatches.set(dedupeKey, now);
|
|
507
429
|
const opts = {
|
|
508
430
|
goal,
|
|
509
431
|
workspace,
|
|
@@ -646,8 +646,16 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
|
|
|
646
646
|
else {
|
|
647
647
|
sessionHandle = workflowId;
|
|
648
648
|
}
|
|
649
|
-
const trigger = { workflowId, goal, workspacePath, context
|
|
650
|
-
|
|
649
|
+
const trigger = { workflowId, goal, workspacePath, context };
|
|
650
|
+
const allocatedSession = {
|
|
651
|
+
continueToken: startResponse.continueToken ?? '',
|
|
652
|
+
checkpointToken: startResponse.checkpointToken,
|
|
653
|
+
firstStepPrompt: startResponse.pending?.prompt ?? '',
|
|
654
|
+
isComplete: startResponse.isComplete,
|
|
655
|
+
triggerSource: 'mcp',
|
|
656
|
+
};
|
|
657
|
+
const source = { kind: 'pre_allocated', trigger, session: allocatedSession };
|
|
658
|
+
void (0, workflow_runner_js_1.runWorkflow)(trigger, v2ToolContext, apiKey ?? '', undefined, undefined, undefined, undefined, undefined, source).then((result) => {
|
|
651
659
|
if (result._tag === 'success') {
|
|
652
660
|
console.log(`[ConsoleRoutes] Auto dispatch completed: workflowId=${workflowId} stopReason=${result.stopReason}`);
|
|
653
661
|
}
|
package/docs/ideas/backlog.md
CHANGED
|
@@ -74,6 +74,22 @@ Agent writes a complete handoff block (commitType, prTitle, prBody, filesChanged
|
|
|
74
74
|
The autonomous workflow runner (`worktrain daemon`). Completely separate from the MCP server -- calls the engine directly in-process.
|
|
75
75
|
|
|
76
76
|
|
|
77
|
+
### Daemon architecture: remaining migrations (Apr 29, 2026)
|
|
78
|
+
|
|
79
|
+
**Status: partial** | A9 shipped Apr 29, 2026.
|
|
80
|
+
|
|
81
|
+
Track A (A1-A9) shipped and the `SessionSource` migration is complete. `WorkflowTrigger._preAllocatedStartResponse` is gone.
|
|
82
|
+
|
|
83
|
+
**Remaining items:**
|
|
84
|
+
|
|
85
|
+
- `CriticalEffect<T>` / `ObservabilityEffect` type distinction -- categorize side effects in `runAgentLoop` and finalization as either crash-relevant or observability-only
|
|
86
|
+
- `StateRef` mutation wrapper -- replace direct `state.pendingSteerParts.push()` mutations with an explicit mutation API
|
|
87
|
+
- Zod tool param validation -- replace manual `typeof` checks in tool factories with Zod schema validation (requires `zodToJsonSchema` or maintaining two sources of truth for param schemas)
|
|
88
|
+
- `createCoordinatorDeps` unit tests -- extraction in B3 improved testability; cover `spawnSession`, `awaitSessions`, `getAgentResult` at minimum
|
|
89
|
+
- Wire `AllocatedSession.triggerSource` to the `run_started` event for session attribution (one-liner once the event schema field is added -- see "Session trigger source attribution" entry below)
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
77
93
|
### `wr.refactoring` workflow (Apr 28, 2026)
|
|
78
94
|
|
|
79
95
|
**Status: idea** | Priority: medium
|
|
@@ -103,59 +119,35 @@ The `wr.coding-task` workflow has too much overhead for pure refactors (design r
|
|
|
103
119
|
|
|
104
120
|
---
|
|
105
121
|
|
|
106
|
-
### runWorkflow() functional core refactor --
|
|
122
|
+
### runWorkflow() functional core refactor -- Phases 2-4 (Apr 24-29, 2026)
|
|
107
123
|
|
|
108
|
-
**Status: done** |
|
|
124
|
+
**Status: done** | Phases 2-3 shipped Apr 29, 2026. Phase 4 (A1-A9) shipped Apr 29, 2026.
|
|
109
125
|
|
|
110
|
-
Phase 1
|
|
126
|
+
Phase 1 (PR #818): `tagToStatsOutcome`, `buildAgentClient`, `evaluateStuckSignals`, `SessionState`, `finalizeSession`.
|
|
127
|
+
Phase 2 (PR #830): `PreAgentSession`/`PreAgentSessionResult`, `buildPreAgentSession`, `constructTools`, `persistTokens` Result type, TDZ fix.
|
|
128
|
+
Phase 3 (PRs #835, #837): `buildTurnEndSubscriber`, `buildAgentCallbacks`, `buildSessionResult`. runWorkflow() body: 539 → 308 lines.
|
|
111
129
|
|
|
112
|
-
**
|
|
130
|
+
**Phase 4 (Track A, PRs #839-#869, Apr 29, 2026):**
|
|
131
|
+
- A1: `runStartupRecovery` apiKey injected as parameter (removes process.env read)
|
|
132
|
+
- A2: Turn-end collaborators extracted to `src/daemon/turn-end/` (`step-injector`, `detect-stuck`, `conversation-flusher`)
|
|
133
|
+
- A3: `SessionScope` + `FileStateTracker` -- typed tool-layer contract, raw Map encapsulated (#843)
|
|
134
|
+
- A4: All 11 tool factories extracted to `src/daemon/tools/` -- workflow-runner.ts -1,500 lines (#851)
|
|
135
|
+
- A5: `ContextLoader` + `ContextBundle` -- two-phase context assembly, parallelized with pre-agent session setup (#855)
|
|
136
|
+
- A6: `ActiveSessionSet` + `SessionHandle` -- replaces `SteerRegistry` + `AbortRegistry` dual Maps; closes TDZ hazard (#856)
|
|
137
|
+
- A7: `buildAgentReadySession` + `runAgentLoop` extracted -- runWorkflow() body: 302 → 92 lines (#859)
|
|
138
|
+
- A8: `SessionSource` discriminated union + `AllocatedSession` -- typed vocabulary for `_preAllocatedStartResponse` migration (#861)
|
|
139
|
+
- A9: Full `SessionSource` migration -- `WorkflowTrigger._preAllocatedStartResponse` removed; all 4 call sites construct `SessionSource` directly; `runWorkflow()` accepts `source?: SessionSource` (#869)
|
|
113
140
|
|
|
114
|
-
**
|
|
141
|
+
**Also shipped (Track B, PRs #846-#848):**
|
|
142
|
+
- B1: `DispatchDeduplicator` -- compile-enforced dedup contract, replaces verbal MUST comment
|
|
143
|
+
- B2: `DeliveryPipeline` + `DeliveryStage` -- staged delivery, preempts accretion in trigger-router.ts
|
|
144
|
+
- B3: `createCoordinatorDeps` + `setDispatch` -- extracted from 900-line trigger-listener.ts; circular dep fixed
|
|
115
145
|
|
|
116
|
-
|
|
117
|
-
interface SessionContext {
|
|
118
|
-
readonly systemPrompt: string;
|
|
119
|
-
readonly tools: readonly AgentTool[];
|
|
120
|
-
readonly sessionTimeoutMs: number;
|
|
121
|
-
readonly maxTurns: number;
|
|
122
|
-
readonly initialPrompt: string;
|
|
123
|
-
readonly agentCallbacks: AgentLoopCallbacks;
|
|
124
|
-
}
|
|
146
|
+
**Unit tests added (PRs #863-#865):** `DefaultFileStateTracker` (15), `DefaultContextLoader` (12), `ActiveSessionSet`/`SessionHandle` (11).
|
|
125
147
|
|
|
126
|
-
|
|
127
|
-
trigger: WorkflowTrigger,
|
|
128
|
-
agentClient: AgentClientInterface,
|
|
129
|
-
modelId: string,
|
|
130
|
-
soulContent: string, // already loaded by loadDaemonSoul()
|
|
131
|
-
workspaceContext: string | null, // already loaded by loadWorkspaceContext()
|
|
132
|
-
sessionNotes: readonly string[], // already loaded by loadSessionNotes()
|
|
133
|
-
state: SessionState,
|
|
134
|
-
// ... tool factories, schemas, etc.
|
|
135
|
-
): SessionContext
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
The shell then does:
|
|
139
|
-
1. All I/O in sequence: `loadDaemonSoul`, `loadWorkspaceContext`, `loadSessionNotes`, `git worktree add`, `executeStartWorkflow`, `parseContinueTokenOrFail`, `persistTokens`
|
|
140
|
-
**What Phase 2 delivered (PR #830):**
|
|
141
|
-
- `PreAgentSession` interface + `PreAgentSessionResult` discriminated union -- all early-exit paths type-enforced
|
|
142
|
-
- `buildPreAgentSession()` -- all pre-agent I/O extracted; steer+daemon registries registered after all failing I/O (FM1 invariant)
|
|
143
|
-
- `constructTools()` -- explicitly impure named function, `state` as explicit parameter
|
|
144
|
-
- `persistTokens()` returns `Promise<Result<void, PersistTokensError>>` using `src/runtime/result.ts`
|
|
145
|
-
- `sidecardLifecycleFor()` pure function with `assertNever` exhaustiveness
|
|
146
|
-
- TDZ hazard fixed: `abortRegistry.set()` now registered after `const agent = new AgentLoop()`
|
|
148
|
+
**Total workflow-runner.ts reduction: ~4,955 → ~2,800 lines (44%).**
|
|
147
149
|
|
|
148
|
-
**
|
|
149
|
-
- `buildTurnEndSubscriber()` extracted -- runWorkflow() body: 539 → 426 lines
|
|
150
|
-
- Tool param validation at LLM boundary (8 tool factories)
|
|
151
|
-
- `buildAgentCallbacks()` + `buildSessionResult()` pure functions -- body: 426 → 308 lines
|
|
152
|
-
- Test flakiness fix: `settleFireAndForget()` + `retry: 2` in vitest config
|
|
153
|
-
|
|
154
|
-
**Still deferred:**
|
|
155
|
-
- `CriticalEffect<T>` / `ObservabilityEffect` type distinction
|
|
156
|
-
- `StateRef` mutation wrapper
|
|
157
|
-
- Zod tool param validation (replacing manual typeof checks -- requires zodToJsonSchema or two sources of truth)
|
|
158
|
-
- `wr.refactoring` workflow (see backlog entry above)
|
|
150
|
+
**Follow-on:** `wr.refactoring` workflow (see backlog entry above). Remaining items in "Daemon architecture: remaining migrations" entry above.
|
|
159
151
|
|
|
160
152
|
---
|
|
161
153
|
|
|
@@ -623,6 +615,29 @@ The stdio/HTTP MCP server that Claude Code (and other MCP clients) connect to. M
|
|
|
623
615
|
|
|
624
616
|
## Console
|
|
625
617
|
|
|
618
|
+
### Task picker mode: browse and launch available work (Apr 29, 2026)
|
|
619
|
+
|
|
620
|
+
**Status: idea** | Priority: high
|
|
621
|
+
|
|
622
|
+
**Problem:** Once WorkTrain is configured (workspace set up, triggers.yml written, daemon running), there is still no easy way to say "run this workflow now" from the console. Dispatch requires knowing the API or writing a webhook. The console has a dispatch endpoint but no UI to drive it.
|
|
623
|
+
|
|
624
|
+
**Vision:** A console panel that lists the triggers already configured in triggers.yml and lets the user click one to fire it immediately -- without leaving the browser, without touching the API, without writing YAML.
|
|
625
|
+
|
|
626
|
+
**How it works:**
|
|
627
|
+
1. Console calls `GET /api/v2/triggers` to list all triggers loaded by the daemon.
|
|
628
|
+
2. User sees a list: trigger ID, workflow, goal, last-fired timestamp. Clicks "Run".
|
|
629
|
+
3. Console POSTs to `/api/v2/auto/dispatch` (already implemented) with the trigger's workflowId + goal + workspace.
|
|
630
|
+
4. New session appears in the session list immediately. User watches the DAG advance live.
|
|
631
|
+
5. On completion: outcome, PR link (if opened), and step notes all visible in the same panel.
|
|
632
|
+
|
|
633
|
+
**What this is not:** An onboarding wizard or zero-setup flow -- the daemon and environment must already be configured. This is a dispatch surface for *already-configured* users who want to trigger work without using the CLI or waiting for a webhook.
|
|
634
|
+
|
|
635
|
+
**Why it matters:** Makes the console a control plane, not just a read-only viewer. The daemon gains a "run this now" button. Users get to watch the agent work in real time, which builds confidence before trusting it on unattended tasks.
|
|
636
|
+
|
|
637
|
+
**Dependency:** `GET /api/v2/triggers` endpoint (returns the live trigger index -- may need to be added). `POST /api/v2/auto/dispatch` already exists. No new daemon work required.
|
|
638
|
+
|
|
639
|
+
---
|
|
640
|
+
|
|
626
641
|
### Console interactivity and liveliness
|
|
627
642
|
|
|
628
643
|
**Status: idea** | Priority: medium
|
|
@@ -733,6 +748,14 @@ A workflow that aggregates activity across git history, GitLab/GitHub MRs and re
|
|
|
733
748
|
|
|
734
749
|
## Platform Vision (longer-term)
|
|
735
750
|
|
|
751
|
+
### Inspiration: openclaw (Apr 29, 2026)
|
|
752
|
+
|
|
753
|
+
**Source:** https://github.com/openclaw/openclaw
|
|
754
|
+
|
|
755
|
+
openclaw is worth studying deeply before building out the platform layer. Draw inspiration from it when designing: multi-agent orchestration patterns, coordinator architecture, context packaging for subagents, task queue and dispatch models, and the overall shape of an autonomous engineering platform. Review it before making architectural decisions on any of the Platform Vision items below.
|
|
756
|
+
|
|
757
|
+
---
|
|
758
|
+
|
|
736
759
|
### Knowledge graph for agent context
|
|
737
760
|
|
|
738
761
|
**Status: idea** | Priority: medium
|
|
@@ -1089,8 +1112,7 @@ WorkTrain is a persistent background daemon that initiates workflows autonomousl
|
|
|
1089
1112
|
- Bot identity (`botIdentity`) and acting-as-user support
|
|
1090
1113
|
- Dynamic model selection (`agentConfig.model`)
|
|
1091
1114
|
- macOS notifications
|
|
1092
|
-
-
|
|
1093
|
-
- AbortRegistry + SIGTERM graceful shutdown
|
|
1115
|
+
- `ActiveSessionSet` + mid-session steer injection + SIGTERM graceful shutdown (replaces SteerRegistry + AbortRegistry)
|
|
1094
1116
|
- `maxOutputTokens` per trigger, `maxQueueDepth` with HTTP 429
|
|
1095
1117
|
- Crash recovery Phase B
|
|
1096
1118
|
- `daemon-soul.md` / workspace context injection
|
|
@@ -1103,6 +1125,7 @@ WorkTrain is a persistent background daemon that initiates workflows autonomousl
|
|
|
1103
1125
|
- Worktree orphan cleanup on delivery failure
|
|
1104
1126
|
- runWorkflow() Phase 2 architecture (PR #830): `PreAgentSession`/`buildPreAgentSession`, `constructTools`, `persistTokens` Result type, `sidecardLifecycleFor` pure function, TDZ hazard fix for abort registry
|
|
1105
1127
|
- runWorkflow() Phase 3 architecture (PRs #835, #837): `buildTurnEndSubscriber` (539→426 lines), tool param validation at LLM boundary (8 factories), `buildAgentCallbacks` + `buildSessionResult` pure functions (426→308 lines), test flakiness fix (settleFireAndForget + retry:2)
|
|
1128
|
+
- runWorkflow() Phase 4 / Track A+B architecture (PRs #839-#869, Apr 29, 2026): six-layer daemon decomposition -- `SessionScope`+`FileStateTracker`, tool extraction to `src/daemon/tools/`, `ContextLoader`+`ContextBundle`, `ActiveSessionSet`+`SessionHandle` (TDZ fix), `buildAgentReadySession`+`runAgentLoop`, `SessionSource`+`AllocatedSession`+full `_preAllocatedStartResponse` removal, `DispatchDeduplicator`, `DeliveryPipeline`, `createCoordinatorDeps`. workflow-runner.ts: 4,955 → 2,800 lines (44%). 38 new unit tests for new abstractions. `ActiveSessionSet` replaces `SteerRegistry`+`AbortRegistry`.
|
|
1106
1129
|
|
|
1107
1130
|
### WorkRail engine / MCP features
|
|
1108
1131
|
|
|
@@ -1165,3 +1188,14 @@ The agent is expensive, inconsistent, and slow. Scripts are free, deterministic,
|
|
|
1165
1188
|
|
|
1166
1189
|
---
|
|
1167
1190
|
|
|
1191
|
+
### Worktree and branch lifecycle management
|
|
1192
|
+
|
|
1193
|
+
WorkTrain has no tooling to surface the state of worktrees and branches relative to main. Doing this manually today requires running git commands across every registered worktree, cross-referencing merged PR lists, and inspecting each branch's unique commits to determine if the work landed. Pain points observed in practice:
|
|
1194
|
+
|
|
1195
|
+
- Worktrees persist after their branch's PR is squash-merged -- no signal that they are safe to delete
|
|
1196
|
+
- No inventory of which branches have genuinely unmerged work vs. fully superseded content
|
|
1197
|
+
- Abandoned in-progress branches have no attached context about why they were abandoned or what state they were in
|
|
1198
|
+
- Daemon-spawned worktrees under `~/.workrail/worktrees/` are opaque -- no indication of which session created them or whether cleanup is safe
|
|
1199
|
+
|
|
1200
|
+
---
|
|
1201
|
+
|
package/package.json
CHANGED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "wr.research",
|
|
3
|
+
"name": "General-Purpose Research",
|
|
4
|
+
"description": "General-purpose agentic research workflow for any topic — technology evaluation, design decisions, codebase exploration, markets, frameworks, architectural patterns. Produces a BLUF-headed Research Brief with ranked findings, falsified priors, and explicit what-was-not-verified boundaries. Two scale modes: quick (~20 min) and deep (~2 hr).",
|
|
5
|
+
"about": "Use `wr.research` when you have an open-ended question and want a structured, evidence-grounded answer rather than a summary dump. The workflow is built for the everyday research moment: you're considering adopting a framework, you need to understand a technology before a design review, you're trying to figure out who's doing the most interesting work in some space, or you need to inform a real decision. It is *not* the right tool for a competitor battlecard (use `wr.competitive-analysis` instead) or for a documented bug investigation (use `wr.bug-investigation`).\n\n**What you get out**: a Research Brief that opens with the answer in 3–5 sentences (BLUF), then ranked findings with confidence bands, the contradictions found between sources, the priors of yours and the agent's that were *falsified* during the run, an explicit 'what we still don't know' section, and recommended next steps tied to specific unanswered sub-questions. The brief earns its conclusions: every load-bearing claim is multi-source verified; single-source claims are confined to the body with explicit [unconfirmed] markers; training-time agent priors are inadmissible to the BLUF.\n\n**How to get good results**: be specific at intake. The workflow asks what you already believe — give it concrete claims, not vague impressions, because falsifiable priors are what produces the 'oh, that is not actually true' moments that distinguish insight from summary. Pick `decision` mode if you have a real choice to make and a deadline; pick `understanding` mode if you are building a mental model. Pick `quick` (~20 min) for a directional answer, `deep` (~2 hr) when you would otherwise spend a day reading. 
There is one human gate after planning where you approve or edit the Source Map and sub-question decomposition; everything after that runs unattended.\n\n**Mode budgets** (quick / deep): subagent fan-out cap 5/10; per-subagent token budget 8k/25k; total depth-serial token budget 30k/120k; iteration cap 1/2; brief word budget 800/2500; source map cap 5/8.",
|
|
6
|
+
"examples": [
|
|
7
|
+
"Should we adopt Kotlin Multiplatform for our shared business logic?",
|
|
8
|
+
"How do modern Android apps typically handle navigation state in 2026?",
|
|
9
|
+
"Decide between FSRS, SM-2, and Anki default scheduler for a vocabulary app",
|
|
10
|
+
"Survey current published work on agent memory architectures"
|
|
11
|
+
],
|
|
12
|
+
"version": "1.0.0",
|
|
13
|
+
"validatedAgainstSpecVersion": 3,
|
|
14
|
+
"metricsProfile": "research",
|
|
15
|
+
"preconditions": [
|
|
16
|
+
"User can supply a research question they care about, including what they already believe, what good enough looks like, and if applicable the decision the research is meant to inform.",
|
|
17
|
+
"Environment has web_search, web_fetch, file read/write, and bash tools available.",
|
|
18
|
+
"Working directory is writable; the workflow creates ./research/<sessionId>/ for durable artifacts."
|
|
19
|
+
],
|
|
20
|
+
"metaGuidance": [
|
|
21
|
+
"Notes-first durability: notesMarkdown and context variables are the durable execution truth. Disk artifacts under ./research/<sessionId>/ are rich content backing the notes.",
|
|
22
|
+
"The main agent owns every merge, every confidence promotion, every narrative decision, every word-budget cut, and every emission gate. Subagents produce raw outputs; they never produce canonical answers.",
|
|
23
|
+
"Parallelize only breadth-regime collection in Phase 3. All synthesis (merge, gap-analysis, brief-writing, dissent integration) is serial main-agent work.",
|
|
24
|
+
"Confidence laundering is the highest-stakes failure: a finding's confidence band cannot exceed the lowest-tier claim it depends on. prior:unverified claims are inadmissible to BLUF and ranked findings, period.",
|
|
25
|
+
"Hard word budget at Phase 8 enforces anti-completionism. The agent must cut, not expand, to fit. Empty 'What we do not know' sections fail the validation gate.",
|
|
26
|
+
"Adversarial review (Phase 7) must run as a separate Executor with artifacts-only context — no chain-of-thought sharing. Pass only file paths and an explicit 'do not infer prior reasoning' clause.",
|
|
27
|
+
"The collection-gap loop (Phases 3-5) always runs one initial pass and may repeat at most iterationCap more times (1 quick / 2 deep). Encode the cap as a context variable check: refuse the loop continuation when iterationCount >= iterationCap."
|
|
28
|
+
],
|
|
29
|
+
"references": [
|
|
30
|
+
{
|
|
31
|
+
"id": "spec-bluf",
|
|
32
|
+
"title": "BLUF communication standard",
|
|
33
|
+
"source": "docs/reference/worktrain-daemon-invariants.md",
|
|
34
|
+
"purpose": "Reference for the brief's required opening structure: answer in 3-5 sentences, then evidence, then caveats.",
|
|
35
|
+
"authoritative": false
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"id": "spec-authoring",
|
|
39
|
+
"title": "WorkRail authoring principles",
|
|
40
|
+
"source": "docs/authoring-v2.md",
|
|
41
|
+
"purpose": "Authoring rules for step prompts, output contracts, and loop control patterns used throughout this workflow.",
|
|
42
|
+
"authoritative": true
|
|
43
|
+
}
|
|
44
|
+
],
|
|
45
|
+
"assessments": [
|
|
46
|
+
{
|
|
47
|
+
"id": "assessment-brief-quality",
|
|
48
|
+
"purpose": "Final Research Brief meets structural, confidence, and focus requirements before emission.",
|
|
49
|
+
"dimensions": [
|
|
50
|
+
{
|
|
51
|
+
"id": "structural_integrity",
|
|
52
|
+
"purpose": "All required sections present in canonical order, within size caps, with non-empty required sections.",
|
|
53
|
+
"levels": [
|
|
54
|
+
"low",
|
|
55
|
+
"high"
|
|
56
|
+
]
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"id": "confidence_integrity",
|
|
60
|
+
"purpose": "Every BLUF and ranked-finding claim is verified or inferred; no prior:unverified claims; confidence bands respect lowest-tier rule.",
|
|
61
|
+
"levels": [
|
|
62
|
+
"low",
|
|
63
|
+
"high"
|
|
64
|
+
]
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"id": "focus_integrity",
|
|
68
|
+
"purpose": "Brief stays within word budget, every claim ties to a sub-question, BLUF directly answers the intake question.",
|
|
69
|
+
"levels": [
|
|
70
|
+
"low",
|
|
71
|
+
"high"
|
|
72
|
+
]
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
}
|
|
76
|
+
],
|
|
77
|
+
"steps": [
|
|
78
|
+
{
|
|
79
|
+
"id": "phase-0-intake",
|
|
80
|
+
"title": "Phase 0: Intake",
|
|
81
|
+
"prompt": "Capture exactly what the user wants to know, what they already believe, and what 'good enough' looks like. Seed the Priors Ledger to distinguish verified evidence from training-time memory.\n\nConstraints: Do not start any web_search or web_fetch. Treat your own training-time knowledge with suspicion -- anything you already know about the topic is a prior, not a fact, until you fetch it in-session.\n\nProcedure:\n1. Create ./research/<sessionId>/. If research-log.md already exists, this is a resume -- read the log, find the last completed phase marker, and skip ahead.\n2. Initialize research-log.md with: '# Research Log -- <sessionId> -- <ISO timestamp>'\n3. Present the user with this seven-field intake form (Q1-Q5 and Q7 required; Q6 optional):\n - Q1: Question (one sentence, no jargon)\n - Q2: Mode (decision | understanding). If decision: also capture decision deadline and decision owner.\n - Q3: What you already believe (free text; seeds the Priors Ledger)\n - Q4: What 'good enough' looks like (e.g., 'I can confidently choose between A and B')\n - Q5: Out-of-scope (explicit list)\n - Q6: Source preferences or exclusions (optional)\n - Q7: Run mode (quick | deep)\n4. Write intake.json.\n5. Initialize priors-ledger.json: extract atomic falsifiable claims from Q3 tagged `prior:unverified` source `user`. Then write your own atomic priors about the topic tagged `prior:unverified` source `agent`. Ask one focused follow-up if Q3 yields no atomic claims.\n6. Append '## Phase 0 complete' plus a one-line summary to research-log.md.\n\nOutput context keys: sessionId, workingDir, intakeQuestion, intakeMode (decision|understanding), mode (quick|deep), goodEnoughCriteria, outOfScope, priorsLedgerPath, intakeJsonPath, iterationCount (set to 0), iterationCap (1 if quick else 2).\n\nVerify: working directory and both JSON files exist; priors-ledger.json has at least one prior or an explicit 'no priors' note; research-log.md ends with Phase 0 marker."
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"id": "phase-1-plan",
|
|
85
|
+
"title": "Phase 1: Plan, Source Map, and Dependency Matrix",
|
|
86
|
+
"prompt": "Produce three artifacts that commit the run to a specific shape before any collection happens.\n\nConstraints: Light web_search to map the source landscape is permitted, but do NOT begin substantive collection. The regime is determined by a mechanical rule, not judgment. Source Map: max 5 entries (quick) / 8 (deep). Sub-questions: 3-8 entries.\n\nProcedure:\n1. Source Map (source-map.md): enumerate source types most relevant to THIS specific question. One-line rationale per entry tied to this question. Include critic/contrarian sources proactively if the topic has hype.\n2. Sub-question decomposition (dependency-matrix.json): split the intake question into 3-8 sub-questions whose conjunction would answer it. For each, list IDs of other sub-questions it depends on.\n3. Regime decision (mechanical rule):\n - All dependency lists empty -> regime = breadth (answers can be gathered in parallel)\n - Any sub-question has dependencies -> regime = depth_serial (produce topological ordering)\n4. Plan (plan.md): per sub-question: planned subagent task, source-map entries to prioritize, stop rule (min 3 fetches AND 2 consecutive zero-novelty fetches OR token budget hit), token budget (8k quick / 25k deep per subagent).\n5. Append '## Phase 1 complete' plus regime and sub-question count to research-log.md.\n\nOutput context keys: sourceMapPath, dependencyMatrixPath, planPath, regime (breadth | depth_serial), subQuestionCount, subagentCap (5 quick / 10 deep), perSubagentTokenBudget (8000 quick / 25000 deep).\n\nVerify: Source Map respects mode cap; sub-question count 3-8; regime set by mechanical rule; all three files exist."
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"id": "phase-2-confirm-plan",
|
|
90
|
+
"title": "Phase 2: Confirm plan with user (single human gate)",
|
|
91
|
+
"prompt": "Get user approval or edits on the Plan, Source Map, and Dependency Matrix before any collection tokens are spent. This is the workflow's only human gate.\n\nConstraints: Do not proceed without explicit user response. User edits to the Dependency Matrix can flip the regime; honor that. Show artifact content inline, not just file paths.\n\nProcedure:\n1. Present inline: the intake question, the Source Map, the sub-questions with dependencies and declared regime, and the per-sub-question Plan.\n2. Ask: 'Approve, edit, or abort?' with a one-sentence summary of each option.\n3. If edit: capture edits, revise artifacts on disk, present again. Repeat until approve or abort.\n4. If abort: append '## Phase 2: aborted by user' to research-log.md and end.\n5. If approve: append '## Phase 2 complete' to research-log.md.\n\nOutput context keys: planApproved (true/false), regime (possibly updated), aborted (true/false).\n\nVerify: User explicitly approved or aborted; if edits applied, artifacts on disk reflect them; if regime flipped, topological ordering updated.",
|
|
92
|
+
"requireConfirmation": true
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
"id": "collection-gap-loop",
|
|
96
|
+
"title": "Collection and gap-analysis loop (bounded)",
|
|
97
|
+
"type": "loop",
|
|
98
|
+
"loop": {
|
|
99
|
+
"type": "while",
|
|
100
|
+
"conditionSource": {
|
|
101
|
+
"kind": "artifact_contract",
|
|
102
|
+
"contractRef": "wr.contracts.loop_control",
|
|
103
|
+
"loopId": "research_collection_loop"
|
|
104
|
+
},
|
|
105
|
+
"maxIterations": 3
|
|
106
|
+
},
|
|
107
|
+
"body": [
|
|
108
|
+
{
|
|
109
|
+
"id": "phase-3-collection",
|
|
110
|
+
"title": "Phase 3: Collection (regime-dependent)",
|
|
111
|
+
"prompt": "Execute collection in the declared regime. Produce per-pass claim files under ./research/<sessionId>/claims/.\n\nConstraints: Each claim must include: text, source_url, source_type, confidence (single-source|inferred -- NEVER verified, that is Phase 4's job), answers_subquestion (ID), fetched_at (ISO timestamp). Per-pass token budgets are hard caps. Do not mix regimes within a pass.\n\nIf regime = breadth:\n1. Spawn N WorkRail Executors in parallel (N = min(subQuestionCount, subagentCap)).\n2. Each Executor receives: its sub-question, Source Map, relevant priors-ledger.json slice. Tools: web_search, web_fetch, file:read, bash.\n3. Each Executor: stop after min 3 fetches AND 2 consecutive zero-novelty fetches OR token budget (perSubagentTokenBudget) OR 5-minute wall-clock. Write exactly one file: claims/pass-<iterationCount+1>-sub-<subQuestionId>.json. NEVER tag `verified`. Do NOT modify other files or spawn subagents.\n4. Wait for all Executors.\n\nIf regime = depth_serial:\n1. Walk topological ordering of sub-questions.\n2. For each sub-question: fetch sources; you may revise earlier claims in this same pass.\n3. Stop per-sub-question: min 3 fetches AND 2 consecutive zero-novelty OR budget hit OR 5-minute wall-clock.\n4. Write claims/pass-<iterationCount+1>-step-<i>-<subQuestionId>.json.\n5. Total token budget: 30000 (quick) / 120000 (deep).\n\nBoth regimes: append '## Phase 3 complete (pass <iterationCount+1>)' plus one-line summary per Executor/step to research-log.md.\n\nOutput context keys: claimsThisPass (int), claimFilesThisPass (paths array).\n\nVerify: Every Executor/step produced exactly one claims file; no subagent tagged `verified`; budgets respected."
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
"id": "phase-4-merge",
|
|
115
|
+
"title": "Phase 4: Merge and corroborate",
|
|
116
|
+
"prompt": "Merge all claim files from Phase 3, apply the typed corroboration rule, and flag falsified priors. Main-agent only -- no delegation.\n\nCorroboration rule:\n- verified: >= 2 source URLs from distinct hostnames, not syndicated copies citing the same primary article.\n- single-source: 1 URL supports it.\n- inferred: derived across multiple fetched sources; record the derivation chain.\n\nPriors Ledger update:\n- prior:unverified contradicted by >= 1 verified or single-source claim -> tag falsified-pending-review.\n- prior:unverified corroborated by >= 1 verified claim -> tag corroborated.\n- Untouched priors remain prior:unverified and inadmissible to user-facing claims.\n\nProcedure:\n1. Read all claims/ files for the current pass (passNumber = iterationCount + 1, matching the pass-<iterationCount+1> file names written in Phase 3).\n2. Deduplicate by claim text + source URL.\n3. Apply corroboration rule to each unique claim.\n4. Cross-reference against priors-ledger.json; update tags.\n5. Write claims/merged-pass-<passNumber>.json.\n6. Update priors-ledger.json.\n7. Append '## Phase 4 complete (pass <passNumber>): <X> verified, <Y> single-source, <Z> inferred, <P> falsified-pending, <Q> corroborated' to research-log.md.\n\nOutput context keys: verifiedClaimCount, singleSourceClaimCount, inferredClaimCount, falsifiedPendingPriorCount, corroboratedPriorCount, mergedLedgerPath.\n\nVerify: merged-pass-<passNumber>.json exists with final confidence tags; no claim promoted to verified on a single source; priors-ledger.json updated."
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"id": "phase-5-gap-analysis",
|
|
120
|
+
"title": "Phase 5: Gap analysis and loop decision",
|
|
121
|
+
"prompt": "Identify which sub-questions are resolved, partial, or open. Decide whether to iterate or proceed to synthesis. Emit a wr.loop_control artifact to control the loop.\n\nClassification:\n- resolved: >= 2 verified claims OR (1 verified claim AND no contradicting evidence)\n- partial: only single-source or inferred claims, OR verified claims that contradict each other without resolution\n- open: no claims or only prior:unverified speculation\n\nIteration is justified ONLY IF: at least one partial/open sub-question is on the critical path to the deliverable AND iterationCount < iterationCap AND named gap differs from lastGapName (no-progress detector).\n\nProcedure:\n1. Read merged-pass-<passNumber>.json and dependency-matrix.json (passNumber = iterationCount + 1, computed before any increment in step 3).\n2. Classify each sub-question. Write gap-analysis.md with three sections.\n3. Determine loop decision:\n - If iterationCount >= iterationCap: decision = stop.\n - If all resolved OR no critical-path gap: decision = stop.\n - If named gap = lastGapName: decision = stop (no-progress).\n - Else: decision = continue. Name the single highest-priority gap; update Plan with focused sub-question; increment iterationCount; set lastGapName.\n4. Append '## Phase 5 complete (pass <passNumber>): <decision>' to research-log.md.\n5. Emit the wr.loop_control artifact in complete_step artifacts: { kind: 'wr.loop_control', decision: <'continue'|'stop'> }\n\nOutput context keys: iterationCount (incremented if continuing), lastGapName, focusedSubQuestion (if continuing).\n\nVerify: Every sub-question classified; iteration decision respects cap and no-progress detector; wr.loop_control artifact emitted.",
|
|
122
|
+
"outputContract": {
|
|
123
|
+
"contractRef": "wr.contracts.loop_control"
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
]
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"id": "phase-6-synthesis",
|
|
130
|
+
"title": "Phase 6: Synthesis — structured findings before any prose",
|
|
131
|
+
"prompt": "Produce, in strict order, the structured contents of the Research Brief before any narrative prose. This discipline is what produces insight rather than summary.\n\nConstraints:\n- Restate the intake question verbatim at the top. Do not drift the question.\n- Structured sections first, prose last -- a summary cannot be produced from these artifacts, only synthesis can.\n- BLUF and ranked findings: only verified or inferred (with shown derivation) claims. prior:unverified inadmissible. single-source admissible in body only with [unconfirmed] marker.\n- Finding confidence band cannot exceed lowest-tier load-bearing claim.\n- Word budget: 800 (quick) / 2500 (deep) excluding appendices. Cut to fit.\n- Caps: <= 5 ranked findings, <= 3 recommended next steps.\n- 'What we do not know' must be non-empty and specific.\n\nProcedure:\n1. Restate intake question verbatim.\n2. Produce in exact order before any prose:\n a. Ranked findings (<= 5): confidence band H|M|L|U, strongest evidence for (cite), strongest evidence against (cite or 'no significant counter-evidence found').\n b. Contradictions: source pairs that disagree, both citations, one-line resolution or 'unresolved'.\n c. Falsified priors: every priors-ledger entry tagged falsified-pending-review, with the overturning claim. Promote to 'falsified' here.\n d. What we now know / What we still do not know: partition from Phase 5 sub-question statuses.\n e. Implications for the requestor's decision/goal: from intakeMode and goodEnoughCriteria.\n f. Recommended next steps (<= 3): each tied to a specific remaining unknown with estimated cost.\n3. Write BLUF (3-5 sentences answering the question directly). Then weave (a)-(f) into body prose. Stay under word budget.\n4. Save draft as brief.md.\n5. 
Append '## Phase 6 complete: <wordCount> words' to research-log.md.\n\nOutput context keys: briefPath, draftWordCount, rankedFindingsCount, falsifiedPriorCount, contradictionsCount.\n\nVerify: Intake question restated verbatim; structured sections exist before prose; no prior:unverified in BLUF or ranked findings; 'What we do not know' non-empty and specific; word count at or under budget."
|
|
132
|
+
},
|
|
133
|
+
{
|
|
134
|
+
"id": "phase-7-adversarial-review",
|
|
135
|
+
"title": "Phase 7: Adversarial review — isolated Executor, artifacts only",
|
|
136
|
+
"prompt": "Spawn a separate WorkRail Executor with artifacts-only context to produce the strongest argument that the BLUF and ranked-finding #1 are wrong. Structural isolation is the whole point.\n\nConstraints:\n- Executor receives ONLY: brief.md, claims/, priors-ledger.json, source-map.md file paths. NOT notesMarkdown from Phase 6, NOT any in-context narrative.\n- Executor has NO web tools. Its job is to challenge the existing evidence base, not extend it.\n- Executor writes only dissent.md.\n- 'Looks good' is not acceptable output.\n\nProcedure:\n1. Spawn one WorkRail Executor with file paths to brief.md, claims/ directory, priors-ledger.json, source-map.md. Tools: file:read only.\n2. Executor prompt (verbatim contract to include): 'Read brief.md, claims/, priors-ledger.json, and source-map.md. Produce the strongest written argument that the BLUF and ranked-finding #1 are wrong, using ONLY the evidence in these artifacts. If you cannot mount such an argument, identify the single weakest claim and explain in detail why -- citing evidence gaps, single-source dependencies, or unresolved contradictions. Reporting no significant dissent is acceptable ONLY IF you explicitly state the brief is unfalsifiable with available evidence and name what would change that. Write to dissent.md.'\n3. Wait for Executor. Read dissent.md.\n4. Append '## Phase 7 complete: dissent type = <substantive|weakest-claim|unfalsifiable>' to research-log.md.\n\nOutput context keys: dissentPath, dissentType (substantive|weakest-claim|unfalsifiable), dissentIdentifiesLoadBearingError (boolean).\n\nVerify: dissent.md exists and is non-trivial; Executor had file paths only; Executor had no web tools."
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"id": "phase-8-finalize",
|
|
140
|
+
"title": "Phase 8: Finalize, integrate dissent, validate, emit",
|
|
141
|
+
"prompt": "Integrate dissent into the brief, add the premortem, run the three-dimension validation gate, and emit the final brief.\n\nDissent integration:\n- If dissentIdentifiesLoadBearingError: revise brief to address it.\n- If dissent is legitimate weaker case: append verbatim under 'Dissent' section.\n- If unfalsifiable disclosure: include as 'No significant dissent reached threshold; reviewer noted: <quote>'.\n\nAdd premortem paragraph: 'If this brief turns out to be wrong six months from now, the most likely reason is ___'.\n\nAssemble final brief in canonical order: BLUF -> Ranked findings -> Contradictions -> Falsified priors -> What we know/do not know -> Implications -> Recommended next steps -> Dissent -> Premortem -> Evidence base [1]... -> Appendix A (Priors Ledger) -> Appendix B (Source Map) -> Appendix C (Dependency Matrix) -> Appendix D (Gap analysis log).\n\nRun validation gate (assessmentRefs: assessment-brief-quality). For each failed dimension:\n- structural_integrity low: fix section order, caps, or missing required sections (max 2 revise attempts).\n- confidence_integrity low: remove prior:unverified claims from BLUF/findings or add missing confidence bands (max 1 revise attempt).\n- focus_integrity low: cut to word budget, remove orphan claims, or fix question drift (max 1 revise attempt).\nAfter revise caps: set validationGateResult = fail and surface to user with named failed dimension.\n\nIf all pass: append '## Phase 8 complete: RESEARCH COMPLETE -- brief.md emitted' to research-log.md. Present the brief to the user.\n\nOutput context keys: validationGateResult (pass|fail), finalWordCount, finalBriefPath.\n\nVerify: Sections in canonical order; validation passed or failure named; premortem present; word count at or under budget; 'What we do not know' non-empty; no prior:unverified in BLUF or ranked findings.",
|
|
142
|
+
"assessmentRefs": [
|
|
143
|
+
"assessment-brief-quality"
|
|
144
|
+
],
|
|
145
|
+
"assessmentConsequences": [
|
|
146
|
+
{
|
|
147
|
+
"when": {
|
|
148
|
+
"anyEqualsLevel": "low"
|
|
149
|
+
},
|
|
150
|
+
"effect": {
|
|
151
|
+
"kind": "require_followup",
|
|
152
|
+
"guidance": "structural_integrity low: fix section order, caps, or missing required sections (BLUF 3-5 sentences, findings <=5, next-steps <=3, What-we-do-not-know non-empty, Dissent present, Premortem present). confidence_integrity low: remove prior:unverified claims from BLUF and ranked findings; ensure all findings have confidence bands; check no finding band exceeds its lowest-tier supporting claim. focus_integrity low: cut prose to word budget (800 quick / 2500 deep excluding appendices); tie all body claims to ..."
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
]
|
|
156
|
+
}
|
|
157
|
+
]
|
|
158
|
+
}
|