@exaudeus/workrail 3.59.6 → 3.59.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/index.d.ts +0 -1
- package/dist/cli/commands/index.js +1 -3
- package/dist/cli-worktrain.js +2 -51
- package/dist/console/standalone-console.js +1 -1
- package/dist/console-ui/assets/{index-xMwhHmR2.js → index-CtUfpXCx.js} +1 -1
- package/dist/console-ui/index.html +1 -1
- package/dist/daemon/workflow-runner.js +1 -1
- package/dist/manifest.json +21 -37
- package/dist/trigger/polling-scheduler.js +1 -1
- package/dist/v2/usecases/console-routes.d.ts +1 -3
- package/dist/v2/usecases/console-routes.js +28 -107
- package/docs/design/dispatch-dedup-prealloc-bypass-implementation-plan.md +2 -0
- package/docs/ideas/backlog.md +91 -0
- package/package.json +1 -1
- package/dist/cli/commands/worktrain-trigger-poll.d.ts +0 -23
- package/dist/cli/commands/worktrain-trigger-poll.js +0 -94
- package/dist/trigger/daemon-console.d.ts +0 -28
- package/dist/trigger/daemon-console.js +0 -120
package/dist/manifest.json
CHANGED
|
@@ -238,8 +238,8 @@
|
|
|
238
238
|
"bytes": 31
|
|
239
239
|
},
|
|
240
240
|
"cli-worktrain.js": {
|
|
241
|
-
"sha256": "
|
|
242
|
-
"bytes":
|
|
241
|
+
"sha256": "ffe47ef9595c11968952fe573e7ec11bf87813bd0197d24c173d7740479b8f5b",
|
|
242
|
+
"bytes": 58480
|
|
243
243
|
},
|
|
244
244
|
"cli.d.ts": {
|
|
245
245
|
"sha256": "43e818adf60173644896298637f47b01d5819b17eda46eaa32d0c7d64724d012",
|
|
@@ -258,12 +258,12 @@
|
|
|
258
258
|
"bytes": 745
|
|
259
259
|
},
|
|
260
260
|
"cli/commands/index.d.ts": {
|
|
261
|
-
"sha256": "
|
|
262
|
-
"bytes":
|
|
261
|
+
"sha256": "088a55b17f60d1b3f6eb289d51b4af75b44a7ef72b772a95b867321ebd3cb3ed",
|
|
262
|
+
"bytes": 2522
|
|
263
263
|
},
|
|
264
264
|
"cli/commands/index.js": {
|
|
265
|
-
"sha256": "
|
|
266
|
-
"bytes":
|
|
265
|
+
"sha256": "60eed410bbf8faae535cb5f27971159a943bbc480450108683623d79d1fe1ad9",
|
|
266
|
+
"bytes": 5818
|
|
267
267
|
},
|
|
268
268
|
"cli/commands/init.d.ts": {
|
|
269
269
|
"sha256": "b5f8b88a072c68509dab3938ba1d6b4a949ad32f8fc55e91c5039b8c77301c1b",
|
|
@@ -393,14 +393,6 @@
|
|
|
393
393
|
"sha256": "b0286fef461835a0b73070fd278e43af5f3a1fbebbe1c6de1fc39ace4075df8f",
|
|
394
394
|
"bytes": 1395
|
|
395
395
|
},
|
|
396
|
-
"cli/commands/worktrain-trigger-poll.d.ts": {
|
|
397
|
-
"sha256": "73cffd1f6a44d8b9ef35bd15492dabc32d3d6e907c52617c7f482fd5229aed93",
|
|
398
|
-
"bytes": 934
|
|
399
|
-
},
|
|
400
|
-
"cli/commands/worktrain-trigger-poll.js": {
|
|
401
|
-
"sha256": "aedf752565bbf6053bfdd79a7b1452b67e4c786e01a160aede9390b2313ccf91",
|
|
402
|
-
"bytes": 3920
|
|
403
|
-
},
|
|
404
396
|
"cli/commands/worktrain-trigger-test.d.ts": {
|
|
405
397
|
"sha256": "3b85edacabf0657b208892f13b8fb540f794f47f18b5a1263562d3518f7fce43",
|
|
406
398
|
"bytes": 1357
|
|
@@ -481,16 +473,16 @@
|
|
|
481
473
|
"sha256": "5fe866e54f796975dec5d8ba9983aefd86074db212d3fccd64eed04bc9f0b3da",
|
|
482
474
|
"bytes": 8011
|
|
483
475
|
},
|
|
476
|
+
"console-ui/assets/index-CtUfpXCx.js": {
|
|
477
|
+
"sha256": "a0106452fd0eb629474a299b2eb757cae92578a4c5ed792bd6a2daefb203645f",
|
|
478
|
+
"bytes": 760528
|
|
479
|
+
},
|
|
484
480
|
"console-ui/assets/index-DGj8EsFR.css": {
|
|
485
481
|
"sha256": "3bdb55ec0957928e0ebbb86a7d6b36d28f7ba7d5c0f3e236fd8f2e2aacee2fa4",
|
|
486
482
|
"bytes": 60631
|
|
487
483
|
},
|
|
488
|
-
"console-ui/assets/index-xMwhHmR2.js": {
|
|
489
|
-
"sha256": "9fbff5f59a5e014930778fd79a1de39f532b20862a4fa10d7320d12602b445fc",
|
|
490
|
-
"bytes": 760528
|
|
491
|
-
},
|
|
492
484
|
"console-ui/index.html": {
|
|
493
|
-
"sha256": "
|
|
485
|
+
"sha256": "5cb6ae1aa3a61a3c9eb6138d92f686d9880cb4822cc96c2399c9eb0c223ce67c",
|
|
494
486
|
"bytes": 417
|
|
495
487
|
},
|
|
496
488
|
"console/standalone-console.d.ts": {
|
|
@@ -498,8 +490,8 @@
|
|
|
498
490
|
"bytes": 788
|
|
499
491
|
},
|
|
500
492
|
"console/standalone-console.js": {
|
|
501
|
-
"sha256": "
|
|
502
|
-
"bytes":
|
|
493
|
+
"sha256": "bfe72ea5161d6d3af917180fad905abae05b9ae5cb329dadbe3d8acaed6aed9a",
|
|
494
|
+
"bytes": 6314
|
|
503
495
|
},
|
|
504
496
|
"context-assembly/deps.d.ts": {
|
|
505
497
|
"sha256": "d699ae8f8f081d92708eba5969e35cf24a45d3f86de72d308ffc4a542b954bc3",
|
|
@@ -650,8 +642,8 @@
|
|
|
650
642
|
"bytes": 7307
|
|
651
643
|
},
|
|
652
644
|
"daemon/workflow-runner.js": {
|
|
653
|
-
"sha256": "
|
|
654
|
-
"bytes":
|
|
645
|
+
"sha256": "0d4991c3589e75679d4035d506d84ebe595df8328c6e632d352597c9e23ad741",
|
|
646
|
+
"bytes": 95348
|
|
655
647
|
},
|
|
656
648
|
"di/container.d.ts": {
|
|
657
649
|
"sha256": "003bb7fb7478d627524b9b1e76bd0a963a243794a687ff233b96dc0e33a06d9f",
|
|
@@ -1661,14 +1653,6 @@
|
|
|
1661
1653
|
"sha256": "6728a2169f4007b9ea0414fade6b21500500d9c79d0b09296d92ef8bcabb9c79",
|
|
1662
1654
|
"bytes": 2763
|
|
1663
1655
|
},
|
|
1664
|
-
"trigger/daemon-console.d.ts": {
|
|
1665
|
-
"sha256": "a3b9a9f58482c6ea379c0e02c30f55a5820c7c37fa3fae55fc336cd518f35462",
|
|
1666
|
-
"bytes": 1162
|
|
1667
|
-
},
|
|
1668
|
-
"trigger/daemon-console.js": {
|
|
1669
|
-
"sha256": "f2f09c05e48b42ebf1c7be137fc6eced46673048471b7114434710b5691fe6f2",
|
|
1670
|
-
"bytes": 5497
|
|
1671
|
-
},
|
|
1672
1656
|
"trigger/delivery-action.d.ts": {
|
|
1673
1657
|
"sha256": "559e2b2645aa60528f73de351cd35ebf45c5b82f47797aa15ddd681319315d39",
|
|
1674
1658
|
"bytes": 1759
|
|
@@ -1722,8 +1706,8 @@
|
|
|
1722
1706
|
"bytes": 1126
|
|
1723
1707
|
},
|
|
1724
1708
|
"trigger/polling-scheduler.js": {
|
|
1725
|
-
"sha256": "
|
|
1726
|
-
"bytes":
|
|
1709
|
+
"sha256": "61b94e35aae2e9578a9e9cc32548791166b9ec98abb8f2cff58135fc6b3e5593",
|
|
1710
|
+
"bytes": 23945
|
|
1727
1711
|
},
|
|
1728
1712
|
"trigger/trigger-listener.d.ts": {
|
|
1729
1713
|
"sha256": "1eebb3d4829030b264c3798b0b0d55d7357d313ab83e3f344ad455eaafcedb44",
|
|
@@ -3070,12 +3054,12 @@
|
|
|
3070
3054
|
"bytes": 4795
|
|
3071
3055
|
},
|
|
3072
3056
|
"v2/usecases/console-routes.d.ts": {
|
|
3073
|
-
"sha256": "
|
|
3074
|
-
"bytes":
|
|
3057
|
+
"sha256": "bb8abe9b6510ab09ab9aec1af24c253bf16b538c18c5ac583caffe48ef347536",
|
|
3058
|
+
"bytes": 596
|
|
3075
3059
|
},
|
|
3076
3060
|
"v2/usecases/console-routes.js": {
|
|
3077
|
-
"sha256": "
|
|
3078
|
-
"bytes":
|
|
3061
|
+
"sha256": "80d7629410da6b991471497b209d85969192d1fe3601af9e643e030b93b9aa54",
|
|
3062
|
+
"bytes": 28556
|
|
3079
3063
|
},
|
|
3080
3064
|
"v2/usecases/console-service.d.ts": {
|
|
3081
3065
|
"sha256": "fc8fe65427fa9f4f3535344b385b36f66ca06b7e3bfaea708931817a3edcad2b",
|
|
@@ -475,7 +475,7 @@ function extractDotPath(obj, rawPath) {
|
|
|
475
475
|
async function countActiveSessions(sessionsDir) {
|
|
476
476
|
try {
|
|
477
477
|
const files = await fs.readdir(sessionsDir);
|
|
478
|
-
return files.filter((f) => f.endsWith('.json')).length;
|
|
478
|
+
return files.filter((f) => f.endsWith('.json') && !f.startsWith('queue-issue-')).length;
|
|
479
479
|
}
|
|
480
480
|
catch {
|
|
481
481
|
return 0;
|
|
@@ -2,7 +2,5 @@ import type { Application } from 'express';
|
|
|
2
2
|
import type { ConsoleService } from './console-service.js';
|
|
3
3
|
import type { WorkflowService } from '../../application/services/workflow-service.js';
|
|
4
4
|
import type { ToolCallTimingRingBuffer } from '../../mcp/tool-call-timing.js';
|
|
5
|
-
import type { TriggerRouter } from '../../trigger/trigger-router.js';
|
|
6
5
|
import type { V2ToolContext } from '../../mcp/types.js';
|
|
7
|
-
|
|
8
|
-
export declare function mountConsoleRoutes(app: Application, consoleService: ConsoleService, workflowService?: WorkflowService, timingRingBuffer?: ToolCallTimingRingBuffer, toolCallsPerfFile?: string, serverVersion?: string, v2ToolContext?: V2ToolContext, triggerRouter?: TriggerRouter, steerRegistry?: SteerRegistry, pollingScheduler?: import('../../trigger/polling-scheduler.js').PollingScheduler): () => void;
|
|
6
|
+
export declare function mountConsoleRoutes(app: Application, consoleService: ConsoleService, workflowService?: WorkflowService, timingRingBuffer?: ToolCallTimingRingBuffer, toolCallsPerfFile?: string, serverVersion?: string, v2ToolContext?: V2ToolContext): () => void;
|
|
@@ -91,7 +91,7 @@ function loadWorkflowTags() {
|
|
|
91
91
|
return { version: 0, tags: [], workflows: {} };
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
|
-
function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuffer, toolCallsPerfFile, serverVersion, v2ToolContext
|
|
94
|
+
function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuffer, toolCallsPerfFile, serverVersion, v2ToolContext) {
|
|
95
95
|
const sseClients = new Set();
|
|
96
96
|
let sseDebounceTimer = null;
|
|
97
97
|
function broadcastChange() {
|
|
@@ -429,13 +429,11 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
|
|
|
429
429
|
repoRootsExpiresAt = Date.now() + REPO_ROOTS_TTL_MS;
|
|
430
430
|
}
|
|
431
431
|
const repoRoots = cachedRepoRoots;
|
|
432
|
-
const
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
timeoutPromise,
|
|
438
|
-
]);
|
|
432
|
+
const worktreeWork = (0, worktree_service_js_1.getWorktreeList)(repoRoots, activeSessions)
|
|
433
|
+
.finally(() => { if (timeoutId !== null)
|
|
434
|
+
clearTimeout(timeoutId); })
|
|
435
|
+
.catch(() => ({ repos: [] }));
|
|
436
|
+
const data = await Promise.race([worktreeWork, timeoutPromise]);
|
|
439
437
|
if (timeoutId !== null)
|
|
440
438
|
clearTimeout(timeoutId);
|
|
441
439
|
res.json({ success: true, data });
|
|
@@ -532,7 +530,7 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
|
|
|
532
530
|
}
|
|
533
531
|
app.post('/api/v2/auto/dispatch', express_1.default.json(), async (req, res) => {
|
|
534
532
|
if (!v2ToolContext) {
|
|
535
|
-
res.status(503).json({ success: false, error: 'Autonomous dispatch requires
|
|
533
|
+
res.status(503).json({ success: false, error: 'Autonomous dispatch requires the WorkTrain daemon. Run worktrain console alongside worktrain daemon to enable browser dispatch.' });
|
|
536
534
|
return;
|
|
537
535
|
}
|
|
538
536
|
const body = req.body;
|
|
@@ -590,107 +588,30 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
|
|
|
590
588
|
sessionHandle = workflowId;
|
|
591
589
|
}
|
|
592
590
|
const trigger = { workflowId, goal, workspacePath, context, _preAllocatedStartResponse: startResponse };
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
else {
|
|
614
|
-
(0, assert_never_js_1.assertNever)(result);
|
|
615
|
-
}
|
|
616
|
-
});
|
|
617
|
-
}
|
|
591
|
+
void (0, workflow_runner_js_1.runWorkflow)(trigger, v2ToolContext, apiKey ?? '', undefined, undefined, undefined).then((result) => {
|
|
592
|
+
if (result._tag === 'success') {
|
|
593
|
+
console.log(`[ConsoleRoutes] Auto dispatch completed: workflowId=${workflowId} stopReason=${result.stopReason}`);
|
|
594
|
+
}
|
|
595
|
+
else if (result._tag === 'delivery_failed') {
|
|
596
|
+
console.log(`[ConsoleRoutes] Auto dispatch delivery failed: workflowId=${workflowId}`);
|
|
597
|
+
}
|
|
598
|
+
else if (result._tag === 'timeout') {
|
|
599
|
+
console.log(`[ConsoleRoutes] Auto dispatch timed out: workflowId=${workflowId}`);
|
|
600
|
+
}
|
|
601
|
+
else if (result._tag === 'error') {
|
|
602
|
+
console.log(`[ConsoleRoutes] Auto dispatch failed: workflowId=${workflowId} error=${result.message}`);
|
|
603
|
+
}
|
|
604
|
+
else if (result._tag === 'stuck') {
|
|
605
|
+
console.log(`[ConsoleRoutes] Auto dispatch stuck: workflowId=${workflowId} reason=${result.reason} message=${result.message}`);
|
|
606
|
+
}
|
|
607
|
+
else {
|
|
608
|
+
(0, assert_never_js_1.assertNever)(result);
|
|
609
|
+
}
|
|
610
|
+
});
|
|
618
611
|
res.json({ success: true, data: { status: 'dispatched', workflowId, sessionHandle } });
|
|
619
612
|
});
|
|
620
613
|
app.get('/api/v2/triggers', (_req, res) => {
|
|
621
|
-
|
|
622
|
-
res.json({ success: true, data: { triggers: [] } });
|
|
623
|
-
return;
|
|
624
|
-
}
|
|
625
|
-
const triggers = triggerRouter.listTriggers().map((t) => ({
|
|
626
|
-
id: t.id,
|
|
627
|
-
provider: t.provider,
|
|
628
|
-
workflowId: t.workflowId,
|
|
629
|
-
workspacePath: t.workspacePath,
|
|
630
|
-
goal: t.goal,
|
|
631
|
-
}));
|
|
632
|
-
res.json({ success: true, data: { triggers } });
|
|
633
|
-
});
|
|
634
|
-
app.post('/api/v2/triggers/:triggerId/poll', async (req, res) => {
|
|
635
|
-
if (!pollingScheduler) {
|
|
636
|
-
res.status(503).json({ success: false, error: 'Force poll not available (not a daemon context).' });
|
|
637
|
-
return;
|
|
638
|
-
}
|
|
639
|
-
const triggerId = req.params['triggerId'] ?? '';
|
|
640
|
-
if (!triggerId) {
|
|
641
|
-
res.status(400).json({ success: false, error: 'Missing triggerId' });
|
|
642
|
-
return;
|
|
643
|
-
}
|
|
644
|
-
const result = await pollingScheduler.forcePoll(triggerId);
|
|
645
|
-
switch (result.kind) {
|
|
646
|
-
case 'ok':
|
|
647
|
-
res.json({
|
|
648
|
-
success: true,
|
|
649
|
-
data: {
|
|
650
|
-
triggerId,
|
|
651
|
-
cycleRan: result.cycleRan,
|
|
652
|
-
message: result.cycleRan
|
|
653
|
-
? `Poll cycle started for trigger '${triggerId}'.`
|
|
654
|
-
: `Poll cycle skipped for trigger '${triggerId}' -- a previous cycle is still running.`,
|
|
655
|
-
},
|
|
656
|
-
});
|
|
657
|
-
return;
|
|
658
|
-
case 'not_found':
|
|
659
|
-
res.status(400).json({ success: false, error: `Trigger '${triggerId}' not found` });
|
|
660
|
-
return;
|
|
661
|
-
case 'wrong_provider':
|
|
662
|
-
res.status(400).json({
|
|
663
|
-
success: false,
|
|
664
|
-
error: `Trigger '${triggerId}' is not a queue poll trigger (provider: ${result.provider})`,
|
|
665
|
-
});
|
|
666
|
-
return;
|
|
667
|
-
default: {
|
|
668
|
-
const _exhaustive = result;
|
|
669
|
-
res.status(500).json({ success: false, error: 'Unexpected forcePoll result' });
|
|
670
|
-
void _exhaustive;
|
|
671
|
-
return;
|
|
672
|
-
}
|
|
673
|
-
}
|
|
674
|
-
});
|
|
675
|
-
app.post('/api/v2/sessions/:sessionId/steer', express_1.default.json(), (req, res) => {
|
|
676
|
-
if (!steerRegistry) {
|
|
677
|
-
res.status(503).json({ success: false, error: 'Steer not available (not a daemon context).' });
|
|
678
|
-
return;
|
|
679
|
-
}
|
|
680
|
-
const { sessionId } = req.params;
|
|
681
|
-
const body = req.body;
|
|
682
|
-
const text = typeof body.text === 'string' ? body.text.trim() : '';
|
|
683
|
-
if (!text) {
|
|
684
|
-
res.status(400).json({ success: false, error: 'text is required and must be a non-empty string.' });
|
|
685
|
-
return;
|
|
686
|
-
}
|
|
687
|
-
const callback = steerRegistry.get(sessionId);
|
|
688
|
-
if (!callback) {
|
|
689
|
-
res.status(404).json({ success: false, error: 'Session not found or not a daemon session.' });
|
|
690
|
-
return;
|
|
691
|
-
}
|
|
692
|
-
callback(text);
|
|
693
|
-
res.json({ success: true });
|
|
614
|
+
res.json({ success: true, data: { triggers: [] } });
|
|
694
615
|
});
|
|
695
616
|
const consoleDist = resolveConsoleDist();
|
|
696
617
|
if (consoleDist) {
|
package/docs/ideas/backlog.md
CHANGED
|
@@ -7486,3 +7486,94 @@ Medium for the cleanup command (quality of life, stops log noise). High for star
|
|
|
7486
7486
|
**Files:** `src/coordinators/modes/implement-shared.ts`, `src/coordinators/pr-review.ts`.
|
|
7487
7487
|
|
|
7488
7488
|
**Priority:** Medium. Correctness issues that won't crash in production but make future refactors unsafe.
|
|
7489
|
+
|
|
7490
|
+
---
|
|
7491
|
+
|
|
7492
|
+
## Current state update (Apr 21, 2026)
|
|
7493
|
+
|
|
7494
|
+
**npm version: v3.59.6** | Daemon PID: 54113 | Status: Running, pipeline active
|
|
7495
|
+
|
|
7496
|
+
### What shipped in this session (Apr 19-21, 2026)
|
|
7497
|
+
|
|
7498
|
+
**All five autonomous pipeline items (previously recorded) plus:**
|
|
7499
|
+
|
|
7500
|
+
- ✅ **Discovery loop fix** (#748) -- three coupled fixes: thread `maxSessionMinutes` through `spawnSession` (sessions now get 55/35/65 min instead of 30 min default), inspect `PipelineOutcome` in polling-scheduler and apply `worktrain:in-progress` label on escalation, write issue-ownership sidecar for cross-restart idempotency
|
|
7501
|
+
- ✅ **In-process `awaitSessions` and `getAgentResult`** (#741) -- replaced HTTP calls to the daemon's own console with direct `ConsoleService` access
|
|
7502
|
+
- ✅ **Try/catch on all coordinator I/O** (#740) -- `getAgentResult`, `pollForPR`, `postToOutbox` all wrapped; coordinator no longer crashes on I/O failure
|
|
7503
|
+
- ✅ **Dispatch dedup prealloc bypass** (#744) -- `dispatch()` now bypasses dedup for pre-allocated sessions, fixing the zombie session bug that prevented discovery from starting
|
|
7504
|
+
- ✅ **Promise.race crash fix** (#733) -- worktrees scan timeout no longer crashes the daemon via unhandled rejection
|
|
7505
|
+
- ✅ **Trigger validator** (#690) -- `worktrain trigger validate` command, `validateTriggerStrict()` pure function
|
|
7506
|
+
- ✅ **`worktrain trigger poll`** (#697) -- force immediate poll cycle on any queue trigger
|
|
7507
|
+
- ✅ **`worktrain trigger test`** (#656) -- dry-run showing what would dispatch
|
|
7508
|
+
- ✅ **Auto-load ~/.workrail/.env** (#673) -- daemon reads secrets from .env automatically
|
|
7509
|
+
- ✅ **Daemon lifecycle events** (#674) -- `session_aborted` on SIGTERM, `daemon_heartbeat` every 30s
|
|
7510
|
+
- ✅ **Attribution signals** (#658) -- `[WT]` PR title prefix, `Co-authored-by: WorkTrain` commit trailers, `worktrain:generated` label
|
|
7511
|
+
- ✅ **Secret scan before push** (#660) -- pattern-based scan blocks commits with leaked credentials
|
|
7512
|
+
- ✅ **Unified logs stream** (#680) -- `worktrain logs` now merges daemon events, queue-poll.jsonl, and filtered stderr
|
|
7513
|
+
- ✅ **Stale lock file handling** (#705) -- validates lock file PID before trusting port discovery
|
|
7514
|
+
- ✅ **5 architectural audits** (docs/design/) -- coordinator access, error handling, testability, type bloat, memory management
|
|
7515
|
+
- ✅ **Stale user workflow cleanup** -- removed old copies from `~/.workrail/workflows/` that were causing ValidationError noise
|
|
7516
|
+
|
|
7517
|
+
### Current pipeline state (live)
|
|
7518
|
+
|
|
7519
|
+
Discovery session `ecf359d7` running: 77 turns, 11 step advances (active, making real progress on issue #393). Session `b7df0c8b` also running (just started). First clean run after all pipeline fixes landed.
|
|
7520
|
+
|
|
7521
|
+
### Accurate limitations (v3.59.6)
|
|
7522
|
+
|
|
7523
|
+
1. **Ghost sessions in event log** -- sessions killed by daemon crashes don't get `session_aborted` events from old daemon instances. New daemons emit it on shutdown, but historical sessions show as RUNNING.
|
|
7524
|
+
2. **Worktree orphan leak** -- if worktree removal in `maybeRunDelivery()` fails after the sidecar has been deleted, the orphaned worktree is invisible to `runStartupRecovery`. See backlog.
|
|
7525
|
+
3. **`queue-poll.jsonl` never rotated** -- disk exhaustion risk on long-running daemons. See backlog.
|
|
7526
|
+
4. **`ReviewSeverity` missing `assertNever`** -- future variants silently fall through. See backlog.
|
|
7527
|
+
5. **`process.stderr.write` in `readVerdictArtifact`** -- bypasses injected dep, invisible to test fakes. See backlog.
|
|
7528
|
+
6. **WorkRail MCP stale state** -- `workrail cleanup` command doesn't exist yet. Manual cleanup needed for dead managed sources, old session accumulation.
|
|
7529
|
+
7. **Trigger validation static/runtime gap** -- some runtime checks not in static validator. See trigger-validation-gap-audit.md.
|
|
7530
|
+
8. **WorkflowTrigger type bloat** -- mixes trigger config, session runtime state, delivery config. See workflow-trigger-lifecycle-audit.md.
|
|
7531
|
+
9. **Conversation history not persisted** -- LLM conversation history is in-memory only. On crash, context is lost. See backlog.
|
|
7532
|
+
|
|
7533
|
+
### Next priorities (groomed Apr 21)
|
|
7534
|
+
|
|
7535
|
+
1. **Watch the current pipeline run** -- discovery `ecf359d7` is active at 77 turns/11 steps. If it completes, shaping and coding should fire automatically. First end-to-end validation.
|
|
7536
|
+
2. **Execution time tracking** -- add session timing to `execution-stats.jsonl` for timeout calibration. Small change in `runWorkflow()` finally block.
|
|
7537
|
+
3. **Three audit findings from the limitations list above** -- worktree orphan leak (limitation 2), queue-poll rotation (limitation 3), assertNever fixes (limitation 4). All small, targeted.
|
|
7538
|
+
4. **`workrail cleanup` command** -- removes dead managed sources, rotates old session files, clears stale git caches. Stops ValidationError noise in MCP server logs.
|
|
7539
|
+
5. **Conversation history persistence** -- `conversation.jsonl` per session, append-only. Prerequisite for true crash recovery.
|
|
7540
|
+
6. **Autonomous crash recovery and interrupted-session resume** -- see full entry below (Apr 21).
|
|
7541
|
+
|
|
7542
|
+
---
|
|
7543
|
+
|
|
7544
|
+
## Autonomous crash recovery and interrupted-session resume (Apr 21, 2026)
|
|
7545
|
+
|
|
7546
|
+
**The problem we hit today:** A daemon crash loop (console `worktrees scan` unhandled rejection) killed all in-flight sessions. The queue correctly detected the sidecar and skipped re-dispatch for 56 min (TTL), but when the sidecar expired the session was re-dispatched from scratch with zero context from the previous attempt. The agent had already spent ~10 min in Phase 0, read codebase files, and formed a plan -- all of that work was lost.
|
|
7547
|
+
|
|
7548
|
+
**What we want:** WorkTrain should be able to detect orphaned sessions on startup and make an autonomous decision: resume if the session had meaningful progress, discard and re-dispatch from scratch if it was too early to be worth resuming.
|
|
7549
|
+
|
|
7550
|
+
**Resumability decision criteria (heuristics):**
|
|
7551
|
+
- Session had >= 1 `continue_workflow` call (at least one step advance): worth resuming -- the agent made real progress.
|
|
7552
|
+
- Session is at step 0 with 0 advances but > 5 LLM turns: borderline -- context was accumulated but no checkpoint was written. Resume is risky (stale context); discard is safer. This case could be surfaced to the console for a human decision.
|
|
7553
|
+
- Session is at step 0, <= 5 turns, < 2 min: discard -- nothing was lost.
|
|
7554
|
+
- Session's worktree is missing or corrupted: discard -- can't resume cleanly.
|
|
7555
|
+
- Session is on a coding workflow and has uncommitted changes in the worktree: pause for human review before discarding (could have partial work).
|
|
7556
|
+
|
|
7557
|
+
**Implementation sketch:**
|
|
7558
|
+
|
|
7559
|
+
1. **On daemon startup**, `runStartupRecovery()` already scans `daemon-sessions/` for orphaned token files. Extend it to also inspect the session event log for each orphan:
|
|
7560
|
+
- Count `continue_workflow` calls and LLM turns from `~/.workrail/events/<sessionId>.jsonl`
|
|
7561
|
+
- Apply decision criteria above
|
|
7562
|
+
- For resume candidates: call `continue_workflow` with the checkpoint token and a synthesized re-entry prompt: "You are resuming a session that was interrupted by a daemon crash. Your last known step was [stepLabel]. Continue from where you left off."
|
|
7563
|
+
- For discard candidates: emit `session_aborted` event, delete the sidecar, re-add the issue to the queue (or just let the TTL expire and the queue re-select naturally)
|
|
7564
|
+
|
|
7565
|
+
2. **Conversation history prerequisite**: Resume is only useful if the agent can reconstruct its context. Today, conversation history is in-memory only -- it is lost on crash. The `conversation.jsonl` per-session persistence (item 5 under "Next priorities" above) is a prerequisite for high-quality resume. Without it, resume starts from the workflow system prompt plus the current step recap only. This is enough for mid-pipeline phases (shaping, coding) since they read artifacts from disk. It may be insufficient for early discovery phases.
|
|
7566
|
+
|
|
7567
|
+
3. **`worktrain session resume <sessionId>` CLI** -- manual override for human-initiated resume. Useful when the daemon's automatic heuristic chose to discard but the user sees partial work worth keeping.
|
|
7568
|
+
|
|
7569
|
+
4. **Queue sidecar TTL for resume vs. discard**: Today the sidecar TTL prevents re-dispatch during the entire pipeline window (56 min). With autonomous resume, the TTL for a discarded session should be much shorter (5 min) so the queue can quickly re-select. For a resumed session, keep the full TTL and extend it by the time already spent.
|
|
7570
|
+
|
|
7571
|
+
**Files to change:**
|
|
7572
|
+
- `src/daemon/workflow-runner.ts` -- `runStartupRecovery()`: add event log inspection and conditional resume
|
|
7573
|
+
- `src/trigger/polling-scheduler.ts` -- `doPollGitHubQueue()`: accept a `ttlOverride` param so discard path uses short TTL
|
|
7574
|
+
- `src/trigger/adapters/github-queue-poller.ts` -- `checkIdempotency()`: handle expired sidecars with `ttlOverride`
|
|
7575
|
+
- New: `src/daemon/session-recovery-policy.ts` -- pure function `evaluateRecovery(orphan, eventLog) -> 'resume' | 'discard' | 'human_review'`
|
|
7576
|
+
|
|
7577
|
+
**Priority:** High. Every daemon crash currently wastes all in-flight work and waits up to 56 min before retrying. With even basic resume (step > 0 → resume, step = 0 → discard + fast re-dispatch), we'd recover most of the lost work and reduce retry latency from 56 min to < 5 min.
|
|
7578
|
+
|
|
7579
|
+
**Depends on:** Conversation history persistence (for high-quality resume context).
|
package/package.json
CHANGED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import type { CliResult } from '../types/cli-result.js';
|
|
2
|
-
export interface WorktrainTriggerPollDeps {
|
|
3
|
-
readonly fetch: (url: string, opts: {
|
|
4
|
-
method: string;
|
|
5
|
-
signal?: AbortSignal;
|
|
6
|
-
}) => Promise<{
|
|
7
|
-
readonly ok: boolean;
|
|
8
|
-
readonly status: number;
|
|
9
|
-
readonly json: () => Promise<unknown>;
|
|
10
|
-
}>;
|
|
11
|
-
readonly readFile: (path: string) => Promise<string>;
|
|
12
|
-
readonly deleteFile: (path: string) => Promise<void>;
|
|
13
|
-
readonly isPidAlive: (pid: number) => boolean;
|
|
14
|
-
readonly print: (line: string) => void;
|
|
15
|
-
readonly stderr: (line: string) => void;
|
|
16
|
-
readonly homedir: () => string;
|
|
17
|
-
readonly joinPath: (...paths: string[]) => string;
|
|
18
|
-
}
|
|
19
|
-
export interface WorktrainTriggerPollOpts {
|
|
20
|
-
readonly triggerId: string;
|
|
21
|
-
readonly port?: number;
|
|
22
|
-
}
|
|
23
|
-
export declare function executeWorktrainTriggerPollCommand(deps: WorktrainTriggerPollDeps, opts: WorktrainTriggerPollOpts): Promise<CliResult>;
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.executeWorktrainTriggerPollCommand = executeWorktrainTriggerPollCommand;
|
|
4
|
-
const cli_result_js_1 = require("../types/cli-result.js");
|
|
5
|
-
const DEFAULT_POLL_PORT = 3200;
|
|
6
|
-
const LOCK_FILE_NAMES = ['daemon-console.lock', 'dashboard.lock'];
|
|
7
|
-
async function discoverConsolePort(deps, portOverride) {
|
|
8
|
-
if (portOverride !== undefined && portOverride > 0) {
|
|
9
|
-
return portOverride;
|
|
10
|
-
}
|
|
11
|
-
let staleLockPath;
|
|
12
|
-
for (const lockFileName of LOCK_FILE_NAMES) {
|
|
13
|
-
const lockPath = deps.joinPath(deps.homedir(), '.workrail', lockFileName);
|
|
14
|
-
try {
|
|
15
|
-
const raw = await deps.readFile(lockPath);
|
|
16
|
-
const parsed = JSON.parse(raw);
|
|
17
|
-
if (typeof parsed.pid === 'number' && parsed.pid > 0) {
|
|
18
|
-
if (!deps.isPidAlive(parsed.pid)) {
|
|
19
|
-
deps.stderr(`[Poll] ${lockFileName} points to dead PID ${parsed.pid} -- skipping stale lock, falling back to port ${DEFAULT_POLL_PORT}`);
|
|
20
|
-
staleLockPath = lockPath;
|
|
21
|
-
continue;
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
if (typeof parsed.port === 'number' && parsed.port > 0) {
|
|
25
|
-
return parsed.port;
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
catch {
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
if (staleLockPath !== undefined) {
|
|
32
|
-
try {
|
|
33
|
-
await deps.deleteFile(staleLockPath);
|
|
34
|
-
}
|
|
35
|
-
catch {
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
return DEFAULT_POLL_PORT;
|
|
39
|
-
}
|
|
40
|
-
async function executeWorktrainTriggerPollCommand(deps, opts) {
|
|
41
|
-
const triggerId = opts.triggerId.trim();
|
|
42
|
-
if (!triggerId) {
|
|
43
|
-
deps.stderr('[Poll] Error: triggerId must not be empty.');
|
|
44
|
-
return (0, cli_result_js_1.failure)('triggerId must not be empty.');
|
|
45
|
-
}
|
|
46
|
-
const port = await discoverConsolePort(deps, opts.port);
|
|
47
|
-
const url = `http://127.0.0.1:${port}/api/v2/triggers/${encodeURIComponent(triggerId)}/poll`;
|
|
48
|
-
deps.print(`[Poll] Forcing immediate poll cycle for trigger: ${triggerId}`);
|
|
49
|
-
let responseBody;
|
|
50
|
-
try {
|
|
51
|
-
const response = await deps.fetch(url, {
|
|
52
|
-
method: 'POST',
|
|
53
|
-
signal: AbortSignal.timeout(30000),
|
|
54
|
-
});
|
|
55
|
-
responseBody = await response.json();
|
|
56
|
-
if (!response.ok) {
|
|
57
|
-
const errMsg = typeof responseBody['error'] === 'string'
|
|
58
|
-
? responseBody['error']
|
|
59
|
-
: `HTTP ${response.status}`;
|
|
60
|
-
deps.stderr(`[Poll] Error: ${errMsg}`);
|
|
61
|
-
return (0, cli_result_js_1.failure)(errMsg);
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
catch (e) {
|
|
65
|
-
const msg = e instanceof Error ? e.message : String(e);
|
|
66
|
-
const isConnRefused = msg.includes('ECONNREFUSED') || msg.includes('fetch failed');
|
|
67
|
-
const isTimeout = e instanceof Error && e.name === 'TimeoutError';
|
|
68
|
-
if (isConnRefused) {
|
|
69
|
-
deps.stderr(`[Poll] Error: Could not connect to WorkTrain daemon on port ${port}. ` +
|
|
70
|
-
`Ensure the daemon is running with: worktrain daemon`);
|
|
71
|
-
return (0, cli_result_js_1.failure)(`Could not connect to daemon on port ${port}`);
|
|
72
|
-
}
|
|
73
|
-
if (isTimeout) {
|
|
74
|
-
deps.stderr(`[Poll] Error: Request timed out after 30s. The poll cycle may still be running.`);
|
|
75
|
-
return (0, cli_result_js_1.failure)('Request timed out after 30s');
|
|
76
|
-
}
|
|
77
|
-
deps.stderr(`[Poll] Error: ${msg}`);
|
|
78
|
-
return (0, cli_result_js_1.failure)(msg);
|
|
79
|
-
}
|
|
80
|
-
const body = responseBody;
|
|
81
|
-
const data = body['data'];
|
|
82
|
-
if (data !== undefined) {
|
|
83
|
-
const cycleRan = data['cycleRan'];
|
|
84
|
-
const message = typeof data['message'] === 'string' ? data['message'] : '';
|
|
85
|
-
if (cycleRan === true) {
|
|
86
|
-
deps.print(`[Poll] ${message || 'Poll cycle started.'}`);
|
|
87
|
-
}
|
|
88
|
-
else {
|
|
89
|
-
deps.print(`[Poll] ${message || 'Poll cycle skipped (previous cycle still running).'}`);
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
deps.print('[Poll] Done.');
|
|
93
|
-
return (0, cli_result_js_1.success)();
|
|
94
|
-
}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import 'reflect-metadata';
|
|
2
|
-
import type { V2ToolContext } from '../mcp/types.js';
|
|
3
|
-
import type { TriggerRouter } from './trigger-router.js';
|
|
4
|
-
import type { PollingScheduler } from './polling-scheduler.js';
|
|
5
|
-
import type { WorkflowService } from '../application/services/workflow-service.js';
|
|
6
|
-
import type { SteerRegistry } from '../daemon/workflow-runner.js';
|
|
7
|
-
import type { Result } from '../runtime/result.js';
|
|
8
|
-
export interface DaemonConsoleHandle {
|
|
9
|
-
readonly port: number;
|
|
10
|
-
stop(): Promise<void>;
|
|
11
|
-
}
|
|
12
|
-
export type DaemonConsoleError = {
|
|
13
|
-
readonly kind: 'port_conflict';
|
|
14
|
-
readonly port: number;
|
|
15
|
-
} | {
|
|
16
|
-
readonly kind: 'io_error';
|
|
17
|
-
readonly message: string;
|
|
18
|
-
};
|
|
19
|
-
export interface StartDaemonConsoleOptions {
|
|
20
|
-
readonly port?: number;
|
|
21
|
-
readonly triggerRouter?: TriggerRouter;
|
|
22
|
-
readonly serverVersion?: string;
|
|
23
|
-
readonly workflowService?: WorkflowService;
|
|
24
|
-
readonly lockFilePath?: string;
|
|
25
|
-
readonly steerRegistry?: SteerRegistry;
|
|
26
|
-
readonly pollingScheduler?: PollingScheduler;
|
|
27
|
-
}
|
|
28
|
-
export declare function startDaemonConsole(ctx: V2ToolContext, options?: StartDaemonConsoleOptions): Promise<Result<DaemonConsoleHandle, DaemonConsoleError>>;
|