@blockrun/franklin 3.9.6 → 3.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/context.js +1 -1
- package/dist/commands/task.d.ts +11 -0
- package/dist/commands/task.js +134 -0
- package/dist/index.js +16 -0
- package/dist/panel/html.js +492 -21
- package/dist/panel/server.js +127 -0
- package/dist/tasks/lost-detection.d.ts +15 -0
- package/dist/tasks/lost-detection.js +51 -0
- package/dist/tasks/paths.d.ts +12 -0
- package/dist/tasks/paths.js +32 -0
- package/dist/tasks/runner.d.ts +21 -0
- package/dist/tasks/runner.js +191 -0
- package/dist/tasks/spawn.d.ts +26 -0
- package/dist/tasks/spawn.js +72 -0
- package/dist/tasks/store.d.ts +24 -0
- package/dist/tasks/store.js +124 -0
- package/dist/tasks/types.d.ts +32 -0
- package/dist/tasks/types.js +14 -0
- package/dist/tools/detach.d.ts +9 -0
- package/dist/tools/detach.js +53 -0
- package/dist/tools/index.d.ts +2 -1
- package/dist/tools/index.js +3 -1
- package/dist/tools/tool-categories.js +4 -0
- package/package.json +1 -1
package/dist/panel/server.js
CHANGED
|
@@ -15,6 +15,10 @@ import { loadLearnings } from '../learnings/store.js';
|
|
|
15
15
|
import { readAudit } from '../stats/audit.js';
|
|
16
16
|
import { snapshot as marketsSnapshot } from '../trading/providers/telemetry.js';
|
|
17
17
|
import { describeWiring } from '../trading/providers/registry.js';
|
|
18
|
+
import { listTasks, readTaskMeta, readTaskEvents, } from '../tasks/store.js';
|
|
19
|
+
import { reconcileLostTasks } from '../tasks/lost-detection.js';
|
|
20
|
+
import { taskLogPath } from '../tasks/paths.js';
|
|
21
|
+
import { isTerminalTaskStatus } from '../tasks/types.js';
|
|
18
22
|
import { getHTML } from './html.js';
|
|
19
23
|
const sseClients = new Set();
|
|
20
24
|
function json(res, data, status = 200) {
|
|
@@ -380,6 +384,129 @@ export function createPanelServer(port) {
|
|
|
380
384
|
json(res, learnings);
|
|
381
385
|
return;
|
|
382
386
|
}
|
|
387
|
+
// ─── Tasks ─────────────────────────────────────────────────────────
|
|
388
|
+
// Background tasks dispatched via the Detach tool / `franklin task`.
|
|
389
|
+
// The list endpoint reconciles lost tasks (dead pids) before snapshot
|
|
390
|
+
// so the UI never displays a zombie as "running". Detail / log /
|
|
391
|
+
// events endpoints power the per-task drawer in the Tasks tab.
|
|
392
|
+
if (p === '/api/tasks') {
|
|
393
|
+
try {
|
|
394
|
+
reconcileLostTasks();
|
|
395
|
+
}
|
|
396
|
+
catch { /* best-effort */ }
|
|
397
|
+
json(res, { tasks: listTasks() });
|
|
398
|
+
return;
|
|
399
|
+
}
|
|
400
|
+
if (p.startsWith('/api/tasks/')) {
|
|
401
|
+
const rest = p.slice('/api/tasks/'.length);
|
|
402
|
+
const segments = rest.split('/');
|
|
403
|
+
const runId = decodeURIComponent(segments[0] || '');
|
|
404
|
+
const sub = segments[1];
|
|
405
|
+
if (!runId) {
|
|
406
|
+
res.writeHead(404);
|
|
407
|
+
res.end('Not found');
|
|
408
|
+
return;
|
|
409
|
+
}
|
|
410
|
+
// GET /api/tasks/:runId
|
|
411
|
+
if (!sub) {
|
|
412
|
+
const meta = readTaskMeta(runId);
|
|
413
|
+
if (!meta) {
|
|
414
|
+
res.writeHead(404);
|
|
415
|
+
res.end('Not found');
|
|
416
|
+
return;
|
|
417
|
+
}
|
|
418
|
+
json(res, meta);
|
|
419
|
+
return;
|
|
420
|
+
}
|
|
421
|
+
// GET /api/tasks/:runId/log — supports Range: bytes=N- for tail polling.
|
|
422
|
+
// Brand-new tasks may not have created log.txt yet — return empty 200
|
|
423
|
+
// rather than 404 so the panel UI's tail loop doesn't surface noise.
|
|
424
|
+
if (sub === 'log') {
|
|
425
|
+
const logPath = taskLogPath(runId);
|
|
426
|
+
let content;
|
|
427
|
+
try {
|
|
428
|
+
content = fs.readFileSync(logPath);
|
|
429
|
+
}
|
|
430
|
+
catch (err) {
|
|
431
|
+
if (err.code === 'ENOENT') {
|
|
432
|
+
res.writeHead(200, {
|
|
433
|
+
'Content-Type': 'text/plain; charset=utf-8',
|
|
434
|
+
'Cache-Control': 'no-store',
|
|
435
|
+
});
|
|
436
|
+
res.end('');
|
|
437
|
+
return;
|
|
438
|
+
}
|
|
439
|
+
throw err;
|
|
440
|
+
}
|
|
441
|
+
const total = content.length;
|
|
442
|
+
const range = req.headers['range'];
|
|
443
|
+
if (typeof range === 'string') {
|
|
444
|
+
const m = range.match(/^bytes=(\d+)-$/);
|
|
445
|
+
if (m) {
|
|
446
|
+
const start = Math.min(parseInt(m[1], 10), total);
|
|
447
|
+
const slice = content.subarray(start);
|
|
448
|
+
res.writeHead(206, {
|
|
449
|
+
'Content-Type': 'text/plain; charset=utf-8',
|
|
450
|
+
'Cache-Control': 'no-store',
|
|
451
|
+
'Content-Range': `bytes ${start}-${Math.max(total - 1, start)}/${total}`,
|
|
452
|
+
});
|
|
453
|
+
res.end(slice);
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
res.writeHead(200, {
|
|
458
|
+
'Content-Type': 'text/plain; charset=utf-8',
|
|
459
|
+
'Cache-Control': 'no-store',
|
|
460
|
+
});
|
|
461
|
+
res.end(content);
|
|
462
|
+
return;
|
|
463
|
+
}
|
|
464
|
+
// GET /api/tasks/:runId/events
|
|
465
|
+
if (sub === 'events') {
|
|
466
|
+
json(res, { events: readTaskEvents(runId) });
|
|
467
|
+
return;
|
|
468
|
+
}
|
|
469
|
+
// POST /api/tasks/:runId/cancel — loopback only.
|
|
470
|
+
// Sends SIGTERM to the recorded pid; the runner then writes a
|
|
471
|
+
// `cancelled` event itself. This endpoint never mutates meta
|
|
472
|
+
// directly to avoid racing the runner (see store.ts contract).
|
|
473
|
+
if (sub === 'cancel' && req.method === 'POST') {
|
|
474
|
+
if (!isLoopback(req)) {
|
|
475
|
+
json(res, { error: 'forbidden' }, 403);
|
|
476
|
+
return;
|
|
477
|
+
}
|
|
478
|
+
try {
|
|
479
|
+
const meta = readTaskMeta(runId);
|
|
480
|
+
if (!meta) {
|
|
481
|
+
res.writeHead(404);
|
|
482
|
+
res.end('Not found');
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
if (isTerminalTaskStatus(meta.status)) {
|
|
486
|
+
json(res, { ok: false, reason: `already ${meta.status}` });
|
|
487
|
+
return;
|
|
488
|
+
}
|
|
489
|
+
if (typeof meta.pid !== 'number') {
|
|
490
|
+
json(res, { ok: false, reason: 'no pid recorded' });
|
|
491
|
+
return;
|
|
492
|
+
}
|
|
493
|
+
try {
|
|
494
|
+
process.kill(meta.pid, 'SIGTERM');
|
|
495
|
+
json(res, { ok: true });
|
|
496
|
+
}
|
|
497
|
+
catch (err) {
|
|
498
|
+
json(res, { ok: false, reason: err.message });
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
catch (err) {
|
|
502
|
+
json(res, { ok: false, reason: err.message });
|
|
503
|
+
}
|
|
504
|
+
return;
|
|
505
|
+
}
|
|
506
|
+
res.writeHead(404);
|
|
507
|
+
res.end('Not found');
|
|
508
|
+
return;
|
|
509
|
+
}
|
|
383
510
|
// 404
|
|
384
511
|
res.writeHead(404);
|
|
385
512
|
res.end('Not found');
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lost-task detection.
|
|
3
|
+
*
|
|
4
|
+
* For every task currently in `running` or `queued`, check whether its recorded
|
|
5
|
+
* pid is still alive via `process.kill(pid, 0)`. If the pid is gone, the
|
|
6
|
+
* runner crashed or was killed externally; flip status to `lost` so observers
|
|
7
|
+
* (CLI list, agent prompt) stop misreporting it as in-flight.
|
|
8
|
+
*
|
|
9
|
+
* EPERM means the pid exists but we don't have permission to signal it —
|
|
10
|
+
* treat that as alive. ESRCH (or anything else) means dead.
|
|
11
|
+
*
|
|
12
|
+
* Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
|
|
13
|
+
* on `task list`"; v3.11 may add a pidStartTime cross-check.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Sweep all tasks and record a `lost` event for each in-flight task whose
 * recorded pid no longer exists.
 *
 * @param now Timestamp (ms since epoch) stamped on every `lost` event written
 *            by this sweep; defaults to `Date.now()`.
 * @returns The number of tasks for which a `lost` event was applied.
 */
export declare function reconcileLostTasks(now?: number): number;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lost-task detection.
|
|
3
|
+
*
|
|
4
|
+
* For every task currently in `running` or `queued`, check whether its recorded
|
|
5
|
+
* pid is still alive via `process.kill(pid, 0)`. If the pid is gone, the
|
|
6
|
+
* runner crashed or was killed externally; flip status to `lost` so observers
|
|
7
|
+
* (CLI list, agent prompt) stop misreporting it as in-flight.
|
|
8
|
+
*
|
|
9
|
+
* EPERM means the pid exists but we don't have permission to signal it —
|
|
10
|
+
* treat that as alive. ESRCH (or anything else) means dead.
|
|
11
|
+
*
|
|
12
|
+
* Best-effort: PID reuse can lie. v3.10's contract is "lazy reconciliation
|
|
13
|
+
* on `task list`"; v3.11 may add a pidStartTime cross-check.
|
|
14
|
+
*/
|
|
15
|
+
import { listTasks, applyEvent } from './store.js';
|
|
16
|
+
/**
 * Report whether a process with the given pid currently exists.
 *
 * Uses the signal-0 probe (`process.kill(pid, 0)`), which performs the
 * existence/permission check without delivering a signal.
 *
 * @param pid Process id recorded in a task's meta.
 * @returns `true` if the process exists (including when we lack permission
 *          to signal it), `false` otherwise.
 */
function isPidAlive(pid) {
    // pid 0 and negative pids address process *groups* in kill(2), so a
    // corrupt record holding one of them would probe our own group and be
    // reported alive forever. Only a positive integer names a single process.
    if (!Number.isInteger(pid) || pid <= 0)
        return false;
    try {
        process.kill(pid, 0);
        return true;
    }
    catch (err) {
        // EPERM means it exists but we can't signal it — still alive.
        // ESRCH (or anything else) means dead.
        return err.code === 'EPERM';
    }
}
|
|
26
|
+
/**
 * Walk every known task and apply a `lost` event to each one that is still
 * marked `running`/`queued`, has a numeric pid recorded, and whose pid no
 * longer exists.
 *
 * A failure on one task is written to stderr and does not stop the sweep.
 *
 * @param now Timestamp used as the `at` field of every event written.
 * @returns How many tasks had a `lost` event applied successfully.
 */
export function reconcileLostTasks(now = Date.now()) {
    const isInFlight = (status) => status === 'running' || status === 'queued';
    let flipped = 0;
    for (const task of listTasks()) {
        const orphaned = isInFlight(task.status)
            && typeof task.pid === 'number'
            && !isPidAlive(task.pid);
        if (!orphaned)
            continue;
        try {
            applyEvent(task.runId, {
                at: now,
                kind: 'lost',
                summary: 'Backing process not found — task may have been killed externally.',
            });
            flipped += 1;
        }
        catch (err) {
            // The task dir can disappear between listTasks() and applyEvent();
            // report it and keep going with the remaining tasks.
            process.stderr.write(`[franklin] reconcileLostTasks: skipping ${task.runId}: ${err.message}\n`);
        }
    }
    return flipped;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-task on-disk layout under $FRANKLIN_HOME/tasks/<runId>/.
|
|
3
|
+
* meta.json — single TaskRecord, atomically rewritten
|
|
4
|
+
* events.jsonl — append-only event log
|
|
5
|
+
* log.txt — child process stdout/stderr
|
|
6
|
+
*/
|
|
7
|
+
/**
 * @returns The `tasks` directory under `$FRANKLIN_HOME` (or under
 *          `~/.franklin` when that variable is unset or empty).
 */
export declare function getTasksDir(): string;
|
|
8
|
+
/**
 * @param runId Task identifier, used as the directory name.
 * @returns The per-task directory `<tasksDir>/<runId>`. The path is only
 *          computed; nothing is created on disk.
 */
export declare function getTaskDir(runId: string): string;
|
|
9
|
+
/**
 * Create the per-task directory (and any missing parents) if needed.
 *
 * @param runId Task identifier.
 * @returns The path of the per-task directory.
 */
export declare function ensureTaskDir(runId: string): string;
|
|
10
|
+
/**
 * @param runId Task identifier.
 * @returns Path of the task's `meta.json` inside its task directory.
 */
export declare function taskMetaPath(runId: string): string;
|
|
11
|
+
/**
 * @param runId Task identifier.
 * @returns Path of the task's `events.jsonl` inside its task directory.
 */
export declare function taskEventsPath(runId: string): string;
|
|
12
|
+
/**
 * @param runId Task identifier.
 * @returns Path of the task's `log.txt` inside its task directory.
 */
export declare function taskLogPath(runId: string): string;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-task on-disk layout under $FRANKLIN_HOME/tasks/<runId>/.
|
|
3
|
+
* meta.json — single TaskRecord, atomically rewritten
|
|
4
|
+
* events.jsonl — append-only event log
|
|
5
|
+
* log.txt — child process stdout/stderr
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'node:fs';
|
|
8
|
+
import os from 'node:os';
|
|
9
|
+
import path from 'node:path';
|
|
10
|
+
/**
 * Resolve the root directory for Franklin's on-disk state.
 *
 * @returns `$FRANKLIN_HOME` when it is set to a non-empty value, otherwise
 *          `.franklin` inside the current user's home directory.
 */
function franklinHome() {
    const override = process.env.FRANKLIN_HOME;
    if (override)
        return override;
    return path.join(os.homedir(), '.franklin');
}
|
|
13
|
+
/**
 * @returns The directory that holds one sub-directory per task:
 *          `<franklin home>/tasks`.
 */
export function getTasksDir() {
    const root = franklinHome();
    return path.join(root, 'tasks');
}
|
|
16
|
+
export function getTaskDir(runId) {
|
|
17
|
+
return path.join(getTasksDir(), runId);
|
|
18
|
+
}
|
|
19
|
+
/**
 * Make sure the task's directory exists, creating missing parents as needed.
 *
 * @param runId Task identifier.
 * @returns The path of the task's directory.
 */
export function ensureTaskDir(runId) {
    const taskDir = getTaskDir(runId);
    // `recursive: true` also makes this a no-op when the directory exists.
    fs.mkdirSync(taskDir, { recursive: true });
    return taskDir;
}
|
|
24
|
+
/**
 * @param runId Task identifier.
 * @returns Path of `meta.json` inside the task's directory.
 */
export function taskMetaPath(runId) {
    const taskDir = getTaskDir(runId);
    return path.join(taskDir, 'meta.json');
}
|
|
27
|
+
/**
 * @param runId Task identifier.
 * @returns Path of `events.jsonl` inside the task's directory.
 */
export function taskEventsPath(runId) {
    const taskDir = getTaskDir(runId);
    return path.join(taskDir, 'events.jsonl');
}
|
|
30
|
+
/**
 * @param runId Task identifier.
 * @returns Path of `log.txt` inside the task's directory.
 */
export function taskLogPath(runId) {
    const taskDir = getTaskDir(runId);
    return path.join(taskDir, 'log.txt');
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detached task runner. The hidden `_task-runner <runId>` subcommand of the
|
|
3
|
+
* `franklin` CLI dispatches into this module, which is what actually executes
|
|
4
|
+
* the user's command in the detached child process.
|
|
5
|
+
*
|
|
6
|
+
* Lifecycle (per task):
|
|
7
|
+
* 1. Read meta.json. Bail with exit code 2 if it's gone.
|
|
8
|
+
* 2. Open log.txt for append, record our own pid + status=running, emit
|
|
9
|
+
* a `running` event.
|
|
10
|
+
* 3. Spawn `bash -lc <command>` with stdout/stderr piped to log.txt.
|
|
11
|
+
* 4. Heartbeat every 5s: just refresh meta.lastEventAt so observers can see
|
|
12
|
+
* "still going."
|
|
13
|
+
* 5. On child exit (or spawn error), close the log fd, finalize meta with
|
|
14
|
+
* exitCode + status (`succeeded` if 0, `failed` otherwise), emit a
|
|
15
|
+
* terminal event whose summary is the last 500 chars of log.
|
|
16
|
+
*
|
|
17
|
+
* Defensive style: we re-read meta inside the heartbeat and on exit because
|
|
18
|
+
* a concurrent `franklin task cancel` (or external `rm -rf`) can vanish the
|
|
19
|
+
* task dir mid-flight. Every fs operation is best-effort.
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Run the task identified by `runId` to completion inside the current
 * (runner) process.
 *
 * @param runId Identifier of a task whose meta.json already exists.
 * @returns A promise for the runner's exit code: `2` when no meta exists for
 *          `runId`, `1` when the command could not be spawned, otherwise the
 *          command's own exit code (`128` when it was terminated by a signal).
 */
export declare function runDetachedTask(runId: string): Promise<number>;
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detached task runner. The hidden `_task-runner <runId>` subcommand of the
|
|
3
|
+
* `franklin` CLI dispatches into this module, which is what actually executes
|
|
4
|
+
* the user's command in the detached child process.
|
|
5
|
+
*
|
|
6
|
+
* Lifecycle (per task):
|
|
7
|
+
* 1. Read meta.json. Bail with exit code 2 if it's gone.
|
|
8
|
+
* 2. Open log.txt for append, record our own pid + status=running, emit
|
|
9
|
+
* a `running` event.
|
|
10
|
+
* 3. Spawn `bash -lc <command>` with stdout/stderr piped to log.txt.
|
|
11
|
+
* 4. Heartbeat every 5s: just refresh meta.lastEventAt so observers can see
|
|
12
|
+
* "still going."
|
|
13
|
+
* 5. On child exit (or spawn error), close the log fd, finalize meta with
|
|
14
|
+
* exitCode + status (`succeeded` if 0, `failed` otherwise), emit a
|
|
15
|
+
* terminal event whose summary is the last 500 chars of log.
|
|
16
|
+
*
|
|
17
|
+
* Defensive style: we re-read meta inside the heartbeat and on exit because
|
|
18
|
+
* a concurrent `franklin task cancel` (or external `rm -rf`) can vanish the
|
|
19
|
+
* task dir mid-flight. Every fs operation is best-effort.
|
|
20
|
+
*/
|
|
21
|
+
import { spawn } from 'node:child_process';
|
|
22
|
+
import fs from 'node:fs';
|
|
23
|
+
import { readTaskMeta, applyEvent, writeTaskMeta } from './store.js';
|
|
24
|
+
import { taskLogPath, ensureTaskDir } from './paths.js';
|
|
25
|
+
const HEARTBEAT_MS = 5_000;
|
|
26
|
+
const TAIL_BYTES = 500;
|
|
27
|
+
/**
 * Close a file descriptor, ignoring any failure.
 *
 * @param fd Descriptor to close; it may already be closed.
 */
function safeCloseFd(fd) {
    try {
        fs.closeSync(fd);
    }
    catch (_alreadyClosed) {
        // Deliberately ignored: the only expected failure is a descriptor
        // that has been closed already.
    }
}
|
|
35
|
+
function readLogTail(runId) {
|
|
36
|
+
try {
|
|
37
|
+
const buf = fs.readFileSync(taskLogPath(runId), 'utf-8');
|
|
38
|
+
return buf.slice(-TAIL_BYTES).replace(/\s+/g, ' ').trim();
|
|
39
|
+
}
|
|
40
|
+
catch {
|
|
41
|
+
return '';
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
/**
 * Run the task identified by `runId` inside the current (runner) process.
 *
 * Steps: read meta, open log.txt for append, record this process's pid with
 * status `running`, spawn `bash -lc <command>` with stdout/stderr sent to the
 * log, refresh `lastEventAt` on a timer while the command runs, and write a
 * terminal event when it exits. SIGTERM/SIGINT delivered to this process
 * cancel the task.
 *
 * @param runId Identifier of a task whose meta.json already exists.
 * @returns Resolves to the exit code for the runner: `2` if no meta exists,
 *          `1` if the command could not be spawned, `130` if the task was
 *          cancelled by a signal, otherwise the command's exit code (`128`
 *          if the command itself was terminated by a signal).
 */
export async function runDetachedTask(runId) {
    // Canonical Ctrl-C / SIGTERM exit code reported for a cancelled task.
    const CANCEL_EXIT_CODE = 130;
    const meta = readTaskMeta(runId);
    if (!meta) {
        process.stderr.write(`runner: no task ${runId}\n`);
        return 2;
    }
    ensureTaskDir(runId);
    const logFd = fs.openSync(taskLogPath(runId), 'a');
    let logFdClosed = false;
    // Idempotent close: both the finalize path and the cancel path call this.
    const closeLog = () => {
        if (logFdClosed)
            return;
        logFdClosed = true;
        safeCloseFd(logFd);
    };
    const startedAt = Date.now();
    writeTaskMeta({
        ...meta,
        pid: process.pid,
        status: 'running',
        startedAt,
        lastEventAt: startedAt,
    });
    applyEvent(runId, { at: startedAt, kind: 'running', summary: 'runner started' });
    // `finalized` guards against a heartbeat callback that was already queued
    // when the task reached a terminal state: without it, that write could
    // land after the terminal event and clobber lastEventAt / status. It is
    // flipped before clearInterval so a pending callback bails immediately.
    let finalized = false;
    // Set only by the signal handler; makes the promise resolve to
    // CANCEL_EXIT_CODE instead of whatever the killed child reported.
    let cancelled = false;
    // Heartbeat: refresh lastEventAt so observers can see the task is still
    // going. If meta was deleted underneath us, skip silently rather than
    // re-creating a stub.
    const heartbeat = setInterval(() => {
        if (finalized)
            return;
        const cur = readTaskMeta(runId);
        if (!cur)
            return;
        try {
            writeTaskMeta({ ...cur, lastEventAt: Date.now() });
        }
        catch (err) {
            process.stderr.write(`[franklin] runner heartbeat: ${err.message}\n`);
        }
    }, HEARTBEAT_MS);
    // Best-effort finalize, shared by the normal-exit and spawn-error paths.
    // Bails when `finalized` is already set (the cancel path got there first)
    // so the on-disk `cancelled` state is not overwritten with `failed` once
    // the killed child's 'exit' event fires.
    const finalize = (exitCode, status, fallbackSummary) => {
        if (finalized)
            return;
        finalized = true;
        clearInterval(heartbeat);
        closeLog();
        const endedAt = Date.now();
        const tail = readLogTail(runId);
        const cur = readTaskMeta(runId);
        if (cur) {
            try {
                writeTaskMeta({ ...cur, exitCode });
            }
            catch (err) {
                process.stderr.write(`[franklin] runner finalize writeTaskMeta: ${err.message}\n`);
            }
            try {
                applyEvent(runId, {
                    at: endedAt,
                    kind: status,
                    summary: tail || fallbackSummary,
                });
            }
            catch (err) {
                process.stderr.write(`[franklin] runner finalize applyEvent: ${err.message}\n`);
            }
        }
        else {
            // Meta vanished mid-run. Nothing to finalize. Surface for ops, exit clean.
            process.stderr.write(`[franklin] runner: meta for ${runId} disappeared before finalize\n`);
        }
    };
    const child = spawn('bash', ['-lc', meta.command], {
        cwd: meta.workingDir,
        stdio: ['ignore', logFd, logFd],
        env: { ...process.env, FRANKLIN_TASK_RUN_ID: runId },
    });
    // Cancel path (SIGTERM from the CLI / panel, or Ctrl-C). Order matters:
    //   (a) flip `finalized` first so the child's upcoming 'exit' handler
    //       short-circuits instead of writing status=failed,
    //   (b) stop the heartbeat for the same reason,
    //   (c) SIGTERM the child so the bash process actually dies,
    //   (d) close the log fd,
    //   (e) record the `cancelled` event,
    //   (f) force-exit with CANCEL_EXIT_CODE after a short delay as a safety
    //       net in case the child never reports its exit.
    const onSignal = () => {
        if (finalized)
            return;
        finalized = true;
        cancelled = true;
        clearInterval(heartbeat);
        try {
            child.kill('SIGTERM');
        }
        catch {
            /* child may already be gone */
        }
        closeLog();
        try {
            applyEvent(runId, {
                at: Date.now(),
                kind: 'cancelled',
                summary: 'Cancelled via SIGTERM',
            });
        }
        catch (err) {
            process.stderr.write(`[franklin] runner cancel applyEvent: ${err.message}\n`);
        }
        setTimeout(() => process.exit(CANCEL_EXIT_CODE), 500);
    };
    process.on('SIGTERM', onSignal);
    process.on('SIGINT', onSignal);
    return await new Promise((resolve) => {
        let resolved = false;
        // Resolve at most once. A cancelled task always reports
        // CANCEL_EXIT_CODE: the killed child normally exits well inside the
        // 500ms grace period, and its own code (128, or anything a trap
        // handler chose) must not leak out as the runner's result.
        const settle = (code) => {
            if (resolved)
                return;
            resolved = true;
            resolve(cancelled ? CANCEL_EXIT_CODE : code);
        };
        child.on('error', (err) => {
            // Spawn itself failed — bash not on $PATH, EACCES, etc. Close the
            // log fd, finalize the task, and report failure.
            const exitCode = 1;
            finalize(exitCode, 'failed', `spawn error: ${err.message}`);
            settle(exitCode);
        });
        child.on('exit', (code, signal) => {
            // `code` is null when the child was terminated by a signal.
            const exitCode = typeof code === 'number' ? code : signal ? 128 : 1;
            const status = exitCode === 0 ? 'succeeded' : 'failed';
            finalize(exitCode, status, status === 'succeeded' ? 'completed' : `exited with code ${exitCode}`);
            settle(exitCode);
        });
    });
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public spawn surface for the detached task subsystem.
|
|
3
|
+
*
|
|
4
|
+
* `startDetachedTask` is the synchronous entry point used by the `Detach`
|
|
5
|
+
* agent tool and by `franklin task` callers. It writes a queued
|
|
6
|
+
* TaskRecord to disk, opens log.txt for stdout/stderr capture, then
|
|
7
|
+
* spawns `franklin _task-runner <runId>` with `detached: true` and
|
|
8
|
+
* unrefs the child so this process can exit without waiting on the
|
|
9
|
+
* task. The runner subprocess takes over from there: it spawns the
|
|
10
|
+
* actual user command, drives heartbeats, and finalizes meta on exit.
|
|
11
|
+
*
|
|
12
|
+
* Performance contract: startDetachedTask must return in <250ms. That
|
|
13
|
+
* is enforced by the integration test in test/local.mjs and is the
|
|
14
|
+
* reason all I/O here is sync — we want one fs write + one spawn, not
|
|
15
|
+
* an async chain that could be interrupted by a slow microtask.
|
|
16
|
+
*
|
|
17
|
+
* CLI path resolution (in priority order):
|
|
18
|
+
* 1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
|
|
19
|
+
* 2. <cwd>/dist/index.js — the published bundle's entry point.
|
|
20
|
+
*/
|
|
21
|
+
export interface StartDetachedTaskInput {
|
|
22
|
+
label: string;
|
|
23
|
+
command: string;
|
|
24
|
+
workingDir: string;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Persist a `queued` task record and launch a detached runner process for it.
 *
 * @param input Label, shell command, and working directory of the task.
 * @returns The generated run id of the new task.
 */
export declare function startDetachedTask(input: StartDetachedTaskInput): string;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public spawn surface for the detached task subsystem.
|
|
3
|
+
*
|
|
4
|
+
* `startDetachedTask` is the synchronous entry point used by the `Detach`
|
|
5
|
+
* agent tool and by `franklin task` callers. It writes a queued
|
|
6
|
+
* TaskRecord to disk, opens log.txt for stdout/stderr capture, then
|
|
7
|
+
* spawns `franklin _task-runner <runId>` with `detached: true` and
|
|
8
|
+
* unrefs the child so this process can exit without waiting on the
|
|
9
|
+
* task. The runner subprocess takes over from there: it spawns the
|
|
10
|
+
* actual user command, drives heartbeats, and finalizes meta on exit.
|
|
11
|
+
*
|
|
12
|
+
* Performance contract: startDetachedTask must return in <250ms. That
|
|
13
|
+
* is enforced by the integration test in test/local.mjs and is the
|
|
14
|
+
* reason all I/O here is sync — we want one fs write + one spawn, not
|
|
15
|
+
* an async chain that could be interrupted by a slow microtask.
|
|
16
|
+
*
|
|
17
|
+
* CLI path resolution (in priority order):
|
|
18
|
+
* 1. process.env.FRANKLIN_CLI_PATH — escape hatch for tests / dev.
|
|
19
|
+
* 2. <cwd>/dist/index.js — the published bundle's entry point.
|
|
20
|
+
*/
|
|
21
|
+
import { spawn } from 'node:child_process';
|
|
22
|
+
import fs from 'node:fs';
|
|
23
|
+
import path from 'node:path';
|
|
24
|
+
import { randomUUID } from 'node:crypto';
|
|
25
|
+
import { writeTaskMeta } from './store.js';
|
|
26
|
+
import { taskLogPath, ensureTaskDir } from './paths.js';
|
|
27
|
+
/**
 * Pick the CLI entry script that the detached runner is launched with.
 *
 * @returns `$FRANKLIN_CLI_PATH` when it is set and non-empty, otherwise
 *          `dist/index.js` resolved against the current working directory.
 */
function resolveCliPath() {
    const override = process.env.FRANKLIN_CLI_PATH;
    const hasOverride = Boolean(override) && override.length > 0;
    if (hasOverride)
        return override;
    // NOTE(review): this fallback is relative to the *caller's* cwd, so it only
    // points at Franklin's bundle when the process was started from the package
    // root — confirm that installed CLIs always set FRANKLIN_CLI_PATH.
    return path.resolve(process.cwd(), 'dist', 'index.js');
}
|
|
33
|
+
/**
 * Build a new run id of the form `t_<base36 timestamp>_<8 hex chars>`.
 *
 * @returns The run id; the suffix is the first 8 characters of a random UUID.
 */
function generateRunId() {
    const stamp = Date.now().toString(36);
    const suffix = randomUUID().slice(0, 8);
    return ['t', stamp, suffix].join('_');
}
|
|
36
|
+
/**
 * Persist a `queued` task record and launch `_task-runner <runId>` as a
 * detached, unref'd child so the calling process never waits on the task.
 *
 * @param input `{ label, command, workingDir }` describing the task.
 * @returns The generated run id.
 * @throws Whatever `writeTaskMeta`, `fs.openSync`, or a synchronous `spawn`
 *         failure throws; the log fd opened here is closed in every case.
 */
export function startDetachedTask(input) {
    const runId = generateRunId();
    const now = Date.now();
    const record = {
        runId,
        runtime: 'detached-bash',
        label: input.label,
        command: input.command,
        workingDir: input.workingDir,
        status: 'queued',
        createdAt: now,
    };
    writeTaskMeta(record);
    ensureTaskDir(runId);
    const cliPath = resolveCliPath();
    const logFd = fs.openSync(taskLogPath(runId), 'a');
    try {
        // detached + unref + ignored stdin = this process can exit immediately
        // while the runner keeps going. The runner's own stdout/stderr go to
        // the task log through the descriptors passed here.
        const child = spawn(process.execPath, [cliPath, '_task-runner', runId], {
            cwd: input.workingDir,
            detached: true,
            stdio: ['ignore', logFd, logFd],
            env: { ...process.env, FRANKLIN_TASK_RUN_ID: runId },
        });
        // A spawn failure (e.g. `workingDir` does not exist) is reported
        // asynchronously through 'error'. With no listener attached, the
        // emitter rethrows it as an uncaught exception in this process.
        child.on('error', (err) => {
            process.stderr.write(`[franklin] startDetachedTask: failed to spawn runner for ${runId}: ${err.message}\n`);
            // The runner never started, so this process is the only writer
            // for the task. Mark it failed; a `queued` record with no pid
            // would otherwise never be reaped by lost-task detection.
            try {
                writeTaskMeta({ ...record, status: 'failed', lastEventAt: Date.now() });
            }
            catch (metaErr) {
                process.stderr.write(`[franklin] startDetachedTask: marking ${runId} failed: ${metaErr.message}\n`);
            }
        });
        child.unref();
    }
    finally {
        // The child has its own copy of the descriptor; close ours even when
        // spawn throws synchronously so it cannot leak. Surface an unexpected
        // close failure instead of swallowing it.
        try {
            fs.closeSync(logFd);
        }
        catch (err) {
            process.stderr.write(`[franklin] startDetachedTask: closing log fd failed: ${err.message}\n`);
        }
    }
    return runId;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Task persistence: meta.json (single record) + events.jsonl (append-only log).
|
|
3
|
+
*
|
|
4
|
+
* Concurrency contract: applyEvent does a read-modify-write on meta.json. It
|
|
5
|
+
* is safe to call from a single writer per task — by convention, that writer
|
|
6
|
+
* is the _task-runner subprocess. CLI commands that need to influence a
|
|
7
|
+
* running task (e.g. `franklin task cancel`) MUST signal the runner pid
|
|
8
|
+
* (SIGTERM) rather than calling applyEvent directly, otherwise the two
|
|
9
|
+
* writers race and one update is silently lost. Lost-task reconciliation
|
|
10
|
+
* is an exception — it runs only when the runner is provably dead, so
|
|
11
|
+
* there is no second writer to race with.
|
|
12
|
+
*
|
|
13
|
+
* Atomicity: writeTaskMeta uses tmp + rename; readers see either old or new
|
|
14
|
+
* meta, never partial. appendTaskEvent relies on POSIX O_APPEND + PIPE_BUF
|
|
15
|
+
* atomicity (~4096 bytes); summaries should stay short. readTaskEvents is
|
|
16
|
+
* tolerant of a torn last line.
|
|
17
|
+
*/
|
|
18
|
+
import type { TaskRecord, TaskEventRecord } from './types.js';
|
|
19
|
+
/**
 * Persist `record` as its task's meta.json. Per this module's header the
 * write goes through a temp file plus rename, so readers see either the old
 * or the new record, never a partial one.
 */
export declare function writeTaskMeta(record: TaskRecord): void;
|
|
20
|
+
/**
 * Load the task record for `runId`.
 *
 * @returns The record, or `null` when none is available. Callers in this
 *          package treat `null` as "no such task"; whether unreadable or
 *          corrupt meta also yields `null` is not visible from this
 *          declaration — confirm against the implementation.
 */
export declare function readTaskMeta(runId: string): TaskRecord | null;
|
|
21
|
+
/**
 * Append one event to the task's append-only events.jsonl. Per this module's
 * header, line atomicity relies on O_APPEND with writes of roughly 4096 bytes
 * or less, so event summaries should stay short.
 */
export declare function appendTaskEvent(runId: string, event: TaskEventRecord): void;
|
|
22
|
+
/**
 * Read the events recorded for `runId`. Per this module's header, a torn
 * (partially written) last line is tolerated.
 */
export declare function readTaskEvents(runId: string): TaskEventRecord[];
|
|
23
|
+
/**
 * Apply `event` to the task: a read-modify-write of meta.json (see the
 * concurrency contract in this module's header — one writer per task).
 *
 * @returns A task record — presumably the updated one; confirm against the
 *          implementation.
 */
export declare function applyEvent(runId: string, event: TaskEventRecord): TaskRecord;
|
|
24
|
+
/**
 * @returns The records of the tasks known to the store. Ordering and the
 *          handling of unreadable task directories are not visible from this
 *          declaration — confirm against the implementation.
 */
export declare function listTasks(): TaskRecord[];
|