ftown-bridge 0.11.2 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/centrifugo-client.d.ts +4 -1
- package/dist/centrifugo-client.js +27 -0
- package/dist/centrifugo-client.js.map +1 -1
- package/dist/codex-installer.js +8 -2
- package/dist/codex-installer.js.map +1 -1
- package/dist/create-ftown-session.d.ts +1 -0
- package/dist/create-ftown-session.js +4 -3
- package/dist/create-ftown-session.js.map +1 -1
- package/dist/ftown-sessions-cli.js +334 -0
- package/dist/ftown-sessions-cli.js.map +1 -1
- package/dist/index.js +167 -11
- package/dist/index.js.map +1 -1
- package/dist/install-ftown-skill.d.ts +2 -0
- package/dist/install-ftown-skill.js +39 -0
- package/dist/install-ftown-skill.js.map +1 -1
- package/dist/local-api-server.d.ts +10 -0
- package/dist/local-api-server.js +147 -0
- package/dist/local-api-server.js.map +1 -1
- package/dist/loop-run-store.d.ts +9 -0
- package/dist/loop-run-store.js +159 -0
- package/dist/loop-run-store.js.map +1 -0
- package/dist/loop-schedule.d.ts +18 -0
- package/dist/loop-schedule.js +35 -0
- package/dist/loop-schedule.js.map +1 -0
- package/dist/loop-scheduler.d.ts +148 -0
- package/dist/loop-scheduler.js +534 -0
- package/dist/loop-scheduler.js.map +1 -0
- package/dist/loop-store.d.ts +36 -0
- package/dist/loop-store.js +128 -0
- package/dist/loop-store.js.map +1 -0
- package/dist/loop-validation.d.ts +14 -0
- package/dist/loop-validation.js +95 -0
- package/dist/loop-validation.js.map +1 -0
- package/dist/types.d.ts +107 -2
- package/package.json +2 -1
- package/skills/ftown/SKILL.md +48 -0
- package/skills/ftown/agents/openai.yaml +4 -0
- package/skills/ftown/references/loops.md +88 -0
- package/skills/{ftown-orchestrator/SKILL.md → ftown/references/orchestrator.md} +22 -13
- package/skills/{ftown-sessions/SKILL.md → ftown/references/sessions.md} +22 -16
- package/skills/{ftown-workflows/SKILL.md → ftown/references/workflows.md} +13 -20
- package/skills/ftown/scripts/ftown +4 -0
- package/skills/ftown-sessions/scripts/ftown-sessions +0 -4
- package/skills/ftown-workflows/scripts/ftown-workflows +0 -4
- /package/skills/{ftown-workflows → ftown}/scripts/example.flow.mjs +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loop-schedule.js","sourceRoot":"","sources":["../src/loop-schedule.ts"],"names":[],"mappings":"AAAA,OAAO,UAAU,MAAM,aAAa,CAAC;AAIrC;;;;;;;GAOG;AACH,MAAM,UAAU,cAAc,CAAC,QAAsB,EAAE,MAAc;IACnE,IAAI,QAAQ,CAAC,IAAI,KAAK,UAAU;QAAE,OAAO,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACnF,MAAM,EAAE,GAAG,UAAU,CAAC,eAAe,CAAC,QAAQ,CAAC,UAAU,EAAE;QACzD,WAAW,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC;QAC7B,EAAE,EAAE,QAAQ,CAAC,EAAE;KAChB,CAAC,CAAC;IACH,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC,OAAO,EAAE,CAAC;AACtC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,KAAK,CAAC,IAAU,EAAE,KAAa;IAC7C,IAAI,IAAI,CAAC,eAAe;QAAE,OAAO,IAAI,CAAC,CAAC,0CAA0C;IACjF,IAAI,CAAC,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAChC,IAAI,CAAC,IAAI,CAAC,SAAS;QAAE,OAAO,KAAK,CAAC;IAClC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC;AAC7C,CAAC"}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { type LoopRuntimeMutator } from './loop-store.js';
|
|
2
|
+
import type { CreateFtownSessionInput } from './create-ftown-session.js';
|
|
3
|
+
import type { RemoveFtownSessionOptions } from './remove-ftown-session.js';
|
|
4
|
+
import type { Loop, LoopRunRecord, Session } from './types.js';
|
|
5
|
+
/** Base tick cadence; also the finalize grace so a just-spawned PTY is not mistaken for exited. */
|
|
6
|
+
export declare const LOOP_TICK_INTERVAL_MS = 30000;
|
|
7
|
+
/** Normalized outcome of one flight (shell) command. */
export interface FlightResult {
    /** Text captured from the command's standard output. */
    stdout: string;
    /** Text captured from the command's standard error. */
    stderr: string;
    /** Exit code reported for the command. */
    exitCode: number;
}
|
|
12
|
+
/** Preflight/postflight primitive: a promise-returning shell-command runner (resolves with captured output and exit code; accepts a timeout). */
|
|
13
|
+
export type RunFlight = (command: string, cwd: string | undefined, timeoutMs?: number, extraEnv?: Record<string, string>) => Promise<FlightResult>;
|
|
14
|
+
/** In-process flight spawn — wraps createFtownSession(sessionDeps, input) in index.ts. */
|
|
15
|
+
export type SpawnSession = (input: CreateFtownSessionInput) => Promise<Session>;
|
|
16
|
+
/** In-process run removal — wraps removeFtownSession({store,runner,centrifugo,userId}, id, opts) in index.ts. */
|
|
17
|
+
export type RemoveSession = (id: string, options?: RemoveFtownSessionOptions) => Promise<Session | null>;
|
|
18
|
+
/** Read-only slice of the session store that the loop scheduler depends on. */
export interface SchedulerStore {
    /** Load one session by id; resolves to null when there is no such session. */
    loadSession(id: string): Promise<Session | null>;
    /** Load the terminal log text recorded for a session. */
    loadTerminalLog(id: string): Promise<string>;
    /** List every stored session. */
    listSessions(): Promise<Session[]>;
}
|
|
23
|
+
/** Process-runner operations the scheduler uses to observe and stop run sessions. */
export interface SchedulerRunner {
    /** Whether the session with this id is currently running. */
    isRunning(id: string): boolean;
    /** Ask the runner to stop the session with this id. */
    stop(id: string): boolean;
}
|
|
27
|
+
/** Publishing surface the scheduler uses to announce loop changes. */
export interface SchedulerCentrifugo {
    /** Publish the given loop's current state on behalf of `userId`. */
    publishLoopUpdate(userId: string, loop: Loop): Promise<void>;
}
|
|
30
|
+
/** Loop persistence operations consumed by the scheduler. */
export interface LoopStoreApi {
    /** Return all persisted loops. */
    listLoops(): Loop[];
    /**
     * Fresh-read the loop, let `fn` mutate its scheduler-owned fields, then save.
     * Returns null when the loop was deleted concurrently.
     */
    mutateLoopRuntime(id: string, fn: LoopRuntimeMutator): Loop | null;
}
|
|
35
|
+
/** Durable loop-run log operations consumed by the scheduler. */
export interface LoopRunRecordStoreApi {
    /** Insert or update one run record and return the stored record. */
    upsertLoopRunRecord(record: LoopRunRecord): LoopRunRecord;
    /**
     * Prune the run records of `loopId`.
     * NOTE(review): `keep` looks like the retention count (null presumably meaning
     * "no limit") and `preserveIds` like ids exempt from pruning — confirm against
     * loop-run-store.
     */
    pruneLoopRunRecords(loopId: string, keep: number | null, preserveIds?: Iterable<string | undefined>): void;
}
|
|
39
|
+
/** Collaborators injected into LoopScheduler; the optional ones fall back to real defaults. */
export interface SchedulerDeps {
    /** Session store used to load finished runs and their terminal logs. */
    store: SchedulerStore;
    /** Runner used to check whether a run is alive and to stop it. */
    runner: SchedulerRunner;
    /** Publisher for loop updates. */
    centrifugo: SchedulerCentrifugo;
    /** User id passed along when publishing loop updates. */
    userId: string;
    /** Built in index.ts as (input) => createFtownSession(sessionDeps, input) — the direct in-process call. */
    spawnSession: SpawnSession;
    /** Built in index.ts as (id, opts) => removeFtownSession({store,runner,centrifugo,userId}, id, opts). */
    removeSession: RemoveSession;
    /** Loop persistence. Defaults to the real ~/.ftown/loops.json store. */
    loops?: LoopStoreApi;
    /** Durable loop-run log persistence. Defaults to the real ~/.ftown/loop-runs.json store. */
    runRecords?: LoopRunRecordStoreApi;
    /** Flight runner. Defaults to runFlightCommand. */
    runFlight?: RunFlight;
    /** Clock seam. Defaults to Date.now. */
    now?: () => number;
}
|
|
57
|
+
/**
|
|
58
|
+
* Runs the command via child_process.spawn('/bin/sh', ['-c', command]) and normalizes the result to { stdout, stderr,
|
|
59
|
+
* exitCode }. Never rejects — the exit code is the signal (a timeout maps to
|
|
60
|
+
* 124, any other failure to the real exit code or 1).
|
|
61
|
+
*
|
|
62
|
+
* The hard budget is enforced by SIGKILL on the whole process GROUP, NOT by
|
|
63
|
+
* exec's built-in `timeout`. exec's timeout only sends SIGTERM to the spawned
|
|
64
|
+
* `/bin/sh`; a detached grandchild that keeps the stdout pipe open, or a child
|
|
65
|
+
* that traps SIGTERM, would keep the exec callback from ever firing — and since
|
|
66
|
+
* the scheduler awaits every flight inside a single re-entrancy-guarded tick,
|
|
67
|
+
* ONE such flight would wedge the entire scheduler permanently. Spawning
|
|
68
|
+
* `detached` makes the child its own group leader, so `kill(-pid, SIGKILL)`
|
|
69
|
+
* takes down the whole tree and the flight can never exceed its budget.
|
|
70
|
+
*/
|
|
71
|
+
export declare function runFlightCommand(command: string, cwd: string | undefined, timeoutMs?: number, extraEnv?: Record<string, string>): Promise<FlightResult>;
|
|
72
|
+
/**
 * The scheduled-loops engine. Every 30s tick first FINALIZES each loop's
 * in-flight run(s) (Phase A) and only then decides whether to FIRE a new run
 * (Phase B). All side effects are routed through injected collaborators, so
 * the class can be unit-tested without a live bridge, a real filesystem or
 * real timers (mirrors workflow-runner.ts).
 *
 * Persistence rule: a whole detached Loop is NEVER written back across an
 * await. Each runtime-field change goes through store.mutateLoopRuntime
 * (fresh-read → mutate → save), so a loop that was deleted or user-edited
 * during a long flight is neither resurrected nor clobbered.
 */
export declare class LoopScheduler {
    private readonly store;
    private readonly runner;
    private readonly centrifugo;
    private readonly userId;
    private readonly spawnSession;
    private readonly removeSession;
    private readonly loops;
    private readonly runRecords;
    private readonly runFlight;
    private readonly now;
    /** Re-entrancy guard: tick N+1 never overlaps tick N. */
    private tickRunning;
    /**
     * Set once start() runs (after reconcileOnStart). kick() is a no-op before
     * that, so an early run_loop_now cannot trigger an un-reconciled tick that
     * stampedes overdue loops.
     */
    private started;
    /** Per-loop in-memory fire lock, shared by tick and run_loop_now/kick. */
    private readonly firingLoops;
    /**
     * loopId -> (runSessionId -> fire-time ms). Holds every run THIS process
     * spawned, so under overlapPolicy:'allow' each concurrent run is
     * finalized/postflighted/maxRuntime-checked on its own — not just the newest.
     * Rebuilt lazily from the persisted primary run after a restart.
     */
    private readonly inFlight;
    private timer;
    constructor(deps: SchedulerDeps);
    start(): void;
    stop(): void;
    /**
     * Immediate, guarded, out-of-band tick (used by run_loop_now). No-op until
     * start() has run, so a kick racing startup cannot fire before reconcileOnStart.
     */
    kick(): void;
    /**
     * Drop scheduler tracking for a deleted loop and stop any run it left alive,
     * so a just-deleted loop never leaks a live AI session with nothing left to
     * finalize it.
     */
    onLoopDeleted(loop: Loop): void;
    /**
     * Missed-schedule policy, run once before the first tick: every loop whose
     * nextRunAt is missing or already in the past gets it recomputed from now
     * (missed occurrences are skipped; overdue loops never stampede).
     * runNowRequested is preserved, so a manual override survives a restart and
     * still fires on the first tick. A loop with a corrupt persisted schedule is
     * skipped here (logged) and reported as an error on its first fire.
     */
    reconcileOnStart(now?: number): Promise<void>;
    tick(now?: number): Promise<void>;
    private processLoop;
    /**
     * Phase A: finalize each in-flight run once its PTY is confirmed gone (past
     * the grace window) or it is over its per-run maxRuntime budget. Under
     * 'allow' every tracked run is visited, not just the newest, so none is orphaned.
     */
    private finalizePhase;
    /**
     * Seed the persisted primary run into in-memory tracking after a restart
     * (when this process has spawned nothing yet for the loop), so a run left
     * 'running' by a prior process is still finalized.
     */
    private ensureTracked;
    private track;
    /** Phase B: fire the loop if due, honoring the per-loop lock and the overlap policy. */
    private firePhase;
    /** Advance the schedule up front (so failures/skips never stampede), then preflight → flight. */
    private fireLoop;
    /**
     * Resolve one finished run to ok/error, update the loop badge (only when it
     * is the loop's tracked/latest run), then run postflight + retention for it.
     */
    private finalizeRun;
    private runPostflight;
    /** Keep the newest N run-sessions for this loop; prune older finished ones. */
    private pruneRuns;
    private persist;
    private publish;
}
|
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { computeNextRun, isDue } from './loop-schedule.js';
|
|
3
|
+
import { pruneLoopRunRecords, recordForSession, skippedRunRecord, upsertLoopRunRecord, } from './loop-run-store.js';
|
|
4
|
+
import { listLoops, mutateLoopRuntime } from './loop-store.js';
|
|
5
|
+
/** Base tick cadence; also the finalize grace so a just-spawned PTY is not mistaken for exited. */
|
|
6
|
+
export const LOOP_TICK_INTERVAL_MS = 30_000;
|
|
7
|
+
/** Format an epoch-millisecond timestamp as an ISO-8601 string (throws RangeError on an invalid time). */
const iso = (ms) => {
    const stamp = new Date(ms);
    return stamp.toISOString();
};
|
|
8
|
+
/**
|
|
9
|
+
* Runs child_process.exec and normalizes the result to { stdout, stderr,
|
|
10
|
+
* exitCode }. Never rejects — the exit code is the signal (a timeout maps to
|
|
11
|
+
* 124, any other failure to the real exit code or 1).
|
|
12
|
+
*
|
|
13
|
+
* The hard budget is enforced by SIGKILL on the whole process GROUP, NOT by
|
|
14
|
+
* exec's built-in `timeout`. exec's timeout only sends SIGTERM to the spawned
|
|
15
|
+
* `/bin/sh`; a detached grandchild that keeps the stdout pipe open, or a child
|
|
16
|
+
* that traps SIGTERM, would keep the exec callback from ever firing — and since
|
|
17
|
+
* the scheduler awaits every flight inside a single re-entrancy-guarded tick,
|
|
18
|
+
* ONE such flight would wedge the entire scheduler permanently. Spawning
|
|
19
|
+
* `detached` makes the child its own group leader, so `kill(-pid, SIGKILL)`
|
|
20
|
+
* takes down the whole tree and the flight can never exceed its budget.
|
|
21
|
+
*/
|
|
22
|
+
export function runFlightCommand(command, cwd, timeoutMs = 30_000, extraEnv) {
|
|
23
|
+
const env = extraEnv ? { ...process.env, ...extraEnv } : process.env;
|
|
24
|
+
const MAX_CAPTURE = 1024 * 1024;
|
|
25
|
+
return new Promise((resolve) => {
|
|
26
|
+
let settled = false;
|
|
27
|
+
let stdout = '';
|
|
28
|
+
let stderr = '';
|
|
29
|
+
let exitCode = null;
|
|
30
|
+
let killedByTimeout = false;
|
|
31
|
+
let exitGrace;
|
|
32
|
+
// Own process group (detached ⇒ the sh is its own group leader) so the hard
|
|
33
|
+
// timeout can SIGKILL the WHOLE tree, including a grandchild that outlived
|
|
34
|
+
// its parent's SIGTERM or kept the stdout pipe open. This is what stops one
|
|
35
|
+
// runaway flight from wedging the awaiting tick forever.
|
|
36
|
+
const child = spawn('/bin/sh', ['-c', command], {
|
|
37
|
+
cwd: cwd ?? process.cwd(),
|
|
38
|
+
env,
|
|
39
|
+
detached: true,
|
|
40
|
+
});
|
|
41
|
+
const settle = () => {
|
|
42
|
+
if (settled)
|
|
43
|
+
return;
|
|
44
|
+
settled = true;
|
|
45
|
+
clearTimeout(hardTimer);
|
|
46
|
+
clearTimeout(exitGrace);
|
|
47
|
+
// Release the pipe read-ends so a still-open grandchild write-end cannot
|
|
48
|
+
// keep the bridge's event loop (or the test runner) alive after we return.
|
|
49
|
+
child.stdout?.destroy();
|
|
50
|
+
child.stderr?.destroy();
|
|
51
|
+
child.unref?.();
|
|
52
|
+
resolve({ stdout, stderr, exitCode: killedByTimeout ? 124 : (exitCode ?? 1) });
|
|
53
|
+
};
|
|
54
|
+
const recordExit = (code, signal) => {
|
|
55
|
+
if (exitCode === null)
|
|
56
|
+
exitCode = typeof code === 'number' ? code : signal ? 1 : 0;
|
|
57
|
+
};
|
|
58
|
+
child.stdout?.on('data', (d) => {
|
|
59
|
+
if (stdout.length < MAX_CAPTURE)
|
|
60
|
+
stdout += d.toString('utf8');
|
|
61
|
+
});
|
|
62
|
+
child.stderr?.on('data', (d) => {
|
|
63
|
+
if (stderr.length < MAX_CAPTURE)
|
|
64
|
+
stderr += d.toString('utf8');
|
|
65
|
+
});
|
|
66
|
+
child.on('error', () => {
|
|
67
|
+
if (exitCode === null)
|
|
68
|
+
exitCode = 1;
|
|
69
|
+
settle();
|
|
70
|
+
});
|
|
71
|
+
// Settle on 'exit' (the shell terminated), NOT 'close' (all stdio closed):
|
|
72
|
+
// a backgrounded/detached grandchild keeps the pipe open, so 'close' may
|
|
73
|
+
// never come. 'close' still wins the race when it fires first (full stdout);
|
|
74
|
+
// otherwise a short grace after 'exit' lets the parent's own output flush.
|
|
75
|
+
child.on('exit', (code, signal) => {
|
|
76
|
+
recordExit(code, signal);
|
|
77
|
+
exitGrace = setTimeout(settle, 150);
|
|
78
|
+
exitGrace.unref?.();
|
|
79
|
+
});
|
|
80
|
+
child.on('close', (code, signal) => {
|
|
81
|
+
recordExit(code, signal);
|
|
82
|
+
settle();
|
|
83
|
+
});
|
|
84
|
+
const hardTimer = setTimeout(() => {
|
|
85
|
+
killedByTimeout = true;
|
|
86
|
+
if (child.pid) {
|
|
87
|
+
try {
|
|
88
|
+
process.kill(-child.pid, 'SIGKILL'); // whole process group
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
try {
|
|
92
|
+
child.kill('SIGKILL');
|
|
93
|
+
}
|
|
94
|
+
catch {
|
|
95
|
+
/* already gone */
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
settle();
|
|
100
|
+
}, timeoutMs);
|
|
101
|
+
hardTimer.unref?.();
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
/**
 * Byte-accurate tail: keep at most the last `maxBytes` UTF-8 bytes of a
 * (possibly huge) terminal log.
 *
 * The cut is moved forward to the next character boundary, so the result never
 * starts with a torn multi-byte sequence (which would decode to U+FFFD and
 * re-encode to MORE than `maxBytes`).
 *
 * @param text     Full text.
 * @param maxBytes Maximum size of the returned tail, in UTF-8 bytes.
 * @returns `text` unchanged when it already fits, otherwise its tail.
 */
function truncateTail(text, maxBytes) {
    const buf = Buffer.from(text, 'utf8');
    if (buf.length <= maxBytes)
        return text;
    let start = buf.length - maxBytes;
    // 0b10xxxxxx marks a UTF-8 continuation byte: skip those so the tail begins
    // on the first byte of a character.
    while (start < buf.length && (buf[start] & 0xc0) === 0x80)
        start += 1;
    return buf.subarray(start).toString('utf8');
}
|
|
111
|
+
/**
 * Map a finished run's session record to the loop-level result.
 *
 * Only a record whose status is exactly `completed` counts as 'ok'. A missing
 * record (removed/lost) or any other status — e.g. still running/pending at
 * finalize time because the process died without a clean status transition —
 * is treated as a crash and yields 'error'. A crashed run is never reported as
 * success.
 */
function resolveRunStatus(run) {
    if (run?.status === 'completed') {
        return 'ok';
    }
    return 'error';
}
|
|
120
|
+
/**
|
|
121
|
+
* The scheduled-loops engine. On each 30s tick it FINALIZES each loop's
|
|
122
|
+
* in-flight run(s) (Phase A) before deciding whether to FIRE a new one
|
|
123
|
+
* (Phase B). All side effects go through injected collaborators so it is
|
|
124
|
+
* unit-testable without a live bridge, real fs or real timers (mirrors
|
|
125
|
+
* workflow-runner.ts).
|
|
126
|
+
*
|
|
127
|
+
* Persistence rule: the scheduler NEVER writes a whole detached Loop back
|
|
128
|
+
* across an await. Every runtime-field change goes through
|
|
129
|
+
* store.mutateLoopRuntime (fresh-read → mutate → save), so a loop deleted or
|
|
130
|
+
* user-edited during a long flight is neither resurrected nor clobbered.
|
|
131
|
+
*/
|
|
132
|
+
export class LoopScheduler {
|
|
133
|
+
store;
|
|
134
|
+
runner;
|
|
135
|
+
centrifugo;
|
|
136
|
+
userId;
|
|
137
|
+
spawnSession;
|
|
138
|
+
removeSession;
|
|
139
|
+
loops;
|
|
140
|
+
runRecords;
|
|
141
|
+
runFlight;
|
|
142
|
+
now;
|
|
143
|
+
/** Re-entrancy guard: tick N+1 never overlaps N. */
|
|
144
|
+
tickRunning = false;
|
|
145
|
+
/** Set once start() runs (after reconcileOnStart). kick() no-ops before this so an
|
|
146
|
+
* early run_loop_now cannot trigger an un-reconciled tick that stampedes overdue loops. */
|
|
147
|
+
started = false;
|
|
148
|
+
/** Per-loop in-memory fire lock, shared by tick + run_loop_now/kick. */
|
|
149
|
+
firingLoops = new Set();
|
|
150
|
+
/** loopId -> (runSessionId -> fire-time ms). Every run THIS process spawned, so under
|
|
151
|
+
* overlapPolicy:'allow' each concurrent run is finalized/postflighted/maxRuntime-checked
|
|
152
|
+
* independently — not just the newest. Rebuilt lazily from the persisted primary on restart. */
|
|
153
|
+
inFlight = new Map();
|
|
154
|
+
timer;
|
|
155
|
+
constructor(deps) {
|
|
156
|
+
this.store = deps.store;
|
|
157
|
+
this.runner = deps.runner;
|
|
158
|
+
this.centrifugo = deps.centrifugo;
|
|
159
|
+
this.userId = deps.userId;
|
|
160
|
+
this.spawnSession = deps.spawnSession;
|
|
161
|
+
this.removeSession = deps.removeSession;
|
|
162
|
+
this.loops = deps.loops ?? { listLoops, mutateLoopRuntime };
|
|
163
|
+
this.runRecords = deps.runRecords ?? { upsertLoopRunRecord, pruneLoopRunRecords };
|
|
164
|
+
this.runFlight = deps.runFlight ?? runFlightCommand;
|
|
165
|
+
this.now = deps.now ?? (() => Date.now());
|
|
166
|
+
}
|
|
167
|
+
start() {
|
|
168
|
+
this.started = true;
|
|
169
|
+
if (this.timer)
|
|
170
|
+
return;
|
|
171
|
+
this.timer = setInterval(() => {
|
|
172
|
+
void this.tick();
|
|
173
|
+
}, LOOP_TICK_INTERVAL_MS);
|
|
174
|
+
}
|
|
175
|
+
stop() {
|
|
176
|
+
if (this.timer) {
|
|
177
|
+
clearInterval(this.timer);
|
|
178
|
+
this.timer = undefined;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
/** Immediate, guarded, out-of-band tick (used by run_loop_now). No-op until start()
|
|
182
|
+
* has run, so a kick that races startup cannot fire before reconcileOnStart. */
|
|
183
|
+
kick() {
|
|
184
|
+
if (!this.started)
|
|
185
|
+
return;
|
|
186
|
+
if (!this.tickRunning)
|
|
187
|
+
void this.tick();
|
|
188
|
+
}
|
|
189
|
+
/** Drop scheduler tracking for a deleted loop and stop any run it left alive, so a
|
|
190
|
+
* just-deleted loop never leaks a live AI session with nothing left to finalize it. */
|
|
191
|
+
onLoopDeleted(loop) {
|
|
192
|
+
const ids = new Set();
|
|
193
|
+
const tracked = this.inFlight.get(loop.id);
|
|
194
|
+
if (tracked)
|
|
195
|
+
for (const id of tracked.keys())
|
|
196
|
+
ids.add(id);
|
|
197
|
+
if (loop.lastStatus === 'running' && loop.lastSessionId)
|
|
198
|
+
ids.add(loop.lastSessionId);
|
|
199
|
+
for (const id of ids) {
|
|
200
|
+
if (this.runner.isRunning(id))
|
|
201
|
+
this.runner.stop(id);
|
|
202
|
+
}
|
|
203
|
+
this.inFlight.delete(loop.id);
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Missed-schedule policy, run once before the first tick: for every loop whose
|
|
207
|
+
* nextRunAt is missing or already past, recompute it from now (skip missed
|
|
208
|
+
* occurrences; never stampede overdue loops). runNowRequested is preserved so a
|
|
209
|
+
* manual override survives a restart and still fires on the first tick. A loop
|
|
210
|
+
* with a corrupt persisted schedule is skipped here (logged) and reported as an
|
|
211
|
+
* error on its first fire.
|
|
212
|
+
*/
|
|
213
|
+
async reconcileOnStart(now = this.now()) {
|
|
214
|
+
for (const loop of this.loops.listLoops()) {
|
|
215
|
+
try {
|
|
216
|
+
const overdue = loop.nextRunAt ? Date.parse(loop.nextRunAt) <= now : true;
|
|
217
|
+
if (!overdue)
|
|
218
|
+
continue;
|
|
219
|
+
const nextRunMs = computeNextRun(loop.schedule, now); // may throw on a corrupt cron
|
|
220
|
+
await this.persist(loop.id, (l) => {
|
|
221
|
+
l.nextRunAt = iso(nextRunMs);
|
|
222
|
+
l.updatedAt = iso(now);
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
catch (err) {
|
|
226
|
+
console.error(`[LoopScheduler] reconcile failed for loop ${loop.id}:`, err);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
async tick(now = this.now()) {
|
|
231
|
+
if (this.tickRunning)
|
|
232
|
+
return; // tick N+1 never overlaps N
|
|
233
|
+
this.tickRunning = true;
|
|
234
|
+
try {
|
|
235
|
+
for (const loop of this.loops.listLoops()) {
|
|
236
|
+
try {
|
|
237
|
+
await this.processLoop(loop, now);
|
|
238
|
+
}
|
|
239
|
+
catch (err) {
|
|
240
|
+
// One bad loop must not kill the tick (mirrors resurrectSessions).
|
|
241
|
+
console.error(`[LoopScheduler] loop ${loop.id} failed:`, err);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
finally {
|
|
246
|
+
this.tickRunning = false;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
async processLoop(loop, now) {
|
|
250
|
+
await this.finalizePhase(loop, now); // Phase A — finalize before fire
|
|
251
|
+
await this.firePhase(loop, now); // Phase B
|
|
252
|
+
}
|
|
253
|
+
/** Phase A: finalize each in-flight run once its PTY is confirmed gone (past grace)
|
|
254
|
+
* or over its per-run maxRuntime budget. Under 'allow' this walks every tracked run,
|
|
255
|
+
* not just the newest, so none is orphaned. */
|
|
256
|
+
async finalizePhase(loop, now) {
|
|
257
|
+
this.ensureTracked(loop);
|
|
258
|
+
const tracked = this.inFlight.get(loop.id);
|
|
259
|
+
if (!tracked || tracked.size === 0)
|
|
260
|
+
return;
|
|
261
|
+
// Snapshot: finalizeRun mutates the map while we iterate.
|
|
262
|
+
for (const [runId, startedMs] of [...tracked]) {
|
|
263
|
+
const running = this.runner.isRunning(runId);
|
|
264
|
+
const elapsed = now - startedMs;
|
|
265
|
+
if (running && loop.maxRuntimeMs && elapsed > loop.maxRuntimeMs) {
|
|
266
|
+
this.runner.stop(runId);
|
|
267
|
+
await this.finalizeRun(loop, now, runId, true);
|
|
268
|
+
}
|
|
269
|
+
else if (!running && elapsed >= LOOP_TICK_INTERVAL_MS) {
|
|
270
|
+
await this.finalizeRun(loop, now, runId, false);
|
|
271
|
+
}
|
|
272
|
+
// else: still running, or still inside the grace window — leave it.
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
/** Seed the persisted primary run into in-memory tracking after a restart (when this
|
|
276
|
+
* process has spawned nothing yet for the loop), so a run left 'running' by a prior
|
|
277
|
+
* process is still finalized. */
|
|
278
|
+
ensureTracked(loop) {
|
|
279
|
+
if (loop.lastStatus !== 'running' || !loop.lastSessionId)
|
|
280
|
+
return;
|
|
281
|
+
const existing = this.inFlight.get(loop.id);
|
|
282
|
+
if (existing && existing.size > 0)
|
|
283
|
+
return; // already tracking this process's run(s)
|
|
284
|
+
const parsed = Date.parse(loop.lastRunAt ?? '');
|
|
285
|
+
const startedMs = Number.isNaN(parsed) ? this.now() : parsed;
|
|
286
|
+
const map = existing ?? new Map();
|
|
287
|
+
map.set(loop.lastSessionId, startedMs);
|
|
288
|
+
this.inFlight.set(loop.id, map);
|
|
289
|
+
}
|
|
290
|
+
track(loopId, sessionId, startedMs) {
|
|
291
|
+
const map = this.inFlight.get(loopId) ?? new Map();
|
|
292
|
+
map.set(sessionId, startedMs);
|
|
293
|
+
this.inFlight.set(loopId, map);
|
|
294
|
+
}
|
|
295
|
+
/** Phase B: fire the loop if due, honoring the per-loop lock and the overlap policy. */
|
|
296
|
+
async firePhase(loop, now) {
|
|
297
|
+
if (!isDue(loop, now))
|
|
298
|
+
return;
|
|
299
|
+
if (this.firingLoops.has(loop.id))
|
|
300
|
+
return;
|
|
301
|
+
// A previous run that has EXITED but was not yet finalized (still inside the
|
|
302
|
+
// grace window) must be finalized BEFORE we decide to fire. Otherwise a
|
|
303
|
+
// skip-policy loop double-fires in the grace window (isRunning is already
|
|
304
|
+
// false, so the overlap guard below misses it) and the just-finished run is
|
|
305
|
+
// orphaned — its finalize/postflight never runs.
|
|
306
|
+
if (loop.lastStatus === 'running' && loop.lastSessionId && !this.runner.isRunning(loop.lastSessionId)) {
|
|
307
|
+
await this.finalizeRun(loop, now, loop.lastSessionId, false);
|
|
308
|
+
const fresh = this.loops.listLoops().find((l) => l.id === loop.id);
|
|
309
|
+
if (!fresh)
|
|
310
|
+
return; // deleted during finalize
|
|
311
|
+
loop = fresh;
|
|
312
|
+
}
|
|
313
|
+
// Overlap guard: a skip-policy loop whose previous run is STILL alive advances
|
|
314
|
+
// its schedule only — no new fire, no skipCount bump (overlap-skip is not a
|
|
315
|
+
// preflight-skip).
|
|
316
|
+
if (loop.overlapPolicy === 'skip' &&
|
|
317
|
+
loop.lastStatus === 'running' &&
|
|
318
|
+
loop.lastSessionId &&
|
|
319
|
+
this.runner.isRunning(loop.lastSessionId)) {
|
|
320
|
+
await this.persist(loop.id, (l) => {
|
|
321
|
+
try {
|
|
322
|
+
l.nextRunAt = iso(computeNextRun(l.schedule, now));
|
|
323
|
+
}
|
|
324
|
+
catch {
|
|
325
|
+
/* corrupt schedule surfaces as an error on the fire path, not here */
|
|
326
|
+
}
|
|
327
|
+
l.runNowRequested = false;
|
|
328
|
+
l.updatedAt = iso(now);
|
|
329
|
+
});
|
|
330
|
+
return;
|
|
331
|
+
}
|
|
332
|
+
this.firingLoops.add(loop.id);
|
|
333
|
+
try {
|
|
334
|
+
await this.fireLoop(loop, now);
|
|
335
|
+
}
|
|
336
|
+
finally {
|
|
337
|
+
this.firingLoops.delete(loop.id);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
/** Advance the schedule up front (so failures/skips never stampede), then preflight → flight. */
|
|
341
|
+
async fireLoop(loop, now) {
|
|
342
|
+
// Compute the next fire first so a persisted-corrupt schedule is reported as an
|
|
343
|
+
// error (with a bounded backoff) instead of silently re-throwing every tick.
|
|
344
|
+
let nextRunMs;
|
|
345
|
+
try {
|
|
346
|
+
nextRunMs = computeNextRun(loop.schedule, now);
|
|
347
|
+
}
|
|
348
|
+
catch (err) {
|
|
349
|
+
await this.persist(loop.id, (l) => {
|
|
350
|
+
l.lastRunAt = iso(now);
|
|
351
|
+
l.nextRunAt = iso(now + LOOP_TICK_INTERVAL_MS); // bounded backoff — no stampede
|
|
352
|
+
l.runNowRequested = false;
|
|
353
|
+
l.lastStatus = 'error';
|
|
354
|
+
l.updatedAt = iso(now);
|
|
355
|
+
});
|
|
356
|
+
console.error(`[LoopScheduler] bad schedule for loop ${loop.id}:`, err);
|
|
357
|
+
return;
|
|
358
|
+
}
|
|
359
|
+
try {
|
|
360
|
+
let preflightOut = '';
|
|
361
|
+
if (loop.preflight) {
|
|
362
|
+
const r = await this.runFlight(loop.preflight.command, loop.workdir, loop.preflight.timeoutMs);
|
|
363
|
+
preflightOut = r.stdout;
|
|
364
|
+
if (r.exitCode !== 0) {
|
|
365
|
+
const skippedAt = iso(now);
|
|
366
|
+
const details = [
|
|
367
|
+
`Preflight exited with code ${r.exitCode}.`,
|
|
368
|
+
r.stdout ? `\nstdout:\n${r.stdout}` : '',
|
|
369
|
+
r.stderr ? `\nstderr:\n${r.stderr}` : '',
|
|
370
|
+
].join('');
|
|
371
|
+
// ABORT: skip (not error), no session, no run-node.
|
|
372
|
+
const skipped = await this.persist(loop.id, (l) => {
|
|
373
|
+
l.lastRunAt = skippedAt;
|
|
374
|
+
l.nextRunAt = iso(nextRunMs);
|
|
375
|
+
l.runNowRequested = false;
|
|
376
|
+
l.lastStatus = 'skipped';
|
|
377
|
+
l.skipCount += 1;
|
|
378
|
+
l.updatedAt = skippedAt;
|
|
379
|
+
});
|
|
380
|
+
if (skipped) {
|
|
381
|
+
this.runRecords.upsertLoopRunRecord(skippedRunRecord(loop, skippedAt, details));
|
|
382
|
+
}
|
|
383
|
+
if (skipped && loop.postflight?.runOnSkip) {
|
|
384
|
+
await this.runPostflight(loop, { status: 'skipped', sessionId: '', output: '' });
|
|
385
|
+
}
|
|
386
|
+
return;
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
const task = loop.task.replaceAll('{{preflight}}', preflightOut);
|
|
390
|
+
const session = await this.spawnSession({
|
|
391
|
+
shellType: loop.harness,
|
|
392
|
+
prompt: task,
|
|
393
|
+
workingDir: loop.workdir,
|
|
394
|
+
model: loop.model,
|
|
395
|
+
env: preflightOut ? { FTOWN_PREFLIGHT_OUTPUT: preflightOut } : undefined,
|
|
396
|
+
loopId: loop.id,
|
|
397
|
+
suppressBriefing: true, // no child/orchestrator briefing paragraph in the task
|
|
398
|
+
name: `${loop.name} · ${iso(now)}`,
|
|
399
|
+
// parentSessionId intentionally omitted — loopId is the sole grouping key.
|
|
400
|
+
});
|
|
401
|
+
const updated = await this.persist(loop.id, (l) => {
|
|
402
|
+
l.lastRunAt = iso(now);
|
|
403
|
+
l.nextRunAt = iso(nextRunMs);
|
|
404
|
+
l.runNowRequested = false;
|
|
405
|
+
l.lastSessionId = session.id;
|
|
406
|
+
l.lastStatus = 'running';
|
|
407
|
+
l.runCount += 1;
|
|
408
|
+
l.updatedAt = iso(now);
|
|
409
|
+
});
|
|
410
|
+
if (!updated) {
|
|
411
|
+
// The loop was deleted during preflight/spawn: do not resurrect it and do
|
|
412
|
+
// not leave an orphan run that nothing would ever finalize or prune.
|
|
413
|
+
this.runner.stop(session.id);
|
|
414
|
+
await this.removeSession(session.id).catch(() => undefined);
|
|
415
|
+
return;
|
|
416
|
+
}
|
|
417
|
+
this.runRecords.upsertLoopRunRecord(recordForSession(updated, session, iso(now)));
|
|
418
|
+
this.track(loop.id, session.id, now);
|
|
419
|
+
}
|
|
420
|
+
catch (err) {
|
|
421
|
+
// A failure after the schedule was computed: record error + persist so the
|
|
422
|
+
// loop resumes its cadence instead of stampede-retrying every tick.
|
|
423
|
+
await this.persist(loop.id, (l) => {
|
|
424
|
+
l.lastRunAt = iso(now);
|
|
425
|
+
l.nextRunAt = iso(nextRunMs);
|
|
426
|
+
l.runNowRequested = false;
|
|
427
|
+
l.lastStatus = 'error';
|
|
428
|
+
l.updatedAt = iso(now);
|
|
429
|
+
});
|
|
430
|
+
console.error(`[LoopScheduler] fire failed for loop ${loop.id}:`, err);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
/** Resolve one finished run to ok/error, update the loop badge (only if this is the
|
|
434
|
+
* loop's tracked/latest run), then run postflight + retention for it. */
|
|
435
|
+
async finalizeRun(loop, now, runId, forcedError) {
|
|
436
|
+
const tracked = this.inFlight.get(loop.id);
|
|
437
|
+
const startedMs = tracked?.get(runId) ?? Date.parse(loop.lastRunAt ?? iso(now));
|
|
438
|
+
if (tracked) {
|
|
439
|
+
tracked.delete(runId);
|
|
440
|
+
if (tracked.size === 0)
|
|
441
|
+
this.inFlight.delete(loop.id);
|
|
442
|
+
}
|
|
443
|
+
const run = runId ? await this.store.loadSession(runId) : null;
|
|
444
|
+
const status = forcedError ? 'error' : resolveRunStatus(run);
|
|
445
|
+
const fullOutput = runId ? await this.store.loadTerminalLog(runId) : '';
|
|
446
|
+
const output = truncateTail(fullOutput, 65_536);
|
|
447
|
+
const outputBytes = Buffer.byteLength(fullOutput, 'utf8');
|
|
448
|
+
const tailBytes = Buffer.byteLength(output, 'utf8');
|
|
449
|
+
// Only the loop's most-recently-STARTED run (lastSessionId) drives the badge;
|
|
450
|
+
// an older overlapping 'allow' run finalizes silently (still postflight +
|
|
451
|
+
// retention) without flipping the badge away from a newer run's state.
|
|
452
|
+
const updated = await this.persist(loop.id, (l) => {
|
|
453
|
+
if (l.lastSessionId === runId)
|
|
454
|
+
l.lastStatus = status;
|
|
455
|
+
l.updatedAt = iso(now);
|
|
456
|
+
});
|
|
457
|
+
void updated;
|
|
458
|
+
const baseRecord = run
|
|
459
|
+
? recordForSession(loop, run, Number.isFinite(startedMs) ? iso(startedMs) : run.createdAt)
|
|
460
|
+
: {
|
|
461
|
+
id: runId,
|
|
462
|
+
loopId: loop.id,
|
|
463
|
+
bridgeId: loop.bridgeId,
|
|
464
|
+
sessionId: runId,
|
|
465
|
+
name: `${loop.name} · ${runId}`,
|
|
466
|
+
status,
|
|
467
|
+
startedAt: Number.isFinite(startedMs) ? iso(startedMs) : iso(now),
|
|
468
|
+
updatedAt: iso(now),
|
|
469
|
+
harness: loop.harness,
|
|
470
|
+
workdir: loop.workdir,
|
|
471
|
+
task: loop.task,
|
|
472
|
+
model: loop.model,
|
|
473
|
+
sessionStatus: undefined,
|
|
474
|
+
};
|
|
475
|
+
this.runRecords.upsertLoopRunRecord({
|
|
476
|
+
...baseRecord,
|
|
477
|
+
status,
|
|
478
|
+
sessionStatus: run?.status,
|
|
479
|
+
errorReason: forcedError ? 'max_runtime_exceeded' : run?.errorReason,
|
|
480
|
+
updatedAt: iso(now),
|
|
481
|
+
finishedAt: iso(now),
|
|
482
|
+
durationMs: Math.max(0, now - (Number.isFinite(startedMs) ? startedMs : now)),
|
|
483
|
+
logTail: output,
|
|
484
|
+
logBytes: outputBytes,
|
|
485
|
+
logTruncated: outputBytes > tailBytes,
|
|
486
|
+
});
|
|
487
|
+
if (loop.postflight) {
|
|
488
|
+
await this.runPostflight(loop, { status, sessionId: runId, output });
|
|
489
|
+
}
|
|
490
|
+
await this.pruneRuns(loop);
|
|
491
|
+
}
|
|
492
|
+
async runPostflight(loop, ctx) {
|
|
493
|
+
if (!loop.postflight)
|
|
494
|
+
return;
|
|
495
|
+
await this.runFlight(loop.postflight.command, loop.workdir, loop.postflight.timeoutMs, {
|
|
496
|
+
FTOWN_RUN_STATUS: ctx.status,
|
|
497
|
+
FTOWN_RUN_SESSION_ID: ctx.sessionId,
|
|
498
|
+
FTOWN_RUN_OUTPUT: ctx.output,
|
|
499
|
+
});
|
|
500
|
+
}
|
|
501
|
+
/** Keep the newest N run-sessions for this loop; prune older finished ones. */
|
|
502
|
+
async pruneRuns(loop) {
|
|
503
|
+
const keep = loop.retention.autoClearAfterRuns;
|
|
504
|
+
if (keep == null)
|
|
505
|
+
return;
|
|
506
|
+
const runs = (await this.store.listSessions())
|
|
507
|
+
.filter((s) => s.loopId === loop.id)
|
|
508
|
+
.sort((a, b) => Date.parse(b.createdAt) - Date.parse(a.createdAt));
|
|
509
|
+
for (const run of runs.slice(keep)) {
|
|
510
|
+
if (this.runner.isRunning(run.id))
|
|
511
|
+
continue;
|
|
512
|
+
if (run.id === loop.lastSessionId)
|
|
513
|
+
continue;
|
|
514
|
+
await this.removeSession(run.id, { onlyIfFinished: true });
|
|
515
|
+
}
|
|
516
|
+
this.runRecords.pruneLoopRunRecords(loop.id, keep, [loop.lastSessionId, ...runs.filter((run) => this.runner.isRunning(run.id)).map((run) => run.id)]);
|
|
517
|
+
}
|
|
518
|
+
async persist(id, fn) {
|
|
519
|
+
const updated = this.loops.mutateLoopRuntime(id, fn);
|
|
520
|
+
if (updated)
|
|
521
|
+
await this.publish(updated);
|
|
522
|
+
return updated;
|
|
523
|
+
}
|
|
524
|
+
async publish(loop) {
|
|
525
|
+
try {
|
|
526
|
+
await this.centrifugo.publishLoopUpdate(this.userId, loop);
|
|
527
|
+
}
|
|
528
|
+
catch (err) {
|
|
529
|
+
// A UI-sync failure must never break the scheduler (matches session create).
|
|
530
|
+
console.error(`[LoopScheduler] Failed to publish loop update for ${loop.id}:`, err);
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
//# sourceMappingURL=loop-scheduler.js.map
|