convene-cli 1.5.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.js +9 -0
- package/dist/cache.js +173 -1
- package/dist/catalog/prompt.js +27 -5
- package/dist/commands/auth.js +128 -3
- package/dist/commands/beat.js +145 -0
- package/dist/commands/catchup.js +3 -1
- package/dist/commands/fetch.js +20 -3
- package/dist/commands/init.js +11 -1
- package/dist/commands/session-start.js +77 -7
- package/dist/commands/watch-reap.js +212 -0
- package/dist/commands/watch.js +109 -26
- package/dist/commands/worktree.js +155 -17
- package/dist/config.js +30 -0
- package/dist/index.js +12 -0
- package/dist/render.js +45 -1
- package/package.json +1 -1
package/dist/commands/init.js
CHANGED
|
@@ -209,7 +209,10 @@ function registerHook(noHook) {
|
|
|
209
209
|
if (raw !== null)
|
|
210
210
|
node_fs_1.default.writeFileSync(hook_1.SETTINGS_PATH + '.bak', raw);
|
|
211
211
|
node_fs_1.default.writeFileSync(hook_1.SETTINGS_PATH, (0, hook_1.serializeSettings)((0, hook_1.withHook)(settings)));
|
|
212
|
-
log(`Registered UserPromptSubmit hook in ${hook_1.SETTINGS_PATH}${raw !== null ? ' (backup: settings.json.bak)' : ''}.`);
|
|
212
|
+
log(`Registered the lightweight \`convene fetch\` UserPromptSubmit hook in ${hook_1.SETTINGS_PATH}${raw !== null ? ' (backup: settings.json.bak)' : ''}.`);
|
|
213
|
+
log(' ↳ This is the ONE global write: it fires in every repo (a silent no-op off the bus) and is ' +
|
|
214
|
+
'ADDITIVE — your own hooks are left untouched. Prefer not to touch ~/.claude? Re-run with ' +
|
|
215
|
+
'`--no-hook`; the committed project `.claude/settings.json` hook still covers this repo.');
|
|
213
216
|
}
|
|
214
217
|
catch (err) {
|
|
215
218
|
log(`Could not write settings (${err?.message}). Add this hook manually:`);
|
|
@@ -265,6 +268,13 @@ const COORD_HOOKS = [
|
|
|
265
268
|
verb: 'gate-push',
|
|
266
269
|
note: 'release the deploy lane after a push (idempotent)',
|
|
267
270
|
},
|
|
271
|
+
{
|
|
272
|
+
event: 'PostToolUse',
|
|
273
|
+
matcher: 'Edit|Write|MultiEdit',
|
|
274
|
+
command: 'convene beat --stdin',
|
|
275
|
+
verb: 'beat',
|
|
276
|
+
note: 'debounced session activity-beat so a heads-down session still pulses on the bus',
|
|
277
|
+
},
|
|
268
278
|
];
|
|
269
279
|
/**
|
|
270
280
|
* Wire the WP13 coordination hooks into a settings file (global or committed
|
|
@@ -7,7 +7,8 @@ exports.sessionStart = sessionStart;
|
|
|
7
7
|
* FAIL-OPEN (P0-FAILSAFE), copying the fetch.ts scaffold:
|
|
8
8
|
* - hard watchdog at 6000ms → exit 0 no matter what (SessionStart's own default
|
|
9
9
|
* timeout is 30s, which would stall a boot — we bound it ourselves);
|
|
10
|
-
* - the network GET is bounded at 4000ms;
|
|
10
|
+
* - the network GET is bounded at FETCH_TIMEOUT_MS (default 4000ms; overridable
|
|
11
|
+
* via CONVENE_FETCH_TIMEOUT_MS for deterministic tests);
|
|
11
12
|
* - any error / non-bus repo / DEGRADED emits NOTHING and exits 0.
|
|
12
13
|
*
|
|
13
14
|
* What it does on a fresh, authenticated bus repo:
|
|
@@ -22,11 +23,14 @@ const node_child_process_1 = require("node:child_process");
|
|
|
22
23
|
const git_1 = require("../git");
|
|
23
24
|
const config_1 = require("../config");
|
|
24
25
|
const cache_1 = require("../cache");
|
|
26
|
+
const worktree_1 = require("./worktree");
|
|
25
27
|
const api_1 = require("../api");
|
|
26
28
|
const render_1 = require("../render");
|
|
27
29
|
const catchup_1 = require("./catchup");
|
|
28
30
|
const exit_1 = require("../exit");
|
|
29
|
-
|
|
31
|
+
// Default 4000ms; overridable via CONVENE_FETCH_TIMEOUT_MS (tests drive it small for
|
|
32
|
+
// deterministic, load-independent latency-budget assertions). See config.ts.
|
|
33
|
+
const FETCH_TIMEOUT_MS = (0, config_1.resolveFetchTimeoutMs)();
|
|
30
34
|
const WATCHDOG_MS = 6000;
|
|
31
35
|
const MAX_ITEMS = 400;
|
|
32
36
|
// Don't relaunch the watch daemon if one stamped a heartbeat this recently — a
|
|
@@ -36,14 +40,23 @@ const WATCH_FRESH_SEC = 60;
|
|
|
36
40
|
* Launch `convene watch` as a DETACHED background daemon (§4.4): the watch runs
|
|
37
41
|
* for the life of the session surfacing mid-task halts, so it must NOT be a
|
|
38
42
|
* blocking hook entry. Best-effort + fail-open: any error is swallowed; a launch
|
|
39
|
-
* failure never wedges the boot.
|
|
40
|
-
*
|
|
43
|
+
* failure never wedges the boot.
|
|
44
|
+
*
|
|
45
|
+
* Two dedup guards (the daemon-leak fix) prevent piling up duplicate watchers:
|
|
46
|
+
* 1. authoritative — the scope's pidfile names a process that is STILL ALIVE
|
|
47
|
+
* (survives even a long quiet gap where the heartbeat would have gone stale);
|
|
48
|
+
* 2. cheap fast-path — a heartbeat stamped within WATCH_FRESH_SEC.
|
|
49
|
+
* The detached spawn inherits this process's env (so CLAUDE_CODE_SESSION_ID flows
|
|
50
|
+
* to the child, scoping its pidfile/liveness to the owning session).
|
|
41
51
|
*/
|
|
42
52
|
function launchWatch(slug) {
|
|
43
53
|
try {
|
|
54
|
+
const owner = (0, cache_1.readWatchPid)(slug);
|
|
55
|
+
if (owner && (0, cache_1.isPidAlive)(owner.pid))
|
|
56
|
+
return; // a live watcher already owns this scope
|
|
44
57
|
const age = (0, cache_1.watchHeartbeatAgeSec)(slug);
|
|
45
58
|
if (age !== null && age < WATCH_FRESH_SEC)
|
|
46
|
-
return; //
|
|
59
|
+
return; // recently heartbeating
|
|
47
60
|
const child = (0, node_child_process_1.spawn)(process.execPath, [process.argv[1], 'watch'], {
|
|
48
61
|
detached: true,
|
|
49
62
|
stdio: 'ignore',
|
|
@@ -57,6 +70,51 @@ function launchWatch(slug) {
|
|
|
57
70
|
/** Write `s` to stdout as a single newline-terminated line. */
function emit(s) {
    const line = s + '\n';
    process.stdout.write(line);
}
|
|
73
|
+
/**
|
|
74
|
+
* SOFT auto-isolate: if this session booted INTO a checkout that already has a
|
|
75
|
+
* live sibling, provision a fresh isolated worktree and return a relocate block
|
|
76
|
+
* to emit (best-effort, never throws). The deterministic TIEBREAK that prevents a
|
|
77
|
+
* relocation storm: at SessionStart this session has not yet written its own feed
|
|
78
|
+
* `.json` (that only happens on the first `convene fetch`), so liveSessionCount is
|
|
79
|
+
* purely the INCUMBENT count. Only the session booting into an occupied checkout
|
|
80
|
+
* sees count >= 1 and moves; the incumbents, having no live sibling pulse newer
|
|
81
|
+
* than their own at their boot, never moved — exactly one side relocates.
|
|
82
|
+
*
|
|
83
|
+
* The full gate (ALL must hold):
|
|
84
|
+
* - a session discriminator exists (we are a disambiguable concurrent session);
|
|
85
|
+
* - >= 1 live sibling within the wide window (a co-tenant exists at all);
|
|
86
|
+
* - >= 1 live sibling within the TIGHT recency window (relaunch-ghost guard — a
|
|
87
|
+
* stale just-closed sibling's `.json` aged past this window won't trigger);
|
|
88
|
+
* - not already auto-isolated for THIS instance (the per-instance sentinel, so a
|
|
89
|
+
* resume/clear of an already-relocated session does not re-provision).
|
|
90
|
+
*
|
|
91
|
+
* Returns the relocate block string on a successful provision, else null. Fail-OPEN
|
|
92
|
+
* on every branch — any failure means "no relocation", and the boot proceeds.
|
|
93
|
+
*/
|
|
94
|
+
function maybeAutoIsolate(top, slug, instance) {
    let relocateBlock = null;
    try {
        // True when NO live sibling feed is seen inside the given window (seconds).
        const noSiblingWithin = (windowSec) => (0, cache_1.liveSessionCount)(slug, windowSec) < 1;
        // All four gates must hold; `&&` short-circuits, so they are evaluated in
        // order and the first failing gate stops the chain:
        //   1. a session discriminator exists,
        //   2. a live incumbent exists in the wide window,
        //   3. a live incumbent pulsed inside the tight recency window,
        //   4. this exact instance has not already been relocated.
        const shouldRelocate = Boolean((0, git_1.sessionDiscriminator)()) &&
            !noSiblingWithin(cache_1.LIVE_SESSION_WINDOW_SEC) &&
            !noSiblingWithin(cache_1.LIVE_SESSION_RECENT_SEC) &&
            !(0, cache_1.autoIsolatedAlready)(slug, instance);
        if (shouldRelocate) {
            const provisioned = (0, worktree_1.provisionAutoWorktree)(top, slug);
            // A falsy result means provisioning failed: fail-open, no relocation,
            // and the sentinel is deliberately NOT written.
            if (provisioned) {
                (0, cache_1.markAutoIsolated)(slug, instance);
                relocateBlock = (0, render_1.renderRelocateBlock)(provisioned);
            }
        }
    }
    catch {
        // fail-open: auto-isolate must never wedge a boot
        relocateBlock = null;
    }
    return relocateBlock;
}
|
|
60
118
|
async function run(opts) {
|
|
61
119
|
const top = (0, git_1.gitToplevel)();
|
|
62
120
|
if (!top)
|
|
@@ -72,15 +130,24 @@ async function run(opts) {
|
|
|
72
130
|
const session = (0, git_1.sessionId)(member, top);
|
|
73
131
|
// Mint a fresh instance for THIS boot (a fresh boot = a fresh instance).
|
|
74
132
|
const instance = (0, cache_1.mintSessionInstance)(slug);
|
|
133
|
+
// SOFT auto-isolate (evaluated NOW, before this session writes its own feed
|
|
134
|
+
// .json): if a live sibling already occupies this checkout, provision a fresh
|
|
135
|
+
// isolated worktree and stage a relocate block. Best-effort; null = no move.
|
|
136
|
+
const relocateBlock = maybeAutoIsolate(top, slug, instance);
|
|
75
137
|
// Launch the detached watch daemon from the SessionStart path (not a Bash hook).
|
|
76
138
|
launchWatch(slug);
|
|
77
139
|
const api = new api_1.ConveneApi(cfg.baseUrl, cfg.apiKey, session, cfg.tool, instance);
|
|
78
140
|
const since = opts.since != null ? Number(opts.since) : undefined;
|
|
79
141
|
const res = await api.sessionOpen(slug, { since: Number.isFinite(since) ? since : undefined, advance: true, maxItems: MAX_ITEMS }, FETCH_TIMEOUT_MS);
|
|
80
|
-
// DEGRADED / failure → emit NOTHING
|
|
81
|
-
// sentinel so the first fetch doesn't
|
|
142
|
+
// DEGRADED / failure → emit NOTHING from the catch-up digest (structural
|
|
143
|
+
// suppression). Still record the sentinel so the first fetch doesn't
|
|
144
|
+
// double-surface from its own cache path. The relocate block is INDEPENDENT of
|
|
145
|
+
// the network digest (it is purely local filesystem signal), so it is still
|
|
146
|
+
// surfaced — moving off an occupied checkout shouldn't depend on bus liveness.
|
|
82
147
|
if (!res.ok || !res.json || res.json.degraded) {
|
|
83
148
|
(0, cache_1.markCatchupSurfaced)(slug, instance);
|
|
149
|
+
if (relocateBlock)
|
|
150
|
+
emit(relocateBlock);
|
|
84
151
|
return;
|
|
85
152
|
}
|
|
86
153
|
if (opts.json) {
|
|
@@ -89,6 +156,9 @@ async function run(opts) {
|
|
|
89
156
|
else {
|
|
90
157
|
emit((0, render_1.renderSessionOpenBlock)({ slug, member, session, digest: (0, catchup_1.toDigest)(res.json) }));
|
|
91
158
|
}
|
|
159
|
+
// Emit the relocate block AFTER the digest (both are surfaced — digest then move).
|
|
160
|
+
if (relocateBlock)
|
|
161
|
+
emit(relocateBlock);
|
|
92
162
|
(0, cache_1.markCatchupSurfaced)(slug, instance);
|
|
93
163
|
}
|
|
94
164
|
async function sessionStart(opts = {}) {
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parseReapableWatchers = parseReapableWatchers;
|
|
7
|
+
exports.excludeSpared = excludeSpared;
|
|
8
|
+
exports.reapWatchers = reapWatchers;
|
|
9
|
+
exports.watchReap = watchReap;
|
|
10
|
+
/**
|
|
11
|
+
* `convene watch-reap` — the cleanup half of the daemon-leak fix.
|
|
12
|
+
*
|
|
13
|
+
* Detached `convene watch` daemons spawned at SessionStart outlive their session
|
|
14
|
+
* and reparent to PID 1; before the self-termination ceilings (see watch.ts) they
|
|
15
|
+
* leaked forever (145 observed). New watchers now self-exit, but this verb mops up
|
|
16
|
+
* the ALREADY-ORPHANED backlog and is also invoked by `convene doctor --fix`.
|
|
17
|
+
*
|
|
18
|
+
* THE PPID-1 SUBTLETY (load-bearing): the SessionStart hook spawns the watcher
|
|
19
|
+
* detached+unref and exits IMMEDIATELY, so a watcher reparents to PID 1 within
|
|
20
|
+
* seconds EVEN WHILE its session is alive. ppid===1 therefore does NOT distinguish
|
|
21
|
+
* a dead-session orphan from a live session's watcher. Selection is two-gated:
|
|
22
|
+
* 1. parseReapableWatchers — ppid===1 + a tight argv match (the candidate set);
|
|
23
|
+
* 2. SPARE the living — drop any candidate whose pid is named by a `*.watch.pid`
|
|
24
|
+
* file AND is still alive. A post-fix watcher always writes its scoped
|
|
25
|
+
* pidfile, so a live, pidfile-owned watcher is by definition a current owner,
|
|
26
|
+
* never a dead-session orphan.
|
|
27
|
+
* Caveat: the FIRST reap after a user upgrades may kill their currently-running
|
|
28
|
+
* OLD-cli watcher — a pre-fix watcher wrote NO pidfile, so it is indistinguishable
|
|
29
|
+
* from a true orphan. That is acceptable: it self-heals on the next SessionStart,
|
|
30
|
+
* and from then on every live new-cli watcher carries a pidfile and is spared.
|
|
31
|
+
*
|
|
32
|
+
* Fail-open + POSIX-only: any error returns a benign empty result, never throws.
|
|
33
|
+
*/
|
|
34
|
+
const node_child_process_1 = require("node:child_process");
|
|
35
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
36
|
+
const cache_1 = require("../cache");
|
|
37
|
+
/**
|
|
38
|
+
* PURE candidate selector (unit-tested): from `ps -eo pid,ppid,args` output,
|
|
39
|
+
* return the pids of processes that LOOK like detached convene watchers orphaned
|
|
40
|
+
* to init. This is the FIRST of two gates — the live-pidfile spare filter
|
|
41
|
+
* (see reapWatchers) is the second.
|
|
42
|
+
*
|
|
43
|
+
* Match TIGHTLY to avoid collateral damage:
|
|
44
|
+
* - the args must reference the convene bin (its basename — `index.js` for the
|
|
45
|
+
* installed CLI, or the bin name like `convene`) AND end with `watch` as the
|
|
46
|
+
* FINAL token (the detached watcher's exact shape: `node <bin> watch`);
|
|
47
|
+
* - never a bare `watch`, an `npm run watch`, or any process that merely
|
|
48
|
+
* contains the word watch elsewhere;
|
|
49
|
+
* - exclude `selfPid` (the reaper itself, were it ever named similarly);
|
|
50
|
+
* - select ONLY ppid === 1 (orphaned to init). NOTE this also catches live
|
|
51
|
+
* sessions' watchers (see the header) — the spare gate, not this, protects them.
|
|
52
|
+
*/
|
|
53
|
+
function parseReapableWatchers(psOutput, conveneBin, selfPid) {
    // "PID PPID ARGS..." — two numeric columns, then the (space-containing) command line.
    const PS_ROW = /^(\d+)\s+(\d+)\s+(.*)$/;
    const binBase = conveneBin ? node_path_1.default.basename(conveneBin) : '';
    // A token names the convene bin when its basename equals the caller's bin
    // basename (if one was given) or one of the two well-known names. Basename
    // comparison is what lets absolute paths match.
    const namesConveneBin = (token) => {
        const base = node_path_1.default.basename(token);
        return Boolean(binBase && base === binBase) || base === 'convene' || base === 'index.js';
    };
    const reapable = [];
    (psOutput || '').split('\n').forEach((rawLine) => {
        const row = PS_ROW.exec(rawLine.trim());
        if (row === null)
            return; // blank line, the ps header, or anything else unparsable
        const [, pidText, ppidText, commandLine] = row;
        const pid = parseInt(pidText, 10);
        const ppid = parseInt(ppidText, 10);
        if (!Number.isFinite(pid) || !Number.isFinite(ppid))
            return;
        // Never reap ourselves; only processes orphaned to init are candidates.
        if (pid === selfPid || ppid !== 1)
            return;
        const words = commandLine.split(/\s+/).filter(Boolean);
        // The detached watcher's spawn shape is `node <bin> watch`: at least two
        // tokens with `watch` dead-last. A bare `watch` or a `watch` followed by
        // anything else is rejected.
        const hasWatcherShape = words.length >= 2 && words[words.length - 1] === 'watch';
        if (hasWatcherShape && words.some(namesConveneBin))
            reapable.push(pid);
    });
    return reapable;
}
|
|
91
|
+
/**
 * PURE second gate: filter the candidate pids down to those that are NOT spared.
 *
 * @param candidatePids pids selected by the first gate (order is preserved).
 * @param sparePids     pids that must never be reaped.
 * @returns a new array holding every candidate that is absent from `sparePids`.
 */
function excludeSpared(candidatePids, sparePids) {
    // A Set gives O(1) membership checks regardless of how many pids are spared.
    const protectedPids = new Set(sparePids);
    const isReapable = (pid) => !protectedPids.has(pid);
    return candidatePids.filter(isReapable);
}
|
|
99
|
+
/**
 * Synchronous bounded sleep for the SIGTERM→SIGKILL grace.
 *
 * Strategy, in order:
 *   1. spawn `sleep(1)` — blocks in the kernel, so it does not peg a core;
 *   2. if that did not complete cleanly, block on `Atomics.wait` for whatever is
 *      LEFT of the requested duration (also idle, no child process needed);
 *   3. only if both are unavailable, busy-wait until the deadline.
 *
 * A single deadline is computed up-front, so a `sleep` child that failed or
 * timed out part-way never causes the full duration to be waited a second time.
 * A non-finite or non-positive `sec` returns immediately.
 *
 * @param sec duration in seconds (fractions allowed).
 */
const sleepSync = (sec) => {
    const totalMs = Number(sec) * 1000;
    if (!Number.isFinite(totalMs) || totalMs <= 0)
        return;
    const deadline = Date.now() + totalMs;
    try {
        const r = (0, node_child_process_1.spawnSync)('sleep', [String(sec)], { timeout: Math.ceil(totalMs) + 1000 });
        if (r.status === 0)
            return;
    }
    catch {
        /* `sleep` could not be run — fall through to the in-process waits */
    }
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0)
        return;
    try {
        // The cell holds 0 and nobody ever notifies it, so this simply blocks the
        // thread (without spinning) until the timeout elapses.
        Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, remainingMs);
        return;
    }
    catch {
        /* Atomics.wait unavailable/disallowed here — last resort below */
    }
    while (Date.now() < deadline) {
        /* last-resort fallback only */
    }
};
|
|
119
|
+
/**
 * Find + (unless dryRun) kill orphaned convene watchers, SPARING any watcher
 * whose pid is recorded by `readAllWatchPids()` and is still alive. Each target
 * gets SIGTERM, a short grace, then SIGKILL if it survived. Never throws — every
 * failure path returns a benign result. POSIX-only (win32 returns an empty,
 * noted result).
 *
 * `killed` counts only pids that are verifiably GONE afterwards (the existence
 * probe fails with ESRCH). A probe that fails with EPERM means the process is
 * alive but owned by someone else — `ps -e` lists every user's processes — so it
 * is NOT counted as killed.
 *
 * @param options.dryRun when true, only report what would be reaped.
 * @returns `{ found, killed, pids, spared, note? }` — `pids` is the kill-set
 *          (candidates minus spared), `found` its length, `spared` how many
 *          candidates were protected; `note` is set only on the early
 *          "nothing could be inspected" exits.
 */
function reapWatchers({ dryRun = false } = {}) {
    const nothing = (note) => ({ found: 0, killed: 0, pids: [], spared: 0, note });
    if (process.platform === 'win32') {
        return nothing('reap is POSIX-only');
    }
    let psOut = '';
    try {
        const r = (0, node_child_process_1.spawnSync)('ps', ['-eo', 'pid,ppid,args'], { encoding: 'utf8', timeout: 5000 });
        if (r.status !== 0 || !r.stdout)
            return nothing('ps unavailable');
        psOut = r.stdout;
    }
    catch {
        return nothing('ps failed');
    }
    const bin = process.argv[1] || '';
    const candidates = parseReapableWatchers(psOut, bin, process.pid);
    // SPARE live-owned watchers: union all recorded pids, keep the live ones, and
    // exclude them from the kill-set.
    const sparePids = (0, cache_1.readAllWatchPids)().filter(cache_1.isPidAlive);
    const pids = excludeSpared(candidates, sparePids);
    const spared = candidates.length - pids.length;
    if (dryRun || pids.length === 0) {
        return { found: pids.length, killed: 0, pids, spared };
    }
    // Signal-0 existence probe. ONLY ESRCH proves the process is gone; EPERM (or
    // any other failure) means it still exists or we cannot tell, so report
    // "not gone" rather than over-counting kills.
    const isGone = (pid) => {
        try {
            process.kill(pid, 0);
            return false;
        }
        catch (err) {
            return err?.code === 'ESRCH';
        }
    };
    // SIGTERM each (best-effort, per-pid try so one EPERM doesn't abort the sweep).
    for (const pid of pids) {
        try {
            process.kill(pid, 'SIGTERM');
        }
        catch {
            /* already gone / not ours */
        }
    }
    // Brief grace, then SIGKILL any survivor.
    sleepSync(1.5);
    const survivors = pids.filter((pid) => !isGone(pid));
    for (const pid of survivors) {
        try {
            process.kill(pid, 'SIGKILL');
        }
        catch {
            /* lost the race / not ours */
        }
    }
    // Signal delivery and the parent's reaping are asynchronous: probing right
    // after SIGKILL would usually still see the process. Give it a moment — but
    // only when something actually had to be SIGKILLed.
    if (survivors.length > 0)
        sleepSync(0.2);
    const killed = pids.filter(isGone).length;
    return { found: pids.length, killed, pids, spared };
}
|
|
189
|
+
/**
 * CLI action for `convene watch-reap`: run the reaper and print exactly one
 * `watch-reap: …` summary line to stdout. Any error is swallowed (fail-open) —
 * cleanup must never crash the CLI.
 *
 * @param opts.dryRun forwarded to `reapWatchers`; selects the "would reap" wording.
 */
async function watchReap(opts = {}) {
    try {
        const outcome = reapWatchers({ dryRun: opts.dryRun });
        const say = (text) => process.stdout.write(`watch-reap: ${text}\n`);
        // A note means the reaper could not inspect anything; report it verbatim.
        if (outcome.note) {
            say(outcome.note);
            return;
        }
        const sparedTail = outcome.spared > 0 ? `; spared ${outcome.spared} live-owned` : '';
        let summary;
        if (outcome.found === 0) {
            summary = 'no orphaned watchers to reap';
        }
        else if (opts.dryRun) {
            summary = `would reap ${outcome.found} orphaned watcher(s): ${outcome.pids.join(', ')}`;
        }
        else {
            summary = `reaped ${outcome.killed}/${outcome.found} orphaned watcher(s)`;
        }
        say(`${summary}${sparedTail}`);
    }
    catch {
        /* fail-open: cleanup must never crash */
    }
}
|
package/dist/commands/watch.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.watchShouldExit = watchShouldExit;
|
|
3
4
|
exports.watch = watch;
|
|
4
5
|
/**
|
|
5
6
|
* `convene watch` (WP12) — a DETACHED long-poll on the existing /poll stream,
|
|
@@ -41,6 +42,42 @@ const POLL_WAIT_SEC = 25; // server holds up to ~25s; capped at 50 server-side
|
|
|
41
42
|
const POLL_TIMEOUT_MS = POLL_WAIT_SEC * 1000 + 5000; // MUST exceed wait*1000
|
|
42
43
|
const BACKOFF_BASE_MS = 1000;
|
|
43
44
|
const BACKOFF_MAX_MS = 30_000;
|
|
45
|
+
/**
|
|
46
|
+
* SELF-TERMINATION (the daemon-leak fix). A `convene watch` is spawned
|
|
47
|
+
* detached+unref at SessionStart, so it OUTLIVES its session and reparents to
|
|
48
|
+
* PID 1 — and the original loop had NO exit path, leaking ~one orphan per boot
|
|
49
|
+
* (145 observed, ~1 day old). Two independent ceilings now guarantee it dies:
|
|
50
|
+
*
|
|
51
|
+
* - MAX_RUNTIME_MS — an absolute lifetime cap (12h prod; tests set it tiny). A
|
|
52
|
+
* watcher can never outlive this no matter what.
|
|
53
|
+
* - IDLE_EXIT_MS — exit once the OWNING session has gone quiet. The watcher
|
|
54
|
+
* inherits CLAUDE_CODE_SESSION_ID, so liveSessionCount(slug, window) reports
|
|
55
|
+
* whether the owner is still actively fetching (it rewrites its feed .json
|
|
56
|
+
* each prompt). Zero live sessions ⇒ start an idle clock; 20 min idle ⇒ exit.
|
|
57
|
+
*
|
|
58
|
+
* Total time-to-die for a genuinely-closed session ≈ LIVE_SESSION_WINDOW_SEC
|
|
59
|
+
* (10m, for the owner's last .json to age out) + IDLE_EXIT_MS (20m) ≈ 30m. That
|
|
60
|
+
* is deliberately longer than any plausible heads-down turn, and is FAIL-OPEN:
|
|
61
|
+
* if we kill a watcher whose session is merely mid-long-turn, the next
|
|
62
|
+
* SessionStart relaunches one — the only cost is a brief mid-turn-halt blind
|
|
63
|
+
* spot, never a crash. A newer watcher taking over the same scope (pidfile
|
|
64
|
+
* newest-wins) also retires the older one immediately.
|
|
65
|
+
*/
|
|
66
|
+
const MAX_RUNTIME_MS = (0, config_1.resolveWatchMaxMs)();
|
|
67
|
+
const IDLE_EXIT_MS = 20 * 60 * 1000;
|
|
68
|
+
/**
 * PURE termination decision for the watch loop.
 *
 * @param args.startedAt    epoch ms at which the watcher started.
 * @param args.now          current epoch ms.
 * @param args.maxRuntimeMs absolute lifetime cap in ms.
 * @param args.idleSince    epoch ms at which the idle clock started, or
 *                          null/undefined while it is not running.
 * @param args.idleExitMs   idle ceiling in ms.
 * @returns true when the lifetime cap has been reached, or when the idle clock
 *          is running and has reached the idle ceiling; false otherwise.
 */
function watchShouldExit(args) {
    const { startedAt, now, maxRuntimeMs, idleSince, idleExitMs } = args;
    const lifetimeSpent = now - startedAt >= maxRuntimeMs;
    // `== null` deliberately covers both null and undefined: no idle clock running.
    const idledOut = idleSince != null && now - idleSince >= idleExitMs;
    return lifetimeSpent || idledOut;
}
|
|
44
81
|
const HALT_TYPES = new Set(['halt', 'interrupt']);
|
|
45
82
|
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
46
83
|
/** Map a server feed/poll message to an inert WatchEntry. Returns null for non-halts. */
|
|
@@ -102,36 +139,82 @@ async function loop(opts) {
|
|
|
102
139
|
let backoff = BACKOFF_BASE_MS;
|
|
103
140
|
let iterations = 0;
|
|
104
141
|
const limit = typeof opts.maxIterations === 'number' ? opts.maxIterations : Infinity;
|
|
142
|
+
// Claim ownership of this session's scope + record our birth time. The pidfile
|
|
143
|
+
// makes us discoverable to the spawn-dedup guards and enables newest-wins
|
|
144
|
+
// handover (a later watcher overwrites it, and we notice + exit).
|
|
145
|
+
const startedAt = Date.now();
|
|
146
|
+
(0, cache_1.writeWatchPid)(slug);
|
|
147
|
+
let idleSince = null;
|
|
105
148
|
// Heartbeat up-front so a just-launched watch reads as healthy immediately.
|
|
106
149
|
(0, cache_1.touchWatchHeartbeat)(slug);
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
150
|
+
try {
|
|
151
|
+
while (iterations < limit) {
|
|
152
|
+
// Termination checks run at the TOP of every iteration so they fire
|
|
153
|
+
// regardless of poll success/backoff. Wrapped fail-open: any error here
|
|
154
|
+
// SKIPS the check and continues — a termination-check fault must never crash
|
|
155
|
+
// the daemon (and a wrongly-killed watcher would only be relaunched).
|
|
156
|
+
try {
|
|
157
|
+
// Newest-wins: a DIFFERENT pid in our scope's pidfile means a fresher
|
|
158
|
+
// watcher took over — retire ourselves immediately.
|
|
159
|
+
const owner = (0, cache_1.readWatchPid)(slug);
|
|
160
|
+
if (owner && owner.pid !== process.pid)
|
|
161
|
+
break;
|
|
162
|
+
// Owner liveness: zero live sessions ⇒ the owning session has gone quiet,
|
|
163
|
+
// so start (or keep) the idle clock; any live session resets it.
|
|
164
|
+
const live = (0, cache_1.liveSessionCount)(slug, cache_1.LIVE_SESSION_WINDOW_SEC);
|
|
165
|
+
if (live === 0) {
|
|
166
|
+
if (idleSince == null)
|
|
167
|
+
idleSince = Date.now();
|
|
168
|
+
}
|
|
169
|
+
else {
|
|
170
|
+
idleSince = null;
|
|
171
|
+
}
|
|
172
|
+
if (watchShouldExit({
|
|
173
|
+
startedAt,
|
|
174
|
+
now: Date.now(),
|
|
175
|
+
maxRuntimeMs: MAX_RUNTIME_MS,
|
|
176
|
+
idleSince,
|
|
177
|
+
idleExitMs: IDLE_EXIT_MS,
|
|
178
|
+
})) {
|
|
179
|
+
break;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
catch {
|
|
183
|
+
/* fail-open: a faulty termination check never crashes the daemon */
|
|
184
|
+
}
|
|
185
|
+
const res = await api.poll(slug, { since: cursor, wait: POLL_WAIT_SEC }, POLL_TIMEOUT_MS).catch(() => null);
|
|
186
|
+
// Every loop iteration stamps liveness — even an empty/failed poll proves the
|
|
187
|
+
// daemon is alive (the health line distinguishes "down" from "quiet").
|
|
188
|
+
(0, cache_1.touchWatchHeartbeat)(slug);
|
|
189
|
+
if (!res || !res.ok || !res.json) {
|
|
190
|
+
// Transport failure / timeout / parse error → self-heal with backoff.
|
|
191
|
+
await sleep(backoff);
|
|
192
|
+
backoff = Math.min(backoff * 2, BACKOFF_MAX_MS);
|
|
123
193
|
continue;
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
194
|
+
}
|
|
195
|
+
backoff = BACKOFF_BASE_MS; // recovered
|
|
196
|
+
const msgs = Array.isArray(res.json.messages) ? res.json.messages : [];
|
|
197
|
+
for (const m of msgs) {
|
|
198
|
+
const entry = toEntry(m);
|
|
199
|
+
if (!entry)
|
|
200
|
+
continue;
|
|
201
|
+
(0, cache_1.appendWatchEntry)(slug, entry);
|
|
202
|
+
if (opts.notify)
|
|
203
|
+
notifyBestEffort(entry);
|
|
204
|
+
}
|
|
205
|
+
// Advance the resume cursor to the server's reported cursor (monotonic). This
|
|
206
|
+
// is the long-poll resume seq, NOT the reader's high-water — the daemon must
|
|
207
|
+
// move past EVERY message it saw (incl. non-halts) or it would re-fetch them
|
|
208
|
+
// forever. The reader's high-water only advances over rendered halts.
|
|
209
|
+
if (typeof res.json.cursor === 'number' && res.json.cursor > cursor)
|
|
210
|
+
cursor = res.json.cursor;
|
|
211
|
+
iterations++;
|
|
127
212
|
}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
//
|
|
131
|
-
//
|
|
132
|
-
|
|
133
|
-
cursor = res.json.cursor;
|
|
134
|
-
iterations++;
|
|
213
|
+
}
|
|
214
|
+
finally {
|
|
215
|
+
// Release our scope's pidfile so the next SessionStart spawns freely — but
|
|
216
|
+
// ONLY if it still names us (a newer watcher may already own it).
|
|
217
|
+
(0, cache_1.clearWatchPidIfOwner)(slug);
|
|
135
218
|
}
|
|
136
219
|
return 0;
|
|
137
220
|
}
|