@lcv-ideas-software/cross-review 4.0.7 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +207 -0
- package/README.md +3 -1
- package/dist/scripts/smoke.js +179 -79
- package/dist/scripts/smoke.js.map +1 -1
- package/dist/src/core/cache-manifest.d.ts +2 -2
- package/dist/src/core/cache-manifest.js +15 -9
- package/dist/src/core/cache-manifest.js.map +1 -1
- package/dist/src/core/config.d.ts +2 -2
- package/dist/src/core/config.js +2 -2
- package/dist/src/core/orchestrator.js +63 -63
- package/dist/src/core/orchestrator.js.map +1 -1
- package/dist/src/core/session-store.d.ts +35 -34
- package/dist/src/core/session-store.js +269 -156
- package/dist/src/core/session-store.js.map +1 -1
- package/dist/src/dashboard/server.js +5 -1
- package/dist/src/dashboard/server.js.map +1 -1
- package/dist/src/mcp/server.js +41 -33
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/security/redact.js +13 -2
- package/dist/src/security/redact.js.map +1 -1
- package/package.json +3 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
2
|
import fs from "node:fs";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import lockfile from "proper-lockfile";
|
|
4
5
|
import { redact } from "../security/redact.js";
|
|
5
6
|
import { mergeCost, mergeUsage } from "./cost.js";
|
|
6
7
|
import { PEERS } from "./types.js";
|
|
@@ -21,7 +22,7 @@ function now() {
|
|
|
21
22
|
const ATOMIC_WRITE_RETRY_CODES = new Set(["EPERM", "EACCES", "EBUSY", "EEXIST"]);
|
|
22
23
|
const ATOMIC_WRITE_MAX_ATTEMPTS = 5;
|
|
23
24
|
const TMP_NONCE_BYTES = 2;
|
|
24
|
-
function writeJson(file, data) {
|
|
25
|
+
async function writeJson(file, data) {
|
|
25
26
|
fs.mkdirSync(path.dirname(file), { recursive: true });
|
|
26
27
|
const nonce = crypto.randomBytes(TMP_NONCE_BYTES).toString("hex");
|
|
27
28
|
const tmp = `${file}.${process.pid}.${Date.now()}.${nonce}.tmp`;
|
|
@@ -37,11 +38,17 @@ function writeJson(file, data) {
|
|
|
37
38
|
const code = err.code;
|
|
38
39
|
if (!code || !ATOMIC_WRITE_RETRY_CODES.has(code))
|
|
39
40
|
break;
|
|
41
|
+
// v4.1.0 hardening: pre-v4.1.0 used `while (Date.now() - start <
|
|
42
|
+
// wait) {}` busy-wait which blocked the single Node.js event loop
|
|
43
|
+
// thread for up to 310 ms (10+20+40+80+160) under repeated
|
|
44
|
+
// Windows-AV-induced EPERM/EBUSY contention. The CPU-burning
|
|
45
|
+
// busy-wait starved SSE streaming + concurrent sessions + MCP
|
|
46
|
+
// stdio reads. Now the backoff awaits a Promise-based timer:
|
|
47
|
+
// event loop remains fully responsive between attempts.
|
|
40
48
|
const wait = 10 * 2 ** attempt; // 10, 20, 40, 80, 160 ms
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
}
|
|
49
|
+
await new Promise((resolve) => {
|
|
50
|
+
setTimeout(resolve, wait);
|
|
51
|
+
});
|
|
45
52
|
}
|
|
46
53
|
}
|
|
47
54
|
// Terminal failure path: best-effort tmp cleanup so callers don't see
|
|
@@ -99,6 +106,16 @@ export class SessionStore {
|
|
|
99
106
|
// monotonically thereafter. Restart re-initializes from disk, so seq
|
|
100
107
|
// remains correct across process boundaries.
|
|
101
108
|
seqCache = new Map();
|
|
109
|
+
// v4.1.0: track in-flight fire-and-forget appendEvent promises so
|
|
110
|
+
// callers that need synchronous read-after-write semantics (smoke
|
|
111
|
+
// tests, post-round aggregation) can call `flushPendingEvents()` to
|
|
112
|
+
// wait for all pending event writes to settle before reading.
|
|
113
|
+
// appendEvent is async because withSessionLock is async (proper-
|
|
114
|
+
// lockfile); the emit pipeline must stay sync, so it uses
|
|
115
|
+
// `void store.appendEvent(event)` and the store remembers the
|
|
116
|
+
// promise here. Promises resolve/reject within appendEvent's own
|
|
117
|
+
// try/catch — flushPendingEvents() therefore always settles, never rejects.
|
|
118
|
+
pendingEventWrites = new Set();
|
|
102
119
|
constructor(config) {
|
|
103
120
|
this.config = config;
|
|
104
121
|
fs.mkdirSync(this.sessionsDir(), { recursive: true });
|
|
@@ -140,10 +157,6 @@ export class SessionStore {
|
|
|
140
157
|
return false;
|
|
141
158
|
}
|
|
142
159
|
}
|
|
143
|
-
sleepSync(ms) {
|
|
144
|
-
const buffer = new SharedArrayBuffer(4);
|
|
145
|
-
Atomics.wait(new Int32Array(buffer), 0, 0, ms);
|
|
146
|
-
}
|
|
147
160
|
totalsFor(meta) {
|
|
148
161
|
const peerResults = meta.rounds.flatMap((round) => round.peers);
|
|
149
162
|
const generations = meta.generation_files ?? [];
|
|
@@ -158,46 +171,114 @@ export class SessionStore {
|
|
|
158
171
|
]),
|
|
159
172
|
};
|
|
160
173
|
}
|
|
161
|
-
|
|
174
|
+
// v4.1.0 hardening: pre-v4.1.0 acquired the lock via an exclusive
|
|
175
|
+
// file-create syscall followed by a separate JSON metadata write,
|
|
176
|
+
// which had a multi-process TOCTOU race window. Process A's create
|
|
177
|
+
// returned an empty inode + fd; before A's metadata write executed,
|
|
178
|
+
// process B could observe the empty file, fail to JSON-parse it,
|
|
179
|
+
// remove the lock path, create its own valid lock, and enter the
|
|
180
|
+
// critical section. Process A would then write into the now-orphan
|
|
181
|
+
// inode via the still-open fd and ALSO enter the critical section,
|
|
182
|
+
// corrupting meta.json. proper-lockfile uses `fs.mkdir` (atomic
|
|
183
|
+
// across NTFS and POSIX) so the lock comes into existence as a
|
|
184
|
+
// directory in a single syscall — no empty-window race possible.
|
|
185
|
+
// The mkdir-based lock also fixes the lock-holder freshness signal:
|
|
186
|
+
// proper-lockfile's `update` interval touches the lockfile's mtime
|
|
187
|
+
// every 5 s, and any other process treats the lock as stale once the
|
|
188
|
+
// mtime is older than `stale` ms (120 s). This is more robust than
|
|
189
|
+
// the pre-v4.1.0 PID-aliveness check, which had collision risk after
|
|
190
|
+
// process restart.
|
|
191
|
+
async withSessionLock(sessionId, fn) {
|
|
162
192
|
const dir = this.sessionDir(sessionId);
|
|
163
|
-
const
|
|
164
|
-
const
|
|
165
|
-
|
|
193
|
+
const target = this.metaPath(sessionId);
|
|
194
|
+
const lockfilePath = path.join(dir, ".lock");
|
|
195
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
196
|
+
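// proper-lockfile requires the target path to exist when `realpath` is enabled (it uses it for
|
|
197
|
+
// realpath resolution; NOTE(review): the lock() call below passes `realpath: false` — confirm the placeholder is still needed). Init creates the session dir then immediately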
|
|
198
|
+
// calls withSessionLock-protected writes; pre-create an empty meta
|
|
199
|
+
// placeholder so the first init() can acquire the lock. Existing
|
|
200
|
+
// session reuses preserve their meta.
|
|
201
|
+
if (!fs.existsSync(target)) {
|
|
166
202
|
try {
|
|
167
|
-
|
|
168
|
-
fs.writeFileSync(fd, JSON.stringify({ pid: process.pid, acquired_at: now() }));
|
|
169
|
-
fs.closeSync(fd);
|
|
170
|
-
break;
|
|
203
|
+
fs.writeFileSync(target, "{}\n", { flag: "wx" });
|
|
171
204
|
}
|
|
172
|
-
catch (
|
|
173
|
-
if (
|
|
174
|
-
throw
|
|
175
|
-
|
|
176
|
-
const lock = readJson(lockPath);
|
|
177
|
-
const age = lock.acquired_at ? Date.now() - Date.parse(lock.acquired_at) : Infinity;
|
|
178
|
-
if (!lock.pid || age > 120_000 || !this.processAlive(lock.pid)) {
|
|
179
|
-
fs.rmSync(lockPath, { force: true });
|
|
180
|
-
continue;
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
catch {
|
|
184
|
-
fs.rmSync(lockPath, { force: true });
|
|
185
|
-
continue;
|
|
186
|
-
}
|
|
187
|
-
if (Date.now() >= timeoutAt) {
|
|
188
|
-
throw new Error(`timed out waiting for session lock: ${sessionId}`, { cause: error });
|
|
189
|
-
}
|
|
190
|
-
this.sleepSync(100);
|
|
205
|
+
catch (err) {
|
|
206
|
+
if (err.code !== "EEXIST")
|
|
207
|
+
throw err;
|
|
208
|
+
/* concurrent process created it; fine */
|
|
191
209
|
}
|
|
192
210
|
}
|
|
211
|
+
// Pre-v4.1.0 legacy `.lock` regular file detection — FAIL CLOSED.
|
|
212
|
+
//
|
|
213
|
+
// Pre-v4.1.0 created `.lock` as a regular file containing
|
|
214
|
+
// `{pid, acquired_at}` JSON. proper-lockfile claims `.lock` as a DIRECTORY
|
|
215
|
+
// via mkdir, so a leftover regular file blocks every subsequent
|
|
216
|
+
// lockfile.lock() with EEXIST. The original v4.1.0 design tried
|
|
217
|
+
// to auto-clean stale legacy files. Codex (session 059b0093 R1
|
|
218
|
+
// through R4) progressively demonstrated that NO auto-clean is
|
|
219
|
+
// safe under live cross-version operation:
|
|
220
|
+
//
|
|
221
|
+
// • R1: unconditional removal split-brained with a live legacy
|
|
222
|
+
// holder.
|
|
223
|
+
// • R2: removal-when-pid-alive-but-mtime-stale split-brained
|
|
224
|
+
// because legacy locks do not heartbeat (mtime is frozen at
|
|
225
|
+
// acquisition).
|
|
226
|
+
// • R3: per-process atomic decisions still raced two v4.1
|
|
227
|
+
// migrators.
|
|
228
|
+
// • R4: serializing v4.1 migrators via a separate mutex still
|
|
229
|
+
// left the cross-version race: v4.0.x's own stale-removal
|
|
230
|
+
// path does not honor any v4.1 mutex, so a concurrent v4.0.x
|
|
231
|
+
// could remove a stale `.lock` and create its own live one
|
|
232
|
+
// between v4.1's read and v4.1's path-based rmSync —
|
|
233
|
+
// v4.1 then deletes the new live legacy lock → split-brain.
|
|
234
|
+
//
|
|
235
|
+
// Resolution: v4.1.0 NEVER auto-removes a legacy regular `.lock`
|
|
236
|
+
// file. If one is observed, withSessionLock throws a clear
|
|
237
|
+
// remediation error to the caller, instructing the operator to
|
|
238
|
+
// stop all cross-review processes and remove the file manually.
|
|
239
|
+
// This is a ONE-TIME operator step at v4.0.x → v4.1.0 upgrade.
|
|
240
|
+
// After all hosts are on v4.1.0 the locks are mkdir-atomic and
|
|
241
|
+
// the issue cannot recur.
|
|
193
242
|
try {
|
|
194
|
-
|
|
243
|
+
const stat = fs.statSync(lockfilePath);
|
|
244
|
+
if (stat.isFile()) {
|
|
245
|
+
throw new Error(`cross-review v4.1.0 detected a pre-v4.1.0 lock file at ${lockfilePath}. ` +
|
|
246
|
+
`Live cross-version migration is not supported (would split-brain with any ` +
|
|
247
|
+
`concurrent v4.0.x process). To migrate safely: (1) stop all cross-review ` +
|
|
248
|
+
`processes / close all MCP hosts that loaded the server, (2) remove the ` +
|
|
249
|
+
`legacy lock file, (3) restart. POSIX one-liner for full cleanup: ` +
|
|
250
|
+
`\`find ${this.config.data_dir}/sessions -name .lock -type f -delete\`. ` +
|
|
251
|
+
`See CHANGELOG v04.01.00 migration notes for the rationale.`);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
catch (err) {
|
|
255
|
+
if (err instanceof Error && err.message.includes("detected a pre-v4.1.0 lock file")) {
|
|
256
|
+
throw err;
|
|
257
|
+
}
|
|
258
|
+
if (err.code !== "ENOENT") {
|
|
259
|
+
/* ignore other stat errors; lockfile.lock will surface them */
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
const release = await lockfile.lock(target, {
|
|
263
|
+
stale: 120_000,
|
|
264
|
+
update: 5_000,
|
|
265
|
+
retries: { retries: 30, factor: 1.5, minTimeout: 100, maxTimeout: 1_000 },
|
|
266
|
+
realpath: false,
|
|
267
|
+
lockfilePath,
|
|
268
|
+
});
|
|
269
|
+
try {
|
|
270
|
+
return await fn();
|
|
195
271
|
}
|
|
196
272
|
finally {
|
|
197
|
-
|
|
273
|
+
try {
|
|
274
|
+
await release();
|
|
275
|
+
}
|
|
276
|
+
catch {
|
|
277
|
+
/* lock was already released by stale-detection or sibling process */
|
|
278
|
+
}
|
|
198
279
|
}
|
|
199
280
|
}
|
|
200
|
-
init(task, caller, snapshot, reviewFocus) {
|
|
281
|
+
async init(task, caller, snapshot, reviewFocus) {
|
|
201
282
|
const session_id = crypto.randomUUID();
|
|
202
283
|
// v2.22.0 (B.P3): snapshot the cost ceiling at session_init time so
|
|
203
284
|
// budget pressure analysis is decoupled from later env-var mutation.
|
|
@@ -227,7 +308,7 @@ export class SessionStore {
|
|
|
227
308
|
budget_warning_emitted: false,
|
|
228
309
|
};
|
|
229
310
|
fs.mkdirSync(path.join(this.sessionDir(session_id), "agent-runs"), { recursive: true });
|
|
230
|
-
writeJson(this.metaPath(session_id), meta);
|
|
311
|
+
await writeJson(this.metaPath(session_id), meta);
|
|
231
312
|
fs.writeFileSync(path.join(this.sessionDir(session_id), "task.md"), task, "utf8");
|
|
232
313
|
if (reviewFocus) {
|
|
233
314
|
fs.writeFileSync(path.join(this.sessionDir(session_id), "review-focus.md"), reviewFocus, "utf8");
|
|
@@ -243,8 +324,8 @@ export class SessionStore {
|
|
|
243
324
|
// R5 throws when in_flight is already populated; the boot-time
|
|
244
325
|
// `clearStaleInFlight` sweep clears any orphan in_flight from a
|
|
245
326
|
// crashed prior host so legitimate operators are not blocked.
|
|
246
|
-
markInFlight(sessionId, params) {
|
|
247
|
-
return this.withSessionLock(sessionId, () => {
|
|
327
|
+
async markInFlight(sessionId, params) {
|
|
328
|
+
return this.withSessionLock(sessionId, async () => {
|
|
248
329
|
const meta = this.read(sessionId);
|
|
249
330
|
if (meta.in_flight) {
|
|
250
331
|
throw new Error(`session ${sessionId} already has an in-flight round (round=${meta.in_flight.round}, started_at=${meta.in_flight.started_at}); refusing to start a concurrent round. Wait for the round to complete, cancel it via session_cancel_job, or recover it via session_recover_interrupted.`);
|
|
@@ -262,7 +343,7 @@ export class SessionStore {
|
|
|
262
343
|
detail: `Round ${params.round} is running.`,
|
|
263
344
|
};
|
|
264
345
|
meta.updated_at = now();
|
|
265
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
346
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
266
347
|
return meta;
|
|
267
348
|
});
|
|
268
349
|
}
|
|
@@ -304,23 +385,47 @@ export class SessionStore {
|
|
|
304
385
|
commitSeq(sessionId, committed) {
|
|
305
386
|
this.seqCache.set(sessionId, committed);
|
|
306
387
|
}
|
|
307
|
-
|
|
388
|
+
// v4.1.0: durable event persistence. withSessionLock became async
|
|
389
|
+
// with the proper-lockfile refactor; appendEvent awaits the lock so
|
|
390
|
+
// callers that read events after persisting get the expected
|
|
391
|
+
// synchronous-write semantics (e.g. the session_doctor sweep + smoke
|
|
392
|
+
// fixtures that read events.ndjson immediately after appendEvent).
|
|
393
|
+
// Fire-and-forget callers wrap with `void store.appendEvent(...)`.
|
|
394
|
+
async appendEvent(event) {
|
|
308
395
|
const sessionId = event.session_id;
|
|
309
396
|
if (!sessionId)
|
|
310
397
|
return;
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
398
|
+
const write = (async () => {
|
|
399
|
+
try {
|
|
400
|
+
await this.withSessionLock(sessionId, () => {
|
|
401
|
+
const file = this.eventsPath(sessionId);
|
|
402
|
+
const seq = this.peekNextSeq(sessionId, file);
|
|
403
|
+
fs.appendFileSync(file, `${JSON.stringify({ ...event, seq, ts: event.ts ?? now() })}\n`, "utf8");
|
|
404
|
+
// Only commit the cache AFTER the durable append succeeded.
|
|
405
|
+
// If appendFileSync threw above, the cache still reflects the
|
|
406
|
+
// last persisted seq and the next call reuses this seq number.
|
|
407
|
+
this.commitSeq(sessionId, seq);
|
|
408
|
+
});
|
|
409
|
+
}
|
|
410
|
+
catch {
|
|
411
|
+
// Event persistence must never break provider calls or MCP responses.
|
|
412
|
+
}
|
|
413
|
+
})();
|
|
414
|
+
this.pendingEventWrites.add(write);
|
|
415
|
+
void write.finally(() => {
|
|
416
|
+
this.pendingEventWrites.delete(write);
|
|
417
|
+
});
|
|
418
|
+
return write;
|
|
419
|
+
}
|
|
420
|
+
// v4.1.0: wait for all in-flight fire-and-forget event writes to
|
|
421
|
+
// settle. Used by tests/sweeps that need synchronous read-after-write
|
|
422
|
+
// semantics for events.ndjson when the emit pipeline used
|
|
423
|
+
// `void store.appendEvent(...)`. Always resolves (never rejects);
|
|
424
|
+
// appendEvent swallows its own errors.
|
|
425
|
+
async flushPendingEvents() {
|
|
426
|
+
while (this.pendingEventWrites.size > 0) {
|
|
427
|
+
const snapshot = Array.from(this.pendingEventWrites);
|
|
428
|
+
await Promise.allSettled(snapshot);
|
|
324
429
|
}
|
|
325
430
|
}
|
|
326
431
|
readEvents(sessionId, sinceSeq = 0) {
|
|
@@ -415,11 +520,11 @@ export class SessionStore {
|
|
|
415
520
|
fs.writeFileSync(file, redact(draft), "utf8");
|
|
416
521
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
417
522
|
}
|
|
418
|
-
saveGeneration(sessionId, round, result, label = "generation") {
|
|
523
|
+
async saveGeneration(sessionId, round, result, label = "generation") {
|
|
419
524
|
const file = path.join(this.sessionDir(sessionId), "agent-runs", `round-${round}-${result.peer}-${label}.json`);
|
|
420
|
-
writeJson(file, { ...result, text: redact(result.text) });
|
|
525
|
+
await writeJson(file, { ...result, text: redact(result.text) });
|
|
421
526
|
const relativePath = path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
422
|
-
this.withSessionLock(sessionId, () => {
|
|
527
|
+
await this.withSessionLock(sessionId, async () => {
|
|
423
528
|
const meta = this.read(sessionId);
|
|
424
529
|
const artifact = {
|
|
425
530
|
ts: now(),
|
|
@@ -434,7 +539,7 @@ export class SessionStore {
|
|
|
434
539
|
meta.generation_files = [...(meta.generation_files ?? []), artifact];
|
|
435
540
|
meta.totals = this.totalsFor(meta);
|
|
436
541
|
meta.updated_at = now();
|
|
437
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
542
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
438
543
|
});
|
|
439
544
|
return relativePath;
|
|
440
545
|
}
|
|
@@ -448,18 +553,18 @@ export class SessionStore {
|
|
|
448
553
|
fs.writeFileSync(file, redact(text), "utf8");
|
|
449
554
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
450
555
|
}
|
|
451
|
-
savePeerResult(sessionId, round, result, label = "response") {
|
|
556
|
+
async savePeerResult(sessionId, round, result, label = "response") {
|
|
452
557
|
const file = path.join(this.sessionDir(sessionId), "agent-runs", `round-${round}-${result.peer}-${label}.json`);
|
|
453
|
-
writeJson(file, { ...result, text: redact(result.text) });
|
|
558
|
+
await writeJson(file, { ...result, text: redact(result.text) });
|
|
454
559
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
455
560
|
}
|
|
456
|
-
savePeerFailure(sessionId, round, failure) {
|
|
561
|
+
async savePeerFailure(sessionId, round, failure) {
|
|
457
562
|
const file = path.join(this.sessionDir(sessionId), "agent-runs", `round-${round}-${failure.peer}-failure.json`);
|
|
458
|
-
writeJson(file, { ...failure, message: redact(failure.message) });
|
|
563
|
+
await writeJson(file, { ...failure, message: redact(failure.message) });
|
|
459
564
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
460
565
|
}
|
|
461
|
-
appendRound(sessionId, params) {
|
|
462
|
-
return this.withSessionLock(sessionId, () => {
|
|
566
|
+
async appendRound(sessionId, params) {
|
|
567
|
+
return this.withSessionLock(sessionId, async () => {
|
|
463
568
|
const meta = this.read(sessionId);
|
|
464
569
|
// v3.2.0 (Codex bug report 2026-05-12): refuse to append a round
|
|
465
570
|
// to a finalized session. Otherwise the per-round
|
|
@@ -507,19 +612,19 @@ export class SessionStore {
|
|
|
507
612
|
// diff-based drift if a peer's cost changed in a retry loop.
|
|
508
613
|
const roundCost = params.peers.reduce((sum, peer) => sum + (peer.cost?.total_cost ?? 0), 0);
|
|
509
614
|
meta.costs_per_round = [...(meta.costs_per_round ?? []), roundCost];
|
|
510
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
615
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
511
616
|
return round;
|
|
512
617
|
});
|
|
513
618
|
}
|
|
514
619
|
// v2.22.0 (B.P3): one-shot guard for `session.budget_warning` emit
|
|
515
620
|
// idempotency. Persisted in meta.json so the warning fires at most
|
|
516
621
|
// once per session even across host restarts.
|
|
517
|
-
markBudgetWarningEmitted(sessionId) {
|
|
518
|
-
return this.withSessionLock(sessionId, () => {
|
|
622
|
+
async markBudgetWarningEmitted(sessionId) {
|
|
623
|
+
return this.withSessionLock(sessionId, async () => {
|
|
519
624
|
const meta = this.read(sessionId);
|
|
520
625
|
meta.budget_warning_emitted = true;
|
|
521
626
|
meta.updated_at = now();
|
|
522
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
627
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
523
628
|
return meta;
|
|
524
629
|
});
|
|
525
630
|
}
|
|
@@ -527,12 +632,12 @@ export class SessionStore {
|
|
|
527
632
|
// orchestrator's circular loop calls this every round so resumed
|
|
528
633
|
// sessions can pick up the rotation cursor and consecutive-no-change
|
|
529
634
|
// count from disk without re-deriving them by walking events.
|
|
530
|
-
setCircularState(sessionId, state) {
|
|
531
|
-
return this.withSessionLock(sessionId, () => {
|
|
635
|
+
async setCircularState(sessionId, state) {
|
|
636
|
+
return this.withSessionLock(sessionId, async () => {
|
|
532
637
|
const meta = this.read(sessionId);
|
|
533
638
|
meta.circular_state = state;
|
|
534
639
|
meta.updated_at = now();
|
|
535
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
640
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
536
641
|
return meta;
|
|
537
642
|
});
|
|
538
643
|
}
|
|
@@ -545,8 +650,8 @@ export class SessionStore {
|
|
|
545
650
|
// max_rounds the caller actually requested. This fills that gap with
|
|
546
651
|
// pure-additive metadata; `cost_ceiling_usd` is kept in sync with
|
|
547
652
|
// `effective_cost_ceiling_usd` for back-compat with v3.4.x readers.
|
|
548
|
-
setSessionTraceability(sessionId, traceability) {
|
|
549
|
-
return this.withSessionLock(sessionId, () => {
|
|
653
|
+
async setSessionTraceability(sessionId, traceability) {
|
|
654
|
+
return this.withSessionLock(sessionId, async () => {
|
|
550
655
|
const meta = this.read(sessionId);
|
|
551
656
|
meta.requested_max_rounds = traceability.requested_max_rounds;
|
|
552
657
|
meta.effective_max_rounds = traceability.effective_max_rounds;
|
|
@@ -557,7 +662,7 @@ export class SessionStore {
|
|
|
557
662
|
// only know `cost_ceiling_usd` still see the effective ceiling.
|
|
558
663
|
meta.cost_ceiling_usd = traceability.effective_cost_ceiling_usd;
|
|
559
664
|
meta.updated_at = now();
|
|
560
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
665
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
561
666
|
return meta;
|
|
562
667
|
});
|
|
563
668
|
}
|
|
@@ -576,8 +681,8 @@ export class SessionStore {
|
|
|
576
681
|
throw err;
|
|
577
682
|
}
|
|
578
683
|
}
|
|
579
|
-
finalize(sessionId, outcome, reason) {
|
|
580
|
-
return this.withSessionLock(sessionId, () => {
|
|
684
|
+
async finalize(sessionId, outcome, reason) {
|
|
685
|
+
return this.withSessionLock(sessionId, async () => {
|
|
581
686
|
const meta = this.read(sessionId);
|
|
582
687
|
// v3.2.0 (Codex bug report 2026-05-12): when the caller asserts
|
|
583
688
|
// outcome="converged", the latest round (if any) MUST have
|
|
@@ -606,12 +711,12 @@ export class SessionStore {
|
|
|
606
711
|
detail: reason ?? outcome,
|
|
607
712
|
};
|
|
608
713
|
meta.updated_at = now();
|
|
609
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
714
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
610
715
|
return meta;
|
|
611
716
|
});
|
|
612
717
|
}
|
|
613
|
-
requestCancellation(sessionId, reason = "operator_requested", jobId) {
|
|
614
|
-
return this.withSessionLock(sessionId, () => {
|
|
718
|
+
async requestCancellation(sessionId, reason = "operator_requested", jobId) {
|
|
719
|
+
return this.withSessionLock(sessionId, async () => {
|
|
615
720
|
const meta = this.read(sessionId);
|
|
616
721
|
meta.control = {
|
|
617
722
|
status: "cancel_requested",
|
|
@@ -626,12 +731,12 @@ export class SessionStore {
|
|
|
626
731
|
detail: `Cancellation requested: ${reason}`,
|
|
627
732
|
};
|
|
628
733
|
meta.updated_at = now();
|
|
629
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
734
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
630
735
|
return meta;
|
|
631
736
|
});
|
|
632
737
|
}
|
|
633
|
-
markCancelled(sessionId, reason = "cancelled") {
|
|
634
|
-
return this.withSessionLock(sessionId, () => {
|
|
738
|
+
async markCancelled(sessionId, reason = "cancelled") {
|
|
739
|
+
return this.withSessionLock(sessionId, async () => {
|
|
635
740
|
const meta = this.read(sessionId);
|
|
636
741
|
meta.outcome = "aborted";
|
|
637
742
|
meta.outcome_reason = reason;
|
|
@@ -649,7 +754,7 @@ export class SessionStore {
|
|
|
649
754
|
detail: reason,
|
|
650
755
|
};
|
|
651
756
|
meta.updated_at = now();
|
|
652
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
757
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
653
758
|
return meta;
|
|
654
759
|
});
|
|
655
760
|
}
|
|
@@ -657,12 +762,12 @@ export class SessionStore {
|
|
|
657
762
|
const meta = this.read(sessionId);
|
|
658
763
|
return meta.control?.status === "cancel_requested";
|
|
659
764
|
}
|
|
660
|
-
appendFallbackEvent(sessionId, event) {
|
|
661
|
-
return this.withSessionLock(sessionId, () => {
|
|
765
|
+
async appendFallbackEvent(sessionId, event) {
|
|
766
|
+
return this.withSessionLock(sessionId, async () => {
|
|
662
767
|
const meta = this.read(sessionId);
|
|
663
768
|
meta.fallback_events = [...(meta.fallback_events ?? []), event];
|
|
664
769
|
meta.updated_at = now();
|
|
665
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
770
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
666
771
|
return meta;
|
|
667
772
|
});
|
|
668
773
|
}
|
|
@@ -672,10 +777,10 @@ export class SessionStore {
|
|
|
672
777
|
// across rounds increments `round_count` instead of producing
|
|
673
778
|
// duplicate entries. Returns the updated checklist (or empty array
|
|
674
779
|
// if nothing was added/updated).
|
|
675
|
-
appendEvidenceChecklistItems(sessionId, round, incoming) {
|
|
780
|
+
async appendEvidenceChecklistItems(sessionId, round, incoming) {
|
|
676
781
|
if (!incoming.length)
|
|
677
782
|
return [];
|
|
678
|
-
return this.withSessionLock(sessionId, () => {
|
|
783
|
+
return this.withSessionLock(sessionId, async () => {
|
|
679
784
|
const meta = this.read(sessionId);
|
|
680
785
|
const existing = meta.evidence_checklist ?? [];
|
|
681
786
|
const byId = new Map(existing.map((item) => [item.id, item]));
|
|
@@ -724,7 +829,7 @@ export class SessionStore {
|
|
|
724
829
|
});
|
|
725
830
|
meta.evidence_checklist = updated;
|
|
726
831
|
meta.updated_at = ts;
|
|
727
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
832
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
728
833
|
return updated;
|
|
729
834
|
});
|
|
730
835
|
}
|
|
@@ -750,8 +855,8 @@ export class SessionStore {
|
|
|
750
855
|
// by the orchestrator via a separate event so operators see when peers
|
|
751
856
|
// keep asking for items they explicitly closed; the status itself is
|
|
752
857
|
// operator-owned.
|
|
753
|
-
runEvidenceChecklistAddressDetection(sessionId, currentRound) {
|
|
754
|
-
return this.withSessionLock(sessionId, () => {
|
|
858
|
+
async runEvidenceChecklistAddressDetection(sessionId, currentRound) {
|
|
859
|
+
return this.withSessionLock(sessionId, async () => {
|
|
755
860
|
const meta = this.read(sessionId);
|
|
756
861
|
const checklist = meta.evidence_checklist ?? [];
|
|
757
862
|
if (!checklist.length) {
|
|
@@ -824,7 +929,7 @@ export class SessionStore {
|
|
|
824
929
|
if (notResurfaced.length || reopened.length) {
|
|
825
930
|
meta.evidence_status_history = history;
|
|
826
931
|
meta.updated_at = ts;
|
|
827
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
932
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
828
933
|
}
|
|
829
934
|
return {
|
|
830
935
|
not_resurfaced: notResurfaced,
|
|
@@ -843,8 +948,8 @@ export class SessionStore {
|
|
|
843
948
|
// "not_resurfaced" — both are runtime-managed (judge promotion and
|
|
844
949
|
// resurfacing inference respectively). Returns the mutated item and the
|
|
845
950
|
// appended history entry.
|
|
846
|
-
setEvidenceChecklistItemStatus(sessionId, itemId, status, options = {}) {
|
|
847
|
-
return this.withSessionLock(sessionId, () => {
|
|
951
|
+
async setEvidenceChecklistItemStatus(sessionId, itemId, status, options = {}) {
|
|
952
|
+
return this.withSessionLock(sessionId, async () => {
|
|
848
953
|
const meta = this.read(sessionId);
|
|
849
954
|
const checklist = meta.evidence_checklist ?? [];
|
|
850
955
|
const item = checklist.find((entry) => entry.id === itemId);
|
|
@@ -878,7 +983,7 @@ export class SessionStore {
|
|
|
878
983
|
meta.evidence_status_history = history;
|
|
879
984
|
meta.evidence_checklist = checklist;
|
|
880
985
|
meta.updated_at = ts;
|
|
881
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
986
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
882
987
|
return { item, history_entry: entry };
|
|
883
988
|
});
|
|
884
989
|
}
|
|
@@ -887,8 +992,8 @@ export class SessionStore {
|
|
|
887
992
|
// moves anything other than open. Atomic under the session lock.
|
|
888
993
|
// Returns null when the item is not currently `open` (already
|
|
889
994
|
// addressed, terminal, or missing) so the caller can skip emit.
|
|
890
|
-
markEvidenceItemAddressedByJudge(sessionId, itemId, params) {
|
|
891
|
-
return this.withSessionLock(sessionId, () => {
|
|
995
|
+
async markEvidenceItemAddressedByJudge(sessionId, itemId, params) {
|
|
996
|
+
return this.withSessionLock(sessionId, async () => {
|
|
892
997
|
const meta = this.read(sessionId);
|
|
893
998
|
const checklist = meta.evidence_checklist ?? [];
|
|
894
999
|
const item = checklist.find((entry) => entry.id === itemId);
|
|
@@ -920,16 +1025,16 @@ export class SessionStore {
|
|
|
920
1025
|
meta.evidence_status_history = history;
|
|
921
1026
|
meta.evidence_checklist = checklist;
|
|
922
1027
|
meta.updated_at = ts;
|
|
923
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
1028
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
924
1029
|
return { item, history_entry: entry };
|
|
925
1030
|
});
|
|
926
1031
|
}
|
|
927
|
-
recoverInterruptedSessions(activeSessionIds = new Set()) {
|
|
1032
|
+
async recoverInterruptedSessions(activeSessionIds = new Set()) {
|
|
928
1033
|
const recovered = [];
|
|
929
1034
|
for (const session of this.list()) {
|
|
930
1035
|
if (session.outcome || activeSessionIds.has(session.session_id) || !session.in_flight)
|
|
931
1036
|
continue;
|
|
932
|
-
const updated = this.withSessionLock(session.session_id, () => {
|
|
1037
|
+
const updated = await this.withSessionLock(session.session_id, async () => {
|
|
933
1038
|
const current = this.read(session.session_id);
|
|
934
1039
|
if (current.outcome || activeSessionIds.has(current.session_id) || !current.in_flight) {
|
|
935
1040
|
return current;
|
|
@@ -947,7 +1052,7 @@ export class SessionStore {
|
|
|
947
1052
|
detail: `Recovered interrupted round ${round} after MCP restart. Start a new round to continue from saved session context.`,
|
|
948
1053
|
};
|
|
949
1054
|
current.updated_at = now();
|
|
950
|
-
writeJson(this.metaPath(current.session_id), current);
|
|
1055
|
+
await writeJson(this.metaPath(current.session_id), current);
|
|
951
1056
|
return current;
|
|
952
1057
|
});
|
|
953
1058
|
recovered.push(updated);
|
|
@@ -1162,7 +1267,7 @@ export class SessionStore {
|
|
|
1162
1267
|
// `item_types` (open items grouped by surfacing peer) and
|
|
1163
1268
|
// `chronic_blockers` (item ids with `round_count >= 3`) so operators
|
|
1164
1269
|
// can see which evidence asks are systemic vs. a noisy tail.
|
|
1165
|
-
sessionDoctor(limit = 20, includeLegacy = false, repair = false) {
|
|
1270
|
+
async sessionDoctor(limit = 20, includeLegacy = false, repair = false) {
|
|
1166
1271
|
const cappedLimit = Math.max(1, Math.min(100, Math.trunc(limit) || 20));
|
|
1167
1272
|
// v3.6.0 (C): opt-in repair pass BEFORE the read-only audit. Fixes
|
|
1168
1273
|
// the contradictory `outcome="converged" + health.state="blocked"`
|
|
@@ -1184,7 +1289,7 @@ export class SessionStore {
|
|
|
1184
1289
|
// for manual operator inspection rather than guessing.
|
|
1185
1290
|
if (latestConverged) {
|
|
1186
1291
|
const fromState = session.convergence_health?.state;
|
|
1187
|
-
const fixed = this.withSessionLock(session.session_id, () => {
|
|
1292
|
+
const fixed = await this.withSessionLock(session.session_id, async () => {
|
|
1188
1293
|
const meta = this.read(session.session_id);
|
|
1189
1294
|
if (meta.outcome === "converged" &&
|
|
1190
1295
|
meta.convergence_health?.state === "blocked" &&
|
|
@@ -1195,7 +1300,7 @@ export class SessionStore {
|
|
|
1195
1300
|
detail: `v3.6.0 doctor repair: recomputed health from latest round (was "blocked" with outcome="converged" — pre-v3.2.0 corruption artifact)`,
|
|
1196
1301
|
};
|
|
1197
1302
|
meta.updated_at = now();
|
|
1198
|
-
writeJson(this.metaPath(session.session_id), meta);
|
|
1303
|
+
await writeJson(this.metaPath(session.session_id), meta);
|
|
1199
1304
|
return true;
|
|
1200
1305
|
}
|
|
1201
1306
|
return false;
|
|
@@ -1576,7 +1681,7 @@ export class SessionStore {
|
|
|
1576
1681
|
// original session is preserved (append-only); a new session opens
|
|
1577
1682
|
// for re-deliberation with a fresh task + initial_draft and a
|
|
1578
1683
|
// structural reference back to the contested session.
|
|
1579
|
-
contestVerdict(params) {
|
|
1684
|
+
async contestVerdict(params) {
|
|
1580
1685
|
const original = this.read(params.session_id);
|
|
1581
1686
|
if (!original.outcome) {
|
|
1582
1687
|
throw new Error(`cannot_contest_in_flight_session: session ${params.session_id} has no outcome yet (still in flight). Wait for it to converge or finalize before contesting.`);
|
|
@@ -1585,17 +1690,17 @@ export class SessionStore {
|
|
|
1585
1690
|
throw new Error(`session_already_contested: session ${params.session_id} was already contested at ${original.contestation.contested_at} (new_session_id=${original.contestation.new_session_id}).`);
|
|
1586
1691
|
}
|
|
1587
1692
|
const newCaller = params.new_caller ?? "operator";
|
|
1588
|
-
const newSession = this.init(params.new_task, newCaller, [], undefined);
|
|
1693
|
+
const newSession = await this.init(params.new_task, newCaller, [], undefined);
|
|
1589
1694
|
// Cross-link new session → original.
|
|
1590
|
-
this.withSessionLock(newSession.session_id, () => {
|
|
1695
|
+
await this.withSessionLock(newSession.session_id, async () => {
|
|
1591
1696
|
const m = this.read(newSession.session_id);
|
|
1592
1697
|
m.contests_session_id = params.session_id;
|
|
1593
1698
|
m.updated_at = now();
|
|
1594
|
-
writeJson(this.metaPath(newSession.session_id), m);
|
|
1699
|
+
await writeJson(this.metaPath(newSession.session_id), m);
|
|
1595
1700
|
return m;
|
|
1596
1701
|
});
|
|
1597
1702
|
// Stamp original with contestation record.
|
|
1598
|
-
const contestedMeta = this.withSessionLock(params.session_id, () => {
|
|
1703
|
+
const contestedMeta = await this.withSessionLock(params.session_id, async () => {
|
|
1599
1704
|
const m = this.read(params.session_id);
|
|
1600
1705
|
m.contestation = {
|
|
1601
1706
|
contested_at: now(),
|
|
@@ -1604,19 +1709,19 @@ export class SessionStore {
|
|
|
1604
1709
|
new_session_id: newSession.session_id,
|
|
1605
1710
|
};
|
|
1606
1711
|
m.updated_at = now();
|
|
1607
|
-
writeJson(this.metaPath(params.session_id), m);
|
|
1712
|
+
await writeJson(this.metaPath(params.session_id), m);
|
|
1608
1713
|
return m;
|
|
1609
1714
|
});
|
|
1610
1715
|
return { contested_meta: contestedMeta, new_session_id: newSession.session_id };
|
|
1611
1716
|
}
|
|
1612
|
-
attachEvidence(sessionId, params) {
|
|
1717
|
+
async attachEvidence(sessionId, params) {
|
|
1613
1718
|
const extension = safeFilePart(params.extension ?? "txt").replace(/\./g, "") || "txt";
|
|
1614
1719
|
const label = safeFilePart(params.label);
|
|
1615
1720
|
const relativePath = `evidence/${timestampFilePart()}-${label}.${extension}`;
|
|
1616
1721
|
const file = path.join(this.sessionDir(sessionId), relativePath);
|
|
1617
1722
|
fs.mkdirSync(path.dirname(file), { recursive: true });
|
|
1618
1723
|
fs.writeFileSync(file, redact(params.content), "utf8");
|
|
1619
|
-
const meta = this.withSessionLock(sessionId, () => {
|
|
1724
|
+
const meta = await this.withSessionLock(sessionId, async () => {
|
|
1620
1725
|
const current = this.read(sessionId);
|
|
1621
1726
|
current.evidence_files = [
|
|
1622
1727
|
...(current.evidence_files ?? []),
|
|
@@ -1628,13 +1733,13 @@ export class SessionStore {
|
|
|
1628
1733
|
},
|
|
1629
1734
|
];
|
|
1630
1735
|
current.updated_at = now();
|
|
1631
|
-
writeJson(this.metaPath(sessionId), current);
|
|
1736
|
+
await writeJson(this.metaPath(sessionId), current);
|
|
1632
1737
|
return current;
|
|
1633
1738
|
});
|
|
1634
1739
|
return { path: relativePath.replace(/\\/g, "/"), meta };
|
|
1635
1740
|
}
|
|
1636
|
-
escalateToOperator(sessionId, params) {
|
|
1637
|
-
return this.withSessionLock(sessionId, () => {
|
|
1741
|
+
async escalateToOperator(sessionId, params) {
|
|
1742
|
+
return this.withSessionLock(sessionId, async () => {
|
|
1638
1743
|
const meta = this.read(sessionId);
|
|
1639
1744
|
meta.operator_escalations = [
|
|
1640
1745
|
...(meta.operator_escalations ?? []),
|
|
@@ -1646,11 +1751,11 @@ export class SessionStore {
|
|
|
1646
1751
|
detail: `Operator escalation requested: ${params.reason}`,
|
|
1647
1752
|
};
|
|
1648
1753
|
meta.updated_at = now();
|
|
1649
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
1754
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
1650
1755
|
return meta;
|
|
1651
1756
|
});
|
|
1652
1757
|
}
|
|
1653
|
-
sweepIdle(idleMs, outcome = "aborted", reason = "stale") {
|
|
1758
|
+
async sweepIdle(idleMs, outcome = "aborted", reason = "stale") {
|
|
1654
1759
|
const effectiveIdleMs = Math.max(idleMs, SWEEP_MIN_IDLE_MS);
|
|
1655
1760
|
const nowMs = Date.now();
|
|
1656
1761
|
const swept = [];
|
|
@@ -1661,7 +1766,7 @@ export class SessionStore {
|
|
|
1661
1766
|
const idleFor = Number.isFinite(updatedAt) ? nowMs - updatedAt : Infinity;
|
|
1662
1767
|
if (idleFor < effectiveIdleMs)
|
|
1663
1768
|
continue;
|
|
1664
|
-
const finalized = this.withSessionLock(session.session_id, () => {
|
|
1769
|
+
const finalized = await this.withSessionLock(session.session_id, async () => {
|
|
1665
1770
|
const current = this.read(session.session_id);
|
|
1666
1771
|
current.outcome = outcome;
|
|
1667
1772
|
current.outcome_reason = reason;
|
|
@@ -1673,7 +1778,7 @@ export class SessionStore {
|
|
|
1673
1778
|
idle_ms: idleFor,
|
|
1674
1779
|
};
|
|
1675
1780
|
current.updated_at = now();
|
|
1676
|
-
writeJson(this.metaPath(session.session_id), current);
|
|
1781
|
+
await writeJson(this.metaPath(session.session_id), current);
|
|
1677
1782
|
return current;
|
|
1678
1783
|
});
|
|
1679
1784
|
swept.push(finalized);
|
|
@@ -1798,7 +1903,7 @@ export class SessionStore {
|
|
|
1798
1903
|
// - in_flight.started_at is older than HEARTBEAT_STALE_AFTER_MS.
|
|
1799
1904
|
// Sessions still actively running on a live PID are skipped. Idempotent
|
|
1800
1905
|
// + best-effort. Returns counts for telemetry.
|
|
1801
|
-
clearStaleInFlight() {
|
|
1906
|
+
async clearStaleInFlight() {
|
|
1802
1907
|
const HEARTBEAT_STALE_AFTER_MS = 30 * 60 * 1000; // 30 minutes
|
|
1803
1908
|
let scanned = 0;
|
|
1804
1909
|
let cleared = 0;
|
|
@@ -1808,34 +1913,39 @@ export class SessionStore {
|
|
|
1808
1913
|
scanned += 1;
|
|
1809
1914
|
const startedIso = session.in_flight.started_at;
|
|
1810
1915
|
const startedAge = startedIso ? Date.now() - Date.parse(startedIso) : Infinity;
|
|
1811
|
-
//
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
holderAlive = false;
|
|
1824
|
-
}
|
|
1916
|
+
// v4.1.0: lock-holder freshness is reported by proper-lockfile's
|
|
1917
|
+
// mtime-based stale detection. lockfile.check returns true if the
|
|
1918
|
+
// lock is actively held (mtime within `stale` ms), false otherwise.
|
|
1919
|
+
// This replaces the pre-v4.1.0 PID-aliveness check, which had
|
|
1920
|
+
// collision risk after PID-recycling restart.
|
|
1921
|
+
let holderAlive;
|
|
1922
|
+
try {
|
|
1923
|
+
holderAlive = await lockfile.check(this.metaPath(session.session_id), {
|
|
1924
|
+
stale: 120_000,
|
|
1925
|
+
realpath: false,
|
|
1926
|
+
lockfilePath: path.join(this.sessionDir(session.session_id), ".lock"),
|
|
1927
|
+
});
|
|
1825
1928
|
}
|
|
1826
|
-
|
|
1827
|
-
//
|
|
1828
|
-
holderAlive =
|
|
1929
|
+
catch {
|
|
1930
|
+
// metaPath missing or unreadable: treat as no active holder.
|
|
1931
|
+
holderAlive = false;
|
|
1932
|
+
}
|
|
1933
|
+
// Grace period: when no live lock holder exists but started_at is still
|
|
1934
|
+
// within HEARTBEAT_STALE_AFTER_MS, the in_flight marker is left alone.
|
|
1935
|
+
if (!holderAlive && Number.isFinite(startedAge) && startedAge <= HEARTBEAT_STALE_AFTER_MS) {
|
|
1936
|
+
// No live holder but started_at is recent; do nothing yet (lock
|
|
1937
|
+
// may have been released cleanly; let normal finalize handle it).
|
|
1938
|
+
continue;
|
|
1829
1939
|
}
|
|
1830
1940
|
if (!holderAlive || startedAge > HEARTBEAT_STALE_AFTER_MS) {
|
|
1831
1941
|
try {
|
|
1832
|
-
this.withSessionLock(session.session_id, () => {
|
|
1942
|
+
await this.withSessionLock(session.session_id, async () => {
|
|
1833
1943
|
const current = this.read(session.session_id);
|
|
1834
1944
|
if (!current.in_flight)
|
|
1835
1945
|
return;
|
|
1836
1946
|
delete current.in_flight;
|
|
1837
1947
|
current.updated_at = now();
|
|
1838
|
-
writeJson(this.metaPath(session.session_id), current);
|
|
1948
|
+
await writeJson(this.metaPath(session.session_id), current);
|
|
1839
1949
|
cleared += 1;
|
|
1840
1950
|
});
|
|
1841
1951
|
}
|
|
@@ -1871,7 +1981,7 @@ export class SessionStore {
|
|
|
1871
1981
|
// threshold (default 24h via CROSS_REVIEW_STALE_HOURS).
|
|
1872
1982
|
//
|
|
1873
1983
|
// Idempotent + best-effort. Returns counts for telemetry.
|
|
1874
|
-
abortStaleSessions(staleHours) {
|
|
1984
|
+
async abortStaleSessions(staleHours) {
|
|
1875
1985
|
const envHours = Number.parseFloat(process.env.CROSS_REVIEW_STALE_HOURS ?? "");
|
|
1876
1986
|
const hours = staleHours != null && staleHours > 0
|
|
1877
1987
|
? staleHours
|
|
@@ -1891,26 +2001,29 @@ export class SessionStore {
|
|
|
1891
2001
|
if (session.in_flight)
|
|
1892
2002
|
continue;
|
|
1893
2003
|
scanned += 1;
|
|
1894
|
-
//
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
/* malformed lock — fall through to staleness check */
|
|
1905
|
-
}
|
|
2004
|
+
// v4.1.0: lock-holder freshness via proper-lockfile mtime-based
|
|
2005
|
+
// stale detection. lockfile.check returns true if a live holder
|
|
2006
|
+
// is touching the lockfile mtime within `stale` ms.
|
|
2007
|
+
let holderAlive;
|
|
2008
|
+
try {
|
|
2009
|
+
holderAlive = await lockfile.check(this.metaPath(session.session_id), {
|
|
2010
|
+
stale: 120_000,
|
|
2011
|
+
realpath: false,
|
|
2012
|
+
lockfilePath: path.join(this.sessionDir(session.session_id), ".lock"),
|
|
2013
|
+
});
|
|
1906
2014
|
}
|
|
2015
|
+
catch {
|
|
2016
|
+
holderAlive = false;
|
|
2017
|
+
}
|
|
2018
|
+
if (holderAlive)
|
|
2019
|
+
continue;
|
|
1907
2020
|
const lastTouched = Date.parse(session.updated_at);
|
|
1908
2021
|
if (!Number.isFinite(lastTouched))
|
|
1909
2022
|
continue;
|
|
1910
2023
|
if (Date.now() - lastTouched < staleThresholdMs)
|
|
1911
2024
|
continue;
|
|
1912
2025
|
try {
|
|
1913
|
-
this.finalize(session.session_id, "aborted", `stale_no_finalize_${hours}h`);
|
|
2026
|
+
await this.finalize(session.session_id, "aborted", `stale_no_finalize_${hours}h`);
|
|
1914
2027
|
aborted += 1;
|
|
1915
2028
|
}
|
|
1916
2029
|
catch {
|