@lcv-ideas-software/cross-review 4.0.8 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +204 -0
- package/README.md +74 -72
- package/dist/scripts/runtime-smoke.js +10 -3
- package/dist/scripts/runtime-smoke.js.map +1 -1
- package/dist/scripts/smoke.js +200 -79
- package/dist/scripts/smoke.js.map +1 -1
- package/dist/src/core/cache-manifest.d.ts +2 -2
- package/dist/src/core/cache-manifest.js +15 -9
- package/dist/src/core/cache-manifest.js.map +1 -1
- package/dist/src/core/config.d.ts +2 -2
- package/dist/src/core/config.js +2 -2
- package/dist/src/core/orchestrator.js +63 -63
- package/dist/src/core/orchestrator.js.map +1 -1
- package/dist/src/core/session-store.d.ts +35 -34
- package/dist/src/core/session-store.js +268 -157
- package/dist/src/core/session-store.js.map +1 -1
- package/dist/src/dashboard/server.js +5 -1
- package/dist/src/dashboard/server.js.map +1 -1
- package/dist/src/mcp/server.js +41 -33
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/security/redact.js +13 -2
- package/dist/src/security/redact.js.map +1 -1
- package/package.json +3 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
2
|
import fs from "node:fs";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import lockfile from "proper-lockfile";
|
|
4
5
|
import { redact } from "../security/redact.js";
|
|
5
6
|
import { mergeCost, mergeUsage } from "./cost.js";
|
|
6
7
|
import { PEERS } from "./types.js";
|
|
@@ -21,7 +22,7 @@ function now() {
|
|
|
21
22
|
const ATOMIC_WRITE_RETRY_CODES = new Set(["EPERM", "EACCES", "EBUSY", "EEXIST"]);
|
|
22
23
|
const ATOMIC_WRITE_MAX_ATTEMPTS = 5;
|
|
23
24
|
const TMP_NONCE_BYTES = 2;
|
|
24
|
-
function writeJson(file, data) {
|
|
25
|
+
async function writeJson(file, data) {
|
|
25
26
|
fs.mkdirSync(path.dirname(file), { recursive: true });
|
|
26
27
|
const nonce = crypto.randomBytes(TMP_NONCE_BYTES).toString("hex");
|
|
27
28
|
const tmp = `${file}.${process.pid}.${Date.now()}.${nonce}.tmp`;
|
|
@@ -37,11 +38,17 @@ function writeJson(file, data) {
|
|
|
37
38
|
const code = err.code;
|
|
38
39
|
if (!code || !ATOMIC_WRITE_RETRY_CODES.has(code))
|
|
39
40
|
break;
|
|
41
|
+
// v4.1.0 hardening: pre-v4.1.0 used `while (Date.now() - start <
|
|
42
|
+
// wait) {}` busy-wait which blocked the single Node.js event loop
|
|
43
|
+
// thread for up to 310 ms (10+20+40+80+160) under repeated
|
|
44
|
+
// Windows-AV-induced EPERM/EBUSY contention. The CPU-burning
|
|
45
|
+
// busy-wait starved SSE streaming + concurrent sessions + MCP
|
|
46
|
+
// stdio reads. Now the backoff awaits a Promise-based timer:
|
|
47
|
+
// event loop remains fully responsive between attempts.
|
|
40
48
|
const wait = 10 * 2 ** attempt; // 10, 20, 40, 80, 160 ms
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
}
|
|
49
|
+
await new Promise((resolve) => {
|
|
50
|
+
setTimeout(resolve, wait);
|
|
51
|
+
});
|
|
45
52
|
}
|
|
46
53
|
}
|
|
47
54
|
// Terminal failure path: best-effort tmp cleanup so callers don't see
|
|
@@ -99,6 +106,16 @@ export class SessionStore {
|
|
|
99
106
|
// monotonically thereafter. Restart re-initializes from disk, so seq
|
|
100
107
|
// remains correct across process boundaries.
|
|
101
108
|
seqCache = new Map();
|
|
109
|
+
// v4.1.0: track in-flight fire-and-forget appendEvent promises so
|
|
110
|
+
// callers that need synchronous read-after-write semantics (smoke
|
|
111
|
+
// tests, post-round aggregation) can call `flushPendingEvents()` to
|
|
112
|
+
// wait for all pending event writes to settle before reading.
|
|
113
|
+
// appendEvent is async because withSessionLock is async (proper-
|
|
114
|
+
// lockfile); the emit pipeline must stay sync, so it uses
|
|
115
|
+
// `void store.appendEvent(event)` and the store remembers the
|
|
116
|
+
// promise here. Promises resolve/reject within appendEvent's own
|
|
117
|
+
// try/catch — flush() therefore always settles, never rejects.
|
|
118
|
+
pendingEventWrites = new Set();
|
|
102
119
|
constructor(config) {
|
|
103
120
|
this.config = config;
|
|
104
121
|
fs.mkdirSync(this.sessionsDir(), { recursive: true });
|
|
@@ -140,10 +157,6 @@ export class SessionStore {
|
|
|
140
157
|
return false;
|
|
141
158
|
}
|
|
142
159
|
}
|
|
143
|
-
sleepSync(ms) {
|
|
144
|
-
const buffer = new SharedArrayBuffer(4);
|
|
145
|
-
Atomics.wait(new Int32Array(buffer), 0, 0, ms);
|
|
146
|
-
}
|
|
147
160
|
totalsFor(meta) {
|
|
148
161
|
const peerResults = meta.rounds.flatMap((round) => round.peers);
|
|
149
162
|
const generations = meta.generation_files ?? [];
|
|
@@ -158,46 +171,112 @@ export class SessionStore {
|
|
|
158
171
|
]),
|
|
159
172
|
};
|
|
160
173
|
}
|
|
161
|
-
|
|
174
|
+
// v4.1.0 hardening: pre-v4.1.0 acquired the lock via an exclusive
|
|
175
|
+
// file-create syscall followed by a separate JSON metadata write,
|
|
176
|
+
// which had a multi-process TOCTOU race window. Process A's create
|
|
177
|
+
// returned an empty inode + fd; before A's metadata write executed,
|
|
178
|
+
// process B could observe the empty file, fail to JSON-parse it,
|
|
179
|
+
// remove the lock path, create its own valid lock, and enter the
|
|
180
|
+
// critical section. Process A would then write into the now-orphan
|
|
181
|
+
// inode via the still-open fd and ALSO enter the critical section,
|
|
182
|
+
// corrupting meta.json. proper-lockfile uses `fs.mkdir` (atomic
|
|
183
|
+
// across NTFS and POSIX) so the lock comes into existence as a
|
|
184
|
+
// directory in a single syscall — no empty-window race possible.
|
|
185
|
+
// The mkdir-based lock also fixes the lock-holder freshness signal:
|
|
186
|
+
// proper-lockfile's `update` interval touches the lockfile's mtime
|
|
187
|
+
// every 5 s, and any other process treats the lock as stale once the
|
|
188
|
+
// mtime is older than `stale` ms (120 s). This is more robust than
|
|
189
|
+
// the pre-v4.1.0 PID-aliveness check, which had collision risk after
|
|
190
|
+
// process restart.
|
|
191
|
+
async withSessionLock(sessionId, fn) {
|
|
162
192
|
const dir = this.sessionDir(sessionId);
|
|
163
|
-
const
|
|
164
|
-
const
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
193
|
+
const target = this.metaPath(sessionId);
|
|
194
|
+
const lockfilePath = path.join(dir, ".lock");
|
|
195
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
196
|
+
// proper-lockfile requires the target path to exist (it uses it for
|
|
197
|
+
// realpath resolution). Init creates the session dir then immediately
|
|
198
|
+
// calls withSessionLock-protected writes; pre-create an empty meta
|
|
199
|
+
// placeholder so the first init() can acquire the lock. Existing
|
|
200
|
+
// session reuses preserve their meta.
|
|
201
|
+
try {
|
|
202
|
+
fs.writeFileSync(target, "{}\n", { flag: "wx" });
|
|
203
|
+
}
|
|
204
|
+
catch (err) {
|
|
205
|
+
if (err.code !== "EEXIST")
|
|
206
|
+
throw err;
|
|
207
|
+
/* existing or concurrently-created meta; fine */
|
|
208
|
+
}
|
|
209
|
+
// Pre-v4.1.0 legacy `.lock` regular file detection — FAIL CLOSED.
|
|
210
|
+
//
|
|
211
|
+
// Pre-v4.1.0 created `.lock` as a regular file containing
|
|
212
|
+
// `{pid, ts}` JSON. proper-lockfile claims `.lock` as a DIRECTORY
|
|
213
|
+
// via mkdir, so a leftover regular file blocks every subsequent
|
|
214
|
+
// lockfile.lock() with EEXIST. The original v4.1.0 design tried
|
|
215
|
+
// to auto-clean stale legacy files. Codex (session 059b0093 R1
|
|
216
|
+
// through R4) progressively demonstrated that NO auto-clean is
|
|
217
|
+
// safe under live cross-version operation:
|
|
218
|
+
//
|
|
219
|
+
// • R1: unconditional removal split-brained with a live legacy
|
|
220
|
+
// holder.
|
|
221
|
+
// • R2: removal-when-pid-alive-but-mtime-stale split-brained
|
|
222
|
+
// because legacy locks do not heartbeat (mtime is frozen at
|
|
223
|
+
// acquisition).
|
|
224
|
+
// • R3: per-process atomic decisions still raced two v4.1
|
|
225
|
+
// migrators.
|
|
226
|
+
// • R4: serializing v4.1 migrators via a separate mutex still
|
|
227
|
+
// left the cross-version race: v4.0.x's own stale-removal
|
|
228
|
+
// path does not honor any v4.1 mutex, so a concurrent v4.0.x
|
|
229
|
+
// could remove a stale `.lock` and create its own live one
|
|
230
|
+
// between v4.1's read and v4.1's path-based rmSync —
|
|
231
|
+
// v4.1 then deletes the new live legacy lock → split-brain.
|
|
232
|
+
//
|
|
233
|
+
// Resolution: v4.1.0 NEVER auto-removes a legacy regular `.lock`
|
|
234
|
+
// file. If one is observed, withSessionLock throws a clear
|
|
235
|
+
// remediation error to the caller, instructing the operator to
|
|
236
|
+
// stop all cross-review processes and remove the file manually.
|
|
237
|
+
// This is a ONE-TIME operator step at v4.0.x → v4.1.0 upgrade.
|
|
238
|
+
// After all hosts are on v4.1.0 the locks are mkdir-atomic and
|
|
239
|
+
// the issue cannot recur.
|
|
240
|
+
try {
|
|
241
|
+
const stat = fs.statSync(lockfilePath);
|
|
242
|
+
if (stat.isFile()) {
|
|
243
|
+
throw new Error(`cross-review v4.1.0 detected a pre-v4.1.0 lock file at ${lockfilePath}. ` +
|
|
244
|
+
`Live cross-version migration is not supported (would split-brain with any ` +
|
|
245
|
+
`concurrent v4.0.x process). To migrate safely: (1) stop all cross-review ` +
|
|
246
|
+
`processes / close all MCP hosts that loaded the server, (2) remove the ` +
|
|
247
|
+
`legacy lock file, (3) restart. POSIX one-liner for full cleanup: ` +
|
|
248
|
+
`\`find ${this.config.data_dir}/sessions -name .lock -type f -delete\`. ` +
|
|
249
|
+
`See CHANGELOG v04.01.00 migration notes for the rationale.`);
|
|
171
250
|
}
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
fs.rmSync(lockPath, { force: true });
|
|
180
|
-
continue;
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
catch {
|
|
184
|
-
fs.rmSync(lockPath, { force: true });
|
|
185
|
-
continue;
|
|
186
|
-
}
|
|
187
|
-
if (Date.now() >= timeoutAt) {
|
|
188
|
-
throw new Error(`timed out waiting for session lock: ${sessionId}`, { cause: error });
|
|
189
|
-
}
|
|
190
|
-
this.sleepSync(100);
|
|
251
|
+
}
|
|
252
|
+
catch (err) {
|
|
253
|
+
if (err instanceof Error && err.message.includes("detected a pre-v4.1.0 lock file")) {
|
|
254
|
+
throw err;
|
|
255
|
+
}
|
|
256
|
+
if (err.code !== "ENOENT") {
|
|
257
|
+
/* ignore other stat errors; lockfile.lock will surface them */
|
|
191
258
|
}
|
|
192
259
|
}
|
|
260
|
+
const release = await lockfile.lock(target, {
|
|
261
|
+
stale: 120_000,
|
|
262
|
+
update: 5_000,
|
|
263
|
+
retries: { retries: 30, factor: 1.5, minTimeout: 100, maxTimeout: 1_000 },
|
|
264
|
+
realpath: false,
|
|
265
|
+
lockfilePath,
|
|
266
|
+
});
|
|
193
267
|
try {
|
|
194
|
-
return fn();
|
|
268
|
+
return await fn();
|
|
195
269
|
}
|
|
196
270
|
finally {
|
|
197
|
-
|
|
271
|
+
try {
|
|
272
|
+
await release();
|
|
273
|
+
}
|
|
274
|
+
catch {
|
|
275
|
+
/* lock was already released by stale-detection or sibling process */
|
|
276
|
+
}
|
|
198
277
|
}
|
|
199
278
|
}
|
|
200
|
-
init(task, caller, snapshot, reviewFocus) {
|
|
279
|
+
async init(task, caller, snapshot, reviewFocus) {
|
|
201
280
|
const session_id = crypto.randomUUID();
|
|
202
281
|
// v2.22.0 (B.P3): snapshot the cost ceiling at session_init time so
|
|
203
282
|
// budget pressure analysis is decoupled from later env-var mutation.
|
|
@@ -227,7 +306,7 @@ export class SessionStore {
|
|
|
227
306
|
budget_warning_emitted: false,
|
|
228
307
|
};
|
|
229
308
|
fs.mkdirSync(path.join(this.sessionDir(session_id), "agent-runs"), { recursive: true });
|
|
230
|
-
writeJson(this.metaPath(session_id), meta);
|
|
309
|
+
await writeJson(this.metaPath(session_id), meta);
|
|
231
310
|
fs.writeFileSync(path.join(this.sessionDir(session_id), "task.md"), task, "utf8");
|
|
232
311
|
if (reviewFocus) {
|
|
233
312
|
fs.writeFileSync(path.join(this.sessionDir(session_id), "review-focus.md"), reviewFocus, "utf8");
|
|
@@ -243,8 +322,8 @@ export class SessionStore {
|
|
|
243
322
|
// R5 throws when in_flight is already populated; the boot-time
|
|
244
323
|
// `clearStaleInFlight` sweep clears any orphan in_flight from a
|
|
245
324
|
// crashed prior host so legitimate operators are not blocked.
|
|
246
|
-
markInFlight(sessionId, params) {
|
|
247
|
-
return this.withSessionLock(sessionId, () => {
|
|
325
|
+
async markInFlight(sessionId, params) {
|
|
326
|
+
return this.withSessionLock(sessionId, async () => {
|
|
248
327
|
const meta = this.read(sessionId);
|
|
249
328
|
if (meta.in_flight) {
|
|
250
329
|
throw new Error(`session ${sessionId} already has an in-flight round (round=${meta.in_flight.round}, started_at=${meta.in_flight.started_at}); refusing to start a concurrent round. Wait for the round to complete, cancel it via session_cancel_job, or recover it via session_recover_interrupted.`);
|
|
@@ -262,7 +341,7 @@ export class SessionStore {
|
|
|
262
341
|
detail: `Round ${params.round} is running.`,
|
|
263
342
|
};
|
|
264
343
|
meta.updated_at = now();
|
|
265
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
344
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
266
345
|
return meta;
|
|
267
346
|
});
|
|
268
347
|
}
|
|
@@ -304,23 +383,47 @@ export class SessionStore {
|
|
|
304
383
|
commitSeq(sessionId, committed) {
|
|
305
384
|
this.seqCache.set(sessionId, committed);
|
|
306
385
|
}
|
|
307
|
-
|
|
386
|
+
// v4.1.0: durable event persistence. withSessionLock became async
|
|
387
|
+
// with the proper-lockfile refactor; appendEvent awaits the lock so
|
|
388
|
+
// callers that read events after persisting get the expected
|
|
389
|
+
// synchronous-write semantics (e.g. the session_doctor sweep + smoke
|
|
390
|
+
// fixtures that read events.ndjson immediately after appendEvent).
|
|
391
|
+
// Fire-and-forget callers wrap with `void store.appendEvent(...)`.
|
|
392
|
+
async appendEvent(event) {
|
|
308
393
|
const sessionId = event.session_id;
|
|
309
394
|
if (!sessionId)
|
|
310
395
|
return;
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
396
|
+
const write = (async () => {
|
|
397
|
+
try {
|
|
398
|
+
await this.withSessionLock(sessionId, () => {
|
|
399
|
+
const file = this.eventsPath(sessionId);
|
|
400
|
+
const seq = this.peekNextSeq(sessionId, file);
|
|
401
|
+
fs.appendFileSync(file, `${JSON.stringify({ ...event, seq, ts: event.ts ?? now() })}\n`, "utf8");
|
|
402
|
+
// Only commit the cache AFTER the durable append succeeded.
|
|
403
|
+
// If appendFileSync threw above, the cache still reflects the
|
|
404
|
+
// last persisted seq and the next call reuses this seq number.
|
|
405
|
+
this.commitSeq(sessionId, seq);
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
catch {
|
|
409
|
+
// Event persistence must never break provider calls or MCP responses.
|
|
410
|
+
}
|
|
411
|
+
})();
|
|
412
|
+
this.pendingEventWrites.add(write);
|
|
413
|
+
void write.finally(() => {
|
|
414
|
+
this.pendingEventWrites.delete(write);
|
|
415
|
+
});
|
|
416
|
+
return write;
|
|
417
|
+
}
|
|
418
|
+
// v4.1.0: wait for all in-flight fire-and-forget event writes to
|
|
419
|
+
// settle. Used by tests/sweeps that need synchronous read-after-write
|
|
420
|
+
// semantics for events.ndjson when the emit pipeline used
|
|
421
|
+
// `void store.appendEvent(...)`. Always resolves (never rejects);
|
|
422
|
+
// appendEvent swallows its own errors.
|
|
423
|
+
async flushPendingEvents() {
|
|
424
|
+
while (this.pendingEventWrites.size > 0) {
|
|
425
|
+
const snapshot = Array.from(this.pendingEventWrites);
|
|
426
|
+
await Promise.allSettled(snapshot);
|
|
324
427
|
}
|
|
325
428
|
}
|
|
326
429
|
readEvents(sessionId, sinceSeq = 0) {
|
|
@@ -415,11 +518,11 @@ export class SessionStore {
|
|
|
415
518
|
fs.writeFileSync(file, redact(draft), "utf8");
|
|
416
519
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
417
520
|
}
|
|
418
|
-
saveGeneration(sessionId, round, result, label = "generation") {
|
|
521
|
+
async saveGeneration(sessionId, round, result, label = "generation") {
|
|
419
522
|
const file = path.join(this.sessionDir(sessionId), "agent-runs", `round-${round}-${result.peer}-${label}.json`);
|
|
420
|
-
writeJson(file, { ...result, text: redact(result.text) });
|
|
523
|
+
await writeJson(file, { ...result, text: redact(result.text) });
|
|
421
524
|
const relativePath = path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
422
|
-
this.withSessionLock(sessionId, () => {
|
|
525
|
+
await this.withSessionLock(sessionId, async () => {
|
|
423
526
|
const meta = this.read(sessionId);
|
|
424
527
|
const artifact = {
|
|
425
528
|
ts: now(),
|
|
@@ -434,7 +537,7 @@ export class SessionStore {
|
|
|
434
537
|
meta.generation_files = [...(meta.generation_files ?? []), artifact];
|
|
435
538
|
meta.totals = this.totalsFor(meta);
|
|
436
539
|
meta.updated_at = now();
|
|
437
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
540
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
438
541
|
});
|
|
439
542
|
return relativePath;
|
|
440
543
|
}
|
|
@@ -448,18 +551,18 @@ export class SessionStore {
|
|
|
448
551
|
fs.writeFileSync(file, redact(text), "utf8");
|
|
449
552
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
450
553
|
}
|
|
451
|
-
savePeerResult(sessionId, round, result, label = "response") {
|
|
554
|
+
async savePeerResult(sessionId, round, result, label = "response") {
|
|
452
555
|
const file = path.join(this.sessionDir(sessionId), "agent-runs", `round-${round}-${result.peer}-${label}.json`);
|
|
453
|
-
writeJson(file, { ...result, text: redact(result.text) });
|
|
556
|
+
await writeJson(file, { ...result, text: redact(result.text) });
|
|
454
557
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
455
558
|
}
|
|
456
|
-
savePeerFailure(sessionId, round, failure) {
|
|
559
|
+
async savePeerFailure(sessionId, round, failure) {
|
|
457
560
|
const file = path.join(this.sessionDir(sessionId), "agent-runs", `round-${round}-${failure.peer}-failure.json`);
|
|
458
|
-
writeJson(file, { ...failure, message: redact(failure.message) });
|
|
561
|
+
await writeJson(file, { ...failure, message: redact(failure.message) });
|
|
459
562
|
return path.relative(this.sessionDir(sessionId), file).replace(/\\/g, "/");
|
|
460
563
|
}
|
|
461
|
-
appendRound(sessionId, params) {
|
|
462
|
-
return this.withSessionLock(sessionId, () => {
|
|
564
|
+
async appendRound(sessionId, params) {
|
|
565
|
+
return this.withSessionLock(sessionId, async () => {
|
|
463
566
|
const meta = this.read(sessionId);
|
|
464
567
|
// v3.2.0 (Codex bug report 2026-05-12): refuse to append a round
|
|
465
568
|
// to a finalized session. Otherwise the per-round
|
|
@@ -507,19 +610,19 @@ export class SessionStore {
|
|
|
507
610
|
// diff-based drift if a peer's cost changed in a retry loop.
|
|
508
611
|
const roundCost = params.peers.reduce((sum, peer) => sum + (peer.cost?.total_cost ?? 0), 0);
|
|
509
612
|
meta.costs_per_round = [...(meta.costs_per_round ?? []), roundCost];
|
|
510
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
613
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
511
614
|
return round;
|
|
512
615
|
});
|
|
513
616
|
}
|
|
514
617
|
// v2.22.0 (B.P3): one-shot guard for `session.budget_warning` emit
|
|
515
618
|
// idempotency. Persisted in meta.json so the warning fires at most
|
|
516
619
|
// once per session even across host restarts.
|
|
517
|
-
markBudgetWarningEmitted(sessionId) {
|
|
518
|
-
return this.withSessionLock(sessionId, () => {
|
|
620
|
+
async markBudgetWarningEmitted(sessionId) {
|
|
621
|
+
return this.withSessionLock(sessionId, async () => {
|
|
519
622
|
const meta = this.read(sessionId);
|
|
520
623
|
meta.budget_warning_emitted = true;
|
|
521
624
|
meta.updated_at = now();
|
|
522
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
625
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
523
626
|
return meta;
|
|
524
627
|
});
|
|
525
628
|
}
|
|
@@ -527,12 +630,12 @@ export class SessionStore {
|
|
|
527
630
|
// orchestrator's circular loop calls this every round so resumed
|
|
528
631
|
// sessions can pick up the rotation cursor and consecutive-no-change
|
|
529
632
|
// count from disk without re-deriving them by walking events.
|
|
530
|
-
setCircularState(sessionId, state) {
|
|
531
|
-
return this.withSessionLock(sessionId, () => {
|
|
633
|
+
async setCircularState(sessionId, state) {
|
|
634
|
+
return this.withSessionLock(sessionId, async () => {
|
|
532
635
|
const meta = this.read(sessionId);
|
|
533
636
|
meta.circular_state = state;
|
|
534
637
|
meta.updated_at = now();
|
|
535
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
638
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
536
639
|
return meta;
|
|
537
640
|
});
|
|
538
641
|
}
|
|
@@ -545,8 +648,8 @@ export class SessionStore {
|
|
|
545
648
|
// max_rounds the caller actually requested. This fills that gap with
|
|
546
649
|
// pure-additive metadata; `cost_ceiling_usd` is kept in sync with
|
|
547
650
|
// `effective_cost_ceiling_usd` for back-compat with v3.4.x readers.
|
|
548
|
-
setSessionTraceability(sessionId, traceability) {
|
|
549
|
-
return this.withSessionLock(sessionId, () => {
|
|
651
|
+
async setSessionTraceability(sessionId, traceability) {
|
|
652
|
+
return this.withSessionLock(sessionId, async () => {
|
|
550
653
|
const meta = this.read(sessionId);
|
|
551
654
|
meta.requested_max_rounds = traceability.requested_max_rounds;
|
|
552
655
|
meta.effective_max_rounds = traceability.effective_max_rounds;
|
|
@@ -557,7 +660,7 @@ export class SessionStore {
|
|
|
557
660
|
// only know `cost_ceiling_usd` still see the effective ceiling.
|
|
558
661
|
meta.cost_ceiling_usd = traceability.effective_cost_ceiling_usd;
|
|
559
662
|
meta.updated_at = now();
|
|
560
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
663
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
561
664
|
return meta;
|
|
562
665
|
});
|
|
563
666
|
}
|
|
@@ -576,8 +679,8 @@ export class SessionStore {
|
|
|
576
679
|
throw err;
|
|
577
680
|
}
|
|
578
681
|
}
|
|
579
|
-
finalize(sessionId, outcome, reason) {
|
|
580
|
-
return this.withSessionLock(sessionId, () => {
|
|
682
|
+
async finalize(sessionId, outcome, reason) {
|
|
683
|
+
return this.withSessionLock(sessionId, async () => {
|
|
581
684
|
const meta = this.read(sessionId);
|
|
582
685
|
// v3.2.0 (Codex bug report 2026-05-12): when the caller asserts
|
|
583
686
|
// outcome="converged", the latest round (if any) MUST have
|
|
@@ -606,12 +709,12 @@ export class SessionStore {
|
|
|
606
709
|
detail: reason ?? outcome,
|
|
607
710
|
};
|
|
608
711
|
meta.updated_at = now();
|
|
609
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
712
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
610
713
|
return meta;
|
|
611
714
|
});
|
|
612
715
|
}
|
|
613
|
-
requestCancellation(sessionId, reason = "operator_requested", jobId) {
|
|
614
|
-
return this.withSessionLock(sessionId, () => {
|
|
716
|
+
async requestCancellation(sessionId, reason = "operator_requested", jobId) {
|
|
717
|
+
return this.withSessionLock(sessionId, async () => {
|
|
615
718
|
const meta = this.read(sessionId);
|
|
616
719
|
meta.control = {
|
|
617
720
|
status: "cancel_requested",
|
|
@@ -626,12 +729,12 @@ export class SessionStore {
|
|
|
626
729
|
detail: `Cancellation requested: ${reason}`,
|
|
627
730
|
};
|
|
628
731
|
meta.updated_at = now();
|
|
629
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
732
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
630
733
|
return meta;
|
|
631
734
|
});
|
|
632
735
|
}
|
|
633
|
-
markCancelled(sessionId, reason = "cancelled") {
|
|
634
|
-
return this.withSessionLock(sessionId, () => {
|
|
736
|
+
async markCancelled(sessionId, reason = "cancelled") {
|
|
737
|
+
return this.withSessionLock(sessionId, async () => {
|
|
635
738
|
const meta = this.read(sessionId);
|
|
636
739
|
meta.outcome = "aborted";
|
|
637
740
|
meta.outcome_reason = reason;
|
|
@@ -649,7 +752,7 @@ export class SessionStore {
|
|
|
649
752
|
detail: reason,
|
|
650
753
|
};
|
|
651
754
|
meta.updated_at = now();
|
|
652
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
755
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
653
756
|
return meta;
|
|
654
757
|
});
|
|
655
758
|
}
|
|
@@ -657,12 +760,12 @@ export class SessionStore {
|
|
|
657
760
|
const meta = this.read(sessionId);
|
|
658
761
|
return meta.control?.status === "cancel_requested";
|
|
659
762
|
}
|
|
660
|
-
appendFallbackEvent(sessionId, event) {
|
|
661
|
-
return this.withSessionLock(sessionId, () => {
|
|
763
|
+
async appendFallbackEvent(sessionId, event) {
|
|
764
|
+
return this.withSessionLock(sessionId, async () => {
|
|
662
765
|
const meta = this.read(sessionId);
|
|
663
766
|
meta.fallback_events = [...(meta.fallback_events ?? []), event];
|
|
664
767
|
meta.updated_at = now();
|
|
665
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
768
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
666
769
|
return meta;
|
|
667
770
|
});
|
|
668
771
|
}
|
|
@@ -672,10 +775,10 @@ export class SessionStore {
|
|
|
672
775
|
// across rounds increments `round_count` instead of producing
|
|
673
776
|
// duplicate entries. Returns the updated checklist (or empty array
|
|
674
777
|
// if nothing was added/updated).
|
|
675
|
-
appendEvidenceChecklistItems(sessionId, round, incoming) {
|
|
778
|
+
async appendEvidenceChecklistItems(sessionId, round, incoming) {
|
|
676
779
|
if (!incoming.length)
|
|
677
780
|
return [];
|
|
678
|
-
return this.withSessionLock(sessionId, () => {
|
|
781
|
+
return this.withSessionLock(sessionId, async () => {
|
|
679
782
|
const meta = this.read(sessionId);
|
|
680
783
|
const existing = meta.evidence_checklist ?? [];
|
|
681
784
|
const byId = new Map(existing.map((item) => [item.id, item]));
|
|
@@ -724,7 +827,7 @@ export class SessionStore {
|
|
|
724
827
|
});
|
|
725
828
|
meta.evidence_checklist = updated;
|
|
726
829
|
meta.updated_at = ts;
|
|
727
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
830
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
728
831
|
return updated;
|
|
729
832
|
});
|
|
730
833
|
}
|
|
@@ -750,8 +853,8 @@ export class SessionStore {
|
|
|
750
853
|
// by the orchestrator via a separate event so operators see when peers
|
|
751
854
|
// keep asking for items they explicitly closed; the status itself is
|
|
752
855
|
// operator-owned.
|
|
753
|
-
runEvidenceChecklistAddressDetection(sessionId, currentRound) {
|
|
754
|
-
return this.withSessionLock(sessionId, () => {
|
|
856
|
+
async runEvidenceChecklistAddressDetection(sessionId, currentRound) {
|
|
857
|
+
return this.withSessionLock(sessionId, async () => {
|
|
755
858
|
const meta = this.read(sessionId);
|
|
756
859
|
const checklist = meta.evidence_checklist ?? [];
|
|
757
860
|
if (!checklist.length) {
|
|
@@ -824,7 +927,7 @@ export class SessionStore {
|
|
|
824
927
|
if (notResurfaced.length || reopened.length) {
|
|
825
928
|
meta.evidence_status_history = history;
|
|
826
929
|
meta.updated_at = ts;
|
|
827
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
930
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
828
931
|
}
|
|
829
932
|
return {
|
|
830
933
|
not_resurfaced: notResurfaced,
|
|
@@ -843,8 +946,8 @@ export class SessionStore {
|
|
|
843
946
|
// "not_resurfaced" — both are runtime-managed (judge promotion and
|
|
844
947
|
// resurfacing inference respectively). Returns the mutated item and the
|
|
845
948
|
// appended history entry.
|
|
846
|
-
setEvidenceChecklistItemStatus(sessionId, itemId, status, options = {}) {
|
|
847
|
-
return this.withSessionLock(sessionId, () => {
|
|
949
|
+
async setEvidenceChecklistItemStatus(sessionId, itemId, status, options = {}) {
|
|
950
|
+
return this.withSessionLock(sessionId, async () => {
|
|
848
951
|
const meta = this.read(sessionId);
|
|
849
952
|
const checklist = meta.evidence_checklist ?? [];
|
|
850
953
|
const item = checklist.find((entry) => entry.id === itemId);
|
|
@@ -878,7 +981,7 @@ export class SessionStore {
|
|
|
878
981
|
meta.evidence_status_history = history;
|
|
879
982
|
meta.evidence_checklist = checklist;
|
|
880
983
|
meta.updated_at = ts;
|
|
881
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
984
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
882
985
|
return { item, history_entry: entry };
|
|
883
986
|
});
|
|
884
987
|
}
|
|
@@ -887,8 +990,8 @@ export class SessionStore {
|
|
|
887
990
|
// moves anything other than open. Atomic under the session lock.
|
|
888
991
|
// Returns null when the item is not currently `open` (already
|
|
889
992
|
// addressed, terminal, or missing) so the caller can skip emit.
|
|
890
|
-
markEvidenceItemAddressedByJudge(sessionId, itemId, params) {
|
|
891
|
-
return this.withSessionLock(sessionId, () => {
|
|
993
|
+
async markEvidenceItemAddressedByJudge(sessionId, itemId, params) {
|
|
994
|
+
return this.withSessionLock(sessionId, async () => {
|
|
892
995
|
const meta = this.read(sessionId);
|
|
893
996
|
const checklist = meta.evidence_checklist ?? [];
|
|
894
997
|
const item = checklist.find((entry) => entry.id === itemId);
|
|
@@ -920,16 +1023,16 @@ export class SessionStore {
|
|
|
920
1023
|
meta.evidence_status_history = history;
|
|
921
1024
|
meta.evidence_checklist = checklist;
|
|
922
1025
|
meta.updated_at = ts;
|
|
923
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
1026
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
924
1027
|
return { item, history_entry: entry };
|
|
925
1028
|
});
|
|
926
1029
|
}
|
|
927
|
-
recoverInterruptedSessions(activeSessionIds = new Set()) {
|
|
1030
|
+
async recoverInterruptedSessions(activeSessionIds = new Set()) {
|
|
928
1031
|
const recovered = [];
|
|
929
1032
|
for (const session of this.list()) {
|
|
930
1033
|
if (session.outcome || activeSessionIds.has(session.session_id) || !session.in_flight)
|
|
931
1034
|
continue;
|
|
932
|
-
const updated = this.withSessionLock(session.session_id, () => {
|
|
1035
|
+
const updated = await this.withSessionLock(session.session_id, async () => {
|
|
933
1036
|
const current = this.read(session.session_id);
|
|
934
1037
|
if (current.outcome || activeSessionIds.has(current.session_id) || !current.in_flight) {
|
|
935
1038
|
return current;
|
|
@@ -947,7 +1050,7 @@ export class SessionStore {
|
|
|
947
1050
|
detail: `Recovered interrupted round ${round} after MCP restart. Start a new round to continue from saved session context.`,
|
|
948
1051
|
};
|
|
949
1052
|
current.updated_at = now();
|
|
950
|
-
writeJson(this.metaPath(current.session_id), current);
|
|
1053
|
+
await writeJson(this.metaPath(current.session_id), current);
|
|
951
1054
|
return current;
|
|
952
1055
|
});
|
|
953
1056
|
recovered.push(updated);
|
|
@@ -1162,7 +1265,7 @@ export class SessionStore {
|
|
|
1162
1265
|
// `item_types` (open items grouped by surfacing peer) and
|
|
1163
1266
|
// `chronic_blockers` (item ids with `round_count >= 3`) so operators
|
|
1164
1267
|
// can see which evidence asks are systemic vs cauda ruidosa.
|
|
1165
|
-
sessionDoctor(limit = 20, includeLegacy = false, repair = false) {
|
|
1268
|
+
async sessionDoctor(limit = 20, includeLegacy = false, repair = false) {
|
|
1166
1269
|
const cappedLimit = Math.max(1, Math.min(100, Math.trunc(limit) || 20));
|
|
1167
1270
|
// v3.6.0 (C): opt-in repair pass BEFORE the read-only audit. Fixes
|
|
1168
1271
|
// the contradictory `outcome="converged" + health.state="blocked"`
|
|
@@ -1184,7 +1287,7 @@ export class SessionStore {
|
|
|
1184
1287
|
// for manual operator inspection rather than guessing.
|
|
1185
1288
|
if (latestConverged) {
|
|
1186
1289
|
const fromState = session.convergence_health?.state;
|
|
1187
|
-
const fixed = this.withSessionLock(session.session_id, () => {
|
|
1290
|
+
const fixed = await this.withSessionLock(session.session_id, async () => {
|
|
1188
1291
|
const meta = this.read(session.session_id);
|
|
1189
1292
|
if (meta.outcome === "converged" &&
|
|
1190
1293
|
meta.convergence_health?.state === "blocked" &&
|
|
@@ -1195,7 +1298,7 @@ export class SessionStore {
|
|
|
1195
1298
|
detail: `v3.6.0 doctor repair: recomputed health from latest round (was "blocked" with outcome="converged" — pre-v3.2.0 corruption artifact)`,
|
|
1196
1299
|
};
|
|
1197
1300
|
meta.updated_at = now();
|
|
1198
|
-
writeJson(this.metaPath(session.session_id), meta);
|
|
1301
|
+
await writeJson(this.metaPath(session.session_id), meta);
|
|
1199
1302
|
return true;
|
|
1200
1303
|
}
|
|
1201
1304
|
return false;
|
|
@@ -1576,7 +1679,7 @@ export class SessionStore {
|
|
|
1576
1679
|
// original session is preserved (append-only); a new session opens
|
|
1577
1680
|
// for re-deliberation with a fresh task + initial_draft and a
|
|
1578
1681
|
// structural reference back to the contested session.
|
|
1579
|
-
contestVerdict(params) {
|
|
1682
|
+
async contestVerdict(params) {
|
|
1580
1683
|
const original = this.read(params.session_id);
|
|
1581
1684
|
if (!original.outcome) {
|
|
1582
1685
|
throw new Error(`cannot_contest_in_flight_session: session ${params.session_id} has no outcome yet (still in flight). Wait for it to converge or finalize before contesting.`);
|
|
@@ -1585,17 +1688,17 @@ export class SessionStore {
|
|
|
1585
1688
|
throw new Error(`session_already_contested: session ${params.session_id} was already contested at ${original.contestation.contested_at} (new_session_id=${original.contestation.new_session_id}).`);
|
|
1586
1689
|
}
|
|
1587
1690
|
const newCaller = params.new_caller ?? "operator";
|
|
1588
|
-
const newSession = this.init(params.new_task, newCaller, [], undefined);
|
|
1691
|
+
const newSession = await this.init(params.new_task, newCaller, [], undefined);
|
|
1589
1692
|
// Cross-link new session → original.
|
|
1590
|
-
this.withSessionLock(newSession.session_id, () => {
|
|
1693
|
+
await this.withSessionLock(newSession.session_id, async () => {
|
|
1591
1694
|
const m = this.read(newSession.session_id);
|
|
1592
1695
|
m.contests_session_id = params.session_id;
|
|
1593
1696
|
m.updated_at = now();
|
|
1594
|
-
writeJson(this.metaPath(newSession.session_id), m);
|
|
1697
|
+
await writeJson(this.metaPath(newSession.session_id), m);
|
|
1595
1698
|
return m;
|
|
1596
1699
|
});
|
|
1597
1700
|
// Stamp original with contestation record.
|
|
1598
|
-
const contestedMeta = this.withSessionLock(params.session_id, () => {
|
|
1701
|
+
const contestedMeta = await this.withSessionLock(params.session_id, async () => {
|
|
1599
1702
|
const m = this.read(params.session_id);
|
|
1600
1703
|
m.contestation = {
|
|
1601
1704
|
contested_at: now(),
|
|
@@ -1604,19 +1707,19 @@ export class SessionStore {
|
|
|
1604
1707
|
new_session_id: newSession.session_id,
|
|
1605
1708
|
};
|
|
1606
1709
|
m.updated_at = now();
|
|
1607
|
-
writeJson(this.metaPath(params.session_id), m);
|
|
1710
|
+
await writeJson(this.metaPath(params.session_id), m);
|
|
1608
1711
|
return m;
|
|
1609
1712
|
});
|
|
1610
1713
|
return { contested_meta: contestedMeta, new_session_id: newSession.session_id };
|
|
1611
1714
|
}
|
|
1612
|
-
attachEvidence(sessionId, params) {
|
|
1715
|
+
async attachEvidence(sessionId, params) {
|
|
1613
1716
|
const extension = safeFilePart(params.extension ?? "txt").replace(/\./g, "") || "txt";
|
|
1614
1717
|
const label = safeFilePart(params.label);
|
|
1615
1718
|
const relativePath = `evidence/${timestampFilePart()}-${label}.${extension}`;
|
|
1616
1719
|
const file = path.join(this.sessionDir(sessionId), relativePath);
|
|
1617
1720
|
fs.mkdirSync(path.dirname(file), { recursive: true });
|
|
1618
1721
|
fs.writeFileSync(file, redact(params.content), "utf8");
|
|
1619
|
-
const meta = this.withSessionLock(sessionId, () => {
|
|
1722
|
+
const meta = await this.withSessionLock(sessionId, async () => {
|
|
1620
1723
|
const current = this.read(sessionId);
|
|
1621
1724
|
current.evidence_files = [
|
|
1622
1725
|
...(current.evidence_files ?? []),
|
|
@@ -1628,13 +1731,13 @@ export class SessionStore {
|
|
|
1628
1731
|
},
|
|
1629
1732
|
];
|
|
1630
1733
|
current.updated_at = now();
|
|
1631
|
-
writeJson(this.metaPath(sessionId), current);
|
|
1734
|
+
await writeJson(this.metaPath(sessionId), current);
|
|
1632
1735
|
return current;
|
|
1633
1736
|
});
|
|
1634
1737
|
return { path: relativePath.replace(/\\/g, "/"), meta };
|
|
1635
1738
|
}
|
|
1636
|
-
escalateToOperator(sessionId, params) {
|
|
1637
|
-
return this.withSessionLock(sessionId, () => {
|
|
1739
|
+
async escalateToOperator(sessionId, params) {
|
|
1740
|
+
return this.withSessionLock(sessionId, async () => {
|
|
1638
1741
|
const meta = this.read(sessionId);
|
|
1639
1742
|
meta.operator_escalations = [
|
|
1640
1743
|
...(meta.operator_escalations ?? []),
|
|
@@ -1646,11 +1749,11 @@ export class SessionStore {
|
|
|
1646
1749
|
detail: `Operator escalation requested: ${params.reason}`,
|
|
1647
1750
|
};
|
|
1648
1751
|
meta.updated_at = now();
|
|
1649
|
-
writeJson(this.metaPath(sessionId), meta);
|
|
1752
|
+
await writeJson(this.metaPath(sessionId), meta);
|
|
1650
1753
|
return meta;
|
|
1651
1754
|
});
|
|
1652
1755
|
}
|
|
1653
|
-
sweepIdle(idleMs, outcome = "aborted", reason = "stale") {
|
|
1756
|
+
async sweepIdle(idleMs, outcome = "aborted", reason = "stale") {
|
|
1654
1757
|
const effectiveIdleMs = Math.max(idleMs, SWEEP_MIN_IDLE_MS);
|
|
1655
1758
|
const nowMs = Date.now();
|
|
1656
1759
|
const swept = [];
|
|
@@ -1661,7 +1764,7 @@ export class SessionStore {
|
|
|
1661
1764
|
const idleFor = Number.isFinite(updatedAt) ? nowMs - updatedAt : Infinity;
|
|
1662
1765
|
if (idleFor < effectiveIdleMs)
|
|
1663
1766
|
continue;
|
|
1664
|
-
const finalized = this.withSessionLock(session.session_id, () => {
|
|
1767
|
+
const finalized = await this.withSessionLock(session.session_id, async () => {
|
|
1665
1768
|
const current = this.read(session.session_id);
|
|
1666
1769
|
current.outcome = outcome;
|
|
1667
1770
|
current.outcome_reason = reason;
|
|
@@ -1673,7 +1776,7 @@ export class SessionStore {
|
|
|
1673
1776
|
idle_ms: idleFor,
|
|
1674
1777
|
};
|
|
1675
1778
|
current.updated_at = now();
|
|
1676
|
-
writeJson(this.metaPath(session.session_id), current);
|
|
1779
|
+
await writeJson(this.metaPath(session.session_id), current);
|
|
1677
1780
|
return current;
|
|
1678
1781
|
});
|
|
1679
1782
|
swept.push(finalized);
|
|
@@ -1798,7 +1901,7 @@ export class SessionStore {
|
|
|
1798
1901
|
// - in_flight.started_at is older than HEARTBEAT_STALE_AFTER_MS.
|
|
1799
1902
|
// Sessions still actively running on a live PID are skipped. Idempotent
|
|
1800
1903
|
// + best-effort. Returns counts for telemetry.
|
|
1801
|
-
clearStaleInFlight() {
|
|
1904
|
+
async clearStaleInFlight() {
|
|
1802
1905
|
const HEARTBEAT_STALE_AFTER_MS = 30 * 60 * 1000; // 30 minutes
|
|
1803
1906
|
let scanned = 0;
|
|
1804
1907
|
let cleared = 0;
|
|
@@ -1808,34 +1911,39 @@ export class SessionStore {
|
|
|
1808
1911
|
scanned += 1;
|
|
1809
1912
|
const startedIso = session.in_flight.started_at;
|
|
1810
1913
|
const startedAge = startedIso ? Date.now() - Date.parse(startedIso) : Infinity;
|
|
1811
|
-
//
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
holderAlive = false;
|
|
1824
|
-
}
|
|
1914
|
+
// v4.1.0: lock-holder freshness is reported by proper-lockfile's
|
|
1915
|
+
// mtime-based stale detection. lockfile.check returns true if the
|
|
1916
|
+
// lock is actively held (mtime within `stale` ms), false otherwise.
|
|
1917
|
+
// This replaces the pre-v4.1.0 PID-aliveness check, which had
|
|
1918
|
+
// collision risk after PID-recycling restart.
|
|
1919
|
+
let holderAlive;
|
|
1920
|
+
try {
|
|
1921
|
+
holderAlive = await lockfile.check(this.metaPath(session.session_id), {
|
|
1922
|
+
stale: 120_000,
|
|
1923
|
+
realpath: false,
|
|
1924
|
+
lockfilePath: path.join(this.sessionDir(session.session_id), ".lock"),
|
|
1925
|
+
});
|
|
1825
1926
|
}
|
|
1826
|
-
|
|
1827
|
-
//
|
|
1828
|
-
holderAlive =
|
|
1927
|
+
catch {
|
|
1928
|
+
// metaPath missing or unreadable: treat as no active holder.
|
|
1929
|
+
holderAlive = false;
|
|
1930
|
+
}
|
|
1931
|
+
// Fallback heartbeat staleness signal when no active lock and
|
|
1932
|
+
// started_at indicates the in_flight marker itself is stale.
|
|
1933
|
+
if (!holderAlive && Number.isFinite(startedAge) && startedAge <= HEARTBEAT_STALE_AFTER_MS) {
|
|
1934
|
+
// No live holder but started_at is recent; do nothing yet (lock
|
|
1935
|
+
// may have been released cleanly; let normal finalize handle it).
|
|
1936
|
+
continue;
|
|
1829
1937
|
}
|
|
1830
1938
|
if (!holderAlive || startedAge > HEARTBEAT_STALE_AFTER_MS) {
|
|
1831
1939
|
try {
|
|
1832
|
-
this.withSessionLock(session.session_id, () => {
|
|
1940
|
+
await this.withSessionLock(session.session_id, async () => {
|
|
1833
1941
|
const current = this.read(session.session_id);
|
|
1834
1942
|
if (!current.in_flight)
|
|
1835
1943
|
return;
|
|
1836
1944
|
delete current.in_flight;
|
|
1837
1945
|
current.updated_at = now();
|
|
1838
|
-
writeJson(this.metaPath(session.session_id), current);
|
|
1946
|
+
await writeJson(this.metaPath(session.session_id), current);
|
|
1839
1947
|
cleared += 1;
|
|
1840
1948
|
});
|
|
1841
1949
|
}
|
|
@@ -1871,7 +1979,7 @@ export class SessionStore {
|
|
|
1871
1979
|
// threshold (default 24h via CROSS_REVIEW_STALE_HOURS).
|
|
1872
1980
|
//
|
|
1873
1981
|
// Idempotent + best-effort. Returns counts for telemetry.
|
|
1874
|
-
abortStaleSessions(staleHours) {
|
|
1982
|
+
async abortStaleSessions(staleHours) {
|
|
1875
1983
|
const envHours = Number.parseFloat(process.env.CROSS_REVIEW_STALE_HOURS ?? "");
|
|
1876
1984
|
const hours = staleHours != null && staleHours > 0
|
|
1877
1985
|
? staleHours
|
|
@@ -1891,26 +1999,29 @@ export class SessionStore {
|
|
|
1891
1999
|
if (session.in_flight)
|
|
1892
2000
|
continue;
|
|
1893
2001
|
scanned += 1;
|
|
1894
|
-
//
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
/* malformed lock — fall through to staleness check */
|
|
1905
|
-
}
|
|
2002
|
+
// v4.1.0: lock-holder freshness via proper-lockfile mtime-based
|
|
2003
|
+
// stale detection. lockfile.check returns true if a live holder
|
|
2004
|
+
// is touching the lockfile mtime within `stale` ms.
|
|
2005
|
+
let holderAlive;
|
|
2006
|
+
try {
|
|
2007
|
+
holderAlive = await lockfile.check(this.metaPath(session.session_id), {
|
|
2008
|
+
stale: 120_000,
|
|
2009
|
+
realpath: false,
|
|
2010
|
+
lockfilePath: path.join(this.sessionDir(session.session_id), ".lock"),
|
|
2011
|
+
});
|
|
1906
2012
|
}
|
|
2013
|
+
catch {
|
|
2014
|
+
holderAlive = false;
|
|
2015
|
+
}
|
|
2016
|
+
if (holderAlive)
|
|
2017
|
+
continue;
|
|
1907
2018
|
const lastTouched = Date.parse(session.updated_at);
|
|
1908
2019
|
if (!Number.isFinite(lastTouched))
|
|
1909
2020
|
continue;
|
|
1910
2021
|
if (Date.now() - lastTouched < staleThresholdMs)
|
|
1911
2022
|
continue;
|
|
1912
2023
|
try {
|
|
1913
|
-
this.finalize(session.session_id, "aborted", `stale_no_finalize_${hours}h`);
|
|
2024
|
+
await this.finalize(session.session_id, "aborted", `stale_no_finalize_${hours}h`);
|
|
1914
2025
|
aborted += 1;
|
|
1915
2026
|
}
|
|
1916
2027
|
catch {
|