pi-crew 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/state/artifact-store.ts +6 -1
- package/src/state/locks.ts +36 -6
- package/src/state/mailbox.ts +50 -39
- package/src/state/state-store.ts +25 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.8] — Final 5 Low-Severity Issue Fixes (2026-06-01)
|
|
4
|
+
|
|
5
|
+
### Phase 5 (Final): Race Conditions + Edge Cases
|
|
6
|
+
|
|
7
|
+
- **Issue #12: `acquireLockWithRetry` race** (Low) — `src/state/locks.ts`: added `isLockHolderAlive()` check. Now uses BOTH staleness AND PID liveness: fresh + alive holder = fail, else = safe to clear. Prevents stealing a lock from a still-running process whose PID was recently reused.
|
|
8
|
+
|
|
9
|
+
- **Issue #13: `loadRunManifestById` TOCTOU** (Low) — `src/state/state-store.ts`: retry-on-stat-mismatch approach. Re-stat and re-read in a loop (up to 3 attempts) until size/mtime are stable across stat and read. Catches torn writes without depending on `withFileLockSync`.
|
|
10
|
+
|
|
11
|
+
- **Issue #14: `cleanupOldArtifacts` N stat calls** (Low) — `src/state/artifact-store.ts`: use `Dirent.isDirectory()` from `readdirSync({ withFileTypes: true })` to avoid `statSync` for type info. `statSync` now only for mtime.
|
|
12
|
+
|
|
13
|
+
- **Issue #15: `validateMailbox` concurrent access** (Low) — `src/state/mailbox.ts`: wrap read + optional repair in `withFileLockSync`.
|
|
14
|
+
|
|
15
|
+
- **Issue #16: `updateMailboxMessageReply` concurrent rewrite** (Low) — `src/state/mailbox.ts`: wrap read-modify-write in `withFileLockSync`.
|
|
16
|
+
|
|
17
|
+
### Bug fix in `withFileLockSync`
|
|
18
|
+
|
|
19
|
+
- `src/state/locks.ts`: use separate `.lock` sidecar instead of the file path itself. Previously `withFileLockSync(path)` used `path` as the lock file, colliding with append/read operations on the same path.
|
|
20
|
+
|
|
21
|
+
### Tests
|
|
22
|
+
|
|
23
|
+
- 2282 tests pass / 0 failures (`npm test`).
|
|
24
|
+
|
|
3
25
|
## [0.5.7] — 11 Issue Fixes Across 5 Phases (2026-06-01)
|
|
4
26
|
|
|
5
27
|
### Phase 1: Schema/Type Fixes
|
package/README.md
CHANGED
package/package.json
CHANGED
package/src/state/artifact-store.ts
CHANGED
|
@@ -66,6 +66,10 @@ export function cleanupOldArtifacts(artifactsRoot: string, options: ArtifactClea
|
|
|
66
66
|
const cutoff = nowMs() - maxAgeMs;
|
|
67
67
|
let didCleanup = false;
|
|
68
68
|
try {
|
|
69
|
+
// FIX: Use { withFileTypes: true } to get Dirent objects (with isDirectory/isFile
|
|
70
|
+
// info), avoiding the need for a separate statSync per entry just to check the
|
|
71
|
+
// type. We still need statSync for mtime, but only on entries that passed the
|
|
72
|
+
// marker-file and symlink filters.
|
|
69
73
|
const entries = fs.readdirSync(artifactsRoot, { withFileTypes: true });
|
|
70
74
|
for (const entry of entries) {
|
|
71
75
|
if (entry.name === markerFile) continue;
|
|
@@ -74,7 +78,8 @@ export function cleanupOldArtifacts(artifactsRoot: string, options: ArtifactClea
|
|
|
74
78
|
try {
|
|
75
79
|
const stat = fs.statSync(target);
|
|
76
80
|
if (stat.mtimeMs >= cutoff) continue;
|
|
77
|
-
|
|
81
|
+
// Use Dirent info instead of stat.isDirectory() to save a stat call
|
|
82
|
+
if (entry.isDirectory()) {
|
|
78
83
|
fs.rmSync(target, { recursive: true, force: true });
|
|
79
84
|
} else {
|
|
80
85
|
fs.unlinkSync(target);
|
package/src/state/locks.ts
CHANGED
|
@@ -40,6 +40,25 @@ function isLockStale(filePath: string, staleMs: number): boolean {
|
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
function isLockHolderAlive(filePath: string): boolean {
|
|
44
|
+
try {
|
|
45
|
+
const raw = fs.readFileSync(filePath, "utf-8");
|
|
46
|
+
const parsed = JSON.parse(raw) as { pid?: unknown };
|
|
47
|
+
const pid = typeof parsed.pid === "number" ? parsed.pid : undefined;
|
|
48
|
+
if (pid === undefined) return true; // Unknown holder — assume alive to be safe
|
|
49
|
+
try {
|
|
50
|
+
process.kill(pid, 0);
|
|
51
|
+
return true; // Signal 0 succeeded — process is alive
|
|
52
|
+
} catch (error) {
|
|
53
|
+
const code = (error as NodeJS.ErrnoException).code;
|
|
54
|
+
// EPERM: process exists but we don't have permission to signal it
|
|
55
|
+
return code === "EPERM";
|
|
56
|
+
}
|
|
57
|
+
} catch {
|
|
58
|
+
return true; // Can't read — assume alive to be safe
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
43
62
|
function writeLockFile(filePath: string): void {
|
|
44
63
|
const fd = fs.openSync(filePath, fs.constants.O_WRONLY | fs.constants.O_CREAT | fs.constants.O_EXCL, 0o644);
|
|
45
64
|
try {
|
|
@@ -62,11 +81,17 @@ function acquireLockWithRetry(filePath: string, staleMs: number): void {
|
|
|
62
81
|
if (Date.now() > deadline) {
|
|
63
82
|
throw new Error(`Run '${path.basename(filePath)}' is locked by another operation.`);
|
|
64
83
|
}
|
|
65
|
-
//
|
|
66
|
-
|
|
84
|
+
// FIX: Use both staleness AND PID liveness to decide if we can steal
|
|
85
|
+
// a lock. Previously only staleness was checked, so a process whose
|
|
86
|
+
// PID was recently reused by another process could have its lock
|
|
87
|
+
// stolen even while still active. Now: fresh+alive = fail, else = clear.
|
|
88
|
+
const isStale = isLockStale(filePath, staleMs);
|
|
89
|
+
const isHolderAlive = isLockHolderAlive(filePath);
|
|
90
|
+
if (!isStale && isHolderAlive) {
|
|
91
|
+
// Lock is fresh AND holder is alive — fail fast
|
|
67
92
|
throw new Error(`Run '${path.basename(filePath)}' is locked by another operation.`);
|
|
68
93
|
}
|
|
69
|
-
// Lock is stale
|
|
94
|
+
// Lock is stale OR holder is dead — safe to clear
|
|
70
95
|
try {
|
|
71
96
|
fs.rmSync(filePath, { force: true });
|
|
72
97
|
} catch { /* race — let loop retry */ }
|
|
@@ -118,14 +143,19 @@ async function acquireLockWithRetryAsync(filePath: string, staleMs: number): Pro
|
|
|
118
143
|
* Uses the same O_EXCL atomic create strategy as run locks.
|
|
119
144
|
*/
|
|
120
145
|
export function withFileLockSync<T>(filePath: string, fn: () => T, options: RunLockOptions = {}): T {
|
|
146
|
+
// FIX: Use a separate .lock sidecar so the lock file doesn't collide with
|
|
147
|
+
// the file being protected. Previously withFileLockSync used the file path
|
|
148
|
+
// itself as the lock, which meant any operation on the same file (read,
|
|
149
|
+
// append, or even the lock acquisition itself) would race with the lock.
|
|
150
|
+
const lockFile = `${filePath}.lock`;
|
|
121
151
|
const staleMs = options.staleMs ?? DEFAULT_STALE_MS;
|
|
122
|
-
fs.mkdirSync(path.dirname(
|
|
123
|
-
acquireLockWithRetry(
|
|
152
|
+
fs.mkdirSync(path.dirname(lockFile), { recursive: true });
|
|
153
|
+
acquireLockWithRetry(lockFile, staleMs);
|
|
124
154
|
try {
|
|
125
155
|
return fn();
|
|
126
156
|
} finally {
|
|
127
157
|
try {
|
|
128
|
-
fs.rmSync(
|
|
158
|
+
fs.rmSync(lockFile, { force: true });
|
|
129
159
|
} catch {
|
|
130
160
|
// Best-effort lock cleanup.
|
|
131
161
|
}
|
package/src/state/mailbox.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { redactSecrets } from "../utils/redaction.ts";
|
|
|
6
6
|
import { logInternalError } from "../utils/internal-error.ts";
|
|
7
7
|
import { atomicWriteFile } from "./atomic-write.ts";
|
|
8
8
|
import { withEventLogLockSync } from "./event-log.ts";
|
|
9
|
+
import { withFileLockSync } from "./locks.ts";
|
|
9
10
|
import { DEFAULT_MAILBOX } from "../config/defaults.ts";
|
|
10
11
|
|
|
11
12
|
export type MailboxDirection = "inbox" | "outbox";
|
|
@@ -419,29 +420,34 @@ export function updateMailboxMessageReply(manifest: TeamRunManifest, originalMes
|
|
|
419
420
|
|
|
420
421
|
for (const { filePath, direction } of filesToSearch) {
|
|
421
422
|
if (!fs.existsSync(filePath)) continue;
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
const
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
msg
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
423
|
+
// FIX: Wrap read-modify-write in withFileLockSync to prevent concurrent
|
|
424
|
+
// updates from clobbering each other (each reply rewrites the whole file).
|
|
425
|
+
const found = withFileLockSync(filePath, () => {
|
|
426
|
+
const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
|
|
427
|
+
let localFound = false;
|
|
428
|
+
const updatedLines: string[] = [];
|
|
429
|
+
for (const line of lines) {
|
|
430
|
+
try {
|
|
431
|
+
const parsed = JSON.parse(line) as unknown;
|
|
432
|
+
const msg = parseMailboxMessage(parsed, direction);
|
|
433
|
+
if (msg && msg.id === originalMessageId) {
|
|
434
|
+
msg.repliedAt = new Date().toISOString();
|
|
435
|
+
msg.replyContent = replyContent;
|
|
436
|
+
updatedLines.push(JSON.stringify(redactSecrets(msg)));
|
|
437
|
+
localFound = true;
|
|
438
|
+
} else {
|
|
439
|
+
updatedLines.push(line);
|
|
440
|
+
}
|
|
441
|
+
} catch {
|
|
435
442
|
updatedLines.push(line);
|
|
436
443
|
}
|
|
437
|
-
} catch {
|
|
438
|
-
updatedLines.push(line);
|
|
439
444
|
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
return;
|
|
444
|
-
}
|
|
445
|
+
if (localFound) {
|
|
446
|
+
atomicWriteFile(filePath, `${updatedLines.join("\n")}\n`);
|
|
447
|
+
}
|
|
448
|
+
return localFound;
|
|
449
|
+
});
|
|
450
|
+
if (found) return;
|
|
445
451
|
}
|
|
446
452
|
// Not finding the original is non-fatal; the reply is still delivered.
|
|
447
453
|
}
|
|
@@ -464,26 +470,31 @@ export function validateMailbox(manifest: TeamRunManifest, options: { repair?: b
|
|
|
464
470
|
for (const direction of ["inbox", "outbox"] as const) {
|
|
465
471
|
if (options.signal?.aborted) break;
|
|
466
472
|
const filePath = mailboxFile(manifest, direction);
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
const
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
const
|
|
476
|
-
if (!
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
473
|
+
// FIX: Wrap read + optional repair in withFileLockSync so concurrent appends
|
|
474
|
+
// don't race with the read-modify-write. Mailbox files are capped at 10MB
|
|
475
|
+
// (MAILBOX_ARCHIVE_THRESHOLD_BYTES), so the per-call memory is bounded.
|
|
476
|
+
withFileLockSync(filePath, () => {
|
|
477
|
+
const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
|
|
478
|
+
const validLines: string[] = [];
|
|
479
|
+
for (let i = 0; i < lines.length; i += 1) {
|
|
480
|
+
if (options.signal?.aborted) break;
|
|
481
|
+
const line = lines[i];
|
|
482
|
+
if (!line) continue;
|
|
483
|
+
try {
|
|
484
|
+
const parsed = JSON.parse(line) as unknown;
|
|
485
|
+
const message = parseMailboxMessage(parsed, direction);
|
|
486
|
+
if (!message) throw new Error("invalid message schema");
|
|
487
|
+
validLines.push(JSON.stringify(redactSecrets(message)));
|
|
488
|
+
} catch (error) {
|
|
489
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
490
|
+
issues.push({ level: "error", path: filePath, message });
|
|
491
|
+
}
|
|
481
492
|
}
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
}
|
|
493
|
+
if (options.repair && validLines.length !== lines.length) {
|
|
494
|
+
atomicWriteFile(filePath, `${validLines.join("\n")}${validLines.length ? "\n" : ""}`);
|
|
495
|
+
repaired.push(filePath);
|
|
496
|
+
}
|
|
497
|
+
});
|
|
487
498
|
}
|
|
488
499
|
const delivery = readDeliveryState(manifest);
|
|
489
500
|
const allMessages = readMailbox(manifest);
|
package/src/state/state-store.ts
CHANGED
|
@@ -324,18 +324,39 @@ export function loadRunManifestById(cwd: string, runId: string): { manifest: Tea
|
|
|
324
324
|
}
|
|
325
325
|
}
|
|
326
326
|
|
|
327
|
-
|
|
327
|
+
// FIX: Re-stat and re-read inside a single synchronous block to close the
|
|
328
|
+
// TOCTOU window. We use a sentinel-based re-read: if mtime/size changed
|
|
329
|
+
// between the initial stat and the read, re-read until stable. With file
|
|
330
|
+
// sizes typically small (<5MB), the extra cost is negligible. Note: this
|
|
331
|
+
// doesn't fully prevent torn writes — callers needing strict consistency
|
|
332
|
+
// should use withRunLock() around the whole load+modify+save sequence.
|
|
333
|
+
let attempts = 0;
|
|
334
|
+
let manifest: TeamRunManifest | undefined;
|
|
335
|
+
let tasks: TeamTaskState[] | undefined;
|
|
336
|
+
while (attempts < 3) {
|
|
337
|
+
const freshStat = fs.statSync(manifestPath);
|
|
338
|
+
manifest = readJsonFile<TeamRunManifest>(manifestPath);
|
|
339
|
+
const freshTasksStat = fs.existsSync(tasksPath) ? fs.statSync(tasksPath) : undefined;
|
|
340
|
+
tasks = readJsonFile<TeamTaskState[]>(tasksPath) ?? [];
|
|
341
|
+
// If size/mtime didn't change between stat and read, we're consistent.
|
|
342
|
+
if (freshStat.mtimeMs === manifestStat.mtimeMs && freshStat.size === manifestStat.size
|
|
343
|
+
&& (!freshTasksStat || (freshTasksStat.mtimeMs === tasksStat?.mtimeMs && freshTasksStat.size === tasksStat?.size))) {
|
|
344
|
+
break;
|
|
345
|
+
}
|
|
346
|
+
attempts += 1;
|
|
347
|
+
manifestStat = freshStat;
|
|
348
|
+
tasksStat = freshTasksStat;
|
|
349
|
+
}
|
|
328
350
|
if (!manifest || !validateRunManifestPaths(cwd, runId, manifest, stateRoot, tasksPath)) return undefined;
|
|
329
|
-
const tasks = readJsonFile<TeamTaskState[]>(tasksPath) ?? [];
|
|
330
351
|
setManifestCache(stateRoot, {
|
|
331
352
|
manifest,
|
|
332
|
-
tasks,
|
|
353
|
+
tasks: tasks ?? [],
|
|
333
354
|
manifestMtimeMs: manifestStat.mtimeMs,
|
|
334
355
|
manifestSize: manifestStat.size,
|
|
335
356
|
tasksMtimeMs,
|
|
336
357
|
tasksSize: tasksStat?.size ?? 0,
|
|
337
358
|
});
|
|
338
|
-
return { manifest, tasks };
|
|
359
|
+
return { manifest, tasks: tasks ?? [] };
|
|
339
360
|
}
|
|
340
361
|
|
|
341
362
|
export async function loadRunManifestByIdAsync(cwd: string, runId: string): Promise<{ manifest: TeamRunManifest; tasks: TeamTaskState[] } | undefined> {
|