pi-crew 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.5.8] — Final 5 Low-Severity Issue Fixes (2026-06-01)
4
+
5
+ ### Phase 5 (Final): Race Conditions + Edge Cases
6
+
7
+ - **Issue #12: `acquireLockWithRetry` race** (Low) — `src/state/locks.ts`: added `isLockHolderAlive()` check. Now uses BOTH staleness AND PID liveness: fresh + alive holder = fail, else = safe to clear. A fresh lock whose holder process has already exited is now cleared immediately instead of blocking callers until it goes stale; a stale lock is still cleared regardless of holder liveness.
8
+
9
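+ - **Issue #13: `loadRunManifestById` TOCTOU** (Low) — `src/state/state-store.ts`: retry-on-stat-mismatch approach. Re-stat and re-read in a loop (up to 3 attempts) until size/mtime match the previously observed stat. Narrows the window for reading a torn write without depending on `withFileLockSync`, but does not fully prevent it; callers needing strict consistency should wrap the load+modify+save sequence in `withRunLock()`.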
10
+
11
+ - **Issue #14: `cleanupOldArtifacts` N stat calls** (Low) — `src/state/artifact-store.ts`: use `Dirent.isDirectory()` from `readdirSync({ withFileTypes: true })` to avoid `statSync` for type info. `statSync` now only for mtime.
12
+
13
+ - **Issue #15: `validateMailbox` concurrent access** (Low) — `src/state/mailbox.ts`: wrap read + optional repair in `withFileLockSync`.
14
+
15
+ - **Issue #16: `updateMailboxMessageReply` concurrent rewrite** (Low) — `src/state/mailbox.ts`: wrap read-modify-write in `withFileLockSync`.
16
+
17
+ ### Bug fix in `withFileLockSync`
18
+
19
+ - `src/state/locks.ts`: use separate `.lock` sidecar instead of the file path itself. Previously `withFileLockSync(path)` used `path` as the lock file, colliding with append/read operations on the same path.
20
+
21
+ ### Tests
22
+
23
+ - 2282 tests pass / 0 failures (`npm test`).
24
+
3
25
  ## [0.5.7] — 11 Issue Fixes Across 5 Phases (2026-06-01)
4
26
 
5
27
  ### Phase 1: Schema/Type Fixes
package/README.md CHANGED
@@ -9,7 +9,7 @@ npm: pi-crew
9
9
  repo: https://github.com/baphuongna/pi-crew
10
10
  ```
11
11
 
12
- **v0.5.7**: See [CHANGELOG.md](CHANGELOG.md).
12
+ **v0.5.8**: See [CHANGELOG.md](CHANGELOG.md).
13
13
 
14
14
  ### Security highlights (v0.5.5)
15
15
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-crew",
3
- "version": "0.5.7",
3
+ "version": "0.5.8",
4
4
  "description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
5
5
  "author": "baphuongna",
6
6
  "license": "MIT",
@@ -66,6 +66,10 @@ export function cleanupOldArtifacts(artifactsRoot: string, options: ArtifactClea
66
66
  const cutoff = nowMs() - maxAgeMs;
67
67
  let didCleanup = false;
68
68
  try {
69
+ // FIX: Use { withFileTypes: true } to get Dirent objects (with isDirectory/isFile
70
+ // info), avoiding the need for a separate statSync per entry just to check the
71
+ // type. We still need statSync for mtime, but only on entries that passed the
72
+ // marker-file and symlink filters.
69
73
  const entries = fs.readdirSync(artifactsRoot, { withFileTypes: true });
70
74
  for (const entry of entries) {
71
75
  if (entry.name === markerFile) continue;
@@ -74,7 +78,8 @@ export function cleanupOldArtifacts(artifactsRoot: string, options: ArtifactClea
74
78
  try {
75
79
  const stat = fs.statSync(target);
76
80
  if (stat.mtimeMs >= cutoff) continue;
77
- if (stat.isDirectory()) {
81
+ // Use Dirent type info instead of stat.isDirectory(); the statSync above is still needed for mtime
82
+ if (entry.isDirectory()) {
78
83
  fs.rmSync(target, { recursive: true, force: true });
79
84
  } else {
80
85
  fs.unlinkSync(target);
@@ -40,6 +40,25 @@ function isLockStale(filePath: string, staleMs: number): boolean {
40
40
  }
41
41
  }
42
42
 
43
+ function isLockHolderAlive(filePath: string): boolean {
44
+ try {
45
+ const raw = fs.readFileSync(filePath, "utf-8");
46
+ const parsed = JSON.parse(raw) as { pid?: unknown };
47
+ const pid = typeof parsed.pid === "number" ? parsed.pid : undefined;
48
+ if (pid === undefined) return true; // Unknown holder — assume alive to be safe
49
+ try {
50
+ process.kill(pid, 0);
51
+ return true; // Signal 0 succeeded — process is alive
52
+ } catch (error) {
53
+ const code = (error as NodeJS.ErrnoException).code;
54
+ // EPERM: process exists but we don't have permission to signal it
55
+ return code === "EPERM";
56
+ }
57
+ } catch {
58
+ return true; // Can't read — assume alive to be safe
59
+ }
60
+ }
61
+
43
62
  function writeLockFile(filePath: string): void {
44
63
  const fd = fs.openSync(filePath, fs.constants.O_WRONLY | fs.constants.O_CREAT | fs.constants.O_EXCL, 0o644);
45
64
  try {
@@ -62,11 +81,17 @@ function acquireLockWithRetry(filePath: string, staleMs: number): void {
62
81
  if (Date.now() > deadline) {
63
82
  throw new Error(`Run '${path.basename(filePath)}' is locked by another operation.`);
64
83
  }
65
- // If lock is not stale, fail fast (sync should not wait for active locks)
66
- if (!isLockStale(filePath, staleMs)) {
84
+ // FIX: Use both staleness AND PID liveness to decide if we can steal
85
+ // a lock. Previously only staleness was checked, so a fresh lock left
86
+ // behind by a holder that had already exited blocked callers until it
87
+ // aged out. Now: fresh + alive holder = fail fast; stale or dead holder = clear.
88
+ const isStale = isLockStale(filePath, staleMs);
89
+ const isHolderAlive = isLockHolderAlive(filePath);
90
+ if (!isStale && isHolderAlive) {
91
+ // Lock is fresh AND holder is alive — fail fast
67
92
  throw new Error(`Run '${path.basename(filePath)}' is locked by another operation.`);
68
93
  }
69
- // Lock is stale — try to clear it, but don't bail on rmSync error — let loop retry
94
+ // Lock is stale OR holder is dead — safe to clear
70
95
  try {
71
96
  fs.rmSync(filePath, { force: true });
72
97
  } catch { /* race — let loop retry */ }
@@ -118,14 +143,19 @@ async function acquireLockWithRetryAsync(filePath: string, staleMs: number): Pro
118
143
  * Uses the same O_EXCL atomic create strategy as run locks.
119
144
  */
120
145
  export function withFileLockSync<T>(filePath: string, fn: () => T, options: RunLockOptions = {}): T {
146
+ // FIX: Use a separate .lock sidecar so the lock file doesn't collide with
147
+ // the file being protected. Previously withFileLockSync used the file path
148
+ // itself as the lock, which meant any operation on the same file (read,
149
+ // append, or even the lock acquisition itself) would race with the lock.
150
+ const lockFile = `${filePath}.lock`;
121
151
  const staleMs = options.staleMs ?? DEFAULT_STALE_MS;
122
- fs.mkdirSync(path.dirname(filePath), { recursive: true });
123
- acquireLockWithRetry(filePath, staleMs);
152
+ fs.mkdirSync(path.dirname(lockFile), { recursive: true });
153
+ acquireLockWithRetry(lockFile, staleMs);
124
154
  try {
125
155
  return fn();
126
156
  } finally {
127
157
  try {
128
- fs.rmSync(filePath, { force: true });
158
+ fs.rmSync(lockFile, { force: true });
129
159
  } catch {
130
160
  // Best-effort lock cleanup.
131
161
  }
@@ -6,6 +6,7 @@ import { redactSecrets } from "../utils/redaction.ts";
6
6
  import { logInternalError } from "../utils/internal-error.ts";
7
7
  import { atomicWriteFile } from "./atomic-write.ts";
8
8
  import { withEventLogLockSync } from "./event-log.ts";
9
+ import { withFileLockSync } from "./locks.ts";
9
10
  import { DEFAULT_MAILBOX } from "../config/defaults.ts";
10
11
 
11
12
  export type MailboxDirection = "inbox" | "outbox";
@@ -419,29 +420,34 @@ export function updateMailboxMessageReply(manifest: TeamRunManifest, originalMes
419
420
 
420
421
  for (const { filePath, direction } of filesToSearch) {
421
422
  if (!fs.existsSync(filePath)) continue;
422
- const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
423
- let found = false;
424
- const updatedLines: string[] = [];
425
- for (const line of lines) {
426
- try {
427
- const parsed = JSON.parse(line) as unknown;
428
- const msg = parseMailboxMessage(parsed, direction);
429
- if (msg && msg.id === originalMessageId) {
430
- msg.repliedAt = new Date().toISOString();
431
- msg.replyContent = replyContent;
432
- updatedLines.push(JSON.stringify(redactSecrets(msg)));
433
- found = true;
434
- } else {
423
+ // FIX: Wrap read-modify-write in withFileLockSync to prevent concurrent
424
+ // updates from clobbering each other (each reply rewrites the whole file).
425
+ const found = withFileLockSync(filePath, () => {
426
+ const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
427
+ let localFound = false;
428
+ const updatedLines: string[] = [];
429
+ for (const line of lines) {
430
+ try {
431
+ const parsed = JSON.parse(line) as unknown;
432
+ const msg = parseMailboxMessage(parsed, direction);
433
+ if (msg && msg.id === originalMessageId) {
434
+ msg.repliedAt = new Date().toISOString();
435
+ msg.replyContent = replyContent;
436
+ updatedLines.push(JSON.stringify(redactSecrets(msg)));
437
+ localFound = true;
438
+ } else {
439
+ updatedLines.push(line);
440
+ }
441
+ } catch {
435
442
  updatedLines.push(line);
436
443
  }
437
- } catch {
438
- updatedLines.push(line);
439
444
  }
440
- }
441
- if (found) {
442
- atomicWriteFile(filePath, `${updatedLines.join("\n")}\n`);
443
- return;
444
- }
445
+ if (localFound) {
446
+ atomicWriteFile(filePath, `${updatedLines.join("\n")}\n`);
447
+ }
448
+ return localFound;
449
+ });
450
+ if (found) return;
445
451
  }
446
452
  // Not finding the original is non-fatal; the reply is still delivered.
447
453
  }
@@ -464,26 +470,31 @@ export function validateMailbox(manifest: TeamRunManifest, options: { repair?: b
464
470
  for (const direction of ["inbox", "outbox"] as const) {
465
471
  if (options.signal?.aborted) break;
466
472
  const filePath = mailboxFile(manifest, direction);
467
- const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
468
- const validLines: string[] = [];
469
- for (let i = 0; i < lines.length; i += 1) {
470
- if (options.signal?.aborted) break;
471
- const line = lines[i];
472
- if (!line) continue;
473
- try {
474
- const parsed = JSON.parse(line) as unknown;
475
- const message = parseMailboxMessage(parsed, direction);
476
- if (!message) throw new Error("invalid message schema");
477
- validLines.push(JSON.stringify(redactSecrets(message)));
478
- } catch (error) {
479
- const message = error instanceof Error ? error.message : String(error);
480
- issues.push({ level: "error", path: filePath, message });
473
+ // FIX: Wrap read + optional repair in withFileLockSync so concurrent appends
474
+ // don't race with the read-modify-write. Mailbox files are capped at 10MB
475
+ // (MAILBOX_ARCHIVE_THRESHOLD_BYTES), so the per-call memory is bounded.
476
+ withFileLockSync(filePath, () => {
477
+ const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
478
+ const validLines: string[] = [];
479
+ for (let i = 0; i < lines.length; i += 1) {
480
+ if (options.signal?.aborted) break;
481
+ const line = lines[i];
482
+ if (!line) continue;
483
+ try {
484
+ const parsed = JSON.parse(line) as unknown;
485
+ const message = parseMailboxMessage(parsed, direction);
486
+ if (!message) throw new Error("invalid message schema");
487
+ validLines.push(JSON.stringify(redactSecrets(message)));
488
+ } catch (error) {
489
+ const message = error instanceof Error ? error.message : String(error);
490
+ issues.push({ level: "error", path: filePath, message });
491
+ }
481
492
  }
482
- }
483
- if (options.repair && validLines.length !== lines.length) {
484
- atomicWriteFile(filePath, `${validLines.join("\n")}${validLines.length ? "\n" : ""}`);
485
- repaired.push(filePath);
486
- }
493
+ if (options.repair && validLines.length !== lines.length) {
494
+ atomicWriteFile(filePath, `${validLines.join("\n")}${validLines.length ? "\n" : ""}`);
495
+ repaired.push(filePath);
496
+ }
497
+ });
487
498
  }
488
499
  const delivery = readDeliveryState(manifest);
489
500
  const allMessages = readMailbox(manifest);
@@ -324,18 +324,39 @@ export function loadRunManifestById(cwd: string, runId: string): { manifest: Tea
324
324
  }
325
325
  }
326
326
 
327
- const manifest = readJsonFile<TeamRunManifest>(manifestPath);
327
+ // FIX: Re-stat and re-read inside a single synchronous block to close the
328
+ // TOCTOU window. We use a sentinel-based re-read: if mtime/size changed
329
+ // between the initial stat and the read, re-read until stable. With file
330
+ // sizes typically small (<5MB), the extra cost is negligible. Note: this
331
+ // doesn't fully prevent torn writes — callers needing strict consistency
332
+ // should use withRunLock() around the whole load+modify+save sequence.
333
+ let attempts = 0;
334
+ let manifest: TeamRunManifest | undefined;
335
+ let tasks: TeamTaskState[] | undefined;
336
+ while (attempts < 3) {
337
+ const freshStat = fs.statSync(manifestPath);
338
+ manifest = readJsonFile<TeamRunManifest>(manifestPath);
339
+ const freshTasksStat = fs.existsSync(tasksPath) ? fs.statSync(tasksPath) : undefined;
340
+ tasks = readJsonFile<TeamTaskState[]>(tasksPath) ?? [];
341
+ // If size/mtime didn't change between stat and read, we're consistent.
342
+ if (freshStat.mtimeMs === manifestStat.mtimeMs && freshStat.size === manifestStat.size
343
+ && (!freshTasksStat || (freshTasksStat.mtimeMs === tasksStat?.mtimeMs && freshTasksStat.size === tasksStat?.size))) {
344
+ break;
345
+ }
346
+ attempts += 1;
347
+ manifestStat = freshStat;
348
+ tasksStat = freshTasksStat;
349
+ }
328
350
  if (!manifest || !validateRunManifestPaths(cwd, runId, manifest, stateRoot, tasksPath)) return undefined;
329
- const tasks = readJsonFile<TeamTaskState[]>(tasksPath) ?? [];
330
351
  setManifestCache(stateRoot, {
331
352
  manifest,
332
- tasks,
353
+ tasks: tasks ?? [],
333
354
  manifestMtimeMs: manifestStat.mtimeMs,
334
355
  manifestSize: manifestStat.size,
335
356
  tasksMtimeMs,
336
357
  tasksSize: tasksStat?.size ?? 0,
337
358
  });
338
- return { manifest, tasks };
359
+ return { manifest, tasks: tasks ?? [] };
339
360
  }
340
361
 
341
362
  export async function loadRunManifestByIdAsync(cwd: string, runId: string): Promise<{ manifest: TeamRunManifest; tasks: TeamTaskState[] } | undefined> {