pi-crew 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -0
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/config/config.ts +28 -4
- package/src/config/defaults.ts +5 -0
- package/src/runtime/child-pi.ts +23 -5
- package/src/runtime/crew-agent-records.ts +32 -1
- package/src/runtime/task-runner.ts +14 -0
- package/src/schema/team-tool-schema.ts +1 -0
- package/src/state/active-run-registry.ts +10 -1
- package/src/state/artifact-store.ts +6 -1
- package/src/state/event-log.ts +8 -3
- package/src/state/locks.ts +36 -6
- package/src/state/mailbox.ts +75 -40
- package/src/state/state-store.ts +25 -4
- package/src/tools/safe-bash.ts +3 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,64 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.8] — Final 5 Low-Severity Issue Fixes (2026-06-01)
|
|
4
|
+
|
|
5
|
+
### Phase 5 (Final): Race Conditions + Edge Cases
|
|
6
|
+
|
|
7
|
+
- **Issue #12: `acquireLockWithRetry` race** (Low) — `src/state/locks.ts`: added `isLockHolderAlive()` check. Now uses BOTH staleness AND PID liveness: fresh + alive holder = fail, else (stale lock, or holder PID no longer running) = safe to clear. A holder whose PID cannot be read or parsed is treated as alive, so a fresh lock is never cleared on uncertain information.
|
|
8
|
+
|
|
9
|
+
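- **Issue #13: `loadRunManifestById` TOCTOU** (Low) — `src/state/state-store.ts`: retry-on-stat-mismatch approach. Re-stat and re-read in a loop (up to 3 attempts) until size/mtime are stable across stat and read. Narrows the window for reading a torn write without depending on `withFileLockSync`; it does not fully prevent torn reads, so callers needing strict consistency should still wrap the load+modify+save sequence in `withRunLock()`.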
|
|
10
|
+
|
|
11
|
+
- **Issue #14: `cleanupOldArtifacts` N stat calls** (Low) — `src/state/artifact-store.ts`: use `Dirent.isDirectory()` from `readdirSync({ withFileTypes: true })` to avoid `statSync` for type info. `statSync` now only for mtime.
|
|
12
|
+
|
|
13
|
+
- **Issue #15: `validateMailbox` concurrent access** (Low) — `src/state/mailbox.ts`: wrap read + optional repair in `withFileLockSync`.
|
|
14
|
+
|
|
15
|
+
- **Issue #16: `updateMailboxMessageReply` concurrent rewrite** (Low) — `src/state/mailbox.ts`: wrap read-modify-write in `withFileLockSync`.
|
|
16
|
+
|
|
17
|
+
### Bug fix in `withFileLockSync`
|
|
18
|
+
|
|
19
|
+
- `src/state/locks.ts`: use separate `.lock` sidecar instead of the file path itself. Previously `withFileLockSync(path)` used `path` as the lock file, colliding with append/read operations on the same path.
|
|
20
|
+
|
|
21
|
+
### Tests
|
|
22
|
+
|
|
23
|
+
- 2282 tests pass / 0 failures (`npm test`).
|
|
24
|
+
|
|
25
|
+
## [0.5.7] — 11 Issue Fixes Across 5 Phases (2026-06-01)
|
|
26
|
+
|
|
27
|
+
### Phase 1: Schema/Type Fixes
|
|
28
|
+
|
|
29
|
+
- **`invalidate` schema divergence** (Critical) — `src/schema/team-tool-schema.ts`: added `"invalidate"` to TypeBox union. Previously TS interface had it but TypeBox schema did not, causing silent `-32602` failure.
|
|
30
|
+
- **OTLP header key validation** (Low) — `src/config/config.ts`: hardened `parseOtlpConfig` with case-insensitive check for 13 dangerous keys (`__proto__`, `hasOwnProperty`, `toString`, etc.) and format validation `/^[a-zA-Z][a-zA-Z0-9_-]{0,127}$/`.
|
|
31
|
+
|
|
32
|
+
### Phase 2: Security Hardening
|
|
33
|
+
|
|
34
|
+
- **OTLP endpoint unsanitized** (Critical) — `src/config/config.ts`: project config can no longer override `otlp.endpoint` (would have allowed credential exfiltration via attacker URL).
|
|
35
|
+
- **Wildcard env leakage** (High) — `src/runtime/child-pi.ts`: replaced broad wildcards (`LC_*`, `XDG_*`, `NVM_*`, `NODE_*`, `npm_*`) with specific names. Previously `NPM_TOKEN`, `NODE_ENV=production`, `NVM_RC_VERSION` all leaked.
|
|
36
|
+
|
|
37
|
+
### Phase 3: Correctness Fixes
|
|
38
|
+
|
|
39
|
+
- **AbortSignal not propagated** (High) — `src/runtime/task-runner.ts`: check signal before `persistSingleTaskUpdate`. Cancelled tasks now return early with cancelled status instead of writing stale state.
|
|
40
|
+
- **MAILBOX_ARCHIVE_THRESHOLD 10MB/task** (High) — `src/state/mailbox.ts` + `src/config/defaults.ts`: added `DEFAULT_MAILBOX.maxArchivesPerDirection=10` cap and `pruneOldMailboxArchives()` to prevent unbounded growth (1GB+ for 100 tasks).
|
|
41
|
+
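- **`safeRm` regex bypass** (Medium) — `src/tools/safe-bash.ts`: stricter regex requires the path to start with `tmp/`, `cache/`, `node_modules/`, `dist/`, or `build/` (with optional `./` prefix), followed by one or more characters from `[a-zA-Z0-9._/-]`. Rejects leading path traversal like `./../../../etc`; note that `..` segments after the allowed prefix (e.g. `tmp/../x`) still match the pattern.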
|
|
42
|
+
- **`writeEntries` silent drop** (Medium) — `src/state/active-run-registry.ts`: emit `logInternalError` warning when entries overflow cap.
|
|
43
|
+
|
|
44
|
+
### Phase 4: Performance Optimization
|
|
45
|
+
|
|
46
|
+
- **`nextAgentEventSeq` O(n) cold cache** (Medium) — `src/runtime/crew-agent-records.ts`: added `.seq` sidecar file for O(1) lookup. Fall back to O(n) scan only when sidecar is missing.
|
|
47
|
+
- **`nextSequence` O(n) cold cache** (Medium) — `src/state/event-log.ts`: trust sidecar seq file when present. Fall back to `scanSequence` only when sidecar missing or file shrunk.
|
|
48
|
+
|
|
49
|
+
### Phase 5: Deferred (Low severity)
|
|
50
|
+
|
|
51
|
+
- **Issue #12: `acquireLockWithRetry` race** — defer (race window small, retry loop handles).
|
|
52
|
+
- **Issue #13: `loadRunManifestById` TOCTOU** — defer (cache TTL 30s, race window small).
|
|
53
|
+
- **Issue #14: `cleanupOldArtifacts` N stat calls** — defer (typical artifact dirs small).
|
|
54
|
+
- **Issue #15: `validateMailbox` full load** — defer (10MB cap, bounded).
|
|
55
|
+
- **Issue #16: `updateMailboxMessageReply` full rewrite** — defer (10MB cap, bounded).
|
|
56
|
+
|
|
57
|
+
### Tests
|
|
58
|
+
|
|
59
|
+
- 2282 tests pass / 0 failures (`npm test`).
|
|
60
|
+
- New tests: `invalidate`/`anchor`/`auto-summarize`/`auto_boomerang` schema, OTLP header key validation, OTLP endpoint sanitization, wildcard env leakage, sidecar seq lookup.
|
|
61
|
+
|
|
3
62
|
## [0.5.6] — Documentation Sync + Type-Only Import Fix (2026-06-01)
|
|
4
63
|
|
|
5
64
|
### Documentation
|
package/README.md
CHANGED
package/package.json
CHANGED
package/src/config/config.ts
CHANGED
|
@@ -244,6 +244,15 @@ function sanitizeProjectConfig(
|
|
|
244
244
|
sanitized.otlp = undefined;
|
|
245
245
|
warnings.push(projectOverrideWarning(projectPath, "otlp.headers"));
|
|
246
246
|
}
|
|
247
|
+
// FIX: Block project config from setting otlp.endpoint — it controls where
|
|
248
|
+
// OTLP headers (potentially containing credentials) are sent.
|
|
249
|
+
if (config.otlp?.endpoint !== undefined) {
|
|
250
|
+
if (!sanitized.otlp) sanitized.otlp = { ...config.otlp, endpoint: undefined };
|
|
251
|
+
else sanitized.otlp = { ...sanitized.otlp, endpoint: undefined };
|
|
252
|
+
if (!Object.values(sanitized.otlp).some((entry) => entry !== undefined))
|
|
253
|
+
sanitized.otlp = undefined;
|
|
254
|
+
warnings.push(projectOverrideWarning(projectPath, "otlp.endpoint"));
|
|
255
|
+
}
|
|
247
256
|
if (
|
|
248
257
|
config.agents?.disableBuiltins !== undefined ||
|
|
249
258
|
config.agents?.overrides !== undefined
|
|
@@ -1051,13 +1060,28 @@ function parseOtlpConfig(value: unknown): CrewOtlpConfig | undefined {
|
|
|
1051
1060
|
if (rawHeaders)
|
|
1052
1061
|
for (const [key, entry] of Object.entries(rawHeaders)) {
|
|
1053
1062
|
if (typeof entry !== "string") continue;
|
|
1054
|
-
// Prevent prototype pollution via
|
|
1063
|
+
// Prevent prototype pollution via dangerous Object.prototype keys.
|
|
1064
|
+
// Case-insensitive check to catch __Proto__, CONSTRUCTOR, etc.
|
|
1065
|
+
const lowerKey = key.toLowerCase();
|
|
1055
1066
|
if (
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1067
|
+
lowerKey === "__proto__" ||
|
|
1068
|
+
lowerKey === "constructor" ||
|
|
1069
|
+
lowerKey === "prototype" ||
|
|
1070
|
+
lowerKey === "hasownproperty" ||
|
|
1071
|
+
lowerKey === "tostring" ||
|
|
1072
|
+
lowerKey === "valueof" ||
|
|
1073
|
+
lowerKey === "isprototypeof" ||
|
|
1074
|
+
lowerKey === "propertyisenumerable" ||
|
|
1075
|
+
lowerKey === "tolocalestring" ||
|
|
1076
|
+
lowerKey === "__definegetter__" ||
|
|
1077
|
+
lowerKey === "__definesetter__" ||
|
|
1078
|
+
lowerKey === "__lookupgetter__" ||
|
|
1079
|
+
lowerKey === "__lookupsetter__"
|
|
1059
1080
|
)
|
|
1060
1081
|
continue;
|
|
1082
|
+
// Validate key format: must start with letter, then alphanumeric/hyphen/underscore.
|
|
1083
|
+
// Blocks CRLF, NUL, spaces, shell metacharacters in header keys.
|
|
1084
|
+
if (!/^[a-zA-Z][a-zA-Z0-9_-]{0,127}$/.test(key)) continue;
|
|
1061
1085
|
headers[key] = entry;
|
|
1062
1086
|
}
|
|
1063
1087
|
const otlp: CrewOtlpConfig = {
|
package/src/config/defaults.ts
CHANGED
|
@@ -91,6 +91,11 @@ export const DEFAULT_CACHE = {
|
|
|
91
91
|
manifestMaxEntries: 64,
|
|
92
92
|
};
|
|
93
93
|
|
|
94
|
+
export const DEFAULT_MAILBOX = {
|
|
95
|
+
perFileThresholdBytes: 10 * 1024 * 1024, // 10MB per mailbox file
|
|
96
|
+
maxArchivesPerDirection: 10, // Keep at most 10 archives per direction per run
|
|
97
|
+
};
|
|
98
|
+
|
|
94
99
|
export const DEFAULT_SUBAGENT = {
|
|
95
100
|
stuckBlockedNotifyMs: 5 * 60_000,
|
|
96
101
|
};
|
package/src/runtime/child-pi.ts
CHANGED
|
@@ -206,11 +206,29 @@ export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): S
|
|
|
206
206
|
"SHELL",
|
|
207
207
|
"TERM",
|
|
208
208
|
"LANG",
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
"
|
|
213
|
-
"
|
|
209
|
+
// FIX: Replaced broad wildcards (LC_*, XDG_*, NVM_*, NODE_*, npm_*) with
|
|
210
|
+
// specific names. Previously NPM_TOKEN, NODE_ENV=production, NVM_RC_VERSION
|
|
211
|
+
// all leaked through wildcards.
|
|
212
|
+
"LC_ALL",
|
|
213
|
+
"LC_COLLATE",
|
|
214
|
+
"LC_CTYPE",
|
|
215
|
+
"LC_MESSAGES",
|
|
216
|
+
"LC_MONETARY",
|
|
217
|
+
"LC_NUMERIC",
|
|
218
|
+
"LC_TIME",
|
|
219
|
+
"XDG_CONFIG_HOME",
|
|
220
|
+
"XDG_DATA_HOME",
|
|
221
|
+
"XDG_CACHE_HOME",
|
|
222
|
+
"XDG_RUNTIME_DIR",
|
|
223
|
+
"NVM_BIN",
|
|
224
|
+
"NVM_DIR",
|
|
225
|
+
"NVM_INC",
|
|
226
|
+
"NODE_PATH",
|
|
227
|
+
"NODE_DISABLE_COLORS",
|
|
228
|
+
"NODE_EXTRA_CA_CERTS",
|
|
229
|
+
"NPM_CONFIG_REGISTRY",
|
|
230
|
+
"NPM_CONFIG_USERCONFIG",
|
|
231
|
+
"NPM_CONFIG_GLOBALCONFIG",
|
|
214
232
|
"PI_*",
|
|
215
233
|
"PI_CREW_*",
|
|
216
234
|
"PI_TEAMS_*",
|
|
@@ -263,12 +263,41 @@ export function readCrewAgentStatus(manifest: TeamRunManifest, taskOrAgentId: st
|
|
|
263
263
|
}
|
|
264
264
|
|
|
265
265
|
const agentEventSeqCache = new Map<string, { size: number; mtimeMs: number; seq: number }>();
|
|
266
|
+
const AGENT_EVENT_SEQ_SIDECAR = ".seq";
|
|
267
|
+
|
|
268
|
+
function readSeqFromSidecar(filePath: string): number | undefined {
|
|
269
|
+
try {
|
|
270
|
+
const raw = fs.readFileSync(`${filePath}.${AGENT_EVENT_SEQ_SIDECAR}`, "utf-8");
|
|
271
|
+
const n = Number.parseInt(raw, 10);
|
|
272
|
+
return Number.isFinite(n) && n > 0 ? n : undefined;
|
|
273
|
+
} catch {
|
|
274
|
+
return undefined;
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
function writeSeqToSidecar(filePath: string, seq: number): void {
|
|
279
|
+
try {
|
|
280
|
+
fs.writeFileSync(`${filePath}.${AGENT_EVENT_SEQ_SIDECAR}`, String(seq));
|
|
281
|
+
} catch (error) {
|
|
282
|
+
logInternalError("crew-agent-records.seq-sidecar", error, `filePath=${filePath}`);
|
|
283
|
+
}
|
|
284
|
+
}
|
|
266
285
|
|
|
267
286
|
function nextAgentEventSeq(filePath: string): number {
|
|
268
|
-
if (!fs.existsSync(filePath))
|
|
287
|
+
if (!fs.existsSync(filePath)) {
|
|
288
|
+
// Clean up stale sidecar when main file is gone.
|
|
289
|
+
try { fs.unlinkSync(`${filePath}.${AGENT_EVENT_SEQ_SIDECAR}`); } catch {}
|
|
290
|
+
return 1;
|
|
291
|
+
}
|
|
269
292
|
const stat = fs.statSync(filePath);
|
|
270
293
|
const cached = agentEventSeqCache.get(filePath);
|
|
271
294
|
if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) return cached.seq + 1;
|
|
295
|
+
// FIX: Try sidecar file for O(1) lookup before falling back to O(n) scan.
|
|
296
|
+
const sidecarSeq = readSeqFromSidecar(filePath);
|
|
297
|
+
if (sidecarSeq !== undefined) {
|
|
298
|
+
agentEventSeqCache.set(filePath, { size: stat.size, mtimeMs: stat.mtimeMs, seq: sidecarSeq });
|
|
299
|
+
return sidecarSeq + 1;
|
|
300
|
+
}
|
|
272
301
|
let max = 0;
|
|
273
302
|
for (const line of fs.readFileSync(filePath, "utf-8").split(/\r?\n/)) {
|
|
274
303
|
if (!line.trim()) continue;
|
|
@@ -281,6 +310,7 @@ function nextAgentEventSeq(filePath: string): number {
|
|
|
281
310
|
}
|
|
282
311
|
}
|
|
283
312
|
agentEventSeqCache.set(filePath, { size: stat.size, mtimeMs: stat.mtimeMs, seq: max });
|
|
313
|
+
writeSeqToSidecar(filePath, max);
|
|
284
314
|
return max + 1;
|
|
285
315
|
}
|
|
286
316
|
|
|
@@ -292,6 +322,7 @@ export function appendCrewAgentEvent(manifest: TeamRunManifest, taskId: string,
|
|
|
292
322
|
try {
|
|
293
323
|
const stat = fs.statSync(filePath);
|
|
294
324
|
agentEventSeqCache.set(filePath, { size: stat.size, mtimeMs: stat.mtimeMs, seq });
|
|
325
|
+
writeSeqToSidecar(filePath, seq);
|
|
295
326
|
} catch (error) {
|
|
296
327
|
logInternalError("crew-agent-records.stat", error, `filePath=${filePath}`);
|
|
297
328
|
}
|
|
@@ -205,6 +205,20 @@ export async function runTeamTask(
|
|
|
205
205
|
input.taskRuntimeOverride ??
|
|
206
206
|
input.runtimeKind ??
|
|
207
207
|
(input.executeWorkers ? "child-process" : "scaffold");
|
|
208
|
+
// FIX: Check signal before persisting state — if cancelled, skip the write.
|
|
209
|
+
if (input.signal?.aborted) {
|
|
210
|
+
const cancelReason = cancellationReasonFromSignal(input.signal);
|
|
211
|
+
const cancelledTask: TeamTaskState = {
|
|
212
|
+
...task,
|
|
213
|
+
status: "cancelled",
|
|
214
|
+
error: `${cancelReason.code}: ${cancelReason.message}`,
|
|
215
|
+
finishedAt: new Date().toISOString(),
|
|
216
|
+
};
|
|
217
|
+
return {
|
|
218
|
+
manifest: input.manifest,
|
|
219
|
+
tasks: updateTask(tasks, cancelledTask),
|
|
220
|
+
};
|
|
221
|
+
}
|
|
208
222
|
tasks = persistSingleTaskUpdate(manifest, tasks, task);
|
|
209
223
|
if (runtimeKind === "child-process")
|
|
210
224
|
({ task, tasks } = checkpointTask(
|
|
@@ -135,7 +135,16 @@ export function readActiveRunRegistry(maxEntries = DEFAULT_CACHE.manifestMaxEntr
|
|
|
135
135
|
}
|
|
136
136
|
|
|
137
137
|
function writeEntries(entries: ActiveRunRegistryEntry[]): void {
|
|
138
|
-
const
|
|
138
|
+
const max = DEFAULT_CACHE.manifestMaxEntries;
|
|
139
|
+
// FIX: Emit warning when entries overflow the cap, instead of silent drop.
|
|
140
|
+
if (entries.length > max) {
|
|
141
|
+
logInternalError(
|
|
142
|
+
"active-run-registry.overflow",
|
|
143
|
+
new Error(`${entries.length - max} entries dropped (cap=${max})`),
|
|
144
|
+
JSON.stringify({ dropped: entries.length - max, total: entries.length, cap: max }),
|
|
145
|
+
);
|
|
146
|
+
}
|
|
147
|
+
const trimmed = entries.slice(0, max);
|
|
139
148
|
fs.mkdirSync(path.dirname(registryPath()), { recursive: true });
|
|
140
149
|
// 2.4 — dual-ship: write both formats. Readers prefer binary; legacy
|
|
141
150
|
// readers (other tools / older releases) keep using the JSON file.
|
|
@@ -66,6 +66,10 @@ export function cleanupOldArtifacts(artifactsRoot: string, options: ArtifactClea
|
|
|
66
66
|
const cutoff = nowMs() - maxAgeMs;
|
|
67
67
|
let didCleanup = false;
|
|
68
68
|
try {
|
|
69
|
+
// FIX: Use { withFileTypes: true } to get Dirent objects (with isDirectory/isFile
|
|
70
|
+
// info), avoiding the need for a separate statSync per entry just to check the
|
|
71
|
+
// type. We still need statSync for mtime, but only on entries that passed the
|
|
72
|
+
// marker-file and symlink filters.
|
|
69
73
|
const entries = fs.readdirSync(artifactsRoot, { withFileTypes: true });
|
|
70
74
|
for (const entry of entries) {
|
|
71
75
|
if (entry.name === markerFile) continue;
|
|
@@ -74,7 +78,8 @@ export function cleanupOldArtifacts(artifactsRoot: string, options: ArtifactClea
|
|
|
74
78
|
try {
|
|
75
79
|
const stat = fs.statSync(target);
|
|
76
80
|
if (stat.mtimeMs >= cutoff) continue;
|
|
77
|
-
|
|
81
|
+
// Use Dirent type info instead of stat.isDirectory(); the statSync above is still needed for mtime
|
|
82
|
+
if (entry.isDirectory()) {
|
|
78
83
|
fs.rmSync(target, { recursive: true, force: true });
|
|
79
84
|
} else {
|
|
80
85
|
fs.unlinkSync(target);
|
package/src/state/event-log.ts
CHANGED
|
@@ -167,11 +167,16 @@ function nextSequence(eventsPath: string): number {
|
|
|
167
167
|
if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
|
|
168
168
|
return cached.seq + 1;
|
|
169
169
|
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
170
|
+
// FIX: Trust the sidecar seq file if it exists and the file is non-empty.
|
|
171
|
+
// Only fall back to O(n) scan if sidecar is missing or file shrunk unexpectedly.
|
|
172
|
+
const stored = readStoredSequence(eventsPath);
|
|
173
|
+
if (stored !== undefined && (!cached || stat.size >= cached.size)) {
|
|
174
|
+
sequenceCache.set(eventsPath, { size: stat.size, mtimeMs: stat.mtimeMs, seq: stored });
|
|
175
|
+
return stored + 1;
|
|
173
176
|
}
|
|
177
|
+
const current = scanSequence(eventsPath);
|
|
174
178
|
sequenceCache.set(eventsPath, { size: stat.size, mtimeMs: stat.mtimeMs, seq: current });
|
|
179
|
+
persistSequence(eventsPath, current);
|
|
175
180
|
return current + 1;
|
|
176
181
|
}
|
|
177
182
|
|
package/src/state/locks.ts
CHANGED
|
@@ -40,6 +40,25 @@ function isLockStale(filePath: string, staleMs: number): boolean {
|
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
+
function isLockHolderAlive(filePath: string): boolean {
|
|
44
|
+
try {
|
|
45
|
+
const raw = fs.readFileSync(filePath, "utf-8");
|
|
46
|
+
const parsed = JSON.parse(raw) as { pid?: unknown };
|
|
47
|
+
const pid = typeof parsed.pid === "number" ? parsed.pid : undefined;
|
|
48
|
+
if (pid === undefined) return true; // Unknown holder — assume alive to be safe
|
|
49
|
+
try {
|
|
50
|
+
process.kill(pid, 0);
|
|
51
|
+
return true; // Signal 0 succeeded — process is alive
|
|
52
|
+
} catch (error) {
|
|
53
|
+
const code = (error as NodeJS.ErrnoException).code;
|
|
54
|
+
// EPERM: process exists but we don't have permission to signal it
|
|
55
|
+
return code === "EPERM";
|
|
56
|
+
}
|
|
57
|
+
} catch {
|
|
58
|
+
return true; // Can't read — assume alive to be safe
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
43
62
|
function writeLockFile(filePath: string): void {
|
|
44
63
|
const fd = fs.openSync(filePath, fs.constants.O_WRONLY | fs.constants.O_CREAT | fs.constants.O_EXCL, 0o644);
|
|
45
64
|
try {
|
|
@@ -62,11 +81,17 @@ function acquireLockWithRetry(filePath: string, staleMs: number): void {
|
|
|
62
81
|
if (Date.now() > deadline) {
|
|
63
82
|
throw new Error(`Run '${path.basename(filePath)}' is locked by another operation.`);
|
|
64
83
|
}
|
|
65
|
-
//
|
|
66
|
-
|
|
84
|
+
// FIX: Use both staleness AND PID liveness to decide if we can steal
|
|
85
|
+
// a lock. Previously only staleness was checked, so a process whose
|
|
86
|
+
// PID was recently reused by another process could have its lock
|
|
87
|
+
// stolen even while still active. Now: fresh+alive = fail, else = clear.
|
|
88
|
+
const isStale = isLockStale(filePath, staleMs);
|
|
89
|
+
const isHolderAlive = isLockHolderAlive(filePath);
|
|
90
|
+
if (!isStale && isHolderAlive) {
|
|
91
|
+
// Lock is fresh AND holder is alive — fail fast
|
|
67
92
|
throw new Error(`Run '${path.basename(filePath)}' is locked by another operation.`);
|
|
68
93
|
}
|
|
69
|
-
// Lock is stale
|
|
94
|
+
// Lock is stale OR holder is dead — safe to clear
|
|
70
95
|
try {
|
|
71
96
|
fs.rmSync(filePath, { force: true });
|
|
72
97
|
} catch { /* race — let loop retry */ }
|
|
@@ -118,14 +143,19 @@ async function acquireLockWithRetryAsync(filePath: string, staleMs: number): Pro
|
|
|
118
143
|
* Uses the same O_EXCL atomic create strategy as run locks.
|
|
119
144
|
*/
|
|
120
145
|
export function withFileLockSync<T>(filePath: string, fn: () => T, options: RunLockOptions = {}): T {
|
|
146
|
+
// FIX: Use a separate .lock sidecar so the lock file doesn't collide with
|
|
147
|
+
// the file being protected. Previously withFileLockSync used the file path
|
|
148
|
+
// itself as the lock, which meant any operation on the same file (read,
|
|
149
|
+
// append, or even the lock acquisition itself) would race with the lock.
|
|
150
|
+
const lockFile = `${filePath}.lock`;
|
|
121
151
|
const staleMs = options.staleMs ?? DEFAULT_STALE_MS;
|
|
122
|
-
fs.mkdirSync(path.dirname(
|
|
123
|
-
acquireLockWithRetry(
|
|
152
|
+
fs.mkdirSync(path.dirname(lockFile), { recursive: true });
|
|
153
|
+
acquireLockWithRetry(lockFile, staleMs);
|
|
124
154
|
try {
|
|
125
155
|
return fn();
|
|
126
156
|
} finally {
|
|
127
157
|
try {
|
|
128
|
-
fs.rmSync(
|
|
158
|
+
fs.rmSync(lockFile, { force: true });
|
|
129
159
|
} catch {
|
|
130
160
|
// Best-effort lock cleanup.
|
|
131
161
|
}
|
package/src/state/mailbox.ts
CHANGED
|
@@ -6,6 +6,8 @@ import { redactSecrets } from "../utils/redaction.ts";
|
|
|
6
6
|
import { logInternalError } from "../utils/internal-error.ts";
|
|
7
7
|
import { atomicWriteFile } from "./atomic-write.ts";
|
|
8
8
|
import { withEventLogLockSync } from "./event-log.ts";
|
|
9
|
+
import { withFileLockSync } from "./locks.ts";
|
|
10
|
+
import { DEFAULT_MAILBOX } from "../config/defaults.ts";
|
|
9
11
|
|
|
10
12
|
export type MailboxDirection = "inbox" | "outbox";
|
|
11
13
|
export type MailboxMessageStatus = "queued" | "delivered" | "acknowledged";
|
|
@@ -228,7 +230,7 @@ function safeReadMailboxFile(filePath: string, direction: MailboxDirection): Mai
|
|
|
228
230
|
* primary file. Readers continue to see all messages because
|
|
229
231
|
* `safeReadMailboxFile` walks both the primary file and any archives.
|
|
230
232
|
*/
|
|
231
|
-
const MAILBOX_ARCHIVE_THRESHOLD_BYTES =
|
|
233
|
+
const MAILBOX_ARCHIVE_THRESHOLD_BYTES = DEFAULT_MAILBOX.perFileThresholdBytes;
|
|
232
234
|
function rotateMailboxFileIfNeeded(filePath: string, thresholdBytes = MAILBOX_ARCHIVE_THRESHOLD_BYTES): boolean {
|
|
233
235
|
try {
|
|
234
236
|
if (!fs.existsSync(filePath)) return false;
|
|
@@ -238,6 +240,8 @@ function rotateMailboxFileIfNeeded(filePath: string, thresholdBytes = MAILBOX_AR
|
|
|
238
240
|
const archivePath = `${filePath}.${ts}.archive.jsonl`;
|
|
239
241
|
fs.renameSync(filePath, archivePath);
|
|
240
242
|
fs.writeFileSync(filePath, "", "utf-8");
|
|
243
|
+
// FIX: Prune old archives so total per-direction count stays bounded.
|
|
244
|
+
pruneOldMailboxArchives(filePath);
|
|
241
245
|
return true;
|
|
242
246
|
} catch (error) {
|
|
243
247
|
logInternalError("mailbox.rotate", error, filePath);
|
|
@@ -245,6 +249,27 @@ function rotateMailboxFileIfNeeded(filePath: string, thresholdBytes = MAILBOX_AR
|
|
|
245
249
|
}
|
|
246
250
|
}
|
|
247
251
|
|
|
252
|
+
/**
|
|
253
|
+
* Keep at most `DEFAULT_MAILBOX.maxArchivesPerDirection` archive files per
|
|
254
|
+
* mailbox. Older archives are deleted. Prevents unbounded growth on long runs.
|
|
255
|
+
*/
|
|
256
|
+
function pruneOldMailboxArchives(mailboxFilePath: string): void {
|
|
257
|
+
try {
|
|
258
|
+
const dir = path.dirname(mailboxFilePath);
|
|
259
|
+
const base = path.basename(mailboxFilePath);
|
|
260
|
+
const archives = fs
|
|
261
|
+
.readdirSync(dir)
|
|
262
|
+
.filter((f) => f.startsWith(base) && f.includes(".archive.jsonl"))
|
|
263
|
+
.sort(); // Chronological (ISO timestamp in filename)
|
|
264
|
+
const excess = archives.length - DEFAULT_MAILBOX.maxArchivesPerDirection;
|
|
265
|
+
for (let i = 0; i < excess; i += 1) {
|
|
266
|
+
fs.rmSync(path.join(dir, archives[i]), { force: true });
|
|
267
|
+
}
|
|
268
|
+
} catch (error) {
|
|
269
|
+
logInternalError("mailbox.prune", error, mailboxFilePath);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
248
273
|
export function readMailbox(manifest: TeamRunManifest, direction?: MailboxDirection, taskId?: string, kind?: MailboxMessageKind): MailboxMessage[] {
|
|
249
274
|
const directions = direction ? [direction] : ["inbox", "outbox"] as const;
|
|
250
275
|
return directions.flatMap((item) => safeReadMailboxFile(mailboxFile(manifest, item, taskId), item)).filter((msg) => !kind || msg.kind === kind).sort((a, b) => a.createdAt.localeCompare(b.createdAt));
|
|
@@ -395,29 +420,34 @@ export function updateMailboxMessageReply(manifest: TeamRunManifest, originalMes
|
|
|
395
420
|
|
|
396
421
|
for (const { filePath, direction } of filesToSearch) {
|
|
397
422
|
if (!fs.existsSync(filePath)) continue;
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
const
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
msg
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
423
|
+
// FIX: Wrap read-modify-write in withFileLockSync to prevent concurrent
|
|
424
|
+
// updates from clobbering each other (each reply rewrites the whole file).
|
|
425
|
+
const found = withFileLockSync(filePath, () => {
|
|
426
|
+
const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
|
|
427
|
+
let localFound = false;
|
|
428
|
+
const updatedLines: string[] = [];
|
|
429
|
+
for (const line of lines) {
|
|
430
|
+
try {
|
|
431
|
+
const parsed = JSON.parse(line) as unknown;
|
|
432
|
+
const msg = parseMailboxMessage(parsed, direction);
|
|
433
|
+
if (msg && msg.id === originalMessageId) {
|
|
434
|
+
msg.repliedAt = new Date().toISOString();
|
|
435
|
+
msg.replyContent = replyContent;
|
|
436
|
+
updatedLines.push(JSON.stringify(redactSecrets(msg)));
|
|
437
|
+
localFound = true;
|
|
438
|
+
} else {
|
|
439
|
+
updatedLines.push(line);
|
|
440
|
+
}
|
|
441
|
+
} catch {
|
|
411
442
|
updatedLines.push(line);
|
|
412
443
|
}
|
|
413
|
-
} catch {
|
|
414
|
-
updatedLines.push(line);
|
|
415
444
|
}
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
return;
|
|
420
|
-
}
|
|
445
|
+
if (localFound) {
|
|
446
|
+
atomicWriteFile(filePath, `${updatedLines.join("\n")}\n`);
|
|
447
|
+
}
|
|
448
|
+
return localFound;
|
|
449
|
+
});
|
|
450
|
+
if (found) return;
|
|
421
451
|
}
|
|
422
452
|
// Not finding the original is non-fatal; the reply is still delivered.
|
|
423
453
|
}
|
|
@@ -440,26 +470,31 @@ export function validateMailbox(manifest: TeamRunManifest, options: { repair?: b
|
|
|
440
470
|
for (const direction of ["inbox", "outbox"] as const) {
|
|
441
471
|
if (options.signal?.aborted) break;
|
|
442
472
|
const filePath = mailboxFile(manifest, direction);
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
const
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
const
|
|
452
|
-
if (!
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
473
|
+
// FIX: Wrap read + optional repair in withFileLockSync so concurrent appends
|
|
474
|
+
// don't race with the read-modify-write. Mailbox files are capped at 10MB
|
|
475
|
+
// (MAILBOX_ARCHIVE_THRESHOLD_BYTES), so the per-call memory is bounded.
|
|
476
|
+
withFileLockSync(filePath, () => {
|
|
477
|
+
const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter(Boolean);
|
|
478
|
+
const validLines: string[] = [];
|
|
479
|
+
for (let i = 0; i < lines.length; i += 1) {
|
|
480
|
+
if (options.signal?.aborted) break;
|
|
481
|
+
const line = lines[i];
|
|
482
|
+
if (!line) continue;
|
|
483
|
+
try {
|
|
484
|
+
const parsed = JSON.parse(line) as unknown;
|
|
485
|
+
const message = parseMailboxMessage(parsed, direction);
|
|
486
|
+
if (!message) throw new Error("invalid message schema");
|
|
487
|
+
validLines.push(JSON.stringify(redactSecrets(message)));
|
|
488
|
+
} catch (error) {
|
|
489
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
490
|
+
issues.push({ level: "error", path: filePath, message });
|
|
491
|
+
}
|
|
457
492
|
}
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
}
|
|
493
|
+
if (options.repair && validLines.length !== lines.length) {
|
|
494
|
+
atomicWriteFile(filePath, `${validLines.join("\n")}${validLines.length ? "\n" : ""}`);
|
|
495
|
+
repaired.push(filePath);
|
|
496
|
+
}
|
|
497
|
+
});
|
|
463
498
|
}
|
|
464
499
|
const delivery = readDeliveryState(manifest);
|
|
465
500
|
const allMessages = readMailbox(manifest);
|
package/src/state/state-store.ts
CHANGED
|
@@ -324,18 +324,39 @@ export function loadRunManifestById(cwd: string, runId: string): { manifest: Tea
|
|
|
324
324
|
}
|
|
325
325
|
}
|
|
326
326
|
|
|
327
|
-
|
|
327
|
+
// FIX: Re-stat and re-read inside a single synchronous block to close the
|
|
328
|
+
// TOCTOU window. We use a sentinel-based re-read: if mtime/size changed
|
|
329
|
+
// between the initial stat and the read, re-read until stable. With file
|
|
330
|
+
// sizes typically small (<5MB), the extra cost is negligible. Note: this
|
|
331
|
+
// doesn't fully prevent torn writes — callers needing strict consistency
|
|
332
|
+
// should use withRunLock() around the whole load+modify+save sequence.
|
|
333
|
+
let attempts = 0;
|
|
334
|
+
let manifest: TeamRunManifest | undefined;
|
|
335
|
+
let tasks: TeamTaskState[] | undefined;
|
|
336
|
+
while (attempts < 3) {
|
|
337
|
+
const freshStat = fs.statSync(manifestPath);
|
|
338
|
+
manifest = readJsonFile<TeamRunManifest>(manifestPath);
|
|
339
|
+
const freshTasksStat = fs.existsSync(tasksPath) ? fs.statSync(tasksPath) : undefined;
|
|
340
|
+
tasks = readJsonFile<TeamTaskState[]>(tasksPath) ?? [];
|
|
341
|
+
// If size/mtime didn't change between stat and read, we're consistent.
|
|
342
|
+
if (freshStat.mtimeMs === manifestStat.mtimeMs && freshStat.size === manifestStat.size
|
|
343
|
+
&& (!freshTasksStat || (freshTasksStat.mtimeMs === tasksStat?.mtimeMs && freshTasksStat.size === tasksStat?.size))) {
|
|
344
|
+
break;
|
|
345
|
+
}
|
|
346
|
+
attempts += 1;
|
|
347
|
+
manifestStat = freshStat;
|
|
348
|
+
tasksStat = freshTasksStat;
|
|
349
|
+
}
|
|
328
350
|
if (!manifest || !validateRunManifestPaths(cwd, runId, manifest, stateRoot, tasksPath)) return undefined;
|
|
329
|
-
const tasks = readJsonFile<TeamTaskState[]>(tasksPath) ?? [];
|
|
330
351
|
setManifestCache(stateRoot, {
|
|
331
352
|
manifest,
|
|
332
|
-
tasks,
|
|
353
|
+
tasks: tasks ?? [],
|
|
333
354
|
manifestMtimeMs: manifestStat.mtimeMs,
|
|
334
355
|
manifestSize: manifestStat.size,
|
|
335
356
|
tasksMtimeMs,
|
|
336
357
|
tasksSize: tasksStat?.size ?? 0,
|
|
337
358
|
});
|
|
338
|
-
return { manifest, tasks };
|
|
359
|
+
return { manifest, tasks: tasks ?? [] };
|
|
339
360
|
}
|
|
340
361
|
|
|
341
362
|
export async function loadRunManifestByIdAsync(cwd: string, runId: string): Promise<{ manifest: TeamRunManifest; tasks: TeamTaskState[] } | undefined> {
|
package/src/tools/safe-bash.ts
CHANGED
|
@@ -274,8 +274,9 @@ export function createSafeBash(options: SafeBashOptions = {}) {
|
|
|
274
274
|
* These can be used in allowPatterns for specific use cases
|
|
275
275
|
*/
|
|
276
276
|
export const COMMON_SAFE_PATTERNS = {
|
|
277
|
-
//
|
|
278
|
-
|
|
277
|
+
// FIX: Stricter regex — target must start with tmp/, cache/, node_modules/, dist/, or build/
|
|
278
|
+
// (with optional ./ prefix). Rejects leading path traversal (./../../../other) and absolute paths; NOTE: ".." after the prefix (e.g. tmp/../x) still matches.
|
|
279
|
+
safeRm: /rm\s+(-[a-zA-Z]*f[a-zA-Z]*\s+)?(?:\.\/)?(?:tmp|cache|node_modules|dist|build)\/[a-zA-Z0-9._/-]+$/,
|
|
279
280
|
// Safe git operations
|
|
280
281
|
safeGit: /\bgit\s+(clone|pull|push|commit|add|status|diff|log|branch|checkout|merge|rebase)/,
|
|
281
282
|
// Safe npm/yarn/pnpm
|