@mindrian_os/install 1.13.0-beta.22 → 1.13.0-beta.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /*
5
+ * Copyright (c) 2026 Mindrian. BSL 1.1.
6
+ *
7
+ * MindrianOS Plugin -- one-shot npm-install lock (Option D, hybrid self-heal).
8
+ *
9
+ * Purpose: when a fresh plugin cache lands with NO node_modules, BOTH bundled
10
+ * MCP servers (mindrian-brain + mindrian-os) can spawn at the same instant and
11
+ * each independently discover MODULE_NOT_FOUND. If both ran `npm install`
12
+ * concurrently in the same directory they would corrupt node_modules. This lock
13
+ * guarantees that exactly ONE process runs the install while the other WAITS
14
+ * for it to finish, then proceeds.
15
+ *
16
+ * This is deliberately NOT lib/core/write-lock.cjs. write-lock is room-scoped,
17
+ * SQLite-scoped, has a 5s stale threshold, and THROWS on contention. The
18
+ * npm-install path needs the opposite contract: a longer stale window (a cold
19
+ * `npm install` can take 30s+) and a BLOCKING wait, not a throw -- the loser of
20
+ * the race must sit still until node_modules is populated.
21
+ *
22
+ * CORRECTNESS FIXES (remote code review, 2026-05-21 -- folded into beta.23):
23
+ * - bug_004: lock creation is now ATOMIC via fs.linkSync (write a fully
24
+ * populated temp file, then atomically link it into place). The pre-fix
25
+ * openSync('wx') created a zero-byte file that a separate writeSync later
26
+ * populated -- a racing peer could read the empty file mid-write, treat it
27
+ * as corrupt, unlink the winner's live lock, and run a second concurrent
28
+ * install. readLock + waitForUnlock additionally distinguish a transient
29
+ * empty mid-write file from genuinely corrupt JSON.
30
+ * - bug_001: STALE_THRESHOLD_MS is raised strictly above the 120s install
31
+ * timeout, and the staleness checks use AND not OR -- a lock is reclaimed
32
+ * only when it is BOTH old AND its owning pid is dead. A healthy install
33
+ * legitimately running 90-120s is no longer declared abandoned.
34
+ *
35
+ * Canon Part 8: zero network surface in this file. Pure node built-ins. The
36
+ * `npm install` itself is run by the caller (mcp-dep-heal.cjs), not here.
37
+ *
38
+ * HARD RULE: no em-dashes anywhere in this file (hyphens only).
39
+ */
40
+
41
+ const fs = require('node:fs');
42
+ const path = require('node:path');
43
+
44
// Name of the lock file created inside the install directory.
const LOCK_FILENAME = '.mindrian-npm-install.lock';

// Minimum age before a lock may be considered abandoned.
// The caller's install is given a 120000 ms (120s) timeout, so this value MUST
// stay strictly above 120s: otherwise a healthy install still running at the
// 90-120s mark would look abandoned and a peer would start a SECOND concurrent
// install (bug_001). 180s leaves 60s of headroom. Age alone never reclaims a
// lock: isReclaimable also requires the owning pid to be dead (AND, not OR).
const STALE_THRESHOLD_MS = 180 * 1000;

// How long the loser of the race waits for the winner before giving up.
// Kept strictly above STALE_THRESHOLD_MS so a lock that goes stale during the
// wait is seen as reclaimable before the waiter times out.
const WAIT_TIMEOUT_MS = 200 * 1000;

// Delay between two polls of the lock file while waiting.
const POLL_INTERVAL_MS = 200;

// A lock file created through a non-atomic path can be observed briefly empty
// (created, not yet written). readLock re-reads an empty file this many times,
// this far apart, before reporting it as EMPTY (bug_004 defence-in-depth
// alongside the atomic linkSync create path).
const EMPTY_FILE_RETRY_ATTEMPTS = 5;
const EMPTY_FILE_RETRY_INTERVAL_MS = 20;
66
+
67
/**
 * Build the path of the install lock file inside `dir`.
 *
 * @param {string} dir - directory the install runs in
 * @returns {string} `dir` joined with LOCK_FILENAME
 */
function lockPath(dir) {
  return path.join(dir, LOCK_FILENAME);
}
70
+
71
/**
 * Portable synchronous short sleep (no extra dependency).
 *
 * Blocks the calling thread for roughly `ms` milliseconds. A non-positive or
 * NaN/undefined delay returns immediately.
 *
 * @param {number} ms - delay in milliseconds
 * @returns {void}
 */
function sleepSync(ms) {
  // Atomics.wait coerces a NaN/undefined timeout to Infinity, which would block
  // this thread forever, while the spin fallback below returns at once for the
  // same input. Reject unusable delays up front so both paths agree.
  if (!(ms > 0)) return;
  try {
    // Nothing ever notifies this private buffer, so the wait always runs to
    // its timeout.
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
  } catch (_) {
    // SharedArrayBuffer unavailable in some sandboxes -- busy-wait instead.
    const until = Date.now() + ms;
    while (Date.now() < until) { /* spin */ }
  }
}
81
+
82
/**
 * Read and parse a lock file.
 *
 * Returns one of three things so callers can tell a transient empty file from
 * a genuinely corrupt one (bug_004):
 *   - the parsed lock object -> the file holds a JSON object
 *   - the string 'EMPTY'     -> the file exists but stayed empty or
 *                               whitespace-only across the short retries; a
 *                               mid-write window OR a 0-byte leftover. The
 *                               caller should retry, not assume the lock is
 *                               dead.
 *   - null                   -> the file is missing, unreadable, holds invalid
 *                               JSON, or holds valid JSON that is not a plain
 *                               object (safe to clear).
 *
 * @param {string} p - lock file path
 * @returns {object|'EMPTY'|null}
 */
function readLock(p) {
  for (let attempt = 0; attempt < EMPTY_FILE_RETRY_ATTEMPTS; attempt++) {
    let raw;
    try {
      raw = fs.readFileSync(p, 'utf8');
    } catch (_) {
      return null; // missing or unreadable
    }

    if (raw.trim() === '') {
      // Empty / whitespace-only: possibly a mid-write window. Re-read after a
      // short pause unless this was the last attempt.
      if (attempt < EMPTY_FILE_RETRY_ATTEMPTS - 1) {
        sleepSync(EMPTY_FILE_RETRY_INTERVAL_MS);
        continue;
      }
      return 'EMPTY';
    }

    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch (_) {
      // Non-empty but not valid JSON -- genuinely corrupt.
      return null;
    }
    // Valid JSON that is not an object (a number, an array, `null`, or a string
    // such as "EMPTY" that would otherwise be mistaken for the sentinel) cannot
    // be a lock. Report it as corrupt so callers clear it.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return null;
    }
    return parsed;
  }
  // Only reachable when EMPTY_FILE_RETRY_ATTEMPTS is not positive.
  return 'EMPTY';
}
130
+
131
/**
 * Report whether a process with the given pid currently exists.
 *
 * Probes with signal 0, which performs the existence/permission check without
 * delivering a signal.
 *
 * @param {number} pid - process id read from a lock file
 * @returns {boolean} true if the probe succeeds or fails with EPERM; false for
 *                    any other failure or for a pid that is not a positive
 *                    integer.
 */
function pidAlive(pid) {
  // Only a positive integer names a single process. A zero or negative value
  // passed to process.kill addresses a process group instead (-1 means every
  // process the caller may signal), so a corrupt lock carrying such a pid
  // would look permanently alive and could never be reclaimed.
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (e) {
    // EPERM means the process exists but is owned by another user -- still alive.
    return Boolean(e) && e.code === 'EPERM';
  }
}
141
+
142
/**
 * Whether a lock described by `data` is reclaimable as abandoned.
 *
 * bug_001 fix: this uses AND, not OR. A lock is reclaimed ONLY when it is BOTH
 * older than STALE_THRESHOLD_MS AND its owning pid is reported dead by
 * pidAlive. A long-but-live install keeps its lock no matter how old it gets;
 * a dead-owner lock that has not yet aged out keeps its lock too.
 *
 * A missing or non-numeric timestamp is treated as 0, i.e. infinitely old, so
 * the decision then rests on the pid check alone.
 *
 * @param {object} data - parsed lock contents (must be a non-null object)
 * @returns {boolean}
 */
function isReclaimable(data) {
  // `Date.now() - <non-numeric>` is NaN and `NaN > threshold` is always false,
  // which would make a lock with a garbage timestamp impossible to reclaim
  // even after its owner died. Fall back to 0 for anything not finite.
  const stamp = Number(data.timestamp);
  const age = Date.now() - (Number.isFinite(stamp) ? stamp : 0);
  return age > STALE_THRESHOLD_MS && !pidAlive(data.pid);
}
161
+
162
/**
 * Try to acquire the install lock for `dir`.
 *
 * bug_004 fix: lock creation is ATOMIC. The payload is written to a temp file
 * named after this pid FIRST, and only then is fs.linkSync(tmp, p) used to
 * publish it at the lock path. linkSync fails with EEXIST if the target
 * already exists, so a lock published here is always observed fully written.
 *
 * Up to three attempts are made. On contention the existing lock is inspected:
 *   - 'EMPTY'      -> left in place; pause, then retry
 *   - corrupt      -> unlinked, then retry
 *   - reclaimable  -> unlinked, then retry
 *   - otherwise    -> return false (another process holds the lock)
 *
 * NOTE(review): the reclaim path reads the lock and then unlinks it in two
 * separate steps. Two peers that both observe the same abandoned lock can each
 * unlink-and-relink, so the slower peer may remove the lock the faster one has
 * just published. Closing that window needs a different reclaim protocol.
 *
 * @param {string} dir - directory the install will run in
 * @returns {boolean} true if the caller should run the install: either this
 *                    process published the lock, or the guard could not be
 *                    used (temp write failed, link failed for a reason other
 *                    than EEXIST, or all attempts were used up). false if
 *                    another process holds a lock that is not reclaimable
 *                    (the caller should use waitForUnlock instead).
 */
function acquireInstallLock(dir) {
  const p = lockPath(dir);
  const tmp = p + '.' + process.pid + '.tmp';

  for (let attempt = 0; attempt < 3; attempt++) {
    // Stamp the payload per attempt so a lock published after a retry pause
    // records when it was actually taken.
    const payload = JSON.stringify({ pid: process.pid, timestamp: Date.now() });

    // Write the payload to a private temp file, fully, before publishing it.
    try {
      fs.writeFileSync(tmp, payload);
    } catch (e) {
      // Cannot write a temp file (read-only dir, etc). Drop any partial file
      // (best effort) and let the caller run the install unguarded -- better
      // than not healing.
      try { fs.unlinkSync(tmp); } catch (_) { /* nothing to clean up */ }
      return true;
    }

    try {
      // Atomic publish: fails EEXIST if `p` already exists.
      fs.linkSync(tmp, p);
      // We won. The temp file has served its purpose; remove it (best effort).
      try { fs.unlinkSync(tmp); } catch (_) { /* litter only */ }
      return true;
    } catch (e) {
      // Always drop our temp file before deciding what to do next.
      try { fs.unlinkSync(tmp); } catch (_) { /* litter only */ }

      if (!e || e.code !== 'EEXIST') {
        // linkSync failed for a non-contention reason (filesystem without
        // hardlink support, cross-device, permissions). Fall back to running
        // the install unguarded -- better than not healing.
        return true;
      }

      // The lock path is already held. Inspect it.
      const data = readLock(p);
      if (data === 'EMPTY') {
        // Transient mid-write window (or a 0-byte leftover from a non-atomic
        // path). Do NOT unlink -- a peer may be about to populate it. Pause,
        // then retry the acquire.
        sleepSync(EMPTY_FILE_RETRY_INTERVAL_MS * EMPTY_FILE_RETRY_ATTEMPTS);
        continue;
      }
      if (!data) {
        // Corrupt, unreadable, or gone since the link attempt -- clear and retry.
        try { fs.unlinkSync(p); } catch (_) { /* already gone */ }
        continue;
      }
      if (isReclaimable(data)) {
        // Abandoned: BOTH stale AND its owner is dead. Reclaim it.
        try { fs.unlinkSync(p); } catch (_) { /* already gone */ }
        continue;
      }
      // A live (or not-yet-reclaimable) process holds the lock -- this process
      // is the loser and must wait for the winner.
      return false;
    }
  }
  // Attempts used up -- give up the guard and let the caller install.
  return true;
}
238
+
239
/**
 * Release the lock for `dir`. Never throws.
 *
 * The lock file is left alone only when it parses to a lock whose pid is set
 * and differs from this process's pid. In every other case (our own pid, no
 * pid, 'EMPTY', corrupt, missing) an unlink is attempted and any error from it
 * is ignored.
 *
 * @param {string} dir - directory passed to acquireInstallLock
 * @returns {void}
 */
function releaseInstallLock(dir) {
  const p = lockPath(dir);
  try {
    const data = readLock(p);
    // Ownership is decided by pid comparison only; liveness of the other pid
    // is not checked here. 'EMPTY' and null carry no owner pid to compare, so
    // they fall through to the unlink.
    const ownedByOther =
      data && data !== 'EMPTY' && data.pid && data.pid !== process.pid;
    if (ownedByOther) {
      return; // not ours
    }
    fs.unlinkSync(p);
  } catch (_) {
    // ENOENT or other -- silent by design: release is best effort.
  }
}
255
+
256
/**
 * Block until the lock for `dir` is gone, is corrupt, becomes reclaimable, or
 * WAIT_TIMEOUT_MS elapses.
 *
 * Synchronous by design: the calling thread is blocked between polls via
 * sleepSync.
 *
 * @param {string} dir - directory passed to acquireInstallLock
 * @returns {boolean} true if the lock file is missing, unreadable/corrupt, or
 *                    reclaimable (old AND owner dead); false on timeout.
 */
function waitForUnlock(dir) {
  const p = lockPath(dir);
  const deadline = Date.now() + WAIT_TIMEOUT_MS;

  while (Date.now() < deadline) {
    if (!fs.existsSync(p)) return true;

    const data = readLock(p);
    if (data === 'EMPTY') {
      // bug_004 symmetric fix: an empty file is a possible mid-write window,
      // NOT a cleared lock. Returning here would let the loser start its own
      // install while the winner is still running. Keep polling instead.
      sleepSync(POLL_INTERVAL_MS);
      continue;
    }
    if (!data) return true; // corrupt or removed since the check -- cleared

    // bug_001 fix: AND, not OR. Stop waiting only when the lock is BOTH stale
    // AND its owner is dead; a long-but-live install keeps us waiting.
    if (isReclaimable(data)) return true;

    sleepSync(POLL_INTERVAL_MS);
  }
  return false;
}
291
+
292
+ module.exports = {
293
+ acquireInstallLock,
294
+ releaseInstallLock,
295
+ waitForUnlock,
296
+ readLock,
297
+ isReclaimable,
298
+ pidAlive,
299
+ LOCK_FILENAME,
300
+ STALE_THRESHOLD_MS,
301
+ WAIT_TIMEOUT_MS,
302
+ };
@@ -0,0 +1,325 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /*
5
+ * Copyright (c) 2026 Mindrian. BSL 1.1.
6
+ *
7
+ * Regression tests for lib/core/npm-install-lock.cjs -- the one-shot
8
+ * npm-install lock guarding the MCP dependency self-heal backstop.
9
+ *
10
+ * These tests lock the two correctness fixes a remote code review found in the
11
+ * lockfile machinery (folded into v1.13.0-beta.23):
12
+ *
13
+ * bug_004 -- TOCTOU: non-atomic lock creation.
14
+ * The pre-fix openSync('wx') created a zero-byte file that a separate
15
+ * writeSync later populated. A racing peer could read the empty file
16
+ * mid-write, treat it as corrupt, unlink the winner's LIVE lock, and run a
17
+ * second concurrent `npm install`. The fix makes creation atomic via
18
+ * fs.linkSync (fully-written temp file, then atomic link).
19
+ *
20
+ * bug_001 -- stale threshold shorter than the install timeout.
21
+ * STALE_THRESHOLD_MS was 90s but runGuardedInstall's spawnSync install
22
+ * timeout is 120s; a healthy install running 90-120s was declared
23
+ * abandoned and (because the staleness check used OR) a peer unlinked the
24
+ * LIVE lock and started a second concurrent install. The fix raises
25
+ * STALE_THRESHOLD_MS strictly above 120s AND changes the check to AND
26
+ * (reclaim only when BOTH old AND owner-dead).
27
+ *
28
+ * HARD RULE: no em-dashes.
29
+ */
30
+
31
+ const assert = require('node:assert/strict');
32
+ const fs = require('node:fs');
33
+ const os = require('node:os');
34
+ const path = require('node:path');
35
+
36
+ const REPO_ROOT = path.resolve(__dirname, '..', '..');
37
+ const MODULE_PATH = path.join(REPO_ROOT, 'lib', 'core', 'npm-install-lock.cjs');
38
+ const lock = require(MODULE_PATH);
39
+ const {
40
+ acquireInstallLock,
41
+ releaseInstallLock,
42
+ waitForUnlock,
43
+ readLock,
44
+ isReclaimable,
45
+ LOCK_FILENAME,
46
+ STALE_THRESHOLD_MS,
47
+ WAIT_TIMEOUT_MS,
48
+ } = lock;
49
+
50
// Running tallies, reported by the summary line at the end of the script.
let passed = 0;
let failed = 0;

/** Record a passing test and print its name. */
function ok(name) {
  passed += 1;
  process.stdout.write(' ok ' + name + '\n');
}

/** Record a failing test and print its name plus the error message. */
function fail(name, err) {
  failed += 1;
  process.stdout.write(' FAIL ' + name + '\n');
  process.stdout.write(' ' + (err && err.message ? err.message : String(err)) + '\n');
}

/**
 * Run one synchronous test body. A throw counts as a failure; a normal return
 * counts as a pass. The return value of `fn` is not inspected, so an async
 * body would be counted as passed before it settles.
 *
 * @param {string} name - label printed with the result
 * @param {Function} fn - synchronous test body
 */
function test(name, fn) {
  try {
    fn();
    ok(name);
  } catch (err) {
    fail(name, err);
  }
}
65
+
66
/** Create a fresh, uniquely named lock directory under the OS temp dir. */
function tmpdir() {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'mos-npm-lock-test-'));
}

/** Path of the lock file inside `dir` (mirrors the module's own naming). */
function lockFile(dir) {
  return path.join(dir, LOCK_FILENAME);
}

// A pid used to stand in for a dead process.
// NOTE(review): presumably above any pid the host OS hands out; nothing in
// this script verifies that -- confirm for the platforms the suite runs on.
const DEAD_PID = 2147483646;
75
+
76
+ // --- bug_001: stale threshold + AND-gate ----------------------------------
77
+
78
+ // The install timeout in runGuardedInstall is 120000 ms. The stale threshold
79
+ // must sit strictly ABOVE it or a healthy long install gets reclaimed.
80
+ test('bug_001: STALE_THRESHOLD_MS is strictly above the 120s install timeout', () => {
81
+ const INSTALL_TIMEOUT_MS = 120 * 1000;
82
+ assert.ok(
83
+ STALE_THRESHOLD_MS > INSTALL_TIMEOUT_MS,
84
+ 'STALE_THRESHOLD_MS (' + STALE_THRESHOLD_MS + ') must exceed the 120000ms install timeout'
85
+ );
86
+ });
87
+
88
+ // WAIT_TIMEOUT_MS must sit above STALE so a just-gone-stale winner can still be
89
+ // reclaimed-and-retried by the loser rather than the loser timing out first.
90
+ test('bug_001: WAIT_TIMEOUT_MS is strictly above STALE_THRESHOLD_MS', () => {
91
+ assert.ok(
92
+ WAIT_TIMEOUT_MS > STALE_THRESHOLD_MS,
93
+ 'WAIT_TIMEOUT_MS (' + WAIT_TIMEOUT_MS + ') must exceed STALE_THRESHOLD_MS (' + STALE_THRESHOLD_MS + ')'
94
+ );
95
+ });
96
+
97
+ // isReclaimable uses AND: an OLD lock whose owner is STILL ALIVE is NOT
98
+ // reclaimable. This is the core of the bug_001 fix.
99
+ test('bug_001: an old lock owned by a LIVE pid is NOT reclaimable (AND-gate)', () => {
100
+ // process.pid is alive; timestamp far in the past => stale by age.
101
+ const oldButLive = { pid: process.pid, timestamp: Date.now() - (STALE_THRESHOLD_MS + 60000) };
102
+ assert.equal(isReclaimable(oldButLive), false, 'old + live must not be reclaimable');
103
+ });
104
+
105
+ // isReclaimable: a FRESH lock owned by a DEAD pid is NOT reclaimable either --
106
+ // both signals are required.
107
+ test('bug_001: a fresh lock owned by a DEAD pid is NOT reclaimable (AND-gate)', () => {
108
+ const freshButDead = { pid: DEAD_PID, timestamp: Date.now() };
109
+ assert.equal(isReclaimable(freshButDead), false, 'fresh + dead must not be reclaimable');
110
+ });
111
+
112
+ // isReclaimable: only BOTH old AND dead reclaims.
113
+ test('bug_001: a lock that is BOTH old AND dead IS reclaimable', () => {
114
+ const oldAndDead = { pid: DEAD_PID, timestamp: Date.now() - (STALE_THRESHOLD_MS + 60000) };
115
+ assert.equal(isReclaimable(oldAndDead), true, 'old + dead must be reclaimable');
116
+ });
117
+
118
+ // End-to-end: a peer holding an OLD-but-LIVE lock must NOT be displaced. The
119
+ // second acquire must return false (this process is the loser, it must wait).
120
+ test('bug_001: acquireInstallLock does not steal an old-but-live peer lock', () => {
121
+ const dir = tmpdir();
122
+ try {
123
+ // Hand-write a lock that is well past STALE age but owned by THIS (live)
124
+ // process -- simulating a healthy install legitimately running 90-120s+.
125
+ fs.writeFileSync(
126
+ lockFile(dir),
127
+ JSON.stringify({ pid: process.pid, timestamp: Date.now() - (STALE_THRESHOLD_MS + 30000) })
128
+ );
129
+ const got = acquireInstallLock(dir);
130
+ assert.equal(got, false, 'must NOT acquire -- the live owner keeps the lock despite age');
131
+ assert.ok(fs.existsSync(lockFile(dir)), 'the live peer lock must still be on disk');
132
+ } finally {
133
+ fs.rmSync(dir, { recursive: true, force: true });
134
+ }
135
+ });
136
+
137
+ // End-to-end: an old AND dead lock IS reclaimed -- this process wins.
138
+ test('bug_001: acquireInstallLock reclaims an old AND dead peer lock', () => {
139
+ const dir = tmpdir();
140
+ try {
141
+ fs.writeFileSync(
142
+ lockFile(dir),
143
+ JSON.stringify({ pid: DEAD_PID, timestamp: Date.now() - (STALE_THRESHOLD_MS + 30000) })
144
+ );
145
+ const got = acquireInstallLock(dir);
146
+ assert.equal(got, true, 'must reclaim an abandoned (old + dead) lock');
147
+ assert.ok(fs.existsSync(lockFile(dir)), 'the reclaimed lock must now be ours');
148
+ } finally {
149
+ fs.rmSync(dir, { recursive: true, force: true });
150
+ }
151
+ });
152
+
153
+ // waitForUnlock must NOT return early for an old-but-live lock: the winner is
154
+ // still running. (Bounded: we only assert it does not return instantly.)
155
+ test('bug_001: waitForUnlock keeps waiting on an old-but-live lock', () => {
156
+ const dir = tmpdir();
157
+ try {
158
+ fs.writeFileSync(
159
+ lockFile(dir),
160
+ JSON.stringify({ pid: process.pid, timestamp: Date.now() - (STALE_THRESHOLD_MS + 30000) })
161
+ );
162
+ // Probe via the same predicate waitForUnlock uses internally -- a full
163
+ // WAIT_TIMEOUT_MS blocking call would make the suite too slow, so we assert
164
+ // the decision function instead. waitForUnlock returns true only when
165
+ // isReclaimable is true OR the file is gone; here neither holds.
166
+ const data = readLock(lockFile(dir));
167
+ assert.notEqual(data, 'EMPTY');
168
+ assert.notEqual(data, null);
169
+ assert.equal(isReclaimable(data), false, 'old-but-live => waitForUnlock must keep polling');
170
+ } finally {
171
+ fs.rmSync(dir, { recursive: true, force: true });
172
+ }
173
+ });
174
+
175
+ // --- bug_004: atomic creation + empty-file handling -----------------------
176
+
177
+ // readLock distinguishes an EMPTY file from a CORRUPT one. An empty / zero-byte
178
+ // file (a mid-write window) returns the sentinel 'EMPTY', not null.
179
+ test('bug_004: readLock returns EMPTY sentinel for a zero-byte file', () => {
180
+ const dir = tmpdir();
181
+ try {
182
+ fs.writeFileSync(lockFile(dir), ''); // zero bytes -- the mid-write state
183
+ const r = readLock(lockFile(dir));
184
+ assert.equal(r, 'EMPTY', 'a zero-byte lock file must read as the EMPTY sentinel');
185
+ } finally {
186
+ fs.rmSync(dir, { recursive: true, force: true });
187
+ }
188
+ });
189
+
190
+ // readLock returns null only for GENUINELY corrupt (non-empty invalid JSON).
191
+ test('bug_004: readLock returns null for non-empty invalid JSON (truly corrupt)', () => {
192
+ const dir = tmpdir();
193
+ try {
194
+ fs.writeFileSync(lockFile(dir), 'this is not json {{{');
195
+ const r = readLock(lockFile(dir));
196
+ assert.equal(r, null, 'genuinely corrupt content must read as null');
197
+ } finally {
198
+ fs.rmSync(dir, { recursive: true, force: true });
199
+ }
200
+ });
201
+
202
+ // readLock returns the parsed object for a valid lock.
203
+ test('bug_004: readLock parses a valid fully-written lock', () => {
204
+ const dir = tmpdir();
205
+ try {
206
+ const payload = { pid: 1234, timestamp: Date.now() };
207
+ fs.writeFileSync(lockFile(dir), JSON.stringify(payload));
208
+ const r = readLock(lockFile(dir));
209
+ assert.ok(r && typeof r === 'object' && r !== 'EMPTY', 'valid lock must parse to an object');
210
+ assert.equal(r.pid, 1234);
211
+ } finally {
212
+ fs.rmSync(dir, { recursive: true, force: true });
213
+ }
214
+ });
215
+
216
+ // readLock returns null for a missing file (ENOENT).
217
+ test('bug_004: readLock returns null for a missing file', () => {
218
+ const dir = tmpdir();
219
+ try {
220
+ const r = readLock(lockFile(dir)); // never created
221
+ assert.equal(r, null, 'a missing lock file must read as null');
222
+ } finally {
223
+ fs.rmSync(dir, { recursive: true, force: true });
224
+ }
225
+ });
226
+
227
// The decisive bug_004 test: a racing peer that finds an EMPTY lock file must
// NOT unlink it (the winner may be mid-write). The pre-fix code unlinked it and
// both processes ran the install. acquireInstallLock must leave an empty file
// exactly as it found it.
test('bug_004: acquireInstallLock does NOT unlink an EMPTY peer lock', () => {
  const dir = tmpdir();
  try {
    // Simulate a winner that has created the lock file but not yet written it
    // (the openSync->writeSync window). The atomic linkSync path does not
    // produce this state itself, but a non-atomic legacy path or an external
    // tool could; the acquirer must treat it as transient.
    fs.writeFileSync(lockFile(dir), '');
    const got = acquireInstallLock(dir);

    // No winner ever populates the file here, so acquire runs out of retries.
    // Which boolean it returns at that point is not what this test pins down.
    assert.equal(typeof got, 'boolean', 'acquire must return a boolean, not throw');

    // The load-bearing assertions: the empty file was neither unlinked nor
    // replaced by our own lock.
    assert.ok(fs.existsSync(lockFile(dir)), 'the EMPTY peer lock must still be on disk');
    assert.equal(
      fs.readFileSync(lockFile(dir), 'utf8'),
      '',
      'the EMPTY peer lock must not have been overwritten'
    );
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
});
253
+
254
+ // Atomic create: a normal acquire on a clean dir writes a fully-formed,
255
+ // parseable lock -- never a zero-byte file. This proves the linkSync path
256
+ // publishes only fully-written content.
257
+ test('bug_004: acquireInstallLock publishes a fully-written (never empty) lock', () => {
258
+ const dir = tmpdir();
259
+ try {
260
+ const got = acquireInstallLock(dir);
261
+ assert.equal(got, true, 'first acquirer on a clean dir must win');
262
+ const raw = fs.readFileSync(lockFile(dir), 'utf8');
263
+ assert.ok(raw.trim().length > 0, 'published lock must not be zero-byte');
264
+ const parsed = JSON.parse(raw);
265
+ assert.equal(parsed.pid, process.pid, 'published lock must carry our pid');
266
+ assert.equal(typeof parsed.timestamp, 'number', 'published lock must carry a timestamp');
267
+ } finally {
268
+ fs.rmSync(dir, { recursive: true, force: true });
269
+ }
270
+ });
271
+
272
+ // Atomic create leaves no temp-file litter behind on the happy path.
273
+ test('bug_004: acquireInstallLock cleans up its temp file', () => {
274
+ const dir = tmpdir();
275
+ try {
276
+ acquireInstallLock(dir);
277
+ const entries = fs.readdirSync(dir);
278
+ const litter = entries.filter((e) => e.indexOf('.tmp') !== -1);
279
+ assert.deepEqual(litter, [], 'no .tmp litter may remain after acquire: ' + litter.join(','));
280
+ } finally {
281
+ fs.rmSync(dir, { recursive: true, force: true });
282
+ }
283
+ });
284
+
285
+ // Second acquirer against a held live lock is the loser (returns false) and
286
+ // must NOT corrupt or remove the winner's lock.
287
+ test('mutual exclusion: a second acquirer loses to a held live lock', () => {
288
+ const dir = tmpdir();
289
+ try {
290
+ const first = acquireInstallLock(dir);
291
+ assert.equal(first, true, 'first acquirer wins');
292
+ const second = acquireInstallLock(dir);
293
+ assert.equal(second, false, 'second acquirer must lose -- exactly one winner');
294
+ assert.ok(fs.existsSync(lockFile(dir)), 'the winner lock must survive the loser attempt');
295
+ releaseInstallLock(dir);
296
+ assert.ok(!fs.existsSync(lockFile(dir)), 'release clears the lock');
297
+ } finally {
298
+ fs.rmSync(dir, { recursive: true, force: true });
299
+ }
300
+ });
301
+
302
+ // release is owner-aware: it must not delete a lock owned by a different pid.
303
+ test('releaseInstallLock does not remove another live process lock', () => {
304
+ const dir = tmpdir();
305
+ try {
306
+ fs.writeFileSync(
307
+ lockFile(dir),
308
+ JSON.stringify({ pid: process.pid === 1 ? 2 : 1, timestamp: Date.now() })
309
+ );
310
+ releaseInstallLock(dir);
311
+ assert.ok(fs.existsSync(lockFile(dir)), 'a foreign-owned lock must NOT be released by us');
312
+ } finally {
313
+ fs.rmSync(dir, { recursive: true, force: true });
314
+ }
315
+ });
316
+
317
+ // HARD RULE: no em-dashes in the module (referenced via code point).
318
+ test('npm-install-lock.cjs has no em-dashes', () => {
319
+ const src = fs.readFileSync(MODULE_PATH, 'utf8');
320
+ const EM_DASH = String.fromCharCode(0x2014);
321
+ assert.ok(src.indexOf(EM_DASH) === -1, 'em-dash found in npm-install-lock.cjs');
322
+ });
323
+
324
+ process.stdout.write('\nnpm-install-lock: ' + passed + ' passed, ' + failed + ' failed\n');
325
+ process.exit(failed === 0 ? 0 : 1);