@mindrian_os/install 1.13.0-beta.21 → 1.13.0-beta.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /*
5
+ * Copyright (c) 2026 Mindrian. BSL 1.1.
6
+ *
7
+ * MindrianOS Plugin -- portable npm CLI resolution (debug session
8
+ * mcp-servers-cache-missing-node-modules, escalated mandate 2026-05-21).
9
+ *
10
+ * THE PROBLEM (code review of the prior Option D fix, commit f6cafe74):
11
+ * The self-heal and the SessionStart reconcile hook both ran
12
+ * `spawnSync('npm', ['install', ...])`. That bare invocation is NOT
13
+ * cross-platform:
14
+ * - WINDOWS: `npm` is `npm.cmd` (a batch file). spawnSync('npm') with no
15
+ * shell:true and no .cmd suffix returns ENOENT -- the heal silently does
16
+ * nothing. On Windows the node_modules gap then NEVER heals.
17
+ * - MAC: even with the .cmd issue aside, spawnSync('npm') depends on `npm`
18
+ * being on the child process PATH. A GUI-launched (Dock/Finder) Claude
19
+ * Code gives child processes a minimal PATH that frequently excludes the
20
+ * nvm / Homebrew bin directory where `npm` lives -- same ENOENT, different
21
+ * cause. `shell:true` does NOT fix this (it does not add the missing dir
22
+ * to PATH).
23
+ *
24
+ * THE FIX (this module): resolve npm to an ABSOLUTE path, independent of PATH
25
+ * and independent of the platform file extension.
26
+ *
27
+ * The key insight: npm ships in the SAME distribution as the `node` binary
28
+ * already executing this code. `process.execPath` is the absolute path to
29
+ * that node binary. npm's real entry point is a plain JavaScript file --
30
+ * `npm-cli.js` -- which lives at a fixed location relative to the node binary:
31
+ * - POSIX (Linux, Mac): <nodeBinDir>/../lib/node_modules/npm/bin/npm-cli.js
32
+ * - WINDOWS: <nodeBinDir>/node_modules/npm/bin/npm-cli.js
33
+ * Running `node <absolute npm-cli.js> install ...` invokes npm directly with
34
+ * the SAME node binary, sidestepping PATH, the .cmd extension, and shell:true
35
+ * entirely. This is correct by construction on Windows, Mac, and Linux.
36
+ *
37
+ * Fallback: if npm-cli.js cannot be located off process.execPath (an unusual
38
+ * layout -- a system-package node, a relocated install), the resolver returns
39
+ * a PATH-based spawn descriptor that DOES carry the Windows .cmd handling
40
+ * (shell:true on win32) so the backstop is still better than the bare
41
+ * pre-fix invocation.
42
+ *
43
+ * Canon Part 8: zero network surface. Pure node built-ins. This module only
44
+ * computes a spawn descriptor; the caller runs `npm install`.
45
+ *
46
+ * HARD RULE: no em-dashes anywhere in this file (hyphens only).
47
+ */
48
+
49
+ const fs = require('node:fs');
50
+ const path = require('node:path');
51
+
52
+ const IS_WINDOWS = process.platform === 'win32';
53
+
54
/**
 * Candidate locations of npm's JavaScript entry point (npm-cli.js), derived
 * from the directory that holds the node binary.
 *
 * Layouts probed, in this fixed order:
 *   1. <nodeBinDir>/node_modules/npm/bin/npm-cli.js
 *        Windows zip / installer: npm sits beside node.exe.
 *   2. <nodeBinDir>/../lib/node_modules/npm/bin/npm-cli.js
 *        POSIX tarball / nvm / Volta: bin/node + lib/node_modules/npm.
 *   3. <nodeBinDir>/../node_modules/npm/bin/npm-cli.js
 *        node_modules as a sibling of the bin directory.
 *   4. <nodeBinDir>/../libexec/lib/node_modules/npm/bin/npm-cli.js
 *        Homebrew keg. process.execPath has symlinks resolved, so it points
 *        into the Cellar, where the node formula is understood to keep npm
 *        under libexec rather than lib.
 *        NOTE(review): confirm against the current Homebrew node formula.
 *
 * Every layout is probed on every platform (a defensive superset), so a
 * non-standard packaging still resolves if npm is present anywhere it
 * normally ships. New layouts are appended so the order of the earlier
 * entries never changes.
 *
 * Paths are built with the host's `path` flavor. The result is absolute
 * whenever the exec path is absolute in that flavor.
 *
 * @param {string} [execPath] - override for process.execPath (tests)
 * @returns {string[]} candidate paths, in probe order
 */
function npmCliCandidates(execPath) {
  const nodeBin = path.dirname(execPath || process.execPath);
  // Path of npm-cli.js relative to whichever directory holds node_modules.
  const entry = ['node_modules', 'npm', 'bin', 'npm-cli.js'];
  return [
    // Windows-style: npm sits beside node.exe.
    path.join(nodeBin, ...entry),
    // POSIX-style: npm sits one level up under lib/.
    path.join(nodeBin, '..', 'lib', ...entry),
    // node_modules one level up, with no lib/ segment.
    path.join(nodeBin, '..', ...entry),
    // Homebrew-style: npm sits one level up under libexec/lib/.
    path.join(nodeBin, '..', 'libexec', 'lib', ...entry),
  ];
}
82
+
83
/**
 * Work out how to spawn npm without trusting PATH.
 *
 * Preferred descriptor (strategy 'node-npm-cli'):
 *   { command: <node binary>, baseArgs: [<npm-cli.js>], shell: false }
 *   The first candidate from npmCliCandidates() that exists on disk is fed to
 *   the node binary directly: no PATH lookup, no .cmd extension, no shell.
 *
 * Fallback descriptor (strategy 'path-npm'):
 *   { command: 'npm', baseArgs: [], shell: IS_WINDOWS }
 *   Returned only when no candidate exists. It is still PATH-dependent;
 *   shell is enabled on win32 so the OS can resolve `npm` to `npm.cmd`.
 *
 * @param {object} [opts]
 * @param {string} [opts.execPath] - override process.execPath (tests)
 * @returns {{command:string, baseArgs:string[], shell:boolean, strategy:string, npmCli:(string|null)}}
 */
function resolveNpmCli(opts) {
  const options = opts || {};

  // A probe that blows up counts as "not here" so the next candidate is tried.
  const isPresent = (candidate) => {
    try {
      return fs.existsSync(candidate);
    } catch (_) {
      return false;
    }
  };

  const npmCli = npmCliCandidates(options.execPath).find(isPresent);

  if (npmCli === undefined) {
    // Nothing found next to the node binary: fall back to PATH resolution.
    return {
      command: 'npm',
      baseArgs: [],
      shell: IS_WINDOWS,
      strategy: 'path-npm',
      npmCli: null,
    };
  }

  return {
    command: options.execPath || process.execPath,
    baseArgs: [npmCli],
    shell: false,
    strategy: 'node-npm-cli',
    npmCli,
  };
}
129
+
130
/**
 * Assemble the argv for `npm install` from a descriptor produced by
 * resolveNpmCli().
 *
 * The result is: descriptor.baseArgs, then the literal 'install', then the
 * install flags. When `installArgs` is missing, not an array, or an empty
 * array, the quiet default set (--no-audit --no-fund --silent) is used.
 *
 * @param {{baseArgs:string[]}} descriptor
 * @param {string[]} [installArgs] - npm args placed after `install`
 * @returns {string[]} argv to pass as the second arg of spawnSync(command, argv)
 */
function buildInstallArgs(descriptor, installArgs) {
  const callerSuppliedFlags = Array.isArray(installArgs) && installArgs.length > 0;
  const flags = callerSuppliedFlags
    ? installArgs
    : ['--no-audit', '--no-fund', '--silent'];
  // A fresh array is returned; neither input array is modified.
  return [...descriptor.baseArgs, 'install', ...flags];
}
145
+
146
+ module.exports = {
147
+ resolveNpmCli,
148
+ buildInstallArgs,
149
+ npmCliCandidates,
150
+ IS_WINDOWS,
151
+ };
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /*
5
+ * Copyright (c) 2026 Mindrian. BSL 1.1.
6
+ *
7
+ * Regression tests for lib/core/npm-cli-resolve.cjs -- portable npm CLI
8
+ * resolution (debug session mcp-servers-cache-missing-node-modules, escalated
9
+ * mandate 2026-05-21).
10
+ *
11
+ * The prior Option D fix (commit f6cafe74) ran a bare `spawnSync('npm', ...)`
12
+ * that was DEAD on Windows (npm is npm.cmd) and FRAGILE on Mac (PATH gap for
13
+ * GUI-launched Claude Code). These tests lock the cross-platform contract:
14
+ *
15
+ * Test 1: resolveNpmCli on the test host returns the 'node-npm-cli' strategy
16
+ * and the resolved npm-cli.js actually exists.
17
+ * Test 2: the resolved command equals process.execPath (the current node
18
+ * binary) -- NOT the literal string 'npm'. This is what makes it
19
+ * PATH-independent.
20
+ * Test 3: WINDOWS layout -- npmCliCandidates for a win32-style execPath puts
21
+ * `<nodeBinDir>/node_modules/npm/bin/npm-cli.js` first (where the
22
+ * Windows node installer ships npm). No `.cmd` anywhere in the argv.
23
+ * Test 4: buildInstallArgs produces `[<npm-cli.js>] install --no-audit
24
+ * --no-fund --silent` for the node-npm-cli strategy.
25
+ * Test 5: the fallback descriptor (path-npm) sets shell:true ONLY on win32,
26
+ * so even the fallback carries Windows .cmd handling.
27
+ * Test 6: no em-dashes in npm-cli-resolve.cjs (HARD RULE).
28
+ * Test 7: the argv never contains the bare token 'npm' as command on the
29
+ * node-npm-cli strategy -- proves PATH is not relied upon.
30
+ *
31
+ * HARD RULE: no em-dashes.
32
+ */
33
+
34
+ const assert = require('node:assert/strict');
35
+ const fs = require('node:fs');
36
+ const path = require('node:path');
37
+
38
+ const REPO_ROOT = path.resolve(__dirname, '..', '..');
39
+ const MODULE_PATH = path.join(REPO_ROOT, 'lib', 'core', 'npm-cli-resolve.cjs');
40
+ const { resolveNpmCli, buildInstallArgs, npmCliCandidates } = require(MODULE_PATH);
41
+
42
// Running tallies, reported in the summary line at the end of the run.
let passed = 0;
let failed = 0;

/** Count one passing test and print its name. */
function ok(name) {
  passed += 1;
  process.stdout.write(` ok ${name}\n`);
}

/** Count one failing test and print its name plus the failure reason. */
function fail(name, err) {
  failed += 1;
  // Prefer the error's message; fall back to a string form for thrown non-errors.
  const reason = err && err.message ? err.message : String(err);
  process.stdout.write(` FAIL ${name}\n`);
  process.stdout.write(` ${reason}\n`);
}

/**
 * Run one synchronous test body. A body that returns counts as a pass; a
 * body that throws counts as a failure and the run continues.
 */
function test(name, fn) {
  try {
    fn();
    ok(name);
  } catch (err) {
    fail(name, err);
  }
}
57
+
58
+ // Test 1 -- node-npm-cli strategy on the test host, npm-cli.js exists.
59
+ test('resolveNpmCli returns node-npm-cli strategy with a real npm-cli.js', () => {
60
+ const r = resolveNpmCli();
61
+ assert.equal(r.strategy, 'node-npm-cli', 'expected node-npm-cli strategy on a normal node install');
62
+ assert.ok(r.npmCli, 'npmCli path should be set');
63
+ assert.ok(fs.existsSync(r.npmCli), 'resolved npm-cli.js must exist on disk: ' + r.npmCli);
64
+ });
65
+
66
+ // Test 2 -- command is the node binary, not the literal 'npm'.
67
+ test('resolveNpmCli command is process.execPath (PATH-independent)', () => {
68
+ const r = resolveNpmCli();
69
+ assert.equal(r.command, process.execPath, 'command must be the current node binary');
70
+ assert.notEqual(r.command, 'npm', 'command must NOT be the bare PATH-dependent string npm');
71
+ assert.equal(r.shell, false, 'node-npm-cli strategy needs no shell');
72
+ });
73
+
74
// Test 3 -- Windows layout: npm-cli.js beside node.exe is the first candidate.
test('npmCliCandidates puts the Windows-layout path first', () => {
  // npmCliCandidates builds its paths with the HOST's `path` flavor, so the
  // expected value is computed the same way here. On a real Windows host this
  // is C:\Program Files\nodejs\node_modules\npm\bin\npm-cli.js.
  const winExec = 'C:\\Program Files\\nodejs\\node.exe';
  const cands = npmCliCandidates(winExec);
  assert.ok(Array.isArray(cands) && cands.length >= 3, 'expected >= 3 candidates');
  // Pin the exact first entry. A substring check for 'node_modules' and
  // 'npm-cli.js' is satisfied by EVERY candidate and cannot detect a
  // reordering, so compare against the beside-node layout explicitly.
  const besideNode = path.join(
    path.dirname(winExec), 'node_modules', 'npm', 'bin', 'npm-cli.js'
  );
  assert.equal(
    cands[0],
    besideNode,
    'first candidate must target node_modules/npm/bin/npm-cli.js'
  );
  // No candidate is a .cmd file -- we always run the JS entry point directly.
  for (const c of cands) {
    assert.ok(c.indexOf('.cmd') === -1, 'no candidate may be a .cmd batch file: ' + c);
  }
});
91
+
92
+ // Test 4 -- buildInstallArgs argv shape for the node-npm-cli strategy.
93
+ test('buildInstallArgs produces [npm-cli.js] install ...quiet-flags', () => {
94
+ const r = resolveNpmCli();
95
+ const argv = buildInstallArgs(r);
96
+ assert.equal(argv[0], r.npmCli, 'first arg must be the npm-cli.js path');
97
+ assert.equal(argv[1], 'install', 'second arg must be install');
98
+ assert.deepEqual(
99
+ argv.slice(2),
100
+ ['--no-audit', '--no-fund', '--silent'],
101
+ 'default install args must be the quiet production set'
102
+ );
103
+ // A custom install-args override is honored.
104
+ const custom = buildInstallArgs(r, ['--omit=dev']);
105
+ assert.deepEqual(custom.slice(1), ['install', '--omit=dev']);
106
+ });
107
+
108
+ // Test 5 -- fallback descriptor: shell:true only on Windows.
109
+ test('path-npm fallback sets shell:true only on win32', () => {
110
+ // Force the fallback by pointing execPath at a freshly-created EMPTY temp
111
+ // directory tree deep enough that none of the candidate paths (including the
112
+ // `../lib/node_modules/...` POSIX-layout candidate) can escape onto a real
113
+ // system npm. The temp dir is isolated and known to contain no node_modules.
114
+ const os = require('node:os');
115
+ const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'mos-npm-resolve-test-'));
116
+ try {
117
+ // <tmpRoot>/x/y/z/bin/node -- candidates resolve under <tmpRoot>/x/y/z,
118
+ // which is empty, so resolveNpmCli must fall through to path-npm.
119
+ const fakeBin = path.join(tmpRoot, 'x', 'y', 'z', 'bin');
120
+ fs.mkdirSync(fakeBin, { recursive: true });
121
+ const r = resolveNpmCli({ execPath: path.join(fakeBin, 'node') });
122
+ assert.equal(r.strategy, 'path-npm', 'expected the path-npm fallback in an isolated empty tree');
123
+ assert.equal(r.command, 'npm', 'fallback command is the bare npm');
124
+ assert.equal(r.npmCli, null, 'fallback has no npmCli path');
125
+ assert.equal(
126
+ r.shell,
127
+ process.platform === 'win32',
128
+ 'fallback shell must be true on Windows (npm.cmd handling) and false elsewhere'
129
+ );
130
+ } finally {
131
+ fs.rmSync(tmpRoot, { recursive: true, force: true });
132
+ }
133
+ });
134
+
135
+ // Test 6 -- HARD RULE: no em-dashes. The em-dash is referenced via its Unicode
136
+ // code point (U+2014) so this test file itself stays em-dash-clean.
137
+ test('npm-cli-resolve.cjs has no em-dashes', () => {
138
+ const src = fs.readFileSync(MODULE_PATH, 'utf8');
139
+ const EM_DASH = String.fromCharCode(0x2014);
140
+ assert.ok(src.indexOf(EM_DASH) === -1, 'em-dash found in npm-cli-resolve.cjs');
141
+ });
142
+
143
+ // Test 7 -- the node-npm-cli argv never relies on a bare 'npm' token.
144
+ test('node-npm-cli argv does not depend on PATH resolution of npm', () => {
145
+ const r = resolveNpmCli();
146
+ const fullArgv = [r.command].concat(buildInstallArgs(r));
147
+ assert.notEqual(fullArgv[0], 'npm', 'command position must not be the bare npm token');
148
+ // The npm-cli.js path is absolute -- not resolved against PATH or cwd.
149
+ assert.ok(path.isAbsolute(r.npmCli), 'npm-cli.js path must be absolute');
150
+ });
151
+
152
+ process.stdout.write('\nnpm-cli-resolve: ' + passed + ' passed, ' + failed + ' failed\n');
153
+ process.exit(failed === 0 ? 0 : 1);
@@ -0,0 +1,302 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ /*
5
+ * Copyright (c) 2026 Mindrian. BSL 1.1.
6
+ *
7
+ * MindrianOS Plugin -- one-shot npm-install lock (Option D, hybrid self-heal).
8
+ *
9
+ * Purpose: when a fresh plugin cache lands with NO node_modules, BOTH bundled
10
+ * MCP servers (mindrian-brain + mindrian-os) can spawn at the same instant and
11
+ * each independently discover MODULE_NOT_FOUND. If both ran `npm install`
12
+ * concurrently in the same directory they would corrupt node_modules. This lock
13
+ * guarantees that exactly ONE process runs the install while the other WAITS
14
+ * for it to finish, then proceeds.
15
+ *
16
+ * This is deliberately NOT lib/core/write-lock.cjs. write-lock is room-scoped,
17
+ * SQLite-scoped, has a 5s stale threshold, and THROWS on contention. The
18
+ * npm-install path needs the opposite contract: a longer stale window (a cold
19
+ * `npm install` can take 30s+) and a BLOCKING wait, not a throw -- the loser of
20
+ * the race must sit still until node_modules is populated.
21
+ *
22
+ * CORRECTNESS FIXES (remote code review, 2026-05-21 -- folded into beta.23):
23
+ * - bug_004: lock creation is now ATOMIC via fs.linkSync (write a fully
24
+ * populated temp file, then atomically link it into place). The pre-fix
25
+ * openSync('wx') created a zero-byte file that a separate writeSync later
26
+ * populated -- a racing peer could read the empty file mid-write, treat it
27
+ * as corrupt, unlink the winner's live lock, and run a second concurrent
28
+ * install. readLock + waitForUnlock additionally distinguish a transient
29
+ * empty mid-write file from genuinely corrupt JSON.
30
+ * - bug_001: STALE_THRESHOLD_MS is raised strictly above the 120s install
31
+ * timeout, and the staleness checks use AND not OR -- a lock is reclaimed
32
+ * only when it is BOTH old AND its owning pid is dead. A healthy install
33
+ * legitimately running 90-120s is no longer declared abandoned.
34
+ *
35
+ * Canon Part 8: zero network surface in this file. Pure node built-ins. The
36
+ * `npm install` itself is run by the caller (mcp-dep-heal.cjs), not here.
37
+ *
38
+ * HARD RULE: no em-dashes anywhere in this file (hyphens only).
39
+ */
40
+
41
+ const fs = require('node:fs');
42
+ const path = require('node:path');
43
+
44
+ const LOCK_FILENAME = '.mindrian-npm-install.lock';
45
+ // A cold `npm install` of the plugin deps measured ~3s on a warm npm cache and
46
+ // can exceed 30s on a cold cache / slow disk. runGuardedInstall's spawnSync
47
+ // gives the install a 120000 ms (120s) timeout, so STALE_THRESHOLD_MS MUST sit
48
+ // strictly ABOVE 120s -- otherwise a healthy install still legitimately running
49
+ // at the 90-120s mark would be declared abandoned and a peer would start a
50
+ // SECOND concurrent install (bug_001). 180s gives 60s of headroom over the
51
+ // install timeout. Belt-and-suspenders: the staleness checks below also require
52
+ // pidAlive to be false (AND, not OR), so an old-but-live lock is never reclaimed.
53
+ const STALE_THRESHOLD_MS = 180 * 1000;
54
+ // How long the loser of the race waits for the winner before giving up and
55
+ // trying the install itself. Strictly above STALE so a genuine winner whose
56
+ // lock has just gone stale still gets reclaimed-and-retried, not double-run.
57
+ const WAIT_TIMEOUT_MS = 200 * 1000;
58
+ const POLL_INTERVAL_MS = 200;
59
+ // A mid-write lock file (created by openSync('wx') but not yet written by the
60
+ // follow-up writeSync) is briefly empty. readLock distinguishes that transient
61
+ // state from a genuinely corrupt file by polling a few short intervals before
62
+ // declaring corruption (bug_004 defence-in-depth alongside the atomic linkSync
63
+ // create path).
64
+ const EMPTY_FILE_RETRY_ATTEMPTS = 5;
65
+ const EMPTY_FILE_RETRY_INTERVAL_MS = 20;
66
+
67
/** Location of the install lock file inside `dir`. */
function lockPath(dir) {
  const lockFile = path.join(dir, LOCK_FILENAME);
  return lockFile;
}
70
+
71
/**
 * Block the calling thread for about `ms` milliseconds using only built-ins.
 *
 * @param {number} ms - how long to pause
 * @returns {undefined}
 */
function sleepSync(ms) {
  try {
    // The cell holds 0 and nothing ever notifies it, so the wait on the
    // expected value 0 simply runs out after `ms`.
    const cell = new Int32Array(new SharedArrayBuffer(4));
    Atomics.wait(cell, 0, 0, ms);
    return;
  } catch (_) {
    // SharedArrayBuffer / Atomics.wait not usable here -- spin below instead.
  }
  const until = Date.now() + ms;
  while (Date.now() < until) {
    /* spin */
  }
}
81
+
82
/**
 * Read and parse a lock file.
 *
 * Returns one of three things so callers can tell a transient empty file
 * from a corrupt one (bug_004):
 *   - a plain object   -> the file held a JSON object (a written lock)
 *   - the string 'EMPTY' -> the file exists but stayed empty / whitespace
 *                          only across EMPTY_FILE_RETRY_ATTEMPTS reads.
 *                          This is either a mid-write window or a 0-byte
 *                          leftover; callers should retry rather than
 *                          assume the lock is dead.
 *   - null             -> the file is missing or unreadable, holds invalid
 *                          JSON, or holds valid JSON that is not an object
 *                          (a number, string, array, boolean or null).
 *
 * Non-object JSON is reported as corrupt on purpose: a lock is always written
 * as an object with pid and timestamp, and a file whose content is the JSON
 * string "EMPTY" must never be mistaken for the 'EMPTY' sentinel.
 *
 * @param {string} p - lock file path
 * @returns {object|'EMPTY'|null}
 */
function readLock(p) {
  for (let attempt = 0; attempt < EMPTY_FILE_RETRY_ATTEMPTS; attempt++) {
    let raw;
    try {
      raw = fs.readFileSync(p, 'utf8');
    } catch (_) {
      return null; // missing or unreadable
    }

    if (raw.trim() !== '') {
      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch (_) {
        return null; // non-empty but not valid JSON -- corrupt
      }
      const isLockObject =
        parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      return isLockObject ? parsed : null;
    }

    // Empty / whitespace-only: possibly a mid-write window. Pause before
    // every retry, but not after the final read.
    if (attempt < EMPTY_FILE_RETRY_ATTEMPTS - 1) {
      sleepSync(EMPTY_FILE_RETRY_INTERVAL_MS);
    }
  }
  return 'EMPTY';
}
130
+
131
/**
 * Whether a process with the given pid currently exists.
 *
 * Only positive integers are probed. Zero and negative values are refused up
 * front because the underlying kill call gives them a different meaning (they
 * address process groups or every signalable process, not one process), so a
 * corrupt lock carrying such a pid could otherwise look alive forever.
 *
 * @param {*} pid - value read from a lock file
 * @returns {boolean} true if the process exists, false otherwise
 */
function pidAlive(pid) {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    // Signal 0 delivers nothing; it only performs the existence check.
    process.kill(pid, 0);
    return true;
  } catch (e) {
    // EPERM means the process exists but is owned by another user -- still alive.
    return Boolean(e) && e.code === 'EPERM';
  }
}
141
+
142
/**
 * Whether a lock described by `data` may be reclaimed as abandoned.
 *
 * bug_001 fix: this uses AND, not OR. A lock is reclaimed ONLY when it is BOTH
 * older than STALE_THRESHOLD_MS AND its owning pid is not alive. A long but
 * live install keeps its lock however old it gets, and a dead-owner lock that
 * has not yet aged out keeps its lock too.
 *
 * A missing or non-numeric timestamp is treated as 0 (infinitely old). Without
 * that, a garbage timestamp yields a NaN age, every comparison against the
 * threshold is false, and a lock whose owner is long dead could never be
 * reclaimed. The dead-pid requirement still applies in that case.
 *
 * @param {object} data - parsed lock contents (must not be null / undefined)
 * @returns {boolean}
 */
function isReclaimable(data) {
  const stamp = Number(data.timestamp);
  const age = Date.now() - (Number.isFinite(stamp) ? stamp : 0);
  return age > STALE_THRESHOLD_MS && !pidAlive(data.pid);
}
161
+
162
/**
 * Try to take the install lock for `dir`.
 *
 * bug_004 fix: the lock is published atomically. The payload is first written
 * in full to a private temp file named after this pid, and only then linked to
 * the canonical lock path with fs.linkSync, which fails with EEXIST when the
 * lock path already exists. A lock published this way is never seen half
 * written.
 *
 * Up to three attempts are made. On contention the existing lock is read:
 *   - 'EMPTY'            -> left alone; pause, then try again
 *   - null (corrupt)     -> removed, then try again
 *   - reclaimable        -> removed, then try again
 *   - anything else      -> another process owns it; return false
 *
 * The function also returns true WITHOUT holding a lock when the temp file
 * cannot be written, when linkSync fails for a reason other than EEXIST, or
 * when all three attempts are used up: running the install unguarded is
 * preferred over not healing at all.
 *
 * @param {string} dir - directory the install will run in (CLAUDE_PLUGIN_ROOT)
 * @returns {boolean} true if this process should run the install, false if it
 *                    should call waitForUnlock instead.
 */
function acquireInstallLock(dir) {
  const p = lockPath(dir);
  const tmp = `${p}.${process.pid}.tmp`;
  const payload = JSON.stringify({ pid: process.pid, timestamp: Date.now() });

  // Best-effort removal: a file that is already gone is not an error here.
  const removeQuietly = (file) => {
    try { fs.unlinkSync(file); } catch (_) {}
  };

  let attemptsLeft = 3;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;

    // Stage the fully written payload before publishing it.
    try {
      fs.writeFileSync(tmp, payload);
    } catch (_) {
      // Cannot even stage a temp file (read-only dir, etc) -- go unguarded.
      return true;
    }

    // Atomic publish: the link either appears complete or fails.
    let published = true;
    let linkCode;
    try {
      fs.linkSync(tmp, p);
    } catch (e) {
      published = false;
      linkCode = e.code;
    }
    // Win or lose, the staged temp file is no longer needed.
    removeQuietly(tmp);

    if (published) return true;
    if (linkCode !== 'EEXIST') {
      // Not contention (no hardlink support, cross-device, permissions) --
      // go unguarded.
      return true;
    }

    // Somebody already published a lock. Decide what it means.
    const held = readLock(p);
    if (held === 'EMPTY') {
      // Possibly still being populated: do NOT remove it, just back off.
      sleepSync(EMPTY_FILE_RETRY_INTERVAL_MS * EMPTY_FILE_RETRY_ATTEMPTS);
      continue;
    }
    if (!held || isReclaimable(held)) {
      // Corrupt / unreadable, or abandoned (stale AND owner dead): clear it.
      removeQuietly(p);
      continue;
    }
    // A live (or not-yet-reclaimable) owner holds the lock -- we lost the race.
    return false;
  }

  // Pathological churn -- give up the guard and let the caller install.
  return true;
}
238
+
239
/**
 * Remove the install lock for `dir`, unless it records a different pid.
 *
 * The lock is left in place only when it parses to an object whose pid is set
 * and differs from this process. An 'EMPTY' or unreadable / corrupt lock has
 * no owner pid to compare, so it is removed. Every error (including the lock
 * not existing) is swallowed.
 *
 * @param {string} dir
 */
function releaseInstallLock(dir) {
  const p = lockPath(dir);
  try {
    const data = readLock(p);
    const recordsOtherPid =
      Boolean(data) &&
      data !== 'EMPTY' &&
      Boolean(data.pid) &&
      data.pid !== process.pid;
    if (!recordsOtherPid) {
      fs.unlinkSync(p);
    }
  } catch (_) {
    // ENOENT or other -- silent.
  }
}
255
+
256
/**
 * Block until the install lock for `dir` no longer stands in the way, or
 * until WAIT_TIMEOUT_MS has elapsed.
 *
 * Each pass of the loop returns true when any of these holds:
 *   - the lock file does not exist
 *   - the lock cannot be parsed (readLock returned null)
 *   - the lock is reclaimable (bug_001: BOTH stale AND its owner is dead)
 * Otherwise it sleeps POLL_INTERVAL_MS and looks again. An 'EMPTY' lock is
 * treated as still being written (bug_004), never as cleared, so it keeps
 * the loop polling.
 *
 * The wait is synchronous; it blocks the calling thread throughout.
 *
 * @param {string} dir
 * @returns {boolean} true if the lock cleared (install presumably done),
 *                    false on timeout.
 */
function waitForUnlock(dir) {
  const p = lockPath(dir);
  const deadline = Date.now() + WAIT_TIMEOUT_MS;

  while (Date.now() < deadline) {
    if (!fs.existsSync(p)) return true;

    const data = readLock(p);
    if (data !== 'EMPTY') {
      // null -> corrupt, treat as cleared. Reclaimable -> owner is gone.
      if (!data || isReclaimable(data)) return true;
    }

    // Either mid-write or held by a healthy owner: keep polling.
    sleepSync(POLL_INTERVAL_MS);
  }
  return false;
}
291
+
292
+ module.exports = {
293
+ acquireInstallLock,
294
+ releaseInstallLock,
295
+ waitForUnlock,
296
+ readLock,
297
+ isReclaimable,
298
+ pidAlive,
299
+ LOCK_FILENAME,
300
+ STALE_THRESHOLD_MS,
301
+ WAIT_TIMEOUT_MS,
302
+ };