@mindrian_os/install 1.13.0-beta.22 → 1.13.0-beta.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +23 -0
- package/bin/mindrian-brain-mcp-client.cjs +16 -3
- package/bin/mindrian-mcp-server.cjs +18 -3
- package/hooks/hooks.json +8 -8
- package/lib/core/mcp-dep-heal.cjs +246 -0
- package/lib/core/mcp-dep-heal.test.cjs +253 -0
- package/lib/core/npm-cli-resolve.cjs +151 -0
- package/lib/core/npm-cli-resolve.test.cjs +153 -0
- package/lib/core/npm-install-lock.cjs +302 -0
- package/lib/core/npm-install-lock.test.cjs +325 -0
- package/package.json +2 -4
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/*
|
|
5
|
+
* Copyright (c) 2026 Mindrian. BSL 1.1.
|
|
6
|
+
*
|
|
7
|
+
* MindrianOS Plugin -- portable npm CLI resolution (debug session
|
|
8
|
+
* mcp-servers-cache-missing-node-modules, escalated mandate 2026-05-21).
|
|
9
|
+
*
|
|
10
|
+
* THE PROBLEM (code review of the prior Option D fix, commit f6cafe74):
|
|
11
|
+
* The self-heal and the SessionStart reconcile hook both ran
|
|
12
|
+
* `spawnSync('npm', ['install', ...])`. That bare invocation is NOT
|
|
13
|
+
* cross-platform:
|
|
14
|
+
* - WINDOWS: `npm` is `npm.cmd` (a batch file). spawnSync('npm') with no
|
|
15
|
+
* shell:true and no .cmd suffix returns ENOENT -- the heal silently does
|
|
16
|
+
* nothing. On Windows the node_modules gap then NEVER heals.
|
|
17
|
+
* - MAC: even with the .cmd issue aside, spawnSync('npm') depends on `npm`
|
|
18
|
+
* being on the child process PATH. A GUI-launched (Dock/Finder) Claude
|
|
19
|
+
* Code gives child processes a minimal PATH that frequently excludes the
|
|
20
|
+
* nvm / Homebrew bin directory where `npm` lives -- same ENOENT, different
|
|
21
|
+
* cause. `shell:true` does NOT fix this (it does not add the missing dir
|
|
22
|
+
* to PATH).
|
|
23
|
+
*
|
|
24
|
+
* THE FIX (this module): resolve npm to an ABSOLUTE path, independent of PATH
|
|
25
|
+
* and independent of the platform file extension.
|
|
26
|
+
*
|
|
27
|
+
* The key insight: npm ships in the SAME distribution as the `node` binary
|
|
28
|
+
* already executing this code. `process.execPath` is the absolute path to
|
|
29
|
+
* that node binary. npm's real entry point is a plain JavaScript file --
|
|
30
|
+
* `npm-cli.js` -- which lives at a fixed location relative to the node binary:
|
|
31
|
+
* - POSIX (Linux, Mac): <nodeBinDir>/../lib/node_modules/npm/bin/npm-cli.js
|
|
32
|
+
* - WINDOWS: <nodeBinDir>/node_modules/npm/bin/npm-cli.js
|
|
33
|
+
* Running `node <absolute npm-cli.js> install ...` invokes npm directly with
|
|
34
|
+
* the SAME node binary, sidestepping PATH, the .cmd extension, and shell:true
|
|
35
|
+
* entirely. This is correct by construction on Windows, Mac, and Linux.
|
|
36
|
+
*
|
|
37
|
+
* Fallback: if npm-cli.js cannot be located off process.execPath (an unusual
|
|
38
|
+
* layout -- a system-package node, a relocated install), the resolver returns
|
|
39
|
+
* a PATH-based spawn descriptor that DOES carry the Windows .cmd handling
|
|
40
|
+
* (shell:true on win32) so the backstop is still better than the bare
|
|
41
|
+
* pre-fix invocation.
|
|
42
|
+
*
|
|
43
|
+
* Canon Part 8: zero network surface. Pure node built-ins. This module only
|
|
44
|
+
* computes a spawn descriptor; the caller runs `npm install`.
|
|
45
|
+
*
|
|
46
|
+
* HARD RULE: no em-dashes anywhere in this file (hyphens only).
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
const fs = require('node:fs');
|
|
50
|
+
const path = require('node:path');
|
|
51
|
+
|
|
52
|
+
const IS_WINDOWS = process.platform === 'win32';
|
|
53
|
+
|
|
54
|
+
/**
 * List the locations where npm's JavaScript entry point (npm-cli.js) may sit,
 * computed from the directory that holds the node binary.
 *
 * Three layouts are probed, always in this fixed order and on every platform:
 *   1. <binDir>/node_modules/npm/bin/npm-cli.js
 *      (npm beside the node binary -- the Windows zip / installer layout)
 *   2. <binDir>/../lib/node_modules/npm/bin/npm-cli.js
 *      (the POSIX tarball / nvm / Homebrew / Volta layout)
 *   3. <binDir>/../node_modules/npm/bin/npm-cli.js
 *      (npm in a node_modules directory one level above the binary)
 *
 * This function only builds path strings; it does not touch the filesystem.
 * The results are absolute whenever the node binary path itself is absolute.
 *
 * @param {string} [execPath] - override for process.execPath (tests)
 * @returns {string[]} candidate paths in probe order
 */
function npmCliCandidates(execPath) {
  const binDir = path.dirname(execPath || process.execPath);
  // Tail shared by every layout: the npm package's own bin/npm-cli.js.
  const entryTail = ['node_modules', 'npm', 'bin', 'npm-cli.js'];
  const beside = path.join(binDir, ...entryTail);
  const underLib = path.join(binDir, '..', 'lib', ...entryTail);
  const oneLevelUp = path.join(binDir, '..', ...entryTail);
  return [beside, underLib, oneLevelUp];
}
|
|
82
|
+
|
|
83
|
+
/**
 * Resolve a portable spawn descriptor for running npm.
 *
 * Preferred result (strategy 'node-npm-cli'):
 *   { command: <node binary>, baseArgs: [<npm-cli.js>], shell: false }
 *   npm is run by handing its JS entry point to the node binary, so there is
 *   no PATH lookup, no .cmd extension and no shell involved.
 *
 * Fallback result (strategy 'path-npm'):
 *   { command: 'npm', baseArgs: [], shell: IS_WINDOWS }
 *   Returned only when none of the npmCliCandidates() paths is a regular
 *   file. shell is true on win32 so the OS can resolve `npm` to `npm.cmd`;
 *   this fallback remains PATH-dependent.
 *
 * A candidate is accepted only if stat succeeds AND it is a regular file, so
 * a directory that happens to be named npm-cli.js, or an unreadable path, is
 * skipped and the next candidate is tried.
 *
 * @param {object} [opts]
 * @param {string} [opts.execPath] - override process.execPath (tests)
 * @returns {{command:string, baseArgs:string[], shell:boolean, strategy:string, npmCli:(string|null)}}
 */
function resolveNpmCli(opts) {
  // Tolerate both a missing argument and an explicit null.
  const options = opts || {};
  const nodeBinary = options.execPath || process.execPath;

  for (const candidate of npmCliCandidates(options.execPath)) {
    let isFile = false;
    try {
      isFile = fs.statSync(candidate).isFile();
    } catch (_) {
      // Missing or unreadable candidate -- try the next one.
    }
    if (isFile) {
      return {
        command: nodeBinary,
        baseArgs: [candidate],
        shell: false,
        strategy: 'node-npm-cli',
        npmCli: candidate,
      };
    }
  }

  // Fallback: npm-cli.js not locatable. PATH-based spawn, with Windows .cmd
  // handling via shell:true.
  return {
    command: 'npm',
    baseArgs: [],
    shell: IS_WINDOWS,
    strategy: 'path-npm',
    npmCli: null,
  };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Assemble the argv for an `npm install` run from a descriptor produced by
 * resolveNpmCli().
 *
 * The result is: descriptor.baseArgs, then the literal 'install', then the
 * install flags. When `installArgs` is a non-empty array it is used as the
 * flags verbatim; otherwise the quiet default set
 * (--no-audit --no-fund --silent) is used. The descriptor is not modified.
 *
 * @param {{baseArgs:string[]}} descriptor
 * @param {string[]} [installArgs] - npm args placed after `install`
 * @returns {string[]} argv to pass as the second arg of spawnSync(command, argv)
 */
function buildInstallArgs(descriptor, installArgs) {
  const hasCustomFlags = Array.isArray(installArgs) && installArgs.length > 0;
  const flags = hasCustomFlags
    ? installArgs
    : ['--no-audit', '--no-fund', '--silent'];
  return [...descriptor.baseArgs, 'install', ...flags];
}
|
|
145
|
+
|
|
146
|
+
module.exports = {
|
|
147
|
+
resolveNpmCli,
|
|
148
|
+
buildInstallArgs,
|
|
149
|
+
npmCliCandidates,
|
|
150
|
+
IS_WINDOWS,
|
|
151
|
+
};
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/*
|
|
5
|
+
* Copyright (c) 2026 Mindrian. BSL 1.1.
|
|
6
|
+
*
|
|
7
|
+
* Regression tests for lib/core/npm-cli-resolve.cjs -- portable npm CLI
|
|
8
|
+
* resolution (debug session mcp-servers-cache-missing-node-modules, escalated
|
|
9
|
+
* mandate 2026-05-21).
|
|
10
|
+
*
|
|
11
|
+
* The prior Option D fix (commit f6cafe74) ran a bare `spawnSync('npm', ...)`
|
|
12
|
+
* that was DEAD on Windows (npm is npm.cmd) and FRAGILE on Mac (PATH gap for
|
|
13
|
+
* GUI-launched Claude Code). These tests lock the cross-platform contract:
|
|
14
|
+
*
|
|
15
|
+
* Test 1: resolveNpmCli on the test host returns the 'node-npm-cli' strategy
|
|
16
|
+
* and the resolved npm-cli.js actually exists.
|
|
17
|
+
* Test 2: the resolved command equals process.execPath (the current node
|
|
18
|
+
* binary) -- NOT the literal string 'npm'. This is what makes it
|
|
19
|
+
* PATH-independent.
|
|
20
|
+
* Test 3: WINDOWS layout -- npmCliCandidates for a win32-style execPath puts
|
|
21
|
+
* `<nodeBinDir>/node_modules/npm/bin/npm-cli.js` first (where the
|
|
22
|
+
* Windows node installer ships npm). No `.cmd` anywhere in the argv.
|
|
23
|
+
* Test 4: buildInstallArgs produces `[<npm-cli.js>] install --no-audit
|
|
24
|
+
* --no-fund --silent` for the node-npm-cli strategy.
|
|
25
|
+
* Test 5: the fallback descriptor (path-npm) sets shell:true ONLY on win32,
|
|
26
|
+
* so even the fallback carries Windows .cmd handling.
|
|
27
|
+
* Test 6: no em-dashes in npm-cli-resolve.cjs (HARD RULE).
|
|
28
|
+
* Test 7: the argv never contains the bare token 'npm' as command on the
|
|
29
|
+
* node-npm-cli strategy -- proves PATH is not relied upon.
|
|
30
|
+
*
|
|
31
|
+
* HARD RULE: no em-dashes.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
const assert = require('node:assert/strict');
|
|
35
|
+
const fs = require('node:fs');
|
|
36
|
+
const path = require('node:path');
|
|
37
|
+
|
|
38
|
+
const REPO_ROOT = path.resolve(__dirname, '..', '..');
|
|
39
|
+
const MODULE_PATH = path.join(REPO_ROOT, 'lib', 'core', 'npm-cli-resolve.cjs');
|
|
40
|
+
const { resolveNpmCli, buildInstallArgs, npmCliCandidates } = require(MODULE_PATH);
|
|
41
|
+
|
|
42
|
+
// Running tallies, reported in the summary line at the end of the file.
let passed = 0;
let failed = 0;

/** Count one passing test and print its name. */
function ok(name) {
  passed += 1;
  process.stdout.write(` ok ${name}\n`);
}

/** Count one failing test and print its name plus the failure detail. */
function fail(name, err) {
  failed += 1;
  // Prefer the error's message; fall back to a string form for thrown
  // non-Error values.
  const detail = err && err.message ? err.message : String(err);
  process.stdout.write(` FAIL ${name}\n`);
  process.stdout.write(' ' + detail + '\n');
}

/**
 * Run one synchronous test body. A throw is recorded as a failure; a normal
 * return is recorded as a pass. Never rethrows, so later tests still run.
 */
function test(name, fn) {
  try {
    fn();
    ok(name);
  } catch (err) {
    fail(name, err);
  }
}
|
|
57
|
+
|
|
58
|
+
// Test 1 -- node-npm-cli strategy on the test host, npm-cli.js exists.
// NOTE(review): assumes the node binary running this file ships a bundled npm.
test('resolveNpmCli returns node-npm-cli strategy with a real npm-cli.js', () => {
  const r = resolveNpmCli();
  assert.equal(r.strategy, 'node-npm-cli', 'expected node-npm-cli strategy on a normal node install');
  assert.ok(r.npmCli, 'npmCli path should be set');
  assert.ok(fs.existsSync(r.npmCli), 'resolved npm-cli.js must exist on disk: ' + r.npmCli);
});

// Test 2 -- command is the node binary, not the literal 'npm'.
test('resolveNpmCli command is process.execPath (PATH-independent)', () => {
  const r = resolveNpmCli();
  assert.equal(r.command, process.execPath, 'command must be the current node binary');
  assert.notEqual(r.command, 'npm', 'command must NOT be the bare PATH-dependent string npm');
  assert.equal(r.shell, false, 'node-npm-cli strategy needs no shell');
});

// Test 3 -- Windows layout: npm-cli.js beside node.exe is the first candidate.
test('npmCliCandidates puts the Windows-layout path first', () => {
  // npmCliCandidates uses the HOST path module, so the expected value is built
  // with that same module. This pins the ordering (beside-the-binary first) on
  // any host, instead of only checking for substrings every candidate shares.
  const winExec = 'C:\\Program Files\\nodejs\\node.exe';
  const cands = npmCliCandidates(winExec);
  assert.ok(Array.isArray(cands) && cands.length >= 3, 'expected >= 3 candidates');
  const besideBinary = path.join(path.dirname(winExec), 'node_modules', 'npm', 'bin', 'npm-cli.js');
  assert.equal(
    cands[0],
    besideBinary,
    'first candidate must target node_modules/npm/bin/npm-cli.js'
  );
  // No candidate is a .cmd file -- we always run the JS entry point directly.
  for (const c of cands) {
    assert.ok(c.indexOf('.cmd') === -1, 'no candidate may be a .cmd batch file: ' + c);
  }
});

// Test 4 -- buildInstallArgs argv shape for the node-npm-cli strategy.
test('buildInstallArgs produces [npm-cli.js] install ...quiet-flags', () => {
  const r = resolveNpmCli();
  const argv = buildInstallArgs(r);
  assert.equal(argv[0], r.npmCli, 'first arg must be the npm-cli.js path');
  assert.equal(argv[1], 'install', 'second arg must be install');
  assert.deepEqual(
    argv.slice(2),
    ['--no-audit', '--no-fund', '--silent'],
    'default install args must be the quiet production set'
  );
  // A custom install-args override is honored.
  const custom = buildInstallArgs(r, ['--omit=dev']);
  assert.deepEqual(custom.slice(1), ['install', '--omit=dev']);
});

// Test 5 -- fallback descriptor: shell:true only on Windows.
test('path-npm fallback sets shell:true only on win32', () => {
  // Force the fallback by pointing execPath at a freshly-created EMPTY temp
  // directory tree deep enough that none of the candidate paths (including the
  // `../lib/node_modules/...` POSIX-layout candidate) can escape onto a real
  // system npm. The temp dir is isolated and known to contain no node_modules.
  const os = require('node:os');
  const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'mos-npm-resolve-test-'));
  try {
    // <tmpRoot>/x/y/z/bin/node -- candidates resolve under <tmpRoot>/x/y/z,
    // which is empty, so resolveNpmCli must fall through to path-npm.
    const fakeBin = path.join(tmpRoot, 'x', 'y', 'z', 'bin');
    fs.mkdirSync(fakeBin, { recursive: true });
    const r = resolveNpmCli({ execPath: path.join(fakeBin, 'node') });
    assert.equal(r.strategy, 'path-npm', 'expected the path-npm fallback in an isolated empty tree');
    assert.equal(r.command, 'npm', 'fallback command is the bare npm');
    assert.equal(r.npmCli, null, 'fallback has no npmCli path');
    assert.equal(
      r.shell,
      process.platform === 'win32',
      'fallback shell must be true on Windows (npm.cmd handling) and false elsewhere'
    );
  } finally {
    // Always remove the temp tree, even when an assertion above throws.
    fs.rmSync(tmpRoot, { recursive: true, force: true });
  }
});

// Test 6 -- HARD RULE: no em-dashes. The em-dash is referenced via its Unicode
// code point (U+2014) so this test file itself stays em-dash-clean.
test('npm-cli-resolve.cjs has no em-dashes', () => {
  const src = fs.readFileSync(MODULE_PATH, 'utf8');
  const EM_DASH = String.fromCharCode(0x2014);
  assert.ok(src.indexOf(EM_DASH) === -1, 'em-dash found in npm-cli-resolve.cjs');
});

// Test 7 -- the node-npm-cli argv never relies on a bare 'npm' token.
test('node-npm-cli argv does not depend on PATH resolution of npm', () => {
  const r = resolveNpmCli();
  // Check the strategy first so a fallback descriptor (npmCli === null) fails
  // with a clear assertion instead of a TypeError from path.isAbsolute.
  assert.equal(r.strategy, 'node-npm-cli', 'expected node-npm-cli strategy on a normal node install');
  const fullArgv = [r.command].concat(buildInstallArgs(r));
  assert.notEqual(fullArgv[0], 'npm', 'command position must not be the bare npm token');
  // The npm-cli.js path is absolute -- not resolved against PATH or cwd.
  assert.ok(path.isAbsolute(r.npmCli), 'npm-cli.js path must be absolute');
});

process.stdout.write('\nnpm-cli-resolve: ' + passed + ' passed, ' + failed + ' failed\n');
// Set the exit code instead of calling process.exit(): every test above is
// synchronous, so the process ends on its own, and pending stdout writes
// (asynchronous for pipes on Windows) are flushed rather than cut off.
process.exitCode = failed === 0 ? 0 : 1;
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/*
|
|
5
|
+
* Copyright (c) 2026 Mindrian. BSL 1.1.
|
|
6
|
+
*
|
|
7
|
+
* MindrianOS Plugin -- one-shot npm-install lock (Option D, hybrid self-heal).
|
|
8
|
+
*
|
|
9
|
+
* Purpose: when a fresh plugin cache lands with NO node_modules, BOTH bundled
|
|
10
|
+
* MCP servers (mindrian-brain + mindrian-os) can spawn at the same instant and
|
|
11
|
+
* each independently discover MODULE_NOT_FOUND. If both ran `npm install`
|
|
12
|
+
* concurrently in the same directory they would corrupt node_modules. This lock
|
|
13
|
+
* guarantees that exactly ONE process runs the install while the other WAITS
|
|
14
|
+
* for it to finish, then proceeds.
|
|
15
|
+
*
|
|
16
|
+
* This is deliberately NOT lib/core/write-lock.cjs. write-lock is room-scoped,
|
|
17
|
+
* SQLite-scoped, has a 5s stale threshold, and THROWS on contention. The
|
|
18
|
+
* npm-install path needs the opposite contract: a longer stale window (a cold
|
|
19
|
+
* `npm install` can take 30s+) and a BLOCKING wait, not a throw -- the loser of
|
|
20
|
+
* the race must sit still until node_modules is populated.
|
|
21
|
+
*
|
|
22
|
+
* CORRECTNESS FIXES (remote code review, 2026-05-21 -- folded into beta.23):
|
|
23
|
+
* - bug_004: lock creation is now ATOMIC via fs.linkSync (write a fully
|
|
24
|
+
* populated temp file, then atomically link it into place). The pre-fix
|
|
25
|
+
* openSync('wx') created a zero-byte file that a separate writeSync later
|
|
26
|
+
* populated -- a racing peer could read the empty file mid-write, treat it
|
|
27
|
+
* as corrupt, unlink the winner's live lock, and run a second concurrent
|
|
28
|
+
* install. readLock + waitForUnlock additionally distinguish a transient
|
|
29
|
+
* empty mid-write file from genuinely corrupt JSON.
|
|
30
|
+
* - bug_001: STALE_THRESHOLD_MS is raised strictly above the 120s install
|
|
31
|
+
* timeout, and the staleness checks use AND not OR -- a lock is reclaimed
|
|
32
|
+
* only when it is BOTH old AND its owning pid is dead. A healthy install
|
|
33
|
+
* legitimately running 90-120s is no longer declared abandoned.
|
|
34
|
+
*
|
|
35
|
+
* Canon Part 8: zero network surface in this file. Pure node built-ins. The
|
|
36
|
+
* `npm install` itself is run by the caller (mcp-dep-heal.cjs), not here.
|
|
37
|
+
*
|
|
38
|
+
* HARD RULE: no em-dashes anywhere in this file (hyphens only).
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
const fs = require('node:fs');
|
|
42
|
+
const path = require('node:path');
|
|
43
|
+
|
|
44
|
+
// Name of the lock file, created inside the directory being installed into.
const LOCK_FILENAME = '.mindrian-npm-install.lock';

// Minimum age (ms) before a lock may be considered abandoned: 180s.
// Per the original note here, the caller's install runs under a 120s timeout,
// so this threshold must stay strictly above 120s -- otherwise a healthy
// install still running at the 90-120s mark could be declared abandoned and a
// peer would start a SECOND concurrent install (bug_001). 180s leaves 60s of
// headroom. The staleness check additionally requires the owning pid to be
// dead (AND, not OR), so an old-but-live lock is never reclaimed.
const STALE_THRESHOLD_MS = 180000;

// Longest (ms) the loser of the race waits for the winner before giving up:
// 200s. Kept strictly above STALE_THRESHOLD_MS so a lock that goes stale
// during the wait is seen as reclaimable before the wait itself times out.
const WAIT_TIMEOUT_MS = 200000;

// Pause (ms) between polls of the lock file while waiting.
const POLL_INTERVAL_MS = 200;

// An empty lock file may be a writer caught between creating the file and
// writing its contents (the non-atomic create path). readLock re-reads an
// empty file this many times, pausing this long between reads, before
// reporting it as 'EMPTY' (bug_004 defence-in-depth alongside the atomic
// linkSync create path).
const EMPTY_FILE_RETRY_ATTEMPTS = 5;
const EMPTY_FILE_RETRY_INTERVAL_MS = 20;
|
|
66
|
+
|
|
67
|
+
/**
 * Path of the install lock file inside `dir`.
 *
 * @param {string} dir - directory the lock guards
 * @returns {string} `dir` joined with LOCK_FILENAME
 */
function lockPath(dir) {
  const lockFile = path.join(dir, LOCK_FILENAME);
  return lockFile;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Portable synchronous short sleep (no extra dependency).
 *
 * Blocks the calling thread for about `ms` milliseconds using Atomics.wait on
 * a throwaway SharedArrayBuffer; if that throws (for example because
 * SharedArrayBuffer is unavailable), it busy-waits on the clock instead.
 *
 * A delay that is not a finite positive number returns immediately. Without
 * this guard a NaN or undefined delay would make Atomics.wait block with no
 * timeout at all.
 *
 * @param {number} ms - milliseconds to sleep
 * @returns {void}
 */
function sleepSync(ms) {
  const delay = Number(ms);
  if (!Number.isFinite(delay) || delay <= 0) return;
  try {
    // The cell holds 0 and nothing ever notifies it, so this simply times out.
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, delay);
  } catch (_) {
    // SharedArrayBuffer unavailable in some sandboxes -- busy-wait instead.
    const until = Date.now() + delay;
    while (Date.now() < until) { /* spin */ }
  }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Read and parse a lock file.
 *
 * Returns one of three things so callers can tell a transient empty
 * mid-write file from a genuinely corrupt one (bug_004):
 *   - the parsed lock object -> the file holds a JSON object
 *   - the string 'EMPTY'     -> the file exists but stayed empty /
 *                               whitespace-only across every retry; this is
 *                               a mid-write window OR a 0-byte leftover.
 *                               Caller should retry, not assume the lock is
 *                               dead.
 *   - null                   -> the file is missing, unreadable, holds
 *                               invalid JSON, or holds valid JSON that is not
 *                               a plain object (truly corrupt -- safe to
 *                               clear).
 *
 * Rejecting non-object JSON matters: a file containing the JSON string
 * "EMPTY" would otherwise be returned as the 'EMPTY' sentinel, and a bare
 * number or array would be handed to callers as if it were a lock.
 *
 * An empty file is re-read up to EMPTY_FILE_RETRY_ATTEMPTS times, sleeping
 * EMPTY_FILE_RETRY_INTERVAL_MS between reads.
 *
 * @param {string} p - lock file path
 * @returns {object|'EMPTY'|null}
 */
function readLock(p) {
  for (let attempt = 0; attempt < EMPTY_FILE_RETRY_ATTEMPTS; attempt++) {
    let raw;
    try {
      raw = fs.readFileSync(p, 'utf8');
    } catch (_) {
      return null; // missing or unreadable
    }

    if (raw.trim() !== '') {
      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch (_) {
        // Non-empty but not valid JSON -- genuinely corrupt.
        return null;
      }
      const isPlainObject =
        parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      return isPlainObject ? parsed : null;
    }

    // Empty / whitespace-only: possibly a mid-write window. Pause before the
    // next read, but do not sleep after the final attempt.
    if (attempt < EMPTY_FILE_RETRY_ATTEMPTS - 1) {
      sleepSync(EMPTY_FILE_RETRY_INTERVAL_MS);
    }
  }
  return 'EMPTY';
}
|
|
130
|
+
|
|
131
|
+
/**
 * Report whether a process with the given pid currently exists.
 *
 * Uses signal 0, which performs the existence / permission check without
 * delivering a signal. Only positive integers are probed: on POSIX a
 * negative pid addresses a process group (or every process, for -1), so
 * passing one through could report "alive" for a pid that names no process.
 *
 * @param {*} pid - candidate process id (anything but a positive integer is
 *                  reported as not alive)
 * @returns {boolean} true if the process exists, including when it exists
 *                    but we lack permission to signal it (EPERM)
 */
function pidAlive(pid) {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (e) {
    // EPERM means the process exists but is owned by another user -- still
    // alive. Anything else is treated as "no such process".
    return Boolean(e) && e.code === 'EPERM';
  }
}
|
|
141
|
+
|
|
142
|
+
/**
 * Whether a lock described by `data` is reclaimable as abandoned.
 *
 * bug_001 fix: this uses AND, not OR. A lock is reclaimed ONLY when it is BOTH
 * stale (older than STALE_THRESHOLD_MS) AND its owning pid is not alive. A
 * long-but-live install keeps its lock no matter how old it gets, and a
 * dead-owner lock that has not yet aged out keeps its lock too. Reclaim needs
 * both signals.
 *
 * A missing or non-numeric timestamp is treated as 0 (infinitely old). This
 * avoids a NaN age, which would compare false against the threshold and make
 * a dead owner's lock impossible to reclaim.
 *
 * @param {object} data - parsed lock contents (must be a valid lock object)
 * @returns {boolean}
 */
function isReclaimable(data) {
  const stamp = Number.isFinite(data.timestamp) ? data.timestamp : 0;
  const age = Date.now() - stamp;
  return age > STALE_THRESHOLD_MS && !pidAlive(data.pid);
}
|
|
161
|
+
|
|
162
|
+
/**
 * Try to acquire the install lock for `dir`.
 *
 * bug_004 fix: lock creation is ATOMIC. The payload ({pid, timestamp}) is
 * written to a per-process temp file FIRST, and only then published at the
 * lock path with fs.linkSync(tmp, p). linkSync fails with EEXIST if the
 * target already exists, so a lock created here is always observed
 * fully-written -- there is no zero-byte mid-write window for a racing peer
 * to misread as corrupt.
 *
 * When the lock path is already taken, the existing lock is inspected:
 *   - 'EMPTY'      -> left alone; sleep briefly and retry the acquire
 *   - corrupt/null -> removed, then retry
 *   - reclaimable  -> removed (stale AND owner dead), then retry
 *   - otherwise    -> another process holds it; return false
 * At most 3 attempts are made.
 *
 * The guard is best-effort: if the temp file cannot be written, if linkSync
 * fails for any reason other than EEXIST, or if all attempts are used up,
 * this returns true so the caller still runs the install (unguarded).
 *
 * NOTE(review): removing a corrupt or reclaimable lock is a separate
 * check-then-unlink, so two peers reclaiming at the same moment could in
 * principle both end up holding the lock -- confirm this window is acceptable.
 *
 * @param {string} dir - directory the install will run in (CLAUDE_PLUGIN_ROOT)
 * @returns {boolean} true if THIS process should run the install, false if
 *                    another process holds the lock (this process should call
 *                    waitForUnlock instead).
 */
function acquireInstallLock(dir) {
  const p = lockPath(dir);
  const tmp = p + '.' + process.pid + '.tmp';
  const payload = JSON.stringify({ pid: process.pid, timestamp: Date.now() });

  for (let attempt = 0; attempt < 3; attempt++) {
    // Write the payload to a private temp file, fully, before publishing it.
    try {
      fs.writeFileSync(tmp, payload);
    } catch (e) {
      // Cannot write the temp file (read-only dir, disk full, etc). Remove any
      // partial file the failed write left behind, then let the caller run
      // the install unguarded -- better than not healing.
      try { fs.unlinkSync(tmp); } catch (_) {}
      return true;
    }

    let linkError = null;
    try {
      // Atomic publish: link fails EEXIST if `p` already exists.
      fs.linkSync(tmp, p);
    } catch (e) {
      linkError = e;
    }
    // Won or lost, the temp file has served its purpose.
    try { fs.unlinkSync(tmp); } catch (_) {}

    if (linkError === null) {
      return true; // we hold the lock
    }
    if (linkError.code !== 'EEXIST') {
      // linkSync failed for a non-contention reason (filesystem without
      // hardlink support, cross-device, permissions). Fall back to running
      // the install unguarded -- better than not healing.
      return true;
    }

    // The lock path is already held. Inspect it.
    const data = readLock(p);
    if (data === 'EMPTY') {
      // Transient mid-write window (or a 0-byte leftover from a non-atomic
      // path). Do NOT unlink -- a peer may be about to populate it. Wait a
      // short interval and retry the acquire.
      sleepSync(EMPTY_FILE_RETRY_INTERVAL_MS * EMPTY_FILE_RETRY_ATTEMPTS);
      continue;
    }
    if (!data) {
      // Genuinely corrupt or unreadable -- clear and retry.
      try { fs.unlinkSync(p); } catch (_) {}
      continue;
    }
    if (isReclaimable(data)) {
      // Abandoned: BOTH stale AND its owner is dead. Reclaim it.
      try { fs.unlinkSync(p); } catch (_) {}
      continue;
    }
    // A live (or not-yet-reclaimable) process holds the lock -- this process
    // is the loser and must wait for the winner.
    return false;
  }
  // Pathological churn -- give up the guard and let the caller install.
  return true;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Release the install lock for `dir`. Never throws.
 *
 * The lock file is removed unless it names an owner pid different from this
 * process. Only the pid is compared; whether that other process is still
 * alive is not checked. A lock that reads as 'EMPTY' or as corrupt (null) has
 * no owner pid to compare, so it is removed as well.
 *
 * @param {string} dir - directory whose lock should be released
 * @returns {void}
 */
function releaseInstallLock(dir) {
  const target = lockPath(dir);
  try {
    const lock = readLock(target);
    const ownedByOther =
      Boolean(lock) &&
      lock !== 'EMPTY' &&
      Boolean(lock.pid) &&
      lock.pid !== process.pid;
    if (!ownedByOther) {
      fs.unlinkSync(target);
    }
  } catch (_) {
    // ENOENT or other -- silent.
  }
}
|
|
255
|
+
|
|
256
|
+
/**
 * Block until the lock for `dir` is gone, is found corrupt, becomes
 * reclaimable (stale AND owner dead), or WAIT_TIMEOUT_MS elapses.
 *
 * Synchronous by design: the original note here says this runs at MCP server
 * startup, before the transport connects, so a blocking poll is acceptable.
 *
 * An 'EMPTY' lock is treated as still held (bug_004): an empty file may be a
 * writer mid-write, not a cleared lock, so polling continues. A live owner
 * keeps the wait going however old the lock is (bug_001).
 *
 * @param {string} dir
 * @returns {boolean} true if the lock cleared (install presumably done),
 *                    false on timeout.
 */
function waitForUnlock(dir) {
  const target = lockPath(dir);
  const giveUpAt = Date.now() + WAIT_TIMEOUT_MS;

  for (;;) {
    if (Date.now() >= giveUpAt) return false;
    if (!fs.existsSync(target)) return true;

    const lock = readLock(target);
    if (lock !== 'EMPTY') {
      // Missing-by-now or corrupt -- treat as cleared.
      if (!lock) return true;
      // Stop waiting only when the lock is BOTH stale AND its owner is dead.
      if (isReclaimable(lock)) return true;
    }

    // Still held (or transiently empty): poll again after a short pause.
    sleepSync(POLL_INTERVAL_MS);
  }
}
|
|
291
|
+
|
|
292
|
+
module.exports = {
|
|
293
|
+
acquireInstallLock,
|
|
294
|
+
releaseInstallLock,
|
|
295
|
+
waitForUnlock,
|
|
296
|
+
readLock,
|
|
297
|
+
isReclaimable,
|
|
298
|
+
pidAlive,
|
|
299
|
+
LOCK_FILENAME,
|
|
300
|
+
STALE_THRESHOLD_MS,
|
|
301
|
+
WAIT_TIMEOUT_MS,
|
|
302
|
+
};
|