@mnemonik/scanner 5.120.1 → 5.131.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/doctor.ts ADDED
@@ -0,0 +1,239 @@
1
+ import { execFileSync } from 'child_process';
2
+ import { existsSync, readFileSync, readdirSync } from 'fs';
3
+ import { join } from 'path';
4
+ import { homedir } from 'os';
5
+ import { fileURLToPath } from 'url';
6
+ import { pidIsScanner } from './pid.js';
7
+
8
+ /**
9
+ * `mnemonik-scanner doctor` — the guard that turns the prose upgrade procedure
10
+ * into an executable check. It asserts the invariants that every past botched
11
+ * reinstall violated, and prints the one blessed remediation. Any single ✗
12
+ * exits non-zero so a wrapper (or a human) can react.
13
+ *
14
+ * Invariants:
15
+ * 1. Exactly one `mnemonik-scanner` on PATH, inside the user npm prefix
16
+ * (a second copy under /usr is the classic PATH-shadow version skew).
17
+ * 2. If a systemd user unit exists, it is active and its ExecStart points at
18
+ * that same binary.
19
+ * 3. Exactly one live daemon, and under systemd its PID == the unit MainPID
20
+ * (a stray `node …/scanner/dist/index.js` is a duplicate local-build daemon).
21
+ * 4. No leftover legacy per-project daemons.
22
+ * 5. Installed version is not behind npm latest (warning, not failure).
23
+ */
24
+
25
// Filesystem locations the doctor inspects. Everything is anchored at the
// invoking user's home directory.
const HOME = homedir();
// Per-user state directory that holds the PID file and the legacy daemons dir.
const MNEMONIK_DIR = join(HOME, '.mnemonik');
// PID recorded for the single daemon; read back in the "single daemon" check.
const PID_FILE = join(MNEMONIK_DIR, 'daemon.pid');
// Directory scanned for `*.pid` files left behind by legacy per-project daemons.
const LEGACY_DAEMONS_DIR = join(MNEMONIK_DIR, 'daemons');
// systemd *user* unit; its mere existence switches on the unit health checks.
const UNIT_FILE = join(HOME, '.config/systemd/user/mnemonik-scanner.service');
30
+
31
/**
 * Severity of one check result. The report maps each level to an icon, and
 * any `'fail'` entry makes the doctor exit non-zero; `'warn'` never does.
 */
type Level = 'ok' | 'warn' | 'fail';

/** A single line of the doctor report. */
interface Check {
  /** Outcome of the check. */
  level: Level;
  /** Short name of the invariant being asserted. */
  label: string;
  /** Optional specifics, printed after the label as `label: detail`. */
  detail?: string;
}
37
+
38
/**
 * Run an external command without a shell and capture its trimmed stdout.
 *
 * stdin and stderr are discarded. Any failure — command not found, non-zero
 * exit status, or exceeding the timeout — is reported as `null` rather than
 * thrown, so callers can treat "no answer" uniformly.
 *
 * @param cmd       Executable to run.
 * @param args      Arguments passed verbatim (no shell interpolation).
 * @param timeoutMs Maximum run time in milliseconds before the call fails.
 * @returns Trimmed stdout on success, otherwise `null`.
 */
function sh(cmd: string, args: string[], timeoutMs = 5000): string | null {
  let captured: string;
  try {
    captured = execFileSync(cmd, args, {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'ignore'],
      timeout: timeoutMs,
    });
  } catch {
    return null;
  }
  return captured.trim();
}
49
+
50
/**
 * Report whether `pid` is a live scanner process.
 *
 * Signal 0 only probes for existence; a failed probe means "not alive". A
 * successful probe is still not proof of identity, because after PID reuse
 * the recorded number may belong to an unrelated process — so the command
 * line is checked via `pidIsScanner` before the PID counts as alive.
 *
 * @param pid Process id to probe.
 * @returns `true` only when the process exists and looks like the scanner.
 */
function pidAlive(pid: number): boolean {
  let exists = true;
  try {
    process.kill(pid, 0);
  } catch {
    exists = false;
  }
  return exists && pidIsScanner(pid);
}
60
+
61
/**
 * Read the `version` field of the `package.json` that sits one directory
 * above this module.
 *
 * @returns The version string, or `null` when the manifest is missing,
 *          unreadable, not valid JSON, or has no `version` field.
 */
function installedVersion(): string | null {
  try {
    const moduleDir = fileURLToPath(new URL('.', import.meta.url));
    const manifestPath = join(moduleDir, '..', 'package.json');
    const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8')) as {
      version?: string;
    };
    return manifest.version ?? null;
  } catch {
    return null;
  }
}
72
+
73
/**
 * Decide whether `installed` is strictly older than `latest`.
 *
 * Compares the numeric `major.minor.patch` core. When the cores are equal, a
 * pre-release (`-suffix`) counts as older than the plain release. If either
 * string does not start with a dotted numeric triple, falls back to plain
 * inequality so an unparsable pair is still surfaced.
 */
function isVersionBehind(installed: string, latest: string): boolean {
  const core = (v: string): number[] | null => {
    const m = /^v?(\d+)\.(\d+)\.(\d+)/.exec(v.trim());
    return m ? [Number(m[1]), Number(m[2]), Number(m[3])] : null;
  };
  const isPrerelease = (v: string): boolean => /^v?\d+\.\d+\.\d+-/.test(v.trim());

  const mine = core(installed);
  const theirs = core(latest);
  if (!mine || !theirs) return installed !== latest;
  for (let i = 0; i < 3; i++) {
    if (mine[i]! !== theirs[i]!) return mine[i]! < theirs[i]!;
  }
  return isPrerelease(installed) && !isPrerelease(latest);
}

/**
 * Entry point of `mnemonik-scanner doctor`.
 *
 * Runs the install-health checks in order (binary on PATH, systemd user unit,
 * live daemon, legacy per-project daemons, version vs npm latest), prints one
 * line per result, and terminates the process: exit code 1 if any check is
 * `'fail'`, otherwise 0. Warnings never affect the exit code.
 *
 * This function does not return normally — it always ends in `process.exit`.
 */
export async function runDoctor(): Promise<void> {
  const checks: Check[] = [];

  // 1. Binary uniqueness + location.
  const prefix = sh('npm', ['config', 'get', 'prefix']);
  const whichOut = sh('which', ['-a', 'mnemonik-scanner']);
  // `which -a` prints one line per matching PATH entry, so a directory that
  // appears twice on PATH repeats the very same path. Only distinct paths are
  // genuine extra copies, hence the Set.
  const binPaths = [
    ...new Set(
      (whichOut ?? '')
        .split('\n')
        .map((s) => s.trim())
        .filter(Boolean)
    ),
  ];
  // Compare against "<prefix>/" so a sibling directory that merely shares the
  // prefix as a string (e.g. "<prefix>-old") is not mistaken for being inside.
  const prefixDir = prefix ? prefix.replace(/\/+$/, '') + '/' : null;
  if (binPaths.length === 0) {
    checks.push({
      level: 'fail',
      label: 'binary on PATH',
      detail: 'mnemonik-scanner not found on PATH',
    });
  } else if (binPaths.length > 1) {
    checks.push({
      level: 'fail',
      label: 'single binary on PATH',
      detail: `found ${binPaths.length} copies (PATH shadow → version skew):\n ${binPaths.join('\n ')}`,
    });
  } else if (prefix && prefixDir && !binPaths[0]!.startsWith(prefixDir)) {
    checks.push({
      level: 'warn',
      label: 'binary in user npm prefix',
      detail: `${binPaths[0]} is not under npm prefix ${prefix}`,
    });
  } else {
    checks.push({ level: 'ok', label: 'single binary on PATH', detail: binPaths[0] });
  }
  const binPath = binPaths.length === 1 ? binPaths[0]! : null;

  // 2. systemd unit health + ExecStart match.
  let unitMainPid: number | null = null;
  const hasUnit = existsSync(UNIT_FILE);
  if (hasUnit) {
    const show = sh('systemctl', [
      '--user',
      'show',
      'mnemonik-scanner',
      '-p',
      'ActiveState',
      '-p',
      'MainPID',
      '-p',
      'ExecStart',
    ]);
    // A failed `systemctl` call yields null; every probe below then simply
    // does not match, which reports the unit as not active.
    const active = /ActiveState=active/.test(show ?? '');
    const mainPidMatch = /MainPID=(\d+)/.exec(show ?? '');
    unitMainPid = mainPidMatch ? parseInt(mainPidMatch[1]!, 10) : null;
    const execMatch = /path=([^\s;]+)/.exec(show ?? '');
    const execPath = execMatch ? execMatch[1]! : null;

    if (!active) {
      checks.push({ level: 'fail', label: 'systemd unit active', detail: 'unit is not active' });
    } else if (binPath && execPath && execPath !== binPath) {
      checks.push({
        level: 'fail',
        label: 'ExecStart matches binary',
        detail: `unit runs ${execPath} but PATH resolves ${binPath}`,
      });
    } else {
      checks.push({
        level: 'ok',
        label: 'systemd unit active',
        detail: `MainPID ${unitMainPid ?? '?'}`,
      });
    }
  } else {
    checks.push({
      level: 'warn',
      label: 'systemd unit',
      detail: 'no user unit — daemon must be supervised another way',
    });
  }

  // 3. Single live daemon; under systemd it must equal MainPID.
  let filePid: number | null = null;
  try {
    filePid = parseInt(readFileSync(PID_FILE, 'utf-8').trim(), 10);
    if (Number.isNaN(filePid)) filePid = null;
  } catch {
    filePid = null;
  }
  // `args` is the POSIX name for the full command line; `cmd` is a procps-only
  // alias that BSD/macOS `ps` rejects, which made this check report "no daemon
  // process found" there.
  const psOut = sh('ps', ['-eo', 'pid,args']);
  const daemonPids = (psOut ?? '')
    .split('\n')
    // Only the long-running `start` daemon counts — never a `doctor`/`status`/
    // `log`/`stop` invocation (which also runs …/scanner/dist/index.js), and
    // never this doctor process itself.
    .filter((l) => /(mnemonik-scanner|scanner\/dist\/index\.js)/.test(l) && /\bstart\b/.test(l))
    .map((l) => parseInt(l.trim().split(/\s+/)[0]!, 10))
    .filter((n) => !Number.isNaN(n) && n !== process.pid);
  if (daemonPids.length === 0) {
    checks.push({ level: 'fail', label: 'daemon running', detail: 'no daemon process found' });
  } else if (daemonPids.length > 1) {
    checks.push({
      level: 'fail',
      label: 'single daemon',
      detail: `${daemonPids.length} daemon processes (duplicate) — PIDs ${daemonPids.join(', ')}`,
    });
  } else if (hasUnit && unitMainPid && daemonPids[0] !== unitMainPid) {
    checks.push({
      level: 'fail',
      label: 'daemon is the systemd one',
      detail: `running PID ${daemonPids[0]} != unit MainPID ${unitMainPid} (stray local-build daemon)`,
    });
  } else if (filePid && !pidAlive(filePid)) {
    checks.push({
      level: 'warn',
      label: 'PID file fresh',
      detail: `stale ${PID_FILE} (PID ${filePid} dead)`,
    });
  } else {
    checks.push({ level: 'ok', label: 'single daemon', detail: `PID ${daemonPids[0]}` });
  }

  // 4. Legacy per-project daemons.
  try {
    const legacy = readdirSync(LEGACY_DAEMONS_DIR)
      .filter((f) => f.endsWith('.pid'))
      .map((f) => parseInt(readFileSync(join(LEGACY_DAEMONS_DIR, f), 'utf-8').trim(), 10))
      .filter((n) => !Number.isNaN(n) && pidAlive(n));
    if (legacy.length > 0) {
      checks.push({
        level: 'fail',
        label: 'no legacy daemons',
        detail: `legacy per-project daemons alive: ${legacy.join(', ')}`,
      });
    }
  } catch {
    // dir absent — nothing to check
  }

  // 5. Version vs npm latest (informational).
  const local = installedVersion();
  const latest = sh('npm', ['view', '@mnemonik/scanner', 'version'], 15000);
  if (local && latest && isVersionBehind(local, latest)) {
    // Only "behind" is drift; a local build that is ahead of the registry is fine.
    checks.push({
      level: 'warn',
      label: 'up to date',
      detail: `installed ${local}, npm latest ${latest} — run: make scanner-deploy`,
    });
  } else if (local && !latest) {
    // The registry could not be queried, so "up to date" cannot be claimed.
    checks.push({
      level: 'warn',
      label: 'up to date',
      detail: `installed ${local}, npm latest unknown (registry unreachable?)`,
    });
  } else if (local) {
    checks.push({ level: 'ok', label: 'up to date', detail: `v${local}` });
  }

  // Report.
  const icon = { ok: '✓', warn: '⚠', fail: '✗' } as const;
  console.log('mnemonik-scanner doctor\n');
  for (const c of checks) {
    console.log(` ${icon[c.level]} ${c.label}${c.detail ? `: ${c.detail}` : ''}`);
  }
  const failed = checks.filter((c) => c.level === 'fail');
  if (failed.length > 0) {
    console.log(
      `\n${failed.length} problem(s). Canonical fix: \`make scanner-deploy\` (build → publish shared-first → npm i -g → systemctl restart → re-verify).\n` +
        'Do NOT hand-copy dist/ or run `make daemon-*` on a systemd host — those are what cause this.'
    );
    process.exit(1);
  }
  console.log('\nAll invariants hold.');
  process.exit(0);
}
package/src/fileLog.ts ADDED
@@ -0,0 +1,67 @@
1
+ import { appendFileSync, renameSync, statSync } from 'fs';
2
+
3
/**
 * Build a best-effort, size-capped append function for `logFile`.
 *
 * The returned function appends each chunk to the file and tracks the running
 * byte count in memory, seeded from the file's current size (0 if it cannot
 * be stat'ed). When a non-empty file would exceed `maxSize` with the incoming
 * chunk, the file is first renamed to `<logFile>.old` and the counter is
 * reset. Every filesystem error is swallowed: logging must never take the
 * daemon down.
 *
 * @param logFile Path of the log file to append to.
 * @param maxSize Byte threshold that triggers rotation.
 * @returns A function that appends one chunk; empty chunks are ignored.
 */
export function createRotatingFileSink(logFile: string, maxSize: number): (chunk: string) => void {
  let written: number;
  try {
    written = statSync(logFile).size;
  } catch {
    written = 0;
  }

  const rotate = (): void => {
    try {
      renameSync(logFile, logFile + '.old');
    } catch {
      // Rename failed (e.g. cross-device): keep writing to the current file
      // instead of dropping the line.
    }
    // The counter restarts whether or not the rename succeeded.
    written = 0;
  };

  return function append(chunk: string): void {
    if (!chunk) return;
    try {
      const incoming = Buffer.byteLength(chunk);
      // An empty file is never rotated, even for an oversized first chunk.
      const wouldOverflow = written > 0 && written + incoming > maxSize;
      if (wouldOverflow) rotate();
      appendFileSync(logFile, chunk);
      written += incoming;
    } catch {
      // Best-effort only — a logging failure must not break the daemon.
    }
  };
}
38
+
39
/**
 * Mirror everything written to `process.stdout` and `process.stderr` into
 * `logFile`, in addition to the streams' normal destination.
 *
 * Each stream's `write` is replaced by a wrapper that first hands string and
 * Buffer chunks to a rotating file sink, then forwards the untouched
 * arguments to the original `write`. Mirroring is best-effort: any error in
 * the mirror path is ignored and the real write still happens. Intended to be
 * called once, at daemon start, before any output worth capturing.
 *
 * @param logFile Path of the mirror log file.
 * @param maxSize Rotation threshold in bytes, passed to the sink.
 */
export function installFileLogging(logFile: string, maxSize: number): void {
  const mirror = createRotatingFileSink(logFile, maxSize);

  const tee = (stream: typeof process.stdout): void => {
    const passthrough = stream.write.bind(stream) as (...args: unknown[]) => boolean;

    const patched = (chunk: unknown, encoding?: unknown, cb?: unknown): boolean => {
      try {
        let text: string | null = null;
        if (typeof chunk === 'string') {
          text = chunk;
        } else if (Buffer.isBuffer(chunk)) {
          // `encoding` may actually be the callback in the (chunk, cb) overload.
          const enc = typeof encoding === 'string' ? (encoding as BufferEncoding) : 'utf8';
          text = chunk.toString(enc);
        }
        if (text !== null) mirror(text);
      } catch {
        // ignore — mirroring is best-effort
      }
      // Forward all three slots untouched so both overloads,
      // (chunk, cb) and (chunk, enc, cb), reach the real stream intact.
      return passthrough(chunk, encoding, cb);
    };

    stream.write = patched as typeof stream.write;
  };

  tee(process.stdout);
  tee(process.stderr);
}
package/src/index.ts CHANGED
@@ -5,6 +5,9 @@ import { existsSync } from 'fs';
5
5
  import { join } from 'path';
6
6
  import { homedir } from 'os';
7
7
  import { ScannerDaemon } from './daemon.js';
8
+ import { installFileLogging } from './fileLog.js';
9
+ import { runDoctor } from './doctor.js';
10
+ import { pidIsScanner } from './pid.js';
8
11
 
9
12
  const DEFAULT_SERVER = 'https://api.mnemonik.dev';
10
13
  const MNEMONIK_DIR = join(homedir(), '.mnemonik');
@@ -21,7 +24,7 @@ interface ScannerConfig {
21
24
  }
22
25
 
23
26
  interface CliArgs {
24
- command: 'start' | 'stop' | 'status' | 'log' | 'help';
27
+ command: 'start' | 'stop' | 'status' | 'log' | 'doctor' | 'help';
25
28
  key?: string;
26
29
  server?: string;
27
30
  roots?: string[];
@@ -31,7 +34,7 @@ function parseCliArgs(): CliArgs {
31
34
  const args = process.argv.slice(2);
32
35
  const command = (args[0] ?? 'help') as CliArgs['command'];
33
36
 
34
- if (!['start', 'stop', 'status', 'log', 'help'].includes(command)) {
37
+ if (!['start', 'stop', 'status', 'log', 'doctor', 'help'].includes(command)) {
35
38
  return { command: 'help' };
36
39
  }
37
40
 
@@ -81,18 +84,6 @@ async function checkConfigPermissions(): Promise<void> {
81
84
  }
82
85
  }
83
86
 
84
- async function rotateLogIfNeeded(): Promise<void> {
85
- try {
86
- const { rename } = await import('fs/promises');
87
- const s = await stat(LOG_FILE);
88
- if (s.size > MAX_LOG_SIZE) {
89
- await rename(LOG_FILE, LOG_FILE + '.old');
90
- }
91
- } catch {
92
- // Log file doesn't exist yet
93
- }
94
- }
95
-
96
87
  async function acquireLock(retried = false): Promise<boolean> {
97
88
  try {
98
89
  const { open: fsOpen } = await import('fs/promises');
@@ -113,7 +104,12 @@ async function acquireLock(retried = false): Promise<boolean> {
113
104
  const pid = parseInt(existing.trim(), 10);
114
105
  try {
115
106
  process.kill(pid, 0);
116
- return false; // Process alive, lock is valid
107
+ // Alive is not enough — after PID reuse the number can belong to an
108
+ // unrelated process. Only an actual scanner holds the lock.
109
+ if (pidIsScanner(pid)) {
110
+ return false; // Process alive and is the scanner — lock is valid
111
+ }
112
+ // Live but foreign PID — treat as stale and reclaim below.
117
113
  } catch {
118
114
  // Holder is dead — remove stale lock and retry once
119
115
  }
@@ -136,10 +132,13 @@ async function readPid(): Promise<number | null> {
136
132
  // Check if process is actually alive
137
133
  try {
138
134
  process.kill(pid, 0);
139
- return pid;
140
135
  } catch {
141
136
  return null; // Stale PID
142
137
  }
138
+ // Alive but foreign (PID reuse) is just as stale — never report it as
139
+ // the daemon, and never let handleStop() SIGTERM it.
140
+ if (!pidIsScanner(pid)) return null;
141
+ return pid;
143
142
  } catch {
144
143
  return null;
145
144
  }
@@ -154,6 +153,7 @@ Usage:
154
153
  mnemonik-scanner stop Stop the running daemon
155
154
  mnemonik-scanner status Show daemon status
156
155
  mnemonik-scanner log Tail the scanner log file
156
+ mnemonik-scanner doctor Check install health (drift detection)
157
157
  mnemonik-scanner help Show this help
158
158
 
159
159
  Options (for start):
@@ -225,6 +225,13 @@ async function handleStart(cli: CliArgs): Promise<void> {
225
225
  // Ensure directories exist
226
226
  await mkdir(MNEMONIK_DIR, { recursive: true });
227
227
 
228
+ // Mirror all console output into scanner.log so `mnemonik-scanner log`
229
+ // reflects live activity regardless of supervisor. Under systemd, stdout is
230
+ // wired to the journal socket, not this file — without the tee the on-disk
231
+ // log goes stale and `log` shows nothing current. Install before any daemon
232
+ // output so the whole session is captured.
233
+ installFileLogging(LOG_FILE, MAX_LOG_SIZE);
234
+
228
235
  // Save config for future runs (never save env var key to file)
229
236
  const configToSave: ScannerConfig = {
230
237
  roots,
@@ -240,8 +247,6 @@ async function handleStart(cli: CliArgs): Promise<void> {
240
247
  await writeConfig(configToSave);
241
248
  await checkConfigPermissions();
242
249
 
243
- await rotateLogIfNeeded();
244
-
245
250
  const locked = await acquireLock();
246
251
  if (!locked) {
247
252
  const pid = await readPid();
@@ -342,7 +347,11 @@ async function handleStatus(): Promise<void> {
342
347
  const oldPid = parseInt(content.trim(), 10);
343
348
  try {
344
349
  process.kill(oldPid, 0);
345
- staleOldDaemons.push(`${f} (PID ${oldPid})`);
350
+ // Same PID-reuse guard as readPid(): only report genuinely live
351
+ // scanner processes, not whatever now occupies a recycled PID.
352
+ if (pidIsScanner(oldPid)) {
353
+ staleOldDaemons.push(`${f} (PID ${oldPid})`);
354
+ }
346
355
  } catch {
347
356
  // Dead process, just a stale file
348
357
  }
@@ -388,6 +397,9 @@ async function main(): Promise<void> {
388
397
  case 'log':
389
398
  await handleLog();
390
399
  break;
400
+ case 'doctor':
401
+ await runDoctor();
402
+ break;
391
403
  case 'help':
392
404
  default:
393
405
  printHelp();
package/src/pid.ts ADDED
@@ -0,0 +1,37 @@
1
+ import { execFileSync } from 'child_process';
2
+ import { readFileSync } from 'fs';
3
+
4
/**
 * Command-line signature of a scanner process, used as the PID-reuse guard by
 * the CLI lock paths (index.ts) and by doctor.
 *
 * A successful `process.kill(pid, 0)` shows only that *something* is running
 * at that PID. After a reboot or ordinary PID recycling, the number stored in
 * daemon.pid may belong to an unrelated process; SIGTERMing it from `stop`,
 * or refusing to `start` because of it, would be wrong. A live PID therefore
 * counts as the scanner only when its command line matches this pattern:
 * either the installed binary name or the built entry point path. Doctor's
 * `ps` scan matches the same signature and additionally requires a `start`
 * argument.
 */
const SCANNER_CMD_PATTERN = /(mnemonik-scanner|scanner\/dist\/index\.js)/;
15
+
16
/**
 * Fetch the command line of process `pid` as a single string.
 *
 * Two sources are tried in order, and the first that does not throw wins:
 *   1. `/proc/<pid>/cmdline` (Linux), with the NUL argv separators turned
 *      into spaces;
 *   2. `ps -o args= -p <pid>`, for systems without a readable /proc
 *      (e.g. macOS), trimmed of surrounding whitespace.
 *
 * @param pid Process id to inspect.
 * @returns The command line, or `null` when neither source could supply one
 *          (no such process, or `ps` unavailable).
 */
function processCmdline(pid: number): string | null {
  const sources: Array<() => string> = [
    () => readFileSync(`/proc/${pid}/cmdline`, 'utf-8').replace(/\0/g, ' '),
    () =>
      execFileSync('ps', ['-o', 'args=', '-p', String(pid)], {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'ignore'],
        timeout: 5000,
      }).trim(),
  ];

  for (const read of sources) {
    try {
      return read();
    } catch {
      // This source is unavailable — fall through to the next one.
    }
  }
  return null; // Identity cannot be confirmed.
}
33
+
34
/**
 * Tell whether the process at `pid` looks like the scanner, judged by its
 * command line.
 *
 * @param pid Process id to check.
 * @returns `true` when a command line could be read and it matches the
 *          scanner signature; `false` when it does not match or could not
 *          be read at all.
 */
export function pidIsScanner(pid: number): boolean {
  const cmdline = processCmdline(pid);
  if (cmdline === null) return false;
  return SCANNER_CMD_PATTERN.test(cmdline);
}
package/src/watcher.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { watch, type FSWatcher } from 'fs';
2
2
  import { join, relative } from 'path';
3
- import { readdir } from 'fs/promises';
3
+ import { readdir, stat } from 'fs/promises';
4
4
  import { isGitBoundary } from '@mnemonik/shared';
5
5
 
6
6
  const SKIP_DIRS = new Set([
@@ -27,7 +27,15 @@ export type ChangeHandler = (changedFiles: string[]) => void;
27
27
  export type ErrorHandler = (err: Error) => void;
28
28
 
29
29
  export class FileWatcher {
30
- private watchers: FSWatcher[] = [];
30
+ // Keyed by directory so a watcher can be closed and re-attached when the
31
+ // directory is deleted and recreated at the same path.
32
+ private watchers = new Map<string, FSWatcher>();
33
+ private watchedDirs = new Set<string>();
34
+ // Inode per watched directory: a delete+recreate faster than the delete
35
+ // event's stat() presents as "directory exists and is already watched",
36
+ // but the live watcher is bound to the OLD inode and is inert. Comparing
37
+ // inodes at event time detects the swap so the subtree can re-attach.
38
+ private watchedDirInodes = new Map<string, number>();
31
39
  private pendingFiles = new Set<string>();
32
40
  private flushTimer: ReturnType<typeof setTimeout> | null = null;
33
41
  private debounceMs: number;
@@ -49,10 +57,12 @@ export class FileWatcher {
49
57
  }
50
58
 
51
59
  stop(): void {
52
- for (const w of this.watchers) {
60
+ for (const w of this.watchers.values()) {
53
61
  w.close();
54
62
  }
55
- this.watchers = [];
63
+ this.watchers.clear();
64
+ this.watchedDirs.clear();
65
+ this.watchedDirInodes.clear();
56
66
  if (this.flushTimer) {
57
67
  clearTimeout(this.flushTimer);
58
68
  this.flushTimer = null;
@@ -74,14 +84,29 @@ export class FileWatcher {
74
84
  private async watchDir(dir: string): Promise<void> {
75
85
  const dirName = dir.split('/').pop() ?? '';
76
86
  if (SKIP_DIRS.has(dirName)) return;
87
+ // Dedup guard — the change callback re-enters watchDir for new
88
+ // subdirectories, and a rapid create/rename burst can resolve the same
89
+ // path twice before the first watch is registered.
90
+ if (this.watchedDirs.has(dir)) return;
91
+ this.watchedDirs.add(dir);
77
92
 
78
93
  try {
79
- const watcher = watch(dir, { persistent: true }, (_event, filename) => {
94
+ // Record the inode BEFORE attaching so watchNewDir can distinguish a
95
+ // benign event on this directory from a recreate-at-same-path.
96
+ this.watchedDirInodes.set(dir, (await stat(dir)).ino);
97
+ const watcher = watch(dir, { persistent: true }, (event, filename) => {
80
98
  if (!filename) return;
81
99
  const fullPath = join(dir, filename);
82
100
  const relPath = relative(this.rootPath, fullPath);
83
101
  this.pendingFiles.add(relPath);
84
102
  this.scheduleFlush();
103
+ // fs.watch is non-recursive: the initial recursion below only covers
104
+ // directories that existed at start(). When this event is a newly
105
+ // created directory, attach a watcher to it too — otherwise the
106
+ // subtree is a permanent blind spot until restart. Fire-and-forget;
107
+ // watchNewDir no-ops for plain files, skip dirs, git boundaries, and
108
+ // already-watched paths.
109
+ void this.watchNewDir(fullPath, event);
85
110
  });
86
111
 
87
112
  watcher.on('error', (err) => {
@@ -91,7 +116,7 @@ export class FileWatcher {
91
116
  }
92
117
  });
93
118
 
94
- this.watchers.push(watcher);
119
+ this.watchers.set(dir, watcher);
95
120
 
96
121
  const entries = await readdir(dir, { withFileTypes: true });
97
122
  for (const entry of entries) {
@@ -104,8 +129,91 @@ export class FileWatcher {
104
129
  await this.watchDir(child);
105
130
  }
106
131
  }
132
+ } catch (err) {
133
+ // Watch registration failed. Never fatal for the subtree's parent, but
134
+ // the *reason* matters: inotify/fd limit exhaustion means silently
135
+ // growing blind spots, while permission-denied is a benign property of
136
+ // the directory itself. Drop the bookkeeping (and any watcher that did
137
+ // attach before the failure) so a later retry can re-enter cleanly.
138
+ this.watchedDirs.delete(dir);
139
+ this.watchedDirInodes.delete(dir);
140
+ const stale = this.watchers.get(dir);
141
+ if (stale) {
142
+ stale.close();
143
+ this.watchers.delete(dir);
144
+ }
145
+ const code = (err as NodeJS.ErrnoException).code;
146
+ if (code === 'ENOSPC' || code === 'EMFILE' || code === 'ENFILE') {
147
+ console.warn(
148
+ `[scanner] Watch limit reached (${code}) — subtree unwatched: ${dir}. ` +
149
+ 'Raise fs.inotify.max_user_watches or trim scanner roots.'
150
+ );
151
+ // Surface to the daemon only for the root — losing the root means the
152
+ // whole project is blind; a subtree gap must not tear the project down
153
+ // (the daemon's onError removes the project entirely).
154
+ if (dir === this.rootPath && this.onError) {
155
+ this.onError(err as Error);
156
+ }
157
+ } else {
158
+ // Permission denied or inaccessible directory
159
+ console.warn(`[scanner] Cannot watch ${dir}: ${(err as Error).message}`);
160
+ }
161
+ }
162
+ }
163
+
164
/**
 * React to a change event by attaching a watcher when the event path turns
 * out to be a directory that needs one. Invoked from the per-directory
 * change callback for every event path.
 *
 * Returns without doing anything for non-directories and for git boundaries
 * (skip-listed names are filtered inside watchDir). For a path that is
 * already tracked, the existing subtree is torn down and re-attached when
 * the event is a 'rename' or the inode differs from the recorded one;
 * otherwise the event is treated as noise. If the path cannot be stat'ed
 * (or anything else throws), all bookkeeping for the path and its
 * descendants is dropped so a later recreate can be watched again.
 *
 * @param fullPath  Absolute path the event referred to.
 * @param eventType Event name reported by fs.watch ('rename' or 'change').
 */
private async watchNewDir(fullPath: string, eventType: string): Promise<void> {
  try {
    // Stat comes first: a delete event carries the same path as the original
    // create, and a stale watchedDirs entry must not hide the disappearance
    // (the failed stat lands in the catch below).
    const info = await stat(fullPath);
    if (!info.isDirectory()) return;

    if (this.watchedDirs.has(fullPath)) {
      // Tracked already — yet a delete+recreate that outran this stat looks
      // the same, with the live watcher still bound to the old, dead inode.
      // 'rename' means the entry's identity changed, so always re-attach:
      // an inode comparison alone is not enough because the filesystem may
      // hand the freed inode straight back to the recreated directory.
      // 'change' is attribute noise from writes inside the child, so the
      // inode comparison is what keeps those events cheap.
      const sameInode = this.watchedDirInodes.get(fullPath) === info.ino;
      if (eventType !== 'rename' && sameInode) return;
      this.unwatchSubtree(fullPath);
    }

    const boundary = await isGitBoundary(fullPath);
    if (boundary) return;
    await this.watchDir(fullPath);
  } catch {
    // The path vanished between the event and the stat. Purge it and
    // everything beneath it; otherwise the dedup guards would block
    // re-attachment forever once the path is recreated (build-output wipes,
    // codegen, rm -rf && mkdir).
    this.unwatchSubtree(fullPath);
  }
}
199
+ }
200
+
201
/**
 * Drop all tracking for `root` and for every watched directory beneath it:
 * remove the entries from watchedDirs and watchedDirInodes, and close and
 * remove any watcher registered for them. A recursive delete produces no
 * per-descendant fs.watch events, so the entire path prefix has to be purged
 * here for a recreated tree to be re-watched in full.
 *
 * @param root Directory whose subtree should be forgotten.
 */
private unwatchSubtree(root: string): void {
  const below = `${root}/`;
  // Snapshot the matches first, then tear each one down.
  const doomed = [...this.watchedDirs].filter(
    (dir) => dir === root || dir.startsWith(below)
  );

  for (const dir of doomed) {
    this.watchedDirs.delete(dir);
    this.watchedDirInodes.delete(dir);
    const watcher = this.watchers.get(dir);
    if (watcher) {
      watcher.close();
      this.watchers.delete(dir);
    }
  }
}
111
219
  }