moflo 4.10.7 → 4.10.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.claude/guidance/shipped/moflo-cli-reference.md +1 -1
  2. package/.claude/guidance/shipped/moflo-memory-strategy.md +1 -1
  3. package/.claude/guidance/shipped/moflo-yaml-reference.md +4 -4
  4. package/.claude/skills/memory-optimization/SKILL.md +1 -1
  5. package/.claude/skills/memory-patterns/SKILL.md +3 -3
  6. package/.claude/skills/vector-search/SKILL.md +2 -2
  7. package/README.md +5 -5
  8. package/bin/hooks.mjs +3 -2
  9. package/bin/index-all.mjs +3 -2
  10. package/bin/index-guidance.mjs +4 -4
  11. package/bin/lib/daemon-port.mjs +66 -0
  12. package/bin/lib/process-manager.mjs +3 -3
  13. package/dist/src/cli/commands/daemon.js +31 -10
  14. package/dist/src/cli/commands/doctor-checks-config.js +182 -10
  15. package/dist/src/cli/commands/doctor-fixes.js +208 -3
  16. package/dist/src/cli/commands/doctor-registry.js +16 -1
  17. package/dist/src/cli/commands/memory.js +8 -8
  18. package/dist/src/cli/commands/neural.js +8 -6
  19. package/dist/src/cli/config/moflo-config.js +68 -3
  20. package/dist/src/cli/index.js +18 -19
  21. package/dist/src/cli/init/moflo-yaml-template.js +1 -1
  22. package/dist/src/cli/mcp-server.js +59 -10
  23. package/dist/src/cli/mcp-tools/memory-tools.js +46 -27
  24. package/dist/src/cli/memory/auto-memory-bridge.js +1 -1
  25. package/dist/src/cli/memory/controllers/attestation-log.js +1 -1
  26. package/dist/src/cli/memory/controllers/causal-graph.js +1 -1
  27. package/dist/src/cli/memory/daemon-write-client.js +178 -49
  28. package/dist/src/cli/memory/database-provider.js +58 -3
  29. package/dist/src/cli/memory/intelligence.js +54 -26
  30. package/dist/src/cli/memory/memory-initializer.js +21 -11
  31. package/dist/src/cli/movector/model-router.js +1 -1
  32. package/dist/src/cli/movector/q-learning-router.js +2 -2
  33. package/dist/src/cli/services/daemon-dashboard.js +94 -25
  34. package/dist/src/cli/services/daemon-lock.js +390 -3
  35. package/dist/src/cli/services/daemon-port.js +252 -0
  36. package/dist/src/cli/version.js +1 -1
  37. package/package.json +2 -2
  38. package/dist/src/cli/config-adapter.js +0 -182
@@ -9,9 +9,11 @@
9
9
  * and verifying the process command line before trusting a "live" PID.
10
10
  */
11
11
  import * as fs from 'fs';
12
- import { dirname, join } from 'path';
12
+ import { dirname, join, sep } from 'path';
13
13
  import { fileURLToPath } from 'url';
14
- import { execSync } from 'child_process';
14
+ import { execFileSync, execSync } from 'child_process';
15
+ import { atomicWriteFileSync } from '../shared/utils/atomic-file-write.js';
16
+ import { normalizeProjectRoot } from './daemon-port.js';
15
17
  const LOCK_FILENAME = 'daemon.lock';
16
18
  const LOCK_LABEL = 'moflo-daemon';
17
19
  /** Resolve the lock file path for a project root. */
@@ -48,6 +50,18 @@ export function readOwnMofloVersion() {
48
50
  /**
49
51
  * Try to acquire the daemon lock atomically.
50
52
  *
53
+ * Before the EEXIST atomic write, runs a same-project orphan scan (#1150):
54
+ * enumerates moflo daemon processes whose command line is rooted at THIS
55
+ * project's CLI binary. If any are found that the lock doesn't account for,
56
+ * they get SIGTERM'd (3s graceful → SIGKILL) before we try to acquire.
57
+ * Catches the failure mode where the lock was unlinked (e.g. by an old
58
+ * doctor-fix or a crashed shutdown handler) but the daemon process is still
59
+ * alive — without this scan, a fresh daemon would spawn alongside it.
60
+ *
61
+ * Tests can opt out of the scan via `MOFLO_TEST_SKIP_ORPHAN_SCAN=1` (the same
62
+ * env-var also disables the post-spawn fallback in #1086 so vitest workers
63
+ * don't pay the 8s Windows introspection cost on every acquire).
64
+ *
51
65
  * @returns `{ acquired: true }` on success,
52
66
  * `{ acquired: false, holder: pid }` if another daemon owns the lock.
53
67
  */
@@ -58,6 +72,14 @@ export function acquireDaemonLock(projectRoot, pid = process.pid) {
58
72
  if (!fs.existsSync(stateDir)) {
59
73
  fs.mkdirSync(stateDir, { recursive: true });
60
74
  }
75
+ // #1150 — same-project orphan scan. Runs BEFORE the atomic write because
76
+ // (a) it lets us reclaim the lock after a crash that left the daemon
77
+ // running but the lock unlinked, and
78
+ // (b) the second-spawn case (lock absent, prior daemon alive) is exactly
79
+ // the failure mode that produced two-daemons-per-project in #1145's
80
+ // waxstack audit.
81
+ const lockHolderPid = readLockPayload(lock)?.pid;
82
+ reapSameProjectOrphans(projectRoot, pid, lockHolderPid);
61
83
  const payload = {
62
84
  pid,
63
85
  startedAt: Date.now(),
@@ -108,6 +130,48 @@ export function releaseDaemonLock(projectRoot, pid = process.pid, force = false)
108
130
  safeUnlink(lock);
109
131
  }
110
132
  }
133
+ /**
134
+ * Stamp the daemon's bound HTTP port into the lock file (#1145).
135
+ *
136
+ * Called by `daemon-dashboard.startDashboard()` after a successful bind so
137
+ * clients can read the actual port (vs. guessing the fixed default and
138
+ * silently hitting another project's daemon).
139
+ *
140
+ * Best-effort by design:
141
+ * - Missing lock → no-op (the daemon didn't acquire the lock; this is
142
+ * a test or unusual startup path).
143
+ * - Lock owned by a different PID → no-op (we don't overwrite locks we
144
+ * don't own).
145
+ * - Write failure → swallowed (the daemon still serves; clients fall
146
+ * back to the deterministic port resolution).
147
+ *
148
+ * Returns `true` on a successful stamp, `false` otherwise. The boolean is
149
+ * informational — production callers don't branch on it.
150
+ */
151
+ export function writeLockPort(projectRoot, port, pid = process.pid) {
152
+ if (!Number.isFinite(port) || port < 1 || port > 65535)
153
+ return false;
154
+ const lock = lockPath(projectRoot);
155
+ const existing = readLockPayload(lock);
156
+ if (!existing)
157
+ return false;
158
+ if (existing.pid !== pid)
159
+ return false;
160
+ if (existing.port === port)
161
+ return true;
162
+ const updated = { ...existing, port };
163
+ try {
164
+ // Atomic write-then-rename: a client reading mid-write never sees a
165
+ // truncated JSON. The vulnerable window is precisely a re-stamp after
166
+ // a daemon recycle on a different port, when clients are likeliest
167
+ // to be probing the lock for the new port.
168
+ atomicWriteFileSync(lock, JSON.stringify(updated));
169
+ return true;
170
+ }
171
+ catch {
172
+ return false;
173
+ }
174
+ }
111
175
  /**
112
176
  * Atomically transfer the daemon lock to a new PID (e.g. parent → child).
113
177
  *
@@ -125,6 +189,9 @@ export function transferDaemonLock(projectRoot, newPid, fromPid = process.pid) {
125
189
  startedAt: Date.now(),
126
190
  label: LOCK_LABEL,
127
191
  version: existing.version ?? readOwnMofloVersion(),
192
+ // Preserve the port field across PID transfers (#1145) — the child
193
+ // process inherits the parent's binding, so the port is still valid.
194
+ ...(existing.port != null ? { port: existing.port } : {}),
128
195
  };
129
196
  try {
130
197
  // Atomic overwrite — no unlink/recreate gap
@@ -214,11 +281,33 @@ function safeUnlink(path) {
214
281
  function isProcessAlive(pid) {
215
282
  try {
216
283
  process.kill(pid, 0);
217
- return true;
218
284
  }
219
285
  catch {
220
286
  return false;
221
287
  }
288
+ // Linux zombie handling: `kill(pid, 0)` succeeds for zombie processes
289
+ // (exited but not yet reaped). A zombie can't write to the DB or hold
290
+ // a lock, so treating it as alive exhausts the kill window polling a
291
+ // corpse. Read /proc/<pid>/stat and treat 'Z' as dead — same logic as
292
+ // bin/lib/daemon-recycler.mjs:51-69. The case surfaces in tests AND
293
+ // in any production path where the daemon and our process share a
294
+ // parent (foreground mode, vitest worker that spawned a child); on
295
+ // standard detached-daemon production paths init reaps so this is a
296
+ // no-op there.
297
+ if (process.platform === 'linux') {
298
+ try {
299
+ const stat = fs.readFileSync(`/proc/${pid}/stat`, 'utf-8');
300
+ const lastParen = stat.lastIndexOf(')');
301
+ if (lastParen !== -1 && stat.charAt(lastParen + 2) === 'Z')
302
+ return false;
303
+ }
304
+ catch (err) {
305
+ if (err && err.code === 'ENOENT')
306
+ return false;
307
+ // /proc unavailable — fall through with the kill(0) verdict.
308
+ }
309
+ }
310
+ return true;
222
311
  }
223
312
  /**
224
313
  * Cross-platform check: is this PID actually a moflo/claude-flow daemon?
@@ -293,4 +382,302 @@ function isDaemonProcessUnix(pid) {
293
382
  return true; // fallback
294
383
  }
295
384
  }
385
+ // ---------------------------------------------------------------------------
386
+ // Same-project orphan detection (#1150)
387
+ // ---------------------------------------------------------------------------
388
+ /**
389
+ * Enumerate moflo daemon node processes whose command line is rooted at THIS
390
+ * project's CLI binary (consumer install OR dogfood-source).
391
+ *
392
+ * Returns PIDs. Used by `acquireDaemonLock` (pre-acquire reap) and the
393
+ * `daemon-orphan` doctor check/fix.
394
+ *
395
+ * Matching strategy: cmdline must contain BOTH a moflo daemon marker
396
+ * (`daemon ... start` + `moflo`/`claude-flow`) AND one of the two
397
+ * project-rooted cli.js paths. This keeps daemons for OTHER projects out of
398
+ * scope — they have their own project root and (post-#1145) their own port.
399
+ *
400
+ * Cross-platform:
401
+ * - Windows: `Get-CimInstance Win32_Process` via PowerShell (single shell
402
+ * invocation that returns all node processes with command lines).
403
+ * - Linux: `/proc/<pid>/cmdline` walk.
404
+ * - macOS: `ps -axo pid,command` (no `/proc`).
405
+ *
406
+ * Falls back to `[]` if the platform probe fails — better to spawn an extra
407
+ * daemon than to wrongly kill a foreign-project one.
408
+ */
409
+ export function findProjectDaemonPids(projectRoot, opts = {}) {
410
+ if (process.env.MOFLO_TEST_SKIP_ORPHAN_SCAN === '1')
411
+ return [];
412
+ const candidates = projectCliCandidates(projectRoot);
413
+ if (candidates.length === 0)
414
+ return [];
415
+ let processes;
416
+ if (opts.pidsHint) {
417
+ processes = opts.pidsHint;
418
+ }
419
+ else {
420
+ try {
421
+ if (process.platform === 'win32')
422
+ processes = listMofloDaemonsWindows();
423
+ else if (process.platform === 'linux')
424
+ processes = listMofloDaemonsLinux();
425
+ else
426
+ processes = listMofloDaemonsUnix();
427
+ }
428
+ catch {
429
+ return [];
430
+ }
431
+ }
432
+ return processes.filter(p => cmdlineMatchesProject(p.cmdline, candidates)).map(p => p.pid);
433
+ }
434
+ /**
435
+ * Candidate absolute paths for THIS project's daemon CLI binary.
436
+ *
437
+ * Returns the two layouts moflo ships with — consumer install
438
+ * (`<root>/node_modules/moflo/bin/cli.js`) and dogfood-source
439
+ * (`<root>/bin/cli.js`) — normalised for case-insensitive substring match
440
+ * via the shared `normalizeProjectRoot` helper (which realpaths + lowercases
441
+ * on Windows, matching the #1145 daemon-identity surface so the two checks
442
+ * agree about which root a process belongs to).
443
+ *
444
+ * Never includes the bare `projectRoot` prefix as a match candidate: an
445
+ * unrelated process (editor, npm script) whose cmdline happens to mention
446
+ * the project path would otherwise false-positive once the daemon-marker
447
+ * regex also incidentally matched.
448
+ */
449
+ function projectCliCandidates(projectRoot) {
450
+ const cliRelatives = [
451
+ join('node_modules', 'moflo', 'bin', 'cli.js'),
452
+ join('bin', 'cli.js'),
453
+ ];
454
+ // realpath both the input AND each candidate path — on macOS the
455
+ // command-line records the realpath'd form (`/private/var/folders/...`)
456
+ // while the cwd-rooted candidate stays under `/var/folders/...`.
457
+ const normRoot = normalizeProjectRoot(projectRoot);
458
+ const out = new Set();
459
+ for (const rel of cliRelatives) {
460
+ // Apply normalizeForMatch ON TOP of normalizeProjectRoot so the
461
+ // substring match also tolerates mixed separators in the spawn-recorded
462
+ // cmdline ("\\" vs "/"). `normalizeProjectRoot` realpaths + lowercases
463
+ // on Windows; `normalizeForMatch` collapses slashes.
464
+ out.add(normalizeForMatch(normalizeProjectRoot(join(projectRoot, rel))));
465
+ out.add(normalizeForMatch(normalizeProjectRoot(join(normRoot, rel))));
466
+ }
467
+ return Array.from(out).filter(s => s.length > 0);
468
+ }
469
+ function cmdlineMatchesProject(cmdline, candidates) {
470
+ // Daemon marker — must look like a moflo daemon to even consider matching.
471
+ if (!/daemon[\s\S]{0,40}start/i.test(cmdline))
472
+ return false;
473
+ if (!/moflo|claude-flow/i.test(cmdline))
474
+ return false;
475
+ // Substring match against case-folded, slash-normalised forms.
476
+ const norm = normalizeForMatch(cmdline);
477
+ return candidates.some(c => c.length > 0 && norm.includes(c));
478
+ }
479
+ function normalizeForMatch(p) {
480
+ // Collapse mixed slashes to the OS separator so the substring check works
481
+ // regardless of how spawn quoted the path. Case-fold on Windows.
482
+ const collapsed = p.replace(/[\\/]+/g, sep);
483
+ return process.platform === 'win32' ? collapsed.toLowerCase() : collapsed;
484
+ }
485
+ function listMofloDaemonsWindows() {
486
+ // Use execFileSync so the PS command is passed as a single argument vector
487
+ // (no cmd.exe quote-mangling). The `@($res)` array-cast handles the
488
+ // single-result case (`ConvertTo-Json` emits a bare object otherwise, and
489
+ // `-AsArray` is PS 6+ only). The `if ($res)` guard avoids emitting an
490
+ // empty string that JSON.parse can't read.
491
+ const script = "$res = Get-CimInstance Win32_Process -Filter \"Name='node.exe'\" " +
492
+ "| Select-Object ProcessId, CommandLine; " +
493
+ "if ($res) { @($res) | ConvertTo-Json -Compress -Depth 3 }";
494
+ let raw;
495
+ try {
496
+ raw = execFileSync('powershell', ['-NoProfile', '-Command', script], {
497
+ encoding: 'utf-8',
498
+ timeout: 10000,
499
+ windowsHide: true,
500
+ maxBuffer: 16 * 1024 * 1024,
501
+ });
502
+ }
503
+ catch {
504
+ return [];
505
+ }
506
+ if (!raw.trim())
507
+ return [];
508
+ let parsed;
509
+ try {
510
+ parsed = JSON.parse(raw);
511
+ }
512
+ catch {
513
+ return [];
514
+ }
515
+ if (!Array.isArray(parsed))
516
+ parsed = [parsed];
517
+ return parsed
518
+ .filter((p) => p && typeof p.CommandLine === 'string' && p.CommandLine.length > 0)
519
+ .map((p) => ({ pid: Number(p.ProcessId), cmdline: String(p.CommandLine) }))
520
+ .filter((p) => Number.isFinite(p.pid) && p.pid > 0);
521
+ }
522
+ function listMofloDaemonsLinux() {
523
+ const out = [];
524
+ let entries;
525
+ try {
526
+ entries = fs.readdirSync('/proc');
527
+ }
528
+ catch {
529
+ return [];
530
+ }
531
+ for (const entry of entries) {
532
+ if (!/^\d+$/.test(entry))
533
+ continue;
534
+ const pid = parseInt(entry, 10);
535
+ try {
536
+ // cmdline is NUL-separated argv. Replace NULs with spaces for matching.
537
+ const raw = fs.readFileSync(`/proc/${pid}/cmdline`, 'utf-8');
538
+ if (!raw)
539
+ continue;
540
+ const cmdline = raw.replace(/\0+$/, '').replace(/\0/g, ' ');
541
+ if (!/\bnode\b/i.test(cmdline) && !/\.js\b/.test(cmdline))
542
+ continue;
543
+ out.push({ pid, cmdline });
544
+ }
545
+ catch { /* process exited mid-scan / no perms — skip */ }
546
+ }
547
+ return out;
548
+ }
549
+ function listMofloDaemonsUnix() {
550
+ let raw;
551
+ try {
552
+ // -axww = all processes including session leaders (BSD form portable to
553
+ // macOS/Linux), unlimited line width so long cmdlines don't truncate.
554
+ // execFileSync (no shell) keeps quoting consistent with the rest of the
555
+ // codebase.
556
+ raw = execFileSync('ps', ['-axww', '-o', 'pid=,command='], {
557
+ encoding: 'utf-8',
558
+ timeout: 5000,
559
+ maxBuffer: 16 * 1024 * 1024,
560
+ });
561
+ }
562
+ catch {
563
+ return [];
564
+ }
565
+ const out = [];
566
+ for (const line of raw.split('\n')) {
567
+ const trimmed = line.trim();
568
+ if (!trimmed)
569
+ continue;
570
+ const sepIdx = trimmed.indexOf(' ');
571
+ if (sepIdx === -1)
572
+ continue;
573
+ const pid = parseInt(trimmed.slice(0, sepIdx), 10);
574
+ if (!Number.isFinite(pid) || pid <= 0)
575
+ continue;
576
+ const cmdline = trimmed.slice(sepIdx + 1).trim();
577
+ if (!/\bnode\b/i.test(cmdline) && !/\.js\b/.test(cmdline))
578
+ continue;
579
+ out.push({ pid, cmdline });
580
+ }
581
+ return out;
582
+ }
583
+ /**
584
+ * Same-project orphan reap. Called from `acquireDaemonLock` BEFORE the atomic
585
+ * write. PIDs that match the lock-holder OR our own process are skipped.
586
+ *
587
+ * Best-effort: failures during kill are swallowed because the next step
588
+ * (atomic exclusive write of the lock) is the source of truth — if the
589
+ * orphan survives, the lock-acquire still fails cleanly and the caller
590
+ * reports a stale lock-holder rather than spawning a duplicate.
591
+ *
592
+ * Exported for the `Daemon Orphan` healer fix which reuses the same logic.
593
+ */
594
+ export function reapSameProjectOrphans(projectRoot, ownPid = process.pid, lockHolderPid,
595
+ /**
596
+ * Pre-computed project-daemon PIDs. Skips re-running the OS process scan
597
+ * when the caller already has them — the `Daemon Orphan` doctor-fix
598
+ * computes them once via `findProjectDaemonPids` and then reuses the
599
+ * same list here.
600
+ */
601
+ pidsHint) {
602
+ const reaped = [];
603
+ const survived = [];
604
+ const allPids = pidsHint ?? findProjectDaemonPids(projectRoot);
605
+ const foreignPids = allPids.filter(p => {
606
+ if (p === ownPid)
607
+ return false;
608
+ if (lockHolderPid != null && p === lockHolderPid)
609
+ return false;
610
+ return true;
611
+ });
612
+ if (foreignPids.length === 0)
613
+ return { reaped, survived };
614
+ for (const pid of foreignPids) {
615
+ if (terminateOrphan(pid))
616
+ reaped.push(pid);
617
+ else
618
+ survived.push(pid);
619
+ }
620
+ return { reaped, survived };
621
+ }
622
+ /**
623
+ * Terminate a same-project daemon orphan: SIGTERM → 3s graceful poll →
624
+ * SIGKILL (POSIX) / `taskkill /F /T` (Windows). Returns true once the PID
625
+ * is no longer alive.
626
+ */
627
+ function terminateOrphan(pid) {
628
+ if (!isProcessAlive(pid))
629
+ return true;
630
+ try {
631
+ if (process.platform === 'win32') {
632
+ // No SIGTERM equivalent for our detached Node daemon on Windows — go
633
+ // straight to /F /T (same shape as bin/lib/daemon-recycler.mjs and
634
+ // killBackgroundDaemon). execFileSync keeps args un-shell-quoted.
635
+ try {
636
+ execFileSync('taskkill', ['/F', '/T', '/PID', String(pid)], {
637
+ windowsHide: true,
638
+ timeout: 3000,
639
+ });
640
+ }
641
+ catch { /* already exiting */ }
642
+ }
643
+ else {
644
+ try {
645
+ process.kill(pid, 'SIGTERM');
646
+ }
647
+ catch { /* already dead */ }
648
+ }
649
+ }
650
+ catch { /* fall through to liveness poll */ }
651
+ // Graceful window
652
+ const gracefulDeadline = Date.now() + 3000;
653
+ while (Date.now() < gracefulDeadline && isProcessAlive(pid)) {
654
+ sleepSyncMs(100);
655
+ }
656
+ if (!isProcessAlive(pid))
657
+ return true;
658
+ // Escalate to SIGKILL on POSIX (Windows already used /F)
659
+ if (process.platform !== 'win32') {
660
+ try {
661
+ process.kill(pid, 'SIGKILL');
662
+ }
663
+ catch { /* already dead */ }
664
+ }
665
+ const killDeadline = Date.now() + 1000;
666
+ while (Date.now() < killDeadline && isProcessAlive(pid)) {
667
+ sleepSyncMs(100);
668
+ }
669
+ return !isProcessAlive(pid);
670
+ }
671
+ function sleepSyncMs(ms) {
672
+ try {
673
+ const buf = new Int32Array(new SharedArrayBuffer(4));
674
+ Atomics.wait(buf, 0, 0, ms);
675
+ }
676
+ catch {
677
+ // SharedArrayBuffer disabled (rare — exotic Node flags); fall back to a
678
+ // tight loop. Caller's wait windows are bounded so this is safe.
679
+ const deadline = Date.now() + ms;
680
+ while (Date.now() < deadline) { /* spin */ }
681
+ }
682
+ }
296
683
  //# sourceMappingURL=daemon-lock.js.map
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Daemon port resolution — single source of truth for the moflo daemon's
3
+ * HTTP port.
4
+ *
5
+ * Before #1145, `DEFAULT_DASHBOARD_PORT` in `daemon-dashboard.ts` and
6
+ * `DEFAULT_DAEMON_PORT` in `daemon-write-client.ts` were two separate `3117`
7
+ * literals. The server tried 3117 → 3126 on `EADDRINUSE`; the client always
8
+ * POSTed to 3117. When a second moflo project's daemon bound 3118+, that
9
+ * project's clients still hit 3117 → silent cross-project read/write
10
+ * routing. See `docs/internal/1145-daemon-port-collision-analysis.md`.
11
+ *
12
+ * This module collapses both literals into one resolver. Every entry point
13
+ * MUST go through `resolveProjectPort()` (or read the `port` field an
14
+ * already-bound daemon recorded in `.moflo/daemon.lock`).
15
+ *
16
+ * Resolution precedence — server and client agree:
17
+ * 1. `MOFLO_DAEMON_PORT` env override (consumer pin / smoke harness — wins)
18
+ * 2. Lock-file `port` field (client-only — server WRITES this after bind)
19
+ * 3. `resolveProjectPort(projectRoot)` — sha256(path) → 33000+(hash%1000)
20
+ * 4. `LEGACY_DEFAULT_PORT` (3117) — read-only fallback for ancient locks
21
+ * with no port field and no env override; warns once via stderr
22
+ *
23
+ * @module cli/services/daemon-port
24
+ */
25
+ import { createHash } from 'node:crypto';
26
+ import { existsSync, readFileSync, realpathSync } from 'node:fs';
27
+ import { join } from 'node:path';
28
+ import * as http from 'node:http';
29
+ /**
30
+ * Deterministic port range. 33000-33999 — clear of every common dev-server
31
+ * port (3000, 3001, 4000, 5000, 5173, 8000, 8080), every well-known service
32
+ * (≤ 1024), and the moflo legacy default (3117). Collision probability across
33
+ * N active projects is ~N/1000; the identity check (`isDaemonIdentityMatch`)
34
+ * is the safety net when collisions do hit.
35
+ */
36
+ export const PORT_RANGE_BASE = 33000;
37
+ export const PORT_RANGE_SIZE = 1000;
38
+ /**
39
+ * Legacy default port — used by daemons that haven't been upgraded past
40
+ * 4.10.7 and locks that never recorded a port field. Kept as a read-only
41
+ * fallback so a fresh client probing an old daemon still finds it; clients
42
+ * that fall through to this path emit a one-time deprecation warn.
43
+ *
44
+ * NEW code must NEVER reference this constant outside `daemon-port.ts`. The
45
+ * regression guard at `tests/system/no-fixed-3117.test.ts` enforces this.
46
+ */
47
+ export const LEGACY_DEFAULT_PORT = 3117;
48
+ /**
49
+ * Resolve the canonical port for a given project root.
50
+ *
51
+ * No file or network I/O — only the `MOFLO_DAEMON_PORT` env override is read (and wins when set). Same project path → same port across daemon
52
+ * restarts, across processes, across machines (the hash is deterministic).
53
+ *
54
+ * @param projectRoot absolute path to the project root (use `findProjectRoot()`)
55
+ * @returns the `MOFLO_DAEMON_PORT` override when set and valid; otherwise a port in `[PORT_RANGE_BASE, PORT_RANGE_BASE + PORT_RANGE_SIZE)`
56
+ */
57
+ export function resolveProjectPort(projectRoot) {
58
+ const envPort = readEnvPortOverride();
59
+ if (envPort != null)
60
+ return envPort;
61
+ const hash = createHash('sha256').update(projectRoot).digest();
62
+ return PORT_RANGE_BASE + (hash.readUInt16BE(0) % PORT_RANGE_SIZE);
63
+ }
64
+ /**
65
+ * Read `MOFLO_DAEMON_PORT` from the environment. Returns the parsed port
66
+ * (1-65535) or `null` if unset/invalid.
67
+ *
68
+ * Exported so callers can short-circuit lock-file reads when the env is
69
+ * pinned — useful in the smoke harness and CI where the env is the
70
+ * authoritative pin.
71
+ */
72
+ export function readEnvPortOverride() {
73
+ const raw = process.env.MOFLO_DAEMON_PORT;
74
+ if (!raw)
75
+ return null;
76
+ const n = parseInt(raw, 10);
77
+ if (!Number.isFinite(n) || n < 1 || n > 65535)
78
+ return null;
79
+ return n;
80
+ }
81
+ /**
82
+ * Resolve the daemon port a CLIENT should connect to for a given project.
83
+ *
84
+ * Reads `.moflo/daemon.lock` to discover the actual bound port — if the
85
+ * daemon collided with another project in its deterministic-range bucket
86
+ * and the dashboard retry loop bumped it forward, the lock reflects reality
87
+ * and the client follows. Falls back to `resolveProjectPort` when the lock
88
+ * is absent (daemon not yet started), the lock has no `port` field (old
89
+ * daemon predating #1145), or the port reads as invalid.
90
+ *
91
+ * Never throws — every I/O failure degrades to the deterministic fallback.
92
+ */
93
+ export function resolveClientPort(projectRoot) {
94
+ const envPort = readEnvPortOverride();
95
+ if (envPort != null)
96
+ return envPort;
97
+ try {
98
+ const lockFile = join(projectRoot, '.moflo', 'daemon.lock');
99
+ if (existsSync(lockFile)) {
100
+ const raw = readFileSync(lockFile, 'utf-8');
101
+ const lock = JSON.parse(raw);
102
+ const lockPort = typeof lock?.port === 'number' ? lock.port : null;
103
+ if (lockPort && Number.isFinite(lockPort) && lockPort > 0 && lockPort < 65536) {
104
+ return lockPort;
105
+ }
106
+ }
107
+ }
108
+ catch {
109
+ // Corrupt or unreadable lock — fall through to deterministic port.
110
+ }
111
+ return resolveProjectPort(projectRoot);
112
+ }
113
+ /**
114
+ * Build the list of ports the SERVER should try, in order, when starting
115
+ * the daemon. First entry is the deterministic port; the rest are the
116
+ * collision-fallback range. Capped at `PORT_RANGE_SIZE` so the loop can
117
+ * never wrap past the bucket.
118
+ *
119
+ * If the env override is set, the list collapses to that single port —
120
+ * the consumer pinned it on purpose; respect their choice and hard-fail
121
+ * if it's already in use.
122
+ */
123
+ export function serverPortCandidates(projectRoot, maxAttempts = 10) {
124
+ const envPort = readEnvPortOverride();
125
+ if (envPort != null)
126
+ return [envPort];
127
+ const base = resolveProjectPort(projectRoot);
128
+ const attempts = Math.min(Math.max(1, maxAttempts), PORT_RANGE_SIZE);
129
+ const ports = [];
130
+ for (let i = 0; i < attempts; i++) {
131
+ ports.push(PORT_RANGE_BASE + ((base - PORT_RANGE_BASE + i) % PORT_RANGE_SIZE));
132
+ }
133
+ return ports;
134
+ }
135
+ // ============================================================================
136
+ // Identity probe — shared by client + healer (#1145)
137
+ // ============================================================================
138
+ /**
139
+ * Normalize project root paths for identity comparison.
140
+ *
141
+ * - Resolve symlinks via `realpathSync`. macOS aliases `/var/folders`
142
+ * → `/private/var/folders`; one side of the daemon/client pair may
143
+ * resolve the symlink and the other may not, producing false-positive
144
+ * identity mismatches on otherwise-matching project roots (caught by
145
+ * the consumer-smoke harness on macOS + Ubuntu after the original
146
+ * #1145 fix). Ubuntu hits the same shape via `/tmp` symlinks under
147
+ * certain mount configurations.
148
+ * - Lowercase on Windows so `C:\Users\...` and `c:\users\...` compare
149
+ * equal. POSIX is case-sensitive — pass through.
150
+ *
151
+ * Never throws — a path that doesn't exist (or that we lack permission
152
+ * to stat) falls back to the input string. The fallback case is safe
153
+ * because the symlink-mismatch class only fires on paths that DO exist
154
+ * (both daemon and client just resolved them).
155
+ */
156
+ export function normalizeProjectRoot(p) {
157
+ let resolved = p;
158
+ try {
159
+ resolved = realpathSync(p);
160
+ }
161
+ catch { /* path doesn't exist / EACCES — use input */ }
162
+ return process.platform === 'win32' ? resolved.toLowerCase() : resolved;
163
+ }
164
+ /**
165
+ * Send `GET /api/health` to `127.0.0.1:<port>` and parse the daemon's
166
+ * identity payload. Never throws — every failure mode maps to a
167
+ * `DaemonIdentityProbe` variant.
168
+ *
169
+ * Shared by `daemon-write-client.ts` (per-request safety net) and
170
+ * `doctor-checks-config.ts` (`checkDaemonIdentity` subcheck).
171
+ */
172
+ export function probeDaemonHealth(port, timeoutMs) {
173
+ return new Promise((resolve) => {
174
+ let done = false;
175
+ const finish = (r) => {
176
+ if (done)
177
+ return;
178
+ done = true;
179
+ resolve(r);
180
+ };
181
+ const req = http.get({ host: '127.0.0.1', port, path: '/api/health', timeout: timeoutMs }, (res) => {
182
+ const status = res.statusCode ?? 0;
183
+ if (status === 404) {
184
+ res.resume();
185
+ finish({ kind: 'legacy' });
186
+ return;
187
+ }
188
+ if (status !== 200) {
189
+ res.resume();
190
+ finish({ kind: 'unreachable' });
191
+ return;
192
+ }
193
+ let buf = '';
194
+ res.setEncoding('utf8');
195
+ res.on('data', (chunk) => { buf += chunk; });
196
+ res.on('end', () => {
197
+ try {
198
+ const data = JSON.parse(buf);
199
+ if (typeof data?.projectRoot === 'string' && data.projectRoot.length > 0) {
200
+ finish({ kind: 'identity', projectRoot: data.projectRoot });
201
+ return;
202
+ }
203
+ // 200 but no identity field — pre-#1145 daemon that 200s on
204
+ // every URL. Same handling as a 404.
205
+ finish({ kind: 'legacy' });
206
+ }
207
+ catch {
208
+ finish({ kind: 'legacy' });
209
+ }
210
+ });
211
+ res.on('error', () => finish({ kind: 'unreachable' }));
212
+ });
213
+ req.on('error', () => finish({ kind: 'unreachable' }));
214
+ req.on('timeout', () => { req.destroy(); finish({ kind: 'unreachable' }); });
215
+ });
216
+ }
217
+ /** Retry backoff schedule for HTTP liveness probes (#1163 — Windows CI race). */
218
+ const PROBE_RETRY_BACKOFF_MS = [50, 200, 800];
219
+ /**
220
+ * {@link probeDaemonHealth} with retry on transient `unreachable` results.
221
+ *
222
+ * The bare one-shot probe was tripping in CI when a daemon was mid-boot on
223
+ * Windows: the lockfile said v4.10.8 was alive but the HTTP server hadn't
224
+ * accepted /api/health yet, and the doctor check raised "unreachable" inside
225
+ * the 1500ms window. This wrapper retries 3× at 50/200/800 ms — total
226
+ * worst-case ~1s extra — and only on `unreachable`. `identity` and `legacy`
227
+ * are terminal: a daemon answering with a different project root or 404 is
228
+ * a real signal, not a race.
229
+ *
230
+ * Mirrors the retry pattern from `bin/lib/file-sync.mjs:syncWithRetry`
231
+ * (`feedback_transient_retry_circuit_breaker` — every transient-error op
232
+ * uses 50/200/800ms backoff).
233
+ *
234
+ * Worst-case elapsed = (PROBE_RETRY_BACKOFF_MS.length + 1) × timeoutMs
235
+ * + sum(PROBE_RETRY_BACKOFF_MS). For doctor's 1500ms timeout that's
236
+ * 4 × 1500 + 1050 ≈ 7s, fully off the hot path; callers picking a tighter
237
+ * timeout should account for the 4× multiplier.
238
+ */
239
+ export async function probeDaemonHealthWithRetry(port, timeoutMs) {
240
+ let last = { kind: 'unreachable' };
241
+ const maxAttempts = PROBE_RETRY_BACKOFF_MS.length + 1;
242
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
243
+ if (attempt > 0) {
244
+ await new Promise((resolve) => setTimeout(resolve, PROBE_RETRY_BACKOFF_MS[attempt - 1]));
245
+ }
246
+ last = await probeDaemonHealth(port, timeoutMs);
247
+ if (last.kind !== 'unreachable')
248
+ return last;
249
+ }
250
+ return last;
251
+ }
252
+ //# sourceMappingURL=daemon-port.js.map