@modelstatus/cli 0.1.34 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,156 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { hasCmd, run } from "./shell.js";
4
+ import { detectInLine } from "../detect/core.js";
5
+ import { redactValue } from "../redact.js";
6
+ import { scanConfigEntries, entriesFromKV } from "./configscan.js";
7
+
8
+ /* Pure parsers (unit-tested). */
9
+
10
/**
 * Parse `gh variable list` output into `[{ name, value }]`.
 *
 * Actions variables are non-secret config, so the VALUE is kept for scanning.
 * Two input shapes are accepted:
 *   - `--json name,value` output: a JSON array of `{ name, value }` objects;
 *   - the plain table: NAME / VALUE / UPDATED columns separated by a tab or by
 *     two or more spaces.
 *
 * @param {unknown} stdout raw command output (null/undefined are treated as empty)
 * @returns {{name: string, value: string}[]} one entry per variable; `value` is "" when absent
 */
export function parseVariableList(stdout) {
  const text = String(stdout || "").trim();
  if (!text) return [];

  try {
    const parsed = JSON.parse(text);
    if (Array.isArray(parsed)) {
      // `item?.name` keeps one null / non-object element from throwing and
      // discarding every valid entry in the array.
      return parsed
        .filter((item) => item?.name)
        .map((item) => ({ name: item.name, value: item.value ?? "" }));
    }
  } catch {
    /* not JSON — fall through to the table parse */
  }

  const identifier = /^[A-Za-z_][A-Za-z0-9_]*$/;
  const out = [];
  for (const raw of text.split(/\r?\n/)) {
    const line = raw.trim();
    if (!line) continue;
    if (/^[-\s|]+$/.test(line)) continue; // separator rule

    const [name, value = ""] = line.split(/\t|\s{2,}/).map((col) => col.trim());
    // Header row only: first column NAME *and* second column VALUE. Checking the
    // first column alone would silently drop a real variable called NAME.
    if (/^name$/i.test(name) && /^value$/i.test(value)) continue;
    if (name && identifier.test(name)) out.push({ name, value });
  }
  return out;
}
36
+
37
/**
 * Parse `gh secret list` output into secret NAMES only (values are never read).
 *
 * Two input shapes are accepted:
 *   - `--json name` output: a JSON array of `{ name }` objects;
 *   - the plain table: NAME / UPDATED columns separated by a tab or by two or
 *     more spaces. A `NAME  UPDATED` header row is skipped.
 *
 * @param {unknown} stdout raw command output (null/undefined are treated as empty)
 * @returns {string[]} secret names, in input order
 */
export function parseGhSecretList(stdout) {
  const text = String(stdout || "").trim();
  if (!text) return [];

  try {
    const parsed = JSON.parse(text);
    // `item?.name` keeps one null element from throwing and discarding the rest.
    if (Array.isArray(parsed)) return parsed.map((item) => item?.name).filter(Boolean);
  } catch {
    /* not JSON — fall through to the table parse */
  }

  const identifier = /^[A-Za-z_][A-Za-z0-9_]*$/;
  const names = [];
  for (const raw of text.split(/\r?\n/)) {
    const [name, second = ""] = raw
      .trim()
      .split(/\s{2,}|\t/)
      .map((col) => col.trim())
      .filter(Boolean);
    if (!name) continue; // blank line
    // The table header would otherwise pass the identifier check and be
    // reported as a secret called "NAME". Requiring the UPDATED column keeps a
    // real secret named NAME (whose second column is a timestamp) intact.
    if (/^name$/i.test(name) && /^updated\b/i.test(second)) continue;
    if (identifier.test(name)) names.push(name);
  }
  return names;
}
56
+
57
/**
 * Scan the text of one workflow file line by line and build a Candidate for
 * every model string `detectInLine` reports.
 *
 * @param {string} text      workflow file body (falsy is treated as empty)
 * @param {string} relPath   path used in the locator and as `source_path`
 * @param {object} compiled  compiled detection patterns, handed to detectInLine
 * @param {string} [env]     environment label; "unknown" when falsy
 * @returns {object[]} candidates located at `github-actions://workflows/<relPath>#L<n>`
 *   (1-based line), each with a redacted snippet cut to 160 characters
 */
export function scanWorkflowText(text, relPath, compiled, env) {
  const candidates = [];
  const emitted = new Set();
  const lines = String(text || "").split(/\r?\n/);

  for (const [index, line] of lines.entries()) {
    const lineNo = index + 1;
    for (const model_string of detectInLine(line, compiled)) {
      const locator = `github-actions://workflows/${relPath}#L${lineNo}`;
      // The same model id is reported at most once per line.
      const dedupeKey = `${model_string}|${locator}`;
      if (emitted.has(dedupeKey)) continue;
      emitted.add(dedupeKey);

      candidates.push({
        model_string,
        source_type: "github-actions",
        location_label: locator,
        source_path: relPath,
        source_line: lineNo,
        environment: env || "unknown",
        snippet: redactValue(line.trim()).slice(0, 160),
      });
    }
  }
  return candidates;
}
82
+
83
/**
 * Source descriptor for GitHub Actions: variables, secret names and workflow files.
 *
 * It is flagged `integration: true` and needs the `gh` CLI. `collect` gathers
 * candidates from three places:
 *   (a) `gh variable list --json name,value` — each name/value pair goes through
 *       entriesFromKV + scanConfigEntries;
 *   (b) `gh secret list` — names only, always paired with an EMPTY value;
 *   (c) every `*.yml` / `*.yaml` file directly inside `<root>/.github/workflows`,
 *       scanned line by line with scanWorkflowText.
 *
 * opts read here: { root, ghRepo, ghEnvironment, env }. When `ghEnvironment` is
 * set it is passed to `gh` as `--env` and takes precedence over `opts.env` for
 * variables and secrets; workflow files always use `opts.env`.
 */
export const githubActionsSource = {
  id: "github-actions",
  label: "GitHub Actions variables + secrets + workflows",
  kind: "cli",
  integration: true,
  envTag: "unknown",

  /** Whether the `gh` command is present, as reported by hasCmd. */
  async available() {
    return hasCmd("gh");
  },

  /** Probe login state with `gh auth status`; on failure `reason` is the first output line. */
  async authState() {
    const probe = await run("gh", ["auth", "status"]);
    // `gh auth status` writes to stderr even on success, so `ok` is the signal.
    if (probe.ok) return { connected: true, mode: "auth-status" };
    const detail = probe.stderr || probe.stdout || "not logged in";
    return { connected: false, mode: "auth-status", reason: detail.split("\n")[0] };
  },

  /** Collect candidates from variables, secret names and workflow files (see the object doc). */
  async collect(opts, compiled) {
    const ghRepo = opts?.ghRepo;
    const ghEnv = opts?.ghEnvironment || "";
    const repoTag = ghRepo || "repo";

    // Shared scoping flags for both `gh` list commands: repo first, then env.
    const scopeFlags = [];
    if (ghRepo) scopeFlags.push("--repo", ghRepo);
    if (ghEnv) scopeFlags.push("--env", ghEnv);

    const found = [];

    // Scan one name/value pair from the given surface ("variables" | "secrets").
    const scanPair = (name, value, surface) => {
      const locator = `github-actions://${repoTag}/${surface}#${name}`;
      const entries = entriesFromKV(name, value, locator, ghEnv || repoTag);
      return scanConfigEntries(entries, compiled, { sourceType: "github-actions", env: ghEnv || opts?.env });
    };

    // (a) Variables: JSON output is requested so the value column is unambiguous.
    const varList = await run("gh", ["variable", "list", ...scopeFlags, "--json", "name,value"]);
    if (varList.ok) {
      for (const { name, value } of parseVariableList(varList.stdout)) {
        found.push(...scanPair(name, value, "variables"));
      }
    }

    // (b) Secrets: names only, scanned with an empty value.
    const secretList = await run("gh", ["secret", "list", ...scopeFlags]);
    if (secretList.ok) {
      for (const name of parseGhSecretList(secretList.stdout)) {
        found.push(...scanPair(name, "", "secrets"));
      }
    }

    // (c) Workflow files: a flat listing of <root>/.github/workflows.
    const wfDir = path.join(opts?.root || ".", ".github", "workflows");
    let dirEntries;
    try {
      dirEntries = fs.readdirSync(wfDir);
    } catch {
      dirEntries = []; // no workflows dir — the gh results alone are fine
    }
    for (const file of dirEntries.filter((entry) => /\.ya?ml$/.test(entry))) {
      let body;
      try {
        body = fs.readFileSync(path.join(wfDir, file), "utf8");
      } catch {
        continue; // unreadable entry: skip it, keep scanning the rest
      }
      found.push(...scanWorkflowText(body, file, compiled, opts?.env));
    }

    return found;
  },
};
@@ -5,22 +5,43 @@ import { awsSecretsSource } from "./aws.js";
5
5
  import { k8sSource } from "./k8s.js";
6
6
  import { helmSource } from "./helm.js";
7
7
  import { sqlSource } from "./sql.js";
8
+ import { awsLambdaSource } from "./aws-lambda.js";
9
+ import { vercelSource } from "./vercel.js";
10
+ import { supabaseEdgeSource } from "./supabase.js";
11
+ import { githubActionsSource } from "./github-actions.js";
12
+ import { enabledIds, getEnvTag } from "../integrations.js";
8
13
 
9
14
  /**
10
15
  * A Source discovers AI-model usage from one place and emits normalized Candidates:
11
16
  * { model_string, source_type, location_label, source_path, source_line?, environment, snippet }
12
17
  *
13
- * Interface:
18
+ * Interface (only id/label/available/collect are REQUIRED — the rest are OPTIONAL
19
+ * and read via ?? / ?., so the 6 original sources keep working untouched):
14
20
  * id: string
15
21
  * label: string
16
- * available(opts): Promise<boolean> // is the backing tool/creds present?
22
+ * available(opts): Promise<boolean> // is the backing tool/creds present? (cheap PATH check — no spawn)
17
23
  * collect(opts, compiled): Promise<Candidate[]>
24
+ * kind?: "local" | "cli" | "mcp" | "api" // descriptive grouping only; NEVER gates execution (default "local")
25
+ * authState?(opts): Promise<{ connected, mode, account?, reason? }>
26
+ * // OPTIONAL richer read-only identity probe (MAY spawn); used by the
27
+ * // TUI "test" key + verbose `mm sources`, NOT by the hot collect path
28
+ * envTag?: "prod"|"staging"|"dev"|"unknown" // a declared default env fallback (the authoritative per-source
29
+ * // envTag comes from integrations.json and is folded into opts.env)
30
+ * integration?: true // marks a LIVE integration subject to the enabled-gate (the 6
31
+ * // original sources omit it → unaffected)
18
32
  *
19
- * The secrets/config sources (env, aws-secrets, k8s, helm, sql) each shell out to
20
- * an ALREADY-AUTHENTICATED CLI, run read-only, scan locally, and REDACT every
21
- * snippet — only non-sensitive model ids ever leave the machine, never secrets.
33
+ * The secrets/config sources (env, aws-secrets, k8s, helm, sql) and the 4 live
34
+ * integrations each shell out to an ALREADY-AUTHENTICATED CLI, run read-only, scan
35
+ * locally, and REDACT every snippet — only non-sensitive model ids ever leave the
36
+ * machine, never secrets. Secret-NAME-only surfaces (gh/supabase secret lists,
37
+ * vercel env names) pass the NAME with an EMPTY value so no value can leak.
22
38
  */
23
const SOURCES = [
  filesystemSource,
  envSource,
  awsSecretsSource,
  k8sSource,
  helmSource,
  sqlSource,
  // Live integrations (integration:true) — only run when toggled on in
  // integrations.json OR named explicitly in --sources.
  awsLambdaSource,
  vercelSource,
  supabaseEdgeSource,
  githubActionsSource,
];
24
45
 
25
46
  export const ALL_SOURCE_IDS = SOURCES.map((s) => s.id);
26
47
 
@@ -32,27 +53,63 @@ export function getSource(id) {
32
53
  return SOURCES.find((s) => s.id === id) ?? null;
33
54
  }
34
55
 
35
- /** Which of the requested sources are usable right now (tool/creds present). */
36
- export async function availability(sourceIds, opts = {}) {
56
/**
 * Decide whether a source may run under the live-integration gate.
 *
 * A source without a truthy `integration` flag is always allowed. A live
 * integration is allowed when its id is in `enabledIds()` or in `explicit`.
 *
 * @param {object|null} src      source descriptor (may be null/undefined)
 * @param {string} id            source id to look up
 * @param {Set<string>} explicit ids the caller named verbatim
 * @returns {boolean}
 */
function integrationAllowed(src, id, explicit) {
  const isLiveIntegration = Boolean(src?.integration);
  if (isLiveIntegration) {
    return enabledIds().has(id) || explicit.has(id);
  }
  return true;
}
63
+
64
/**
 * Report, for each requested source id, whether it can be used right now.
 *
 * `available` is the result of the source's own `available(opts)` check. For a
 * live integration that was NOT named in `explicit`, it is additionally ANDed
 * with the enabled flag. Unknown ids yield `known: false, available: false`.
 *
 * @param {string[]} [sourceIds]   ids to report on; defaults to ["filesystem"] when empty
 * @param {object} [opts]          forwarded to each source's `available`
 * @param {Set<string>} [explicit] ids the caller named verbatim
 * @returns {Promise<object[]>} rows of { id, label, kind, available, known, integration, enabled }
 */
export async function availability(sourceIds, opts = {}, explicit = new Set()) {
  const requested = sourceIds?.length ? sourceIds : ["filesystem"];
  const enabled = enabledIds();
  const rows = [];

  for (const id of requested) {
    const src = getSource(id);

    let hasTool = false;
    if (src) hasTool = await src.available(opts);

    const isIntegration = Boolean(src?.integration);
    const isEnabled = enabled.has(id);
    // Only an integration that was not explicitly requested is gated on the toggle.
    const gatedByToggle = isIntegration && !explicit.has(id);

    rows.push({
      id,
      label: src?.label ?? id,
      kind: src?.kind ?? "local",
      available: gatedByToggle ? hasTool && isEnabled : hasTool,
      known: Boolean(src),
      integration: isIntegration,
      enabled: isEnabled,
    });
  }

  return rows;
}
45
92
 
46
- /** Run a set of sources, returning a flat, de-duplicated Candidate[]. */
47
- export async function collectFrom(sourceIds, opts, patterns) {
93
+ /** Run a set of sources, returning a flat, de-duplicated Candidate[]. Stays on
94
+ * the cheap path: uses available() (PATH check), never authState() (spawn).
95
+ * `explicit` is the set of ids the caller named verbatim — naming a live
96
+ * integration there overrides the enabled-gate. */
97
+ export async function collectFrom(sourceIds, opts, patterns, explicit = new Set()) {
48
98
  const compiled = compilePatterns(patterns);
49
99
  const ids = sourceIds && sourceIds.length ? sourceIds : ["filesystem"];
50
100
  const seen = new Set();
51
101
  const out = [];
52
102
  for (const id of ids) {
53
103
  const src = getSource(id);
54
- if (!src || !(await src.available(opts))) continue;
55
- for (const c of await src.collect(opts, compiled)) {
104
+ if (!src) continue;
105
+ // Live-integration gate: skip a disabled integration unless explicitly named.
106
+ if (!integrationAllowed(src, id, explicit)) continue;
107
+ if (!(await src.available(opts))) continue;
108
+ // Fold the per-source declared envTag into opts.env so the integration's env
109
+ // overrides guessEnvFrom — but an explicit --env flag (opts.env) still wins,
110
+ // and Vercel's authoritative deploy target still wins inside its own collect.
111
+ const srcOpts = src.integration ? { ...opts, env: opts.env || getEnvTag(id) } : opts;
112
+ for (const c of await src.collect(srcOpts, compiled)) {
56
113
  const key = `${c.model_string}|${c.location_label}`;
57
114
  if (seen.has(key)) continue;
58
115
  seen.add(key);
@@ -0,0 +1,238 @@
1
+ /* TRUE BACKGROUND SCAN — PARENT side.
2
+ *
3
+ * Spawns the scan as a SEPARATE OS process (self-re-exec of the running runtime
4
+ * via the hidden `__mm_scan_worker` sentinel; see src/index.js top dispatch +
5
+ * src/sources/scan-worker.js), readline-parses its NDJSON stdout into a plain
6
+ * mutable `stats` store, and re-dispatches onto the SAME named-handler shape
7
+ * scan-runner.js uses ({onCandidate,onProgress,onSkip,onDone,onError}) — so this
8
+ * is the mechanical swap the scan-runner.js header comment promised, except the
9
+ * walk now runs in its OWN event loop (no cooperative-yield lag during a game /
10
+ * heavy IO), and survives an Ink unmount because it's a real subprocess.
11
+ *
12
+ * Returned handle mirrors scan-runner.js's {abort,pause,resume,paused} plus a
13
+ * read-only `stats` getter and the `child` reference. The game HUD reads `stats`
14
+ * every render and NEVER awaits the scan.
15
+ *
16
+ * Dual-runtime spawn (the ONE place runtime matters — both branches verified):
17
+ * - Compiled binary (bun --compile): process.execPath IS the binary; re-exec
18
+ * it with just the worker args. argv[1] is a virtual /$bunfs path and is NOT
19
+ * spawnable, so do NOT prepend it.
20
+ * - node / npm: process.execPath is node; prepend process.argv[1] (the real
21
+ * absolute src/index.js path) so node loads the entry, which dispatches.
22
+ */
23
+ import { spawn } from "node:child_process";
24
+ import readline from "node:readline";
25
+
26
/**
 * Detect a Bun runtime: true when a global `Bun` object exists or when
 * `process.versions.bun` is set; false otherwise (e.g. under node).
 *
 * @returns {boolean}
 */
export function isBunRuntime() {
  if (typeof globalThis.Bun !== "undefined") return true;
  return process.versions.bun != null;
}
30
+
31
/**
 * Build the argument list handed to spawn() alongside `process.execPath`.
 *
 * The hidden `__mm_scan_worker` sentinel always comes first, followed by the
 * worker flags. When `isBunRuntime()` is false, `process.argv[1]` (the entry
 * script) is placed in front so the runtime loads it; otherwise the list is
 * returned without it.
 *
 * @param {string[]} workerArgs worker flags such as ['--root', dir, ...] (NO sentinel)
 * @returns {string[]} full spawn argv
 */
export function buildSpawnArgs(workerArgs) {
  const sentinelArgs = ["__mm_scan_worker"];
  sentinelArgs.push(...workerArgs);

  if (isBunRuntime()) return sentinelArgs;
  return [process.argv[1], ...sentinelArgs];
}
42
+
43
/**
 * Start a background filesystem scan in a separate OS process.
 *
 * The worker is spawned from `process.execPath` with the argv produced by
 * buildSpawnArgs. Its stdout is read line by line as NDJSON; each message
 * updates the mutable `stats` object and is re-emitted on the named handlers.
 * Its stderr is buffered (bounded to 64 000 chars) and used only to build an
 * error message when the worker exits abnormally.
 *
 * @param {object} opts
 * @param {string} opts.root                 dir to walk (required)
 * @param {string[]} [opts.exclude]          extra ignore patterns (csv-joined for the worker)
 * @param {object} [opts.env]                extra env merged over process.env for the child
 * @param {string} [opts.registryCachePath]  passed to the worker as `--registry-cache`
 *
 * @param {object} [handlers]  every handler is suppressed after abort()
 * @param {(candidate:object)=>void} [handlers.onCandidate]
 * @param {(p:object)=>void} [handlers.onProgress] {filesScanned,dirsSeen,catalogsSkipped,currentDir}
 * @param {(s:object)=>void} [handlers.onSkip]     {path,distinct,catalogsSkipped}
 * @param {(r:object)=>void} [handlers.onDone]     {candidates,filesScanned,dirsSeen,catalogsSkipped,scannedAt}
 * @param {(err:Error)=>void} [handlers.onError]   worker `err` line, spawn failure, or abnormal exit
 *
 * @returns {{abort:()=>void, pause:()=>void, resume:()=>void, paused:boolean, stats:object, child:import('node:child_process').ChildProcess}}
 *   abort()  idempotent; sends SIGTERM and, if the child is currently paused on
 *            a non-Windows platform, SIGCONT so the pending SIGTERM is delivered.
 *   pause()  idempotent; SIGSTOPs the child on non-Windows platforms.
 *   resume() idempotent; SIGCONTs the child on non-Windows platforms.
 */
export function startScanProcess(opts, handlers = {}) {
  const { root, exclude, env, registryCachePath } = opts || {};
  const {
    onCandidate = () => {},
    onProgress = () => {},
    onSkip = () => {},
    onDone = () => {},
    onError = () => {},
  } = handlers;

  // Worker flags. Sentinel + argv[1] handling lives in buildSpawnArgs.
  const workerArgs = ["--root", root];
  if (exclude && exclude.length) workerArgs.push("--exclude", exclude.join(","));
  if (registryCachePath) workerArgs.push("--registry-cache", registryCachePath);

  const child = spawn(process.execPath, buildSpawnArgs(workerArgs), {
    // stdin ignored; stdout (NDJSON) and stderr (diagnostics) are piped, never
    // inherited, so worker output cannot reach the parent's terminal.
    stdio: ["ignore", "pipe", "pipe"],
    env: { ...process.env, ...(env || {}) },
    detached: false,
    windowsHide: true,
  });

  // Plain mutable store exposed through the `stats` getter.
  const stats = {
    phase: "scanning", // scanning | done | error
    filesScanned: 0,
    candidateCount: 0,
    dirsSeen: 0,
    catalogsSkipped: 0,
    currentDir: "",
    scannedAt: null,
    error: null,
  };

  let aborted = false;
  let paused = false;
  let finalized = false; // exactly-once phase finalization (done/error/exit)
  let stderrBuf = "";

  // Handle one NDJSON line from the worker's stdout.
  function handleLine(line) {
    if (!line) return;
    let msg;
    try {
      msg = JSON.parse(line);
    } catch {
      return; // drop a malformed / partial-leftover line, never throw
    }
    switch (msg.t) {
      case "prog":
        // Only fields that are present with the right type overwrite the store.
        if (typeof msg.filesScanned === "number") stats.filesScanned = msg.filesScanned;
        if (typeof msg.dirsSeen === "number") stats.dirsSeen = msg.dirsSeen;
        if (typeof msg.catalogsSkipped === "number") stats.catalogsSkipped = msg.catalogsSkipped;
        if (typeof msg.currentDir === "string") stats.currentDir = msg.currentDir;
        if (!aborted) {
          onProgress({
            filesScanned: stats.filesScanned,
            dirsSeen: stats.dirsSeen,
            catalogsSkipped: stats.catalogsSkipped,
            currentDir: stats.currentDir,
          });
        }
        break;
      case "cand":
        // Only a counter is kept per candidate; the full list arrives on `done`.
        stats.candidateCount++;
        if (!aborted) onCandidate(msg.candidate);
        break;
      case "skip":
        if (typeof msg.catalogsSkipped === "number") stats.catalogsSkipped = msg.catalogsSkipped;
        if (!aborted) onSkip({ path: msg.path, distinct: msg.distinct, catalogsSkipped: msg.catalogsSkipped });
        break;
      case "done":
        if (finalized) return;
        finalized = true;
        stats.phase = "done";
        stats.filesScanned = msg.filesScanned ?? stats.filesScanned;
        stats.dirsSeen = msg.dirsSeen ?? stats.dirsSeen;
        stats.catalogsSkipped = msg.catalogsSkipped ?? stats.catalogsSkipped;
        stats.candidateCount = (msg.candidates || []).length;
        stats.scannedAt = msg.scannedAt ?? Date.now();
        if (!aborted) {
          onDone({
            candidates: msg.candidates || [],
            filesScanned: stats.filesScanned,
            dirsSeen: stats.dirsSeen,
            catalogsSkipped: stats.catalogsSkipped,
            scannedAt: stats.scannedAt,
          });
        }
        break;
      case "err":
        if (finalized) return;
        finalized = true;
        stats.phase = "error";
        stats.error = msg.message || "scan failed";
        if (!aborted) onError(new Error(stats.error));
        break;
      default:
        break;
    }
  }

  const rl = readline.createInterface({ input: child.stdout, crlfDelay: Infinity });
  rl.on("line", handleLine);

  child.stderr.on("data", (chunk) => {
    stderrBuf += chunk.toString();
    if (stderrBuf.length > 64_000) stderrBuf = stderrBuf.slice(-64_000); // bound
  });

  child.on("error", (err) => {
    // Spawn failure (e.g. execPath missing) — surface unless we asked to die.
    if (finalized || aborted) return;
    finalized = true;
    stats.phase = "error";
    stats.error = err?.message || "failed to start scan worker";
    onError(err instanceof Error ? err : new Error(stats.error));
  });

  child.on("exit", (code, signal) => {
    // A non-zero / signal exit with no done/err line and no abort becomes an
    // error, so the phase does not stay on "scanning".
    if (finalized || aborted) return;
    if (code === 0) return; // a clean exit is finalized by the `done` line instead
    finalized = true;
    stats.phase = "error";
    stats.error = stderrBuf.trim().split("\n").pop() || `scan worker exited (code ${code}, signal ${signal})`;
    onError(new Error(stats.error));
  });

  return {
    abort() {
      // Idempotent. Suppresses further handler calls and kills the child.
      if (aborted) return;
      aborted = true;
      try {
        child.kill("SIGTERM");
        // A SIGSTOPped process keeps SIGTERM pending until it is continued, so
        // aborting a paused scan would otherwise leave a frozen worker behind.
        if (paused && process.platform !== "win32") child.kill("SIGCONT");
      } catch {
        /* already gone */
      }
    },
    pause() {
      if (paused) return;
      paused = true;
      // The flag always flips so the UI reflects intent. The signal is sent only
      // on non-Windows platforms, and never to a child that was already told to
      // terminate (stopping it could freeze it mid-shutdown).
      if (process.platform !== "win32" && !aborted) {
        try {
          child.kill("SIGSTOP");
        } catch {
          /* ignore */
        }
      }
    },
    resume() {
      if (!paused) return;
      paused = false;
      if (process.platform !== "win32") {
        try {
          child.kill("SIGCONT");
        } catch {
          /* ignore */
        }
      }
    },
    get paused() {
      return paused;
    },
    get stats() {
      return stats;
    },
    child,
  };
}
@@ -0,0 +1,127 @@
1
+ /* Background-scan RUNNER: a thin, React-free seam over the cooperative
2
+ * streaming filesystem scan. It owns the AbortController + paused flag, runs
3
+ * scanFilesystemStreaming on the main event loop (the walk yields via
4
+ * setImmediate on a time + count budget, so a foreground frame loop and the
5
+ * input handler keep ticking in the gaps), and re-emits the engine's raw
6
+ * onEvent protocol onto a NAMED handlers object so callers never touch
7
+ * event-type strings.
8
+ *
9
+ * Why no worker_threads: the shipped artifact is a single Bun-compiled binary
10
+ * built from ONE entry (`bun build src/index.js --compile`). A `new Worker(new
11
+ * URL('./scan-worker.js', …))` would resolve to a loose .js path that doesn't
12
+ * exist inside the self-contained binary — it'd work under `node src/index.js`
13
+ * (npm) but break on the CDN binary. The scan is already background-capable on
14
+ * the single thread, so a worker buys nothing here. The returned handle shape
15
+ * ({ abort, pause, resume, paused }) is DELIBERATELY identical to what a
16
+ * worker-backed impl would expose, so a future worker swap (with build-script
17
+ * support + an in-process fallback) is mechanical, not a rewrite. */
18
+ import { scanFilesystemStreaming } from "./filesystem.js";
19
+
20
/**
 * Start an in-process background filesystem scan.
 *
 * Calls scanFilesystemStreaming with an abort signal and an `isPaused` probe,
 * and forwards each raw engine event to the matching named handler without
 * batching. The latest progress counters are remembered so the `onDone`
 * summary is complete regardless of which event type arrived last.
 *
 * @param {object} opts
 * @param {string} opts.root                  dir to walk (required)
 * @param {object} opts.compiled              compiled detection patterns
 * @param {string[]} [opts.exclude]           extra ignore patterns (forwarded verbatim)
 * @param {object} [opts.env]                 env override, forwarded verbatim
 * @param {number} [opts.yieldBudgetMs=8]     forwarded to the engine
 * @param {number} [opts.maxFilesPerSlice=40] forwarded to the engine
 *
 * @param {object} [handlers]
 * @param {(candidate:object)=>void} [handlers.onCandidate] one call per `candidate` event
 * @param {(p:object)=>void} [handlers.onProgress] { filesScanned, dirsSeen, catalogsSkipped, currentDir }
 * @param {(d:object)=>void} [handlers.onDir]      { path, dirsSeen }
 * @param {(s:object)=>void} [handlers.onSkip]     { path, distinct, catalogsSkipped }
 * @param {(r:object)=>void} [handlers.onDone]     { candidates, filesScanned, dirsSeen, catalogsSkipped, scannedAt } — suppressed after abort()
 * @param {(err:Error)=>void} [handlers.onError]   the scan promise rejected — suppressed after abort()
 *
 * @returns {{ abort:()=>void, pause:()=>void, resume:()=>void, paused:boolean }}
 *   abort()  sets the aborted flag and aborts the signal given to the engine.
 *   pause() / resume()  flip the flag the engine reads through `isPaused`.
 *   paused   getter mirroring that flag.
 */
export function startScan(opts, handlers = {}) {
  const { root, compiled, exclude, env, yieldBudgetMs = 8, maxFilesPerSlice = 40 } = opts || {};
  const noop = () => {};
  const {
    onCandidate: emitCandidate = noop,
    onProgress: emitProgress = noop,
    onDir: emitDir = noop,
    onSkip: emitSkip = noop,
    onDone: emitDone = noop,
    onError: emitError = noop,
  } = handlers;

  const controller = new AbortController();
  let isPausedFlag = false;
  let wasAborted = false;

  // Latest counters seen on the event stream, reported again in the done summary.
  const totals = { filesScanned: 0, dirsSeen: 0, catalogsSkipped: 0 };

  // Engine event type → re-emitter. Unknown types are ignored.
  const routes = new Map([
    [
      "dir",
      (ev) => {
        totals.dirsSeen = ev.dirsSeen;
        emitDir({ path: ev.path, dirsSeen: ev.dirsSeen });
      },
    ],
    [
      "candidate",
      (ev) => {
        emitCandidate(ev.candidate);
      },
    ],
    [
      "skip",
      (ev) => {
        totals.catalogsSkipped = ev.catalogsSkipped;
        emitSkip({ path: ev.path, distinct: ev.distinct, catalogsSkipped: ev.catalogsSkipped });
      },
    ],
    [
      "progress",
      (ev) => {
        totals.filesScanned = ev.filesScanned;
        totals.dirsSeen = ev.dirsSeen;
        totals.catalogsSkipped = ev.catalogsSkipped;
        emitProgress({
          filesScanned: ev.filesScanned,
          dirsSeen: ev.dirsSeen,
          catalogsSkipped: ev.catalogsSkipped,
          currentDir: ev.currentDir,
        });
      },
    ],
  ]);

  const engineOpts = {
    root,
    signal: controller.signal,
    exclude,
    env,
    isPaused: () => isPausedFlag,
    yieldBudgetMs,
    maxFilesPerSlice,
  };

  const dispatch = (ev) => {
    const route = routes.get(ev.type);
    if (route) route(ev);
  };

  const handleResolved = (candidates) => {
    // After abort the caller is not notified, whatever the engine resolved with.
    if (wasAborted) return;
    emitDone({
      candidates,
      filesScanned: totals.filesScanned,
      dirsSeen: totals.dirsSeen,
      catalogsSkipped: totals.catalogsSkipped,
      scannedAt: Date.now(),
    });
  };

  const handleRejected = (err) => {
    if (wasAborted) return;
    emitError(err instanceof Error ? err : new Error(String(err)));
  };

  scanFilesystemStreaming(engineOpts, compiled, dispatch).then(handleResolved, handleRejected);

  return {
    abort() {
      wasAborted = true;
      controller.abort();
    },
    pause() {
      isPausedFlag = true;
    },
    resume() {
      isPausedFlag = false;
    },
    get paused() {
      return isPausedFlag;
    },
  };
}