npm - @modelstatus/cli - Versions diffs - 0.1.34 → 0.1.36 - Mend

@modelstatus/cli 0.1.34 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/package.json +1 -1
package/src/api.js +6 -0
package/src/ci.js +2 -2
package/src/index.js +219 -12
package/src/integrations.js +121 -0
package/src/sources/aws-lambda.js +95 -0
package/src/sources/configscan.js +8 -2
package/src/sources/filesystem.js +0 -0
package/src/sources/github-actions.js +156 -0
package/src/sources/index.js +70 -13
package/src/sources/scan-process.js +238 -0
package/src/sources/scan-runner.js +127 -0
package/src/sources/scan-worker.js +148 -0
package/src/sources/supabase-edge.js +183 -0
package/src/sources/supabase.js +5 -0
package/src/sources/vercel.js +74 -0
package/src/tui/app.js +45 -2
package/src/tui/game/DkGame.js +21 -0
package/src/tui/game/dk-core.js +688 -0
package/src/tui/game/dk-render.js +160 -0
package/src/tui/game/input.js +169 -0
package/src/tui/game/loop.js +337 -0
package/src/tui/game/term.js +330 -0
package/src/tui/views/add.js +1 -1
package/src/tui/views/integrations.js +224 -0
package/src/tui/views/inventory.js +31 -2
package/src/tui/views/scan.js +116 -6

package/src/sources/github-actions.js ADDED Viewed

@@ -0,0 +1,156 @@
+import fs from "node:fs";
+import path from "node:path";
+import { hasCmd, run } from "./shell.js";
+import { detectInLine } from "../detect/core.js";
+import { redactValue } from "../redact.js";
+import { scanConfigEntries, entriesFromKV } from "./configscan.js";
+/* Pure parsers (unit-tested). */
+/** Parse `gh variable list` output into `[{ name, value }]`.
+ *
+ * Actions VARIABLES are NON-secret config values (unlike secrets), so callers scan
+ * the VALUE for model ids. Two input shapes are accepted:
+ *   - `--json name,value` output: a JSON array of `{ name, value }` objects;
+ *   - the plain table (NAME, VALUE, UPDATED) separated by tabs or by runs of
+ *     2+ spaces.
+ * Pure — no I/O, and no JSON shape knowledge leaks out.
+ *
+ * @param {string} stdout raw command output (nullish/empty yields [])
+ * @returns {{name: string, value: *}[]} rows that have a name; a missing value becomes ""
+ */
+export function parseVariableList(stdout) {
+  const s = String(stdout || "").trim();
+  if (!s) return [];
+  try {
+    const j = JSON.parse(s);
+    if (Array.isArray(j)) {
+      // `x?.` so one stray null element is dropped instead of throwing and
+      // discarding the whole (otherwise valid) JSON payload.
+      return j.map((x) => ({ name: x?.name, value: x?.value ?? "" })).filter((x) => x.name);
+    }
+  } catch {
+    /* not JSON — fall through to table parse */
+  }
+  const out = [];
+  for (const raw of s.split(/\r?\n/)) {
+    const line = raw.trim();
+    if (!line) continue;
+    if (/^name\b/i.test(line)) continue; // header
+    if (/^[-\s|]+$/.test(line)) continue; // separator rule
+    // A tab-delimited row is split on tabs ONLY, so a value that itself contains
+    // a run of spaces stays in one piece. Only the aligned (tab-free) table
+    // falls back to 2+-space separators.
+    const cols = (line.includes("\t") ? line.split("\t") : line.split(/\s{2,}/)).map((c) => c.trim());
+    const name = cols[0];
+    if (name && /^[A-Za-z_][A-Za-z0-9_]*$/.test(name)) out.push({ name, value: cols[1] ?? "" });
+  }
+  return out;
+}
+/** Parse `gh secret list` output into secret NAMES only (never a value).
+ *
+ * Accepts `--json name` output (a JSON array of `{ name }`) and the
+ * tab/space-separated table (NAME  UPDATED). The table's header row is skipped,
+ * matching parseVariableList, so it is never reported as a secret called "NAME".
+ * We NEVER `gh secret view`.
+ *
+ * @param {string} stdout raw command output (nullish/empty yields [])
+ * @returns {string[]} secret names in input order
+ */
+export function parseGhSecretList(stdout) {
+  const s = String(stdout || "").trim();
+  if (!s) return [];
+  try {
+    const j = JSON.parse(s);
+    // `x?.` so a stray null element is dropped rather than aborting the JSON path.
+    if (Array.isArray(j)) return j.map((x) => x?.name).filter(Boolean);
+  } catch {
+    /* not JSON — fall through to table parse */
+  }
+  const names = [];
+  for (const raw of s.split(/\r?\n/)) {
+    const line = raw.trim();
+    if (!line) continue;
+    // Header row of the human-readable table. Requires BOTH column titles so a
+    // real secret that happens to be named NAME (followed by a date) still counts.
+    if (/^name\s+updated$/i.test(line)) continue;
+    const name = line.split(/\s{2,}|\t/).map((c) => c.trim()).filter(Boolean)[0];
+    if (name && /^[A-Za-z_][A-Za-z0-9_]*$/.test(name)) names.push(name);
+  }
+  return names;
+}
+/** Scan a single workflow YAML body line by line and emit one Candidate per
+ * (model id, line) pair that detectInLine reports.
+ * Pure: works only on the given text, relPath and compiled patterns. Each
+ * candidate is located as `…#L<n>` (1-based) and carries the trimmed line passed
+ * through redactValue and capped at 160 characters. `env` falls back to "unknown". */
+export function scanWorkflowText(text, relPath, compiled, env) {
+  const candidates = [];
+  const emitted = new Set();
+  const lines = String(text || "").split(/\r?\n/);
+  for (let idx = 0; idx < lines.length; idx += 1) {
+    const body = lines[idx];
+    const lineNo = idx + 1;
+    // detectInLine yields an iterable of model ids found on this line.
+    for (const model_string of detectInLine(body, compiled)) {
+      const locator = `github-actions://workflows/${relPath}#L${lineNo}`;
+      const dedupeKey = `${model_string}|${locator}`;
+      if (emitted.has(dedupeKey)) continue;
+      emitted.add(dedupeKey);
+      const snippet = redactValue(body.trim()).slice(0, 160);
+      candidates.push({
+        model_string,
+        source_type: "github-actions",
+        location_label: locator,
+        source_path: relPath,
+        source_line: lineNo,
+        environment: env || "unknown",
+        snippet,
+      });
+    }
+  }
+  return candidates;
+}
+/** Source for GitHub Actions variables, secret names and workflow files.
+ * Marked as a LIVE integration (`integration: true`); `available()` only checks
+ * for the `gh` CLI. `collect()` gathers three surfaces, in this order:
+ *   (a) `gh variable list --json name,value` — variable VALUES are scanned via
+ *       entriesFromKV + scanConfigEntries (variables are non-secret config, e.g.
+ *       `OPENAI_MODEL=gpt-4o`);
+ *   (b) `gh secret list` — NAMES only, each passed with an EMPTY value;
+ *   (c) its own narrow read of <root>/.github/workflows/*.yml|*.yaml, line-scanned
+ *       with scanWorkflowText (it does not go through filesystem.js).
+ * opts: { root, ghRepo, ghEnvironment, env }. When opts.ghEnvironment is set it is
+ * passed to `gh` as `--env` and used as the env for (a) and (b); otherwise
+ * opts.env is used. Workflow candidates always use opts.env. */
+export const githubActionsSource = {
+  id: "github-actions",
+  label: "GitHub Actions variables + secrets + workflows",
+  kind: "cli",
+  integration: true,
+  envTag: "unknown",
+  async available() {
+    return hasCmd("gh");
+  },
+  async authState() {
+    const res = await run("gh", ["auth", "status"]);
+    // `gh auth status` prints to stderr even when it succeeds, so `ok` decides.
+    if (res.ok) return { connected: true, mode: "auth-status" };
+    const [reason] = (res.stderr || res.stdout || "not logged in").split("\n");
+    return { connected: false, mode: "auth-status", reason };
+  },
+  async collect(opts, compiled) {
+    const { ghRepo, ghEnvironment, root, env } = opts ?? {};
+    const repoTag = ghRepo || "repo";
+    const ghEnv = ghEnvironment || "";
+    // Shared `gh` scope flags: optional --repo first, then optional --env.
+    const scope = [];
+    if (ghRepo) scope.push("--repo", ghRepo);
+    if (ghEnv) scope.push("--env", ghEnv);
+    const found = [];
+    // Funnel one NAME/VALUE pair through entriesFromKV + scanConfigEntries.
+    const scanPair = (section, name, value) => {
+      const entries = entriesFromKV(name, value, `github-actions://${repoTag}/${section}#${name}`, ghEnv || repoTag);
+      found.push(...scanConfigEntries(entries, compiled, { sourceType: "github-actions", env: ghEnv || env }));
+    };
+    // (a) Variables: JSON output keeps the value column unambiguous.
+    const varsRes = await run("gh", ["variable", "list", ...scope, "--json", "name,value"]);
+    if (varsRes.ok) {
+      for (const row of parseVariableList(varsRes.stdout)) scanPair("variables", row.name, row.value);
+    }
+    // (b) Secrets: names only, always with an empty value.
+    const secretsRes = await run("gh", ["secret", "list", ...scope]);
+    if (secretsRes.ok) {
+      for (const secretName of parseGhSecretList(secretsRes.stdout)) scanPair("secrets", secretName, "");
+    }
+    // (c) Workflow YAML bodies under <root>/.github/workflows.
+    const wfDir = path.join(root || ".", ".github", "workflows");
+    let wfFiles;
+    try {
+      wfFiles = fs.readdirSync(wfDir).filter((entry) => /\.ya?ml$/.test(entry));
+    } catch {
+      wfFiles = []; // no workflows dir — variables/secrets alone are fine
+    }
+    for (const fileName of wfFiles) {
+      let body;
+      try {
+        body = fs.readFileSync(path.join(wfDir, fileName), "utf8");
+      } catch {
+        continue; // unreadable entry — skip it, keep scanning the rest
+      }
+      found.push(...scanWorkflowText(body, fileName, compiled, env));
+    }
+    return found;
+  },
+};

package/src/sources/index.js CHANGED Viewed

@@ -5,22 +5,43 @@ import { awsSecretsSource } from "./aws.js";
 import { k8sSource } from "./k8s.js";
 import { helmSource } from "./helm.js";
 import { sqlSource } from "./sql.js";
+import { awsLambdaSource } from "./aws-lambda.js";
+import { vercelSource } from "./vercel.js";
+import { supabaseEdgeSource } from "./supabase.js";
+import { githubActionsSource } from "./github-actions.js";
+import { enabledIds, getEnvTag } from "../integrations.js";
 /**
  * A Source discovers AI-model usage from one place and emits normalized Candidates:
  *   { model_string, source_type, location_label, source_path, source_line?, environment, snippet }
  *
- * Interface:
+ * Interface (only id/label/available/collect are REQUIRED — the rest are OPTIONAL
+ * and read via ?? / ?., so the 6 original sources keep working untouched):
  *   id: string
  *   label: string
- *   available(opts): Promise<boolean>          // is the backing tool/creds present?
+ *   available(opts): Promise<boolean>          // is the backing tool/creds present? (cheap PATH check — no spawn)
  *   collect(opts, compiled): Promise<Candidate[]>
+ *   kind?: "local" | "cli" | "mcp" | "api"     // descriptive grouping only; NEVER gates execution (default "local")
+ *   authState?(opts): Promise<{ connected, mode, account?, reason? }>
+ *                                              // OPTIONAL richer read-only identity probe (MAY spawn); used by the
+ *                                              // TUI "test" key + verbose `mm sources`, NOT by the hot collect path
+ *   envTag?: "prod"|"staging"|"dev"|"unknown"  // a declared default env fallback (the authoritative per-source
+ *                                              // envTag comes from integrations.json and is folded into opts.env)
+ *   integration?: true                          // marks a LIVE integration subject to the enabled-gate (the 6
+ *                                              // original sources omit it → unaffected)
  *
- * The secrets/config sources (env, aws-secrets, k8s, helm, sql) each shell out to
- * an ALREADY-AUTHENTICATED CLI, run read-only, scan locally, and REDACT every
- * snippet — only non-sensitive model ids ever leave the machine, never secrets.
+ * The secrets/config sources (env, aws-secrets, k8s, helm, sql) and the 4 live
+ * integrations each shell out to an ALREADY-AUTHENTICATED CLI, run read-only, scan
+ * locally, and REDACT every snippet — only non-sensitive model ids ever leave the
+ * machine, never secrets. Secret-NAME-only surfaces (gh/supabase secret lists,
+ * vercel env names) pass the NAME with an EMPTY value so no value can leak.
  */
-const SOURCES = [filesystemSource, envSource, awsSecretsSource, k8sSource, helmSource, sqlSource];
+const SOURCES = [
+  filesystemSource, envSource, awsSecretsSource, k8sSource, helmSource, sqlSource,
+  // Live integrations (integration:true) — only run when toggled on in
+  // integrations.json OR named explicitly in --sources (see the gate below).
+  awsLambdaSource, vercelSource, supabaseEdgeSource, githubActionsSource,
+];
 export const ALL_SOURCE_IDS = SOURCES.map((s) => s.id);
@@ -32,27 +53,63 @@ export function getSource(id) {
   return SOURCES.find((s) => s.id === id) ?? null;
 }
-/** Which of the requested sources are usable right now (tool/creds present). */
-export async function availability(sourceIds, opts = {}) {
+/** Enabled-gate for live integrations. A source flagged `integration` may run
+ * only if its id is in enabledIds() OR in the caller's `explicit` set (so a
+ * one-off `--sources vercel` works without toggling it on first). Sources
+ * without the `integration` flag are never gated. */
+function integrationAllowed(src, id, explicit) {
+  const isIntegration = Boolean(src?.integration);
+  if (isIntegration) {
+    return enabledIds().has(id) || explicit.has(id);
+  }
+  return true; // not an integration → always allowed
+}
+/** Which of the requested sources are usable right now (tool/creds present).
+ * For a live integration, `available` is reported as `enabled && hasCmd` UNLESS
+ * it was explicitly requested by name (then just hasCmd). Each row also carries
+ * `enabled` + `integration` so cmdSources / the TUI can render the toggle.
+ * `explicit` is the set of ids the caller named verbatim. */
+export async function availability(sourceIds, opts = {}, explicit = new Set()) {
   const ids = sourceIds && sourceIds.length ? sourceIds : ["filesystem"];
+  const enabled = enabledIds();
   const report = [];
   for (const id of ids) {
     const src = getSource(id);
-    report.push({ id, label: src?.label ?? id, available: src ? await src.available(opts) : false, known: !!src });
+    const hasTool = src ? await src.available(opts) : false;
+    const isIntegration = !!src?.integration;
+    const isEnabled = enabled.has(id);
+    // Integrations only count as "available" when enabled (unless explicitly asked).
+    const available = isIntegration && !explicit.has(id) ? hasTool && isEnabled : hasTool;
+    report.push({
+      id,
+      label: src?.label ?? id,
+      kind: src?.kind ?? "local",
+      available,
+      known: !!src,
+      integration: isIntegration,
+      enabled: isEnabled,
+    });
   }
   return report;
 }
-/** Run a set of sources, returning a flat, de-duplicated Candidate[]. */
-export async function collectFrom(sourceIds, opts, patterns) {
+/** Run a set of sources, returning a flat, de-duplicated Candidate[]. Stays on
+ * the cheap path: uses available() (PATH check), never authState() (spawn).
+ * `explicit` is the set of ids the caller named verbatim — naming a live
+ * integration there overrides the enabled-gate. */
+export async function collectFrom(sourceIds, opts, patterns, explicit = new Set()) {
   const compiled = compilePatterns(patterns);
   const ids = sourceIds && sourceIds.length ? sourceIds : ["filesystem"];
   const seen = new Set();
   const out = [];
   for (const id of ids) {
     const src = getSource(id);
-    if (!src || !(await src.available(opts))) continue;
-    for (const c of await src.collect(opts, compiled)) {
+    if (!src) continue;
+    // Live-integration gate: skip a disabled integration unless explicitly named.
+    if (!integrationAllowed(src, id, explicit)) continue;
+    if (!(await src.available(opts))) continue;
+    // Fold the per-source declared envTag into opts.env so the integration's env
+    // overrides guessEnvFrom — but an explicit --env flag (opts.env) still wins,
+    // and Vercel's authoritative deploy target still wins inside its own collect.
+    const srcOpts = src.integration ? { ...opts, env: opts.env || getEnvTag(id) } : opts;
+    for (const c of await src.collect(srcOpts, compiled)) {
       const key = `${c.model_string}|${c.location_label}`;
       if (seen.has(key)) continue;
       seen.add(key);

package/src/sources/scan-process.js ADDED Viewed

@@ -0,0 +1,238 @@
+/* TRUE BACKGROUND SCAN — PARENT side.
+ *
+ * Spawns the scan as a SEPARATE OS process (self-re-exec of the running runtime
+ * via the hidden `__mm_scan_worker` sentinel; see src/index.js top dispatch +
+ * src/sources/scan-worker.js), readline-parses its NDJSON stdout into a plain
+ * mutable `stats` store, and re-dispatches onto the SAME named-handler shape
+ * scan-runner.js uses ({onCandidate,onProgress,onSkip,onDone,onError}) — so this
+ * is the mechanical swap the scan-runner.js header comment promised, except the
+ * walk now runs in its OWN event loop (no cooperative-yield lag during a game /
+ * heavy IO), and survives an Ink unmount because it's a real subprocess.
+ *
+ * Returned handle mirrors scan-runner.js's {abort,pause,resume,paused} plus a
+ * read-only `stats` getter and the `child` reference. The game HUD reads `stats`
+ * every render and NEVER awaits the scan.
+ *
+ * Dual-runtime spawn (the ONE place runtime matters — both branches verified):
+ *   - Compiled binary (bun --compile): process.execPath IS the binary; re-exec
+ *     it with just the worker args. argv[1] is a virtual /$bunfs path and is NOT
+ *     spawnable, so do NOT prepend it.
+ *   - node / npm: process.execPath is node; prepend process.argv[1] (the real
+ *     absolute src/index.js path) so node loads the entry, which dispatches.
+ */
+import { spawn } from "node:child_process";
+import readline from "node:readline";
+/** Runtime probe: true when a global `Bun` object exists or `process.versions.bun`
+ * is set (as in the bun-compiled binary), false otherwise (e.g. under node). */
+export function isBunRuntime() {
+  if (typeof globalThis.Bun !== "undefined") return true;
+  return process.versions.bun != null;
+}
+/**
+ * Assemble the argument list handed to spawn() alongside `process.execPath`.
+ * The hidden `__mm_scan_worker` sentinel always comes first among the worker
+ * args. When not running under Bun, `process.argv[1]` (the entry script) is
+ * put in front so the runtime loads the entry before dispatching.
+ * @param {string[]} workerArgs  worker flags such as ['--root', dir, ...] (NO sentinel)
+ * @returns {string[]} the complete spawn argv
+ */
+export function buildSpawnArgs(workerArgs) {
+  const argv = ["__mm_scan_worker", ...workerArgs];
+  // Under Bun, execPath is re-executed with just the sentinel + flags.
+  if (isBunRuntime()) return argv;
+  // Otherwise execPath is the interpreter, so the entry script must lead.
+  argv.unshift(process.argv[1]);
+  return argv;
+}
+/**
+ * Start a background filesystem scan in a separate OS process.
+ *
+ * Spawns `process.execPath` with the argv from buildSpawnArgs, reads the child's
+ * stdout line by line as NDJSON, mirrors the counters into a plain mutable
+ * `stats` object and re-dispatches each message onto the named handlers.
+ * Exactly one terminal outcome (done or error) is recorded; after abort() no
+ * handler is invoked any more.
+ *
+ * @param {object} opts
+ * @param {string} opts.root                dir to walk (required)
+ * @param {string[]} [opts.exclude]         extra ignore patterns (csv-joined for the worker)
+ * @param {object} [opts.env]               extra env merged over process.env for the child
+ * @param {string} [opts.registryCachePath] pre-fetched snapshot JSON path, forwarded as --registry-cache
+ *
+ * @param {object} [handlers]
+ * @param {(candidate:object)=>void} [handlers.onCandidate]
+ * @param {(p:object)=>void}         [handlers.onProgress] {filesScanned,dirsSeen,catalogsSkipped,currentDir}
+ * @param {(s:object)=>void}         [handlers.onSkip]     {path,distinct,catalogsSkipped}
+ * @param {(r:object)=>void}         [handlers.onDone]     {candidates,filesScanned,dirsSeen,catalogsSkipped,scannedAt} — suppressed after abort()
+ * @param {(err:Error)=>void}        [handlers.onError]    spawn failure, worker `err` line, or the worker ending without a result — suppressed after abort()
+ *
+ * @returns {{abort:()=>void, pause:()=>void, resume:()=>void, paused:boolean, stats:object, child:import('node:child_process').ChildProcess}}
+ */
+export function startScanProcess(opts, handlers = {}) {
+  const { root, exclude, env, registryCachePath } = opts || {};
+  const {
+    onCandidate = () => {},
+    onProgress = () => {},
+    onSkip = () => {},
+    onDone = () => {},
+    onError = () => {},
+  } = handlers;
+  // Worker flags. Sentinel + argv[1] handling lives in buildSpawnArgs.
+  const workerArgs = ["--root", root];
+  if (exclude && exclude.length) workerArgs.push("--exclude", exclude.join(","));
+  if (registryCachePath) workerArgs.push("--registry-cache", registryCachePath);
+  const child = spawn(process.execPath, buildSpawnArgs(workerArgs), {
+    // ignore stdin; PIPE stdout (NDJSON) + stderr (diagnostics). NEVER 'inherit'
+    // — the worker shares no terminal with the game, so its output can't touch
+    // the alt-screen frame.
+    stdio: ["ignore", "pipe", "pipe"],
+    env: { ...process.env, ...(env || {}) },
+    detached: false,
+    windowsHide: true,
+  });
+  // The HUD store — a plain mutable object the loop reads each render, read-only.
+  const stats = {
+    phase: "scanning", // scanning | done | error
+    filesScanned: 0,
+    candidateCount: 0,
+    dirsSeen: 0,
+    catalogsSkipped: 0,
+    currentDir: "",
+    scannedAt: null,
+    error: null,
+  };
+  let aborted = false;
+  let paused = false;
+  let finalized = false; // exactly-once phase finalization (done/error/close)
+  let stderrBuf = "";
+  // Parse one NDJSON line from the worker and route it by its `t` tag.
+  function handleLine(line) {
+    if (!line) return;
+    let msg;
+    try {
+      msg = JSON.parse(line);
+    } catch {
+      return; // drop a malformed / partial-leftover line, never throw
+    }
+    switch (msg.t) {
+      case "prog":
+        // dir-only prog lines carry just dirsSeen; full prog lines carry counts.
+        if (typeof msg.filesScanned === "number") stats.filesScanned = msg.filesScanned;
+        if (typeof msg.dirsSeen === "number") stats.dirsSeen = msg.dirsSeen;
+        if (typeof msg.catalogsSkipped === "number") stats.catalogsSkipped = msg.catalogsSkipped;
+        if (typeof msg.currentDir === "string") stats.currentDir = msg.currentDir;
+        if (!aborted) {
+          onProgress({
+            filesScanned: stats.filesScanned,
+            dirsSeen: stats.dirsSeen,
+            catalogsSkipped: stats.catalogsSkipped,
+            currentDir: stats.currentDir,
+          });
+        }
+        break;
+      case "cand":
+        // Running count for the HUD; the authoritative Candidate[] arrives
+        // once, on the done line.
+        stats.candidateCount++;
+        if (!aborted) onCandidate(msg.candidate);
+        break;
+      case "skip":
+        if (typeof msg.catalogsSkipped === "number") stats.catalogsSkipped = msg.catalogsSkipped;
+        if (!aborted) onSkip({ path: msg.path, distinct: msg.distinct, catalogsSkipped: msg.catalogsSkipped });
+        break;
+      case "done":
+        if (finalized) return;
+        finalized = true;
+        stats.phase = "done";
+        stats.filesScanned = msg.filesScanned ?? stats.filesScanned;
+        stats.dirsSeen = msg.dirsSeen ?? stats.dirsSeen;
+        stats.catalogsSkipped = msg.catalogsSkipped ?? stats.catalogsSkipped;
+        stats.candidateCount = (msg.candidates || []).length;
+        stats.scannedAt = msg.scannedAt ?? Date.now();
+        if (!aborted) onDone({
+          candidates: msg.candidates || [],
+          filesScanned: stats.filesScanned,
+          dirsSeen: stats.dirsSeen,
+          catalogsSkipped: stats.catalogsSkipped,
+          scannedAt: stats.scannedAt,
+        });
+        break;
+      case "err":
+        if (finalized) return;
+        finalized = true;
+        stats.phase = "error";
+        stats.error = msg.message || "scan failed";
+        if (!aborted) onError(new Error(stats.error));
+        break;
+      default:
+        break;
+    }
+  }
+  const rl = readline.createInterface({ input: child.stdout, crlfDelay: Infinity });
+  rl.on("line", handleLine);
+  // Decode at the stream level so a multi-byte character split across two
+  // chunks is not corrupted in the diagnostic tail.
+  child.stderr.setEncoding("utf8");
+  child.stderr.on("data", (d) => {
+    stderrBuf += d.toString();
+    if (stderrBuf.length > 64_000) stderrBuf = stderrBuf.slice(-64_000); // bound
+  });
+  child.on("error", (err) => {
+    // Spawn failure (e.g. execPath missing) — surface unless we asked to die.
+    if (finalized || aborted) return;
+    finalized = true;
+    stats.phase = "error";
+    stats.error = err?.message || "failed to start scan worker";
+    onError(err instanceof Error ? err : new Error(stats.error));
+  });
+  // Finalize on 'close', not 'exit': 'close' fires only after the child's stdio
+  // streams have ended, so a trailing done/err line has already been handled and
+  // cannot race this check. If the worker ended WITHOUT such a line (whatever
+  // its exit code) and we didn't abort it, report an error so the HUD never
+  // hangs on "scanning".
+  child.on("close", (code, signal) => {
+    if (finalized || aborted) return;
+    finalized = true;
+    stats.phase = "error";
+    stats.error = (stderrBuf.trim().split("\n").pop() || `scan worker exited (code ${code}, signal ${signal})`);
+    onError(new Error(stats.error));
+  });
+  return {
+    abort() {
+      // Idempotent. Suppresses further handler calls and kills the child.
+      if (aborted) return;
+      aborted = true;
+      try {
+        child.kill("SIGTERM");
+      } catch {
+        /* already gone */
+      }
+      // A SIGSTOPped process does not act on SIGTERM until it is continued, so
+      // aborting while paused would otherwise leave a frozen worker behind.
+      if (paused && process.platform !== "win32") {
+        try {
+          child.kill("SIGCONT");
+        } catch {
+          /* already gone */
+        }
+      }
+    },
+    pause() {
+      if (paused) return;
+      paused = true;
+      // SIGSTOP genuinely freezes the child on posix; on win32 it's a no-op on
+      // the process but we still flip the flag so the UI reflects intent.
+      if (process.platform !== "win32") {
+        try {
+          child.kill("SIGSTOP");
+        } catch {
+          /* ignore */
+        }
+      }
+    },
+    resume() {
+      if (!paused) return;
+      paused = false;
+      if (process.platform !== "win32") {
+        try {
+          child.kill("SIGCONT");
+        } catch {
+          /* ignore */
+        }
+      }
+    },
+    get paused() {
+      return paused;
+    },
+    get stats() {
+      return stats;
+    },
+    child,
+  };
+}

package/src/sources/scan-runner.js ADDED Viewed

@@ -0,0 +1,127 @@
+/* Background-scan RUNNER: a thin, React-free seam over the cooperative
+ * streaming filesystem scan. It owns the AbortController + paused flag, runs
+ * scanFilesystemStreaming on the main event loop (the walk yields via
+ * setImmediate on a time + count budget, so a foreground frame loop and the
+ * input handler keep ticking in the gaps), and re-emits the engine's raw
+ * onEvent protocol onto a NAMED handlers object so callers never touch
+ * event-type strings.
+ *
+ * Why no worker_threads: the shipped artifact is a single Bun-compiled binary
+ * built from ONE entry (`bun build src/index.js --compile`). A `new Worker(new
+ * URL('./scan-worker.js', …))` would resolve to a loose .js path that doesn't
+ * exist inside the self-contained binary — it'd work under `node src/index.js`
+ * (npm) but break on the CDN binary. The scan is already background-capable on
+ * the single thread, so a worker buys nothing here. The returned handle shape
+ * ({ abort, pause, resume, paused }) is DELIBERATELY identical to what a
+ * worker-backed impl would expose, so a future worker swap (with build-script
+ * support + an in-process fallback) is mechanical, not a rewrite. */
+import { scanFilesystemStreaming } from "./filesystem.js";
+/**
+ * Kick off an in-process background filesystem scan and return a control handle.
+ *
+ * Calls scanFilesystemStreaming with an AbortSignal and an `isPaused` probe, and
+ * forwards each raw engine event to the matching named handler, so callers never
+ * deal with event-type strings. No batching or coalescing happens here.
+ *
+ * @param {object} opts
+ * @param {string} opts.root              dir to walk (required)
+ * @param {object} opts.compiled          compiled detection patterns
+ * @param {string[]} [opts.exclude]       extra ignore patterns (forwarded verbatim)
+ * @param {object} [opts.env]             env override, forwarded verbatim
+ * @param {number} [opts.yieldBudgetMs=8] elapsed-time yield budget, forwarded to the engine
+ * @param {number} [opts.maxFilesPerSlice=40] file-count yield ceiling, forwarded to the engine
+ *
+ * @param {object} [handlers]
+ * @param {(candidate:object)=>void}  [handlers.onCandidate] one call per detected usage
+ * @param {(p:object)=>void}          [handlers.onProgress]  { filesScanned, dirsSeen, catalogsSkipped, currentDir }
+ * @param {(d:object)=>void}          [handlers.onDir]       { path, dirsSeen }
+ * @param {(s:object)=>void}          [handlers.onSkip]      { path, distinct, catalogsSkipped }
+ * @param {(r:object)=>void}          [handlers.onDone]      { candidates, filesScanned, dirsSeen, catalogsSkipped, scannedAt } — suppressed after abort()
+ * @param {(err:Error)=>void}         [handlers.onError]     the scan promise rejected — suppressed after abort()
+ *
+ * @returns {{ abort:()=>void, pause:()=>void, resume:()=>void, paused:boolean }}
+ *   abort()  aborts the signal and silences onDone/onError afterwards.
+ *   pause()  sets the flag the engine polls through `isPaused`.
+ *   resume() clears that flag.
+ *   paused   getter exposing the current flag (for UI).
+ */
+export function startScan(opts, handlers = {}) {
+  const { root, compiled, exclude, env, yieldBudgetMs = 8, maxFilesPerSlice = 40 } = opts || {};
+  const {
+    onCandidate = () => {},
+    onProgress = () => {},
+    onDir = () => {},
+    onSkip = () => {},
+    onDone = () => {},
+    onError = () => {},
+  } = handlers;
+  const controller = new AbortController();
+  let isPausedFlag = false;
+  let wasAborted = false;
+  // Latest counters seen so far, so the final summary is complete even when the
+  // last event before completion carried no counts.
+  const totals = { filesScanned: 0, dirsSeen: 0, catalogsSkipped: 0 };
+  // Route one raw engine event to its named handler.
+  const forward = (ev) => {
+    const kind = ev.type;
+    if (kind === "dir") {
+      totals.dirsSeen = ev.dirsSeen;
+      onDir({ path: ev.path, dirsSeen: ev.dirsSeen });
+    } else if (kind === "candidate") {
+      onCandidate(ev.candidate);
+    } else if (kind === "skip") {
+      totals.catalogsSkipped = ev.catalogsSkipped;
+      onSkip({ path: ev.path, distinct: ev.distinct, catalogsSkipped: ev.catalogsSkipped });
+    } else if (kind === "progress") {
+      totals.filesScanned = ev.filesScanned;
+      totals.dirsSeen = ev.dirsSeen;
+      totals.catalogsSkipped = ev.catalogsSkipped;
+      onProgress({ filesScanned: ev.filesScanned, dirsSeen: ev.dirsSeen, catalogsSkipped: ev.catalogsSkipped, currentDir: ev.currentDir });
+    }
+    // any other event type is ignored
+  };
+  // Completion: stay silent after abort so a torn-down caller is not re-notified.
+  const finish = (candidates) => {
+    if (wasAborted) return;
+    onDone({
+      candidates,
+      filesScanned: totals.filesScanned,
+      dirsSeen: totals.dirsSeen,
+      catalogsSkipped: totals.catalogsSkipped,
+      scannedAt: Date.now(),
+    });
+  };
+  // Failure: normalise whatever was thrown into an Error instance.
+  const fail = (err) => {
+    if (wasAborted) return;
+    onError(err instanceof Error ? err : new Error(String(err)));
+  };
+  const engineOpts = {
+    root,
+    signal: controller.signal,
+    exclude,
+    env,
+    isPaused: () => isPausedFlag,
+    yieldBudgetMs,
+    maxFilesPerSlice,
+  };
+  scanFilesystemStreaming(engineOpts, compiled, forward).then(finish, fail);
+  return {
+    abort() {
+      wasAborted = true;
+      controller.abort();
+    },
+    pause() {
+      isPausedFlag = true;
+    },
+    resume() {
+      isPausedFlag = false;
+    },
+    get paused() {
+      return isPausedFlag;
+    },
+  };
+}