npm - socket - Versions diffs - 1.1.111 → 1.1.113 - Mend

socket 1.1.111 → 1.1.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/dist/cli.js CHANGED

@@ -15,10 +15,10 @@ var words = require('../external/@socketsecurity/registry/lib/words');
 var fs$1 = require('node:fs');
 var arrays = require('../external/@socketsecurity/registry/lib/arrays');
 var prompts = require('../external/@socketsecurity/registry/lib/prompts');
-var bin = require('../external/@socketsecurity/registry/lib/bin');
-var childProcess = require('node:child_process');
 var os = require('node:os');
 var spawn = require('../external/@socketsecurity/registry/lib/spawn');
+var bin = require('../external/@socketsecurity/registry/lib/bin');
+var childProcess = require('node:child_process');
 var fs$2 = require('../external/@socketsecurity/registry/lib/fs');
 var strings = require('../external/@socketsecurity/registry/lib/strings');
 var path$1 = require('../external/@socketsecurity/registry/lib/path');
@@ -1110,8 +1110,10 @@ async function fetchSupportedScanFileNames(options) {
 /**
  * Finalize a tier1 reachability scan.
- *  - Associates the tier1 reachability scan metadata with the full scan.
- *  - Sets the tier1 reachability scan to "finalized" state.
+ *  - Associates the tier1 reachability scan metadata with the full scan
+ *    (or with `null` when called from a standalone reachability flow that
+ *    has no full scan to bind to).
+ *  - Transitions the tier1 reachability scan to its DONE terminal state.
  */
 async function finalizeTier1Scan(tier1ReachabilityScanId, scanId) {
   // we do not use the SDK here because the tier1-reachability-scan/finalize is a hidden
@@ -1835,12 +1837,21 @@ async function performReachabilityAnalysis(options) {
     }
     return coanaResult;
   }
+  // Coana writes the facts file relative to the scan `cwd` (it is spawned
+  // with `cwd` above), so resolve the read path against `cwd` too. Reading
+  // the bare relative path would resolve against `process.cwd()` and miss
+  // the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`), silently
+  // dropping the tier 1 scan id and skipping finalize downstream.
+  const resolvedReportPath = path.resolve(cwd, outputFilePath);
   return {
     ok: true,
     data: {
-      // Use the actual output filename for the scan.
+      // Use the actual output filename for the scan. Keep this `cwd`-relative
+      // so the upload (which relativizes against `cwd`) and the post-success
+      // unlink (`path.resolve(cwd, reachabilityReport)`) keep working.
       reachabilityReport: outputFilePath,
-      tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(outputFilePath)
+      tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(resolvedReportPath)
     }
   };
 }
@@ -1930,318 +1941,6 @@ async function resolveBazelBinary(explicit) {
   throw new utils.InputError('Could not find bazelisk or bazel on PATH. ' + 'Install bazelisk (recommended; https://github.com/bazelbuild/bazelisk) ' + 'or bazel, or pass --bazel <path>.');
 }
-/**
- * Parse `bazel query --output=build` text and `unsorted_deps.json` files
- * (rules_jvm_external) into a uniform `ExtractedArtifact` shape consumed by
- * the converter.
- *
- * Security gate: every regex uses bounded character classes to prevent
- * catastrophic backtracking on hostile bazel-query output. Rules without
- * `maven_coordinates=` are skipped. Caller is responsible for size-capping
- * the input string.
- */
-// Per-rule block matcher: matches `<kind>(...)` where kind is jvm_import or
-// aar_import, bounded by `^)` (closing paren on its own line) — Bazel
-// `--output=build` output convention. Body length capped at 8 KiB; real
-// rules are ~500 bytes, so the cap is 16x normal. Prevents pathological
-// backtracking on hostile input.
-const RULE_RE = /^(jvm_import|aar_import)\(([\s\S]{0,8192}?)^\)/gm;
-// Cache for per-attribute regexes — avoids recompiling the same pattern on
-// every rule block. Keyed by attr name; all attr names are safe alphanumeric
-// identifiers so no escaping is needed beyond the bounded character class.
-const ATTR_RE_CACHE = new Map();
-// Cache for per-tag-key regexes used by extractTagValue.
-const TAG_RE_CACHE = new Map();
-function extractAttr(body, attr) {
-  // Match `<attr> = "VALUE"` — quoted-string attrs only.
-  // Quoted value capped at 4 KiB; canonical Maven URLs are ~150 bytes.
-  let re = ATTR_RE_CACHE.get(attr);
-  if (!re) {
-    re = new RegExp(`\\b${attr}\\s*=\\s*"([^"\\n]{0,4096})"`);
-    ATTR_RE_CACHE.set(attr, re);
-  }
-  const m = re.exec(body);
-  return m?.[1];
-}
-// Extracts a `key=value` pair from inside a Bazel `tags = [...]` attribute
-// (rules_jvm_external encodes maven_sha256, maven_coordinates etc. this way).
-// Pattern: `"maven_sha256=<hex>"` inside the tags list.
-// Returns undefined when the tag is absent or malformed.
-function extractTagValue(body, tagKey) {
-  // Match the full tags = [...] block (bounded at 8 KiB).
-  const tagsM = /\btags\s*=\s*\[([\s\S]{0,8192}?)\]/m.exec(body);
-  if (!tagsM) {
-    return undefined;
-  }
-  const tagsBlob = tagsM[1];
-  // Within the blob, look for "<tagKey>=<value>" inside a quoted string.
-  // Bounded at 512 bytes per tag entry (sha256 hex is 64 chars; URLs ~150).
-  let tagRe = TAG_RE_CACHE.get(tagKey);
-  if (!tagRe) {
-    tagRe = new RegExp(`"${tagKey}=([^"\\n]{0,512})"`);
-    TAG_RE_CACHE.set(tagKey, tagRe);
-  }
-  const m = tagRe.exec(tagsBlob);
-  return m?.[1];
-}
-function extractDeps(body) {
-  // Match `deps = ["a", "b", ...]`. Body length capped at 16 KiB; real
-  // dep lists are <2 KiB.
-  const m = /\bdeps\s*=\s*\[([\s\S]{0,16384}?)\]/m.exec(body);
-  if (!m) {
-    return [];
-  }
-  const out = [];
-  // Per-label cap at 512 bytes; real Bazel labels are <100 bytes.
-  for (const q of m[1].matchAll(/"([^"\n]{0,512})"/g)) {
-    out.push(q[1]);
-  }
-  return out;
-}
-/**
- * Parse `bazel query --output=build` stdout into `ExtractedArtifact[]`.
- * Skips rules without a `maven_coordinates` attribute (those aren't
- * rules_jvm_external lockfile rules).
- */
-function parseBazelBuildOutput(text) {
-  const results = [];
-  for (const m of text.matchAll(RULE_RE)) {
-    const ruleKind = m[1];
-    const body = m[2];
-    const ruleName = extractAttr(body, 'name');
-    // maven_coordinates can be:
-    //   (a) a top-level rule attribute: `maven_coordinates = "g:a:v"` (newer rje)
-    //   (b) inside tags = [...]: `"maven_coordinates=g:a:v"` (older rje, e.g. ray)
-    const coords = extractAttr(body, 'maven_coordinates') ?? extractTagValue(body, 'maven_coordinates');
-    if (!ruleName || !coords) {
-      continue;
-    }
-    // maven_sha256 is encoded inside tags = [...] as "maven_sha256=<hex>" by
-    // rules_jvm_external; try tags first, fall back to standalone attr for
-    // older rule shapes that may declare it as a top-level attribute.
-    const mavenSha256 = extractTagValue(body, 'maven_sha256') ?? extractAttr(body, 'maven_sha256');
-    results.push({
-      ruleKind,
-      ruleName,
-      mavenCoordinates: coords,
-      mavenUrl: extractAttr(body, 'maven_url'),
-      mavenSha256,
-      deps: extractDeps(body)
-    });
-  }
-  return results;
-}
-function ruleNameFromCoordinate(c) {
-  return c.replace(/[^A-Za-z0-9]/g, '_');
-}
-/**
- * Parse supported `external/<repo>/unsorted_deps.json` shapes emitted by
- * rules_jvm_external. Older files use an artifact array with full coordinates;
- * newer v2 lock-file-shaped files use artifact/dependency maps keyed by
- * `group:artifact`. Caller MUST size-cap the input because JSON.parse is
- * unbounded by default.
- */
-function parseUnsortedDepsJson(json) {
-  let parsed;
-  try {
-    parsed = JSON.parse(json);
-  } catch {
-    return [];
-  }
-  const maybe = parsed;
-  if (Array.isArray(maybe.artifacts)) {
-    const out = [];
-    for (const a of maybe.artifacts) {
-      if (typeof a?.coordinates !== 'string') {
-        continue;
-      }
-      const deps = [];
-      if (Array.isArray(a.deps)) {
-        for (const d of a.deps) {
-          if (typeof d === 'string') {
-            deps.push(d);
-          }
-        }
-      }
-      out.push({
-        ruleKind: 'jvm_import',
-        ruleName: ruleNameFromCoordinate(a.coordinates),
-        mavenCoordinates: a.coordinates,
-        mavenUrl: typeof a.url === 'string' ? a.url : undefined,
-        mavenSha256: typeof a.sha256 === 'string' ? a.sha256 : undefined,
-        deps
-      });
-    }
-    return out;
-  }
-  if (!maybe.artifacts || typeof maybe.artifacts !== 'object') {
-    return [];
-  }
-  const dependencies = maybe.dependencies ?? {};
-  const out = [];
-  for (const [groupArtifact, artifact] of Object.entries(maybe.artifacts)) {
-    if (!artifact || typeof artifact.version !== 'string') {
-      continue;
-    }
-    const shasums = artifact.shasums ?? {};
-    const jarSha = shasums['jar'];
-    if (typeof jarSha === 'string' || Object.keys(shasums).length === 0) {
-      out.push(v2Artifact(groupArtifact, artifact.version, jarSha, dependencies));
-    }
-    for (const [classifier, sha256] of Object.entries(shasums)) {
-      if (classifier === 'jar' || typeof sha256 !== 'string') {
-        continue;
-      }
-      const classifierKey = `${groupArtifact}:jar:${classifier}`;
-      out.push(v2Artifact(classifierKey, artifact.version, sha256, dependencies));
-    }
-  }
-  return out;
-}
-function v2Artifact(artifactKey, version, sha256, dependencies) {
-  return {
-    ruleKind: 'jvm_import',
-    ruleName: ruleNameFromCoordinate(artifactKey),
-    mavenCoordinates: `${artifactKey}:${version}`,
-    mavenSha256: sha256,
-    deps: Array.isArray(dependencies[artifactKey]) ? dependencies[artifactKey].filter(d => typeof d === 'string') : []
-  };
-}
-let probed = false;
-// Verifies `java` is functional in the current execution environment. Bazel
-// JVM manifest extraction (rules_jvm_external → Coursier) requires a real
-// JDK; the CLI does not attempt to discover Homebrew installs or mutate the
-// caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
-// actionable message so the surfaced error names the prerequisite directly
-// instead of relying on Bazel's downstream diagnostic.
-function ensureJavaOnPath() {
-  if (probed) {
-    return;
-  }
-  try {
-    childProcess.execSync('java -version', {
-      stdio: 'ignore'
-    });
-    probed = true;
-  } catch {
-    throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
-  }
-}
-// Validates that --bazel-output-base is a path we can use as Bazel's output_base.
-// Throws InputError if:
-//   - the input contains `..` segments (path traversal guard)
-//   - the existing path is not writable
-//   - the path cannot be created (parent not writable)
-function validateOutputBase(outputBase, cwd) {
-  // Path traversal guard: reject any literal `..` segment in user input.
-  // After path.resolve these are normalised away, so we check the raw input.
-  // Split on both separators. On Windows `path.sep === '\\'`, so
-  // input like `foo/../etc` would not contain a `..` segment under the
-  // platform-specific split, bypassing the guard — yet path.resolve below
-  // would still normalise the `..` and a traversal target could materialise.
-  const segments = outputBase.split(/[\\/]/);
-  if (segments.includes('..')) {
-    throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
-  }
-  const resolved = path.resolve(cwd, outputBase);
-  if (fs$1.existsSync(resolved)) {
-    try {
-      fs$1.accessSync(resolved, fs$1.constants.W_OK);
-    } catch {
-      throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
-    }
-    return;
-  }
-  // Path does not exist yet — try to create it so bazel can populate it.
-  try {
-    fs$1.mkdirSync(resolved, {
-      recursive: true
-    });
-  } catch (e) {
-    throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
-  }
-}
-// Stable shim dir name — same process will get the same dir; concurrent
-// socket-cli invocations on the same machine share it. The symlink target
-// is whatever python3 resolves to NOW; if PATH changes between invocations
-// we replace the symlink.
-const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
-// Cache the result for the lifetime of this process.
-let cached = null;
-// Safe wrapper around whichBin that returns null instead of throwing when
-// nothrow semantics are broken in older registry versions (realpath 'null' bug).
-async function safeWhichBin(name) {
-  try {
-    return (await bin.whichBin(name, {
-      nothrow: true
-    })) ?? null;
-  } catch {
-    return null;
-  }
-}
-async function provisionPythonShim() {
-  if (cached) {
-    return cached;
-  }
-  const pythonOnPath = await safeWhichBin('python');
-  if (pythonOnPath) {
-    cached = {
-      augmentedEnv: undefined,
-      shimDir: undefined
-    };
-    return cached;
-  }
-  const python3OnPath = await safeWhichBin('python3');
-  if (!python3OnPath) {
-    throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
-  }
-  const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
-  fs$1.mkdirSync(shimDir, {
-    recursive: true
-  });
-  const linkPath = path.join(shimDir, 'python');
-  // Replace the symlink defensively in case python3's resolved path moved.
-  if (fs$1.existsSync(linkPath)) {
-    try {
-      fs$1.unlinkSync(linkPath);
-    } catch {
-      // Tolerate races; the next symlinkSync may still succeed.
-    }
-  }
-  // The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
-  // so a concurrent socket-cli invocation may re-create the link between our
-  // unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
-  // other process won the race and left a usable shim in place.
-  try {
-    fs$1.symlinkSync(python3OnPath, linkPath);
-  } catch (e) {
-    if (e.code === 'EEXIST' && fs$1.existsSync(linkPath)) ; else {
-      throw e;
-    }
-  }
-  const augmentedEnv = {
-    ...process.env,
-    PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
-  };
-  cached = {
-    augmentedEnv,
-    shimDir
-  };
-  return cached;
-}
 // Default per-invocation timeout for bazel queries. Bazel cold-cache starts
 // can take several minutes; 10 minutes is generous while still bounding CI hangs.
 const BAZEL_QUERY_TIMEOUT_MS = 600_000;
@@ -2258,42 +1957,58 @@ function splitBazelFlags(flags) {
   }
   return flags.split(/\s+/).filter(Boolean);
 }
-function buildBazelModShowVisibleReposArgv(opts) {
+// Build the shared startup-flag prefix for any bazel invocation. Centralised
+// so `--output_user_root` propagates to every spawn — principle 7 of the
+// Maven design requires per-invocation server isolation across query,
+// cquery, and `bazel mod` commands alike.
+function buildStartupFlags(opts) {
   const startup = [];
   if (opts.bazelRc) {
     startup.push(`--bazelrc=${opts.bazelRc}`);
   }
+  if (opts.outputUserRoot) {
+    startup.push(`--output_user_root=${opts.outputUserRoot}`);
+  }
   if (opts.bazelOutputBase) {
     startup.push(`--output_base=${opts.bazelOutputBase}`);
   }
+  return startup;
+}
+function buildBazelModShowVisibleReposArgv(opts) {
+  const userFlags = splitBazelFlags(opts.bazelFlags);
+  return [...buildStartupFlags(opts), 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
+}
+function buildBazelModShowMavenExtensionArgv(opts) {
   const userFlags = splitBazelFlags(opts.bazelFlags);
-  return [...startup, 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
+  return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven',
+  // Belt-and-suspenders output reducer mirroring the PyPI path: bias the
+  // report toward the root module's usages. The authoritative pruning is
+  // the importers-filter applied to the parsed output, so this is not
+  // relied on for correctness.
+  '--extension_usages=<root>', ...userFlags];
 }
 function buildBazelModShowPipExtensionArgv(opts) {
-  const startup = [];
-  if (opts.bazelRc) {
-    startup.push(`--bazelrc=${opts.bazelRc}`);
-  }
-  if (opts.bazelOutputBase) {
-    startup.push(`--output_base=${opts.bazelOutputBase}`);
-  }
   const userFlags = splitBazelFlags(opts.bazelFlags);
-  return [...startup, 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
+  return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
 }
 function buildBazelArgv(queryStr, opts, output = 'build') {
   // Startup flags MUST precede the `query` subcommand.
   // Bazel argv shape: <startup> query <queryFlags> <invocationFlags> <queryStr> --output=<output> <userFlags>
-  const startup = [];
-  if (opts.bazelRc) {
-    startup.push(`--bazelrc=${opts.bazelRc}`);
-  }
-  if (opts.bazelOutputBase) {
-    startup.push(`--output_base=${opts.bazelOutputBase}`);
-  }
   // Keep query output stable and avoid updating Bazel lockfiles while extracting.
   const queryFlags = ['--lockfile_mode=off', '--noshow_progress'];
   const userFlags = splitBazelFlags(opts.bazelFlags);
-  return [...startup, 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
+  return [...buildStartupFlags(opts), 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
+}
+// Lightweight presence-check cquery used by the tri-state probe classifier.
+// `--keep_going --output=label` keeps it fast even on partial-analysis
+// repos and avoids paying for `--output=jsonproto` plus
+// `--proto:output_rule_attrs` (which the heavier metadata extraction in
+// `bazel-cquery.mts` needs but the probe does not).
+function buildBazelProbeCqueryArgv(repoName, opts) {
+  const userFlags = splitBazelFlags(opts.bazelFlags);
+  return [...buildStartupFlags(opts), 'cquery', '--lockfile_mode=off', '--noshow_progress', ...opts.invocationFlags, `@${repoName}//...`, '--output=label', '--keep_going', ...userFlags];
 }
 function stringField(value) {
   return typeof value === 'string' ? value : '';
@@ -2407,14 +2122,7 @@ async function runBazelQuery(queryStr, opts, output) {
     }
   }
 }
-/**
- * Bzlmod-native visible repository enumeration. This is only a candidate
- * source; callers must still validate each returned apparent repo name with a
- * semantic query for generated ecosystem rules.
- */
-async function runBazelModShowVisibleRepos(opts) {
-  const argv = buildBazelModShowVisibleReposArgv(opts);
+async function runBazelOneShot(argv, opts, step) {
   if (opts.verbose) {
     logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
   }
@@ -2446,364 +2154,830 @@ async function runBazelModShowVisibleRepos(opts) {
     durationMs: Date.now() - startedAt,
     opts,
     result,
-    step: 'bazel mod dump_repo_mapping'
+    step
   });
   return result;
 }
 /**
- * Bzlmod-native rules_python pip extension usage inspection. This is the
- * authoritative source for root-module pip.parse metadata when Bazel supports
- * the command; callers keep bounded static parsing as fallback.
+ * Bzlmod-native visible repository enumeration. NOTE: only consumed by the
+ * legacy PyPI path; the Maven path uses `runBazelModShowMavenExtension`
+ * instead because `dump_repo_mapping` over-enumerates apparent names that
+ * are not Maven hubs.
  */
-async function runBazelModShowPipExtension(opts) {
-  const argv = buildBazelModShowPipExtensionArgv(opts);
-  if (opts.verbose) {
-    logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
-  }
-  const startedAt = Date.now();
-  let result;
-  try {
-    const output = await spawn.spawn(opts.bin, argv, {
-      cwd: opts.cwd,
-      timeout: BAZEL_QUERY_TIMEOUT_MS,
-      ...(opts.env ? {
-        env: opts.env
-      } : {})
-    });
-    const {
-      code,
-      stderr,
-      stdout
-    } = output;
-    result = {
-      code,
-      stdout,
-      stderr
-    };
-  } catch (e) {
-    result = normalizeSpawnError(e);
-  }
-  logBazelTrace({
-    argv,
-    durationMs: Date.now() - startedAt,
-    opts,
-    result,
-    step: 'bazel mod show_extension rules_python pip'
-  });
-  return result;
+async function runBazelModShowVisibleRepos(opts) {
+  return await runBazelOneShot(buildBazelModShowVisibleReposArgv(opts), opts, 'bazel mod dump_repo_mapping');
 }
 /**
- * Build a `RepoProbe` (compatible with bazel-repo-discovery) bound to opts.
- * Used by `discoverMavenRepos` to validate candidate Maven repo
- * names against the running workspace.
+ * Bzlmod-native Maven hub enumeration via the rules_jvm_external maven
+ * extension. The text-format report lists every repo the extension
+ * generated; `parseShowExtensionOutput` (bazel-repo-discovery.mts)
+ * extracts the hubs from the `Fetched repositories:` section.
  */
-function buildProbeFor(opts) {
-  return async repoName => {
-    const queryStr = `kind("jvm_import rule|aar_import rule", @${repoName}//:*)`;
-    const result = await runBazelQuery(queryStr, opts);
-    return {
-      stdout: result.stdout,
-      code: result.code
-    };
-  };
+async function runBazelModShowMavenExtension(opts) {
+  return await runBazelOneShot(buildBazelModShowMavenExtensionArgv(opts), opts, 'bazel mod show_extension rules_jvm_external maven');
 }
 /**
- * Build a `RepoProbe` for validating pip hub candidates.
- * Queries the hub for package targets (e.g. `@<hub>//...`) and returns
- * stdout so the caller can check for `:pkg` labels or alias rules.
- * Does NOT require `pypi_name=` tags in the hub output, because those
- * tags live on spoke repos, not the hub alias layer.
+ * Bzlmod-native rules_python pip extension usage inspection. Used by the
+ * PyPI path; kept here since the argv shape is identical to the maven
+ * variant modulo the extension target.
+ */
+async function runBazelModShowPipExtension(opts) {
+  return await runBazelOneShot(buildBazelModShowPipExtensionArgv(opts), opts, 'bazel mod show_extension rules_python pip');
+}
+/**
+ * Build a `RepoProbe` (compatible with bazel-repo-discovery's tri-state
+ * classifier) bound to opts. Runs the lightweight presence-check cquery
+ * `@<name>//... --output=label --keep_going` — cheap enough to attempt
+ * every conventional Maven hub name without triggering `repository_rule`
+ * fetches on undefined names (Exp 3).
+ */
+function buildMavenProbeFor(opts) {
+  return async repoName => {
+    const argv = buildBazelProbeCqueryArgv(repoName, opts);
+    const result = await runBazelOneShot(argv, opts, `bazel cquery probe @${repoName}`);
+    return {
+      code: result.code,
+      stdout: result.stdout,
+      stderr: result.stderr
+    };
+  };
+}
+/**
+ * Build a `RepoProbe` for validating pip hub candidates.
+ * Queries the hub for package targets (e.g. `@<hub>//...`) and returns the
+ * full result triple so the caller can check for `:pkg` labels or alias
+ * rules. Does NOT require `pypi_name=` tags in the hub output, because
+ * those tags live on spoke repos, not the hub alias layer.
  */
 function buildPypiProbeFor(opts) {
   return async hubName => {
     const queryStr = `@${hubName}//...`;
     const result = await runBazelQuery(queryStr, opts);
     return {
+      code: result.code,
       stdout: result.stdout,
-      code: result.code
+      stderr: result.stderr
     };
   };
 }
-// Maximum size (bytes) we will read for any single Bazel workspace file.
-// Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
-const MAX_WORKSPACE_FILE_BYTES$1 = 5 * 1024 * 1024;
-// Maximum candidate count we will return (deduped) before truncating.
-// Real repos have <20; this is a hard ceiling against pathological inputs.
-const MAX_CANDIDATES$1 = 256;
+/**
+ * Per-repo metadata cquery + jsonproto parser for the Maven path.
+ *
+ * Pipeline:
+ *  1. Build a cquery argv targeting `attr("tags", "\bmaven_coordinates=",
+ *     @<repo>//...)` plus a union variant for the direct `maven_coordinates`
+ *     attribute. `--output=jsonproto` +
+ *     `--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps`
+ *     keeps the payload small while still surfacing the resolved Maven graph.
+ *  2. Spawn under a caller-supplied `outputUserRoot` so the orchestrator can
+ *     reap the server cleanly (`bazel --output_user_root=<this> shutdown`
+ *     followed by `rm -rf`). The runner itself never deletes anything —
+ *     server lifecycle is the orchestrator's concern.
+ *  3. Parse the jsonproto stream defensively: dispatch on `attribute[].type`
+ *     and accept both camelCase (`stringValue`, `stringListValue`) and
+ *     snake_case (`string_value`, `string_list_value`) payload keys.
+ *  4. Extract the maven coordinate from the direct `maven_coordinates` attr
+ *     when present, else scan `tags` for `maven_coordinates=<G:A:V>`.
+ *  5. Resolve each rule's `deps`/`exports`/`runtime_deps` label edges into
+ *     versionless Maven coordinates against this repo's own targets, while
+ *     `repoName` is still in scope. Edges that point at a hub-prefixed target
+ *     we cannot resolve are reported as `unresolvedLabels` so the caller can
+ *     flip the hub partial rather than silently dropping graph edges.
+ *  6. Tag every artifact with `workspace:<rel-path>` + `repo:<name>`
+ *     provenance via `sourceRepo`.
+ */
-// Regex strategy: anchored, bounded character classes, no nested quantifiers.
-// Match `use_repo(maven, "X", "Y", ...)` with a bounded arg-list window to
-// avoid catastrophic backtracking on hostile input.
-// Bzlmod use_repo(maven, "name1", "name2"...).
-// Bounded: matches up to ~4KB of arg list to avoid catastrophic backtracking.
-const USE_REPO_RE = /use_repo\s*\(\s*maven\s*,([^)]{0,4096})\)/g;
-const BAZEL_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
-const BAZEL_REPO_NAME_RE = new RegExp(`^${BAZEL_REPO_NAME_PATTERN}$`);
-// Quoted-name extractor inside the captured argument blob.
-const QUOTED_NAME_RE = new RegExp(`"(${BAZEL_REPO_NAME_PATTERN})"`, 'g');
-// Legacy maven_install(name = "X", ...) on a single statement.
-// Match the name= keyword arg specifically; bounded.
-const MAVEN_INSTALL_NAME_RE = new RegExp(`maven_install\\s*\\([^)]{0,8192}?\\bname\\s*=\\s*"(${BAZEL_REPO_NAME_PATTERN})"`, 'g');
-const MAVEN_COORDINATES_MARKER_RE = /\bmaven_coordinates\s*=/;
+// One Maven artifact recovered from the cquery stream. `ruleKind` is whatever
+// `ruleClass` jsonproto reports (`jvm_import`, `aar_import`, `java_library`,
+// `kt_jvm_import`, any future rules_jvm_external rule), so the type is open.
+// `deps` holds resolved versionless Maven coordinates (the parser resolves the
+// rule's label edges against this repo's own targets), not raw Bazel labels.
+// Result of parsing one repo's cquery stream: the recovered artifacts (with
+// resolved coordinate edges in `deps`) plus any hub-prefixed dep labels that
+// could not be resolved.
+// Maven coordinate token: `g:a:v` (3 parts) or `g:a:v:classifier` /
+// `g:a:packaging:v` (4-part rules_jvm_external shapes). Tolerant of dots,
+// dashes, plus, underscores in any part.
+const MAVEN_COORD_TAG_RE = /^maven_coordinates=(.+)$/;
+// The dep/export/runtime_deps attributes whose label edges encode the
+// resolved Maven graph. rules_jvm_external writes `jvm_import.deps` (e.g.
+// `junit` -> `@maven//:org_hamcrest_hamcrest_core`); compile/runtime scopes
+// surface via `exports`/`runtime_deps`. We union all three.
+const EDGE_ATTR_NAMES = new Set(['deps', 'exports', 'runtime_deps']);
+// Build the metadata cquery target expression for one repo. The union of
+// two predicates picks up artifacts that:
+//  - encode the coordinate in the conventional `tags = ["maven_coordinates=..."]`
+//    list (rules_jvm_external's emission for `jvm_import` and friends), or
+//  - declare the coordinate as a direct `maven_coordinates` attribute
+//    (Bazel-native java_library / kt_jvm_import shape).
+// Note: a `maven_url`-only predicate was intentionally dropped — those rules
+// carry no coordinate, so selecting them only to discard them downstream is
+// wasted analysis. If POM-only artifacts ever matter, synthesize
+// a coordinate from `maven_url` instead of re-adding the selector.
+function buildMetadataCqueryExpr(repoName) {
+  const r = `@${repoName}//...`;
+  // The `\b` boundary in the tags predicate prevents matches on tag values
+  // like `pre_maven_coordinates=fake`; see todo 2 acceptance test (10).
+  return [`attr("tags", "\\bmaven_coordinates=", ${r})`, `attr("maven_coordinates", ".+", ${r})`].join(' union ');
+}
+// Build the full cquery argv for a per-repo metadata cquery. Exposed for
+// argv-shape unit tests without touching `spawn`.
+function buildMetadataCqueryArgv(repoName, opts) {
+  const startup = [];
+  if (opts.bazelRc) {
+    startup.push(`--bazelrc=${opts.bazelRc}`);
+  }
+  if (opts.outputUserRoot) {
+    startup.push(`--output_user_root=${opts.outputUserRoot}`);
+  }
+  if (opts.bazelOutputBase) {
+    startup.push(`--output_base=${opts.bazelOutputBase}`);
+  }
+  const userFlags = splitBazelFlags(opts.bazelFlags);
+  return [...startup, 'cquery', '--lockfile_mode=off', '--noshow_progress', ...opts.invocationFlags, buildMetadataCqueryExpr(repoName), '--output=jsonproto', '--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps', '--keep_going', ...userFlags];
+}
+function readStringAttr(attr) {
+  if (attr.type !== 'STRING') {
+    return undefined;
+  }
+  if (typeof attr.stringValue === 'string') {
+    return attr.stringValue;
+  }
+  if (typeof attr.string_value === 'string') {
+    return attr.string_value;
+  }
+  return undefined;
+}
+function readStringListAttr(attr) {
+  if (attr.type !== 'STRING_LIST') {
+    return undefined;
+  }
+  if (Array.isArray(attr.stringListValue)) {
+    return attr.stringListValue;
+  }
+  if (Array.isArray(attr.string_list_value)) {
+    return attr.string_list_value;
+  }
+  return undefined;
+}
-// Reads file contents, refusing files that exceed MAX_WORKSPACE_FILE_BYTES.
-// Returns null when the file is missing, oversized, or unreadable.
-function safeReadFile$1(file) {
-  if (!fs$1.existsSync(file)) {
-    return null;
+// Reads a `LABEL_LIST` jsonproto attribute. Bazel serializes label lists into
+// the same string-list payload (`stringListValue` / `string_list_value`) it
+// uses for `STRING_LIST`, but tags the attribute `type: "LABEL_LIST"`. The
+// `deps`/`exports`/`runtime_deps` edge attrs are LABEL_LIST, so a STRING_LIST
+// reader would silently return nothing and leave the graph empty.
+function readLabelListAttr(attr) {
+  if (attr.type !== 'LABEL_LIST') {
+    return undefined;
   }
-  try {
-    const stat = fs$1.statSync(file);
-    if (stat.size > MAX_WORKSPACE_FILE_BYTES$1) {
-      return null;
-    }
-    return fs$1.readFileSync(file, 'utf8');
-  } catch {
-    return null;
+  if (Array.isArray(attr.stringListValue)) {
+    return attr.stringListValue;
   }
+  if (Array.isArray(attr.string_list_value)) {
+    return attr.string_list_value;
+  }
+  return undefined;
 }
-// Walks workspace root for legacy Starlark sources we can scan: WORKSPACE
-// (and WORKSPACE.bazel) plus top-level .bzl files. Non-recursive by design;
-// Phase 1 explicitly avoids static Starlark parsing at depth.
-function listLegacyStarlarkFiles$1(cwd) {
-  const files = [];
-  const candidates = ['WORKSPACE', 'WORKSPACE.bazel'];
-  for (const c of candidates) {
-    const p = path.join(cwd, c);
-    if (fs$1.existsSync(p)) {
-      files.push(p);
-    }
+// Strip the trailing version segment from a Maven coordinate, preserving any
+// packaging/classifier segments. `g:a:v` -> `g:a`,
+// `g:a:packaging:v` -> `g:a:packaging`,
+// `g:a:packaging:classifier:v` -> `g:a:packaging:classifier`. Coordinates with
+// fewer than 3 segments have no version to strip and are returned unchanged.
+// This matches depscan's `coordinateToParts` keying (position 3 = extension,
+// position 4 = classifier on the versionless key), so AAR/classifier artifacts
+// key correctly instead of being mis-keyed as bare `group:artifact` jars.
+function versionlessCoordinate(coord) {
+  const parts = coord.split(':');
+  if (parts.length < 3) { // fewer than three segments: nothing to strip
+    return coord;
+  }
+  return parts.slice(0, -1).join(':'); // drop only the final segment and keep everything before it
+}
+// Recover the `@<repo>//` prefix from a fully-qualified target label, covering
+// both apparent (`@maven//:foo`) and bzlmod-canonical
+// (`@@rules_jvm_external++maven+maven//pkg:foo`) forms. Returns undefined for
+// labels that aren't repo-qualified (e.g. `:src`).
+function repoPrefixOfLabel(label) {
+  if (!label.startsWith('@')) { // not repo-qualified
+    return undefined;
   }
-  // Top-level .bzl files only.
-  try {
-    for (const entry of fs$1.readdirSync(cwd)) {
-      if (entry.endsWith('.bzl')) {
-        files.push(path.join(cwd, entry));
+  const sep = label.indexOf('//'); // first occurrence of the repo/package separator
+  if (sep < 0) {
+    return undefined;
+  }
+  return label.slice(0, sep + 2); // the returned prefix includes the trailing double slash
+}
+// Recover the bare rule name from a target label by taking everything after
+// the last `:` (e.g. `@maven//:com_google_guava_guava` -> `com_google_guava_guava`).
+function ruleNameFromLabel(label) {
+  const colon = label.lastIndexOf(':');
+  return colon >= 0 ? label.slice(colon + 1) : label; // colon-less input is returned unchanged
+}
+// Extract the maven coordinate from a rule's attributes. Prefers the direct
+// `maven_coordinates` attribute (Bazel-native shape); falls back to scanning
+// `tags` for a `maven_coordinates=<G:A:V>` entry (rules_jvm_external shape).
+// Returns undefined if neither yields a non-empty value.
+function extractMavenCoordinate(rule) {
+  let coord;
+  for (const attr of rule.attribute ?? []) { // a rule without an attribute list yields undefined
+    if (attr.name === 'maven_coordinates') {
+      const direct = readStringAttr(attr);
+      if (direct && direct.length) {
+        coord = direct; // the direct attribute overwrites a coordinate taken from an earlier tag
+      }
+    } else if (attr.name === 'tags') {
+      const tags = readStringListAttr(attr);
+      if (tags) {
+        for (const tag of tags) {
+          const m = MAVEN_COORD_TAG_RE.exec(tag);
+          if (m && !coord) { // first matching tag wins and never replaces an existing coordinate
+            coord = m[1];
+          }
+        }
       }
     }
-  } catch {
-    // Ignore unreadable cwd.
   }
-  return files;
+  return coord;
 }
-// Returns deduplicated, sorted list of items, capped at MAX_CANDIDATES.
-function uniqueSorted(items) {
-  const seen = new Set();
-  const out = [];
-  for (const item of items) {
-    if (!seen.has(item)) {
-      seen.add(item);
-      out.push(item);
-      if (out.length >= MAX_CANDIDATES$1) {
-        break;
+// Collect the union of `deps`/`exports`/`runtime_deps` label edges off a rule.
+function extractEdgeLabels(rule) {
+  const labels = []; // concatenation in attribute order; duplicates are not removed here
+  for (const attr of rule.attribute ?? []) {
+    if (attr.name && EDGE_ATTR_NAMES.has(attr.name)) {
+      const list = readLabelListAttr(attr); // undefined unless the attribute is typed LABEL_LIST
+      if (list) {
+        labels.push(...list);
       }
     }
   }
-  return out.sort();
+  return labels;
 }
-function apparentNameFromJsonValue(value) {
-  if (!value || typeof value !== 'object') {
-    return undefined;
-  }
-  const obj = value;
-  const direct = obj['apparentName'] ?? obj['apparent_name'];
-  if (typeof direct === 'string') {
-    return direct;
+// Each record handled below is a coordinate-bearing rule recovered from the
+// cquery stream, before its edge labels are resolved to coordinates.
+// Build the label -> coordinate index from this repo's own coordinate-bearing
+// targets, keyed by the full emitted rule label (the form dep labels also use,
+// since both come from the same cquery output). The `:<ruleName>` suffix map
+// is a fallback for labels that don't full-match.
+function buildLabelCoordIndex(records) {
+  const fullLabels = new Map(); // full rule label -> versionless coordinate
+  const suffixToCoords = new Map(); // colon-prefixed rule name -> Set of versionless coordinates
+  const hubPrefixes = new Set(); // repo prefixes seen on the indexed labels
+  for (const rec of records) {
+    const coord = versionlessCoordinate(rec.coord);
+    fullLabels.set(rec.fullLabel, coord); // a later record with the same label overwrites the earlier one
+    const suffix = `:${rec.ruleName}`;
+    const set = suffixToCoords.get(suffix) ?? new Set();
+    set.add(coord); // more than one entry marks the suffix as ambiguous for the fallback lookup
+    suffixToCoords.set(suffix, set);
+    const prefix = repoPrefixOfLabel(rec.fullLabel);
+    if (prefix) {
+      hubPrefixes.add(prefix);
+    }
   }
-  for (const nested of Object.values(obj)) {
-    const found = apparentNameFromJsonValue(nested);
-    if (found) {
-      return found;
+  return {
+    fullLabels,
+    hubPrefixes,
+    suffixToCoords
+  };
+}
+function isHubPrefixed(label, hubPrefixes) { // True when label starts with any prefix in hubPrefixes.
+  for (const prefix of hubPrefixes) {
+    if (label.startsWith(prefix)) {
+      return true;
     }
   }
-  return undefined;
+  return false; // also the result for an empty prefix collection
 }
-function apparentNamesFromRepoMapping(value) {
-  if (!value || typeof value !== 'object' || Array.isArray(value)) {
-    return [];
+// Resolve one dep label into a versionless coordinate. Classifies into three
+// buckets (there is deliberately no "seen but coordinate-less" bucket — the
+// cquery only selects coordinate-bearing targets):
+//  - `coord`     — full-label match, unique-suffix fallback, or an already-a-
+//                  coordinate `g:a:v` string label.
+//  - `unresolved`— hub-prefixed but resolves to nothing in the selected set
+//                  (missing target or ambiguous suffix): a known-dropped edge.
+//  - `drop`      — a non-maven target (`@platforms//…`, `:src`): intentional.
+function resolveDepLabel(label, index) {
+  const full = index.fullLabels.get(label); // exact full-label match is tried first
+  if (full) {
+    return {
+      coord: full,
+      kind: 'coord'
+    };
   }
-  const candidates = [];
-  for (const [name, canonicalName] of Object.entries(value)) {
-    if (name.startsWith('@') || typeof canonicalName !== 'string') {
-      continue;
-    }
-    if (BAZEL_REPO_NAME_RE.test(name)) {
-      candidates.push(name);
+  if (isHubPrefixed(label, index.hubPrefixes)) {
+    // Suffix fallback, but only when the match is unique.
+    const suffix = `:${ruleNameFromLabel(label)}`;
+    const set = index.suffixToCoords.get(suffix);
+    if (set && set.size === 1) {
+      return {
+        coord: [...set][0], // the single coordinate registered for this rule name
+        kind: 'coord'
+      };
     }
+    // Hub-prefixed but missing or ambiguous — a genuinely dropped edge.
+    return {
+      kind: 'unresolved'
+    };
   }
-  return candidates;
-}
-function normalizeRepoName(name) {
-  const repo = name.startsWith('@') ? name.slice(1) : name;
-  return BAZEL_REPO_NAME_RE.test(repo) ? repo : undefined;
+  // Already-a-coordinate fallback: a bare `g:a:v` string label (not a Bazel
+  // label). Versionless-normalize it. Exclude `//`-prefixed package-relative
+  // labels (`//pkg:thing`) — those are Bazel targets, not coordinates.
+  if (label.includes(':') && !label.startsWith('@') && !label.startsWith(':') && !label.startsWith('//')) {
+    return {
+      coord: versionlessCoordinate(label),
+      kind: 'coord'
+    };
+  }
+  // Non-maven target — intentional drop, not counted.
+  return {
+    kind: 'drop'
+  };
 }
-// Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accept the
-// older streamed jsonproto shape in case older Bazel versions or fixtures still
-// return repository records with apparentName fields.
-function parseVisibleRepoCandidates(output) {
-  const candidates = [];
-  for (const line of output.split(/\r?\n/)) {
-    const trimmed = line.trim();
-    if (!trimmed) {
-      continue;
-    }
-    try {
-      const parsed = JSON.parse(trimmed);
-      candidates.push(...apparentNamesFromRepoMapping(parsed));
-      const apparentName = apparentNameFromJsonValue(parsed);
-      if (apparentName) {
-        const repo = normalizeRepoName(apparentName);
-        if (repo) {
-          candidates.push(repo);
+// Pure parser for the jsonproto cquery stream. Returns one
+// `ExtractedArtifact` per rule with a recoverable maven coordinate (its `deps`
+// holding resolved versionless coordinates) plus the set of hub-prefixed dep
+// labels that could not be resolved. The `sourceRepo` field carries
+// `<workspaceRelPath>:<repoName>` provenance when a workspace path was
+// provided; otherwise just the repo name.
+function parseCqueryJsonproto(stdout, repoName, workspaceRelPath) {
+  if (!stdout.trim()) { // blank or whitespace-only output: nothing to parse
+    return {
+      artifacts: [],
+      unresolvedLabels: []
+    };
+  }
+  // Bazel 5+ emits a single JSON envelope; older versions stream one target
+  // per line. Try envelope-first, then fall back to per-line.
+  const targets = [];
+  try {
+    const parsed = JSON.parse(stdout);
+    if (parsed.results) {
+      for (const r of parsed.results) {
+        if (r.target) {
+          targets.push(r.target);
         }
       }
-    } catch {
-      // Ignore malformed lines; caller will fall back to static discovery when
-      // no usable visible repo names are found.
     }
+  } catch {
+    // Fall through to per-line scanning.
   }
-  return uniqueSorted(candidates);
-}
-// Step 1: parse candidate Maven repo names from Bzlmod and legacy entry points.
-function parseMavenRepoCandidates(cwd, verbose) {
-  const candidates = [];
-  // Bzlmod path: parse MODULE.bazel for use_repo(maven, ...).
-  const moduleBazel = path.join(cwd, 'MODULE.bazel');
-  const moduleContent = safeReadFile$1(moduleBazel);
-  if (moduleContent) {
-    const bzlmodHits = [];
-    for (const m of moduleContent.matchAll(USE_REPO_RE)) {
-      const argBlob = m[1] ?? '';
-      for (const n of argBlob.matchAll(QUOTED_NAME_RE)) {
-        bzlmodHits.push(n[1]);
+  if (!targets.length) { // envelope parse produced nothing, so scan line by line
+    for (const line of stdout.split(/\r?\n/)) {
+      const trimmed = line.trim();
+      if (!trimmed) {
+        continue;
+      }
+      try {
+        const parsed = JSON.parse(trimmed);
+        if (parsed?.rule) { // only lines that carry a rule object are kept
+          targets.push(parsed);
+        }
+      } catch {
+        // Skip malformed lines.
       }
     }
-    candidates.push(...bzlmodHits);
-    if (verbose) {
-      logger.logger.log('[VERBOSE] discovery: scanned', moduleBazel, `(${bzlmodHits.length} use_repo match(es))`);
-    }
-  } else if (verbose) {
-    logger.logger.log('[VERBOSE] discovery:', moduleBazel, 'not present (skipping bzlmod scan)');
   }
-  // Legacy path: scan WORKSPACE + top-level .bzl files for maven_install(name=...).
-  const legacyFiles = listLegacyStarlarkFiles$1(cwd);
-  if (verbose) {
-    logger.logger.log('[VERBOSE] discovery: legacy files considered:', legacyFiles.length ? legacyFiles : '(none)');
-  }
-  for (const file of legacyFiles) {
-    const content = safeReadFile$1(file);
-    if (!content) {
+  // First pass: collect coordinate-bearing rules with their raw edge labels.
+  const records = [];
+  for (const target of targets) {
+    if (target.type && target.type !== 'RULE') { // targets without a type field are still considered
       continue;
     }
-    const fileHits = [];
-    for (const m of content.matchAll(MAVEN_INSTALL_NAME_RE)) {
-      fileHits.push(m[1]);
+    const rule = target.rule;
+    if (!rule || !rule.name) {
+      continue;
     }
-    candidates.push(...fileHits);
-    if (verbose) {
-      logger.logger.log('[VERBOSE] discovery: scanned', file, `(${fileHits.length} maven_install name match(es))`);
+    const coord = extractMavenCoordinate(rule);
+    if (!coord) { // rules without a recoverable coordinate are not recorded
+      continue;
     }
+    records.push({
+      coord,
+      edgeLabels: extractEdgeLabels(rule),
+      fullLabel: rule.name,
+      ruleKind: rule.ruleClass ?? rule.rule_class ?? 'unknown', // camelCase first, then snake_case, then a placeholder
+      ruleName: ruleNameFromLabel(rule.name)
+    });
   }
-  const deduped = uniqueSorted(candidates);
-  if (verbose) {
-    logger.logger.log('[VERBOSE] discovery: candidate set (pre-seed):', deduped);
-  }
-  return deduped;
-}
-// Step 2: validate a candidate by running the probe and confirming
-// `maven_coordinates=` appears in stdout (the marker emitted by jvm_import /
-// aar_import rules generated by rules_jvm_external). Returns the probe
-// stdout alongside the verdict so the caller can cache it and reuse it
-// instead of running an identical extraction query.
-async function validateMavenRepo(repoName, probe, verbose) {
-  try {
-    const result = await probe(repoName);
-    if (result.code !== 0) {
-      if (verbose) {
-        logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: REJECT (code=${result.code})`);
+  // Second pass: resolve edge labels against this repo's own targets.
+  const index = buildLabelCoordIndex(records);
+  const provenance = workspaceRelPath ? `${workspaceRelPath}:${repoName}` : repoName;
+  const out = [];
+  const unresolved = new Set(); // a Set, so each unresolved label is reported once
+  for (const rec of records) {
+    const deps = new Set(); // a Set, so each resolved coordinate appears once per artifact
+    for (const label of rec.edgeLabels) {
+      const resolution = resolveDepLabel(label, index);
+      if (resolution.kind === 'coord') {
+        deps.add(resolution.coord);
+      } else if (resolution.kind === 'unresolved') {
+        unresolved.add(label);
       }
-      return {
-        valid: false,
-        stdout: result.stdout
-      };
     }
-    const valid = MAVEN_COORDINATES_MARKER_RE.test(result.stdout);
-    if (verbose) {
-      logger.logger.log(`[VERBOSE] discovery: probe @${repoName}:`, valid ? 'ACCEPT (maven_coordinates marker found)' : 'REJECT (no maven_coordinates marker in probe stdout)');
+    out.push({
+      deps: [...deps],
+      mavenCoordinates: rec.coord, // the coordinate as extracted; only deps are versionless
+      ruleKind: rec.ruleKind,
+      ruleName: rec.ruleName,
+      sourceRepo: provenance
+    });
+  }
+  return {
+    artifacts: out,
+    unresolvedLabels: [...unresolved]
+  };
+}
+// Classify the runner's raw outcome. Non-zero exit with `--keep_going` is a
+// `partial` (some target analysis failed; the successful subset is still in
+// stdout). A clean exit with unresolved hub-prefixed edges is also `partial`
+// — the graph is known-incomplete. Zero exit with no parsed artefacts is
+// `empty`. Spawn timeout is signalled separately; this helper handles the
+// post-spawn case.
+function classifyCqueryOutcome(code, artifactCount, unresolvedCount) {
+  if (code === 0) {
+    if (!artifactCount) { // clean exit with nothing parsed
+      return 'empty';
     }
+    return unresolvedCount > 0 ? 'partial' : 'ok';
+  }
+  // --keep_going treats partial-analysis failures with non-zero exit but
+  // still yields the successful subset on stdout. Anything we parsed is
+  // worth keeping.
+  return artifactCount > 0 ? 'partial' : 'error'; // unresolvedCount is not consulted on a non-zero exit
+}
+// Spawn the per-repo metadata cquery, parse the result, and return a
+// structured outcome. On spawn timeout, return `status: 'timeout'` so the
+// orchestrator can reap the server (`bazel --output_user_root=<dir>
+// shutdown` + `rm -rf`) before moving on.
+async function runMetadataCqueryForRepo(args) {
+  const {
+    opts,
+    repoName,
+    timeoutMs,
+    workspaceRelPath,
+    workspaceRoot
+  } = args;
+  const argv = buildMetadataCqueryArgv(repoName, opts);
+  const startedAt = Date.now(); // start of the wall-clock measurement reported as durationMs
+  try {
+    const result = await spawn.spawn(opts.bin, argv, {
+      cwd: workspaceRoot,
+      timeout: timeoutMs,
+      ...(opts.env ? {
+        env: opts.env
+      } : {})
+    });
+    const {
+      code,
+      stderr,
+      stdout
+    } = result;
+    const {
+      artifacts,
+      unresolvedLabels
+    } = parseCqueryJsonproto(stdout, repoName, workspaceRelPath);
     return {
-      valid,
-      stdout: result.stdout
+      artifacts,
+      durationMs: Date.now() - startedAt,
+      repoName,
+      status: classifyCqueryOutcome(code, artifacts.length, unresolvedLabels.length),
+      stderr,
+      unresolvedLabels,
+      workspaceRelPath
     };
   } catch (e) {
-    if (verbose) {
-      logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: REJECT (probe threw):`, utils.getErrorCause(e));
-    }
+    const err = e;
+    const stdout = typeof err.stdout === 'string' ? err.stdout : ''; // non-string output is treated as absent
+    const stderr = typeof err.stderr === 'string' ? err.stderr : '';
+    const timedOut = err.timedOut === true || err.killed === true || err.signal === 'SIGTERM' || err.signal === 'SIGKILL'; // NOTE(review): any kill signal counts as a timeout here, whatever sent it
+    const {
+      artifacts,
+      unresolvedLabels
+    } = stdout ? parseCqueryJsonproto(stdout, repoName, workspaceRelPath) : {
+      artifacts: [],
+      unresolvedLabels: []
+    };
+    // The registry `spawn` rejects on a non-zero exit, so a `--keep_going`
+    // cquery that exits non-zero but still emitted a usable subset lands here
+    // — not in the try block. Classify by what we parsed (subset present =>
+    // `partial`, nothing parsed => `error`) so that partial subset is written
+    // best-effort rather than discarded as a hard error. Timeout stays
+    // distinct so the orchestrator can reap the wedged server.
+    const code = typeof err.code === 'number' ? err.code : 1; // a non-numeric code is coerced to a generic failure
     return {
-      valid: false,
-      stdout: ''
+      artifacts,
+      durationMs: Date.now() - startedAt,
+      repoName,
+      status: timedOut ? 'timeout' : classifyCqueryOutcome(code, artifacts.length, unresolvedLabels.length),
+      stderr,
+      unresolvedLabels,
+      workspaceRelPath
     };
   }
 }
-// The default maven_install repo name when no explicit `name=` is given.
-// Included as a seed so repos that define maven_install in a subdirectory
-// .bzl file (not scanned by parseMavenRepoCandidates) are still discovered.
-const DEFAULT_MAVEN_REPO_SEED = 'maven';
+let probed = false;
-// Composition: parse, then validate each candidate; return validated subset
-// as a Map keyed by repo name with the validated probe stdout as value.
-// Map iteration order matches insertion order, so callers that just want
-// the list of repo names can call `Array.from(repos.keys())`. Callers that
-// want to skip re-running the same `bazel query` during extraction can read
-// the cached stdout off the Map and parse it directly.
-//
-// Always seeds with the default `@maven` repo name so repos whose
-// maven_install is defined in a sub-directory .bzl file (not reachable by
-// the top-level static scan) can still be discovered via probe validation.
-async function discoverMavenRepos(cwd, probe, nativeCandidates, verbose) {
-  const parsed = nativeCandidates && nativeCandidates.length ? nativeCandidates : parseMavenRepoCandidates(cwd, verbose);
-  if (verbose) {
-    logger.logger.log('[VERBOSE] discovery: candidate source:', nativeCandidates && nativeCandidates.length ? `bzlmod visible-repos (${nativeCandidates.length})` : `static parse (${parsed.length})`);
+// Verifies `java` is functional in the current execution environment. Bazel
+// JVM manifest extraction (rules_jvm_external → Coursier) requires a real
+// JDK; the CLI does not attempt to discover Homebrew installs or mutate the
+// caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
+// actionable message so the surfaced error names the prerequisite directly
+// instead of relying on Bazel's downstream diagnostic.
+function ensureJavaOnPath() {
+  if (probed) { // a previous successful probe in this process short-circuits the check
+    return;
   }
-  // Seed with the default repo name first (so it appears first in output if
-  // validated). Dedup via Set before validation.
-  const seen = new Set([DEFAULT_MAVEN_REPO_SEED]);
-  const candidates = [DEFAULT_MAVEN_REPO_SEED];
-  for (const c of parsed) {
-    if (!seen.has(c)) {
-      seen.add(c);
-      candidates.push(c);
+  try {
+    childProcess.execSync('java -version', {
+      stdio: 'ignore'
+    });
+    probed = true; // only success is remembered; a failed probe is retried on the next call
+  } catch {
+    throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
+  }
+}
+// Validates that --bazel-output-base is a path we can use as Bazel's output_base.
+// Throws InputError if:
+//   - the input contains `..` segments (path traversal guard)
+//   - the existing path is not writable
+//   - the path cannot be created (parent not writable)
+function validateOutputBase(outputBase, cwd) {
+  // Path traversal guard: reject any literal `..` segment in user input.
+  // After path.resolve these are normalised away, so we check the raw input.
+  // Split on both separators. On Windows `path.sep === '\\'`, so
+  // input like `foo/../etc` would not contain a `..` segment under the
+  // platform-specific split, bypassing the guard — yet path.resolve below
+  // would still normalise the `..` and a traversal target could materialise.
+  const segments = outputBase.split(/[\\/]/);
+  if (segments.includes('..')) {
+    throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
+  }
+  const resolved = path.resolve(cwd, outputBase); // a relative outputBase is resolved against cwd
+  if (fs$1.existsSync(resolved)) {
+    try {
+      fs$1.accessSync(resolved, fs$1.constants.W_OK);
+    } catch {
+      throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
     }
+    return; // existing and writable: nothing is created
   }
-  if (verbose) {
-    logger.logger.log('[VERBOSE] discovery: candidate set to probe (seed-first, deduped):', candidates);
+  // Path does not exist yet — try to create it so bazel can populate it.
+  try {
+    fs$1.mkdirSync(resolved, {
+      recursive: true
+    });
+  } catch (e) {
+    throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
   }
-  const validated = new Map();
-  for (const c of candidates) {
-    // eslint-disable-next-line no-await-in-loop
-    const result = await validateMavenRepo(c, probe, verbose);
-    if (result.valid) {
-      validated.set(c, result.stdout);
+}
+// Stable shim dir name — same process will get the same dir; concurrent
+// socket-cli invocations on the same machine share it. The symlink target
+// is whatever python3 resolves to NOW; if PATH changes between invocations
+// we replace the symlink.
+const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
+// Cache the result for the lifetime of this process.
+let cached = null;
+// Safe wrapper around whichBin that returns null instead of throwing when
+// nothrow semantics are broken in older registry versions (realpath 'null' bug).
+async function safeWhichBin(name) {
+  try {
+    return (await bin.whichBin(name, {
+      nothrow: true
+    })) ?? null; // a nullish lookup result is normalised to null
+  } catch {
+    return null; // any thrown lookup error is reported as not found
+  }
+}
+async function provisionPythonShim() {
+  if (cached) {
+    return cached;
+  }
+  const pythonOnPath = await safeWhichBin('python');
+  if (pythonOnPath) {
+    cached = {
+      augmentedEnv: undefined,
+      shimDir: undefined
+    };
+    return cached;
+  }
+  const python3OnPath = await safeWhichBin('python3');
+  if (!python3OnPath) {
+    throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
+  }
+  const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
+  fs$1.mkdirSync(shimDir, {
+    recursive: true
+  });
+  const linkPath = path.join(shimDir, 'python');
+  // Replace the symlink defensively in case python3's resolved path moved.
+  if (fs$1.existsSync(linkPath)) {
+    try {
+      fs$1.unlinkSync(linkPath);
+    } catch {
+      // Tolerate races; the next symlinkSync may still succeed.
     }
   }
+  // The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
+  // so a concurrent socket-cli invocation may re-create the link between our
+  // unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
+  // other process won the race and left a usable shim in place.
+  try {
+    fs$1.symlinkSync(python3OnPath, linkPath);
+  } catch (e) {
+    if (e.code === 'EEXIST' && fs$1.existsSync(linkPath)) ; else {
+      throw e;
+    }
+  }
+  const augmentedEnv = {
+    ...process.env,
+    PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
+  };
+  cached = {
+    augmentedEnv,
+    shimDir
+  };
+  return cached;
+}
+/**
+ * Maven hub repo discovery for `socket manifest bazel`.
+ *
+ * - Bzlmod path: `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
+ *   emits a text-format report listing every repo the maven extension generated;
+ *   `parseShowExtensionOutput` extracts the names of hub repos (items annotated
+ *   with `(imported by ...)`) and skips generated per-artifact repos.
+ * - Legacy WORKSPACE path: probe a fixed list of conventional Maven hub names.
+ *   Each probe is classified into `populated` / `empty` / `not-defined`; the
+ *   orchestrator keeps only the `populated` candidates.
+ *
+ * No Starlark source is read by this module. All semantic interpretation
+ * comes from Bazel itself (`mod show_extension`, `cquery`).
+ */
+// The importer token Bazel prints for a hub generated for the root module
+// itself (`(imported by <root>, …)`). Hubs imported only by rulesets
+// (`rules_jvm_external@6.7`, `stardoc@0.7.2`, …) are build-tooling, not the
+// user's SBOM, and are filtered out by the orchestrator.
+const ROOT_MODULE_IMPORTER = '<root>';
+// One hub repo from a `bazel mod show_extension` report: its name plus the
+// modules that imported it (the `(imported by …)` annotation), merged across
+// every line the repo appears on.
+// Conventional Maven hub names rules_jvm_external sets up under
+// WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility
+// lookup never triggers a `repository_rule` fetch) so the orchestrator can
+// try them all without paying the cost of a real cquery on undefined repos.
+const CONVENTIONAL_MAVEN_REPO_NAMES = ['maven', 'maven_install', 'maven_dev', 'unpinned_maven', 'maven_unpinned'];
+// Pattern Bazel emits when a probed repo name isn't visible to the main
+// module. Used to distinguish `not-defined` (skip silently) from `empty`
+// (the repo exists but has no targets). Tolerant of either single- or
+// double-quote styles Bazel has used across versions.
+const NOT_VISIBLE_STDERR_RE = /No repository visible as ['"]?@?[A-Za-z0-9._+-]+['"]? from/;
+// Other "repo isn't analyzable" patterns Bazel emits, especially under
+// WORKSPACE mode and on Bazel 6.x. They all map to `not-defined`.
+const NO_SUCH_PACKAGE_STDERR_RE = /no such package ['"`]?@/;
+// Pattern emitted when a repo IS visible / defined but yields no targets.
+// `--keep_going` plus `'no targets found beneath'` is the empty-but-defined
+// signature. The orchestrator treats `empty` and `not-defined` uniformly
+// as skips.
+const NO_TARGETS_STDERR_RE = /no targets found beneath/i;
+// Anchor for the maven extension's section header in
+// `bazel mod show_extension` output. Tolerant of the canonical-name form
+// Bazel uses across versions (`@@rules_jvm_external+`, `@@rules_jvm_external~`,
+// or any future separator) and of trailing whitespace.
+const SHOW_EXT_SECTION_HEADER_RE = /^## @@?[A-Za-z0-9._+~-]+\/\/:extensions\.bzl%maven:\s*$/m;
+// Bullet within `Fetched repositories:` that names a hub repo (one with an
+// `(imported by ...)` annotation). Bullets without that annotation are
+// generated per-artifact repos and are skipped.
+const FETCHED_HUB_BULLET_RE = /^ {2}- (?<name>\S+) \(imported by (?<importers>[^)]+)\)\s*$/;
+// Pure parser for `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
+// stdout. Returns the hub repos listed under `Fetched repositories:` — i.e.
+// items annotated with `(imported by ...)` — each carrying the set of modules
+// that imported it. Generated per-artifact repos (no annotation) are skipped.
+// A repo can legitimately appear on multiple lines with different importers,
+// so importers are merged per repo (name-only dedupe would lose that, and the
+// importers data is what lets the orchestrator keep only root-imported hubs).
+// Output is sorted by name. Tolerant of `DEBUG:` / `WARNING:` lines from
+// Bazel; the section header `## @@<canonical>//:extensions.bzl%maven:` is the
+// anchor.
+function parseShowExtensionOutput(stdout) {
+  const headerMatch = SHOW_EXT_SECTION_HEADER_RE.exec(stdout);
+  if (!headerMatch) { // no maven extension section in the report
+    return [];
+  }
+  const tail = stdout.slice(headerMatch.index + headerMatch[0].length);
+  // Find the first `Fetched repositories:` line after the header. NOTE(review): this search is not bounded by the next `## ` header.
+  const fetchedIdx = tail.indexOf('\nFetched repositories:');
+  if (fetchedIdx === -1) {
+    return [];
+  }
+  const afterFetched = tail.slice(fetchedIdx + '\nFetched repositories:'.length);
+  const importersByName = new Map(); // repo name -> Set of importer tokens
+  for (const line of afterFetched.split(/\r?\n/)) {
+    // Stop at the next `## ` section header (some Bazel versions print
+    // multiple extensions in one report).
+    if (line.startsWith('## ')) {
+      break;
+    }
+    // Empty line is fine; bullet that doesn't match is fine (it's an
+    // un-imported generated artifact repo) — skip it.
+    const match = FETCHED_HUB_BULLET_RE.exec(line);
+    if (!match || !match.groups) {
+      continue;
+    }
+    const name = match.groups['name'];
+    if (!name) {
+      continue;
+    }
+    const importers = importersByName.get(name) ?? new Set();
+    for (const importer of (match.groups['importers'] ?? '').split(',').map(s => s.trim()).filter(Boolean)) { // comma-separated list, trimmed, empties dropped
+      importers.add(importer);
+    }
+    importersByName.set(name, importers);
+  }
+  return [...importersByName.keys()].sort().map(name => ({
+    importers: [...importersByName.get(name)].sort(), // importers are sorted too, so output is deterministic
+    name
+  }));
+}
+// Classify a raw probe result into one of three states. The probe contract
+// is whatever the runner emits — typically a lightweight
+// `cquery '@<name>//...' --keep_going --output=label`. The orchestrator
+// treats `empty` and `not-defined` uniformly as no-ops; the distinction
+// is preserved for verbose-mode diagnostics.
+function classifyProbeResult(result) {
+  // A successful probe with any stdout means the repo exists AND has at
+  // least one target — populated.
+  if (result.code === 0 && result.stdout.trim().length > 0) {
+    return 'populated';
+  }
+  // Non-zero exit with a "no repository visible" or "no such package @" message → not-defined.
+  if (result.code !== 0 && (NOT_VISIBLE_STDERR_RE.test(result.stderr) || NO_SUCH_PACKAGE_STDERR_RE.test(result.stderr))) {
+    return 'not-defined';
+  }
+  // Non-zero exit with the "no targets found beneath" message → defined but empty.
+  if (result.code !== 0 && NO_TARGETS_STDERR_RE.test(result.stderr)) {
+    return 'empty';
+  }
+  // Code 0 with empty stdout: WORKSPACE-mode probes do this when the repo
+  // name isn't declared (Exp 5c). Treat as not-defined.
+  if (result.code === 0) {
+    return 'not-defined';
+  }
+  // Non-zero exit with no recognizable message: be conservative and call it
+  // not-defined so the orchestrator skips it without erroring the workspace.
+  return 'not-defined';
+}
+// Convenience: probe a single candidate and return its classified status,
+// with optional verbose logging. Pure orchestration around `probe` +
+// `classifyProbeResult`; isolated so the test suite can exercise the
+// logging contract independently of the runner implementation.
+async function probeCandidate(repoName, probe, verbose) {
+  let result;
+  try {
+    result = await probe(repoName);
+  } catch (e) {
+    if (verbose) {
+      logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${e instanceof Error ? e.message : String(e)})`);
+    }
+    return 'not-defined'; // a throwing probe is never rethrown to the caller
+  }
+  const status = classifyProbeResult(result);
   if (verbose) {
-    logger.logger.log('[VERBOSE] discovery: validated repos:', Array.from(validated.keys()));
+    logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: ${status}`);
   }
-  return validated;
+  return status;
 }
 // Detects whether the given Bazel workspace uses Bzlmod (MODULE.bazel),
@@ -2833,6 +3007,164 @@ function getBazelInvocationFlags(mode) {
   return ['--noenable_bzlmod', '--enable_workspace'];
 }
+/**
+ * Walk the directory tree rooted at `cwd` and return every directory that
+ * looks like a Bazel workspace root — i.e. contains `MODULE.bazel`,
+ * `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots
+ * (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example
+ * `examples/<name>/MODULE.bazel`); the per-workspace algorithm in the
+ * orchestrator runs once per discovered root.
+ *
+ * The walker is dependency-injected with the directory-prune policy:
+ * callers pass the set of basenames and basename prefixes the walk must
+ * refuse to descend into. This module intentionally hardcodes none of
+ * the "common" prunes (`.git`, `node_modules`, …) — Bazel callers compose
+ * the codebase-wide `IGNORED_DIRS` list (`src/utils/glob.mts`) with the
+ * Bazel-specific bits (`bazel-*` output_base symlinks,
+ * `.socket-auto-manifest`).
+ *
+ * Discovery is bounded-but-complete: the walk visits directories in
+ * deterministic (sorted) order under a single visited-directory budget
+ * (`opts.maxWalkDirs`, defaulting to `DEFAULT_MAX_WALK_DIRS`), which is the
+ * only pathological-input / symlink-loop guard. There is no depth cap: the
+ * deepest workspace marker observed across the OSS corpus (depth 9) sat
+ * *below* the old depth-8 ceiling, so that ceiling silently dropped real
+ * first-party modules. All roots found within the budget are collected,
+ * sorted, then capped to `MAX_WORKSPACE_ROOTS`. Hitting the cap and
+ * exhausting the budget each trigger a `logger.warn` UNCONDITIONALLY (a
+ * missed module silently drops its Maven hub, so truncation must never be
+ * silent).
+ */
+// Hard ceiling on workspace roots returned by one walk; 16 sits well above
+// realistic monorepo counts while tightening the guard against pathological inputs.
+const MAX_WORKSPACE_ROOTS = 16;
+// Default ceiling on directories visited; callers of the walk can override it
+// via `opts.maxWalkDirs`. The sole guard against pathological inputs and
+// symlink loops (a loop consumes the budget and stops). A few thousand is far above
+// any realistic first-party tree once the prune policy has removed vendored/output dirs.
+const DEFAULT_MAX_WALK_DIRS = 5_000;
+// Files whose presence promotes a directory to a workspace root (matched by exact basename).
+const WORKSPACE_MARKER_FILES = new Set(['MODULE.bazel', 'WORKSPACE', 'WORKSPACE.bazel']);
+const EMPTY_SET = new Set(); // shared default for an omitted `opts.ignoreDirNames`; only queried with .has()
+const EMPTY_ARRAY = []; // shared default for an omitted `opts.ignoreDirPrefixes`; only iterated
+// Walks the tree rooted at `opts.cwd` and returns the path of every directory
+// that directly contains at least one workspace marker file (paths are `path.join`ed onto `opts.cwd`).
+// Output is sorted for determinism and capped at MAX_WORKSPACE_ROOTS; any truncation is warned about.
+function findWorkspaceRoots(opts) {
+  const {
+    cwd, // directory the walk starts from
+    verbose // when truthy, also logs which roots the cap dropped
+  } = opts;
+  const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET; // exact basenames never descended into
+  const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY; // basename prefixes never descended into
+  const maxWalkDirs = opts.maxWalkDirs ?? DEFAULT_MAX_WALK_DIRS; // visited-directory budget
+  const roots = [];
+  // LIFO stack; children are pushed in reverse-sorted order so they pop in
+  // ascending order, giving a deterministic (depth-first) traversal.
+  const stack = [cwd];
+  let dirsVisited = 0;
+  let budgetHit = false;
+  while (stack.length) {
+    if (dirsVisited >= maxWalkDirs) { // only reached with work still queued, so the walk really was cut short
+      budgetHit = true;
+      break;
+    }
+    const dir = stack.pop();
+    if (dir === undefined) { // not expected while stack.length > 0; guards the uses of `dir` below
+      break;
+    }
+    dirsVisited += 1; // counted before reading, so unreadable dirs also consume budget
+    let entries;
+    try {
+      entries = fs$1.readdirSync(dir, {
+        withFileTypes: true
+      });
+    } catch {
+      continue; // unreadable or vanished directory: skip it silently and keep walking
+    }
+    let isWorkspaceRoot = false;
+    const childNames = [];
+    for (const entry of entries) {
+      if (entry.isFile()) {
+        if (WORKSPACE_MARKER_FILES.has(entry.name)) {
+          isWorkspaceRoot = true;
+        }
+        continue;
+      }
+      if (!entry.isDirectory()) { // NOTE(review): presumably also skips symlinks to directories (Dirent type is the link itself) — confirm
+        continue;
+      }
+      const name = entry.name;
+      if (ignoreDirNames.has(name)) {
+        continue;
+      }
+      let pruned = false;
+      for (const prefix of ignoreDirPrefixes) {
+        if (name.startsWith(prefix)) {
+          pruned = true;
+          break;
+        }
+      }
+      if (!pruned) {
+        childNames.push(name);
+      }
+    }
+    if (isWorkspaceRoot) {
+      roots.push(dir);
+    }
+    // Descend regardless of whether this dir is itself a root — nested
+    // workspaces are common (root MODULE.bazel + examples/*/MODULE.bazel).
+    childNames.sort();
+    for (let i = childNames.length - 1; i >= 0; i -= 1) {
+      stack.push(path.join(dir, childNames[i]));
+    }
+  }
+  roots.sort();
+  const kept = roots.slice(0, MAX_WORKSPACE_ROOTS); // the cap keeps the first roots in sorted order
+  const droppedCount = roots.length - kept.length;
+  if (budgetHit) {
+    // The dir budget was exhausted, so an unknown number of roots may be
+    // undiscovered — surface it unconditionally (independent of `verbose`).
+    logger.logger.warn(`Bazel workspace walk hit the ${maxWalkDirs}-directory budget; some workspaces beneath ${cwd} may be undiscovered (found ${roots.length}, kept ${kept.length}).`);
+  }
+  if (droppedCount > 0) {
+    // The cap dropped roots. Exact count when the full tree was walked; "≥"
+    // when the budget cut the walk short (more roots may exist).
+    const qualifier = budgetHit ? '≥' : '';
+    logger.logger.warn(`Bazel workspace walk found ${roots.length} workspace root(s); capping at ${MAX_WORKSPACE_ROOTS} and dropping ${qualifier}${droppedCount}.`);
+    if (verbose) {
+      logger.logger.log('[VERBOSE] workspace walker: dropped roots:', roots.slice(MAX_WORKSPACE_ROOTS));
+    }
+  }
+  return kept;
+}
+// Best-effort-per-hub produces four distinct run outcomes a single `ok`
+// boolean would conflate:
+//  - `complete`    — every discovered hub extracted cleanly; >=1 manifest.
+//  - `partial`     — >=1 manifest written, but at least one hub failed,
+//                    timed out, or dropped edges. Worth uploading, but the
+//                    graph is known-incomplete.
+//  - `noEcosystem` — no Bazel/Maven found. Whether that's an error is
+//                    caller-dependent (tolerated in auto mode, error in
+//                    explicit mode), so it must NOT be flattened into the
+//                    failure states.
+//  - `hardFailure` — zero manifests written and it wasn't `noEcosystem`
+//                    (discovery threw, or every discovered hub failed).
+//                    Always an error for every caller.
+const DEFAULT_PER_REPO_TIMEOUT_MS = 60_000; // fallback (ms) used when `opts.perRepoTimeoutMs` is not supplied
+const REAP_TIMEOUT_MS = 10_000; // timeout (ms) given to the `bazel shutdown` spawn during cleanup
+// Default directory-prune policy for the Bazel workspace walk. The
+// orchestrator applies this unconditionally so neither caller (the explicit
+// `socket manifest bazel` command nor `--auto-manifest`) can omit it and let
+// the walk descend `node_modules`/VCS/vendored trees. Callers may
+// pass extra names/prefixes to EXTEND, not replace, this set.
+const DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES = new Set([...utils.IGNORED_DIRS, '.hg', '.idea', '.pnpm-store', '.socket-auto-manifest', '.svn', '.vscode']);
+// Basename prefixes to prune: Bazel's `bazel-*` output_base symlinks.
+const DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES = ['bazel-'];
 // Splits "g:a:v" -> { groupArtifact: "g:a", version: "v" }.
 // Returns null on malformed input.
 function splitCoord(c) {
@@ -2845,213 +3177,304 @@ function splitCoord(c) {
     version: c.slice(lastColon + 1)
   };
 }
-// Builds a lookup from rule label suffix (e.g. ":com_google_guava_guava") to canonical coord.
-function buildLabelToCoordMap(artifacts) {
-  const fullLabels = new Map();
-  const suffixToCoords = new Map();
-  for (const a of artifacts) {
-    // The rule name (e.g. "com_google_guava_guava") becomes the path under @<repo>//:<name>.
-    // We record by ":<name>" suffix so we can look up regardless of repo name.
-    const suffix = `:${a.ruleName}`;
-    const coords = suffixToCoords.get(suffix) ?? new Set();
-    coords.add(a.mavenCoordinates);
-    suffixToCoords.set(suffix, coords);
-    if (a.sourceRepo) {
-      fullLabels.set(`@${a.sourceRepo}//${suffix}`, a.mavenCoordinates);
-    }
+// Returns true when a versionless `maven_install.json` key has 2-4 non-empty
+// colon-separated segments (`g:a`, `g:a:ext`, `g:a:ext:classifier`) — exactly
+// the range depscan's `coordinateToParts` accepts. A key outside that range,
+// or with an empty segment, is rejected after upload, so reject it locally.
+function isValidVersionlessKey(key) {
+  const parts = key.split(':');
+  if (parts.length < 2 || parts.length > 4) {
+    return false;
   }
-  return {
-    fullLabels,
-    suffixToCoords
-  };
+  return parts.every(p => p.length > 0); // an empty segment (e.g. `g::a` or a trailing `:`) fails here
 }
-// Converts a Bazel dep label to a Maven coordinate, using the label-to-coord map.
-// Returns null when the label is not recognised.
-function depLabelToCoord(label, labelToCoord) {
-  // label may be "@maven//:com_google_guava_failureaccess".
-  const colon = label.lastIndexOf(':');
-  if (colon < 0) {
-    return null;
-  }
-  const fullMatch = labelToCoord.fullLabels.get(label);
-  if (fullMatch) {
-    return fullMatch;
-  }
-  const key = label.slice(colon);
-  const suffixMatches = labelToCoord.suffixToCoords.get(key);
-  if (!suffixMatches) {
-    return null;
-  }
-  if (suffixMatches.size > 1) {
-    throw new Error(`Ambiguous Bazel dependency label ${label} maps rule suffix ${key} to multiple Maven coordinates: ${Array.from(suffixMatches).sort().join(', ')}. The generated maven_install.json cannot resolve this dependency label losslessly.`);
-  }
-  return Array.from(suffixMatches)[0] ?? null;
-}
+// Builds a modern `maven_install.json` from artifacts whose `deps` already
+// hold resolved versionless coordinates (the cquery parser resolves edge
+// labels against each repo's own targets while `repoName` is in scope, so no
+// label-to-coordinate resolution happens here). Keys are versionless `g:a`
+// (preserving any packaging/classifier segments); dependency values are the
+// resolved coordinate sets. Returns `{ droppedArtifacts, json, prunedEdges }`
+// and throws an Error when one key is seen with two different versions.
+// Two-phase so the emitted graph is internally closed and survives the server
+// parser, which rejects malformed coordinates and edges referencing unlisted
+// artifacts (and can abort after enough errors). Phase 1 builds (and
+// validates) the artifact keys; phase 2 emits only edges whose source AND
+// target are valid emitted keys. Anything dropped is reported so the caller
+// can flip the hub partial — never silently lost post-upload.
 function normalizeToMavenInstallJson(artifacts) {
-  const labelToCoord = buildLabelToCoordMap(artifacts);
   const out = {
     artifacts: {},
     dependencies: {}
   };
+  const droppedArtifacts = []; // full coordinates rejected in phase 1
+  const prunedEdges = []; // "source -> target" strings rejected in phase 2
   const versionsByGroupArtifact = new Map();
-  const dependencySets = new Map();
+  // Phase 1: artifacts. Validate each key (shape + non-empty version) before
+  // accepting it; record the set of valid emitted keys.
+  const depsByKey = new Map();
   for (const a of artifacts) {
     const split = splitCoord(a.mavenCoordinates);
     if (!split) {
+      droppedArtifacts.push(a.mavenCoordinates);
+      continue;
+    }
+    const key = split.groupArtifact;
+    // A `g:a:` coordinate strips to the valid-shaped key `g:a` but an empty
+    // version, which the server rejects — require both.
+    if (!isValidVersionlessKey(key) || !split.version) {
+      droppedArtifacts.push(a.mavenCoordinates);
       continue;
     }
-    const existingVersion = versionsByGroupArtifact.get(split.groupArtifact);
+    const existingVersion = versionsByGroupArtifact.get(key);
     if (existingVersion && existingVersion !== split.version) {
-      throw new Error(`Conflicting versions for ${split.groupArtifact}: ${existingVersion}, ${split.version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`);
+      throw new Error(`Conflicting versions for ${key}: ${existingVersion}, ${split.version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`);
     }
     if (!existingVersion) {
-      versionsByGroupArtifact.set(split.groupArtifact, split.version);
-      out.artifacts[split.groupArtifact] = {
-        shasums: a.mavenSha256 ? {
-          jar: a.mavenSha256
-        } : {},
-        version: split.version
-      };
-    } else if (a.mavenSha256 && !out.artifacts[split.groupArtifact]?.shasums.jar) {
-      out.artifacts[split.groupArtifact] = {
-        shasums: {
-          jar: a.mavenSha256
-        },
+      versionsByGroupArtifact.set(key, split.version);
+      out.artifacts[key] = {
         version: split.version
       };
     }
-    // Dependency keys in maven_install.json use "g:a" (no version),
-    // matching the canonical rules_jvm_external lockfile shape.
-    // Only emit an entry when there are actual dependencies (lockfile omits
-    // artifacts with an empty dep list).
-    const depKey = split.groupArtifact;
-    const depCoords = dependencySets.get(depKey) ?? new Set();
-    for (const depLabel of a.deps) {
-      // First try our rule-label lookup (the common case for --output=build text).
-      const c = depLabelToCoord(depLabel, labelToCoord);
-      if (c) {
-        // c is "g:a:v"; strip the version to produce "g:a" per lockfile shape.
-        const cs = splitCoord(c);
-        depCoords.add(cs ? cs.groupArtifact : c);
-      } else if (depLabel.includes(':') && !depLabel.startsWith('@') && !depLabel.startsWith(':')) {
-        // unsorted_deps.json deps may be "g:a:v" in older files or
-        // "g:a" in v2 lock-file-shaped maps. Strip only when a version is
-        // present.
-        const parts = depLabel.split(':');
-        depCoords.add(parts.length >= 3 ? parts.slice(0, -1).join(':') : depLabel);
-      }
+    // Accumulate the candidate edge set keyed by "g:a" (no version), matching
+    // the canonical rules_jvm_external lockfile shape. Pruned against valid
+    // keys in phase 2.
+    const depCoords = depsByKey.get(key) ?? new Set();
+    for (const depCoord of a.deps) {
+      depCoords.add(depCoord);
     }
     if (depCoords.size) {
-      dependencySets.set(depKey, depCoords);
+      depsByKey.set(key, depCoords);
     }
   }
-  for (const [depKey, depCoords] of dependencySets) {
-    out.dependencies[depKey] = Array.from(depCoords);
+  // Phase 2: edges. Emit only where both source and target are emitted keys.
+  const validKeys = new Set(Object.keys(out.artifacts));
+  for (const [key, depCoords] of depsByKey) {
+    if (!validKeys.has(key)) { // defensive: phase 1 only records deps under a key it has already emitted
+      for (const target of depCoords) {
+        prunedEdges.push(`${key} -> ${target}`);
+      }
+      continue;
+    }
+    const kept = [];
+    for (const target of depCoords) {
+      if (validKeys.has(target)) {
+        kept.push(target);
+      } else {
+        prunedEdges.push(`${key} -> ${target}`);
+      }
+    }
+    if (kept.length) { // keys whose edges were all pruned get no `dependencies` entry
+      out.dependencies[key] = kept;
+    }
+  }
+  return {
+    droppedArtifacts,
+    json: out,
+    prunedEdges
+  };
+}
+// Cross-workspace dedup keyed on the full Maven coordinate string
+// (`g:a:v[:classifier]`). The metadata cquery emits one entry per rule,
+// so the same `androidx.annotation:annotation:1.8.2` can show up in
+// `examples/dagger/@maven` and `examples/ksp/@maven` in rules_kotlin —
+// downstream only needs it once. Each occurrence resolves its edges against
+// its own repo's targets, so the resolved `deps` can legitimately differ
+// between occurrences; union them rather than keeping only the first, or
+// real graph edges would be silently dropped. Returns new objects, one per coordinate.
+function dedupArtifactsByCoord(artifacts) {
+  const byCoord = new Map(); // insertion-ordered, so output follows first-seen order
+  for (const a of artifacts) {
+    const existing = byCoord.get(a.mavenCoordinates);
+    if (!existing) {
+      byCoord.set(a.mavenCoordinates, {
+        ...a, // every field other than `deps` comes from the first occurrence
+        deps: [...a.deps] // copy, so the merge below never mutates the caller's array
+      });
+      continue;
+    }
+    const merged = new Set(existing.deps);
+    for (const dep of a.deps) {
+      merged.add(dep);
+    }
+    existing.deps = [...merged]; // union, keeping first-seen order of deps
+  }
+  return [...byCoord.values()];
+}
+// Dedup, normalize, and write one hub's manifest. The path mirrors the
+// workspace tree: `<manifestDir>/<relPath>/<name>.json`, where `<name>` is
+// `maven_install.json` for a hub literally named `maven`, else
+// `<hub>_maven_install.json` (matching the server walker's
+// `**/*_maven_install.json` glob). The root workspace (`relPath===''`) writes
+// at `<manifestDir>/<name>.json`. Returns `manifestPath: undefined` (no file
+// written) when the hub yields zero valid artifacts, plus the dropped/pruned
+// accounting so the caller can flip the hub partial.
+async function writeHubManifest(args) {
+  const {
+    artifacts,
+    manifestDir,
+    relPath,
+    repoName
+  } = args;
+  const deduped = dedupArtifactsByCoord(artifacts);
+  const {
+    droppedArtifacts,
+    json,
+    prunedEdges
+  } = normalizeToMavenInstallJson(deduped);
+  const artifactCount = Object.keys(json.artifacts).length;
+  if (!artifactCount) {
+    return {
+      artifactCount: 0,
+      droppedArtifacts,
+      manifestPath: undefined,
+      prunedEdges
+    };
+  }
+  const fileName = repoName === 'maven' ? 'maven_install.json' : `${repoName}_maven_install.json`;
+  const hubDir = relPath ? path.join(manifestDir, relPath) : manifestDir;
+  fs$1.mkdirSync(hubDir, {
+    recursive: true
+  });
+  const manifestPath = path.join(hubDir, fileName);
+  await fs$1.promises.writeFile(manifestPath, JSON.stringify(json, null, 2), 'utf8');
+  return {
+    artifactCount,
+    droppedArtifacts,
+    manifestPath,
+    prunedEdges
+  };
+}
+// Build the per-workspace candidate Maven hub list.
+//
+// Bzlmod mode: trust `bazel mod show_extension` as the authoritative hub
+// list, keeping only hubs imported by <root>.
+//
+// WORKSPACE mode: no equivalent of `show_extension`, so probe the
+// conventional hub names.
+//
+// On `show_extension` failure (or a parse that yields zero root hubs) under
+// Bzlmod, fall through to the conventional-name probe so partial discovery
+// is still possible. Resolves to the array of candidate hub names.
+async function discoverCandidatesForWorkspace(workspaceRoot, mode, queryOpts, verbose) {
+  const candidates = []; // hub names, show_extension results first, then probed names
+  let showExtensionSucceeded = false; // true only when show_extension yielded at least one root-imported hub
+  if (mode.bzlmod) {
+    const extResult = await runBazelModShowMavenExtension(queryOpts);
+    if (extResult.code === 0) {
+      // The maven extension generates a hub for EVERY module that uses it —
+      // the root's own `maven.install` hub(s) plus the rulesets' internal
+      // hubs (rules_jvm_external_deps, stardoc_maven, …). Keep only hubs
+      // imported by <root>; the rest are build-tooling, not the user's SBOM.
+      const entries = parseShowExtensionOutput(extResult.stdout);
+      const kept = entries.filter(e => e.importers.includes(ROOT_MODULE_IMPORTER));
+      candidates.push(...kept.map(e => e.name));
+      // Gate the probe fallback on the KEPT count, not the raw parse: a
+      // report listing only transitive ruleset hubs (all filtered out) must
+      // still fall through to conventional probing so a root @maven isn't
+      // missed.
+      showExtensionSucceeded = kept.length > 0;
+      if (verbose) {
+        logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension kept root hub(s)`, kept.map(e => e.name));
+        for (const dropped of entries) {
+          if (!dropped.importers.includes(ROOT_MODULE_IMPORTER)) {
+            logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: dropped ${dropped.name} — imported by ${dropped.importers.join(', ')}, not ${ROOT_MODULE_IMPORTER}`);
+          }
+        }
+      }
+    } else if (verbose) {
+      logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`);
+    }
+  }
+  // Probe candidates the show_extension path could not authoritatively
+  // enumerate: when it produced root hubs, probe nothing extra; otherwise
+  // (WORKSPACE mode, a failed show_extension, or a parse with zero root
+  // hubs) probe the conventional hub names.
+  const seen = new Set(candidates);
+  const toProbe = (showExtensionSucceeded ? [] : [...CONVENTIONAL_MAVEN_REPO_NAMES]).filter(name => !seen.has(name));
+  if (!toProbe.length) {
+    return candidates;
+  }
+  const probe = buildMavenProbeFor(queryOpts);
+  for (const name of toProbe) { // probes run one at a time, in CONVENTIONAL_MAVEN_REPO_NAMES order
+    // eslint-disable-next-line no-await-in-loop
+    const status = await probeCandidate(name, probe, verbose);
+    if (status === 'populated') { // hubs classified as anything else are not candidates
+      candidates.push(name);
+      seen.add(name); // NOTE(review): `seen` is not read again after `toProbe` is built — bookkeeping only
+    }
   }
-  return out;
+  return candidates;
 }
-// Resolves the bazel `external/` dir for the given workspace.
-//
-// Bazel's `bazel-out/` convenience symlink points at
-// `<output_base>/execroot/<workspace>/bazel-out/`; the `external/` dir we
-// want is at `<output_base>/external/`. `path.join` is purely lexical and
-// would collapse `bazel-out/..` to the cwd itself, which is the wrong place
-// Resolve the symlink at the filesystem level and walk up to
-// `<output_base>` instead.
-function bazelExternalDir(cwd, outputBase) {
-  if (outputBase) {
-    return path.join(outputBase, 'external');
-  }
-  const bazelOutLink = path.join(cwd, 'bazel-out');
-  if (!fs$1.existsSync(bazelOutLink)) {
-    return null;
-  }
+// Best-effort reap of a Bazel server: runs `<bin> --output_user_root=<outputUserRoot> shutdown`
+// with a short timeout (REAP_TIMEOUT_MS) so a wedged server can't itself hang the cleanup; failures
+// are swallowed because the caller will `rm -rf` the output_user_root regardless.
+async function reapBazelServer(bin, outputUserRoot, verbose) { // `bin` here is the Bazel executable and shadows the module-level `bin` import
   try {
-    // realpath follows symlinks: .../<output_base>/execroot/<workspace>/bazel-out
-    const real = fs$1.realpathSync(bazelOutLink);
-    // Walk up bazel-out -> <workspace> -> execroot -> <output_base>, then into external/.
-    return path.join(real, '..', '..', '..', 'external');
-  } catch {
-    return null;
+    await spawn.spawn(bin, [`--output_user_root=${outputUserRoot}`, 'shutdown'], {
+      timeout: REAP_TIMEOUT_MS
+    });
+  } catch (e) {
+    // Server may already be dead, or shutdown itself timed out — the
+    // caller's tempdir removal is sufficient cleanup.
+    if (verbose) {
+      logger.logger.log(`[VERBOSE] reapBazelServer: shutdown failed for ${outputUserRoot} (${utils.getErrorCause(e)}); tempdir removal will still run`);
+    }
   }
 }
-// Internal diagnostic: when truthy, skip the unsorted_deps.json fast path
-// and force the bazel-query regex fallback. Used by bazel-bench to
-// deterministically exercise parseBazelBuildOutput on every CI run. Truthy
-// values are '1', 'true', 'yes' (case-insensitive); anything else (unset,
-// '', '0', 'false') is treated as off. Not exposed as a user-facing CLI
-// flag, so it is read here rather than added to constants.mts.
-function isForceQueryFallbackEnabled() {
-  const raw = process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'];
-  if (!raw) {
-    return false;
-  }
-  const normalized = raw.toLowerCase();
-  return normalized === '1' || normalized === 'true' || normalized === 'yes';
-}
-// Tries `external/<repo>/unsorted_deps.json` first; falls back to parsing the
-// probe stdout the caller already captured during discovery. Discovery runs
-// the same `kind("jvm_import rule|aar_import rule", @<repo>//:*)` query that
-// extraction needs, so reusing its stdout skips one bazel-query invocation
-// per repo on the unpinned path (where unsorted_deps.json isn't on disk).
-async function extractFromOneRepo(repoName, queryOpts, cachedProbeStdout) {
-  const verbose = queryOpts.verbose;
-  // unsorted_deps.json lives under the bazel external dir.
-  // When --output_base is set, it's under that; otherwise under the workspace's
-  // bazel-out symlink (resolved via realpath, NOT lexical path.join — the
-  // lexical form would collapse `bazel-out/..` to cwd and miss the file).
-  const externalDir = bazelExternalDir(queryOpts.cwd, queryOpts.bazelOutputBase);
-  if (verbose) {
-    logger.logger.log(`[VERBOSE] @${repoName}: external dir:`, externalDir ?? '(unresolved — bazel-out symlink absent)');
-  }
-  const forceFallback = isForceQueryFallbackEnabled();
-  if (forceFallback && verbose) {
-    logger.logger.log(`[VERBOSE] @${repoName}: SOCKET_BAZEL_FORCE_QUERY_FALLBACK set; skipping unsorted_deps.json fast path.`);
-  }
-  const candidates = forceFallback ? [] : externalDir ? [path.join(externalDir, repoName, 'unsorted_deps.json')] : [];
-  for (const c of candidates) {
-    if (fs$1.existsSync(c)) {
-      // Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles.
-      // eslint-disable-next-line no-await-in-loop
-      const stat = await fs$1.promises.stat(c);
-      if (stat.size > 1024 * 1024 * 1024) {
-        logger.logger.warn(`Skipping oversized ${c} (${stat.size} bytes); falling back to cached probe stdout.`);
-        break;
-      }
-      const json = fs$1.readFileSync(c, 'utf8');
-      const parsed = parseUnsortedDepsJson(json);
-      if (parsed.length) {
-        if (verbose) {
-          logger.logger.log(`[VERBOSE] @${repoName}: source=unsorted_deps.json (${c}, ${parsed.length} artifact(s))`);
-        }
-        return parsed.map(a => ({
-          ...a,
-          sourceRepo: repoName
-        }));
-      }
-    } else if (verbose) {
-      logger.logger.log(`[VERBOSE] @${repoName}: unsorted_deps.json miss at`, c);
+async function removeTempdir(dir, verbose) { // best-effort recursive delete of `dir`; rm errors are swallowed (logged only when `verbose`)
+  try {
+    await fs$1.promises.rm(dir, {
+      recursive: true,
+      force: true
+    });
+  } catch (e) {
+    // Best effort: do not fail the run over cleanup. The next CLI invocation lands a fresh tempdir.
+    if (verbose) {
+      logger.logger.log(`[VERBOSE] removeTempdir: ${dir} not fully removed (${utils.getErrorCause(e)}); a stale dir may linger until the next OS tempdir sweep`);
     }
   }
-  // Reuse the probe stdout that discovery already captured for this repo.
-  // The probe ran exactly this query during validation and only validated
-  // repos with code === 0 make it into the cache, so retry is unnecessary
-  // — if the probe was flaky, the repo wouldn't be in the map.
-  if (!cachedProbeStdout) {
-    logger.logger.warn(`No cached probe stdout for @${repoName}; skipping. (This shouldn't happen — discovery should have populated it.)`);
-    return [];
-  }
-  if (verbose) {
-    logger.logger.log(`[VERBOSE] @${repoName}: source=cached probe stdout (${cachedProbeStdout.length} bytes)`);
-  }
-  return parseBazelBuildOutput(cachedProbeStdout).map(a => ({
-    ...a,
-    sourceRepo: repoName
-  }));
+}
+function makeOutputUserRoot() { // creates a fresh `socket-bazel-*` directory under os.tmpdir() and returns its path
+  return fs$1.mkdtempSync(path.join(os.tmpdir(), 'socket-bazel-'));
+}
+// Construct the BazelQueryOptions shape used for a single workspace's
+// queries. Lifted to module scope (out of the per-workspace loop) so
+// ESLint's consistent-function-scoping is happy; takes everything it
+// previously closed over as explicit fields of `args`. Returns a new plain object.
+function buildQueryOpts(args) {
+  const {
+    baseEnv,
+    bin,
+    invocationFlags,
+    opts,
+    outputUserRoot,
+    spawnCwd,
+    verbose
+  } = args;
+  return {
+    bin,
+    cwd: spawnCwd, // exposed to consumers under the name `cwd`
+    invocationFlags,
+    outputUserRoot,
+    ...(opts.bazelRc ? { // optional keys are added only when truthy, so they are absent rather than undefined
+      bazelRc: opts.bazelRc
+    } : {}),
+    ...(opts.bazelFlags ? {
+      bazelFlags: opts.bazelFlags
+    } : {}),
+    ...(opts.bazelOutputBase ? {
+      bazelOutputBase: opts.bazelOutputBase
+    } : {}),
+    ...(baseEnv ? { // surfaced as `env`
+      env: baseEnv
+    } : {}),
+    verbose
+  };
 }
 async function extractBazelToMaven(opts) {
   const {
@@ -3066,143 +3489,232 @@ async function extractBazelToMaven(opts) {
     logger.logger.warn(`Warning: cwd does not exist: ${cwd}`);
   }
   logger.logger.groupEnd();
+  const perRepoTimeoutMs = opts.perRepoTimeoutMs ?? DEFAULT_PER_REPO_TIMEOUT_MS;
+  // Validate config + ensure toolchains BEFORE we mint a tempdir.
+  let bin;
+  let baseEnv;
   try {
-    // Validate caller-provided Bazel filesystem settings before invoking Bazel.
     if (opts.bazelOutputBase) {
       validateOutputBase(opts.bazelOutputBase, opts.cwd);
     }
-    // Java must be available before rules_jvm_external/Coursier runs;
-    // python shim follows so its augmented PATH inherits the JDK prefix.
     ensureJavaOnPath();
     const shim = await provisionPythonShim();
-    const baseEnv = shim.augmentedEnv ?? opts.env;
-    // Step 1: workspace detection.
-    const mode = detectWorkspaceMode(cwd);
-    logger.logger.info(`Workspace mode: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
-    const invocationFlags = getBazelInvocationFlags(mode);
-    // Step 2: bazel binary resolution.
-    const bin = await resolveBazelBinary(opts.bin);
-    logger.logger.info(`Using bazel: ${bin}`);
+    baseEnv = shim.augmentedEnv ?? opts.env;
+    bin = await resolveBazelBinary(opts.bin);
+  } catch (e) {
+    logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
     if (verbose) {
-      logger.logger.log('[VERBOSE] resolved options:', {
-        bin,
-        bazelRc: opts.bazelRc ?? '(unset)',
-        bazelOutputBase: opts.bazelOutputBase ?? '(unset)',
-        bazelFlags: opts.bazelFlags ?? '(unset)',
-        invocationFlags
-      });
+      logger.logger.group('[VERBOSE] error:');
+      logger.logger.log(e);
+      logger.logger.groupEnd();
     }
+    return {
+      artifactCount: 0,
+      manifestPaths: [],
+      status: 'hardFailure'
+    };
+  }
+  logger.logger.info(`Using bazel: ${bin}`);
-    // Step 3: build the shared query options object.
-    const queryOpts = {
-      bin,
+  // Track every output_user_root we mint so we can reap them all in
+  // the cleanup pass, even if a per-repo timeout forced a re-mint.
+  let outputUserRoot = makeOutputUserRoot();
+  const mintedRoots = [outputUserRoot];
+  if (verbose) {
+    logger.logger.log(`[VERBOSE] initial --output_user_root=${outputUserRoot} (will be reaped on completion)`);
+  }
+  const layout = opts.outLayout ?? 'standalone';
+  const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
+  // One manifest per (workspace, hub), written best-effort: a single wedged
+  // hub must not discard the manifests every other hub produced.
+  const manifestPaths = [];
+  let totalArtifacts = 0;
+  let anyRepos = false;
+  let hubsSucceeded = 0;
+  let hubsFailed = 0;
+  try {
+    // Always apply the default prune policy so no caller can forget it;
+    // callers EXTEND it via ignoreDirNames/ignoreDirPrefixes.
+    const ignoreDirNames = new Set([...DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES, ...(opts.ignoreDirNames ?? [])]);
+    const ignoreDirPrefixes = [...DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES, ...(opts.ignoreDirPrefixes ?? [])];
+    const workspaceRoots = findWorkspaceRoots({
       cwd,
-      invocationFlags,
-      ...(opts.bazelRc ? {
-        bazelRc: opts.bazelRc
-      } : {}),
-      ...(opts.bazelFlags ? {
-        bazelFlags: opts.bazelFlags
-      } : {}),
-      ...(opts.bazelOutputBase ? {
-        bazelOutputBase: opts.bazelOutputBase
-      } : {}),
-      ...(baseEnv ? {
-        env: baseEnv
-      } : {}),
+      ignoreDirNames,
+      ignoreDirPrefixes,
       verbose
-    };
-    // Step 4: discover validated Maven repos via the two-step recipe.
-    // Bzlmod has a native visible-repository surface; prefer that over static
-    // MODULE.bazel parsing and keep bounded parsing as the legacy/fallback path.
-    let nativeCandidates;
-    if (mode.bzlmod) {
-      const visibleRepos = await runBazelModShowVisibleRepos(queryOpts);
-      if (visibleRepos.code === 0) {
-        nativeCandidates = parseVisibleRepoCandidates(visibleRepos.stdout);
+    });
+    if (!workspaceRoots.length) {
+      logger.logger.warn(`No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`);
+      return {
+        artifactCount: 0,
+        manifestPaths: [],
+        status: 'noEcosystem'
+      };
+    }
+    if (verbose) {
+      logger.logger.log(`[VERBOSE] discovered ${workspaceRoots.length} workspace root(s):`, workspaceRoots);
+    }
+    for (const workspaceRoot of workspaceRoots) {
+      const relPath = path.relative(cwd, workspaceRoot);
+      let mode;
+      try {
+        mode = detectWorkspaceMode(workspaceRoot);
+      } catch (e) {
         if (verbose) {
-          logger.logger.log('[VERBOSE] Bzlmod visible repo candidates:', nativeCandidates);
+          logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: detect failed (${utils.getErrorCause(e)}); skipping`);
         }
-      } else if (verbose) {
-        logger.logger.log('[VERBOSE] bazel mod show_repo failed; falling back to static candidate parsing:', visibleRepos.stderr);
+        continue;
       }
-    }
-    // Returns Map<repoName, probeStdout> so extraction can reuse the probe
-    // output and skip running an identical bazel-query a second time.
-    const probe = buildProbeFor(queryOpts);
-    const repos = await discoverMavenRepos(cwd, probe, nativeCandidates, verbose);
-    const repoNames = Array.from(repos.keys());
-    logger.logger.info(`Discovered ${repos.size} Maven repo(s): ${repoNames.join(', ') || '(none)'}`);
+      logger.logger.info(`Workspace ${relPath || '.'}: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
+      const invocationFlags = getBazelInvocationFlags(mode);
+      const queryOptsFor = userRoot => buildQueryOpts({
+        baseEnv,
+        bin,
+        invocationFlags,
+        opts,
+        outputUserRoot: userRoot,
+        spawnCwd: workspaceRoot,
+        verbose
+      });
-    // Step 5: extract artifacts from each repo (preferring unsorted_deps.json).
-    const allArtifacts = [];
-    for (const [repo, probeStdout] of repos) {
       // eslint-disable-next-line no-await-in-loop
-      const artifacts = await extractFromOneRepo(repo, queryOpts, probeStdout);
-      allArtifacts.push(...artifacts);
-      logger.logger.info(`@${repo}: ${artifacts.length} artifact(s)`);
-    }
-    // Step 6: normalize to maven_install.json shape.
-    const normalized = normalizeToMavenInstallJson(allArtifacts);
-    // Step 7: write outputs.
-    // Standalone output writes directly to `out`; auto-manifest uses a sibling directory
-    // to avoid colliding with a repo's checked-in rules_jvm_external lockfile and
-    // to avoid repo-root gitignore patterns such as `/maven_install.json`.
-    const layout = opts.outLayout ?? 'standalone';
-    const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
-    fs$1.mkdirSync(manifestDir, {
-      recursive: true
-    });
-    const manifestPath = path.join(manifestDir, 'maven_install.json');
-    await fs$1.promises.writeFile(manifestPath, JSON.stringify(normalized, null, 2), 'utf8');
-    if (verbose) {
-      logger.logger.log('[VERBOSE] outputs:', {
-        artifactCount: allArtifacts.length,
-        generatedManifest: path.relative(out, manifestPath),
-        layout,
-        manifest: manifestPath,
-        mavenRepos: repoNames,
-        tool: 'socket manifest bazel',
-        workspace: {
-          bzlmod: mode.bzlmod,
-          legacyWorkspace: mode.workspace
+      const candidates = await discoverCandidatesForWorkspace(workspaceRoot, mode, queryOptsFor(outputUserRoot), verbose);
+      logger.logger.info(`Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${candidates.join(', ') || '(none)'}`);
+      for (const repoName of candidates) {
+        anyRepos = true;
+        if (verbose) {
+          logger.logger.log(`[VERBOSE] workspace ${relPath || '.'}: running metadata cquery for @${repoName} (timeout ${perRepoTimeoutMs}ms)`);
         }
-      });
+        // eslint-disable-next-line no-await-in-loop
+        const result = await runMetadataCqueryForRepo({
+          opts: queryOptsFor(outputUserRoot),
+          repoName,
+          timeoutMs: perRepoTimeoutMs,
+          workspaceRelPath: relPath,
+          workspaceRoot
+        });
+        if (result.status === 'timeout') {
+          logger.logger.warn(`@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`);
+          hubsFailed += 1;
+          // eslint-disable-next-line no-await-in-loop
+          await reapBazelServer(bin, outputUserRoot, verbose);
+          // eslint-disable-next-line no-await-in-loop
+          await removeTempdir(outputUserRoot, verbose);
+          outputUserRoot = makeOutputUserRoot();
+          mintedRoots.push(outputUserRoot);
+          if (verbose) {
+            logger.logger.log(`[VERBOSE] minted fresh --output_user_root=${outputUserRoot} after timeout`);
+          }
+          continue;
+        }
+        if (result.status === 'error') {
+          logger.logger.warn(`@${repoName}: cquery failed; skipping this hub`);
+          hubsFailed += 1;
+          continue;
+        }
+        // A scan must never silently upload a graph missing edges it knows
+        // it dropped: warn unconditionally and treat the hub as partial.
+        let hubPartial = result.unresolvedLabels.length > 0;
+        if (hubPartial) {
+          logger.logger.warn(`@${repoName}: dropped ${result.unresolvedLabels.length} unresolved dependency edge(s): ${result.unresolvedLabels.join(', ')}`);
+        }
+        // A non-zero cquery exit that still yielded a usable subset
+        // (--keep_going) is reported as `partial` even with no unresolved
+        // labels — the graph is known-incomplete, so flip the hub partial.
+        if (result.status === 'partial' && !result.unresolvedLabels.length) {
+          hubPartial = true;
+          logger.logger.warn(`@${repoName}: cquery partially failed (--keep_going); the dependency graph may be incomplete`);
+        }
+        let written;
+        try {
+          // eslint-disable-next-line no-await-in-loop
+          written = await writeHubManifest({
+            artifacts: result.artifacts,
+            cwd,
+            manifestDir,
+            relPath,
+            repoName,
+            verbose
+          });
+        } catch (e) {
+          // Best-effort per hub: a write failure must not abort the walk and
+          // discard the manifests other hubs already produced.
+          logger.logger.warn(`@${repoName}: failed to write manifest (${utils.getErrorCause(e)}); skipping this hub`);
+          hubsFailed += 1;
+          continue;
+        }
+        if (written.droppedArtifacts.length) {
+          hubPartial = true;
+          logger.logger.warn(`@${repoName}: dropped ${written.droppedArtifacts.length} malformed Maven coordinate(s): ${written.droppedArtifacts.join(', ')}`);
+        }
+        if (written.prunedEdges.length) {
+          hubPartial = true;
+          logger.logger.warn(`@${repoName}: pruned ${written.prunedEdges.length} dependency edge(s) referencing unlisted artifacts: ${written.prunedEdges.join(', ')}`);
+        }
+        if (written.manifestPath) {
+          manifestPaths.push(written.manifestPath);
+          totalArtifacts += written.artifactCount;
+          if (hubPartial) {
+            hubsFailed += 1;
+          } else {
+            hubsSucceeded += 1;
+          }
+          if (verbose) {
+            logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status}, ${written.artifactCount} artifact(s) -> ${written.manifestPath}`);
+          }
+        } else {
+          // No artifacts to write (empty hub). Not itself a failure, but if
+          // edges were dropped the partial signal still applies.
+          if (hubPartial) {
+            hubsFailed += 1;
+          }
+          if (verbose) {
+            logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status} (no manifest written)`);
+          }
+        }
+      }
     }
-    if (!allArtifacts.length) {
-      if (!repos.size) {
+    if (!manifestPaths.length) {
+      if (!anyRepos) {
         if (verbose) {
           logger.logger.info('No Maven artifacts extracted. failureCategory=no-supported-ecosystem');
         }
         return {
           artifactCount: 0,
-          manifestPath,
-          noEcosystemFound: true,
-          ok: false
+          manifestPaths: [],
+          status: 'noEcosystem'
         };
       }
-      logger.logger.fail(`Discovered Maven repo(s) ${repoNames.join(', ')} but extracted zero artifacts. failureCategory=ecosystem-detected-but-empty`);
+      logger.logger.fail('Discovered Maven repo(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty');
       return {
         artifactCount: 0,
-        manifestPath,
-        ok: false
+        manifestPaths: [],
+        status: 'hardFailure'
       };
     }
-    logger.logger.success(`Wrote ${allArtifacts.length} artifact(s) to ${path.relative(cwd, manifestPath)}.`);
+    const status = hubsFailed ? 'partial' : 'complete';
+    if (status === 'complete') {
+      logger.logger.success(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total.`);
+    } else {
+      logger.logger.warn(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete.`);
+    }
+    if (verbose) {
+      logger.logger.log('[VERBOSE] outputs:', {
+        artifactCount: totalArtifacts,
+        hubsFailed,
+        hubsSucceeded,
+        layout,
+        manifestPaths,
+        status
+      });
+    }
     return {
-      artifactCount: allArtifacts.length,
-      manifestPath,
-      ok: true
+      artifactCount: totalArtifacts,
+      manifestPaths,
+      status
     };
   } catch (e) {
-    // Always surface the error message; users should not have to
-    // re-run a multi-minute bazel build with --verbose just to see whether
-    // the failure was a missing dependency, permission error, or network blip.
     logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
     if (verbose) {
       logger.logger.group('[VERBOSE] error:');
@@ -3213,8 +3725,16 @@ async function extractBazelToMaven(opts) {
     }
     return {
       artifactCount: 0,
-      ok: false
+      manifestPaths: [],
+      status: 'hardFailure'
     };
+  } finally {
+    for (const dir of mintedRoots) {
+      // eslint-disable-next-line no-await-in-loop
+      await reapBazelServer(bin, dir, verbose);
+      // eslint-disable-next-line no-await-in-loop
+      await removeTempdir(dir, verbose);
+    }
   }
 }
@@ -4084,12 +4604,20 @@ async function generateAutoManifest({
       outLayout: 'flat',
       verbose: Boolean(bazelConfig?.verbose) || verbose
     });
-    if (!mavenResult.ok && !mavenResult.noEcosystemFound) {
+    // Only a hard failure (zero manifests, ecosystem present) aborts the
+    // wider scan. A partial run still produced manifests worth uploading; an
+    // absent ecosystem is tolerated here (it's only an error when EVERY
+    // ecosystem is absent, which the caller decides).
+    if (mavenResult.status === 'hardFailure') {
       throw new Error('Bazel auto-manifest generation failed for ecosystem(s): maven');
     }
-    if (mavenResult.ok && mavenResult.manifestPath) {
-      generatedFiles.push(mavenResult.manifestPath);
-    } else if (mavenResult.noEcosystemFound) {
+    if (mavenResult.status === 'complete' || mavenResult.status === 'partial') {
+      generatedFiles.push(...mavenResult.manifestPaths);
+      if (mavenResult.status === 'partial') {
+        logger.logger.warn(`Bazel Maven manifest generation was partial (${mavenResult.manifestPaths.length} manifest(s) written); some hubs failed or had incomplete dependency graphs. Uploading what was generated.`);
+      }
+    } else {
       logger.logger.info('No supported Bazel Maven ecosystem detected.');
     }
   }
@@ -4307,6 +4835,13 @@ async function handleCreateNewScan({
   const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined;
   if (reach && scanId && tier1ReachabilityScanId) {
     await finalizeTier1Scan(tier1ReachabilityScanId, scanId);
+  } else if (reach.runReachabilityAnalysis && scanId && !tier1ReachabilityScanId) {
+    // Reachability analysis ran and a scan was created, but no tier 1
+    // reachability scan id was extracted from the facts file. Surface this
+    // instead of silently skipping finalize — otherwise the tier 1 row stays
+    // stuck (e.g. at COANA_DONE) and the full scan is never linked to its
+    // reachability report.
+    logger.logger.warn('Reachability analysis ran but no tier 1 reachability scan ID was found; skipping tier 1 finalize. The scan was created but its reachability report was not linked.');
   }
   // On a successful scan, clean up the `.socket.facts.json` coana wrote at
@@ -7734,6 +8269,85 @@ async function run$G(argv, importMeta, context) {
   await spawnPromise;
 }
+// Result shape returned by `validatePypiHub`. Kept local to the PyPI module
+// since validation here is hub-alias-marker based (different from the
+// Maven-side tri-state classifier).
+// PyPI-only repo-name predicate (Bazel apparent-name grammar).
+const PYPI_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
+const PYPI_REPO_NAME_RE = new RegExp(`^${PYPI_REPO_NAME_PATTERN}$`);
+function pypiApparentNameFromJsonValue(value) {
+  if (!value || typeof value !== 'object') {
+    return undefined;
+  }
+  const obj = value;
+  const direct = obj['apparentName'] ?? obj['apparent_name'];
+  if (typeof direct === 'string') {
+    return direct;
+  }
+  for (const nested of Object.values(obj)) {
+    const found = pypiApparentNameFromJsonValue(nested);
+    if (found) {
+      return found;
+    }
+  }
+  return undefined;
+}
+function pypiApparentNamesFromRepoMapping(value) {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) {
+    return [];
+  }
+  const candidates = [];
+  for (const [name, canonicalName] of Object.entries(value)) {
+    if (name.startsWith('@') || typeof canonicalName !== 'string') {
+      continue;
+    }
+    if (PYPI_REPO_NAME_RE.test(name)) {
+      candidates.push(name);
+    }
+  }
+  return candidates;
+}
+function pypiNormalizeRepoName(name) {
+  const repo = name.startsWith('@') ? name.slice(1) : name;
+  return PYPI_REPO_NAME_RE.test(repo) ? repo : undefined;
+}
+// Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accepts
+// the older streamed jsonproto shape (apparentName / apparent_name records).
+// PyPI-only; the Maven path consumes `bazel mod show_extension` instead.
+function parseVisibleRepoCandidates(output) {
+  const seen = new Set();
+  const candidates = [];
+  for (const line of output.split(/\r?\n/)) {
+    const trimmed = line.trim();
+    if (!trimmed) {
+      continue;
+    }
+    try {
+      const parsed = JSON.parse(trimmed);
+      for (const c of pypiApparentNamesFromRepoMapping(parsed)) {
+        if (!seen.has(c)) {
+          seen.add(c);
+          candidates.push(c);
+        }
+      }
+      const apparentName = pypiApparentNameFromJsonValue(parsed);
+      if (apparentName) {
+        const repo = pypiNormalizeRepoName(apparentName);
+        if (repo && !seen.has(repo)) {
+          seen.add(repo);
+          candidates.push(repo);
+        }
+      }
+    } catch {
+      // Skip malformed lines; caller falls back to static discovery when no
+      // usable visible repo names are found.
+    }
+  }
+  return candidates.sort();
+}
 // Maximum size (bytes) we will read for any single Bazel workspace file.
 // Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
 const MAX_WORKSPACE_FILE_BYTES = 5 * 1024 * 1024;
@@ -8676,6 +9290,13 @@ const config$e = {
     Note: this command generates dependency manifests for Bazel workspaces.
     It does not run reachability analysis.
+    Maven hub discovery: under Bzlmod, hubs are enumerated from
+    \`bazel mod show_extension\` and filtered to the root module's own hubs.
+    Under legacy WORKSPACE mode (no \`show_extension\`), only conventionally
+    named hubs are probed (\`maven\`, \`maven_install\`, \`maven_dev\`, …). A hub
+    with a non-conventional name that \`show_extension\` does not enumerate is
+    not discovered yet; a flag to name extra hubs is planned.
     To generate AND upload in one step, use \`socket scan create --auto-manifest\`
     instead — it detects Bazel workspaces, generates Maven manifests by
     default, and uploads the result. This subcommand is for generation only.
@@ -8697,21 +9318,29 @@ const cmdManifestBazel = {
 // failures that must propagate to a non-zero CLI exit; returns void on
 // success.
 //
-// - Hard failure: ok === false && !noEcosystemFound. The ecosystem was
-//   detected (or the runner crashed), but extraction failed. Always a
-//   non-zero exit, even when another ecosystem succeeded.
-// - No-discovery: noEcosystemFound === true. Genuinely absent ecosystem.
-//   Auto-detect mode tolerates this when at least one other ecosystem
-//   succeeded; explicit mode treats it as an error.
+// - `complete`/`partial` both count as produced output (>=1 manifest).
+//   `partial` additionally warns — a known-incomplete SBOM is still emitted,
+//   not a hard error.
+// - `hardFailure`: the ecosystem was detected (or the runner crashed) but
+//   wrote zero manifests. Always a non-zero exit, even when another
+//   ecosystem succeeded.
+// - `noEcosystem`: genuinely absent ecosystem. Auto-detect mode tolerates it
+//   when at least one other ecosystem produced output; explicit mode treats
+//   it as an error (the user requested an ecosystem that isn't there).
 function evaluateEcosystemOutcomes(outcomes, isExplicit) {
-  const hardFailures = outcomes.filter(o => !o.ok && !o.noEcosystemFound);
-  const noDiscoveries = outcomes.filter(o => o.noEcosystemFound);
-  const successes = outcomes.filter(o => o.ok && o.manifestPath);
+  const produced = outcomes.filter(o => (o.status === 'complete' || o.status === 'partial') && o.manifestPaths.length > 0);
+  const hardFailures = outcomes.filter(o => o.status === 'hardFailure');
+  const noDiscoveries = outcomes.filter(o => o.status === 'noEcosystem');
+  for (const partial of outcomes) {
+    if (partial.status === 'partial') {
+      logger.logger.warn(`Bazel ${partial.ecosystem} manifest generation was partial; the uploaded SBOM is known-incomplete.`);
+    }
+  }
   if (!isExplicit) {
     if (hardFailures.length) {
       throw new utils.InputError(`Bazel auto-manifest generation hit hard failure(s) in ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
     }
-    if (successes.length) {
+    if (produced.length) {
       return;
     }
     if (noDiscoveries.length === outcomes.length) {
@@ -8720,7 +9349,8 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
     return;
   }
-  // Explicit mode: every requested ecosystem must succeed.
+  // Explicit mode: every requested ecosystem must produce output. A partial
+  // run counts (it wrote manifests); absent or hard-failed ecosystems error.
   if (noDiscoveries.length) {
     throw new utils.InputError(`No Bazel rules found for explicitly requested ecosystem(s): ${noDiscoveries.map(f => f.ecosystem).join(', ')}.`);
   }
@@ -8728,6 +9358,32 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
     throw new utils.InputError(`Bazel manifest generation failed for explicitly requested ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
   }
 }
+// Map the legacy PyPI result shape (single manifestPath + ok/noEcosystem
+// booleans) into the shared status vocabulary so both ecosystems flow through
+// one success gate. PyPI has no partial state. Only a `complete` outcome
+// carries a manifest path; `noEcosystem`/`hardFailure` carry none, preserving
+// the invariant that a non-success outcome produced no usable output (a
+// detected-but-empty PyPI run writes a stub file but is still a hard failure,
+// and that stub must not be surfaced as produced output).
+function pypiOutcome(result) {
+  if (result.noEcosystemFound) {
+    return {
+      manifestPaths: [],
+      status: 'noEcosystem'
+    };
+  }
+  if (result.ok && result.manifestPath) {
+    return {
+      manifestPaths: [result.manifestPath],
+      status: 'complete'
+    };
+  }
+  return {
+    manifestPaths: [],
+    status: 'hardFailure'
+  };
+}
 async function run$F(argv, importMeta, {
   parentName
 }) {
@@ -8861,9 +9517,8 @@ async function run$F(argv, importMeta, {
       });
       outcomes.push({
         ecosystem: 'maven',
-        ok: mavenResult.ok,
-        noEcosystemFound: mavenResult.noEcosystemFound,
-        manifestPath: mavenResult.manifestPath
+        manifestPaths: mavenResult.manifestPaths,
+        status: mavenResult.status
       });
     } else if (eco === 'pypi') {
       // eslint-disable-next-line no-await-in-loop
@@ -8879,9 +9534,7 @@ async function run$F(argv, importMeta, {
       });
       outcomes.push({
         ecosystem: 'pypi',
-        ok: pypiResult.ok,
-        noEcosystemFound: pypiResult.noEcosystemFound,
-        manifestPath: pypiResult.manifestPath
+        ...pypiOutcome(pypiResult)
       });
     }
   }
@@ -14446,7 +15099,7 @@ const reachabilityFlags = {
   reachConcurrency: {
     type: 'number',
     default: 1,
-    description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available. NPM reachability analysis does not support concurrent execution, so the concurrency level is ignored for NPM.'
+    description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available.'
   },
   reachContinueOnAnalysisErrors: {
     type: 'boolean',
@@ -16828,6 +17481,7 @@ async function run$8(argv, importMeta, {
 }
 async function outputScanReach(result, {
+  cwd,
   outputKind,
   outputPath
 }) {
@@ -16848,7 +17502,11 @@ async function outputScanReach(result, {
   logger.logger.info(`Reachability report has been written to: ${actualOutputPath}`);
   // Warn about individual vulnerabilities where reachability analysis errored.
-  const errors = utils.extractReachabilityErrors(result.data.reachabilityReport);
+  // Resolve the report path against the scan `cwd` (not `process.cwd()`):
+  // Coana writes the facts file relative to `cwd` and `reachabilityReport`
+  // is a `cwd`-relative path, so reading the bare relative path would miss
+  // the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`).
+  const errors = utils.extractReachabilityErrors(path.resolve(cwd, result.data.reachabilityReport));
   if (errors.length) {
     logger.logger.log('');
     logger.logger.warn(`Reachability analysis returned ${errors.length} ${words.pluralize('error', errors.length)} for individual ${words.pluralize('vulnerability', errors.length)}:`);
@@ -16877,6 +17535,7 @@ async function handleScanReach({
   });
   if (!supportedFilesCResult.ok) {
     await outputScanReach(supportedFilesCResult, {
+      cwd,
       outputKind,
       outputPath
     });
@@ -16924,7 +17583,22 @@ async function handleScanReach({
     uploadManifests: true
   });
   spinner.stop();
+  // Standalone reachability has no full scan to bind to, but the tier1
+  // reachability scan row still needs to transition to its DONE terminal
+  // state — otherwise it sits at the post-Coana intermediate state forever
+  // and looks indistinguishable from a stuck run. Pass `null` as the full
+  // scan id; the endpoint accepts it for this flow. Best-effort: never
+  // block the user-visible output on this.
+  const tier1Id = result.ok ? result.data?.tier1ReachabilityScanId : undefined;
+  if (tier1Id) {
+    const finalizeResult = await finalizeTier1Scan(tier1Id, null);
+    if (!finalizeResult.ok) {
+      logger.logger.warn(`Failed to finalize tier1 reachability scan: ${finalizeResult.message}${finalizeResult.cause ? ` — ${finalizeResult.cause}` : ''}`);
+    }
+  }
   await outputScanReach(result, {
+    cwd,
     outputKind,
     outputPath
   });
@@ -19067,5 +19741,5 @@ process.on('unhandledRejection', async (reason, promise) => {
   // eslint-disable-next-line n/no-process-exit
   process.exit(1);
 });
-//# debugId=52e1770b-8fec-41b9-83a1-5c52a6251b6c
+//# debugId=614e598d-c01b-4289-b35e-bff2af2ac507
 //# sourceMappingURL=cli.js.map