socket 1.1.112 → 1.1.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/CHANGELOG.md +4 -1
  2. package/dist/cli.js +1600 -926
  3. package/dist/cli.js.map +1 -1
  4. package/dist/constants.js +4 -4
  5. package/dist/constants.js.map +1 -1
  6. package/dist/tsconfig.dts.tsbuildinfo +1 -1
  7. package/dist/types/commands/manifest/bazel/bazel-cquery.d.mts +70 -0
  8. package/dist/types/commands/manifest/bazel/bazel-cquery.d.mts.map +1 -0
  9. package/dist/types/commands/manifest/bazel/bazel-pypi-discovery.d.mts +14 -1
  10. package/dist/types/commands/manifest/bazel/bazel-pypi-discovery.d.mts.map +1 -1
  11. package/dist/types/commands/manifest/bazel/bazel-query-runner.d.mts +58 -14
  12. package/dist/types/commands/manifest/bazel/bazel-query-runner.d.mts.map +1 -1
  13. package/dist/types/commands/manifest/bazel/bazel-repo-discovery.d.mts +43 -30
  14. package/dist/types/commands/manifest/bazel/bazel-repo-discovery.d.mts.map +1 -1
  15. package/dist/types/commands/manifest/bazel/bazel-workspace-walk.d.mts +18 -0
  16. package/dist/types/commands/manifest/bazel/bazel-workspace-walk.d.mts.map +1 -0
  17. package/dist/types/commands/manifest/bazel/cmd-manifest-bazel.d.mts +12 -10
  18. package/dist/types/commands/manifest/bazel/cmd-manifest-bazel.d.mts.map +1 -1
  19. package/dist/types/commands/manifest/bazel/extract_bazel_to_maven.d.mts +70 -8
  20. package/dist/types/commands/manifest/bazel/extract_bazel_to_maven.d.mts.map +1 -1
  21. package/dist/types/commands/manifest/generate_auto_manifest.d.mts.map +1 -1
  22. package/dist/types/commands/scan/finalize-tier1-scan.d.mts +6 -4
  23. package/dist/types/commands/scan/finalize-tier1-scan.d.mts.map +1 -1
  24. package/dist/types/commands/scan/handle-create-new-scan.d.mts.map +1 -1
  25. package/dist/types/commands/scan/handle-scan-reach.d.mts.map +1 -1
  26. package/dist/types/commands/scan/output-scan-reach.d.mts +2 -1
  27. package/dist/types/commands/scan/output-scan-reach.d.mts.map +1 -1
  28. package/dist/types/commands/scan/perform-reachability-analysis.d.mts.map +1 -1
  29. package/dist/types/utils/glob.d.mts +1 -0
  30. package/dist/types/utils/glob.d.mts.map +1 -1
  31. package/dist/utils.js +2 -1
  32. package/dist/utils.js.map +1 -1
  33. package/package.json +2 -2
  34. package/requirements.json +1 -1
  35. package/dist/types/commands/manifest/bazel/bazel-build-parser.d.mts +0 -34
  36. package/dist/types/commands/manifest/bazel/bazel-build-parser.d.mts.map +0 -1
package/dist/cli.js CHANGED
@@ -15,10 +15,10 @@ var words = require('../external/@socketsecurity/registry/lib/words');
15
15
  var fs$1 = require('node:fs');
16
16
  var arrays = require('../external/@socketsecurity/registry/lib/arrays');
17
17
  var prompts = require('../external/@socketsecurity/registry/lib/prompts');
18
- var bin = require('../external/@socketsecurity/registry/lib/bin');
19
- var childProcess = require('node:child_process');
20
18
  var os = require('node:os');
21
19
  var spawn = require('../external/@socketsecurity/registry/lib/spawn');
20
+ var bin = require('../external/@socketsecurity/registry/lib/bin');
21
+ var childProcess = require('node:child_process');
22
22
  var fs$2 = require('../external/@socketsecurity/registry/lib/fs');
23
23
  var strings = require('../external/@socketsecurity/registry/lib/strings');
24
24
  var path$1 = require('../external/@socketsecurity/registry/lib/path');
@@ -1110,8 +1110,10 @@ async function fetchSupportedScanFileNames(options) {
1110
1110
 
1111
1111
  /**
1112
1112
  * Finalize a tier1 reachability scan.
1113
- * - Associates the tier1 reachability scan metadata with the full scan.
1114
- * - Sets the tier1 reachability scan to "finalized" state.
1113
+ * - Associates the tier1 reachability scan metadata with the full scan
1114
+ * (or with `null` when called from a standalone reachability flow that
1115
+ * has no full scan to bind to).
1116
+ * - Transitions the tier1 reachability scan to its DONE terminal state.
1115
1117
  */
1116
1118
  async function finalizeTier1Scan(tier1ReachabilityScanId, scanId) {
1117
1119
  // we do not use the SDK here because the tier1-reachability-scan/finalize is a hidden
@@ -1835,12 +1837,21 @@ async function performReachabilityAnalysis(options) {
1835
1837
  }
1836
1838
  return coanaResult;
1837
1839
  }
1840
+
1841
+ // Coana writes the facts file relative to the scan `cwd` (it is spawned
1842
+ // with `cwd` above), so resolve the read path against `cwd` too. Reading
1843
+ // the bare relative path would resolve against `process.cwd()` and miss
1844
+ // the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`), silently
1845
+ // dropping the tier 1 scan id and skipping finalize downstream.
1846
+ const resolvedReportPath = path.resolve(cwd, outputFilePath);
1838
1847
  return {
1839
1848
  ok: true,
1840
1849
  data: {
1841
- // Use the actual output filename for the scan.
1850
+ // Use the actual output filename for the scan. Keep this `cwd`-relative
1851
+ // so the upload (which relativizes against `cwd`) and the post-success
1852
+ // unlink (`path.resolve(cwd, reachabilityReport)`) keep working.
1842
1853
  reachabilityReport: outputFilePath,
1843
- tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(outputFilePath)
1854
+ tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(resolvedReportPath)
1844
1855
  }
1845
1856
  };
1846
1857
  }
@@ -1930,318 +1941,6 @@ async function resolveBazelBinary(explicit) {
1930
1941
  throw new utils.InputError('Could not find bazelisk or bazel on PATH. ' + 'Install bazelisk (recommended; https://github.com/bazelbuild/bazelisk) ' + 'or bazel, or pass --bazel <path>.');
1931
1942
  }
1932
1943
 
1933
- /**
1934
- * Parse `bazel query --output=build` text and `unsorted_deps.json` files
1935
- * (rules_jvm_external) into a uniform `ExtractedArtifact` shape consumed by
1936
- * the converter.
1937
- *
1938
- * Security gate: every regex uses bounded character classes to prevent
1939
- * catastrophic backtracking on hostile bazel-query output. Rules without
1940
- * `maven_coordinates=` are skipped. Caller is responsible for size-capping
1941
- * the input string.
1942
- */
1943
-
1944
- // Per-rule block matcher: matches `<kind>(...)` where kind is jvm_import or
1945
- // aar_import, bounded by `^)` (closing paren on its own line) — Bazel
1946
- // `--output=build` output convention. Body length capped at 8 KiB; real
1947
- // rules are ~500 bytes, so the cap is 16x normal. Prevents pathological
1948
- // backtracking on hostile input.
1949
- const RULE_RE = /^(jvm_import|aar_import)\(([\s\S]{0,8192}?)^\)/gm;
1950
-
1951
- // Cache for per-attribute regexes — avoids recompiling the same pattern on
1952
- // every rule block. Keyed by attr name; all attr names are safe alphanumeric
1953
- // identifiers so no escaping is needed beyond the bounded character class.
1954
- const ATTR_RE_CACHE = new Map();
1955
-
1956
- // Cache for per-tag-key regexes used by extractTagValue.
1957
- const TAG_RE_CACHE = new Map();
1958
- function extractAttr(body, attr) {
1959
- // Match `<attr> = "VALUE"` — quoted-string attrs only.
1960
- // Quoted value capped at 4 KiB; canonical Maven URLs are ~150 bytes.
1961
- let re = ATTR_RE_CACHE.get(attr);
1962
- if (!re) {
1963
- re = new RegExp(`\\b${attr}\\s*=\\s*"([^"\\n]{0,4096})"`);
1964
- ATTR_RE_CACHE.set(attr, re);
1965
- }
1966
- const m = re.exec(body);
1967
- return m?.[1];
1968
- }
1969
-
1970
- // Extracts a `key=value` pair from inside a Bazel `tags = [...]` attribute
1971
- // (rules_jvm_external encodes maven_sha256, maven_coordinates etc. this way).
1972
- // Pattern: `"maven_sha256=<hex>"` inside the tags list.
1973
- // Returns undefined when the tag is absent or malformed.
1974
- function extractTagValue(body, tagKey) {
1975
- // Match the full tags = [...] block (bounded at 8 KiB).
1976
- const tagsM = /\btags\s*=\s*\[([\s\S]{0,8192}?)\]/m.exec(body);
1977
- if (!tagsM) {
1978
- return undefined;
1979
- }
1980
- const tagsBlob = tagsM[1];
1981
- // Within the blob, look for "<tagKey>=<value>" inside a quoted string.
1982
- // Bounded at 512 bytes per tag entry (sha256 hex is 64 chars; URLs ~150).
1983
- let tagRe = TAG_RE_CACHE.get(tagKey);
1984
- if (!tagRe) {
1985
- tagRe = new RegExp(`"${tagKey}=([^"\\n]{0,512})"`);
1986
- TAG_RE_CACHE.set(tagKey, tagRe);
1987
- }
1988
- const m = tagRe.exec(tagsBlob);
1989
- return m?.[1];
1990
- }
1991
- function extractDeps(body) {
1992
- // Match `deps = ["a", "b", ...]`. Body length capped at 16 KiB; real
1993
- // dep lists are <2 KiB.
1994
- const m = /\bdeps\s*=\s*\[([\s\S]{0,16384}?)\]/m.exec(body);
1995
- if (!m) {
1996
- return [];
1997
- }
1998
- const out = [];
1999
- // Per-label cap at 512 bytes; real Bazel labels are <100 bytes.
2000
- for (const q of m[1].matchAll(/"([^"\n]{0,512})"/g)) {
2001
- out.push(q[1]);
2002
- }
2003
- return out;
2004
- }
2005
-
2006
- /**
2007
- * Parse `bazel query --output=build` stdout into `ExtractedArtifact[]`.
2008
- * Skips rules without a `maven_coordinates` attribute (those aren't
2009
- * rules_jvm_external lockfile rules).
2010
- */
2011
- function parseBazelBuildOutput(text) {
2012
- const results = [];
2013
- for (const m of text.matchAll(RULE_RE)) {
2014
- const ruleKind = m[1];
2015
- const body = m[2];
2016
- const ruleName = extractAttr(body, 'name');
2017
- // maven_coordinates can be:
2018
- // (a) a top-level rule attribute: `maven_coordinates = "g:a:v"` (newer rje)
2019
- // (b) inside tags = [...]: `"maven_coordinates=g:a:v"` (older rje, e.g. ray)
2020
- const coords = extractAttr(body, 'maven_coordinates') ?? extractTagValue(body, 'maven_coordinates');
2021
- if (!ruleName || !coords) {
2022
- continue;
2023
- }
2024
- // maven_sha256 is encoded inside tags = [...] as "maven_sha256=<hex>" by
2025
- // rules_jvm_external; try tags first, fall back to standalone attr for
2026
- // older rule shapes that may declare it as a top-level attribute.
2027
- const mavenSha256 = extractTagValue(body, 'maven_sha256') ?? extractAttr(body, 'maven_sha256');
2028
- results.push({
2029
- ruleKind,
2030
- ruleName,
2031
- mavenCoordinates: coords,
2032
- mavenUrl: extractAttr(body, 'maven_url'),
2033
- mavenSha256,
2034
- deps: extractDeps(body)
2035
- });
2036
- }
2037
- return results;
2038
- }
2039
- function ruleNameFromCoordinate(c) {
2040
- return c.replace(/[^A-Za-z0-9]/g, '_');
2041
- }
2042
-
2043
- /**
2044
- * Parse supported `external/<repo>/unsorted_deps.json` shapes emitted by
2045
- * rules_jvm_external. Older files use an artifact array with full coordinates;
2046
- * newer v2 lock-file-shaped files use artifact/dependency maps keyed by
2047
- * `group:artifact`. Caller MUST size-cap the input because JSON.parse is
2048
- * unbounded by default.
2049
- */
2050
- function parseUnsortedDepsJson(json) {
2051
- let parsed;
2052
- try {
2053
- parsed = JSON.parse(json);
2054
- } catch {
2055
- return [];
2056
- }
2057
- const maybe = parsed;
2058
- if (Array.isArray(maybe.artifacts)) {
2059
- const out = [];
2060
- for (const a of maybe.artifacts) {
2061
- if (typeof a?.coordinates !== 'string') {
2062
- continue;
2063
- }
2064
- const deps = [];
2065
- if (Array.isArray(a.deps)) {
2066
- for (const d of a.deps) {
2067
- if (typeof d === 'string') {
2068
- deps.push(d);
2069
- }
2070
- }
2071
- }
2072
- out.push({
2073
- ruleKind: 'jvm_import',
2074
- ruleName: ruleNameFromCoordinate(a.coordinates),
2075
- mavenCoordinates: a.coordinates,
2076
- mavenUrl: typeof a.url === 'string' ? a.url : undefined,
2077
- mavenSha256: typeof a.sha256 === 'string' ? a.sha256 : undefined,
2078
- deps
2079
- });
2080
- }
2081
- return out;
2082
- }
2083
- if (!maybe.artifacts || typeof maybe.artifacts !== 'object') {
2084
- return [];
2085
- }
2086
- const dependencies = maybe.dependencies ?? {};
2087
- const out = [];
2088
- for (const [groupArtifact, artifact] of Object.entries(maybe.artifacts)) {
2089
- if (!artifact || typeof artifact.version !== 'string') {
2090
- continue;
2091
- }
2092
- const shasums = artifact.shasums ?? {};
2093
- const jarSha = shasums['jar'];
2094
- if (typeof jarSha === 'string' || Object.keys(shasums).length === 0) {
2095
- out.push(v2Artifact(groupArtifact, artifact.version, jarSha, dependencies));
2096
- }
2097
- for (const [classifier, sha256] of Object.entries(shasums)) {
2098
- if (classifier === 'jar' || typeof sha256 !== 'string') {
2099
- continue;
2100
- }
2101
- const classifierKey = `${groupArtifact}:jar:${classifier}`;
2102
- out.push(v2Artifact(classifierKey, artifact.version, sha256, dependencies));
2103
- }
2104
- }
2105
- return out;
2106
- }
2107
- function v2Artifact(artifactKey, version, sha256, dependencies) {
2108
- return {
2109
- ruleKind: 'jvm_import',
2110
- ruleName: ruleNameFromCoordinate(artifactKey),
2111
- mavenCoordinates: `${artifactKey}:${version}`,
2112
- mavenSha256: sha256,
2113
- deps: Array.isArray(dependencies[artifactKey]) ? dependencies[artifactKey].filter(d => typeof d === 'string') : []
2114
- };
2115
- }
2116
-
2117
- let probed = false;
2118
-
2119
- // Verifies `java` is functional in the current execution environment. Bazel
2120
- // JVM manifest extraction (rules_jvm_external → Coursier) requires a real
2121
- // JDK; the CLI does not attempt to discover Homebrew installs or mutate the
2122
- // caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
2123
- // actionable message so the surfaced error names the prerequisite directly
2124
- // instead of relying on Bazel's downstream diagnostic.
2125
- function ensureJavaOnPath() {
2126
- if (probed) {
2127
- return;
2128
- }
2129
- try {
2130
- childProcess.execSync('java -version', {
2131
- stdio: 'ignore'
2132
- });
2133
- probed = true;
2134
- } catch {
2135
- throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
2136
- }
2137
- }
2138
-
2139
- // Validates that --bazel-output-base is a path we can use as Bazel's output_base.
2140
- // Throws InputError if:
2141
- // - the input contains `..` segments (path traversal guard)
2142
- // - the existing path is not writable
2143
- // - the path cannot be created (parent not writable)
2144
- function validateOutputBase(outputBase, cwd) {
2145
- // Path traversal guard: reject any literal `..` segment in user input.
2146
- // After path.resolve these are normalised away, so we check the raw input.
2147
- // Split on both separators. On Windows `path.sep === '\\'`, so
2148
- // input like `foo/../etc` would not contain a `..` segment under the
2149
- // platform-specific split, bypassing the guard — yet path.resolve below
2150
- // would still normalise the `..` and a traversal target could materialise.
2151
- const segments = outputBase.split(/[\\/]/);
2152
- if (segments.includes('..')) {
2153
- throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
2154
- }
2155
- const resolved = path.resolve(cwd, outputBase);
2156
- if (fs$1.existsSync(resolved)) {
2157
- try {
2158
- fs$1.accessSync(resolved, fs$1.constants.W_OK);
2159
- } catch {
2160
- throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
2161
- }
2162
- return;
2163
- }
2164
- // Path does not exist yet — try to create it so bazel can populate it.
2165
- try {
2166
- fs$1.mkdirSync(resolved, {
2167
- recursive: true
2168
- });
2169
- } catch (e) {
2170
- throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
2171
- }
2172
- }
2173
-
2174
- // Stable shim dir name — same process will get the same dir; concurrent
2175
- // socket-cli invocations on the same machine share it. The symlink target
2176
- // is whatever python3 resolves to NOW; if PATH changes between invocations
2177
- // we replace the symlink.
2178
- const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
2179
-
2180
- // Cache the result for the lifetime of this process.
2181
- let cached = null;
2182
-
2183
- // Safe wrapper around whichBin that returns null instead of throwing when
2184
- // nothrow semantics are broken in older registry versions (realpath 'null' bug).
2185
- async function safeWhichBin(name) {
2186
- try {
2187
- return (await bin.whichBin(name, {
2188
- nothrow: true
2189
- })) ?? null;
2190
- } catch {
2191
- return null;
2192
- }
2193
- }
2194
- async function provisionPythonShim() {
2195
- if (cached) {
2196
- return cached;
2197
- }
2198
- const pythonOnPath = await safeWhichBin('python');
2199
- if (pythonOnPath) {
2200
- cached = {
2201
- augmentedEnv: undefined,
2202
- shimDir: undefined
2203
- };
2204
- return cached;
2205
- }
2206
- const python3OnPath = await safeWhichBin('python3');
2207
- if (!python3OnPath) {
2208
- throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
2209
- }
2210
- const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
2211
- fs$1.mkdirSync(shimDir, {
2212
- recursive: true
2213
- });
2214
- const linkPath = path.join(shimDir, 'python');
2215
- // Replace the symlink defensively in case python3's resolved path moved.
2216
- if (fs$1.existsSync(linkPath)) {
2217
- try {
2218
- fs$1.unlinkSync(linkPath);
2219
- } catch {
2220
- // Tolerate races; the next symlinkSync may still succeed.
2221
- }
2222
- }
2223
- // The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
2224
- // so a concurrent socket-cli invocation may re-create the link between our
2225
- // unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
2226
- // other process won the race and left a usable shim in place.
2227
- try {
2228
- fs$1.symlinkSync(python3OnPath, linkPath);
2229
- } catch (e) {
2230
- if (e.code === 'EEXIST' && fs$1.existsSync(linkPath)) ; else {
2231
- throw e;
2232
- }
2233
- }
2234
- const augmentedEnv = {
2235
- ...process.env,
2236
- PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
2237
- };
2238
- cached = {
2239
- augmentedEnv,
2240
- shimDir
2241
- };
2242
- return cached;
2243
- }
2244
-
2245
1944
  // Default per-invocation timeout for bazel queries. Bazel cold-cache starts
2246
1945
  // can take several minutes; 10 minutes is generous while still bounding CI hangs.
2247
1946
  const BAZEL_QUERY_TIMEOUT_MS = 600_000;
@@ -2258,42 +1957,58 @@ function splitBazelFlags(flags) {
2258
1957
  }
2259
1958
  return flags.split(/\s+/).filter(Boolean);
2260
1959
  }
2261
- function buildBazelModShowVisibleReposArgv(opts) {
1960
+
1961
+ // Build the shared startup-flag prefix for any bazel invocation. Centralised
1962
+ // so `--output_user_root` propagates to every spawn — principle 7 of the
1963
+ // Maven design requires per-invocation server isolation across query,
1964
+ // cquery, and `bazel mod` commands alike.
1965
+ function buildStartupFlags(opts) {
2262
1966
  const startup = [];
2263
1967
  if (opts.bazelRc) {
2264
1968
  startup.push(`--bazelrc=${opts.bazelRc}`);
2265
1969
  }
1970
+ if (opts.outputUserRoot) {
1971
+ startup.push(`--output_user_root=${opts.outputUserRoot}`);
1972
+ }
2266
1973
  if (opts.bazelOutputBase) {
2267
1974
  startup.push(`--output_base=${opts.bazelOutputBase}`);
2268
1975
  }
1976
+ return startup;
1977
+ }
1978
+ function buildBazelModShowVisibleReposArgv(opts) {
1979
+ const userFlags = splitBazelFlags(opts.bazelFlags);
1980
+ return [...buildStartupFlags(opts), 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
1981
+ }
1982
+ function buildBazelModShowMavenExtensionArgv(opts) {
2269
1983
  const userFlags = splitBazelFlags(opts.bazelFlags);
2270
- return [...startup, 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
1984
+ return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven',
1985
+ // Belt-and-suspenders output reducer mirroring the PyPI path: bias the
1986
+ // report toward the root module's usages. The authoritative pruning is
1987
+ // the importers-filter applied to the parsed output, so this is not
1988
+ // relied on for correctness.
1989
+ '--extension_usages=<root>', ...userFlags];
2271
1990
  }
2272
1991
  function buildBazelModShowPipExtensionArgv(opts) {
2273
- const startup = [];
2274
- if (opts.bazelRc) {
2275
- startup.push(`--bazelrc=${opts.bazelRc}`);
2276
- }
2277
- if (opts.bazelOutputBase) {
2278
- startup.push(`--output_base=${opts.bazelOutputBase}`);
2279
- }
2280
1992
  const userFlags = splitBazelFlags(opts.bazelFlags);
2281
- return [...startup, 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
1993
+ return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
2282
1994
  }
2283
1995
  function buildBazelArgv(queryStr, opts, output = 'build') {
2284
1996
  // Startup flags MUST precede the `query` subcommand.
2285
1997
  // Bazel argv shape: <startup> query <queryFlags> <invocationFlags> <queryStr> --output=<output> <userFlags>
2286
- const startup = [];
2287
- if (opts.bazelRc) {
2288
- startup.push(`--bazelrc=${opts.bazelRc}`);
2289
- }
2290
- if (opts.bazelOutputBase) {
2291
- startup.push(`--output_base=${opts.bazelOutputBase}`);
2292
- }
2293
1998
  // Keep query output stable and avoid updating Bazel lockfiles while extracting.
2294
1999
  const queryFlags = ['--lockfile_mode=off', '--noshow_progress'];
2295
2000
  const userFlags = splitBazelFlags(opts.bazelFlags);
2296
- return [...startup, 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
2001
+ return [...buildStartupFlags(opts), 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
2002
+ }
2003
+
2004
+ // Lightweight presence-check cquery used by the tri-state probe classifier.
2005
+ // `--keep_going --output=label` keeps it fast even on partial-analysis
2006
+ // repos and avoids paying for `--output=jsonproto` plus
2007
+ // `--proto:output_rule_attrs` (which the heavier metadata extraction in
2008
+ // `bazel-cquery.mts` needs but the probe does not).
2009
+ function buildBazelProbeCqueryArgv(repoName, opts) {
2010
+ const userFlags = splitBazelFlags(opts.bazelFlags);
2011
+ return [...buildStartupFlags(opts), 'cquery', '--lockfile_mode=off', '--noshow_progress', ...opts.invocationFlags, `@${repoName}//...`, '--output=label', '--keep_going', ...userFlags];
2297
2012
  }
2298
2013
  function stringField(value) {
2299
2014
  return typeof value === 'string' ? value : '';
@@ -2407,14 +2122,7 @@ async function runBazelQuery(queryStr, opts, output) {
2407
2122
  }
2408
2123
  }
2409
2124
  }
2410
-
2411
- /**
2412
- * Bzlmod-native visible repository enumeration. This is only a candidate
2413
- * source; callers must still validate each returned apparent repo name with a
2414
- * semantic query for generated ecosystem rules.
2415
- */
2416
- async function runBazelModShowVisibleRepos(opts) {
2417
- const argv = buildBazelModShowVisibleReposArgv(opts);
2125
+ async function runBazelOneShot(argv, opts, step) {
2418
2126
  if (opts.verbose) {
2419
2127
  logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
2420
2128
  }
@@ -2446,364 +2154,830 @@ async function runBazelModShowVisibleRepos(opts) {
2446
2154
  durationMs: Date.now() - startedAt,
2447
2155
  opts,
2448
2156
  result,
2449
- step: 'bazel mod dump_repo_mapping'
2157
+ step
2450
2158
  });
2451
2159
  return result;
2452
2160
  }
2453
2161
 
2454
2162
  /**
2455
- * Bzlmod-native rules_python pip extension usage inspection. This is the
2456
- * authoritative source for root-module pip.parse metadata when Bazel supports
2457
- * the command; callers keep bounded static parsing as fallback.
2163
+ * Bzlmod-native visible repository enumeration. NOTE: only consumed by the
2164
+ * legacy PyPI path; the Maven path uses `runBazelModShowMavenExtension`
2165
+ * instead because `dump_repo_mapping` over-enumerates apparent names that
2166
+ * are not Maven hubs.
2458
2167
  */
2459
- async function runBazelModShowPipExtension(opts) {
2460
- const argv = buildBazelModShowPipExtensionArgv(opts);
2461
- if (opts.verbose) {
2462
- logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
2463
- }
2464
- const startedAt = Date.now();
2465
- let result;
2466
- try {
2467
- const output = await spawn.spawn(opts.bin, argv, {
2468
- cwd: opts.cwd,
2469
- timeout: BAZEL_QUERY_TIMEOUT_MS,
2470
- ...(opts.env ? {
2471
- env: opts.env
2472
- } : {})
2473
- });
2474
- const {
2475
- code,
2476
- stderr,
2477
- stdout
2478
- } = output;
2479
- result = {
2480
- code,
2481
- stdout,
2482
- stderr
2483
- };
2484
- } catch (e) {
2485
- result = normalizeSpawnError(e);
2486
- }
2487
- logBazelTrace({
2488
- argv,
2489
- durationMs: Date.now() - startedAt,
2490
- opts,
2491
- result,
2492
- step: 'bazel mod show_extension rules_python pip'
2493
- });
2494
- return result;
2168
+ async function runBazelModShowVisibleRepos(opts) {
2169
+ return await runBazelOneShot(buildBazelModShowVisibleReposArgv(opts), opts, 'bazel mod dump_repo_mapping');
2495
2170
  }
2496
2171
 
2497
2172
  /**
2498
- * Build a `RepoProbe` (compatible with bazel-repo-discovery) bound to opts.
2499
- * Used by `discoverMavenRepos` to validate candidate Maven repo
2500
- * names against the running workspace.
2173
+ * Bzlmod-native Maven hub enumeration via the rules_jvm_external maven
2174
+ * extension. The text-format report lists every repo the extension
2175
+ * generated; `parseShowExtensionOutput` (bazel-repo-discovery.mts)
2176
+ * extracts the hubs from the `Fetched repositories:` section.
2501
2177
  */
2502
- function buildProbeFor(opts) {
2503
- return async repoName => {
2504
- const queryStr = `kind("jvm_import rule|aar_import rule", @${repoName}//:*)`;
2505
- const result = await runBazelQuery(queryStr, opts);
2506
- return {
2507
- stdout: result.stdout,
2508
- code: result.code
2509
- };
2510
- };
2178
+ async function runBazelModShowMavenExtension(opts) {
2179
+ return await runBazelOneShot(buildBazelModShowMavenExtensionArgv(opts), opts, 'bazel mod show_extension rules_jvm_external maven');
2511
2180
  }
2512
2181
 
2513
2182
  /**
2514
- * Build a `RepoProbe` for validating pip hub candidates.
2515
- * Queries the hub for package targets (e.g. `@<hub>//...`) and returns
2516
- * stdout so the caller can check for `:pkg` labels or alias rules.
2517
- * Does NOT require `pypi_name=` tags in the hub output, because those
2518
- * tags live on spoke repos, not the hub alias layer.
2183
+ * Bzlmod-native rules_python pip extension usage inspection. Used by the
2184
+ * PyPI path; kept here since the argv shape is identical to the maven
2185
+ * variant modulo the extension target.
2186
+ */
2187
+ async function runBazelModShowPipExtension(opts) {
2188
+ return await runBazelOneShot(buildBazelModShowPipExtensionArgv(opts), opts, 'bazel mod show_extension rules_python pip');
2189
+ }
2190
+
2191
+ /**
2192
+ * Build a `RepoProbe` (compatible with bazel-repo-discovery's tri-state
2193
+ * classifier) bound to opts. Runs the lightweight presence-check cquery
2194
+ * `@<name>//... --output=label --keep_going` — cheap enough to attempt
2195
+ * every conventional Maven hub name without triggering `repository_rule`
2196
+ * fetches on undefined names (Exp 3).
2197
+ */
2198
+ function buildMavenProbeFor(opts) {
2199
+ return async repoName => {
2200
+ const argv = buildBazelProbeCqueryArgv(repoName, opts);
2201
+ const result = await runBazelOneShot(argv, opts, `bazel cquery probe @${repoName}`);
2202
+ return {
2203
+ code: result.code,
2204
+ stdout: result.stdout,
2205
+ stderr: result.stderr
2206
+ };
2207
+ };
2208
+ }
2209
+
2210
+ /**
2211
+ * Build a `RepoProbe` for validating pip hub candidates.
2212
+ * Queries the hub for package targets (e.g. `@<hub>//...`) and returns the
2213
+ * full result triple so the caller can check for `:pkg` labels or alias
2214
+ * rules. Does NOT require `pypi_name=` tags in the hub output, because
2215
+ * those tags live on spoke repos, not the hub alias layer.
2519
2216
  */
2520
2217
  function buildPypiProbeFor(opts) {
2521
2218
  return async hubName => {
2522
2219
  const queryStr = `@${hubName}//...`;
2523
2220
  const result = await runBazelQuery(queryStr, opts);
2524
2221
  return {
2222
+ code: result.code,
2525
2223
  stdout: result.stdout,
2526
- code: result.code
2224
+ stderr: result.stderr
2527
2225
  };
2528
2226
  };
2529
2227
  }
2530
2228
 
2531
- // Maximum size (bytes) we will read for any single Bazel workspace file.
2532
- // Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
2533
- const MAX_WORKSPACE_FILE_BYTES$1 = 5 * 1024 * 1024;
2534
-
2535
- // Maximum candidate count we will return (deduped) before truncating.
2536
- // Real repos have <20; this is a hard ceiling against pathological inputs.
2537
- const MAX_CANDIDATES$1 = 256;
2229
+ /**
2230
+ * Per-repo metadata cquery + jsonproto parser for the Maven path.
2231
+ *
2232
+ * Pipeline:
2233
+ * 1. Build a cquery argv targeting `attr("tags", "\bmaven_coordinates=",
2234
+ * @<repo>//...)` plus a union variant for the direct `maven_coordinates`
2235
+ * attribute. `--output=jsonproto` +
2236
+ * `--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps`
2237
+ * keeps the payload small while still surfacing the resolved Maven graph.
2238
+ * 2. Spawn under a caller-supplied `outputUserRoot` so the orchestrator can
2239
+ * reap the server cleanly (`bazel --output_user_root=<this> shutdown`
2240
+ * followed by `rm -rf`). The runner itself never deletes anything —
2241
+ * server lifecycle is the orchestrator's concern.
2242
+ * 3. Parse the jsonproto stream defensively: dispatch on `attribute[].type`
2243
+ * and accept both camelCase (`stringValue`, `stringListValue`) and
2244
+ * snake_case (`string_value`, `string_list_value`) payload keys.
2245
+ * 4. Extract the maven coordinate from the direct `maven_coordinates` attr
2246
+ * when present, else scan `tags` for `maven_coordinates=<G:A:V>`.
2247
+ * 5. Resolve each rule's `deps`/`exports`/`runtime_deps` label edges into
2248
+ * versionless Maven coordinates against this repo's own targets, while
2249
+ * `repoName` is still in scope. Edges that point at a hub-prefixed target
2250
+ * we cannot resolve are reported as `unresolvedLabels` so the caller can
2251
+ * flip the hub partial rather than silently dropping graph edges.
2252
+ * 6. Tag every artifact with `workspace:<rel-path>` + `repo:<name>`
2253
+ * provenance via `sourceRepo`.
2254
+ */
2538
2255
 
2539
- // Regex strategy: anchored, bounded character classes, no nested quantifiers.
2540
- // Match `use_repo(maven, "X", "Y", ...)` with a bounded arg-list window to
2541
- // avoid catastrophic backtracking on hostile input.
2542
-
2543
- // Bzlmod use_repo(maven, "name1", "name2"...).
2544
- // Bounded: matches up to ~4KB of arg list to avoid catastrophic backtracking.
2545
- const USE_REPO_RE = /use_repo\s*\(\s*maven\s*,([^)]{0,4096})\)/g;
2546
- const BAZEL_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
2547
- const BAZEL_REPO_NAME_RE = new RegExp(`^${BAZEL_REPO_NAME_PATTERN}$`);
2548
- // Quoted-name extractor inside the captured argument blob.
2549
- const QUOTED_NAME_RE = new RegExp(`"(${BAZEL_REPO_NAME_PATTERN})"`, 'g');
2550
-
2551
- // Legacy maven_install(name = "X", ...) on a single statement.
2552
- // Match the name= keyword arg specifically; bounded.
2553
- const MAVEN_INSTALL_NAME_RE = new RegExp(`maven_install\\s*\\([^)]{0,8192}?\\bname\\s*=\\s*"(${BAZEL_REPO_NAME_PATTERN})"`, 'g');
2554
- const MAVEN_COORDINATES_MARKER_RE = /\bmaven_coordinates\s*=/;
2256
+ // One Maven artifact recovered from the cquery stream. `ruleKind` is whatever
2257
+ // `ruleClass` jsonproto reports (`jvm_import`, `aar_import`, `java_library`,
2258
+ // `kt_jvm_import`, any future rules_jvm_external rule), so the type is open.
2259
+ // `deps` holds resolved versionless Maven coordinates (the parser resolves the
2260
+ // rule's label edges against this repo's own targets), not raw Bazel labels.
2261
+
2262
+ // Result of parsing one repo's cquery stream: the recovered artifacts (with
2263
+ // resolved coordinate edges in `deps`) plus any hub-prefixed dep labels that
2264
+ // could not be resolved.
2265
+
2266
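+ // Tag carrying a Maven coordinate: `maven_coordinates=<coord>`, where <coord>
2267
+ // is typically `g:a:v` (3 parts) or `g:a:v:classifier` / `g:a:packaging:v`
2268
+ // (4-part rules_jvm_external shapes). The pattern does not validate the
+ // coordinate; it captures whatever non-empty text follows the `=`.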
2269
+ const MAVEN_COORD_TAG_RE = /^maven_coordinates=(.+)$/;
2270
+
2271
+ // The dep/export/runtime_deps attributes whose label edges encode the
2272
+ // resolved Maven graph. rules_jvm_external writes `jvm_import.deps` (e.g.
2273
+ // `junit` -> `@maven//:org_hamcrest_hamcrest_core`); compile/runtime scopes
2274
+ // surface via `exports`/`runtime_deps`. We union all three.
2275
+ const EDGE_ATTR_NAMES = new Set(['deps', 'exports', 'runtime_deps']);
2276
+
2277
// Compose the cquery target expression that selects Maven metadata rules in
// one external repo. Two `attr(...)` selectors over `@<repoName>//...` are
// joined with `union`:
//   - rules whose `tags` contain `maven_coordinates=` starting on a word
//     boundary (so a tag such as `pre_maven_coordinates=fake` is not selected);
//   - rules with a non-empty direct `maven_coordinates` attribute.
// Rules that only carry `maven_url` are not selected by either predicate.
function buildMetadataCqueryExpr(repoName) {
  const scope = `@${repoName}//...`;
  const byTag = `attr("tags", "\\bmaven_coordinates=", ${scope})`;
  const byDirectAttr = `attr("maven_coordinates", ".+", ${scope})`;
  return `${byTag} union ${byDirectAttr}`;
}
2293
+
2294
// Assemble the complete argv for one repo's metadata cquery, without spawning
// anything. Layout of the result:
//   1. startup flags (`--bazelrc`, `--output_user_root`, `--output_base`),
//      each emitted only when the matching option is truthy;
//   2. the `cquery` command with its fixed flags, the caller's
//      `opts.invocationFlags`, the target expression and the jsonproto
//      output settings;
//   3. the user's extra flags, as split by `splitBazelFlags(opts.bazelFlags)`.
function buildMetadataCqueryArgv(repoName, opts) {
  const startupFlags = [
    ['--bazelrc', opts.bazelRc],
    ['--output_user_root', opts.outputUserRoot],
    ['--output_base', opts.bazelOutputBase]
  ]
    .filter(([, value]) => value)
    .map(([flag, value]) => `${flag}=${value}`);
  const extraFlags = splitBazelFlags(opts.bazelFlags);
  const commandArgs = [
    'cquery',
    '--lockfile_mode=off',
    '--noshow_progress',
    ...opts.invocationFlags,
    buildMetadataCqueryExpr(repoName),
    '--output=jsonproto',
    '--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps',
    '--keep_going'
  ];
  return [...startupFlags, ...commandArgs, ...extraFlags];
}
2310
// Read the payload of a jsonproto attribute tagged `type: "STRING"`.
// Accepts the camelCase key (`stringValue`) first, then the snake_case key
// (`string_value`). Returns undefined for any other attribute type or when
// neither key holds a string.
function readStringAttr(attr) {
  if (attr.type !== 'STRING') {
    return undefined;
  }
  for (const key of ['stringValue', 'string_value']) {
    const value = attr[key];
    if (typeof value === 'string') {
      return value;
    }
  }
  return undefined;
}
2322
// Read the payload of a jsonproto attribute tagged `type: "STRING_LIST"`.
// Accepts the camelCase key (`stringListValue`) first, then the snake_case
// key (`string_list_value`). Returns undefined for any other attribute type
// or when neither key holds an array.
function readStringListAttr(attr) {
  if (attr.type !== 'STRING_LIST') {
    return undefined;
  }
  const { stringListValue: camel, string_list_value: snake } = attr;
  if (Array.isArray(camel)) {
    return camel;
  }
  return Array.isArray(snake) ? snake : undefined;
}
2555
2334
 
2556
- // Reads file contents, refusing files that exceed MAX_WORKSPACE_FILE_BYTES.
2557
- // Returns null when the file is missing, oversized, or unreadable.
2558
- function safeReadFile$1(file) {
2559
- if (!fs$1.existsSync(file)) {
2560
- return null;
2335
// Read the payload of a jsonproto attribute tagged `type: "LABEL_LIST"`.
// The labels are looked up under the same keys a string list uses
// (`stringListValue`, then `string_list_value`); only the `type` tag differs,
// which is why the `STRING_LIST` reader cannot be reused for edge attributes
// such as `deps`. Returns undefined for any other attribute type or when
// neither key holds an array.
function readLabelListAttr(attr) {
  if (attr.type !== 'LABEL_LIST') {
    return undefined;
  }
  const { stringListValue: camel, string_list_value: snake } = attr;
  if (Array.isArray(camel)) {
    return camel;
  }
  return Array.isArray(snake) ? snake : undefined;
}
2572
2352
 
2573
- // Walks workspace root for legacy Starlark sources we can scan: WORKSPACE
2574
- // (and WORKSPACE.bazel) plus top-level .bzl files. Non-recursive by design;
2575
- // Phase 1 explicitly avoids static Starlark parsing at depth.
2576
- function listLegacyStarlarkFiles$1(cwd) {
2577
- const files = [];
2578
- const candidates = ['WORKSPACE', 'WORKSPACE.bazel'];
2579
- for (const c of candidates) {
2580
- const p = path.join(cwd, c);
2581
- if (fs$1.existsSync(p)) {
2582
- files.push(p);
2583
- }
2353
// Drop the last `:`-separated segment (the version) from a Maven coordinate
// while keeping everything before it:
//   `g:a:v`                      -> `g:a`
//   `g:a:packaging:v`            -> `g:a:packaging`
//   `g:a:packaging:classifier:v` -> `g:a:packaging:classifier`
// A coordinate with fewer than three segments has no version segment and is
// returned as-is.
function versionlessCoordinate(coord) {
  const segmentCount = coord.split(':').length;
  // Cutting at the last colon is the same as removing the final segment.
  return segmentCount >= 3 ? coord.slice(0, coord.lastIndexOf(':')) : coord;
}
2368
+
2369
// Return the repo prefix of a repo-qualified label: everything up to and
// including the first `//`. Works for apparent names (`@maven//:foo` ->
// `@maven//`) and for canonical `@@...` names alike, since only the leading
// `@` and the first `//` are inspected. Labels that do not start with `@`, or
// that contain no `//`, yield undefined.
function repoPrefixOfLabel(label) {
  const slashesAt = label.startsWith('@') ? label.indexOf('//') : -1;
  return slashesAt < 0 ? undefined : label.slice(0, slashesAt + 2);
}
2383
+
2384
// Return the bare rule name of a label: the text after its last `:` (e.g.
// `@maven//:com_google_guava_guava` -> `com_google_guava_guava`). A label
// without any `:` is returned unchanged.
function ruleNameFromLabel(label) {
  // lastIndexOf yields -1 when there is no colon, so the slice starts at 0
  // and returns the whole label.
  return label.slice(label.lastIndexOf(':') + 1);
}
2390
+
2391
// Pull the Maven coordinate out of a rule's `attribute` list.
//   - A non-empty direct `maven_coordinates` string attribute always wins,
//     whether it appears before or after `tags`.
//   - Otherwise the first `tags` entry of the form `maven_coordinates=<value>`
//     supplies the coordinate.
// Returns undefined when neither source yields a value (including when the
// rule has no `attribute` list at all).
function extractMavenCoordinate(rule) {
  let found;
  for (const attr of rule.attribute ?? []) {
    switch (attr.name) {
      case 'maven_coordinates': {
        const value = readStringAttr(attr);
        if (value) {
          // Unconditional overwrite: the direct attribute outranks any tag.
          found = value;
        }
        break;
      }
      case 'tags': {
        for (const tag of readStringListAttr(attr) ?? []) {
          const match = MAVEN_COORD_TAG_RE.exec(tag);
          // Only fill in from a tag while nothing has been found yet.
          if (match && !found) {
            found = match[1];
          }
        }
        break;
      }
      default:
        break;
    }
  }
  return found;
}
2597
2417
 
2598
- // Returns deduplicated, sorted list of items, capped at MAX_CANDIDATES.
2599
- function uniqueSorted(items) {
2600
- const seen = new Set();
2601
- const out = [];
2602
- for (const item of items) {
2603
- if (!seen.has(item)) {
2604
- seen.add(item);
2605
- out.push(item);
2606
- if (out.length >= MAX_CANDIDATES$1) {
2607
- break;
2418
// Gather every label listed under a rule's edge attributes (the names in
// `EDGE_ATTR_NAMES`), concatenated in attribute order. Duplicates are kept;
// attributes that are unnamed, not edge attributes, or not readable as a
// label list contribute nothing.
function extractEdgeLabels(rule) {
  const edges = [];
  for (const attr of rule.attribute ?? []) {
    const isEdgeAttr = Boolean(attr.name) && EDGE_ATTR_NAMES.has(attr.name);
    const targets = isEdgeAttr ? readLabelListAttr(attr) : undefined;
    if (targets) {
      for (const target of targets) {
        edges.push(target);
      }
    }
  }
  return edges;
}
2613
- function apparentNameFromJsonValue(value) {
2614
- if (!value || typeof value !== 'object') {
2615
- return undefined;
2616
- }
2617
- const obj = value;
2618
- const direct = obj['apparentName'] ?? obj['apparent_name'];
2619
- if (typeof direct === 'string') {
2620
- return direct;
2431
+
2432
+ // A coordinate-bearing rule recovered from the cquery stream, before its edge
2433
+ // labels are resolved to coordinates.
2434
+
2435
// Index the coordinate-bearing records of one repo for dep-label resolution.
// The returned object holds:
//   - `fullLabels`:     Map of record `fullLabel` -> versionless coordinate;
//   - `suffixToCoords`: Map of `:<ruleName>` -> Set of versionless
//                       coordinates sharing that rule name (fallback lookup);
//   - `hubPrefixes`:    Set of the `@<repo>//` prefixes seen on the records'
//                       full labels.
function buildLabelCoordIndex(records) {
  const index = {
    fullLabels: new Map(),
    hubPrefixes: new Set(),
    suffixToCoords: new Map()
  };
  for (const { coord, fullLabel, ruleName } of records) {
    const versionless = versionlessCoordinate(coord);
    index.fullLabels.set(fullLabel, versionless);

    const suffixKey = `:${ruleName}`;
    let bucket = index.suffixToCoords.get(suffixKey);
    if (!bucket) {
      bucket = new Set();
      index.suffixToCoords.set(suffixKey, bucket);
    }
    bucket.add(versionless);

    const hubPrefix = repoPrefixOfLabel(fullLabel);
    if (hubPrefix) {
      index.hubPrefixes.add(hubPrefix);
    }
  }
  return index;
}
2461
// True when `label` starts with at least one of the given hub prefixes.
function isHubPrefixed(label, hubPrefixes) {
  return [...hubPrefixes].some(prefix => label.startsWith(prefix));
}
2630
- function apparentNamesFromRepoMapping(value) {
2631
- if (!value || typeof value !== 'object' || Array.isArray(value)) {
2632
- return [];
2469
// Classify one dep label against the index built by `buildLabelCoordIndex`.
// The result is one of:
//   - `{ kind: 'coord', coord }` when the label matches a full label, when it
//     is hub-prefixed and its `:<ruleName>` suffix maps to exactly one
//     coordinate, or when it is not a Bazel label at all but a colon-separated
//     string (treated as a coordinate and made versionless);
//   - `{ kind: 'unresolved' }` when it is hub-prefixed but the suffix lookup
//     finds nothing or more than one coordinate;
//   - `{ kind: 'drop' }` for everything else (other repos, `:local`,
//     `//pkg:target`, colon-free strings).
function resolveDepLabel(label, index) {
  const asCoord = coord => ({ coord, kind: 'coord' });

  const exact = index.fullLabels.get(label);
  if (exact) {
    return asCoord(exact);
  }

  if (isHubPrefixed(label, index.hubPrefixes)) {
    // The suffix fallback is only trusted when it is unambiguous.
    const candidates = index.suffixToCoords.get(`:${ruleNameFromLabel(label)}`);
    if (candidates?.size === 1) {
      const [only] = candidates;
      return asCoord(only);
    }
    return { kind: 'unresolved' };
  }

  // Anything starting with `@`, `:` or `//` is a Bazel label, not a coordinate.
  const looksLikeBazelLabel = ['@', ':', '//'].some(lead => label.startsWith(lead));
  if (label.includes(':') && !looksLikeBazelLabel) {
    return asCoord(versionlessCoordinate(label));
  }
  return { kind: 'drop' };
}
2649
2514
 
2650
- // Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accept the
2651
- // older streamed jsonproto shape in case older Bazel versions or fixtures still
2652
- // return repository records with apparentName fields.
2653
- function parseVisibleRepoCandidates(output) {
2654
- const candidates = [];
2655
- for (const line of output.split(/\r?\n/)) {
2656
- const trimmed = line.trim();
2657
- if (!trimmed) {
2658
- continue;
2659
- }
2660
- try {
2661
- const parsed = JSON.parse(trimmed);
2662
- candidates.push(...apparentNamesFromRepoMapping(parsed));
2663
- const apparentName = apparentNameFromJsonValue(parsed);
2664
- if (apparentName) {
2665
- const repo = normalizeRepoName(apparentName);
2666
- if (repo) {
2667
- candidates.push(repo);
2515
// Parse the stdout of a jsonproto cquery into Maven artifacts.
//
// Input handling:
//   - blank stdout yields an empty result;
//   - stdout is first parsed as one JSON document of the shape
//     `{ results: [{ target }, ...] }`;
//   - if that produces no targets, each non-blank line is parsed on its own
//     and kept when it has a `rule` field. Lines that are not JSON are
//     skipped.
//
// Output: `{ artifacts, unresolvedLabels }`. Each artifact carries the rule's
// original coordinate (`mavenCoordinates`), its resolved dep coordinates
// (`deps`, de-duplicated), `ruleKind`, `ruleName` and `sourceRepo`, which is
// `<workspaceRelPath>:<repoName>` when a workspace path is given and plain
// `repoName` otherwise. `unresolvedLabels` lists, once each, the dep labels
// that `resolveDepLabel` classified as `unresolved`.
function parseCqueryJsonproto(stdout, repoName, workspaceRelPath) {
  if (stdout.trim() === '') {
    return {
      artifacts: [],
      unresolvedLabels: []
    };
  }

  // Envelope form first. Targets are pushed one by one so that anything
  // collected before a malformed entry throws is still kept.
  const targets = [];
  try {
    const envelope = JSON.parse(stdout);
    if (envelope.results) {
      for (const entry of envelope.results) {
        if (entry.target) {
          targets.push(entry.target);
        }
      }
    }
  } catch {
    // Not a single usable JSON document; the per-line scan below takes over.
  }
  if (targets.length === 0) {
    const candidateLines = stdout
      .split(/\r?\n/)
      .map(line => line.trim())
      .filter(Boolean);
    for (const text of candidateLines) {
      try {
        const candidate = JSON.parse(text);
        if (candidate?.rule) {
          targets.push(candidate);
        }
      } catch {
        // Skip malformed lines.
      }
    }
  }

  // Pass 1: keep rule targets that have a name and a recoverable coordinate,
  // remembering their raw edge labels for the resolution pass.
  const records = [];
  for (const target of targets) {
    const { rule, type } = target;
    if ((type && type !== 'RULE') || !rule || !rule.name) {
      continue;
    }
    const coord = extractMavenCoordinate(rule);
    if (!coord) {
      continue;
    }
    records.push({
      coord,
      edgeLabels: extractEdgeLabels(rule),
      fullLabel: rule.name,
      ruleKind: rule.ruleClass ?? rule.rule_class ?? 'unknown',
      ruleName: ruleNameFromLabel(rule.name)
    });
  }

  // Pass 2: resolve each record's edge labels against the index built from
  // all records of this repo.
  const index = buildLabelCoordIndex(records);
  const sourceRepo = workspaceRelPath ? `${workspaceRelPath}:${repoName}` : repoName;
  const unresolved = new Set();
  const artifacts = records.map(record => {
    const deps = new Set();
    for (const label of record.edgeLabels) {
      const { coord, kind } = resolveDepLabel(label, index);
      if (kind === 'coord') {
        deps.add(coord);
      } else if (kind === 'unresolved') {
        unresolved.add(label);
      }
    }
    return {
      deps: [...deps],
      mavenCoordinates: record.coord,
      ruleKind: record.ruleKind,
      ruleName: record.ruleName,
      sourceRepo
    };
  });
  return {
    artifacts,
    unresolvedLabels: [...unresolved]
  };
}
2609
+
2610
// Map a finished cquery run onto a status string.
//   exit code 0, no artifacts parsed            -> 'empty'
//   exit code 0, artifacts, unresolved edges    -> 'partial'
//   exit code 0, artifacts, no unresolved edges -> 'ok'
//   non-zero exit, some artifacts parsed        -> 'partial'
//   non-zero exit, nothing parsed               -> 'error'
// Timeouts are not handled here; callers classify them separately.
function classifyCqueryOutcome(code, artifactCount, unresolvedCount) {
  if (code !== 0) {
    // A failed run may still have produced a usable subset on stdout.
    return artifactCount > 0 ? 'partial' : 'error';
  }
  if (!artifactCount) {
    return 'empty';
  }
  return unresolvedCount > 0 ? 'partial' : 'ok';
}
2628
+
2629
// Run the metadata cquery for one repo and report a structured outcome:
// `{ artifacts, durationMs, repoName, status, stderr, unresolvedLabels,
// workspaceRelPath }`.
//
// The process is spawned in `workspaceRoot` with `timeoutMs` as its timeout
// and `opts.env` as its environment when one is supplied. When the spawn
// resolves, stdout is parsed and the status comes from
// `classifyCqueryOutcome`. When it rejects, whatever stdout/stderr strings the
// error carries are still used: stdout is parsed so a partial result can be
// kept, and the status is `'timeout'` if the error looks like a timeout or
// kill (`timedOut`, `killed`, SIGTERM/SIGKILL), otherwise it is classified
// from the error's numeric `code` (1 when absent).
async function runMetadataCqueryForRepo(args) {
  const { opts, repoName, timeoutMs, workspaceRelPath, workspaceRoot } = args;
  const argv = buildMetadataCqueryArgv(repoName, opts);
  const startedAt = Date.now();

  // Shared result builder; `pickStatus` is called after the duration is
  // sampled, matching the field order of the returned object.
  const describeRun = (parsed, stderr, pickStatus) => ({
    artifacts: parsed.artifacts,
    durationMs: Date.now() - startedAt,
    repoName,
    status: pickStatus(),
    stderr,
    unresolvedLabels: parsed.unresolvedLabels,
    workspaceRelPath
  });

  try {
    const spawnOptions = {
      cwd: workspaceRoot,
      timeout: timeoutMs
    };
    if (opts.env) {
      spawnOptions.env = opts.env;
    }
    const { code, stderr, stdout } = await spawn.spawn(opts.bin, argv, spawnOptions);
    const parsed = parseCqueryJsonproto(stdout, repoName, workspaceRelPath);
    return describeRun(parsed, stderr, () =>
      classifyCqueryOutcome(code, parsed.artifacts.length, parsed.unresolvedLabels.length)
    );
  } catch (e) {
    const failure = e;
    const capturedStdout = typeof failure.stdout === 'string' ? failure.stdout : '';
    const capturedStderr = typeof failure.stderr === 'string' ? failure.stderr : '';
    const timedOut =
      failure.timedOut === true ||
      failure.killed === true ||
      failure.signal === 'SIGTERM' ||
      failure.signal === 'SIGKILL';
    // A rejected spawn can still have emitted a usable subset on stdout, so
    // parse it rather than discarding the run outright.
    const parsed = capturedStdout
      ? parseCqueryJsonproto(capturedStdout, repoName, workspaceRelPath)
      : { artifacts: [], unresolvedLabels: [] };
    const exitCode = typeof failure.code === 'number' ? failure.code : 1;
    return describeRun(parsed, capturedStderr, () =>
      timedOut
        ? 'timeout'
        : classifyCqueryOutcome(exitCode, parsed.artifacts.length, parsed.unresolvedLabels.length)
    );
  }
}
2761
2699
 
2762
- // The default maven_install repo name when no explicit `name=` is given.
2763
- // Included as a seed so repos that define maven_install in a subdirectory
2764
- // .bzl file (not scanned by parseMavenRepoCandidates) are still discovered.
2765
- const DEFAULT_MAVEN_REPO_SEED = 'maven';
2700
// Set once `java -version` has succeeded, so the probe runs at most once per
// process after a success. A failed probe is not remembered and is retried
// on the next call.
let probed = false;

// Check that `java -version` can be executed in the current environment.
// The command's output is discarded (`stdio: 'ignore'`); only success or
// failure matters. Nothing is done to locate a JDK or to change PATH.
//
// Throws an Error naming the prerequisite when the command fails. The
// underlying failure is attached as `cause` so the real reason (missing
// binary, non-zero exit, ...) is not lost.
function ensureJavaOnPath() {
  if (probed) {
    return;
  }
  try {
    childProcess.execSync('java -version', {
      stdio: 'ignore'
    });
  } catch (e) {
    throw new Error(
      'Java is required for Bazel JVM manifest extraction ' +
        '(rules_jvm_external invokes Coursier, which needs a JDK). ' +
        'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.',
      { cause: e }
    );
  }
  probed = true;
}
2721
+
2722
// Validate the user-supplied --bazel-output-base, resolved against `cwd`.
// Throws `utils.InputError` when:
//   - the raw input contains a `..` segment (checked on the raw string,
//     because path.resolve would normalise it away; both `/` and `\` are
//     treated as separators so the check behaves the same on every platform);
//   - the path exists but is not a directory;
//   - the path exists but is not writable;
//   - the path does not exist and cannot be created.
// Side effect: a missing directory is created (recursively).
function validateOutputBase(outputBase, cwd) {
  const segments = outputBase.split(/[\\/]/);
  if (segments.includes('..')) {
    throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
  }
  const resolved = path.resolve(cwd, outputBase);
  if (fs$1.existsSync(resolved)) {
    // A writable regular file would pass the access check below, yet it can
    // never hold an output tree, so reject it up front.
    let isDirectory = false;
    try {
      isDirectory = fs$1.statSync(resolved).isDirectory();
    } catch {
      // Path vanished or became unreadable between the two checks; report it
      // as unusable via the not-a-directory error below.
    }
    if (!isDirectory) {
      throw new utils.InputError(`--bazel-output-base is not a directory: ${resolved}`);
    }
    try {
      fs$1.accessSync(resolved, fs$1.constants.W_OK);
    } catch {
      throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
    }
    return;
  }
  // Path does not exist yet — create it so bazel can populate it.
  try {
    fs$1.mkdirSync(resolved, {
      recursive: true
    });
  } catch (e) {
    throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
  }
}
2756
+
2757
+ // Stable shim dir name — same process will get the same dir; concurrent
2758
+ // socket-cli invocations on the same machine share it. The symlink target
2759
+ // is whatever python3 resolves to NOW; if PATH changes between invocations
2760
+ // we replace the symlink.
2761
+ const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
2762
+
2763
+ // Cache the result for the lifetime of this process.
2764
+ let cached = null;
2765
+
2766
// Look up `name` with `bin.whichBin(name, { nothrow: true })` and normalise
// every "not found" outcome to null: a nullish result becomes null, and an
// exception thrown by the lookup despite `nothrow` is swallowed and also
// reported as null.
async function safeWhichBin(name) {
  let located;
  try {
    located = await bin.whichBin(name, {
      nothrow: true
    });
  } catch {
    return null;
  }
  return located ?? null;
}
2777
// Make a `python` executable available for child processes when only
// `python3` is installed. The result is memoised in `cached` for the rest of
// the process.
//
// Returns `{ augmentedEnv, shimDir }`:
//   - both undefined when `python` is already on PATH (nothing to do);
//   - otherwise `shimDir` is `<os.tmpdir()>/<SHIM_SUBDIR>`, containing a
//     `python` symlink to the resolved `python3`, and `augmentedEnv` is a copy
//     of `process.env` with `shimDir` prepended to PATH.
// Throws `utils.InputError` when neither `python` nor `python3` is found, and
// rethrows symlink-creation failures that leave no usable link behind.
async function provisionPythonShim() {
  if (cached) {
    return cached;
  }
  const pythonOnPath = await safeWhichBin('python');
  if (pythonOnPath) {
    cached = {
      augmentedEnv: undefined,
      shimDir: undefined
    };
    return cached;
  }
  const python3OnPath = await safeWhichBin('python3');
  if (!python3OnPath) {
    throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
  }
  const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
  fs$1.mkdirSync(shimDir, {
    recursive: true
  });
  const linkPath = path.join(shimDir, 'python');
  // Always try to remove a previous link before re-creating it. Guarding this
  // with existsSync() is wrong: existsSync follows symlinks, so a stale link
  // whose target has moved looks "absent", is never removed, and the
  // symlinkSync below then fails with EEXIST.
  try {
    fs$1.unlinkSync(linkPath);
  } catch {
    // Best-effort: nothing to remove (ENOENT) or a concurrent invocation got
    // there first. A real problem resurfaces from symlinkSync below.
  }
  // The shim dir is shared between concurrent invocations, so another process
  // may re-create the link between our unlink and symlink. EEXIST is
  // tolerated only when the link that is now in place actually resolves.
  try {
    fs$1.symlinkSync(python3OnPath, linkPath);
  } catch (e) {
    if (e?.code !== 'EEXIST' || !fs$1.existsSync(linkPath)) {
      throw e;
    }
  }
  const augmentedEnv = {
    ...process.env,
    PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
  };
  cached = {
    augmentedEnv,
    shimDir
  };
  return cached;
}
2827
+
2828
+ /**
2829
+ * Maven hub repo discovery for `socket manifest bazel`.
2830
+ *
2831
+ * - Bzlmod path: `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
2832
+ * emits a text-format report listing every repo the maven extension generated;
2833
+ * `parseShowExtensionOutput` extracts the names of hub repos (items annotated
2834
+ * with `(imported by ...)`) and skips generated per-artifact repos.
2835
+ * - Legacy WORKSPACE path: probe a fixed list of conventional Maven hub names.
2836
+ * Each probe is classified into `populated` / `empty` / `not-defined`; the
2837
+ * orchestrator keeps only the `populated` candidates.
2838
+ *
2839
+ * No Starlark source is read by this module. All semantic interpretation
2840
+ * comes from Bazel itself (`mod show_extension`, `cquery`).
2841
+ */
2842
+
2843
// Importer token Bazel prints for a hub generated on behalf of the root
// module itself (`(imported by <root>, …)`). Hubs imported only by rulesets
// (`rules_jvm_external@6.7`, `stardoc@0.7.2`, …) are build tooling rather than
// the user's SBOM; the orchestrator filters those out.
const ROOT_MODULE_IMPORTER = '<root>';

// A hub entry parsed from a `bazel mod show_extension` report carries the
// repo name plus every module that imported it (the `(imported by …)`
// annotation), merged across all lines the repo appears on.

// Conventional Maven hub names rules_jvm_external sets up under
// WORKSPACE-mode invocations. Probing each is cheap (a failed visibility
// lookup never triggers a `repository_rule` fetch), so the orchestrator can
// try all of them without paying for a real cquery on undefined repos.
const CONVENTIONAL_MAVEN_REPO_NAMES = ['maven', 'maven_install', 'maven_dev', 'unpinned_maven', 'maven_unpinned'];

// Message Bazel emits when a probed repo name is not visible to the main
// module. Separates `not-defined` (skip silently) from `empty` (the repo
// exists but has no targets). Accepts the single- and double-quote styles
// Bazel has used across versions.
const NOT_VISIBLE_STDERR_RE = /No repository visible as ['"]?@?[A-Za-z0-9._+-]+['"]? from/;
// Further "repo isn't analyzable" wording, seen especially under WORKSPACE
// mode and on Bazel 6.x. Also maps to `not-defined`.
const NO_SUCH_PACKAGE_STDERR_RE = /no such package ['"`]?@/;
// Signature of a repo that IS visible / defined but yields no targets
// (`--keep_going` plus `no targets found beneath`). The orchestrator skips
// `empty` and `not-defined` alike.
const NO_TARGETS_STDERR_RE = /no targets found beneath/i;
// Anchor for the maven extension's section header in
// `bazel mod show_extension` output. Accepts any canonical-name form Bazel
// has used (`@@rules_jvm_external+`, `@@rules_jvm_external~`, or a future
// separator) and tolerates trailing whitespace.
const SHOW_EXT_SECTION_HEADER_RE = /^## @@?[A-Za-z0-9._+~-]+\/\/:extensions\.bzl%maven:\s*$/m;
// Bullet under `Fetched repositories:` that names a hub repo, i.e. one with
// an `(imported by ...)` annotation. Bullets lacking the annotation are
// generated per-artifact repos and are skipped.
const FETCHED_HUB_BULLET_RE = /^ {2}- (?<name>\S+) \(imported by (?<importers>[^)]+)\)\s*$/;
2881
+
2882
/**
 * Pure parser for the stdout of
 * `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`.
 *
 * Returns the hub repos listed under the maven section's
 * `Fetched repositories:` heading, i.e. the bullets annotated with
 * `(imported by ...)`, each with the modules that imported it. Bullets without
 * the annotation (generated per-artifact repos) are skipped. A repo may appear
 * on several lines with different importers, so importers are merged per repo.
 * Lines that match neither pattern (blank lines, `DEBUG:` / `WARNING:` noise)
 * are ignored.
 *
 * @param {string} stdout - Raw report text.
 * @returns {Array<{importers: string[], name: string}>} Entries sorted by
 *   name, each with sorted importers; empty when the maven section or its
 *   `Fetched repositories:` list is absent.
 */
function parseShowExtensionOutput(stdout) {
  const headerMatch = SHOW_EXT_SECTION_HEADER_RE.exec(stdout);
  if (!headerMatch) {
    return [];
  }
  const afterHeader = stdout.slice(headerMatch.index + headerMatch[0].length);
  // Some Bazel versions print several extensions in one report. Cut the text
  // at the next `## ` header BEFORE looking for `Fetched repositories:`, so a
  // maven section without that list cannot pick up another extension's list.
  const nextSectionIdx = afterHeader.indexOf('\n## ');
  const section = nextSectionIdx === -1 ? afterHeader : afterHeader.slice(0, nextSectionIdx);
  const fetchedMarker = '\nFetched repositories:';
  const fetchedIdx = section.indexOf(fetchedMarker);
  if (fetchedIdx === -1) {
    return [];
  }
  const importersByName = new Map();
  for (const line of section.slice(fetchedIdx + fetchedMarker.length).split(/\r?\n/)) {
    // Defensive: the section is already bounded, but never read past a header.
    if (line.startsWith('## ')) {
      break;
    }
    const name = FETCHED_HUB_BULLET_RE.exec(line)?.groups?.['name'];
    if (!name) {
      // Blank line, log noise, or an un-imported generated artifact repo.
      continue;
    }
    const importers = importersByName.get(name) ?? new Set();
    const rawImporters = FETCHED_HUB_BULLET_RE.exec(line).groups['importers'] ?? '';
    for (const importer of rawImporters.split(',')) {
      const trimmed = importer.trim();
      if (trimmed) {
        importers.add(trimmed);
      }
    }
    importersByName.set(name, importers);
  }
  return [...importersByName.keys()].sort().map(name => ({
    importers: [...importersByName.get(name)].sort(),
    name
  }));
}
2932
+
2933
/**
 * Maps a raw probe result onto `'populated'`, `'empty'`, or `'not-defined'`.
 *
 * The probe is whatever the runner emits, typically a lightweight
 * `cquery '@<name>//...' --keep_going --output=label`. The orchestrator treats
 * `empty` and `not-defined` alike as no-ops; the distinction only serves
 * verbose-mode diagnostics.
 *
 * @param {{code: number, stdout: string, stderr: string}} result
 * @returns {'populated'|'empty'|'not-defined'}
 */
function classifyProbeResult(result) {
  const { code, stderr } = result;
  if (code === 0) {
    // Exit 0 with output: the repo exists and has at least one target.
    // Exit 0 with empty stdout: WORKSPACE-mode probes do this when the repo
    // name isn't declared, so it counts as not-defined.
    return result.stdout.trim().length > 0 ? 'populated' : 'not-defined';
  }
  // Non-zero exit: read the reason from stderr.
  if (NOT_VISIBLE_STDERR_RE.test(stderr) || NO_SUCH_PACKAGE_STDERR_RE.test(stderr)) {
    return 'not-defined';
  }
  // "no targets" means defined but empty. Any other failure is conservatively
  // called not-defined so the orchestrator skips it without erroring the
  // workspace.
  return NO_TARGETS_STDERR_RE.test(stderr) ? 'empty' : 'not-defined';
}
2961
+
2962
/**
 * Probes one candidate repo and returns its classified status, optionally
 * logging the outcome. Kept as thin orchestration around `probe` and
 * `classifyProbeResult` so the logging contract can be tested independently
 * of the runner implementation.
 *
 * @param {string} repoName - Repo name, without the leading `@`.
 * @param {Function} probe - Called with `repoName`; its awaited result is
 *   handed to `classifyProbeResult`.
 * @param {boolean} verbose - Emit `[VERBOSE]` diagnostics when truthy.
 * @returns {Promise<string>} The classified status. A probe that throws or
 *   rejects is reported as `'not-defined'`.
 */
async function probeCandidate(repoName, probe, verbose) {
  const prefix = `[VERBOSE] discovery: probe @${repoName}: `;
  let raw;
  try {
    raw = await probe(repoName);
  } catch (err) {
    if (verbose) {
      const reason = err instanceof Error ? err.message : String(err);
      logger.logger.log(`${prefix}not-defined (probe threw: ${reason})`);
    }
    return 'not-defined';
  }
  const status = classifyProbeResult(raw);
  if (verbose) {
    logger.logger.log(`${prefix}${status}`);
  }
  return status;
}
2808
2982
 
2809
2983
  // Detects whether the given Bazel workspace uses Bzlmod (MODULE.bazel),
@@ -2833,6 +3007,164 @@ function getBazelInvocationFlags(mode) {
2833
3007
  return ['--noenable_bzlmod', '--enable_workspace'];
2834
3008
  }
2835
3009
 
3010
+ /**
3011
+ * Walk the directory tree rooted at `cwd` and return every directory that
3012
+ * looks like a Bazel workspace root — i.e. contains `MODULE.bazel`,
3013
+ * `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots
3014
+ * (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example
3015
+ * `examples/<name>/MODULE.bazel`); the per-workspace algorithm in the
3016
+ * orchestrator runs once per discovered root.
3017
+ *
3018
+ * The walker is dependency-injected with the directory-prune policy:
3019
+ * callers pass the set of basenames and basename prefixes the walk must
3020
+ * refuse to descend into. This module intentionally hardcodes none of
3021
+ * the "common" prunes (`.git`, `node_modules`, …) — Bazel callers compose
3022
+ * the codebase-wide `IGNORED_DIRS` list (`src/utils/glob.mts`) with the
3023
+ * Bazel-specific bits (`bazel-*` output_base symlinks,
3024
+ * `.socket-auto-manifest`).
3025
+ *
3026
+ * Discovery is bounded-but-complete: the walk visits directories in
3027
+ * deterministic (sorted) order under a single visited-directory budget
3028
+ * (`MAX_WALK_DIRS`) as the only pathological-input / symlink-loop guard —
3029
+ * there is no depth cap, because the deepest workspace marker observed across
3030
+ * the OSS corpus (9) sat *below* the old depth-8 ceiling, so that ceiling
3031
+ * silently dropped real first-party modules. All roots found within the
3032
+ * budget are collected, sorted, then capped to `MAX_WORKSPACE_ROOTS`. Both
3033
+ * the cap and a budget exhaustion `logger.warn` UNCONDITIONALLY (a missed
3034
+ * module silently drops its Maven hub, so truncation must never be silent).
3035
+ */
3036
+
3037
+
3038
// Upper bound on the number of workspace roots returned. 16 is well above
// realistic monorepo counts while still guarding against pathological inputs.
const MAX_WORKSPACE_ROOTS = 16;
// Upper bound on directories visited, and the only guard against
// pathological inputs and symlink loops (a loop burns the budget and the walk
// stops). A few thousand is far above any realistic first-party tree once
// the prune policy has removed vendored/output dirs.
const DEFAULT_MAX_WALK_DIRS = 5000;
// File names whose presence marks a directory as a workspace root.
const WORKSPACE_MARKER_FILES = new Set(['MODULE.bazel', 'WORKSPACE', 'WORKSPACE.bazel']);
// Shared empty defaults for the walker's optional prune options.
const EMPTY_SET = new Set();
const EMPTY_ARRAY = [];
3050
+
3051
/**
 * Walks the tree rooted at `opts.cwd` and returns every directory that holds
 * at least one workspace marker file.
 *
 * Traversal is depth-first in ascending name order and stops once
 * `maxWalkDirs` directories have been visited. Unreadable directories are
 * skipped. Only entries reported as directories by `readdirSync` are
 * descended into.
 *
 * @param {object} opts
 * @param {string} opts.cwd - Directory to start from.
 * @param {boolean} [opts.verbose] - Also log the roots dropped by the cap.
 * @param {Set<string>} [opts.ignoreDirNames] - Basenames never descended into.
 * @param {string[]} [opts.ignoreDirPrefixes] - Basename prefixes never
 *   descended into.
 * @param {number} [opts.maxWalkDirs] - Visited-directory budget.
 * @returns {string[]} Sorted root paths, capped at `MAX_WORKSPACE_ROOTS`. A
 *   warning is logged whenever the budget is exhausted or the cap drops roots.
 */
function findWorkspaceRoots(opts) {
  const {
    cwd,
    verbose
  } = opts;
  const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET;
  const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY;
  const maxWalkDirs = opts.maxWalkDirs ?? DEFAULT_MAX_WALK_DIRS;
  const isPruned = name => ignoreDirNames.has(name) || ignoreDirPrefixes.some(prefix => name.startsWith(prefix));

  const found = [];
  // LIFO work list. Children are pushed in descending name order so they pop
  // in ascending order, which keeps the traversal deterministic.
  const pending = [cwd];
  let visited = 0;
  let budgetHit = false;
  while (pending.length > 0) {
    if (visited >= maxWalkDirs) {
      budgetHit = true;
      break;
    }
    const dir = pending.pop();
    if (dir === undefined) {
      break;
    }
    visited += 1;
    let entries;
    try {
      entries = fs$1.readdirSync(dir, {
        withFileTypes: true
      });
    } catch {
      continue;
    }
    const subdirs = [];
    let hasMarker = false;
    for (const entry of entries) {
      if (entry.isFile()) {
        if (WORKSPACE_MARKER_FILES.has(entry.name)) {
          hasMarker = true;
        }
      } else if (entry.isDirectory() && !isPruned(entry.name)) {
        subdirs.push(entry.name);
      }
    }
    if (hasMarker) {
      found.push(dir);
    }
    // Keep descending even below a root: nested workspaces are common
    // (root MODULE.bazel + examples/*/MODULE.bazel).
    subdirs.sort();
    for (const name of subdirs.reverse()) {
      pending.push(path.join(dir, name));
    }
  }
  found.sort();
  const kept = found.slice(0, MAX_WORKSPACE_ROOTS);
  if (budgetHit) {
    // An unknown number of roots may be undiscovered; always say so.
    logger.logger.warn(`Bazel workspace walk hit the ${maxWalkDirs}-directory budget; some workspaces beneath ${cwd} may be undiscovered (found ${found.length}, kept ${kept.length}).`);
  }
  const droppedCount = found.length - kept.length;
  if (droppedCount > 0) {
    // Exact count after a full walk; a lower bound ("≥") when the budget cut
    // the walk short.
    const qualifier = budgetHit ? '≥' : '';
    logger.logger.warn(`Bazel workspace walk found ${found.length} workspace root(s); capping at ${MAX_WORKSPACE_ROOTS} and dropping ${qualifier}${droppedCount}.`);
    if (verbose) {
      logger.logger.log('[VERBOSE] workspace walker: dropped roots:', found.slice(MAX_WORKSPACE_ROOTS));
    }
  }
  return kept;
}
3142
+
3143
+ // Best-effort-per-hub produces four distinct run outcomes a single `ok`
3144
+ // boolean would conflate:
3145
+ // - `complete` — every discovered hub extracted cleanly; >=1 manifest.
3146
+ // - `partial` — >=1 manifest written, but at least one hub failed,
3147
+ // timed out, or dropped edges. Worth uploading, but the
3148
+ // graph is known-incomplete.
3149
+ // - `noEcosystem` — no Bazel/Maven found. Whether that's an error is
3150
+ // caller-dependent (tolerated in auto mode, error in
3151
+ // explicit mode), so it must NOT be flattened into the
3152
+ // failure states.
3153
+ // - `hardFailure` — zero manifests written and it wasn't `noEcosystem`
3154
+ // (discovery threw, or every discovered hub failed).
3155
+ // Always an error for every caller.
3156
+
3157
// Default per-repo timeout, in milliseconds.
const DEFAULT_PER_REPO_TIMEOUT_MS = 60000;
// Timeout, in milliseconds, for the best-effort Bazel server shutdown.
const REAP_TIMEOUT_MS = 10000;

// Default directory-prune policy for the Bazel workspace walk. The
// orchestrator applies it unconditionally, so neither caller (the explicit
// `socket manifest bazel` command nor `--auto-manifest`) can omit it and let
// the walk descend `node_modules`/VCS/vendored trees. Callers may pass extra
// names/prefixes to EXTEND this set, never to replace it.
const DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES = new Set([
  ...utils.IGNORED_DIRS,
  '.hg',
  '.idea',
  '.pnpm-store',
  '.socket-auto-manifest',
  '.svn',
  '.vscode'
]);
// Bazel's `bazel-*` output_base symlinks.
const DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES = ['bazel-'];
2836
3168
  // Splits "g:a:v" -> { groupArtifact: "g:a", version: "v" }.
2837
3169
  // Returns null on malformed input.
2838
3170
  function splitCoord(c) {
@@ -2845,213 +3177,304 @@ function splitCoord(c) {
2845
3177
  version: c.slice(lastColon + 1)
2846
3178
  };
2847
3179
  }
2848
- // Builds a lookup from rule label suffix (e.g. ":com_google_guava_guava") to canonical coord.
2849
- function buildLabelToCoordMap(artifacts) {
2850
- const fullLabels = new Map();
2851
- const suffixToCoords = new Map();
2852
- for (const a of artifacts) {
2853
- // The rule name (e.g. "com_google_guava_guava") becomes the path under @<repo>//:<name>.
2854
- // We record by ":<name>" suffix so we can look up regardless of repo name.
2855
- const suffix = `:${a.ruleName}`;
2856
- const coords = suffixToCoords.get(suffix) ?? new Set();
2857
- coords.add(a.mavenCoordinates);
2858
- suffixToCoords.set(suffix, coords);
2859
- if (a.sourceRepo) {
2860
- fullLabels.set(`@${a.sourceRepo}//${suffix}`, a.mavenCoordinates);
2861
- }
3180
/**
 * Checks the shape of a versionless `maven_install.json` key.
 *
 * A valid key has 2-4 non-empty colon-separated segments (`g:a`, `g:a:ext`,
 * `g:a:ext:classifier`), the range depscan's `coordinateToParts` accepts.
 * Keys outside that range, or with an empty segment, would be rejected after
 * upload, so they are rejected locally instead.
 *
 * @param {string} key
 * @returns {boolean}
 */
function isValidVersionlessKey(key) {
  const segments = key.split(':');
  const count = segments.length;
  return count >= 2 && count <= 4 && !segments.includes('');
}
2868
3191
 
2869
- // Converts a Bazel dep label to a Maven coordinate, using the label-to-coord map.
2870
- // Returns null when the label is not recognised.
2871
- function depLabelToCoord(label, labelToCoord) {
2872
- // label may be "@maven//:com_google_guava_failureaccess".
2873
- const colon = label.lastIndexOf(':');
2874
- if (colon < 0) {
2875
- return null;
2876
- }
2877
- const fullMatch = labelToCoord.fullLabels.get(label);
2878
- if (fullMatch) {
2879
- return fullMatch;
2880
- }
2881
- const key = label.slice(colon);
2882
- const suffixMatches = labelToCoord.suffixToCoords.get(key);
2883
- if (!suffixMatches) {
2884
- return null;
2885
- }
2886
- if (suffixMatches.size > 1) {
2887
- throw new Error(`Ambiguous Bazel dependency label ${label} maps rule suffix ${key} to multiple Maven coordinates: ${Array.from(suffixMatches).sort().join(', ')}. The generated maven_install.json cannot resolve this dependency label losslessly.`);
2888
- }
2889
- return Array.from(suffixMatches)[0] ?? null;
2890
- }
3192
/**
 * Builds a modern `maven_install.json` from artifacts whose `deps` already
 * hold resolved versionless coordinates, so no label-to-coordinate resolution
 * happens here. Artifact and dependency keys are versionless (`g:a`, with any
 * packaging/classifier segments preserved).
 *
 * The work is split in two passes so the emitted graph is internally closed:
 * pass 1 validates and records artifact keys, pass 2 emits only edges whose
 * source AND target are emitted keys. Everything discarded is reported back
 * so the caller can mark the hub partial.
 *
 * @param {Iterable<{mavenCoordinates: string, deps: Iterable<string>}>} artifacts
 * @returns {{droppedArtifacts: string[], json: {artifacts: object, dependencies: object}, prunedEdges: string[]}}
 * @throws {Error} When one `group:artifact` key appears with two different
 *   versions.
 */
function normalizeToMavenInstallJson(artifacts) {
  const json = {
    artifacts: {},
    dependencies: {}
  };
  const droppedArtifacts = [];
  const prunedEdges = [];
  const versionByKey = new Map();
  const edgeCandidates = new Map();

  // Pass 1: accept artifacts with a well-shaped key and a non-empty version.
  for (const artifact of artifacts) {
    const coord = artifact.mavenCoordinates;
    const split = splitCoord(coord);
    // A `g:a:` coordinate strips to the valid-shaped key `g:a` but has an
    // empty version, which the server rejects, so both are required.
    if (!split || !isValidVersionlessKey(split.groupArtifact) || !split.version) {
      droppedArtifacts.push(coord);
      continue;
    }
    const { groupArtifact: key, version } = split;
    const knownVersion = versionByKey.get(key);
    if (!knownVersion) {
      versionByKey.set(key, version);
      json.artifacts[key] = {
        version
      };
    } else if (knownVersion !== version) {
      throw new Error(`Conflicting versions for ${key}: ${knownVersion}, ${version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`);
    }
    // Collect candidate edges per key; they are pruned in pass 2.
    const targets = edgeCandidates.get(key) ?? new Set();
    for (const dep of artifact.deps) {
      targets.add(dep);
    }
    if (targets.size > 0) {
      edgeCandidates.set(key, targets);
    }
  }

  // Pass 2: keep an edge only when both ends are emitted artifact keys.
  const emittedKeys = new Set(Object.keys(json.artifacts));
  for (const [source, targets] of edgeCandidates) {
    const sourceEmitted = emittedKeys.has(source);
    const kept = [];
    for (const target of targets) {
      if (sourceEmitted && emittedKeys.has(target)) {
        kept.push(target);
      } else {
        prunedEdges.push(`${source} -> ${target}`);
      }
    }
    if (kept.length > 0) {
      json.dependencies[source] = kept;
    }
  }
  return {
    droppedArtifacts,
    json,
    prunedEdges
  };
}
3277
+
3278
/**
 * Collapses artifacts that share the same full Maven coordinate string
 * (`g:a:v[:classifier]`) into one entry.
 *
 * The metadata cquery emits one entry per rule, so the same coordinate can
 * show up in several workspaces' hubs. Each occurrence resolved its edges
 * against its own repo's targets, so `deps` may differ between occurrences;
 * they are unioned (first-seen order) rather than keeping only the first,
 * otherwise real graph edges would be lost.
 *
 * @param {Iterable<{mavenCoordinates: string, deps: string[]}>} artifacts
 * @returns {Array<object>} Shallow copies in first-seen order; inputs are not
 *   mutated.
 */
function dedupArtifactsByCoord(artifacts) {
  const unique = new Map();
  for (const artifact of artifacts) {
    const coord = artifact.mavenCoordinates;
    const prior = unique.get(coord);
    if (prior) {
      prior.deps = [...new Set([...prior.deps, ...artifact.deps])];
    } else {
      unique.set(coord, {
        ...artifact,
        deps: [...artifact.deps]
      });
    }
  }
  return Array.from(unique.values());
}
3305
/**
 * Dedups, normalizes, and writes the manifest for one hub.
 *
 * The output path mirrors the workspace tree:
 * `<manifestDir>/<relPath>/<fileName>`, where the file is
 * `maven_install.json` for a hub literally named `maven` and
 * `<hub>_maven_install.json` otherwise (matching the server walker's
 * `**\/*_maven_install.json` glob). The root workspace (`relPath === ''`)
 * writes directly into `manifestDir`.
 *
 * @param {{artifacts: Array<object>, manifestDir: string, relPath: string, repoName: string}} args
 * @returns {Promise<{artifactCount: number, droppedArtifacts: string[], manifestPath: (string|undefined), prunedEdges: string[]}>}
 *   `manifestPath` is `undefined` (nothing written) when the hub yields zero
 *   valid artifacts. The dropped/pruned lists let the caller mark the hub
 *   partial.
 */
async function writeHubManifest(args) {
  const {
    artifacts,
    manifestDir,
    relPath,
    repoName
  } = args;
  const {
    droppedArtifacts,
    json,
    prunedEdges
  } = normalizeToMavenInstallJson(dedupArtifactsByCoord(artifacts));
  const artifactCount = Object.keys(json.artifacts).length;
  let manifestPath;
  if (artifactCount > 0) {
    const hubDir = relPath ? path.join(manifestDir, relPath) : manifestDir;
    fs$1.mkdirSync(hubDir, {
      recursive: true
    });
    const fileName = repoName === 'maven' ? 'maven_install.json' : `${repoName}_maven_install.json`;
    manifestPath = path.join(hubDir, fileName);
    await fs$1.promises.writeFile(manifestPath, JSON.stringify(json, null, 2), 'utf8');
  }
  return {
    artifactCount,
    droppedArtifacts,
    manifestPath,
    prunedEdges
  };
}
3349
+
3350
/**
 * Builds the candidate Maven hub list for one workspace.
 *
 * Bzlmod mode asks `bazel mod show_extension` for the hub list and keeps only
 * hubs imported by the root module. WORKSPACE mode has no equivalent, so the
 * conventional hub names are probed instead. Under Bzlmod the same probe is
 * the fallback when `show_extension` fails or yields no root-imported hub, so
 * partial discovery stays possible.
 *
 * @param {string} workspaceRoot - Used only in verbose log lines.
 * @param {{bzlmod: boolean}} mode
 * @param {object} queryOpts - Passed through to the Bazel runner helpers.
 * @param {boolean} verbose
 * @returns {Promise<string[]>} Candidate hub repo names.
 */
async function discoverCandidatesForWorkspace(workspaceRoot, mode, queryOpts, verbose) {
  const candidates = [];
  const importedByRoot = entry => entry.importers.includes(ROOT_MODULE_IMPORTER);
  let rootHubsFound = false;
  if (mode.bzlmod) {
    const extResult = await runBazelModShowMavenExtension(queryOpts);
    if (extResult.code !== 0) {
      if (verbose) {
        logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`);
      }
    } else {
      // The maven extension generates a hub for EVERY module that uses it:
      // the root's own `maven.install` hub(s) plus the rulesets' internal
      // hubs (rules_jvm_external_deps, stardoc_maven, …). Only root-imported
      // hubs belong to the user's SBOM.
      const entries = parseShowExtensionOutput(extResult.stdout);
      const kept = entries.filter(importedByRoot);
      for (const entry of kept) {
        candidates.push(entry.name);
      }
      // Gate the fallback on the KEPT count, not the raw parse: a report
      // listing only transitive ruleset hubs must still fall through to the
      // conventional probe so a root @maven isn't missed.
      rootHubsFound = kept.length > 0;
      if (verbose) {
        logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension kept root hub(s)`, kept.map(e => e.name));
        for (const entry of entries) {
          if (!importedByRoot(entry)) {
            logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: dropped ${entry.name} — imported by ${entry.importers.join(', ')}, not ${ROOT_MODULE_IMPORTER}`);
          }
        }
      }
    }
  }
  // When show_extension produced root hubs it is authoritative and nothing
  // else is probed. Otherwise (WORKSPACE mode, a failed show_extension, or a
  // parse with zero root hubs) probe the conventional names not yet listed.
  const toProbe = rootHubsFound ? [] : CONVENTIONAL_MAVEN_REPO_NAMES.filter(name => !candidates.includes(name));
  if (toProbe.length === 0) {
    return candidates;
  }
  const probe = buildMavenProbeFor(queryOpts);
  for (const name of toProbe) {
    // Probes run one at a time, in list order.
    // eslint-disable-next-line no-await-in-loop
    const status = await probeCandidate(name, probe, verbose);
    if (status === 'populated') {
      candidates.push(name);
    }
  }
  return candidates;
}
2954
3411
 
2955
- // Resolves the bazel `external/` dir for the given workspace.
2956
- //
2957
- // Bazel's `bazel-out/` convenience symlink points at
2958
- // `<output_base>/execroot/<workspace>/bazel-out/`; the `external/` dir we
2959
- // want is at `<output_base>/external/`. `path.join` is purely lexical and
2960
- // would collapse `bazel-out/..` to the cwd itself, which is the wrong place
2961
- // Resolve the symlink at the filesystem level and walk up to
2962
- // `<output_base>` instead.
2963
- function bazelExternalDir(cwd, outputBase) {
2964
- if (outputBase) {
2965
- return path.join(outputBase, 'external');
2966
- }
2967
- const bazelOutLink = path.join(cwd, 'bazel-out');
2968
- if (!fs$1.existsSync(bazelOutLink)) {
2969
- return null;
2970
- }
3412
/**
 * Best-effort shutdown of the Bazel server that owns `outputUserRoot`.
 *
 * The shutdown is spawned with a short timeout so a wedged server cannot hang
 * cleanup. Failures are swallowed (and logged only in verbose mode) because
 * the caller removes the output_user_root regardless.
 *
 * @param {string} bin - Bazel executable to invoke.
 * @param {string} outputUserRoot - Value for `--output_user_root`.
 * @param {boolean} verbose - Log a diagnostic when the shutdown fails.
 * @returns {Promise<void>}
 */
async function reapBazelServer(bin, outputUserRoot, verbose) {
  const shutdownArgs = [`--output_user_root=${outputUserRoot}`, 'shutdown'];
  try {
    await spawn.spawn(bin, shutdownArgs, {
      timeout: REAP_TIMEOUT_MS
    });
  } catch (e) {
    // The server may already be dead, or the shutdown itself timed out.
    if (!verbose) {
      return;
    }
    logger.logger.log(`[VERBOSE] reapBazelServer: shutdown failed for ${outputUserRoot} (${utils.getErrorCause(e)}); tempdir removal will still run`);
  }
}
2980
-
2981
- // Internal diagnostic: when truthy, skip the unsorted_deps.json fast path
2982
- // and force the bazel-query regex fallback. Used by bazel-bench to
2983
- // deterministically exercise parseBazelBuildOutput on every CI run. Truthy
2984
- // values are '1', 'true', 'yes' (case-insensitive); anything else (unset,
2985
- // '', '0', 'false') is treated as off. Not exposed as a user-facing CLI
2986
- // flag, so it is read here rather than added to constants.mts.
2987
- function isForceQueryFallbackEnabled() {
2988
- const raw = process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'];
2989
- if (!raw) {
2990
- return false;
2991
- }
2992
- const normalized = raw.toLowerCase();
2993
- return normalized === '1' || normalized === 'true' || normalized === 'yes';
2994
- }
2995
-
2996
- // Tries `external/<repo>/unsorted_deps.json` first; falls back to parsing the
2997
- // probe stdout the caller already captured during discovery. Discovery runs
2998
- // the same `kind("jvm_import rule|aar_import rule", @<repo>//:*)` query that
2999
- // extraction needs, so reusing its stdout skips one bazel-query invocation
3000
- // per repo on the unpinned path (where unsorted_deps.json isn't on disk).
3001
- async function extractFromOneRepo(repoName, queryOpts, cachedProbeStdout) {
3002
- const verbose = queryOpts.verbose;
3003
- // unsorted_deps.json lives under the bazel external dir.
3004
- // When --output_base is set, it's under that; otherwise under the workspace's
3005
- // bazel-out symlink (resolved via realpath, NOT lexical path.join — the
3006
- // lexical form would collapse `bazel-out/..` to cwd and miss the file).
3007
- const externalDir = bazelExternalDir(queryOpts.cwd, queryOpts.bazelOutputBase);
3008
- if (verbose) {
3009
- logger.logger.log(`[VERBOSE] @${repoName}: external dir:`, externalDir ?? '(unresolved — bazel-out symlink absent)');
3010
- }
3011
- const forceFallback = isForceQueryFallbackEnabled();
3012
- if (forceFallback && verbose) {
3013
- logger.logger.log(`[VERBOSE] @${repoName}: SOCKET_BAZEL_FORCE_QUERY_FALLBACK set; skipping unsorted_deps.json fast path.`);
3014
- }
3015
- const candidates = forceFallback ? [] : externalDir ? [path.join(externalDir, repoName, 'unsorted_deps.json')] : [];
3016
- for (const c of candidates) {
3017
- if (fs$1.existsSync(c)) {
3018
- // Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles.
3019
- // eslint-disable-next-line no-await-in-loop
3020
- const stat = await fs$1.promises.stat(c);
3021
- if (stat.size > 1024 * 1024 * 1024) {
3022
- logger.logger.warn(`Skipping oversized ${c} (${stat.size} bytes); falling back to cached probe stdout.`);
3023
- break;
3024
- }
3025
- const json = fs$1.readFileSync(c, 'utf8');
3026
- const parsed = parseUnsortedDepsJson(json);
3027
- if (parsed.length) {
3028
- if (verbose) {
3029
- logger.logger.log(`[VERBOSE] @${repoName}: source=unsorted_deps.json (${c}, ${parsed.length} artifact(s))`);
3030
- }
3031
- return parsed.map(a => ({
3032
- ...a,
3033
- sourceRepo: repoName
3034
- }));
3035
- }
3036
- } else if (verbose) {
3037
- logger.logger.log(`[VERBOSE] @${repoName}: unsorted_deps.json miss at`, c);
3428
+ async function removeTempdir(dir, verbose) {
3429
+ try {
3430
+ await fs$1.promises.rm(dir, {
3431
+ recursive: true,
3432
+ force: true
3433
+ });
3434
+ } catch (e) {
3435
+ // Best effort. The next CLI invocation lands a fresh tempdir.
3436
+ if (verbose) {
3437
+ logger.logger.log(`[VERBOSE] removeTempdir: ${dir} not fully removed (${utils.getErrorCause(e)}); a stale dir may linger until the next OS tempdir sweep`);
3038
3438
  }
3039
3439
  }
3040
- // Reuse the probe stdout that discovery already captured for this repo.
3041
- // The probe ran exactly this query during validation and only validated
3042
- // repos with code === 0 make it into the cache, so retry is unnecessary
3043
- // — if the probe was flaky, the repo wouldn't be in the map.
3044
- if (!cachedProbeStdout) {
3045
- logger.logger.warn(`No cached probe stdout for @${repoName}; skipping. (This shouldn't happen — discovery should have populated it.)`);
3046
- return [];
3047
- }
3048
- if (verbose) {
3049
- logger.logger.log(`[VERBOSE] @${repoName}: source=cached probe stdout (${cachedProbeStdout.length} bytes)`);
3050
- }
3051
- return parseBazelBuildOutput(cachedProbeStdout).map(a => ({
3052
- ...a,
3053
- sourceRepo: repoName
3054
- }));
3440
+ }
3441
// Mint a fresh, uniquely named directory under the OS temp dir (prefix
// `socket-bazel-`) and return its path. Callers in this file pass the
// result to Bazel as `--output_user_root`.
function makeOutputUserRoot() {
  const tempPrefix = path.join(os.tmpdir(), 'socket-bazel-');
  return fs$1.mkdtempSync(tempPrefix);
}
3444
+
3445
// Assemble the BazelQueryOptions object for one workspace's queries.
//
// Kept at module scope (not as a closure inside the per-workspace loop) so
// ESLint's consistent-function-scoping rule is satisfied; every value the
// closure used to capture is passed in explicitly through `args`:
//   baseEnv, bin, invocationFlags, opts, outputUserRoot, spawnCwd, verbose.
//
// `bazelRc`, `bazelFlags`, `bazelOutputBase` (read from `opts`) and `env`
// (from `baseEnv`) are only present on the result when their source value
// is truthy, so the returned object never carries explicit `undefined`s.
function buildQueryOpts(args) {
  const queryOpts = {
    bin: args.bin,
    cwd: args.spawnCwd,
    invocationFlags: args.invocationFlags,
    outputUserRoot: args.outputUserRoot
  };
  // Optional pass-through settings, copied in a fixed order.
  for (const key of ['bazelRc', 'bazelFlags', 'bazelOutputBase']) {
    if (args.opts[key]) {
      queryOpts[key] = args.opts[key];
    }
  }
  if (args.baseEnv) {
    queryOpts.env = args.baseEnv;
  }
  queryOpts.verbose = args.verbose;
  return queryOpts;
}
3056
3479
  async function extractBazelToMaven(opts) {
3057
3480
  const {
@@ -3066,143 +3489,232 @@ async function extractBazelToMaven(opts) {
3066
3489
  logger.logger.warn(`Warning: cwd does not exist: ${cwd}`);
3067
3490
  }
3068
3491
  logger.logger.groupEnd();
3492
+ const perRepoTimeoutMs = opts.perRepoTimeoutMs ?? DEFAULT_PER_REPO_TIMEOUT_MS;
3493
+
3494
+ // Validate config + ensure toolchains BEFORE we mint a tempdir.
3495
+ let bin;
3496
+ let baseEnv;
3069
3497
  try {
3070
- // Validate caller-provided Bazel filesystem settings before invoking Bazel.
3071
3498
  if (opts.bazelOutputBase) {
3072
3499
  validateOutputBase(opts.bazelOutputBase, opts.cwd);
3073
3500
  }
3074
- // Java must be available before rules_jvm_external/Coursier runs;
3075
- // python shim follows so its augmented PATH inherits the JDK prefix.
3076
3501
  ensureJavaOnPath();
3077
3502
  const shim = await provisionPythonShim();
3078
- const baseEnv = shim.augmentedEnv ?? opts.env;
3079
-
3080
- // Step 1: workspace detection.
3081
- const mode = detectWorkspaceMode(cwd);
3082
- logger.logger.info(`Workspace mode: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
3083
- const invocationFlags = getBazelInvocationFlags(mode);
3084
-
3085
- // Step 2: bazel binary resolution.
3086
- const bin = await resolveBazelBinary(opts.bin);
3087
- logger.logger.info(`Using bazel: ${bin}`);
3503
+ baseEnv = shim.augmentedEnv ?? opts.env;
3504
+ bin = await resolveBazelBinary(opts.bin);
3505
+ } catch (e) {
3506
+ logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
3088
3507
  if (verbose) {
3089
- logger.logger.log('[VERBOSE] resolved options:', {
3090
- bin,
3091
- bazelRc: opts.bazelRc ?? '(unset)',
3092
- bazelOutputBase: opts.bazelOutputBase ?? '(unset)',
3093
- bazelFlags: opts.bazelFlags ?? '(unset)',
3094
- invocationFlags
3095
- });
3508
+ logger.logger.group('[VERBOSE] error:');
3509
+ logger.logger.log(e);
3510
+ logger.logger.groupEnd();
3096
3511
  }
3512
+ return {
3513
+ artifactCount: 0,
3514
+ manifestPaths: [],
3515
+ status: 'hardFailure'
3516
+ };
3517
+ }
3518
+ logger.logger.info(`Using bazel: ${bin}`);
3097
3519
 
3098
- // Step 3: build the shared query options object.
3099
- const queryOpts = {
3100
- bin,
3520
+ // Track every output_user_root we mint so we can reap them all in
3521
+ // the cleanup pass, even if a per-repo timeout forced a re-mint.
3522
+ let outputUserRoot = makeOutputUserRoot();
3523
+ const mintedRoots = [outputUserRoot];
3524
+ if (verbose) {
3525
+ logger.logger.log(`[VERBOSE] initial --output_user_root=${outputUserRoot} (will be reaped on completion)`);
3526
+ }
3527
+ const layout = opts.outLayout ?? 'standalone';
3528
+ const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
3529
+ // One manifest per (workspace, hub), written best-effort: a single wedged
3530
+ // hub must not discard the manifests every other hub produced.
3531
+ const manifestPaths = [];
3532
+ let totalArtifacts = 0;
3533
+ let anyRepos = false;
3534
+ let hubsSucceeded = 0;
3535
+ let hubsFailed = 0;
3536
+ try {
3537
+ // Always apply the default prune policy so no caller can forget it;
3538
+ // callers EXTEND it via ignoreDirNames/ignoreDirPrefixes.
3539
+ const ignoreDirNames = new Set([...DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES, ...(opts.ignoreDirNames ?? [])]);
3540
+ const ignoreDirPrefixes = [...DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES, ...(opts.ignoreDirPrefixes ?? [])];
3541
+ const workspaceRoots = findWorkspaceRoots({
3101
3542
  cwd,
3102
- invocationFlags,
3103
- ...(opts.bazelRc ? {
3104
- bazelRc: opts.bazelRc
3105
- } : {}),
3106
- ...(opts.bazelFlags ? {
3107
- bazelFlags: opts.bazelFlags
3108
- } : {}),
3109
- ...(opts.bazelOutputBase ? {
3110
- bazelOutputBase: opts.bazelOutputBase
3111
- } : {}),
3112
- ...(baseEnv ? {
3113
- env: baseEnv
3114
- } : {}),
3543
+ ignoreDirNames,
3544
+ ignoreDirPrefixes,
3115
3545
  verbose
3116
- };
3117
-
3118
- // Step 4: discover validated Maven repos via the two-step recipe.
3119
- // Bzlmod has a native visible-repository surface; prefer that over static
3120
- // MODULE.bazel parsing and keep bounded parsing as the legacy/fallback path.
3121
- let nativeCandidates;
3122
- if (mode.bzlmod) {
3123
- const visibleRepos = await runBazelModShowVisibleRepos(queryOpts);
3124
- if (visibleRepos.code === 0) {
3125
- nativeCandidates = parseVisibleRepoCandidates(visibleRepos.stdout);
3546
+ });
3547
+ if (!workspaceRoots.length) {
3548
+ logger.logger.warn(`No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`);
3549
+ return {
3550
+ artifactCount: 0,
3551
+ manifestPaths: [],
3552
+ status: 'noEcosystem'
3553
+ };
3554
+ }
3555
+ if (verbose) {
3556
+ logger.logger.log(`[VERBOSE] discovered ${workspaceRoots.length} workspace root(s):`, workspaceRoots);
3557
+ }
3558
+ for (const workspaceRoot of workspaceRoots) {
3559
+ const relPath = path.relative(cwd, workspaceRoot);
3560
+ let mode;
3561
+ try {
3562
+ mode = detectWorkspaceMode(workspaceRoot);
3563
+ } catch (e) {
3126
3564
  if (verbose) {
3127
- logger.logger.log('[VERBOSE] Bzlmod visible repo candidates:', nativeCandidates);
3565
+ logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: detect failed (${utils.getErrorCause(e)}); skipping`);
3128
3566
  }
3129
- } else if (verbose) {
3130
- logger.logger.log('[VERBOSE] bazel mod show_repo failed; falling back to static candidate parsing:', visibleRepos.stderr);
3567
+ continue;
3131
3568
  }
3132
- }
3133
- // Returns Map<repoName, probeStdout> so extraction can reuse the probe
3134
- // output and skip running an identical bazel-query a second time.
3135
- const probe = buildProbeFor(queryOpts);
3136
- const repos = await discoverMavenRepos(cwd, probe, nativeCandidates, verbose);
3137
- const repoNames = Array.from(repos.keys());
3138
- logger.logger.info(`Discovered ${repos.size} Maven repo(s): ${repoNames.join(', ') || '(none)'}`);
3569
+ logger.logger.info(`Workspace ${relPath || '.'}: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
3570
+ const invocationFlags = getBazelInvocationFlags(mode);
3571
+ const queryOptsFor = userRoot => buildQueryOpts({
3572
+ baseEnv,
3573
+ bin,
3574
+ invocationFlags,
3575
+ opts,
3576
+ outputUserRoot: userRoot,
3577
+ spawnCwd: workspaceRoot,
3578
+ verbose
3579
+ });
3139
3580
 
3140
- // Step 5: extract artifacts from each repo (preferring unsorted_deps.json).
3141
- const allArtifacts = [];
3142
- for (const [repo, probeStdout] of repos) {
3143
3581
  // eslint-disable-next-line no-await-in-loop
3144
- const artifacts = await extractFromOneRepo(repo, queryOpts, probeStdout);
3145
- allArtifacts.push(...artifacts);
3146
- logger.logger.info(`@${repo}: ${artifacts.length} artifact(s)`);
3147
- }
3148
-
3149
- // Step 6: normalize to maven_install.json shape.
3150
- const normalized = normalizeToMavenInstallJson(allArtifacts);
3151
-
3152
- // Step 7: write outputs.
3153
- // Standalone output writes directly to `out`; auto-manifest uses a sibling directory
3154
- // to avoid colliding with a repo's checked-in rules_jvm_external lockfile and
3155
- // to avoid repo-root gitignore patterns such as `/maven_install.json`.
3156
- const layout = opts.outLayout ?? 'standalone';
3157
- const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
3158
- fs$1.mkdirSync(manifestDir, {
3159
- recursive: true
3160
- });
3161
- const manifestPath = path.join(manifestDir, 'maven_install.json');
3162
- await fs$1.promises.writeFile(manifestPath, JSON.stringify(normalized, null, 2), 'utf8');
3163
- if (verbose) {
3164
- logger.logger.log('[VERBOSE] outputs:', {
3165
- artifactCount: allArtifacts.length,
3166
- generatedManifest: path.relative(out, manifestPath),
3167
- layout,
3168
- manifest: manifestPath,
3169
- mavenRepos: repoNames,
3170
- tool: 'socket manifest bazel',
3171
- workspace: {
3172
- bzlmod: mode.bzlmod,
3173
- legacyWorkspace: mode.workspace
3582
+ const candidates = await discoverCandidatesForWorkspace(workspaceRoot, mode, queryOptsFor(outputUserRoot), verbose);
3583
+ logger.logger.info(`Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${candidates.join(', ') || '(none)'}`);
3584
+ for (const repoName of candidates) {
3585
+ anyRepos = true;
3586
+ if (verbose) {
3587
+ logger.logger.log(`[VERBOSE] workspace ${relPath || '.'}: running metadata cquery for @${repoName} (timeout ${perRepoTimeoutMs}ms)`);
3174
3588
  }
3175
- });
3589
+ // eslint-disable-next-line no-await-in-loop
3590
+ const result = await runMetadataCqueryForRepo({
3591
+ opts: queryOptsFor(outputUserRoot),
3592
+ repoName,
3593
+ timeoutMs: perRepoTimeoutMs,
3594
+ workspaceRelPath: relPath,
3595
+ workspaceRoot
3596
+ });
3597
+ if (result.status === 'timeout') {
3598
+ logger.logger.warn(`@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`);
3599
+ hubsFailed += 1;
3600
+ // eslint-disable-next-line no-await-in-loop
3601
+ await reapBazelServer(bin, outputUserRoot, verbose);
3602
+ // eslint-disable-next-line no-await-in-loop
3603
+ await removeTempdir(outputUserRoot, verbose);
3604
+ outputUserRoot = makeOutputUserRoot();
3605
+ mintedRoots.push(outputUserRoot);
3606
+ if (verbose) {
3607
+ logger.logger.log(`[VERBOSE] minted fresh --output_user_root=${outputUserRoot} after timeout`);
3608
+ }
3609
+ continue;
3610
+ }
3611
+ if (result.status === 'error') {
3612
+ logger.logger.warn(`@${repoName}: cquery failed; skipping this hub`);
3613
+ hubsFailed += 1;
3614
+ continue;
3615
+ }
3616
+ // A scan must never silently upload a graph missing edges it knows
3617
+ // it dropped: warn unconditionally and treat the hub as partial.
3618
+ let hubPartial = result.unresolvedLabels.length > 0;
3619
+ if (hubPartial) {
3620
+ logger.logger.warn(`@${repoName}: dropped ${result.unresolvedLabels.length} unresolved dependency edge(s): ${result.unresolvedLabels.join(', ')}`);
3621
+ }
3622
+ // A non-zero cquery exit that still yielded a usable subset
3623
+ // (--keep_going) is reported as `partial` even with no unresolved
3624
+ // labels — the graph is known-incomplete, so flip the hub partial.
3625
+ if (result.status === 'partial' && !result.unresolvedLabels.length) {
3626
+ hubPartial = true;
3627
+ logger.logger.warn(`@${repoName}: cquery partially failed (--keep_going); the dependency graph may be incomplete`);
3628
+ }
3629
+ let written;
3630
+ try {
3631
+ // eslint-disable-next-line no-await-in-loop
3632
+ written = await writeHubManifest({
3633
+ artifacts: result.artifacts,
3634
+ cwd,
3635
+ manifestDir,
3636
+ relPath,
3637
+ repoName,
3638
+ verbose
3639
+ });
3640
+ } catch (e) {
3641
+ // Best-effort per hub: a write failure must not abort the walk and
3642
+ // discard the manifests other hubs already produced.
3643
+ logger.logger.warn(`@${repoName}: failed to write manifest (${utils.getErrorCause(e)}); skipping this hub`);
3644
+ hubsFailed += 1;
3645
+ continue;
3646
+ }
3647
+ if (written.droppedArtifacts.length) {
3648
+ hubPartial = true;
3649
+ logger.logger.warn(`@${repoName}: dropped ${written.droppedArtifacts.length} malformed Maven coordinate(s): ${written.droppedArtifacts.join(', ')}`);
3650
+ }
3651
+ if (written.prunedEdges.length) {
3652
+ hubPartial = true;
3653
+ logger.logger.warn(`@${repoName}: pruned ${written.prunedEdges.length} dependency edge(s) referencing unlisted artifacts: ${written.prunedEdges.join(', ')}`);
3654
+ }
3655
+ if (written.manifestPath) {
3656
+ manifestPaths.push(written.manifestPath);
3657
+ totalArtifacts += written.artifactCount;
3658
+ if (hubPartial) {
3659
+ hubsFailed += 1;
3660
+ } else {
3661
+ hubsSucceeded += 1;
3662
+ }
3663
+ if (verbose) {
3664
+ logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status}, ${written.artifactCount} artifact(s) -> ${written.manifestPath}`);
3665
+ }
3666
+ } else {
3667
+ // No artifacts to write (empty hub). Not itself a failure, but if
3668
+ // edges were dropped the partial signal still applies.
3669
+ if (hubPartial) {
3670
+ hubsFailed += 1;
3671
+ }
3672
+ if (verbose) {
3673
+ logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status} (no manifest written)`);
3674
+ }
3675
+ }
3676
+ }
3176
3677
  }
3177
- if (!allArtifacts.length) {
3178
- if (!repos.size) {
3678
+ if (!manifestPaths.length) {
3679
+ if (!anyRepos) {
3179
3680
  if (verbose) {
3180
3681
  logger.logger.info('No Maven artifacts extracted. failureCategory=no-supported-ecosystem');
3181
3682
  }
3182
3683
  return {
3183
3684
  artifactCount: 0,
3184
- manifestPath,
3185
- noEcosystemFound: true,
3186
- ok: false
3685
+ manifestPaths: [],
3686
+ status: 'noEcosystem'
3187
3687
  };
3188
3688
  }
3189
- logger.logger.fail(`Discovered Maven repo(s) ${repoNames.join(', ')} but extracted zero artifacts. failureCategory=ecosystem-detected-but-empty`);
3689
+ logger.logger.fail('Discovered Maven repo(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty');
3190
3690
  return {
3191
3691
  artifactCount: 0,
3192
- manifestPath,
3193
- ok: false
3692
+ manifestPaths: [],
3693
+ status: 'hardFailure'
3194
3694
  };
3195
3695
  }
3196
- logger.logger.success(`Wrote ${allArtifacts.length} artifact(s) to ${path.relative(cwd, manifestPath)}.`);
3696
+ const status = hubsFailed ? 'partial' : 'complete';
3697
+ if (status === 'complete') {
3698
+ logger.logger.success(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total.`);
3699
+ } else {
3700
+ logger.logger.warn(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete.`);
3701
+ }
3702
+ if (verbose) {
3703
+ logger.logger.log('[VERBOSE] outputs:', {
3704
+ artifactCount: totalArtifacts,
3705
+ hubsFailed,
3706
+ hubsSucceeded,
3707
+ layout,
3708
+ manifestPaths,
3709
+ status
3710
+ });
3711
+ }
3197
3712
  return {
3198
- artifactCount: allArtifacts.length,
3199
- manifestPath,
3200
- ok: true
3713
+ artifactCount: totalArtifacts,
3714
+ manifestPaths,
3715
+ status
3201
3716
  };
3202
3717
  } catch (e) {
3203
- // Always surface the error message; users should not have to
3204
- // re-run a multi-minute bazel build with --verbose just to see whether
3205
- // the failure was a missing dependency, permission error, or network blip.
3206
3718
  logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
3207
3719
  if (verbose) {
3208
3720
  logger.logger.group('[VERBOSE] error:');
@@ -3213,8 +3725,16 @@ async function extractBazelToMaven(opts) {
3213
3725
  }
3214
3726
  return {
3215
3727
  artifactCount: 0,
3216
- ok: false
3728
+ manifestPaths: [],
3729
+ status: 'hardFailure'
3217
3730
  };
3731
+ } finally {
3732
+ for (const dir of mintedRoots) {
3733
+ // eslint-disable-next-line no-await-in-loop
3734
+ await reapBazelServer(bin, dir, verbose);
3735
+ // eslint-disable-next-line no-await-in-loop
3736
+ await removeTempdir(dir, verbose);
3737
+ }
3218
3738
  }
3219
3739
  }
3220
3740
 
@@ -4084,12 +4604,20 @@ async function generateAutoManifest({
4084
4604
  outLayout: 'flat',
4085
4605
  verbose: Boolean(bazelConfig?.verbose) || verbose
4086
4606
  });
4087
- if (!mavenResult.ok && !mavenResult.noEcosystemFound) {
4607
+
4608
+ // Only a hard failure (zero manifests, ecosystem present) aborts the
4609
+ // wider scan. A partial run still produced manifests worth uploading; an
4610
+ // absent ecosystem is tolerated here (it's only an error when EVERY
4611
+ // ecosystem is absent, which the caller decides).
4612
+ if (mavenResult.status === 'hardFailure') {
4088
4613
  throw new Error('Bazel auto-manifest generation failed for ecosystem(s): maven');
4089
4614
  }
4090
- if (mavenResult.ok && mavenResult.manifestPath) {
4091
- generatedFiles.push(mavenResult.manifestPath);
4092
- } else if (mavenResult.noEcosystemFound) {
4615
+ if (mavenResult.status === 'complete' || mavenResult.status === 'partial') {
4616
+ generatedFiles.push(...mavenResult.manifestPaths);
4617
+ if (mavenResult.status === 'partial') {
4618
+ logger.logger.warn(`Bazel Maven manifest generation was partial (${mavenResult.manifestPaths.length} manifest(s) written); some hubs failed or had incomplete dependency graphs. Uploading what was generated.`);
4619
+ }
4620
+ } else {
4093
4621
  logger.logger.info('No supported Bazel Maven ecosystem detected.');
4094
4622
  }
4095
4623
  }
@@ -4307,6 +4835,13 @@ async function handleCreateNewScan({
4307
4835
  const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined;
4308
4836
  if (reach && scanId && tier1ReachabilityScanId) {
4309
4837
  await finalizeTier1Scan(tier1ReachabilityScanId, scanId);
4838
+ } else if (reach.runReachabilityAnalysis && scanId && !tier1ReachabilityScanId) {
4839
+ // Reachability analysis ran and a scan was created, but no tier 1
4840
+ // reachability scan id was extracted from the facts file. Surface this
4841
+ // instead of silently skipping finalize — otherwise the tier 1 row stays
4842
+ // stuck (e.g. at COANA_DONE) and the full scan is never linked to its
4843
+ // reachability report.
4844
+ logger.logger.warn('Reachability analysis ran but no tier 1 reachability scan ID was found; skipping tier 1 finalize. The scan was created but its reachability report was not linked.');
4310
4845
  }
4311
4846
 
4312
4847
  // On a successful scan, clean up the `.socket.facts.json` coana wrote at
@@ -7734,6 +8269,85 @@ async function run$G(argv, importMeta, context) {
7734
8269
  await spawnPromise;
7735
8270
  }
7736
8271
 
8272
+ // Result shape returned by `validatePypiHub`. Kept local to the PyPI module
8273
+ // since validation here is hub-alias-marker based (different from the
8274
+ // Maven-side tri-state classifier).
8275
+
8276
// PyPI-only repo-name predicate (Bazel apparent-name grammar).
const PYPI_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
const PYPI_REPO_NAME_RE = new RegExp(`^${PYPI_REPO_NAME_PATTERN}$`);

// Returns the string stored directly on `value` under `apparentName` (or,
// when that is null/undefined, `apparent_name`); undefined when `value` is
// not an object or the field is missing / not a string. Does not recurse.
function pypiDirectApparentName(value) {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  const direct = value['apparentName'] ?? value['apparent_name'];
  return typeof direct === 'string' ? direct : undefined;
}

// Depth-first search of a parsed JSON value for the first string
// `apparentName` / `apparent_name` field. The value's own field wins over
// anything nested; nested values are visited in `Object.values` order.
// Returns undefined when nothing (or only an empty string below the top
// level) is found.
function pypiApparentNameFromJsonValue(value) {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  const direct = pypiDirectApparentName(value);
  if (direct !== undefined) {
    return direct;
  }
  for (const nested of Object.values(value)) {
    const found = pypiApparentNameFromJsonValue(nested);
    if (found) {
      return found;
    }
  }
  return undefined;
}

// Treats `value` as a repo-mapping object (apparent name -> canonical name)
// and returns the keys that look like repo names: the key must not start
// with '@', must match PYPI_REPO_NAME_RE, and must map to a string.
// Non-objects and arrays yield an empty list.
function pypiApparentNamesFromRepoMapping(value) {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return [];
  }
  const candidates = [];
  for (const [name, canonicalName] of Object.entries(value)) {
    if (name.startsWith('@') || typeof canonicalName !== 'string') {
      continue;
    }
    if (PYPI_REPO_NAME_RE.test(name)) {
      candidates.push(name);
    }
  }
  return candidates;
}

// Strips a single leading '@' and returns the name when it matches
// PYPI_REPO_NAME_RE; returns undefined otherwise.
function pypiNormalizeRepoName(name) {
  const repo = name.startsWith('@') ? name.slice(1) : name;
  return PYPI_REPO_NAME_RE.test(repo) ? repo : undefined;
}

// Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accepts
// the older streamed jsonproto shape (apparentName / apparent_name records).
// PyPI-only; the Maven path consumes `bazel mod show_extension` instead.
//
// The input is processed one line at a time; blank lines and lines that are
// not valid JSON are ignored. Returns the de-duplicated candidate names in
// default (code-unit) sort order.
function parseVisibleRepoCandidates(output) {
  const seen = new Set();
  const candidates = [];
  const remember = name => {
    if (!seen.has(name)) {
      seen.add(name);
      candidates.push(name);
    }
  };
  for (const line of output.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed) {
      continue;
    }
    try {
      const parsed = JSON.parse(trimmed);
      // A jsonproto record's keys are field names (`apparentName`,
      // `canonicalName`, ...), not repository names. Those field names
      // satisfy the repo-name regex and have string values, so reading the
      // record as a repo mapping would emit them as bogus candidates. Only
      // apply key-as-name extraction when the object is not such a record.
      if (pypiDirectApparentName(parsed) === undefined) {
        for (const c of pypiApparentNamesFromRepoMapping(parsed)) {
          remember(c);
        }
      }
      const apparentName = pypiApparentNameFromJsonValue(parsed);
      if (apparentName) {
        const repo = pypiNormalizeRepoName(apparentName);
        if (repo) {
          remember(repo);
        }
      }
    } catch {
      // Skip malformed lines; caller falls back to static discovery when no
      // usable visible repo names are found.
    }
  }
  return candidates.sort();
}
8350
+
7737
8351
  // Maximum size (bytes) we will read for any single Bazel workspace file.
7738
8352
  // Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
7739
8353
  const MAX_WORKSPACE_FILE_BYTES = 5 * 1024 * 1024;
@@ -8676,6 +9290,13 @@ const config$e = {
8676
9290
  Note: this command generates dependency manifests for Bazel workspaces.
8677
9291
  It does not run reachability analysis.
8678
9292
 
9293
+ Maven hub discovery: under Bzlmod, hubs are enumerated from
9294
+ \`bazel mod show_extension\` and filtered to the root module's own hubs.
9295
+ Under legacy WORKSPACE mode (no \`show_extension\`), only conventionally
9296
+ named hubs are probed (\`maven\`, \`maven_install\`, \`maven_dev\`, …). A hub
9297
+ with a non-conventional name that \`show_extension\` does not enumerate is
9298
+ not discovered yet; a flag to name extra hubs is planned.
9299
+
8679
9300
  To generate AND upload in one step, use \`socket scan create --auto-manifest\`
8680
9301
  instead — it detects Bazel workspaces, generates Maven manifests by
8681
9302
  default, and uploads the result. This subcommand is for generation only.
@@ -8697,21 +9318,29 @@ const cmdManifestBazel = {
8697
9318
  // failures that must propagate to a non-zero CLI exit; returns void on
8698
9319
  // success.
8699
9320
  //
8700
- // - Hard failure: ok === false && !noEcosystemFound. The ecosystem was
8701
- // detected (or the runner crashed), but extraction failed. Always a
8702
- // non-zero exit, even when another ecosystem succeeded.
8703
- // - No-discovery: noEcosystemFound === true. Genuinely absent ecosystem.
8704
- // Auto-detect mode tolerates this when at least one other ecosystem
8705
- // succeeded; explicit mode treats it as an error.
9321
+ // - `complete`/`partial` both count as produced output (>=1 manifest).
9322
+ // `partial` additionally warns a known-incomplete SBOM is still emitted,
9323
+ // not a hard error.
9324
+ // - `hardFailure`: the ecosystem was detected (or the runner crashed) but
9325
+ // wrote zero manifests. Always a non-zero exit, even when another
9326
+ // ecosystem succeeded.
9327
+ // - `noEcosystem`: genuinely absent ecosystem. Auto-detect mode tolerates it
9328
+ // when at least one other ecosystem produced output; explicit mode treats
9329
+ // it as an error (the user requested an ecosystem that isn't there).
8706
9330
  function evaluateEcosystemOutcomes(outcomes, isExplicit) {
8707
- const hardFailures = outcomes.filter(o => !o.ok && !o.noEcosystemFound);
8708
- const noDiscoveries = outcomes.filter(o => o.noEcosystemFound);
8709
- const successes = outcomes.filter(o => o.ok && o.manifestPath);
9331
+ const produced = outcomes.filter(o => (o.status === 'complete' || o.status === 'partial') && o.manifestPaths.length > 0);
9332
+ const hardFailures = outcomes.filter(o => o.status === 'hardFailure');
9333
+ const noDiscoveries = outcomes.filter(o => o.status === 'noEcosystem');
9334
+ for (const partial of outcomes) {
9335
+ if (partial.status === 'partial') {
9336
+ logger.logger.warn(`Bazel ${partial.ecosystem} manifest generation was partial; the uploaded SBOM is known-incomplete.`);
9337
+ }
9338
+ }
8710
9339
  if (!isExplicit) {
8711
9340
  if (hardFailures.length) {
8712
9341
  throw new utils.InputError(`Bazel auto-manifest generation hit hard failure(s) in ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
8713
9342
  }
8714
- if (successes.length) {
9343
+ if (produced.length) {
8715
9344
  return;
8716
9345
  }
8717
9346
  if (noDiscoveries.length === outcomes.length) {
@@ -8720,7 +9349,8 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
8720
9349
  return;
8721
9350
  }
8722
9351
 
8723
- // Explicit mode: every requested ecosystem must succeed.
9352
+ // Explicit mode: every requested ecosystem must produce output. A partial
9353
+ // run counts (it wrote manifests); absent or hard-failed ecosystems error.
8724
9354
  if (noDiscoveries.length) {
8725
9355
  throw new utils.InputError(`No Bazel rules found for explicitly requested ecosystem(s): ${noDiscoveries.map(f => f.ecosystem).join(', ')}.`);
8726
9356
  }
@@ -8728,6 +9358,32 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
8728
9358
  throw new utils.InputError(`Bazel manifest generation failed for explicitly requested ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
8729
9359
  }
8730
9360
  }
9361
+
9362
// Translate the legacy PyPI result (a single `manifestPath` plus the
// `ok` / `noEcosystemFound` booleans) into the shared status vocabulary so
// PyPI and Maven outcomes pass through the same success gate.
//
// PyPI has no partial state, so the result is one of:
//   - `noEcosystem`  when `noEcosystemFound` is truthy (checked first);
//   - `complete`     when `ok` and `manifestPath` are both truthy;
//   - `hardFailure`  in every other case.
// Only `complete` carries a manifest path. A detected-but-empty PyPI run
// writes a stub file but is still a hard failure, and that stub must not be
// surfaced as produced output — hence the empty `manifestPaths` there.
function pypiOutcome(result) {
  const { manifestPath, noEcosystemFound, ok } = result;
  let status = 'hardFailure';
  if (noEcosystemFound) {
    status = 'noEcosystem';
  } else if (ok && manifestPath) {
    status = 'complete';
  }
  return {
    manifestPaths: status === 'complete' ? [manifestPath] : [],
    status
  };
}
8731
9387
  async function run$F(argv, importMeta, {
8732
9388
  parentName
8733
9389
  }) {
@@ -8861,9 +9517,8 @@ async function run$F(argv, importMeta, {
8861
9517
  });
8862
9518
  outcomes.push({
8863
9519
  ecosystem: 'maven',
8864
- ok: mavenResult.ok,
8865
- noEcosystemFound: mavenResult.noEcosystemFound,
8866
- manifestPath: mavenResult.manifestPath
9520
+ manifestPaths: mavenResult.manifestPaths,
9521
+ status: mavenResult.status
8867
9522
  });
8868
9523
  } else if (eco === 'pypi') {
8869
9524
  // eslint-disable-next-line no-await-in-loop
@@ -8879,9 +9534,7 @@ async function run$F(argv, importMeta, {
8879
9534
  });
8880
9535
  outcomes.push({
8881
9536
  ecosystem: 'pypi',
8882
- ok: pypiResult.ok,
8883
- noEcosystemFound: pypiResult.noEcosystemFound,
8884
- manifestPath: pypiResult.manifestPath
9537
+ ...pypiOutcome(pypiResult)
8885
9538
  });
8886
9539
  }
8887
9540
  }
@@ -14446,7 +15099,7 @@ const reachabilityFlags = {
14446
15099
  reachConcurrency: {
14447
15100
  type: 'number',
14448
15101
  default: 1,
14449
- description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available. NPM reachability analysis does not support concurrent execution, so the concurrency level is ignored for NPM.'
15102
+ description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available.'
14450
15103
  },
14451
15104
  reachContinueOnAnalysisErrors: {
14452
15105
  type: 'boolean',
@@ -16828,6 +17481,7 @@ async function run$8(argv, importMeta, {
16828
17481
  }
16829
17482
 
16830
17483
  async function outputScanReach(result, {
17484
+ cwd,
16831
17485
  outputKind,
16832
17486
  outputPath
16833
17487
  }) {
@@ -16848,7 +17502,11 @@ async function outputScanReach(result, {
16848
17502
  logger.logger.info(`Reachability report has been written to: ${actualOutputPath}`);
16849
17503
 
16850
17504
  // Warn about individual vulnerabilities where reachability analysis errored.
16851
- const errors = utils.extractReachabilityErrors(result.data.reachabilityReport);
17505
+ // Resolve the report path against the scan `cwd` (not `process.cwd()`):
17506
+ // Coana writes the facts file relative to `cwd` and `reachabilityReport`
17507
+ // is a `cwd`-relative path, so reading the bare relative path would miss
17508
+ // the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`).
17509
+ const errors = utils.extractReachabilityErrors(path.resolve(cwd, result.data.reachabilityReport));
16852
17510
  if (errors.length) {
16853
17511
  logger.logger.log('');
16854
17512
  logger.logger.warn(`Reachability analysis returned ${errors.length} ${words.pluralize('error', errors.length)} for individual ${words.pluralize('vulnerability', errors.length)}:`);
@@ -16877,6 +17535,7 @@ async function handleScanReach({
16877
17535
  });
16878
17536
  if (!supportedFilesCResult.ok) {
16879
17537
  await outputScanReach(supportedFilesCResult, {
17538
+ cwd,
16880
17539
  outputKind,
16881
17540
  outputPath
16882
17541
  });
@@ -16924,7 +17583,22 @@ async function handleScanReach({
16924
17583
  uploadManifests: true
16925
17584
  });
16926
17585
  spinner.stop();
17586
+
17587
+ // Standalone reachability has no full scan to bind to, but the tier1
17588
+ // reachability scan row still needs to transition to its DONE terminal
17589
+ // state — otherwise it sits at the post-Coana intermediate state forever
17590
+ // and looks indistinguishable from a stuck run. Pass `null` as the full
17591
+ // scan id; the endpoint accepts it for this flow. Best-effort: never
17592
+ // block the user-visible output on this.
17593
+ const tier1Id = result.ok ? result.data?.tier1ReachabilityScanId : undefined;
17594
+ if (tier1Id) {
17595
+ const finalizeResult = await finalizeTier1Scan(tier1Id, null);
17596
+ if (!finalizeResult.ok) {
17597
+ logger.logger.warn(`Failed to finalize tier1 reachability scan: ${finalizeResult.message}${finalizeResult.cause ? ` — ${finalizeResult.cause}` : ''}`);
17598
+ }
17599
+ }
16927
17600
  await outputScanReach(result, {
17601
+ cwd,
16928
17602
  outputKind,
16929
17603
  outputPath
16930
17604
  });
@@ -19067,5 +19741,5 @@ process.on('unhandledRejection', async (reason, promise) => {
19067
19741
  // eslint-disable-next-line n/no-process-exit
19068
19742
  process.exit(1);
19069
19743
  });
19070
- //# debugId=52e1770b-8fec-41b9-83a1-5c52a6251b6c
19744
+ //# debugId=614e598d-c01b-4289-b35e-bff2af2ac507
19071
19745
  //# sourceMappingURL=cli.js.map