socket 1.1.112 → 1.1.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +11 -1
  2. package/dist/cli.js +2076 -1434
  3. package/dist/cli.js.map +1 -1
  4. package/dist/constants.js +6 -8
  5. package/dist/constants.js.map +1 -1
  6. package/dist/tsconfig.dts.tsbuildinfo +1 -1
  7. package/dist/types/commands/manifest/bazel/bazel-cquery.d.mts +70 -0
  8. package/dist/types/commands/manifest/bazel/bazel-cquery.d.mts.map +1 -0
  9. package/dist/types/commands/manifest/bazel/bazel-pypi-discovery.d.mts +14 -1
  10. package/dist/types/commands/manifest/bazel/bazel-pypi-discovery.d.mts.map +1 -1
  11. package/dist/types/commands/manifest/bazel/bazel-query-runner.d.mts +58 -14
  12. package/dist/types/commands/manifest/bazel/bazel-query-runner.d.mts.map +1 -1
  13. package/dist/types/commands/manifest/bazel/bazel-repo-discovery.d.mts +43 -30
  14. package/dist/types/commands/manifest/bazel/bazel-repo-discovery.d.mts.map +1 -1
  15. package/dist/types/commands/manifest/bazel/bazel-workspace-walk.d.mts +18 -0
  16. package/dist/types/commands/manifest/bazel/bazel-workspace-walk.d.mts.map +1 -0
  17. package/dist/types/commands/manifest/bazel/cmd-manifest-bazel.d.mts +12 -10
  18. package/dist/types/commands/manifest/bazel/cmd-manifest-bazel.d.mts.map +1 -1
  19. package/dist/types/commands/manifest/bazel/extract_bazel_to_maven.d.mts +70 -8
  20. package/dist/types/commands/manifest/bazel/extract_bazel_to_maven.d.mts.map +1 -1
  21. package/dist/types/commands/manifest/cmd-manifest-gradle.d.mts.map +1 -1
  22. package/dist/types/commands/manifest/cmd-manifest-kotlin.d.mts.map +1 -1
  23. package/dist/types/commands/manifest/cmd-manifest-scala.d.mts.map +1 -1
  24. package/dist/types/commands/manifest/coana-manifest-facts.d.mts +27 -0
  25. package/dist/types/commands/manifest/coana-manifest-facts.d.mts.map +1 -0
  26. package/dist/types/commands/manifest/convert-gradle-to-facts.d.mts +8 -2
  27. package/dist/types/commands/manifest/convert-gradle-to-facts.d.mts.map +1 -1
  28. package/dist/types/commands/manifest/convert-sbt-to-facts.d.mts +10 -2
  29. package/dist/types/commands/manifest/convert-sbt-to-facts.d.mts.map +1 -1
  30. package/dist/types/commands/manifest/generate_auto_manifest.d.mts.map +1 -1
  31. package/dist/types/commands/manifest/setup-manifest-config.d.mts.map +1 -1
  32. package/dist/types/commands/scan/finalize-tier1-scan.d.mts +6 -4
  33. package/dist/types/commands/scan/finalize-tier1-scan.d.mts.map +1 -1
  34. package/dist/types/commands/scan/handle-create-new-scan.d.mts.map +1 -1
  35. package/dist/types/commands/scan/handle-scan-reach.d.mts.map +1 -1
  36. package/dist/types/commands/scan/output-scan-reach.d.mts +2 -1
  37. package/dist/types/commands/scan/output-scan-reach.d.mts.map +1 -1
  38. package/dist/types/commands/scan/perform-reachability-analysis.d.mts.map +1 -1
  39. package/dist/types/constants.d.mts +2 -4
  40. package/dist/types/constants.d.mts.map +1 -1
  41. package/dist/types/utils/glob.d.mts +1 -0
  42. package/dist/types/utils/glob.d.mts.map +1 -1
  43. package/dist/types/utils/socket-json.d.mts +4 -2
  44. package/dist/types/utils/socket-json.d.mts.map +1 -1
  45. package/dist/utils.js +2 -1
  46. package/dist/utils.js.map +1 -1
  47. package/package.json +2 -2
  48. package/requirements.json +1 -1
  49. package/dist/socket-facts.init.gradle +0 -429
  50. package/dist/socket-facts.plugin.scala +0 -416
  51. package/dist/types/commands/manifest/bazel/bazel-build-parser.d.mts +0 -34
  52. package/dist/types/commands/manifest/bazel/bazel-build-parser.d.mts.map +0 -1
package/dist/cli.js CHANGED
@@ -15,10 +15,10 @@ var words = require('../external/@socketsecurity/registry/lib/words');
15
15
  var fs$1 = require('node:fs');
16
16
  var arrays = require('../external/@socketsecurity/registry/lib/arrays');
17
17
  var prompts = require('../external/@socketsecurity/registry/lib/prompts');
18
- var bin = require('../external/@socketsecurity/registry/lib/bin');
19
- var childProcess = require('node:child_process');
20
18
  var os = require('node:os');
21
19
  var spawn = require('../external/@socketsecurity/registry/lib/spawn');
20
+ var bin = require('../external/@socketsecurity/registry/lib/bin');
21
+ var childProcess = require('node:child_process');
22
22
  var fs$2 = require('../external/@socketsecurity/registry/lib/fs');
23
23
  var strings = require('../external/@socketsecurity/registry/lib/strings');
24
24
  var path$1 = require('../external/@socketsecurity/registry/lib/path');
@@ -1110,8 +1110,10 @@ async function fetchSupportedScanFileNames(options) {
1110
1110
 
1111
1111
  /**
1112
1112
  * Finalize a tier1 reachability scan.
1113
- * - Associates the tier1 reachability scan metadata with the full scan.
1114
- * - Sets the tier1 reachability scan to "finalized" state.
1113
+ * - Associates the tier1 reachability scan metadata with the full scan
1114
+ * (or with `null` when called from a standalone reachability flow that
1115
+ * has no full scan to bind to).
1116
+ * - Transitions the tier1 reachability scan to its DONE terminal state.
1115
1117
  */
1116
1118
  async function finalizeTier1Scan(tier1ReachabilityScanId, scanId) {
1117
1119
  // we do not use the SDK here because the tier1-reachability-scan/finalize is a hidden
@@ -1835,12 +1837,21 @@ async function performReachabilityAnalysis(options) {
1835
1837
  }
1836
1838
  return coanaResult;
1837
1839
  }
1840
+
1841
+ // Coana writes the facts file relative to the scan `cwd` (it is spawned
1842
+ // with `cwd` above), so resolve the read path against `cwd` too. Reading
1843
+ // the bare relative path would resolve against `process.cwd()` and miss
1844
+ // the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`), silently
1845
+ // dropping the tier 1 scan id and skipping finalize downstream.
1846
+ const resolvedReportPath = path.resolve(cwd, outputFilePath);
1838
1847
  return {
1839
1848
  ok: true,
1840
1849
  data: {
1841
- // Use the actual output filename for the scan.
1850
+ // Use the actual output filename for the scan. Keep this `cwd`-relative
1851
+ // so the upload (which relativizes against `cwd`) and the post-success
1852
+ // unlink (`path.resolve(cwd, reachabilityReport)`) keep working.
1842
1853
  reachabilityReport: outputFilePath,
1843
- tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(outputFilePath)
1854
+ tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(resolvedReportPath)
1844
1855
  }
1845
1856
  };
1846
1857
  }
@@ -1930,318 +1941,6 @@ async function resolveBazelBinary(explicit) {
1930
1941
  throw new utils.InputError('Could not find bazelisk or bazel on PATH. ' + 'Install bazelisk (recommended; https://github.com/bazelbuild/bazelisk) ' + 'or bazel, or pass --bazel <path>.');
1931
1942
  }
1932
1943
 
1933
- /**
1934
- * Parse `bazel query --output=build` text and `unsorted_deps.json` files
1935
- * (rules_jvm_external) into a uniform `ExtractedArtifact` shape consumed by
1936
- * the converter.
1937
- *
1938
- * Security gate: every regex uses bounded character classes to prevent
1939
- * catastrophic backtracking on hostile bazel-query output. Rules without
1940
- * `maven_coordinates=` are skipped. Caller is responsible for size-capping
1941
- * the input string.
1942
- */
1943
-
1944
- // Per-rule block matcher: matches `<kind>(...)` where kind is jvm_import or
1945
- // aar_import, bounded by `^)` (closing paren on its own line) — Bazel
1946
- // `--output=build` output convention. Body length capped at 8 KiB; real
1947
- // rules are ~500 bytes, so the cap is 16x normal. Prevents pathological
1948
- // backtracking on hostile input.
1949
- const RULE_RE = /^(jvm_import|aar_import)\(([\s\S]{0,8192}?)^\)/gm;
1950
-
1951
- // Cache for per-attribute regexes — avoids recompiling the same pattern on
1952
- // every rule block. Keyed by attr name; all attr names are safe alphanumeric
1953
- // identifiers so no escaping is needed beyond the bounded character class.
1954
- const ATTR_RE_CACHE = new Map();
1955
-
1956
- // Cache for per-tag-key regexes used by extractTagValue.
1957
- const TAG_RE_CACHE = new Map();
1958
- function extractAttr(body, attr) {
1959
- // Match `<attr> = "VALUE"` — quoted-string attrs only.
1960
- // Quoted value capped at 4 KiB; canonical Maven URLs are ~150 bytes.
1961
- let re = ATTR_RE_CACHE.get(attr);
1962
- if (!re) {
1963
- re = new RegExp(`\\b${attr}\\s*=\\s*"([^"\\n]{0,4096})"`);
1964
- ATTR_RE_CACHE.set(attr, re);
1965
- }
1966
- const m = re.exec(body);
1967
- return m?.[1];
1968
- }
1969
-
1970
- // Extracts a `key=value` pair from inside a Bazel `tags = [...]` attribute
1971
- // (rules_jvm_external encodes maven_sha256, maven_coordinates etc. this way).
1972
- // Pattern: `"maven_sha256=<hex>"` inside the tags list.
1973
- // Returns undefined when the tag is absent or malformed.
1974
- function extractTagValue(body, tagKey) {
1975
- // Match the full tags = [...] block (bounded at 8 KiB).
1976
- const tagsM = /\btags\s*=\s*\[([\s\S]{0,8192}?)\]/m.exec(body);
1977
- if (!tagsM) {
1978
- return undefined;
1979
- }
1980
- const tagsBlob = tagsM[1];
1981
- // Within the blob, look for "<tagKey>=<value>" inside a quoted string.
1982
- // Bounded at 512 bytes per tag entry (sha256 hex is 64 chars; URLs ~150).
1983
- let tagRe = TAG_RE_CACHE.get(tagKey);
1984
- if (!tagRe) {
1985
- tagRe = new RegExp(`"${tagKey}=([^"\\n]{0,512})"`);
1986
- TAG_RE_CACHE.set(tagKey, tagRe);
1987
- }
1988
- const m = tagRe.exec(tagsBlob);
1989
- return m?.[1];
1990
- }
1991
- function extractDeps(body) {
1992
- // Match `deps = ["a", "b", ...]`. Body length capped at 16 KiB; real
1993
- // dep lists are <2 KiB.
1994
- const m = /\bdeps\s*=\s*\[([\s\S]{0,16384}?)\]/m.exec(body);
1995
- if (!m) {
1996
- return [];
1997
- }
1998
- const out = [];
1999
- // Per-label cap at 512 bytes; real Bazel labels are <100 bytes.
2000
- for (const q of m[1].matchAll(/"([^"\n]{0,512})"/g)) {
2001
- out.push(q[1]);
2002
- }
2003
- return out;
2004
- }
2005
-
2006
- /**
2007
- * Parse `bazel query --output=build` stdout into `ExtractedArtifact[]`.
2008
- * Skips rules without a `maven_coordinates` attribute (those aren't
2009
- * rules_jvm_external lockfile rules).
2010
- */
2011
- function parseBazelBuildOutput(text) {
2012
- const results = [];
2013
- for (const m of text.matchAll(RULE_RE)) {
2014
- const ruleKind = m[1];
2015
- const body = m[2];
2016
- const ruleName = extractAttr(body, 'name');
2017
- // maven_coordinates can be:
2018
- // (a) a top-level rule attribute: `maven_coordinates = "g:a:v"` (newer rje)
2019
- // (b) inside tags = [...]: `"maven_coordinates=g:a:v"` (older rje, e.g. ray)
2020
- const coords = extractAttr(body, 'maven_coordinates') ?? extractTagValue(body, 'maven_coordinates');
2021
- if (!ruleName || !coords) {
2022
- continue;
2023
- }
2024
- // maven_sha256 is encoded inside tags = [...] as "maven_sha256=<hex>" by
2025
- // rules_jvm_external; try tags first, fall back to standalone attr for
2026
- // older rule shapes that may declare it as a top-level attribute.
2027
- const mavenSha256 = extractTagValue(body, 'maven_sha256') ?? extractAttr(body, 'maven_sha256');
2028
- results.push({
2029
- ruleKind,
2030
- ruleName,
2031
- mavenCoordinates: coords,
2032
- mavenUrl: extractAttr(body, 'maven_url'),
2033
- mavenSha256,
2034
- deps: extractDeps(body)
2035
- });
2036
- }
2037
- return results;
2038
- }
2039
- function ruleNameFromCoordinate(c) {
2040
- return c.replace(/[^A-Za-z0-9]/g, '_');
2041
- }
2042
-
2043
- /**
2044
- * Parse supported `external/<repo>/unsorted_deps.json` shapes emitted by
2045
- * rules_jvm_external. Older files use an artifact array with full coordinates;
2046
- * newer v2 lock-file-shaped files use artifact/dependency maps keyed by
2047
- * `group:artifact`. Caller MUST size-cap the input because JSON.parse is
2048
- * unbounded by default.
2049
- */
2050
- function parseUnsortedDepsJson(json) {
2051
- let parsed;
2052
- try {
2053
- parsed = JSON.parse(json);
2054
- } catch {
2055
- return [];
2056
- }
2057
- const maybe = parsed;
2058
- if (Array.isArray(maybe.artifacts)) {
2059
- const out = [];
2060
- for (const a of maybe.artifacts) {
2061
- if (typeof a?.coordinates !== 'string') {
2062
- continue;
2063
- }
2064
- const deps = [];
2065
- if (Array.isArray(a.deps)) {
2066
- for (const d of a.deps) {
2067
- if (typeof d === 'string') {
2068
- deps.push(d);
2069
- }
2070
- }
2071
- }
2072
- out.push({
2073
- ruleKind: 'jvm_import',
2074
- ruleName: ruleNameFromCoordinate(a.coordinates),
2075
- mavenCoordinates: a.coordinates,
2076
- mavenUrl: typeof a.url === 'string' ? a.url : undefined,
2077
- mavenSha256: typeof a.sha256 === 'string' ? a.sha256 : undefined,
2078
- deps
2079
- });
2080
- }
2081
- return out;
2082
- }
2083
- if (!maybe.artifacts || typeof maybe.artifacts !== 'object') {
2084
- return [];
2085
- }
2086
- const dependencies = maybe.dependencies ?? {};
2087
- const out = [];
2088
- for (const [groupArtifact, artifact] of Object.entries(maybe.artifacts)) {
2089
- if (!artifact || typeof artifact.version !== 'string') {
2090
- continue;
2091
- }
2092
- const shasums = artifact.shasums ?? {};
2093
- const jarSha = shasums['jar'];
2094
- if (typeof jarSha === 'string' || Object.keys(shasums).length === 0) {
2095
- out.push(v2Artifact(groupArtifact, artifact.version, jarSha, dependencies));
2096
- }
2097
- for (const [classifier, sha256] of Object.entries(shasums)) {
2098
- if (classifier === 'jar' || typeof sha256 !== 'string') {
2099
- continue;
2100
- }
2101
- const classifierKey = `${groupArtifact}:jar:${classifier}`;
2102
- out.push(v2Artifact(classifierKey, artifact.version, sha256, dependencies));
2103
- }
2104
- }
2105
- return out;
2106
- }
2107
- function v2Artifact(artifactKey, version, sha256, dependencies) {
2108
- return {
2109
- ruleKind: 'jvm_import',
2110
- ruleName: ruleNameFromCoordinate(artifactKey),
2111
- mavenCoordinates: `${artifactKey}:${version}`,
2112
- mavenSha256: sha256,
2113
- deps: Array.isArray(dependencies[artifactKey]) ? dependencies[artifactKey].filter(d => typeof d === 'string') : []
2114
- };
2115
- }
2116
-
2117
- let probed = false;
2118
-
2119
- // Verifies `java` is functional in the current execution environment. Bazel
2120
- // JVM manifest extraction (rules_jvm_external → Coursier) requires a real
2121
- // JDK; the CLI does not attempt to discover Homebrew installs or mutate the
2122
- // caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
2123
- // actionable message so the surfaced error names the prerequisite directly
2124
- // instead of relying on Bazel's downstream diagnostic.
2125
- function ensureJavaOnPath() {
2126
- if (probed) {
2127
- return;
2128
- }
2129
- try {
2130
- childProcess.execSync('java -version', {
2131
- stdio: 'ignore'
2132
- });
2133
- probed = true;
2134
- } catch {
2135
- throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
2136
- }
2137
- }
2138
-
2139
- // Validates that --bazel-output-base is a path we can use as Bazel's output_base.
2140
- // Throws InputError if:
2141
- // - the input contains `..` segments (path traversal guard)
2142
- // - the existing path is not writable
2143
- // - the path cannot be created (parent not writable)
2144
- function validateOutputBase(outputBase, cwd) {
2145
- // Path traversal guard: reject any literal `..` segment in user input.
2146
- // After path.resolve these are normalised away, so we check the raw input.
2147
- // Split on both separators. On Windows `path.sep === '\\'`, so
2148
- // input like `foo/../etc` would not contain a `..` segment under the
2149
- // platform-specific split, bypassing the guard — yet path.resolve below
2150
- // would still normalise the `..` and a traversal target could materialise.
2151
- const segments = outputBase.split(/[\\/]/);
2152
- if (segments.includes('..')) {
2153
- throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
2154
- }
2155
- const resolved = path.resolve(cwd, outputBase);
2156
- if (fs$1.existsSync(resolved)) {
2157
- try {
2158
- fs$1.accessSync(resolved, fs$1.constants.W_OK);
2159
- } catch {
2160
- throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
2161
- }
2162
- return;
2163
- }
2164
- // Path does not exist yet — try to create it so bazel can populate it.
2165
- try {
2166
- fs$1.mkdirSync(resolved, {
2167
- recursive: true
2168
- });
2169
- } catch (e) {
2170
- throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
2171
- }
2172
- }
2173
-
2174
- // Stable shim dir name — same process will get the same dir; concurrent
2175
- // socket-cli invocations on the same machine share it. The symlink target
2176
- // is whatever python3 resolves to NOW; if PATH changes between invocations
2177
- // we replace the symlink.
2178
- const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
2179
-
2180
- // Cache the result for the lifetime of this process.
2181
- let cached = null;
2182
-
2183
- // Safe wrapper around whichBin that returns null instead of throwing when
2184
- // nothrow semantics are broken in older registry versions (realpath 'null' bug).
2185
- async function safeWhichBin(name) {
2186
- try {
2187
- return (await bin.whichBin(name, {
2188
- nothrow: true
2189
- })) ?? null;
2190
- } catch {
2191
- return null;
2192
- }
2193
- }
2194
- async function provisionPythonShim() {
2195
- if (cached) {
2196
- return cached;
2197
- }
2198
- const pythonOnPath = await safeWhichBin('python');
2199
- if (pythonOnPath) {
2200
- cached = {
2201
- augmentedEnv: undefined,
2202
- shimDir: undefined
2203
- };
2204
- return cached;
2205
- }
2206
- const python3OnPath = await safeWhichBin('python3');
2207
- if (!python3OnPath) {
2208
- throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
2209
- }
2210
- const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
2211
- fs$1.mkdirSync(shimDir, {
2212
- recursive: true
2213
- });
2214
- const linkPath = path.join(shimDir, 'python');
2215
- // Replace the symlink defensively in case python3's resolved path moved.
2216
- if (fs$1.existsSync(linkPath)) {
2217
- try {
2218
- fs$1.unlinkSync(linkPath);
2219
- } catch {
2220
- // Tolerate races; the next symlinkSync may still succeed.
2221
- }
2222
- }
2223
- // The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
2224
- // so a concurrent socket-cli invocation may re-create the link between our
2225
- // unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
2226
- // other process won the race and left a usable shim in place.
2227
- try {
2228
- fs$1.symlinkSync(python3OnPath, linkPath);
2229
- } catch (e) {
2230
- if (e.code === 'EEXIST' && fs$1.existsSync(linkPath)) ; else {
2231
- throw e;
2232
- }
2233
- }
2234
- const augmentedEnv = {
2235
- ...process.env,
2236
- PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
2237
- };
2238
- cached = {
2239
- augmentedEnv,
2240
- shimDir
2241
- };
2242
- return cached;
2243
- }
2244
-
2245
1944
  // Default per-invocation timeout for bazel queries. Bazel cold-cache starts
2246
1945
  // can take several minutes; 10 minutes is generous while still bounding CI hangs.
2247
1946
  const BAZEL_QUERY_TIMEOUT_MS = 600_000;
@@ -2258,42 +1957,58 @@ function splitBazelFlags(flags) {
2258
1957
  }
2259
1958
  return flags.split(/\s+/).filter(Boolean);
2260
1959
  }
2261
- function buildBazelModShowVisibleReposArgv(opts) {
1960
+
1961
+ // Build the shared startup-flag prefix for any bazel invocation. Centralised
1962
+ // so `--output_user_root` propagates to every spawn — principle 7 of the
1963
+ // Maven design requires per-invocation server isolation across query,
1964
+ // cquery, and `bazel mod` commands alike.
1965
+ function buildStartupFlags(opts) {
2262
1966
  const startup = [];
2263
1967
  if (opts.bazelRc) {
2264
1968
  startup.push(`--bazelrc=${opts.bazelRc}`);
2265
1969
  }
1970
+ if (opts.outputUserRoot) {
1971
+ startup.push(`--output_user_root=${opts.outputUserRoot}`);
1972
+ }
2266
1973
  if (opts.bazelOutputBase) {
2267
1974
  startup.push(`--output_base=${opts.bazelOutputBase}`);
2268
1975
  }
1976
+ return startup;
1977
+ }
1978
+ function buildBazelModShowVisibleReposArgv(opts) {
1979
+ const userFlags = splitBazelFlags(opts.bazelFlags);
1980
+ return [...buildStartupFlags(opts), 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
1981
+ }
1982
+ function buildBazelModShowMavenExtensionArgv(opts) {
2269
1983
  const userFlags = splitBazelFlags(opts.bazelFlags);
2270
- return [...startup, 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
1984
+ return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven',
1985
+ // Belt-and-suspenders output reducer mirroring the PyPI path: bias the
1986
+ // report toward the root module's usages. The authoritative pruning is
1987
+ // the importers-filter applied to the parsed output, so this is not
1988
+ // relied on for correctness.
1989
+ '--extension_usages=<root>', ...userFlags];
2271
1990
  }
2272
1991
  function buildBazelModShowPipExtensionArgv(opts) {
2273
- const startup = [];
2274
- if (opts.bazelRc) {
2275
- startup.push(`--bazelrc=${opts.bazelRc}`);
2276
- }
2277
- if (opts.bazelOutputBase) {
2278
- startup.push(`--output_base=${opts.bazelOutputBase}`);
2279
- }
2280
1992
  const userFlags = splitBazelFlags(opts.bazelFlags);
2281
- return [...startup, 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
1993
+ return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
2282
1994
  }
2283
1995
  function buildBazelArgv(queryStr, opts, output = 'build') {
2284
1996
  // Startup flags MUST precede the `query` subcommand.
2285
1997
  // Bazel argv shape: <startup> query <queryFlags> <invocationFlags> <queryStr> --output=<output> <userFlags>
2286
- const startup = [];
2287
- if (opts.bazelRc) {
2288
- startup.push(`--bazelrc=${opts.bazelRc}`);
2289
- }
2290
- if (opts.bazelOutputBase) {
2291
- startup.push(`--output_base=${opts.bazelOutputBase}`);
2292
- }
2293
1998
  // Keep query output stable and avoid updating Bazel lockfiles while extracting.
2294
1999
  const queryFlags = ['--lockfile_mode=off', '--noshow_progress'];
2295
2000
  const userFlags = splitBazelFlags(opts.bazelFlags);
2296
- return [...startup, 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
2001
+ return [...buildStartupFlags(opts), 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
2002
+ }
2003
+
2004
+ // Lightweight presence-check cquery used by the tri-state probe classifier.
2005
+ // `--keep_going --output=label` keeps it fast even on partial-analysis
2006
+ // repos and avoids paying for `--output=jsonproto` plus
2007
+ // `--proto:output_rule_attrs` (which the heavier metadata extraction in
2008
+ // `bazel-cquery.mts` needs but the probe does not).
2009
+ function buildBazelProbeCqueryArgv(repoName, opts) {
2010
+ const userFlags = splitBazelFlags(opts.bazelFlags);
2011
+ return [...buildStartupFlags(opts), 'cquery', '--lockfile_mode=off', '--noshow_progress', ...opts.invocationFlags, `@${repoName}//...`, '--output=label', '--keep_going', ...userFlags];
2297
2012
  }
2298
2013
  function stringField(value) {
2299
2014
  return typeof value === 'string' ? value : '';
@@ -2407,14 +2122,7 @@ async function runBazelQuery(queryStr, opts, output) {
2407
2122
  }
2408
2123
  }
2409
2124
  }
2410
-
2411
- /**
2412
- * Bzlmod-native visible repository enumeration. This is only a candidate
2413
- * source; callers must still validate each returned apparent repo name with a
2414
- * semantic query for generated ecosystem rules.
2415
- */
2416
- async function runBazelModShowVisibleRepos(opts) {
2417
- const argv = buildBazelModShowVisibleReposArgv(opts);
2125
+ async function runBazelOneShot(argv, opts, step) {
2418
2126
  if (opts.verbose) {
2419
2127
  logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
2420
2128
  }
@@ -2446,1115 +2154,1735 @@ async function runBazelModShowVisibleRepos(opts) {
2446
2154
  durationMs: Date.now() - startedAt,
2447
2155
  opts,
2448
2156
  result,
2449
- step: 'bazel mod dump_repo_mapping'
2157
+ step
2450
2158
  });
2451
2159
  return result;
2452
2160
  }
2453
2161
 
2454
2162
  /**
2455
- * Bzlmod-native rules_python pip extension usage inspection. This is the
2456
- * authoritative source for root-module pip.parse metadata when Bazel supports
2457
- * the command; callers keep bounded static parsing as fallback.
2163
+ * Bzlmod-native visible repository enumeration. NOTE: only consumed by the
2164
+ * legacy PyPI path; the Maven path uses `runBazelModShowMavenExtension`
2165
+ * instead because `dump_repo_mapping` over-enumerates apparent names that
2166
+ * are not Maven hubs.
2458
2167
  */
2459
- async function runBazelModShowPipExtension(opts) {
2460
- const argv = buildBazelModShowPipExtensionArgv(opts);
2461
- if (opts.verbose) {
2462
- logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
2463
- }
2464
- const startedAt = Date.now();
2465
- let result;
2466
- try {
2467
- const output = await spawn.spawn(opts.bin, argv, {
2468
- cwd: opts.cwd,
2469
- timeout: BAZEL_QUERY_TIMEOUT_MS,
2470
- ...(opts.env ? {
2471
- env: opts.env
2472
- } : {})
2473
- });
2474
- const {
2475
- code,
2476
- stderr,
2477
- stdout
2478
- } = output;
2479
- result = {
2480
- code,
2481
- stdout,
2482
- stderr
2483
- };
2484
- } catch (e) {
2485
- result = normalizeSpawnError(e);
2486
- }
2487
- logBazelTrace({
2488
- argv,
2489
- durationMs: Date.now() - startedAt,
2490
- opts,
2491
- result,
2492
- step: 'bazel mod show_extension rules_python pip'
2493
- });
2494
- return result;
2168
+ async function runBazelModShowVisibleRepos(opts) {
2169
+ return await runBazelOneShot(buildBazelModShowVisibleReposArgv(opts), opts, 'bazel mod dump_repo_mapping');
2495
2170
  }
2496
2171
 
2497
2172
  /**
2498
- * Build a `RepoProbe` (compatible with bazel-repo-discovery) bound to opts.
2499
- * Used by `discoverMavenRepos` to validate candidate Maven repo
2500
- * names against the running workspace.
2173
+ * Bzlmod-native Maven hub enumeration via the rules_jvm_external maven
2174
+ * extension. The text-format report lists every repo the extension
2175
+ * generated; `parseShowExtensionOutput` (bazel-repo-discovery.mts)
2176
+ * extracts the hubs from the `Fetched repositories:` section.
2501
2177
  */
2502
- function buildProbeFor(opts) {
2503
- return async repoName => {
2504
- const queryStr = `kind("jvm_import rule|aar_import rule", @${repoName}//:*)`;
2505
- const result = await runBazelQuery(queryStr, opts);
2506
- return {
2507
- stdout: result.stdout,
2508
- code: result.code
2509
- };
2510
- };
2178
+ async function runBazelModShowMavenExtension(opts) {
2179
+ return await runBazelOneShot(buildBazelModShowMavenExtensionArgv(opts), opts, 'bazel mod show_extension rules_jvm_external maven');
2511
2180
  }
2512
2181
 
2513
2182
  /**
2514
- * Build a `RepoProbe` for validating pip hub candidates.
2515
- * Queries the hub for package targets (e.g. `@<hub>//...`) and returns
2516
- * stdout so the caller can check for `:pkg` labels or alias rules.
2517
- * Does NOT require `pypi_name=` tags in the hub output, because those
2518
- * tags live on spoke repos, not the hub alias layer.
2183
+ * Bzlmod-native rules_python pip extension usage inspection. Used by the
2184
+ * PyPI path; kept here since the argv shape is identical to the maven
2185
+ * variant modulo the extension target.
2186
+ */
2187
+ async function runBazelModShowPipExtension(opts) {
2188
+ return await runBazelOneShot(buildBazelModShowPipExtensionArgv(opts), opts, 'bazel mod show_extension rules_python pip');
2189
+ }
2190
+
2191
+ /**
2192
+ * Build a `RepoProbe` (compatible with bazel-repo-discovery's tri-state
2193
+ * classifier) bound to opts. Runs the lightweight presence-check cquery
2194
+ * `@<name>//... --output=label --keep_going` — cheap enough to attempt
2195
+ * every conventional Maven hub name without triggering `repository_rule`
2196
+ * fetches on undefined names (Exp 3).
2197
+ */
2198
+ function buildMavenProbeFor(opts) {
2199
+ return async repoName => {
2200
+ const argv = buildBazelProbeCqueryArgv(repoName, opts);
2201
+ const result = await runBazelOneShot(argv, opts, `bazel cquery probe @${repoName}`);
2202
+ return {
2203
+ code: result.code,
2204
+ stdout: result.stdout,
2205
+ stderr: result.stderr
2206
+ };
2207
+ };
2208
+ }
2209
+
2210
+ /**
2211
+ * Build a `RepoProbe` for validating pip hub candidates.
2212
+ * Queries the hub for package targets (e.g. `@<hub>//...`) and returns the
2213
+ * full result triple so the caller can check for `:pkg` labels or alias
2214
+ * rules. Does NOT require `pypi_name=` tags in the hub output, because
2215
+ * those tags live on spoke repos, not the hub alias layer.
2519
2216
  */
2520
2217
  function buildPypiProbeFor(opts) {
2521
2218
  return async hubName => {
2522
2219
  const queryStr = `@${hubName}//...`;
2523
2220
  const result = await runBazelQuery(queryStr, opts);
2524
2221
  return {
2222
+ code: result.code,
2525
2223
  stdout: result.stdout,
2526
- code: result.code
2224
+ stderr: result.stderr
2527
2225
  };
2528
2226
  };
2529
2227
  }
2530
2228
 
2531
- // Maximum size (bytes) we will read for any single Bazel workspace file.
2532
- // Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
2533
- const MAX_WORKSPACE_FILE_BYTES$1 = 5 * 1024 * 1024;
2534
-
2535
- // Maximum candidate count we will return (deduped) before truncating.
2536
- // Real repos have <20; this is a hard ceiling against pathological inputs.
2537
- const MAX_CANDIDATES$1 = 256;
2538
-
2539
- // Regex strategy: anchored, bounded character classes, no nested quantifiers.
2540
- // Match `use_repo(maven, "X", "Y", ...)` with a bounded arg-list window to
2541
- // avoid catastrophic backtracking on hostile input.
2542
-
2543
- // Bzlmod use_repo(maven, "name1", "name2"...).
2544
- // Bounded: matches up to ~4KB of arg list to avoid catastrophic backtracking.
2545
- const USE_REPO_RE = /use_repo\s*\(\s*maven\s*,([^)]{0,4096})\)/g;
2546
- const BAZEL_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
2547
- const BAZEL_REPO_NAME_RE = new RegExp(`^${BAZEL_REPO_NAME_PATTERN}$`);
2548
- // Quoted-name extractor inside the captured argument blob.
2549
- const QUOTED_NAME_RE = new RegExp(`"(${BAZEL_REPO_NAME_PATTERN})"`, 'g');
2550
-
2551
- // Legacy maven_install(name = "X", ...) on a single statement.
2552
- // Match the name= keyword arg specifically; bounded.
2553
- const MAVEN_INSTALL_NAME_RE = new RegExp(`maven_install\\s*\\([^)]{0,8192}?\\bname\\s*=\\s*"(${BAZEL_REPO_NAME_PATTERN})"`, 'g');
2554
- const MAVEN_COORDINATES_MARKER_RE = /\bmaven_coordinates\s*=/;
2229
+ /**
2230
+ * Per-repo metadata cquery + jsonproto parser for the Maven path.
2231
+ *
2232
+ * Pipeline:
2233
+ * 1. Build a cquery argv targeting `attr("tags", "\bmaven_coordinates=",
2234
+ * @<repo>//...)` plus a union variant for the direct `maven_coordinates`
2235
+ * attribute. `--output=jsonproto` +
2236
+ * `--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps`
2237
+ * keeps the payload small while still surfacing the resolved Maven graph.
2238
+ * 2. Spawn under a caller-supplied `outputUserRoot` so the orchestrator can
2239
+ * reap the server cleanly (`bazel --output_user_root=<this> shutdown`
2240
+ * followed by `rm -rf`). The runner itself never deletes anything —
2241
+ * server lifecycle is the orchestrator's concern.
2242
+ * 3. Parse the jsonproto stream defensively: dispatch on `attribute[].type`
2243
+ * and accept both camelCase (`stringValue`, `stringListValue`) and
2244
+ * snake_case (`string_value`, `string_list_value`) payload keys.
2245
+ * 4. Extract the maven coordinate from the direct `maven_coordinates` attr
2246
+ * when present, else scan `tags` for `maven_coordinates=<G:A:V>`.
2247
+ * 5. Resolve each rule's `deps`/`exports`/`runtime_deps` label edges into
2248
+ * versionless Maven coordinates against this repo's own targets, while
2249
+ * `repoName` is still in scope. Edges that point at a hub-prefixed target
2250
+ * we cannot resolve are reported as `unresolvedLabels` so the caller can
2251
+ * flip the hub partial rather than silently dropping graph edges.
2252
+ * 6. Tag every artifact with `workspace:<rel-path>` + `repo:<name>`
2253
+ * provenance via `sourceRepo`.
2254
+ */
2555
2255
 
2556
- // Reads file contents, refusing files that exceed MAX_WORKSPACE_FILE_BYTES.
2557
- // Returns null when the file is missing, oversized, or unreadable.
2558
- function safeReadFile$1(file) {
2559
- if (!fs$1.existsSync(file)) {
2560
- return null;
2256
+ // One Maven artifact recovered from the cquery stream. `ruleKind` is whatever
2257
+ // `ruleClass` jsonproto reports (`jvm_import`, `aar_import`, `java_library`,
2258
+ // `kt_jvm_import`, any future rules_jvm_external rule), so the type is open.
2259
+ // `deps` holds resolved versionless Maven coordinates (the parser resolves the
2260
+ // rule's label edges against this repo's own targets), not raw Bazel labels.
2261
+
2262
+ // Result of parsing one repo's cquery stream: the recovered artifacts (with
2263
+ // resolved coordinate edges in `deps`) plus any hub-prefixed dep labels that
2264
+ // could not be resolved.
2265
+
2266
+ // Maven coordinate token: `g:a:v` (3 parts) or `g:a:v:classifier` /
2267
+ // `g:a:packaging:v` (4-part rules_jvm_external shapes). Tolerant of dots,
2268
+ // dashes, plus, underscores in any part.
2269
+ const MAVEN_COORD_TAG_RE = /^maven_coordinates=(.+)$/;
2270
+
2271
+ // The dep/export/runtime_deps attributes whose label edges encode the
2272
+ // resolved Maven graph. rules_jvm_external writes `jvm_import.deps` (e.g.
2273
+ // `junit` -> `@maven//:org_hamcrest_hamcrest_core`); compile/runtime scopes
2274
+ // surface via `exports`/`runtime_deps`. We union all three.
2275
+ const EDGE_ATTR_NAMES = new Set(['deps', 'exports', 'runtime_deps']);
2276
+
2277
+ // Build the metadata cquery target expression for one repo. The union of
2278
+ // two predicates picks up artifacts that:
2279
+ // - encode the coordinate in the conventional `tags = ["maven_coordinates=..."]`
2280
+ // list (rules_jvm_external's emission for `jvm_import` and friends), or
2281
+ // - declare the coordinate as a direct `maven_coordinates` attribute
2282
+ // (Bazel-native java_library / kt_jvm_import shape).
2283
+ // Note: a `maven_url`-only predicate was intentionally dropped — those rules
2284
+ // carry no coordinate, so selecting them only to discard them downstream is
2285
+ // wasted analysis. If POM-only artifacts ever matter, synthesize
2286
+ // a coordinate from `maven_url` instead of re-adding the selector.
2287
+ function buildMetadataCqueryExpr(repoName) {
2288
+ const r = `@${repoName}//...`;
2289
+ // The `\b` boundary in the tags predicate prevents matches on tag values
2290
+ // like `pre_maven_coordinates=fake`; see todo 2 acceptance test (10).
2291
+ return [`attr("tags", "\\bmaven_coordinates=", ${r})`, `attr("maven_coordinates", ".+", ${r})`].join(' union ');
2292
+ }
2293
+
2294
+ // Build the full cquery argv for a per-repo metadata cquery. Exposed for
2295
+ // argv-shape unit tests without touching `spawn`.
2296
+ function buildMetadataCqueryArgv(repoName, opts) {
2297
+ const startup = [];
2298
+ if (opts.bazelRc) {
2299
+ startup.push(`--bazelrc=${opts.bazelRc}`);
2561
2300
  }
2562
- try {
2563
- const stat = fs$1.statSync(file);
2564
- if (stat.size > MAX_WORKSPACE_FILE_BYTES$1) {
2565
- return null;
2566
- }
2567
- return fs$1.readFileSync(file, 'utf8');
2568
- } catch {
2569
- return null;
2301
+ if (opts.outputUserRoot) {
2302
+ startup.push(`--output_user_root=${opts.outputUserRoot}`);
2303
+ }
2304
+ if (opts.bazelOutputBase) {
2305
+ startup.push(`--output_base=${opts.bazelOutputBase}`);
2570
2306
  }
2307
+ const userFlags = splitBazelFlags(opts.bazelFlags);
2308
+ return [...startup, 'cquery', '--lockfile_mode=off', '--noshow_progress', ...opts.invocationFlags, buildMetadataCqueryExpr(repoName), '--output=jsonproto', '--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps', '--keep_going', ...userFlags];
2571
2309
  }
2572
-
2573
- // Walks workspace root for legacy Starlark sources we can scan: WORKSPACE
2574
- // (and WORKSPACE.bazel) plus top-level .bzl files. Non-recursive by design;
2575
- // Phase 1 explicitly avoids static Starlark parsing at depth.
2576
- function listLegacyStarlarkFiles$1(cwd) {
2577
- const files = [];
2578
- const candidates = ['WORKSPACE', 'WORKSPACE.bazel'];
2579
- for (const c of candidates) {
2580
- const p = path.join(cwd, c);
2581
- if (fs$1.existsSync(p)) {
2582
- files.push(p);
2583
- }
2310
+ function readStringAttr(attr) {
2311
+ if (attr.type !== 'STRING') {
2312
+ return undefined;
2584
2313
  }
2585
- // Top-level .bzl files only.
2586
- try {
2587
- for (const entry of fs$1.readdirSync(cwd)) {
2588
- if (entry.endsWith('.bzl')) {
2589
- files.push(path.join(cwd, entry));
2590
- }
2591
- }
2592
- } catch {
2593
- // Ignore unreadable cwd.
2314
+ if (typeof attr.stringValue === 'string') {
2315
+ return attr.stringValue;
2594
2316
  }
2595
- return files;
2596
- }
2597
-
2598
- // Returns deduplicated, sorted list of items, capped at MAX_CANDIDATES.
2599
- function uniqueSorted(items) {
2600
- const seen = new Set();
2601
- const out = [];
2602
- for (const item of items) {
2603
- if (!seen.has(item)) {
2604
- seen.add(item);
2605
- out.push(item);
2606
- if (out.length >= MAX_CANDIDATES$1) {
2607
- break;
2608
- }
2609
- }
2317
+ if (typeof attr.string_value === 'string') {
2318
+ return attr.string_value;
2610
2319
  }
2611
- return out.sort();
2320
+ return undefined;
2612
2321
  }
2613
- function apparentNameFromJsonValue(value) {
2614
- if (!value || typeof value !== 'object') {
2322
+ function readStringListAttr(attr) {
2323
+ if (attr.type !== 'STRING_LIST') {
2615
2324
  return undefined;
2616
2325
  }
2617
- const obj = value;
2618
- const direct = obj['apparentName'] ?? obj['apparent_name'];
2619
- if (typeof direct === 'string') {
2620
- return direct;
2326
+ if (Array.isArray(attr.stringListValue)) {
2327
+ return attr.stringListValue;
2621
2328
  }
2622
- for (const nested of Object.values(obj)) {
2623
- const found = apparentNameFromJsonValue(nested);
2624
- if (found) {
2625
- return found;
2626
- }
2329
+ if (Array.isArray(attr.string_list_value)) {
2330
+ return attr.string_list_value;
2627
2331
  }
2628
2332
  return undefined;
2629
2333
  }
2630
- function apparentNamesFromRepoMapping(value) {
2631
- if (!value || typeof value !== 'object' || Array.isArray(value)) {
2632
- return [];
2334
+
2335
+ // Reads a `LABEL_LIST` jsonproto attribute. Bazel serializes label lists into
2336
+ // the same string-list payload (`stringListValue` / `string_list_value`) it
2337
+ // uses for `STRING_LIST`, but tags the attribute `type: "LABEL_LIST"`. The
2338
+ // `deps`/`exports`/`runtime_deps` edge attrs are LABEL_LIST, so a STRING_LIST
2339
+ // reader would silently return nothing and leave the graph empty.
2340
+ function readLabelListAttr(attr) {
2341
+ if (attr.type !== 'LABEL_LIST') {
2342
+ return undefined;
2633
2343
  }
2634
- const candidates = [];
2635
- for (const [name, canonicalName] of Object.entries(value)) {
2636
- if (name.startsWith('@') || typeof canonicalName !== 'string') {
2637
- continue;
2638
- }
2639
- if (BAZEL_REPO_NAME_RE.test(name)) {
2640
- candidates.push(name);
2641
- }
2344
+ if (Array.isArray(attr.stringListValue)) {
2345
+ return attr.stringListValue;
2642
2346
  }
2643
- return candidates;
2347
+ if (Array.isArray(attr.string_list_value)) {
2348
+ return attr.string_list_value;
2349
+ }
2350
+ return undefined;
2644
2351
  }
2645
- function normalizeRepoName(name) {
2646
- const repo = name.startsWith('@') ? name.slice(1) : name;
2647
- return BAZEL_REPO_NAME_RE.test(repo) ? repo : undefined;
2352
+
2353
+ // Strip the trailing version segment from a Maven coordinate, preserving any
2354
+ // packaging/classifier segments. `g:a:v` -> `g:a`,
2355
+ // `g:a:packaging:v` -> `g:a:packaging`,
2356
+ // `g:a:packaging:classifier:v` -> `g:a:packaging:classifier`. Coordinates with
2357
+ // fewer than 3 segments have no version to strip and are returned unchanged.
2358
+ // This matches depscan's `coordinateToParts` keying (position 3 = extension,
2359
+ // position 4 = classifier on the versionless key), so AAR/classifier artifacts
2360
+ // key correctly instead of being mis-keyed as bare `group:artifact` jars.
2361
+ function versionlessCoordinate(coord) {
2362
+ const parts = coord.split(':');
2363
+ if (parts.length < 3) {
2364
+ return coord;
2365
+ }
2366
+ return parts.slice(0, -1).join(':');
2367
+ }
2368
+
2369
+ // Recover the `@<repo>//` prefix from a fully-qualified target label, covering
2370
+ // both apparent (`@maven//:foo`) and bzlmod-canonical
2371
+ // (`@@rules_jvm_external++maven+maven//pkg:foo`) forms. Returns undefined for
2372
+ // labels that aren't repo-qualified (e.g. `:src`).
2373
+ function repoPrefixOfLabel(label) {
2374
+ if (!label.startsWith('@')) {
2375
+ return undefined;
2376
+ }
2377
+ const sep = label.indexOf('//');
2378
+ if (sep < 0) {
2379
+ return undefined;
2380
+ }
2381
+ return label.slice(0, sep + 2);
2648
2382
  }
2649
2383
 
2650
- // Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accept the
2651
- // older streamed jsonproto shape in case older Bazel versions or fixtures still
2652
- // return repository records with apparentName fields.
2653
- function parseVisibleRepoCandidates(output) {
2654
- const candidates = [];
2655
- for (const line of output.split(/\r?\n/)) {
2656
- const trimmed = line.trim();
2657
- if (!trimmed) {
2658
- continue;
2659
- }
2660
- try {
2661
- const parsed = JSON.parse(trimmed);
2662
- candidates.push(...apparentNamesFromRepoMapping(parsed));
2663
- const apparentName = apparentNameFromJsonValue(parsed);
2664
- if (apparentName) {
2665
- const repo = normalizeRepoName(apparentName);
2666
- if (repo) {
2667
- candidates.push(repo);
2384
+ // Strip the leading `@<repo>//:` prefix from a fully-qualified target label
2385
+ // to recover the bare rule name (e.g. `com_google_guava_guava`).
2386
+ function ruleNameFromLabel(label) {
2387
+ const colon = label.lastIndexOf(':');
2388
+ return colon >= 0 ? label.slice(colon + 1) : label;
2389
+ }
2390
+
2391
+ // Extract the maven coordinate from a rule's attributes. Prefers the direct
2392
+ // `maven_coordinates` attribute (Bazel-native shape); falls back to scanning
2393
+ // `tags` for a `maven_coordinates=<G:A:V>` entry (rules_jvm_external shape).
2394
+ // Returns undefined if neither yields a non-empty value.
2395
+ function extractMavenCoordinate(rule) {
2396
+ let coord;
2397
+ for (const attr of rule.attribute ?? []) {
2398
+ if (attr.name === 'maven_coordinates') {
2399
+ const direct = readStringAttr(attr);
2400
+ if (direct && direct.length) {
2401
+ coord = direct;
2402
+ }
2403
+ } else if (attr.name === 'tags') {
2404
+ const tags = readStringListAttr(attr);
2405
+ if (tags) {
2406
+ for (const tag of tags) {
2407
+ const m = MAVEN_COORD_TAG_RE.exec(tag);
2408
+ if (m && !coord) {
2409
+ coord = m[1];
2410
+ }
2668
2411
  }
2669
2412
  }
2670
- } catch {
2671
- // Ignore malformed lines; caller will fall back to static discovery when
2672
- // no usable visible repo names are found.
2673
2413
  }
2674
2414
  }
2675
- return uniqueSorted(candidates);
2415
+ return coord;
2676
2416
  }
2677
2417
 
2678
- // Step 1: parse candidate Maven repo names from Bzlmod and legacy entry points.
2679
- function parseMavenRepoCandidates(cwd, verbose) {
2680
- const candidates = [];
2681
-
2682
- // Bzlmod path: parse MODULE.bazel for use_repo(maven, ...).
2683
- const moduleBazel = path.join(cwd, 'MODULE.bazel');
2684
- const moduleContent = safeReadFile$1(moduleBazel);
2685
- if (moduleContent) {
2686
- const bzlmodHits = [];
2687
- for (const m of moduleContent.matchAll(USE_REPO_RE)) {
2688
- const argBlob = m[1] ?? '';
2689
- for (const n of argBlob.matchAll(QUOTED_NAME_RE)) {
2690
- bzlmodHits.push(n[1]);
2418
+ // Collect the union of `deps`/`exports`/`runtime_deps` label edges off a rule.
2419
+ function extractEdgeLabels(rule) {
2420
+ const labels = [];
2421
+ for (const attr of rule.attribute ?? []) {
2422
+ if (attr.name && EDGE_ATTR_NAMES.has(attr.name)) {
2423
+ const list = readLabelListAttr(attr);
2424
+ if (list) {
2425
+ labels.push(...list);
2691
2426
  }
2692
2427
  }
2693
- candidates.push(...bzlmodHits);
2694
- if (verbose) {
2695
- logger.logger.log('[VERBOSE] discovery: scanned', moduleBazel, `(${bzlmodHits.length} use_repo match(es))`);
2696
- }
2697
- } else if (verbose) {
2698
- logger.logger.log('[VERBOSE] discovery:', moduleBazel, 'not present (skipping bzlmod scan)');
2699
2428
  }
2429
+ return labels;
2430
+ }
2700
2431
 
2701
- // Legacy path: scan WORKSPACE + top-level .bzl files for maven_install(name=...).
2702
- const legacyFiles = listLegacyStarlarkFiles$1(cwd);
2703
- if (verbose) {
2704
- logger.logger.log('[VERBOSE] discovery: legacy files considered:', legacyFiles.length ? legacyFiles : '(none)');
2705
- }
2706
- for (const file of legacyFiles) {
2707
- const content = safeReadFile$1(file);
2708
- if (!content) {
2709
- continue;
2710
- }
2711
- const fileHits = [];
2712
- for (const m of content.matchAll(MAVEN_INSTALL_NAME_RE)) {
2713
- fileHits.push(m[1]);
2714
- }
2715
- candidates.push(...fileHits);
2716
- if (verbose) {
2717
- logger.logger.log('[VERBOSE] discovery: scanned', file, `(${fileHits.length} maven_install name match(es))`);
2432
+ // A coordinate-bearing rule recovered from the cquery stream, before its edge
2433
+ // labels are resolved to coordinates.
2434
+
2435
+ // Build the label -> coordinate index from this repo's own coordinate-bearing
2436
+ // targets, keyed by the full emitted rule label (the form dep labels also use,
2437
+ // since both come from the same cquery output). The `:<ruleName>` suffix map
2438
+ // is a fallback for labels that don't full-match.
2439
+ function buildLabelCoordIndex(records) {
2440
+ const fullLabels = new Map();
2441
+ const suffixToCoords = new Map();
2442
+ const hubPrefixes = new Set();
2443
+ for (const rec of records) {
2444
+ const coord = versionlessCoordinate(rec.coord);
2445
+ fullLabels.set(rec.fullLabel, coord);
2446
+ const suffix = `:${rec.ruleName}`;
2447
+ const set = suffixToCoords.get(suffix) ?? new Set();
2448
+ set.add(coord);
2449
+ suffixToCoords.set(suffix, set);
2450
+ const prefix = repoPrefixOfLabel(rec.fullLabel);
2451
+ if (prefix) {
2452
+ hubPrefixes.add(prefix);
2718
2453
  }
2719
2454
  }
2720
- const deduped = uniqueSorted(candidates);
2721
- if (verbose) {
2722
- logger.logger.log('[VERBOSE] discovery: candidate set (pre-seed):', deduped);
2455
+ return {
2456
+ fullLabels,
2457
+ hubPrefixes,
2458
+ suffixToCoords
2459
+ };
2460
+ }
2461
+ function isHubPrefixed(label, hubPrefixes) {
2462
+ for (const prefix of hubPrefixes) {
2463
+ if (label.startsWith(prefix)) {
2464
+ return true;
2465
+ }
2723
2466
  }
2724
- return deduped;
2467
+ return false;
2725
2468
  }
2726
- // Step 2: validate a candidate by running the probe and confirming
2727
- // `maven_coordinates=` appears in stdout (the marker emitted by jvm_import /
2728
- // aar_import rules generated by rules_jvm_external). Returns the probe
2729
- // stdout alongside the verdict so the caller can cache it and reuse it
2730
- // instead of running an identical extraction query.
2731
- async function validateMavenRepo(repoName, probe, verbose) {
2732
- try {
2733
- const result = await probe(repoName);
2734
- if (result.code !== 0) {
2735
- if (verbose) {
2736
- logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: REJECT (code=${result.code})`);
2737
- }
2469
+ // Resolve one dep label into a versionless coordinate. Classifies into three
2470
+ // buckets (there is deliberately no "seen but coordinate-less" bucket — the
2471
+ // cquery only selects coordinate-bearing targets):
2472
+ // - `coord` — full-label match, unique-suffix fallback, or an already-a-
2473
+ // coordinate `g:a:v` string label.
2474
+ // - `unresolved`— hub-prefixed but resolves to nothing in the selected set
2475
+ // (missing target or ambiguous suffix): a known-dropped edge.
2476
+ // - `drop` — a non-maven target (`@platforms//…`, `:src`): intentional.
2477
+ function resolveDepLabel(label, index) {
2478
+ const full = index.fullLabels.get(label);
2479
+ if (full) {
2480
+ return {
2481
+ coord: full,
2482
+ kind: 'coord'
2483
+ };
2484
+ }
2485
+ if (isHubPrefixed(label, index.hubPrefixes)) {
2486
+ // Suffix fallback, but only when the match is unique.
2487
+ const suffix = `:${ruleNameFromLabel(label)}`;
2488
+ const set = index.suffixToCoords.get(suffix);
2489
+ if (set && set.size === 1) {
2738
2490
  return {
2739
- valid: false,
2740
- stdout: result.stdout
2491
+ coord: [...set][0],
2492
+ kind: 'coord'
2741
2493
  };
2742
2494
  }
2743
- const valid = MAVEN_COORDINATES_MARKER_RE.test(result.stdout);
2744
- if (verbose) {
2745
- logger.logger.log(`[VERBOSE] discovery: probe @${repoName}:`, valid ? 'ACCEPT (maven_coordinates marker found)' : 'REJECT (no maven_coordinates marker in probe stdout)');
2746
- }
2495
+ // Hub-prefixed but missing or ambiguous — a genuinely dropped edge.
2747
2496
  return {
2748
- valid,
2749
- stdout: result.stdout
2497
+ kind: 'unresolved'
2750
2498
  };
2751
- } catch (e) {
2752
- if (verbose) {
2753
- logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: REJECT (probe threw):`, utils.getErrorCause(e));
2754
- }
2499
+ }
2500
+ // Already-a-coordinate fallback: a bare `g:a:v` string label (not a Bazel
2501
+ // label). Versionless-normalize it. Exclude `//`-prefixed package-relative
2502
+ // labels (`//pkg:thing`) — those are Bazel targets, not coordinates.
2503
+ if (label.includes(':') && !label.startsWith('@') && !label.startsWith(':') && !label.startsWith('//')) {
2755
2504
  return {
2756
- valid: false,
2757
- stdout: ''
2505
+ coord: versionlessCoordinate(label),
2506
+ kind: 'coord'
2758
2507
  };
2759
2508
  }
2509
+ // Non-maven target — intentional drop, not counted.
2510
+ return {
2511
+ kind: 'drop'
2512
+ };
2760
2513
  }
2761
2514
 
2762
- // The default maven_install repo name when no explicit `name=` is given.
2763
- // Included as a seed so repos that define maven_install in a subdirectory
2764
- // .bzl file (not scanned by parseMavenRepoCandidates) are still discovered.
2765
- const DEFAULT_MAVEN_REPO_SEED = 'maven';
2766
-
2767
- // Composition: parse, then validate each candidate; return validated subset
2768
- // as a Map keyed by repo name with the validated probe stdout as value.
2769
- // Map iteration order matches insertion order, so callers that just want
2770
- // the list of repo names can call `Array.from(repos.keys())`. Callers that
2771
- // want to skip re-running the same `bazel query` during extraction can read
2772
- // the cached stdout off the Map and parse it directly.
2773
- //
2774
- // Always seeds with the default `@maven` repo name so repos whose
2775
- // maven_install is defined in a sub-directory .bzl file (not reachable by
2776
- // the top-level static scan) can still be discovered via probe validation.
2777
- async function discoverMavenRepos(cwd, probe, nativeCandidates, verbose) {
2778
- const parsed = nativeCandidates && nativeCandidates.length ? nativeCandidates : parseMavenRepoCandidates(cwd, verbose);
2779
- if (verbose) {
2780
- logger.logger.log('[VERBOSE] discovery: candidate source:', nativeCandidates && nativeCandidates.length ? `bzlmod visible-repos (${nativeCandidates.length})` : `static parse (${parsed.length})`);
2515
+ // Pure parser for the jsonproto cquery stream. Returns one
2516
+ // `ExtractedArtifact` per rule with a recoverable maven coordinate (its `deps`
2517
+ // holding resolved versionless coordinates) plus the set of hub-prefixed dep
2518
+ // labels that could not be resolved. The `sourceRepo` field carries
2519
+ // `<workspaceRelPath>:<repoName>` provenance when a workspace path was
2520
+ // provided; otherwise just the repo name.
2521
+ function parseCqueryJsonproto(stdout, repoName, workspaceRelPath) {
2522
+ if (!stdout.trim()) {
2523
+ return {
2524
+ artifacts: [],
2525
+ unresolvedLabels: []
2526
+ };
2781
2527
  }
2782
- // Seed with the default repo name first (so it appears first in output if
2783
- // validated). Dedup via Set before validation.
2784
- const seen = new Set([DEFAULT_MAVEN_REPO_SEED]);
2785
- const candidates = [DEFAULT_MAVEN_REPO_SEED];
2786
- for (const c of parsed) {
2787
- if (!seen.has(c)) {
2788
- seen.add(c);
2789
- candidates.push(c);
2528
+ // Bazel 5+ emits a single JSON envelope; older versions stream one target
2529
+ // per line. Try envelope-first, then fall back to per-line.
2530
+ const targets = [];
2531
+ try {
2532
+ const parsed = JSON.parse(stdout);
2533
+ if (parsed.results) {
2534
+ for (const r of parsed.results) {
2535
+ if (r.target) {
2536
+ targets.push(r.target);
2537
+ }
2538
+ }
2790
2539
  }
2540
+ } catch {
2541
+ // Fall through to per-line scanning.
2791
2542
  }
2792
- if (verbose) {
2793
- logger.logger.log('[VERBOSE] discovery: candidate set to probe (seed-first, deduped):', candidates);
2794
- }
2795
- const validated = new Map();
2796
- for (const c of candidates) {
2797
- // eslint-disable-next-line no-await-in-loop
2798
- const result = await validateMavenRepo(c, probe, verbose);
2799
- if (result.valid) {
2800
- validated.set(c, result.stdout);
2543
+ if (!targets.length) {
2544
+ for (const line of stdout.split(/\r?\n/)) {
2545
+ const trimmed = line.trim();
2546
+ if (!trimmed) {
2547
+ continue;
2548
+ }
2549
+ try {
2550
+ const parsed = JSON.parse(trimmed);
2551
+ if (parsed?.rule) {
2552
+ targets.push(parsed);
2553
+ }
2554
+ } catch {
2555
+ // Skip malformed lines.
2556
+ }
2801
2557
  }
2802
2558
  }
2803
- if (verbose) {
2804
- logger.logger.log('[VERBOSE] discovery: validated repos:', Array.from(validated.keys()));
2559
+ // First pass: collect coordinate-bearing rules with their raw edge labels.
2560
+ const records = [];
2561
+ for (const target of targets) {
2562
+ if (target.type && target.type !== 'RULE') {
2563
+ continue;
2564
+ }
2565
+ const rule = target.rule;
2566
+ if (!rule || !rule.name) {
2567
+ continue;
2568
+ }
2569
+ const coord = extractMavenCoordinate(rule);
2570
+ if (!coord) {
2571
+ continue;
2572
+ }
2573
+ records.push({
2574
+ coord,
2575
+ edgeLabels: extractEdgeLabels(rule),
2576
+ fullLabel: rule.name,
2577
+ ruleKind: rule.ruleClass ?? rule.rule_class ?? 'unknown',
2578
+ ruleName: ruleNameFromLabel(rule.name)
2579
+ });
2805
2580
  }
2806
- return validated;
2807
- }
2808
-
2809
- // Detects whether the given Bazel workspace uses Bzlmod (MODULE.bazel),
2810
- // legacy WORKSPACE (WORKSPACE or WORKSPACE.bazel), or both (migration).
2811
- // Throws InputError when neither marker file is present.
2812
- function detectWorkspaceMode(cwd) {
2813
- const moduleBazel = fs$1.existsSync(path.join(cwd, 'MODULE.bazel'));
2814
- const workspaceFile = fs$1.existsSync(path.join(cwd, 'WORKSPACE')) || fs$1.existsSync(path.join(cwd, 'WORKSPACE.bazel'));
2815
- if (!moduleBazel && !workspaceFile) {
2816
- throw new utils.InputError(`No Bazel workspace found at ${cwd} (looked for MODULE.bazel, WORKSPACE, WORKSPACE.bazel).`);
2581
+ // Second pass: resolve edge labels against this repo's own targets.
2582
+ const index = buildLabelCoordIndex(records);
2583
+ const provenance = workspaceRelPath ? `${workspaceRelPath}:${repoName}` : repoName;
2584
+ const out = [];
2585
+ const unresolved = new Set();
2586
+ for (const rec of records) {
2587
+ const deps = new Set();
2588
+ for (const label of rec.edgeLabels) {
2589
+ const resolution = resolveDepLabel(label, index);
2590
+ if (resolution.kind === 'coord') {
2591
+ deps.add(resolution.coord);
2592
+ } else if (resolution.kind === 'unresolved') {
2593
+ unresolved.add(label);
2594
+ }
2595
+ }
2596
+ out.push({
2597
+ deps: [...deps],
2598
+ mavenCoordinates: rec.coord,
2599
+ ruleKind: rec.ruleKind,
2600
+ ruleName: rec.ruleName,
2601
+ sourceRepo: provenance
2602
+ });
2817
2603
  }
2818
2604
  return {
2819
- bzlmod: moduleBazel,
2820
- workspace: workspaceFile
2605
+ artifacts: out,
2606
+ unresolvedLabels: [...unresolved]
2821
2607
  };
2822
2608
  }
2823
2609
 
2824
- // Returns the bazel CLI flags needed to invoke the correct workspace mode.
2825
- // Bzlmod-only or migration-window: rely on Bazel 7+ default (Bzlmod on).
2826
- // Legacy-only: explicitly disable Bzlmod and enable WORKSPACE.
2827
- function getBazelInvocationFlags(mode) {
2828
- if (mode.bzlmod) {
2829
- // Bzlmod-only or migration: Bzlmod wins; no flags needed (Bazel 7+ default).
2830
- return [];
2831
- }
2832
- // Legacy-only: explicitly switch to WORKSPACE mode.
2833
- return ['--noenable_bzlmod', '--enable_workspace'];
2834
- }
2835
-
2836
- // Splits "g:a:v" -> { groupArtifact: "g:a", version: "v" }.
2837
- // Returns null on malformed input.
2838
- function splitCoord(c) {
2839
- const lastColon = c.lastIndexOf(':');
2840
- if (lastColon < 1) {
2841
- return null;
2842
- }
2843
- return {
2844
- groupArtifact: c.slice(0, lastColon),
2845
- version: c.slice(lastColon + 1)
2846
- };
2847
- }
2848
- // Builds a lookup from rule label suffix (e.g. ":com_google_guava_guava") to canonical coord.
2849
- function buildLabelToCoordMap(artifacts) {
2850
- const fullLabels = new Map();
2851
- const suffixToCoords = new Map();
2852
- for (const a of artifacts) {
2853
- // The rule name (e.g. "com_google_guava_guava") becomes the path under @<repo>//:<name>.
2854
- // We record by ":<name>" suffix so we can look up regardless of repo name.
2855
- const suffix = `:${a.ruleName}`;
2856
- const coords = suffixToCoords.get(suffix) ?? new Set();
2857
- coords.add(a.mavenCoordinates);
2858
- suffixToCoords.set(suffix, coords);
2859
- if (a.sourceRepo) {
2860
- fullLabels.set(`@${a.sourceRepo}//${suffix}`, a.mavenCoordinates);
2610
+ // Classify the runner's raw outcome. Non-zero exit with `--keep_going` is a
2611
+ // `partial` (some target analysis failed; the successful subset is still in
2612
+ // stdout). A clean exit with unresolved hub-prefixed edges is also `partial`
2613
+ // the graph is known-incomplete. Zero exit with no parsed artefacts is
2614
+ // `empty`. Spawn timeout is signalled separately; this helper handles the
2615
+ // post-spawn case.
2616
+ function classifyCqueryOutcome(code, artifactCount, unresolvedCount) {
2617
+ if (code === 0) {
2618
+ if (!artifactCount) {
2619
+ return 'empty';
2861
2620
  }
2621
+ return unresolvedCount > 0 ? 'partial' : 'ok';
2862
2622
  }
2863
- return {
2864
- fullLabels,
2865
- suffixToCoords
2866
- };
2623
+ // --keep_going treats partial-analysis failures with non-zero exit but
2624
+ // still yields the successful subset on stdout. Anything we parsed is
2625
+ // worth keeping.
2626
+ return artifactCount > 0 ? 'partial' : 'error';
2867
2627
  }
2868
2628
 
2869
- // Converts a Bazel dep label to a Maven coordinate, using the label-to-coord map.
2870
- // Returns null when the label is not recognised.
2871
- function depLabelToCoord(label, labelToCoord) {
2872
- // label may be "@maven//:com_google_guava_failureaccess".
2873
- const colon = label.lastIndexOf(':');
2874
- if (colon < 0) {
2875
- return null;
2876
- }
2877
- const fullMatch = labelToCoord.fullLabels.get(label);
2878
- if (fullMatch) {
2879
- return fullMatch;
2880
- }
2881
- const key = label.slice(colon);
2882
- const suffixMatches = labelToCoord.suffixToCoords.get(key);
2883
- if (!suffixMatches) {
2884
- return null;
2629
+ // Spawn the per-repo metadata cquery, parse the result, and return a
2630
+ // structured outcome. On spawn timeout, return `status: 'timeout'` so the
2631
+ // orchestrator can reap the server (`bazel --output_user_root=<dir>
2632
+ // shutdown` + `rm -rf`) before moving on.
2633
+ async function runMetadataCqueryForRepo(args) {
2634
+ const {
2635
+ opts,
2636
+ repoName,
2637
+ timeoutMs,
2638
+ workspaceRelPath,
2639
+ workspaceRoot
2640
+ } = args;
2641
+ const argv = buildMetadataCqueryArgv(repoName, opts);
2642
+ const startedAt = Date.now();
2643
+ try {
2644
+ const result = await spawn.spawn(opts.bin, argv, {
2645
+ cwd: workspaceRoot,
2646
+ timeout: timeoutMs,
2647
+ ...(opts.env ? {
2648
+ env: opts.env
2649
+ } : {})
2650
+ });
2651
+ const {
2652
+ code,
2653
+ stderr,
2654
+ stdout
2655
+ } = result;
2656
+ const {
2657
+ artifacts,
2658
+ unresolvedLabels
2659
+ } = parseCqueryJsonproto(stdout, repoName, workspaceRelPath);
2660
+ return {
2661
+ artifacts,
2662
+ durationMs: Date.now() - startedAt,
2663
+ repoName,
2664
+ status: classifyCqueryOutcome(code, artifacts.length, unresolvedLabels.length),
2665
+ stderr,
2666
+ unresolvedLabels,
2667
+ workspaceRelPath
2668
+ };
2669
+ } catch (e) {
2670
+ const err = e;
2671
+ const stdout = typeof err.stdout === 'string' ? err.stdout : '';
2672
+ const stderr = typeof err.stderr === 'string' ? err.stderr : '';
2673
+ const timedOut = err.timedOut === true || err.killed === true || err.signal === 'SIGTERM' || err.signal === 'SIGKILL';
2674
+ const {
2675
+ artifacts,
2676
+ unresolvedLabels
2677
+ } = stdout ? parseCqueryJsonproto(stdout, repoName, workspaceRelPath) : {
2678
+ artifacts: [],
2679
+ unresolvedLabels: []
2680
+ };
2681
+ // The registry `spawn` rejects on a non-zero exit, so a `--keep_going`
2682
+ // cquery that exits non-zero but still emitted a usable subset lands here
2683
+ // — not in the try block. Classify by what we parsed (subset present =>
2684
+ // `partial`, nothing parsed => `error`) so that partial subset is written
2685
+ // best-effort rather than discarded as a hard error. Timeout stays
2686
+ // distinct so the orchestrator can reap the wedged server.
2687
+ const code = typeof err.code === 'number' ? err.code : 1;
2688
+ return {
2689
+ artifacts,
2690
+ durationMs: Date.now() - startedAt,
2691
+ repoName,
2692
+ status: timedOut ? 'timeout' : classifyCqueryOutcome(code, artifacts.length, unresolvedLabels.length),
2693
+ stderr,
2694
+ unresolvedLabels,
2695
+ workspaceRelPath
2696
+ };
2885
2697
  }
2886
- if (suffixMatches.size > 1) {
2887
- throw new Error(`Ambiguous Bazel dependency label ${label} maps rule suffix ${key} to multiple Maven coordinates: ${Array.from(suffixMatches).sort().join(', ')}. The generated maven_install.json cannot resolve this dependency label losslessly.`);
2698
+ }
2699
+
2700
+ let probed = false;
2701
+
2702
+ // Verifies `java` is functional in the current execution environment. Bazel
2703
+ // JVM manifest extraction (rules_jvm_external → Coursier) requires a real
2704
+ // JDK; the CLI does not attempt to discover Homebrew installs or mutate the
2705
+ // caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
2706
+ // actionable message so the surfaced error names the prerequisite directly
2707
+ // instead of relying on Bazel's downstream diagnostic.
2708
+ function ensureJavaOnPath() {
2709
+ if (probed) {
2710
+ return;
2711
+ }
2712
+ try {
2713
+ childProcess.execSync('java -version', {
2714
+ stdio: 'ignore'
2715
+ });
2716
+ probed = true;
2717
+ } catch {
2718
+ throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
2888
2719
  }
2889
- return Array.from(suffixMatches)[0] ?? null;
2890
2720
  }
2891
- function normalizeToMavenInstallJson(artifacts) {
2892
- const labelToCoord = buildLabelToCoordMap(artifacts);
2893
- const out = {
2894
- artifacts: {},
2895
- dependencies: {}
2896
- };
2897
- const versionsByGroupArtifact = new Map();
2898
- const dependencySets = new Map();
2899
- for (const a of artifacts) {
2900
- const split = splitCoord(a.mavenCoordinates);
2901
- if (!split) {
2902
- continue;
2903
- }
2904
- const existingVersion = versionsByGroupArtifact.get(split.groupArtifact);
2905
- if (existingVersion && existingVersion !== split.version) {
2906
- throw new Error(`Conflicting versions for ${split.groupArtifact}: ${existingVersion}, ${split.version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`);
2907
- }
2908
- if (!existingVersion) {
2909
- versionsByGroupArtifact.set(split.groupArtifact, split.version);
2910
- out.artifacts[split.groupArtifact] = {
2911
- shasums: a.mavenSha256 ? {
2912
- jar: a.mavenSha256
2913
- } : {},
2914
- version: split.version
2915
- };
2916
- } else if (a.mavenSha256 && !out.artifacts[split.groupArtifact]?.shasums.jar) {
2917
- out.artifacts[split.groupArtifact] = {
2918
- shasums: {
2919
- jar: a.mavenSha256
2920
- },
2921
- version: split.version
2922
- };
2923
- }
2924
- // Dependency keys in maven_install.json use "g:a" (no version),
2925
- // matching the canonical rules_jvm_external lockfile shape.
2926
- // Only emit an entry when there are actual dependencies (lockfile omits
2927
- // artifacts with an empty dep list).
2928
- const depKey = split.groupArtifact;
2929
- const depCoords = dependencySets.get(depKey) ?? new Set();
2930
- for (const depLabel of a.deps) {
2931
- // First try our rule-label lookup (the common case for --output=build text).
2932
- const c = depLabelToCoord(depLabel, labelToCoord);
2933
- if (c) {
2934
- // c is "g:a:v"; strip the version to produce "g:a" per lockfile shape.
2935
- const cs = splitCoord(c);
2936
- depCoords.add(cs ? cs.groupArtifact : c);
2937
- } else if (depLabel.includes(':') && !depLabel.startsWith('@') && !depLabel.startsWith(':')) {
2938
- // unsorted_deps.json deps may be "g:a:v" in older files or
2939
- // "g:a" in v2 lock-file-shaped maps. Strip only when a version is
2940
- // present.
2941
- const parts = depLabel.split(':');
2942
- depCoords.add(parts.length >= 3 ? parts.slice(0, -1).join(':') : depLabel);
2943
- }
2944
- }
2945
- if (depCoords.size) {
2946
- dependencySets.set(depKey, depCoords);
2721
+
2722
+ // Validates that --bazel-output-base is a path we can use as Bazel's output_base.
2723
+ // Throws InputError if:
2724
+ // - the input contains `..` segments (path traversal guard)
2725
+ // - the existing path is not writable
2726
+ // - the path cannot be created (parent not writable)
2727
+ function validateOutputBase(outputBase, cwd) {
2728
+ // Path traversal guard: reject any literal `..` segment in user input.
2729
+ // After path.resolve these are normalised away, so we check the raw input.
2730
+ // Split on both separators. On Windows `path.sep === '\\'`, so
2731
+ // input like `foo/../etc` would not contain a `..` segment under the
2732
+ // platform-specific split, bypassing the guard — yet path.resolve below
2733
+ // would still normalise the `..` and a traversal target could materialise.
2734
+ const segments = outputBase.split(/[\\/]/);
2735
+ if (segments.includes('..')) {
2736
+ throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
2737
+ }
2738
+ const resolved = path.resolve(cwd, outputBase);
2739
+ if (fs$1.existsSync(resolved)) {
2740
+ try {
2741
+ fs$1.accessSync(resolved, fs$1.constants.W_OK);
2742
+ } catch {
2743
+ throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
2947
2744
  }
2745
+ return;
2948
2746
  }
2949
- for (const [depKey, depCoords] of dependencySets) {
2950
- out.dependencies[depKey] = Array.from(depCoords);
2747
+ // Path does not exist yet — try to create it so bazel can populate it.
2748
+ try {
2749
+ fs$1.mkdirSync(resolved, {
2750
+ recursive: true
2751
+ });
2752
+ } catch (e) {
2753
+ throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
2951
2754
  }
2952
- return out;
2953
2755
  }
2954
2756
 
2955
- // Resolves the bazel `external/` dir for the given workspace.
2956
- //
2957
- // Bazel's `bazel-out/` convenience symlink points at
2958
- // `<output_base>/execroot/<workspace>/bazel-out/`; the `external/` dir we
2959
- // want is at `<output_base>/external/`. `path.join` is purely lexical and
2960
- // would collapse `bazel-out/..` to the cwd itself, which is the wrong place
2961
- // Resolve the symlink at the filesystem level and walk up to
2962
- // `<output_base>` instead.
2963
- function bazelExternalDir(cwd, outputBase) {
2964
- if (outputBase) {
2965
- return path.join(outputBase, 'external');
2966
- }
2967
- const bazelOutLink = path.join(cwd, 'bazel-out');
2968
- if (!fs$1.existsSync(bazelOutLink)) {
2969
- return null;
2970
- }
2757
+ // Stable shim dir name, so the same process will get the same dir; concurrent
2758
+ // socket-cli invocations on the same machine share it. The symlink target
2759
+ // is whatever python3 resolves to NOW; if PATH changes between invocations
2760
+ // we replace the symlink.
2761
+ const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
2762
+
2763
+ // Cache the result for the lifetime of this process.
2764
+ let cached = null;
2765
+
2766
+ // Safe wrapper around whichBin that returns null instead of throwing when
2767
+ // nothrow semantics are broken in older registry versions (realpath 'null' bug).
2768
+ async function safeWhichBin(name) {
2971
2769
  try {
2972
- // realpath follows symlinks: .../<output_base>/execroot/<workspace>/bazel-out
2973
- const real = fs$1.realpathSync(bazelOutLink);
2974
- // Walk up bazel-out -> <workspace> -> execroot -> <output_base>, then into external/.
2975
- return path.join(real, '..', '..', '..', 'external');
2770
+ return (await bin.whichBin(name, {
2771
+ nothrow: true
2772
+ })) ?? null;
2976
2773
  } catch {
2977
2774
  return null;
2978
2775
  }
2979
2776
  }
2980
-
2981
- // Internal diagnostic: when truthy, skip the unsorted_deps.json fast path
2982
- // and force the bazel-query regex fallback. Used by bazel-bench to
2983
- // deterministically exercise parseBazelBuildOutput on every CI run. Truthy
2984
- // values are '1', 'true', 'yes' (case-insensitive); anything else (unset,
2985
- // '', '0', 'false') is treated as off. Not exposed as a user-facing CLI
2986
- // flag, so it is read here rather than added to constants.mts.
2987
- function isForceQueryFallbackEnabled() {
2988
- const raw = process.env['SOCKET_BAZEL_FORCE_QUERY_FALLBACK'];
2989
- if (!raw) {
2990
- return false;
2777
+ async function provisionPythonShim() {
2778
+ if (cached) {
2779
+ return cached;
2991
2780
  }
2992
- const normalized = raw.toLowerCase();
2993
- return normalized === '1' || normalized === 'true' || normalized === 'yes';
2994
- }
2995
-
2996
- // Tries `external/<repo>/unsorted_deps.json` first; falls back to parsing the
2997
- // probe stdout the caller already captured during discovery. Discovery runs
2998
- // the same `kind("jvm_import rule|aar_import rule", @<repo>//:*)` query that
2999
- // extraction needs, so reusing its stdout skips one bazel-query invocation
3000
- // per repo on the unpinned path (where unsorted_deps.json isn't on disk).
3001
- async function extractFromOneRepo(repoName, queryOpts, cachedProbeStdout) {
3002
- const verbose = queryOpts.verbose;
3003
- // unsorted_deps.json lives under the bazel external dir.
3004
- // When --output_base is set, it's under that; otherwise under the workspace's
3005
- // bazel-out symlink (resolved via realpath, NOT lexical path.join — the
3006
- // lexical form would collapse `bazel-out/..` to cwd and miss the file).
3007
- const externalDir = bazelExternalDir(queryOpts.cwd, queryOpts.bazelOutputBase);
3008
- if (verbose) {
3009
- logger.logger.log(`[VERBOSE] @${repoName}: external dir:`, externalDir ?? '(unresolved — bazel-out symlink absent)');
2781
+ const pythonOnPath = await safeWhichBin('python');
2782
+ if (pythonOnPath) {
2783
+ cached = {
2784
+ augmentedEnv: undefined,
2785
+ shimDir: undefined
2786
+ };
2787
+ return cached;
3010
2788
  }
3011
- const forceFallback = isForceQueryFallbackEnabled();
3012
- if (forceFallback && verbose) {
3013
- logger.logger.log(`[VERBOSE] @${repoName}: SOCKET_BAZEL_FORCE_QUERY_FALLBACK set; skipping unsorted_deps.json fast path.`);
2789
+ const python3OnPath = await safeWhichBin('python3');
2790
+ if (!python3OnPath) {
2791
+ throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
3014
2792
  }
3015
- const candidates = forceFallback ? [] : externalDir ? [path.join(externalDir, repoName, 'unsorted_deps.json')] : [];
3016
- for (const c of candidates) {
3017
- if (fs$1.existsSync(c)) {
3018
- // Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles.
3019
- // eslint-disable-next-line no-await-in-loop
3020
- const stat = await fs$1.promises.stat(c);
3021
- if (stat.size > 1024 * 1024 * 1024) {
3022
- logger.logger.warn(`Skipping oversized ${c} (${stat.size} bytes); falling back to cached probe stdout.`);
3023
- break;
3024
- }
3025
- const json = fs$1.readFileSync(c, 'utf8');
3026
- const parsed = parseUnsortedDepsJson(json);
3027
- if (parsed.length) {
3028
- if (verbose) {
3029
- logger.logger.log(`[VERBOSE] @${repoName}: source=unsorted_deps.json (${c}, ${parsed.length} artifact(s))`);
3030
- }
3031
- return parsed.map(a => ({
3032
- ...a,
3033
- sourceRepo: repoName
3034
- }));
3035
- }
3036
- } else if (verbose) {
3037
- logger.logger.log(`[VERBOSE] @${repoName}: unsorted_deps.json miss at`, c);
2793
+ const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
2794
+ fs$1.mkdirSync(shimDir, {
2795
+ recursive: true
2796
+ });
2797
+ const linkPath = path.join(shimDir, 'python');
2798
+ // Replace the symlink defensively in case python3's resolved path moved.
2799
+ if (fs$1.existsSync(linkPath)) {
2800
+ try {
2801
+ fs$1.unlinkSync(linkPath);
2802
+ } catch {
2803
+ // Tolerate races; the next symlinkSync may still succeed.
2804
+ }
2805
+ }
2806
+ // The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
2807
+ // so a concurrent socket-cli invocation may re-create the link between our
2808
+ // unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
2809
+ // other process won the race and left a usable shim in place.
2810
+ try {
2811
+ fs$1.symlinkSync(python3OnPath, linkPath);
2812
+ } catch (e) {
2813
+ if (e.code === 'EEXIST' && fs$1.existsSync(linkPath)) ; else {
2814
+ throw e;
3038
2815
  }
3039
2816
  }
3040
- // Reuse the probe stdout that discovery already captured for this repo.
3041
- // The probe ran exactly this query during validation and only validated
3042
- // repos with code === 0 make it into the cache, so retry is unnecessary
3043
- // — if the probe was flaky, the repo wouldn't be in the map.
3044
- if (!cachedProbeStdout) {
3045
- logger.logger.warn(`No cached probe stdout for @${repoName}; skipping. (This shouldn't happen — discovery should have populated it.)`);
2817
+ const augmentedEnv = {
2818
+ ...process.env,
2819
+ PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
2820
+ };
2821
+ cached = {
2822
+ augmentedEnv,
2823
+ shimDir
2824
+ };
2825
+ return cached;
2826
+ }
2827
+
2828
+ /**
2829
+ * Maven hub repo discovery for `socket manifest bazel`.
2830
+ *
2831
+ * - Bzlmod path: `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
2832
+ * emits a text-format report listing every repo the maven extension generated;
2833
+ * `parseShowExtensionOutput` extracts the names of hub repos (items annotated
2834
+ * with `(imported by ...)`) and skips generated per-artifact repos.
2835
+ * - Legacy WORKSPACE path: probe a fixed list of conventional Maven hub names.
2836
+ * Each probe is classified into `populated` / `empty` / `not-defined`; the
2837
+ * orchestrator keeps only the `populated` candidates.
2838
+ *
2839
+ * No Starlark source is read by this module. All semantic interpretation
2840
+ * comes from Bazel itself (`mod show_extension`, `cquery`).
2841
+ */
2842
+
2843
+ // The importer token Bazel prints for a hub generated for the root module
2844
+ // itself (`(imported by <root>, …)`). Hubs imported only by rulesets
2845
+ // (`rules_jvm_external@6.7`, `stardoc@0.7.2`, …) are build-tooling, not the
2846
+ // user's SBOM, and are filtered out by the orchestrator.
2847
+ const ROOT_MODULE_IMPORTER = '<root>';
2848
+
2849
+ // One hub repo from a `bazel mod show_extension` report: its name plus the
2850
+ // modules that imported it (the `(imported by …)` annotation), merged across
2851
+ // every line the repo appears on.
2852
+
2853
+ // Conventional Maven hub names rules_jvm_external sets up under
2854
+ // WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility
2855
+ // lookup never triggers a `repository_rule` fetch) so the orchestrator can
2856
+ // try them all without paying the cost of a real cquery on undefined repos.
2857
+ const CONVENTIONAL_MAVEN_REPO_NAMES = ['maven', 'maven_install', 'maven_dev', 'unpinned_maven', 'maven_unpinned'];
2858
+
2859
+ // Pattern Bazel emits when a probed repo name isn't visible to the main
2860
+ // module. Used to distinguish `not-defined` (skip silently) from `empty`
2861
+ // (the repo exists but has no targets). Tolerant of either single- or
2862
+ // double-quote styles Bazel has used across versions.
2863
+ const NOT_VISIBLE_STDERR_RE = /No repository visible as ['"]?@?[A-Za-z0-9._+-]+['"]? from/;
2864
+ // Other "repo isn't analyzable" patterns Bazel emits, especially under
2865
+ // WORKSPACE mode and on Bazel 6.x. They all map to `not-defined`.
2866
+ const NO_SUCH_PACKAGE_STDERR_RE = /no such package ['"`]?@/;
2867
+ // Pattern emitted when a repo IS visible / defined but yields no targets.
2868
+ // `--keep_going` plus `'no targets found beneath'` is the empty-but-defined
2869
+ // signature. The orchestrator treats `empty` and `not-defined` uniformly
2870
+ // as skips.
2871
+ const NO_TARGETS_STDERR_RE = /no targets found beneath/i;
2872
+ // Anchor for the maven extension's section header in
2873
+ // `bazel mod show_extension` output. Tolerant of the canonical-name form
2874
+ // Bazel uses across versions (`@@rules_jvm_external+`, `@@rules_jvm_external~`,
2875
+ // or any future separator) and of trailing whitespace.
2876
+ const SHOW_EXT_SECTION_HEADER_RE = /^## @@?[A-Za-z0-9._+~-]+\/\/:extensions\.bzl%maven:\s*$/m;
2877
+ // Bullet within `Fetched repositories:` that names a hub repo (one with an
2878
+ // `(imported by ...)` annotation). Bullets without that annotation are
2879
+ // generated per-artifact repos and are skipped.
2880
+ const FETCHED_HUB_BULLET_RE = /^ {2}- (?<name>\S+) \(imported by (?<importers>[^)]+)\)\s*$/;
2881
+
2882
+ // Pure parser for `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
2883
+ // stdout. Returns the hub repos listed under `Fetched repositories:` — i.e.
2884
+ // items annotated with `(imported by ...)` — each carrying the set of modules
2885
+ // that imported it. Generated per-artifact repos (no annotation) are skipped.
2886
+ // A repo can legitimately appear on multiple lines with different importers,
2887
+ // so importers are merged per repo (name-only dedupe would lose that, and the
2888
+ // importers data is what lets the orchestrator keep only root-imported hubs).
2889
+ // Output is sorted by name. Tolerant of `DEBUG:` / `WARNING:` lines from
2890
+ // Bazel; the section header `## @@<canonical>//:extensions.bzl%maven:` is the
2891
+ // anchor.
2892
+ function parseShowExtensionOutput(stdout) {
2893
+ const headerMatch = SHOW_EXT_SECTION_HEADER_RE.exec(stdout);
2894
+ if (!headerMatch) {
2895
+ return [];
2896
+ }
2897
+ const tail = stdout.slice(headerMatch.index + headerMatch[0].length);
2898
+ // Find the `Fetched repositories:` line within the section.
2899
+ const fetchedIdx = tail.indexOf('\nFetched repositories:');
2900
+ if (fetchedIdx === -1) {
3046
2901
  return [];
3047
2902
  }
3048
- if (verbose) {
3049
- logger.logger.log(`[VERBOSE] @${repoName}: source=cached probe stdout (${cachedProbeStdout.length} bytes)`);
2903
+ const afterFetched = tail.slice(fetchedIdx + '\nFetched repositories:'.length);
2904
+ const importersByName = new Map();
2905
+ for (const line of afterFetched.split(/\r?\n/)) {
2906
+ // Stop at the next `## ` section header (some Bazel versions print
2907
+ // multiple extensions in one report).
2908
+ if (line.startsWith('## ')) {
2909
+ break;
2910
+ }
2911
+ // Empty line is fine; bullet that doesn't match is fine (it's an
2912
+ // un-imported generated artifact repo) — skip it.
2913
+ const match = FETCHED_HUB_BULLET_RE.exec(line);
2914
+ if (!match || !match.groups) {
2915
+ continue;
2916
+ }
2917
+ const name = match.groups['name'];
2918
+ if (!name) {
2919
+ continue;
2920
+ }
2921
+ const importers = importersByName.get(name) ?? new Set();
2922
+ for (const importer of (match.groups['importers'] ?? '').split(',').map(s => s.trim()).filter(Boolean)) {
2923
+ importers.add(importer);
2924
+ }
2925
+ importersByName.set(name, importers);
3050
2926
  }
3051
- return parseBazelBuildOutput(cachedProbeStdout).map(a => ({
3052
- ...a,
3053
- sourceRepo: repoName
2927
+ return [...importersByName.keys()].sort().map(name => ({
2928
+ importers: [...importersByName.get(name)].sort(),
2929
+ name
3054
2930
  }));
3055
2931
  }
3056
- async function extractBazelToMaven(opts) {
2932
+
2933
+ // Classify a raw probe result into one of three states. The probe contract
2934
+ // is whatever the runner emits — typically a lightweight
2935
+ // `cquery '@<name>//...' --keep_going --output=label`. The orchestrator
2936
+ // treats `empty` and `not-defined` uniformly as no-ops; the distinction
2937
+ // is preserved for verbose-mode diagnostics.
2938
+ function classifyProbeResult(result) {
2939
+ // A successful probe with any stdout means the repo exists AND has at
2940
+ // least one target — populated.
2941
+ if (result.code === 0 && result.stdout.trim().length > 0) {
2942
+ return 'populated';
2943
+ }
2944
+ // Non-zero exit with a "no repository visible" or "no such package" message → not-defined.
2945
+ if (result.code !== 0 && (NOT_VISIBLE_STDERR_RE.test(result.stderr) || NO_SUCH_PACKAGE_STDERR_RE.test(result.stderr))) {
2946
+ return 'not-defined';
2947
+ }
2948
+ // Non-zero exit with the "no targets" message → defined but empty.
2949
+ if (result.code !== 0 && NO_TARGETS_STDERR_RE.test(result.stderr)) {
2950
+ return 'empty';
2951
+ }
2952
+ // Code 0 with empty stdout: WORKSPACE-mode probes do this when the repo
2953
+ // name isn't declared (Exp 5c). Treat as not-defined.
2954
+ if (result.code === 0) {
2955
+ return 'not-defined';
2956
+ }
2957
+ // Non-zero exit with no recognizable message: be conservative and call it
2958
+ // not-defined so the orchestrator skips it without erroring the workspace.
2959
+ return 'not-defined';
2960
+ }
2961
+
2962
+ // Convenience: probe a single candidate and return its classified status,
2963
+ // with optional verbose logging. Pure orchestration around `probe` +
2964
+ // `classifyProbeResult`; isolated so the test suite can exercise the
2965
+ // logging contract independently of the runner implementation.
2966
+ async function probeCandidate(repoName, probe, verbose) {
2967
+ let result;
2968
+ try {
2969
+ result = await probe(repoName);
2970
+ } catch (e) {
2971
+ if (verbose) {
2972
+ logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${e instanceof Error ? e.message : String(e)})`);
2973
+ }
2974
+ return 'not-defined';
2975
+ }
2976
+ const status = classifyProbeResult(result);
2977
+ if (verbose) {
2978
+ logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: ${status}`);
2979
+ }
2980
+ return status;
2981
+ }
2982
+
2983
+ // Detects whether the given Bazel workspace uses Bzlmod (MODULE.bazel),
2984
+ // legacy WORKSPACE (WORKSPACE or WORKSPACE.bazel), or both (migration).
2985
+ // Throws InputError when neither marker file is present.
2986
+ function detectWorkspaceMode(cwd) {
2987
+ const moduleBazel = fs$1.existsSync(path.join(cwd, 'MODULE.bazel'));
2988
+ const workspaceFile = fs$1.existsSync(path.join(cwd, 'WORKSPACE')) || fs$1.existsSync(path.join(cwd, 'WORKSPACE.bazel'));
2989
+ if (!moduleBazel && !workspaceFile) {
2990
+ throw new utils.InputError(`No Bazel workspace found at ${cwd} (looked for MODULE.bazel, WORKSPACE, WORKSPACE.bazel).`);
2991
+ }
2992
+ return {
2993
+ bzlmod: moduleBazel,
2994
+ workspace: workspaceFile
2995
+ };
2996
+ }
2997
+
2998
+ // Returns the bazel CLI flags needed to invoke the correct workspace mode.
2999
+ // Bzlmod-only or migration-window: rely on Bazel 7+ default (Bzlmod on).
3000
+ // Legacy-only: explicitly disable Bzlmod and enable WORKSPACE.
3001
+ function getBazelInvocationFlags(mode) {
3002
+ if (mode.bzlmod) {
3003
+ // Bzlmod-only or migration: Bzlmod wins; no flags needed (Bazel 7+ default).
3004
+ return [];
3005
+ }
3006
+ // Legacy-only: explicitly switch to WORKSPACE mode.
3007
+ return ['--noenable_bzlmod', '--enable_workspace'];
3008
+ }
3009
+
3010
+ /**
3011
+ * Walk the directory tree rooted at `cwd` and return every directory that
3012
+ * looks like a Bazel workspace root — i.e. contains `MODULE.bazel`,
3013
+ * `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots
3014
+ * (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example
3015
+ * `examples/<name>/MODULE.bazel`); the per-workspace algorithm in the
3016
+ * orchestrator runs once per discovered root.
3017
+ *
3018
+ * The walker is dependency-injected with the directory-prune policy:
3019
+ * callers pass the set of basenames and basename prefixes the walk must
3020
+ * refuse to descend into. This module intentionally hardcodes none of
3021
+ * the "common" prunes (`.git`, `node_modules`, …) — Bazel callers compose
3022
+ * the codebase-wide `IGNORED_DIRS` list (`src/utils/glob.mts`) with the
3023
+ * Bazel-specific bits (`bazel-*` output_base symlinks,
3024
+ * `.socket-auto-manifest`).
3025
+ *
3026
+ * Discovery is bounded-but-complete: the walk visits directories in
3027
+ * deterministic (sorted) order under a single visited-directory budget
3028
+ * (`MAX_WALK_DIRS`) as the only pathological-input / symlink-loop guard —
3029
+ * there is no depth cap, because the deepest workspace marker observed across
3030
+ * the OSS corpus (9) sat *below* the old depth-8 ceiling, so that ceiling
3031
+ * silently dropped real first-party modules. All roots found within the
3032
+ * budget are collected, sorted, then capped to `MAX_WORKSPACE_ROOTS`. Both
3033
+ * the cap and a budget exhaustion `logger.warn` UNCONDITIONALLY (a missed
3034
+ * module silently drops its Maven hub, so truncation must never be silent).
3035
+ */
3036
+
3037
+
3038
+ // Hard ceiling on workspace roots; 16 sits well above realistic monorepo
3039
+ // counts while tightening the guard against pathological inputs.
3040
+ const MAX_WORKSPACE_ROOTS = 16;
3041
+ // Hard ceiling on directories visited. The sole guard against pathological
3042
+ // inputs and symlink loops (a loop consumes the budget and stops). A few
3043
+ // thousand is far above any realistic first-party tree once the prune policy
3044
+ // has removed vendored/output dirs.
3045
+ const DEFAULT_MAX_WALK_DIRS = 5_000;
3046
+ // Files whose presence promotes a directory to a workspace root.
3047
+ const WORKSPACE_MARKER_FILES = new Set(['MODULE.bazel', 'WORKSPACE', 'WORKSPACE.bazel']);
3048
+ const EMPTY_SET = new Set();
3049
+ const EMPTY_ARRAY = [];
3050
+
3051
+ // Walks the tree rooted at `opts.cwd` and returns absolute paths to every
3052
+ // directory that contains at least one workspace marker file. Output is
3053
+ // sorted for determinism and capped at MAX_WORKSPACE_ROOTS.
3054
+ function findWorkspaceRoots(opts) {
3057
3055
  const {
3058
3056
  cwd,
3059
- out,
3060
3057
  verbose
3061
3058
  } = opts;
3062
- logger.logger.group('bazel2maven:');
3063
- logger.logger.info(`- src dir: \`${cwd}\``);
3064
- logger.logger.info(`- out dir: \`${out}\``);
3065
- if (!fs$1.existsSync(cwd)) {
3066
- logger.logger.warn(`Warning: cwd does not exist: ${cwd}`);
3067
- }
3068
- logger.logger.groupEnd();
3069
- try {
3070
- // Validate caller-provided Bazel filesystem settings before invoking Bazel.
3071
- if (opts.bazelOutputBase) {
3072
- validateOutputBase(opts.bazelOutputBase, opts.cwd);
3059
+ const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET;
3060
+ const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY;
3061
+ const maxWalkDirs = opts.maxWalkDirs ?? DEFAULT_MAX_WALK_DIRS;
3062
+ const roots = [];
3063
+ // LIFO stack; children are pushed in reverse-sorted order so they pop in
3064
+ // ascending order, giving a deterministic traversal.
3065
+ const stack = [cwd];
3066
+ let dirsVisited = 0;
3067
+ let budgetHit = false;
3068
+ while (stack.length) {
3069
+ if (dirsVisited >= maxWalkDirs) {
3070
+ budgetHit = true;
3071
+ break;
3073
3072
  }
3074
- // Java must be available before rules_jvm_external/Coursier runs;
3075
- // python shim follows so its augmented PATH inherits the JDK prefix.
3076
- ensureJavaOnPath();
3077
- const shim = await provisionPythonShim();
3078
- const baseEnv = shim.augmentedEnv ?? opts.env;
3079
-
3080
- // Step 1: workspace detection.
3081
- const mode = detectWorkspaceMode(cwd);
3082
- logger.logger.info(`Workspace mode: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
3083
- const invocationFlags = getBazelInvocationFlags(mode);
3084
-
3085
- // Step 2: bazel binary resolution.
3086
- const bin = await resolveBazelBinary(opts.bin);
3087
- logger.logger.info(`Using bazel: ${bin}`);
3088
- if (verbose) {
3089
- logger.logger.log('[VERBOSE] resolved options:', {
3090
- bin,
3091
- bazelRc: opts.bazelRc ?? '(unset)',
3092
- bazelOutputBase: opts.bazelOutputBase ?? '(unset)',
3093
- bazelFlags: opts.bazelFlags ?? '(unset)',
3094
- invocationFlags
3073
+ const dir = stack.pop();
3074
+ if (dir === undefined) {
3075
+ break;
3076
+ }
3077
+ dirsVisited += 1;
3078
+ let entries;
3079
+ try {
3080
+ entries = fs$1.readdirSync(dir, {
3081
+ withFileTypes: true
3095
3082
  });
3083
+ } catch {
3084
+ continue;
3096
3085
  }
3097
-
3098
- // Step 3: build the shared query options object.
3099
- const queryOpts = {
3100
- bin,
3101
- cwd,
3102
- invocationFlags,
3103
- ...(opts.bazelRc ? {
3104
- bazelRc: opts.bazelRc
3105
- } : {}),
3106
- ...(opts.bazelFlags ? {
3107
- bazelFlags: opts.bazelFlags
3108
- } : {}),
3109
- ...(opts.bazelOutputBase ? {
3110
- bazelOutputBase: opts.bazelOutputBase
3111
- } : {}),
3112
- ...(baseEnv ? {
3113
- env: baseEnv
3114
- } : {}),
3115
- verbose
3116
- };
3117
-
3118
- // Step 4: discover validated Maven repos via the two-step recipe.
3119
- // Bzlmod has a native visible-repository surface; prefer that over static
3120
- // MODULE.bazel parsing and keep bounded parsing as the legacy/fallback path.
3121
- let nativeCandidates;
3122
- if (mode.bzlmod) {
3123
- const visibleRepos = await runBazelModShowVisibleRepos(queryOpts);
3124
- if (visibleRepos.code === 0) {
3125
- nativeCandidates = parseVisibleRepoCandidates(visibleRepos.stdout);
3126
- if (verbose) {
3127
- logger.logger.log('[VERBOSE] Bzlmod visible repo candidates:', nativeCandidates);
3086
+ let isWorkspaceRoot = false;
3087
+ const childNames = [];
3088
+ for (const entry of entries) {
3089
+ if (entry.isFile()) {
3090
+ if (WORKSPACE_MARKER_FILES.has(entry.name)) {
3091
+ isWorkspaceRoot = true;
3128
3092
  }
3129
- } else if (verbose) {
3130
- logger.logger.log('[VERBOSE] bazel mod show_repo failed; falling back to static candidate parsing:', visibleRepos.stderr);
3093
+ continue;
3094
+ }
3095
+ if (!entry.isDirectory()) {
3096
+ continue;
3097
+ }
3098
+ const name = entry.name;
3099
+ if (ignoreDirNames.has(name)) {
3100
+ continue;
3101
+ }
3102
+ let pruned = false;
3103
+ for (const prefix of ignoreDirPrefixes) {
3104
+ if (name.startsWith(prefix)) {
3105
+ pruned = true;
3106
+ break;
3107
+ }
3108
+ }
3109
+ if (!pruned) {
3110
+ childNames.push(name);
3131
3111
  }
3132
3112
  }
3133
- // Returns Map<repoName, probeStdout> so extraction can reuse the probe
3134
- // output and skip running an identical bazel-query a second time.
3135
- const probe = buildProbeFor(queryOpts);
3136
- const repos = await discoverMavenRepos(cwd, probe, nativeCandidates, verbose);
3137
- const repoNames = Array.from(repos.keys());
3138
- logger.logger.info(`Discovered ${repos.size} Maven repo(s): ${repoNames.join(', ') || '(none)'}`);
3113
+ if (isWorkspaceRoot) {
3114
+ roots.push(dir);
3115
+ }
3116
+ // Descend regardless of whether this dir is itself a root — nested
3117
+ // workspaces are common (root MODULE.bazel + examples/*/MODULE.bazel).
3118
+ childNames.sort();
3119
+ for (let i = childNames.length - 1; i >= 0; i -= 1) {
3120
+ stack.push(path.join(dir, childNames[i]));
3121
+ }
3122
+ }
3123
+ roots.sort();
3124
+ const kept = roots.slice(0, MAX_WORKSPACE_ROOTS);
3125
+ const droppedCount = roots.length - kept.length;
3126
+ if (budgetHit) {
3127
+ // The dir budget was exhausted, so an unknown number of roots may be
3128
+ // undiscovered — surface it unconditionally.
3129
+ logger.logger.warn(`Bazel workspace walk hit the ${maxWalkDirs}-directory budget; some workspaces beneath ${cwd} may be undiscovered (found ${roots.length}, kept ${kept.length}).`);
3130
+ }
3131
+ if (droppedCount > 0) {
3132
+ // The cap dropped roots. Exact count when the full tree was walked; "≥"
3133
+ // when the budget cut the walk short (more roots may exist).
3134
+ const qualifier = budgetHit ? '≥' : '';
3135
+ logger.logger.warn(`Bazel workspace walk found ${roots.length} workspace root(s); capping at ${MAX_WORKSPACE_ROOTS} and dropping ${qualifier}${droppedCount}.`);
3136
+ if (verbose) {
3137
+ logger.logger.log('[VERBOSE] workspace walker: dropped roots:', roots.slice(MAX_WORKSPACE_ROOTS));
3138
+ }
3139
+ }
3140
+ return kept;
3141
+ }
3142
+
3143
+ // Best-effort-per-hub produces four distinct run outcomes a single `ok`
3144
+ // boolean would conflate:
3145
+ // - `complete` — every discovered hub extracted cleanly; >=1 manifest.
3146
+ // - `partial` — >=1 manifest written, but at least one hub failed,
3147
+ // timed out, or dropped edges. Worth uploading, but the
3148
+ // graph is known-incomplete.
3149
+ // - `noEcosystem` — no Bazel/Maven found. Whether that's an error is
3150
+ // caller-dependent (tolerated in auto mode, error in
3151
+ // explicit mode), so it must NOT be flattened into the
3152
+ // failure states.
3153
+ // - `hardFailure` — zero manifests written and it wasn't `noEcosystem`
3154
+ // (discovery threw, or every discovered hub failed).
3155
+ // Always an error for every caller.
3156
+
3157
+ const DEFAULT_PER_REPO_TIMEOUT_MS = 60_000;
3158
+ const REAP_TIMEOUT_MS = 10_000;
3159
+
3160
+ // Default directory-prune policy for the Bazel workspace walk. The
3161
+ // orchestrator applies this unconditionally so neither caller (the explicit
3162
+ // `socket manifest bazel` command nor `--auto-manifest`) can omit it and let
3163
+ // the walk descend `node_modules`/VCS/vendored trees. Callers may
3164
+ // pass extra names/prefixes to EXTEND, not replace, this set.
3165
+ const DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES = new Set([...utils.IGNORED_DIRS, '.hg', '.idea', '.pnpm-store', '.socket-auto-manifest', '.svn', '.vscode']);
3166
+ // Bazel's `bazel-*` output_base symlinks.
3167
+ const DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES = ['bazel-'];
3168
/**
 * Splits a Maven coordinate at its LAST colon:
 * "g:a:v" -> { groupArtifact: "g:a", version: "v" }.
 *
 * A trailing colon ("g:a:") is not rejected here: it yields an empty
 * `version`, which the caller is expected to check.
 *
 * @param {string} c - Coordinate string to split.
 * @returns {{groupArtifact: string, version: string} | null} The two halves,
 *   or null on malformed input: a non-string value, a string with no colon,
 *   or a string whose only colon is its first character.
 */
function splitCoord(c) {
  // Non-string input is malformed input too; report it as null rather than
  // throwing a TypeError from `lastIndexOf`.
  if (typeof c !== 'string') {
    return null;
  }
  const lastColon = c.lastIndexOf(':');
  // -1: no colon at all; 0: nothing before the colon.
  if (lastColon < 1) {
    return null;
  }
  return {
    groupArtifact: c.slice(0, lastColon),
    version: c.slice(lastColon + 1)
  };
}
3180
/**
 * Checks the shape of a versionless `maven_install.json` key.
 *
 * A key is accepted when it has 2-4 colon-separated segments (`g:a`,
 * `g:a:ext`, `g:a:ext:classifier`) and none of them is empty. Keys outside
 * that shape are rejected locally instead of after upload.
 *
 * @param {string} key - Versionless coordinate key.
 * @returns {boolean} True when the key has an acceptable shape.
 */
function isValidVersionlessKey(key) {
  const segments = key.split(':');
  const segmentCountOk = segments.length >= 2 && segments.length <= 4;
  return segmentCountOk && !segments.some(segment => segment.length === 0);
}
3139
3191
 
3140
- // Step 5: extract artifacts from each repo (preferring unsorted_deps.json).
3141
- const allArtifacts = [];
3142
- for (const [repo, probeStdout] of repos) {
3143
- // eslint-disable-next-line no-await-in-loop
3144
- const artifacts = await extractFromOneRepo(repo, queryOpts, probeStdout);
3145
- allArtifacts.push(...artifacts);
3146
- logger.logger.info(`@${repo}: ${artifacts.length} artifact(s)`);
3192
/**
 * Builds a `maven_install.json`-shaped object from artifacts whose `deps`
 * already hold resolved versionless coordinates (no label-to-coordinate
 * resolution happens here).
 *
 * The work is done in two passes so the emitted graph is internally closed:
 *   1. Artifacts: each coordinate is split into a versionless key and a
 *      version; entries with a malformed key or an empty version are dropped.
 *   2. Edges: an edge is emitted only when both its source and its target are
 *      emitted artifact keys; every other edge is recorded as pruned.
 *
 * @param {Iterable<{mavenCoordinates: string, deps: Iterable<string>}>} artifacts
 * @returns {{droppedArtifacts: string[], json: {artifacts: object, dependencies: object}, prunedEdges: string[]}}
 *   The manifest plus the accounting of everything that was left out, so the
 *   caller can report it.
 * @throws {Error} When the same versionless key appears with two different
 *   versions.
 */
function normalizeToMavenInstallJson(artifacts) {
  const json = {
    artifacts: {},
    dependencies: {}
  };
  const droppedArtifacts = [];
  const prunedEdges = [];
  const versionByKey = new Map();
  const candidateEdges = new Map();

  // Pass 1: validate and register artifact keys, collecting candidate edges.
  for (const artifact of artifacts) {
    const coord = artifact.mavenCoordinates;
    const parts = splitCoord(coord);
    // Reject unsplittable coordinates, badly shaped keys, and `g:a:`-style
    // coordinates whose key looks fine but whose version is empty.
    if (!parts || !isValidVersionlessKey(parts.groupArtifact) || !parts.version) {
      droppedArtifacts.push(coord);
      continue;
    }
    const { groupArtifact: key, version } = parts;
    const knownVersion = versionByKey.get(key);
    if (knownVersion && knownVersion !== version) {
      throw new Error(`Conflicting versions for ${key}: ${knownVersion}, ${version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`);
    }
    if (!knownVersion) {
      versionByKey.set(key, version);
      json.artifacts[key] = {
        version
      };
    }
    // Union the edges of every occurrence of this key; only store the set
    // once it actually holds something.
    const edgeSet = candidateEdges.get(key) ?? new Set();
    for (const dep of artifact.deps) {
      edgeSet.add(dep);
    }
    if (edgeSet.size) {
      candidateEdges.set(key, edgeSet);
    }
  }

  // Pass 2: keep an edge only when both endpoints are emitted keys.
  const emittedKeys = new Set(Object.keys(json.artifacts));
  for (const [source, targets] of candidateEdges) {
    const sourceEmitted = emittedKeys.has(source);
    const retained = [];
    for (const target of targets) {
      if (sourceEmitted && emittedKeys.has(target)) {
        retained.push(target);
      } else {
        prunedEdges.push(`${source} -> ${target}`);
      }
    }
    if (retained.length) {
      json.dependencies[source] = retained;
    }
  }
  return {
    droppedArtifacts,
    json,
    prunedEdges
  };
}
3277
+
3278
/**
 * Collapses artifacts that share the same full `mavenCoordinates` string into
 * a single entry.
 *
 * The first occurrence supplies every non-`deps` field. The `deps` of later
 * occurrences are unioned into it (first-seen order) rather than discarded,
 * because occurrences of one coordinate can carry different edge sets.
 * Input objects are not mutated: each kept entry is a shallow copy with its
 * own `deps` array.
 *
 * @param {Iterable<{mavenCoordinates: string, deps: Iterable<string>}>} artifacts
 * @returns {object[]} One entry per distinct coordinate, in first-seen order.
 */
function dedupArtifactsByCoord(artifacts) {
  const uniqueByCoord = new Map();
  for (const artifact of artifacts) {
    const coord = artifact.mavenCoordinates;
    const first = uniqueByCoord.get(coord);
    if (first) {
      // Later occurrence: fold its edges into the entry we already hold.
      first.deps = Array.from(new Set([...first.deps, ...artifact.deps]));
    } else {
      uniqueByCoord.set(coord, {
        ...artifact,
        deps: Array.from(artifact.deps)
      });
    }
  }
  return Array.from(uniqueByCoord.values());
}
3305
/**
 * Dedups, normalizes, and writes the manifest for one Maven hub.
 *
 * The output path mirrors the workspace tree:
 * `<manifestDir>/<relPath>/<fileName>`, where `<fileName>` is
 * `maven_install.json` for a hub literally named `maven` and
 * `<repoName>_maven_install.json` otherwise. An empty `relPath` (the root
 * workspace) writes directly into `manifestDir`.
 *
 * @param {object} args
 * @param {Iterable<object>} args.artifacts - Artifacts extracted for this hub.
 * @param {string} args.manifestDir - Base directory for generated manifests.
 * @param {string} args.relPath - Workspace path relative to the scan root
 *   ('' for the root workspace).
 * @param {string} args.repoName - Hub (repository) name.
 * @returns {Promise<{artifactCount: number, droppedArtifacts: string[], manifestPath: (string|undefined), prunedEdges: string[]}>}
 *   `manifestPath` is undefined, and no file is written, when the hub yields
 *   zero valid artifacts. The dropped/pruned lists are always returned so the
 *   caller can report what was left out.
 */
async function writeHubManifest(args) {
  const {
    artifacts,
    manifestDir,
    relPath,
    repoName
  } = args;
  const deduped = dedupArtifactsByCoord(artifacts);
  const {
    droppedArtifacts,
    json,
    prunedEdges
  } = normalizeToMavenInstallJson(deduped);
  const artifactCount = Object.keys(json.artifacts).length;
  if (!artifactCount) {
    // Nothing valid to emit: skip the write but keep the accounting.
    return {
      artifactCount: 0,
      droppedArtifacts,
      manifestPath: undefined,
      prunedEdges
    };
  }
  const fileName = repoName === 'maven' ? 'maven_install.json' : `${repoName}_maven_install.json`;
  const hubDir = relPath ? path.join(manifestDir, relPath) : manifestDir;
  // Use the promise API so this async function never blocks the event loop
  // on a synchronous filesystem call.
  await fs$1.promises.mkdir(hubDir, {
    recursive: true
  });
  const manifestPath = path.join(hubDir, fileName);
  await fs$1.promises.writeFile(manifestPath, JSON.stringify(json, null, 2), 'utf8');
  return {
    artifactCount,
    droppedArtifacts,
    manifestPath,
    prunedEdges
  };
}
3220
3349
 
3221
- async function convertGradleToFacts({
3222
- bin,
3223
- configs,
3224
- cwd,
3225
- gradleOpts,
3226
- ignoreUnresolved,
3227
- verbose
3228
- }) {
3229
- const rBin = path.resolve(cwd, bin);
3230
- const binExists = fs$1.existsSync(rBin);
3231
- const cwdExists = fs$1.existsSync(cwd);
3232
- logger.logger.group('gradle2facts:');
3233
- logger.logger.info(`- executing: \`${rBin}\``);
3234
- if (!binExists) {
3235
- logger.logger.warn(`Warning: It appears the executable could not be found. An error might be printed later because of that.`);
3236
- }
3237
- logger.logger.info(`- src dir: \`${cwd}\``);
3238
- if (!cwdExists) {
3239
- logger.logger.warn(`Warning: It appears the src dir could not be found. An error might be printed later because of that.`);
3240
- }
3241
- logger.logger.groupEnd();
3242
- try {
3243
- // The init script is bundled alongside the existing pom-generating one.
3244
- // See .config/rollup.dist.config.mjs:copySocketFactsInitGradle.
3245
- const initLocation = path.join(constants.default.distPath, 'socket-facts.init.gradle');
3246
- // Disable Gradle's configuration cache for the facts run. The init
3247
- // script resolves dependencies via the legacy
3248
- // `Configuration.resolvedConfiguration` API (the only public API that
3249
- // surfaces classifier + extension metadata) and registers per-
3250
- // subproject tasks that share a `gradle.ext` accumulator — neither
3251
- // pattern is compatible with the configuration cache, which would
3252
- // otherwise be on by default for projects with
3253
- // `org.gradle.configuration-cache=true` in `gradle.properties`. The
3254
- // Provider-based CC-safe alternatives (`ResolutionResult` /
3255
- // `ArtifactView.resolvedArtifacts`) only exist in Gradle 7.4+ and
3256
- // they don't expose classifier/extension, so they aren't a usable
3257
- // replacement here. Using `-D` rather than `--no-configuration-cache`
3258
- // keeps us compatible with older Gradle versions that don't recognize
3259
- // the flag — the system property is silently ignored when the
3260
- // feature doesn't exist.
3261
- // Both knobs are passed as Gradle project properties so the init script
3262
- // can read them via `rp.findProperty(...)`, matching how
3263
- // `socket.outputDirectory` / `socket.outputFile` are already wired.
3264
- const socketProps = [];
3265
- if (ignoreUnresolved) {
3266
- socketProps.push('-Psocket.ignoreUnresolved=true');
3267
- }
3268
- if (configs) {
3269
- socketProps.push(`-Psocket.configs=${configs}`);
3270
- }
3271
- const commandArgs = ['-Dorg.gradle.configuration-cache=false', ...socketProps, '--init-script', initLocation, ...gradleOpts, 'socketFacts'];
3272
- if (verbose) {
3273
- logger.logger.log('[VERBOSE] Executing:', [bin], ', args:', commandArgs);
3274
- }
3275
- logger.logger.log(`Generating Socket facts from \`${bin}\` on \`${cwd}\` ...`);
3276
- const output = await execGradle$1(rBin, commandArgs, cwd, verbose);
3277
- if (output.code) {
3278
- process.exitCode = 1;
3279
- logger.logger.fail(`Gradle exited with exit code ${output.code}`);
3280
- if (!verbose) {
3281
- logger.logger.group('stderr:');
3282
- logger.logger.error(output.stderr);
3283
- logger.logger.groupEnd();
3350
/**
 * Builds the list of candidate Maven hub names for one workspace.
 *
 * - Bzlmod mode: runs `runBazelModShowMavenExtension` and, on exit code 0,
 *   keeps only the parsed hubs whose importers include
 *   `ROOT_MODULE_IMPORTER`.
 * - When that step produced at least one root hub, nothing else is probed.
 * - Otherwise (non-Bzlmod mode, a non-zero exit code, or zero root hubs) each
 *   name in `CONVENTIONAL_MAVEN_REPO_NAMES` is probed, and the ones reported
 *   as 'populated' are added.
 *
 * @param {string} workspaceRoot - Workspace path; used only in verbose logs.
 * @param {{bzlmod: boolean}} mode - Detected workspace mode.
 * @param {object} queryOpts - Options forwarded to the Bazel helpers.
 * @param {boolean} verbose - Emit `[VERBOSE]` diagnostics.
 * @returns {Promise<string[]>} Candidate hub names.
 */
async function discoverCandidatesForWorkspace(workspaceRoot, mode, queryOpts, verbose) {
  const isRootHub = entry => entry.importers.includes(ROOT_MODULE_IMPORTER);
  const candidates = [];
  let showExtensionSucceeded = false;
  if (mode.bzlmod) {
    const extResult = await runBazelModShowMavenExtension(queryOpts);
    if (extResult.code !== 0) {
      if (verbose) {
        logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`);
      }
    } else {
      // The report also lists hubs imported only by other modules; keep the
      // root-imported ones.
      const entries = parseShowExtensionOutput(extResult.stdout);
      const rootHubs = entries.filter(isRootHub);
      const rootHubNames = rootHubs.map(entry => entry.name);
      candidates.push(...rootHubNames);
      // Decide on the KEPT count: a report whose hubs were all filtered out
      // must still fall through to the conventional probe.
      showExtensionSucceeded = rootHubs.length > 0;
      if (verbose) {
        logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension kept root hub(s)`, rootHubNames);
        for (const dropped of entries.filter(entry => !isRootHub(entry))) {
          logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: dropped ${dropped.name} — imported by ${dropped.importers.join(', ')}, not ${ROOT_MODULE_IMPORTER}`);
        }
      }
    }
  }
  // Probe only what show_extension could not enumerate, skipping names that
  // are already candidates.
  const seen = new Set(candidates);
  const conventionalNames = showExtensionSucceeded ? [] : [...CONVENTIONAL_MAVEN_REPO_NAMES];
  const toProbe = conventionalNames.filter(name => !seen.has(name));
  if (toProbe.length === 0) {
    return candidates;
  }
  const probe = buildMavenProbeFor(queryOpts);
  for (const name of toProbe) {
    // eslint-disable-next-line no-await-in-loop
    const status = await probeCandidate(name, probe, verbose);
    if (status !== 'populated') {
      continue;
    }
    candidates.push(name);
    seen.add(name);
  }
  return candidates;
}
3411
+
3412
/**
 * Best-effort shutdown of the Bazel server bound to `outputUserRoot`.
 *
 * Runs `<bin> --output_user_root=<outputUserRoot> shutdown` with a
 * `REAP_TIMEOUT_MS` timeout. Any failure is swallowed on purpose (logged only
 * in verbose mode); per the log message, the tempdir is removed afterwards
 * regardless.
 *
 * @param {string} bin - Bazel executable.
 * @param {string} outputUserRoot - The server's `--output_user_root`.
 * @param {boolean} verbose - Log the failure reason when shutdown fails.
 * @returns {Promise<void>}
 */
async function reapBazelServer(bin, outputUserRoot, verbose) {
  try {
    const shutdownArgs = [`--output_user_root=${outputUserRoot}`, 'shutdown'];
    const spawnOptions = {
      timeout: REAP_TIMEOUT_MS
    };
    await spawn.spawn(bin, shutdownArgs, spawnOptions);
  } catch (e) {
    // The server may already be gone, or the shutdown itself timed out.
    if (!verbose) {
      return;
    }
    logger.logger.log(`[VERBOSE] reapBazelServer: shutdown failed for ${outputUserRoot} (${utils.getErrorCause(e)}); tempdir removal will still run`);
  }
}
3323
- async function execGradle$1(bin, commandArgs, cwd, verbose) {
3324
- // When verbose, stream gradle stdout/stderr directly to the user's
3325
- // terminal — no spinner, no capture. The trade-off is that the post-run
3326
- // "Reported exports:" summary is skipped (the lines were already visible
3327
- // inline). For huge builds where the user wants to see progress, this is
3328
- // the right default. Non-verbose runs still get the spinner + summary.
3329
- if (verbose) {
3330
- logger.logger.info('(Running gradle with output streaming. This can take a while.)');
3331
- const output = await spawn.spawn(bin, commandArgs, {
3332
- cwd,
3333
- stdio: 'inherit'
3334
- });
3335
- return {
3336
- code: output.code,
3337
- stdout: '',
3338
- stderr: ''
3339
- };
3340
- }
3341
- const {
3342
- spinner
3343
- } = constants.default;
3344
- let pass = false;
3428
/**
 * Best-effort recursive removal of a temporary directory.
 *
 * Uses `force: true`, so a directory that is already gone is not an error.
 * Any other failure is swallowed on purpose and only logged in verbose mode.
 *
 * @param {string} dir - Directory to delete.
 * @param {boolean} verbose - Log the failure reason when removal fails.
 * @returns {Promise<void>}
 */
async function removeTempdir(dir, verbose) {
  const rmOptions = {
    recursive: true,
    force: true
  };
  try {
    await fs$1.promises.rm(dir, rmOptions);
  } catch (e) {
    // Best effort: a leftover directory is tolerated.
    if (!verbose) {
      return;
    }
    logger.logger.log(`[VERBOSE] removeTempdir: ${dir} not fully removed (${utils.getErrorCause(e)}); a stale dir may linger until the next OS tempdir sweep`);
  }
}
3371
-
3372
- // Shown when the sbt launcher dies on a modern JDK. sbt 0.13 (and some early
3373
- // 1.x) install a SecurityManager, which JDK 18+ removed, so the launcher
3374
- // throws before our plugin runs. We don't pick a JDK for the user — they own
3375
- // their toolchain — but we point them at the fix.
3376
- const JDK_HINT = 'Hint: old sbt (0.13.x and early 1.x) cannot run on modern JDKs because the Java Security Manager was removed in JDK 18+. Run with a compatible JDK by setting JAVA_HOME (e.g. Java 11) or passing `--sbt-opts "--java-home <path>"`.';
3377
-
3378
- // The socket-owned global base sbt compiles our plugin into. Living under the
3379
- // app data dir (not the user's `~/.sbt`) means we never mutate their sbt
3380
- // config, while persisting the compiled plugin between runs. sbt namespaces
3381
- // the compiled output by Scala/sbt version (`target/scala-2.10/sbt-0.13`,
3382
- // `target/scala-2.12/sbt-1.0`, ...), so a single base safely serves every sbt
3383
- // version with no version detection needed.
3384
- function resolveGlobalBase() {
3385
- const {
3386
- socketAppDataPath
3387
- } = constants.default;
3388
- return socketAppDataPath ? path.join(path.dirname(socketAppDataPath), 'sbt-facts') : path.join(os.tmpdir(), 'socket-sbt-facts');
3441
/**
 * Creates a fresh, uniquely named directory under the OS temp dir, with the
 * `socket-bazel-` name prefix.
 *
 * @returns {string} Path of the newly created directory.
 */
function makeOutputUserRoot() {
  const prefix = path.join(os.tmpdir(), 'socket-bazel-');
  return fs$1.mkdtempSync(prefix);
}
3390
3444
 
3391
- // Drop the shipped plugin source into `<globalBase>/plugins/`, rewriting only
3392
- // when its content changed so sbt's incremental compiler can reuse the cache.
3393
- async function ensurePluginSource(pluginSrcPath, pluginsDir) {
3394
- const source = await fs$1.promises.readFile(pluginSrcPath, 'utf8');
3395
- const destPath = path.join(pluginsDir, 'SocketFactsPlugin.scala');
3396
- let current;
3397
- if (fs$1.existsSync(destPath)) {
3398
- current = await fs$1.promises.readFile(destPath, 'utf8');
3399
- }
3400
- if (current !== source) {
3401
- await fs$1.promises.mkdir(pluginsDir, {
3402
- recursive: true
3403
- });
3404
- await fs$1.promises.writeFile(destPath, source, 'utf8');
3405
- }
3445
/**
 * Assembles the query-options object used for a single workspace's Bazel
 * queries. Kept at module scope, so everything it needs arrives as an
 * explicit field of `args`.
 *
 * `bazelRc`, `bazelFlags`, `bazelOutputBase` (from `opts`) and `env` (from
 * `baseEnv`) are included only when truthy; the remaining keys are always
 * present.
 *
 * @param {object} args
 * @param {object} [args.baseEnv] - Environment for spawned processes.
 * @param {string} args.bin - Bazel executable.
 * @param {string[]} args.invocationFlags - Flags passed through unchanged.
 * @param {object} args.opts - Caller options (`bazelRc`, `bazelFlags`,
 *   `bazelOutputBase` are read).
 * @param {string} args.outputUserRoot - Value for `outputUserRoot`.
 * @param {string} args.spawnCwd - Becomes `cwd` in the result.
 * @param {boolean} args.verbose - Passed through unchanged.
 * @returns {object} The assembled options.
 */
function buildQueryOpts(args) {
  const {
    baseEnv,
    bin,
    invocationFlags,
    opts,
    outputUserRoot,
    spawnCwd,
    verbose
  } = args;
  // Keys are added in a fixed order: required fields, optional overrides,
  // then `verbose` last.
  const queryOpts = {
    bin,
    cwd: spawnCwd,
    invocationFlags,
    outputUserRoot
  };
  if (opts.bazelRc) {
    queryOpts.bazelRc = opts.bazelRc;
  }
  if (opts.bazelFlags) {
    queryOpts.bazelFlags = opts.bazelFlags;
  }
  if (opts.bazelOutputBase) {
    queryOpts.bazelOutputBase = opts.bazelOutputBase;
  }
  if (baseEnv) {
    queryOpts.env = baseEnv;
  }
  queryOpts.verbose = verbose;
  return queryOpts;
}
3407
- async function convertSbtToFacts({
3408
- bin,
3409
- configs,
3410
- cwd,
3411
- ignoreUnresolved,
3412
- sbtOpts,
3413
- verbose
3414
- }) {
3415
- logger.logger.group('sbt2facts:');
3416
- logger.logger.info(`- executing: \`${bin}\``);
3479
+ async function extractBazelToMaven(opts) {
3480
+ const {
3481
+ cwd,
3482
+ out,
3483
+ verbose
3484
+ } = opts;
3485
+ logger.logger.group('bazel2maven:');
3417
3486
  logger.logger.info(`- src dir: \`${cwd}\``);
3487
+ logger.logger.info(`- out dir: \`${out}\``);
3418
3488
  if (!fs$1.existsSync(cwd)) {
3419
- logger.logger.warn('Warning: It appears the src dir could not be found. An error might be printed later because of that.');
3489
+ logger.logger.warn(`Warning: cwd does not exist: ${cwd}`);
3420
3490
  }
3421
3491
  logger.logger.groupEnd();
3422
- try {
3423
- const pluginSrcPath = path.join(constants.default.distPath, 'socket-facts.plugin.scala');
3424
- const globalBase = resolveGlobalBase();
3425
- await ensurePluginSource(pluginSrcPath, path.join(globalBase, 'plugins'));
3492
+ const perRepoTimeoutMs = opts.perRepoTimeoutMs ?? DEFAULT_PER_REPO_TIMEOUT_MS;
3426
3493
 
3427
- // `-Dsbt.global.base` points sbt at our isolated plugins dir, so the
3428
- // source-only plugin activates without touching the user's `~/.sbt`. The
3429
- // resolution options are passed as JVM system properties the plugin reads.
3430
- const socketProps = [];
3431
- if (ignoreUnresolved) {
3432
- socketProps.push('-Dsocket.ignoreUnresolved=true');
3433
- }
3434
- if (configs) {
3435
- socketProps.push(`-Dsocket.configs=${configs}`);
3494
+ // Validate config + ensure toolchains BEFORE we mint a tempdir.
3495
+ let bin;
3496
+ let baseEnv;
3497
+ try {
3498
+ if (opts.bazelOutputBase) {
3499
+ validateOutputBase(opts.bazelOutputBase, opts.cwd);
3436
3500
  }
3437
- const commandArgs = [`-Dsbt.global.base=${globalBase}`, ...socketProps, ...sbtOpts, '--batch', 'socketFacts'];
3501
+ ensureJavaOnPath();
3502
+ const shim = await provisionPythonShim();
3503
+ baseEnv = shim.augmentedEnv ?? opts.env;
3504
+ bin = await resolveBazelBinary(opts.bin);
3505
+ } catch (e) {
3506
+ logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
3438
3507
  if (verbose) {
3439
- logger.logger.log('[VERBOSE] Executing:', [bin], ', args:', commandArgs);
3508
+ logger.logger.group('[VERBOSE] error:');
3509
+ logger.logger.log(e);
3510
+ logger.logger.groupEnd();
3440
3511
  }
3441
- logger.logger.log(`Generating Socket facts from \`${bin}\` on \`${cwd}\` ...`);
3442
- const output = await execSbt(bin, commandArgs, cwd, verbose);
3443
- if (output.code) {
3444
- process.exitCode = 1;
3445
- logger.logger.fail(`sbt exited with exit code ${output.code}`);
3446
- if (!verbose) {
3447
- const errorLines = extractErrorLines(output.stdout, output.stderr);
3448
- if (errorLines) {
3449
- logger.logger.group('sbt output:');
3450
- logger.logger.error(errorLines);
3451
- logger.logger.groupEnd();
3452
- }
3453
- }
3454
- if (/security ?manager/i.test(output.stdout + output.stderr)) {
3455
- logger.logger.warn(JDK_HINT);
3456
- }
3457
- return;
3512
+ return {
3513
+ artifactCount: 0,
3514
+ manifestPaths: [],
3515
+ status: 'hardFailure'
3516
+ };
3517
+ }
3518
+ logger.logger.info(`Using bazel: ${bin}`);
3519
+
3520
+ // Track every output_user_root we mint so we can reap them all in
3521
+ // the cleanup pass, even if a per-repo timeout forced a re-mint.
3522
+ let outputUserRoot = makeOutputUserRoot();
3523
+ const mintedRoots = [outputUserRoot];
3524
+ if (verbose) {
3525
+ logger.logger.log(`[VERBOSE] initial --output_user_root=${outputUserRoot} (will be reaped on completion)`);
3526
+ }
3527
+ const layout = opts.outLayout ?? 'standalone';
3528
+ const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
3529
+ // One manifest per (workspace, hub), written best-effort: a single wedged
3530
+ // hub must not discard the manifests every other hub produced.
3531
+ const manifestPaths = [];
3532
+ let totalArtifacts = 0;
3533
+ let anyRepos = false;
3534
+ let hubsSucceeded = 0;
3535
+ let hubsFailed = 0;
3536
+ try {
3537
+ // Always apply the default prune policy so no caller can forget it;
3538
+ // callers EXTEND it via ignoreDirNames/ignoreDirPrefixes.
3539
+ const ignoreDirNames = new Set([...DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES, ...(opts.ignoreDirNames ?? [])]);
3540
+ const ignoreDirPrefixes = [...DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES, ...(opts.ignoreDirPrefixes ?? [])];
3541
+ const workspaceRoots = findWorkspaceRoots({
3542
+ cwd,
3543
+ ignoreDirNames,
3544
+ ignoreDirPrefixes,
3545
+ verbose
3546
+ });
3547
+ if (!workspaceRoots.length) {
3548
+ logger.logger.warn(`No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`);
3549
+ return {
3550
+ artifactCount: 0,
3551
+ manifestPaths: [],
3552
+ status: 'noEcosystem'
3553
+ };
3458
3554
  }
3459
- logger.logger.success('Executed sbt successfully');
3460
3555
  if (verbose) {
3461
- // Output already streamed inline; nothing to re-summarize.
3462
- logger.logger.log('');
3463
- logger.logger.log('Next step is to generate a Scan by running the `socket scan create` command on the same directory.');
3464
- return;
3556
+ logger.logger.log(`[VERBOSE] discovered ${workspaceRoots.length} workspace root(s):`, workspaceRoots);
3465
3557
  }
3466
- // `spawn` already strips ANSI from captured output, and the plugin prints
3467
- // these lines bare (via println, no sbt `[info]` prefix), so plain line
3468
- // matching is stable.
3469
- const exports = [];
3470
- for (const m of output.stdout.matchAll(/Socket facts file written to: (.+)/g)) {
3471
- const reported = m[1]?.trim();
3472
- if (reported) {
3473
- exports.push(reported);
3558
+ for (const workspaceRoot of workspaceRoots) {
3559
+ const relPath = path.relative(cwd, workspaceRoot);
3560
+ let mode;
3561
+ try {
3562
+ mode = detectWorkspaceMode(workspaceRoot);
3563
+ } catch (e) {
3564
+ if (verbose) {
3565
+ logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: detect failed (${utils.getErrorCause(e)}); skipping`);
3566
+ }
3567
+ continue;
3474
3568
  }
3475
- }
3476
- if (exports.length) {
3477
- logger.logger.log('Reported exports:');
3478
- for (const fn of exports) {
3479
- logger.logger.log('- ', fn);
3569
+ logger.logger.info(`Workspace ${relPath || '.'}: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
3570
+ const invocationFlags = getBazelInvocationFlags(mode);
3571
+ const queryOptsFor = userRoot => buildQueryOpts({
3572
+ baseEnv,
3573
+ bin,
3574
+ invocationFlags,
3575
+ opts,
3576
+ outputUserRoot: userRoot,
3577
+ spawnCwd: workspaceRoot,
3578
+ verbose
3579
+ });
3580
+
3581
+ // eslint-disable-next-line no-await-in-loop
3582
+ const candidates = await discoverCandidatesForWorkspace(workspaceRoot, mode, queryOptsFor(outputUserRoot), verbose);
3583
+ logger.logger.info(`Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${candidates.join(', ') || '(none)'}`);
3584
+ for (const repoName of candidates) {
3585
+ anyRepos = true;
3586
+ if (verbose) {
3587
+ logger.logger.log(`[VERBOSE] workspace ${relPath || '.'}: running metadata cquery for @${repoName} (timeout ${perRepoTimeoutMs}ms)`);
3588
+ }
3589
+ // eslint-disable-next-line no-await-in-loop
3590
+ const result = await runMetadataCqueryForRepo({
3591
+ opts: queryOptsFor(outputUserRoot),
3592
+ repoName,
3593
+ timeoutMs: perRepoTimeoutMs,
3594
+ workspaceRelPath: relPath,
3595
+ workspaceRoot
3596
+ });
3597
+ if (result.status === 'timeout') {
3598
+ logger.logger.warn(`@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`);
3599
+ hubsFailed += 1;
3600
+ // eslint-disable-next-line no-await-in-loop
3601
+ await reapBazelServer(bin, outputUserRoot, verbose);
3602
+ // eslint-disable-next-line no-await-in-loop
3603
+ await removeTempdir(outputUserRoot, verbose);
3604
+ outputUserRoot = makeOutputUserRoot();
3605
+ mintedRoots.push(outputUserRoot);
3606
+ if (verbose) {
3607
+ logger.logger.log(`[VERBOSE] minted fresh --output_user_root=${outputUserRoot} after timeout`);
3608
+ }
3609
+ continue;
3610
+ }
3611
+ if (result.status === 'error') {
3612
+ logger.logger.warn(`@${repoName}: cquery failed; skipping this hub`);
3613
+ hubsFailed += 1;
3614
+ continue;
3615
+ }
3616
+ // A scan must never silently upload a graph missing edges it knows
3617
+ // it dropped: warn unconditionally and treat the hub as partial.
3618
+ let hubPartial = result.unresolvedLabels.length > 0;
3619
+ if (hubPartial) {
3620
+ logger.logger.warn(`@${repoName}: dropped ${result.unresolvedLabels.length} unresolved dependency edge(s): ${result.unresolvedLabels.join(', ')}`);
3621
+ }
3622
+ // A non-zero cquery exit that still yielded a usable subset
3623
+ // (--keep_going) is reported as `partial` even with no unresolved
3624
+ // labels — the graph is known-incomplete, so flip the hub partial.
3625
+ if (result.status === 'partial' && !result.unresolvedLabels.length) {
3626
+ hubPartial = true;
3627
+ logger.logger.warn(`@${repoName}: cquery partially failed (--keep_going); the dependency graph may be incomplete`);
3628
+ }
3629
+ let written;
3630
+ try {
3631
+ // eslint-disable-next-line no-await-in-loop
3632
+ written = await writeHubManifest({
3633
+ artifacts: result.artifacts,
3634
+ cwd,
3635
+ manifestDir,
3636
+ relPath,
3637
+ repoName,
3638
+ verbose
3639
+ });
3640
+ } catch (e) {
3641
+ // Best-effort per hub: a write failure must not abort the walk and
3642
+ // discard the manifests other hubs already produced.
3643
+ logger.logger.warn(`@${repoName}: failed to write manifest (${utils.getErrorCause(e)}); skipping this hub`);
3644
+ hubsFailed += 1;
3645
+ continue;
3646
+ }
3647
+ if (written.droppedArtifacts.length) {
3648
+ hubPartial = true;
3649
+ logger.logger.warn(`@${repoName}: dropped ${written.droppedArtifacts.length} malformed Maven coordinate(s): ${written.droppedArtifacts.join(', ')}`);
3650
+ }
3651
+ if (written.prunedEdges.length) {
3652
+ hubPartial = true;
3653
+ logger.logger.warn(`@${repoName}: pruned ${written.prunedEdges.length} dependency edge(s) referencing unlisted artifacts: ${written.prunedEdges.join(', ')}`);
3654
+ }
3655
+ if (written.manifestPath) {
3656
+ manifestPaths.push(written.manifestPath);
3657
+ totalArtifacts += written.artifactCount;
3658
+ if (hubPartial) {
3659
+ hubsFailed += 1;
3660
+ } else {
3661
+ hubsSucceeded += 1;
3662
+ }
3663
+ if (verbose) {
3664
+ logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status}, ${written.artifactCount} artifact(s) -> ${written.manifestPath}`);
3665
+ }
3666
+ } else {
3667
+ // No artifacts to write (empty hub). Not itself a failure, but if
3668
+ // edges were dropped the partial signal still applies.
3669
+ if (hubPartial) {
3670
+ hubsFailed += 1;
3671
+ }
3672
+ if (verbose) {
3673
+ logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status} (no manifest written)`);
3674
+ }
3675
+ }
3480
3676
  }
3481
- } else {
3482
- // The plugin skips emission when the build has no resolvable deps.
3483
- const skipMatch = output.stdout.match(/\[socket-facts\] no resolvable dependencies.*/);
3484
- if (skipMatch) {
3485
- logger.logger.warn(skipMatch[0]);
3677
+ }
3678
+ if (!manifestPaths.length) {
3679
+ if (!anyRepos) {
3680
+ if (verbose) {
3681
+ logger.logger.info('No Maven artifacts extracted. failureCategory=no-supported-ecosystem');
3682
+ }
3683
+ return {
3684
+ artifactCount: 0,
3685
+ manifestPaths: [],
3686
+ status: 'noEcosystem'
3687
+ };
3486
3688
  }
3689
+ logger.logger.fail('Discovered Maven repo(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty');
3690
+ return {
3691
+ artifactCount: 0,
3692
+ manifestPaths: [],
3693
+ status: 'hardFailure'
3694
+ };
3487
3695
  }
3488
- logger.logger.log('');
3489
- logger.logger.log('Next step is to generate a Scan by running the `socket scan create` command on the same directory.');
3490
- } catch (e) {
3491
- process.exitCode = 1;
3492
- // A missing sbt launcher is the most common setup failure; surface it
3493
- // clearly instead of the generic message.
3494
- if (e instanceof Error && e.code === 'ENOENT') {
3495
- logger.logger.fail(`Could not run \`${bin}\`. Make sure sbt is installed and on your PATH, or pass --bin with the path to your sbt launcher.`);
3696
+ const status = hubsFailed ? 'partial' : 'complete';
3697
+ if (status === 'complete') {
3698
+ logger.logger.success(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total.`);
3496
3699
  } else {
3497
- logger.logger.fail('There was an unexpected error while generating Socket facts' + (verbose ? '' : ' (use --verbose for details)'));
3700
+ logger.logger.warn(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete.`);
3701
+ }
3702
+ if (verbose) {
3703
+ logger.logger.log('[VERBOSE] outputs:', {
3704
+ artifactCount: totalArtifacts,
3705
+ hubsFailed,
3706
+ hubsSucceeded,
3707
+ layout,
3708
+ manifestPaths,
3709
+ status
3710
+ });
3498
3711
  }
3712
+ return {
3713
+ artifactCount: totalArtifacts,
3714
+ manifestPaths,
3715
+ status
3716
+ };
3717
+ } catch (e) {
3718
+ logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
3499
3719
  if (verbose) {
3500
3720
  logger.logger.group('[VERBOSE] error:');
3501
3721
  logger.logger.log(e);
3502
3722
  logger.logger.groupEnd();
3723
+ } else {
3724
+ logger.logger.info('Re-run with --verbose for the full stack.');
3725
+ }
3726
+ return {
3727
+ artifactCount: 0,
3728
+ manifestPaths: [],
3729
+ status: 'hardFailure'
3730
+ };
3731
+ } finally {
3732
+ for (const dir of mintedRoots) {
3733
+ // eslint-disable-next-line no-await-in-loop
3734
+ await reapBazelServer(bin, dir, verbose);
3735
+ // eslint-disable-next-line no-await-in-loop
3736
+ await removeTempdir(dir, verbose);
3503
3737
  }
3504
3738
  }
3505
3739
  }
3506
3740
 
3507
- // Pull the actionable lines out of a noisy sbt run so a failure surfaces the
3508
- // plugin's own message (and sbt's `[error]` lines) without dumping the whole
3509
- // resolution log.
3510
- function extractErrorLines(stdout, stderr) {
3511
- return `${stdout}\n${stderr}`.split('\n').filter(line => /\[error]|Socket facts|could not resolve|unresolved/i.test(line)).join('\n').trim();
3512
- }
3513
- async function execSbt(bin, commandArgs, cwd, verbose) {
3514
- // When verbose, stream sbt output straight to the terminal so the user can
3515
- // watch resolution progress; otherwise show a spinner and capture output for
3516
- // the post-run summary.
3741
+ // Delegates Socket facts generation for a JVM build tool to the Coana CLI's
3742
+ // `manifest <ecosystem>` command. The build-tool resolution scripts (the Gradle
3743
+ // init script and the sbt plugin) live in Coana now, so socket-cli no longer
3744
+ // runs them itself; it only asks Coana for the uploadable `.socket.facts.json`.
3745
+ //
3746
+ // The resolved artifact-paths sidecar is intentionally NOT requested here: it
3747
+ // only matters for reachability analysis, which is internal to Coana, so Coana
3748
+ // emits it itself when it runs reachability. `socket manifest` only needs the
3749
+ // facts file.
3750
+ //
3751
+ // `spawnCoanaDlx` resolves the Coana CLI via dlx (or a local build when
3752
+ // `SOCKET_CLI_COANA_LOCAL_PATH` is set). `bin` (the gradle/sbt executable) is
3753
+ // always resolved by the caller to a concrete default (`<cwd>/gradlew`, or
3754
+ // `sbt` on PATH) before we get here, so it is forwarded verbatim; the empty
3755
+ // guard below is just a cheap safeguard against passing `--bin ''`.
3756
+ async function runCoanaManifestFacts({
3757
+ bin,
3758
+ buildOpts,
3759
+ buildOptsFlag,
3760
+ cwd,
3761
+ ecosystem,
3762
+ excludeConfigs,
3763
+ ignoreUnresolved,
3764
+ includeConfigs,
3765
+ verbose
3766
+ }) {
3767
+ // Pin the facts output location explicitly rather than relying on Coana's
3768
+ // "project root" default. `factsPath` is then the single source of truth for
3769
+ // both what we tell Coana to write and what we verify exists below, so the
3770
+ // two can't drift apart if Coana's default ever changes. This is deliberately
3771
+ // NOT user-configurable: Socket facts always land in the project root so that
3772
+ // `socket scan create <project>` finds them (see cmd-manifest-scala.mts, which
3773
+ // rejects --out/--stdout in facts mode).
3774
+ const factsDir = cwd;
3775
+ const factsFile = constants.default.DOT_SOCKET_DOT_FACTS_JSON;
3776
+ const factsPath = path.join(factsDir, factsFile);
3777
+ // `coana manifest <ecosystem> <path>` emits `.socket.facts.json` by default;
3778
+ // there is no `--facts` flag (the artifact-paths sidecar is reachability-
3779
+ // internal and not requested here).
3780
+ const coanaArgs = ['manifest', ecosystem, cwd, '--output-dir', factsDir, '--output-file', factsFile];
3781
+ if (bin) {
3782
+ coanaArgs.push('--bin', bin);
3783
+ }
3784
+ if (includeConfigs) {
3785
+ coanaArgs.push('--include-configs', includeConfigs);
3786
+ }
3787
+ if (excludeConfigs) {
3788
+ coanaArgs.push('--exclude-configs', excludeConfigs);
3789
+ }
3790
+ if (ignoreUnresolved) {
3791
+ coanaArgs.push('--ignore-unresolved');
3792
+ }
3517
3793
  if (verbose) {
3518
- logger.logger.info('(Running sbt with output streaming. This can take a while.)');
3519
- const output = await spawn.spawn(bin, commandArgs, {
3520
- cwd,
3521
- stdio: 'inherit'
3522
- });
3523
- return {
3524
- code: output.code,
3525
- stdout: '',
3526
- stderr: ''
3527
- };
3794
+ coanaArgs.push('--debug');
3528
3795
  }
3529
- const {
3530
- spinner
3531
- } = constants.default;
3532
- let pass = false;
3533
- try {
3534
- logger.logger.info('(Running sbt can take a while, depending on the size of the project)');
3535
- logger.logger.info('(No live output. Pass --verbose to stream sbt output instead.)');
3536
- spinner.start('Running sbt...');
3537
- const output = await spawn.spawn(bin, commandArgs, {
3538
- cwd
3539
- });
3540
- pass = true;
3541
- const {
3542
- code,
3543
- stderr,
3544
- stdout
3545
- } = output;
3546
- return {
3547
- code,
3548
- stdout,
3549
- stderr
3550
- };
3551
- } finally {
3552
- if (pass) {
3553
- spinner.successAndStop('Gracefully completed sbt execution.');
3554
- } else {
3555
- spinner.failAndStop('There was an error while trying to run sbt.');
3556
- }
3796
+ // `--gradle-opts` / `--sbt-opts` are variadic on the Coana side; keep them
3797
+ // last so the pass-through values don't swallow any following flags.
3798
+ if (buildOpts.length) {
3799
+ coanaArgs.push(buildOptsFlag, ...buildOpts);
3557
3800
  }
3801
+ logger.logger.log(`Generating Socket facts for the ${ecosystem} project at \`${cwd}\` ...`);
3802
+ if (verbose) {
3803
+ logger.logger.log('[VERBOSE] coana args:', coanaArgs);
3804
+ }
3805
+
3806
+ // Stream Coana's output so the user sees build-tool progress and Coana's own
3807
+ // "Socket facts file written to: ..." line.
3808
+ const result = await utils.spawnCoanaDlx(coanaArgs, undefined, {
3809
+ cwd
3810
+ }, {
3811
+ stdio: 'inherit'
3812
+ });
3813
+ if (!result.ok) {
3814
+ process.exitCode = 1;
3815
+ logger.logger.fail(result.message || 'Coana failed to generate Socket facts');
3816
+ return;
3817
+ }
3818
+ // A zero exit code doesn't guarantee a facts file was written: Coana skips
3819
+ // emitting it when there are no resolvable dependencies (e.g. with
3820
+ // --ignore-unresolved). We pinned the output to `factsPath` above, so confirm
3821
+ // it exists before claiming success; otherwise the "next step: socket scan
3822
+ // create" line would mislead.
3823
+ if (!fs$1.existsSync(factsPath)) {
3824
+ logger.logger.warn(`Coana completed but wrote no ${factsFile} (no resolvable dependencies?); nothing to upload.`);
3825
+ return;
3826
+ }
3827
+ logger.logger.success('Generated Socket facts');
3828
+ logger.logger.log('');
3829
+ logger.logger.log('Next step is to generate a Scan by running the `socket scan create` command on the same directory.');
3830
+ }
3831
+
3832
+ // Generates a `.socket.facts.json` for a Gradle project by delegating to the
3833
+ // Coana CLI's `manifest gradle` command (which owns the Gradle init script that
3834
+ // resolves the dependency graph). socket-cli no longer runs gradle itself; an
3835
+ // explicit `bin` is forwarded as `--bin`, otherwise Coana defaults to
3836
+ // `./gradlew`.
3837
+ async function convertGradleToFacts({
3838
+ bin,
3839
+ cwd,
3840
+ excludeConfigs,
3841
+ gradleOpts,
3842
+ ignoreUnresolved,
3843
+ includeConfigs,
3844
+ verbose
3845
+ }) {
3846
+ await runCoanaManifestFacts({
3847
+ bin,
3848
+ buildOpts: gradleOpts,
3849
+ buildOptsFlag: '--gradle-opts',
3850
+ cwd,
3851
+ ecosystem: 'gradle',
3852
+ excludeConfigs,
3853
+ ignoreUnresolved,
3854
+ includeConfigs,
3855
+ verbose
3856
+ });
3857
+ }
3858
+
3859
+ // Generates a `.socket.facts.json` for an sbt project by delegating to the
3860
+ // Coana CLI's `manifest sbt` command (which owns the sbt plugin that resolves
3861
+ // the dependency graph). socket-cli no longer runs sbt itself; an explicit
3862
+ // `bin` is forwarded as `--bin`, otherwise Coana defaults to `sbt` on PATH.
3863
+ // JDK-compatibility guidance (sbt 0.13/early 1.x cannot run on modern JDKs) is
3864
+ // handled by Coana; pass a compatible JDK via `--sbt-opts "--java-home <path>"`
3865
+ // or `JAVA_HOME`.
3866
+ async function convertSbtToFacts({
3867
+ bin,
3868
+ cwd,
3869
+ excludeConfigs,
3870
+ ignoreUnresolved,
3871
+ includeConfigs,
3872
+ sbtOpts,
3873
+ verbose
3874
+ }) {
3875
+ await runCoanaManifestFacts({
3876
+ bin,
3877
+ buildOpts: sbtOpts,
3878
+ buildOptsFlag: '--sbt-opts',
3879
+ cwd,
3880
+ ecosystem: 'sbt',
3881
+ excludeConfigs,
3882
+ ignoreUnresolved,
3883
+ includeConfigs,
3884
+ verbose
3885
+ });
3558
3886
  }
3559
3887
 
3560
3888
  async function convertGradleToMaven({
@@ -4014,9 +4342,9 @@ async function generateAutoManifest({
4014
4342
  logger.logger.info(`Using this ${constants.SOCKET_JSON} for defaults:`, sockJson);
4015
4343
  }
4016
4344
  if (!sockJson?.defaults?.manifest?.sbt?.disabled && detected.sbt) {
4017
- // Args shared by both paths. The facts-only knobs (`configs`,
4018
- // `ignoreUnresolved`) and the pom-only `out` are added per branch so
4019
- // neither handler is spread properties it doesn't accept.
4345
+ // Args shared by both paths. The facts-only knobs (`includeConfigs`,
4346
+ // `excludeConfigs`, `ignoreUnresolved`) and the pom-only `out` are added
4347
+ // per branch so neither handler is spread properties it doesn't accept.
4020
4348
  const sbtArgs = {
4021
4349
  // Note: `sbt` is more likely to be resolved against PATH env.
4022
4350
  bin: sockJson.defaults?.manifest?.sbt?.bin ?? 'sbt',
@@ -4024,12 +4352,15 @@ async function generateAutoManifest({
4024
4352
  sbtOpts: sockJson.defaults?.manifest?.sbt?.sbtOpts?.split(' ').map(s => s.trim()).filter(Boolean) ?? [],
4025
4353
  verbose: Boolean(sockJson.defaults?.manifest?.sbt?.verbose)
4026
4354
  };
4027
- if (sockJson.defaults?.manifest?.sbt?.facts) {
4355
+ // Socket facts is the default; opt into pom generation with
4356
+ // `defaults.manifest.sbt.facts: false` in socket.json.
4357
+ if (sockJson.defaults?.manifest?.sbt?.facts !== false) {
4028
4358
  logger.logger.log('Detected a Scala sbt build, generating Socket facts...');
4029
4359
  await convertSbtToFacts({
4030
4360
  ...sbtArgs,
4031
- configs: sockJson.defaults?.manifest?.sbt?.configs ?? '',
4032
- ignoreUnresolved: Boolean(sockJson.defaults?.manifest?.sbt?.ignoreUnresolved)
4361
+ excludeConfigs: sockJson.defaults?.manifest?.sbt?.excludeConfigs ?? '',
4362
+ ignoreUnresolved: Boolean(sockJson.defaults?.manifest?.sbt?.ignoreUnresolved),
4363
+ includeConfigs: sockJson.defaults?.manifest?.sbt?.includeConfigs ?? ''
4033
4364
  });
4034
4365
  } else {
4035
4366
  logger.logger.log('Detected a Scala sbt build, generating pom files with sbt...');
@@ -4049,12 +4380,15 @@ async function generateAutoManifest({
4049
4380
  verbose: Boolean(sockJson.defaults?.manifest?.gradle?.verbose),
4050
4381
  gradleOpts: sockJson.defaults?.manifest?.gradle?.gradleOpts?.split(' ').map(s => s.trim()).filter(Boolean) ?? []
4051
4382
  };
4052
- if (sockJson.defaults?.manifest?.gradle?.facts) {
4383
+ // Socket facts is the default; opt into pom generation with
4384
+ // `defaults.manifest.gradle.facts: false` in socket.json.
4385
+ if (sockJson.defaults?.manifest?.gradle?.facts !== false) {
4053
4386
  logger.logger.log('Detected a gradle build (Gradle, Kotlin, Scala), generating Socket facts...');
4054
4387
  await convertGradleToFacts({
4055
4388
  ...gradleArgs,
4056
- configs: sockJson.defaults?.manifest?.gradle?.configs ?? '',
4057
- ignoreUnresolved: Boolean(sockJson.defaults?.manifest?.gradle?.ignoreUnresolved)
4389
+ excludeConfigs: sockJson.defaults?.manifest?.gradle?.excludeConfigs ?? '',
4390
+ ignoreUnresolved: Boolean(sockJson.defaults?.manifest?.gradle?.ignoreUnresolved),
4391
+ includeConfigs: sockJson.defaults?.manifest?.gradle?.includeConfigs ?? ''
4058
4392
  });
4059
4393
  } else {
4060
4394
  logger.logger.log('Detected a gradle build (Gradle, Kotlin, Scala), running default gradle generator...');
@@ -4084,12 +4418,20 @@ async function generateAutoManifest({
4084
4418
  outLayout: 'flat',
4085
4419
  verbose: Boolean(bazelConfig?.verbose) || verbose
4086
4420
  });
4087
- if (!mavenResult.ok && !mavenResult.noEcosystemFound) {
4421
+
4422
+ // Only a hard failure (zero manifests, ecosystem present) aborts the
4423
+ // wider scan. A partial run still produced manifests worth uploading; an
4424
+ // absent ecosystem is tolerated here (it's only an error when EVERY
4425
+ // ecosystem is absent, which the caller decides).
4426
+ if (mavenResult.status === 'hardFailure') {
4088
4427
  throw new Error('Bazel auto-manifest generation failed for ecosystem(s): maven');
4089
4428
  }
4090
- if (mavenResult.ok && mavenResult.manifestPath) {
4091
- generatedFiles.push(mavenResult.manifestPath);
4092
- } else if (mavenResult.noEcosystemFound) {
4429
+ if (mavenResult.status === 'complete' || mavenResult.status === 'partial') {
4430
+ generatedFiles.push(...mavenResult.manifestPaths);
4431
+ if (mavenResult.status === 'partial') {
4432
+ logger.logger.warn(`Bazel Maven manifest generation was partial (${mavenResult.manifestPaths.length} manifest(s) written); some hubs failed or had incomplete dependency graphs. Uploading what was generated.`);
4433
+ }
4434
+ } else {
4093
4435
  logger.logger.info('No supported Bazel Maven ecosystem detected.');
4094
4436
  }
4095
4437
  }
@@ -4307,6 +4649,13 @@ async function handleCreateNewScan({
4307
4649
  const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined;
4308
4650
  if (reach && scanId && tier1ReachabilityScanId) {
4309
4651
  await finalizeTier1Scan(tier1ReachabilityScanId, scanId);
4652
+ } else if (reach.runReachabilityAnalysis && scanId && !tier1ReachabilityScanId) {
4653
+ // Reachability analysis ran and a scan was created, but no tier 1
4654
+ // reachability scan id was extracted from the facts file. Surface this
4655
+ // instead of silently skipping finalize — otherwise the tier 1 row stays
4656
+ // stuck (e.g. at COANA_DONE) and the full scan is never linked to its
4657
+ // reachability report.
4658
+ logger.logger.warn('Reachability analysis ran but no tier 1 reachability scan ID was found; skipping tier 1 finalize. The scan was created but its reachability report was not linked.');
4310
4659
  }
4311
4660
 
4312
4661
  // On a successful scan, clean up the `.socket.facts.json` coana wrote at
@@ -7734,6 +8083,85 @@ async function run$G(argv, importMeta, context) {
7734
8083
  await spawnPromise;
7735
8084
  }
7736
8085
 
8086
+ // Result shape returned by `validatePypiHub`. Kept local to the PyPI module
8087
+ // since validation here is hub-alias-marker based (different from the
8088
+ // Maven-side tri-state classifier).
8089
+
8090
+ // PyPI-only repo-name predicate (Bazel apparent-name grammar).
8091
+ const PYPI_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
8092
+ const PYPI_REPO_NAME_RE = new RegExp(`^${PYPI_REPO_NAME_PATTERN}$`);
8093
+ function pypiApparentNameFromJsonValue(value) {
8094
+ if (!value || typeof value !== 'object') {
8095
+ return undefined;
8096
+ }
8097
+ const obj = value;
8098
+ const direct = obj['apparentName'] ?? obj['apparent_name'];
8099
+ if (typeof direct === 'string') {
8100
+ return direct;
8101
+ }
8102
+ for (const nested of Object.values(obj)) {
8103
+ const found = pypiApparentNameFromJsonValue(nested);
8104
+ if (found) {
8105
+ return found;
8106
+ }
8107
+ }
8108
+ return undefined;
8109
+ }
8110
+ function pypiApparentNamesFromRepoMapping(value) {
8111
+ if (!value || typeof value !== 'object' || Array.isArray(value)) {
8112
+ return [];
8113
+ }
8114
+ const candidates = [];
8115
+ for (const [name, canonicalName] of Object.entries(value)) {
8116
+ if (name.startsWith('@') || typeof canonicalName !== 'string') {
8117
+ continue;
8118
+ }
8119
+ if (PYPI_REPO_NAME_RE.test(name)) {
8120
+ candidates.push(name);
8121
+ }
8122
+ }
8123
+ return candidates;
8124
+ }
8125
+ function pypiNormalizeRepoName(name) {
8126
+ const repo = name.startsWith('@') ? name.slice(1) : name;
8127
+ return PYPI_REPO_NAME_RE.test(repo) ? repo : undefined;
8128
+ }
8129
+
8130
+ // Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accepts
8131
+ // the older streamed jsonproto shape (apparentName / apparent_name records).
8132
+ // PyPI-only; the Maven path consumes `bazel mod show_extension` instead.
8133
+ function parseVisibleRepoCandidates(output) {
8134
+ const seen = new Set();
8135
+ const candidates = [];
8136
+ for (const line of output.split(/\r?\n/)) {
8137
+ const trimmed = line.trim();
8138
+ if (!trimmed) {
8139
+ continue;
8140
+ }
8141
+ try {
8142
+ const parsed = JSON.parse(trimmed);
8143
+ for (const c of pypiApparentNamesFromRepoMapping(parsed)) {
8144
+ if (!seen.has(c)) {
8145
+ seen.add(c);
8146
+ candidates.push(c);
8147
+ }
8148
+ }
8149
+ const apparentName = pypiApparentNameFromJsonValue(parsed);
8150
+ if (apparentName) {
8151
+ const repo = pypiNormalizeRepoName(apparentName);
8152
+ if (repo && !seen.has(repo)) {
8153
+ seen.add(repo);
8154
+ candidates.push(repo);
8155
+ }
8156
+ }
8157
+ } catch {
8158
+ // Skip malformed lines; caller falls back to static discovery when no
8159
+ // usable visible repo names are found.
8160
+ }
8161
+ }
8162
+ return candidates.sort();
8163
+ }
8164
+
7737
8165
  // Maximum size (bytes) we will read for any single Bazel workspace file.
7738
8166
  // Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
7739
8167
  const MAX_WORKSPACE_FILE_BYTES = 5 * 1024 * 1024;
@@ -8676,6 +9104,13 @@ const config$e = {
8676
9104
  Note: this command generates dependency manifests for Bazel workspaces.
8677
9105
  It does not run reachability analysis.
8678
9106
 
9107
+ Maven hub discovery: under Bzlmod, hubs are enumerated from
9108
+ \`bazel mod show_extension\` and filtered to the root module's own hubs.
9109
+ Under legacy WORKSPACE mode (no \`show_extension\`), only conventionally
9110
+ named hubs are probed (\`maven\`, \`maven_install\`, \`maven_dev\`, …). A hub
9111
+ with a non-conventional name that \`show_extension\` does not enumerate is
9112
+ not discovered yet; a flag to name extra hubs is planned.
9113
+
8679
9114
  To generate AND upload in one step, use \`socket scan create --auto-manifest\`
8680
9115
  instead — it detects Bazel workspaces, generates Maven manifests by
8681
9116
  default, and uploads the result. This subcommand is for generation only.
@@ -8697,21 +9132,29 @@ const cmdManifestBazel = {
8697
9132
  // failures that must propagate to a non-zero CLI exit; returns void on
8698
9133
  // success.
8699
9134
  //
8700
- // - Hard failure: ok === false && !noEcosystemFound. The ecosystem was
8701
- // detected (or the runner crashed), but extraction failed. Always a
8702
- // non-zero exit, even when another ecosystem succeeded.
8703
- // - No-discovery: noEcosystemFound === true. Genuinely absent ecosystem.
8704
- // Auto-detect mode tolerates this when at least one other ecosystem
8705
- // succeeded; explicit mode treats it as an error.
9135
+ // - `complete`/`partial` both count as produced output (>=1 manifest).
9136
+ // `partial` additionally warns a known-incomplete SBOM is still emitted,
9137
+ // not a hard error.
9138
+ // - `hardFailure`: the ecosystem was detected (or the runner crashed) but
9139
+ // wrote zero manifests. Always a non-zero exit, even when another
9140
+ // ecosystem succeeded.
9141
+ // - `noEcosystem`: genuinely absent ecosystem. Auto-detect mode tolerates it
9142
+ // when at least one other ecosystem produced output; explicit mode treats
9143
+ // it as an error (the user requested an ecosystem that isn't there).
8706
9144
  function evaluateEcosystemOutcomes(outcomes, isExplicit) {
8707
- const hardFailures = outcomes.filter(o => !o.ok && !o.noEcosystemFound);
8708
- const noDiscoveries = outcomes.filter(o => o.noEcosystemFound);
8709
- const successes = outcomes.filter(o => o.ok && o.manifestPath);
9145
+ const produced = outcomes.filter(o => (o.status === 'complete' || o.status === 'partial') && o.manifestPaths.length > 0);
9146
+ const hardFailures = outcomes.filter(o => o.status === 'hardFailure');
9147
+ const noDiscoveries = outcomes.filter(o => o.status === 'noEcosystem');
9148
+ for (const partial of outcomes) {
9149
+ if (partial.status === 'partial') {
9150
+ logger.logger.warn(`Bazel ${partial.ecosystem} manifest generation was partial; the uploaded SBOM is known-incomplete.`);
9151
+ }
9152
+ }
8710
9153
  if (!isExplicit) {
8711
9154
  if (hardFailures.length) {
8712
9155
  throw new utils.InputError(`Bazel auto-manifest generation hit hard failure(s) in ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
8713
9156
  }
8714
- if (successes.length) {
9157
+ if (produced.length) {
8715
9158
  return;
8716
9159
  }
8717
9160
  if (noDiscoveries.length === outcomes.length) {
@@ -8720,7 +9163,8 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
8720
9163
  return;
8721
9164
  }
8722
9165
 
8723
- // Explicit mode: every requested ecosystem must succeed.
9166
+ // Explicit mode: every requested ecosystem must produce output. A partial
9167
+ // run counts (it wrote manifests); absent or hard-failed ecosystems error.
8724
9168
  if (noDiscoveries.length) {
8725
9169
  throw new utils.InputError(`No Bazel rules found for explicitly requested ecosystem(s): ${noDiscoveries.map(f => f.ecosystem).join(', ')}.`);
8726
9170
  }
@@ -8728,6 +9172,32 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
8728
9172
  throw new utils.InputError(`Bazel manifest generation failed for explicitly requested ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
8729
9173
  }
8730
9174
  }
9175
+
9176
+ // Map the legacy PyPI result shape (single manifestPath + ok/noEcosystem
9177
+ // booleans) into the shared status vocabulary so both ecosystems flow through
9178
+ // one success gate. PyPI has no partial state. Only a `complete` outcome
9179
+ // carries a manifest path; `noEcosystem`/`hardFailure` carry none, preserving
9180
+ // the invariant that a non-success outcome produced no usable output (a
9181
+ // detected-but-empty PyPI run writes a stub file but is still a hard failure,
9182
+ // and that stub must not be surfaced as produced output).
9183
+ function pypiOutcome(result) {
9184
+ if (result.noEcosystemFound) {
9185
+ return {
9186
+ manifestPaths: [],
9187
+ status: 'noEcosystem'
9188
+ };
9189
+ }
9190
+ if (result.ok && result.manifestPath) {
9191
+ return {
9192
+ manifestPaths: [result.manifestPath],
9193
+ status: 'complete'
9194
+ };
9195
+ }
9196
+ return {
9197
+ manifestPaths: [],
9198
+ status: 'hardFailure'
9199
+ };
9200
+ }
8731
9201
  async function run$F(argv, importMeta, {
8732
9202
  parentName
8733
9203
  }) {
@@ -8861,9 +9331,8 @@ async function run$F(argv, importMeta, {
8861
9331
  });
8862
9332
  outcomes.push({
8863
9333
  ecosystem: 'maven',
8864
- ok: mavenResult.ok,
8865
- noEcosystemFound: mavenResult.noEcosystemFound,
8866
- manifestPath: mavenResult.manifestPath
9334
+ manifestPaths: mavenResult.manifestPaths,
9335
+ status: mavenResult.status
8867
9336
  });
8868
9337
  } else if (eco === 'pypi') {
8869
9338
  // eslint-disable-next-line no-await-in-loop
@@ -8879,9 +9348,7 @@ async function run$F(argv, importMeta, {
8879
9348
  });
8880
9349
  outcomes.push({
8881
9350
  ecosystem: 'pypi',
8882
- ok: pypiResult.ok,
8883
- noEcosystemFound: pypiResult.noEcosystemFound,
8884
- manifestPath: pypiResult.manifestPath
9351
+ ...pypiOutcome(pypiResult)
8885
9352
  });
8886
9353
  }
8887
9354
  }
@@ -9140,7 +9607,7 @@ async function run$D(argv, importMeta, {
9140
9607
 
9141
9608
  const config$b = {
9142
9609
  commandName: 'gradle',
9143
- description: '[beta] Use Gradle to generate a manifest file (`pom.xml`) for a Gradle/Java/Kotlin/etc project',
9610
+ description: '[beta] Generate a Socket facts file (or `pom.xml` with --pom) for a Gradle/Java/Kotlin/etc project',
9144
9611
  hidden: false,
9145
9612
  flags: {
9146
9613
  ...flags.commonFlags,
@@ -9150,15 +9617,23 @@ const config$b = {
9150
9617
  },
9151
9618
  facts: {
9152
9619
  type: 'boolean',
9153
- description: 'Emit a Socket facts JSON file (`.socket.facts.json`) describing the resolved dependency graph instead of generating `pom.xml` files'
9620
+ description: 'Emit a Socket facts JSON file (`.socket.facts.json`) describing the resolved dependency graph. This is the default; pass `--pom` to generate `pom.xml` files instead'
9621
+ },
9622
+ pom: {
9623
+ type: 'boolean',
9624
+ description: 'Generate `pom.xml` manifest file(s) instead of the default Socket facts file (`.socket.facts.json`)'
9625
+ },
9626
+ includeConfigs: {
9627
+ type: 'string',
9628
+ description: 'When generating facts: comma-separated glob patterns matched against Gradle configuration names (case-sensitive, `*` and `?` wildcards). Only configurations matching at least one pattern are resolved. e.g. `*CompileClasspath,*RuntimeClasspath`. Default: every resolvable configuration except AGP instrumented-test classpaths'
9154
9629
  },
9155
- configs: {
9630
+ excludeConfigs: {
9156
9631
  type: 'string',
9157
- description: 'With --facts: comma-separated glob patterns matched against Gradle configuration names (case-sensitive, `*` and `?` wildcards). e.g. `*CompileClasspath,*RuntimeClasspath` to skip tooling configs. Default: every resolvable configuration except AGP instrumented-test classpaths'
9632
+ description: 'When generating facts: comma-separated glob patterns; Gradle configurations matching any pattern are skipped (applied after --include-configs)'
9158
9633
  },
9159
9634
  ignoreUnresolved: {
9160
9635
  type: 'boolean',
9161
- description: 'With --facts: warn on unresolved dependencies instead of failing the run (unresolved deps are not emitted to the facts file)'
9636
+ description: 'When generating facts: warn on unresolved dependencies instead of failing the run (unresolved deps are not emitted to the facts file)'
9162
9637
  },
9163
9638
  gradleOpts: {
9164
9639
  type: 'string',
@@ -9176,38 +9651,32 @@ const config$b = {
9176
9651
  Options
9177
9652
  ${utils.getFlagListOutput(config.flags)}
9178
9653
 
9179
- Uses gradle, preferably through your local project \`gradlew\`, to generate a
9180
- \`pom.xml\` file for each task. If you have no \`gradlew\` you can try the
9181
- global \`gradle\` binary but that may not work (hard to predict).
9182
-
9183
- The \`pom.xml\` is a manifest file similar to \`package.json\` for npm or
9184
- or ${constants.REQUIREMENTS_TXT} for PyPi), but specifically for Maven, which is Java's
9185
- dependency repository. Languages like Kotlin and Scala piggy back on it too.
9654
+ By default, emits a single \`.socket.facts.json\` describing the resolved
9655
+ dependency graph of the whole build, using gradle (preferably your local
9656
+ \`gradlew\`). An unresolved dependency is a fatal error. You can pass
9657
+ --include-configs / --exclude-configs (comma-separated glob patterns) to
9658
+ control which configurations are resolved (e.g.
9659
+ --include-configs=\`*CompileClasspath,*RuntimeClasspath\`), and
9660
+ --ignore-unresolved to warn on unresolved dependencies instead of failing.
9186
9661
 
9187
- There are some caveats with the gradle to \`pom.xml\` conversion:
9662
+ Pass --pom to instead generate \`pom.xml\` manifest files via gradle (one per
9663
+ task). The \`pom.xml\` is a manifest file similar to \`package.json\` for npm
9664
+ (or ${constants.REQUIREMENTS_TXT} for PyPi), but specifically for Maven, which is
9665
+ Java's dependency repository. Caveats of the \`pom.xml\` conversion:
9188
9666
 
9189
- - each task will generate its own xml file and by default it generates one xml
9190
- for every task. (This may be a good thing!)
9667
+ - each task generates its own xml file (one per task by default)
9191
9668
 
9192
- - it's possible certain features don't translate well into the xml. If you
9193
- think something is missing that could be supported please reach out.
9669
+ - certain features may not translate well into the xml; reach out if
9670
+ something you need is missing
9194
9671
 
9195
9672
  - it works with your \`gradlew\` from your repo and local settings and config
9196
9673
 
9197
- Pass --facts to instead emit a single \`.socket.facts.json\` describing the
9198
- resolved dependency graph of the whole build (no \`pom.xml\` files). An
9199
- unresolved dependency is a fatal error. With --facts you can pass
9200
- --configs=<comma-separated glob patterns> to restrict resolution to
9201
- matching configurations (e.g. \`*CompileClasspath,*RuntimeClasspath\`),
9202
- and --ignore-unresolved to warn on unresolved dependencies instead of
9203
- failing the run.
9204
-
9205
9674
  Support is beta. Please report issues or give us feedback on what's missing.
9206
9675
 
9207
9676
  Examples
9208
9677
 
9209
9678
  $ ${command} .
9210
- $ ${command} --facts .
9679
+ $ ${command} --pom .
9211
9680
  $ ${command} --bin=../gradlew .
9212
9681
  `
9213
9682
  };
@@ -9241,10 +9710,11 @@ async function run$C(argv, importMeta, {
9241
9710
  require$$9.debugFn('inspect', `override: ${constants.SOCKET_JSON} gradle`, sockJson?.defaults?.manifest?.gradle);
9242
9711
  let {
9243
9712
  bin,
9244
- configs,
9713
+ excludeConfigs,
9245
9714
  facts,
9246
9715
  gradleOpts,
9247
9716
  ignoreUnresolved,
9717
+ includeConfigs,
9248
9718
  verbose
9249
9719
  } = cli.flags;
9250
9720
 
@@ -9277,16 +9747,34 @@ async function run$C(argv, importMeta, {
9277
9747
  if (sockJson.defaults?.manifest?.gradle?.facts !== undefined) {
9278
9748
  facts = sockJson.defaults?.manifest?.gradle?.facts;
9279
9749
  logger.logger.info(`Using default --facts from ${constants.SOCKET_JSON}:`, facts);
9750
+ } else {
9751
+ // Socket facts generation is the default; pass --pom to generate poms.
9752
+ facts = true;
9753
+ }
9754
+ }
9755
+ // --pom opts into legacy pom.xml generation. It overrides the facts default
9756
+ // (and the socket.json default) but conflicts with an explicit --facts.
9757
+ if (cli.flags['pom']) {
9758
+ if (cli.flags['facts'] !== undefined) {
9759
+ logger.logger.warn('The `--facts` and `--pom` options are mutually exclusive; generating Socket facts.');
9280
9760
  } else {
9281
9761
  facts = false;
9282
9762
  }
9283
9763
  }
9284
- if (configs === undefined) {
9285
- if (sockJson.defaults?.manifest?.gradle?.configs !== undefined) {
9286
- configs = sockJson.defaults?.manifest?.gradle?.configs;
9287
- logger.logger.info(`Using default --configs from ${constants.SOCKET_JSON}:`, configs);
9764
+ if (includeConfigs === undefined) {
9765
+ if (sockJson.defaults?.manifest?.gradle?.includeConfigs !== undefined) {
9766
+ includeConfigs = sockJson.defaults?.manifest?.gradle?.includeConfigs;
9767
+ logger.logger.info(`Using default --include-configs from ${constants.SOCKET_JSON}:`, includeConfigs);
9288
9768
  } else {
9289
- configs = '';
9769
+ includeConfigs = '';
9770
+ }
9771
+ }
9772
+ if (excludeConfigs === undefined) {
9773
+ if (sockJson.defaults?.manifest?.gradle?.excludeConfigs !== undefined) {
9774
+ excludeConfigs = sockJson.defaults?.manifest?.gradle?.excludeConfigs;
9775
+ logger.logger.info(`Using default --exclude-configs from ${constants.SOCKET_JSON}:`, excludeConfigs);
9776
+ } else {
9777
+ excludeConfigs = '';
9290
9778
  }
9291
9779
  }
9292
9780
  if (ignoreUnresolved === undefined) {
@@ -9298,13 +9786,12 @@ async function run$C(argv, importMeta, {
9298
9786
  }
9299
9787
  }
9300
9788
 
9301
- // `--configs` and `--ignore-unresolved` only affect --facts; the pom path
9302
- // (the legacy `socketGenerateMaven` task) has no equivalent knobs. Warn
9303
- // rather than silently ignore an explicitly-passed flag. (socket.json
9304
- // defaults don't trip this — only a flag actually present on the command
9305
- // line does.)
9306
- if (!facts && (cli.flags['configs'] !== undefined || cli.flags['ignoreUnresolved'] !== undefined)) {
9307
- logger.logger.warn('The `--configs` and `--ignore-unresolved` options only apply with `--facts`; ignoring them.');
9789
+ // `--include-configs`, `--exclude-configs`, and `--ignore-unresolved` only
9790
+ // affect facts generation; the pom path has no equivalent knobs. Warn rather
9791
+ // than silently ignore an explicitly-passed flag. (socket.json defaults don't
9792
+ // trip this — only a flag actually present on the command line does.)
9793
+ if (!facts && (cli.flags['includeConfigs'] !== undefined || cli.flags['excludeConfigs'] !== undefined || cli.flags['ignoreUnresolved'] !== undefined)) {
9794
+ logger.logger.warn('The `--include-configs`, `--exclude-configs`, and `--ignore-unresolved` options only apply when generating Socket facts (not with `--pom`); ignoring them.');
9308
9795
  }
9309
9796
  if (verbose) {
9310
9797
  logger.logger.group('- ', parentName, config$b.commandName, ':');
@@ -9341,10 +9828,11 @@ async function run$C(argv, importMeta, {
9341
9828
  if (facts) {
9342
9829
  await convertGradleToFacts({
9343
9830
  bin: String(bin),
9344
- configs: String(configs || ''),
9345
9831
  cwd,
9832
+ excludeConfigs: String(excludeConfigs || ''),
9346
9833
  gradleOpts: parsedGradleOpts,
9347
9834
  ignoreUnresolved: Boolean(ignoreUnresolved),
9835
+ includeConfigs: String(includeConfigs || ''),
9348
9836
  verbose: Boolean(verbose)
9349
9837
  });
9350
9838
  return;
@@ -9364,7 +9852,7 @@ async function run$C(argv, importMeta, {
9364
9852
  // command. Room for improvement.
9365
9853
  const config$a = {
9366
9854
  commandName: 'kotlin',
9367
- description: '[beta] Use Gradle to generate a manifest file (`pom.xml`) for a Kotlin project',
9855
+ description: '[beta] Generate a Socket facts file (or `pom.xml` with --pom) for a Kotlin project',
9368
9856
  hidden: false,
9369
9857
  flags: {
9370
9858
  ...flags.commonFlags,
@@ -9374,15 +9862,23 @@ const config$a = {
9374
9862
  },
9375
9863
  facts: {
9376
9864
  type: 'boolean',
9377
- description: 'Emit a Socket facts JSON file (`.socket.facts.json`) describing the resolved dependency graph instead of generating `pom.xml` files'
9865
+ description: 'Emit a Socket facts JSON file (`.socket.facts.json`) describing the resolved dependency graph. This is the default; pass `--pom` to generate `pom.xml` files instead'
9866
+ },
9867
+ pom: {
9868
+ type: 'boolean',
9869
+ description: 'Generate `pom.xml` manifest file(s) instead of the default Socket facts file (`.socket.facts.json`)'
9870
+ },
9871
+ includeConfigs: {
9872
+ type: 'string',
9873
+ description: 'When generating facts: comma-separated glob patterns matched against Gradle configuration names (case-sensitive, `*` and `?` wildcards). Only configurations matching at least one pattern are resolved. e.g. `*CompileClasspath,*RuntimeClasspath`. Default: every resolvable configuration except AGP instrumented-test classpaths'
9378
9874
  },
9379
- configs: {
9875
+ excludeConfigs: {
9380
9876
  type: 'string',
9381
- description: 'With --facts: comma-separated glob patterns matched against Gradle configuration names (case-sensitive, `*` and `?` wildcards). e.g. `*CompileClasspath,*RuntimeClasspath` to skip tooling configs. Default: every resolvable configuration except AGP instrumented-test classpaths'
9877
+ description: 'When generating facts: comma-separated glob patterns; Gradle configurations matching any pattern are skipped (applied after --include-configs)'
9382
9878
  },
9383
9879
  ignoreUnresolved: {
9384
9880
  type: 'boolean',
9385
- description: 'With --facts: warn on unresolved dependencies instead of failing the run (unresolved deps are not emitted to the facts file)'
9881
+ description: 'When generating facts: warn on unresolved dependencies instead of failing the run (unresolved deps are not emitted to the facts file)'
9386
9882
  },
9387
9883
  gradleOpts: {
9388
9884
  type: 'string',
@@ -9400,21 +9896,23 @@ const config$a = {
9400
9896
  Options
9401
9897
  ${utils.getFlagListOutput(config.flags)}
9402
9898
 
9403
- Uses gradle, preferably through your local project \`gradlew\`, to generate a
9404
- \`pom.xml\` file for each task. If you have no \`gradlew\` you can try the
9405
- global \`gradle\` binary but that may not work (hard to predict).
9406
-
9407
- The \`pom.xml\` is a manifest file similar to \`package.json\` for npm or
9408
- or ${constants.REQUIREMENTS_TXT} for PyPi), but specifically for Maven, which is Java's
9409
- dependency repository. Languages like Kotlin and Scala piggy back on it too.
9899
+ By default, emits a single \`.socket.facts.json\` describing the resolved
9900
+ dependency graph of the whole build, using gradle (preferably your local
9901
+ \`gradlew\`). An unresolved dependency is a fatal error. You can pass
9902
+ --include-configs / --exclude-configs (comma-separated glob patterns) to
9903
+ control which configurations are resolved (e.g.
9904
+ --include-configs=\`*CompileClasspath,*RuntimeClasspath\`), and
9905
+ --ignore-unresolved to warn on unresolved dependencies instead of failing.
9410
9906
 
9411
- There are some caveats with the gradle to \`pom.xml\` conversion:
9907
+ Pass --pom to instead generate \`pom.xml\` manifest files via gradle (one per
9908
+ task). The \`pom.xml\` is a manifest file similar to \`package.json\` for npm
9909
+ (or ${constants.REQUIREMENTS_TXT} for PyPi), but specifically for Maven, which is
9910
+ Java's dependency repository. Caveats of the \`pom.xml\` conversion:
9412
9911
 
9413
- - each task will generate its own xml file and by default it generates one xml
9414
- for every task. (This may be a good thing!)
9912
+ - each task generates its own xml file (one per task by default)
9415
9913
 
9416
- - it's possible certain features don't translate well into the xml. If you
9417
- think something is missing that could be supported please reach out.
9914
+ - certain features may not translate well into the xml; reach out if
9915
+ something you need is missing
9418
9916
 
9419
9917
  - it works with your \`gradlew\` from your repo and local settings and config
9420
9918
 
@@ -9423,6 +9921,7 @@ const config$a = {
9423
9921
  Examples
9424
9922
 
9425
9923
  $ ${command} .
9924
+ $ ${command} --pom .
9426
9925
  $ ${command} --bin=../gradlew .
9427
9926
  `
9428
9927
  };
@@ -9456,10 +9955,11 @@ async function run$B(argv, importMeta, {
9456
9955
  require$$9.debugFn('inspect', `override: ${constants.SOCKET_JSON} gradle`, sockJson?.defaults?.manifest?.gradle);
9457
9956
  let {
9458
9957
  bin,
9459
- configs,
9958
+ excludeConfigs,
9460
9959
  facts,
9461
9960
  gradleOpts,
9462
9961
  ignoreUnresolved,
9962
+ includeConfigs,
9463
9963
  verbose
9464
9964
  } = cli.flags;
9465
9965
 
@@ -9492,16 +9992,34 @@ async function run$B(argv, importMeta, {
9492
9992
  if (sockJson.defaults?.manifest?.gradle?.facts !== undefined) {
9493
9993
  facts = sockJson.defaults?.manifest?.gradle?.facts;
9494
9994
  logger.logger.info(`Using default --facts from ${constants.SOCKET_JSON}:`, facts);
9995
+ } else {
9996
+ // Socket facts generation is the default; pass --pom to generate poms.
9997
+ facts = true;
9998
+ }
9999
+ }
10000
+ // --pom opts into legacy pom.xml generation. It overrides the facts default
10001
+ // (and the socket.json default) but conflicts with an explicit --facts.
10002
+ if (cli.flags['pom']) {
10003
+ if (cli.flags['facts'] !== undefined) {
10004
+ logger.logger.warn('The `--facts` and `--pom` options are mutually exclusive; generating Socket facts.');
9495
10005
  } else {
9496
10006
  facts = false;
9497
10007
  }
9498
10008
  }
9499
- if (configs === undefined) {
9500
- if (sockJson.defaults?.manifest?.gradle?.configs !== undefined) {
9501
- configs = sockJson.defaults?.manifest?.gradle?.configs;
9502
- logger.logger.info(`Using default --configs from ${constants.SOCKET_JSON}:`, configs);
10009
+ if (includeConfigs === undefined) {
10010
+ if (sockJson.defaults?.manifest?.gradle?.includeConfigs !== undefined) {
10011
+ includeConfigs = sockJson.defaults?.manifest?.gradle?.includeConfigs;
10012
+ logger.logger.info(`Using default --include-configs from ${constants.SOCKET_JSON}:`, includeConfigs);
10013
+ } else {
10014
+ includeConfigs = '';
10015
+ }
10016
+ }
10017
+ if (excludeConfigs === undefined) {
10018
+ if (sockJson.defaults?.manifest?.gradle?.excludeConfigs !== undefined) {
10019
+ excludeConfigs = sockJson.defaults?.manifest?.gradle?.excludeConfigs;
10020
+ logger.logger.info(`Using default --exclude-configs from ${constants.SOCKET_JSON}:`, excludeConfigs);
9503
10021
  } else {
9504
- configs = '';
10022
+ excludeConfigs = '';
9505
10023
  }
9506
10024
  }
9507
10025
  if (ignoreUnresolved === undefined) {
@@ -9512,8 +10030,11 @@ async function run$B(argv, importMeta, {
9512
10030
  ignoreUnresolved = false;
9513
10031
  }
9514
10032
  }
9515
- if (!facts && (cli.flags['configs'] !== undefined || cli.flags['ignoreUnresolved'] !== undefined)) {
9516
- logger.logger.warn('The `--configs` and `--ignore-unresolved` options only apply with `--facts`; ignoring them.');
10033
+
10034
+ // `--include-configs`, `--exclude-configs`, and `--ignore-unresolved` only
10035
+ // affect facts generation; the pom path has no equivalent knobs.
10036
+ if (!facts && (cli.flags['includeConfigs'] !== undefined || cli.flags['excludeConfigs'] !== undefined || cli.flags['ignoreUnresolved'] !== undefined)) {
10037
+ logger.logger.warn('The `--include-configs`, `--exclude-configs`, and `--ignore-unresolved` options only apply when generating Socket facts (not with `--pom`); ignoring them.');
9517
10038
  }
9518
10039
  if (verbose) {
9519
10040
  logger.logger.group('- ', parentName, config$a.commandName, ':');
@@ -9550,10 +10071,11 @@ async function run$B(argv, importMeta, {
9550
10071
  if (facts) {
9551
10072
  await convertGradleToFacts({
9552
10073
  bin: String(bin),
9553
- configs: String(configs || ''),
9554
10074
  cwd,
10075
+ excludeConfigs: String(excludeConfigs || ''),
9555
10076
  gradleOpts: parsedGradleOpts,
9556
10077
  ignoreUnresolved: Boolean(ignoreUnresolved),
10078
+ includeConfigs: String(includeConfigs || ''),
9557
10079
  verbose: Boolean(verbose)
9558
10080
  });
9559
10081
  return;
@@ -9568,7 +10090,7 @@ async function run$B(argv, importMeta, {
9568
10090
 
9569
10091
  const config$9 = {
9570
10092
  commandName: 'scala',
9571
- description: "[beta] Generate a manifest file (`pom.xml`) from Scala's `build.sbt` file",
10093
+ description: '[beta] Generate a Socket facts file (or `pom.xml` with --pom) from a Scala `build.sbt` project',
9572
10094
  hidden: false,
9573
10095
  flags: {
9574
10096
  ...flags.commonFlags,
@@ -9578,23 +10100,31 @@ const config$9 = {
9578
10100
  },
9579
10101
  facts: {
9580
10102
  type: 'boolean',
9581
- description: 'Emit a Socket facts JSON file (`.socket.facts.json`) describing the resolved dependency graph instead of generating `pom.xml` files'
10103
+ description: 'Emit a Socket facts JSON file (`.socket.facts.json`) describing the resolved dependency graph. This is the default; pass `--pom` to generate `pom.xml` files instead'
10104
+ },
10105
+ pom: {
10106
+ type: 'boolean',
10107
+ description: 'Generate `pom.xml` manifest file(s) instead of the default Socket facts file (`.socket.facts.json`)'
10108
+ },
10109
+ includeConfigs: {
10110
+ type: 'string',
10111
+ description: 'When generating facts: comma-separated glob patterns matched against sbt configuration names (case-sensitive, `*` and `?` wildcards). Only configurations matching at least one pattern are resolved. e.g. `compile,test`. Default: compile,optional,provided,runtime,test'
9582
10112
  },
9583
- configs: {
10113
+ excludeConfigs: {
9584
10114
  type: 'string',
9585
- description: 'With --facts: comma-separated glob patterns matched against sbt configuration names (case-sensitive, `*` and `?` wildcards). Bare names (no wildcards) act as exact-name filters. Default: compile,optional,provided,runtime,test'
10115
+ description: 'When generating facts: comma-separated glob patterns; sbt configurations matching any pattern are skipped (applied after --include-configs)'
9586
10116
  },
9587
10117
  ignoreUnresolved: {
9588
10118
  type: 'boolean',
9589
- description: 'With --facts: warn on unresolved dependencies instead of failing the run (unresolved deps are not emitted to the facts file)'
10119
+ description: 'When generating facts: warn on unresolved dependencies instead of failing the run (unresolved deps are not emitted to the facts file)'
9590
10120
  },
9591
10121
  out: {
9592
10122
  type: 'string',
9593
- description: 'Path of output file; where to store the resulting manifest, see also --stdout'
10123
+ description: 'Only with --pom: path of the output `pom.xml`, see also --stdout. Does not apply when generating Socket facts (always written to the project root as `.socket.facts.json`)'
9594
10124
  },
9595
10125
  stdout: {
9596
10126
  type: 'boolean',
9597
- description: 'Print resulting pom.xml to stdout (supersedes --out)'
10127
+ description: 'Only with --pom: print the resulting `pom.xml` to stdout (supersedes --out). Does not apply when generating Socket facts'
9598
10128
  },
9599
10129
  sbtOpts: {
9600
10130
  type: 'string',
@@ -9612,11 +10142,18 @@ const config$9 = {
9612
10142
  Options
9613
10143
  ${utils.getFlagListOutput(config.flags)}
9614
10144
 
9615
- Uses \`sbt makePom\` to generate a \`pom.xml\` from your \`build.sbt\` file.
9616
- This xml file is the dependency manifest (like a package.json
9617
- for Node.js or ${constants.REQUIREMENTS_TXT} for PyPi), but specifically for Scala.
10145
+ By default, emits a single \`.socket.facts.json\` describing the resolved
10146
+ dependency graph of the whole build. It reads dependency metadata only and
10147
+ never downloads artifacts; an unresolved dependency is a fatal error. You
10148
+ can pass --include-configs / --exclude-configs (comma-separated glob
10149
+ patterns) to control which sbt configurations are resolved (e.g.
10150
+ --include-configs=\`compile,test\`), and --ignore-unresolved to warn on
10151
+ unresolved dependencies instead of failing the run.
9618
10152
 
9619
- There are some caveats with \`build.sbt\` to \`pom.xml\` conversion:
10153
+ Pass --pom to instead generate a \`pom.xml\` via \`sbt makePom\` from your
10154
+ \`build.sbt\`. The xml is the dependency manifest (like a package.json for
10155
+ Node.js or ${constants.REQUIREMENTS_TXT} for PyPi), but specifically for Scala.
10156
+ Caveats of the \`build.sbt\` to \`pom.xml\` conversion:
9620
10157
 
9621
10158
  - the xml is exported as pom.xml at the project root so Socket scan picks
9622
10159
  it up; sbt itself first writes it inside your /target/sbt<version> folder
@@ -9634,15 +10171,6 @@ const config$9 = {
9634
10171
 
9635
10172
  You can specify --bin to override the path to the \`sbt\` binary to invoke.
9636
10173
 
9637
- Pass --facts to instead emit a single \`.socket.facts.json\` describing the
9638
- resolved dependency graph of the whole build (no \`pom.xml\` files). It reads
9639
- dependency metadata only and never downloads artifacts; an unresolved
9640
- dependency is a fatal error. With --facts you can pass
9641
- --configs=<comma-separated glob patterns> to choose which sbt configurations
9642
- to resolve (e.g. \`compile,test\` for exact names or \`*Test*\` for variants),
9643
- and --ignore-unresolved to warn on unresolved dependencies instead of
9644
- failing the run.
9645
-
9646
10174
  Support is beta. Please report issues or give us feedback on what's missing.
9647
10175
 
9648
10176
  This is only for SBT. If your Scala setup uses gradle, please see the help
@@ -9651,7 +10179,7 @@ const config$9 = {
9651
10179
  Examples
9652
10180
 
9653
10181
  $ ${command}
9654
- $ ${command} --facts .
10182
+ $ ${command} --pom .
9655
10183
  $ ${command} ./proj --bin=/usr/bin/sbt --file=boot.sbt
9656
10184
  `
9657
10185
  };
@@ -9685,9 +10213,10 @@ async function run$A(argv, importMeta, {
9685
10213
  require$$9.debugFn('inspect', `override: ${constants.SOCKET_JSON} sbt`, sockJson?.defaults?.manifest?.sbt);
9686
10214
  let {
9687
10215
  bin,
9688
- configs,
10216
+ excludeConfigs,
9689
10217
  facts,
9690
10218
  ignoreUnresolved,
10219
+ includeConfigs,
9691
10220
  out,
9692
10221
  sbtOpts,
9693
10222
  stdout,
@@ -9707,16 +10236,34 @@ async function run$A(argv, importMeta, {
9707
10236
  if (sockJson.defaults?.manifest?.sbt?.facts !== undefined) {
9708
10237
  facts = sockJson.defaults?.manifest?.sbt?.facts;
9709
10238
  logger.logger.info(`Using default --facts from ${constants.SOCKET_JSON}:`, facts);
10239
+ } else {
10240
+ // Socket facts generation is the default; pass --pom to generate poms.
10241
+ facts = true;
10242
+ }
10243
+ }
10244
+ // --pom opts into legacy pom.xml generation. It overrides the facts default
10245
+ // (and the socket.json default) but conflicts with an explicit --facts.
10246
+ if (cli.flags['pom']) {
10247
+ if (cli.flags['facts'] !== undefined) {
10248
+ logger.logger.warn('The `--facts` and `--pom` options are mutually exclusive; generating Socket facts.');
9710
10249
  } else {
9711
10250
  facts = false;
9712
10251
  }
9713
10252
  }
9714
- if (configs === undefined) {
9715
- if (sockJson.defaults?.manifest?.sbt?.configs !== undefined) {
9716
- configs = sockJson.defaults?.manifest?.sbt?.configs;
9717
- logger.logger.info(`Using default --configs from ${constants.SOCKET_JSON}:`, configs);
10253
+ if (includeConfigs === undefined) {
10254
+ if (sockJson.defaults?.manifest?.sbt?.includeConfigs !== undefined) {
10255
+ includeConfigs = sockJson.defaults?.manifest?.sbt?.includeConfigs;
10256
+ logger.logger.info(`Using default --include-configs from ${constants.SOCKET_JSON}:`, includeConfigs);
10257
+ } else {
10258
+ includeConfigs = '';
10259
+ }
10260
+ }
10261
+ if (excludeConfigs === undefined) {
10262
+ if (sockJson.defaults?.manifest?.sbt?.excludeConfigs !== undefined) {
10263
+ excludeConfigs = sockJson.defaults?.manifest?.sbt?.excludeConfigs;
10264
+ logger.logger.info(`Using default --exclude-configs from ${constants.SOCKET_JSON}:`, excludeConfigs);
9718
10265
  } else {
9719
- configs = '';
10266
+ excludeConfigs = '';
9720
10267
  }
9721
10268
  }
9722
10269
  if (ignoreUnresolved === undefined) {
@@ -9756,21 +10303,13 @@ async function run$A(argv, importMeta, {
9756
10303
  verbose = false;
9757
10304
  }
9758
10305
 
9759
- // `--configs` and `--ignore-unresolved` only affect --facts; the pom path
9760
- // (`sbt makePom`) has no equivalent knobs. Warn rather than silently ignore
9761
- // an explicitly-passed flag. (socket.json defaults don't trip this only a
9762
- // flag actually present on the command line does.)
9763
- if (!facts && (cli.flags['configs'] !== undefined || cli.flags['ignoreUnresolved'] !== undefined)) {
9764
- logger.logger.warn('The `--configs` and `--ignore-unresolved` options only apply with `--facts`; ignoring them.');
9765
- }
9766
-
9767
- // Conversely, --out / --stdout only affect the pom path; with --facts the
9768
- // plugin always writes `.socket.facts.json` to the build root (its
9769
- // socket.outputDirectory/outputFile JVM props aren't exposed by the CLI), so
9770
- // warn rather than let `--facts --out custom.json` silently write nothing
9771
- // there.
9772
- if (facts && (cli.flags['out'] !== undefined || cli.flags['stdout'] !== undefined)) {
9773
- logger.logger.warn('The `--out` and `--stdout` options do not apply with `--facts`; the facts file is always written to the build root.');
10306
+ // `--include-configs`, `--exclude-configs`, and `--ignore-unresolved` only
10307
+ // affect facts generation; the pom path (`sbt makePom`) has no equivalent
10308
+ // knobs. Warn rather than silently ignore an explicitly-passed flag.
10309
+ // (socket.json defaults don't trip this — only a flag actually present on the
10310
+ // command line does.)
10311
+ if (!facts && (cli.flags['includeConfigs'] !== undefined || cli.flags['excludeConfigs'] !== undefined || cli.flags['ignoreUnresolved'] !== undefined)) {
10312
+ logger.logger.warn('The `--include-configs`, `--exclude-configs`, and `--ignore-unresolved` options only apply when generating Socket facts (not with `--pom`); ignoring them.');
9774
10313
  }
9775
10314
  if (verbose) {
9776
10315
  logger.logger.group('- ', parentName, config$9.commandName, ':');
@@ -9784,11 +10323,20 @@ async function run$A(argv, importMeta, {
9784
10323
  // try, store contents in a file in some folder, target that folder... what
9785
10324
  // would the file name be?
9786
10325
 
10326
+ // --out / --stdout only affect the pom path. Socket facts are always written
10327
+ // to the project root as `.socket.facts.json` so that `socket scan create`
10328
+ // picks them up, so reject these flags in facts mode rather than silently
10329
+ // ignoring an explicitly-passed output location.
9787
10330
  const wasValidInput = utils.checkCommandInput(outputKind, {
9788
10331
  nook: true,
9789
10332
  test: cli.input.length <= 1,
9790
10333
  message: 'Can only accept one DIR (make sure to escape spaces!)',
9791
10334
  fail: 'received ' + cli.input.length
10335
+ }, {
10336
+ nook: true,
10337
+ test: !(facts && (cli.flags['out'] !== undefined || cli.flags['stdout'] !== undefined)),
10338
+ message: 'The `--out` and `--stdout` options only apply with `--pom`; Socket facts are always written to the project root as `.socket.facts.json`',
10339
+ fail: 'remove --out/--stdout, or pass --pom'
9792
10340
  });
9793
10341
  if (!wasValidInput) {
9794
10342
  return;
@@ -9808,9 +10356,10 @@ async function run$A(argv, importMeta, {
9808
10356
  if (facts) {
9809
10357
  await convertSbtToFacts({
9810
10358
  bin: String(bin),
9811
- configs: String(configs || ''),
9812
10359
  cwd,
10360
+ excludeConfigs: String(excludeConfigs || ''),
9813
10361
  ignoreUnresolved: Boolean(ignoreUnresolved),
10362
+ includeConfigs: String(includeConfigs || ''),
9814
10363
  sbtOpts: parsedSbtOpts,
9815
10364
  verbose: Boolean(verbose)
9816
10365
  });
@@ -9871,19 +10420,19 @@ async function setupManifestConfig(cwd, defaultOnReadError = false) {
9871
10420
  }, {
9872
10421
  name: 'Gradle'.padEnd(30, ' '),
9873
10422
  value: 'gradle',
9874
- description: 'Generate pom.xml files through gradle'
10423
+ description: 'Generate a Socket facts file or pom.xml through gradle'
9875
10424
  }, {
9876
10425
  name: 'Kotlin (gradle)'.padEnd(30, ' '),
9877
10426
  value: 'gradle',
9878
- description: 'Generate pom.xml files (for Kotlin) through gradle'
10427
+ description: 'Generate a Socket facts file or pom.xml (for Kotlin) through gradle'
9879
10428
  }, {
9880
10429
  name: 'Scala (gradle)'.padEnd(30, ' '),
9881
10430
  value: 'gradle',
9882
- description: 'Generate pom.xml files (for Scala) through gradle'
10431
+ description: 'Generate a Socket facts file or pom.xml (for Scala) through gradle'
9883
10432
  }, {
9884
10433
  name: 'Scala (sbt)'.padEnd(30, ' '),
9885
10434
  value: 'sbt',
9886
- description: 'Generate pom.xml files through sbt'
10435
+ description: 'Generate a Socket facts file or pom.xml through sbt'
9887
10436
  }];
9888
10437
  choices.forEach(obj => {
9889
10438
  if (detected[obj.value]) {
@@ -10065,6 +10614,15 @@ async function setupGradle(config) {
10065
10614
  } else {
10066
10615
  delete config.facts;
10067
10616
  }
10617
+
10618
+ // The config filters and --ignore-unresolved only apply to facts generation
10619
+ // (the default); skip them when pom generation (--pom) is selected.
10620
+ if (config.facts !== false) {
10621
+ const factsOptions = await setupFactsOptions(config);
10622
+ if (!factsOptions.ok || factsOptions.data.canceled) {
10623
+ return factsOptions;
10624
+ }
10625
+ }
10068
10626
  const verbose = await askForVerboseFlag(config.verbose);
10069
10627
  if (verbose === undefined) {
10070
10628
  return canceledByUser$1();
@@ -10106,9 +10664,10 @@ async function setupSbt(config) {
10106
10664
  delete config.facts;
10107
10665
  }
10108
10666
 
10109
- // --facts emits a .socket.facts.json instead of pom.xml files, so the pom
10110
- // output questions (stdout/outfile) don't apply when it is enabled.
10111
- if (config.facts !== true) {
10667
+ // Socket facts is the default. The pom output questions (stdout/outfile)
10668
+ // only apply when pom generation (--pom) is explicitly selected; otherwise
10669
+ // ask the facts-only options.
10670
+ if (config.facts === false) {
10112
10671
  const stdout = await askForStdout(config.stdout);
10113
10672
  if (stdout === undefined) {
10114
10673
  return canceledByUser$1();
@@ -10134,6 +10693,11 @@ async function setupSbt(config) {
10134
10693
  }
10135
10694
  }
10136
10695
  }
10696
+ } else {
10697
+ const factsOptions = await setupFactsOptions(config);
10698
+ if (!factsOptions.ok || factsOptions.data.canceled) {
10699
+ return factsOptions;
10700
+ }
10137
10701
  }
10138
10702
  const verbose = await askForVerboseFlag(config.verbose);
10139
10703
  if (verbose === undefined) {
@@ -10228,15 +10792,34 @@ async function askForVerboseFlag(current) {
10228
10792
  }
10229
10793
  async function askForFactsFlag(current) {
10230
10794
  return await prompts.select({
10231
- message: '(--facts) Emit a Socket facts JSON file instead of generating pom.xml?',
10795
+ message: '(--facts / --pom) Which manifest should this generate?',
10796
+ choices: [{
10797
+ name: 'Socket facts (default)',
10798
+ value: 'yes',
10799
+ description: 'Generate a .socket.facts.json file describing the resolved dependency graph'
10800
+ }, {
10801
+ name: 'pom.xml',
10802
+ value: 'no',
10803
+ description: 'Generate pom.xml manifest files instead (the --pom path)'
10804
+ }, {
10805
+ name: '(leave default)',
10806
+ value: '',
10807
+ description: 'Do not store a setting; uses the default (Socket facts)'
10808
+ }],
10809
+ default: current === true ? 'yes' : current === false ? 'no' : ''
10810
+ });
10811
+ }
10812
+ async function askForIgnoreUnresolvedFlag(current) {
10813
+ return await prompts.select({
10814
+ message: '(--ignore-unresolved) Warn on unresolved dependencies instead of failing?',
10232
10815
  choices: [{
10233
10816
  name: 'no',
10234
10817
  value: 'no',
10235
- description: 'Generate pom.xml files (default behavior)'
10818
+ description: 'Fail the run when a declared dependency cannot resolve'
10236
10819
  }, {
10237
10820
  name: 'yes',
10238
10821
  value: 'yes',
10239
- description: 'Generate a .socket.facts.json file describing the resolved dependency graph'
10822
+ description: 'Warn and continue; unresolved dependencies are omitted from the facts file'
10240
10823
  }, {
10241
10824
  name: '(leave default)',
10242
10825
  value: '',
@@ -10245,6 +10828,44 @@ async function askForFactsFlag(current) {
10245
10828
  default: current === true ? 'yes' : current === false ? 'no' : ''
10246
10829
  });
10247
10830
  }
10831
+
10832
+ // Prompts for the facts-only options shared by gradle and sbt: the config
10833
+ // include/exclude filters and --ignore-unresolved. Mutates `config` in place.
10834
+ async function setupFactsOptions(config) {
10835
+ const includeConfigs = await prompts.input({
10836
+ message: '(--include-configs) Comma-separated config-name globs to resolve (blank = all configurations)',
10837
+ default: config.includeConfigs || '',
10838
+ required: false
10839
+ });
10840
+ if (includeConfigs === undefined) {
10841
+ return canceledByUser$1();
10842
+ } else if (includeConfigs) {
10843
+ config.includeConfigs = includeConfigs;
10844
+ } else {
10845
+ delete config.includeConfigs;
10846
+ }
10847
+ const excludeConfigs = await prompts.input({
10848
+ message: '(--exclude-configs) Comma-separated config-name globs to skip (blank = none)',
10849
+ default: config.excludeConfigs || '',
10850
+ required: false
10851
+ });
10852
+ if (excludeConfigs === undefined) {
10853
+ return canceledByUser$1();
10854
+ } else if (excludeConfigs) {
10855
+ config.excludeConfigs = excludeConfigs;
10856
+ } else {
10857
+ delete config.excludeConfigs;
10858
+ }
10859
+ const ignoreUnresolved = await askForIgnoreUnresolvedFlag(config.ignoreUnresolved);
10860
+ if (ignoreUnresolved === undefined) {
10861
+ return canceledByUser$1();
10862
+ } else if (ignoreUnresolved === 'yes' || ignoreUnresolved === 'no') {
10863
+ config.ignoreUnresolved = ignoreUnresolved === 'yes';
10864
+ } else {
10865
+ delete config.ignoreUnresolved;
10866
+ }
10867
+ return notCanceled$1();
10868
+ }
10248
10869
  function canceledByUser$1() {
10249
10870
  logger.logger.log('');
10250
10871
  logger.logger.info('User canceled');
@@ -14446,7 +15067,7 @@ const reachabilityFlags = {
14446
15067
  reachConcurrency: {
14447
15068
  type: 'number',
14448
15069
  default: 1,
14449
- description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available. NPM reachability analysis does not support concurrent execution, so the concurrency level is ignored for NPM.'
15070
+ description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available.'
14450
15071
  },
14451
15072
  reachContinueOnAnalysisErrors: {
14452
15073
  type: 'boolean',
@@ -16828,6 +17449,7 @@ async function run$8(argv, importMeta, {
16828
17449
  }
16829
17450
 
16830
17451
  async function outputScanReach(result, {
17452
+ cwd,
16831
17453
  outputKind,
16832
17454
  outputPath
16833
17455
  }) {
@@ -16848,7 +17470,11 @@ async function outputScanReach(result, {
16848
17470
  logger.logger.info(`Reachability report has been written to: ${actualOutputPath}`);
16849
17471
 
16850
17472
  // Warn about individual vulnerabilities where reachability analysis errored.
16851
- const errors = utils.extractReachabilityErrors(result.data.reachabilityReport);
17473
+ // Resolve the report path against the scan `cwd` (not `process.cwd()`):
17474
+ // Coana writes the facts file relative to `cwd` and `reachabilityReport`
17475
+ // is a `cwd`-relative path, so reading the bare relative path would miss
17476
+ // the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`).
17477
+ const errors = utils.extractReachabilityErrors(path.resolve(cwd, result.data.reachabilityReport));
16852
17478
  if (errors.length) {
16853
17479
  logger.logger.log('');
16854
17480
  logger.logger.warn(`Reachability analysis returned ${errors.length} ${words.pluralize('error', errors.length)} for individual ${words.pluralize('vulnerability', errors.length)}:`);
@@ -16877,6 +17503,7 @@ async function handleScanReach({
16877
17503
  });
16878
17504
  if (!supportedFilesCResult.ok) {
16879
17505
  await outputScanReach(supportedFilesCResult, {
17506
+ cwd,
16880
17507
  outputKind,
16881
17508
  outputPath
16882
17509
  });
@@ -16924,7 +17551,22 @@ async function handleScanReach({
16924
17551
  uploadManifests: true
16925
17552
  });
16926
17553
  spinner.stop();
17554
+
17555
+ // Standalone reachability has no full scan to bind to, but the tier1
17556
+ // reachability scan row still needs to transition to its DONE terminal
17557
+ // state — otherwise it sits at the post-Coana intermediate state forever
17558
+ // and looks indistinguishable from a stuck run. Pass `null` as the full
17559
+ // scan id; the endpoint accepts it for this flow. Best-effort: never
17560
+ // block the user-visible output on this.
17561
+ const tier1Id = result.ok ? result.data?.tier1ReachabilityScanId : undefined;
17562
+ if (tier1Id) {
17563
+ const finalizeResult = await finalizeTier1Scan(tier1Id, null);
17564
+ if (!finalizeResult.ok) {
17565
+ logger.logger.warn(`Failed to finalize tier1 reachability scan: ${finalizeResult.message}${finalizeResult.cause ? ` — ${finalizeResult.cause}` : ''}`);
17566
+ }
17567
+ }
16927
17568
  await outputScanReach(result, {
17569
+ cwd,
16928
17570
  outputKind,
16929
17571
  outputPath
16930
17572
  });
@@ -19067,5 +19709,5 @@ process.on('unhandledRejection', async (reason, promise) => {
19067
19709
  // eslint-disable-next-line n/no-process-exit
19068
19710
  process.exit(1);
19069
19711
  });
19070
- //# debugId=52e1770b-8fec-41b9-83a1-5c52a6251b6c
19712
+ //# debugId=b1bb7e64-091d-4be2-bb99-bb2297bb5ec2
19071
19713
  //# sourceMappingURL=cli.js.map