socket 1.1.111 → 1.1.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -1
- package/dist/cli.js +1600 -926
- package/dist/cli.js.map +1 -1
- package/dist/constants.js +4 -4
- package/dist/constants.js.map +1 -1
- package/dist/tsconfig.dts.tsbuildinfo +1 -1
- package/dist/types/commands/manifest/bazel/bazel-cquery.d.mts +70 -0
- package/dist/types/commands/manifest/bazel/bazel-cquery.d.mts.map +1 -0
- package/dist/types/commands/manifest/bazel/bazel-pypi-discovery.d.mts +14 -1
- package/dist/types/commands/manifest/bazel/bazel-pypi-discovery.d.mts.map +1 -1
- package/dist/types/commands/manifest/bazel/bazel-query-runner.d.mts +58 -14
- package/dist/types/commands/manifest/bazel/bazel-query-runner.d.mts.map +1 -1
- package/dist/types/commands/manifest/bazel/bazel-repo-discovery.d.mts +43 -30
- package/dist/types/commands/manifest/bazel/bazel-repo-discovery.d.mts.map +1 -1
- package/dist/types/commands/manifest/bazel/bazel-workspace-walk.d.mts +18 -0
- package/dist/types/commands/manifest/bazel/bazel-workspace-walk.d.mts.map +1 -0
- package/dist/types/commands/manifest/bazel/cmd-manifest-bazel.d.mts +12 -10
- package/dist/types/commands/manifest/bazel/cmd-manifest-bazel.d.mts.map +1 -1
- package/dist/types/commands/manifest/bazel/extract_bazel_to_maven.d.mts +70 -8
- package/dist/types/commands/manifest/bazel/extract_bazel_to_maven.d.mts.map +1 -1
- package/dist/types/commands/manifest/generate_auto_manifest.d.mts.map +1 -1
- package/dist/types/commands/scan/finalize-tier1-scan.d.mts +6 -4
- package/dist/types/commands/scan/finalize-tier1-scan.d.mts.map +1 -1
- package/dist/types/commands/scan/handle-create-new-scan.d.mts.map +1 -1
- package/dist/types/commands/scan/handle-scan-reach.d.mts.map +1 -1
- package/dist/types/commands/scan/output-scan-reach.d.mts +2 -1
- package/dist/types/commands/scan/output-scan-reach.d.mts.map +1 -1
- package/dist/types/commands/scan/perform-reachability-analysis.d.mts.map +1 -1
- package/dist/types/utils/glob.d.mts +1 -0
- package/dist/types/utils/glob.d.mts.map +1 -1
- package/dist/utils.js +35 -16
- package/dist/utils.js.map +1 -1
- package/package.json +2 -2
- package/requirements.json +1 -1
- package/dist/types/commands/manifest/bazel/bazel-build-parser.d.mts +0 -34
- package/dist/types/commands/manifest/bazel/bazel-build-parser.d.mts.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -15,10 +15,10 @@ var words = require('../external/@socketsecurity/registry/lib/words');
|
|
|
15
15
|
var fs$1 = require('node:fs');
|
|
16
16
|
var arrays = require('../external/@socketsecurity/registry/lib/arrays');
|
|
17
17
|
var prompts = require('../external/@socketsecurity/registry/lib/prompts');
|
|
18
|
-
var bin = require('../external/@socketsecurity/registry/lib/bin');
|
|
19
|
-
var childProcess = require('node:child_process');
|
|
20
18
|
var os = require('node:os');
|
|
21
19
|
var spawn = require('../external/@socketsecurity/registry/lib/spawn');
|
|
20
|
+
var bin = require('../external/@socketsecurity/registry/lib/bin');
|
|
21
|
+
var childProcess = require('node:child_process');
|
|
22
22
|
var fs$2 = require('../external/@socketsecurity/registry/lib/fs');
|
|
23
23
|
var strings = require('../external/@socketsecurity/registry/lib/strings');
|
|
24
24
|
var path$1 = require('../external/@socketsecurity/registry/lib/path');
|
|
@@ -1110,8 +1110,10 @@ async function fetchSupportedScanFileNames(options) {
|
|
|
1110
1110
|
|
|
1111
1111
|
/**
|
|
1112
1112
|
* Finalize a tier1 reachability scan.
|
|
1113
|
-
* - Associates the tier1 reachability scan metadata with the full scan
|
|
1114
|
-
*
|
|
1113
|
+
* - Associates the tier1 reachability scan metadata with the full scan
|
|
1114
|
+
* (or with `null` when called from a standalone reachability flow that
|
|
1115
|
+
* has no full scan to bind to).
|
|
1116
|
+
* - Transitions the tier1 reachability scan to its DONE terminal state.
|
|
1115
1117
|
*/
|
|
1116
1118
|
async function finalizeTier1Scan(tier1ReachabilityScanId, scanId) {
|
|
1117
1119
|
// we do not use the SDK here because the tier1-reachability-scan/finalize is a hidden
|
|
@@ -1835,12 +1837,21 @@ async function performReachabilityAnalysis(options) {
|
|
|
1835
1837
|
}
|
|
1836
1838
|
return coanaResult;
|
|
1837
1839
|
}
|
|
1840
|
+
|
|
1841
|
+
// Coana writes the facts file relative to the scan `cwd` (it is spawned
|
|
1842
|
+
// with `cwd` above), so resolve the read path against `cwd` too. Reading
|
|
1843
|
+
// the bare relative path would resolve against `process.cwd()` and miss
|
|
1844
|
+
// the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`), silently
|
|
1845
|
+
// dropping the tier 1 scan id and skipping finalize downstream.
|
|
1846
|
+
const resolvedReportPath = path.resolve(cwd, outputFilePath);
|
|
1838
1847
|
return {
|
|
1839
1848
|
ok: true,
|
|
1840
1849
|
data: {
|
|
1841
|
-
// Use the actual output filename for the scan.
|
|
1850
|
+
// Use the actual output filename for the scan. Keep this `cwd`-relative
|
|
1851
|
+
// so the upload (which relativizes against `cwd`) and the post-success
|
|
1852
|
+
// unlink (`path.resolve(cwd, reachabilityReport)`) keep working.
|
|
1842
1853
|
reachabilityReport: outputFilePath,
|
|
1843
|
-
tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(
|
|
1854
|
+
tier1ReachabilityScanId: utils.extractTier1ReachabilityScanId(resolvedReportPath)
|
|
1844
1855
|
}
|
|
1845
1856
|
};
|
|
1846
1857
|
}
|
|
@@ -1930,318 +1941,6 @@ async function resolveBazelBinary(explicit) {
|
|
|
1930
1941
|
throw new utils.InputError('Could not find bazelisk or bazel on PATH. ' + 'Install bazelisk (recommended; https://github.com/bazelbuild/bazelisk) ' + 'or bazel, or pass --bazel <path>.');
|
|
1931
1942
|
}
|
|
1932
1943
|
|
|
1933
|
-
/**
|
|
1934
|
-
* Parse `bazel query --output=build` text and `unsorted_deps.json` files
|
|
1935
|
-
* (rules_jvm_external) into a uniform `ExtractedArtifact` shape consumed by
|
|
1936
|
-
* the converter.
|
|
1937
|
-
*
|
|
1938
|
-
* Security gate: every regex uses bounded character classes to prevent
|
|
1939
|
-
* catastrophic backtracking on hostile bazel-query output. Rules without
|
|
1940
|
-
* `maven_coordinates=` are skipped. Caller is responsible for size-capping
|
|
1941
|
-
* the input string.
|
|
1942
|
-
*/
|
|
1943
|
-
|
|
1944
|
-
// Per-rule block matcher: matches `<kind>(...)` where kind is jvm_import or
|
|
1945
|
-
// aar_import, bounded by `^)` (closing paren on its own line) — Bazel
|
|
1946
|
-
// `--output=build` output convention. Body length capped at 8 KiB; real
|
|
1947
|
-
// rules are ~500 bytes, so the cap is 16x normal. Prevents pathological
|
|
1948
|
-
// backtracking on hostile input.
|
|
1949
|
-
const RULE_RE = /^(jvm_import|aar_import)\(([\s\S]{0,8192}?)^\)/gm;
|
|
1950
|
-
|
|
1951
|
-
// Cache for per-attribute regexes — avoids recompiling the same pattern on
|
|
1952
|
-
// every rule block. Keyed by attr name; all attr names are safe alphanumeric
|
|
1953
|
-
// identifiers so no escaping is needed beyond the bounded character class.
|
|
1954
|
-
const ATTR_RE_CACHE = new Map();
|
|
1955
|
-
|
|
1956
|
-
// Cache for per-tag-key regexes used by extractTagValue.
|
|
1957
|
-
const TAG_RE_CACHE = new Map();
|
|
1958
|
-
function extractAttr(body, attr) {
|
|
1959
|
-
// Match `<attr> = "VALUE"` — quoted-string attrs only.
|
|
1960
|
-
// Quoted value capped at 4 KiB; canonical Maven URLs are ~150 bytes.
|
|
1961
|
-
let re = ATTR_RE_CACHE.get(attr);
|
|
1962
|
-
if (!re) {
|
|
1963
|
-
re = new RegExp(`\\b${attr}\\s*=\\s*"([^"\\n]{0,4096})"`);
|
|
1964
|
-
ATTR_RE_CACHE.set(attr, re);
|
|
1965
|
-
}
|
|
1966
|
-
const m = re.exec(body);
|
|
1967
|
-
return m?.[1];
|
|
1968
|
-
}
|
|
1969
|
-
|
|
1970
|
-
// Extracts a `key=value` pair from inside a Bazel `tags = [...]` attribute
|
|
1971
|
-
// (rules_jvm_external encodes maven_sha256, maven_coordinates etc. this way).
|
|
1972
|
-
// Pattern: `"maven_sha256=<hex>"` inside the tags list.
|
|
1973
|
-
// Returns undefined when the tag is absent or malformed.
|
|
1974
|
-
function extractTagValue(body, tagKey) {
|
|
1975
|
-
// Match the full tags = [...] block (bounded at 8 KiB).
|
|
1976
|
-
const tagsM = /\btags\s*=\s*\[([\s\S]{0,8192}?)\]/m.exec(body);
|
|
1977
|
-
if (!tagsM) {
|
|
1978
|
-
return undefined;
|
|
1979
|
-
}
|
|
1980
|
-
const tagsBlob = tagsM[1];
|
|
1981
|
-
// Within the blob, look for "<tagKey>=<value>" inside a quoted string.
|
|
1982
|
-
// Bounded at 512 bytes per tag entry (sha256 hex is 64 chars; URLs ~150).
|
|
1983
|
-
let tagRe = TAG_RE_CACHE.get(tagKey);
|
|
1984
|
-
if (!tagRe) {
|
|
1985
|
-
tagRe = new RegExp(`"${tagKey}=([^"\\n]{0,512})"`);
|
|
1986
|
-
TAG_RE_CACHE.set(tagKey, tagRe);
|
|
1987
|
-
}
|
|
1988
|
-
const m = tagRe.exec(tagsBlob);
|
|
1989
|
-
return m?.[1];
|
|
1990
|
-
}
|
|
1991
|
-
function extractDeps(body) {
|
|
1992
|
-
// Match `deps = ["a", "b", ...]`. Body length capped at 16 KiB; real
|
|
1993
|
-
// dep lists are <2 KiB.
|
|
1994
|
-
const m = /\bdeps\s*=\s*\[([\s\S]{0,16384}?)\]/m.exec(body);
|
|
1995
|
-
if (!m) {
|
|
1996
|
-
return [];
|
|
1997
|
-
}
|
|
1998
|
-
const out = [];
|
|
1999
|
-
// Per-label cap at 512 bytes; real Bazel labels are <100 bytes.
|
|
2000
|
-
for (const q of m[1].matchAll(/"([^"\n]{0,512})"/g)) {
|
|
2001
|
-
out.push(q[1]);
|
|
2002
|
-
}
|
|
2003
|
-
return out;
|
|
2004
|
-
}
|
|
2005
|
-
|
|
2006
|
-
/**
|
|
2007
|
-
* Parse `bazel query --output=build` stdout into `ExtractedArtifact[]`.
|
|
2008
|
-
* Skips rules without a `maven_coordinates` attribute (those aren't
|
|
2009
|
-
* rules_jvm_external lockfile rules).
|
|
2010
|
-
*/
|
|
2011
|
-
function parseBazelBuildOutput(text) {
|
|
2012
|
-
const results = [];
|
|
2013
|
-
for (const m of text.matchAll(RULE_RE)) {
|
|
2014
|
-
const ruleKind = m[1];
|
|
2015
|
-
const body = m[2];
|
|
2016
|
-
const ruleName = extractAttr(body, 'name');
|
|
2017
|
-
// maven_coordinates can be:
|
|
2018
|
-
// (a) a top-level rule attribute: `maven_coordinates = "g:a:v"` (newer rje)
|
|
2019
|
-
// (b) inside tags = [...]: `"maven_coordinates=g:a:v"` (older rje, e.g. ray)
|
|
2020
|
-
const coords = extractAttr(body, 'maven_coordinates') ?? extractTagValue(body, 'maven_coordinates');
|
|
2021
|
-
if (!ruleName || !coords) {
|
|
2022
|
-
continue;
|
|
2023
|
-
}
|
|
2024
|
-
// maven_sha256 is encoded inside tags = [...] as "maven_sha256=<hex>" by
|
|
2025
|
-
// rules_jvm_external; try tags first, fall back to standalone attr for
|
|
2026
|
-
// older rule shapes that may declare it as a top-level attribute.
|
|
2027
|
-
const mavenSha256 = extractTagValue(body, 'maven_sha256') ?? extractAttr(body, 'maven_sha256');
|
|
2028
|
-
results.push({
|
|
2029
|
-
ruleKind,
|
|
2030
|
-
ruleName,
|
|
2031
|
-
mavenCoordinates: coords,
|
|
2032
|
-
mavenUrl: extractAttr(body, 'maven_url'),
|
|
2033
|
-
mavenSha256,
|
|
2034
|
-
deps: extractDeps(body)
|
|
2035
|
-
});
|
|
2036
|
-
}
|
|
2037
|
-
return results;
|
|
2038
|
-
}
|
|
2039
|
-
function ruleNameFromCoordinate(c) {
|
|
2040
|
-
return c.replace(/[^A-Za-z0-9]/g, '_');
|
|
2041
|
-
}
|
|
2042
|
-
|
|
2043
|
-
/**
|
|
2044
|
-
* Parse supported `external/<repo>/unsorted_deps.json` shapes emitted by
|
|
2045
|
-
* rules_jvm_external. Older files use an artifact array with full coordinates;
|
|
2046
|
-
* newer v2 lock-file-shaped files use artifact/dependency maps keyed by
|
|
2047
|
-
* `group:artifact`. Caller MUST size-cap the input because JSON.parse is
|
|
2048
|
-
* unbounded by default.
|
|
2049
|
-
*/
|
|
2050
|
-
function parseUnsortedDepsJson(json) {
|
|
2051
|
-
let parsed;
|
|
2052
|
-
try {
|
|
2053
|
-
parsed = JSON.parse(json);
|
|
2054
|
-
} catch {
|
|
2055
|
-
return [];
|
|
2056
|
-
}
|
|
2057
|
-
const maybe = parsed;
|
|
2058
|
-
if (Array.isArray(maybe.artifacts)) {
|
|
2059
|
-
const out = [];
|
|
2060
|
-
for (const a of maybe.artifacts) {
|
|
2061
|
-
if (typeof a?.coordinates !== 'string') {
|
|
2062
|
-
continue;
|
|
2063
|
-
}
|
|
2064
|
-
const deps = [];
|
|
2065
|
-
if (Array.isArray(a.deps)) {
|
|
2066
|
-
for (const d of a.deps) {
|
|
2067
|
-
if (typeof d === 'string') {
|
|
2068
|
-
deps.push(d);
|
|
2069
|
-
}
|
|
2070
|
-
}
|
|
2071
|
-
}
|
|
2072
|
-
out.push({
|
|
2073
|
-
ruleKind: 'jvm_import',
|
|
2074
|
-
ruleName: ruleNameFromCoordinate(a.coordinates),
|
|
2075
|
-
mavenCoordinates: a.coordinates,
|
|
2076
|
-
mavenUrl: typeof a.url === 'string' ? a.url : undefined,
|
|
2077
|
-
mavenSha256: typeof a.sha256 === 'string' ? a.sha256 : undefined,
|
|
2078
|
-
deps
|
|
2079
|
-
});
|
|
2080
|
-
}
|
|
2081
|
-
return out;
|
|
2082
|
-
}
|
|
2083
|
-
if (!maybe.artifacts || typeof maybe.artifacts !== 'object') {
|
|
2084
|
-
return [];
|
|
2085
|
-
}
|
|
2086
|
-
const dependencies = maybe.dependencies ?? {};
|
|
2087
|
-
const out = [];
|
|
2088
|
-
for (const [groupArtifact, artifact] of Object.entries(maybe.artifacts)) {
|
|
2089
|
-
if (!artifact || typeof artifact.version !== 'string') {
|
|
2090
|
-
continue;
|
|
2091
|
-
}
|
|
2092
|
-
const shasums = artifact.shasums ?? {};
|
|
2093
|
-
const jarSha = shasums['jar'];
|
|
2094
|
-
if (typeof jarSha === 'string' || Object.keys(shasums).length === 0) {
|
|
2095
|
-
out.push(v2Artifact(groupArtifact, artifact.version, jarSha, dependencies));
|
|
2096
|
-
}
|
|
2097
|
-
for (const [classifier, sha256] of Object.entries(shasums)) {
|
|
2098
|
-
if (classifier === 'jar' || typeof sha256 !== 'string') {
|
|
2099
|
-
continue;
|
|
2100
|
-
}
|
|
2101
|
-
const classifierKey = `${groupArtifact}:jar:${classifier}`;
|
|
2102
|
-
out.push(v2Artifact(classifierKey, artifact.version, sha256, dependencies));
|
|
2103
|
-
}
|
|
2104
|
-
}
|
|
2105
|
-
return out;
|
|
2106
|
-
}
|
|
2107
|
-
function v2Artifact(artifactKey, version, sha256, dependencies) {
|
|
2108
|
-
return {
|
|
2109
|
-
ruleKind: 'jvm_import',
|
|
2110
|
-
ruleName: ruleNameFromCoordinate(artifactKey),
|
|
2111
|
-
mavenCoordinates: `${artifactKey}:${version}`,
|
|
2112
|
-
mavenSha256: sha256,
|
|
2113
|
-
deps: Array.isArray(dependencies[artifactKey]) ? dependencies[artifactKey].filter(d => typeof d === 'string') : []
|
|
2114
|
-
};
|
|
2115
|
-
}
|
|
2116
|
-
|
|
2117
|
-
let probed = false;
|
|
2118
|
-
|
|
2119
|
-
// Verifies `java` is functional in the current execution environment. Bazel
|
|
2120
|
-
// JVM manifest extraction (rules_jvm_external → Coursier) requires a real
|
|
2121
|
-
// JDK; the CLI does not attempt to discover Homebrew installs or mutate the
|
|
2122
|
-
// caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
|
|
2123
|
-
// actionable message so the surfaced error names the prerequisite directly
|
|
2124
|
-
// instead of relying on Bazel's downstream diagnostic.
|
|
2125
|
-
function ensureJavaOnPath() {
|
|
2126
|
-
if (probed) {
|
|
2127
|
-
return;
|
|
2128
|
-
}
|
|
2129
|
-
try {
|
|
2130
|
-
childProcess.execSync('java -version', {
|
|
2131
|
-
stdio: 'ignore'
|
|
2132
|
-
});
|
|
2133
|
-
probed = true;
|
|
2134
|
-
} catch {
|
|
2135
|
-
throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
|
|
2136
|
-
}
|
|
2137
|
-
}
|
|
2138
|
-
|
|
2139
|
-
// Validates that --bazel-output-base is a path we can use as Bazel's output_base.
|
|
2140
|
-
// Throws InputError if:
|
|
2141
|
-
// - the input contains `..` segments (path traversal guard)
|
|
2142
|
-
// - the existing path is not writable
|
|
2143
|
-
// - the path cannot be created (parent not writable)
|
|
2144
|
-
function validateOutputBase(outputBase, cwd) {
|
|
2145
|
-
// Path traversal guard: reject any literal `..` segment in user input.
|
|
2146
|
-
// After path.resolve these are normalised away, so we check the raw input.
|
|
2147
|
-
// Split on both separators. On Windows `path.sep === '\\'`, so
|
|
2148
|
-
// input like `foo/../etc` would not contain a `..` segment under the
|
|
2149
|
-
// platform-specific split, bypassing the guard — yet path.resolve below
|
|
2150
|
-
// would still normalise the `..` and a traversal target could materialise.
|
|
2151
|
-
const segments = outputBase.split(/[\\/]/);
|
|
2152
|
-
if (segments.includes('..')) {
|
|
2153
|
-
throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
|
|
2154
|
-
}
|
|
2155
|
-
const resolved = path.resolve(cwd, outputBase);
|
|
2156
|
-
if (fs$1.existsSync(resolved)) {
|
|
2157
|
-
try {
|
|
2158
|
-
fs$1.accessSync(resolved, fs$1.constants.W_OK);
|
|
2159
|
-
} catch {
|
|
2160
|
-
throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
|
|
2161
|
-
}
|
|
2162
|
-
return;
|
|
2163
|
-
}
|
|
2164
|
-
// Path does not exist yet — try to create it so bazel can populate it.
|
|
2165
|
-
try {
|
|
2166
|
-
fs$1.mkdirSync(resolved, {
|
|
2167
|
-
recursive: true
|
|
2168
|
-
});
|
|
2169
|
-
} catch (e) {
|
|
2170
|
-
throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
|
|
2171
|
-
}
|
|
2172
|
-
}
|
|
2173
|
-
|
|
2174
|
-
// Stable shim dir name — same process will get the same dir; concurrent
|
|
2175
|
-
// socket-cli invocations on the same machine share it. The symlink target
|
|
2176
|
-
// is whatever python3 resolves to NOW; if PATH changes between invocations
|
|
2177
|
-
// we replace the symlink.
|
|
2178
|
-
const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
|
|
2179
|
-
|
|
2180
|
-
// Cache the result for the lifetime of this process.
|
|
2181
|
-
let cached = null;
|
|
2182
|
-
|
|
2183
|
-
// Safe wrapper around whichBin that returns null instead of throwing when
|
|
2184
|
-
// nothrow semantics are broken in older registry versions (realpath 'null' bug).
|
|
2185
|
-
async function safeWhichBin(name) {
|
|
2186
|
-
try {
|
|
2187
|
-
return (await bin.whichBin(name, {
|
|
2188
|
-
nothrow: true
|
|
2189
|
-
})) ?? null;
|
|
2190
|
-
} catch {
|
|
2191
|
-
return null;
|
|
2192
|
-
}
|
|
2193
|
-
}
|
|
2194
|
-
async function provisionPythonShim() {
|
|
2195
|
-
if (cached) {
|
|
2196
|
-
return cached;
|
|
2197
|
-
}
|
|
2198
|
-
const pythonOnPath = await safeWhichBin('python');
|
|
2199
|
-
if (pythonOnPath) {
|
|
2200
|
-
cached = {
|
|
2201
|
-
augmentedEnv: undefined,
|
|
2202
|
-
shimDir: undefined
|
|
2203
|
-
};
|
|
2204
|
-
return cached;
|
|
2205
|
-
}
|
|
2206
|
-
const python3OnPath = await safeWhichBin('python3');
|
|
2207
|
-
if (!python3OnPath) {
|
|
2208
|
-
throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
|
|
2209
|
-
}
|
|
2210
|
-
const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
|
|
2211
|
-
fs$1.mkdirSync(shimDir, {
|
|
2212
|
-
recursive: true
|
|
2213
|
-
});
|
|
2214
|
-
const linkPath = path.join(shimDir, 'python');
|
|
2215
|
-
// Replace the symlink defensively in case python3's resolved path moved.
|
|
2216
|
-
if (fs$1.existsSync(linkPath)) {
|
|
2217
|
-
try {
|
|
2218
|
-
fs$1.unlinkSync(linkPath);
|
|
2219
|
-
} catch {
|
|
2220
|
-
// Tolerate races; the next symlinkSync may still succeed.
|
|
2221
|
-
}
|
|
2222
|
-
}
|
|
2223
|
-
// The shim dir is process-shared (os.tmpdir()/socket-cli-bazel-python-shim),
|
|
2224
|
-
// so a concurrent socket-cli invocation may re-create the link between our
|
|
2225
|
-
// unlinkSync and symlinkSync. Tolerate EEXIST when the link is back: the
|
|
2226
|
-
// other process won the race and left a usable shim in place.
|
|
2227
|
-
try {
|
|
2228
|
-
fs$1.symlinkSync(python3OnPath, linkPath);
|
|
2229
|
-
} catch (e) {
|
|
2230
|
-
if (e.code === 'EEXIST' && fs$1.existsSync(linkPath)) ; else {
|
|
2231
|
-
throw e;
|
|
2232
|
-
}
|
|
2233
|
-
}
|
|
2234
|
-
const augmentedEnv = {
|
|
2235
|
-
...process.env,
|
|
2236
|
-
PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`
|
|
2237
|
-
};
|
|
2238
|
-
cached = {
|
|
2239
|
-
augmentedEnv,
|
|
2240
|
-
shimDir
|
|
2241
|
-
};
|
|
2242
|
-
return cached;
|
|
2243
|
-
}
|
|
2244
|
-
|
|
2245
1944
|
// Default per-invocation timeout for bazel queries. Bazel cold-cache starts
|
|
2246
1945
|
// can take several minutes; 10 minutes is generous while still bounding CI hangs.
|
|
2247
1946
|
const BAZEL_QUERY_TIMEOUT_MS = 600_000;
|
|
@@ -2258,42 +1957,58 @@ function splitBazelFlags(flags) {
|
|
|
2258
1957
|
}
|
|
2259
1958
|
return flags.split(/\s+/).filter(Boolean);
|
|
2260
1959
|
}
|
|
2261
|
-
|
|
1960
|
+
|
|
1961
|
+
// Build the shared startup-flag prefix for any bazel invocation. Centralised
|
|
1962
|
+
// so `--output_user_root` propagates to every spawn — principle 7 of the
|
|
1963
|
+
// Maven design requires per-invocation server isolation across query,
|
|
1964
|
+
// cquery, and `bazel mod` commands alike.
|
|
1965
|
+
function buildStartupFlags(opts) {
|
|
2262
1966
|
const startup = [];
|
|
2263
1967
|
if (opts.bazelRc) {
|
|
2264
1968
|
startup.push(`--bazelrc=${opts.bazelRc}`);
|
|
2265
1969
|
}
|
|
1970
|
+
if (opts.outputUserRoot) {
|
|
1971
|
+
startup.push(`--output_user_root=${opts.outputUserRoot}`);
|
|
1972
|
+
}
|
|
2266
1973
|
if (opts.bazelOutputBase) {
|
|
2267
1974
|
startup.push(`--output_base=${opts.bazelOutputBase}`);
|
|
2268
1975
|
}
|
|
1976
|
+
return startup;
|
|
1977
|
+
}
|
|
1978
|
+
function buildBazelModShowVisibleReposArgv(opts) {
|
|
1979
|
+
const userFlags = splitBazelFlags(opts.bazelFlags);
|
|
1980
|
+
return [...buildStartupFlags(opts), 'mod', 'dump_repo_mapping', '', '--output=json', ...userFlags];
|
|
1981
|
+
}
|
|
1982
|
+
function buildBazelModShowMavenExtensionArgv(opts) {
|
|
2269
1983
|
const userFlags = splitBazelFlags(opts.bazelFlags);
|
|
2270
|
-
return [...
|
|
1984
|
+
return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven',
|
|
1985
|
+
// Belt-and-suspenders output reducer mirroring the PyPI path: bias the
|
|
1986
|
+
// report toward the root module's usages. The authoritative pruning is
|
|
1987
|
+
// the importers-filter applied to the parsed output, so this is not
|
|
1988
|
+
// relied on for correctness.
|
|
1989
|
+
'--extension_usages=<root>', ...userFlags];
|
|
2271
1990
|
}
|
|
2272
1991
|
function buildBazelModShowPipExtensionArgv(opts) {
|
|
2273
|
-
const startup = [];
|
|
2274
|
-
if (opts.bazelRc) {
|
|
2275
|
-
startup.push(`--bazelrc=${opts.bazelRc}`);
|
|
2276
|
-
}
|
|
2277
|
-
if (opts.bazelOutputBase) {
|
|
2278
|
-
startup.push(`--output_base=${opts.bazelOutputBase}`);
|
|
2279
|
-
}
|
|
2280
1992
|
const userFlags = splitBazelFlags(opts.bazelFlags);
|
|
2281
|
-
return [...
|
|
1993
|
+
return [...buildStartupFlags(opts), 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', '--extension_usages=<root>', ...userFlags];
|
|
2282
1994
|
}
|
|
2283
1995
|
function buildBazelArgv(queryStr, opts, output = 'build') {
|
|
2284
1996
|
// Startup flags MUST precede the `query` subcommand.
|
|
2285
1997
|
// Bazel argv shape: <startup> query <queryFlags> <invocationFlags> <queryStr> --output=<output> <userFlags>
|
|
2286
|
-
const startup = [];
|
|
2287
|
-
if (opts.bazelRc) {
|
|
2288
|
-
startup.push(`--bazelrc=${opts.bazelRc}`);
|
|
2289
|
-
}
|
|
2290
|
-
if (opts.bazelOutputBase) {
|
|
2291
|
-
startup.push(`--output_base=${opts.bazelOutputBase}`);
|
|
2292
|
-
}
|
|
2293
1998
|
// Keep query output stable and avoid updating Bazel lockfiles while extracting.
|
|
2294
1999
|
const queryFlags = ['--lockfile_mode=off', '--noshow_progress'];
|
|
2295
2000
|
const userFlags = splitBazelFlags(opts.bazelFlags);
|
|
2296
|
-
return [...
|
|
2001
|
+
return [...buildStartupFlags(opts), 'query', ...queryFlags, ...opts.invocationFlags, queryStr, `--output=${output}`, ...userFlags];
|
|
2002
|
+
}
|
|
2003
|
+
|
|
2004
|
+
// Lightweight presence-check cquery used by the tri-state probe classifier.
|
|
2005
|
+
// `--keep_going --output=label` keeps it fast even on partial-analysis
|
|
2006
|
+
// repos and avoids paying for `--output=jsonproto` plus
|
|
2007
|
+
// `--proto:output_rule_attrs` (which the heavier metadata extraction in
|
|
2008
|
+
// `bazel-cquery.mts` needs but the probe does not).
|
|
2009
|
+
function buildBazelProbeCqueryArgv(repoName, opts) {
|
|
2010
|
+
const userFlags = splitBazelFlags(opts.bazelFlags);
|
|
2011
|
+
return [...buildStartupFlags(opts), 'cquery', '--lockfile_mode=off', '--noshow_progress', ...opts.invocationFlags, `@${repoName}//...`, '--output=label', '--keep_going', ...userFlags];
|
|
2297
2012
|
}
|
|
2298
2013
|
function stringField(value) {
|
|
2299
2014
|
return typeof value === 'string' ? value : '';
|
|
@@ -2407,14 +2122,7 @@ async function runBazelQuery(queryStr, opts, output) {
|
|
|
2407
2122
|
}
|
|
2408
2123
|
}
|
|
2409
2124
|
}
|
|
2410
|
-
|
|
2411
|
-
/**
|
|
2412
|
-
* Bzlmod-native visible repository enumeration. This is only a candidate
|
|
2413
|
-
* source; callers must still validate each returned apparent repo name with a
|
|
2414
|
-
* semantic query for generated ecosystem rules.
|
|
2415
|
-
*/
|
|
2416
|
-
async function runBazelModShowVisibleRepos(opts) {
|
|
2417
|
-
const argv = buildBazelModShowVisibleReposArgv(opts);
|
|
2125
|
+
async function runBazelOneShot(argv, opts, step) {
|
|
2418
2126
|
if (opts.verbose) {
|
|
2419
2127
|
logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
|
|
2420
2128
|
}
|
|
@@ -2446,364 +2154,830 @@ async function runBazelModShowVisibleRepos(opts) {
|
|
|
2446
2154
|
durationMs: Date.now() - startedAt,
|
|
2447
2155
|
opts,
|
|
2448
2156
|
result,
|
|
2449
|
-
step
|
|
2157
|
+
step
|
|
2450
2158
|
});
|
|
2451
2159
|
return result;
|
|
2452
2160
|
}
|
|
2453
2161
|
|
|
2454
2162
|
/**
|
|
2455
|
-
* Bzlmod-native
|
|
2456
|
-
*
|
|
2457
|
-
*
|
|
2163
|
+
* Bzlmod-native visible repository enumeration. NOTE: only consumed by the
|
|
2164
|
+
* legacy PyPI path; the Maven path uses `runBazelModShowMavenExtension`
|
|
2165
|
+
* instead because `dump_repo_mapping` over-enumerates apparent names that
|
|
2166
|
+
* are not Maven hubs.
|
|
2458
2167
|
*/
|
|
2459
|
-
async function
|
|
2460
|
-
|
|
2461
|
-
if (opts.verbose) {
|
|
2462
|
-
logger.logger.log('[VERBOSE] Executing:', opts.bin, ', args:', argv);
|
|
2463
|
-
}
|
|
2464
|
-
const startedAt = Date.now();
|
|
2465
|
-
let result;
|
|
2466
|
-
try {
|
|
2467
|
-
const output = await spawn.spawn(opts.bin, argv, {
|
|
2468
|
-
cwd: opts.cwd,
|
|
2469
|
-
timeout: BAZEL_QUERY_TIMEOUT_MS,
|
|
2470
|
-
...(opts.env ? {
|
|
2471
|
-
env: opts.env
|
|
2472
|
-
} : {})
|
|
2473
|
-
});
|
|
2474
|
-
const {
|
|
2475
|
-
code,
|
|
2476
|
-
stderr,
|
|
2477
|
-
stdout
|
|
2478
|
-
} = output;
|
|
2479
|
-
result = {
|
|
2480
|
-
code,
|
|
2481
|
-
stdout,
|
|
2482
|
-
stderr
|
|
2483
|
-
};
|
|
2484
|
-
} catch (e) {
|
|
2485
|
-
result = normalizeSpawnError(e);
|
|
2486
|
-
}
|
|
2487
|
-
logBazelTrace({
|
|
2488
|
-
argv,
|
|
2489
|
-
durationMs: Date.now() - startedAt,
|
|
2490
|
-
opts,
|
|
2491
|
-
result,
|
|
2492
|
-
step: 'bazel mod show_extension rules_python pip'
|
|
2493
|
-
});
|
|
2494
|
-
return result;
|
|
2168
|
+
async function runBazelModShowVisibleRepos(opts) {
|
|
2169
|
+
return await runBazelOneShot(buildBazelModShowVisibleReposArgv(opts), opts, 'bazel mod dump_repo_mapping');
|
|
2495
2170
|
}
|
|
2496
2171
|
|
|
2497
2172
|
/**
|
|
2498
|
-
*
|
|
2499
|
-
*
|
|
2500
|
-
*
|
|
2173
|
+
* Bzlmod-native Maven hub enumeration via the rules_jvm_external maven
|
|
2174
|
+
* extension. The text-format report lists every repo the extension
|
|
2175
|
+
* generated; `parseShowExtensionOutput` (bazel-repo-discovery.mts)
|
|
2176
|
+
* extracts the hubs from the `Fetched repositories:` section.
|
|
2501
2177
|
*/
|
|
2502
|
-
function
|
|
2503
|
-
return
|
|
2504
|
-
const queryStr = `kind("jvm_import rule|aar_import rule", @${repoName}//:*)`;
|
|
2505
|
-
const result = await runBazelQuery(queryStr, opts);
|
|
2506
|
-
return {
|
|
2507
|
-
stdout: result.stdout,
|
|
2508
|
-
code: result.code
|
|
2509
|
-
};
|
|
2510
|
-
};
|
|
2178
|
+
async function runBazelModShowMavenExtension(opts) {
|
|
2179
|
+
return await runBazelOneShot(buildBazelModShowMavenExtensionArgv(opts), opts, 'bazel mod show_extension rules_jvm_external maven');
|
|
2511
2180
|
}
|
|
2512
2181
|
|
|
2513
2182
|
/**
|
|
2514
|
-
*
|
|
2515
|
-
*
|
|
2516
|
-
*
|
|
2517
|
-
|
|
2518
|
-
|
|
2183
|
+
* Bzlmod-native rules_python pip extension usage inspection. Used by the
|
|
2184
|
+
* PyPI path; kept here since the argv shape is identical to the maven
|
|
2185
|
+
* variant modulo the extension target.
|
|
2186
|
+
*/
|
|
2187
|
+
async function runBazelModShowPipExtension(opts) {
|
|
2188
|
+
return await runBazelOneShot(buildBazelModShowPipExtensionArgv(opts), opts, 'bazel mod show_extension rules_python pip');
|
|
2189
|
+
}
|
|
2190
|
+
|
|
2191
|
+
/**
|
|
2192
|
+
* Build a `RepoProbe` (compatible with bazel-repo-discovery's tri-state
|
|
2193
|
+
* classifier) bound to opts. Runs the lightweight presence-check cquery
|
|
2194
|
+
* `@<name>//... --output=label --keep_going` — cheap enough to attempt
|
|
2195
|
+
* every conventional Maven hub name without triggering `repository_rule`
|
|
2196
|
+
* fetches on undefined names (Exp 3).
|
|
2197
|
+
*/
|
|
2198
|
+
function buildMavenProbeFor(opts) {
|
|
2199
|
+
return async repoName => {
|
|
2200
|
+
const argv = buildBazelProbeCqueryArgv(repoName, opts);
|
|
2201
|
+
const result = await runBazelOneShot(argv, opts, `bazel cquery probe @${repoName}`);
|
|
2202
|
+
return {
|
|
2203
|
+
code: result.code,
|
|
2204
|
+
stdout: result.stdout,
|
|
2205
|
+
stderr: result.stderr
|
|
2206
|
+
};
|
|
2207
|
+
};
|
|
2208
|
+
}
|
|
2209
|
+
|
|
2210
|
+
/**
|
|
2211
|
+
* Build a `RepoProbe` for validating pip hub candidates.
|
|
2212
|
+
* Queries the hub for package targets (e.g. `@<hub>//...`) and returns the
|
|
2213
|
+
* full result triple so the caller can check for `:pkg` labels or alias
|
|
2214
|
+
* rules. Does NOT require `pypi_name=` tags in the hub output, because
|
|
2215
|
+
* those tags live on spoke repos, not the hub alias layer.
|
|
2519
2216
|
*/
|
|
2520
2217
|
function buildPypiProbeFor(opts) {
|
|
2521
2218
|
return async hubName => {
|
|
2522
2219
|
const queryStr = `@${hubName}//...`;
|
|
2523
2220
|
const result = await runBazelQuery(queryStr, opts);
|
|
2524
2221
|
return {
|
|
2222
|
+
code: result.code,
|
|
2525
2223
|
stdout: result.stdout,
|
|
2526
|
-
|
|
2224
|
+
stderr: result.stderr
|
|
2527
2225
|
};
|
|
2528
2226
|
};
|
|
2529
2227
|
}
|
|
2530
2228
|
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2229
|
+
/**
|
|
2230
|
+
* Per-repo metadata cquery + jsonproto parser for the Maven path.
|
|
2231
|
+
*
|
|
2232
|
+
* Pipeline:
|
|
2233
|
+
* 1. Build a cquery argv targeting `attr("tags", "\bmaven_coordinates=",
|
|
2234
|
+
* @<repo>//...)` plus a union variant for the direct `maven_coordinates`
|
|
2235
|
+
* attribute. `--output=jsonproto` +
|
|
2236
|
+
* `--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps`
|
|
2237
|
+
* keeps the payload small while still surfacing the resolved Maven graph.
|
|
2238
|
+
* 2. Spawn under a caller-supplied `outputUserRoot` so the orchestrator can
|
|
2239
|
+
* reap the server cleanly (`bazel --output_user_root=<this> shutdown`
|
|
2240
|
+
* followed by `rm -rf`). The runner itself never deletes anything —
|
|
2241
|
+
* server lifecycle is the orchestrator's concern.
|
|
2242
|
+
* 3. Parse the jsonproto stream defensively: dispatch on `attribute[].type`
|
|
2243
|
+
* and accept both camelCase (`stringValue`, `stringListValue`) and
|
|
2244
|
+
* snake_case (`string_value`, `string_list_value`) payload keys.
|
|
2245
|
+
* 4. Extract the maven coordinate from the direct `maven_coordinates` attr
|
|
2246
|
+
* when present, else scan `tags` for `maven_coordinates=<G:A:V>`.
|
|
2247
|
+
* 5. Resolve each rule's `deps`/`exports`/`runtime_deps` label edges into
|
|
2248
|
+
* versionless Maven coordinates against this repo's own targets, while
|
|
2249
|
+
* `repoName` is still in scope. Edges that point at a hub-prefixed target
|
|
2250
|
+
* we cannot resolve are reported as `unresolvedLabels` so the caller can
|
|
2251
|
+
* flip the hub partial rather than silently dropping graph edges.
|
|
2252
|
+
* 6. Tag every artifact with `workspace:<rel-path>` + `repo:<name>`
|
|
2253
|
+
* provenance via `sourceRepo`.
|
|
2254
|
+
*/
|
|
2538
2255
|
|
|
2539
|
-
//
|
|
2540
|
-
//
|
|
2541
|
-
//
|
|
2542
|
-
|
|
2543
|
-
//
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
//
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2256
|
+
// One Maven artifact recovered from the cquery stream. `ruleKind` is whatever
|
|
2257
|
+
// `ruleClass` jsonproto reports (`jvm_import`, `aar_import`, `java_library`,
|
|
2258
|
+
// `kt_jvm_import`, any future rules_jvm_external rule), so the type is open.
|
|
2259
|
+
// `deps` holds resolved versionless Maven coordinates (the parser resolves the
|
|
2260
|
+
// rule's label edges against this repo's own targets), not raw Bazel labels.
|
|
2261
|
+
|
|
2262
|
+
// Result of parsing one repo's cquery stream: the recovered artifacts (with
|
|
2263
|
+
// resolved coordinate edges in `deps`) plus any hub-prefixed dep labels that
|
|
2264
|
+
// could not be resolved.
|
|
2265
|
+
|
|
2266
|
+
// Maven coordinate token: `g:a:v` (3 parts) or `g:a:v:classifier` /
|
|
2267
|
+
// `g:a:packaging:v` (4-part rules_jvm_external shapes). Tolerant of dots,
|
|
2268
|
+
// dashes, plus, underscores in any part.
|
|
2269
|
+
const MAVEN_COORD_TAG_RE = /^maven_coordinates=(.+)$/;
|
|
2270
|
+
|
|
2271
|
+
// The dep/export/runtime_deps attributes whose label edges encode the
|
|
2272
|
+
// resolved Maven graph. rules_jvm_external writes `jvm_import.deps` (e.g.
|
|
2273
|
+
// `junit` -> `@maven//:org_hamcrest_hamcrest_core`); compile/runtime scopes
|
|
2274
|
+
// surface via `exports`/`runtime_deps`. We union all three.
|
|
2275
|
+
const EDGE_ATTR_NAMES = new Set(['deps', 'exports', 'runtime_deps']);
|
|
2276
|
+
|
|
2277
|
+
// Build the metadata cquery target expression for one repo. The union of
|
|
2278
|
+
// two predicates picks up artifacts that:
|
|
2279
|
+
// - encode the coordinate in the conventional `tags = ["maven_coordinates=..."]`
|
|
2280
|
+
// list (rules_jvm_external's emission for `jvm_import` and friends), or
|
|
2281
|
+
// - declare the coordinate as a direct `maven_coordinates` attribute
|
|
2282
|
+
// (Bazel-native java_library / kt_jvm_import shape).
|
|
2283
|
+
// Note: a `maven_url`-only predicate was intentionally dropped — those rules
|
|
2284
|
+
// carry no coordinate, so selecting them only to discard them downstream is
|
|
2285
|
+
// wasted analysis. If POM-only artifacts ever matter, synthesize
|
|
2286
|
+
// a coordinate from `maven_url` instead of re-adding the selector.
|
|
2287
|
+
// Returns the cquery target expression for every target under
// `@<repoName>//...` that either carries a `maven_coordinates=` tag or sets a
// non-empty `maven_coordinates` attribute. The two predicates are joined with
// the query-language `union` operator.
function buildMetadataCqueryExpr(repoName) {
  const scope = `@${repoName}//...`;
  // `\b` stops look-alike tags such as `pre_maven_coordinates=fake` from
  // matching, because `_` is a word character and so no boundary precedes `m`.
  const taggedRules = `attr("tags", "\\bmaven_coordinates=", ${scope})`;
  const attributedRules = `attr("maven_coordinates", ".+", ${scope})`;
  return `${taggedRules} union ${attributedRules}`;
}
|
|
2293
|
+
|
|
2294
|
+
// Build the full cquery argv for a per-repo metadata cquery. Exposed for
|
|
2295
|
+
// argv-shape unit tests without touching `spawn`.
|
|
2296
|
+
// Assembles the argv for one repo's metadata cquery, in this order: optional
// startup options, the `cquery` command with its fixed flags, the caller's
// `invocationFlags`, the target expression, the output-shaping flags, and
// finally the user's extra flags parsed from `opts.bazelFlags`.
function buildMetadataCqueryArgv(repoName, opts) {
  // A startup option is emitted only when its source value is truthy.
  const startupCandidates = [
    ['--bazelrc', opts.bazelRc],
    ['--output_user_root', opts.outputUserRoot],
    ['--output_base', opts.bazelOutputBase],
  ];
  const startupArgs = [];
  for (const [flag, value] of startupCandidates) {
    if (value) {
      startupArgs.push(`${flag}=${value}`);
    }
  }
  const extraFlags = splitBazelFlags(opts.bazelFlags);
  const outputFlags = [
    '--output=jsonproto',
    '--proto:output_rule_attrs=tags,maven_coordinates,deps,exports,runtime_deps',
    '--keep_going',
  ];
  return [
    ...startupArgs,
    'cquery',
    '--lockfile_mode=off',
    '--noshow_progress',
    ...opts.invocationFlags,
    buildMetadataCqueryExpr(repoName),
    ...outputFlags,
    ...extraFlags,
  ];
}
|
|
2310
|
+
// Reads the payload of a jsonproto attribute whose `type` is `STRING`.
// Accepts the camelCase (`stringValue`) and snake_case (`string_value`)
// spellings, preferring camelCase. Returns undefined for any other attribute
// type or when neither key holds a string.
function readStringAttr(attr) {
  if (attr.type !== 'STRING') {
    return undefined;
  }
  for (const key of ['stringValue', 'string_value']) {
    const payload = attr[key];
    if (typeof payload === 'string') {
      return payload;
    }
  }
  return undefined;
}
|
|
2322
|
+
// Reads the payload of a jsonproto attribute whose `type` is `STRING_LIST`.
// Returns the array stored under `stringListValue` (camelCase) or, failing
// that, `string_list_value` (snake_case). The array is returned as-is, not
// copied. Returns undefined for other attribute types or when neither key
// holds an array.
function readStringListAttr(attr) {
  if (attr.type !== 'STRING_LIST') {
    return undefined;
  }
  // Candidate order encodes the camelCase-first preference.
  const candidates = [attr.stringListValue, attr.string_list_value];
  return candidates.find(candidate => Array.isArray(candidate));
}
|
|
2555
2334
|
|
|
2556
|
-
// Reads
|
|
2557
|
-
//
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2335
|
+
// Reads a `LABEL_LIST` jsonproto attribute. Bazel serializes label lists into
|
|
2336
|
+
// the same string-list payload (`stringListValue` / `string_list_value`) it
|
|
2337
|
+
// uses for `STRING_LIST`, but tags the attribute `type: "LABEL_LIST"`. The
|
|
2338
|
+
// `deps`/`exports`/`runtime_deps` edge attrs are LABEL_LIST, so a STRING_LIST
|
|
2339
|
+
// reader would silently return nothing and leave the graph empty.
|
|
2340
|
+
// Reads the payload of a jsonproto attribute whose `type` is `LABEL_LIST`.
// The labels are looked up under the same keys a string list uses
// (`stringListValue`, then `string_list_value`); the first one that is an
// array is returned as-is. Returns undefined for any other attribute type or
// when neither key holds an array.
function readLabelListAttr(attr) {
  if (attr.type === 'LABEL_LIST') {
    const { stringListValue: camelCaseList, string_list_value: snakeCaseList } = attr;
    if (Array.isArray(camelCaseList)) {
      return camelCaseList;
    }
    if (Array.isArray(snakeCaseList)) {
      return snakeCaseList;
    }
  }
  return undefined;
}
|
|
2572
2352
|
|
|
2573
|
-
//
|
|
2574
|
-
//
|
|
2575
|
-
//
|
|
2576
|
-
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2580
|
-
|
|
2581
|
-
|
|
2582
|
-
|
|
2583
|
-
|
|
2353
|
+
// Strip the trailing version segment from a Maven coordinate, preserving any
|
|
2354
|
+
// packaging/classifier segments. `g:a:v` -> `g:a`,
|
|
2355
|
+
// `g:a:packaging:v` -> `g:a:packaging`,
|
|
2356
|
+
// `g:a:packaging:classifier:v` -> `g:a:packaging:classifier`. Coordinates with
|
|
2357
|
+
// fewer than 3 segments have no version to strip and are returned unchanged.
|
|
2358
|
+
// This matches depscan's `coordinateToParts` keying (position 3 = extension,
|
|
2359
|
+
// position 4 = classifier on the versionless key), so AAR/classifier artifacts
|
|
2360
|
+
// key correctly instead of being mis-keyed as bare `group:artifact` jars.
|
|
2361
|
+
// Drops the last `:`-separated segment of a coordinate when it has at least
// three segments (`g:a:v` -> `g:a`, `g:a:packaging:v` -> `g:a:packaging`).
// Shorter inputs are returned unchanged because they have no trailing segment
// to strip.
function versionlessCoordinate(coord) {
  const segments = coord.split(':');
  if (segments.length >= 3) {
    segments.pop();
    return segments.join(':');
  }
  return coord;
}
|
|
2368
|
+
|
|
2369
|
+
// Recover the `@<repo>//` prefix from a fully-qualified target label, covering
|
|
2370
|
+
// both apparent (`@maven//:foo`) and bzlmod-canonical
|
|
2371
|
+
// (`@@rules_jvm_external++maven+maven//pkg:foo`) forms. Returns undefined for
|
|
2372
|
+
// labels that aren't repo-qualified (e.g. `:src`).
|
|
2373
|
+
// Returns everything up to and including the first `//` of a label that
// starts with `@` (e.g. `@maven//:foo` -> `@maven//`). Returns undefined when
// the label does not start with `@` or contains no `//`.
function repoPrefixOfLabel(label) {
  const slashesAt = label.startsWith('@') ? label.indexOf('//') : -1;
  return slashesAt < 0 ? undefined : label.slice(0, slashesAt + 2);
}
|
|
2383
|
+
|
|
2384
|
+
// Strip the leading `@<repo>//:` prefix from a fully-qualified target label
|
|
2385
|
+
// to recover the bare rule name (e.g. `com_google_guava_guava`).
|
|
2386
|
+
// Returns the text after the last `:` of a label (the bare rule name), or the
// whole label when it contains no `:`.
function ruleNameFromLabel(label) {
  // `lastIndexOf` yields -1 when there is no colon, so the slice starts at 0
  // and returns the label unchanged.
  return label.slice(label.lastIndexOf(':') + 1);
}
|
|
2390
|
+
|
|
2391
|
+
// Extract the maven coordinate from a rule's attributes. Prefers the direct
|
|
2392
|
+
// `maven_coordinates` attribute (Bazel-native shape); falls back to scanning
|
|
2393
|
+
// `tags` for a `maven_coordinates=<G:A:V>` entry (rules_jvm_external shape).
|
|
2394
|
+
// Returns undefined if neither yields a non-empty value.
|
|
2395
|
+
// Returns the Maven coordinate carried by a rule, or undefined when it has
// none. A non-empty `maven_coordinates` STRING attribute always wins, whatever
// the attribute order. Otherwise the first `tags` entry matching
// `MAVEN_COORD_TAG_RE` supplies the coordinate.
function extractMavenCoordinate(rule) {
  let found;
  for (const attr of rule.attribute ?? []) {
    switch (attr.name) {
      case 'maven_coordinates': {
        const direct = readStringAttr(attr);
        // The direct attribute overrides a coordinate already taken from tags.
        if (direct) {
          found = direct;
        }
        break;
      }
      case 'tags': {
        const tags = readStringListAttr(attr) ?? [];
        const firstHit = tags.map(tag => MAVEN_COORD_TAG_RE.exec(tag)).find(Boolean);
        // A tag only fills the slot; it never replaces an earlier coordinate.
        if (firstHit && !found) {
          found = firstHit[1];
        }
        break;
      }
      default:
        break;
    }
  }
  return found;
}
|
|
2597
2417
|
|
|
2598
|
-
//
|
|
2599
|
-
function
|
|
2600
|
-
const
|
|
2601
|
-
const
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
if (out.length >= MAX_CANDIDATES$1) {
|
|
2607
|
-
break;
|
|
2418
|
+
// Collect the union of `deps`/`exports`/`runtime_deps` label edges off a rule.
|
|
2419
|
+
// Gathers, in attribute order, every label listed under the rule's edge
// attributes (those named in `EDGE_ATTR_NAMES`). Attributes that are not
// `LABEL_LIST` contribute nothing. Duplicates are kept.
function extractEdgeLabels(rule) {
  return (rule.attribute ?? [])
    .filter(attr => attr.name && EDGE_ATTR_NAMES.has(attr.name))
    .flatMap(attr => readLabelListAttr(attr) ?? []);
}
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2431
|
+
|
|
2432
|
+
// A coordinate-bearing rule recovered from the cquery stream, before its edge
|
|
2433
|
+
// labels are resolved to coordinates.
|
|
2434
|
+
|
|
2435
|
+
// Build the label -> coordinate index from this repo's own coordinate-bearing
|
|
2436
|
+
// targets, keyed by the full emitted rule label (the form dep labels also use,
|
|
2437
|
+
// since both come from the same cquery output). The `:<ruleName>` suffix map
|
|
2438
|
+
// is a fallback for labels that don't full-match.
|
|
2439
|
+
// Builds the lookup structures used to resolve dep labels:
//   - `fullLabels`:     full rule label -> versionless coordinate
//   - `suffixToCoords`: `:<ruleName>` -> set of versionless coordinates
//   - `hubPrefixes`:    every `@<repo>//` prefix seen on a record's label
function buildLabelCoordIndex(records) {
  const fullLabels = new Map();
  const suffixToCoords = new Map();
  const hubPrefixes = new Set();
  for (const { coord: rawCoord, fullLabel, ruleName } of records) {
    const keyCoord = versionlessCoordinate(rawCoord);
    fullLabels.set(fullLabel, keyCoord);

    // Several rules may share one rule name; keep all of their coordinates
    // so an ambiguous suffix can be detected later.
    const suffixKey = `:${ruleName}`;
    let bucket = suffixToCoords.get(suffixKey);
    if (bucket === undefined) {
      bucket = new Set();
      suffixToCoords.set(suffixKey, bucket);
    }
    bucket.add(keyCoord);

    const hubPrefix = repoPrefixOfLabel(fullLabel);
    if (hubPrefix) {
      hubPrefixes.add(hubPrefix);
    }
  }
  return {
    fullLabels,
    hubPrefixes,
    suffixToCoords,
  };
}
|
|
2461
|
+
// True when `label` starts with at least one of the given hub prefixes.
function isHubPrefixed(label, hubPrefixes) {
  return [...hubPrefixes].some(prefix => label.startsWith(prefix));
}
|
|
2630
|
-
|
|
2631
|
-
|
|
2632
|
-
|
|
2469
|
+
// Resolve one dep label into a versionless coordinate. Classifies into three
|
|
2470
|
+
// buckets (there is deliberately no "seen but coordinate-less" bucket — the
|
|
2471
|
+
// cquery only selects coordinate-bearing targets):
|
|
2472
|
+
// - `coord` — full-label match, unique-suffix fallback, or an already-a-
|
|
2473
|
+
// coordinate `g:a:v` string label.
|
|
2474
|
+
// - `unresolved`— hub-prefixed but resolves to nothing in the selected set
|
|
2475
|
+
// (missing target or ambiguous suffix): a known-dropped edge.
|
|
2476
|
+
// - `drop` — a non-maven target (`@platforms//…`, `:src`): intentional.
|
|
2477
|
+
// Classifies one dep label against the index and returns one of:
//   { kind: 'coord', coord } - exact full-label hit, a unique `:<ruleName>`
//                              suffix hit on a hub-prefixed label, or a
//                              label that already looks like a coordinate
//   { kind: 'unresolved' }   - hub-prefixed, but the suffix is missing or
//                              maps to more than one coordinate
//   { kind: 'drop' }         - anything else
function resolveDepLabel(label, index) {
  const exactHit = index.fullLabels.get(label);
  if (exactHit) {
    return { coord: exactHit, kind: 'coord' };
  }

  if (isHubPrefixed(label, index.hubPrefixes)) {
    // The suffix fallback is trusted only when exactly one coordinate
    // shares this rule name.
    const candidates = index.suffixToCoords.get(`:${ruleNameFromLabel(label)}`);
    if (candidates?.size === 1) {
      const [onlyCoord] = candidates;
      return { coord: onlyCoord, kind: 'coord' };
    }
    return { kind: 'unresolved' };
  }

  // A string with a `:` that does not start like a Bazel label (`@`, `:`,
  // `//`) is treated as a coordinate and normalized to its versionless form.
  const startsLikeBazelLabel = ['@', ':', '//'].some(lead => label.startsWith(lead));
  if (label.includes(':') && !startsLikeBazelLabel) {
    return { coord: versionlessCoordinate(label), kind: 'coord' };
  }

  return { kind: 'drop' };
}
|
|
2649
2514
|
|
|
2650
|
-
//
|
|
2651
|
-
//
|
|
2652
|
-
//
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2515
|
+
// Pure parser for the jsonproto cquery stream. Returns one
|
|
2516
|
+
// `ExtractedArtifact` per rule with a recoverable maven coordinate (its `deps`
|
|
2517
|
+
// holding resolved versionless coordinates) plus the set of hub-prefixed dep
|
|
2518
|
+
// labels that could not be resolved. The `sourceRepo` field carries
|
|
2519
|
+
// `<workspaceRelPath>:<repoName>` provenance when a workspace path was
|
|
2520
|
+
// provided; otherwise just the repo name.
|
|
2521
|
+
// Parses the stdout of a jsonproto cquery into
// `{ artifacts, unresolvedLabels }`.
//
// Parameters:
//   stdout           - raw cquery output (string).
//   repoName         - repo the query targeted; used for provenance.
//   workspaceRelPath - optional; when truthy, `sourceRepo` becomes
//                      `<workspaceRelPath>:<repoName>`, else just `repoName`.
//
// Returns one artifact per rule that yields a Maven coordinate (`deps` holds
// the resolved, de-duplicated coordinates of its edge labels) plus the
// de-duplicated labels that resolved as `unresolved`. Blank input yields two
// empty arrays. Malformed JSON never throws: bad envelope entries, bad lines
// and rules without a string name are skipped individually.
function parseCqueryJsonproto(stdout, repoName, workspaceRelPath) {
  if (!stdout.trim()) {
    return {
      artifacts: [],
      unresolvedLabels: [],
    };
  }

  // Envelope form first: `{ "results": [{ "target": {...} }, ...] }`.
  const targets = [];
  try {
    const envelope = JSON.parse(stdout);
    const results = Array.isArray(envelope?.results) ? envelope.results : [];
    for (const entry of results) {
      // Guard each entry on its own. A single null/non-object entry used to
      // throw here, and the catch below then silently discarded every entry
      // that followed it.
      if (entry?.target) {
        targets.push(entry.target);
      }
    }
  } catch {
    // Not one JSON document; fall through to per-line scanning.
  }

  // Per-line form: one JSON target object per line.
  if (!targets.length) {
    for (const line of stdout.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (!trimmed) {
        continue;
      }
      try {
        const parsed = JSON.parse(trimmed);
        if (parsed?.rule) {
          targets.push(parsed);
        }
      } catch {
        // Skip malformed lines.
      }
    }
  }

  // First pass: collect coordinate-bearing rules with their raw edge labels.
  const records = [];
  for (const target of targets) {
    if (target.type && target.type !== 'RULE') {
      continue;
    }
    const rule = target.rule;
    // A non-string name cannot be a label; skipping it keeps
    // `ruleNameFromLabel` from throwing on unexpected payloads.
    if (!rule || typeof rule.name !== 'string' || !rule.name) {
      continue;
    }
    const coord = extractMavenCoordinate(rule);
    if (!coord) {
      continue;
    }
    records.push({
      coord,
      edgeLabels: extractEdgeLabels(rule),
      fullLabel: rule.name,
      ruleKind: rule.ruleClass ?? rule.rule_class ?? 'unknown',
      ruleName: ruleNameFromLabel(rule.name),
    });
  }

  // Second pass: resolve edge labels against this repo's own targets.
  const index = buildLabelCoordIndex(records);
  const provenance = workspaceRelPath ? `${workspaceRelPath}:${repoName}` : repoName;
  const artifacts = [];
  const unresolved = new Set();
  for (const rec of records) {
    const deps = new Set();
    for (const label of rec.edgeLabels) {
      const resolution = resolveDepLabel(label, index);
      if (resolution.kind === 'coord') {
        deps.add(resolution.coord);
      } else if (resolution.kind === 'unresolved') {
        unresolved.add(label);
      }
      // 'drop' resolutions are intentionally ignored.
    }
    artifacts.push({
      deps: [...deps],
      mavenCoordinates: rec.coord,
      ruleKind: rec.ruleKind,
      ruleName: rec.ruleName,
      sourceRepo: provenance,
    });
  }
  return {
    artifacts,
    unresolvedLabels: [...unresolved],
  };
}
|
|
2609
|
+
|
|
2610
|
+
// Classify the runner's raw outcome. Non-zero exit with `--keep_going` is a
|
|
2611
|
+
// `partial` (some target analysis failed; the successful subset is still in
|
|
2612
|
+
// stdout). A clean exit with unresolved hub-prefixed edges is also `partial`
|
|
2613
|
+
// — the graph is known-incomplete. Zero exit with no parsed artefacts is
|
|
2614
|
+
// `empty`. Spawn timeout is signalled separately; this helper handles the
|
|
2615
|
+
// post-spawn case.
|
|
2616
|
+
// Maps an exit code plus parse counts to a status string:
//   non-zero exit -> 'partial' if any artifact was parsed, else 'error'
//   zero exit     -> 'empty' with no artifacts, 'partial' when unresolved
//                    labels remain, otherwise 'ok'
function classifyCqueryOutcome(code, artifactCount, unresolvedCount) {
  if (code !== 0) {
    // Whatever was parsed from a failed run is still worth keeping.
    return artifactCount > 0 ? 'partial' : 'error';
  }
  if (!artifactCount) {
    return 'empty';
  }
  return unresolvedCount > 0 ? 'partial' : 'ok';
}
|
|
2628
|
+
|
|
2629
|
+
// Spawn the per-repo metadata cquery, parse the result, and return a
|
|
2630
|
+
// structured outcome. On spawn timeout, return `status: 'timeout'` so the
|
|
2631
|
+
// orchestrator can reap the server (`bazel --output_user_root=<dir>
|
|
2632
|
+
// shutdown` + `rm -rf`) before moving on.
|
|
2633
|
+
// Runs the metadata cquery for one repo and returns a structured outcome:
// `{ artifacts, durationMs, repoName, status, stderr, unresolvedLabels,
// workspaceRelPath }`. This function never rejects on a spawn failure; the
// failure is folded into `status` instead:
//   - resolved spawn -> status from `classifyCqueryOutcome`
//   - rejected spawn -> 'timeout' when the error looks like a kill or timeout,
//                       otherwise classified from whatever stdout it carried
async function runMetadataCqueryForRepo(args) {
  const { opts, repoName, timeoutMs, workspaceRelPath, workspaceRoot } = args;
  const argv = buildMetadataCqueryArgv(repoName, opts);
  const startedAt = Date.now();

  // Shared result shape for the success and failure paths.
  const toOutcome = (parsed, stderr, status) => ({
    artifacts: parsed.artifacts,
    durationMs: Date.now() - startedAt,
    repoName,
    status,
    stderr,
    unresolvedLabels: parsed.unresolvedLabels,
    workspaceRelPath,
  });

  try {
    const spawnOptions = { cwd: workspaceRoot, timeout: timeoutMs };
    // `env` is passed only when the caller supplied one.
    if (opts.env) {
      spawnOptions.env = opts.env;
    }
    const { code, stderr, stdout } = await spawn.spawn(opts.bin, argv, spawnOptions);
    const parsed = parseCqueryJsonproto(stdout, repoName, workspaceRelPath);
    const status = classifyCqueryOutcome(code, parsed.artifacts.length, parsed.unresolvedLabels.length);
    return toOutcome(parsed, stderr, status);
  } catch (e) {
    const failure = e;
    const stdout = typeof failure.stdout === 'string' ? failure.stdout : '';
    const stderr = typeof failure.stderr === 'string' ? failure.stderr : '';
    const timedOut =
      failure.timedOut === true ||
      failure.killed === true ||
      ['SIGTERM', 'SIGKILL'].includes(failure.signal);
    // A rejected spawn may still carry usable stdout (a non-zero exit under
    // `--keep_going`), so parse it instead of discarding it.
    const parsed = stdout
      ? parseCqueryJsonproto(stdout, repoName, workspaceRelPath)
      : { artifacts: [], unresolvedLabels: [] };
    // A non-numeric `code` is treated as a generic failure (1).
    const code = typeof failure.code === 'number' ? failure.code : 1;
    const status = timedOut
      ? 'timeout'
      : classifyCqueryOutcome(code, parsed.artifacts.length, parsed.unresolvedLabels.length);
    return toOutcome(parsed, stderr, status);
  }
}
|
|
2761
2699
|
|
|
2762
|
-
|
|
2763
|
-
// Included as a seed so repos that define maven_install in a subdirectory
|
|
2764
|
-
// .bzl file (not scanned by parseMavenRepoCandidates) are still discovered.
|
|
2765
|
-
const DEFAULT_MAVEN_REPO_SEED = 'maven';
|
|
2700
|
+
let probed = false;
|
|
2766
2701
|
|
|
2767
|
-
//
|
|
2768
|
-
//
|
|
2769
|
-
//
|
|
2770
|
-
//
|
|
2771
|
-
//
|
|
2772
|
-
//
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
2776
|
-
// the top-level static scan) can still be discovered via probe validation.
|
|
2777
|
-
async function discoverMavenRepos(cwd, probe, nativeCandidates, verbose) {
|
|
2778
|
-
const parsed = nativeCandidates && nativeCandidates.length ? nativeCandidates : parseMavenRepoCandidates(cwd, verbose);
|
|
2779
|
-
if (verbose) {
|
|
2780
|
-
logger.logger.log('[VERBOSE] discovery: candidate source:', nativeCandidates && nativeCandidates.length ? `bzlmod visible-repos (${nativeCandidates.length})` : `static parse (${parsed.length})`);
|
|
2702
|
+
// Verifies `java` is functional in the current execution environment. Bazel
|
|
2703
|
+
// JVM manifest extraction (rules_jvm_external → Coursier) requires a real
|
|
2704
|
+
// JDK; the CLI does not attempt to discover Homebrew installs or mutate the
|
|
2705
|
+
// caller's PATH/JAVA_HOME. If `java -version` fails we throw with an
|
|
2706
|
+
// actionable message so the surfaced error names the prerequisite directly
|
|
2707
|
+
// instead of relying on Bazel's downstream diagnostic.
|
|
2708
|
+
// Verifies once per process that `java -version` runs successfully. The
// first success is remembered in the module-level `probed` flag, so later
// calls return immediately. A failed probe throws an Error naming the JDK
// prerequisite and is not remembered, so the next call probes again.
function ensureJavaOnPath() {
  if (!probed) {
    try {
      // Only the exit status matters; all output is discarded.
      childProcess.execSync('java -version', { stdio: 'ignore' });
    } catch {
      throw new Error('Java is required for Bazel JVM manifest extraction ' + '(rules_jvm_external invokes Coursier, which needs a JDK). ' + 'Install a JDK (e.g. Temurin or OpenJDK) and ensure `java` is on PATH.');
    }
    probed = true;
  }
}
|
|
2721
|
+
|
|
2722
|
+
// Validates that --bazel-output-base is a path we can use as Bazel's output_base.
|
|
2723
|
+
// Throws InputError if:
|
|
2724
|
+
// - the input contains `..` segments (path traversal guard)
|
|
2725
|
+
// - the existing path is not writable
|
|
2726
|
+
// - the path cannot be created (parent not writable)
|
|
2727
|
+
// Validates that `outputBase` (resolved against `cwd`) can serve as Bazel's
// output_base, creating the directory when it does not exist yet.
//
// Throws `utils.InputError` when:
//   - the raw input contains a `..` segment (checked on both `/` and `\`),
//   - the path exists but is not a directory,
//   - the path exists but is not writable,
//   - the path does not exist and cannot be created.
// Returns nothing on success.
function validateOutputBase(outputBase, cwd) {
  // Traversal guard on the RAW input: `path.resolve` would normalise `..`
  // away. Splitting on both separators also catches `foo/../etc` on Windows,
  // where a platform-specific split would miss it.
  const segments = outputBase.split(/[\\/]/);
  if (segments.includes('..')) {
    throw new utils.InputError(`--bazel-output-base must not contain '..' segments: ${outputBase}`);
  }
  const resolved = path.resolve(cwd, outputBase);
  if (fs$1.existsSync(resolved)) {
    // A writable regular file would pass the access check below, yet it
    // cannot hold an output tree. Reject it here with a precise message.
    let isDirectory = false;
    try {
      isDirectory = fs$1.statSync(resolved).isDirectory();
    } catch {
      // The path vanished or cannot be inspected; reported as unusable below.
    }
    if (!isDirectory) {
      throw new utils.InputError(`--bazel-output-base is not a directory: ${resolved}`);
    }
    try {
      fs$1.accessSync(resolved, fs$1.constants.W_OK);
    } catch {
      throw new utils.InputError(`--bazel-output-base is not writable: ${resolved}`);
    }
    return;
  }
  // Path does not exist yet — create it (including parents) so Bazel can
  // populate it. Failure here usually means a parent is not writable.
  try {
    fs$1.mkdirSync(resolved, {
      recursive: true,
    });
  } catch (e) {
    throw new utils.InputError(`--bazel-output-base could not be created at ${resolved}: ${utils.getErrorCause(e)}`);
  }
}
|
|
2756
|
+
|
|
2757
|
+
// Stable shim dir name — same process will get the same dir; concurrent
|
|
2758
|
+
// socket-cli invocations on the same machine share it. The symlink target
|
|
2759
|
+
// is whatever python3 resolves to NOW; if PATH changes between invocations
|
|
2760
|
+
// we replace the symlink.
|
|
2761
|
+
const SHIM_SUBDIR = 'socket-cli-bazel-python-shim';
|
|
2762
|
+
|
|
2763
|
+
// Cache the result for the lifetime of this process.
|
|
2764
|
+
let cached = null;
|
|
2765
|
+
|
|
2766
|
+
// Safe wrapper around whichBin that returns null instead of throwing when
|
|
2767
|
+
// nothrow semantics are broken in older registry versions (realpath 'null' bug).
|
|
2768
|
+
// Resolves `name` through `bin.whichBin` with `nothrow: true`. A nullish
// result or any thrown error is returned as null, so callers get "path or
// null" and never an exception.
async function safeWhichBin(name) {
  let located;
  try {
    located = await bin.whichBin(name, { nothrow: true });
  } catch {
    return null;
  }
  return located ?? null;
}
|
|
2777
|
+
// Makes a `python` executable available for tools that require that exact
// name. The result is cached for the lifetime of the process.
//
// Returns `{ augmentedEnv, shimDir }`:
//   - both undefined when `python` is already on PATH (no shim needed);
//   - otherwise `shimDir` is a directory under `os.tmpdir()` holding a
//     `python` symlink to the resolved `python3`, and `augmentedEnv` is a
//     copy of `process.env` with `shimDir` prepended to PATH.
// Throws `utils.InputError` when neither `python` nor `python3` is found, and
// rethrows any symlink error other than a benign EEXIST race.
//
// NOTE(review): the shim directory sits at a predictable path under
// os.tmpdir() — confirm that on multi-user hosts another user cannot
// pre-create it with a `python` entry of their own.
async function provisionPythonShim() {
  if (cached) {
    return cached;
  }
  if (await safeWhichBin('python')) {
    cached = {
      augmentedEnv: undefined,
      shimDir: undefined,
    };
    return cached;
  }
  const python3OnPath = await safeWhichBin('python3');
  if (!python3OnPath) {
    throw new utils.InputError('Neither `python` nor `python3` found on PATH. Older versions of ' + 'rules_jvm_external require a `python` interpreter for repository ' + 'rules. Install Python 3 and ensure it is on PATH, then retry.');
  }
  const shimDir = path.join(os.tmpdir(), SHIM_SUBDIR);
  fs$1.mkdirSync(shimDir, {
    recursive: true,
  });
  const linkPath = path.join(shimDir, 'python');
  // Always try to remove a previous link. The old `existsSync` guard follows
  // symlinks, so a DANGLING link (python3 moved or uninstalled) looked
  // absent, was never removed, and made every later `symlinkSync` fail with
  // EEXIST.
  try {
    fs$1.unlinkSync(linkPath);
  } catch {
    // ENOENT (nothing to replace) or a race with another invocation; the
    // symlinkSync below may still succeed.
  }
  // The shim dir is shared between socket-cli invocations, so another one may
  // re-create the link between our unlink and symlink. Tolerate EEXIST only
  // when the link now resolves to a real file: the other process won the race
  // and left a usable shim in place.
  try {
    fs$1.symlinkSync(python3OnPath, linkPath);
  } catch (e) {
    const lostRaceToUsableShim = e?.code === 'EEXIST' && fs$1.existsSync(linkPath);
    if (!lostRaceToUsableShim) {
      throw e;
    }
  }
  const augmentedEnv = {
    ...process.env,
    PATH: `${shimDir}${path.delimiter}${process.env['PATH'] ?? ''}`,
  };
  cached = {
    augmentedEnv,
    shimDir,
  };
  return cached;
}
|
|
2827
|
+
|
|
2828
|
+
/**
|
|
2829
|
+
* Maven hub repo discovery for `socket manifest bazel`.
|
|
2830
|
+
*
|
|
2831
|
+
* - Bzlmod path: `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
|
|
2832
|
+
* emits a text-format report listing every repo the maven extension generated;
|
|
2833
|
+
* `parseShowExtensionOutput` extracts the names of hub repos (items annotated
|
|
2834
|
+
* with `(imported by ...)`) and skips generated per-artifact repos.
|
|
2835
|
+
* - Legacy WORKSPACE path: probe a fixed list of conventional Maven hub names.
|
|
2836
|
+
* Each probe is classified into `populated` / `empty` / `not-defined`; the
|
|
2837
|
+
* orchestrator keeps only the `populated` candidates.
|
|
2838
|
+
*
|
|
2839
|
+
* No Starlark source is read by this module. All semantic interpretation
|
|
2840
|
+
* comes from Bazel itself (`mod show_extension`, `cquery`).
|
|
2841
|
+
*/
|
|
2842
|
+
|
|
2843
|
+
// The importer token Bazel prints for a hub generated for the root module
|
|
2844
|
+
// itself (`(imported by <root>, …)`). Hubs imported only by rulesets
|
|
2845
|
+
// (`rules_jvm_external@6.7`, `stardoc@0.7.2`, …) are build-tooling, not the
|
|
2846
|
+
// user's SBOM, and are filtered out by the orchestrator.
|
|
2847
|
+
const ROOT_MODULE_IMPORTER = '<root>';
|
|
2848
|
+
|
|
2849
|
+
// One hub repo from a `bazel mod show_extension` report: its name plus the
|
|
2850
|
+
// modules that imported it (the `(imported by …)` annotation), merged across
|
|
2851
|
+
// every line the repo appears on.
|
|
2852
|
+
|
|
2853
|
+
// Conventional Maven hub names rules_jvm_external sets up under
|
|
2854
|
+
// WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility
|
|
2855
|
+
// lookup never triggers a `repository_rule` fetch) so the orchestrator can
|
|
2856
|
+
// try them all without paying the cost of a real cquery on undefined repos.
|
|
2857
|
+
const CONVENTIONAL_MAVEN_REPO_NAMES = ['maven', 'maven_install', 'maven_dev', 'unpinned_maven', 'maven_unpinned'];
|
|
2858
|
+
|
|
2859
|
+
// Pattern Bazel emits when a probed repo name isn't visible to the main
|
|
2860
|
+
// module. Used to distinguish `not-defined` (skip silently) from `empty`
|
|
2861
|
+
// (the repo exists but has no targets). Tolerant of either single- or
|
|
2862
|
+
// double-quote styles Bazel has used across versions.
|
|
2863
|
+
const NOT_VISIBLE_STDERR_RE = /No repository visible as ['"]?@?[A-Za-z0-9._+-]+['"]? from/;
|
|
2864
|
+
// Other "repo isn't analyzable" patterns Bazel emits, especially under
|
|
2865
|
+
// WORKSPACE mode and on Bazel 6.x. They all map to `not-defined`.
|
|
2866
|
+
const NO_SUCH_PACKAGE_STDERR_RE = /no such package ['"`]?@/;
|
|
2867
|
+
// Pattern emitted when a repo IS visible / defined but yields no targets.
|
|
2868
|
+
// `--keep_going` plus `'no targets found beneath'` is the empty-but-defined
|
|
2869
|
+
// signature. The orchestrator treats `empty` and `not-defined` uniformly
|
|
2870
|
+
// as skips.
|
|
2871
|
+
const NO_TARGETS_STDERR_RE = /no targets found beneath/i;
|
|
2872
|
+
// Anchor for the maven extension's section header in
|
|
2873
|
+
// `bazel mod show_extension` output. Tolerant of the canonical-name form
|
|
2874
|
+
// Bazel uses across versions (`@@rules_jvm_external+`, `@@rules_jvm_external~`,
|
|
2875
|
+
// or any future separator) and of trailing whitespace.
|
|
2876
|
+
const SHOW_EXT_SECTION_HEADER_RE = /^## @@?[A-Za-z0-9._+~-]+\/\/:extensions\.bzl%maven:\s*$/m;
// Bullet within `Fetched repositories:` that names a hub repo (one with an
// `(imported by ...)` annotation). Bullets without that annotation are
// generated per-artifact repos and are skipped.
const FETCHED_HUB_BULLET_RE = /^ {2}- (?<name>\S+) \(imported by (?<importers>[^)]+)\)\s*$/;

/**
 * Pure parser for the stdout of
 * `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`.
 *
 * Locates the maven extension's `## …//:extensions.bzl%maven:` section header,
 * then reads the bullets under that section's `Fetched repositories:` line.
 * Only bullets carrying an `(imported by …)` annotation are returned; bullets
 * without it do not match the bullet pattern and are skipped. A repo may be
 * listed on several lines with different importers, so importers are merged
 * per repo name.
 *
 * The search is confined to the maven section: everything from the next
 * `## ` header onwards is ignored, so a `Fetched repositories:` list that
 * belongs to a later section is never attributed to the maven extension.
 *
 * @param {string} stdout Raw stdout of the `show_extension` invocation.
 * @returns {Array<{importers: string[], name: string}>} Hub repos sorted by
 *   name, each with its sorted, de-duplicated importer list. Empty when the
 *   section header or its `Fetched repositories:` line is absent.
 */
function parseShowExtensionOutput(stdout) {
  const headerMatch = SHOW_EXT_SECTION_HEADER_RE.exec(stdout);
  if (!headerMatch) {
    return [];
  }
  const tail = stdout.slice(headerMatch.index + headerMatch[0].length);
  // Bound the section at the next `## ` header before looking for the
  // fetched-repositories list, so a later section's list cannot leak in.
  const nextHeader = /^## /m.exec(tail);
  const section = nextHeader ? tail.slice(0, nextHeader.index) : tail;
  const fetchedMarker = '\nFetched repositories:';
  const fetchedIdx = section.indexOf(fetchedMarker);
  if (fetchedIdx === -1) {
    return [];
  }
  const afterFetched = section.slice(fetchedIdx + fetchedMarker.length);
  const importersByName = new Map();
  for (const line of afterFetched.split(/\r?\n/)) {
    // Blank lines and bullets without an `(imported by …)` annotation simply
    // fail the pattern and are skipped.
    const match = FETCHED_HUB_BULLET_RE.exec(line);
    const name = match?.groups?.['name'];
    if (!name) {
      continue;
    }
    const importers = importersByName.get(name) ?? new Set();
    const rawImporters = match.groups['importers'] ?? '';
    for (const importer of rawImporters.split(',')) {
      const trimmed = importer.trim();
      if (trimmed) {
        importers.add(trimmed);
      }
    }
    importersByName.set(name, importers);
  }
  return [...importersByName.keys()].sort().map(name => ({
    importers: [...importersByName.get(name)].sort(),
    name
  }));
}
|
|
2932
|
+
|
|
2933
|
+
// Classify a raw probe result into one of three states. The probe contract
|
|
2934
|
+
// is whatever the runner emits — typically a lightweight
|
|
2935
|
+
// `cquery '@<name>//...' --keep_going --output=label`. The orchestrator
|
|
2936
|
+
// treats `empty` and `not-defined` uniformly as no-ops; the distinction
|
|
2937
|
+
// is preserved for verbose-mode diagnostics.
|
|
2938
|
+
/**
 * Classify a raw probe result into `'populated'`, `'empty'`, or
 * `'not-defined'`.
 *
 * - Exit code 0 with non-blank stdout → `'populated'`.
 * - Exit code 0 with blank stdout → `'not-defined'`.
 * - Non-zero exit whose stderr matches the "no repository visible" or
 *   "no such package" patterns → `'not-defined'`.
 * - Non-zero exit whose stderr matches "no targets found beneath" → `'empty'`.
 * - Any other non-zero exit → `'not-defined'` (conservative: the caller skips
 *   the repo instead of failing the workspace).
 *
 * A missing or non-string `stdout`/`stderr` is treated as empty text rather
 * than throwing, so a malformed runner result still classifies.
 *
 * @param {{code: number, stdout?: unknown, stderr?: unknown}} result
 * @returns {'populated' | 'empty' | 'not-defined'}
 */
function classifyProbeResult(result) {
  const stdout = typeof result.stdout === 'string' ? result.stdout : String(result.stdout ?? '');
  const stderr = typeof result.stderr === 'string' ? result.stderr : String(result.stderr ?? '');
  if (result.code === 0) {
    // Success: any output means at least one target exists; blank output is
    // classified the same as an undeclared repo.
    return stdout.trim().length > 0 ? 'populated' : 'not-defined';
  }
  // Non-zero exit: "not visible" / "no such package" take precedence over
  // the "no targets" signature, matching the original check order.
  if (NOT_VISIBLE_STDERR_RE.test(stderr) || NO_SUCH_PACKAGE_STDERR_RE.test(stderr)) {
    return 'not-defined';
  }
  if (NO_TARGETS_STDERR_RE.test(stderr)) {
    return 'empty';
  }
  // Unrecognized failure: skip the repo rather than erroring the workspace.
  return 'not-defined';
}
|
|
2961
|
+
|
|
2962
|
+
// Convenience: probe a single candidate and return its classified status,
|
|
2963
|
+
// with optional verbose logging. Pure orchestration around `probe` +
|
|
2964
|
+
// `classifyProbeResult`; isolated so the test suite can exercise the
|
|
2965
|
+
// logging contract independently of the runner implementation.
|
|
2966
|
+
/**
 * Probe a single candidate repo and return its classified status, with
 * optional verbose logging.
 *
 * A probe that throws is reported as `'not-defined'` rather than propagated,
 * so one failing candidate does not abort the caller's loop.
 *
 * @param {string} repoName Repo name to probe (logged as `@<repoName>`).
 * @param {(repoName: string) => Promise<object>} probe Runs the probe and
 *   resolves to the raw result handed to `classifyProbeResult`.
 * @param {boolean} verbose When truthy, log the outcome via `logger.logger.log`.
 * @returns {Promise<string>} The status from `classifyProbeResult`, or
 *   `'not-defined'` when the probe threw.
 */
async function probeCandidate(repoName, probe, verbose) {
  let result;
  try {
    result = await probe(repoName);
  } catch (e) {
    if (verbose) {
      logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${e instanceof Error ? e.message : String(e)})`);
    }
    return 'not-defined';
  }
  const status = classifyProbeResult(result);
  if (verbose) {
    logger.logger.log(`[VERBOSE] discovery: probe @${repoName}: ${status}`);
  }
  return status;
}
|
|
2808
2982
|
|
|
2809
2983
|
// Detects whether the given Bazel workspace uses Bzlmod (MODULE.bazel),
|
|
@@ -2833,6 +3007,164 @@ function getBazelInvocationFlags(mode) {
|
|
|
2833
3007
|
return ['--noenable_bzlmod', '--enable_workspace'];
|
|
2834
3008
|
}
|
|
2835
3009
|
|
|
3010
|
+
/**
|
|
3011
|
+
* Walk the directory tree rooted at `cwd` and return every directory that
|
|
3012
|
+
* looks like a Bazel workspace root — i.e. contains `MODULE.bazel`,
|
|
3013
|
+
* `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots
|
|
3014
|
+
* (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example
|
|
3015
|
+
* `examples/<name>/MODULE.bazel`); the per-workspace algorithm in the
|
|
3016
|
+
* orchestrator runs once per discovered root.
|
|
3017
|
+
*
|
|
3018
|
+
* The walker is dependency-injected with the directory-prune policy:
|
|
3019
|
+
* callers pass the set of basenames and basename prefixes the walk must
|
|
3020
|
+
* refuse to descend into. This module intentionally hardcodes none of
|
|
3021
|
+
* the "common" prunes (`.git`, `node_modules`, …) — Bazel callers compose
|
|
3022
|
+
* the codebase-wide `IGNORED_DIRS` list (`src/utils/glob.mts`) with the
|
|
3023
|
+
* Bazel-specific bits (`bazel-*` output_base symlinks,
|
|
3024
|
+
* `.socket-auto-manifest`).
|
|
3025
|
+
*
|
|
3026
|
+
* Discovery is bounded-but-complete: the walk visits directories in
|
|
3027
|
+
* deterministic (sorted) order under a single visited-directory budget
|
|
3028
|
+
* (`MAX_WALK_DIRS`) as the only pathological-input / symlink-loop guard —
|
|
3029
|
+
* there is no depth cap, because the deepest workspace marker observed across
|
|
3030
|
+
* the OSS corpus (9) sat *below* the old depth-8 ceiling, so that ceiling
|
|
3031
|
+
* silently dropped real first-party modules. All roots found within the
|
|
3032
|
+
* budget are collected, sorted, then capped to `MAX_WORKSPACE_ROOTS`. Both
|
|
3033
|
+
* the cap and a budget exhaustion emit a `logger.warn` UNCONDITIONALLY (a missed
|
|
3034
|
+
* module silently drops its Maven hub, so truncation must never be silent).
|
|
3035
|
+
*/
|
|
3036
|
+
|
|
3037
|
+
|
|
3038
|
+
// Hard ceiling on workspace roots; 16 sits well above realistic monorepo
|
|
3039
|
+
// counts while tightening the guard against pathological inputs.
|
|
3040
|
+
const MAX_WORKSPACE_ROOTS = 16;
|
|
3041
|
+
// Hard ceiling on directories visited. The sole guard against pathological
|
|
3042
|
+
// inputs and symlink loops (a loop consumes the budget and stops). A few
|
|
3043
|
+
// thousand is far above any realistic first-party tree once the prune policy
|
|
3044
|
+
// has removed vendored/output dirs.
|
|
3045
|
+
const DEFAULT_MAX_WALK_DIRS = 5_000;
|
|
3046
|
+
// Files whose presence promotes a directory to a workspace root.
|
|
3047
|
+
const WORKSPACE_MARKER_FILES = new Set(['MODULE.bazel', 'WORKSPACE', 'WORKSPACE.bazel']);
|
|
3048
|
+
const EMPTY_SET = new Set();
|
|
3049
|
+
const EMPTY_ARRAY = [];
|
|
3050
|
+
|
|
3051
|
+
// Walks the tree rooted at `opts.cwd` and returns absolute paths to every
|
|
3052
|
+
// directory that contains at least one workspace marker file. Output is
|
|
3053
|
+
// sorted for determinism and capped at MAX_WORKSPACE_ROOTS.
|
|
3054
|
+
/**
 * Walk the tree rooted at `opts.cwd` and return the path of every directory
 * that directly contains a workspace marker file (`WORKSPACE_MARKER_FILES`).
 *
 * Traversal is an explicit-stack depth-first walk. Children are sorted and
 * pushed in reverse so they pop in ascending order, making the visit order
 * deterministic. Directories that cannot be read are skipped, though they
 * still count against the visited-directory budget.
 *
 * @param {object} opts
 * @param {string} opts.cwd Directory to start from; returned paths are built
 *   by joining onto it.
 * @param {boolean} [opts.verbose] Also log the dropped roots when the cap
 *   truncates the result.
 * @param {Set<string>} [opts.ignoreDirNames] Exact directory basenames the
 *   walk must not descend into (default: none).
 * @param {string[]} [opts.ignoreDirPrefixes] Directory basename prefixes the
 *   walk must not descend into (default: none).
 * @param {number} [opts.maxWalkDirs] Visited-directory budget
 *   (default `DEFAULT_MAX_WALK_DIRS`).
 * @returns {string[]} Sorted workspace roots, capped at `MAX_WORKSPACE_ROOTS`.
 *   A warning is always logged when the budget is exhausted or the cap drops
 *   roots.
 */
function findWorkspaceRoots(opts) {
  const { cwd, verbose } = opts;
  const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET;
  const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY;
  const maxWalkDirs = opts.maxWalkDirs ?? DEFAULT_MAX_WALK_DIRS;
  const roots = [];
  // LIFO stack; children are pushed in reverse-sorted order so they pop in
  // ascending order, giving a deterministic traversal.
  const stack = [cwd];
  let dirsVisited = 0;
  let budgetHit = false;
  while (stack.length) {
    if (dirsVisited >= maxWalkDirs) {
      // Work remains but the budget is spent: stop and report it below.
      budgetHit = true;
      break;
    }
    const dir = stack.pop();
    if (dir === undefined) {
      break;
    }
    dirsVisited += 1;
    let entries;
    try {
      entries = fs$1.readdirSync(dir, {
        withFileTypes: true
      });
    } catch {
      // Unreadable directory: skip it, the rest of the walk continues.
      continue;
    }
    let isWorkspaceRoot = false;
    const childNames = [];
    for (const entry of entries) {
      if (entry.isFile()) {
        if (WORKSPACE_MARKER_FILES.has(entry.name)) {
          isWorkspaceRoot = true;
        }
        continue;
      }
      if (!entry.isDirectory()) {
        continue;
      }
      const name = entry.name;
      if (ignoreDirNames.has(name)) {
        continue;
      }
      if (ignoreDirPrefixes.some(prefix => name.startsWith(prefix))) {
        continue;
      }
      childNames.push(name);
    }
    if (isWorkspaceRoot) {
      roots.push(dir);
    }
    // Descend regardless of whether this dir is itself a root — nested
    // workspaces are common (root MODULE.bazel + examples/*/MODULE.bazel).
    childNames.sort();
    for (let i = childNames.length - 1; i >= 0; i -= 1) {
      stack.push(path.join(dir, childNames[i]));
    }
  }
  roots.sort();
  const kept = roots.slice(0, MAX_WORKSPACE_ROOTS);
  const droppedCount = roots.length - kept.length;
  if (budgetHit) {
    // The dir budget was exhausted, so an unknown number of roots may be
    // undiscovered — surface it unconditionally.
    logger.logger.warn(`Bazel workspace walk hit the ${maxWalkDirs}-directory budget; some workspaces beneath ${cwd} may be undiscovered (found ${roots.length}, kept ${kept.length}).`);
  }
  if (droppedCount > 0) {
    // The cap dropped roots. Exact count when the full tree was walked; "≥"
    // when the budget cut the walk short (more roots may exist).
    const qualifier = budgetHit ? '≥' : '';
    logger.logger.warn(`Bazel workspace walk found ${roots.length} workspace root(s); capping at ${MAX_WORKSPACE_ROOTS} and dropping ${qualifier}${droppedCount}.`);
    if (verbose) {
      logger.logger.log('[VERBOSE] workspace walker: dropped roots:', roots.slice(MAX_WORKSPACE_ROOTS));
    }
  }
  return kept;
}
|
|
3142
|
+
|
|
3143
|
+
// Best-effort-per-hub produces four distinct run outcomes a single `ok`
|
|
3144
|
+
// boolean would conflate:
|
|
3145
|
+
// - `complete` — every discovered hub extracted cleanly; >=1 manifest.
|
|
3146
|
+
// - `partial` — >=1 manifest written, but at least one hub failed,
|
|
3147
|
+
// timed out, or dropped edges. Worth uploading, but the
|
|
3148
|
+
// graph is known-incomplete.
|
|
3149
|
+
// - `noEcosystem` — no Bazel/Maven found. Whether that's an error is
|
|
3150
|
+
// caller-dependent (tolerated in auto mode, error in
|
|
3151
|
+
// explicit mode), so it must NOT be flattened into the
|
|
3152
|
+
// failure states.
|
|
3153
|
+
// - `hardFailure` — zero manifests written and it wasn't `noEcosystem`
|
|
3154
|
+
// (discovery threw, or every discovered hub failed).
|
|
3155
|
+
// Always an error for every caller.
|
|
3156
|
+
|
|
3157
|
+
const DEFAULT_PER_REPO_TIMEOUT_MS = 60_000;
|
|
3158
|
+
const REAP_TIMEOUT_MS = 10_000;
|
|
3159
|
+
|
|
3160
|
+
// Default directory-prune policy for the Bazel workspace walk. The
|
|
3161
|
+
// orchestrator applies this unconditionally so neither caller (the explicit
|
|
3162
|
+
// `socket manifest bazel` command nor `--auto-manifest`) can omit it and let
|
|
3163
|
+
// the walk descend `node_modules`/VCS/vendored trees. Callers may
|
|
3164
|
+
// pass extra names/prefixes to EXTEND, not replace, this set.
|
|
3165
|
+
const DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES = new Set([...utils.IGNORED_DIRS, '.hg', '.idea', '.pnpm-store', '.socket-auto-manifest', '.svn', '.vscode']);
|
|
3166
|
+
// Bazel's `bazel-*` output_base symlinks.
|
|
3167
|
+
const DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES = ['bazel-'];
|
|
2836
3168
|
// Splits "g:a:v" -> { groupArtifact: "g:a", version: "v" }.
|
|
2837
3169
|
// Returns null on malformed input.
|
|
2838
3170
|
function splitCoord(c) {
|
|
@@ -2845,213 +3177,304 @@ function splitCoord(c) {
|
|
|
2845
3177
|
version: c.slice(lastColon + 1)
|
|
2846
3178
|
};
|
|
2847
3179
|
}
|
|
2848
|
-
//
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
const coords = suffixToCoords.get(suffix) ?? new Set();
|
|
2857
|
-
coords.add(a.mavenCoordinates);
|
|
2858
|
-
suffixToCoords.set(suffix, coords);
|
|
2859
|
-
if (a.sourceRepo) {
|
|
2860
|
-
fullLabels.set(`@${a.sourceRepo}//${suffix}`, a.mavenCoordinates);
|
|
2861
|
-
}
|
|
3180
|
+
// A versionless `maven_install.json` key must have 2-4 non-empty
|
|
3181
|
+
// colon-separated segments (`g:a`, `g:a:ext`, `g:a:ext:classifier`) — exactly
|
|
3182
|
+
// the range depscan's `coordinateToParts` accepts. A key outside that range,
|
|
3183
|
+
// or with an empty segment, is rejected after upload, so reject it locally.
|
|
3184
|
+
/**
 * Check that a versionless `maven_install.json` key has between two and four
 * colon-separated segments (`g:a`, `g:a:ext`, `g:a:ext:classifier`), none of
 * them empty.
 *
 * @param {string} key Versionless coordinate key.
 * @returns {boolean} `true` when the key has 2-4 non-empty segments.
 */
function isValidVersionlessKey(key) {
  const parts = key.split(':');
  if (parts.length < 2 || parts.length > 4) {
    return false;
  }
  return parts.every(p => p.length > 0);
}
|
|
2868
3191
|
|
|
2869
|
-
//
|
|
2870
|
-
//
|
|
2871
|
-
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
2882
|
-
const suffixMatches = labelToCoord.suffixToCoords.get(key);
|
|
2883
|
-
if (!suffixMatches) {
|
|
2884
|
-
return null;
|
|
2885
|
-
}
|
|
2886
|
-
if (suffixMatches.size > 1) {
|
|
2887
|
-
throw new Error(`Ambiguous Bazel dependency label ${label} maps rule suffix ${key} to multiple Maven coordinates: ${Array.from(suffixMatches).sort().join(', ')}. The generated maven_install.json cannot resolve this dependency label losslessly.`);
|
|
2888
|
-
}
|
|
2889
|
-
return Array.from(suffixMatches)[0] ?? null;
|
|
2890
|
-
}
|
|
3192
|
+
// Builds a modern `maven_install.json` from artifacts whose `deps` already
|
|
3193
|
+
// hold resolved versionless coordinates (the cquery parser resolves edge
|
|
3194
|
+
// labels against each repo's own targets while `repoName` is in scope, so no
|
|
3195
|
+
// label-to-coordinate resolution happens here). Keys are versionless `g:a`
|
|
3196
|
+
// (preserving any packaging/classifier segments); dependency values are the
|
|
3197
|
+
// resolved coordinate sets.
|
|
3198
|
+
//
|
|
3199
|
+
// Two-phase so the emitted graph is internally closed and survives the server
|
|
3200
|
+
// parser, which rejects malformed coordinates and edges referencing unlisted
|
|
3201
|
+
// artifacts (and can abort after enough errors). Phase 1 builds (and
|
|
3202
|
+
// validates) the artifact keys; phase 2 emits only edges whose source AND
|
|
3203
|
+
// target are valid emitted keys. Anything dropped is reported so the caller
|
|
3204
|
+
// can flip the hub partial — never silently lost post-upload.
|
|
2891
3205
|
/**
 * Build a `maven_install.json`-shaped object from artifacts whose `deps`
 * already hold versionless coordinates.
 *
 * Phase 1 validates each artifact and registers it under its versionless key
 * (`splitCoord(...).groupArtifact`). An artifact is dropped when its
 * coordinate cannot be split, its key fails `isValidVersionlessKey`, or its
 * version is empty. Candidate edges are accumulated per key.
 *
 * Phase 2 emits an edge only when both its source and its target are emitted
 * artifact keys; every other edge is recorded in `prunedEdges`.
 *
 * @param {Array<{mavenCoordinates: string, deps: Iterable<string>}>} artifacts
 * @returns {{droppedArtifacts: string[], json: {artifacts: object, dependencies: object}, prunedEdges: string[]}}
 *   The manifest object plus the rejected coordinates and the pruned edges
 *   (formatted `source -> target`).
 * @throws {Error} When two artifacts share a versionless key but carry
 *   different versions.
 */
function normalizeToMavenInstallJson(artifacts) {
  const out = {
    artifacts: {},
    dependencies: {}
  };
  const droppedArtifacts = [];
  const prunedEdges = [];
  const versionsByGroupArtifact = new Map();
  // Phase 1: artifacts. Validate each key (shape + non-empty version) before
  // accepting it; collect candidate edges per key.
  const depsByKey = new Map();
  for (const a of artifacts) {
    const split = splitCoord(a.mavenCoordinates);
    if (!split) {
      droppedArtifacts.push(a.mavenCoordinates);
      continue;
    }
    const key = split.groupArtifact;
    // A `g:a:` coordinate strips to the valid-shaped key `g:a` but an empty
    // version — require both a valid key and a version.
    if (!isValidVersionlessKey(key) || !split.version) {
      droppedArtifacts.push(a.mavenCoordinates);
      continue;
    }
    const existingVersion = versionsByGroupArtifact.get(key);
    if (existingVersion && existingVersion !== split.version) {
      throw new Error(`Conflicting versions for ${key}: ${existingVersion}, ${split.version}. The generated maven_install.json cannot represent multiple versions for the same group:artifact losslessly.`);
    }
    if (!existingVersion) {
      versionsByGroupArtifact.set(key, split.version);
      out.artifacts[key] = {
        version: split.version
      };
    }
    // Accumulate the candidate edge set keyed by the versionless coordinate;
    // it is pruned against the valid keys in phase 2.
    const depCoords = depsByKey.get(key) ?? new Set();
    for (const depCoord of a.deps) {
      depCoords.add(depCoord);
    }
    if (depCoords.size) {
      depsByKey.set(key, depCoords);
    }
  }
  // Phase 2: edges. Emit only where both source and target are emitted keys.
  const validKeys = new Set(Object.keys(out.artifacts));
  for (const [key, depCoords] of depsByKey) {
    if (!validKeys.has(key)) {
      for (const target of depCoords) {
        prunedEdges.push(`${key} -> ${target}`);
      }
      continue;
    }
    const kept = [];
    for (const target of depCoords) {
      if (validKeys.has(target)) {
        kept.push(target);
      } else {
        prunedEdges.push(`${key} -> ${target}`);
      }
    }
    if (kept.length) {
      out.dependencies[key] = kept;
    }
  }
  return {
    droppedArtifacts,
    json: out,
    prunedEdges
  };
}
|
|
3277
|
+
|
|
3278
|
+
// Cross-workspace dedup keyed on the full Maven coordinate string
|
|
3279
|
+
// (`g:a:v[:classifier]`). The metadata cquery emits one entry per rule,
|
|
3280
|
+
// so the same `androidx.annotation:annotation:1.8.2` can show up in
|
|
3281
|
+
// `examples/dagger/@maven` and `examples/ksp/@maven` in rules_kotlin —
|
|
3282
|
+
// downstream only needs it once. Each occurrence resolves its edges against
|
|
3283
|
+
// its own repo's targets, so the resolved `deps` can legitimately differ
|
|
3284
|
+
// between occurrences; union them rather than keeping only the first, or
|
|
3285
|
+
// real graph edges would be silently dropped.
|
|
3286
|
+
/**
 * Collapse artifacts that share the same `mavenCoordinates` string into one
 * entry, unioning their `deps`.
 *
 * The first occurrence supplies every other field. Its `deps` array is
 * copied, so the input objects are never mutated. Later occurrences
 * contribute only dependencies not already present, appended in encounter
 * order.
 *
 * @param {Array<{mavenCoordinates: string, deps: string[]}>} artifacts
 * @returns {Array<object>} One shallow-copied artifact per distinct
 *   coordinate, in first-seen order.
 */
function dedupArtifactsByCoord(artifacts) {
  const byCoord = new Map();
  for (const a of artifacts) {
    const existing = byCoord.get(a.mavenCoordinates);
    if (!existing) {
      byCoord.set(a.mavenCoordinates, {
        ...a,
        deps: [...a.deps]
      });
      continue;
    }
    // Union rather than keep-first: each occurrence may resolve different edges.
    const merged = new Set(existing.deps);
    for (const dep of a.deps) {
      merged.add(dep);
    }
    existing.deps = [...merged];
  }
  return [...byCoord.values()];
}
|
|
3305
|
+
// Dedup, normalize, and write one hub's manifest. The path mirrors the
|
|
3306
|
+
// workspace tree: `<manifestDir>/<relPath>/<name>.json`, where `<name>` is
|
|
3307
|
+
// `maven_install.json` for a hub literally named `maven`, else
|
|
3308
|
+
// `<hub>_maven_install.json` (matching the server walker's
|
|
3309
|
+
// `**/*_maven_install.json` glob). The root workspace (`relPath===''`) writes
|
|
3310
|
+
// at `<manifestDir>/<name>.json`. Returns `manifestPath: undefined` (no file
|
|
3311
|
+
// written) when the hub yields zero valid artifacts, plus the dropped/pruned
|
|
3312
|
+
// accounting so the caller can flip the hub partial.
|
|
3313
|
+
/**
 * Dedup, normalize, and write one hub's manifest file.
 *
 * The file lands at `<manifestDir>/<relPath>/<fileName>`, or directly under
 * `<manifestDir>` when `relPath` is empty. `<fileName>` is
 * `maven_install.json` for a hub named exactly `maven`, otherwise
 * `<repoName>_maven_install.json`. The target directory is created
 * recursively. Nothing is written when normalization yields zero artifacts.
 *
 * @param {object} args
 * @param {Array<object>} args.artifacts Raw artifacts for this hub.
 * @param {string} args.manifestDir Base output directory.
 * @param {string} args.relPath Sub-path under `manifestDir`; falsy for none.
 * @param {string} args.repoName Hub repo name.
 * @returns {Promise<{artifactCount: number, droppedArtifacts: string[], manifestPath: (string|undefined), prunedEdges: string[]}>}
 *   `manifestPath` is `undefined` when no file was written.
 */
async function writeHubManifest(args) {
  const { artifacts, manifestDir, relPath, repoName } = args;
  const deduped = dedupArtifactsByCoord(artifacts);
  const { droppedArtifacts, json, prunedEdges } = normalizeToMavenInstallJson(deduped);
  const artifactCount = Object.keys(json.artifacts).length;
  if (!artifactCount) {
    // Nothing valid to emit: report the accounting without touching disk.
    return {
      artifactCount: 0,
      droppedArtifacts,
      manifestPath: undefined,
      prunedEdges
    };
  }
  const fileName = repoName === 'maven' ? 'maven_install.json' : `${repoName}_maven_install.json`;
  const hubDir = relPath ? path.join(manifestDir, relPath) : manifestDir;
  fs$1.mkdirSync(hubDir, {
    recursive: true
  });
  const manifestPath = path.join(hubDir, fileName);
  await fs$1.promises.writeFile(manifestPath, JSON.stringify(json, null, 2), 'utf8');
  return {
    artifactCount,
    droppedArtifacts,
    manifestPath,
    prunedEdges
  };
}
|
|
3349
|
+
|
|
3350
|
+
// Build the per-workspace candidate Maven hub list.
|
|
3351
|
+
//
|
|
3352
|
+
// Bzlmod mode: trust `bazel mod show_extension` as the authoritative hub
|
|
3353
|
+
// list, keeping only hubs imported by <root>.
|
|
3354
|
+
//
|
|
3355
|
+
// WORKSPACE mode: no equivalent of `show_extension`, so probe the
|
|
3356
|
+
// conventional hub names.
|
|
3357
|
+
//
|
|
3358
|
+
// On `show_extension` failure (or a parse that yields zero root hubs) under
|
|
3359
|
+
// Bzlmod, fall through to the conventional-name probe so partial discovery
|
|
3360
|
+
// is still possible.
|
|
3361
|
+
/**
 * Build the candidate Maven hub list for one workspace.
 *
 * When `mode.bzlmod` is set, `runBazelModShowMavenExtension` is run and, on
 * exit code 0, its parsed output is filtered to hubs whose importers include
 * `ROOT_MODULE_IMPORTER`. If that yields at least one hub, the list is
 * returned as-is. Otherwise (not Bzlmod, a failed `show_extension`, or zero
 * root-imported hubs) each name in `CONVENTIONAL_MAVEN_REPO_NAMES` is probed
 * sequentially and the ones classified `'populated'` are kept.
 *
 * @param {string} workspaceRoot Workspace path; used only in log messages.
 * @param {{bzlmod: boolean}} mode Detected workspace mode.
 * @param {object} queryOpts Options forwarded to the Bazel runners.
 * @param {boolean} verbose Emit `[VERBOSE]` diagnostics when truthy.
 * @returns {Promise<string[]>} Candidate hub repo names.
 */
async function discoverCandidatesForWorkspace(workspaceRoot, mode, queryOpts, verbose) {
  const candidates = [];
  let showExtensionSucceeded = false;
  if (mode.bzlmod) {
    const extResult = await runBazelModShowMavenExtension(queryOpts);
    if (extResult.code === 0) {
      // Keep only hubs imported by <root>; hubs imported solely by other
      // modules are filtered out.
      const entries = parseShowExtensionOutput(extResult.stdout);
      const kept = entries.filter(e => e.importers.includes(ROOT_MODULE_IMPORTER));
      candidates.push(...kept.map(e => e.name));
      // Gate the probe fallback on the KEPT count, not the raw parse: a
      // report listing only non-root hubs must still fall through to
      // conventional probing.
      showExtensionSucceeded = kept.length > 0;
      if (verbose) {
        logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension kept root hub(s)`, kept.map(e => e.name));
        for (const dropped of entries) {
          if (!dropped.importers.includes(ROOT_MODULE_IMPORTER)) {
            logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: dropped ${dropped.name} — imported by ${dropped.importers.join(', ')}, not ${ROOT_MODULE_IMPORTER}`);
          }
        }
      }
    } else if (verbose) {
      logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`);
    }
  }
  // When show_extension produced root hubs, probe nothing extra; otherwise
  // probe the conventional hub names not already listed.
  const seen = new Set(candidates);
  const toProbe = (showExtensionSucceeded ? [] : [...CONVENTIONAL_MAVEN_REPO_NAMES]).filter(name => !seen.has(name));
  if (!toProbe.length) {
    return candidates;
  }
  const probe = buildMavenProbeFor(queryOpts);
  for (const name of toProbe) {
    // Probes run one at a time, in list order.
    // eslint-disable-next-line no-await-in-loop
    const status = await probeCandidate(name, probe, verbose);
    if (status === 'populated') {
      candidates.push(name);
      seen.add(name);
    }
  }
  return candidates;
}
|
|
2954
3411
|
|
|
2955
|
-
//
|
|
2956
|
-
//
|
|
2957
|
-
//
|
|
2958
|
-
|
|
2959
|
-
// want is at `<output_base>/external/`. `path.join` is purely lexical and
|
|
2960
|
-
// would collapse `bazel-out/..` to the cwd itself, which is the wrong place
|
|
2961
|
-
// Resolve the symlink at the filesystem level and walk up to
|
|
2962
|
-
// `<output_base>` instead.
|
|
2963
|
-
function bazelExternalDir(cwd, outputBase) {
|
|
2964
|
-
if (outputBase) {
|
|
2965
|
-
return path.join(outputBase, 'external');
|
|
2966
|
-
}
|
|
2967
|
-
const bazelOutLink = path.join(cwd, 'bazel-out');
|
|
2968
|
-
if (!fs$1.existsSync(bazelOutLink)) {
|
|
2969
|
-
return null;
|
|
2970
|
-
}
|
|
3412
|
+
// Best-effort shutdown of the Bazel server bound to `outputUserRoot`.
//
// Invokes `<bin> --output_user_root=<outputUserRoot> shutdown` with a
// REAP_TIMEOUT_MS timeout so a wedged server cannot hang the cleanup itself.
// Never rejects: a failed or timed-out shutdown is swallowed (and logged only
// when `verbose` is set) because the caller removes the output_user_root
// afterwards regardless.
//
//   bin            - Bazel executable to invoke.
//   outputUserRoot - the --output_user_root the server was started under.
//   verbose        - when truthy, log the reason the shutdown failed.
async function reapBazelServer(bin, outputUserRoot, verbose) {
  const shutdownArgs = [`--output_user_root=${outputUserRoot}`, 'shutdown'];
  try {
    await spawn.spawn(bin, shutdownArgs, {
      timeout: REAP_TIMEOUT_MS
    });
  } catch (e) {
    // Server may already be gone, or the shutdown itself timed out. Either
    // way the tempdir removal that follows is sufficient cleanup.
    if (!verbose) {
      return;
    }
    logger.logger.log(`[VERBOSE] reapBazelServer: shutdown failed for ${outputUserRoot} (${utils.getErrorCause(e)}); tempdir removal will still run`);
  }
}
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
return false;
|
|
2991
|
-
}
|
|
2992
|
-
const normalized = raw.toLowerCase();
|
|
2993
|
-
return normalized === '1' || normalized === 'true' || normalized === 'yes';
|
|
2994
|
-
}
|
|
2995
|
-
|
|
2996
|
-
// Tries `external/<repo>/unsorted_deps.json` first; falls back to parsing the
|
|
2997
|
-
// probe stdout the caller already captured during discovery. Discovery runs
|
|
2998
|
-
// the same `kind("jvm_import rule|aar_import rule", @<repo>//:*)` query that
|
|
2999
|
-
// extraction needs, so reusing its stdout skips one bazel-query invocation
|
|
3000
|
-
// per repo on the unpinned path (where unsorted_deps.json isn't on disk).
|
|
3001
|
-
async function extractFromOneRepo(repoName, queryOpts, cachedProbeStdout) {
|
|
3002
|
-
const verbose = queryOpts.verbose;
|
|
3003
|
-
// unsorted_deps.json lives under the bazel external dir.
|
|
3004
|
-
// When --output_base is set, it's under that; otherwise under the workspace's
|
|
3005
|
-
// bazel-out symlink (resolved via realpath, NOT lexical path.join — the
|
|
3006
|
-
// lexical form would collapse `bazel-out/..` to cwd and miss the file).
|
|
3007
|
-
const externalDir = bazelExternalDir(queryOpts.cwd, queryOpts.bazelOutputBase);
|
|
3008
|
-
if (verbose) {
|
|
3009
|
-
logger.logger.log(`[VERBOSE] @${repoName}: external dir:`, externalDir ?? '(unresolved — bazel-out symlink absent)');
|
|
3010
|
-
}
|
|
3011
|
-
const forceFallback = isForceQueryFallbackEnabled();
|
|
3012
|
-
if (forceFallback && verbose) {
|
|
3013
|
-
logger.logger.log(`[VERBOSE] @${repoName}: SOCKET_BAZEL_FORCE_QUERY_FALLBACK set; skipping unsorted_deps.json fast path.`);
|
|
3014
|
-
}
|
|
3015
|
-
const candidates = forceFallback ? [] : externalDir ? [path.join(externalDir, repoName, 'unsorted_deps.json')] : [];
|
|
3016
|
-
for (const c of candidates) {
|
|
3017
|
-
if (fs$1.existsSync(c)) {
|
|
3018
|
-
// Bound the read to 1GB to prevent OOM on hostile content while allowing large real-world lockfiles.
|
|
3019
|
-
// eslint-disable-next-line no-await-in-loop
|
|
3020
|
-
const stat = await fs$1.promises.stat(c);
|
|
3021
|
-
if (stat.size > 1024 * 1024 * 1024) {
|
|
3022
|
-
logger.logger.warn(`Skipping oversized ${c} (${stat.size} bytes); falling back to cached probe stdout.`);
|
|
3023
|
-
break;
|
|
3024
|
-
}
|
|
3025
|
-
const json = fs$1.readFileSync(c, 'utf8');
|
|
3026
|
-
const parsed = parseUnsortedDepsJson(json);
|
|
3027
|
-
if (parsed.length) {
|
|
3028
|
-
if (verbose) {
|
|
3029
|
-
logger.logger.log(`[VERBOSE] @${repoName}: source=unsorted_deps.json (${c}, ${parsed.length} artifact(s))`);
|
|
3030
|
-
}
|
|
3031
|
-
return parsed.map(a => ({
|
|
3032
|
-
...a,
|
|
3033
|
-
sourceRepo: repoName
|
|
3034
|
-
}));
|
|
3035
|
-
}
|
|
3036
|
-
} else if (verbose) {
|
|
3037
|
-
logger.logger.log(`[VERBOSE] @${repoName}: unsorted_deps.json miss at`, c);
|
|
3428
|
+
// Recursively delete `dir`, best effort.
//
// Uses `force: true`, so a path that no longer exists is not an error. Any
// other removal failure is swallowed; with `verbose` set it is logged so a
// lingering directory can be explained. Always resolves to undefined.
//
//   dir     - directory to remove (an output_user_root tempdir).
//   verbose - when truthy, log the reason the removal failed.
async function removeTempdir(dir, verbose) {
  const rmOptions = {
    recursive: true,
    force: true
  };
  try {
    await fs$1.promises.rm(dir, rmOptions);
  } catch (e) {
    // Best effort. The next CLI invocation lands a fresh tempdir.
    if (!verbose) {
      return;
    }
    logger.logger.log(`[VERBOSE] removeTempdir: ${dir} not fully removed (${utils.getErrorCause(e)}); a stale dir may linger until the next OS tempdir sweep`);
  }
}
|
|
3441
|
+
// Create a fresh, uniquely named `socket-bazel-*` directory under the OS
// temp dir and return its path. Used as a throwaway Bazel --output_user_root.
function makeOutputUserRoot() {
  const prefix = path.join(os.tmpdir(), 'socket-bazel-');
  return fs$1.mkdtempSync(prefix);
}
|
|
3444
|
+
|
|
3445
|
+
// Assemble the per-workspace Bazel query options object.
//
// Lives at module scope (rather than as a closure inside the per-workspace
// loop) to satisfy ESLint's consistent-function-scoping, so everything it
// needs arrives through `args`:
//
//   args.bin, args.invocationFlags, args.outputUserRoot, args.verbose
//                   - copied through unchanged.
//   args.spawnCwd   - exposed as `cwd`.
//   args.baseEnv    - exposed as `env`, only when truthy.
//   args.opts       - caller options; `bazelRc`, `bazelFlags` and
//                     `bazelOutputBase` are copied only when truthy.
//
// Optional keys are omitted entirely (not set to undefined) when absent.
function buildQueryOpts(args) {
  const { opts } = args;
  const queryOpts = {
    bin: args.bin,
    cwd: args.spawnCwd,
    invocationFlags: args.invocationFlags,
    outputUserRoot: args.outputUserRoot
  };
  if (opts.bazelRc) {
    queryOpts.bazelRc = opts.bazelRc;
  }
  if (opts.bazelFlags) {
    queryOpts.bazelFlags = opts.bazelFlags;
  }
  if (opts.bazelOutputBase) {
    queryOpts.bazelOutputBase = opts.bazelOutputBase;
  }
  if (args.baseEnv) {
    queryOpts.env = args.baseEnv;
  }
  // Assigned last so the key order matches the literal form
  // (required keys, optional keys, then `verbose`).
  queryOpts.verbose = args.verbose;
  return queryOpts;
}
|
|
3056
3479
|
async function extractBazelToMaven(opts) {
|
|
3057
3480
|
const {
|
|
@@ -3066,143 +3489,232 @@ async function extractBazelToMaven(opts) {
|
|
|
3066
3489
|
logger.logger.warn(`Warning: cwd does not exist: ${cwd}`);
|
|
3067
3490
|
}
|
|
3068
3491
|
logger.logger.groupEnd();
|
|
3492
|
+
const perRepoTimeoutMs = opts.perRepoTimeoutMs ?? DEFAULT_PER_REPO_TIMEOUT_MS;
|
|
3493
|
+
|
|
3494
|
+
// Validate config + ensure toolchains BEFORE we mint a tempdir.
|
|
3495
|
+
let bin;
|
|
3496
|
+
let baseEnv;
|
|
3069
3497
|
try {
|
|
3070
|
-
// Validate caller-provided Bazel filesystem settings before invoking Bazel.
|
|
3071
3498
|
if (opts.bazelOutputBase) {
|
|
3072
3499
|
validateOutputBase(opts.bazelOutputBase, opts.cwd);
|
|
3073
3500
|
}
|
|
3074
|
-
// Java must be available before rules_jvm_external/Coursier runs;
|
|
3075
|
-
// python shim follows so its augmented PATH inherits the JDK prefix.
|
|
3076
3501
|
ensureJavaOnPath();
|
|
3077
3502
|
const shim = await provisionPythonShim();
|
|
3078
|
-
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
logger.logger.info(`Workspace mode: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
|
|
3083
|
-
const invocationFlags = getBazelInvocationFlags(mode);
|
|
3084
|
-
|
|
3085
|
-
// Step 2: bazel binary resolution.
|
|
3086
|
-
const bin = await resolveBazelBinary(opts.bin);
|
|
3087
|
-
logger.logger.info(`Using bazel: ${bin}`);
|
|
3503
|
+
baseEnv = shim.augmentedEnv ?? opts.env;
|
|
3504
|
+
bin = await resolveBazelBinary(opts.bin);
|
|
3505
|
+
} catch (e) {
|
|
3506
|
+
logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
|
|
3088
3507
|
if (verbose) {
|
|
3089
|
-
logger.logger.
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
bazelOutputBase: opts.bazelOutputBase ?? '(unset)',
|
|
3093
|
-
bazelFlags: opts.bazelFlags ?? '(unset)',
|
|
3094
|
-
invocationFlags
|
|
3095
|
-
});
|
|
3508
|
+
logger.logger.group('[VERBOSE] error:');
|
|
3509
|
+
logger.logger.log(e);
|
|
3510
|
+
logger.logger.groupEnd();
|
|
3096
3511
|
}
|
|
3512
|
+
return {
|
|
3513
|
+
artifactCount: 0,
|
|
3514
|
+
manifestPaths: [],
|
|
3515
|
+
status: 'hardFailure'
|
|
3516
|
+
};
|
|
3517
|
+
}
|
|
3518
|
+
logger.logger.info(`Using bazel: ${bin}`);
|
|
3097
3519
|
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
3520
|
+
// Track every output_user_root we mint so we can reap them all in
|
|
3521
|
+
// the cleanup pass, even if a per-repo timeout forced a re-mint.
|
|
3522
|
+
let outputUserRoot = makeOutputUserRoot();
|
|
3523
|
+
const mintedRoots = [outputUserRoot];
|
|
3524
|
+
if (verbose) {
|
|
3525
|
+
logger.logger.log(`[VERBOSE] initial --output_user_root=${outputUserRoot} (will be reaped on completion)`);
|
|
3526
|
+
}
|
|
3527
|
+
const layout = opts.outLayout ?? 'standalone';
|
|
3528
|
+
const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
|
|
3529
|
+
// One manifest per (workspace, hub), written best-effort: a single wedged
|
|
3530
|
+
// hub must not discard the manifests every other hub produced.
|
|
3531
|
+
const manifestPaths = [];
|
|
3532
|
+
let totalArtifacts = 0;
|
|
3533
|
+
let anyRepos = false;
|
|
3534
|
+
let hubsSucceeded = 0;
|
|
3535
|
+
let hubsFailed = 0;
|
|
3536
|
+
try {
|
|
3537
|
+
// Always apply the default prune policy so no caller can forget it;
|
|
3538
|
+
// callers EXTEND it via ignoreDirNames/ignoreDirPrefixes.
|
|
3539
|
+
const ignoreDirNames = new Set([...DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES, ...(opts.ignoreDirNames ?? [])]);
|
|
3540
|
+
const ignoreDirPrefixes = [...DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES, ...(opts.ignoreDirPrefixes ?? [])];
|
|
3541
|
+
const workspaceRoots = findWorkspaceRoots({
|
|
3101
3542
|
cwd,
|
|
3102
|
-
|
|
3103
|
-
|
|
3104
|
-
bazelRc: opts.bazelRc
|
|
3105
|
-
} : {}),
|
|
3106
|
-
...(opts.bazelFlags ? {
|
|
3107
|
-
bazelFlags: opts.bazelFlags
|
|
3108
|
-
} : {}),
|
|
3109
|
-
...(opts.bazelOutputBase ? {
|
|
3110
|
-
bazelOutputBase: opts.bazelOutputBase
|
|
3111
|
-
} : {}),
|
|
3112
|
-
...(baseEnv ? {
|
|
3113
|
-
env: baseEnv
|
|
3114
|
-
} : {}),
|
|
3543
|
+
ignoreDirNames,
|
|
3544
|
+
ignoreDirPrefixes,
|
|
3115
3545
|
verbose
|
|
3116
|
-
};
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3546
|
+
});
|
|
3547
|
+
if (!workspaceRoots.length) {
|
|
3548
|
+
logger.logger.warn(`No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`);
|
|
3549
|
+
return {
|
|
3550
|
+
artifactCount: 0,
|
|
3551
|
+
manifestPaths: [],
|
|
3552
|
+
status: 'noEcosystem'
|
|
3553
|
+
};
|
|
3554
|
+
}
|
|
3555
|
+
if (verbose) {
|
|
3556
|
+
logger.logger.log(`[VERBOSE] discovered ${workspaceRoots.length} workspace root(s):`, workspaceRoots);
|
|
3557
|
+
}
|
|
3558
|
+
for (const workspaceRoot of workspaceRoots) {
|
|
3559
|
+
const relPath = path.relative(cwd, workspaceRoot);
|
|
3560
|
+
let mode;
|
|
3561
|
+
try {
|
|
3562
|
+
mode = detectWorkspaceMode(workspaceRoot);
|
|
3563
|
+
} catch (e) {
|
|
3126
3564
|
if (verbose) {
|
|
3127
|
-
logger.logger.log(
|
|
3565
|
+
logger.logger.log(`[VERBOSE] workspace ${workspaceRoot}: detect failed (${utils.getErrorCause(e)}); skipping`);
|
|
3128
3566
|
}
|
|
3129
|
-
|
|
3130
|
-
logger.logger.log('[VERBOSE] bazel mod show_repo failed; falling back to static candidate parsing:', visibleRepos.stderr);
|
|
3567
|
+
continue;
|
|
3131
3568
|
}
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3569
|
+
logger.logger.info(`Workspace ${relPath || '.'}: bzlmod=${mode.bzlmod} workspace=${mode.workspace}`);
|
|
3570
|
+
const invocationFlags = getBazelInvocationFlags(mode);
|
|
3571
|
+
const queryOptsFor = userRoot => buildQueryOpts({
|
|
3572
|
+
baseEnv,
|
|
3573
|
+
bin,
|
|
3574
|
+
invocationFlags,
|
|
3575
|
+
opts,
|
|
3576
|
+
outputUserRoot: userRoot,
|
|
3577
|
+
spawnCwd: workspaceRoot,
|
|
3578
|
+
verbose
|
|
3579
|
+
});
|
|
3139
3580
|
|
|
3140
|
-
// Step 5: extract artifacts from each repo (preferring unsorted_deps.json).
|
|
3141
|
-
const allArtifacts = [];
|
|
3142
|
-
for (const [repo, probeStdout] of repos) {
|
|
3143
3581
|
// eslint-disable-next-line no-await-in-loop
|
|
3144
|
-
const
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
|
|
3149
|
-
|
|
3150
|
-
const normalized = normalizeToMavenInstallJson(allArtifacts);
|
|
3151
|
-
|
|
3152
|
-
// Step 7: write outputs.
|
|
3153
|
-
// Standalone output writes directly to `out`; auto-manifest uses a sibling directory
|
|
3154
|
-
// to avoid colliding with a repo's checked-in rules_jvm_external lockfile and
|
|
3155
|
-
// to avoid repo-root gitignore patterns such as `/maven_install.json`.
|
|
3156
|
-
const layout = opts.outLayout ?? 'standalone';
|
|
3157
|
-
const manifestDir = layout === 'flat' ? path.join(out, '.socket-auto-manifest') : out;
|
|
3158
|
-
fs$1.mkdirSync(manifestDir, {
|
|
3159
|
-
recursive: true
|
|
3160
|
-
});
|
|
3161
|
-
const manifestPath = path.join(manifestDir, 'maven_install.json');
|
|
3162
|
-
await fs$1.promises.writeFile(manifestPath, JSON.stringify(normalized, null, 2), 'utf8');
|
|
3163
|
-
if (verbose) {
|
|
3164
|
-
logger.logger.log('[VERBOSE] outputs:', {
|
|
3165
|
-
artifactCount: allArtifacts.length,
|
|
3166
|
-
generatedManifest: path.relative(out, manifestPath),
|
|
3167
|
-
layout,
|
|
3168
|
-
manifest: manifestPath,
|
|
3169
|
-
mavenRepos: repoNames,
|
|
3170
|
-
tool: 'socket manifest bazel',
|
|
3171
|
-
workspace: {
|
|
3172
|
-
bzlmod: mode.bzlmod,
|
|
3173
|
-
legacyWorkspace: mode.workspace
|
|
3582
|
+
const candidates = await discoverCandidatesForWorkspace(workspaceRoot, mode, queryOptsFor(outputUserRoot), verbose);
|
|
3583
|
+
logger.logger.info(`Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${candidates.join(', ') || '(none)'}`);
|
|
3584
|
+
for (const repoName of candidates) {
|
|
3585
|
+
anyRepos = true;
|
|
3586
|
+
if (verbose) {
|
|
3587
|
+
logger.logger.log(`[VERBOSE] workspace ${relPath || '.'}: running metadata cquery for @${repoName} (timeout ${perRepoTimeoutMs}ms)`);
|
|
3174
3588
|
}
|
|
3175
|
-
|
|
3589
|
+
// eslint-disable-next-line no-await-in-loop
|
|
3590
|
+
const result = await runMetadataCqueryForRepo({
|
|
3591
|
+
opts: queryOptsFor(outputUserRoot),
|
|
3592
|
+
repoName,
|
|
3593
|
+
timeoutMs: perRepoTimeoutMs,
|
|
3594
|
+
workspaceRelPath: relPath,
|
|
3595
|
+
workspaceRoot
|
|
3596
|
+
});
|
|
3597
|
+
if (result.status === 'timeout') {
|
|
3598
|
+
logger.logger.warn(`@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`);
|
|
3599
|
+
hubsFailed += 1;
|
|
3600
|
+
// eslint-disable-next-line no-await-in-loop
|
|
3601
|
+
await reapBazelServer(bin, outputUserRoot, verbose);
|
|
3602
|
+
// eslint-disable-next-line no-await-in-loop
|
|
3603
|
+
await removeTempdir(outputUserRoot, verbose);
|
|
3604
|
+
outputUserRoot = makeOutputUserRoot();
|
|
3605
|
+
mintedRoots.push(outputUserRoot);
|
|
3606
|
+
if (verbose) {
|
|
3607
|
+
logger.logger.log(`[VERBOSE] minted fresh --output_user_root=${outputUserRoot} after timeout`);
|
|
3608
|
+
}
|
|
3609
|
+
continue;
|
|
3610
|
+
}
|
|
3611
|
+
if (result.status === 'error') {
|
|
3612
|
+
logger.logger.warn(`@${repoName}: cquery failed; skipping this hub`);
|
|
3613
|
+
hubsFailed += 1;
|
|
3614
|
+
continue;
|
|
3615
|
+
}
|
|
3616
|
+
// A scan must never silently upload a graph missing edges it knows
|
|
3617
|
+
// it dropped: warn unconditionally and treat the hub as partial.
|
|
3618
|
+
let hubPartial = result.unresolvedLabels.length > 0;
|
|
3619
|
+
if (hubPartial) {
|
|
3620
|
+
logger.logger.warn(`@${repoName}: dropped ${result.unresolvedLabels.length} unresolved dependency edge(s): ${result.unresolvedLabels.join(', ')}`);
|
|
3621
|
+
}
|
|
3622
|
+
// A non-zero cquery exit that still yielded a usable subset
|
|
3623
|
+
// (--keep_going) is reported as `partial` even with no unresolved
|
|
3624
|
+
// labels — the graph is known-incomplete, so flip the hub partial.
|
|
3625
|
+
if (result.status === 'partial' && !result.unresolvedLabels.length) {
|
|
3626
|
+
hubPartial = true;
|
|
3627
|
+
logger.logger.warn(`@${repoName}: cquery partially failed (--keep_going); the dependency graph may be incomplete`);
|
|
3628
|
+
}
|
|
3629
|
+
let written;
|
|
3630
|
+
try {
|
|
3631
|
+
// eslint-disable-next-line no-await-in-loop
|
|
3632
|
+
written = await writeHubManifest({
|
|
3633
|
+
artifacts: result.artifacts,
|
|
3634
|
+
cwd,
|
|
3635
|
+
manifestDir,
|
|
3636
|
+
relPath,
|
|
3637
|
+
repoName,
|
|
3638
|
+
verbose
|
|
3639
|
+
});
|
|
3640
|
+
} catch (e) {
|
|
3641
|
+
// Best-effort per hub: a write failure must not abort the walk and
|
|
3642
|
+
// discard the manifests other hubs already produced.
|
|
3643
|
+
logger.logger.warn(`@${repoName}: failed to write manifest (${utils.getErrorCause(e)}); skipping this hub`);
|
|
3644
|
+
hubsFailed += 1;
|
|
3645
|
+
continue;
|
|
3646
|
+
}
|
|
3647
|
+
if (written.droppedArtifacts.length) {
|
|
3648
|
+
hubPartial = true;
|
|
3649
|
+
logger.logger.warn(`@${repoName}: dropped ${written.droppedArtifacts.length} malformed Maven coordinate(s): ${written.droppedArtifacts.join(', ')}`);
|
|
3650
|
+
}
|
|
3651
|
+
if (written.prunedEdges.length) {
|
|
3652
|
+
hubPartial = true;
|
|
3653
|
+
logger.logger.warn(`@${repoName}: pruned ${written.prunedEdges.length} dependency edge(s) referencing unlisted artifacts: ${written.prunedEdges.join(', ')}`);
|
|
3654
|
+
}
|
|
3655
|
+
if (written.manifestPath) {
|
|
3656
|
+
manifestPaths.push(written.manifestPath);
|
|
3657
|
+
totalArtifacts += written.artifactCount;
|
|
3658
|
+
if (hubPartial) {
|
|
3659
|
+
hubsFailed += 1;
|
|
3660
|
+
} else {
|
|
3661
|
+
hubsSucceeded += 1;
|
|
3662
|
+
}
|
|
3663
|
+
if (verbose) {
|
|
3664
|
+
logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status}, ${written.artifactCount} artifact(s) -> ${written.manifestPath}`);
|
|
3665
|
+
}
|
|
3666
|
+
} else {
|
|
3667
|
+
// No artifacts to write (empty hub). Not itself a failure, but if
|
|
3668
|
+
// edges were dropped the partial signal still applies.
|
|
3669
|
+
if (hubPartial) {
|
|
3670
|
+
hubsFailed += 1;
|
|
3671
|
+
}
|
|
3672
|
+
if (verbose) {
|
|
3673
|
+
logger.logger.log(`[VERBOSE] @${repoName}: status=${result.status} (no manifest written)`);
|
|
3674
|
+
}
|
|
3675
|
+
}
|
|
3676
|
+
}
|
|
3176
3677
|
}
|
|
3177
|
-
if (!
|
|
3178
|
-
if (!
|
|
3678
|
+
if (!manifestPaths.length) {
|
|
3679
|
+
if (!anyRepos) {
|
|
3179
3680
|
if (verbose) {
|
|
3180
3681
|
logger.logger.info('No Maven artifacts extracted. failureCategory=no-supported-ecosystem');
|
|
3181
3682
|
}
|
|
3182
3683
|
return {
|
|
3183
3684
|
artifactCount: 0,
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
ok: false
|
|
3685
|
+
manifestPaths: [],
|
|
3686
|
+
status: 'noEcosystem'
|
|
3187
3687
|
};
|
|
3188
3688
|
}
|
|
3189
|
-
logger.logger.fail(
|
|
3689
|
+
logger.logger.fail('Discovered Maven repo(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty');
|
|
3190
3690
|
return {
|
|
3191
3691
|
artifactCount: 0,
|
|
3192
|
-
|
|
3193
|
-
|
|
3692
|
+
manifestPaths: [],
|
|
3693
|
+
status: 'hardFailure'
|
|
3194
3694
|
};
|
|
3195
3695
|
}
|
|
3196
|
-
|
|
3696
|
+
const status = hubsFailed ? 'partial' : 'complete';
|
|
3697
|
+
if (status === 'complete') {
|
|
3698
|
+
logger.logger.success(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total.`);
|
|
3699
|
+
} else {
|
|
3700
|
+
logger.logger.warn(`Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete.`);
|
|
3701
|
+
}
|
|
3702
|
+
if (verbose) {
|
|
3703
|
+
logger.logger.log('[VERBOSE] outputs:', {
|
|
3704
|
+
artifactCount: totalArtifacts,
|
|
3705
|
+
hubsFailed,
|
|
3706
|
+
hubsSucceeded,
|
|
3707
|
+
layout,
|
|
3708
|
+
manifestPaths,
|
|
3709
|
+
status
|
|
3710
|
+
});
|
|
3711
|
+
}
|
|
3197
3712
|
return {
|
|
3198
|
-
artifactCount:
|
|
3199
|
-
|
|
3200
|
-
|
|
3713
|
+
artifactCount: totalArtifacts,
|
|
3714
|
+
manifestPaths,
|
|
3715
|
+
status
|
|
3201
3716
|
};
|
|
3202
3717
|
} catch (e) {
|
|
3203
|
-
// Always surface the error message; users should not have to
|
|
3204
|
-
// re-run a multi-minute bazel build with --verbose just to see whether
|
|
3205
|
-
// the failure was a missing dependency, permission error, or network blip.
|
|
3206
3718
|
logger.logger.fail(`Unexpected error in bazel2maven: ${utils.getErrorCause(e)}`);
|
|
3207
3719
|
if (verbose) {
|
|
3208
3720
|
logger.logger.group('[VERBOSE] error:');
|
|
@@ -3213,8 +3725,16 @@ async function extractBazelToMaven(opts) {
|
|
|
3213
3725
|
}
|
|
3214
3726
|
return {
|
|
3215
3727
|
artifactCount: 0,
|
|
3216
|
-
|
|
3728
|
+
manifestPaths: [],
|
|
3729
|
+
status: 'hardFailure'
|
|
3217
3730
|
};
|
|
3731
|
+
} finally {
|
|
3732
|
+
for (const dir of mintedRoots) {
|
|
3733
|
+
// eslint-disable-next-line no-await-in-loop
|
|
3734
|
+
await reapBazelServer(bin, dir, verbose);
|
|
3735
|
+
// eslint-disable-next-line no-await-in-loop
|
|
3736
|
+
await removeTempdir(dir, verbose);
|
|
3737
|
+
}
|
|
3218
3738
|
}
|
|
3219
3739
|
}
|
|
3220
3740
|
|
|
@@ -4084,12 +4604,20 @@ async function generateAutoManifest({
|
|
|
4084
4604
|
outLayout: 'flat',
|
|
4085
4605
|
verbose: Boolean(bazelConfig?.verbose) || verbose
|
|
4086
4606
|
});
|
|
4087
|
-
|
|
4607
|
+
|
|
4608
|
+
// Only a hard failure (zero manifests, ecosystem present) aborts the
|
|
4609
|
+
// wider scan. A partial run still produced manifests worth uploading; an
|
|
4610
|
+
// absent ecosystem is tolerated here (it's only an error when EVERY
|
|
4611
|
+
// ecosystem is absent, which the caller decides).
|
|
4612
|
+
if (mavenResult.status === 'hardFailure') {
|
|
4088
4613
|
throw new Error('Bazel auto-manifest generation failed for ecosystem(s): maven');
|
|
4089
4614
|
}
|
|
4090
|
-
if (mavenResult.
|
|
4091
|
-
generatedFiles.push(mavenResult.
|
|
4092
|
-
|
|
4615
|
+
if (mavenResult.status === 'complete' || mavenResult.status === 'partial') {
|
|
4616
|
+
generatedFiles.push(...mavenResult.manifestPaths);
|
|
4617
|
+
if (mavenResult.status === 'partial') {
|
|
4618
|
+
logger.logger.warn(`Bazel Maven manifest generation was partial (${mavenResult.manifestPaths.length} manifest(s) written); some hubs failed or had incomplete dependency graphs. Uploading what was generated.`);
|
|
4619
|
+
}
|
|
4620
|
+
} else {
|
|
4093
4621
|
logger.logger.info('No supported Bazel Maven ecosystem detected.');
|
|
4094
4622
|
}
|
|
4095
4623
|
}
|
|
@@ -4307,6 +4835,13 @@ async function handleCreateNewScan({
|
|
|
4307
4835
|
const scanId = fullScanCResult.ok ? fullScanCResult.data?.id : undefined;
|
|
4308
4836
|
if (reach && scanId && tier1ReachabilityScanId) {
|
|
4309
4837
|
await finalizeTier1Scan(tier1ReachabilityScanId, scanId);
|
|
4838
|
+
} else if (reach.runReachabilityAnalysis && scanId && !tier1ReachabilityScanId) {
|
|
4839
|
+
// Reachability analysis ran and a scan was created, but no tier 1
|
|
4840
|
+
// reachability scan id was extracted from the facts file. Surface this
|
|
4841
|
+
// instead of silently skipping finalize — otherwise the tier 1 row stays
|
|
4842
|
+
// stuck (e.g. at COANA_DONE) and the full scan is never linked to its
|
|
4843
|
+
// reachability report.
|
|
4844
|
+
logger.logger.warn('Reachability analysis ran but no tier 1 reachability scan ID was found; skipping tier 1 finalize. The scan was created but its reachability report was not linked.');
|
|
4310
4845
|
}
|
|
4311
4846
|
|
|
4312
4847
|
// On a successful scan, clean up the `.socket.facts.json` coana wrote at
|
|
@@ -7734,6 +8269,85 @@ async function run$G(argv, importMeta, context) {
|
|
|
7734
8269
|
await spawnPromise;
|
|
7735
8270
|
}
|
|
7736
8271
|
|
|
8272
|
+
// Result shape returned by `validatePypiHub`. Kept local to the PyPI module
|
|
8273
|
+
// since validation here is hub-alias-marker based (different from the
|
|
8274
|
+
// Maven-side tri-state classifier).
|
|
8275
|
+
|
|
8276
|
+
// PyPI-only repo-name predicate (Bazel apparent-name grammar): 1 to 129
// characters drawn from letters, digits, '.', '_', '+' and '-'.
const PYPI_REPO_NAME_PATTERN = '[A-Za-z0-9._+-]{1,129}';
const PYPI_REPO_NAME_RE = new RegExp(`^${PYPI_REPO_NAME_PATTERN}$`);

// Return the string stored directly on `obj` under `apparentName` or
// `apparent_name` (checked in that order), or undefined when neither key
// holds a string. Each key is checked on its own so a non-string value under
// one spelling cannot mask a valid string under the other.
function pypiDirectApparentName(obj) {
  for (const key of ['apparentName', 'apparent_name']) {
    const candidate = obj[key];
    if (typeof candidate === 'string') {
      return candidate;
    }
  }
  return undefined;
}

// Depth-first search of a parsed JSON value for an apparent-name field.
//
// Returns the first string found under `apparentName` / `apparent_name`,
// looking at the value itself before descending into its members (objects
// and arrays alike). Returns undefined for non-objects and when nothing is
// found. An empty-string match nested below the top level is skipped so the
// search continues with later siblings.
function pypiApparentNameFromJsonValue(value) {
  if (!value || typeof value !== 'object') {
    return undefined;
  }
  const direct = pypiDirectApparentName(value);
  if (direct !== undefined) {
    return direct;
  }
  for (const nested of Object.values(value)) {
    const found = pypiApparentNameFromJsonValue(nested);
    if (found) {
      return found;
    }
  }
  return undefined;
}

// Extract apparent repo names from a `{ apparentName: canonicalName }` repo
// mapping object.
//
// Returns [] for anything that is not a plain (non-array) object. Keys that
// start with '@', keys whose value is not a string, and keys that fail the
// repo-name grammar are dropped. Order follows the object's own key order.
function pypiApparentNamesFromRepoMapping(value) {
  if (!value || typeof value !== 'object' || Array.isArray(value)) {
    return [];
  }
  return Object.entries(value)
    .filter(([name, canonicalName]) => !name.startsWith('@') && typeof canonicalName === 'string' && PYPI_REPO_NAME_RE.test(name))
    .map(([name]) => name);
}

// Strip one leading '@' from `name` and return the result when it satisfies
// the repo-name grammar; otherwise return undefined.
function pypiNormalizeRepoName(name) {
  const repo = name.startsWith('@') ? name.slice(1) : name;
  return PYPI_REPO_NAME_RE.test(repo) ? repo : undefined;
}

// True when `value` is a plain object that itself carries an apparent-name
// field, i.e. it is a single jsonproto-style record rather than a repo
// mapping keyed by apparent name.
function pypiIsApparentNameRecord(value) {
  return Boolean(value) && typeof value === 'object' && !Array.isArray(value) && pypiDirectApparentName(value) !== undefined;
}

// Parse `bazel mod dump_repo_mapping "" --output=json` output. Also accepts
// the older streamed jsonproto shape (apparentName / apparent_name records).
// PyPI-only; the Maven path consumes `bazel mod show_extension` instead.
//
// `output` is processed line by line; each non-blank line is parsed as JSON
// on its own. Returns the de-duplicated repo names, sorted with the default
// string ordering.
function parseVisibleRepoCandidates(output) {
  const seen = new Set();
  for (const line of output.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed) {
      continue;
    }
    try {
      const parsed = JSON.parse(trimmed);
      // A flat record such as {"apparentName":"pip","canonicalName":"..."}
      // must not be read as a repo mapping: doing so would report the field
      // names themselves ("apparentName", "canonicalName") as repositories.
      if (!pypiIsApparentNameRecord(parsed)) {
        for (const name of pypiApparentNamesFromRepoMapping(parsed)) {
          seen.add(name);
        }
      }
      const apparentName = pypiApparentNameFromJsonValue(parsed);
      if (apparentName) {
        const repo = pypiNormalizeRepoName(apparentName);
        if (repo) {
          seen.add(repo);
        }
      }
    } catch {
      // Skip malformed lines; caller falls back to static discovery when no
      // usable visible repo names are found.
    }
  }
  return [...seen].sort();
}
|
|
8350
|
+
|
|
7737
8351
|
// Maximum size (bytes) we will read for any single Bazel workspace file.
|
|
7738
8352
|
// Prevents DoS via maliciously large MODULE.bazel / WORKSPACE / .bzl files.
|
|
7739
8353
|
const MAX_WORKSPACE_FILE_BYTES = 5 * 1024 * 1024;
|
|
@@ -8676,6 +9290,13 @@ const config$e = {
|
|
|
8676
9290
|
Note: this command generates dependency manifests for Bazel workspaces.
|
|
8677
9291
|
It does not run reachability analysis.
|
|
8678
9292
|
|
|
9293
|
+
Maven hub discovery: under Bzlmod, hubs are enumerated from
|
|
9294
|
+
\`bazel mod show_extension\` and filtered to the root module's own hubs.
|
|
9295
|
+
Under legacy WORKSPACE mode (no \`show_extension\`), only conventionally
|
|
9296
|
+
named hubs are probed (\`maven\`, \`maven_install\`, \`maven_dev\`, …). A hub
|
|
9297
|
+
with a non-conventional name that \`show_extension\` does not enumerate is
|
|
9298
|
+
not discovered yet; a flag to name extra hubs is planned.
|
|
9299
|
+
|
|
8679
9300
|
To generate AND upload in one step, use \`socket scan create --auto-manifest\`
|
|
8680
9301
|
instead — it detects Bazel workspaces, generates Maven manifests by
|
|
8681
9302
|
default, and uploads the result. This subcommand is for generation only.
|
|
@@ -8697,21 +9318,29 @@ const cmdManifestBazel = {
|
|
|
8697
9318
|
// failures that must propagate to a non-zero CLI exit; returns void on
|
|
8698
9319
|
// success.
|
|
8699
9320
|
//
|
|
8700
|
-
// -
|
|
8701
|
-
//
|
|
8702
|
-
//
|
|
8703
|
-
// -
|
|
8704
|
-
//
|
|
8705
|
-
// succeeded
|
|
9321
|
+
// - `complete`/`partial` both count as produced output (>=1 manifest).
|
|
9322
|
+
// `partial` additionally warns — a known-incomplete SBOM is still emitted,
|
|
9323
|
+
// not a hard error.
|
|
9324
|
+
// - `hardFailure`: the ecosystem was detected (or the runner crashed) but
|
|
9325
|
+
// wrote zero manifests. Always a non-zero exit, even when another
|
|
9326
|
+
// ecosystem succeeded.
|
|
9327
|
+
// - `noEcosystem`: genuinely absent ecosystem. Auto-detect mode tolerates it
|
|
9328
|
+
// when at least one other ecosystem produced output; explicit mode treats
|
|
9329
|
+
// it as an error (the user requested an ecosystem that isn't there).
|
|
8706
9330
|
function evaluateEcosystemOutcomes(outcomes, isExplicit) {
|
|
8707
|
-
const
|
|
8708
|
-
const
|
|
8709
|
-
const
|
|
9331
|
+
const produced = outcomes.filter(o => (o.status === 'complete' || o.status === 'partial') && o.manifestPaths.length > 0);
|
|
9332
|
+
const hardFailures = outcomes.filter(o => o.status === 'hardFailure');
|
|
9333
|
+
const noDiscoveries = outcomes.filter(o => o.status === 'noEcosystem');
|
|
9334
|
+
for (const partial of outcomes) {
|
|
9335
|
+
if (partial.status === 'partial') {
|
|
9336
|
+
logger.logger.warn(`Bazel ${partial.ecosystem} manifest generation was partial; the uploaded SBOM is known-incomplete.`);
|
|
9337
|
+
}
|
|
9338
|
+
}
|
|
8710
9339
|
if (!isExplicit) {
|
|
8711
9340
|
if (hardFailures.length) {
|
|
8712
9341
|
throw new utils.InputError(`Bazel auto-manifest generation hit hard failure(s) in ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
|
|
8713
9342
|
}
|
|
8714
|
-
if (
|
|
9343
|
+
if (produced.length) {
|
|
8715
9344
|
return;
|
|
8716
9345
|
}
|
|
8717
9346
|
if (noDiscoveries.length === outcomes.length) {
|
|
@@ -8720,7 +9349,8 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
|
|
|
8720
9349
|
return;
|
|
8721
9350
|
}
|
|
8722
9351
|
|
|
8723
|
-
// Explicit mode: every requested ecosystem must
|
|
9352
|
+
// Explicit mode: every requested ecosystem must produce output. A partial
|
|
9353
|
+
// run counts (it wrote manifests); absent or hard-failed ecosystems error.
|
|
8724
9354
|
if (noDiscoveries.length) {
|
|
8725
9355
|
throw new utils.InputError(`No Bazel rules found for explicitly requested ecosystem(s): ${noDiscoveries.map(f => f.ecosystem).join(', ')}.`);
|
|
8726
9356
|
}
|
|
@@ -8728,6 +9358,32 @@ function evaluateEcosystemOutcomes(outcomes, isExplicit) {
|
|
|
8728
9358
|
throw new utils.InputError(`Bazel manifest generation failed for explicitly requested ecosystem(s): ${hardFailures.map(f => f.ecosystem).join(', ')}.`);
|
|
8729
9359
|
}
|
|
8730
9360
|
}
|
|
9361
|
+
|
|
9362
|
+
// Map the legacy PyPI result shape (single manifestPath + ok/noEcosystem
|
|
9363
|
+
// booleans) into the shared status vocabulary so both ecosystems flow through
|
|
9364
|
+
// one success gate. PyPI has no partial state. Only a `complete` outcome
|
|
9365
|
+
// carries a manifest path; `noEcosystem`/`hardFailure` carry none, preserving
|
|
9366
|
+
// the invariant that a non-success outcome produced no usable output (a
|
|
9367
|
+
// detected-but-empty PyPI run writes a stub file but is still a hard failure,
|
|
9368
|
+
// and that stub must not be surfaced as produced output).
|
|
9369
|
+
/**
 * Translate a legacy PyPI generation result into the shared ecosystem-outcome
 * shape (`manifestPaths` + `status`).
 *
 * Precedence, first match wins:
 *   1. `noEcosystemFound` is truthy          -> `noEcosystem`, no paths.
 *   2. `ok` and `manifestPath` are truthy    -> `complete`, one path.
 *   3. anything else (including no result)   -> `hardFailure`, no paths.
 *
 * Non-success outcomes never carry a path, even when the input has a
 * `manifestPath`, so a file written by a failed run is not reported as output.
 *
 * @param {{ ok?: boolean, noEcosystemFound?: boolean, manifestPath?: string } | null | undefined} result
 *   Legacy PyPI result. A `null`/`undefined` result is reported as a hard
 *   failure instead of throwing a TypeError.
 * @returns {{ manifestPaths: string[], status: 'complete' | 'noEcosystem' | 'hardFailure' }}
 *   A freshly allocated outcome object; the array is never shared between calls.
 */
function pypiOutcome(result) {
  // Checked first so "nothing to do" is never mistaken for a failure,
  // regardless of what the other fields say.
  if (result?.noEcosystemFound) {
    return {
      manifestPaths: [],
      status: 'noEcosystem'
    };
  }
  // Success requires both the ok flag and an actual path; `ok` without a
  // path falls through to the failure case below.
  if (result?.ok && result.manifestPath) {
    return {
      manifestPaths: [result.manifestPath],
      status: 'complete'
    };
  }
  return {
    manifestPaths: [],
    status: 'hardFailure'
  };
}
|
|
8731
9387
|
async function run$F(argv, importMeta, {
|
|
8732
9388
|
parentName
|
|
8733
9389
|
}) {
|
|
@@ -8861,9 +9517,8 @@ async function run$F(argv, importMeta, {
|
|
|
8861
9517
|
});
|
|
8862
9518
|
outcomes.push({
|
|
8863
9519
|
ecosystem: 'maven',
|
|
8864
|
-
|
|
8865
|
-
|
|
8866
|
-
manifestPath: mavenResult.manifestPath
|
|
9520
|
+
manifestPaths: mavenResult.manifestPaths,
|
|
9521
|
+
status: mavenResult.status
|
|
8867
9522
|
});
|
|
8868
9523
|
} else if (eco === 'pypi') {
|
|
8869
9524
|
// eslint-disable-next-line no-await-in-loop
|
|
@@ -8879,9 +9534,7 @@ async function run$F(argv, importMeta, {
|
|
|
8879
9534
|
});
|
|
8880
9535
|
outcomes.push({
|
|
8881
9536
|
ecosystem: 'pypi',
|
|
8882
|
-
|
|
8883
|
-
noEcosystemFound: pypiResult.noEcosystemFound,
|
|
8884
|
-
manifestPath: pypiResult.manifestPath
|
|
9537
|
+
...pypiOutcome(pypiResult)
|
|
8885
9538
|
});
|
|
8886
9539
|
}
|
|
8887
9540
|
}
|
|
@@ -14446,7 +15099,7 @@ const reachabilityFlags = {
|
|
|
14446
15099
|
reachConcurrency: {
|
|
14447
15100
|
type: 'number',
|
|
14448
15101
|
default: 1,
|
|
14449
|
-
description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available.
|
|
15102
|
+
description: 'Set the maximum number of concurrent reachability analysis runs. It is recommended to choose a concurrency level that ensures each analysis run has at least the --reach-analysis-memory-limit amount of memory available.'
|
|
14450
15103
|
},
|
|
14451
15104
|
reachContinueOnAnalysisErrors: {
|
|
14452
15105
|
type: 'boolean',
|
|
@@ -16828,6 +17481,7 @@ async function run$8(argv, importMeta, {
|
|
|
16828
17481
|
}
|
|
16829
17482
|
|
|
16830
17483
|
async function outputScanReach(result, {
|
|
17484
|
+
cwd,
|
|
16831
17485
|
outputKind,
|
|
16832
17486
|
outputPath
|
|
16833
17487
|
}) {
|
|
@@ -16848,7 +17502,11 @@ async function outputScanReach(result, {
|
|
|
16848
17502
|
logger.logger.info(`Reachability report has been written to: ${actualOutputPath}`);
|
|
16849
17503
|
|
|
16850
17504
|
// Warn about individual vulnerabilities where reachability analysis errored.
|
|
16851
|
-
|
|
17505
|
+
// Resolve the report path against the scan `cwd` (not `process.cwd()`):
|
|
17506
|
+
// Coana writes the facts file relative to `cwd` and `reachabilityReport`
|
|
17507
|
+
// is a `cwd`-relative path, so reading the bare relative path would miss
|
|
17508
|
+
// the file whenever `cwd !== process.cwd()` (e.g. `--cwd <dir>`).
|
|
17509
|
+
const errors = utils.extractReachabilityErrors(path.resolve(cwd, result.data.reachabilityReport));
|
|
16852
17510
|
if (errors.length) {
|
|
16853
17511
|
logger.logger.log('');
|
|
16854
17512
|
logger.logger.warn(`Reachability analysis returned ${errors.length} ${words.pluralize('error', errors.length)} for individual ${words.pluralize('vulnerability', errors.length)}:`);
|
|
@@ -16877,6 +17535,7 @@ async function handleScanReach({
|
|
|
16877
17535
|
});
|
|
16878
17536
|
if (!supportedFilesCResult.ok) {
|
|
16879
17537
|
await outputScanReach(supportedFilesCResult, {
|
|
17538
|
+
cwd,
|
|
16880
17539
|
outputKind,
|
|
16881
17540
|
outputPath
|
|
16882
17541
|
});
|
|
@@ -16924,7 +17583,22 @@ async function handleScanReach({
|
|
|
16924
17583
|
uploadManifests: true
|
|
16925
17584
|
});
|
|
16926
17585
|
spinner.stop();
|
|
17586
|
+
|
|
17587
|
+
// Standalone reachability has no full scan to bind to, but the tier1
|
|
17588
|
+
// reachability scan row still needs to transition to its DONE terminal
|
|
17589
|
+
// state — otherwise it sits at the post-Coana intermediate state forever
|
|
17590
|
+
// and looks indistinguishable from a stuck run. Pass `null` as the full
|
|
17591
|
+
// scan id; the endpoint accepts it for this flow. Best-effort: never
|
|
17592
|
+
// block the user-visible output on this.
|
|
17593
|
+
const tier1Id = result.ok ? result.data?.tier1ReachabilityScanId : undefined;
|
|
17594
|
+
if (tier1Id) {
|
|
17595
|
+
const finalizeResult = await finalizeTier1Scan(tier1Id, null);
|
|
17596
|
+
if (!finalizeResult.ok) {
|
|
17597
|
+
logger.logger.warn(`Failed to finalize tier1 reachability scan: ${finalizeResult.message}${finalizeResult.cause ? ` — ${finalizeResult.cause}` : ''}`);
|
|
17598
|
+
}
|
|
17599
|
+
}
|
|
16927
17600
|
await outputScanReach(result, {
|
|
17601
|
+
cwd,
|
|
16928
17602
|
outputKind,
|
|
16929
17603
|
outputPath
|
|
16930
17604
|
});
|
|
@@ -19067,5 +19741,5 @@ process.on('unhandledRejection', async (reason, promise) => {
|
|
|
19067
19741
|
// eslint-disable-next-line n/no-process-exit
|
|
19068
19742
|
process.exit(1);
|
|
19069
19743
|
});
|
|
19070
|
-
//# debugId=
|
|
19744
|
+
//# debugId=614e598d-c01b-4289-b35e-bff2af2ac507
|
|
19071
19745
|
//# sourceMappingURL=cli.js.map
|