pkgxray 0.7.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/audit.js +3 -0
- package/bin/mcp-server.js +5 -0
- package/package.json +1 -1
- package/src/auditor.js +53 -2
- package/src/diff.js +298 -0
- package/src/github.js +135 -2
- package/src/quarantine.js +103 -3
package/bin/audit.js
CHANGED
|
@@ -51,6 +51,9 @@ function parseArgs(argv) {
|
|
|
51
51
|
options.vulnerabilityCheck = false;
|
|
52
52
|
} else if (arg === "--no-github") {
|
|
53
53
|
options.githubMetadata = false;
|
|
54
|
+
options.githubDiff = false;
|
|
55
|
+
} else if (arg === "--no-github-diff") {
|
|
56
|
+
options.githubDiff = false;
|
|
54
57
|
} else {
|
|
55
58
|
throw new Error(`Unknown argument: ${arg}`);
|
|
56
59
|
}
|
package/bin/mcp-server.js
CHANGED
|
@@ -107,6 +107,11 @@ function guardToolDefinition() {
|
|
|
107
107
|
default: true,
|
|
108
108
|
description: "Set false to skip the GitHub provenance cross-check."
|
|
109
109
|
},
|
|
110
|
+
githubDiff: {
|
|
111
|
+
type: "boolean",
|
|
112
|
+
default: true,
|
|
113
|
+
description: "Set false to skip the npm-vs-GitHub source diff (saves a tarball download)."
|
|
114
|
+
},
|
|
110
115
|
outputFormat: {
|
|
111
116
|
type: "string",
|
|
112
117
|
enum: ["markdown", "json"],
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pkgxray",
|
|
3
|
-
"version": "0.7.0",
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Zero-dep local CLI and MCP server that scans npm packages for supply-chain risk. OSV vuln pre-check, sandboxed quarantine, tarball-integrity verification, calibrated static heuristics, GitHub provenance cross-check.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Jack Adams-Lovell",
|
package/src/auditor.js
CHANGED
|
@@ -157,7 +157,8 @@ function normalizeEvidence(input) {
|
|
|
157
157
|
evidence.knownVulnerabilities || evidence.vulnerabilities || evidence.osvVulnerabilities || [],
|
|
158
158
|
sourceFiles: normalizeSourceFiles(
|
|
159
159
|
evidence.sourceFiles || evidence.SOURCE_FILES || evidence.files || {}
|
|
160
|
-
)
|
|
160
|
+
),
|
|
161
|
+
npmVsGithubDiff: evidence.npmVsGithubDiff || null
|
|
161
162
|
};
|
|
162
163
|
}
|
|
163
164
|
|
|
@@ -243,7 +244,9 @@ const BAND_DEFINITIONS = [
|
|
|
243
244
|
{ band: "github-archived", label: "github-archived", categories: ["github-archived"], rationale: "Linked repository is archived or disabled — no maintenance, security issues will not be fixed." },
|
|
244
245
|
{ band: "github-young", label: "github-young", categories: ["github-young"], rationale: "Linked repository was created within the last 30 days — common slopsquat shape." },
|
|
245
246
|
{ band: "github-lonely", label: "github-lonely", categories: ["github-lonely"], rationale: "0 stars + 0 forks + low watcher count on a young repo. Low community signal." },
|
|
246
|
-
{ band: "github-stale", label: "github-stale", categories: ["github-stale"], rationale: "Repository hasn't been pushed to in over two years and isn't formally archived." }
|
|
247
|
+
{ band: "github-stale", label: "github-stale", categories: ["github-stale"], rationale: "Repository hasn't been pushed to in over two years and isn't formally archived." },
|
|
248
|
+
{ band: "npm-vs-github-divergence", label: "npm-vs-github-divergence", categories: ["npm-vs-github-divergence"], rationale: "Published npm tarball contains source files that aren't in (or differ from) the linked GitHub repo at the matching ref. Strong account-takeover / build-tampering signal." },
|
|
249
|
+
{ band: "npm-vs-github-clean", label: "npm-vs-github-clean", categories: ["npm-vs-github-clean"], rationale: "npm tarball matches the linked GitHub repo at the published version." }
|
|
247
250
|
];
|
|
248
251
|
|
|
249
252
|
const SEVERITY_RANK = { info: 0, low: 1, medium: 2, high: 3 };
|
|
@@ -291,6 +294,54 @@ function auditMetadata(evidence, findings) {
|
|
|
291
294
|
inspectMetadataObject("NPM_METADATA", evidence.npmMetadata, findings);
|
|
292
295
|
inspectGithubMetadata(evidence, findings);
|
|
293
296
|
inspectKnownVulnerabilities(evidence.knownVulnerabilities, findings);
|
|
297
|
+
inspectNpmVsGithubDiff(evidence, findings);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
 * Convert the npm-vs-GitHub tree comparison attached to `evidence` into
 * findings. Appends to `findings` in place and returns nothing.
 *
 * - `counts.extraSource > 0`      -> one HIGH "npm-vs-github-divergence" finding
 * - `counts.mismatchedSource > 0` -> one HIGH "npm-vs-github-divergence" finding
 * - neither, and at least one file verified identical -> one INFO
 *   "npm-vs-github-clean" finding
 *
 * @param {object} evidence - reads `evidence.npmVsGithubDiff` only.
 * @param {Array<object>} findings - mutable list the findings are pushed onto.
 */
function inspectNpmVsGithubDiff(evidence, findings) {
  const diff = evidence.npmVsGithubDiff;
  if (!diff || !diff.compared) {
    // Not gating — silent skip. Common reasons: no github repo,
    // ref not found, github fetch failed.
    return;
  }

  const c = diff.counts || {};

  // Up to five example paths of the given category, for the finding snippet.
  const examplePaths = (entries, category) =>
    (entries || [])
      .filter((f) => f.category === category)
      .slice(0, 5)
      .map((f) => f.path);

  if (c.extraSource > 0) {
    const examples = examplePaths(diff.suspiciousExtras, "extra-source");
    findings.push({
      severity: "high",
      category: "npm-vs-github-divergence",
      file: "NPM_VS_GITHUB",
      snippet: `npm tarball contains ${c.extraSource} source file(s) not in the linked GitHub repo @${diff.githubRef}: ${examples.join(", ")}`,
      rationale:
        "Source files present in the published tarball but absent from the matching GitHub ref. Classic account-takeover / build-server-compromise signal."
    });
  }

  if (c.mismatchedSource > 0) {
    const examples = examplePaths(diff.suspiciousMismatches, "content-mismatch-source");
    findings.push({
      severity: "high",
      category: "npm-vs-github-divergence",
      file: "NPM_VS_GITHUB",
      snippet: `${c.mismatchedSource} source file(s) differ between npm tarball and GitHub repo @${diff.githubRef}: ${examples.join(", ")}`,
      rationale:
        "Source files with the same path but different SHA256 in the published tarball vs the linked GitHub repo at the matching ref. Strong tampering signal."
    });
  }

  // Only claim "clean" when at least one file was actually verified identical.
  // Previously a tarball with zero matching files still produced a
  // "0/N files match" clean finding, which is a false assurance.
  if (c.extraSource === 0 && c.mismatchedSource === 0 && c.matched > 0) {
    findings.push({
      severity: "info",
      category: "npm-vs-github-clean",
      file: "NPM_VS_GITHUB",
      snippet: `${c.matched}/${c.npmFiles} files match GitHub @${diff.githubRef}`,
      rationale: "npm tarball source files match the linked GitHub repo at the matching ref."
    });
  }
}
|
|
295
346
|
|
|
296
347
|
const YOUNG_REPO_DAYS = 30;
|
package/src/diff.js
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const fs = require("node:fs");
|
|
4
|
+
const fsp = require("node:fs/promises");
|
|
5
|
+
const path = require("node:path");
|
|
6
|
+
const crypto = require("node:crypto");
|
|
7
|
+
|
|
8
|
+
const SKIP_DIRS = new Set([
|
|
9
|
+
".git",
|
|
10
|
+
"node_modules",
|
|
11
|
+
".github",
|
|
12
|
+
".vscode",
|
|
13
|
+
".idea",
|
|
14
|
+
"coverage",
|
|
15
|
+
"__pycache__"
|
|
16
|
+
]);
|
|
17
|
+
|
|
18
|
+
// File patterns that are expected to differ — never used to drive findings.
|
|
19
|
+
const ALWAYS_IGNORE = [
|
|
20
|
+
/(?:^|\/)package\.json$/,
|
|
21
|
+
/(?:^|\/)package-lock\.json$/,
|
|
22
|
+
/(?:^|\/)yarn\.lock$/,
|
|
23
|
+
/(?:^|\/)pnpm-lock\.yaml$/,
|
|
24
|
+
/(?:^|\/)\.npmignore$/,
|
|
25
|
+
/(?:^|\/)\.gitignore$/,
|
|
26
|
+
/(?:^|\/)\.gitattributes$/,
|
|
27
|
+
/(?:^|\/)CHANGELOG(?:\.md)?$/i,
|
|
28
|
+
/(?:^|\/)CONTRIBUTING(?:\.md)?$/i,
|
|
29
|
+
/(?:^|\/)\.npmrc$/
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
// Patterns that mean "this is build output" — only flagged if no build script
|
|
33
|
+
// exists. With a prepare/prepack script, extras here are expected.
|
|
34
|
+
const BUILD_OUTPUT_PATTERNS = [
|
|
35
|
+
/(?:^|\/)dist\//,
|
|
36
|
+
/(?:^|\/)build\//,
|
|
37
|
+
/(?:^|\/)lib\//,
|
|
38
|
+
/(?:^|\/)es\//,
|
|
39
|
+
/(?:^|\/)esm\//,
|
|
40
|
+
/(?:^|\/)cjs\//,
|
|
41
|
+
/(?:^|\/)umd\//,
|
|
42
|
+
/\.min\.js$/,
|
|
43
|
+
/\.min\.mjs$/,
|
|
44
|
+
/\.min\.css$/,
|
|
45
|
+
/\.d\.ts$/,
|
|
46
|
+
/\.d\.cts$/,
|
|
47
|
+
/\.d\.mts$/,
|
|
48
|
+
/\.js\.map$/,
|
|
49
|
+
/\.css\.map$/
|
|
50
|
+
];
|
|
51
|
+
|
|
52
|
+
// Source extensions whose contents we care about most. Mismatches or extras
|
|
53
|
+
// here are the strongest ATO signal.
|
|
54
|
+
const SOURCE_EXTENSIONS = new Set([
|
|
55
|
+
".js", ".cjs", ".mjs", ".jsx",
|
|
56
|
+
".ts", ".tsx",
|
|
57
|
+
".vue", ".svelte",
|
|
58
|
+
".py", ".rb", ".go", ".rs", ".java", ".cs", ".php",
|
|
59
|
+
".sh", ".ps1", ".bash",
|
|
60
|
+
".json", ".toml", ".yaml", ".yml"
|
|
61
|
+
]);
|
|
62
|
+
|
|
63
|
+
/**
 * True when `relPath` matches any ALWAYS_IGNORE pattern.
 *
 * @param {string} relPath - slash-separated path relative to the tree root.
 * @returns {boolean}
 */
function isAlwaysIgnored(relPath) {
  for (const pattern of ALWAYS_IGNORE) {
    if (pattern.test(relPath)) {
      return true;
    }
  }
  return false;
}
|
|
66
|
+
|
|
67
|
+
/**
 * True when `relPath` matches any BUILD_OUTPUT_PATTERNS entry.
 *
 * @param {string} relPath - slash-separated path relative to the tree root.
 * @returns {boolean}
 */
function isBuildOutput(relPath) {
  for (const pattern of BUILD_OUTPUT_PATTERNS) {
    if (pattern.test(relPath)) {
      return true;
    }
  }
  return false;
}
|
|
70
|
+
|
|
71
|
+
/**
 * True when the (case-insensitive) file extension of `relPath` is listed in
 * SOURCE_EXTENSIONS.
 *
 * @param {string} relPath - path whose extension is inspected.
 * @returns {boolean}
 */
function isSourceFile(relPath) {
  const extension = path.extname(relPath);
  return SOURCE_EXTENSIONS.has(extension.toLowerCase());
}
|
|
75
|
+
|
|
76
|
+
/**
 * Walk a directory tree breadth-first and record size + SHA-256 per regular file.
 *
 * @param {string} root - directory the walk is confined to.
 * @param {string} subdir - optional path below `root` to start from ("" = root).
 * @param {{maxFiles?: number, maxBytes?: number, maxFileBytes?: number}} [limits]
 *   Caps on hashed file count, total hashed bytes, and per-file size. Falsy
 *   values fall back to 5000 files / 50 MiB / 1 MiB.
 * @returns {Promise<Map<string, {size: number, sha256: string}>|null>}
 *   Map keyed by slash-separated path relative to the start directory, or
 *   `null` when the start directory is inaccessible or lies outside `root`.
 *   Files over a cap are recorded with a `"skipped:*"` sentinel instead of a hash.
 */
async function hashTree(root, subdir, limits) {
  const baseDir = subdir ? path.join(root, subdir) : root;

  // `subdir` can originate from an audited package's package.json
  // (repository.directory), so it is untrusted: refuse anything that resolves
  // outside `root` (e.g. "../../etc") instead of hashing arbitrary directories.
  const fromRoot = path.relative(path.resolve(root), path.resolve(baseDir));
  if (fromRoot === ".." || fromRoot.startsWith(`..${path.sep}`) || path.isAbsolute(fromRoot)) {
    return null;
  }

  try {
    await fsp.access(baseDir);
  } catch {
    return null;
  }

  const caps = limits || {};
  const result = new Map();
  const queue = [""];
  let totalBytes = 0;
  let totalFiles = 0;
  const maxFiles = caps.maxFiles || 5000;
  const maxBytes = caps.maxBytes || 50 * 1024 * 1024;
  const maxFileBytes = caps.maxFileBytes || 1024 * 1024;

  while (queue.length > 0 && totalFiles < maxFiles && totalBytes < maxBytes) {
    const rel = queue.shift();
    const full = path.join(baseDir, rel);
    let entries;
    try {
      entries = await fsp.readdir(full, { withFileTypes: true });
    } catch {
      // Unreadable directory: skip it and keep walking the rest of the tree.
      continue;
    }
    for (const entry of entries) {
      const childRel = rel ? `${rel}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        if (SKIP_DIRS.has(entry.name)) continue;
        queue.push(childRel);
        continue;
      }
      // Anything that is neither a directory nor a regular file is ignored.
      if (!entry.isFile()) continue;
      const childFull = path.join(baseDir, childRel);
      let stat;
      try {
        stat = await fsp.stat(childFull);
      } catch {
        continue;
      }
      if (stat.size > maxFileBytes) {
        result.set(childRel, { size: stat.size, sha256: "skipped:too-large" });
        continue;
      }
      if (totalBytes + stat.size > maxBytes) {
        // soft cap — record presence without hash
        result.set(childRel, { size: stat.size, sha256: "skipped:tree-budget" });
        continue;
      }
      const hash = await hashFile(childFull);
      result.set(childRel, { size: stat.size, sha256: hash });
      totalBytes += stat.size;
      totalFiles += 1;
      if (totalFiles >= maxFiles) break;
    }
  }
  return result;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Stream a file through SHA-256.
 *
 * @param {string} filePath - file to read.
 * @returns {Promise<string>} lowercase hex digest; rejects if the read stream errors.
 */
async function hashFile(filePath) {
  const digest = crypto.createHash("sha256");
  for await (const piece of fs.createReadStream(filePath)) {
    digest.update(piece);
  }
  return digest.digest("hex");
}
|
|
143
|
+
|
|
144
|
+
/**
 * Compare a staged npm package against the matching GitHub repo subtree.
 *
 * Both trees are hashed with `hashTree`; every npm file that is not in
 * ALWAYS_IGNORE is then classified as matched, mismatched, extra (absent from
 * GitHub) or unverified (hash skipped on either side).
 *
 * @param {object} args
 * @param {string} args.npmStagedPath - root of the unpacked npm package.
 * @param {string} args.githubStagedPath - root of the unpacked GitHub tarball.
 * @param {string|null} [args.subdir] - subdirectory of the GitHub tree to compare against.
 * @param {boolean} args.hasBuildScript - whether the package declares a build-type
 *   script; relaxes classification of build-looking files.
 * @returns {Promise<object>} `{ compared: false, reason, ... }` when a tree is
 *   missing or the layouts are too different to compare meaningfully, otherwise
 *   `{ compared: true, counts, suspiciousExtras, suspiciousMismatches, ... }`.
 */
async function diffNpmVsGithub({ npmStagedPath, githubStagedPath, subdir, hasBuildScript }) {
  const limits = { maxFiles: 5000, maxBytes: 50 * 1024 * 1024 };
  const [npmTree, ghTree] = await Promise.all([
    hashTree(npmStagedPath, "", limits),
    hashTree(githubStagedPath, subdir || "", limits)
  ]);
  if (!npmTree || !ghTree) {
    return {
      compared: false,
      reason: !ghTree ? "github-subdir-missing" : "npm-tree-missing"
    };
  }

  // Pre-compute the set of directories that EXIST in github. We use this to
  // decide whether an extra file is in a "real source dir" (sibling source
  // files exist in github) or in a path github doesn't have at all (more
  // likely build output).
  const ghDirs = new Set();
  for (const ghPath of ghTree.keys()) {
    const parts = ghPath.split("/");
    for (let i = 1; i < parts.length; i += 1) {
      ghDirs.add(parts.slice(0, i).join("/"));
    }
  }

  const likelySourceDir = /^(?:src|tests?|scripts|spec)\//;
  const isSkipped = (entry) => entry.sha256.startsWith("skipped");

  const extraInNpm = [];
  const mismatched = [];
  const matched = [];
  let unverified = 0;

  for (const [rel, npmEntry] of npmTree.entries()) {
    if (isAlwaysIgnored(rel)) continue;
    const ghEntry = ghTree.get(rel);
    const inLikelySourceDir = likelySourceDir.test(rel);

    if (!ghEntry) {
      const parentDir = rel.includes("/") ? rel.split("/").slice(0, -1).join("/") : "";
      const parentExistsInGh = parentDir === "" || ghDirs.has(parentDir);
      // An extra file inside a directory that exists in github is the strong
      // ATO signal — github has the dir, the attacker just dropped one more
      // file in it. An extra file at a path github doesn't have at all is
      // more likely build output the repo never committed.
      let category;
      if (parentExistsInGh && isSourceFile(rel)) {
        category = "extra-source";
      } else if (isBuildOutput(rel)) {
        category = hasBuildScript ? "expected-build-output" : "extra-build-output";
      } else if (isSourceFile(rel)) {
        category = hasBuildScript && !inLikelySourceDir
          ? "expected-build-output"
          : "extra-source";
      } else {
        category = "extra-other";
      }
      extraInNpm.push({ path: rel, category, size: npmEntry.size });
      continue;
    }

    // A "skipped:*" sentinel means the content was never hashed. Check this
    // BEFORE comparing hashes: two identical sentinels used to compare equal
    // and were wrongly counted as a verified match. Such files are neither a
    // match nor a mismatch — they are unverified.
    if (isSkipped(npmEntry) || isSkipped(ghEntry)) {
      unverified += 1;
      continue;
    }

    if (npmEntry.sha256 !== ghEntry.sha256) {
      let category;
      if (isBuildOutput(rel)) {
        category = hasBuildScript ? "expected-build-output" : "content-mismatch-build";
      } else if (isSourceFile(rel)) {
        category = hasBuildScript && !inLikelySourceDir
          ? "expected-build-output"
          : "content-mismatch-source";
      } else {
        category = "content-mismatch-other";
      }
      mismatched.push({ path: rel, category, npmSize: npmEntry.size, ghSize: ghEntry.size });
    } else {
      matched.push(rel);
    }
  }

  const extraSource = extraInNpm.filter((f) => f.category === "extra-source");
  const extraBuild = extraInNpm.filter((f) => f.category === "extra-build-output");
  const mismatchedSource = mismatched.filter((f) => f.category === "content-mismatch-source");

  // Tree-overlap sanity check. Many real packages don't publish a 1:1 mirror
  // of their repo (lodash publishes flat per-function modules, react bundles
  // src/ to root, monorepos publish a subtree). If the npm tarball and the
  // github tree have very little overlap at matching paths, comparing them
  // produces mostly noise. Bail out with an honest "no-overlap" reason rather
  // than firing HIGH on legit packages.
  // NOTE: the denominator is every file hashTree recorded for the npm tree,
  // including files skipped above via ALWAYS_IGNORE.
  const consideredFiles = npmTree.size;
  const overlapRatio = consideredFiles > 0 ? (matched.length + mismatched.length) / consideredFiles : 0;
  const MIN_OVERLAP_RATIO = 0.3;

  // Fields shared by every result shape returned below.
  const summary = {
    hasBuildScript,
    subdir: subdir || null,
    overlapRatio: Number(overlapRatio.toFixed(2))
  };
  const baseCounts = {
    npmFiles: npmTree.size,
    ghFiles: ghTree.size,
    matched: matched.length,
    mismatched: mismatched.length,
    extraInNpm: extraInNpm.length,
    unverified
  };

  if (overlapRatio < MIN_OVERLAP_RATIO && extraSource.length > 20) {
    return {
      compared: false,
      reason: "tree-layout-differs",
      ...summary,
      counts: { ...baseCounts },
      note:
        "npm tarball and GitHub tree have very little overlap at matching paths — the package is likely built / bundled before publish. Diff would be unreliable; skipping."
    };
  }

  // Second skip case: of the files that DO exist at matching paths, most
  // mismatch. That means the published artifacts are generated from the repo
  // source (typical of `prepublish` / `release-please` / `changesets` build
  // flows that minify or transform entry files). Diff is unreliable here.
  const overlapCount = matched.length + mismatched.length;
  if (overlapCount >= 5 && mismatched.length > matched.length * 2) {
    return {
      compared: false,
      reason: "tree-mostly-generated",
      ...summary,
      counts: { ...baseCounts },
      note:
        "Most overlapping files differ in content — published artifacts are likely generated from the repo source (e.g. bundling, transpilation). Diff would be unreliable; skipping."
    };
  }

  return {
    compared: true,
    ...summary,
    counts: {
      ...baseCounts,
      extraSource: extraSource.length,
      mismatchedSource: mismatchedSource.length
    },
    suspiciousExtras: extraSource.concat(extraBuild).slice(0, 25),
    suspiciousMismatches: mismatchedSource.concat(
      mismatched.filter((f) => f.category === "content-mismatch-build")
    ).slice(0, 25)
  };
}
|
|
297
|
+
|
|
298
|
+
module.exports = { diffNpmVsGithub };
|
package/src/github.js
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
|
+
const fs = require("node:fs");
|
|
3
4
|
const fsp = require("node:fs/promises");
|
|
4
5
|
const os = require("node:os");
|
|
5
6
|
const path = require("node:path");
|
|
6
7
|
const https = require("node:https");
|
|
8
|
+
const crypto = require("node:crypto");
|
|
9
|
+
const { spawn } = require("node:child_process");
|
|
7
10
|
|
|
8
11
|
const USER_AGENT = "pkgxray/0.6.0";
|
|
9
12
|
const CACHE_DIR = path.join(os.homedir(), ".cache", "pkgxray", "github");
|
|
@@ -46,7 +49,9 @@ function parseGithubRepo(repository) {
|
|
|
46
49
|
/^github:([^/]+)\/(.+)$/,
|
|
47
50
|
/^(?:https?|git):\/\/github\.com\/([^/]+)\/([^/?#]+)/,
|
|
48
51
|
/^git@github\.com:([^/]+)\/([^/?#]+)/,
|
|
49
|
-
/^ssh:\/\/git@github\.com\/([^/]+)\/([^/?#]+)/
|
|
52
|
+
/^ssh:\/\/git@github\.com\/([^/]+)\/([^/?#]+)/,
|
|
53
|
+
// npm shorthand: bare "owner/repo" defaults to GitHub
|
|
54
|
+
/^([A-Za-z0-9_.-]+)\/([A-Za-z0-9_.-]+)$/
|
|
50
55
|
];
|
|
51
56
|
for (const pattern of patterns) {
|
|
52
57
|
const match = cleaned.match(pattern);
|
|
@@ -160,7 +165,135 @@ async function fetchRepoMetadata(repository, options = {}) {
|
|
|
160
165
|
}
|
|
161
166
|
}
|
|
162
167
|
|
|
168
|
+
// ---- Tarball download + ref resolution ----
|
|
169
|
+
|
|
170
|
+
const TARBALL_CACHE_DIR = path.join(os.homedir(), ".cache", "pkgxray", "tarballs");
|
|
171
|
+
const TARBALL_TTL_MS = 24 * 60 * 60 * 1000; // 24h
|
|
172
|
+
const TARBALL_TIMEOUT_MS = 15000;
|
|
173
|
+
const MAX_TARBALL_BYTES = 100 * 1024 * 1024; // 100MB
|
|
174
|
+
|
|
175
|
+
/**
 * Build the on-disk cache location for a repo tarball.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} ref - tag or branch name.
 * @returns {string} `<TARBALL_CACHE_DIR>/<sha1 of "owner/repo@ref">.tgz`
 */
function tarballCachePath(owner, repo, ref) {
  const cacheKey = `${owner}/${repo}@${ref}`;
  const digest = crypto.createHash("sha1").update(cacheKey).digest("hex");
  const fileName = `${digest}.tgz`;
  return path.join(TARBALL_CACHE_DIR, fileName);
}
|
|
182
|
+
|
|
183
|
+
/**
 * Download `url` over HTTPS to `destination`, following up to 5 redirects.
 *
 * The body is streamed into a sibling `*.partial` file and renamed onto
 * `destination` only after the whole response has been written, so an
 * interrupted or failed transfer never leaves a truncated archive at
 * `destination`.
 *
 * @param {string} url - absolute https URL.
 * @param {string} destination - final file path (created with mode 0o600).
 * @returns {Promise<void>} Rejects on network/file errors, on a non-2xx status
 *   (a 404 rejection carries `err.statusCode === 404`), on too many redirects,
 *   on a premature end of the response, or when the body exceeds
 *   MAX_TARBALL_BYTES.
 */
async function downloadCodeload(url, destination) {
  const partialPath = `${destination}.${process.pid}.partial`;
  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(partialPath, { mode: 0o600 });
    let written = 0;
    let settled = false;

    // Reject exactly once, dropping whatever was written so far.
    const fail = (err) => {
      if (settled) return;
      settled = true;
      file.destroy();
      fs.unlink(partialPath, () => reject(err));
    };

    // Resolve exactly once, after atomically moving the finished file in place.
    const publish = () => {
      if (settled) return;
      settled = true;
      fs.rename(partialPath, destination, (renameErr) => {
        if (!renameErr) {
          resolve();
          return;
        }
        fs.unlink(partialPath, () => reject(renameErr));
      });
    };

    // Without an 'error' listener a disk failure would surface as an
    // uncaught exception instead of a rejection.
    file.on("error", fail);
    file.on("finish", () => file.close((closeErr) => (closeErr ? fail(closeErr) : publish())));

    const get = (currentUrl, hops) => {
      if (hops > 5) return fail(new Error("Too many redirects"));
      let parsed;
      try {
        parsed = new URL(currentUrl);
      } catch (err) {
        return fail(err);
      }
      // The request below is always made with the https module; refuse to
      // silently reinterpret a redirect to another scheme.
      if (parsed.protocol !== "https:") {
        return fail(new Error(`Refusing non-HTTPS codeload URL: ${currentUrl}`));
      }
      const request = https.get(
        {
          hostname: parsed.hostname,
          path: parsed.pathname + parsed.search,
          headers: { "user-agent": USER_AGENT },
          timeout: TARBALL_TIMEOUT_MS
        },
        (response) => {
          if ([301, 302, 303, 307, 308].includes(response.statusCode) && response.headers.location) {
            response.resume();
            let nextUrl;
            try {
              nextUrl = new URL(response.headers.location, currentUrl).toString();
            } catch (err) {
              // Malformed Location header: reject instead of throwing inside the callback.
              return fail(err);
            }
            return get(nextUrl, hops + 1);
          }
          if (response.statusCode === 404) {
            response.resume();
            const err = new Error(`GitHub codeload 404: ${currentUrl}`);
            err.statusCode = 404;
            return fail(err);
          }
          if (response.statusCode < 200 || response.statusCode >= 300) {
            response.resume();
            return fail(new Error(`Codeload HTTP ${response.statusCode}`));
          }
          response.on("error", fail);
          // A connection dropped mid-body closes the response without 'end';
          // pipe() then never ends the file and the promise would hang.
          response.on("close", () => {
            if (!response.complete) {
              fail(new Error("Codeload response ended before the download completed"));
            }
          });
          response.on("data", (chunk) => {
            written += chunk.length;
            if (written > MAX_TARBALL_BYTES) {
              response.destroy();
              fail(new Error(`Codeload exceeded ${MAX_TARBALL_BYTES} bytes`));
            }
          });
          response.pipe(file);
        }
      );
      request.on("error", fail);
      request.on("timeout", () => request.destroy(new Error("Codeload request timed out")));
    };
    get(url, 0);
  });
}
|
|
236
|
+
|
|
237
|
+
/**
 * Spawn `command` with `args` (stdin ignored, stdout/stderr piped) and wait
 * for it to exit.
 *
 * @param {string} command - executable to spawn.
 * @param {string[]} args - argument vector.
 * @returns {Promise<void>} Resolves on exit code 0. Rejects with the spawn
 *   error, or with an Error whose message carries the exit code and the
 *   trimmed stderr output.
 */
function run(command, args) {
  return new Promise((resolve, reject) => {
    const stderrPieces = [];
    const child = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"] });

    child.stderr.on("data", (piece) => {
      stderrPieces.push(String(piece));
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (code !== 0) {
        const stderr = stderrPieces.join("");
        reject(new Error(`${command} exited with ${code}: ${stderr.trim()}`));
        return;
      }
      resolve();
    });
  });
}
|
|
251
|
+
|
|
252
|
+
/**
 * Unpack a gzip'd tarball into `destination` using the system `tar`.
 *
 * The destination directory is created (mode 0o700) if needed. The archive's
 * top-level directory is stripped, and ownership/permission bits stored in the
 * archive are not restored.
 *
 * @param {string} archivePath - path to the .tgz file.
 * @param {string} destination - directory to extract into.
 * @returns {Promise<void>} Rejects when `tar` cannot be spawned or exits non-zero.
 */
async function extractTarball(archivePath, destination) {
  await fsp.mkdir(destination, { recursive: true, mode: 0o700 });
  const tarArgs = ["-xzf", archivePath];
  tarArgs.push("-C", destination);
  tarArgs.push("--strip-components", "1");
  tarArgs.push("--no-same-owner", "--no-same-permissions");
  return run("tar", tarArgs);
}
|
|
261
|
+
|
|
262
|
+
/**
 * Try refs in order until one downloads. Caches the first successful one.
 *
 * Candidate refs are `v<version>`, `<version>`, then `defaultBranch`
 * (duplicates removed). A cached archive younger than TARBALL_TTL_MS is
 * reused without a network request.
 *
 * NOTE(review): when only `defaultBranch` resolves, the returned archive is
 * the branch head, which is not necessarily the published version — callers
 * can tell from the returned `ref`.
 *
 * @param {string} owner - GitHub account name.
 * @param {string} repo - repository name.
 * @param {string|null} version - package version used to guess the tag name.
 * @param {string|null} defaultBranch - fallback ref.
 * @returns {Promise<{ref: string, archivePath: string, fromCache: boolean}|null>}
 *   `null` when every candidate ref returned 404. Any other download error is rethrown.
 */
async function fetchRepoTarballForVersion(owner, repo, version, defaultBranch) {
  await fsp.mkdir(TARBALL_CACHE_DIR, { recursive: true, mode: 0o700 });

  const ordered = [];
  if (version) {
    ordered.push(`v${version}`);
    ordered.push(version);
  }
  if (defaultBranch) ordered.push(defaultBranch);
  // Drop duplicates so the same ref is never requested twice.
  const candidates = [...new Set(ordered)];

  // Escape every path segment, not just the ref: owner/repo are derived from
  // package metadata and must not be able to reshape the request URL.
  const repoPath = `${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`;

  for (const ref of candidates) {
    const cachePath = tarballCachePath(owner, repo, ref);
    try {
      const stat = await fsp.stat(cachePath);
      if (Date.now() - stat.mtimeMs < TARBALL_TTL_MS) {
        return { ref, archivePath: cachePath, fromCache: true };
      }
    } catch {
      // not cached, fall through to download
    }
    const url = `https://codeload.github.com/${repoPath}/tar.gz/${encodeURIComponent(ref)}`;
    try {
      await downloadCodeload(url, cachePath);
      return { ref, archivePath: cachePath, fromCache: false };
    } catch (error) {
      // A 404 only means this ref does not exist; try the next candidate.
      if (error.statusCode === 404) continue;
      throw error;
    }
  }
  return null;
}
|
|
293
|
+
|
|
163
294
|
module.exports = {
|
|
164
295
|
parseGithubRepo,
|
|
165
|
-
fetchRepoMetadata
|
|
296
|
+
fetchRepoMetadata,
|
|
297
|
+
fetchRepoTarballForVersion,
|
|
298
|
+
extractTarball
|
|
166
299
|
};
|
package/src/quarantine.js
CHANGED
|
@@ -8,7 +8,8 @@ const os = require("node:os");
|
|
|
8
8
|
const path = require("node:path");
|
|
9
9
|
const { spawn } = require("node:child_process");
|
|
10
10
|
const { auditEvidence } = require("./auditor");
|
|
11
|
-
const { fetchRepoMetadata } = require("./github");
|
|
11
|
+
const { fetchRepoMetadata, fetchRepoTarballForVersion, extractTarball: extractTarballGh } = require("./github");
|
|
12
|
+
const { diffNpmVsGithub } = require("./diff");
|
|
12
13
|
|
|
13
14
|
const DEFAULT_MAX_FILE_BYTES = 256 * 1024;
|
|
14
15
|
const DEFAULT_MAX_FILES = 600;
|
|
@@ -104,13 +105,41 @@ async function guardExtension(reference, options = {}) {
|
|
|
104
105
|
const githubMetadata = await githubMetadataPromise;
|
|
105
106
|
timings.githubMetadataMs = elapsed(githubStart);
|
|
106
107
|
|
|
108
|
+
// npm vs GitHub diff (Phase 3) — only for npm packages where we have repo
|
|
109
|
+
// metadata. Runs serially after we have both trees; tarballs are cached so
|
|
110
|
+
// re-runs are fast.
|
|
111
|
+
let npmVsGithubDiff = null;
|
|
112
|
+
if (
|
|
113
|
+
options.githubDiff !== false &&
|
|
114
|
+
(resolved.type === "npm" || resolved.type === "local") &&
|
|
115
|
+
githubMetadata && githubMetadata.found &&
|
|
116
|
+
vulnerabilities.length === 0 &&
|
|
117
|
+
Object.keys(sourceFiles).length > 0
|
|
118
|
+
) {
|
|
119
|
+
const diffStart = now();
|
|
120
|
+
try {
|
|
121
|
+
npmVsGithubDiff = await runNpmVsGithubDiff({
|
|
122
|
+
resolved,
|
|
123
|
+
npmStagedPath: stagedPath,
|
|
124
|
+
githubMetadata,
|
|
125
|
+
workspace
|
|
126
|
+
});
|
|
127
|
+
} catch (error) {
|
|
128
|
+
npmVsGithubDiff = { compared: false, reason: "diff-error", message: error.message };
|
|
129
|
+
}
|
|
130
|
+
timings.diffMs = elapsed(diffStart);
|
|
131
|
+
} else {
|
|
132
|
+
timings.diffMs = 0;
|
|
133
|
+
}
|
|
134
|
+
|
|
107
135
|
const evidence = {
|
|
108
136
|
packageName: resolved.packageName || reference,
|
|
109
137
|
npmMetadata: resolved.npmMetadata || null,
|
|
110
138
|
githubMetadata,
|
|
111
139
|
webPresence: null,
|
|
112
140
|
knownVulnerabilities: vulnerabilities,
|
|
113
|
-
sourceFiles
|
|
141
|
+
sourceFiles,
|
|
142
|
+
npmVsGithubDiff
|
|
114
143
|
};
|
|
115
144
|
const auditStart = now();
|
|
116
145
|
const report = auditEvidence(evidence);
|
|
@@ -123,6 +152,7 @@ async function guardExtension(reference, options = {}) {
|
|
|
123
152
|
resolved,
|
|
124
153
|
sourceFiles,
|
|
125
154
|
githubMetadata,
|
|
155
|
+
npmVsGithubDiff,
|
|
126
156
|
vulnerabilityPrecheck: {
|
|
127
157
|
enabled: options.vulnerabilityCheck !== false,
|
|
128
158
|
database: "OSV",
|
|
@@ -143,14 +173,84 @@ async function guardExtension(reference, options = {}) {
|
|
|
143
173
|
return result;
|
|
144
174
|
}
|
|
145
175
|
|
|
176
|
+
/**
 * Fetch the GitHub tarball matching the resolved package version, unpack it
 * into `<workspace>/github-tree`, and diff it against the staged npm package.
 *
 * @param {object} args
 * @param {object} args.resolved - resolved reference; reads `version` and
 *   `npmMetadata.repository`.
 * @param {string} args.npmStagedPath - root of the staged npm package.
 * @param {object} args.githubMetadata - reads `owner`, `repo`, `default_branch`.
 * @param {string} args.workspace - scratch directory for the unpacked GitHub tree.
 * @returns {Promise<object>} the `diffNpmVsGithub` result plus `githubRef` and
 *   `tarballFromCache`, or `{ compared: false, reason }` when the declared
 *   repository directory is invalid or no ref could be downloaded.
 */
async function runNpmVsGithubDiff({ resolved, npmStagedPath, githubMetadata, workspace }) {
  // package.json may set repository.directory for monorepos — narrow the
  // comparison to that subpath if present. The value comes from the package
  // under audit, so it is untrusted: it must be a string and must not contain
  // ".." segments that would point outside the unpacked GitHub tree. Validate
  // before downloading anything.
  const pkgRepo = resolved.npmMetadata && resolved.npmMetadata.repository;
  const declaredDir = pkgRepo && typeof pkgRepo === "object" ? pkgRepo.directory || null : null;
  let subdir = null;
  if (declaredDir !== null) {
    if (typeof declaredDir !== "string") {
      return { compared: false, reason: "invalid-repository-directory" };
    }
    const segments = declaredDir.split(/[\\/]+/).filter((part) => part !== "" && part !== ".");
    if (segments.includes("..")) {
      return { compared: false, reason: "invalid-repository-directory" };
    }
    subdir = segments.length > 0 ? segments.join("/") : null;
  }

  const version = resolved.version;
  const tarball = await fetchRepoTarballForVersion(
    githubMetadata.owner,
    githubMetadata.repo,
    version,
    githubMetadata.default_branch
  );
  if (!tarball) {
    return { compared: false, reason: "no-matching-ref", versionTried: version };
  }

  const ghStagePath = path.join(workspace, "github-tree");
  await extractTarballGh(tarball.archivePath, ghStagePath);

  // Detect a publish-time build script (means built artifacts ≠ repo is normal)
  const scripts = await readScripts(npmStagedPath);
  const hasBuildScript = Boolean(scripts.prepare || scripts.prepack || scripts.build);

  const diff = await diffNpmVsGithub({
    npmStagedPath,
    githubStagedPath: ghStagePath,
    subdir,
    hasBuildScript
  });

  return {
    ...diff,
    githubRef: tarball.ref,
    tarballFromCache: tarball.fromCache
  };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Read the `scripts` section of `<stagedPath>/package.json`.
 *
 * Best-effort by design: a missing, unreadable or unparseable manifest, or
 * one without a truthy `scripts` value, yields an empty object.
 *
 * @param {string} stagedPath - directory expected to contain package.json.
 * @returns {Promise<object>} the manifest's `scripts` value, or `{}`.
 */
async function readScripts(stagedPath) {
  let manifest;
  try {
    const manifestPath = path.join(stagedPath, "package.json");
    const raw = await fsp.readFile(manifestPath, "utf8");
    manifest = JSON.parse(raw);
    if (manifest.scripts) {
      return manifest.scripts;
    }
  } catch {
    return {};
  }
  return {};
}
|
|
222
|
+
|
|
146
223
|
async function stageReference(reference, stagedPath, options) {
|
|
147
224
|
const parsed = parseReference(reference);
|
|
148
225
|
if (parsed.type === "local") {
|
|
149
226
|
await copyLocalPath(parsed.path, stagedPath);
|
|
227
|
+
// Populate npmMetadata from the staged package.json so downstream phases
|
|
228
|
+
// (github metadata cross-check, npm-vs-github diff) can work on local
|
|
229
|
+
// packages too.
|
|
230
|
+
let npmMetadata = null;
|
|
231
|
+
let packageName = path.basename(parsed.path);
|
|
232
|
+
let version = null;
|
|
233
|
+
try {
|
|
234
|
+
const pkg = JSON.parse(await fsp.readFile(path.join(stagedPath, "package.json"), "utf8"));
|
|
235
|
+
packageName = pkg.name || packageName;
|
|
236
|
+
version = pkg.version || null;
|
|
237
|
+
if (pkg.repository) {
|
|
238
|
+
npmMetadata = {
|
|
239
|
+
name: pkg.name || packageName,
|
|
240
|
+
version: pkg.version || null,
|
|
241
|
+
repository: pkg.repository,
|
|
242
|
+
maintainers: []
|
|
243
|
+
};
|
|
244
|
+
}
|
|
245
|
+
} catch {
|
|
246
|
+
// no package.json or unparseable — fine, just no metadata
|
|
247
|
+
}
|
|
150
248
|
return {
|
|
151
249
|
type: "local",
|
|
152
250
|
source: parsed.path,
|
|
153
|
-
packageName
|
|
251
|
+
packageName,
|
|
252
|
+
version,
|
|
253
|
+
npmMetadata
|
|
154
254
|
};
|
|
155
255
|
}
|
|
156
256
|
|