pkgxray 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/audit.js CHANGED
@@ -51,6 +51,9 @@ function parseArgs(argv) {
51
51
  options.vulnerabilityCheck = false;
52
52
  } else if (arg === "--no-github") {
53
53
  options.githubMetadata = false;
54
+ options.githubDiff = false;
55
+ } else if (arg === "--no-github-diff") {
56
+ options.githubDiff = false;
54
57
  } else {
55
58
  throw new Error(`Unknown argument: ${arg}`);
56
59
  }
package/bin/mcp-server.js CHANGED
@@ -107,6 +107,11 @@ function guardToolDefinition() {
107
107
  default: true,
108
108
  description: "Set false to skip the GitHub provenance cross-check."
109
109
  },
110
+ githubDiff: {
111
+ type: "boolean",
112
+ default: true,
113
+ description: "Set false to skip the npm-vs-GitHub source diff (saves a tarball download)."
114
+ },
110
115
  outputFormat: {
111
116
  type: "string",
112
117
  enum: ["markdown", "json"],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pkgxray",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "description": "Zero-dep local CLI and MCP server that scans npm packages for supply-chain risk. OSV vuln pre-check, sandboxed quarantine, tarball-integrity verification, calibrated static heuristics, GitHub provenance cross-check.",
5
5
  "license": "MIT",
6
6
  "author": "Jack Adams-Lovell",
package/src/auditor.js CHANGED
@@ -157,7 +157,8 @@ function normalizeEvidence(input) {
157
157
  evidence.knownVulnerabilities || evidence.vulnerabilities || evidence.osvVulnerabilities || [],
158
158
  sourceFiles: normalizeSourceFiles(
159
159
  evidence.sourceFiles || evidence.SOURCE_FILES || evidence.files || {}
160
- )
160
+ ),
161
+ npmVsGithubDiff: evidence.npmVsGithubDiff || null
161
162
  };
162
163
  }
163
164
 
@@ -243,7 +244,9 @@ const BAND_DEFINITIONS = [
243
244
  { band: "github-archived", label: "github-archived", categories: ["github-archived"], rationale: "Linked repository is archived or disabled — no maintenance, security issues will not be fixed." },
244
245
  { band: "github-young", label: "github-young", categories: ["github-young"], rationale: "Linked repository was created within the last 30 days — common slopsquat shape." },
245
246
  { band: "github-lonely", label: "github-lonely", categories: ["github-lonely"], rationale: "0 stars + 0 forks + low watcher count on a young repo. Low community signal." },
246
- { band: "github-stale", label: "github-stale", categories: ["github-stale"], rationale: "Repository hasn't been pushed to in over two years and isn't formally archived." }
247
+ { band: "github-stale", label: "github-stale", categories: ["github-stale"], rationale: "Repository hasn't been pushed to in over two years and isn't formally archived." },
248
+ { band: "npm-vs-github-divergence", label: "npm-vs-github-divergence", categories: ["npm-vs-github-divergence"], rationale: "Published npm tarball contains source files that aren't in (or differ from) the linked GitHub repo at the matching ref. Strong account-takeover / build-tampering signal." },
249
+ { band: "npm-vs-github-clean", label: "npm-vs-github-clean", categories: ["npm-vs-github-clean"], rationale: "npm tarball matches the linked GitHub repo at the published version." }
247
250
  ];
248
251
 
249
252
  const SEVERITY_RANK = { info: 0, low: 1, medium: 2, high: 3 };
@@ -291,6 +294,54 @@ function auditMetadata(evidence, findings) {
291
294
  inspectMetadataObject("NPM_METADATA", evidence.npmMetadata, findings);
292
295
  inspectGithubMetadata(evidence, findings);
293
296
  inspectKnownVulnerabilities(evidence.knownVulnerabilities, findings);
297
+ inspectNpmVsGithubDiff(evidence, findings);
298
+ }
299
+
300
/**
 * Turn the npm-vs-GitHub comparison stored on `evidence.npmVsGithubDiff`
 * into findings.
 *
 * Emits one "high" finding when source files exist only in the npm tarball,
 * one "high" finding when source files differ in content, and a single
 * "info" finding when neither applies and at least one file was seen.
 * Nothing is emitted when the comparison did not run (`compared` falsy).
 *
 * @param {object} evidence - Evidence object; only `npmVsGithubDiff` is read.
 * @param {Array<object>} findings - Findings array, appended to in place.
 * @returns {void}
 */
function inspectNpmVsGithubDiff(evidence, findings) {
  const diff = evidence.npmVsGithubDiff;
  if (!diff || !diff.compared) {
    // Not gating — silent skip. Common reasons: no github repo,
    // ref not found, github fetch failed.
    return;
  }

  const counts = diff.counts || {};
  // The diff object may have been supplied by an external caller, so never
  // interpolate a missing field as the literal text "undefined".
  const ref = diff.githubRef || "unknown-ref";

  // Render up to five example paths of one category as ": a, b, c", or an
  // empty string when no examples are available (avoids a dangling ": ").
  const exampleSuffix = (entries, category) => {
    const paths = (Array.isArray(entries) ? entries : [])
      .filter((entry) => entry && entry.category === category)
      .slice(0, 5)
      .map((entry) => entry.path);
    return paths.length > 0 ? `: ${paths.join(", ")}` : "";
  };

  if (counts.extraSource > 0) {
    findings.push({
      severity: "high",
      category: "npm-vs-github-divergence",
      file: "NPM_VS_GITHUB",
      snippet:
        `npm tarball contains ${counts.extraSource} source file(s) not in the linked GitHub repo @${ref}` +
        exampleSuffix(diff.suspiciousExtras, "extra-source"),
      rationale:
        "Source files present in the published tarball but absent from the matching GitHub ref. Classic account-takeover / build-server-compromise signal."
    });
  }

  if (counts.mismatchedSource > 0) {
    findings.push({
      severity: "high",
      category: "npm-vs-github-divergence",
      file: "NPM_VS_GITHUB",
      snippet:
        `${counts.mismatchedSource} source file(s) differ between npm tarball and GitHub repo @${ref}` +
        exampleSuffix(diff.suspiciousMismatches, "content-mismatch-source"),
      rationale:
        "Source files with the same path but different SHA256 in the published tarball vs the linked GitHub repo at the matching ref. Strong tampering signal."
    });
  }

  // The strict `=== 0` checks are deliberate: a diff without explicit zero
  // counts is never reported as clean.
  const sawFiles = counts.matched > 0 || counts.npmFiles > 0;
  if (counts.extraSource === 0 && counts.mismatchedSource === 0 && sawFiles) {
    findings.push({
      severity: "info",
      category: "npm-vs-github-clean",
      file: "NPM_VS_GITHUB",
      snippet: `${counts.matched || 0}/${counts.npmFiles || 0} files match GitHub @${ref}`,
      rationale: "npm tarball source files match the linked GitHub repo at the matching ref."
    });
  }
}
295
346
 
296
347
  const YOUNG_REPO_DAYS = 30;
package/src/diff.js ADDED
@@ -0,0 +1,281 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const fsp = require("node:fs/promises");
5
+ const path = require("node:path");
6
+ const crypto = require("node:crypto");
7
+
8
// Directory names that are never descended into when walking a tree.
const SKIP_DIRS = new Set([
  ".git",
  "node_modules",
  ".github",
  ".vscode",
  ".idea",
  "coverage",
  "__pycache__"
]);

// File patterns that are expected to differ — never used to drive findings.
const ALWAYS_IGNORE = [
  /(?:^|\/)package\.json$/,
  /(?:^|\/)package-lock\.json$/,
  /(?:^|\/)yarn\.lock$/,
  /(?:^|\/)pnpm-lock\.yaml$/,
  /(?:^|\/)\.npmignore$/,
  /(?:^|\/)\.gitignore$/,
  /(?:^|\/)\.gitattributes$/,
  /(?:^|\/)CHANGELOG(?:\.md)?$/i,
  /(?:^|\/)CONTRIBUTING(?:\.md)?$/i,
  /(?:^|\/)\.npmrc$/
];

// Patterns that mean "this is build output". When the caller reports a build
// script, divergences here are classified as "expected-build-output";
// otherwise they get a dedicated build-output category.
const BUILD_OUTPUT_PATTERNS = [
  /(?:^|\/)dist\//,
  /(?:^|\/)build\//,
  /(?:^|\/)lib\//,
  /(?:^|\/)es\//,
  /(?:^|\/)esm\//,
  /(?:^|\/)cjs\//,
  /(?:^|\/)umd\//,
  /\.min\.js$/,
  /\.min\.mjs$/,
  /\.min\.css$/,
  /\.d\.ts$/,
  /\.d\.cts$/,
  /\.d\.mts$/,
  /\.js\.map$/,
  /\.css\.map$/
];

// Source extensions whose contents we care about most. Mismatches or extras
// here are the strongest ATO signal.
const SOURCE_EXTENSIONS = new Set([
  ".js", ".cjs", ".mjs", ".jsx",
  ".ts", ".tsx",
  ".vue", ".svelte",
  ".py", ".rb", ".go", ".rs", ".java", ".cs", ".php",
  ".sh", ".ps1", ".bash",
  ".json", ".toml", ".yaml", ".yml"
]);

// Top-level directories that should be published 1:1 from the repo even when
// a build step exists. `lib/` is deliberately absent: it is matched by
// BUILD_OUTPUT_PATTERNS and therefore treated as build output.
const LIKELY_SOURCE_DIR = /^(?:src|tests?|scripts|spec)\//;

// Category names for a file that exists only in the npm tree.
const EXTRA_CATEGORIES = {
  build: "extra-build-output",
  source: "extra-source",
  other: "extra-other"
};

// Category names for a file whose content differs between the two trees.
const MISMATCH_CATEGORIES = {
  build: "content-mismatch-build",
  source: "content-mismatch-source",
  other: "content-mismatch-other"
};

function isAlwaysIgnored(relPath) {
  return ALWAYS_IGNORE.some((re) => re.test(relPath));
}

function isBuildOutput(relPath) {
  return BUILD_OUTPUT_PATTERNS.some((re) => re.test(relPath));
}

function isSourceFile(relPath) {
  const ext = path.extname(relPath).toLowerCase();
  return SOURCE_EXTENSIONS.has(ext);
}

// True when a tree entry carries a "skipped:*" placeholder instead of a hash.
function isSkippedEntry(entry) {
  return typeof entry.sha256 === "string" && entry.sha256.startsWith("skipped");
}

// Resolve `subdir` below `root`; returns the absolute directory, or null when
// the result would land outside `root` (e.g. "../other"). `path.join` is used
// so that a leading "/" in `subdir` is still treated as relative to `root`.
function resolveWithin(root, subdir) {
  const rootAbs = path.resolve(root);
  if (!subdir) return rootAbs;
  const target = path.resolve(path.join(rootAbs, String(subdir)));
  const rel = path.relative(rootAbs, target);
  if (rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) {
    return null;
  }
  return target;
}

// Pick the category for one divergent path. Build output wins over the
// source check; source files outside LIKELY_SOURCE_DIR are assumed to be
// generated when the package has a build script.
function classifyDivergence(relPath, hasBuildScript, categories) {
  if (isBuildOutput(relPath)) {
    return hasBuildScript ? "expected-build-output" : categories.build;
  }
  if (!isSourceFile(relPath)) {
    return categories.other;
  }
  if (hasBuildScript && !LIKELY_SOURCE_DIR.test(relPath)) {
    return "expected-build-output";
  }
  return categories.source;
}

/**
 * Walk a directory tree breadth-first and hash every regular file.
 *
 * @param {string} root - Tree root.
 * @param {string} subdir - Optional path below `root` to start from.
 * @param {{maxFiles?: number, maxBytes?: number, maxFileBytes?: number}} [limits]
 *   Caps on hashed file count, total hashed bytes and per-file size.
 * @returns {Promise<{files: Map<string, {size: number, sha256: string}>, truncated: boolean} | null>}
 *   `null` when the start directory is missing or escapes `root`. `files` is
 *   keyed by "/"-separated relative path; entries that were not hashed carry
 *   a "skipped:<reason>" placeholder. `truncated` is true when the walk
 *   stopped at a cap with entries still unvisited, i.e. paths may be missing
 *   from `files`.
 */
async function hashTree(root, subdir, limits = {}) {
  const baseDir = resolveWithin(root, subdir);
  if (baseDir === null) {
    return null;
  }
  try {
    await fsp.access(baseDir);
  } catch {
    return null;
  }

  const maxFiles = limits.maxFiles || 5000;
  const maxBytes = limits.maxBytes || 50 * 1024 * 1024;
  const maxFileBytes = limits.maxFileBytes || 1024 * 1024;

  const files = new Map();
  const pending = [""];
  let cursor = 0;
  let hashedBytes = 0;
  let hashedFiles = 0;
  let truncated = false;

  while (cursor < pending.length) {
    if (hashedFiles >= maxFiles || hashedBytes >= maxBytes) {
      // Directories are still queued but the budget is spent.
      truncated = true;
      break;
    }
    const rel = pending[cursor];
    cursor += 1;

    let entries;
    try {
      entries = await fsp.readdir(path.join(baseDir, rel), { withFileTypes: true });
    } catch {
      continue;
    }

    for (let i = 0; i < entries.length; i += 1) {
      const entry = entries[i];
      const childRel = rel ? `${rel}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        if (!SKIP_DIRS.has(entry.name)) pending.push(childRel);
        continue;
      }
      // Symlinks, sockets, etc. are ignored.
      if (!entry.isFile()) continue;

      const childFull = path.join(baseDir, childRel);
      let stat;
      try {
        stat = await fsp.stat(childFull);
      } catch {
        continue;
      }
      if (stat.size > maxFileBytes) {
        files.set(childRel, { size: stat.size, sha256: "skipped:too-large" });
        continue;
      }
      if (hashedBytes + stat.size > maxBytes) {
        // soft cap — record presence without hash
        files.set(childRel, { size: stat.size, sha256: "skipped:tree-budget" });
        continue;
      }

      let sha256;
      try {
        sha256 = await hashFile(childFull);
      } catch {
        // Same best-effort policy as readdir/stat failures: one unreadable
        // file must not abort the whole walk.
        files.set(childRel, { size: stat.size, sha256: "skipped:unreadable" });
        continue;
      }
      files.set(childRel, { size: stat.size, sha256 });
      hashedBytes += stat.size;
      hashedFiles += 1;
      if (hashedFiles >= maxFiles) {
        if (i < entries.length - 1) truncated = true;
        break;
      }
    }
  }
  return { files, truncated };
}

// Stream a file through SHA-256 and resolve with the hex digest.
function hashFile(filePath) {
  return new Promise((resolve, reject) => {
    const hash = crypto.createHash("sha256");
    fs.createReadStream(filePath)
      .on("data", (chunk) => hash.update(chunk))
      .on("error", reject)
      .on("end", () => resolve(hash.digest("hex")));
  });
}

/**
 * Compare a staged npm package against the matching GitHub repo subtree.
 *
 * @param {object} params
 * @param {string} params.npmStagedPath - Root of the unpacked npm tarball.
 * @param {string} params.githubStagedPath - Root of the unpacked GitHub tree.
 * @param {string|null} [params.subdir] - Path below the GitHub root to compare against.
 * @param {boolean} params.hasBuildScript - Whether the package declares a build step.
 * @returns {Promise<object>} `{ compared: true, counts, suspiciousExtras,
 *   suspiciousMismatches, ... }` when the trees were compared, otherwise
 *   `{ compared: false, reason, ... }`.
 */
async function diffNpmVsGithub({ npmStagedPath, githubStagedPath, subdir, hasBuildScript }) {
  // `subdir` originates from package metadata. If it escaped the GitHub tree
  // (e.g. "../<npm dir>") the tarball could be compared against itself and
  // reported as clean, so refuse it outright.
  if (subdir && resolveWithin(githubStagedPath, subdir) === null) {
    return {
      compared: false,
      reason: "github-subdir-outside-tree",
      subdir: String(subdir)
    };
  }

  const limits = { maxFiles: 5000, maxBytes: 50 * 1024 * 1024 };
  const [npmScan, ghScan] = await Promise.all([
    hashTree(npmStagedPath, "", limits),
    hashTree(githubStagedPath, subdir || "", limits)
  ]);
  if (!npmScan || !ghScan) {
    return {
      compared: false,
      reason: !ghScan ? "github-subdir-missing" : "npm-tree-missing"
    };
  }
  const npmTree = npmScan.files;
  const ghTree = ghScan.files;

  if (ghScan.truncated) {
    // Paths missing from a truncated GitHub walk would be indistinguishable
    // from genuine npm-only extras, so do not report on this tree at all.
    return {
      compared: false,
      reason: "github-tree-truncated",
      hasBuildScript,
      subdir: subdir || null,
      counts: { npmFiles: npmTree.size, ghFiles: ghTree.size },
      note:
        "GitHub tree exceeded the walk limits, so files missing from it cannot be told apart from npm-only extras; skipping."
    };
  }

  const extraInNpm = [];
  const mismatched = [];
  const matched = [];
  const unverified = [];

  for (const [rel, npmEntry] of npmTree) {
    if (isAlwaysIgnored(rel)) continue;
    const ghEntry = ghTree.get(rel);
    if (!ghEntry) {
      // In the npm tarball but not in the GitHub tree at the matching ref.
      extraInNpm.push({
        path: rel,
        category: classifyDivergence(rel, hasBuildScript, EXTRA_CATEGORIES),
        size: npmEntry.size
      });
      continue;
    }
    if (isSkippedEntry(npmEntry) || isSkippedEntry(ghEntry)) {
      // At least one side was never hashed. Two identical "skipped:*"
      // placeholders must not be mistaken for matching content.
      unverified.push(rel);
      continue;
    }
    if (npmEntry.sha256 !== ghEntry.sha256) {
      mismatched.push({
        path: rel,
        category: classifyDivergence(rel, hasBuildScript, MISMATCH_CATEGORIES),
        npmSize: npmEntry.size,
        ghSize: ghEntry.size
      });
    } else {
      matched.push(rel);
    }
  }

  const extraSource = extraInNpm.filter((f) => f.category === "extra-source");
  const extraBuild = extraInNpm.filter((f) => f.category === "extra-build-output");
  const mismatchedSource = mismatched.filter((f) => f.category === "content-mismatch-source");
  const mismatchedBuild = mismatched.filter((f) => f.category === "content-mismatch-build");

  // Tree-overlap sanity check. Many real packages don't publish a 1:1 mirror
  // of their repo (flat per-function modules, bundled src/, monorepo
  // subtrees). With little overlap at matching paths the comparison is mostly
  // noise, so bail out with an honest reason rather than firing HIGH.
  // The denominator is every file recorded for the npm tree, including
  // ALWAYS_IGNORE files; the numerator is every compared path present in
  // both trees, hashed or not.
  const consideredFiles = npmTree.size;
  const sharedPaths = matched.length + mismatched.length + unverified.length;
  const overlapRatio = consideredFiles > 0 ? sharedPaths / consideredFiles : 0;
  const MIN_OVERLAP_RATIO = 0.3;

  const summary = {
    hasBuildScript,
    subdir: subdir || null,
    overlapRatio: Number(overlapRatio.toFixed(2))
  };
  const baseCounts = {
    npmFiles: npmTree.size,
    ghFiles: ghTree.size,
    matched: matched.length,
    mismatched: mismatched.length,
    extraInNpm: extraInNpm.length,
    unverified: unverified.length
  };

  if (overlapRatio < MIN_OVERLAP_RATIO && extraSource.length > 20) {
    return {
      compared: false,
      reason: "tree-layout-differs",
      ...summary,
      counts: baseCounts,
      note:
        "npm tarball and GitHub tree have very little overlap at matching paths — the package is likely built / bundled before publish. Diff would be unreliable; skipping."
    };
  }

  // Second skip case: of the files that were hashed on both sides, most
  // differ. That points at published artifacts generated from the repo
  // source, where a content diff is unreliable.
  const overlapCount = matched.length + mismatched.length;
  if (overlapCount >= 5 && mismatched.length > matched.length * 2) {
    return {
      compared: false,
      reason: "tree-mostly-generated",
      ...summary,
      counts: baseCounts,
      note:
        "Most overlapping files differ in content — published artifacts are likely generated from the repo source (e.g. bundling, transpilation). Diff would be unreliable; skipping."
    };
  }

  return {
    compared: true,
    ...summary,
    npmTreeTruncated: npmScan.truncated,
    counts: {
      ...baseCounts,
      extraSource: extraSource.length,
      mismatchedSource: mismatchedSource.length
    },
    suspiciousExtras: extraSource.concat(extraBuild).slice(0, 25),
    suspiciousMismatches: mismatchedSource.concat(mismatchedBuild).slice(0, 25)
  };
}
280
+
281
+ module.exports = { diffNpmVsGithub };
package/src/github.js CHANGED
@@ -1,9 +1,12 @@
1
1
  "use strict";
2
2
 
3
+ const fs = require("node:fs");
3
4
  const fsp = require("node:fs/promises");
4
5
  const os = require("node:os");
5
6
  const path = require("node:path");
6
7
  const https = require("node:https");
8
+ const crypto = require("node:crypto");
9
+ const { spawn } = require("node:child_process");
7
10
 
8
11
  const USER_AGENT = "pkgxray/0.6.0";
9
12
  const CACHE_DIR = path.join(os.homedir(), ".cache", "pkgxray", "github");
@@ -160,7 +163,135 @@ async function fetchRepoMetadata(repository, options = {}) {
160
163
  }
161
164
  }
162
165
 
166
+ // ---- Tarball download + ref resolution ----
167
+
168
// Directory holding downloaded GitHub archives, one file per owner/repo/ref.
const TARBALL_CACHE_DIR = path.join(os.homedir(), ".cache", "pkgxray", "tarballs");
// Cached archives newer than this are reused instead of re-downloaded (24h).
const TARBALL_TTL_MS = 86400000;
// Timeout passed to the HTTPS request for an archive download.
const TARBALL_TIMEOUT_MS = 15000;
// Downloads are aborted once they exceed this many bytes (100MB).
const MAX_TARBALL_BYTES = 104857600;

/**
 * Build the cache file path for one repository ref.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} ref - Tag or branch name.
 * @returns {string} `<cache dir>/<sha1 of "owner/repo@ref">.tgz`
 */
function tarballCachePath(owner, repo, ref) {
  const hasher = crypto.createHash("sha1");
  hasher.update(`${owner}/${repo}@${ref}`);
  const fileName = `${hasher.digest("hex")}.tgz`;
  return path.join(TARBALL_CACHE_DIR, fileName);
}
180
+
181
/**
 * Download a GitHub codeload archive to `destination`, following up to five
 * redirects.
 *
 * The body is streamed into a sibling ".partial" file and renamed onto
 * `destination` only after it was received completely, so an interrupted or
 * failed download never leaves a truncated archive at the final path.
 *
 * @param {string} url - Initial https URL.
 * @param {string} destination - Final archive path.
 * @returns {Promise<void>} Rejects on HTTP errors (404 rejections carry
 *   `statusCode === 404`), network errors, timeouts, write errors, too many
 *   redirects, or when the body exceeds MAX_TARBALL_BYTES.
 */
async function downloadCodeload(url, destination) {
  return new Promise((resolve, reject) => {
    const partialPath = `${destination}.${process.pid}.${Date.now()}.partial`;
    const file = fs.createWriteStream(partialPath, { mode: 0o600 });
    let written = 0;
    let settled = false;

    // Abort: drop the partial file, then reject exactly once.
    const fail = (err) => {
      if (settled) return;
      settled = true;
      file.destroy();
      fs.unlink(partialPath, () => reject(err));
    };

    // Success: move the complete archive into place, then resolve.
    const publish = () => {
      if (settled) return;
      settled = true;
      fs.rename(partialPath, destination, (err) => {
        if (!err) {
          resolve();
          return;
        }
        fs.unlink(partialPath, () => reject(err));
      });
    };

    // Without this listener a disk error would surface as an uncaught
    // 'error' event instead of a rejection.
    file.on("error", fail);
    file.on("finish", () => file.close((err) => (err ? fail(err) : publish())));

    const get = (currentUrl, hops) => {
      if (hops > 5) return fail(new Error("Too many redirects"));
      let parsed;
      try {
        parsed = new URL(currentUrl);
      } catch (err) {
        return fail(err);
      }
      const request = https.get(
        {
          hostname: parsed.hostname,
          port: parsed.port || undefined,
          path: parsed.pathname + parsed.search,
          headers: { "user-agent": USER_AGENT },
          timeout: TARBALL_TIMEOUT_MS
        },
        (response) => {
          if ([301, 302, 303, 307, 308].includes(response.statusCode) && response.headers.location) {
            response.resume();
            let nextUrl;
            try {
              nextUrl = new URL(response.headers.location, currentUrl).toString();
            } catch (err) {
              return fail(err);
            }
            return get(nextUrl, hops + 1);
          }
          if (response.statusCode === 404) {
            response.resume();
            const err = new Error(`GitHub codeload 404: ${currentUrl}`);
            err.statusCode = 404;
            return fail(err);
          }
          if (response.statusCode < 200 || response.statusCode >= 300) {
            response.resume();
            return fail(new Error(`Codeload HTTP ${response.statusCode}`));
          }
          response.on("error", fail);
          // pipe() only ends the file on a clean 'end'; a connection that
          // drops mid-body would otherwise leave this promise pending forever.
          response.on("close", () => {
            if (!response.complete) fail(new Error("Codeload response ended prematurely"));
          });
          response.on("data", (chunk) => {
            written += chunk.length;
            if (written > MAX_TARBALL_BYTES) {
              response.destroy();
              fail(new Error(`Codeload exceeded ${MAX_TARBALL_BYTES} bytes`));
            }
          });
          response.pipe(file);
        }
      );
      request.on("error", fail);
      request.on("timeout", () => request.destroy(new Error("Codeload request timed out")));
    };
    get(url, 0);
  });
}
234
+
235
/**
 * Run a command to completion.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Argument vector (no shell is involved).
 * @returns {Promise<void>} Resolves on exit code 0. Rejects with the spawn
 *   error, or with an Error naming the exit code / terminating signal followed
 *   by the captured stderr.
 */
function run(command, args) {
  // Cap on the number of stderr characters kept for the error message.
  const STDERR_LIMIT = 64 * 1024;
  return new Promise((resolve, reject) => {
    // stdout is discarded: a piped stdout that nobody reads fills the OS pipe
    // buffer and blocks the child forever once it writes enough output.
    const child = spawn(command, args, { stdio: ["ignore", "ignore", "pipe"] });
    let stderr = "";
    // Decode as a stream so multi-byte characters split across chunks survive.
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      if (stderr.length < STDERR_LIMIT) stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      // `code` is null when the child was killed by a signal.
      const outcome = code === null ? `was terminated by ${signal}` : `exited with ${code}`;
      reject(new Error(`${command} ${outcome}: ${stderr.trim()}`));
    });
  });
}
249
+
250
/**
 * Unpack a gzipped tarball into `destination` using the system `tar`.
 *
 * The destination directory is created first (mode 0o700). The archive's
 * single top-level directory is stripped, and ownership/permissions recorded
 * in the archive are not applied.
 *
 * @param {string} archivePath - Path to the .tgz file.
 * @param {string} destination - Directory to extract into.
 * @returns {Promise<void>} Settles with the outcome of the `tar` process.
 */
async function extractTarball(archivePath, destination) {
  await fsp.mkdir(destination, { recursive: true, mode: 0o700 });
  const tarArgs = ["-xzf", archivePath, "-C", destination];
  tarArgs.push("--strip-components", "1");
  tarArgs.push("--no-same-owner", "--no-same-permissions");
  return run("tar", tarArgs);
}
259
+
260
/**
 * Obtain a repository archive for a package version.
 *
 * Refs are tried in order — `v<version>`, `<version>`, then the default
 * branch — and the first one that is freshly cached or downloads
 * successfully wins. Downloads are written to the cache path for that ref.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} [version] - Package version used to derive tag names.
 * @param {string} [defaultBranch] - Fallback ref.
 * @returns {Promise<{ref: string, archivePath: string, fromCache: boolean} | null>}
 *   `null` when every candidate ref returned 404. Any other download error
 *   is rethrown.
 */
async function fetchRepoTarballForVersion(owner, repo, version, defaultBranch) {
  await fsp.mkdir(TARBALL_CACHE_DIR, { recursive: true, mode: 0o700 });

  const refsToTry = [
    ...(version ? [`v${version}`, version] : []),
    ...(defaultBranch ? [defaultBranch] : [])
  ];

  for (const ref of refsToTry) {
    const archivePath = tarballCachePath(owner, repo, ref);

    // A stat failure just means there is no cached copy for this ref.
    let cachedMtimeMs = null;
    try {
      cachedMtimeMs = (await fsp.stat(archivePath)).mtimeMs;
    } catch {
      cachedMtimeMs = null;
    }
    if (cachedMtimeMs !== null && Date.now() - cachedMtimeMs < TARBALL_TTL_MS) {
      return { ref, archivePath, fromCache: true };
    }

    const archiveUrl = `https://codeload.github.com/${owner}/${repo}/tar.gz/${encodeURIComponent(ref)}`;
    try {
      await downloadCodeload(archiveUrl, archivePath);
    } catch (error) {
      // 404 means this ref does not exist; move on to the next candidate.
      if (error.statusCode === 404) continue;
      throw error;
    }
    return { ref, archivePath, fromCache: false };
  }

  return null;
}
291
+
163
292
  module.exports = {
164
293
  parseGithubRepo,
165
- fetchRepoMetadata
294
+ fetchRepoMetadata,
295
+ fetchRepoTarballForVersion,
296
+ extractTarball
166
297
  };
package/src/quarantine.js CHANGED
@@ -8,7 +8,8 @@ const os = require("node:os");
8
8
  const path = require("node:path");
9
9
  const { spawn } = require("node:child_process");
10
10
  const { auditEvidence } = require("./auditor");
11
- const { fetchRepoMetadata } = require("./github");
11
+ const { fetchRepoMetadata, fetchRepoTarballForVersion, extractTarball: extractTarballGh } = require("./github");
12
+ const { diffNpmVsGithub } = require("./diff");
12
13
 
13
14
  const DEFAULT_MAX_FILE_BYTES = 256 * 1024;
14
15
  const DEFAULT_MAX_FILES = 600;
@@ -104,13 +105,41 @@ async function guardExtension(reference, options = {}) {
104
105
  const githubMetadata = await githubMetadataPromise;
105
106
  timings.githubMetadataMs = elapsed(githubStart);
106
107
 
108
+ // npm vs GitHub diff (Phase 3) — only for npm packages where we have repo
109
+ // metadata. Runs serially after we have both trees; tarballs are cached so
110
+ // re-runs are fast.
111
+ let npmVsGithubDiff = null;
112
+ if (
113
+ options.githubDiff !== false &&
114
+ resolved.type === "npm" &&
115
+ githubMetadata && githubMetadata.found &&
116
+ vulnerabilities.length === 0 &&
117
+ Object.keys(sourceFiles).length > 0
118
+ ) {
119
+ const diffStart = now();
120
+ try {
121
+ npmVsGithubDiff = await runNpmVsGithubDiff({
122
+ resolved,
123
+ npmStagedPath: stagedPath,
124
+ githubMetadata,
125
+ workspace
126
+ });
127
+ } catch (error) {
128
+ npmVsGithubDiff = { compared: false, reason: "diff-error", message: error.message };
129
+ }
130
+ timings.diffMs = elapsed(diffStart);
131
+ } else {
132
+ timings.diffMs = 0;
133
+ }
134
+
107
135
  const evidence = {
108
136
  packageName: resolved.packageName || reference,
109
137
  npmMetadata: resolved.npmMetadata || null,
110
138
  githubMetadata,
111
139
  webPresence: null,
112
140
  knownVulnerabilities: vulnerabilities,
113
- sourceFiles
141
+ sourceFiles,
142
+ npmVsGithubDiff
114
143
  };
115
144
  const auditStart = now();
116
145
  const report = auditEvidence(evidence);
@@ -123,6 +152,7 @@ async function guardExtension(reference, options = {}) {
123
152
  resolved,
124
153
  sourceFiles,
125
154
  githubMetadata,
155
+ npmVsGithubDiff,
126
156
  vulnerabilityPrecheck: {
127
157
  enabled: options.vulnerabilityCheck !== false,
128
158
  database: "OSV",
@@ -143,6 +173,53 @@ async function guardExtension(reference, options = {}) {
143
173
  return result;
144
174
  }
145
175
 
176
// Validate a package-declared `repository.directory` against the root of the
// extracted GitHub tree. Returns `{ ok: true, subdir }` with a normalised
// relative path (null for "no subdirectory"), or `{ ok: false }` when the
// value is not a string or would resolve outside the tree.
function resolveRepoSubdir(treeRoot, directory) {
  if (!directory) {
    return { ok: true, subdir: null };
  }
  if (typeof directory !== "string") {
    return { ok: false };
  }
  const root = path.resolve(treeRoot);
  const rel = path.relative(root, path.resolve(path.join(root, directory)));
  if (rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) {
    return { ok: false };
  }
  return { ok: true, subdir: rel || null };
}

/**
 * Fetch the GitHub tree matching the resolved package version and diff the
 * staged npm package against it.
 *
 * @param {object} params
 * @param {object} params.resolved - Resolved package; `version` and
 *   `npmMetadata.repository` are read.
 * @param {string} params.npmStagedPath - Root of the unpacked npm tarball.
 * @param {object} params.githubMetadata - Supplies `owner`, `repo`, `default_branch`.
 * @param {string} params.workspace - Scratch directory; the GitHub tree is
 *   extracted to `<workspace>/github-tree`.
 * @returns {Promise<object>} The diff result extended with `githubRef` and
 *   `tarballFromCache`, or `{ compared: false, reason }` when no comparison
 *   was possible.
 */
async function runNpmVsGithubDiff({ resolved, npmStagedPath, githubMetadata, workspace }) {
  const version = resolved.version;
  const ghStagePath = path.join(workspace, "github-tree");

  // package.json may set repository.directory for monorepos — narrow the
  // comparison to that subpath if present. The value comes from the package
  // under audit, so it is untrusted: a "../" path could point the comparison
  // back at the staged npm files and make a tampered tarball look clean.
  // Checked before any download so a bad value costs nothing.
  const pkgRepo = resolved.npmMetadata && resolved.npmMetadata.repository;
  const declaredDir = pkgRepo && typeof pkgRepo === "object" ? pkgRepo.directory || null : null;
  const location = resolveRepoSubdir(ghStagePath, declaredDir);
  if (!location.ok) {
    return {
      compared: false,
      reason: "invalid-repository-directory",
      directory: String(declaredDir)
    };
  }

  const tarball = await fetchRepoTarballForVersion(
    githubMetadata.owner,
    githubMetadata.repo,
    version,
    githubMetadata.default_branch
  );
  if (!tarball) {
    return { compared: false, reason: "no-matching-ref", versionTried: version };
  }

  await extractTarballGh(tarball.archivePath, ghStagePath);

  // Detect a publish-time build script (means built artifacts ≠ repo is normal)
  const scripts = await readScripts(npmStagedPath);
  const hasBuildScript = Boolean(scripts.prepare || scripts.prepack || scripts.build);

  const diff = await diffNpmVsGithub({
    npmStagedPath,
    githubStagedPath: ghStagePath,
    subdir: location.subdir,
    hasBuildScript
  });

  return {
    ...diff,
    githubRef: tarball.ref,
    tarballFromCache: tarball.fromCache
  };
}
213
+
214
/**
 * Read the `scripts` map from `<stagedPath>/package.json`.
 *
 * Best effort: a missing, unreadable or malformed manifest — or a `scripts`
 * field that is not a plain object — yields an empty object, so callers can
 * always do property lookups on the result.
 *
 * @param {string} stagedPath - Directory containing package.json.
 * @returns {Promise<object>} The scripts object, or `{}`.
 */
async function readScripts(stagedPath) {
  try {
    const raw = await fsp.readFile(path.join(stagedPath, "package.json"), "utf8");
    const pkg = JSON.parse(raw);
    const scripts = pkg && pkg.scripts;
    const isPlainObject = Boolean(scripts) && typeof scripts === "object" && !Array.isArray(scripts);
    return isPlainObject ? scripts : {};
  } catch {
    // Deliberately swallowed: "no readable manifest" means "no known scripts".
    return {};
  }
}
222
+
146
223
  async function stageReference(reference, stagedPath, options) {
147
224
  const parsed = parseReference(reference);
148
225
  if (parsed.type === "local") {