pkgxray 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pkgxray",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "description": "Zero-dep local CLI and MCP server that scans npm packages for supply-chain risk. OSV vuln pre-check, sandboxed quarantine, tarball-integrity verification, calibrated static heuristics, GitHub provenance cross-check.",
5
5
  "license": "MIT",
6
6
  "author": "Jack Adams-Lovell",
package/src/diff.js CHANGED
@@ -156,6 +156,18 @@ async function diffNpmVsGithub({ npmStagedPath, githubStagedPath, subdir, hasBui
156
156
  };
157
157
  }
158
158
 
159
+ // Pre-compute the set of directories that EXIST in github. We use this to
160
+ // decide whether an extra file is in a "real source dir" (sibling source
161
+ // files exist in github) or in a path github doesn't have at all (more
162
+ // likely build output).
163
+ const ghDirs = new Set();
164
+ for (const ghPath of ghTree.keys()) {
165
+ const parts = ghPath.split("/");
166
+ for (let i = 1; i < parts.length; i += 1) {
167
+ ghDirs.add(parts.slice(0, i).join("/"));
168
+ }
169
+ }
170
+
159
171
  const extraInNpm = [];
160
172
  const mismatched = [];
161
173
  const matched = [];
@@ -164,20 +176,25 @@ async function diffNpmVsGithub({ npmStagedPath, githubStagedPath, subdir, hasBui
164
176
  if (isAlwaysIgnored(rel)) continue;
165
177
  const ghEntry = ghTree.get(rel);
166
178
  if (!ghEntry) {
167
- // Files in the npm tarball but NOT in the github repo at the matching
168
- // ref. If the package has a build script, root-level JS at non-source
169
- // paths is probably bundled / generated and we can't reliably catch
170
- // tampering there — demote to silent. We DO still surface extras in
171
- // paths that look like source trees (`src/`, `lib/`, `tests/`,
172
- // `scripts/`) since those should be 1:1 even with a build step.
179
+ const parentDir = rel.includes("/") ? rel.split("/").slice(0, -1).join("/") : "";
180
+ const parentExistsInGh = parentDir === "" || ghDirs.has(parentDir);
181
+ // An extra file inside a directory that exists in github is the strong
182
+ // ATO signal — github has the dir, the attacker just dropped one more
183
+ // file in it. An extra file at a path github doesn't have at all is
184
+ // more likely build output the repo never committed.
173
185
  const inLikelySourceDir = /^(?:src|tests?|scripts|spec)\//.test(rel);
174
- const category = isBuildOutput(rel)
175
- ? hasBuildScript ? "expected-build-output" : "extra-build-output"
176
- : isSourceFile(rel)
177
- ? hasBuildScript && !inLikelySourceDir
178
- ? "expected-build-output"
179
- : "extra-source"
180
- : "extra-other";
186
+ let category;
187
+ if (parentExistsInGh && isSourceFile(rel)) {
188
+ category = "extra-source";
189
+ } else if (isBuildOutput(rel)) {
190
+ category = hasBuildScript ? "expected-build-output" : "extra-build-output";
191
+ } else if (isSourceFile(rel)) {
192
+ category = hasBuildScript && !inLikelySourceDir
193
+ ? "expected-build-output"
194
+ : "extra-source";
195
+ } else {
196
+ category = "extra-other";
197
+ }
181
198
  extraInNpm.push({ path: rel, category, size: npmEntry.size });
182
199
  continue;
183
200
  }
package/src/github.js CHANGED
@@ -49,7 +49,9 @@ function parseGithubRepo(repository) {
49
49
  /^github:([^/]+)\/(.+)$/,
50
50
  /^(?:https?|git):\/\/github\.com\/([^/]+)\/([^/?#]+)/,
51
51
  /^git@github\.com:([^/]+)\/([^/?#]+)/,
52
- /^ssh:\/\/git@github\.com\/([^/]+)\/([^/?#]+)/
52
+ /^ssh:\/\/git@github\.com\/([^/]+)\/([^/?#]+)/,
53
+ // npm shorthand: bare "owner/repo" defaults to GitHub
54
+ /^([A-Za-z0-9_.-]+)\/([A-Za-z0-9_.-]+)$/
53
55
  ];
54
56
  for (const pattern of patterns) {
55
57
  const match = cleaned.match(pattern);
package/src/quarantine.js CHANGED
@@ -13,6 +13,10 @@ const { diffNpmVsGithub } = require("./diff");
13
13
 
14
14
  const DEFAULT_MAX_FILE_BYTES = 256 * 1024;
15
15
  const DEFAULT_MAX_FILES = 600;
16
+ const DEFAULT_TARBALL_MAX_BYTES = 256 * 1024 * 1024;
17
+ const DEFAULT_TARBALL_MAX_ENTRIES = 5000;
18
+ const DEFAULT_DOWNLOAD_MAX_BYTES = 64 * 1024 * 1024;
19
+ const DEFAULT_DOWNLOAD_MAX_REDIRECTS = 5;
16
20
  const SKIP_DIRS = new Set([
17
21
  ".git",
18
22
  "node_modules",
@@ -111,7 +115,7 @@ async function guardExtension(reference, options = {}) {
111
115
  let npmVsGithubDiff = null;
112
116
  if (
113
117
  options.githubDiff !== false &&
114
- resolved.type === "npm" &&
118
+ (resolved.type === "npm" || resolved.type === "local") &&
115
119
  githubMetadata && githubMetadata.found &&
116
120
  vulnerabilities.length === 0 &&
117
121
  Object.keys(sourceFiles).length > 0
@@ -224,10 +228,33 @@ async function stageReference(reference, stagedPath, options) {
224
228
  const parsed = parseReference(reference);
225
229
  if (parsed.type === "local") {
226
230
  await copyLocalPath(parsed.path, stagedPath);
231
+ // Populate npmMetadata from the staged package.json so downstream phases
232
+ // (github metadata cross-check, npm-vs-github diff) can work on local
233
+ // packages too.
234
+ let npmMetadata = null;
235
+ let packageName = path.basename(parsed.path);
236
+ let version = null;
237
+ try {
238
+ const pkg = JSON.parse(await fsp.readFile(path.join(stagedPath, "package.json"), "utf8"));
239
+ packageName = pkg.name || packageName;
240
+ version = pkg.version || null;
241
+ if (pkg.repository) {
242
+ npmMetadata = {
243
+ name: pkg.name || packageName,
244
+ version: pkg.version || null,
245
+ repository: pkg.repository,
246
+ maintainers: []
247
+ };
248
+ }
249
+ } catch {
250
+ // no package.json or unparseable — fine, just no metadata
251
+ }
227
252
  return {
228
253
  type: "local",
229
254
  source: parsed.path,
230
- packageName: path.basename(parsed.path)
255
+ packageName,
256
+ version,
257
+ npmMetadata
231
258
  };
232
259
  }
233
260
 
@@ -365,6 +392,8 @@ async function resolveNpmPackage(specifier, options) {
365
392
  version: metadata.version,
366
393
  needsDownload: true,
367
394
  tarballUrl,
395
+ integrity: (metadata.dist && metadata.dist.integrity) || null,
396
+ shasum: (metadata.dist && metadata.dist.shasum) || null,
368
397
  npmMetadata: npmMetadataForEvidence(metadata)
369
398
  };
370
399
  }
@@ -374,10 +403,67 @@ async function downloadResolvedPackage(resolved, stagedPath) {
374
403
  await fsp.mkdir(path.dirname(stagedPath), { recursive: true, mode: 0o700 });
375
404
  await downloadFile(resolved.tarballUrl, archivePath);
376
405
  resolved.sha256 = await hashFile(archivePath);
406
+
407
+ // Verify against the npm registry's published integrity field BEFORE
408
+ // extracting. Delete the partial file on mismatch so we never leave a
409
+ // hostile tarball on disk.
410
+ try {
411
+ await verifyNpmTarballIntegrity(resolved, archivePath);
412
+ } catch (error) {
413
+ await fsp.rm(archivePath, { force: true });
414
+ throw error;
415
+ }
416
+
377
417
  await fsp.mkdir(stagedPath, { recursive: true, mode: 0o700 });
378
418
  await extractTarball(archivePath, stagedPath);
379
419
  }
380
420
 
421
+ async function verifyNpmTarballIntegrity(resolved, archivePath) {
422
+ if (resolved.integrity) {
423
+ const firstEntry = String(resolved.integrity).trim().split(/\s+/)[0];
424
+ const dashIndex = firstEntry.indexOf("-");
425
+ if (dashIndex <= 0) {
426
+ throw new Error(`npm tarball integrity field is malformed: ${resolved.integrity}`);
427
+ }
428
+ const algo = firstEntry.slice(0, dashIndex);
429
+ const expectedBase64 = firstEntry.slice(dashIndex + 1);
430
+ const actualBase64 = await hashFileDigest(archivePath, algo, "base64");
431
+ if (actualBase64 !== expectedBase64) {
432
+ throw new Error(
433
+ `npm tarball integrity mismatch: expected ${firstEntry} got ${algo}-${actualBase64}`
434
+ );
435
+ }
436
+ return;
437
+ }
438
+ if (resolved.shasum) {
439
+ const expectedHex = String(resolved.shasum).trim().toLowerCase();
440
+ const actualHex = (await hashFileDigest(archivePath, "sha1", "hex")).toLowerCase();
441
+ if (actualHex !== expectedHex) {
442
+ throw new Error(
443
+ `npm tarball integrity mismatch: expected sha1-${expectedHex} got sha1-${actualHex}`
444
+ );
445
+ }
446
+ return;
447
+ }
448
+ throw new Error("npm tarball has no published integrity field");
449
+ }
450
+
451
+ function hashFileDigest(filePath, algorithm, encoding) {
452
+ return new Promise((resolve, reject) => {
453
+ let hash;
454
+ try {
455
+ hash = crypto.createHash(algorithm);
456
+ } catch (error) {
457
+ reject(error);
458
+ return;
459
+ }
460
+ fs.createReadStream(filePath)
461
+ .on("data", (chunk) => hash.update(chunk))
462
+ .on("error", reject)
463
+ .on("end", () => resolve(hash.digest(encoding)));
464
+ });
465
+ }
466
+
381
467
  function npmMetadataForEvidence(metadata) {
382
468
  return {
383
469
  name: metadata.name,
@@ -389,18 +475,24 @@ function npmMetadataForEvidence(metadata) {
389
475
  };
390
476
  }
391
477
 
478
+ // fix-5: fetch the single version metadata endpoint instead of the full
479
+ // packument. For popular packages (lodash, react) this is the difference
480
+ // between a 10MB+ download and a few KB.
392
481
  async function fetchNpmMetadata(specifier, registry) {
393
482
  const parsed = parseNpmSpecifier(specifier);
394
483
  const encodedName = encodeURIComponent(parsed.name);
395
- const metadataUrl = `${registry.replace(/\/$/, "")}/${encodedName}`;
396
- const packageMetadata = await fetchJson(metadataUrl);
397
- const version =
398
- parsed.version ||
399
- (packageMetadata["dist-tags"] && packageMetadata["dist-tags"].latest);
400
- if (!version || !packageMetadata.versions || !packageMetadata.versions[version]) {
401
- throw new Error(`Version not found for npm package: ${specifier}`);
484
+ const versionPath = parsed.version
485
+ ? encodeURIComponent(parsed.version)
486
+ : "latest";
487
+ const url = `${registry.replace(/\/$/, "")}/${encodedName}/${versionPath}`;
488
+ try {
489
+ return await fetchJson(url);
490
+ } catch (error) {
491
+ if (error && error.statusCode === 404) {
492
+ throw new Error(`Version not found for npm package: ${specifier}`);
493
+ }
494
+ throw error;
402
495
  }
403
- return packageMetadata.versions[version];
404
496
  }
405
497
 
406
498
  function parseNpmSpecifier(specifier) {
@@ -428,9 +520,11 @@ function parseNpmSpecifier(specifier) {
428
520
  function fetchJson(url) {
429
521
  return new Promise((resolve, reject) => {
430
522
  https
431
- .get(url, { headers: { "user-agent": "supply-chain-auditor/0.1.0" } }, (response) => {
523
+ .get(url, { headers: { "user-agent": "pkgxray/0.9.0" } }, (response) => {
432
524
  if (response.statusCode < 200 || response.statusCode >= 300) {
433
- reject(new Error(`HTTP ${response.statusCode} from ${url}`));
525
+ const error = new Error(`HTTP ${response.statusCode} from ${url}`);
526
+ error.statusCode = response.statusCode;
527
+ reject(error);
434
528
  response.resume();
435
529
  return;
436
530
  }
@@ -533,22 +627,70 @@ function postJson(url, payload) {
533
627
  });
534
628
  }
535
629
 
536
- function downloadFile(url, destination) {
630
+ function downloadFile(url, destination, options = {}) {
631
+ const maxBytes = options.maxBytes || DEFAULT_DOWNLOAD_MAX_BYTES;
632
+ const maxRedirects = options.maxRedirects || DEFAULT_DOWNLOAD_MAX_REDIRECTS;
633
+ const originalUrl = url;
634
+ const http = require("node:http");
635
+
537
636
  return new Promise((resolve, reject) => {
538
637
  const file = fs.createWriteStream(destination, { mode: 0o600 });
539
- https
540
- .get(url, { headers: { "user-agent": "supply-chain-auditor/0.1.0" } }, (response) => {
541
- if (response.statusCode < 200 || response.statusCode >= 300) {
542
- reject(new Error(`HTTP ${response.statusCode} from ${url}`));
543
- response.resume();
544
- return;
638
+ let written = 0;
639
+ let settled = false;
640
+ const fail = (err) => {
641
+ if (settled) return;
642
+ settled = true;
643
+ file.destroy();
644
+ fs.unlink(destination, () => reject(err));
645
+ };
646
+ const succeed = () => {
647
+ if (settled) return;
648
+ settled = true;
649
+ file.close(() => resolve());
650
+ };
651
+
652
+ const get = (currentUrl, hops) => {
653
+ if (hops > maxRedirects) {
654
+ return fail(new Error(`Too many redirects from ${originalUrl}`));
655
+ }
656
+ const parsed = new URL(currentUrl);
657
+ const client = parsed.protocol === "http:" ? http : https;
658
+ const request = client.get(
659
+ {
660
+ hostname: parsed.hostname,
661
+ port: parsed.port || (parsed.protocol === "http:" ? 80 : 443),
662
+ path: parsed.pathname + parsed.search,
663
+ headers: { "user-agent": "pkgxray/0.9.0" }
664
+ },
665
+ (response) => {
666
+ if (
667
+ [301, 302, 303, 307, 308].includes(response.statusCode) &&
668
+ response.headers.location
669
+ ) {
670
+ response.resume();
671
+ return get(new URL(response.headers.location, currentUrl).toString(), hops + 1);
672
+ }
673
+ if (response.statusCode < 200 || response.statusCode >= 300) {
674
+ response.resume();
675
+ return fail(new Error(`HTTP ${response.statusCode} from ${currentUrl}`));
676
+ }
677
+ response.on("data", (chunk) => {
678
+ written += chunk.length;
679
+ if (written > maxBytes) {
680
+ response.destroy();
681
+ return fail(
682
+ new Error(`Download exceeded max size of ${maxBytes} bytes from ${originalUrl}`)
683
+ );
684
+ }
685
+ });
686
+ response.pipe(file);
687
+ file.on("finish", succeed);
688
+ file.on("error", fail);
545
689
  }
546
- response.pipe(file);
547
- file.on("finish", () => {
548
- file.close(resolve);
549
- });
550
- })
551
- .on("error", reject);
690
+ );
691
+ request.on("error", fail);
692
+ };
693
+ get(url, 0);
552
694
  });
553
695
  }
554
696
 
@@ -563,8 +705,138 @@ async function hashFile(filePath) {
563
705
  return hash.digest("hex");
564
706
  }
565
707
 
566
- function extractTarball(archivePath, destination) {
567
- return run("tar", ["-xzf", archivePath, "-C", destination, "--strip-components", "1"]);
708
+ async function extractTarball(archivePath, destination, options = {}) {
709
+ const maxBytes = options.maxTarballBytes || DEFAULT_TARBALL_MAX_BYTES;
710
+ const maxEntries = options.maxTarballEntries || DEFAULT_TARBALL_MAX_ENTRIES;
711
+
712
+ const listing = await runCapture("tar", ["-tvzf", archivePath]);
713
+ const lines = listing.split("\n").filter((line) => line.trim().length > 0);
714
+
715
+ if (lines.length > maxEntries) {
716
+ throw new Error(`Tarball rejected: ${lines.length} entries exceeds limit of ${maxEntries}`);
717
+ }
718
+
719
+ let totalBytes = 0;
720
+ for (const line of lines) {
721
+ const entry = parseTarListingLine(line);
722
+ if (!entry) {
723
+ throw new Error(`Tarball rejected: unparseable listing line: ${line}`);
724
+ }
725
+ assertSafeTarPath(entry.path);
726
+ if (entry.linkTarget !== null) {
727
+ assertSafeSymlinkTarget(entry.path, entry.linkTarget);
728
+ }
729
+ totalBytes += entry.size;
730
+ if (totalBytes > maxBytes) {
731
+ throw new Error(`Tarball rejected: uncompressed size exceeds limit of ${maxBytes} bytes`);
732
+ }
733
+ }
734
+
735
+ await run("tar", [
736
+ "-xzf", archivePath,
737
+ "-C", destination,
738
+ "--strip-components", "1",
739
+ "--no-same-owner", "--no-same-permissions"
740
+ ]);
741
+ }
742
+
743
+ // tar -tvzf listing formats differ between bsdtar (macOS) and GNU tar:
744
+ // bsdtar: "-rw-r--r-- 0 user group 1234 Jan 1 2020 path" (8 fields before path)
745
+ // GNU: "-rw-r--r-- user/group 1234 2020-01-01 12:00 path" (5 fields before path)
746
+ // Detect format by whether field 2 contains "/".
747
+ function parseTarListingLine(line) {
748
+ const parts = line.split(/\s+/).filter((p) => p.length > 0);
749
+ const mode = parts[0];
750
+ if (!mode || mode.length === 0) return null;
751
+ const typeChar = mode[0];
752
+
753
+ let sizeFieldIndex;
754
+ let prefixFieldCount;
755
+ if (parts.length >= 2 && parts[1].includes("/")) {
756
+ sizeFieldIndex = 2;
757
+ prefixFieldCount = 5;
758
+ } else {
759
+ sizeFieldIndex = 4;
760
+ prefixFieldCount = 8;
761
+ }
762
+
763
+ if (parts.length < prefixFieldCount + 1) return null;
764
+ const size = Number.parseInt(parts[sizeFieldIndex], 10);
765
+ if (!Number.isFinite(size) || size < 0) return null;
766
+
767
+ // Find byte offset of the (prefixFieldCount+1)-th whitespace field.
768
+ let fieldsSeen = 0;
769
+ let i = 0;
770
+ while (i < line.length && fieldsSeen < prefixFieldCount) {
771
+ while (i < line.length && /\s/.test(line[i])) i++;
772
+ if (i >= line.length) return null;
773
+ while (i < line.length && !/\s/.test(line[i])) i++;
774
+ fieldsSeen++;
775
+ }
776
+ while (i < line.length && /\s/.test(line[i])) i++;
777
+ if (i >= line.length) return null;
778
+
779
+ const remainder = line.slice(i);
780
+ let entryPath = remainder;
781
+ let linkTarget = null;
782
+ const arrowIdx = remainder.indexOf(" -> ");
783
+ if (arrowIdx !== -1) {
784
+ entryPath = remainder.slice(0, arrowIdx);
785
+ linkTarget = remainder.slice(arrowIdx + 4);
786
+ } else if (typeChar === "l") {
787
+ return null;
788
+ }
789
+ if (entryPath.length === 0) return null;
790
+ return { path: entryPath, size, linkTarget, typeChar };
791
+ }
792
+
793
+ function assertSafeTarPath(entryPath) {
794
+ if (entryPath.startsWith("/")) {
795
+ throw new Error(`Tarball rejected: absolute path entry: ${entryPath}`);
796
+ }
797
+ if (/^[A-Za-z]:[\\/]/.test(entryPath)) {
798
+ throw new Error(`Tarball rejected: drive-letter path entry: ${entryPath}`);
799
+ }
800
+ for (const segment of entryPath.split(/[\\/]+/)) {
801
+ if (segment === "..") {
802
+ throw new Error(`Tarball rejected: parent-traversal segment in: ${entryPath}`);
803
+ }
804
+ }
805
+ }
806
+
807
+ function assertSafeSymlinkTarget(entryPath, linkTarget) {
808
+ if (linkTarget.length === 0) {
809
+ throw new Error(`Tarball rejected: empty link target for: ${entryPath}`);
810
+ }
811
+ if (linkTarget.startsWith("/")) {
812
+ throw new Error(`Tarball rejected: absolute link target: ${entryPath} -> ${linkTarget}`);
813
+ }
814
+ if (/^[A-Za-z]:[\\/]/.test(linkTarget)) {
815
+ throw new Error(`Tarball rejected: drive-letter link target: ${entryPath} -> ${linkTarget}`);
816
+ }
817
+ const normalizedPath = entryPath.replace(/\\/g, "/");
818
+ const normalizedTarget = linkTarget.replace(/\\/g, "/");
819
+ const linkDir = path.posix.dirname(normalizedPath);
820
+ const joined = linkDir === "." ? normalizedTarget : path.posix.join(linkDir, normalizedTarget);
821
+ const normalized = path.posix.normalize(joined);
822
+ if (normalized.startsWith("../") || normalized === "..") {
823
+ throw new Error(`Tarball rejected: link escapes destination: ${entryPath} -> ${linkTarget}`);
824
+ }
825
+ }
826
+
827
+ function runCapture(command, args) {
828
+ return new Promise((resolve, reject) => {
829
+ const child = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"] });
830
+ let stdout = "";
831
+ let stderr = "";
832
+ child.stdout.on("data", (chunk) => { stdout += chunk; });
833
+ child.stderr.on("data", (chunk) => { stderr += chunk; });
834
+ child.on("error", reject);
835
+ child.on("close", (code) => {
836
+ if (code === 0) resolve(stdout);
837
+ else reject(new Error(`${command} exited with ${code}: ${stderr.trim()}`));
838
+ });
839
+ });
568
840
  }
569
841
 
570
842
  function run(command, args) {