pkgxray 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/quarantine.js +274 -25
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pkgxray",
3
- "version": "0.8.1",
3
+ "version": "0.9.0",
4
4
  "description": "Zero-dep local CLI and MCP server that scans npm packages for supply-chain risk. OSV vuln pre-check, sandboxed quarantine, tarball-integrity verification, calibrated static heuristics, GitHub provenance cross-check.",
5
5
  "license": "MIT",
6
6
  "author": "Jack Adams-Lovell",
package/src/quarantine.js CHANGED
@@ -13,6 +13,10 @@ const { diffNpmVsGithub } = require("./diff");
13
13
 
14
14
  const DEFAULT_MAX_FILE_BYTES = 256 * 1024;
15
15
  const DEFAULT_MAX_FILES = 600;
16
+ const DEFAULT_TARBALL_MAX_BYTES = 256 * 1024 * 1024;
17
+ const DEFAULT_TARBALL_MAX_ENTRIES = 5000;
18
+ const DEFAULT_DOWNLOAD_MAX_BYTES = 64 * 1024 * 1024;
19
+ const DEFAULT_DOWNLOAD_MAX_REDIRECTS = 5;
16
20
  const SKIP_DIRS = new Set([
17
21
  ".git",
18
22
  "node_modules",
@@ -388,6 +392,8 @@ async function resolveNpmPackage(specifier, options) {
388
392
  version: metadata.version,
389
393
  needsDownload: true,
390
394
  tarballUrl,
395
+ integrity: (metadata.dist && metadata.dist.integrity) || null,
396
+ shasum: (metadata.dist && metadata.dist.shasum) || null,
391
397
  npmMetadata: npmMetadataForEvidence(metadata)
392
398
  };
393
399
  }
@@ -397,10 +403,67 @@ async function downloadResolvedPackage(resolved, stagedPath) {
397
403
  await fsp.mkdir(path.dirname(stagedPath), { recursive: true, mode: 0o700 });
398
404
  await downloadFile(resolved.tarballUrl, archivePath);
399
405
  resolved.sha256 = await hashFile(archivePath);
406
+
407
+ // Verify against the npm registry's published integrity field BEFORE
408
+ // extracting. Delete the partial file on mismatch so we never leave a
409
+ // hostile tarball on disk.
410
+ try {
411
+ await verifyNpmTarballIntegrity(resolved, archivePath);
412
+ } catch (error) {
413
+ await fsp.rm(archivePath, { force: true });
414
+ throw error;
415
+ }
416
+
400
417
  await fsp.mkdir(stagedPath, { recursive: true, mode: 0o700 });
401
418
  await extractTarball(archivePath, stagedPath);
402
419
  }
403
420
 
421
+ async function verifyNpmTarballIntegrity(resolved, archivePath) {
422
+ if (resolved.integrity) {
423
+ const firstEntry = String(resolved.integrity).trim().split(/\s+/)[0];
424
+ const dashIndex = firstEntry.indexOf("-");
425
+ if (dashIndex <= 0) {
426
+ throw new Error(`npm tarball integrity field is malformed: ${resolved.integrity}`);
427
+ }
428
+ const algo = firstEntry.slice(0, dashIndex);
429
+ const expectedBase64 = firstEntry.slice(dashIndex + 1);
430
+ const actualBase64 = await hashFileDigest(archivePath, algo, "base64");
431
+ if (actualBase64 !== expectedBase64) {
432
+ throw new Error(
433
+ `npm tarball integrity mismatch: expected ${firstEntry} got ${algo}-${actualBase64}`
434
+ );
435
+ }
436
+ return;
437
+ }
438
+ if (resolved.shasum) {
439
+ const expectedHex = String(resolved.shasum).trim().toLowerCase();
440
+ const actualHex = (await hashFileDigest(archivePath, "sha1", "hex")).toLowerCase();
441
+ if (actualHex !== expectedHex) {
442
+ throw new Error(
443
+ `npm tarball integrity mismatch: expected sha1-${expectedHex} got sha1-${actualHex}`
444
+ );
445
+ }
446
+ return;
447
+ }
448
+ throw new Error("npm tarball has no published integrity field");
449
+ }
450
+
451
+ function hashFileDigest(filePath, algorithm, encoding) {
452
+ return new Promise((resolve, reject) => {
453
+ let hash;
454
+ try {
455
+ hash = crypto.createHash(algorithm);
456
+ } catch (error) {
457
+ reject(error);
458
+ return;
459
+ }
460
+ fs.createReadStream(filePath)
461
+ .on("data", (chunk) => hash.update(chunk))
462
+ .on("error", reject)
463
+ .on("end", () => resolve(hash.digest(encoding)));
464
+ });
465
+ }
466
+
404
467
  function npmMetadataForEvidence(metadata) {
405
468
  return {
406
469
  name: metadata.name,
@@ -412,18 +475,24 @@ function npmMetadataForEvidence(metadata) {
412
475
  };
413
476
  }
414
477
 
478
+ // fix-5: fetch the single version metadata endpoint instead of the full
479
+ // packument. For popular packages (lodash, react) this is the difference
480
+ // between a 10MB+ download and a few KB.
415
481
  async function fetchNpmMetadata(specifier, registry) {
416
482
  const parsed = parseNpmSpecifier(specifier);
417
483
  const encodedName = encodeURIComponent(parsed.name);
418
- const metadataUrl = `${registry.replace(/\/$/, "")}/${encodedName}`;
419
- const packageMetadata = await fetchJson(metadataUrl);
420
- const version =
421
- parsed.version ||
422
- (packageMetadata["dist-tags"] && packageMetadata["dist-tags"].latest);
423
- if (!version || !packageMetadata.versions || !packageMetadata.versions[version]) {
424
- throw new Error(`Version not found for npm package: ${specifier}`);
484
+ const versionPath = parsed.version
485
+ ? encodeURIComponent(parsed.version)
486
+ : "latest";
487
+ const url = `${registry.replace(/\/$/, "")}/${encodedName}/${versionPath}`;
488
+ try {
489
+ return await fetchJson(url);
490
+ } catch (error) {
491
+ if (error && error.statusCode === 404) {
492
+ throw new Error(`Version not found for npm package: ${specifier}`);
493
+ }
494
+ throw error;
425
495
  }
426
- return packageMetadata.versions[version];
427
496
  }
428
497
 
429
498
  function parseNpmSpecifier(specifier) {
@@ -451,9 +520,11 @@ function parseNpmSpecifier(specifier) {
451
520
  function fetchJson(url) {
452
521
  return new Promise((resolve, reject) => {
453
522
  https
454
- .get(url, { headers: { "user-agent": "supply-chain-auditor/0.1.0" } }, (response) => {
523
+ .get(url, { headers: { "user-agent": "pkgxray/0.9.0" } }, (response) => {
455
524
  if (response.statusCode < 200 || response.statusCode >= 300) {
456
- reject(new Error(`HTTP ${response.statusCode} from ${url}`));
525
+ const error = new Error(`HTTP ${response.statusCode} from ${url}`);
526
+ error.statusCode = response.statusCode;
527
+ reject(error);
457
528
  response.resume();
458
529
  return;
459
530
  }
@@ -556,22 +627,70 @@ function postJson(url, payload) {
556
627
  });
557
628
  }
558
629
 
559
- function downloadFile(url, destination) {
630
+ function downloadFile(url, destination, options = {}) {
631
+ const maxBytes = options.maxBytes || DEFAULT_DOWNLOAD_MAX_BYTES;
632
+ const maxRedirects = options.maxRedirects || DEFAULT_DOWNLOAD_MAX_REDIRECTS;
633
+ const originalUrl = url;
634
+ const http = require("node:http");
635
+
560
636
  return new Promise((resolve, reject) => {
561
637
  const file = fs.createWriteStream(destination, { mode: 0o600 });
562
- https
563
- .get(url, { headers: { "user-agent": "supply-chain-auditor/0.1.0" } }, (response) => {
564
- if (response.statusCode < 200 || response.statusCode >= 300) {
565
- reject(new Error(`HTTP ${response.statusCode} from ${url}`));
566
- response.resume();
567
- return;
638
+ let written = 0;
639
+ let settled = false;
640
+ const fail = (err) => {
641
+ if (settled) return;
642
+ settled = true;
643
+ file.destroy();
644
+ fs.unlink(destination, () => reject(err));
645
+ };
646
+ const succeed = () => {
647
+ if (settled) return;
648
+ settled = true;
649
+ file.close(() => resolve());
650
+ };
651
+
652
+ const get = (currentUrl, hops) => {
653
+ if (hops > maxRedirects) {
654
+ return fail(new Error(`Too many redirects from ${originalUrl}`));
655
+ }
656
+ const parsed = new URL(currentUrl);
657
+ const client = parsed.protocol === "http:" ? http : https;
658
+ const request = client.get(
659
+ {
660
+ hostname: parsed.hostname,
661
+ port: parsed.port || (parsed.protocol === "http:" ? 80 : 443),
662
+ path: parsed.pathname + parsed.search,
663
+ headers: { "user-agent": "pkgxray/0.9.0" }
664
+ },
665
+ (response) => {
666
+ if (
667
+ [301, 302, 303, 307, 308].includes(response.statusCode) &&
668
+ response.headers.location
669
+ ) {
670
+ response.resume();
671
+ return get(new URL(response.headers.location, currentUrl).toString(), hops + 1);
672
+ }
673
+ if (response.statusCode < 200 || response.statusCode >= 300) {
674
+ response.resume();
675
+ return fail(new Error(`HTTP ${response.statusCode} from ${currentUrl}`));
676
+ }
677
+ response.on("data", (chunk) => {
678
+ written += chunk.length;
679
+ if (written > maxBytes) {
680
+ response.destroy();
681
+ return fail(
682
+ new Error(`Download exceeded max size of ${maxBytes} bytes from ${originalUrl}`)
683
+ );
684
+ }
685
+ });
686
+ response.pipe(file);
687
+ file.on("finish", succeed);
688
+ file.on("error", fail);
568
689
  }
569
- response.pipe(file);
570
- file.on("finish", () => {
571
- file.close(resolve);
572
- });
573
- })
574
- .on("error", reject);
690
+ );
691
+ request.on("error", fail);
692
+ };
693
+ get(url, 0);
575
694
  });
576
695
  }
577
696
 
@@ -586,8 +705,138 @@ async function hashFile(filePath) {
586
705
  return hash.digest("hex");
587
706
  }
588
707
 
589
- function extractTarball(archivePath, destination) {
590
- return run("tar", ["-xzf", archivePath, "-C", destination, "--strip-components", "1"]);
708
+ async function extractTarball(archivePath, destination, options = {}) {
709
+ const maxBytes = options.maxTarballBytes || DEFAULT_TARBALL_MAX_BYTES;
710
+ const maxEntries = options.maxTarballEntries || DEFAULT_TARBALL_MAX_ENTRIES;
711
+
712
+ const listing = await runCapture("tar", ["-tvzf", archivePath]);
713
+ const lines = listing.split("\n").filter((line) => line.trim().length > 0);
714
+
715
+ if (lines.length > maxEntries) {
716
+ throw new Error(`Tarball rejected: ${lines.length} entries exceeds limit of ${maxEntries}`);
717
+ }
718
+
719
+ let totalBytes = 0;
720
+ for (const line of lines) {
721
+ const entry = parseTarListingLine(line);
722
+ if (!entry) {
723
+ throw new Error(`Tarball rejected: unparseable listing line: ${line}`);
724
+ }
725
+ assertSafeTarPath(entry.path);
726
+ if (entry.linkTarget !== null) {
727
+ assertSafeSymlinkTarget(entry.path, entry.linkTarget);
728
+ }
729
+ totalBytes += entry.size;
730
+ if (totalBytes > maxBytes) {
731
+ throw new Error(`Tarball rejected: uncompressed size exceeds limit of ${maxBytes} bytes`);
732
+ }
733
+ }
734
+
735
+ await run("tar", [
736
+ "-xzf", archivePath,
737
+ "-C", destination,
738
+ "--strip-components", "1",
739
+ "--no-same-owner", "--no-same-permissions"
740
+ ]);
741
+ }
742
+
743
+ // tar -tvzf listing formats differ between bsdtar (macOS) and GNU tar:
744
+ // bsdtar: "-rw-r--r-- 0 user group 1234 Jan 1 2020 path" (8 fields before path)
745
+ // GNU: "-rw-r--r-- user/group 1234 2020-01-01 12:00 path" (5 fields before path)
746
+ // Detect format by whether field 2 contains "/".
747
+ function parseTarListingLine(line) {
748
+ const parts = line.split(/\s+/).filter((p) => p.length > 0);
749
+ const mode = parts[0];
750
+ if (!mode || mode.length === 0) return null;
751
+ const typeChar = mode[0];
752
+
753
+ let sizeFieldIndex;
754
+ let prefixFieldCount;
755
+ if (parts.length >= 2 && parts[1].includes("/")) {
756
+ sizeFieldIndex = 2;
757
+ prefixFieldCount = 5;
758
+ } else {
759
+ sizeFieldIndex = 4;
760
+ prefixFieldCount = 8;
761
+ }
762
+
763
+ if (parts.length < prefixFieldCount + 1) return null;
764
+ const size = Number.parseInt(parts[sizeFieldIndex], 10);
765
+ if (!Number.isFinite(size) || size < 0) return null;
766
+
767
+ // Find byte offset of the (prefixFieldCount+1)-th whitespace field.
768
+ let fieldsSeen = 0;
769
+ let i = 0;
770
+ while (i < line.length && fieldsSeen < prefixFieldCount) {
771
+ while (i < line.length && /\s/.test(line[i])) i++;
772
+ if (i >= line.length) return null;
773
+ while (i < line.length && !/\s/.test(line[i])) i++;
774
+ fieldsSeen++;
775
+ }
776
+ while (i < line.length && /\s/.test(line[i])) i++;
777
+ if (i >= line.length) return null;
778
+
779
+ const remainder = line.slice(i);
780
+ let entryPath = remainder;
781
+ let linkTarget = null;
782
+ const arrowIdx = remainder.indexOf(" -> ");
783
+ if (arrowIdx !== -1) {
784
+ entryPath = remainder.slice(0, arrowIdx);
785
+ linkTarget = remainder.slice(arrowIdx + 4);
786
+ } else if (typeChar === "l") {
787
+ return null;
788
+ }
789
+ if (entryPath.length === 0) return null;
790
+ return { path: entryPath, size, linkTarget, typeChar };
791
+ }
792
+
793
+ function assertSafeTarPath(entryPath) {
794
+ if (entryPath.startsWith("/")) {
795
+ throw new Error(`Tarball rejected: absolute path entry: ${entryPath}`);
796
+ }
797
+ if (/^[A-Za-z]:[\\/]/.test(entryPath)) {
798
+ throw new Error(`Tarball rejected: drive-letter path entry: ${entryPath}`);
799
+ }
800
+ for (const segment of entryPath.split(/[\\/]+/)) {
801
+ if (segment === "..") {
802
+ throw new Error(`Tarball rejected: parent-traversal segment in: ${entryPath}`);
803
+ }
804
+ }
805
+ }
806
+
807
+ function assertSafeSymlinkTarget(entryPath, linkTarget) {
808
+ if (linkTarget.length === 0) {
809
+ throw new Error(`Tarball rejected: empty link target for: ${entryPath}`);
810
+ }
811
+ if (linkTarget.startsWith("/")) {
812
+ throw new Error(`Tarball rejected: absolute link target: ${entryPath} -> ${linkTarget}`);
813
+ }
814
+ if (/^[A-Za-z]:[\\/]/.test(linkTarget)) {
815
+ throw new Error(`Tarball rejected: drive-letter link target: ${entryPath} -> ${linkTarget}`);
816
+ }
817
+ const normalizedPath = entryPath.replace(/\\/g, "/");
818
+ const normalizedTarget = linkTarget.replace(/\\/g, "/");
819
+ const linkDir = path.posix.dirname(normalizedPath);
820
+ const joined = linkDir === "." ? normalizedTarget : path.posix.join(linkDir, normalizedTarget);
821
+ const normalized = path.posix.normalize(joined);
822
+ if (normalized.startsWith("../") || normalized === "..") {
823
+ throw new Error(`Tarball rejected: link escapes destination: ${entryPath} -> ${linkTarget}`);
824
+ }
825
+ }
826
+
827
+ function runCapture(command, args) {
828
+ return new Promise((resolve, reject) => {
829
+ const child = spawn(command, args, { stdio: ["ignore", "pipe", "pipe"] });
830
+ let stdout = "";
831
+ let stderr = "";
832
+ child.stdout.on("data", (chunk) => { stdout += chunk; });
833
+ child.stderr.on("data", (chunk) => { stderr += chunk; });
834
+ child.on("error", reject);
835
+ child.on("close", (code) => {
836
+ if (code === 0) resolve(stdout);
837
+ else reject(new Error(`${command} exited with ${code}: ${stderr.trim()}`));
838
+ });
839
+ });
591
840
  }
592
841
 
593
842
  function run(command, args) {