npm - dependencyiq - Versions diffs - 2.0.0 → 2.1.0 - Mend

dependencyiq 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +374 -0
package/package.json +1 -1
package/src/agent.js +1 -1
package/src/blastRadius.js +37 -1
package/src/fleetAggregator.js +155 -155
package/src/fleetDashboardGenerator.js +199 -199
package/src/fleetSnapshot.js +103 -103
package/src/httpRetry.js +48 -48
package/src/orbitClient.js +40 -0
package/src/scanners/supplyChainTrustSignals.js +472 -472

package/src/scanners/supplyChainTrustSignals.js (changed)

@@ -1,472 +1,472 @@
-/**
- * Supply-Chain Trust Signals.
- *
- * The risk score in riskCalculator.js answers "how bad is this CVE, given
- * real exposure." This module answers a different question that CVSS
- * can't: "does this dependency change *look like* the early shape of a
- * maintainer-compromise attack, independent of whether a CVE has been
- * filed yet?" Log4Shell-style CVEs get filed eventually; account-takeover
- * attacks (axios Mar 2026, ua-parser-js Oct 2021, event-stream Nov 2018)
- * are visible in the registry data *before* anyone files a CVE, if you
- * know what to look at:
- *
- *   1. Lifecycle script risk    — did this version add/change a
- *      preinstall/install/postinstall script? That's the actual payload
- *      delivery mechanism in the axios and event-stream attacks.
- *   2. Publish cadence anomaly  — was this version published unusually
- *      fast after the previous one, relative to this package's own
- *      historical release cadence? ua-parser-js shipped 3 malicious
- *      versions in ~4 hours; axios shipped 2 in one sitting after a
- *      credential compromise. A z-score against the package's own
- *      history catches "uncharacteristically fast," not "fast."
- *   3. Dependency confusion     — does this name resolve from a private
- *      registry while an identically-named package also exists
- *      publicly? That's the exact pattern from the 2021 Birsan research
- *      that paid out across Apple/PayPal/Shopify/Netflix/Uber.
- *   4. Typosquatting            — is this name one or two edits away
- *      from a genuinely popular package?
- *   5. Single-maintainer        — a real, available bus-factor proxy
- *      (current maintainer count), not a guess about abandonment.
- *
- * Design rule carried over from riskCalculator.js: never blend this into
- * the CVSS-based risk score. A package can be perfectly patched (CVSS
- * score of 0 findings) and still carry a high trust-risk score — these
- * are reported side by side, never merged into one number, so a reader
- * can never mistake "no known CVE" for "trustworthy."
- *
- * Every signal that can't be computed (network failure, unsupported
- * ecosystem, insufficient history) reports `available: false` and
- * contributes exactly 0 — consistent with this project's "never
- * fabricate" rule. A signal that is honestly unavailable is not the same
- * as a signal that found nothing wrong.
- */
-const axios = require('axios');
-const { withRetry } = require('../httpRetry');
-const NPM_REGISTRY = 'https://registry.npmjs.org';
-const PYPI_REGISTRY = 'https://pypi.org/pypi';
-const LIFECYCLE_SCRIPT_KEYS = ['preinstall', 'install', 'postinstall'];
-// A small, real list of genuinely high-download packages per ecosystem,
-// used only as typosquat *targets* — not exhaustive, just enough to catch
-// the "one edit away from something everyone installs" pattern.
-const POPULAR_PACKAGES = {
-  npm: [
-    'lodash', 'react', 'react-dom', 'express', 'axios', 'chalk', 'commander',
-    'debug', 'async', 'underscore', 'moment', 'webpack', 'eslint', 'jest',
-    'typescript', 'request', 'vue', 'angular', 'jquery', 'babel', 'redux',
-    'next', 'mongoose', 'socket.io', 'dotenv', 'cors', 'uuid', 'yargs',
-  ],
-  PyPI: [
-    'requests', 'numpy', 'flask', 'django', 'pandas', 'boto3', 'urllib3',
-    'pyyaml', 'setuptools', 'click', 'pillow', 'pytest', 'scipy', 'sqlalchemy',
-    'jinja2', 'cryptography', 'certifi', 'six', 'idna', 'attrs',
-  ],
-};
-// ---------------------------------------------------------------------------
-// 1. Typosquatting — pure, no network.
-// ---------------------------------------------------------------------------
-/** Standard Levenshtein edit distance (insert/delete/substitute = cost 1). */
-function levenshteinDistance(a, b) {
-  const m = a.length;
-  const n = b.length;
-  if (m === 0) return n;
-  if (n === 0) return m;
-  let prev = Array.from({ length: n + 1 }, (_, j) => j);
-  for (let i = 1; i <= m; i += 1) {
-    const curr = [i];
-    for (let j = 1; j <= n; j += 1) {
-      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
-      curr[j] = Math.min(
-        prev[j] + 1, // deletion
-        curr[j - 1] + 1, // insertion
-        prev[j - 1] + cost // substitution
-      );
-    }
-    prev = curr;
-  }
-  return prev[n];
-}
-/**
- * Is `packageName` suspiciously close to a genuinely popular package?
- * @returns {Object} { available: true, suspected, closestMatch, distance, similarity }
- */
-function detectTyposquat(packageName, ecosystem) {
-  const candidates = POPULAR_PACKAGES[ecosystem] || [];
-  if (candidates.length === 0) {
-    return { available: false, reason: `No popular-package list for ecosystem "${ecosystem}"` };
-  }
-  if (candidates.includes(packageName)) {
-    return { available: true, suspected: false, closestMatch: packageName, distance: 0, similarity: 1 };
-  }
-  let best = null;
-  for (const candidate of candidates) {
-    // Skip comparisons that can't plausibly be a typo (very different length).
-    const plausibleLength = Math.abs(candidate.length - packageName.length) <= 3;
-    if (plausibleLength) {
-      const distance = levenshteinDistance(packageName, candidate);
-      if (!best || distance < best.distance) best = { candidate, distance };
-    }
-  }
-  if (!best) return { available: true, suspected: false, closestMatch: null, distance: null, similarity: null };
-  const maxLen = Math.max(packageName.length, best.candidate.length);
-  const similarity = 1 - best.distance / maxLen;
-  // Distance 1-2 on a real package name (length >= 4, to avoid noisy
-  // false positives on very short names) is the typosquat zone; distance 0
-  // is an exact match (handled above), and >2 is just "a different word."
-  const suspected = packageName.length >= 4 && best.distance > 0 && best.distance <= 2 && similarity >= 0.6;
-  return { available: true, suspected, closestMatch: best.candidate, distance: best.distance, similarity };
-}
-// ---------------------------------------------------------------------------
-// 2. Dependency confusion — pure core + thin network wrapper.
-// ---------------------------------------------------------------------------
-const PUBLIC_REGISTRY_HOST_PATTERN = /registry\.npmjs\.org|pypi\.org|files\.pythonhosted\.org/i;
-/**
- * Pure decision: given where this install actually resolved from, and
- * whether a same-named package exists on the *public* registry, is this
- * dependency-confusion-shaped?
- */
-function assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry }) {
-  if (!resolvedUrl) {
-    return { available: false, reason: 'No lockfile resolved-URL provided — cannot determine install source' };
-  }
-  const resolvedFromPublic = PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl);
-  if (resolvedFromPublic) {
-    return { available: true, risk: false, resolvedFromPublic: true, reason: 'Resolved from the public registry — no confusion risk' };
-  }
-  if (existsOnPublicRegistry) {
-    return {
-      available: true,
-      risk: true,
-      resolvedFromPublic: false,
-      reason: 'Resolves from a private/internal registry, but a same-named package also exists publicly — a misconfigured environment (missing scope/registry override) could silently install the public package instead.',
-    };
-  }
-  return {
-    available: true,
-    risk: false,
-    resolvedFromPublic: false,
-    reason: 'Resolves from a private registry; no same-named package on the public registry yet — lower squat risk today, but the name is unclaimed and could be registered later.',
-  };
-}
-async function publicRegistryHasPackage(packageName, ecosystem) {
-  const url = ecosystem === 'PyPI'
-    ? `${PYPI_REGISTRY}/${encodeURIComponent(packageName)}/json`
-    : `${NPM_REGISTRY}/${encodeURIComponent(packageName)}`;
-  try {
-    // A 404 here is a real, final answer ("this name doesn't exist
-    // publicly") not a transient failure — withRetry already knows not
-    // to retry 4xx, so it surfaces immediately for the catch below.
-    await withRetry(() => axios.get(url, { timeout: 8000 }));
-    return true;
-  } catch (error) {
-    if (error.response?.status === 404) return false;
-    throw error;
-  }
-}
-async function checkDependencyConfusion(packageName, ecosystem, resolvedUrl) {
-  if (!resolvedUrl) return assessDependencyConfusion({ resolvedUrl: null });
-  if (PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl)) {
-    return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry: null });
-  }
-  try {
-    const existsOnPublicRegistry = await publicRegistryHasPackage(packageName, ecosystem);
-    return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry });
-  } catch (error) {
-    return { available: false, reason: `Public registry lookup failed: ${error.message}` };
-  }
-}
-// ---------------------------------------------------------------------------
-// 3. Publish cadence anomaly — pure z-score core + thin network wrapper.
-// ---------------------------------------------------------------------------
-function mean(values) {
-  return values.reduce((sum, v) => sum + v, 0) / values.length;
-}
-function stdDev(values) {
-  const m = mean(values);
-  const variance = values.reduce((sum, v) => sum + (v - m) ** 2, 0) / values.length;
-  return Math.sqrt(variance);
-}
-/**
- * Was `lastIntervalHours` (the gap between the previous release and the
- * one being installed now) anomalously short compared to this package's
- * own historical release cadence?
- *
- *   z = (mean(historicalIntervals) - lastInterval) / max(stdDev(historicalIntervals), 1)
- *
- * A positive z means "faster than usual"; z is in units of the package's
- * own historical standard deviation, so a package that normally ships
- * erratically (high stdDev) needs a much shorter interval to look
- * anomalous than one that ships on a metronomic schedule — the z-score
- * adapts to each package's own baseline instead of using one global
- * threshold. The `stdDev` floor of 1 hour avoids a divide-by-near-zero
- * blowup for packages with an almost perfectly regular cadence.
- *
- * Flagging requires BOTH a statistical anomaly (z > 2, i.e. more than two
- * of the package's own standard deviations faster than its norm) AND an
- * absolute floor (< 24h) — the z-score alone would also flag a perfectly
- * normal package whose typical cadence is itself sub-daily.
- *
- * @param {number[]} historicalIntervalsHours - gaps between releases
- *   *before* the release being evaluated, oldest pattern first
- * @param {number} lastIntervalHours - gap immediately before the release
- *   being evaluated
- * @returns {Object} { available, zScore, meanIntervalHours, stdDevHours,
- *   lastIntervalHours, anomalous, historicalReleaseCount }
- */
-function computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours) {
-  const MIN_HISTORY = 3;
-  if (!Array.isArray(historicalIntervalsHours) || historicalIntervalsHours.length < MIN_HISTORY) {
-    return { available: false, reason: `Need at least ${MIN_HISTORY} prior releases to establish a cadence baseline, found ${historicalIntervalsHours?.length || 0}` };
-  }
-  if (typeof lastIntervalHours !== 'number' || Number.isNaN(lastIntervalHours)) {
-    return { available: false, reason: 'No measurable interval for the release being evaluated' };
-  }
-  const meanIntervalHours = mean(historicalIntervalsHours);
-  const stdDevHours = stdDev(historicalIntervalsHours);
-  const zScore = (meanIntervalHours - lastIntervalHours) / Math.max(stdDevHours, 1);
-  const anomalous = zScore > 2 && lastIntervalHours < 24;
-  return {
-    available: true,
-    zScore: Math.round(zScore * 100) / 100,
-    meanIntervalHours: Math.round(meanIntervalHours * 100) / 100,
-    stdDevHours: Math.round(stdDevHours * 100) / 100,
-    lastIntervalHours: Math.round(lastIntervalHours * 100) / 100,
-    anomalous,
-    historicalReleaseCount: historicalIntervalsHours.length,
-  };
-}
-function realVersionTimeline(versionTimes, allVersions) {
-  return (allVersions || [])
-    .filter(v => !/[-+]/.test(v)) // skip pre-releases/build metadata
-    .map(v => ({ version: v, time: versionTimes[v] ? new Date(versionTimes[v]).getTime() : null }))
-    .filter(v => v.time !== null && !Number.isNaN(v.time))
-    .sort((a, b) => a.time - b.time);
-}
-async function fetchNpmPackument(packageName) {
-  const { data } = await withRetry(() => axios.get(`${NPM_REGISTRY}/${encodeURIComponent(packageName)}`, { timeout: 8000 }));
-  return data;
-}
-async function analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion) {
-  if (ecosystem !== 'npm') {
-    return { available: false, reason: `Publish-cadence analysis only implemented for npm, not ${ecosystem}` };
-  }
-  let data;
-  try {
-    data = await fetchNpmPackument(packageName);
-  } catch (error) {
-    return { available: false, reason: `Registry lookup failed: ${error.message}` };
-  }
-  const versionTimes = data.time || {};
-  const timeline = realVersionTimeline(versionTimes, Object.keys(data.versions || {}));
-  const index = timeline.findIndex(v => v.version === targetVersion);
-  if (index <= 0) {
-    return { available: false, reason: index === -1 ? 'Target version not found in registry history' : 'Target version is the first release — no prior interval to compare' };
-  }
-  const historicalIntervalsHours = [];
-  for (let i = 1; i < index; i += 1) {
-    historicalIntervalsHours.push((timeline[i].time - timeline[i - 1].time) / 3600000);
-  }
-  const lastIntervalHours = (timeline[index].time - timeline[index - 1].time) / 3600000;
-  return computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours);
-}
-// ---------------------------------------------------------------------------
-// 4. Maintainer count — pure core + thin network wrapper.
-// ---------------------------------------------------------------------------
-function assessMaintainerSignal(maintainerCount) {
-  if (typeof maintainerCount !== 'number' || maintainerCount < 0) {
-    return { available: false, reason: 'No maintainer count available' };
-  }
-  const singleMaintainer = maintainerCount <= 1;
-  return {
-    available: true,
-    maintainerCount,
-    singleMaintainer,
-    reason: singleMaintainer
-      ? 'Single maintainer on record — no second account needs to be compromised, and no second reviewer would catch a malicious publish.'
-      : `${maintainerCount} maintainers on record.`,
-  };
-}
-async function analyzeMaintainerSignal(packageName, ecosystem) {
-  if (ecosystem !== 'npm') {
-    return { available: false, reason: `Maintainer-list lookup only implemented for npm, not ${ecosystem}` };
-  }
-  try {
-    const data = await fetchNpmPackument(packageName);
-    return assessMaintainerSignal((data.maintainers || []).length);
-  } catch (error) {
-    return { available: false, reason: `Registry lookup failed: ${error.message}` };
-  }
-}
-// ---------------------------------------------------------------------------
-// 5. Lifecycle script risk — pure core + thin network wrapper.
-// ---------------------------------------------------------------------------
-function assessLifecycleScriptRisk(targetScripts = {}, previousScripts = {}) {
-  const flaggedScripts = LIFECYCLE_SCRIPT_KEYS.filter(
-    key => targetScripts[key] && targetScripts[key] !== previousScripts[key]
-  );
-  return {
-    available: true,
-    flaggedScripts,
-    riskyScriptsPresent: flaggedScripts.length > 0,
-    reason: flaggedScripts.length > 0
-      ? `Install-time script(s) added or changed in this version: ${flaggedScripts.join(', ')} — this is the actual code-execution mechanism behind the axios and event-stream compromises, independent of any filed CVE.`
-      : 'No new or changed install-time (preinstall/install/postinstall) scripts.',
-  };
-}
-async function analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion) {
-  if (ecosystem !== 'npm') {
-    return { available: false, reason: `Lifecycle-script analysis only implemented for npm, not ${ecosystem}` };
-  }
-  let data;
-  try {
-    data = await fetchNpmPackument(packageName);
-  } catch (error) {
-    return { available: false, reason: `Registry lookup failed: ${error.message}` };
-  }
-  const versions = data.versions || {};
-  if (!versions[targetVersion]) {
-    return { available: false, reason: `Version ${targetVersion} not found in registry packument` };
-  }
-  const targetScripts = versions[targetVersion].scripts || {};
-  const previousScripts = previousVersion ? (versions[previousVersion]?.scripts || {}) : {};
-  return assessLifecycleScriptRisk(targetScripts, previousScripts);
-}
-// ---------------------------------------------------------------------------
-// Combinator — the "decision trail" for trust, mirroring riskCalculator.js
-// ---------------------------------------------------------------------------
-// Weights sum to 100. Lifecycle script risk is weighted highest because it
-// is the actual payload-delivery mechanism, not a proxy for one; publish
-// cadence is the strongest *behavioural* proxy (it's what would have
-// flagged axios/ua-parser-js/event-stream before any CVE existed).
-const TRUST_WEIGHTS = {
-  lifecycleScript: 35,
-  publishAnomaly: 25,
-  dependencyConfusion: 20,
-  typosquat: 15,
-  singleMaintainer: 5,
-};
-/**
- * Combine the five signals into one labeled trust assessment. Pure: takes
- * already-computed signal results (each shaped like the `assess*`
- * functions above), never fetches anything itself.
- *
- * Unavailable signals contribute 0, not a guessed value — the `reasons`
- * list says so explicitly, so a 0 contribution from "no data" is never
- * visually confused with a 0 contribution from "checked, found nothing."
- *
- * @returns {Object} { score, classification, contributions, weights, reasons }
- */
-function computeTrustScore({ lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer }) {
-  const reasons = [];
-  const lifecycleFlag = lifecycleScript?.available && lifecycleScript.riskyScriptsPresent;
-  const contributions = {
-    lifecycleScript: lifecycleFlag ? TRUST_WEIGHTS.lifecycleScript : 0,
-    publishAnomaly: publishAnomaly?.available
-      ? TRUST_WEIGHTS.publishAnomaly * Math.min(Math.max(publishAnomaly.zScore, 0) / 4, 1) * (publishAnomaly.anomalous ? 1 : 0.25)
-      : 0,
-    dependencyConfusion: dependencyConfusion?.available && dependencyConfusion.risk ? TRUST_WEIGHTS.dependencyConfusion : 0,
-    typosquat: typosquat?.available && typosquat.suspected ? TRUST_WEIGHTS.typosquat : 0,
-    singleMaintainer: maintainer?.available && maintainer.singleMaintainer ? TRUST_WEIGHTS.singleMaintainer : 0,
-  };
-  if (lifecycleScript?.available) reasons.push(lifecycleScript.reason);
-  else reasons.push(`Lifecycle script check unavailable: ${lifecycleScript?.reason || 'no data'}`);
-  if (publishAnomaly?.available) {
-    reasons.push(publishAnomaly.anomalous
-      ? `Published ${publishAnomaly.lastIntervalHours}h after the previous release — ${publishAnomaly.zScore}σ faster than this package's own historical cadence (mean ${publishAnomaly.meanIntervalHours}h, n=${publishAnomaly.historicalReleaseCount}).`
-      : `Publish cadence is within this package's historical norm (z=${publishAnomaly.zScore}).`);
-  } else {
-    reasons.push(`Publish-cadence check unavailable: ${publishAnomaly?.reason || 'no data'}`);
-  }
-  if (dependencyConfusion?.available) reasons.push(dependencyConfusion.reason);
-  else reasons.push(`Dependency-confusion check unavailable: ${dependencyConfusion?.reason || 'no data'}`);
-  if (typosquat?.available) {
-    reasons.push(typosquat.suspected
-      ? `Name is ${typosquat.distance} edit(s) from popular package "${typosquat.closestMatch}" (similarity ${Math.round(typosquat.similarity * 100)}%).`
-      : 'No typosquat match against known popular packages.');
-  } else {
-    reasons.push(`Typosquat check unavailable: ${typosquat?.reason || 'no data'}`);
-  }
-  if (maintainer?.available) reasons.push(maintainer.reason);
-  else reasons.push(`Maintainer-count check unavailable: ${maintainer?.reason || 'no data'}`);
-  const rawScore = Object.values(contributions).reduce((sum, v) => sum + v, 0);
-  const score = Math.round(Math.min(Math.max(rawScore, 0), 100));
-  const classification = score >= 60 ? 'CRITICAL' : score >= 30 ? 'ELEVATED' : 'NORMAL';
-  return { score, classification, contributions, weights: TRUST_WEIGHTS, reasons };
-}
-/**
- * Run all five signals for one dependency and combine them.
- * @param {Object} opts - { packageName, ecosystem, targetVersion,
- *   previousVersion, resolvedUrl }
- */
-async function assessSupplyChainTrust({ packageName, ecosystem, targetVersion, previousVersion, resolvedUrl }) {
-  const [lifecycleScript, publishAnomaly, dependencyConfusion, maintainer] = await Promise.all([
-    targetVersion ? analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion) : { available: false, reason: 'No target version supplied' },
-    targetVersion ? analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion) : { available: false, reason: 'No target version supplied' },
-    checkDependencyConfusion(packageName, ecosystem, resolvedUrl),
-    analyzeMaintainerSignal(packageName, ecosystem),
-  ]);
-  const typosquat = detectTyposquat(packageName, ecosystem);
-  const result = computeTrustScore({ lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer });
-  return { packageName, ecosystem, ...result, signals: { lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer } };
-}
-module.exports = {
-  levenshteinDistance,
-  detectTyposquat,
-  assessDependencyConfusion,
-  checkDependencyConfusion,
-  computeCadenceAnomaly,
-  analyzePublishCadenceAnomaly,
-  assessMaintainerSignal,
-  analyzeMaintainerSignal,
-  assessLifecycleScriptRisk,
-  analyzeLifecycleScriptRisk,
-  computeTrustScore,
-  assessSupplyChainTrust,
-  TRUST_WEIGHTS,
-};
+/**
+ * Supply-Chain Trust Signals.
+ *
+ * The risk score in riskCalculator.js answers "how bad is this CVE, given
+ * real exposure." This module answers a different question that CVSS
+ * can't: "does this dependency change *look like* the early shape of a
+ * maintainer-compromise attack, independent of whether a CVE has been
+ * filed yet?" Log4Shell-style CVEs get filed eventually; account-takeover
+ * attacks (axios Mar 2026, ua-parser-js Oct 2021, event-stream Nov 2018)
+ * are visible in the registry data *before* anyone files a CVE, if you
+ * know what to look at:
+ *
+ *   1. Lifecycle script risk    — did this version add/change a
+ *      preinstall/install/postinstall script? That's the actual payload
+ *      delivery mechanism in the axios and event-stream attacks.
+ *   2. Publish cadence anomaly  — was this version published unusually
+ *      fast after the previous one, relative to this package's own
+ *      historical release cadence? ua-parser-js shipped 3 malicious
+ *      versions in ~4 hours; axios shipped 2 in one sitting after a
+ *      credential compromise. A z-score against the package's own
+ *      history catches "uncharacteristically fast," not "fast."
+ *   3. Dependency confusion     — does this name resolve from a private
+ *      registry while an identically-named package also exists
+ *      publicly? That's the exact pattern from the 2021 Birsan research
+ *      that paid out across Apple/PayPal/Shopify/Netflix/Uber.
+ *   4. Typosquatting            — is this name one or two edits away
+ *      from a genuinely popular package?
+ *   5. Single-maintainer        — a real, available bus-factor proxy
+ *      (current maintainer count), not a guess about abandonment.
+ *
+ * Design rule carried over from riskCalculator.js: never blend this into
+ * the CVSS-based risk score. A package can be perfectly patched (CVSS
+ * score of 0 findings) and still carry a high trust-risk score — these
+ * are reported side by side, never merged into one number, so a reader
+ * can never mistake "no known CVE" for "trustworthy."
+ *
+ * Every signal that can't be computed (network failure, unsupported
+ * ecosystem, insufficient history) reports `available: false` and
+ * contributes exactly 0 — consistent with this project's "never
+ * fabricate" rule. A signal that is honestly unavailable is not the same
+ * as a signal that found nothing wrong.
+ */
+const axios = require('axios');
+const { withRetry } = require('../httpRetry');
+const NPM_REGISTRY = 'https://registry.npmjs.org';
+const PYPI_REGISTRY = 'https://pypi.org/pypi';
+const LIFECYCLE_SCRIPT_KEYS = ['preinstall', 'install', 'postinstall'];
+// A small, real list of genuinely high-download packages per ecosystem,
+// used only as typosquat *targets* — not exhaustive, just enough to catch
+// the "one edit away from something everyone installs" pattern.
+const POPULAR_PACKAGES = {
+  npm: [
+    'lodash', 'react', 'react-dom', 'express', 'axios', 'chalk', 'commander',
+    'debug', 'async', 'underscore', 'moment', 'webpack', 'eslint', 'jest',
+    'typescript', 'request', 'vue', 'angular', 'jquery', 'babel', 'redux',
+    'next', 'mongoose', 'socket.io', 'dotenv', 'cors', 'uuid', 'yargs',
+  ],
+  PyPI: [
+    'requests', 'numpy', 'flask', 'django', 'pandas', 'boto3', 'urllib3',
+    'pyyaml', 'setuptools', 'click', 'pillow', 'pytest', 'scipy', 'sqlalchemy',
+    'jinja2', 'cryptography', 'certifi', 'six', 'idna', 'attrs',
+  ],
+};
+// ---------------------------------------------------------------------------
+// 1. Typosquatting — pure, no network.
+// ---------------------------------------------------------------------------
+/** Standard Levenshtein edit distance (insert/delete/substitute = cost 1). */
+function levenshteinDistance(a, b) {
+  const m = a.length;
+  const n = b.length;
+  if (m === 0) return n;
+  if (n === 0) return m;
+  let prev = Array.from({ length: n + 1 }, (_, j) => j);
+  for (let i = 1; i <= m; i += 1) {
+    const curr = [i];
+    for (let j = 1; j <= n; j += 1) {
+      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
+      curr[j] = Math.min(
+        prev[j] + 1, // deletion
+        curr[j - 1] + 1, // insertion
+        prev[j - 1] + cost // substitution
+      );
+    }
+    prev = curr;
+  }
+  return prev[n];
+}
+/**
+ * Is `packageName` suspiciously close to a genuinely popular package?
+ * @returns {Object} { available: true, suspected, closestMatch, distance, similarity }
+ */
+function detectTyposquat(packageName, ecosystem) {
+  const candidates = POPULAR_PACKAGES[ecosystem] || [];
+  if (candidates.length === 0) {
+    return { available: false, reason: `No popular-package list for ecosystem "${ecosystem}"` };
+  }
+  if (candidates.includes(packageName)) {
+    return { available: true, suspected: false, closestMatch: packageName, distance: 0, similarity: 1 };
+  }
+  let best = null;
+  for (const candidate of candidates) {
+    // Skip comparisons that can't plausibly be a typo (very different length).
+    const plausibleLength = Math.abs(candidate.length - packageName.length) <= 3;
+    if (plausibleLength) {
+      const distance = levenshteinDistance(packageName, candidate);
+      if (!best || distance < best.distance) best = { candidate, distance };
+    }
+  }
+  if (!best) return { available: true, suspected: false, closestMatch: null, distance: null, similarity: null };
+  const maxLen = Math.max(packageName.length, best.candidate.length);
+  const similarity = 1 - best.distance / maxLen;
+  // Distance 1-2 on a real package name (length >= 4, to avoid noisy
+  // false positives on very short names) is the typosquat zone; distance 0
+  // is an exact match (handled above), and >2 is just "a different word."
+  const suspected = packageName.length >= 4 && best.distance > 0 && best.distance <= 2 && similarity >= 0.6;
+  return { available: true, suspected, closestMatch: best.candidate, distance: best.distance, similarity };
+}
+// ---------------------------------------------------------------------------
+// 2. Dependency confusion — pure core + thin network wrapper.
+// ---------------------------------------------------------------------------
+const PUBLIC_REGISTRY_HOST_PATTERN = /registry\.npmjs\.org|pypi\.org|files\.pythonhosted\.org/i;
+/**
+ * Pure decision: given where this install actually resolved from, and
+ * whether a same-named package exists on the *public* registry, is this
+ * dependency-confusion-shaped?
+ */
+function assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry }) {
+  if (!resolvedUrl) {
+    return { available: false, reason: 'No lockfile resolved-URL provided — cannot determine install source' };
+  }
+  const resolvedFromPublic = PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl);
+  if (resolvedFromPublic) {
+    return { available: true, risk: false, resolvedFromPublic: true, reason: 'Resolved from the public registry — no confusion risk' };
+  }
+  if (existsOnPublicRegistry) {
+    return {
+      available: true,
+      risk: true,
+      resolvedFromPublic: false,
+      reason: 'Resolves from a private/internal registry, but a same-named package also exists publicly — a misconfigured environment (missing scope/registry override) could silently install the public package instead.',
+    };
+  }
+  return {
+    available: true,
+    risk: false,
+    resolvedFromPublic: false,
+    reason: 'Resolves from a private registry; no same-named package on the public registry yet — lower squat risk today, but the name is unclaimed and could be registered later.',
+  };
+}
+async function publicRegistryHasPackage(packageName, ecosystem) {
+  const url = ecosystem === 'PyPI'
+    ? `${PYPI_REGISTRY}/${encodeURIComponent(packageName)}/json`
+    : `${NPM_REGISTRY}/${encodeURIComponent(packageName)}`;
+  try {
+    // A 404 here is a real, final answer ("this name doesn't exist
+    // publicly") not a transient failure — withRetry already knows not
+    // to retry 4xx, so it surfaces immediately for the catch below.
+    await withRetry(() => axios.get(url, { timeout: 8000 }));
+    return true;
+  } catch (error) {
+    if (error.response?.status === 404) return false;
+    throw error;
+  }
+}
+async function checkDependencyConfusion(packageName, ecosystem, resolvedUrl) {
+  if (!resolvedUrl) return assessDependencyConfusion({ resolvedUrl: null });
+  if (PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl)) {
+    return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry: null });
+  }
+  try {
+    const existsOnPublicRegistry = await publicRegistryHasPackage(packageName, ecosystem);
+    return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry });
+  } catch (error) {
+    return { available: false, reason: `Public registry lookup failed: ${error.message}` };
+  }
+}
+// ---------------------------------------------------------------------------
+// 3. Publish cadence anomaly — pure z-score core + thin network wrapper.
+// ---------------------------------------------------------------------------
+/**
+ * Arithmetic mean of a numeric array.
+ * @param {number[]} values
+ * @returns {number} sum / length; NaN for an empty array (0 / 0)
+ */
+function mean(values) {
+  let total = 0;
+  values.forEach((value) => {
+    total += value;
+  });
+  return total / values.length;
+}
+/**
+ * Population standard deviation (divides by N, not N - 1).
+ * @param {number[]} values
+ * @returns {number} square root of the mean squared deviation from the mean
+ */
+function stdDev(values) {
+  const center = mean(values);
+  const squaredDeviations = values.map(value => (value - center) ** 2);
+  return Math.sqrt(mean(squaredDeviations));
+}
+/**
+ * Decide whether `lastIntervalHours` (the gap between the previous release
+ * and the one being evaluated) is anomalously short compared to this
+ * package's own historical release cadence.
+ *
+ *   z = (mean(historicalIntervals) - lastInterval) / max(stdDev(historicalIntervals), 1)
+ *
+ * A positive z means "faster than usual", expressed in units of the
+ * package's own historical standard deviation, so the test adapts to each
+ * package's baseline instead of using one global threshold. The 1-hour
+ * floor on the divisor avoids a divide-by-near-zero blowup for packages
+ * with an almost perfectly regular cadence.
+ *
+ * Flagging requires BOTH a statistical anomaly (z > 2) AND an absolute
+ * ceiling on the gap (< 24h). The z-score alone would also flag a
+ * multi-day gap that is merely short relative to a slow, regular cadence
+ * (e.g. a 4-day gap for a package that ships monthly), which is not the
+ * rapid-fire publish pattern this signal is looking for.
+ *
+ * NOTE(review): for non-negative intervals z can never exceed
+ * mean / max(stdDev, 1). A history whose stdDev is larger than half its
+ * mean therefore cannot reach z > 2 and is never flagged, however short
+ * the last gap is. Confirm that this blind spot for erratic release
+ * histories is acceptable.
+ *
+ * @param {number[]} historicalIntervalsHours - gaps (in hours) between
+ *   consecutive releases *before* the release being evaluated; at least 3
+ *   finite values are required. Order does not affect the result.
+ * @param {number} lastIntervalHours - gap (in hours) immediately before
+ *   the release being evaluated; must be a finite number
+ * @returns {Object} `{ available: false, reason }` when the inputs cannot
+ *   support a baseline, otherwise `{ available: true, zScore,
+ *   meanIntervalHours, stdDevHours, lastIntervalHours, anomalous,
+ *   historicalReleaseCount }` with the numeric fields rounded to 2
+ *   decimals. `historicalReleaseCount` is the number of historical
+ *   intervals supplied.
+ */
+function computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours) {
+  const MIN_HISTORY = 3;
+  if (!Array.isArray(historicalIntervalsHours) || historicalIntervalsHours.length < MIN_HISTORY) {
+    return { available: false, reason: `Need at least ${MIN_HISTORY} prior releases to establish a cadence baseline, found ${historicalIntervalsHours?.length ?? 0}` };
+  }
+  // One NaN/Infinity/non-number entry would turn mean, stdDev and z into
+  // NaN and still be reported as `available: true`; refuse instead.
+  if (!historicalIntervalsHours.every(hours => Number.isFinite(hours))) {
+    return { available: false, reason: 'Historical release intervals contain a non-finite value' };
+  }
+  // Number.isFinite rejects non-numbers, NaN and +/-Infinity in one check.
+  if (!Number.isFinite(lastIntervalHours)) {
+    return { available: false, reason: 'No measurable interval for the release being evaluated' };
+  }
+  const roundTo2 = value => Math.round(value * 100) / 100;
+  const meanIntervalHours = mean(historicalIntervalsHours);
+  const stdDevHours = stdDev(historicalIntervalsHours);
+  const zScore = (meanIntervalHours - lastIntervalHours) / Math.max(stdDevHours, 1);
+  // The decision uses the unrounded z; only the reported fields are rounded.
+  const anomalous = zScore > 2 && lastIntervalHours < 24;
+  return {
+    available: true,
+    zScore: roundTo2(zScore),
+    meanIntervalHours: roundTo2(meanIntervalHours),
+    stdDevHours: roundTo2(stdDevHours),
+    lastIntervalHours: roundTo2(lastIntervalHours),
+    anomalous,
+    historicalReleaseCount: historicalIntervalsHours.length,
+  };
+}
+/**
+ * Build a publish-time-ordered timeline of release versions.
+ *
+ * Versions whose string contains `-` or `+` (pre-releases / build
+ * metadata) are skipped, as are versions with no timestamp or with a
+ * timestamp that does not parse to a valid date.
+ *
+ * @param {Object} versionTimes - map of version string -> publish timestamp
+ * @param {?string[]} allVersions - version strings to consider; a falsy
+ *   value is treated as an empty list
+ * @returns {{version: string, time: number}[]} entries sorted by ascending
+ *   `time` (epoch milliseconds)
+ */
+function realVersionTimeline(versionTimes, allVersions) {
+  const preReleaseOrBuildMarker = /[-+]/;
+  const timeline = [];
+  for (const version of allVersions || []) {
+    if (preReleaseOrBuildMarker.test(version)) continue;
+    const publishedAt = versionTimes[version];
+    if (!publishedAt) continue;
+    const time = new Date(publishedAt).getTime();
+    if (Number.isNaN(time)) continue;
+    timeline.push({ version, time });
+  }
+  timeline.sort((earlier, later) => earlier.time - later.time);
+  return timeline;
+}
+// Packument requests that are currently in flight, keyed by package name.
+// An entry is removed as soon as its request settles, so this only
+// coalesces concurrent lookups — it never serves a stale cached result.
+const inFlightPackuments = new Map();
+/**
+ * Fetch the registry document for `packageName` from NPM_REGISTRY.
+ *
+ * assessSupplyChainTrust starts the lifecycle-script, publish-cadence and
+ * maintainer analyses concurrently, and each of them calls this function
+ * for the same package. Concurrent calls for the same name therefore share
+ * one request instead of downloading the same document three times.
+ *
+ * @param {string} packageName - package name (URI-encoded before use)
+ * @returns {Promise<Object>} the `data` payload of the registry response;
+ *   callers that overlap in time receive the same object and must treat
+ *   it as read-only
+ * @throws rejects with whatever withRetry/axios rejects with; every
+ *   caller sharing the request sees the same rejection
+ */
+async function fetchNpmPackument(packageName) {
+  const alreadyPending = inFlightPackuments.get(packageName);
+  if (alreadyPending) return alreadyPending;
+  const request = (async () => {
+    const { data } = await withRetry(() => axios.get(`${NPM_REGISTRY}/${encodeURIComponent(packageName)}`, { timeout: 8000 }));
+    return data;
+  })();
+  inFlightPackuments.set(packageName, request);
+  // Clean up on both outcomes. Passing the handler for the rejection path
+  // as well keeps this bookkeeping chain from raising an unhandled
+  // rejection of its own; the callers still observe the original failure.
+  const release = () => inFlightPackuments.delete(packageName);
+  request.then(release, release);
+  return request;
+}
+/**
+ * Network wrapper for the publish-cadence signal: fetch the package's
+ * registry history, locate `targetVersion` on the release timeline and
+ * hand the intervals to computeCadenceAnomaly.
+ *
+ * @param {string} packageName
+ * @param {string} ecosystem - only 'npm' is supported
+ * @param {string} targetVersion - the version being evaluated
+ * @returns {Promise<Object>} the computeCadenceAnomaly result, or
+ *   `{ available: false, reason }` when the ecosystem is unsupported, the
+ *   lookup fails, or the target has no previous release on the timeline
+ */
+async function analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion) {
+  if (ecosystem !== 'npm') {
+    return { available: false, reason: `Publish-cadence analysis only implemented for npm, not ${ecosystem}` };
+  }
+  let packument;
+  try {
+    packument = await fetchNpmPackument(packageName);
+  } catch (fetchError) {
+    return { available: false, reason: `Registry lookup failed: ${fetchError.message}` };
+  }
+  const versionTimes = packument.time || {};
+  const publishedVersions = Object.keys(packument.versions || {});
+  const timeline = realVersionTimeline(versionTimes, publishedVersions);
+  const targetIndex = timeline.findIndex(entry => entry.version === targetVersion);
+  if (targetIndex === -1) {
+    return { available: false, reason: 'Target version not found in registry history' };
+  }
+  if (targetIndex === 0) {
+    return { available: false, reason: 'Target version is the first release — no prior interval to compare' };
+  }
+  // Hours between timeline entry i and the entry published just before it.
+  const gapBeforeHours = i => (timeline[i].time - timeline[i - 1].time) / 3600000;
+  // Baseline: every gap strictly before the target's own gap (i = 1 .. targetIndex - 1).
+  const historicalIntervalsHours = Array.from({ length: targetIndex - 1 }, (_, k) => gapBeforeHours(k + 1));
+  const lastIntervalHours = gapBeforeHours(targetIndex);
+  return computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours);
+}
+// ---------------------------------------------------------------------------
+// 4. Maintainer count — pure core + thin network wrapper.
+// ---------------------------------------------------------------------------
+/**
+ * Turn a maintainer count into the single-maintainer trust signal.
+ *
+ * @param {number} maintainerCount - number of maintainers on record
+ * @returns {Object} `{ available: false, reason }` when the count is not a
+ *   finite, non-negative number; otherwise `{ available: true,
+ *   maintainerCount, singleMaintainer, reason }`. A count of 0 or 1 sets
+ *   `singleMaintainer` to true.
+ */
+function assessMaintainerSignal(maintainerCount) {
+  // Number.isFinite also rejects non-numbers, NaN and Infinity. NaN used
+  // to pass the old `typeof ... || < 0` guard and produced an "available"
+  // signal reading "NaN maintainers on record."
+  if (!Number.isFinite(maintainerCount) || maintainerCount < 0) {
+    return { available: false, reason: 'No maintainer count available' };
+  }
+  const singleMaintainer = maintainerCount <= 1;
+  return {
+    available: true,
+    maintainerCount,
+    singleMaintainer,
+    reason: singleMaintainer
+      ? 'Single maintainer on record — no second account needs to be compromised, and no second reviewer would catch a malicious publish.'
+      : `${maintainerCount} maintainers on record.`,
+  };
+}
+/**
+ * Network wrapper for the maintainer-count signal.
+ *
+ * @param {string} packageName
+ * @param {string} ecosystem - only 'npm' is supported
+ * @returns {Promise<Object>} the assessMaintainerSignal result, or
+ *   `{ available: false, reason }` when the ecosystem is unsupported, the
+ *   lookup fails, or the registry document carries no maintainer list
+ */
+async function analyzeMaintainerSignal(packageName, ecosystem) {
+  if (ecosystem !== 'npm') {
+    return { available: false, reason: `Maintainer-list lookup only implemented for npm, not ${ecosystem}` };
+  }
+  try {
+    const data = await fetchNpmPackument(packageName);
+    const maintainers = data?.maintainers;
+    // A missing list is "no data", not "zero maintainers". Counting it as
+    // 0 reported "Single maintainer on record" and added weight to the
+    // trust score on the strength of a guess.
+    if (!Array.isArray(maintainers)) {
+      return { available: false, reason: 'Registry packument has no maintainer list' };
+    }
+    return assessMaintainerSignal(maintainers.length);
+  } catch (error) {
+    return { available: false, reason: `Registry lookup failed: ${error.message}` };
+  }
+}
+// ---------------------------------------------------------------------------
+// 5. Lifecycle script risk — pure core + thin network wrapper.
+// ---------------------------------------------------------------------------
+/**
+ * Flag install-time scripts that are present in the target version and
+ * differ from (or did not exist in) the previous version. Only the keys
+ * listed in LIFECYCLE_SCRIPT_KEYS are inspected.
+ *
+ * @param {Object} [targetScripts={}] - `scripts` map of the version being
+ *   evaluated
+ * @param {Object} [previousScripts={}] - `scripts` map of the version it
+ *   replaces; with the default `{}` every present lifecycle script is
+ *   flagged
+ * @returns {Object} { available, flaggedScripts, riskyScriptsPresent, reason }
+ */
+function assessLifecycleScriptRisk(targetScripts = {}, previousScripts = {}) {
+  const flaggedScripts = [];
+  for (const key of LIFECYCLE_SCRIPT_KEYS) {
+    const currentScript = targetScripts[key];
+    if (currentScript && currentScript !== previousScripts[key]) {
+      flaggedScripts.push(key);
+    }
+  }
+  const riskyScriptsPresent = flaggedScripts.length > 0;
+  let reason;
+  if (riskyScriptsPresent) {
+    reason = `Install-time script(s) added or changed in this version: ${flaggedScripts.join(', ')} — this is the actual code-execution mechanism behind the axios and event-stream compromises, independent of any filed CVE.`;
+  } else {
+    reason = 'No new or changed install-time (preinstall/install/postinstall) scripts.';
+  }
+  return { available: true, flaggedScripts, riskyScriptsPresent, reason };
+}
+/**
+ * Network wrapper for the lifecycle-script signal: fetch the registry
+ * document and compare the `scripts` of `targetVersion` against those of
+ * `previousVersion`.
+ *
+ * @param {string} packageName
+ * @param {string} ecosystem - only 'npm' is supported
+ * @param {string} targetVersion - version being evaluated
+ * @param {?string} previousVersion - version it replaces; when falsy or
+ *   not present in the registry document, an empty script map is used
+ * @returns {Promise<Object>} the assessLifecycleScriptRisk result, or
+ *   `{ available: false, reason }` when the ecosystem is unsupported, the
+ *   lookup fails, or the target version is not in the registry document
+ */
+async function analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion) {
+  if (ecosystem !== 'npm') {
+    return { available: false, reason: `Lifecycle-script analysis only implemented for npm, not ${ecosystem}` };
+  }
+  let packument;
+  try {
+    packument = await fetchNpmPackument(packageName);
+  } catch (fetchError) {
+    return { available: false, reason: `Registry lookup failed: ${fetchError.message}` };
+  }
+  const manifests = packument.versions || {};
+  const targetManifest = manifests[targetVersion];
+  if (!targetManifest) {
+    return { available: false, reason: `Version ${targetVersion} not found in registry packument` };
+  }
+  let previousScripts = {};
+  if (previousVersion) {
+    previousScripts = manifests[previousVersion]?.scripts || {};
+  }
+  return assessLifecycleScriptRisk(targetManifest.scripts || {}, previousScripts);
+}
+// ---------------------------------------------------------------------------
+// Combinator — the "decision trail" for trust, mirroring riskCalculator.js
+// ---------------------------------------------------------------------------
+// Weights sum to 100. Lifecycle script risk is weighted highest because it
+// is the actual payload-delivery mechanism, not a proxy for one; publish
+// cadence is the strongest *behavioural* proxy (it's what would have
+// flagged axios/ua-parser-js/event-stream before any CVE existed).
+const TRUST_WEIGHTS = {
+  lifecycleScript: 35,
+  publishAnomaly: 25,
+  dependencyConfusion: 20,
+  typosquat: 15,
+  singleMaintainer: 5,
+};
+/**
+ * Combine the five signals into one labeled trust assessment. Pure: takes
+ * already-computed signal results (each shaped like the output of the
+ * matching `assess*` / `compute*` / `detect*` function) and never fetches
+ * anything itself.
+ *
+ * Unavailable signals contribute 0, not a guessed value, and the `reasons`
+ * list says so explicitly, so a 0 from "no data" is never confused with a
+ * 0 from "checked, found nothing". A publish-cadence signal that claims
+ * to be available but carries no numeric z-score is treated as
+ * unavailable too; otherwise its NaN would spread through the sum and
+ * turn the whole result into `score: NaN, classification: 'NORMAL'`.
+ *
+ * The publish-cadence contribution scales linearly with z (clamped to
+ * [0, 4] and mapped onto [0, 1]); a signal that is not flagged
+ * `anomalous` only counts for a quarter of that.
+ *
+ * @param {Object} [signals={}] - { lifecycleScript, publishAnomaly,
+ *   dependencyConfusion, typosquat, maintainer }; each may be omitted
+ * @returns {Object} { score, classification, contributions, weights, reasons }
+ *   where `score` is an integer in [0, 100], `classification` is
+ *   'CRITICAL' (>= 60), 'ELEVATED' (>= 30) or 'NORMAL', and `reasons` has
+ *   one entry per signal in the order lifecycle, cadence, dependency
+ *   confusion, typosquat, maintainer
+ */
+function computeTrustScore({ lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer } = {}) {
+  const reasons = [];
+  const lifecycleFlag = lifecycleScript?.available && lifecycleScript.riskyScriptsPresent;
+  const cadenceZ = publishAnomaly?.zScore;
+  // Only a real number may enter the arithmetic below. +/-Infinity is
+  // harmless because the clamp handles it, so only NaN and non-numbers
+  // are rejected.
+  const cadenceUsable = Boolean(publishAnomaly?.available)
+    && typeof cadenceZ === 'number'
+    && !Number.isNaN(cadenceZ);
+  const contributions = {
+    lifecycleScript: lifecycleFlag ? TRUST_WEIGHTS.lifecycleScript : 0,
+    publishAnomaly: cadenceUsable
+      ? TRUST_WEIGHTS.publishAnomaly * Math.min(Math.max(cadenceZ, 0) / 4, 1) * (publishAnomaly.anomalous ? 1 : 0.25)
+      : 0,
+    dependencyConfusion: dependencyConfusion?.available && dependencyConfusion.risk ? TRUST_WEIGHTS.dependencyConfusion : 0,
+    typosquat: typosquat?.available && typosquat.suspected ? TRUST_WEIGHTS.typosquat : 0,
+    singleMaintainer: maintainer?.available && maintainer.singleMaintainer ? TRUST_WEIGHTS.singleMaintainer : 0,
+  };
+  if (lifecycleScript?.available) reasons.push(lifecycleScript.reason);
+  else reasons.push(`Lifecycle script check unavailable: ${lifecycleScript?.reason || 'no data'}`);
+  if (cadenceUsable) {
+    reasons.push(publishAnomaly.anomalous
+      ? `Published ${publishAnomaly.lastIntervalHours}h after the previous release — ${publishAnomaly.zScore}σ faster than this package's own historical cadence (mean ${publishAnomaly.meanIntervalHours}h, n=${publishAnomaly.historicalReleaseCount}).`
+      : `Publish cadence is within this package's historical norm (z=${publishAnomaly.zScore}).`);
+  } else {
+    const fallbackReason = publishAnomaly?.available ? 'signal carried no numeric z-score' : 'no data';
+    reasons.push(`Publish-cadence check unavailable: ${publishAnomaly?.reason || fallbackReason}`);
+  }
+  if (dependencyConfusion?.available) reasons.push(dependencyConfusion.reason);
+  else reasons.push(`Dependency-confusion check unavailable: ${dependencyConfusion?.reason || 'no data'}`);
+  if (typosquat?.available) {
+    reasons.push(typosquat.suspected
+      ? `Name is ${typosquat.distance} edit(s) from popular package "${typosquat.closestMatch}" (similarity ${Math.round(typosquat.similarity * 100)}%).`
+      : 'No typosquat match against known popular packages.');
+  } else {
+    reasons.push(`Typosquat check unavailable: ${typosquat?.reason || 'no data'}`);
+  }
+  if (maintainer?.available) reasons.push(maintainer.reason);
+  else reasons.push(`Maintainer-count check unavailable: ${maintainer?.reason || 'no data'}`);
+  const rawScore = Object.values(contributions).reduce((sum, v) => sum + v, 0);
+  const score = Math.round(Math.min(Math.max(rawScore, 0), 100));
+  const classification = score >= 60 ? 'CRITICAL' : score >= 30 ? 'ELEVATED' : 'NORMAL';
+  return { score, classification, contributions, weights: TRUST_WEIGHTS, reasons };
+}
+/**
+ * Evaluate every trust signal for a single dependency and fold the
+ * results into one score. The four network-backed signals are started
+ * together and awaited as a group; the typosquat check is synchronous.
+ *
+ * @param {Object} opts - { packageName, ecosystem, targetVersion,
+ *   previousVersion, resolvedUrl }. Without `targetVersion` the
+ *   lifecycle-script and publish-cadence signals are reported as
+ *   unavailable instead of being computed.
+ * @returns {Promise<Object>} { packageName, ecosystem, ...computeTrustScore
+ *   result, signals } where `signals` holds the five raw signal results
+ */
+async function assessSupplyChainTrust({ packageName, ecosystem, targetVersion, previousVersion, resolvedUrl }) {
+  const noTargetVersion = () => ({ available: false, reason: 'No target version supplied' });
+  const pendingSignals = [
+    targetVersion
+      ? analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion)
+      : noTargetVersion(),
+    targetVersion
+      ? analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion)
+      : noTargetVersion(),
+    checkDependencyConfusion(packageName, ecosystem, resolvedUrl),
+    analyzeMaintainerSignal(packageName, ecosystem),
+  ];
+  const [lifecycleScript, publishAnomaly, dependencyConfusion, maintainer] = await Promise.all(pendingSignals);
+  const typosquat = detectTyposquat(packageName, ecosystem);
+  const signals = { lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer };
+  const verdict = computeTrustScore(signals);
+  return { packageName, ecosystem, ...verdict, signals };
+}
+// CommonJS export surface. Exposes the network-backed entry points
+// (checkDependencyConfusion, the analyze* functions and
+// assessSupplyChainTrust) alongside the functions they delegate to, plus
+// the TRUST_WEIGHTS table that computeTrustScore reads.
+module.exports = {
+  levenshteinDistance,
+  detectTyposquat,
+  assessDependencyConfusion,
+  checkDependencyConfusion,
+  computeCadenceAnomaly,
+  analyzePublishCadenceAnomaly,
+  assessMaintainerSignal,
+  analyzeMaintainerSignal,
+  assessLifecycleScriptRisk,
+  analyzeLifecycleScriptRisk,
+  computeTrustScore,
+  assessSupplyChainTrust,
+  TRUST_WEIGHTS,
+};