dependencyiq 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,472 +1,472 @@
1
- /**
2
- * Supply-Chain Trust Signals.
3
- *
4
- * The risk score in riskCalculator.js answers "how bad is this CVE, given
5
- * real exposure." This module answers a different question that CVSS
6
- * can't: "does this dependency change *look like* the early shape of a
7
- * maintainer-compromise attack, independent of whether a CVE has been
8
- * filed yet?" Log4Shell-style CVEs get filed eventually; account-takeover
9
- * attacks (axios Mar 2026, ua-parser-js Oct 2021, event-stream Nov 2018)
10
- * are visible in the registry data *before* anyone files a CVE, if you
11
- * know what to look at:
12
- *
13
- * 1. Lifecycle script risk — did this version add/change a
14
- * preinstall/install/postinstall script? That's the actual payload
15
- * delivery mechanism in the axios and event-stream attacks.
16
- * 2. Publish cadence anomaly — was this version published unusually
17
- * fast after the previous one, relative to this package's own
18
- * historical release cadence? ua-parser-js shipped 3 malicious
19
- * versions in ~4 hours; axios shipped 2 in one sitting after a
20
- * credential compromise. A z-score against the package's own
21
- * history catches "uncharacteristically fast," not "fast."
22
- * 3. Dependency confusion — does this name resolve from a private
23
- * registry while an identically-named package also exists
24
- * publicly? That's the exact pattern from the 2021 Birsan research
25
- * that paid out across Apple/PayPal/Shopify/Netflix/Uber.
26
- * 4. Typosquatting — is this name one or two edits away
27
- * from a genuinely popular package?
28
- * 5. Single-maintainer — a real, available bus-factor proxy
29
- * (current maintainer count), not a guess about abandonment.
30
- *
31
- * Design rule carried over from riskCalculator.js: never blend this into
32
- * the CVSS-based risk score. A package can be perfectly patched (CVSS
33
- * score of 0 findings) and still carry a high trust-risk score — these
34
- * are reported side by side, never merged into one number, so a reader
35
- * can never mistake "no known CVE" for "trustworthy."
36
- *
37
- * Every signal that can't be computed (network failure, unsupported
38
- * ecosystem, insufficient history) reports `available: false` and
39
- * contributes exactly 0 — consistent with this project's "never
40
- * fabricate" rule. A signal that is honestly unavailable is not the same
41
- * as a signal that found nothing wrong.
42
- */
43
-
44
- const axios = require('axios');
45
- const { withRetry } = require('../httpRetry');
46
-
47
- const NPM_REGISTRY = 'https://registry.npmjs.org';
48
- const PYPI_REGISTRY = 'https://pypi.org/pypi';
49
- const LIFECYCLE_SCRIPT_KEYS = ['preinstall', 'install', 'postinstall'];
50
-
51
- // A small, real list of genuinely high-download packages per ecosystem,
52
- // used only as typosquat *targets* — not exhaustive, just enough to catch
53
- // the "one edit away from something everyone installs" pattern.
54
- const POPULAR_PACKAGES = {
55
- npm: [
56
- 'lodash', 'react', 'react-dom', 'express', 'axios', 'chalk', 'commander',
57
- 'debug', 'async', 'underscore', 'moment', 'webpack', 'eslint', 'jest',
58
- 'typescript', 'request', 'vue', 'angular', 'jquery', 'babel', 'redux',
59
- 'next', 'mongoose', 'socket.io', 'dotenv', 'cors', 'uuid', 'yargs',
60
- ],
61
- PyPI: [
62
- 'requests', 'numpy', 'flask', 'django', 'pandas', 'boto3', 'urllib3',
63
- 'pyyaml', 'setuptools', 'click', 'pillow', 'pytest', 'scipy', 'sqlalchemy',
64
- 'jinja2', 'cryptography', 'certifi', 'six', 'idna', 'attrs',
65
- ],
66
- };
67
-
68
- // ---------------------------------------------------------------------------
69
- // 1. Typosquatting — pure, no network.
70
- // ---------------------------------------------------------------------------
71
-
72
- /** Standard Levenshtein edit distance (insert/delete/substitute = cost 1). */
73
- function levenshteinDistance(a, b) {
74
- const m = a.length;
75
- const n = b.length;
76
- if (m === 0) return n;
77
- if (n === 0) return m;
78
-
79
- let prev = Array.from({ length: n + 1 }, (_, j) => j);
80
- for (let i = 1; i <= m; i += 1) {
81
- const curr = [i];
82
- for (let j = 1; j <= n; j += 1) {
83
- const cost = a[i - 1] === b[j - 1] ? 0 : 1;
84
- curr[j] = Math.min(
85
- prev[j] + 1, // deletion
86
- curr[j - 1] + 1, // insertion
87
- prev[j - 1] + cost // substitution
88
- );
89
- }
90
- prev = curr;
91
- }
92
- return prev[n];
93
- }
94
-
95
- /**
96
- * Is `packageName` suspiciously close to a genuinely popular package?
97
- * @returns {Object} { available: true, suspected, closestMatch, distance, similarity }
98
- */
99
- function detectTyposquat(packageName, ecosystem) {
100
- const candidates = POPULAR_PACKAGES[ecosystem] || [];
101
- if (candidates.length === 0) {
102
- return { available: false, reason: `No popular-package list for ecosystem "${ecosystem}"` };
103
- }
104
- if (candidates.includes(packageName)) {
105
- return { available: true, suspected: false, closestMatch: packageName, distance: 0, similarity: 1 };
106
- }
107
-
108
- let best = null;
109
- for (const candidate of candidates) {
110
- // Skip comparisons that can't plausibly be a typo (very different length).
111
- const plausibleLength = Math.abs(candidate.length - packageName.length) <= 3;
112
- if (plausibleLength) {
113
- const distance = levenshteinDistance(packageName, candidate);
114
- if (!best || distance < best.distance) best = { candidate, distance };
115
- }
116
- }
117
- if (!best) return { available: true, suspected: false, closestMatch: null, distance: null, similarity: null };
118
-
119
- const maxLen = Math.max(packageName.length, best.candidate.length);
120
- const similarity = 1 - best.distance / maxLen;
121
- // Distance 1-2 on a real package name (length >= 4, to avoid noisy
122
- // false positives on very short names) is the typosquat zone; distance 0
123
- // is an exact match (handled above), and >2 is just "a different word."
124
- const suspected = packageName.length >= 4 && best.distance > 0 && best.distance <= 2 && similarity >= 0.6;
125
-
126
- return { available: true, suspected, closestMatch: best.candidate, distance: best.distance, similarity };
127
- }
128
-
129
- // ---------------------------------------------------------------------------
130
- // 2. Dependency confusion — pure core + thin network wrapper.
131
- // ---------------------------------------------------------------------------
132
-
133
- const PUBLIC_REGISTRY_HOST_PATTERN = /registry\.npmjs\.org|pypi\.org|files\.pythonhosted\.org/i;
134
-
135
- /**
136
- * Pure decision: given where this install actually resolved from, and
137
- * whether a same-named package exists on the *public* registry, is this
138
- * dependency-confusion-shaped?
139
- */
140
- function assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry }) {
141
- if (!resolvedUrl) {
142
- return { available: false, reason: 'No lockfile resolved-URL provided — cannot determine install source' };
143
- }
144
- const resolvedFromPublic = PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl);
145
- if (resolvedFromPublic) {
146
- return { available: true, risk: false, resolvedFromPublic: true, reason: 'Resolved from the public registry — no confusion risk' };
147
- }
148
- if (existsOnPublicRegistry) {
149
- return {
150
- available: true,
151
- risk: true,
152
- resolvedFromPublic: false,
153
- reason: 'Resolves from a private/internal registry, but a same-named package also exists publicly — a misconfigured environment (missing scope/registry override) could silently install the public package instead.',
154
- };
155
- }
156
- return {
157
- available: true,
158
- risk: false,
159
- resolvedFromPublic: false,
160
- reason: 'Resolves from a private registry; no same-named package on the public registry yet — lower squat risk today, but the name is unclaimed and could be registered later.',
161
- };
162
- }
163
-
164
- async function publicRegistryHasPackage(packageName, ecosystem) {
165
- const url = ecosystem === 'PyPI'
166
- ? `${PYPI_REGISTRY}/${encodeURIComponent(packageName)}/json`
167
- : `${NPM_REGISTRY}/${encodeURIComponent(packageName)}`;
168
- try {
169
- // A 404 here is a real, final answer ("this name doesn't exist
170
- // publicly") not a transient failure — withRetry already knows not
171
- // to retry 4xx, so it surfaces immediately for the catch below.
172
- await withRetry(() => axios.get(url, { timeout: 8000 }));
173
- return true;
174
- } catch (error) {
175
- if (error.response?.status === 404) return false;
176
- throw error;
177
- }
178
- }
179
-
180
- async function checkDependencyConfusion(packageName, ecosystem, resolvedUrl) {
181
- if (!resolvedUrl) return assessDependencyConfusion({ resolvedUrl: null });
182
- if (PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl)) {
183
- return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry: null });
184
- }
185
- try {
186
- const existsOnPublicRegistry = await publicRegistryHasPackage(packageName, ecosystem);
187
- return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry });
188
- } catch (error) {
189
- return { available: false, reason: `Public registry lookup failed: ${error.message}` };
190
- }
191
- }
192
-
193
- // ---------------------------------------------------------------------------
194
- // 3. Publish cadence anomaly — pure z-score core + thin network wrapper.
195
- // ---------------------------------------------------------------------------
196
-
197
- function mean(values) {
198
- return values.reduce((sum, v) => sum + v, 0) / values.length;
199
- }
200
-
201
- function stdDev(values) {
202
- const m = mean(values);
203
- const variance = values.reduce((sum, v) => sum + (v - m) ** 2, 0) / values.length;
204
- return Math.sqrt(variance);
205
- }
206
-
207
- /**
208
- * Was `lastIntervalHours` (the gap between the previous release and the
209
- * one being installed now) anomalously short compared to this package's
210
- * own historical release cadence?
211
- *
212
- * z = (mean(historicalIntervals) - lastInterval) / max(stdDev(historicalIntervals), 1)
213
- *
214
- * A positive z means "faster than usual"; z is in units of the package's
215
- * own historical standard deviation, so a package that normally ships
216
- * erratically (high stdDev) needs a much shorter interval to look
217
- * anomalous than one that ships on a metronomic schedule — the z-score
218
- * adapts to each package's own baseline instead of using one global
219
- * threshold. The `stdDev` floor of 1 hour avoids a divide-by-near-zero
220
- * blowup for packages with an almost perfectly regular cadence.
221
- *
222
- * Flagging requires BOTH a statistical anomaly (z > 2, i.e. more than two
223
- * of the package's own standard deviations faster than its norm) AND an
224
- * absolute floor (< 24h) — the z-score alone would also flag a perfectly
225
- * normal package whose typical cadence is itself sub-daily.
226
- *
227
- * @param {number[]} historicalIntervalsHours - gaps between releases
228
- * *before* the release being evaluated, oldest pattern first
229
- * @param {number} lastIntervalHours - gap immediately before the release
230
- * being evaluated
231
- * @returns {Object} { available, zScore, meanIntervalHours, stdDevHours,
232
- * lastIntervalHours, anomalous, historicalReleaseCount }
233
- */
234
- function computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours) {
235
- const MIN_HISTORY = 3;
236
- if (!Array.isArray(historicalIntervalsHours) || historicalIntervalsHours.length < MIN_HISTORY) {
237
- return { available: false, reason: `Need at least ${MIN_HISTORY} prior releases to establish a cadence baseline, found ${historicalIntervalsHours?.length || 0}` };
238
- }
239
- if (typeof lastIntervalHours !== 'number' || Number.isNaN(lastIntervalHours)) {
240
- return { available: false, reason: 'No measurable interval for the release being evaluated' };
241
- }
242
-
243
- const meanIntervalHours = mean(historicalIntervalsHours);
244
- const stdDevHours = stdDev(historicalIntervalsHours);
245
- const zScore = (meanIntervalHours - lastIntervalHours) / Math.max(stdDevHours, 1);
246
- const anomalous = zScore > 2 && lastIntervalHours < 24;
247
-
248
- return {
249
- available: true,
250
- zScore: Math.round(zScore * 100) / 100,
251
- meanIntervalHours: Math.round(meanIntervalHours * 100) / 100,
252
- stdDevHours: Math.round(stdDevHours * 100) / 100,
253
- lastIntervalHours: Math.round(lastIntervalHours * 100) / 100,
254
- anomalous,
255
- historicalReleaseCount: historicalIntervalsHours.length,
256
- };
257
- }
258
-
259
- function realVersionTimeline(versionTimes, allVersions) {
260
- return (allVersions || [])
261
- .filter(v => !/[-+]/.test(v)) // skip pre-releases/build metadata
262
- .map(v => ({ version: v, time: versionTimes[v] ? new Date(versionTimes[v]).getTime() : null }))
263
- .filter(v => v.time !== null && !Number.isNaN(v.time))
264
- .sort((a, b) => a.time - b.time);
265
- }
266
-
267
- async function fetchNpmPackument(packageName) {
268
- const { data } = await withRetry(() => axios.get(`${NPM_REGISTRY}/${encodeURIComponent(packageName)}`, { timeout: 8000 }));
269
- return data;
270
- }
271
-
272
- async function analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion) {
273
- if (ecosystem !== 'npm') {
274
- return { available: false, reason: `Publish-cadence analysis only implemented for npm, not ${ecosystem}` };
275
- }
276
- let data;
277
- try {
278
- data = await fetchNpmPackument(packageName);
279
- } catch (error) {
280
- return { available: false, reason: `Registry lookup failed: ${error.message}` };
281
- }
282
-
283
- const versionTimes = data.time || {};
284
- const timeline = realVersionTimeline(versionTimes, Object.keys(data.versions || {}));
285
- const index = timeline.findIndex(v => v.version === targetVersion);
286
- if (index <= 0) {
287
- return { available: false, reason: index === -1 ? 'Target version not found in registry history' : 'Target version is the first release — no prior interval to compare' };
288
- }
289
-
290
- const historicalIntervalsHours = [];
291
- for (let i = 1; i < index; i += 1) {
292
- historicalIntervalsHours.push((timeline[i].time - timeline[i - 1].time) / 3600000);
293
- }
294
- const lastIntervalHours = (timeline[index].time - timeline[index - 1].time) / 3600000;
295
-
296
- return computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours);
297
- }
298
-
299
- // ---------------------------------------------------------------------------
300
- // 4. Maintainer count — pure core + thin network wrapper.
301
- // ---------------------------------------------------------------------------
302
-
303
- function assessMaintainerSignal(maintainerCount) {
304
- if (typeof maintainerCount !== 'number' || maintainerCount < 0) {
305
- return { available: false, reason: 'No maintainer count available' };
306
- }
307
- const singleMaintainer = maintainerCount <= 1;
308
- return {
309
- available: true,
310
- maintainerCount,
311
- singleMaintainer,
312
- reason: singleMaintainer
313
- ? 'Single maintainer on record — no second account needs to be compromised, and no second reviewer would catch a malicious publish.'
314
- : `${maintainerCount} maintainers on record.`,
315
- };
316
- }
317
-
318
- async function analyzeMaintainerSignal(packageName, ecosystem) {
319
- if (ecosystem !== 'npm') {
320
- return { available: false, reason: `Maintainer-list lookup only implemented for npm, not ${ecosystem}` };
321
- }
322
- try {
323
- const data = await fetchNpmPackument(packageName);
324
- return assessMaintainerSignal((data.maintainers || []).length);
325
- } catch (error) {
326
- return { available: false, reason: `Registry lookup failed: ${error.message}` };
327
- }
328
- }
329
-
330
- // ---------------------------------------------------------------------------
331
- // 5. Lifecycle script risk — pure core + thin network wrapper.
332
- // ---------------------------------------------------------------------------
333
-
334
- function assessLifecycleScriptRisk(targetScripts = {}, previousScripts = {}) {
335
- const flaggedScripts = LIFECYCLE_SCRIPT_KEYS.filter(
336
- key => targetScripts[key] && targetScripts[key] !== previousScripts[key]
337
- );
338
- return {
339
- available: true,
340
- flaggedScripts,
341
- riskyScriptsPresent: flaggedScripts.length > 0,
342
- reason: flaggedScripts.length > 0
343
- ? `Install-time script(s) added or changed in this version: ${flaggedScripts.join(', ')} — this is the actual code-execution mechanism behind the axios and event-stream compromises, independent of any filed CVE.`
344
- : 'No new or changed install-time (preinstall/install/postinstall) scripts.',
345
- };
346
- }
347
-
348
- async function analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion) {
349
- if (ecosystem !== 'npm') {
350
- return { available: false, reason: `Lifecycle-script analysis only implemented for npm, not ${ecosystem}` };
351
- }
352
- let data;
353
- try {
354
- data = await fetchNpmPackument(packageName);
355
- } catch (error) {
356
- return { available: false, reason: `Registry lookup failed: ${error.message}` };
357
- }
358
- const versions = data.versions || {};
359
- if (!versions[targetVersion]) {
360
- return { available: false, reason: `Version ${targetVersion} not found in registry packument` };
361
- }
362
- const targetScripts = versions[targetVersion].scripts || {};
363
- const previousScripts = previousVersion ? (versions[previousVersion]?.scripts || {}) : {};
364
- return assessLifecycleScriptRisk(targetScripts, previousScripts);
365
- }
366
-
367
- // ---------------------------------------------------------------------------
368
- // Combinator — the "decision trail" for trust, mirroring riskCalculator.js
369
- // ---------------------------------------------------------------------------
370
-
371
- // Weights sum to 100. Lifecycle script risk is weighted highest because it
372
- // is the actual payload-delivery mechanism, not a proxy for one; publish
373
- // cadence is the strongest *behavioural* proxy (it's what would have
374
- // flagged axios/ua-parser-js/event-stream before any CVE existed).
375
- const TRUST_WEIGHTS = {
376
- lifecycleScript: 35,
377
- publishAnomaly: 25,
378
- dependencyConfusion: 20,
379
- typosquat: 15,
380
- singleMaintainer: 5,
381
- };
382
-
383
- /**
384
- * Combine the five signals into one labeled trust assessment. Pure: takes
385
- * already-computed signal results (each shaped like the `assess*`
386
- * functions above), never fetches anything itself.
387
- *
388
- * Unavailable signals contribute 0, not a guessed value — the `reasons`
389
- * list says so explicitly, so a 0 contribution from "no data" is never
390
- * visually confused with a 0 contribution from "checked, found nothing."
391
- *
392
- * @returns {Object} { score, classification, contributions, weights, reasons }
393
- */
394
- function computeTrustScore({ lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer }) {
395
- const reasons = [];
396
-
397
- const lifecycleFlag = lifecycleScript?.available && lifecycleScript.riskyScriptsPresent;
398
- const contributions = {
399
- lifecycleScript: lifecycleFlag ? TRUST_WEIGHTS.lifecycleScript : 0,
400
- publishAnomaly: publishAnomaly?.available
401
- ? TRUST_WEIGHTS.publishAnomaly * Math.min(Math.max(publishAnomaly.zScore, 0) / 4, 1) * (publishAnomaly.anomalous ? 1 : 0.25)
402
- : 0,
403
- dependencyConfusion: dependencyConfusion?.available && dependencyConfusion.risk ? TRUST_WEIGHTS.dependencyConfusion : 0,
404
- typosquat: typosquat?.available && typosquat.suspected ? TRUST_WEIGHTS.typosquat : 0,
405
- singleMaintainer: maintainer?.available && maintainer.singleMaintainer ? TRUST_WEIGHTS.singleMaintainer : 0,
406
- };
407
-
408
- if (lifecycleScript?.available) reasons.push(lifecycleScript.reason);
409
- else reasons.push(`Lifecycle script check unavailable: ${lifecycleScript?.reason || 'no data'}`);
410
-
411
- if (publishAnomaly?.available) {
412
- reasons.push(publishAnomaly.anomalous
413
- ? `Published ${publishAnomaly.lastIntervalHours}h after the previous release — ${publishAnomaly.zScore}σ faster than this package's own historical cadence (mean ${publishAnomaly.meanIntervalHours}h, n=${publishAnomaly.historicalReleaseCount}).`
414
- : `Publish cadence is within this package's historical norm (z=${publishAnomaly.zScore}).`);
415
- } else {
416
- reasons.push(`Publish-cadence check unavailable: ${publishAnomaly?.reason || 'no data'}`);
417
- }
418
-
419
- if (dependencyConfusion?.available) reasons.push(dependencyConfusion.reason);
420
- else reasons.push(`Dependency-confusion check unavailable: ${dependencyConfusion?.reason || 'no data'}`);
421
-
422
- if (typosquat?.available) {
423
- reasons.push(typosquat.suspected
424
- ? `Name is ${typosquat.distance} edit(s) from popular package "${typosquat.closestMatch}" (similarity ${Math.round(typosquat.similarity * 100)}%).`
425
- : 'No typosquat match against known popular packages.');
426
- } else {
427
- reasons.push(`Typosquat check unavailable: ${typosquat?.reason || 'no data'}`);
428
- }
429
-
430
- if (maintainer?.available) reasons.push(maintainer.reason);
431
- else reasons.push(`Maintainer-count check unavailable: ${maintainer?.reason || 'no data'}`);
432
-
433
- const rawScore = Object.values(contributions).reduce((sum, v) => sum + v, 0);
434
- const score = Math.round(Math.min(Math.max(rawScore, 0), 100));
435
- const classification = score >= 60 ? 'CRITICAL' : score >= 30 ? 'ELEVATED' : 'NORMAL';
436
-
437
- return { score, classification, contributions, weights: TRUST_WEIGHTS, reasons };
438
- }
439
-
440
- /**
441
- * Run all five signals for one dependency and combine them.
442
- * @param {Object} opts - { packageName, ecosystem, targetVersion,
443
- * previousVersion, resolvedUrl }
444
- */
445
- async function assessSupplyChainTrust({ packageName, ecosystem, targetVersion, previousVersion, resolvedUrl }) {
446
- const [lifecycleScript, publishAnomaly, dependencyConfusion, maintainer] = await Promise.all([
447
- targetVersion ? analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion) : { available: false, reason: 'No target version supplied' },
448
- targetVersion ? analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion) : { available: false, reason: 'No target version supplied' },
449
- checkDependencyConfusion(packageName, ecosystem, resolvedUrl),
450
- analyzeMaintainerSignal(packageName, ecosystem),
451
- ]);
452
- const typosquat = detectTyposquat(packageName, ecosystem);
453
-
454
- const result = computeTrustScore({ lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer });
455
- return { packageName, ecosystem, ...result, signals: { lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer } };
456
- }
457
-
458
- module.exports = {
459
- levenshteinDistance,
460
- detectTyposquat,
461
- assessDependencyConfusion,
462
- checkDependencyConfusion,
463
- computeCadenceAnomaly,
464
- analyzePublishCadenceAnomaly,
465
- assessMaintainerSignal,
466
- analyzeMaintainerSignal,
467
- assessLifecycleScriptRisk,
468
- analyzeLifecycleScriptRisk,
469
- computeTrustScore,
470
- assessSupplyChainTrust,
471
- TRUST_WEIGHTS,
472
- };
1
+ /**
2
+ * Supply-Chain Trust Signals.
3
+ *
4
+ * The risk score in riskCalculator.js answers "how bad is this CVE, given
5
+ * real exposure." This module answers a different question that CVSS
6
+ * can't: "does this dependency change *look like* the early shape of a
7
+ * maintainer-compromise attack, independent of whether a CVE has been
8
+ * filed yet?" Log4Shell-style CVEs get filed eventually; account-takeover
9
+ * attacks (axios Mar 2026, ua-parser-js Oct 2021, event-stream Nov 2018)
10
+ * are visible in the registry data *before* anyone files a CVE, if you
11
+ * know what to look at:
12
+ *
13
+ * 1. Lifecycle script risk — did this version add/change a
14
+ * preinstall/install/postinstall script? That's the actual payload
15
+ * delivery mechanism in the axios and event-stream attacks.
16
+ * 2. Publish cadence anomaly — was this version published unusually
17
+ * fast after the previous one, relative to this package's own
18
+ * historical release cadence? ua-parser-js shipped 3 malicious
19
+ * versions in ~4 hours; axios shipped 2 in one sitting after a
20
+ * credential compromise. A z-score against the package's own
21
+ * history catches "uncharacteristically fast," not "fast."
22
+ * 3. Dependency confusion — does this name resolve from a private
23
+ * registry while an identically-named package also exists
24
+ * publicly? That's the exact pattern from the 2021 Birsan research
25
+ * that paid out across Apple/PayPal/Shopify/Netflix/Uber.
26
+ * 4. Typosquatting — is this name one or two edits away
27
+ * from a genuinely popular package?
28
+ * 5. Single-maintainer — a real, available bus-factor proxy
29
+ * (current maintainer count), not a guess about abandonment.
30
+ *
31
+ * Design rule carried over from riskCalculator.js: never blend this into
32
+ * the CVSS-based risk score. A package can be perfectly patched (CVSS
33
+ * score of 0 findings) and still carry a high trust-risk score — these
34
+ * are reported side by side, never merged into one number, so a reader
35
+ * can never mistake "no known CVE" for "trustworthy."
36
+ *
37
+ * Every signal that can't be computed (network failure, unsupported
38
+ * ecosystem, insufficient history) reports `available: false` and
39
+ * contributes exactly 0 — consistent with this project's "never
40
+ * fabricate" rule. A signal that is honestly unavailable is not the same
41
+ * as a signal that found nothing wrong.
42
+ */
43
+
44
+ const axios = require('axios');
45
+ const { withRetry } = require('../httpRetry');
46
+
47
+ const NPM_REGISTRY = 'https://registry.npmjs.org';
48
+ const PYPI_REGISTRY = 'https://pypi.org/pypi';
49
+ const LIFECYCLE_SCRIPT_KEYS = ['preinstall', 'install', 'postinstall'];
50
+
51
+ // A small, real list of genuinely high-download packages per ecosystem,
52
+ // used only as typosquat *targets* — not exhaustive, just enough to catch
53
+ // the "one edit away from something everyone installs" pattern.
54
+ const POPULAR_PACKAGES = {
55
+ npm: [
56
+ 'lodash', 'react', 'react-dom', 'express', 'axios', 'chalk', 'commander',
57
+ 'debug', 'async', 'underscore', 'moment', 'webpack', 'eslint', 'jest',
58
+ 'typescript', 'request', 'vue', 'angular', 'jquery', 'babel', 'redux',
59
+ 'next', 'mongoose', 'socket.io', 'dotenv', 'cors', 'uuid', 'yargs',
60
+ ],
61
+ PyPI: [
62
+ 'requests', 'numpy', 'flask', 'django', 'pandas', 'boto3', 'urllib3',
63
+ 'pyyaml', 'setuptools', 'click', 'pillow', 'pytest', 'scipy', 'sqlalchemy',
64
+ 'jinja2', 'cryptography', 'certifi', 'six', 'idna', 'attrs',
65
+ ],
66
+ };
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // 1. Typosquatting — pure, no network.
70
+ // ---------------------------------------------------------------------------
71
+
72
+ /** Standard Levenshtein edit distance (insert/delete/substitute = cost 1). */
73
+ function levenshteinDistance(a, b) {
74
+ const m = a.length;
75
+ const n = b.length;
76
+ if (m === 0) return n;
77
+ if (n === 0) return m;
78
+
79
+ let prev = Array.from({ length: n + 1 }, (_, j) => j);
80
+ for (let i = 1; i <= m; i += 1) {
81
+ const curr = [i];
82
+ for (let j = 1; j <= n; j += 1) {
83
+ const cost = a[i - 1] === b[j - 1] ? 0 : 1;
84
+ curr[j] = Math.min(
85
+ prev[j] + 1, // deletion
86
+ curr[j - 1] + 1, // insertion
87
+ prev[j - 1] + cost // substitution
88
+ );
89
+ }
90
+ prev = curr;
91
+ }
92
+ return prev[n];
93
+ }
94
+
95
+ /**
96
+ * Is `packageName` suspiciously close to a genuinely popular package?
97
+ * @returns {Object} { available: true, suspected, closestMatch, distance, similarity }
98
+ */
99
+ function detectTyposquat(packageName, ecosystem) {
100
+ const candidates = POPULAR_PACKAGES[ecosystem] || [];
101
+ if (candidates.length === 0) {
102
+ return { available: false, reason: `No popular-package list for ecosystem "${ecosystem}"` };
103
+ }
104
+ if (candidates.includes(packageName)) {
105
+ return { available: true, suspected: false, closestMatch: packageName, distance: 0, similarity: 1 };
106
+ }
107
+
108
+ let best = null;
109
+ for (const candidate of candidates) {
110
+ // Skip comparisons that can't plausibly be a typo (very different length).
111
+ const plausibleLength = Math.abs(candidate.length - packageName.length) <= 3;
112
+ if (plausibleLength) {
113
+ const distance = levenshteinDistance(packageName, candidate);
114
+ if (!best || distance < best.distance) best = { candidate, distance };
115
+ }
116
+ }
117
+ if (!best) return { available: true, suspected: false, closestMatch: null, distance: null, similarity: null };
118
+
119
+ const maxLen = Math.max(packageName.length, best.candidate.length);
120
+ const similarity = 1 - best.distance / maxLen;
121
+ // Distance 1-2 on a real package name (length >= 4, to avoid noisy
122
+ // false positives on very short names) is the typosquat zone; distance 0
123
+ // is an exact match (handled above), and >2 is just "a different word."
124
+ const suspected = packageName.length >= 4 && best.distance > 0 && best.distance <= 2 && similarity >= 0.6;
125
+
126
+ return { available: true, suspected, closestMatch: best.candidate, distance: best.distance, similarity };
127
+ }
128
+
129
+ // ---------------------------------------------------------------------------
130
+ // 2. Dependency confusion — pure core + thin network wrapper.
131
+ // ---------------------------------------------------------------------------
132
+
133
+ const PUBLIC_REGISTRY_HOST_PATTERN = /registry\.npmjs\.org|pypi\.org|files\.pythonhosted\.org/i;
134
+
135
+ /**
136
+ * Pure decision: given where this install actually resolved from, and
137
+ * whether a same-named package exists on the *public* registry, is this
138
+ * dependency-confusion-shaped?
139
+ */
140
+ function assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry }) {
141
+ if (!resolvedUrl) {
142
+ return { available: false, reason: 'No lockfile resolved-URL provided — cannot determine install source' };
143
+ }
144
+ const resolvedFromPublic = PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl);
145
+ if (resolvedFromPublic) {
146
+ return { available: true, risk: false, resolvedFromPublic: true, reason: 'Resolved from the public registry — no confusion risk' };
147
+ }
148
+ if (existsOnPublicRegistry) {
149
+ return {
150
+ available: true,
151
+ risk: true,
152
+ resolvedFromPublic: false,
153
+ reason: 'Resolves from a private/internal registry, but a same-named package also exists publicly — a misconfigured environment (missing scope/registry override) could silently install the public package instead.',
154
+ };
155
+ }
156
+ return {
157
+ available: true,
158
+ risk: false,
159
+ resolvedFromPublic: false,
160
+ reason: 'Resolves from a private registry; no same-named package on the public registry yet — lower squat risk today, but the name is unclaimed and could be registered later.',
161
+ };
162
+ }
163
+
164
/**
 * Ask the public registry whether a package with this exact name exists.
 *
 * @param {string} packageName - name to look up (URI-encoded before use).
 * @param {string} ecosystem - 'PyPI' selects the PyPI JSON endpoint; any other
 *   value falls through to the npm registry.
 * @returns {Promise<boolean>} true when the request succeeds, false on a 404.
 * @throws rethrows any failure other than an HTTP 404 response.
 */
async function publicRegistryHasPackage(packageName, ecosystem) {
  const encodedName = encodeURIComponent(packageName);
  let lookupUrl;
  if (ecosystem === 'PyPI') {
    lookupUrl = `${PYPI_REGISTRY}/${encodedName}/json`;
  } else {
    lookupUrl = `${NPM_REGISTRY}/${encodedName}`;
  }

  const probe = () => axios.get(lookupUrl, { timeout: 8000 });
  try {
    // A 404 is a definitive "name not registered publicly", not a transient
    // fault; it is expected to reach the catch below and become `false`.
    await withRetry(probe);
  } catch (error) {
    const nameIsUnregistered = error.response?.status === 404;
    if (!nameIsUnregistered) {
      throw error;
    }
    return false;
  }
  return true;
}
179
+
180
/**
 * Network-backed wrapper around assessDependencyConfusion.
 *
 * The public-registry lookup is only made when the resolved URL is present
 * and does not match PUBLIC_REGISTRY_HOST_PATTERN; in the other two cases the
 * pure assessment is called directly.
 *
 * @param {string} packageName
 * @param {string} ecosystem - forwarded to publicRegistryHasPackage.
 * @param {?string} resolvedUrl - lockfile "resolved" URL, if known.
 * @returns {Promise<Object>} the assessment, or `{ available: false, reason }`
 *   when the lookup (or the assessment that follows it) throws.
 */
async function checkDependencyConfusion(packageName, ecosystem, resolvedUrl) {
  if (!resolvedUrl) {
    return assessDependencyConfusion({ resolvedUrl: null });
  }

  const alreadyPublic = PUBLIC_REGISTRY_HOST_PATTERN.test(resolvedUrl);
  if (alreadyPublic) {
    return assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry: null });
  }

  try {
    const existsOnPublicRegistry = await publicRegistryHasPackage(packageName, ecosystem);
    const verdict = assessDependencyConfusion({ resolvedUrl, existsOnPublicRegistry });
    return verdict;
  } catch (error) {
    const reason = `Public registry lookup failed: ${error.message}`;
    return { available: false, reason };
  }
}
192
+
193
+ // ---------------------------------------------------------------------------
194
+ // 3. Publish cadence anomaly — pure z-score core + thin network wrapper.
195
+ // ---------------------------------------------------------------------------
196
+
197
/**
 * Arithmetic mean of a numeric array.
 * An empty array yields NaN (0 / 0); callers are expected to guard length.
 */
function mean(values) {
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / values.length;
}
200
+
201
/**
 * Population standard deviation (divides by N, not N - 1) of a numeric array.
 */
function stdDev(values) {
  const center = mean(values);
  const squaredDeviations = values.map((value) => (value - center) ** 2);
  const sumOfSquares = squaredDeviations.reduce((acc, squared) => acc + squared, 0);
  return Math.sqrt(sumOfSquares / values.length);
}
206
+
207
/**
 * Was `lastIntervalHours` (the gap between the previous release and the one
 * being installed now) anomalously short compared to this package's own
 * historical release cadence?
 *
 *   z = (mean(historicalIntervals) - lastInterval) / max(stdDev(historicalIntervals), 1)
 *
 * A positive z means "faster than usual". z is expressed in units of the
 * package's own historical standard deviation, so the threshold adapts to
 * each package's baseline instead of being one global number. The stdDev
 * floor of 1 hour avoids a divide-by-near-zero blowup for packages with an
 * almost perfectly regular cadence.
 *
 * Flagging requires BOTH a statistical anomaly (z > 2) AND an absolute
 * ceiling on the gap (< 24h) — the z-score alone would also flag a perfectly
 * normal package whose typical cadence is itself sub-daily.
 *
 * Inputs are validated as finite numbers: a NaN/Infinity/non-numeric value
 * would otherwise produce a NaN or infinite z-score that is still reported
 * as `available: true`.
 *
 * @param {number[]} historicalIntervalsHours - gaps (in hours) between
 *   releases *before* the release being evaluated, oldest first. At least
 *   three entries are required.
 * @param {number} lastIntervalHours - gap (in hours) immediately before the
 *   release being evaluated.
 * @returns {Object} `{ available: false, reason }` when no baseline can be
 *   computed, otherwise `{ available: true, zScore, meanIntervalHours,
 *   stdDevHours, lastIntervalHours, anomalous, historicalReleaseCount }` with
 *   numeric fields rounded to two decimals. `historicalReleaseCount` is the
 *   number of historical intervals supplied.
 */
function computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours) {
  const MIN_HISTORY = 3;
  if (!Array.isArray(historicalIntervalsHours) || historicalIntervalsHours.length < MIN_HISTORY) {
    return { available: false, reason: `Need at least ${MIN_HISTORY} prior releases to establish a cadence baseline, found ${historicalIntervalsHours?.length || 0}` };
  }
  // Number.isFinite does not coerce, so strings, null, NaN and ±Infinity all fail.
  if (!historicalIntervalsHours.every((hours) => Number.isFinite(hours))) {
    return { available: false, reason: 'Historical release intervals contain non-numeric or non-finite values' };
  }
  if (!Number.isFinite(lastIntervalHours)) {
    return { available: false, reason: 'No measurable interval for the release being evaluated' };
  }

  const roundTo2 = (value) => Math.round(value * 100) / 100;

  const meanIntervalHours = mean(historicalIntervalsHours);
  const stdDevHours = stdDev(historicalIntervalsHours);
  const zScore = (meanIntervalHours - lastIntervalHours) / Math.max(stdDevHours, 1);
  // Both conditions use the unrounded z-score and raw interval.
  const anomalous = zScore > 2 && lastIntervalHours < 24;

  return {
    available: true,
    zScore: roundTo2(zScore),
    meanIntervalHours: roundTo2(meanIntervalHours),
    stdDevHours: roundTo2(stdDevHours),
    lastIntervalHours: roundTo2(lastIntervalHours),
    anomalous,
    historicalReleaseCount: historicalIntervalsHours.length,
  };
}
258
+
259
/**
 * Build a chronologically sorted list of `{ version, time }` entries.
 *
 * Versions whose string contains '-' or '+' (pre-release / build metadata)
 * are dropped, as are versions with no timestamp in `versionTimes` or with a
 * timestamp that does not parse to a valid date. `time` is epoch milliseconds.
 *
 * @param {Object} versionTimes - map of version string to date string.
 * @param {?string[]} allVersions - version strings to consider; falsy is
 *   treated as an empty list.
 * @returns {{version: string, time: number}[]} entries sorted oldest first.
 */
function realVersionTimeline(versionTimes, allVersions) {
  const PRERELEASE_OR_BUILD = /[-+]/;

  const dated = (allVersions || []).flatMap((version) => {
    if (PRERELEASE_OR_BUILD.test(version)) {
      return [];
    }
    const published = versionTimes[version];
    if (!published) {
      return [];
    }
    const time = new Date(published).getTime();
    return Number.isNaN(time) ? [] : [{ version, time }];
  });

  return dated.sort((earlier, later) => earlier.time - later.time);
}
266
+
267
/**
 * Fetch the registry document for an npm package and return its body.
 *
 * @param {string} packageName - URI-encoded before being appended to NPM_REGISTRY.
 * @returns {Promise<Object>} the `data` property of the HTTP response.
 * @throws whatever withRetry / axios.get rejects with.
 */
async function fetchNpmPackument(packageName) {
  const requestPackument = () => {
    const packumentUrl = `${NPM_REGISTRY}/${encodeURIComponent(packageName)}`;
    return axios.get(packumentUrl, { timeout: 8000 });
  };
  const response = await withRetry(requestPackument);
  return response.data;
}
271
+
272
/**
 * Network-backed wrapper around computeCadenceAnomaly for npm packages.
 *
 * Fetches the packument, builds the release timeline, locates
 * `targetVersion` in it, and hands the preceding release gaps (in hours) to
 * the pure z-score function.
 *
 * @param {string} packageName
 * @param {string} ecosystem - anything other than 'npm' is reported as unavailable.
 * @param {string} targetVersion - the version being evaluated.
 * @returns {Promise<Object>} the computeCadenceAnomaly result, or
 *   `{ available: false, reason }` when the ecosystem is unsupported, the
 *   registry lookup fails, or the target is missing / is the first release.
 */
async function analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion) {
  if (ecosystem !== 'npm') {
    return { available: false, reason: `Publish-cadence analysis only implemented for npm, not ${ecosystem}` };
  }

  let packument;
  try {
    packument = await fetchNpmPackument(packageName);
  } catch (error) {
    return { available: false, reason: `Registry lookup failed: ${error.message}` };
  }

  const publishTimes = packument.time || {};
  const publishedVersions = Object.keys(packument.versions || {});
  const timeline = realVersionTimeline(publishTimes, publishedVersions);

  const targetPosition = timeline.findIndex((entry) => entry.version === targetVersion);
  if (targetPosition === -1) {
    return { available: false, reason: 'Target version not found in registry history' };
  }
  if (targetPosition === 0) {
    return { available: false, reason: 'Target version is the first release — no prior interval to compare' };
  }

  // Gap, in hours, between the release at `position` and the one before it.
  const gapHoursBefore = (position) => (timeline[position].time - timeline[position - 1].time) / 3600000;

  // Every gap strictly before the target's own gap forms the baseline.
  const historicalIntervalsHours = Array.from(
    { length: targetPosition - 1 },
    (_, offset) => gapHoursBefore(offset + 1),
  );
  const lastIntervalHours = gapHoursBefore(targetPosition);

  return computeCadenceAnomaly(historicalIntervalsHours, lastIntervalHours);
}
298
+
299
+ // ---------------------------------------------------------------------------
300
+ // 4. Maintainer count — pure core + thin network wrapper.
301
+ // ---------------------------------------------------------------------------
302
+
303
/**
 * Pure decision for the maintainer-count signal.
 *
 * @param {number} maintainerCount - number of maintainers on record.
 * @returns {Object} `{ available: false, reason }` when the count is not a
 *   finite, non-negative number; otherwise
 *   `{ available: true, maintainerCount, singleMaintainer, reason }`, where
 *   `singleMaintainer` is true for a count of 0 or 1.
 */
function assessMaintainerSignal(maintainerCount) {
  // Number.isFinite rejects non-numbers without coercion and also rejects
  // NaN / ±Infinity, which `typeof x === 'number'` alone would let through
  // (yielding an "available" result that reads "NaN maintainers on record.").
  if (!Number.isFinite(maintainerCount) || maintainerCount < 0) {
    return { available: false, reason: 'No maintainer count available' };
  }
  const singleMaintainer = maintainerCount <= 1;
  return {
    available: true,
    maintainerCount,
    singleMaintainer,
    reason: singleMaintainer
      ? 'Single maintainer on record — no second account needs to be compromised, and no second reviewer would catch a malicious publish.'
      : `${maintainerCount} maintainers on record.`,
  };
}
317
+
318
+ async function analyzeMaintainerSignal(packageName, ecosystem) {
319
+ if (ecosystem !== 'npm') {
320
+ return { available: false, reason: `Maintainer-list lookup only implemented for npm, not ${ecosystem}` };
321
+ }
322
+ try {
323
+ const data = await fetchNpmPackument(packageName);
324
+ return assessMaintainerSignal((data.maintainers || []).length);
325
+ } catch (error) {
326
+ return { available: false, reason: `Registry lookup failed: ${error.message}` };
327
+ }
328
+ }
329
+
330
+ // ---------------------------------------------------------------------------
331
+ // 5. Lifecycle script risk — pure core + thin network wrapper.
332
+ // ---------------------------------------------------------------------------
333
+
334
/**
 * Pure decision for the lifecycle-script signal.
 *
 * A script named in LIFECYCLE_SCRIPT_KEYS is flagged when it is present
 * (truthy) in the target version and differs from the same key in the
 * previous version, i.e. it was added or changed. Removed scripts are not
 * flagged.
 *
 * @param {Object} [targetScripts={}] - `scripts` map of the version under review.
 * @param {Object} [previousScripts={}] - `scripts` map of the prior version.
 * @returns {{available: true, flaggedScripts: string[], riskyScriptsPresent: boolean, reason: string}}
 */
function assessLifecycleScriptRisk(targetScripts = {}, previousScripts = {}) {
  const flaggedScripts = [];
  for (const key of LIFECYCLE_SCRIPT_KEYS) {
    const current = targetScripts[key];
    if (current && current !== previousScripts[key]) {
      flaggedScripts.push(key);
    }
  }

  const riskyScriptsPresent = flaggedScripts.length > 0;
  let reason;
  if (riskyScriptsPresent) {
    reason = `Install-time script(s) added or changed in this version: ${flaggedScripts.join(', ')} — this is the actual code-execution mechanism behind the axios and event-stream compromises, independent of any filed CVE.`;
  } else {
    reason = 'No new or changed install-time (preinstall/install/postinstall) scripts.';
  }

  return {
    available: true,
    flaggedScripts,
    riskyScriptsPresent,
    reason,
  };
}
347
+
348
/**
 * Network-backed wrapper around assessLifecycleScriptRisk for npm packages.
 *
 * @param {string} packageName
 * @param {string} ecosystem - anything other than 'npm' is reported as unavailable.
 * @param {string} targetVersion - version whose scripts are being evaluated.
 * @param {?string} previousVersion - version to diff against. When it is
 *   falsy, or absent from the packument, the comparison baseline is an empty
 *   scripts map, so every lifecycle script in the target counts as added.
 * @returns {Promise<Object>} the assessment, or `{ available: false, reason }`
 *   when the ecosystem is unsupported, the lookup fails, or the target
 *   version is not in the packument.
 */
async function analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion) {
  if (ecosystem !== 'npm') {
    return { available: false, reason: `Lifecycle-script analysis only implemented for npm, not ${ecosystem}` };
  }

  let packument;
  try {
    packument = await fetchNpmPackument(packageName);
  } catch (error) {
    return { available: false, reason: `Registry lookup failed: ${error.message}` };
  }

  const manifests = packument.versions || {};
  const targetManifest = manifests[targetVersion];
  if (!targetManifest) {
    return { available: false, reason: `Version ${targetVersion} not found in registry packument` };
  }

  let previousScripts = {};
  if (previousVersion) {
    previousScripts = manifests[previousVersion]?.scripts || {};
  }
  const targetScripts = targetManifest.scripts || {};

  return assessLifecycleScriptRisk(targetScripts, previousScripts);
}
366
+
367
+ // ---------------------------------------------------------------------------
368
+ // Combinator — the "decision trail" for trust, mirroring riskCalculator.js
369
+ // ---------------------------------------------------------------------------
370
+
371
// Maximum points each signal can add to the trust score; together they total
// 100. Lifecycle scripts carry the most weight because they are the payload
// delivery mechanism itself rather than a proxy for one. Publish cadence comes
// next as the strongest *behavioural* proxy — the signal that would have
// surfaced axios/ua-parser-js/event-stream before any CVE was filed.
const TRUST_WEIGHTS = {
  lifecycleScript: 35,
  publishAnomaly: 25,
  dependencyConfusion: 20,
  typosquat: 15,
  singleMaintainer: 5,
};
382
+
383
/**
 * Combine the five signals into one labeled trust assessment. Pure: takes
 * already-computed signal results and never fetches anything itself.
 *
 * Scoring:
 *  - lifecycleScript, dependencyConfusion, typosquat and singleMaintainer are
 *    all-or-nothing: the full weight when the signal is available and
 *    flagged, otherwise 0.
 *  - publishAnomaly is graded: weight × clamp(z / 4, 0, 1), then scaled by
 *    0.25 when the release was not classified as anomalous. A z-score that is
 *    missing or not a number contributes 0 — without that guard a single NaN
 *    would turn the whole sum into NaN, which compares false against every
 *    threshold and would be labeled NORMAL regardless of the other signals.
 *
 * Unavailable signals contribute 0, not a guessed value, and `reasons` says
 * so explicitly so "no data" is never confused with "checked, found nothing".
 *
 * Classification: score >= 60 is CRITICAL, >= 30 is ELEVATED, else NORMAL.
 *
 * @param {Object} [signals]
 * @param {Object} [signals.lifecycleScript] - `{ available, riskyScriptsPresent, reason }`
 * @param {Object} [signals.publishAnomaly] - `{ available, zScore, anomalous,
 *   lastIntervalHours, meanIntervalHours, historicalReleaseCount, reason }`
 * @param {Object} [signals.dependencyConfusion] - `{ available, risk, reason }`
 * @param {Object} [signals.typosquat] - `{ available, suspected, distance,
 *   closestMatch, similarity, reason }`
 * @param {Object} [signals.maintainer] - `{ available, singleMaintainer, reason }`
 * @returns {Object} `{ score, classification, contributions, weights, reasons }`
 *   where `score` is an integer in [0, 100] and `reasons` has one entry per
 *   signal, in the order listed above.
 */
function computeTrustScore({ lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer } = {}) {
  const reasons = [];

  const lifecycleFlag = lifecycleScript?.available && lifecycleScript.riskyScriptsPresent;

  let publishContribution = 0;
  if (publishAnomaly?.available) {
    const z = Number(publishAnomaly.zScore);
    // NaN (including a missing zScore) must not poison the total.
    const severity = Number.isNaN(z) ? 0 : Math.min(Math.max(z, 0) / 4, 1);
    publishContribution = TRUST_WEIGHTS.publishAnomaly * severity * (publishAnomaly.anomalous ? 1 : 0.25);
  }

  const contributions = {
    lifecycleScript: lifecycleFlag ? TRUST_WEIGHTS.lifecycleScript : 0,
    publishAnomaly: publishContribution,
    dependencyConfusion: dependencyConfusion?.available && dependencyConfusion.risk ? TRUST_WEIGHTS.dependencyConfusion : 0,
    typosquat: typosquat?.available && typosquat.suspected ? TRUST_WEIGHTS.typosquat : 0,
    singleMaintainer: maintainer?.available && maintainer.singleMaintainer ? TRUST_WEIGHTS.singleMaintainer : 0,
  };

  if (lifecycleScript?.available) reasons.push(lifecycleScript.reason);
  else reasons.push(`Lifecycle script check unavailable: ${lifecycleScript?.reason || 'no data'}`);

  if (publishAnomaly?.available) {
    reasons.push(publishAnomaly.anomalous
      ? `Published ${publishAnomaly.lastIntervalHours}h after the previous release — ${publishAnomaly.zScore}σ faster than this package's own historical cadence (mean ${publishAnomaly.meanIntervalHours}h, n=${publishAnomaly.historicalReleaseCount}).`
      : `Publish cadence is within this package's historical norm (z=${publishAnomaly.zScore}).`);
  } else {
    reasons.push(`Publish-cadence check unavailable: ${publishAnomaly?.reason || 'no data'}`);
  }

  if (dependencyConfusion?.available) reasons.push(dependencyConfusion.reason);
  else reasons.push(`Dependency-confusion check unavailable: ${dependencyConfusion?.reason || 'no data'}`);

  if (typosquat?.available) {
    reasons.push(typosquat.suspected
      ? `Name is ${typosquat.distance} edit(s) from popular package "${typosquat.closestMatch}" (similarity ${Math.round(typosquat.similarity * 100)}%).`
      : 'No typosquat match against known popular packages.');
  } else {
    reasons.push(`Typosquat check unavailable: ${typosquat?.reason || 'no data'}`);
  }

  if (maintainer?.available) reasons.push(maintainer.reason);
  else reasons.push(`Maintainer-count check unavailable: ${maintainer?.reason || 'no data'}`);

  const rawScore = Object.values(contributions).reduce((sum, v) => sum + v, 0);
  const score = Math.round(Math.min(Math.max(rawScore, 0), 100));
  const classification = score >= 60 ? 'CRITICAL' : score >= 30 ? 'ELEVATED' : 'NORMAL';

  return { score, classification, contributions, weights: TRUST_WEIGHTS, reasons };
}
439
+
440
/**
 * Evaluate all five supply-chain signals for one dependency and fold them
 * into a single trust assessment.
 *
 * The four network-backed signals are started together and awaited as a
 * group; the typosquat check is run afterwards. When `targetVersion` is
 * falsy, the lifecycle-script and publish-cadence signals are reported as
 * unavailable instead of being looked up.
 *
 * @param {Object} opts
 * @param {string} opts.packageName
 * @param {string} opts.ecosystem
 * @param {string} [opts.targetVersion]
 * @param {string} [opts.previousVersion]
 * @param {string} [opts.resolvedUrl]
 * @returns {Promise<Object>} `{ packageName, ecosystem, ...computeTrustScore
 *   result, signals }` where `signals` holds the raw per-signal results.
 */
async function assessSupplyChainTrust({ packageName, ecosystem, targetVersion, previousVersion, resolvedUrl }) {
  const missingTargetVersion = () => ({ available: false, reason: 'No target version supplied' });

  const pendingSignals = [
    targetVersion
      ? analyzeLifecycleScriptRisk(packageName, ecosystem, targetVersion, previousVersion)
      : missingTargetVersion(),
    targetVersion
      ? analyzePublishCadenceAnomaly(packageName, ecosystem, targetVersion)
      : missingTargetVersion(),
    checkDependencyConfusion(packageName, ecosystem, resolvedUrl),
    analyzeMaintainerSignal(packageName, ecosystem),
  ];
  const [lifecycleScript, publishAnomaly, dependencyConfusion, maintainer] = await Promise.all(pendingSignals);

  const typosquat = detectTyposquat(packageName, ecosystem);

  const signals = { lifecycleScript, publishAnomaly, dependencyConfusion, typosquat, maintainer };
  const combined = computeTrustScore({ ...signals });

  return { packageName, ecosystem, ...combined, signals };
}
457
+
458
+ module.exports = {
459
+ levenshteinDistance,
460
+ detectTyposquat,
461
+ assessDependencyConfusion,
462
+ checkDependencyConfusion,
463
+ computeCadenceAnomaly,
464
+ analyzePublishCadenceAnomaly,
465
+ assessMaintainerSignal,
466
+ analyzeMaintainerSignal,
467
+ assessLifecycleScriptRisk,
468
+ analyzeLifecycleScriptRisk,
469
+ computeTrustScore,
470
+ assessSupplyChainTrust,
471
+ TRUST_WEIGHTS,
472
+ };