muaddib-scanner 2.2.28 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,512 +1,536 @@
1
- const fs = require('fs');
2
- const path = require('path');
3
- const { getPackageMetadata } = require('./npm-registry.js');
4
-
5
- // In-memory cache to avoid re-querying the same package in one scan
6
- const metadataCache = new Map();
7
- const MAX_METADATA_CACHE_SIZE = 500;
8
- const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
9
-
10
- // Top 100 packages npm les plus populaires (cibles de typosquatting)
11
- const POPULAR_PACKAGES = [
12
- 'lodash', 'express', 'react', 'axios', 'chalk', 'commander', 'moment',
13
- 'request', 'async', 'bluebird', 'underscore', 'uuid', 'debug', 'mkdirp',
14
- 'glob', 'minimist', 'webpack', 'babel-core', 'typescript', 'eslint',
15
- 'prettier', 'jest', 'mocha', 'chai', 'sinon', 'mongoose', 'sequelize',
16
- 'redis', 'mongodb', 'socket.io', 'express-session',
17
- 'body-parser', 'cookie-parser', 'cors', 'helmet', 'morgan', 'dotenv',
18
- 'jsonwebtoken', 'bcrypt', 'passport', 'nodemailer', 'aws-sdk', 'stripe',
19
- 'twilio', 'firebase', 'graphql', 'apollo-server', 'nuxt',
20
- 'gatsby', 'angular', 'svelte', 'electron', 'puppeteer', 'cheerio',
21
- 'sharp', 'jimp', 'canvas', 'pdf-lib', 'exceljs', 'csv-parser', 'xml2js',
22
- 'yaml', 'config', 'yargs', 'colors',
23
- 'winston', 'bunyan', 'pino', 'log4js', 'ramda', 'immutable',
24
- 'mobx', 'redux', 'zustand', 'formik', 'yup', 'ajv', 'validator',
25
- 'date-fns', 'dayjs', 'luxon', 'numeral', 'accounting', 'currency.js',
26
- 'lodash-es', 'core-js', 'regenerator-runtime', 'tslib', 'classnames',
27
- 'prop-types', 'cross-env', 'node-fetch', 'got'
28
- ];
29
-
30
- // Packages legitimes courts ou qui ressemblent a des populaires
31
- const WHITELIST = new Set([
32
- // Packages tres courts legitimes
33
- 'qs', 'pg', 'ms', 'ws', 'ip', 'on', 'is', 'it', 'to', 'or', 'fs', 'os',
34
- 'co', 'q', 'n', 'i', 'a', 'v', 'x', 'y', 'z',
35
- 'ejs', 'nyc', 'ini', 'joi', 'vue', 'npm', 'got', 'ora',
36
- 'vary', 'mime', 'send', 'etag', 'raw', 'tar', 'uid', 'cjs',
37
- 'rxjs', 'yarn', 'pnpm', 'next', 'targz',
38
-
39
- // Packages legitimes avec noms similaires
40
- 'acorn', 'acorn-walk', 'js-yaml', 'cross-env', 'node-fetch', 'node-gyp',
41
- 'core-js', 'lodash-es', 'date-fns', 'ts-node', 'ts-jest',
42
- 'css-loader', 'style-loader', 'file-loader', 'url-loader', 'babel-loader',
43
- 'vue-loader', 'react-dom', 'react-router', 'react-redux', 'vue-router',
44
- 'express-session', 'body-parser', 'cookie-parser',
45
-
46
- // Packages Express.js communs
47
- 'accepts', 'array-flatten', 'content-disposition', 'content-type',
48
- 'depd', 'destroy', 'encodeurl', 'escape-html', 'fresh', 'merge-descriptors',
49
- 'methods', 'on-finished', 'parseurl', 'path-to-regexp', 'proxy-addr',
50
- 'range-parser', 'safe-buffer', 'safer-buffer', 'setprototypeof',
51
- 'statuses', 'type-is', 'unpipe', 'utils-merge',
52
-
53
- // Packages CLI et outils legitimes
54
- 'jest-cli', 'prettier-2', 'prettier-1', 'eslint-cli',
55
- 'inquirer', 'enquirer', 'prompts',
56
- 'mysql2', 'pg-native', 'sqlite3', 'better-sqlite3',
57
- 'node-sass', 'sass', 'less',
58
- 'esbuild', 'rollup', 'parcel', 'vite',
59
- 'husky', 'lint-staged', 'commitlint',
60
- 'nodemon', 'pm2', 'forever', 'concurrently',
61
- 'lerna', 'turbo', 'nx',
62
- 'chalk', 'colors', 'picocolors', 'colorette',
63
- 'commander', 'yargs', 'meow', 'cac',
64
- 'execa', 'shelljs', 'cross-spawn',
65
- 'rimraf', 'del', 'trash-cli',
66
- 'globby', 'fast-glob', 'tiny-glob',
67
- 'chokidar', 'watchpack', 'nsfw',
68
- 'dotenv', 'dotenv-expand', 'env-cmd',
69
-
70
- // Packages Vite et outils associes
71
- 'vitest', 'vitepress',
72
- 'eslint-config-prettier', 'eslint-plugin-prettier',
73
- 'eslint-scope', 'eslint-visitor-keys',
74
- 'esbuild-register',
75
- 'neo-async',
76
-
77
- // Packages with names close to other popular packages (not typosquats)
78
- 'chai', // resembles chalk (missing_char)
79
- 'pino', // resembles sinon (missing_char)
80
- 'ioredis', // resembles redis (extra prefix)
81
- 'bcryptjs', // resembles bcrypt (suffix)
82
- 'recast', // resembles react (extra_char)
83
- 'asyncdi', // resembles async (suffix)
84
- 'redux', // resembles redis (wrong_char)
85
- 'args', // resembles yargs (missing_char)
86
- 'oxlint', // resembles eslint (wrong_char)
87
- 'vasync' // resembles async (extra prefix)
88
- ]);
89
-
90
-
91
- // Pre-computed lowercase versions for performance
92
- const POPULAR_PACKAGES_LOWER = POPULAR_PACKAGES.map(p => p.toLowerCase());
93
-
94
- // Seuil minimum de longueur pour eviter faux positifs
95
- const MIN_PACKAGE_LENGTH = 4;
96
-
97
- const SEVERITY_ORDER = ['LOW', 'MEDIUM', 'HIGH', 'CRITICAL'];
98
-
99
- function maxSeverity(a, b) {
100
- return SEVERITY_ORDER.indexOf(a) >= SEVERITY_ORDER.indexOf(b) ? a : b;
101
- }
102
-
103
- async function getCachedMetadata(packageName) {
104
- if (metadataCache.has(packageName)) {
105
- const entry = metadataCache.get(packageName);
106
- // TTL check: evict stale entries
107
- if (Date.now() - entry.ts < CACHE_TTL_MS) {
108
- return entry.data;
109
- }
110
- metadataCache.delete(packageName);
111
- }
112
- const result = await getPackageMetadata(packageName);
113
- // Bounded cache: evict oldest entry if at limit
114
- if (metadataCache.size >= MAX_METADATA_CACHE_SIZE) {
115
- const firstKey = metadataCache.keys().next().value;
116
- metadataCache.delete(firstKey);
117
- }
118
- metadataCache.set(packageName, { data: result, ts: Date.now() });
119
- return result;
120
- }
121
-
122
- function scoreMetadata(meta) {
123
- let score = 0;
124
- let severity = 'HIGH'; // base severity from Levenshtein match
125
-
126
- if (!meta) {
127
- // Package not found on npm = suspect
128
- return { score: 20, severity: 'HIGH', factors: ['not_on_npm'] };
129
- }
130
-
131
- const factors = [];
132
-
133
- // 1. Age
134
- if (meta.age_days !== null && meta.age_days < 7) {
135
- score += 30;
136
- severity = maxSeverity(severity, 'CRITICAL');
137
- factors.push('age<7d');
138
- } else if (meta.age_days !== null && meta.age_days < 30) {
139
- score += 15;
140
- severity = maxSeverity(severity, 'HIGH');
141
- factors.push('age<30d');
142
- }
143
-
144
- // 2. Downloads
145
- if (meta.weekly_downloads < 100) {
146
- score += 25;
147
- severity = maxSeverity(severity, 'HIGH');
148
- factors.push('downloads<100');
149
- } else if (meta.weekly_downloads < 1000) {
150
- score += 10;
151
- severity = maxSeverity(severity, 'MEDIUM');
152
- factors.push('downloads<1000');
153
- }
154
-
155
- // 3. Author package count
156
- if (meta.author_package_count <= 1) {
157
- score += 20;
158
- severity = maxSeverity(severity, 'HIGH');
159
- factors.push('single_pkg_author');
160
- }
161
-
162
- // 4. No README
163
- if (!meta.has_readme) {
164
- score += 10;
165
- severity = maxSeverity(severity, 'MEDIUM');
166
- factors.push('no_readme');
167
- }
168
-
169
- // 5. No repository
170
- if (!meta.has_repository) {
171
- score += 10;
172
- severity = maxSeverity(severity, 'MEDIUM');
173
- factors.push('no_repo');
174
- }
175
-
176
- return { score, severity, factors };
177
- }
178
-
179
- const PROTO_KEYS = new Set(['__proto__', 'constructor', 'prototype']);
180
-
181
- /**
182
- * Safely merge dependency objects, filtering out prototype pollution keys.
183
- */
184
- function safeMerge(...objs) {
185
- const result = {};
186
- for (const obj of objs) {
187
- if (!obj || typeof obj !== 'object') continue;
188
- for (const [key, value] of Object.entries(obj)) {
189
- if (!PROTO_KEYS.has(key)) {
190
- result[key] = value;
191
- }
192
- }
193
- }
194
- return result;
195
- }
196
-
197
- async function scanTyposquatting(targetPath) {
198
- const threats = [];
199
- metadataCache.clear();
200
- const packageJsonPath = path.join(targetPath, 'package.json');
201
-
202
- if (!fs.existsSync(packageJsonPath)) {
203
- return threats;
204
- }
205
-
206
- let packageJson;
207
- try {
208
- packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
209
- } catch {
210
- return threats;
211
- }
212
- const dependencies = safeMerge(
213
- packageJson.dependencies,
214
- packageJson.devDependencies,
215
- packageJson.peerDependencies,
216
- packageJson.optionalDependencies
217
- );
218
-
219
- // Phase 1: Levenshtein matches (synchronous)
220
- const candidates = [];
221
- for (const depName of Object.keys(dependencies)) {
222
- const match = findTyposquatMatch(depName);
223
- if (match) {
224
- candidates.push({ depName, match });
225
- }
226
- }
227
-
228
- if (candidates.length === 0) return threats;
229
-
230
- // Phase 2: API enrichment (batched to avoid socket exhaustion)
231
- const BATCH_SIZE = 10;
232
- const metadataResults = [];
233
- for (let i = 0; i < candidates.length; i += BATCH_SIZE) {
234
- const batch = candidates.slice(i, i + BATCH_SIZE);
235
- const batchResults = await Promise.all(
236
- batch.map(c => getCachedMetadata(c.depName))
237
- );
238
- metadataResults.push(...batchResults);
239
- }
240
-
241
- // Phase 3: Composite scoring
242
- for (let i = 0; i < candidates.length; i++) {
243
- const { depName, match } = candidates[i];
244
- const meta = metadataResults[i];
245
- const mf = scoreMetadata(meta);
246
-
247
- const finalSeverity = maxSeverity('HIGH', mf.severity);
248
-
249
- // Build detail message
250
- let details;
251
- if (!meta) {
252
- details = 'Package not found on npm (suspect).';
253
- } else {
254
- details = 'Age: ' + meta.age_days + 'd'
255
- + ', Downloads: ' + meta.weekly_downloads + '/week'
256
- + ', Author packages: ' + meta.author_package_count
257
- + ', No README: ' + String(!meta.has_readme)
258
- + ', No repo: ' + String(!meta.has_repository);
259
- }
260
-
261
- const confidence = mf.score >= 40 ? 'CRITICAL'
262
- : mf.score >= 20 ? 'HIGH'
263
- : mf.score > 0 ? 'MEDIUM'
264
- : 'LOW';
265
-
266
- const message = 'Package "' + depName + '" resembles "' + match.original
267
- + '" (' + match.type + '). ' + details + '. Confidence: ' + confidence;
268
-
269
- threats.push({
270
- type: 'typosquat_detected',
271
- severity: finalSeverity,
272
- message: message,
273
- file: 'package.json',
274
- details: {
275
- suspicious: depName,
276
- legitimate: match.original,
277
- technique: match.type,
278
- distance: match.distance,
279
- composite_score: mf.score,
280
- factors: mf.factors,
281
- metadata: meta
282
- }
283
- });
284
- }
285
-
286
- return threats;
287
- }
288
-
289
- function findTyposquatMatch(name) {
290
- const nameLower = name.toLowerCase();
291
-
292
- // Ignore les packages whitelistes
293
- if (WHITELIST.has(nameLower)) return null;
294
-
295
- // Ignore les packages scoped (@org/package)
296
- if (name.startsWith('@')) return null;
297
-
298
- // Ignore les packages tres courts (trop de faux positifs)
299
- if (name.length < MIN_PACKAGE_LENGTH) return null;
300
-
301
- // Ignore les packages avec suffixes legitimes courants
302
- if (isLegitimateVariant(nameLower)) return null;
303
-
304
- for (let i = 0; i < POPULAR_PACKAGES.length; i++) {
305
- const popularLower = POPULAR_PACKAGES_LOWER[i];
306
- const popular = POPULAR_PACKAGES[i];
307
-
308
- // Ignore si c'est exactement le meme
309
- if (nameLower === popularLower) continue;
310
-
311
- // Ignore si le package populaire est trop court
312
- if (popular.length < MIN_PACKAGE_LENGTH) continue;
313
-
314
- // Length pre-filter: Levenshtein distance >= |len(a) - len(b)|
315
- if (Math.abs(nameLower.length - popularLower.length) > 2) continue;
316
-
317
- const distance = levenshteinDistance(nameLower, popularLower);
318
-
319
- // Distance de 1 = tres suspect (une seule lettre de difference)
320
- if (distance === 1) {
321
- return {
322
- original: popular,
323
- type: detectTyposquatType(name, popular),
324
- distance: distance
325
- };
326
- }
327
-
328
- // Distance de 2 seulement si le package est assez long (>= 5 chars)
329
- if (distance === 2 && popular.length >= 5) {
330
- return {
331
- original: popular,
332
- type: detectTyposquatType(name, popular),
333
- distance: distance
334
- };
335
- }
336
- }
337
-
338
- return null;
339
- }
340
-
341
- function isLegitimateVariant(name) {
342
- // Suffixes legitimes qui ne sont PAS du typosquatting
343
- const legitimateSuffixes = [
344
- '-cli', '-core', '-utils', '-plugin', '-loader', '-webpack',
345
- '-react', '-vue', '-angular', '-node', '-browser',
346
- '-esm', '-cjs', '-umd', '-vite',
347
- '-types', '-typings',
348
- '2', '3', '4', '5', // versions majeures (mysql2, etc)
349
- '-v2', '-v3', '-next', '-latest', '-stable', '-lts'
350
- ];
351
-
352
- for (const suffix of legitimateSuffixes) {
353
- if (name.endsWith(suffix)) return true;
354
- }
355
-
356
- // Prefixes legitimes
357
- const legitimatePrefixes = [
358
- '@types/', '@babel/', '@jest/', '@testing-library/',
359
- 'eslint-plugin-', 'eslint-config-',
360
- 'babel-plugin-', 'babel-preset-',
361
- 'webpack-plugin-', 'rollup-plugin-', 'vite-plugin-'
362
- ];
363
-
364
- for (const prefix of legitimatePrefixes) {
365
- if (name.startsWith(prefix)) return true;
366
- }
367
-
368
- return false;
369
- }
370
-
371
- function detectTyposquatType(typo, original) {
372
- if (typo.length === original.length - 1) return 'missing_char';
373
- if (typo.length === original.length + 1) return 'extra_char';
374
- if (typo.length === original.length) {
375
- // Check for adjacent character swap
376
- for (let i = 0; i < typo.length - 1; i++) {
377
- if (typo[i] === original[i + 1] && typo[i + 1] === original[i]) {
378
- // Verify remaining chars match
379
- const before = typo.slice(0, i) === original.slice(0, i);
380
- const after = typo.slice(i + 2) === original.slice(i + 2);
381
- if (before && after) return 'swapped_chars';
382
- }
383
- }
384
- return 'wrong_char';
385
- }
386
- return 'unknown';
387
- }
388
-
389
- function levenshteinDistance(a, b) {
390
- // Two-row optimization: O(min(m,n)) space instead of O(m*n)
391
- if (a.length < b.length) { const t = a; a = b; b = t; }
392
- let prev = new Array(b.length + 1);
393
- let curr = new Array(b.length + 1);
394
-
395
- for (let j = 0; j <= b.length; j++) prev[j] = j;
396
-
397
- for (let i = 1; i <= a.length; i++) {
398
- curr[0] = i;
399
- for (let j = 1; j <= b.length; j++) {
400
- if (a.charAt(i - 1) === b.charAt(j - 1)) {
401
- curr[j] = prev[j - 1];
402
- } else {
403
- curr[j] = Math.min(prev[j - 1] + 1, curr[j - 1] + 1, prev[j] + 1);
404
- }
405
- }
406
- const tmp = prev; prev = curr; curr = tmp;
407
- }
408
-
409
- return prev[b.length];
410
- }
411
-
412
- function clearMetadataCache() {
413
- metadataCache.clear();
414
- }
415
-
416
- // ============================================
417
- // PyPI TYPOSQUATTING
418
- // ============================================
419
-
420
- // Top 50 PyPI packages les plus populaires (cibles de typosquatting)
421
- const POPULAR_PYPI_PACKAGES = [
422
- 'requests', 'flask', 'django', 'numpy', 'pandas', 'scipy', 'matplotlib',
423
- 'pillow', 'boto3', 'setuptools', 'pip', 'wheel', 'urllib3', 'certifi',
424
- 'six', 'python-dateutil', 'pyyaml', 'cryptography', 'jinja2', 'markupsafe',
425
- 'click', 'sqlalchemy', 'beautifulsoup4', 'lxml', 'pytest', 'coverage',
426
- 'tox', 'black', 'mypy', 'pylint', 'fastapi', 'uvicorn', 'gunicorn',
427
- 'celery', 'redis', 'psycopg2', 'pymongo', 'httpx', 'aiohttp', 'tornado',
428
- 'scrapy', 'selenium', 'paramiko', 'fabric', 'ansible', 'tensorflow',
429
- 'torch', 'scikit-learn', 'keras', 'transformers'
430
- ];
431
-
432
- // PEP 503 normalization: case-insensitive, hyphens/underscores/dots equivalent
433
- function normalizePyPI(name) {
434
- return name.toLowerCase().replace(/[-_.]+/g, '-');
435
- }
436
-
437
- // Pre-computed normalized versions for O(n) comparison
438
- const POPULAR_PYPI_NORMALIZED = POPULAR_PYPI_PACKAGES.map(normalizePyPI);
439
-
440
- // Set for O(1) exact-match check (skip popular packages themselves)
441
- const POPULAR_PYPI_SET = new Set(POPULAR_PYPI_NORMALIZED);
442
-
443
- // Legitimate PyPI packages that look like typosquats but are not
444
- const PYPI_WHITELIST = new Set([
445
- 'boto', // legitimate AWS SDK predecessor of boto3
446
- 'torchvision', // legitimate PyTorch ecosystem
447
- 'torchaudio', // legitimate PyTorch ecosystem
448
- 'tensorflow-gpu', // legitimate TF variant
449
- 'scikit-image', // legitimate scikit ecosystem
450
- 'scikit-optimize', // legitimate scikit ecosystem
451
- 'paramiko2', // fork of paramiko
452
- ]);
453
-
454
- const MIN_PYPI_LENGTH = 4;
455
-
456
- /**
457
- * Find a PyPI typosquat match using PEP 503 normalization + Levenshtein.
458
- * No npm-registry-style API scoring — just distance-based detection.
459
- *
460
- * @param {string} name - PyPI package name from dependency file
461
- * @returns {{original: string, type: string, distance: number}|null}
462
- */
463
- function findPyPITyposquatMatch(name) {
464
- const normalized = normalizePyPI(name);
465
-
466
- // Skip if it IS a popular package (exact match after normalization)
467
- if (POPULAR_PYPI_SET.has(normalized)) return null;
468
-
469
- // Skip whitelisted
470
- if (PYPI_WHITELIST.has(normalized)) return null;
471
-
472
- // Skip very short names (too many false positives)
473
- if (normalized.length < MIN_PYPI_LENGTH) return null;
474
-
475
- for (let i = 0; i < POPULAR_PYPI_PACKAGES.length; i++) {
476
- const popularNorm = POPULAR_PYPI_NORMALIZED[i];
477
- const popular = POPULAR_PYPI_PACKAGES[i];
478
-
479
- // Skip exact match (after normalization)
480
- if (normalized === popularNorm) continue;
481
-
482
- // Skip short popular packages
483
- if (popularNorm.length < MIN_PYPI_LENGTH) continue;
484
-
485
- // Length pre-filter: Levenshtein distance >= |len(a) - len(b)|
486
- if (Math.abs(normalized.length - popularNorm.length) > 2) continue;
487
-
488
- const distance = levenshteinDistance(normalized, popularNorm);
489
-
490
- // Distance 1 = very suspect (one char difference)
491
- if (distance === 1) {
492
- return {
493
- original: popular,
494
- type: detectTyposquatType(normalized, popularNorm),
495
- distance: distance
496
- };
497
- }
498
-
499
- // Distance 2 only for longer packages (>= 5 chars)
500
- if (distance === 2 && popularNorm.length >= 5) {
501
- return {
502
- original: popular,
503
- type: detectTyposquatType(normalized, popularNorm),
504
- distance: distance
505
- };
506
- }
507
- }
508
-
509
- return null;
510
- }
511
-
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const { getPackageMetadata } = require('./npm-registry.js');
4
+
5
// In-memory cache of registry metadata, keyed by package name, so the same
// package is not queried more than once within a scan.
const metadataCache = new Map();
// Maximum number of entries kept in the cache before the oldest is evicted.
const MAX_METADATA_CACHE_SIZE = 500;
// Cached entries older than this are considered stale and are re-fetched.
const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
9
+
10
// Popular npm packages (typosquatting targets). Dependency names are compared
// against every entry of this list. The original comment calls this the
// "top 100"; the list currently holds fewer than 100 names.
const POPULAR_PACKAGES = [
  'lodash', 'express', 'react', 'axios', 'chalk', 'commander', 'moment',
  'request', 'async', 'bluebird', 'underscore', 'uuid', 'debug', 'mkdirp',
  'glob', 'minimist', 'webpack', 'babel-core', 'typescript', 'eslint',
  'prettier', 'jest', 'mocha', 'chai', 'sinon', 'mongoose', 'sequelize',
  'redis', 'mongodb', 'socket.io', 'express-session',
  'body-parser', 'cookie-parser', 'cors', 'helmet', 'morgan', 'dotenv',
  'jsonwebtoken', 'bcrypt', 'passport', 'nodemailer', 'aws-sdk', 'stripe',
  'twilio', 'firebase', 'graphql', 'apollo-server', 'nuxt',
  'gatsby', 'angular', 'svelte', 'electron', 'puppeteer', 'cheerio',
  'sharp', 'jimp', 'canvas', 'pdf-lib', 'exceljs', 'csv-parser', 'xml2js',
  'yaml', 'config', 'yargs', 'colors',
  'winston', 'bunyan', 'pino', 'log4js', 'ramda', 'immutable',
  'mobx', 'redux', 'zustand', 'formik', 'yup', 'ajv', 'validator',
  'date-fns', 'dayjs', 'luxon', 'numeral', 'accounting', 'currency.js',
  'lodash-es', 'core-js', 'regenerator-runtime', 'tslib', 'classnames',
  'prop-types', 'cross-env', 'node-fetch', 'got'
];
29
+
30
// Legitimate packages that are short or that resemble popular ones.
// findTyposquatMatch looks names up here after lowercasing them, so every
// entry must be lowercase; a listed name is never reported as a typosquat.
const WHITELIST = new Set([
  // Legitimate very short packages
  'qs', 'pg', 'ms', 'ws', 'ip', 'on', 'is', 'it', 'to', 'or', 'fs', 'os',
  'co', 'q', 'n', 'i', 'a', 'v', 'x', 'y', 'z',
  'ejs', 'nyc', 'ini', 'joi', 'vue', 'npm', 'got', 'ora',
  'vary', 'mime', 'send', 'etag', 'raw', 'tar', 'uid', 'cjs',
  'rxjs', 'yarn', 'pnpm', 'next', 'targz',

  // Legitimate packages with similar names
  'acorn', 'acorn-walk', 'js-yaml', 'cross-env', 'node-fetch', 'node-gyp',
  'core-js', 'lodash-es', 'date-fns', 'ts-node', 'ts-jest',
  'css-loader', 'style-loader', 'file-loader', 'url-loader', 'babel-loader',
  'vue-loader', 'react-dom', 'react-router', 'react-redux', 'vue-router',
  'express-session', 'body-parser', 'cookie-parser',

  // Common Express.js packages
  'accepts', 'array-flatten', 'content-disposition', 'content-type',
  'depd', 'destroy', 'encodeurl', 'escape-html', 'fresh', 'merge-descriptors',
  'methods', 'on-finished', 'parseurl', 'path-to-regexp', 'proxy-addr',
  'range-parser', 'safe-buffer', 'safer-buffer', 'setprototypeof',
  'statuses', 'type-is', 'unpipe', 'utils-merge',

  // Legitimate CLI packages and tools
  'jest-cli', 'prettier-2', 'prettier-1', 'eslint-cli',
  'inquirer', 'enquirer', 'prompts',
  'mysql2', 'pg-native', 'sqlite3', 'better-sqlite3',
  'node-sass', 'sass', 'less',
  'esbuild', 'rollup', 'parcel', 'vite',
  'husky', 'lint-staged', 'commitlint',
  'nodemon', 'pm2', 'forever', 'concurrently',
  'lerna', 'turbo', 'nx',
  'chalk', 'colors', 'picocolors', 'colorette',
  'commander', 'yargs', 'meow', 'cac',
  'execa', 'shelljs', 'cross-spawn',
  'rimraf', 'del', 'trash-cli',
  'globby', 'fast-glob', 'tiny-glob',
  'chokidar', 'watchpack', 'nsfw',
  'dotenv', 'dotenv-expand', 'env-cmd',

  // Vite packages and related tools
  'vitest', 'vitepress',
  'eslint-config-prettier', 'eslint-plugin-prettier',
  'eslint-scope', 'eslint-visitor-keys',
  'esbuild-register',
  'neo-async',

  // Packages with names close to other popular packages (not typosquats)
  'chai', // resembles chalk (missing_char)
  'pino', // resembles sinon (missing_char)
  'ioredis', // resembles redis (extra prefix)
  'bcryptjs', // resembles bcrypt (suffix)
  'recast', // resembles react (extra_char)
  'asyncdi', // resembles async (suffix)
  'redux', // resembles redis (wrong_char)
  'args', // resembles yargs (missing_char)
  'oxlint', // resembles eslint (wrong_char)
  'vasync', // resembles async (extra prefix)

  // FPR P1: Benign packages falsely flagged as typosquat in evaluation
  'conf', // resembles config
  'defu', // resembles debug
  'ohash', // resembles lodash
  'cors', // resembles colors
  'meant', // resembles react
  'whelk', // resembles chalk
  'tslog', // resembles tslib
  'mkdist', // resembles mkdirp
  'jshint', // resembles eslint
  'dtslint', // resembles eslint
  'redis', // resembles redux
  'cypress', // resembles express
  'colord', // resembles colors
  'read', // resembles react
  'ulid', // resembles uuid
  'tslint', // resembles eslint
  'jison', // resembles sinon
  'reds', // resembles redis
  'docdash', // resembles lodash
  'yarpm', // resembles yargs
  'canvg', // resembles canvas
  'obug' // internal sub-dependency
]);
113
+
114
+
115
// Pre-computed lowercase versions for performance; index i corresponds to
// POPULAR_PACKAGES[i].
const POPULAR_PACKAGES_LOWER = POPULAR_PACKAGES.map(p => p.toLowerCase());

// Minimum name length considered, to avoid false positives on short names.
const MIN_PACKAGE_LENGTH = 4;
120
+
121
// Severity labels from least to most severe; a label's index is its rank.
const SEVERITY_ORDER = ['LOW', 'MEDIUM', 'HIGH', 'CRITICAL'];

/**
 * Return the more severe of two severity labels.
 *
 * Labels missing from SEVERITY_ORDER rank below every known label. On a tie
 * the first argument is returned.
 *
 * @param {string} a - First severity label.
 * @param {string} b - Second severity label.
 * @returns {string} Whichever of `a` / `b` ranks higher.
 */
function maxSeverity(a, b) {
  const rankA = SEVERITY_ORDER.indexOf(a);
  const rankB = SEVERITY_ORDER.indexOf(b);
  if (rankA < rankB) {
    return b;
  }
  return a;
}
126
+
127
/**
 * Look up registry metadata for a package through the in-memory cache.
 *
 * A cached entry younger than CACHE_TTL_MS is returned as-is. Otherwise the
 * metadata is fetched with getPackageMetadata, stored with the current
 * timestamp, and returned.
 *
 * @param {string} packageName - Name of the package to look up.
 * @returns {Promise<*>} Whatever getPackageMetadata resolves to for this name.
 */
async function getCachedMetadata(packageName) {
  const cached = metadataCache.get(packageName);
  if (cached !== undefined) {
    const ageMs = Date.now() - cached.ts;
    if (ageMs < CACHE_TTL_MS) {
      return cached.data;
    }
    // Stale entry: drop it and fall through to a fresh lookup.
    metadataCache.delete(packageName);
  }

  const fresh = await getPackageMetadata(packageName);

  // Keep the cache bounded. A Map iterates in insertion order, so the first
  // key is the entry that was inserted earliest.
  if (metadataCache.size >= MAX_METADATA_CACHE_SIZE) {
    const [oldestKey] = metadataCache.keys();
    metadataCache.delete(oldestKey);
  }

  metadataCache.set(packageName, { data: fresh, ts: Date.now() });
  return fresh;
}
145
+
146
/**
 * Turn registry metadata into a composite suspicion score.
 *
 * Each triggered signal adds points, may raise the severity, and contributes
 * a label to `factors`. Severity starts at 'HIGH' (the name already matched a
 * popular package) and is only ever raised through maxSeverity.
 *
 * @param {object|null|undefined} meta - Metadata for the package; a falsy
 *   value is treated as "package not found on npm".
 * @returns {{score: number, severity: string, factors: string[]}}
 */
function scoreMetadata(meta) {
  if (!meta) {
    // Package not found on npm = suspect
    return { score: 20, severity: 'HIGH', factors: ['not_on_npm'] };
  }

  // Signals that fired, in evaluation order.
  const signals = [];

  // 1. Age (skipped entirely when the age is null)
  const ageKnown = meta.age_days !== null;
  if (ageKnown && meta.age_days < 7) {
    signals.push({ points: 30, level: 'CRITICAL', label: 'age<7d' });
  } else if (ageKnown && meta.age_days < 30) {
    signals.push({ points: 15, level: 'HIGH', label: 'age<30d' });
  }

  // 2. Downloads
  // NOTE(review): unlike age_days there is no null guard here or on the author
  // count below; `null < 100` is true in JavaScript, so a null value would
  // count as a low number. Confirm what getPackageMetadata returns.
  if (meta.weekly_downloads < 100) {
    signals.push({ points: 25, level: 'HIGH', label: 'downloads<100' });
  } else if (meta.weekly_downloads < 1000) {
    signals.push({ points: 10, level: 'MEDIUM', label: 'downloads<1000' });
  }

  // 3. Author package count
  if (meta.author_package_count <= 1) {
    signals.push({ points: 20, level: 'HIGH', label: 'single_pkg_author' });
  }

  // 4. No README
  if (!meta.has_readme) {
    signals.push({ points: 10, level: 'MEDIUM', label: 'no_readme' });
  }

  // 5. No repository
  if (!meta.has_repository) {
    signals.push({ points: 10, level: 'MEDIUM', label: 'no_repo' });
  }

  let score = 0;
  let severity = 'HIGH'; // base severity from Levenshtein match
  const factors = [];
  for (const { points, level, label } of signals) {
    score += points;
    severity = maxSeverity(severity, level);
    factors.push(label);
  }

  return { score, severity, factors };
}
202
+
203
// Keys that are never copied by safeMerge.
const PROTO_KEYS = new Set(['__proto__', 'constructor', 'prototype']);

/**
 * Shallow-merge dependency maps into a fresh object, dropping keys listed in
 * PROTO_KEYS to guard against prototype pollution.
 *
 * Arguments that are falsy or not objects are ignored. When the same key
 * appears in several arguments, the last one wins.
 *
 * @param {...*} objs - Candidate objects to merge.
 * @returns {object} New object holding the merged own enumerable entries.
 */
function safeMerge(...objs) {
  const merged = {};
  for (const source of objs) {
    const usable = Boolean(source) && typeof source === 'object';
    if (!usable) {
      continue;
    }
    Object.entries(source).forEach(([key, value]) => {
      if (PROTO_KEYS.has(key)) {
        return;
      }
      merged[key] = value;
    });
  }
  return merged;
}
220
+
221
/**
 * Scan the package.json found in `targetPath` for dependencies whose names
 * look like typosquats of popular npm packages.
 *
 * The scan runs in three phases: name matching via findTyposquatMatch,
 * metadata lookup via getCachedMetadata (in batches), and composite scoring
 * via scoreMetadata. The metadata cache is cleared at the start of each call.
 *
 * @param {string} targetPath - Directory expected to contain a package.json.
 * @returns {Promise<object[]>} One `typosquat_detected` threat per suspicious
 *   dependency; empty when package.json is missing, unreadable, not valid
 *   JSON, not a JSON object, or has no suspicious dependency names.
 */
async function scanTyposquatting(targetPath) {
  const threats = [];
  metadataCache.clear();
  const packageJsonPath = path.join(targetPath, 'package.json');

  if (!fs.existsSync(packageJsonPath)) {
    return threats;
  }

  let packageJson;
  try {
    packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
  } catch {
    return threats;
  }

  // Valid JSON is not necessarily an object: a file containing `null` parses
  // fine but would throw on the property accesses below.
  if (packageJson === null || typeof packageJson !== 'object') {
    return threats;
  }

  const dependencies = safeMerge(
    packageJson.dependencies,
    packageJson.devDependencies,
    packageJson.peerDependencies,
    packageJson.optionalDependencies
  );

  // Phase 1: Levenshtein matches (synchronous)
  const candidates = [];
  for (const depName of Object.keys(dependencies)) {
    const match = findTyposquatMatch(depName);
    if (match) {
      candidates.push({ depName, match });
    }
  }

  if (candidates.length === 0) return threats;

  // Phase 2: API enrichment (batched to avoid socket exhaustion)
  const BATCH_SIZE = 10;
  const metadataResults = [];
  for (let i = 0; i < candidates.length; i += BATCH_SIZE) {
    const batch = candidates.slice(i, i + BATCH_SIZE);
    const batchResults = await Promise.all(
      batch.map(c => getCachedMetadata(c.depName))
    );
    metadataResults.push(...batchResults);
  }

  // Phase 3: Composite scoring
  for (let i = 0; i < candidates.length; i++) {
    const { depName, match } = candidates[i];
    const meta = metadataResults[i];
    const mf = scoreMetadata(meta);

    const finalSeverity = maxSeverity('HIGH', mf.severity);

    // Build detail message. Neither branch ends with a period: the message
    // template below appends the sentence terminator itself.
    let details;
    if (!meta) {
      details = 'Package not found on npm (suspect)';
    } else {
      // scoreMetadata allows a null age, so avoid printing "nulld".
      const ageText = meta.age_days == null ? 'unknown' : meta.age_days + 'd';
      details = 'Age: ' + ageText
        + ', Downloads: ' + meta.weekly_downloads + '/week'
        + ', Author packages: ' + meta.author_package_count
        + ', No README: ' + String(!meta.has_readme)
        + ', No repo: ' + String(!meta.has_repository);
    }

    const confidence = mf.score >= 40 ? 'CRITICAL'
      : mf.score >= 20 ? 'HIGH'
        : mf.score > 0 ? 'MEDIUM'
          : 'LOW';

    const message = 'Package "' + depName + '" resembles "' + match.original
      + '" (' + match.type + '). ' + details + '. Confidence: ' + confidence;

    threats.push({
      type: 'typosquat_detected',
      severity: finalSeverity,
      message: message,
      file: 'package.json',
      details: {
        suspicious: depName,
        legitimate: match.original,
        technique: match.type,
        distance: match.distance,
        composite_score: mf.score,
        factors: mf.factors,
        metadata: meta
      }
    });
  }

  return threats;
}
312
+
313
/**
 * Check whether a dependency name looks like a typosquat of a popular npm
 * package.
 *
 * All comparisons are case-insensitive. The first popular package within the
 * accepted edit distance is reported; the list is not searched for a closer
 * match.
 *
 * @param {string} name - Dependency name as written in package.json.
 * @returns {{original: string, type: string, distance: number}|null} The
 *   popular package the name resembles, the technique label from
 *   detectTyposquatType, and the Levenshtein distance; null when the name is
 *   not considered suspicious.
 */
function findTyposquatMatch(name) {
  const nameLower = name.toLowerCase();

  // Skip whitelisted packages
  if (WHITELIST.has(nameLower)) return null;

  // A popular package is legitimate by definition; never report it as a
  // typosquat of some other popular package.
  if (POPULAR_PACKAGES_LOWER.includes(nameLower)) return null;

  // Skip scoped packages (@org/package)
  if (name.startsWith('@')) return null;

  // Skip very short names (too many false positives)
  if (name.length < MIN_PACKAGE_LENGTH) return null;

  // Skip names carrying a common legitimate suffix or prefix
  if (isLegitimateVariant(nameLower)) return null;

  for (let i = 0; i < POPULAR_PACKAGES.length; i++) {
    const popularLower = POPULAR_PACKAGES_LOWER[i];
    const popular = POPULAR_PACKAGES[i];

    // Skip popular packages that are themselves too short
    if (popular.length < MIN_PACKAGE_LENGTH) continue;

    // Length pre-filter: Levenshtein distance >= |len(a) - len(b)|
    if (Math.abs(nameLower.length - popularLower.length) > 2) continue;

    const distance = levenshteinDistance(nameLower, popularLower);

    // Distance 1 is always suspicious (a single edit). Distance 2 is only
    // accepted when the popular name is long enough (>= 5 chars).
    const suspicious = distance === 1 || (distance === 2 && popular.length >= 5);
    if (suspicious) {
      return {
        original: popular,
        // Classify on the same lowercased strings the distance was computed
        // on, so a mixed-case transposition is still seen as a swap.
        type: detectTyposquatType(nameLower, popularLower),
        distance: distance
      };
    }
  }

  return null;
}
364
+
365
/**
 * Tell whether a package name carries a suffix or prefix that is commonly
 * used by legitimate variants of a package and is therefore NOT treated as
 * typosquatting.
 *
 * Matching is case-sensitive.
 *
 * @param {string} name - Package name to test.
 * @returns {boolean} True when the name ends with a known suffix or starts
 *   with a known prefix.
 */
function isLegitimateVariant(name) {
  const suffixes = [
    '-cli', '-core', '-utils', '-plugin', '-loader', '-webpack',
    '-react', '-vue', '-angular', '-node', '-browser',
    '-esm', '-cjs', '-umd', '-vite',
    '-types', '-typings',
    '2', '3', '4', '5', // major versions (mysql2, etc)
    '-v2', '-v3', '-next', '-latest', '-stable', '-lts'
  ];

  const prefixes = [
    '@types/', '@babel/', '@jest/', '@testing-library/',
    'eslint-plugin-', 'eslint-config-',
    'babel-plugin-', 'babel-preset-',
    'webpack-plugin-', 'rollup-plugin-', 'vite-plugin-'
  ];

  const hasKnownSuffix = suffixes.some((suffix) => name.endsWith(suffix));
  return hasKnownSuffix || prefixes.some((prefix) => name.startsWith(prefix));
}
394
+
395
/**
 * Classify how a suspected typosquat differs from the name it imitates.
 *
 * The comparison is case-insensitive: both arguments are lowercased before
 * being compared, so a mixed-case candidate such as 'Lodahs' is classified
 * the same way as 'lodahs'.
 *
 * @param {string} typo - Suspected typosquat name.
 * @param {string} original - Popular package name it resembles.
 * @returns {string} One of:
 *   'missing_char'  - typo is exactly one character shorter,
 *   'extra_char'    - typo is exactly one character longer,
 *   'swapped_chars' - same length, one adjacent pair transposed and
 *                     everything else identical,
 *   'wrong_char'    - same length, any other difference,
 *   'unknown'       - lengths differ by more than one, or an argument is
 *                     not a string.
 */
function detectTyposquatType(typo, original) {
  if (typeof typo !== 'string' || typeof original !== 'string') return 'unknown';

  // Lowercase both sides so classification is independent of letter case.
  const candidate = typo.toLowerCase();
  const target = original.toLowerCase();

  if (candidate.length === target.length - 1) return 'missing_char';
  if (candidate.length === target.length + 1) return 'extra_char';
  if (candidate.length !== target.length) return 'unknown';

  // Same length: look for a single adjacent transposition.
  for (let i = 0; i < candidate.length - 1; i++) {
    const pairIsSwapped =
      candidate[i] === target[i + 1] && candidate[i + 1] === target[i];
    if (!pairIsSwapped) continue;

    // The rest of the string must be untouched for this to be a pure swap.
    const sameBefore = candidate.slice(0, i) === target.slice(0, i);
    const sameAfter = candidate.slice(i + 2) === target.slice(i + 2);
    if (sameBefore && sameAfter) return 'swapped_chars';
  }

  return 'wrong_char';
}
412
+
413
/**
 * Compute the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings.
 *
 * Only two rows of the dynamic-programming table are kept, and the rows are
 * sized by the shorter string, so extra memory is proportional to
 * min(a.length, b.length).
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Number of single-character edits turning one string
 *   into the other.
 */
function levenshteinDistance(a, b) {
  // Keep the shorter string on the column axis so rows stay small.
  const [longer, shorter] = a.length < b.length ? [b, a] : [a, b];
  const width = shorter.length;

  // Row 0: turning the empty prefix into shorter[0..col) costs `col` inserts.
  let previousRow = Array.from({ length: width + 1 }, (_, col) => col);
  let currentRow = new Array(width + 1);

  for (let row = 1; row <= longer.length; row++) {
    currentRow[0] = row;
    const rowChar = longer.charAt(row - 1);

    for (let col = 1; col <= width; col++) {
      const diagonal = previousRow[col - 1];
      currentRow[col] = rowChar === shorter.charAt(col - 1)
        ? diagonal
        : Math.min(diagonal + 1, currentRow[col - 1] + 1, previousRow[col] + 1);
    }

    // Reuse the two buffers instead of allocating a new row each pass.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  return previousRow[width];
}
435
+
436
/**
 * Drop every entry from the module-level `metadataCache`.
 *
 * @returns {void}
 */
function clearMetadataCache() {
  metadataCache.clear();
}
439
+
440
+ // ============================================
441
+ // PyPI TYPOSQUATTING
442
+ // ============================================
443
+
444
// The 50 most popular PyPI packages, i.e. the likely targets of typosquatting.
// Entries are kept in their original order.
const POPULAR_PYPI_PACKAGES = [
  'requests', 'flask', 'django', 'numpy', 'pandas',
  'scipy', 'matplotlib', 'pillow', 'boto3', 'setuptools',
  'pip', 'wheel', 'urllib3', 'certifi', 'six',
  'python-dateutil', 'pyyaml', 'cryptography', 'jinja2', 'markupsafe',
  'click', 'sqlalchemy', 'beautifulsoup4', 'lxml', 'pytest',
  'coverage', 'tox', 'black', 'mypy', 'pylint',
  'fastapi', 'uvicorn', 'gunicorn', 'celery', 'redis',
  'psycopg2', 'pymongo', 'httpx', 'aiohttp', 'tornado',
  'scrapy', 'selenium', 'paramiko', 'fabric', 'ansible',
  'tensorflow', 'torch', 'scikit-learn', 'keras', 'transformers'
];
455
+
456
/**
 * Normalize a PyPI project name the PEP 503 way: lowercase it and collapse
 * every run of '-', '_' and '.' into a single '-'.
 *
 * @param {string} name - Project name as written in a dependency file.
 * @returns {string} The normalized name.
 */
function normalizePyPI(name) {
  const lowered = name.toLowerCase();
  // Splitting on separator runs and re-joining with '-' collapses each run.
  return lowered.split(/[-_.]+/).join('-');
}
460
+
461
// Normalized form of every popular package, index-aligned with
// POPULAR_PYPI_PACKAGES and computed once at load time.
const POPULAR_PYPI_NORMALIZED = POPULAR_PYPI_PACKAGES.map((pkg) => normalizePyPI(pkg));

// Same names as a Set, for constant-time "is this a popular package?" checks.
const POPULAR_PYPI_SET = new Set(POPULAR_PYPI_NORMALIZED);

// Legitimate PyPI packages that look like typosquats but are not.
const PYPI_WHITELIST = new Set([
  'boto',            // legitimate AWS SDK predecessor of boto3
  'torchvision',     // legitimate PyTorch ecosystem
  'torchaudio',      // legitimate PyTorch ecosystem
  'tensorflow-gpu',  // legitimate TF variant
  'scikit-image',    // legitimate scikit ecosystem
  'scikit-optimize', // legitimate scikit ecosystem
  'paramiko2'        // fork of paramiko
]);

// Shortest name length considered for PyPI typosquat matching.
const MIN_PYPI_LENGTH = 4;
479
+
480
/**
 * Find the popular PyPI package that `name` most likely typosquats, using
 * PEP 503 normalization followed by Levenshtein distance.
 * No npm-registry-style API scoring, just distance-based detection.
 *
 * Selection rule: a popular package at distance 1 always wins; if none
 * exists, the first popular package (in list order) at distance 2 whose
 * normalized name has at least 5 characters is reported.
 *
 * @param {string} name - PyPI package name from a dependency file.
 * @returns {{original: string, type: string, distance: number}|null}
 *   The matched popular package, how the name differs from it and the edit
 *   distance; null when the name is not a string, is empty, is itself
 *   popular, is whitelisted, is too short, or has no close match.
 */
function findPyPITyposquatMatch(name) {
  // Nothing to analyse for missing or non-string input.
  if (typeof name !== 'string' || name.length === 0) return null;

  const normalized = normalizePyPI(name);

  // Skip if it IS a popular package (exact match after normalization).
  if (POPULAR_PYPI_SET.has(normalized)) return null;

  // Skip whitelisted names.
  if (PYPI_WHITELIST.has(normalized)) return null;

  // Skip very short names (too many false positives).
  if (normalized.length < MIN_PYPI_LENGTH) return null;

  // First distance-2 hit, kept only as a fallback so that a closer
  // distance-1 hit later in the list is not shadowed by it.
  let fallback = null;

  for (let i = 0; i < POPULAR_PYPI_PACKAGES.length; i++) {
    const popularNorm = POPULAR_PYPI_NORMALIZED[i];

    // Skip short popular packages.
    if (popularNorm.length < MIN_PYPI_LENGTH) continue;

    // Length pre-filter: Levenshtein distance >= |len(a) - len(b)|.
    if (Math.abs(normalized.length - popularNorm.length) > 2) continue;

    const distance = levenshteinDistance(normalized, popularNorm);

    // Distance 1 = very suspect (one char difference): report immediately.
    if (distance === 1) {
      return {
        original: POPULAR_PYPI_PACKAGES[i],
        type: detectTyposquatType(normalized, popularNorm),
        distance: distance
      };
    }

    // Distance 2 only counts for longer packages (>= 5 chars).
    if (distance === 2 && fallback === null && popularNorm.length >= 5) {
      fallback = {
        original: POPULAR_PYPI_PACKAGES[i],
        type: detectTyposquatType(normalized, popularNorm),
        distance: distance
      };
    }
  }

  return fallback;
}
535
+
512
536
  module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch };