muaddib-scanner 2.11.45 → 2.11.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.45.json → self-scan-v2.11.47.json} +1 -1
- package/src/pipeline/processor.js +54 -0
- package/src/response/playbooks.js +22 -0
- package/src/rules/index.js +44 -0
- package/src/scanner/pypi-maintainer.js +87 -0
- package/src/scanner/pypi-registry.js +239 -0
- package/src/scanner/pypi-release-zero.js +59 -0
- package/src/scanner/python-source.js +53 -1
- package/src/scanner/python.js +77 -0
package/package.json
CHANGED
|
@@ -11,6 +11,10 @@ const { debugLog } = require('../utils.js');
|
|
|
11
11
|
const { getPackageMetadata } = require('../scanner/npm-registry.js');
|
|
12
12
|
const { checkReleaseZero } = require('../scanner/release-zero.js');
|
|
13
13
|
const { checkUnclaimedMaintainerEmail, checkCompromisedDomain } = require('../scanner/email-domain.js');
|
|
14
|
+
const { getPyPIPackageMetadata } = require('../scanner/pypi-registry.js');
|
|
15
|
+
const { runPyPIMaintainerChecks } = require('../scanner/pypi-maintainer.js');
|
|
16
|
+
const { checkPyPIReleaseZero } = require('../scanner/pypi-release-zero.js');
|
|
17
|
+
const { detectScannedPackageName } = require('../scanner/python.js');
|
|
14
18
|
|
|
15
19
|
// Auto-sandbox compound trigger : optional out-of-tree dependency. Lazy-load
|
|
16
20
|
// it so the pipeline still works when the file is absent (some dev machines
|
|
@@ -247,6 +251,56 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
247
251
|
}
|
|
248
252
|
}
|
|
249
253
|
|
|
254
|
+
// ───── PyPI side — same shape as the npm block above (v2.11.47, Phase 2) ─────
|
|
255
|
+
// Fetch PyPI registry metadata for the scanned package iff we can identify it
|
|
256
|
+
// (pyproject.toml / setup.py / setup.cfg). Same MUADDIB_NO_REGISTRY_FETCH
|
|
257
|
+
// master switch. Failure mode: silent (returns null), all downstream PyPI
|
|
258
|
+
// checks degrade gracefully.
|
|
259
|
+
if (
|
|
260
|
+
_pkgMeta &&
|
|
261
|
+
globalThis.process.env.MUADDIB_NO_REGISTRY_FETCH !== '1'
|
|
262
|
+
) {
|
|
263
|
+
let pypiPackageName = null;
|
|
264
|
+
try {
|
|
265
|
+
pypiPackageName = detectScannedPackageName(targetPath);
|
|
266
|
+
} catch (err) {
|
|
267
|
+
debugLog('[PYPI-NAME] detect failed: ' + err.message);
|
|
268
|
+
}
|
|
269
|
+
if (pypiPackageName) {
|
|
270
|
+
try {
|
|
271
|
+
const pypiMeta = await getPyPIPackageMetadata(pypiPackageName);
|
|
272
|
+
if (pypiMeta) {
|
|
273
|
+
_pkgMeta.pypiPackageName = pypiPackageName;
|
|
274
|
+
_pkgMeta.pypiRegistryMeta = pypiMeta;
|
|
275
|
+
}
|
|
276
|
+
} catch (err) {
|
|
277
|
+
debugLog('[PYPI-REGISTRY-META] fetch failed for ' + pypiPackageName + ': ' + err.message);
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
if (_pkgMeta && _pkgMeta.pypiRegistryMeta) {
|
|
283
|
+
// PYPI-003 — release-zero (v0.x.x + age <30d). MEDIUM, composite-only.
|
|
284
|
+
try {
|
|
285
|
+
const rz = checkPyPIReleaseZero(_pkgMeta.pypiRegistryMeta.latest_version, _pkgMeta.pypiRegistryMeta);
|
|
286
|
+
if (rz) deduped.push(rz);
|
|
287
|
+
} catch (err) {
|
|
288
|
+
debugLog('[PYPI-RELEASE-ZERO] check failed: ' + err.message);
|
|
289
|
+
}
|
|
290
|
+
// MAINTAINER-005 + MAINTAINER-006, PyPI-flavoured (file → pyproject.toml,
|
|
291
|
+
// wording mentions PyPI). Same env opt-outs as npm
|
|
292
|
+
// (MUADDIB_EMAIL_DOMAIN_CHECK=0, MUADDIB_RDAP_CHECK=0).
|
|
293
|
+
try {
|
|
294
|
+
const pypiThreats = await runPyPIMaintainerChecks(
|
|
295
|
+
_pkgMeta.pypiPackageName,
|
|
296
|
+
_pkgMeta.pypiRegistryMeta
|
|
297
|
+
);
|
|
298
|
+
for (const t of pypiThreats) deduped.push(t);
|
|
299
|
+
} catch (err) {
|
|
300
|
+
debugLog('[PYPI-MAINTAINER] check failed: ' + err.message);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
250
304
|
// Cross-scanner compound: detached_process + suspicious_dataflow in same file
|
|
251
305
|
// Catches cases where credential flow is detected by dataflow scanner, not AST scanner
|
|
252
306
|
{
|
|
@@ -498,6 +498,28 @@ const PLAYBOOKS = {
|
|
|
498
498
|
'qui ne tracent que les "import X" statiques. Inspecter les appels suivants au module dynamiquement ' +
|
|
499
499
|
'importe — combine a exec/subprocess/fetch indique malveillance avec haute confiance.',
|
|
500
500
|
|
|
501
|
+
pypi_release_zero:
|
|
502
|
+
'MEDIUM: Package PyPI v0.x.x publie il y a moins de 30 jours. Pattern de lure / ship-as-vulnerable. ' +
|
|
503
|
+
'Composite-only: cette regle seule ne suffit pas, mais combinee avec PYAST-001 (cmdclass) ou un IOC ' +
|
|
504
|
+
'string TrapDoor, c\'est un signal fort. Inspecter manuellement: qui est l\'auteur, est-ce que d\'autres ' +
|
|
505
|
+
'packages aux noms similaires ont ete publies le meme jour, est-ce que le code source ressemble a une ' +
|
|
506
|
+
'lib utile ou a un stub vide.',
|
|
507
|
+
|
|
508
|
+
fork_exec_inline_interpreter:
|
|
509
|
+
'HIGH: subprocess.X([<interpreter>, -e|-c, ...]) — fork-exec d\'un interpreteur inline ' +
|
|
510
|
+
'(node -e, python -c, bash -c, ruby -e, perl -e, php -r, ...). Pattern transversal: Python ouvre ' +
|
|
511
|
+
'un interpreteur d\'un autre langage et lui passe du code dans argv. Signe canonique d\'un staging ' +
|
|
512
|
+
'multi-langage (TrapDoor mai 2026). NE PAS installer. Inspecter le code passe en argv pour identifier ' +
|
|
513
|
+
'le payload reel. Si le 3eme element de la liste est une variable, suivre l\'assignment en amont.',
|
|
514
|
+
|
|
515
|
+
fetch_to_fork_exec_inline:
|
|
516
|
+
'CRITIQUE: Pattern TrapDoor exact. Le meme fichier Python fetch un payload reseau ' +
|
|
517
|
+
'(urllib/requests/httpx/aiohttp) ET le passe a subprocess.X([<interpreter>, -e, ...]) dans le meme ' +
|
|
518
|
+
'fichier. Le payload distant est execute cote interpreter forked (Node/Bash/Ruby/...) — echappe a ' +
|
|
519
|
+
'PYAST-005 parce qu\'il n\'y a pas d\'exec/eval Python. NE PAS installer. Bloquer le domaine du fetch ' +
|
|
520
|
+
'au firewall. Si execute: incident response complet, regenerer tous les secrets — le payload distant ' +
|
|
521
|
+
'a pu exfiltrer SSH, AWS, GitHub, npm tokens.',
|
|
522
|
+
|
|
501
523
|
ai_agent_abuse:
|
|
502
524
|
'CRITIQUE: Un agent IA (Claude, Gemini, Q) est invoque avec des flags de bypass de securite ' +
|
|
503
525
|
'(--dangerously-skip-permissions, --yolo, --trust-all-tools). Technique s1ngularity/Nx. ' +
|
package/src/rules/index.js
CHANGED
|
@@ -224,6 +224,19 @@ const RULES = {
|
|
|
224
224
|
],
|
|
225
225
|
mitre: 'T1195.002'
|
|
226
226
|
},
|
|
227
|
+
pypi_release_zero: {
|
|
228
|
+
id: 'MUADDIB-PYPI-003',
|
|
229
|
+
name: 'PyPI Release-Zero (v0.x.x recently published)',
|
|
230
|
+
severity: 'MEDIUM',
|
|
231
|
+
confidence: 'medium',
|
|
232
|
+
domain: 'malware',
|
|
233
|
+
description: 'Package PyPI dont la latest_version est 0.x.x (release-zero PEP 440) ET dont la premiere publication est < 30 jours. Pattern de lure / ship-as-vulnerable / typosquat-staging. Equivalent PyPI de PKG-022 cote npm. Composite-only : la severite reste MEDIUM pour eviter les FPs sur les vrais projets early-stage.',
|
|
234
|
+
references: [
|
|
235
|
+
'https://attack.mitre.org/techniques/T1195/002/',
|
|
236
|
+
'https://peps.python.org/pep-0440/'
|
|
237
|
+
],
|
|
238
|
+
mitre: 'T1195.002'
|
|
239
|
+
},
|
|
227
240
|
|
|
228
241
|
// PYSRC-001 a 008 — Python source scanner (TrapDoor PyPI gap, v2.11.25).
|
|
229
242
|
// python.js est manifest-only ; ast.js/dataflow.js sont JS-only ; ioc-strings.js
|
|
@@ -338,6 +351,37 @@ const RULES = {
|
|
|
338
351
|
],
|
|
339
352
|
mitre: 'T1027.013'
|
|
340
353
|
},
|
|
354
|
+
// PYSRC-009 / 010 — fork-exec inline interpreter (v2.11.46). Comble le gap
|
|
355
|
+
// TrapDoor exact : subprocess.run(["node"|"bash"|..., "-e"|"-c", payload]).
|
|
356
|
+
// Pattern transversal multi-langage qui echappe a PYAST-005 parce qu'il n'y
|
|
357
|
+
// a pas d'exec()/eval() Python — l'execution est cote interpreter forked.
|
|
358
|
+
fork_exec_inline_interpreter: {
|
|
359
|
+
id: 'MUADDIB-PYSRC-009',
|
|
360
|
+
name: 'Python Fork-Exec Inline Interpreter',
|
|
361
|
+
severity: 'HIGH',
|
|
362
|
+
confidence: 'high',
|
|
363
|
+
domain: 'malware',
|
|
364
|
+
description: 'subprocess.{Popen,run,call,check_output,check_call,getoutput}([<interpreter>, <inline-flag>, ...]) — fork-exec d\'un interpreteur inline (node -e, python -c, bash -c, ruby -e, perl -e, php -r, ...). Signe canonique d\'un staging multi-langage : Python ouvre un autre interpreteur et lui passe du code dans argv. Pattern central de TrapDoor (mai 2026).',
|
|
365
|
+
references: [
|
|
366
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
367
|
+
'https://attack.mitre.org/techniques/T1059/'
|
|
368
|
+
],
|
|
369
|
+
mitre: 'T1059'
|
|
370
|
+
},
|
|
371
|
+
fetch_to_fork_exec_inline: {
|
|
372
|
+
id: 'MUADDIB-PYSRC-010',
|
|
373
|
+
name: 'Python Fetch + Fork-Exec Inline Interpreter (TrapDoor signature)',
|
|
374
|
+
severity: 'CRITICAL',
|
|
375
|
+
confidence: 'high',
|
|
376
|
+
domain: 'malware',
|
|
377
|
+
description: 'Compound : le meme fichier Python contient un fetch reseau (urllib/requests/httpx/aiohttp/http.client) ET un subprocess.X([<interpreter>, -e|-c, ...]). Signature directe de TrapDoor : telecharge un payload depuis le C2 puis l\'execute via fork-exec d\'un interpreteur inline. Echappe a PYAST-005 (fetch+exec taint) parce que l\'execution est cote Node/Bash/Ruby/... pas cote Python.',
|
|
378
|
+
references: [
|
|
379
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
380
|
+
'https://attack.mitre.org/techniques/T1105/',
|
|
381
|
+
'https://attack.mitre.org/techniques/T1059/'
|
|
382
|
+
],
|
|
383
|
+
mitre: 'T1105'
|
|
384
|
+
},
|
|
341
385
|
|
|
342
386
|
// PYAST-001 a 008 — Python AST scanner via tree-sitter (TrapDoor PyPI parity,
|
|
343
387
|
// v2.11.42+). Mirror du `ast.js` cote npm : full CST walk avec scope tracking,
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* PyPI maintainer / metadata checks — wires the existing ecosystem-agnostic
|
|
5
|
+
* email-domain.js (MAINTAINER-005 MX + MAINTAINER-006 RDAP) onto PyPI
|
|
6
|
+
* metadata returned by pypi-registry.js.
|
|
7
|
+
*
|
|
8
|
+
* Created v2.11.47. Mirror of how npm calls these from processor.js, but
|
|
9
|
+
* for the PyPI side. We reuse the same threat types (`unclaimed_maintainer_email`,
|
|
10
|
+
* `compromised_email_domain`) — they're conceptually identical, only the
|
|
11
|
+
* registry context differs. We post-process the returned threats to swap the
|
|
12
|
+
* `file` field (package.json → pyproject.toml) and tweak the message wording
|
|
13
|
+
* so the operator sees "PyPI" not "npm" in the report.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
const {
|
|
17
|
+
checkUnclaimedMaintainerEmail,
|
|
18
|
+
checkCompromisedDomain
|
|
19
|
+
} = require('./email-domain.js');
|
|
20
|
+
|
|
21
|
+
/**
 * Re-label a threat produced by email-domain.js so that it reads as a PyPI
 * finding instead of an npm one.
 *
 * The input is never mutated: a shallow copy is returned with `file` pointed
 * at the Python declaration file and, when `message` is a string, with the
 * npm wording swapped for PyPI wording. Anything that is not a non-null
 * object is handed back unchanged.
 *
 * @param {*} threat - Threat object to adapt.
 * @param {string} [declarationFile] - Value for `file`; falls back to
 *   'pyproject.toml' when falsy.
 * @returns {*} The adapted copy, or `threat` itself when it is not an object.
 */
function adaptThreatToPyPI(threat, declarationFile) {
  const isThreatObject = Boolean(threat) && typeof threat === 'object';
  if (!isThreatObject) {
    return threat;
  }

  const pypiThreat = { ...threat };
  pypiThreat.file = declarationFile || 'pyproject.toml';

  const originalMessage = threat.message;
  if (typeof originalMessage === 'string') {
    // Order matters: the generic npm → PyPI swap runs first, then the
    // account-takeover phrase is made registry-specific.
    const withRegistryName = originalMessage.replace(/\bnpm\b/g, 'PyPI');
    pypiThreat.message = withRegistryName.replace(/take over the account/g, 'take over the PyPI account');
  }

  return pypiThreat;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Entry point for PyPI maintainer-domain checks.
 *
 * Runs checkUnclaimedMaintainerEmail() and then checkCompromisedDomain()
 * (both from email-domain.js) on the e-mail list and creation date taken from
 * the PyPI metadata, and passes every returned threat through
 * adaptThreatToPyPI() so the report points at the Python declaration file.
 *
 * A helper that throws, or that returns something other than an array, is
 * treated as "no findings" for that helper only; the other helper's results
 * are still returned.
 *
 * @param {string} packageName - PyPI package name being scanned (not used by
 *   the checks themselves).
 * @param {object} pypiRegistryMeta - Output of getPyPIPackageMetadata().
 *   Must have shape { maintainer_emails: string[], created_at: ISO | null }.
 * @param {object} [options] - { resolveMx, fetchRdap, declarationFile } — first
 *   two are forwarded to email-domain.js (test injection); declarationFile is
 *   the path to display in threat.file (defaults to 'pyproject.toml').
 *   `null` is accepted and treated like an empty object.
 * @returns {Promise<Array>} threats array; empty when the metadata is missing
 *   or lists no maintainer e-mails. The MUADDIB_EMAIL_DOMAIN_CHECK /
 *   MUADDIB_RDAP_CHECK opt-outs are not handled here — presumably inside
 *   email-domain.js (not visible from this file).
 */
async function runPyPIMaintainerChecks(packageName, pypiRegistryMeta, options = {}) {
  if (!pypiRegistryMeta || typeof pypiRegistryMeta !== 'object') return [];
  const emails = pypiRegistryMeta.maintainer_emails;
  if (!Array.isArray(emails) || emails.length === 0) return [];

  // The default parameter only covers `undefined`; guard explicit null too.
  const opts = options || {};
  const declarationFile = opts.declarationFile || 'pyproject.toml';

  // email-domain.js reads `meta.maintainer_emails` and `meta.created_at`.
  const helperMeta = {
    maintainer_emails: emails,
    created_at: pypiRegistryMeta.created_at
  };

  const threats = [];
  // Only iterate real arrays: a null/undefined helper result must not throw
  // here and discard the findings of the other check.
  const collect = (found) => {
    if (!Array.isArray(found)) return;
    for (const t of found) threats.push(adaptThreatToPyPI(t, declarationFile));
  };

  let mxThreats = [];
  try {
    mxThreats = await checkUnclaimedMaintainerEmail(helperMeta, {
      resolveMx: opts.resolveMx
    });
  } catch { /* silent — same posture as email-domain.js */ }
  collect(mxThreats);

  let rdapThreats = [];
  try {
    rdapThreats = await checkCompromisedDomain(helperMeta, {
      fetchRdap: opts.fetchRdap
    });
  } catch { /* silent */ }
  collect(rdapThreats);

  return threats;
}
|
|
82
|
+
|
|
83
|
+
module.exports = {
|
|
84
|
+
runPyPIMaintainerChecks,
|
|
85
|
+
// Exposed for unit tests
|
|
86
|
+
_internal: { adaptThreatToPyPI }
|
|
87
|
+
};
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* PyPI registry metadata fetcher — mirror of `src/scanner/npm-registry.js`
|
|
5
|
+
* for the PyPI ecosystem. Closes the npm/PyPI asymmetry on the metadata axis.
|
|
6
|
+
*
|
|
7
|
+
* Created v2.11.47 to enable PyPI-side maintainer/email/release-zero
|
|
8
|
+
* detections (port of MAINTAINER-005/006/PKG-022 to PyPI).
|
|
9
|
+
*
|
|
10
|
+
* Architecture parity with npm-registry.js :
|
|
11
|
+
* - built-in fetch (no external dep)
|
|
12
|
+
* - 10s timeout, 3 retries with exponential backoff
|
|
13
|
+
* - 429 backoff respecting Retry-After
|
|
14
|
+
* - throttle via http-limiter.js semaphore (shared with npm — same MUAD'DIB
|
|
15
|
+
* self-DoS protection ; rate budget is global, ok since target hosts differ)
|
|
16
|
+
* - 5min in-process cache keyed by package name
|
|
17
|
+
* - returns null on any failure (never throws — pipeline safety)
|
|
18
|
+
* - gated upstream by `MUADDIB_NO_REGISTRY_FETCH === '1'` (same master switch)
|
|
19
|
+
*
|
|
20
|
+
* URL : https://pypi.org/pypi/<package>/json (canonical PEP 691 JSON API)
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const { debugLog } = require('../utils.js');
|
|
24
|
+
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
25
|
+
|
|
26
|
+
const PYPI_REGISTRY_URL = 'https://pypi.org/pypi';
|
|
27
|
+
const REQUEST_TIMEOUT = 10000; // 10 seconds
|
|
28
|
+
const MAX_RETRIES = 3;
|
|
29
|
+
const CACHE_TTL = 5 * 60 * 1000; // 5 minutes — mirror npm-registry
|
|
30
|
+
|
|
31
|
+
// PEP 503 normalized package name: lowercase letters / digits / `-` `_` `.`
|
|
32
|
+
// Case-insensitive on input, server normalizes.
|
|
33
|
+
const PYPI_PACKAGE_REGEX = /^[A-Za-z0-9_.-]{1,214}$/;
|
|
34
|
+
|
|
35
|
+
// In-process cache : Map<packageName, { fetchedAt: number, data: object | null }>
|
|
36
|
+
// Negative caching (data === null) is honored too — avoids repeat 404 hammering.
|
|
37
|
+
const _pypiMetadataCache = new Map();
|
|
38
|
+
|
|
39
|
+
/**
 * Build an abort signal that fires after `ms` milliseconds, using the native
 * AbortSignal.timeout when the runtime has it and a manual
 * AbortController + setTimeout pair otherwise (mirror of npm-registry.js).
 *
 * @param {number} ms - Timeout in milliseconds.
 * @returns {{ signal: AbortSignal, cleanup: function(): void }} `cleanup`
 *   cancels the fallback timer; with the native signal it is a no-op.
 */
function createTimeoutSignal(ms) {
  const hasNativeTimeout = typeof AbortSignal !== 'undefined' && Boolean(AbortSignal.timeout);

  if (!hasNativeTimeout) {
    const abortController = new AbortController();
    const handle = setTimeout(() => {
      abortController.abort();
    }, ms);
    return {
      signal: abortController.signal,
      cleanup() {
        clearTimeout(handle);
      }
    };
  }

  const noop = () => {};
  return { signal: AbortSignal.timeout(ms), cleanup: noop };
}
|
|
48
|
+
|
|
49
|
+
/**
 * GET `url` and return its parsed JSON body, or null on any failure.
 *
 * Makes up to MAX_RETRIES attempts, each bounded by REQUEST_TIMEOUT:
 *  - network error / timeout: exponential backoff (1s, 2s, ... capped at 8s),
 *    then retry;
 *  - HTTP 429: wait for Retry-After seconds (2s when absent or not a positive
 *    integer, capped at 30s), then retry;
 *  - HTTP 404, any other non-2xx status, or an unparseable body: null
 *    immediately, no retry.
 *
 * No wait is performed after the final attempt — there is nothing left to
 * retry, and the caller would only be stalled.
 *
 * @param {string} url - Fully built request URL.
 * @returns {Promise<object | null>} Parsed JSON, or null. Never throws for
 *   network or HTTP errors.
 */
async function fetchWithRetry(url) {
  // Consume the body of a response we are not going to use.
  const drain = async (res) => {
    try { await res.text(); } catch (e) { debugLog('pypi-registry: response drain failed:', e.message); }
  };
  const sleep = (ms) => new Promise(r => setTimeout(r, ms));

  let lastError = null;
  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
    const isLastAttempt = attempt >= MAX_RETRIES - 1;
    let response;
    const { signal, cleanup } = createTimeoutSignal(REQUEST_TIMEOUT);
    try {
      response = await fetch(url, { signal });
    } catch (err) {
      cleanup();
      lastError = err;
      if (!isLastAttempt) {
        await sleep(Math.min(1000 * Math.pow(2, attempt), 8000));
      }
      continue;
    }
    cleanup();

    if (response.status === 404) {
      await drain(response);
      return null;
    }

    if (response.status === 429) {
      await drain(response);
      if (isLastAttempt) {
        // Out of attempts: record why and stop without a pointless wait.
        lastError = new Error('HTTP 429 (rate limited)');
        break;
      }
      // NaN (missing header or HTTP-date form) fails the `> 0` test → 2s default.
      const retryAfter = Number.parseInt(response.headers.get('retry-after'), 10);
      const delay = Math.min(retryAfter > 0 ? retryAfter * 1000 : 2000, 30000);
      await sleep(delay);
      continue;
    }

    if (!response.ok) {
      await drain(response);
      return null;
    }

    try {
      return await response.json();
    } catch {
      return null;
    }
  }
  if (lastError) debugLog('pypi-registry: retries exhausted for ' + url + ': ' + lastError.message);
  return null;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Extract a deduped, lowercased list of maintainer emails from PyPI metadata.
 *
 * Reads the `author_email` and `maintainer_email` strings of the `info`
 * block (either or both may be present). Each field may hold several
 * addresses separated by commas or semicolons, optionally wrapped as
 * "Name <email>".
 *
 * Semicolons are split as well as commas: otherwise "a@b.com; c@d.com" is
 * read as the single address "a@b.com;" and the second address is lost.
 *
 * @param {object} infoBlock - `info` object of the PyPI JSON response.
 * @returns {string[]} Unique lowercased addresses in first-seen order; empty
 *   when `infoBlock` is not an object or holds no address.
 */
function extractMaintainerEmails(infoBlock) {
  if (!infoBlock || typeof infoBlock !== 'object') return [];
  const out = new Set();
  for (const field of ['author_email', 'maintainer_email']) {
    const raw = infoBlock[field];
    if (typeof raw !== 'string' || !raw.includes('@')) continue;
    for (const part of raw.split(/[,;]/)) {
      // Prefer the address inside <...>; otherwise take the first bare token
      // containing an '@'.
      const m = part.match(/<([^>]+@[^>]+)>/) || part.match(/([^\s<>]+@[^\s<>]+)/);
      if (m) out.add(m[1].toLowerCase().trim());
    }
  }
  return Array.from(out);
}
|
|
116
|
+
|
|
117
|
+
/**
 * Map every release version to the timestamp of its earliest uploaded file.
 *
 * `releases` is an object keyed by version string whose values are arrays of
 * file entries. For each entry `upload_time_iso_8601` is preferred, with
 * `upload_time` as fallback; non-string timestamps and non-object entries are
 * ignored. Timestamps are compared as strings. Versions with no files, or
 * with no usable timestamp, are left out of the result.
 *
 * @param {object} releases - `releases` object of the PyPI JSON response.
 * @returns {{ [version: string]: string }} version → earliest upload time.
 */
function extractReleaseTimes(releases) {
  const firstUploadByVersion = {};
  if (!releases || typeof releases !== 'object') {
    return firstUploadByVersion;
  }

  const stampOf = (entry) => {
    if (!entry || typeof entry !== 'object') return null;
    return entry.upload_time_iso_8601 || entry.upload_time;
  };

  Object.keys(releases).forEach((version) => {
    const fileList = releases[version];
    if (!Array.isArray(fileList) || fileList.length === 0) return;

    const first = fileList
      .map(stampOf)
      .filter((stamp) => typeof stamp === 'string')
      .reduce((min, stamp) => (min === null || stamp < min ? stamp : min), null);

    if (first) firstUploadByVersion[version] = first;
  });

  return firstUploadByVersion;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Fetch + parse PyPI registry metadata for one package.
 *
 * Returns null when the name fails PYPI_PACKAGE_REGEX, when the fetch yields
 * nothing usable (fetchWithRetry returned null / a non-object), or when a
 * previous null is still cached. Both positive and negative results are
 * cached in-process for CACHE_TTL, keyed by the lowercased package name.
 * The HTTP call is wrapped in acquireRegistrySlot()/releaseRegistrySlot().
 *
 * Field notes:
 *  - created_at / latest_release_at: earliest / latest per-version upload
 *    time (string comparison of the timestamps).
 *  - age_days: whole days since created_at; null when created_at is missing
 *    or cannot be parsed as a date.
 *  - latest_version: `info.version` when it is a non-empty string, else null
 *    (no fallback derived from `releases`).
 *  - yanked: true only when the latest version has at least one file and
 *    every file is flagged `yanked: true`.
 *
 * @param {string} packageName
 * @returns {Promise<{
 *   created_at: string | null,
 *   latest_release_at: string | null,
 *   age_days: number | null,
 *   latest_version: string | null,
 *   version_count: number,
 *   maintainer_emails: string[],
 *   yanked: boolean,
 *   description: string,
 *   home_page: string | null,
 *   project_urls: object | null,
 *   releases: { [version: string]: string }
 * } | null>}
 */
async function getPyPIPackageMetadata(packageName) {
  if (typeof packageName !== 'string' || !PYPI_PACKAGE_REGEX.test(packageName)) return null;
  const normalized = packageName.toLowerCase();

  // Cache check (honors negative cache)
  const cached = _pypiMetadataCache.get(normalized);
  if (cached && (Date.now() - cached.fetchedAt) < CACHE_TTL) {
    return cached.data;
  }

  const url = PYPI_REGISTRY_URL + '/' + encodeURIComponent(packageName) + '/json';
  let raw;
  await acquireRegistrySlot();
  try {
    raw = await fetchWithRetry(url);
  } finally {
    // Always give the slot back, even if the fetch helper throws.
    releaseRegistrySlot();
  }

  if (!raw || typeof raw !== 'object') {
    _pypiMetadataCache.set(normalized, { fetchedAt: Date.now(), data: null });
    return null;
  }

  const info = raw.info || {};
  const releases = raw.releases || {};
  const releaseTimes = extractReleaseTimes(releases);

  // earliest + latest publish dates across all release-versions
  let createdAt = null;
  let latestReleaseAt = null;
  for (const t of Object.values(releaseTimes)) {
    if (createdAt === null || t < createdAt) createdAt = t;
    if (latestReleaseAt === null || t > latestReleaseAt) latestReleaseAt = t;
  }

  // An unparseable timestamp gives NaN from getTime(); report "unknown"
  // (null) rather than letting NaN leak into age-based checks downstream.
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  let ageDays = null;
  if (createdAt) {
    const createdMs = new Date(createdAt).getTime();
    if (Number.isFinite(createdMs)) {
      ageDays = Math.floor((Date.now() - createdMs) / MS_PER_DAY);
    }
  }

  // Latest version: taken from info.version only.
  const latestVersion = (typeof info.version === 'string' && info.version) || null;

  // Yanked status of the latest version (PyPI sets a "yanked" boolean on each
  // file). Require at least one file: [].every(...) is vacuously true and
  // would mark a file-less release as yanked.
  const latestFiles = latestVersion ? releases[latestVersion] : null;
  const yanked = Array.isArray(latestFiles)
    && latestFiles.length > 0
    && latestFiles.every(f => Boolean(f) && f.yanked === true);

  const data = {
    created_at: createdAt,
    latest_release_at: latestReleaseAt,
    age_days: ageDays,
    latest_version: latestVersion,
    version_count: Object.keys(releaseTimes).length,
    maintainer_emails: extractMaintainerEmails(info),
    yanked,
    description: typeof info.summary === 'string' ? info.summary
      : (typeof info.description === 'string' ? info.description.slice(0, 1000) : ''),
    home_page: typeof info.home_page === 'string' && info.home_page ? info.home_page : null,
    project_urls: (info.project_urls && typeof info.project_urls === 'object') ? info.project_urls : null,
    releases: releaseTimes
  };

  _pypiMetadataCache.set(normalized, { fetchedAt: Date.now(), data });
  return data;
}
|
|
227
|
+
|
|
228
|
+
module.exports = {
|
|
229
|
+
getPyPIPackageMetadata,
|
|
230
|
+
// Exposed for unit tests
|
|
231
|
+
_internal: {
|
|
232
|
+
PYPI_PACKAGE_REGEX,
|
|
233
|
+
extractMaintainerEmails,
|
|
234
|
+
extractReleaseTimes,
|
|
235
|
+
fetchWithRetry,
|
|
236
|
+
_pypiMetadataCache,
|
|
237
|
+
_resetCache: () => _pypiMetadataCache.clear()
|
|
238
|
+
}
|
|
239
|
+
};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* F2-PyPI — Release-Zero detection (PEP 440 variant of release-zero.js).
|
|
5
|
+
*
|
|
6
|
+
* Created v2.11.47 to mirror npm's release-zero.js for the PyPI ecosystem.
|
|
7
|
+
*
|
|
8
|
+
* Threat model: an attacker publishes a brand-new package with version 0.0.0
|
|
9
|
+
* (or any 0.x.x variant) as a lure or as a ship-as-vulnerable placeholder.
|
|
10
|
+
* On its own a v0.x.x is noise (many legit early-stage projects sit there
|
|
11
|
+
* forever), so we conjunction-gate with `age_days < 30`: a recently-published
|
|
12
|
+
* 0.x.x is suspicious; an abandoned 0.x.x from 2017 is not.
|
|
13
|
+
*
|
|
14
|
+
* PyPI-specific differences vs npm release-zero.js:
|
|
15
|
+
* - PEP 440 versions, not semver. We match only an all-zero release
|
|
16
|
+
* segment (0, 0.0, 0.0.0, ...), optionally followed by one suffix such as
|
|
17
|
+
* 0.0.0a1, 0.0.0.dev1, 0.0.0rc2 or 0.0.0.post1; 0.1 or 0.1.0a1 do NOT match.
|
|
18
|
+
* - No `preinstall`/`postinstall` lifecycle hook concept in PyPI manifests.
|
|
19
|
+
* The functionally-equivalent vector — `setup.py cmdclass` override —
|
|
20
|
+
* is already covered by PYAST-001. We don't double-detect here.
|
|
21
|
+
* - Conjunction is just `recently published` (no script check).
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
// Match a PEP 440 "release segment" that is exactly 0 in every component.
|
|
25
|
+
// Accepts: 0 | 0.0 | 0.0.0 | 0.0.0.0
|
|
26
|
+
// Optional pre/post/dev segment: a1 | b2 | rc3 | .dev1 | .post1
|
|
27
|
+
// Also allows the rare epoch prefix `0!` (PEP 440 §epoch).
|
|
28
|
+
const PYPI_RELEASE_ZERO_RE = /^(?:0!)?0(?:\.0)*(?:[abc]\d+|rc\d+|\.dev\d+|\.post\d+)?$/i;
|
|
29
|
+
|
|
30
|
+
const RECENT_PUBLISH_THRESHOLD_DAYS = 30;
|
|
31
|
+
|
|
32
|
+
/**
 * Flag a PyPI package whose version matches PYPI_RELEASE_ZERO_RE and whose
 * registry age is below RECENT_PUBLISH_THRESHOLD_DAYS.
 *
 * Returns null when the version is not a non-empty string, does not match
 * the release-zero pattern, when the metadata object is missing, or when
 * `age_days` is not a finite number (null, undefined, NaN, ±Infinity) or is
 * at/above the threshold. NaN is rejected explicitly because
 * `typeof NaN === 'number'` and `NaN >= threshold` is false, which would
 * otherwise let an unknown age through as a finding.
 *
 * @param {string} version - PyPI version string from registry meta (latest_version).
 * @param {object} pypiRegistryMeta - { age_days: number | null, ... }.
 * @returns {object | null} threat object or null.
 */
function checkPyPIReleaseZero(version, pypiRegistryMeta) {
  if (typeof version !== 'string' || version.length === 0) return null;
  if (!PYPI_RELEASE_ZERO_RE.test(version)) return null;
  if (!pypiRegistryMeta || typeof pypiRegistryMeta !== 'object') return null;

  const ageDays = pypiRegistryMeta.age_days;
  // Number.isFinite does not coerce: non-numbers and NaN both fail here.
  if (!Number.isFinite(ageDays) || ageDays >= RECENT_PUBLISH_THRESHOLD_DAYS) return null;

  return {
    type: 'pypi_release_zero',
    severity: 'MEDIUM',
    message: 'PyPI package latest version is "' + version + '" (release-zero) and was first published only ' + ageDays + ' day(s) ago — possible lure / ship-as-vulnerable / typosquat-staging pattern. setup.py cmdclass install-time hooks are covered separately by PYAST-001.',
    file: 'pyproject.toml',
    count: 1,
    version,
    age_days: ageDays
  };
}
|
|
55
|
+
|
|
56
|
+
module.exports = {
|
|
57
|
+
checkPyPIReleaseZero,
|
|
58
|
+
_internal: { PYPI_RELEASE_ZERO_RE, RECENT_PUBLISH_THRESHOLD_DAYS }
|
|
59
|
+
};
|
|
@@ -188,6 +188,34 @@ function detectDynamicDangerousImport(content) {
|
|
|
188
188
|
return /__import__\s*\(\s*['"](subprocess|os|requests|urllib|urllib2|socket|http|ssl|ctypes|importlib)['"]/.test(content);
|
|
189
189
|
}
|
|
190
190
|
|
|
191
|
+
// PYSRC-009 — fork-exec of an inline interpreter. Pattern:
//   subprocess.{Popen,run,call,check_output,check_call,getoutput,getstatusoutput}(
//     [ "<interpreter>", "<inline-flag>", <code>, ... ], ...)
// The first list element is an interpreter name (string literal) and the
// second is an inline-execution flag (-e, -c, --eval, --command, -r).
// This is the canonical sign of multi-language staging: Python spawns a
// Node/Bash/Ruby/... interpreter and hands it code through argv.
// Reference: TrapDoor (May 2026) — `subprocess.run(["node", "-e", payload])`
// with the payload fetched from the C2. The pattern escapes PYAST-005 because
// there is no Python `exec()`/`eval()`; execution happens in the forked
// interpreter.
const FORK_EXEC_INLINE_INTERPRETER_RE = (function buildForkExecInlineRegex() {
  const callees = ['Popen', 'run', 'call', 'check_output', 'check_call', 'getoutput', 'getstatusoutput'];
  const interpreters = [
    'node', 'nodejs', 'deno', 'bun',
    'python', 'python2', 'python3', String.raw`python3\.\d+`,
    'bash', 'sh', 'zsh', 'dash', 'ksh', 'ash',
    'ruby', 'perl', 'php', 'lua',
    'powershell', 'pwsh', 'cmd'
  ];
  const inlineFlags = ['-e', '-c', '--eval', '--command', '-r'];

  // A quoted string literal whose content is one of the given alternatives.
  const quotedOneOf = (alternatives) => `['"](?:` + alternatives.join('|') + `)['"]`;

  const pattern = [
    String.raw`\bsubprocess\.`,
    '(?:' + callees.join('|') + ')',
    String.raw`\s*\(\s*\[\s*`,
    quotedOneOf(interpreters),
    String.raw`\s*,\s*`,
    quotedOneOf(inlineFlags)
  ].join('');

  // Every piece of the pattern is a file-local constant — no user input flows
  // here, so the security/detect-non-literal-regexp warning is a false positive.
  // eslint-disable-next-line security/detect-non-literal-regexp
  return new RegExp(pattern, 'i');
})();
|
|
214
|
+
|
|
215
|
+
/**
 * Tell whether `content` contains a match for
 * FORK_EXEC_INLINE_INTERPRETER_RE (the PYSRC-009 pattern).
 *
 * @param {string} content - Python source text to inspect.
 * @returns {boolean} true when the pattern occurs at least once.
 */
function detectForkExecInlineInterpreter(content) {
  const firstMatch = FORK_EXEC_INLINE_INTERPRETER_RE.exec(content);
  return firstMatch !== null;
}
|
|
218
|
+
|
|
191
219
|
/**
|
|
192
220
|
* Scan Python source files under targetPath for import-time / install-time RCE.
|
|
193
221
|
*
|
|
@@ -237,6 +265,7 @@ function scanPythonSource(targetPath) {
|
|
|
237
265
|
const hasBase64 = detectBase64Decode(cleaned);
|
|
238
266
|
const hasDeser = detectDeserialization(cleaned);
|
|
239
267
|
const hasDynImport = detectDynamicDangerousImport(cleaned);
|
|
268
|
+
const hasForkExecInline = detectForkExecInlineInterpreter(cleaned);
|
|
240
269
|
|
|
241
270
|
if (hasExec) {
|
|
242
271
|
threats.push({
|
|
@@ -296,6 +325,28 @@ function scanPythonSource(targetPath) {
|
|
|
296
325
|
file: relPath
|
|
297
326
|
});
|
|
298
327
|
}
|
|
328
|
+
// PYSRC-009: fork-exec inline interpreter — pattern transversal (node -e,
|
|
329
|
+
// python -c, bash -c, ...). HIGH because it's structurally suspicious in
|
|
330
|
+
// __init__.py / setup.py but has some legit uses (build scripts).
|
|
331
|
+
if (hasForkExecInline) {
|
|
332
|
+
threats.push({
|
|
333
|
+
type: 'fork_exec_inline_interpreter',
|
|
334
|
+
severity: 'HIGH',
|
|
335
|
+
message: `${relPath}: subprocess.X(['<interpreter>', '-e|-c', ...]) — fork-exec of an inline interpreter (node/python/bash/ruby/perl/...). Canonical staging pattern for multi-language malware.`,
|
|
336
|
+
file: relPath
|
|
337
|
+
});
|
|
338
|
+
}
|
|
339
|
+
// PYSRC-010: fetch + fork-exec inline = TrapDoor signature exact. CRITICAL
|
|
340
|
+
// because the file fetches remote bytes and feeds them to a forked
|
|
341
|
+
// interpreter — RCE-equivalent without using Python's exec/eval.
|
|
342
|
+
if (hasFetch && hasForkExecInline) {
|
|
343
|
+
threats.push({
|
|
344
|
+
type: 'fetch_to_fork_exec_inline',
|
|
345
|
+
severity: 'CRITICAL',
|
|
346
|
+
message: `${relPath}: network fetch (urllib/requests/...) AND subprocess.X(['<interpreter>', '-e', ...]) in same file — TrapDoor signature: fetch remote payload then fork-exec it through an inline interpreter. Escapes PYAST-005 (which only tracks Python exec/eval) because execution is on the forked interpreter side.`,
|
|
347
|
+
file: relPath
|
|
348
|
+
});
|
|
349
|
+
}
|
|
299
350
|
}
|
|
300
351
|
|
|
301
352
|
return threats;
|
|
@@ -314,6 +365,7 @@ module.exports = {
|
|
|
314
365
|
detectNetworkFetch,
|
|
315
366
|
detectBase64Decode,
|
|
316
367
|
detectDeserialization,
|
|
317
|
-
detectDynamicDangerousImport
|
|
368
|
+
detectDynamicDangerousImport,
|
|
369
|
+
detectForkExecInlineInterpreter
|
|
318
370
|
}
|
|
319
371
|
};
|
package/src/scanner/python.js
CHANGED
|
@@ -433,10 +433,87 @@ function escapeRegex(str) {
|
|
|
433
433
|
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
434
434
|
}
|
|
435
435
|
|
|
436
|
+
/**
 * Try to detect the NAME of the Python package being scanned. Used by
 * `pypi-registry.js` to fetch metadata for the scanned package itself.
 *
 * Priority order (first hit wins):
 *   1. pyproject.toml [project] name      (PEP 621)
 *   2. pyproject.toml [tool.poetry] name
 *   3. setup.py  setup(name="...")
 *   4. setup.cfg [metadata] name
 *
 * All parsing is regex/scan based (no TOML or Python parser), so only
 * literal string names are recognised; a computed name such as
 * `name=PKG_NAME` in setup.py is not resolved and the next source is tried.
 * Files that are missing or unreadable are treated as empty (best effort).
 *
 * @param {string} targetPath - Directory of the project being scanned.
 * @returns {string | null} The trimmed package name, or null if no
 *   canonical name can be extracted.
 */
function detectScannedPackageName(targetPath) {
  // `name = "X"` on its own line inside a TOML section.
  const TOML_NAME_RE = /^\s*name\s*=\s*["']([^"']+)["']/m;
  // `name="X"` keyword argument inside a Python call.
  const SETUP_NAME_KWARG_RE = /\bname\s*=\s*["']([^"']+)["']/;

  // Read a file as UTF-8. A missing or unreadable file yields '' so the
  // caller simply finds no match and moves on to the next source.
  function readIfPresent(filePath) {
    if (!fs.existsSync(filePath)) return '';
    try {
      return fs.readFileSync(filePath, 'utf8');
    } catch {
      return '';
    }
  }

  // Return the body of the INI/TOML section `[header]`: everything after the
  // header line up to (not including) the next line that starts with `[`, or
  // up to the end of the file. Returns null when the header is absent.
  function extractSection(content, header) {
    const startRe = new RegExp('^\\[' + header.replace(/[.[\]\\]/g, '\\$&') + '\\][^\\n]*\\n', 'm');
    const startMatch = content.match(startRe);
    if (!startMatch) return null;
    const start = startMatch.index + startMatch[0].length;
    const rest = content.slice(start);
    const nextSection = rest.search(/^\[/m);
    return nextSection === -1 ? rest : rest.slice(0, nextSection);
  }

  // Walk the argument list of a call whose opening `(` sits at `openIdx`,
  // tracking bracket depth and skipping string literals and `#` comments, so
  // that nested calls such as `find_packages()` do not end the scan early.
  // Returns:
  //   all      — the full text between the outer parentheses (or to EOF if
  //              the call is unbalanced);
  //   topLevel — only the characters that sit directly in the outer call
  //              (depth 1), i.e. with nested call/list/dict contents removed.
  function scanCallArguments(source, openIdx) {
    let depth = 0;
    let quote = '';
    let topLevel = '';
    let i = openIdx;
    for (; i < source.length; i += 1) {
      const ch = source[i];
      if (quote) {
        if (depth === 1) topLevel += ch;
        if (ch === '\\' && i + 1 < source.length) {
          // Keep the escaped character with its backslash; it can never
          // terminate the string.
          i += 1;
          if (depth === 1) topLevel += source[i];
        } else if (ch === quote) {
          quote = '';
        }
        continue;
      }
      if (ch === '#') {
        // Comment: ignore everything up to the end of the line.
        const eol = source.indexOf('\n', i);
        if (eol === -1) {
          i = source.length;
          break;
        }
        i = eol - 1;
        continue;
      }
      if (ch === '(' || ch === '[' || ch === '{') {
        depth += 1;
        if (depth === 1) continue; // the call's own opening parenthesis
      } else if (ch === ')' || ch === ']' || ch === '}') {
        depth -= 1;
        if (depth === 0) break; // matching close of the outer call
      } else if (ch === '"' || ch === "'") {
        quote = ch;
      }
      if (depth === 1) topLevel += ch;
    }
    return { all: source.slice(openIdx + 1, i), topLevel };
  }

  // 1 + 2. pyproject.toml — PEP 621 [project] first, then [tool.poetry].
  const pyprojectContent = readIfPresent(path.join(targetPath, 'pyproject.toml'));
  for (const header of ['project', 'tool.poetry']) {
    const section = extractSection(pyprojectContent, header);
    if (section) {
      const m = section.match(TOML_NAME_RE);
      if (m) return m[1].trim();
    }
  }

  // 3. setup.py — setup(... name="X" ...), possibly spread over many lines.
  const setupPyContent = readIfPresent(path.join(targetPath, 'setup.py'));
  const setupCall = setupPyContent.match(/\bsetup\s*\(/);
  if (setupCall) {
    const openIdx = setupCall.index + setupCall[0].length - 1;
    const args = scanCallArguments(setupPyContent, openIdx);
    // Prefer a direct keyword of setup() so that e.g. Extension(name="...")
    // is not mistaken for the package name; fall back to any name= inside
    // the call (covers forms like setup(**dict(name="X"))).
    const nameMatch = args.topLevel.match(SETUP_NAME_KWARG_RE) || args.all.match(SETUP_NAME_KWARG_RE);
    if (nameMatch) return nameMatch[1].trim();
  }

  // 4. setup.cfg — [metadata] name = X (unquoted value, rest of the line).
  const setupCfgContent = readIfPresent(path.join(targetPath, 'setup.cfg'));
  const metaSection = extractSection(setupCfgContent, 'metadata');
  if (metaSection) {
    const m = metaSection.match(/^\s*name\s*=\s*(.+?)\s*$/m);
    if (m) return m[1].trim();
  }

  return null;
}
|
|
511
|
+
|
|
436
512
|
module.exports = {
|
|
437
513
|
parseRequirementsTxt,
|
|
438
514
|
parseSetupPy,
|
|
439
515
|
parsePyprojectToml,
|
|
440
516
|
detectPythonProject,
|
|
517
|
+
detectScannedPackageName,
|
|
441
518
|
normalizePythonName
|
|
442
519
|
};
|