muaddib-scanner 2.11.44 → 2.11.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.44",
3
+ "version": "2.11.45",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-05-25T13:23:09.000Z",
3
+ "timestamp": "2026-05-25T14:34:21.859Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -504,6 +504,30 @@ const PLAYBOOKS = {
504
504
  'NE PAS installer. Verifier si l\'agent a ete execute. Si oui, considerer la machine compromise. ' +
505
505
  'Auditer les fichiers sensibles (.ssh, .aws, .env) pour des acces non autorises.',
506
506
 
507
+ pyast_fetch_to_exec_taint:
508
+ 'CRITIQUE: Pattern TrapDoor confirme par taint AST. Une variable Python a recu un payload via un fetch reseau ' +
509
+ '(urllib/requests/httpx/aiohttp/http.client) puis est passee a exec()/eval() au niveau module — RCE direct ' +
510
+ 'a l\'import / pip install. NE PAS installer. Bloquer le domaine du fetch dans le firewall. ' +
511
+ 'Si execute: incident response complet, regenerer TOUS les secrets sur la machine.',
512
+
513
+ pyast_base64_to_exec_taint:
514
+ 'CRITIQUE: Pattern d\'obfuscation confirme par taint AST. Une variable Python a recu un payload decodé ' +
515
+ '(base64/codecs/zlib/gzip/binascii) puis est passee a exec()/eval() au niveau module. NE PAS installer. ' +
516
+ 'Decoder manuellement le payload (python3 -c "import base64; print(base64.b64decode(b\'<blob>\'))") pour ' +
517
+ 'identifier le code masque avant d\'evaluer la portee.',
518
+
519
+ pyast_ctypes_shellcode_load:
520
+ 'HIGH: Loader de shellcode native suspect — ctypes.CDLL/WinDLL/LoadLibrary avec (a) un path en zone ' +
521
+ 'world-writable (/tmp, /var/tmp, /dev/shm, ~/, C:\\Windows\\Temp\\) ou (b) un argument taintee venant ' +
522
+ 'd\'un fetch reseau ou d\'un decode. Pattern classique RAT Python (charge un .so/.dll droppe en memoire). ' +
523
+ 'NE PAS installer. Inspecter le path / la variable pour confirmer la provenance du binaire native.',
524
+
525
+ pyast_env_to_network_write:
526
+ 'HIGH (ou CRITIQUE si env name match credential pattern): exfiltration de credentials confirmee par taint ' +
527
+ 'AST. Une variable Python a recu une valeur depuis os.environ / os.getenv puis est envoyee dans le body ' +
528
+ 'd\'une requete POST/PUT/PATCH (requests/httpx/urllib.Request). NE PAS installer. Si l\'env var name est ' +
529
+ 'sensible (TOKEN/KEY/SECRET/...), revoke immediatement la credential exposee. Bloquer le domaine du POST.',
530
+
507
531
  canary_exfiltration:
508
532
  'CRITIQUE: Le package a tente de voler des credentials (honey tokens). Comportement malveillant confirme. ' +
509
533
  'NE PAS installer. Signaler immediatement sur npm/PyPI. ' +
@@ -427,6 +427,64 @@ const RULES = {
427
427
  mitre: 'T1027'
428
428
  },
429
429
 
430
+ // PYAST-005, 006, 009, 010 — Phase 1b (v2.11.45) : detecteurs taint-aware
431
+ // qui utilisent ctx.moduleTaint populee par handle-assignment.js.
432
+ // Mini-taint intra-procedural mono-fichier, single-hop. Voir python-ast-detectors/
433
+ // taint-tracker.js pour les sources + le plan Phase 1b pour les limitations.
434
+ pyast_fetch_to_exec_taint: {
435
+ id: 'MUADDIB-PYAST-005',
436
+ name: 'Python Fetch + Exec Taint (TrapDoor compound)',
437
+ severity: 'CRITICAL',
438
+ confidence: 'high',
439
+ domain: 'malware',
440
+ description: 'Compound taint-aware : variable assignee depuis un fetch reseau (urllib / requests / httpx / aiohttp / http.client) puis passee a exec()/eval() au niveau module. Signature directe de remote-payload-then-RCE — pattern TrapDoor mai 2026 et Lazarus PyPI series.',
441
+ references: [
442
+ 'https://socket.dev/blog/trapdoor-crypto-stealer-npm-pypi-crates',
443
+ 'https://attack.mitre.org/techniques/T1105/',
444
+ 'https://attack.mitre.org/techniques/T1059/006/'
445
+ ],
446
+ mitre: 'T1105'
447
+ },
448
+ pyast_base64_to_exec_taint: {
449
+ id: 'MUADDIB-PYAST-006',
450
+ name: 'Python Base64/Decode + Exec Taint (Obfuscated Payload)',
451
+ severity: 'CRITICAL',
452
+ confidence: 'high',
453
+ domain: 'malware',
454
+ description: 'Compound taint-aware : variable assignee depuis un decode (base64 / codecs / zlib / gzip / binascii / bytes.fromhex) puis passee a exec()/eval() au niveau module. Pattern d\'obfuscation pour echapper a la revue + grep statique. Vu dans W4SP / Crystal / Lumma stealers PyPI.',
455
+ references: [
456
+ 'https://attack.mitre.org/techniques/T1027/',
457
+ 'https://attack.mitre.org/techniques/T1059/006/'
458
+ ],
459
+ mitre: 'T1027'
460
+ },
461
+ pyast_ctypes_shellcode_load: {
462
+ id: 'MUADDIB-PYAST-009',
463
+ name: 'Python ctypes Shellcode Loader',
464
+ severity: 'HIGH',
465
+ confidence: 'medium',
466
+ domain: 'malware',
467
+ description: 'ctypes.CDLL / WinDLL / LoadLibrary appele avec (a) un path suspect (/tmp, /var/tmp, /dev/shm, ~/, C:\\Windows\\Temp\\, ...) ou (b) un argument taintee venant d\'un fetch ou d\'un decode. Pattern de loader de shellcode native (.so / .dll dropped sur disque puis charge en memoire). Vu dans les campagnes RATs Python.',
468
+ references: [
469
+ 'https://docs.python.org/3/library/ctypes.html',
470
+ 'https://attack.mitre.org/techniques/T1055/'
471
+ ],
472
+ mitre: 'T1055'
473
+ },
474
+ pyast_env_to_network_write: {
475
+ id: 'MUADDIB-PYAST-010',
476
+ name: 'Python Env Read + Network POST Taint (Credential Exfil)',
477
+ severity: 'HIGH',
478
+ confidence: 'high',
479
+ domain: 'malware',
480
+ description: 'Compound taint-aware : variable assignee depuis os.environ[X] / os.environ.get(X) / os.getenv(X) puis envoyee dans le body d\'une requete POST/PUT/PATCH (requests / httpx / urllib.Request). Pattern d\'exfiltration de credentials. Severity escaladee a CRITICAL si le nom de la variable d\'env match un pattern sensible (TOKEN, KEY, SECRET, PASSWORD, NPM_, AWS_, SSH, API, GITHUB_, HF_, ANTHROPIC, ...).',
481
+ references: [
482
+ 'https://attack.mitre.org/techniques/T1041/',
483
+ 'https://attack.mitre.org/techniques/T1552/001/'
484
+ ],
485
+ mitre: 'T1041'
486
+ },
487
+
430
488
  suspicious_file: {
431
489
  id: 'MUADDIB-DEP-002',
432
490
  name: 'Suspicious File in Dependency',
@@ -0,0 +1,38 @@
1
+ 'use strict';
2
+
3
+ const { classifyTaintSource } = require('./taint-tracker.js');
4
+
5
/**
 * Visitor for `assignment` nodes: keeps `ctx.moduleTaint` in sync with
 * module-level assignments.
 *
 * Behaviour:
 *   - does nothing unless `scopeDepth === 0` and `ctx.moduleTaint` exists
 *   - does nothing unless the node has both a `left` and a `right` field and
 *     the left-hand side is a bare `identifier` node
 *   - when `classifyTaintSource(right)` returns a record, stores it under the
 *     identifier's text
 *   - otherwise removes any record previously stored for that identifier, so
 *     `payload = source(); payload = "harmless"; exec(payload)` is not flagged
 *
 * @param {object} node - assignment node exposing `childForFieldName`.
 * @param {object} ctx - per-file context; `ctx.moduleTaint` is used as a Map.
 * @param {number} scopeDepth - nesting depth supplied by the walker.
 * @returns {void}
 */
function handleAssignment(node, ctx, scopeDepth) {
  const atModuleLevel = scopeDepth === 0;
  if (!atModuleLevel || !ctx.moduleTaint) return;

  const target = node.childForFieldName('left');
  const value = node.childForFieldName('right');

  // Only `name = expr` is tracked; tuple / attribute / subscript targets are
  // ignored.
  if (!target || !value || target.type !== 'identifier') return;

  const record = classifyTaintSource(value);
  if (!record) {
    // Reassignment to a non-source value: drop stale taint. `delete` is a
    // no-op when the name was never tainted.
    ctx.moduleTaint.delete(target.text);
    return;
  }
  ctx.moduleTaint.set(target.text, record);
}
37
+
38
+ module.exports = { handleAssignment };
@@ -8,12 +8,19 @@ const {
8
8
  isTruthyLiteral,
9
9
  lineOf
10
10
  } = require('./helpers.js');
11
+ const { lookupTaint, isEnvSensitive } = require('./taint-tracker.js');
11
12
 
12
13
  /**
13
- * Visitor for `call` nodes. Emits PYAST-003, PYAST-004, PYAST-007, PYAST-008.
14
+ * Visitor for `call` nodes. Emits PYAST-003, PYAST-004, PYAST-005, PYAST-006,
15
+ * PYAST-007, PYAST-008, PYAST-009, PYAST-010.
14
16
  *
15
17
  * PYAST-001 / PYAST-002 are emitted by `handle-setup-call.js` which is a
16
18
  * specialised pass over the same node type — it only fires on `setup(...)`.
19
+ *
20
+ * Taint-aware detectors (005/006/009/010) read `ctx.moduleTaint` populated
21
+ * by `handle-assignment.js`. They only fire at scope_depth === 0 (module-level
22
+ * sinks paired with module-level sources — see plan Phase 1b for the
23
+ * intra-procedural / single-hop restrictions).
17
24
  */
18
25
 
19
26
  const MODULE_EXEC_CALLEES = new Set(['exec', 'eval']);
@@ -58,6 +65,66 @@ const DANGEROUS_DYNAMIC_IMPORTS = new Set([
58
65
  'importlib'
59
66
  ]);
60
67
 
68
// Network "write" sinks for PYAST-010: callees whose arguments are inspected
// for env-tainted values. Covers the requests / httpx verb helpers listed
// below plus the urllib request constructors.
const NETWORK_WRITE_CALLEES = new Set([
  'requests.post',
  'requests.put',
  'requests.patch',
  'requests.delete',
  'requests.request',
  'httpx.post',
  'httpx.put',
  'httpx.patch',
  'httpx.delete',
  'httpx.request',
  'urllib.request.urlopen', // can send body when called on a Request object
  'urllib.request.Request'
]);

// Keyword arguments whose value is checked for tainted identifiers.
// `content` is httpx's raw-bytes body kwarg; without it
// `httpx.post(url, content=token)` would not be inspected.
const NETWORK_DATA_KWARGS = new Set(['data', 'json', 'body', 'files', 'params', 'content']);
85
+
86
+ // ctypes loaders for PYAST-009.
87
+ const CTYPES_LOAD_CALLEES = new Set([
88
+ 'ctypes.CDLL',
89
+ 'ctypes.WinDLL',
90
+ 'ctypes.cdll.LoadLibrary',
91
+ 'ctypes.windll.LoadLibrary',
92
+ 'ctypes.PyDLL'
93
+ ]);
94
+
95
+ const SUSPICIOUS_PATH_RE = /^(\/tmp\/|\/var\/tmp\/|\/dev\/shm\/|~\/|\$HOME\/|C:\\Users\\Public\\|C:\\Windows\\Temp\\|\.\/_?cache\/)/i;
96
+
97
// ---------------------------------------------------------------------------
// Helper: iterate positional args of a call, skipping syntax noise.
// ---------------------------------------------------------------------------

// Child node types of an argument list that are never positional arguments:
// keyword arguments, punctuation, and comments (the parser attaches comments
// written inside the parentheses as children of the argument list).
const NON_POSITIONAL_ARG_TYPES = new Set(['keyword_argument', ',', '(', ')', 'comment']);

/**
 * Yields each positional argument node of a call node, in source order.
 *
 * @param {object} callNode - node exposing `childForFieldName('arguments')`.
 * @yields {object} every child of the argument list whose type is not in
 *   NON_POSITIONAL_ARG_TYPES. Yields nothing when the call has no
 *   `arguments` field.
 */
function* positionalArgs(callNode) {
  const args = callNode.childForFieldName('arguments');
  if (!args) return;
  for (const child of args.children) {
    if (NON_POSITIONAL_ARG_TYPES.has(child.type)) continue;
    yield child;
  }
}
109
+
110
/**
 * Looks up a keyword argument by name on a call node.
 *
 * @param {object} callNode - node exposing `childForFieldName('arguments')`.
 * @param {string} kwName - keyword name to match against the `name` field.
 * @returns {object|null} the `value` field of the first `keyword_argument`
 *   child whose name text equals `kwName`; null when the call has no
 *   argument list or no such keyword.
 */
function getKwargValue(callNode, kwName) {
  const argList = callNode.childForFieldName('arguments');
  if (!argList) return null;

  const match = argList.children.find((child) => {
    if (child.type !== 'keyword_argument') return false;
    const label = child.childForFieldName('name');
    return Boolean(label) && label.text === kwName;
  });

  return match ? match.childForFieldName('value') : null;
}
123
+
124
+ // ---------------------------------------------------------------------------
125
+ // Main visitor
126
+ // ---------------------------------------------------------------------------
127
+
61
128
  function handleCallExpression(node, ctx, scopeDepth) {
62
129
  const callee = calleeDottedName(node);
63
130
  if (!callee) return;
@@ -71,6 +138,31 @@ function handleCallExpression(node, ctx, scopeDepth) {
71
138
  file: ctx.relFile,
72
139
  line: lineOf(node)
73
140
  });
141
+
142
+ // PYAST-005 / PYAST-006: taint-aware compounds layered on top of PYAST-003.
143
+ // Walk the positional args; if any is a tainted identifier we fire the
144
+ // appropriate compound. Multiple sources in the same call → multiple emits.
145
+ for (const arg of positionalArgs(node)) {
146
+ const taint = lookupTaint(ctx, arg);
147
+ if (!taint) continue;
148
+ if (taint.sourceType === 'fetch') {
149
+ ctx.threats.push({
150
+ type: 'pyast_fetch_to_exec_taint',
151
+ severity: 'CRITICAL',
152
+ message: `${ctx.relFile}:${lineOf(node)}: ${callee}(${arg.text}) — argument was assigned earlier from a network fetch (urllib / requests / http.client / httpx / aiohttp). TrapDoor-style remote-payload-then-RCE.`,
153
+ file: ctx.relFile,
154
+ line: lineOf(node)
155
+ });
156
+ } else if (taint.sourceType === 'base64') {
157
+ ctx.threats.push({
158
+ type: 'pyast_base64_to_exec_taint',
159
+ severity: 'CRITICAL',
160
+ message: `${ctx.relFile}:${lineOf(node)}: ${callee}(${arg.text}) — argument was assigned earlier from a decode call (base64 / codecs / zlib / gzip / binascii). Obfuscated payload execution pattern.`,
161
+ file: ctx.relFile,
162
+ line: lineOf(node)
163
+ });
164
+ }
165
+ }
74
166
  }
75
167
 
76
168
  // PYAST-004: subprocess.X(..., shell=True) at module level.
@@ -113,6 +205,99 @@ function handleCallExpression(node, ctx, scopeDepth) {
113
205
  });
114
206
  }
115
207
  }
208
+
209
+ // PYAST-009: ctypes.CDLL / WinDLL / LoadLibrary with suspicious path OR
210
+ // tainted argument. Fires at any scope depth (loading shellcode is dangerous
211
+ // wherever it runs, but module-level is the worst).
212
+ if (CTYPES_LOAD_CALLEES.has(callee)) {
213
+ const firstArg = firstPositionalArg(node);
214
+ if (firstArg) {
215
+ const litPath = stringLiteralValue(firstArg);
216
+ if (litPath && SUSPICIOUS_PATH_RE.test(litPath)) {
217
+ ctx.threats.push({
218
+ type: 'pyast_ctypes_shellcode_load',
219
+ severity: 'HIGH',
220
+ message: `${ctx.relFile}:${lineOf(node)}: ${callee}('${litPath}') — loads a native library from a suspicious path (temp / world-writable / user-cache). Common shellcode loader pattern.`,
221
+ file: ctx.relFile,
222
+ line: lineOf(node)
223
+ });
224
+ } else {
225
+ const taint = lookupTaint(ctx, firstArg);
226
+ if (taint && (taint.sourceType === 'fetch' || taint.sourceType === 'base64')) {
227
+ ctx.threats.push({
228
+ type: 'pyast_ctypes_shellcode_load',
229
+ severity: 'HIGH',
230
+ message: `${ctx.relFile}:${lineOf(node)}: ${callee}(${firstArg.text}) — native library loaded from a tainted argument (assigned from ${taint.sourceType === 'fetch' ? 'network fetch' : 'base64/decode chain'}). Shellcode loader pattern.`,
231
+ file: ctx.relFile,
232
+ line: lineOf(node)
233
+ });
234
+ }
235
+ }
236
+ }
237
+ }
238
+
239
+ // PYAST-010: env var read → network POST/PUT/etc. sink at module level.
240
+ // Walks the call's positional args + sensitive kwargs (data, json, body, ...)
241
+ // looking for a tainted identifier with sourceType === 'env'. Severity
242
+ // escalates to CRITICAL if the env var name matches the sensitive pattern.
243
+ if (NETWORK_WRITE_CALLEES.has(callee) && scopeDepth === 0) {
244
+ const candidates = [];
245
+ for (const arg of positionalArgs(node)) candidates.push(arg);
246
+ for (const kwName of NETWORK_DATA_KWARGS) {
247
+ const v = getKwargValue(node, kwName);
248
+ if (v) candidates.push(v);
249
+ }
250
+ for (const arg of candidates) {
251
+ // Direct identifier: data=token
252
+ if (arg.type === 'identifier') {
253
+ const taint = lookupTaint(ctx, arg);
254
+ if (taint && taint.sourceType === 'env') {
255
+ emitEnvNetwork(ctx, node, callee, arg.text, taint.envVarName);
256
+ break; // one finding per call
257
+ }
258
+ }
259
+ // Container literal: data={"t": token}, json=[token]
260
+ // Walk one level deep looking for tainted identifiers.
261
+ if (arg.type === 'dictionary' || arg.type === 'list' || arg.type === 'tuple') {
262
+ const tainted = findTaintedIdentifierIn(arg, ctx);
263
+ if (tainted) {
264
+ emitEnvNetwork(ctx, node, callee, tainted.text, tainted.taint.envVarName);
265
+ break;
266
+ }
267
+ }
268
+ }
269
+ }
270
+ }
271
+
272
/**
 * Pushes one `pyast_env_to_network_write` finding onto `ctx.threats`.
 *
 * Severity is 'CRITICAL' when `isEnvSensitive(envVarName)` is true and
 * 'HIGH' otherwise; the message suffix changes accordingly.
 *
 * @param {object} ctx - per-file context (`relFile`, `threats`).
 * @param {object} callNode - the sink call node (used for the line number).
 * @param {string} callee - dotted name of the sink callee.
 * @param {string} varName - text of the tainted identifier.
 * @param {string} envVarName - env var name recorded in the taint record.
 * @returns {void}
 */
function emitEnvNetwork(ctx, callNode, callee, varName, envVarName) {
  const sensitive = isEnvSensitive(envVarName);
  const line = lineOf(callNode);

  let severity = 'HIGH';
  let suffix = ' — env-to-network exfil pattern.';
  if (sensitive) {
    severity = 'CRITICAL';
    suffix = ' — sensitive env var name matches credential pattern, credential exfil suspected.';
  }

  ctx.threats.push({
    type: 'pyast_env_to_network_write',
    severity,
    message: `${ctx.relFile}:${line}: ${callee}(...) at module level receives '${varName}' which was assigned from os.environ['${envVarName}']${suffix}`,
    file: ctx.relFile,
    line
  });
}
282
+
283
/**
 * Scans the direct children of a dict / list / tuple node for an identifier
 * carrying 'env' taint. Direct `identifier` children are checked as-is; for
 * `pair` children the `value` field is checked when it is an identifier.
 * Nested containers are not descended into.
 *
 * @param {object} containerNode - node whose `children` are scanned in order.
 * @param {object} ctx - per-file context passed through to `lookupTaint`.
 * @returns {{text: string, taint: object}|null} first match, or null.
 */
function findTaintedIdentifierIn(containerNode, ctx) {
  for (const element of containerNode.children) {
    let candidate = null;
    if (element.type === 'identifier') {
      candidate = element;
    } else if (element.type === 'pair') {
      const valueNode = element.childForFieldName('value');
      if (valueNode && valueNode.type === 'identifier') candidate = valueNode;
    }
    if (!candidate) continue;

    const taint = lookupTaint(ctx, candidate);
    if (taint && taint.sourceType === 'env') {
      return { text: candidate.text, taint };
    }
  }
  return null;
}
117
302
 
118
303
  module.exports = { handleCallExpression };
@@ -2,6 +2,7 @@
2
2
 
3
3
  const { handleCallExpression } = require('./handle-call-expression.js');
4
4
  const { handleSetupCall } = require('./handle-setup-call.js');
5
+ const { handleAssignment } = require('./handle-assignment.js');
5
6
  const helpers = require('./helpers.js');
6
7
 
7
8
  // Two visitors run on the `call` node type. `walk()` only dispatches one
@@ -13,7 +14,8 @@ function callDispatcher(node, ctx, scopeDepth) {
13
14
 
14
15
  module.exports = {
15
16
  visitors: {
16
- call: callDispatcher
17
+ call: callDispatcher,
18
+ assignment: handleAssignment
17
19
  },
18
20
  helpers
19
21
  };
@@ -0,0 +1,210 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Mini taint tracker — Phase 1b of the PYAST roadmap.
5
+ *
6
+ * Scope (V1, deliberately minimal — see plan file for the full design):
7
+ * - intra-procedural, module-level only (scope_depth === 0)
8
+ * - single assignment hop : `var = source_expr` then `sink(..., var, ...)`
9
+ * - reassignment clears taint
10
+ * - bare identifiers only (no attribute / subscript LHS)
11
+ * - no multi-hop chains (`a = src(); b = a; sink(b)` is NOT tracked — Phase 3)
12
+ *
13
+ * Sources :
14
+ * - 'fetch' : network reads (urllib, requests, httpx, aiohttp, http.client)
15
+ * - 'base64' : decoders (base64.*, codecs.decode, zlib.decompress, gzip.decompress, binascii.unhexlify, bytes.fromhex)
16
+ * - 'env' : os.environ access (subscript + .get + os.getenv)
17
+ *
18
+ * Sinks are NOT defined here — they live in handle-call-expression.js. This
19
+ * module is the pure-source-classifier + lookup helper.
20
+ */
21
+
22
+ const { dottedName, stringLiteralValue } = require('./helpers.js');
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // FETCH source detection
26
+ // ---------------------------------------------------------------------------
27
+
28
+ const FETCH_DOTTED_CALLEES = new Set([
29
+ 'urllib.request.urlopen',
30
+ 'urllib2.urlopen',
31
+ 'requests.get',
32
+ 'requests.post',
33
+ 'requests.put',
34
+ 'requests.delete',
35
+ 'requests.patch',
36
+ 'requests.head',
37
+ 'requests.options',
38
+ 'requests.request',
39
+ 'httpx.get',
40
+ 'httpx.post',
41
+ 'httpx.put',
42
+ 'httpx.delete',
43
+ 'httpx.patch',
44
+ 'httpx.head',
45
+ 'httpx.options',
46
+ 'httpx.request'
47
+ ]);
48
+
49
/**
 * Tells whether a call node instantiates one of the `http.client` connection
 * classes (`http.client.HTTPSConnection(...)` or
 * `http.client.HTTPConnection(...)`), judged by the dotted name of its
 * `function` field.
 *
 * @param {object} callNode - node exposing `childForFieldName('function')`.
 * @returns {boolean}
 */
function isHttpClientConnectionCall(callNode) {
  const calleeName = dottedName(callNode.childForFieldName('function'));
  return ['http.client.HTTPSConnection', 'http.client.HTTPConnection'].includes(calleeName);
}
55
+
56
// Methods that, when called on a fetch result, still yield fetched data.
// `decode` covers the very common `urlopen(u).read().decode()` and
// `requests.get(u).content.decode()` chains, which would otherwise escape
// classification.
const FETCH_RESULT_METHODS = new Set([
  'read', 'json', 'text', 'content', 'iter_content', 'iter_lines', 'decode'
]);

// Attributes that, when read from a fetch result (no call), yield fetched data.
const FETCH_RESULT_ATTRIBUTES = new Set(['text', 'content', 'json']);

/**
 * Tells whether an expression node is a "fetch" source.
 *
 * Recognised shapes:
 *   - a call whose dotted callee is in FETCH_DOTTED_CALLEES
 *   - an `http.client` connection instantiation (treated as fetch)
 *   - a method call in FETCH_RESULT_METHODS whose receiver is itself a fetch
 *     source (checked recursively, so chains of any length are followed)
 *   - an attribute access in FETCH_RESULT_ATTRIBUTES whose object is itself
 *     a fetch source
 *
 * @param {object|null|undefined} node - expression node to classify.
 * @returns {boolean}
 */
function isFetchSource(node) {
  if (!node) return false;

  if (node.type === 'call') {
    const fn = node.childForFieldName('function');

    // Direct call: requests.get(...), urllib.request.urlopen(...)
    const name = dottedName(fn);
    if (name && FETCH_DOTTED_CALLEES.has(name)) return true;
    if (isHttpClientConnectionCall(node)) return true;

    // Method call on a fetch result: urlopen(...).read(), ....read().decode()
    if (fn && fn.type === 'attribute') {
      const receiver = fn.childForFieldName('object');
      const methodNode = fn.childForFieldName('attribute');
      const methodName = methodNode && methodNode.text;
      if (FETCH_RESULT_METHODS.has(methodName) && isFetchSource(receiver)) return true;
    }
    return false;
  }

  // Attribute access on a fetch result: requests.get(...).text
  if (node.type === 'attribute') {
    const receiver = node.childForFieldName('object');
    const attr = node.childForFieldName('attribute');
    if (attr && FETCH_RESULT_ATTRIBUTES.has(attr.text) && isFetchSource(receiver)) return true;
  }

  return false;
}
92
+
93
+ // ---------------------------------------------------------------------------
94
+ // BASE64 / decode source detection
95
+ // ---------------------------------------------------------------------------
96
+
97
+ const DECODE_DOTTED_CALLEES = new Set([
98
+ 'base64.b64decode',
99
+ 'base64.b32decode',
100
+ 'base64.b16decode',
101
+ 'base64.standard_b64decode',
102
+ 'base64.urlsafe_b64decode',
103
+ 'base64.a85decode',
104
+ 'base64.b85decode',
105
+ 'codecs.decode',
106
+ 'zlib.decompress',
107
+ 'gzip.decompress',
108
+ 'bz2.decompress',
109
+ 'lzma.decompress',
110
+ 'binascii.unhexlify',
111
+ 'binascii.a2b_base64',
112
+ 'binascii.a2b_hex',
113
+ 'bytes.fromhex' // `bytes.fromhex("...")` decodes a hex string
114
+ ]);
115
+
116
/**
 * Tells whether a node is a call to one of the decoders listed in
 * DECODE_DOTTED_CALLEES, judged by the dotted name of its `function` field.
 *
 * @param {object|null|undefined} node - expression node to classify.
 * @returns {boolean} false for missing nodes and for non-`call` nodes.
 */
function isBase64Source(node) {
  if (!node) return false;
  if (node.type !== 'call') return false;
  const calleeName = dottedName(node.childForFieldName('function'));
  return Boolean(calleeName) && DECODE_DOTTED_CALLEES.has(calleeName);
}
122
+
123
+ // ---------------------------------------------------------------------------
124
+ // ENV source detection — returns { sourceType: 'env', envVarName }
125
+ // ---------------------------------------------------------------------------
126
+
127
// Name fragments that mark an environment variable as credential-like.
// Tested unanchored and case-insensitively, so any substring hit counts.
// A match escalates the severity of PYAST-010.
const SENSITIVE_ENV_RE = /(TOKEN|KEY|SECRET|PASSWORD|PASSWD|CREDENTIAL|CRED|NPM_|AWS_|SSH|API|GITHUB_|GH_|HF_|ANTHROPIC|OPENAI|SLACK|DISCORD|TELEGRAM|STRIPE|GCP|AZURE|DATABASE_URL|DB_PASS)/i;

/**
 * Tells whether an env var name matches SENSITIVE_ENV_RE.
 *
 * @param {*} envVarName - candidate name; anything that is not a string
 *   yields false.
 * @returns {boolean}
 */
function isEnvSensitive(envVarName) {
  if (typeof envVarName !== 'string') return false;
  return SENSITIVE_ENV_RE.test(envVarName);
}
134
+
135
/**
 * Extracts the environment variable name read by an expression node.
 *
 * Recognised shapes:
 *   - `os.environ['X']`            -> 'X'
 *   - `os.environ[expr]`           -> '<computed>'
 *   - `os.environ.get('X', ...)`   -> 'X' (non-literal first arg -> '<computed>')
 *   - `os.getenv('X', ...)`        -> 'X' (non-literal first arg -> '<computed>')
 *   - `os.getenv(key='X')`         -> 'X' (keyword form; Python accepts it)
 *
 * Comments written inside the argument list are skipped so they are not
 * mistaken for a non-literal first argument.
 *
 * @param {object|null|undefined} node - expression node to classify.
 * @returns {string|null} the env var name, '<computed>' when the key is not
 *   a string literal, or null when the node is not an env read.
 */
function classifyEnvSource(node) {
  if (!node) return null;

  // subscript: os.environ['X'] or os.environ[X]
  if (node.type === 'subscript') {
    const obj = node.childForFieldName('value');
    if (obj && dottedName(obj) === 'os.environ') {
      const lit = stringLiteralValue(node.childForFieldName('subscript'));
      return lit !== null ? lit : '<computed>';
    }
    return null;
  }

  // call: os.environ.get('X', ...) or os.getenv('X', ...)
  if (node.type !== 'call') return null;
  const name = dottedName(node.childForFieldName('function'));
  if (name !== 'os.environ.get' && name !== 'os.getenv') return null;

  const args = node.childForFieldName('arguments');
  if (!args) return null;

  let keyKwargValue = null;
  for (const child of args.children) {
    const type = child.type;
    if (type === ',' || type === '(' || type === ')' || type === 'comment') continue;

    if (type === 'keyword_argument') {
      // Only os.getenv takes its name as `key=`; dict.get is positional-only.
      const kwName = child.childForFieldName('name');
      if (name === 'os.getenv' && keyKwargValue === null && kwName && kwName.text === 'key') {
        keyKwargValue = child.childForFieldName('value');
      }
      continue;
    }

    // First positional argument decides the name.
    const lit = stringLiteralValue(child);
    return lit !== null ? lit : '<computed>';
  }

  if (keyKwargValue) {
    const lit = stringLiteralValue(keyKwargValue);
    return lit !== null ? lit : '<computed>';
  }

  return null;
}
170
+
171
+ // ---------------------------------------------------------------------------
172
+ // Public API
173
+ // ---------------------------------------------------------------------------
174
+
175
/**
 * Maps a right-hand-side expression node to a taint record.
 *
 * Classifiers are tried in a fixed order — fetch, then decode, then env —
 * and the first one that matches wins.
 *
 * @param {object|null|undefined} node - expression node to classify.
 * @returns {{sourceType: 'fetch'|'base64'|'env', envVarName?: string}|null}
 *   the taint record (`envVarName` is present only for 'env'), or null when
 *   the node is missing or not a recognised source.
 */
function classifyTaintSource(node) {
  if (!node) return null;

  if (isFetchSource(node)) {
    return { sourceType: 'fetch' };
  }
  if (isBase64Source(node)) {
    return { sourceType: 'base64' };
  }

  const envVarName = classifyEnvSource(node);
  return envVarName === null ? null : { sourceType: 'env', envVarName };
}
188
+
189
/**
 * Fetches the taint record stored for an identifier node.
 *
 * @param {object} ctx - per-file context; `ctx.moduleTaint` is read as a Map
 *   keyed by identifier text.
 * @param {object|null|undefined} identifierNode - node to look up.
 * @returns {object|null} the stored record, or null when the node is missing,
 *   is not of type `identifier`, the map is absent, or nothing is stored.
 *   Filtering on `sourceType` is left to the caller.
 */
function lookupTaint(ctx, identifierNode) {
  const isIdentifier = Boolean(identifierNode) && identifierNode.type === 'identifier';
  if (!isIdentifier || !ctx.moduleTaint) return null;

  const record = ctx.moduleTaint.get(identifierNode.text);
  return record || null;
}
198
+
199
+ module.exports = {
200
+ classifyTaintSource,
201
+ lookupTaint,
202
+ isEnvSensitive,
203
+ // Exposed for unit tests
204
+ _internal: {
205
+ isFetchSource,
206
+ isBase64Source,
207
+ classifyEnvSource,
208
+ SENSITIVE_ENV_RE
209
+ }
210
+ };
@@ -147,7 +147,11 @@ function scanPythonAST(targetPath) {
147
147
  threats,
148
148
  relFile: path.relative(targetPath, file),
149
149
  source,
150
- invisibleCount
150
+ invisibleCount,
151
+ // Per-file taint map populated by handle-assignment.js at scope_depth==0
152
+ // and read by handle-call-expression.js for compound detections
153
+ // (PYAST-005/006/009/010). See python-ast-detectors/taint-tracker.js.
154
+ moduleTaint: new Map()
151
155
  };
152
156
 
153
157
  walk(tree.rootNode, ctx, visitors);