muaddib-scanner 2.2.5 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/bin/muaddib.js +16 -2
  2. package/datasets/benign/packages-npm.txt +576 -77
  3. package/datasets/benign/packages-pypi.txt +146 -31
  4. package/datasets/ground-truth/README.md +54 -0
  5. package/datasets/ground-truth/known-malware.json +622 -0
  6. package/datasets/holdout-v5/callback-exfil/main.js +8 -0
  7. package/datasets/holdout-v5/callback-exfil/package.json +5 -0
  8. package/datasets/holdout-v5/callback-exfil/reader.js +10 -0
  9. package/datasets/holdout-v5/class-method-exfil/collector.js +10 -0
  10. package/datasets/holdout-v5/class-method-exfil/main.js +7 -0
  11. package/datasets/holdout-v5/class-method-exfil/package.json +5 -0
  12. package/datasets/holdout-v5/conditional-split/detector.js +2 -0
  13. package/datasets/holdout-v5/conditional-split/package.json +5 -0
  14. package/datasets/holdout-v5/conditional-split/stealer.js +16 -0
  15. package/datasets/holdout-v5/event-emitter-flow/listener.js +12 -0
  16. package/datasets/holdout-v5/event-emitter-flow/package.json +5 -0
  17. package/datasets/holdout-v5/event-emitter-flow/scanner.js +11 -0
  18. package/datasets/holdout-v5/mixed-inline-split/index.js +6 -0
  19. package/datasets/holdout-v5/mixed-inline-split/package.json +5 -0
  20. package/datasets/holdout-v5/mixed-inline-split/reader.js +3 -0
  21. package/datasets/holdout-v5/mixed-inline-split/sender.js +6 -0
  22. package/datasets/holdout-v5/named-export-steal/main.js +6 -0
  23. package/datasets/holdout-v5/named-export-steal/package.json +5 -0
  24. package/datasets/holdout-v5/named-export-steal/utils.js +1 -0
  25. package/datasets/holdout-v5/reexport-chain/a.js +2 -0
  26. package/datasets/holdout-v5/reexport-chain/b.js +1 -0
  27. package/datasets/holdout-v5/reexport-chain/c.js +11 -0
  28. package/datasets/holdout-v5/reexport-chain/package.json +5 -0
  29. package/datasets/holdout-v5/split-env-exfil/env.js +2 -0
  30. package/datasets/holdout-v5/split-env-exfil/exfil.js +5 -0
  31. package/datasets/holdout-v5/split-env-exfil/package.json +5 -0
  32. package/datasets/holdout-v5/split-npmrc-steal/index.js +2 -0
  33. package/datasets/holdout-v5/split-npmrc-steal/package.json +5 -0
  34. package/datasets/holdout-v5/split-npmrc-steal/reader.js +8 -0
  35. package/datasets/holdout-v5/split-npmrc-steal/sender.js +17 -0
  36. package/datasets/holdout-v5/three-hop-chain/package.json +5 -0
  37. package/datasets/holdout-v5/three-hop-chain/reader.js +8 -0
  38. package/datasets/holdout-v5/three-hop-chain/sender.js +11 -0
  39. package/datasets/holdout-v5/three-hop-chain/transform.js +3 -0
  40. package/package.json +1 -1
  41. package/src/commands/evaluate.js +191 -31
  42. package/src/index.js +20 -1
  43. package/src/response/playbooks.js +5 -0
  44. package/src/rules/index.js +13 -0
  45. package/src/scanner/module-graph.js +883 -0
  46. package/tmp-summary.js +24 -0
  47. package/tmp-test-pack.js +66 -0
@@ -0,0 +1,883 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const acorn = require('acorn');
4
+ const { findFiles } = require('../utils');
5
+
6
// --- Sensitive source patterns ---
// Core modules whose `require` binding is tracked as a potential taint source.
const SENSITIVE_MODULES = new Set([
  'fs',
  'child_process',
  'dns',
  'os',
]);

// Options handed to acorn for every file parsed by this module.
const ACORN_OPTIONS = {
  ecmaVersion: 'latest',
  sourceType: 'module',
  allowReturnOutsideFunction: true,
  allowImportExportEverywhere: true,
};

// --- Sink patterns for cross-file detection ---
// Bare callee identifiers treated as sinks, e.g. fetch(x) or eval(x).
const SINK_CALLEE_NAMES = new Set([
  'fetch',
  'eval',
  'Function',
  'WebSocket',
  'XMLHttpRequest',
]);

// Fully qualified `object.method` chains treated as sinks.
const SINK_MEMBER_METHODS = new Set([
  'https.request',
  'https.get',
  'http.request',
  'http.get',
  'child_process.exec',
  'child_process.execSync',
  'child_process.spawn',
]);

// Method names treated as sinks on any receiver, e.g. socket.write(x).
const SINK_INSTANCE_METHODS = new Set([
  'connect',
  'write',
  'send',
]);
23
+
24
+ // =============================================================================
25
+ // STEP 1 — Module dependency graph
26
+ // =============================================================================
27
+
28
/**
 * Map every source file of a package to the local modules it imports.
 *
 * Asks `findFiles` for `.js` files under `packagePath`, excluding
 * `node_modules` and `.git`. Keys and values are package-relative paths with
 * forward slashes; only `./` and `../` style specifiers are followed.
 *
 * @param {string} packagePath - Root directory of the package to scan.
 * @returns {Object<string, string[]>} relative file → relative local imports.
 */
function buildModuleGraph(packagePath) {
  const sourceFiles = findFiles(packagePath, {
    extensions: ['.js'],
    excludedDirs: ['node_modules', '.git'],
  });

  const graph = {};
  for (const absFile of sourceFiles) {
    graph[toRel(absFile, packagePath)] = extractLocalImports(absFile, packagePath);
  }
  return graph;
}
45
+
46
/**
 * List the local modules one file depends on.
 *
 * Collects top-level ES module dependencies (`import … from`, and the
 * re-export forms `export … from` / `export * from`) plus every
 * `require('<literal>')` found anywhere in the file. Only specifiers accepted
 * by `isLocalImport` that resolve to an existing file are kept.
 *
 * @param {string} filePath - Absolute path of the file to inspect.
 * @param {string} packagePath - Package root used to relativize results.
 * @returns {string[]} De-duplicated package-relative paths; empty when the
 *   file cannot be read or parsed.
 */
function extractLocalImports(filePath, packagePath) {
  const ast = parseFile(filePath);
  if (!ast) return [];

  const imports = [];
  const fileDir = path.dirname(filePath);

  for (const node of ast.body) {
    // Re-exports carry a `source` just like imports and are dependencies too.
    const hasModuleSource =
      node.type === 'ImportDeclaration' ||
      node.type === 'ExportNamedDeclaration' ||
      node.type === 'ExportAllDeclaration';
    if (!hasModuleSource) continue;
    // `export const x = 1` has a null source; skip anything without a string specifier.
    if (!node.source || typeof node.source.value !== 'string') continue;

    const spec = node.source.value;
    if (!isLocalImport(spec)) continue;

    const resolved = resolveLocal(fileDir, spec, packagePath);
    if (resolved) imports.push(resolved);
  }

  walkForRequires(ast, fileDir, packagePath, imports);
  return [...new Set(imports)];
}
65
+
66
/**
 * Append every resolvable local `require('<literal>')` under `node` to `imports`.
 *
 * Visits the subtree in the same pre-order as `walkAST` and applies the same
 * call-shape test as `isRequireCall`.
 *
 * @param {object} node - AST node to start from.
 * @param {string} fileDir - Directory of the file being analysed.
 * @param {string} packagePath - Package root used to relativize results.
 * @param {string[]} imports - Output array, mutated in place.
 */
function walkForRequires(node, fileDir, packagePath, imports) {
  walkAST(node, (current) => {
    if (!isRequireCall(current)) return;

    const spec = current.arguments[0].value;
    if (!isLocalImport(spec)) return;

    const resolved = resolveLocal(fileDir, spec, packagePath);
    if (resolved) imports.push(resolved);
  });
}
96
+
97
+ // =============================================================================
98
+ // STEP 2 — Annotate tainted exports
99
+ // =============================================================================
100
+
101
/**
 * Run `analyzeExports` on every file listed in the graph.
 *
 * @param {Object<string, string[]>} graph - Output of `buildModuleGraph`; only its keys are used.
 * @param {string} packagePath - Package root the graph keys are relative to.
 * @returns {Object<string, Object>} relative file → export name → taint record,
 *   e.g. { 'reader.js': { default: { tainted: true, source: '...', detail: '...' } } }.
 */
function annotateTaintedExports(graph, packagePath) {
  return Object.keys(graph).reduce((annotated, relFile) => {
    annotated[relFile] = analyzeExports(path.resolve(packagePath, relFile));
    return annotated;
  }, {});
}
115
+
116
/**
 * Report which CommonJS exports of one file depend on a sensitive source.
 *
 * Works in three walks over the AST:
 *   1. index class declarations by name;
 *   2. record variables bound to a sensitive module (`const fs = require('fs')`)
 *      and variables assigned a sensitive value (`const d = fs.readFileSync(..)`);
 *   3. inspect every `module.exports` / `exports.x` assignment.
 *
 * @param {string} filePath - Absolute path of the file to analyse.
 * @returns {Object<string, {tainted: true, source: string, detail: string}>}
 *   Tainted exports keyed by export name ('default' for `module.exports = …`);
 *   empty when the file cannot be read or parsed.
 */
function analyzeExports(filePath) {
  const ast = parseFile(filePath);
  if (!ast) return {};

  // These tables are keyed by identifier names taken from the scanned source.
  // They are prototype-less so that names such as `toString` or `constructor`
  // cannot resolve to inherited Object.prototype members and be mistaken for
  // a recorded module, taint or class.
  const moduleVars = Object.create(null); // local name → sensitive module name
  const taintedVars = Object.create(null); // local name → { source, detail }
  const classDefs = Object.create(null); // class name → ClassDeclaration node

  walkAST(ast, (node) => {
    if (node.type === 'ClassDeclaration' && node.id && node.id.name) {
      classDefs[node.id.name] = node;
    }
  });

  // First pass: collect require assignments and tainted variable assignments.
  walkAST(ast, (node) => {
    if (node.type === 'VariableDeclaration') {
      for (const decl of node.declarations) {
        if (!decl.init || !decl.id) continue;

        // const fs = require('fs')
        if (isRequireCall(decl.init) && SENSITIVE_MODULES.has(decl.init.arguments[0].value)) {
          if (decl.id.type === 'Identifier') {
            moduleVars[decl.id.name] = decl.init.arguments[0].value;
          }
        }

        // const data = fs.readFileSync(...) or const token = process.env.XXX
        if (decl.id.type === 'Identifier') {
          const taint = checkNodeTaint(decl.init, moduleVars);
          if (taint) taintedVars[decl.id.name] = taint;
        }
      }
    }

    // let x; x = fs.readFileSync(...)
    if (node.type === 'ExpressionStatement' && node.expression.type === 'AssignmentExpression') {
      const { left, right } = node.expression;
      if (left.type === 'Identifier') {
        const taint = checkNodeTaint(right, moduleVars);
        if (taint) taintedVars[left.name] = taint;
      }
    }
  });

  // Second pass: find exports and check if they are tainted.
  const exports = {};
  const asExport = (taint) => ({ tainted: true, source: taint.source, detail: taint.detail });

  walkAST(ast, (node) => {
    if (!isModuleExportsAssign(node)) return;

    const value = node.expression.right;
    const exportName = getExportName(node.expression.left);

    // The exported expression is itself a sensitive read/call.
    const directTaint = checkNodeTaint(value, moduleVars);
    if (directTaint) {
      exports[exportName] = asExport(directTaint);
      return;
    }

    // The exported expression names a variable recorded as tainted.
    if (value.type === 'Identifier' && taintedVars[value.name]) {
      exports[exportName] = asExport(taintedVars[value.name]);
      return;
    }

    // module.exports = { read: function() { ... }, token }
    if (value.type === 'ObjectExpression' && exportName === 'default') {
      for (const prop of value.properties) {
        if (!prop.key) continue; // e.g. spread elements have no key
        const propName = prop.key.name || prop.key.value || 'unknown';

        const propBody = getFunctionBody(prop.value);
        if (propBody) {
          const bodyTaint = scanBodyForTaint(propBody, moduleVars, taintedVars);
          if (bodyTaint) exports[propName] = asExport(bodyTaint);
          continue;
        }

        // Property value is a plain expression.
        const valueTaint = checkNodeTaint(prop.value, moduleVars);
        if (valueTaint) {
          exports[propName] = asExport(valueTaint);
        } else if (prop.value.type === 'Identifier' && taintedVars[prop.value.name]) {
          exports[propName] = asExport(taintedVars[prop.value.name]);
        }
      }
      return;
    }

    // module.exports = class { ... }
    if (value.type === 'ClassExpression') {
      analyzeClassBody(value, moduleVars, taintedVars, exports);
      return;
    }

    // module.exports = ClassName (declared elsewhere in this file)
    if (value.type === 'Identifier' && classDefs[value.name]) {
      analyzeClassBody(classDefs[value.name], moduleVars, taintedVars, exports);
      return;
    }

    // module.exports = function() { ... } / () => ...
    const funcBody = getFunctionBody(value);
    if (funcBody) {
      const bodyTaint = scanBodyForTaint(funcBody, moduleVars, taintedVars);
      if (bodyTaint) exports[exportName] = asExport(bodyTaint);
    }
  });

  return exports;
}
246
+
247
/**
 * Record every non-constructor class method whose body touches a sensitive source.
 *
 * @param {object} classNode - ClassDeclaration or ClassExpression node.
 * @param {Object<string, string>} moduleVars - local name → sensitive module name.
 * @param {Object<string, object>} taintedVars - local name → taint record.
 * @param {Object<string, object>} exports - Output map, mutated in place; tainted
 *   methods are stored under their method name.
 */
function analyzeClassBody(classNode, moduleVars, taintedVars, exports) {
  const members = classNode.body && classNode.body.body;
  if (!members) return;

  for (const member of members) {
    if (member.type !== 'MethodDefinition') continue;

    const methodName = member.key && (member.key.name || member.key.value);
    if (!methodName || methodName === 'constructor') continue;

    const statements = getFunctionBody(member.value);
    if (!statements) continue;

    const bodyTaint = scanBodyForTaint(statements, moduleVars, taintedVars);
    if (!bodyTaint) continue;

    exports[methodName] = { tainted: true, source: bodyTaint.source, detail: bodyTaint.detail };
  }
}
266
+
267
/**
 * Decide whether a single AST node is itself a sensitive source.
 *
 * Recognises:
 *   - `process.env` and any property read below it (`process.env.X`);
 *   - method calls on a sensitive module, either inline
 *     (`require('fs').readFileSync(..)`) or through a tracked binding
 *     (`fs.readFileSync(..)`), as classified by `describeSensitiveCall`;
 *   - bare calls named `exec`, `execSync`, `spawn` or `spawnSync`.
 *
 * @param {object|null|undefined} node - AST node to classify.
 * @param {Object<string, string>} moduleVars - local name → sensitive module name.
 * @returns {{source: string, detail: string}|null} Taint record, or null.
 */
function checkNodeTaint(node, moduleVars) {
  if (!node) return null;

  if (node.type === 'MemberExpression') {
    const chain = getMemberChain(node);
    // Match the exact object or a property below it. A plain prefix test would
    // also accept unrelated members such as `process.envFoo`.
    if (chain === 'process.env') {
      return { source: 'process.env', detail: '' };
    }
    if (chain.startsWith('process.env.')) {
      return { source: 'process.env', detail: chain.slice('process.env.'.length) };
    }
  }

  if (node.type === 'CallExpression' && node.callee.type === 'MemberExpression') {
    const obj = node.callee.object;
    const prop = node.callee.property;
    const methodName = prop.name || prop.value;

    // Inline require: require('fs').readFileSync(...)
    if (isRequireCall(obj) && SENSITIVE_MODULES.has(obj.arguments[0].value)) {
      return describeSensitiveCall(obj.arguments[0].value, methodName, node.arguments);
    }

    // Variable-based: fs.readFileSync(...)
    if (obj.type === 'Identifier' && moduleVars[obj.name]) {
      return describeSensitiveCall(moduleVars[obj.name], methodName, node.arguments);
    }
  }

  // Bare call: exec(...), spawn(...)
  if (node.type === 'CallExpression' && node.callee.type === 'Identifier') {
    const name = node.callee.name;
    if (name === 'exec' || name === 'execSync' || name === 'spawn' || name === 'spawnSync') {
      return { source: `child_process.${name}`, detail: extractLiteralArg(node.arguments) };
    }
  }

  return null;
}
312
+
313
/**
 * Classify a method call on a sensitive module.
 *
 * @param {string} mod - Module name ('fs', 'os', 'child_process', 'dns').
 * @param {string} method - Method being called on that module.
 * @param {object[]} args - Call argument nodes; a literal first argument becomes `detail`.
 * @returns {{source: string, detail: string}|null} Taint record for the
 *   module/method pairs listed below, otherwise null.
 */
function describeSensitiveCall(mod, method, args) {
  const detail = extractLiteralArg(args);

  if (mod === 'fs' && (method === 'readFileSync' || method === 'readFile')) {
    return { source: `fs.${method}`, detail };
  }
  if (mod === 'os' && method === 'homedir') {
    return { source: 'os.homedir', detail: '' };
  }
  // `spawnSync` is included so that `cp.spawnSync(..)` is treated the same as
  // the bare `spawnSync(..)` call that checkNodeTaint already flags.
  if (mod === 'child_process' && ['exec', 'execSync', 'spawn', 'spawnSync'].includes(method)) {
    return { source: `child_process.${method}`, detail };
  }
  if (mod === 'dns' && method === 'resolveTxt') {
    return { source: 'dns.resolveTxt', detail };
  }
  return null;
}
329
+
330
/**
 * Scan a function body for the first sensitive expression it contains.
 *
 * A hit is either a node that `checkNodeTaint` classifies as a source, or a
 * `return <identifier>` whose identifier is known to be tainted.
 *
 * @param {object[]} body - Statement nodes of the function body.
 * @param {Object<string, string>} moduleVars - local name → sensitive module
 *   name for the enclosing file. Not modified.
 * @param {Object<string, object>} taintedVars - local name → taint record for
 *   the enclosing file. Not modified.
 * @returns {{source: string, detail: string}|null} First taint found, or null.
 */
function scanBodyForTaint(body, moduleVars, taintedVars) {
  // Work on prototype-less copies. Bindings discovered inside this function
  // must not leak into the caller's tables, where they would affect unrelated
  // functions scanned later. Names such as `toString` must not resolve to
  // inherited Object.prototype members either.
  const localModules = Object.assign(Object.create(null), moduleVars);
  const localTainted = Object.assign(Object.create(null), taintedVars);

  let found = null;
  walkAST({ type: 'Program', body }, (node) => {
    if (found) return;

    // Track requires and tainted assignments declared inside the function.
    if (node.type === 'VariableDeclaration') {
      for (const decl of node.declarations) {
        if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
        if (isRequireCall(decl.init) && SENSITIVE_MODULES.has(decl.init.arguments[0].value)) {
          localModules[decl.id.name] = decl.init.arguments[0].value;
        }
        const declTaint = checkNodeTaint(decl.init, localModules);
        if (declTaint) localTainted[decl.id.name] = declTaint;
      }
    }

    const taint = checkNodeTaint(node, localModules);
    if (taint) {
      found = taint;
      return;
    }

    // Return of a tainted variable.
    if (node.type === 'ReturnStatement' && node.argument) {
      if (node.argument.type === 'Identifier' && localTainted[node.argument.name]) {
        found = localTainted[node.argument.name];
      }
    }
  });
  return found;
}
369
+
370
+ // =============================================================================
371
+ // STEP 3 — Cross-file flow detection
372
+ // =============================================================================
373
+
374
/**
 * Report flows where a tainted export of one module reaches a sink call in
 * another module of the same package.
 *
 * Taint is first expanded through re-exports (see
 * `expandTaintThroughReexports`). Each file is then checked for sink calls
 * whose arguments use a variable that received a tainted import.
 *
 * @param {Object<string, string[]>} graph - Output of `buildModuleGraph`.
 * @param {Object<string, Object>} taintedExports - Output of `annotateTaintedExports`.
 * @param {string} packagePath - Package root the graph keys are relative to.
 * @returns {object[]} One CRITICAL `cross_file_dataflow` finding per sink hit.
 */
function detectCrossFileFlows(graph, taintedExports, packagePath) {
  const expandedTaint = expandTaintThroughReexports(graph, taintedExports, packagePath);
  const flows = [];

  for (const relFile of Object.keys(graph)) {
    const ast = parseFile(path.resolve(packagePath, relFile));
    if (!ast) continue;

    // Variables in this file that hold a value imported from a tainted export.
    const localTaint = collectImportTaint(ast, relFile, graph, expandedTaint, packagePath);
    if (Object.keys(localTaint).length === 0) continue;

    for (const { sink, taintedVar } of findSinksUsingTainted(ast, localTaint)) {
      const origin = localTaint[taintedVar];
      const detailSuffix = origin.detail ? '(' + origin.detail + ')' : '';
      flows.push({
        severity: 'CRITICAL',
        type: 'cross_file_dataflow',
        sourceFile: origin.sourceFile,
        source: `${origin.source}${detailSuffix}`,
        sinkFile: relFile,
        sink,
        description: `Credential read in ${origin.sourceFile} exported and sent to network in ${relFile}`,
      });
    }
  }

  return flows;
}
412
+
413
/**
 * Propagate taint to modules that re-export values imported from tainted modules.
 *
 * Starts from a per-file shallow copy of `taintedExports` and makes at most
 * two passes over the files in `graph`, stopping early once a pass adds
 * nothing. Three re-export shapes are recognised:
 *   - `module.exports = taintedVar`
 *   - `module.exports = require('./x')` where x's default export is tainted
 *   - `module.exports = fn(taintedVar)`
 * An export that is already recorded is never overwritten.
 *
 * @param {Object<string, string[]>} graph - Output of `buildModuleGraph`.
 * @param {Object<string, Object>} taintedExports - Output of `annotateTaintedExports`. Not modified.
 * @param {string} packagePath - Package root the graph keys are relative to.
 * @returns {Object<string, Object>} Expanded file → export name → taint record map.
 */
function expandTaintThroughReexports(graph, taintedExports, packagePath) {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const expanded = {};
  for (const f of Object.keys(taintedExports)) {
    expanded[f] = { ...taintedExports[f] };
  }

  for (let level = 0; level < 2; level++) {
    let changed = false;

    for (const relFile of Object.keys(graph)) {
      const absFile = path.resolve(packagePath, relFile);
      const ast = parseFile(absFile);
      if (!ast) continue;

      const localTaint = collectImportTaint(ast, relFile, graph, expanded, packagePath);

      // Propagate taint through local variable assignments,
      // e.g. const encoded = Buffer.from(raw) where raw is tainted.
      if (Object.keys(localTaint).length > 0) {
        propagateLocalTaint(ast, localTaint);
      }

      // Own-property lookup only: identifiers like `toString` must not resolve
      // to inherited Object.prototype members and count as tainted.
      const taintOf = (name) => (hasOwn(localTaint, name) ? localTaint[name] : undefined);

      if (!expanded[relFile]) expanded[relFile] = {};
      const fileExports = expanded[relFile];
      const fileDir = path.dirname(absFile);

      // Record a newly tainted export; existing entries are kept as they are.
      const markExport = (exportName, origin, sourceFile) => {
        if (hasOwn(fileExports, exportName) && fileExports[exportName]) return;
        fileExports[exportName] = {
          tainted: true,
          source: origin.source,
          detail: origin.detail,
          sourceFile,
        };
        changed = true;
      };

      walkAST(ast, (node) => {
        if (!isModuleExportsAssign(node)) return;
        const value = node.expression.right;
        const exportName = getExportName(node.expression.left);

        // Direct re-export: module.exports = taintedVar
        const direct = value.type === 'Identifier' ? taintOf(value.name) : undefined;
        if (direct) {
          markExport(exportName, direct, direct.sourceFile);
          return;
        }

        // Inline re-export: module.exports = require('./x')
        if (isRequireCall(value) && isLocalImport(value.arguments[0].value)) {
          const resolved = resolveLocal(fileDir, value.arguments[0].value, packagePath);
          const defTaint = resolved && expanded[resolved] && expanded[resolved]['default'];
          if (defTaint && defTaint.tainted) {
            markExport(exportName, defTaint, defTaint.sourceFile || resolved);
          }
          return;
        }

        // Wrapped re-export: module.exports = fn(taintedVar)
        if (value.type === 'CallExpression') {
          const tArg = findFirstTaintedArg(value.arguments, localTaint);
          const origin = tArg ? taintOf(tArg) : undefined;
          if (origin) markExport(exportName, origin, origin.sourceFile);
        }
      });
    }

    if (!changed) break;
  }

  return expanded;
}
499
+
500
/**
 * Propagate taint through local variable declarations.
 *
 * If `const x = fn(taintedVar)` (see `findFirstTaintedArgInExpr` for the
 * accepted shapes), `x` receives a copy of `taintedVar`'s taint record.
 * Declarations are processed once, in traversal order.
 *
 * @param {object} ast - Parsed file.
 * @param {Object<string, object>} localTaint - variable name → taint record; mutated in place.
 */
function propagateLocalTaint(ast, localTaint) {
  // Own-property test: a variable called `toString` or `valueOf` must not look
  // "already tainted" just because Object.prototype defines that name.
  const isTainted = (name) => Object.prototype.hasOwnProperty.call(localTaint, name);

  walkAST(ast, (node) => {
    if (node.type !== 'VariableDeclaration') return;
    for (const decl of node.declarations) {
      if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
      if (isTainted(decl.id.name)) continue; // already tainted

      const tArg = findFirstTaintedArgInExpr(decl.init, localTaint);
      if (tArg && isTainted(tArg)) {
        localTaint[decl.id.name] = { ...localTaint[tArg] };
      }
    }
  });
}
517
+
518
/**
 * Tell whether `name` is a real entry of `taintMap`.
 * Inherited members (`toString`, `constructor`, …) and internal
 * `__module__*` markers do not count.
 */
function isOwnTaintedName(taintMap, name) {
  return (
    Object.prototype.hasOwnProperty.call(taintMap, name) &&
    Boolean(taintMap[name]) &&
    !name.startsWith('__module__')
  );
}

/**
 * Find the first tainted identifier among function call arguments.
 *
 * @param {object[]|null|undefined} args - Argument nodes of a call.
 * @param {Object<string, object>} taintMap - variable name → taint record.
 * @returns {string|null} Name of the first tainted identifier argument, or null.
 */
function findFirstTaintedArg(args, taintMap) {
  if (!args) return null;
  for (const arg of args) {
    if (arg.type === 'Identifier' && isOwnTaintedName(taintMap, arg.name)) {
      return arg.name;
    }
  }
  return null;
}

/**
 * Recursively check if an expression uses a tainted variable as argument.
 * Handles: fn(tainted), fn(a).method() chains and member access on such calls.
 *
 * @param {object|null|undefined} node - Expression node.
 * @param {Object<string, object>} taintMap - variable name → taint record.
 * @returns {string|null} Name of the tainted identifier found, or null.
 */
function findFirstTaintedArgInExpr(node, taintMap) {
  if (!node) return null;
  if (node.type === 'Identifier' && isOwnTaintedName(taintMap, node.name)) {
    return node.name;
  }
  if (node.type === 'CallExpression') {
    const fromArgs = findFirstTaintedArg(node.arguments, taintMap);
    if (fromArgs) return fromArgs;
    // Check callee for chained calls: fn(x).method()
    return findFirstTaintedArgInExpr(node.callee, taintMap);
  }
  if (node.type === 'MemberExpression') {
    return findFirstTaintedArgInExpr(node.object, taintMap);
  }
  return null;
}
551
+
552
/**
 * Determine which local variables of a file hold values that come from
 * tainted exports of other local modules.
 *
 * Pass 1 handles `const m = require('./x')` (default export) and
 * `const { a } = require('./x')` (named exports). Pass 2 follows
 * `new Imported()`, `imported.prop` and `imported.method()` initialisers.
 *
 * @param {object} ast - Parsed file.
 * @param {string} currentFile - Package-relative path of that file.
 * @param {object} graph - Module graph (not used by this function).
 * @param {Object<string, Object>} taintedExports - file → export name → taint record.
 * @param {string} packagePath - Package root.
 * @returns {Object<string, {source: string, detail: string, sourceFile: string}>}
 *   variable name → taint record.
 */
function collectImportTaint(ast, currentFile, graph, taintedExports, packagePath) {
  // Prefix of temporary entries that remember which module a binding refers to.
  const MODULE_MARKER = '__module__';
  const localTaint = {};
  const fileDir = path.dirname(path.resolve(packagePath, currentFile));

  // Shape an export's taint record for a local variable.
  const toLocalTaint = (exported, fallbackFile) => ({
    source: exported.source,
    detail: exported.detail || '',
    sourceFile: exported.sourceFile || fallbackFile,
  });

  // Invoke `visit` for each declarator of each variable declaration, in AST order.
  const forEachDeclarator = (visit) => {
    walkAST(ast, (node) => {
      if (node.type !== 'VariableDeclaration') return;
      for (const decl of node.declarations) visit(decl);
    });
  };

  // Taint of `<binding>.<prop>` when <binding> refers to an imported module.
  const memberTaint = (member) => {
    if (member.object.type !== 'Identifier') return null;
    const modRef = localTaint[MODULE_MARKER + member.object.name];
    if (!modRef) return null;
    const propName = member.property.name || member.property.value;
    const exported = modRef.modTaint[propName];
    return exported && exported.tainted ? toLocalTaint(exported, modRef.resolved) : null;
  };

  // Pass 1: bindings created directly from require('./...').
  forEachDeclarator((decl) => {
    if (!decl.init || !decl.id) return;
    if (!isRequireCall(decl.init) || !isLocalImport(decl.init.arguments[0].value)) return;

    const resolved = resolveLocal(fileDir, decl.init.arguments[0].value, packagePath);
    if (!resolved || !taintedExports[resolved]) return;
    const modTaint = taintedExports[resolved];

    // const reader = require('./reader')
    if (decl.id.type === 'Identifier') {
      const defTaint = modTaint['default'];
      if (defTaint && defTaint.tainted) {
        localTaint[decl.id.name] = toLocalTaint(defTaint, resolved);
      }
      // Remember the module so pass 2 can resolve named-export access.
      localTaint[MODULE_MARKER + decl.id.name] = { resolved, modTaint };
    }

    // const { getToken } = require('./utils')
    if (decl.id.type === 'ObjectPattern') {
      for (const prop of decl.id.properties) {
        const key = prop.key && (prop.key.name || prop.key.value);
        const localName = prop.value && prop.value.name;
        if (key && localName && modTaint[key] && modTaint[key].tainted) {
          localTaint[localName] = toLocalTaint(modTaint[key], resolved);
        }
      }
    }
  });

  // Pass 2: member access, class instances, and method calls.
  forEachDeclarator((decl) => {
    if (!decl.init || !decl.id || decl.id.type !== 'Identifier') return;
    const init = decl.init;

    // const c = new Collector() — the instance refers to the same module.
    if (init.type === 'NewExpression' && init.callee.type === 'Identifier') {
      const modRef = localTaint[MODULE_MARKER + init.callee.name];
      if (modRef) localTaint[MODULE_MARKER + decl.id.name] = modRef;
    }

    // const data = reader.data  or  const data = reader.getData()
    let accessed = null;
    if (init.type === 'MemberExpression') {
      accessed = init;
    } else if (init.type === 'CallExpression' && init.callee.type === 'MemberExpression') {
      accessed = init.callee;
    }
    const taint = accessed && memberTaint(accessed);
    if (taint) localTaint[decl.id.name] = taint;
  });

  // Drop the internal markers before handing the map to callers.
  Object.keys(localTaint)
    .filter((key) => key.startsWith(MODULE_MARKER))
    .forEach((key) => {
      delete localTaint[key];
    });

  return localTaint;
}
661
+
662
/**
 * Find sink invocations in the AST that receive a tainted variable.
 *
 * Both plain calls (`fetch(x)`) and constructor calls (`new WebSocket(x)`,
 * `new Function(x)`) are inspected; `getSinkName` decides whether the callee
 * is a sink.
 *
 * @param {object} ast - Parsed file.
 * @param {Object<string, object>} localTaint - variable name → taint record.
 * @returns {{sink: string, taintedVar: string}[]} One entry per matching call.
 */
function findSinksUsingTainted(ast, localTaint) {
  const taintedNames = new Set(Object.keys(localTaint));
  const sinks = [];

  walkAST(ast, (node) => {
    // NewExpression has the same callee/arguments layout as CallExpression.
    if (node.type !== 'CallExpression' && node.type !== 'NewExpression') return;

    const sinkName = getSinkName(node);
    if (!sinkName) return;

    // Check if any argument references a tainted variable.
    const taintedArg = findTaintedArgument(node.arguments, taintedNames);
    if (taintedArg) {
      sinks.push({ sink: sinkName, taintedVar: taintedArg });
    }
  });

  return sinks;
}
684
+
685
/**
 * Return a display name when a call or `new` expression targets a known sink.
 *
 * @param {object} callNode - CallExpression or NewExpression node.
 * @returns {string|null} e.g. 'fetch()', 'https.request()', 'send()',
 *   'new WebSocket()'; null when the callee is not a sink.
 */
function getSinkName(callNode) {
  const callee = callNode.callee;

  // new WebSocket(url), new XMLHttpRequest()
  // Checked first: both names are also in SINK_CALLEE_NAMES, so the generic
  // identifier branch below would otherwise always win and drop the `new`.
  if (callNode.type === 'NewExpression') {
    if (callee.type === 'Identifier' && (callee.name === 'WebSocket' || callee.name === 'XMLHttpRequest')) {
      return `new ${callee.name}()`;
    }
  }

  // fetch(url), eval(code), WebSocket(url)
  if (callee.type === 'Identifier' && SINK_CALLEE_NAMES.has(callee.name)) {
    return `${callee.name}()`;
  }

  if (callee.type === 'MemberExpression') {
    // https.request(...), child_process.exec(...)
    const chain = getMemberChain(callee);
    if (SINK_MEMBER_METHODS.has(chain)) {
      return `${chain}()`;
    }
    // instance.connect(), socket.write(), ws.send()
    const method = callee.property.name || callee.property.value;
    if (SINK_INSTANCE_METHODS.has(method)) {
      return `${method}()`;
    }
  }

  return null;
}
715
+
716
/**
 * Return the first tainted variable name used by any call argument.
 *
 * Argument shapes inspected: a bare identifier, identifiers interpolated
 * directly in a template literal, `+` concatenations (via
 * `findTaintedInExpr`), and identifier values of an object literal.
 *
 * @param {object[]|null|undefined} args - Argument nodes of a call.
 * @param {Set<string>} taintedNames - Names of tainted variables.
 * @returns {string|null} Tainted variable name, or null.
 */
function findTaintedArgument(args, taintedNames) {
  if (!args) return null;

  const isTaintedId = (candidate) =>
    candidate.type === 'Identifier' && taintedNames.has(candidate.name);

  for (const arg of args) {
    switch (arg.type) {
      case 'Identifier':
        if (taintedNames.has(arg.name)) return arg.name;
        break;

      // Template literal: `https://evil.com/?d=${data}`
      case 'TemplateLiteral': {
        const hit = arg.expressions.find(isTaintedId);
        if (hit) return hit.name;
        break;
      }

      // Concatenation: 'url' + data
      case 'BinaryExpression': {
        if (arg.operator !== '+') break;
        const hit =
          findTaintedInExpr(arg.left, taintedNames) || findTaintedInExpr(arg.right, taintedNames);
        if (hit) return hit;
        break;
      }

      // Object: { body: data }
      case 'ObjectExpression': {
        const hit = arg.properties.find((prop) => prop.value && isTaintedId(prop.value));
        if (hit) return hit.value.name;
        break;
      }

      default:
        break;
    }
  }
  return null;
}
748
+
749
/**
 * Search a `+` concatenation tree, left operand first, for a tainted identifier.
 *
 * @param {object} node - Identifier, BinaryExpression or any other expression node.
 * @param {Set<string>} taintedNames - Names of tainted variables.
 * @returns {string|null} First tainted identifier name, or null.
 */
function findTaintedInExpr(node, taintedNames) {
  // Explicit stack instead of recursion; right is pushed first so left pops first.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === 'Identifier') {
      if (taintedNames.has(current.name)) return current.name;
      continue;
    }
    if (current.type === 'BinaryExpression' && current.operator === '+') {
      pending.push(current.right, current.left);
    }
  }
  return null;
}
756
+
757
+ // =============================================================================
758
+ // Shared helpers
759
+ // =============================================================================
760
+
761
/**
 * Read a file and parse it with acorn.
 *
 * The file is parsed with ACORN_OPTIONS first (module goal). If that fails it
 * is parsed again as a classic script. The module goal is always strict, so
 * valid sloppy-mode CommonJS code (for example a file using `with`) would
 * otherwise be rejected and silently dropped from the analysis.
 *
 * @param {string} filePath - Absolute path of the file to parse.
 * @returns {object|null} Program node, or null when the file cannot be read
 *   or parsed under either goal.
 */
function parseFile(filePath) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch {
    return null;
  }

  try {
    return acorn.parse(content, ACORN_OPTIONS);
  } catch {
    // Fall through and retry as a script.
  }

  try {
    return acorn.parse(content, { ...ACORN_OPTIONS, sourceType: 'script' });
  } catch {
    return null;
  }
}
774
+
775
/**
 * Pre-order traversal of an AST.
 *
 * Calls `visitor(node)` for the starting node when it has a `type`, then
 * descends into every property value (or array element) that is an object
 * with a truthy `type`. The `type` property itself is never descended into.
 *
 * @param {object} node - Node to start from; non-objects are ignored.
 * @param {function(object): void} visitor - Called once per visited node.
 */
function walkAST(node, visitor) {
  if (!node || typeof node !== 'object') return;
  if (node.type) visitor(node);

  for (const [key, child] of Object.entries(node)) {
    if (key === 'type') continue;
    // Treat a single child like a one-element list so both cases share a path.
    const candidates = Array.isArray(child) ? child : [child];
    for (const candidate of candidates) {
      if (candidate && typeof candidate === 'object' && candidate.type) {
        walkAST(candidate, visitor);
      }
    }
  }
}
790
+
791
/**
 * Test whether a node is `require('<string literal>')` with exactly one argument.
 *
 * @param {object|null|undefined} node - AST node.
 * @returns {boolean|null|undefined} true for a matching call; otherwise a
 *   falsy value (a falsy `node` or callee is returned as-is).
 */
function isRequireCall(node) {
  if (!node) return node;
  if (node.type !== 'CallExpression') return false;

  const callee = node.callee;
  if (!callee) return callee;
  if (callee.type !== 'Identifier' || callee.name !== 'require') return false;

  if (node.arguments.length !== 1) return false;
  const onlyArg = node.arguments[0];
  return onlyArg.type === 'Literal' && typeof onlyArg.value === 'string';
}
801
+
802
/**
 * Test whether a module specifier refers to a file relative to the importer.
 *
 * Besides the `./` and `../` prefixes this accepts the bare specifiers `.`
 * and `..`, which Node resolves relative to the current file as well
 * (`require('.')` loads the directory's index file).
 *
 * @param {string} spec - Module specifier as written in the source.
 * @returns {boolean} true for relative specifiers.
 */
function isLocalImport(spec) {
  return spec === '.' || spec === '..' || spec.startsWith('./') || spec.startsWith('../');
}
805
+
806
/**
 * Test whether a statement is a plain `=` assignment to a CommonJS export.
 *
 * Accepted targets: `module.exports`, `module.exports.<name>` and
 * `exports.<name>`. The `exports` property of `module` may be written as an
 * identifier or as a literal key (`module['exports']`), matching how
 * `getExportName` and `getMemberChain` read property names.
 *
 * @param {object} node - AST node.
 * @returns {boolean} true when the node assigns to an export.
 */
function isModuleExportsAssign(node) {
  if (node.type !== 'ExpressionStatement') return false;
  const expr = node.expression;
  if (expr.type !== 'AssignmentExpression' || expr.operator !== '=') return false;

  const left = expr.left;
  if (left.type !== 'MemberExpression') return false;

  // True for `module.exports` / `module['exports']`.
  const isModuleExports = (member) =>
    member.type === 'MemberExpression' &&
    member.object.type === 'Identifier' &&
    member.object.name === 'module' &&
    (member.property.name || member.property.value) === 'exports';

  // module.exports = ...
  if (isModuleExports(left)) return true;
  // module.exports.foo = ...
  if (isModuleExports(left.object)) return true;
  // exports.foo = ...
  return left.object.type === 'Identifier' && left.object.name === 'exports';
}
822
+
823
/**
 * Name under which an export assignment target is recorded.
 *
 *   module.exports = ...      → 'default'
 *   module.exports.foo = ...  → 'foo'
 *   exports.foo = ...         → 'foo'
 *
 * @param {object} left - Left-hand side of the assignment.
 * @returns {string} Export name; 'default' for anything that is not a named target.
 */
function getExportName(left) {
  if (left.type !== 'MemberExpression') return 'default';

  const target = left.object;
  const isNamedTarget =
    target.type === 'MemberExpression' ||
    (target.type === 'Identifier' && target.name === 'exports');
  if (!isNamedTarget) return 'default';

  return left.property.name || left.property.value || 'default';
}
838
+
839
/**
 * Return the statements of a function expression or arrow function.
 *
 * An arrow function with an expression body (`() => expr`) is presented as a
 * single synthetic `return expr` statement so callers can treat both forms alike.
 *
 * @param {object|null|undefined} node - Candidate function node.
 * @returns {object[]|null} Statement list, or null when `node` is not a
 *   FunctionExpression / ArrowFunctionExpression.
 */
function getFunctionBody(node) {
  const isFunctionValue =
    Boolean(node) &&
    (node.type === 'FunctionExpression' || node.type === 'ArrowFunctionExpression');
  if (!isFunctionValue) return null;

  const { body } = node;
  return body.type === 'BlockStatement'
    ? body.body
    : [{ type: 'ReturnStatement', argument: body }];
}
848
+
849
/**
 * Render a member expression as a dotted path, e.g. `process.env.TOKEN`.
 *
 * Property names come from `property.name` or, failing that, a truthy
 * `property.value`; otherwise the segment is empty. A root that is not an
 * identifier contributes an empty string.
 *
 * @param {object} node - Identifier, MemberExpression or any other node.
 * @returns {string} Dotted path ('' for nodes that are neither identifier nor member access).
 */
function getMemberChain(node) {
  // Peel properties from the outside in, keeping them in source order.
  const segments = [];
  let current = node;
  while (current.type === 'MemberExpression') {
    segments.unshift(current.property.name || current.property.value || '');
    current = current.object;
  }

  const root = current.type === 'Identifier' ? current.name : '';
  return segments.reduce((chain, segment) => `${chain}.${segment}`, root);
}
858
+
859
/**
 * Return the first call argument when it is a plain string.
 *
 * Accepts a string literal or a template literal without interpolations
 * (its raw text is returned).
 *
 * @param {object[]|null|undefined} args - Argument nodes of a call.
 * @returns {string} The string, or '' when there is no such argument.
 */
function extractLiteralArg(args) {
  if (!args || args.length === 0) return '';

  const head = args[0];
  switch (head.type) {
    case 'Literal':
      return typeof head.value === 'string' ? head.value : '';
    case 'TemplateLiteral':
      return head.quasis.length === 1 ? head.quasis[0].value.raw : '';
    default:
      return '';
  }
}
866
+
867
/**
 * Resolve a relative specifier to an existing file.
 *
 * Candidates are tried in order: the path as written, the path with `.js`
 * appended, then `index.js` inside the path.
 *
 * @param {string} fileDir - Directory of the importing file.
 * @param {string} spec - Relative module specifier.
 * @param {string} packagePath - Package root used to relativize the result.
 * @returns {string|null} Package-relative path of the first existing candidate, or null.
 */
function resolveLocal(fileDir, spec, packagePath) {
  const base = path.resolve(fileDir, spec);
  const candidates = [base, base + '.js', path.join(base, 'index.js')];

  const match = candidates.find((candidate) => isFileExists(candidate));
  return match ? toRel(match, packagePath) : null;
}
874
+
875
/**
 * Test whether a path exists and is a regular file.
 *
 * @param {string} p - Path to check.
 * @returns {boolean} false for directories, missing paths and any stat error.
 */
function isFileExists(p) {
  try {
    const stats = fs.statSync(p);
    return stats.isFile();
  } catch {
    return false;
  }
}
878
+
879
/**
 * Express an absolute path relative to the package root, using forward slashes.
 *
 * @param {string} abs - Absolute path.
 * @param {string} packagePath - Package root.
 * @returns {string} Relative path with every backslash replaced by '/'.
 */
function toRel(abs, packagePath) {
  const relative = path.relative(packagePath, abs);
  return relative.split('\\').join('/');
}
882
+
883
// Public API: the three analysis steps, in the order they are meant to be run
// (module graph → tainted exports → cross-file flows).
module.exports = { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows };