muaddib-scanner 2.6.0 → 2.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,14 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
3
  const acorn = require('acorn');
4
- const { findFiles, EXCLUDED_DIRS } = require('../utils');
4
+ const { findFiles, EXCLUDED_DIRS, debugLog } = require('../utils');
5
5
  const { ACORN_OPTIONS: BASE_ACORN_OPTIONS, safeParse } = require('../shared/constants.js');
6
6
 
7
+ // --- Bounded path limits ---
8
+ const MAX_GRAPH_NODES = 50; // Max files in dependency graph
9
+ const MAX_GRAPH_EDGES = 200; // Max total import edges
10
+ const MAX_FLOWS = 20; // Max cross-file flow findings per package
11
+
7
12
  // --- Sensitive source patterns ---
8
13
  const SENSITIVE_MODULES = new Set(['fs', 'child_process', 'dns', 'os', 'dgram']);
9
14
 
@@ -18,6 +23,7 @@ const SINK_CALLEE_NAMES = new Set(['fetch', 'eval', 'Function', 'WebSocket', 'XM
18
23
  const SINK_MEMBER_METHODS = new Set([
19
24
  'https.request', 'https.get', 'http.request', 'http.get',
20
25
  'child_process.exec', 'child_process.execSync', 'child_process.spawn',
26
+ 'dns.resolveTxt', 'dns.resolve', 'dns.resolve4', 'dns.resolve6',
21
27
  ]);
22
28
  const SINK_INSTANCE_METHODS = new Set(['connect', 'write', 'send']);
23
29
 
@@ -35,9 +41,23 @@ function buildModuleGraph(packagePath) {
35
41
  extensions: ['.js', '.mjs', '.cjs'],
36
42
  excludedDirs: EXCLUDED_DIRS,
37
43
  });
44
+
45
+ // Bounded path: skip module graph for very large packages
46
+ if (files.length > MAX_GRAPH_NODES) {
47
+ debugLog(`[MODULE-GRAPH] Skipping: ${files.length} files exceeds MAX_GRAPH_NODES (${MAX_GRAPH_NODES})`);
48
+ return graph;
49
+ }
50
+
51
+ let totalEdges = 0;
38
52
  for (const absFile of files) {
39
53
  const relFile = toRel(absFile, packagePath);
40
54
  const imports = extractLocalImports(absFile, packagePath);
55
+ totalEdges += imports.length;
56
+ if (totalEdges > MAX_GRAPH_EDGES) {
57
+ debugLog(`[MODULE-GRAPH] Edge limit reached (${totalEdges} > ${MAX_GRAPH_EDGES}), returning partial graph`);
58
+ graph[relFile] = imports;
59
+ break;
60
+ }
41
61
  graph[relFile] = imports;
42
62
  }
43
63
  return graph;
@@ -424,11 +444,11 @@ function checkNodeTaint(node, moduleVars) {
424
444
 
425
445
  function describeSensitiveCall(mod, method, args) {
426
446
  const detail = extractLiteralArg(args);
427
- if (mod === 'fs' && (method === 'readFileSync' || method === 'readFile')) {
447
+ if (mod === 'fs' && (method === 'readFileSync' || method === 'readFile' || method === 'createReadStream')) {
428
448
  return { source: `fs.${method}`, detail };
429
449
  }
430
- if (mod === 'os' && method === 'homedir') {
431
- return { source: 'os.homedir', detail: '' };
450
+ if (mod === 'os' && (method === 'homedir' || method === 'hostname' || method === 'userInfo' || method === 'networkInterfaces')) {
451
+ return { source: `os.${method}`, detail: '' };
432
452
  }
433
453
  if (mod === 'child_process' && (method === 'exec' || method === 'execSync' || method === 'spawn')) {
434
454
  return { source: `child_process.${method}`, detail };
@@ -488,7 +508,13 @@ function scanBodyForTaint(body, moduleVars, taintedVars) {
488
508
  * network/exec sink in another module.
489
509
  * Max 2 levels of re-export (A → B → C).
490
510
  */
491
- function detectCrossFileFlows(graph, taintedExports, packagePath) {
511
+ function detectCrossFileFlows(graph, taintedExports, sinkExportsOrPath, packagePath) {
512
+ // Backward compat: old callers pass (graph, tainted, path) — 3 args
513
+ let sinkExports = sinkExportsOrPath;
514
+ if (typeof sinkExportsOrPath === 'string') {
515
+ packagePath = sinkExportsOrPath;
516
+ sinkExports = null;
517
+ }
492
518
  // Expand taint through re-exports (max 2 levels)
493
519
  const expandedTaint = expandTaintThroughReexports(graph, taintedExports, packagePath);
494
520
 
@@ -499,6 +525,18 @@ function detectCrossFileFlows(graph, taintedExports, packagePath) {
499
525
  const ast = parseFile(absFile);
500
526
  if (!ast) continue;
501
527
 
528
+ // Pipe chain cross-file flows (runs before localTaint check — doesn't need localTaint)
529
+ if (sinkExports && flows.length < MAX_FLOWS) {
530
+ const pipeFlows = findPipeChainCrossFileFlows(ast, relFile, graph, expandedTaint, sinkExports, packagePath);
531
+ for (const flow of pipeFlows) {
532
+ if (flows.length >= MAX_FLOWS) break;
533
+ const key = `${flow.sourceFile}→${flow.sinkFile}`;
534
+ if (!flows.some(f => `${f.sourceFile}→${f.sinkFile}` === key)) {
535
+ flows.push(flow);
536
+ }
537
+ }
538
+ }
539
+
502
540
  // Find which local variables are tainted via imports
503
541
  const localTaint = collectImportTaint(ast, relFile, graph, expandedTaint, packagePath);
504
542
  if (Object.keys(localTaint).length === 0) continue;
@@ -506,9 +544,10 @@ function detectCrossFileFlows(graph, taintedExports, packagePath) {
506
544
  // Propagate taint through local variable assignments (e.g., const data = read())
507
545
  propagateLocalTaint(ast, localTaint);
508
546
 
509
- // Find sinks that use tainted variables
547
+ // Find sinks that use tainted variables (direct sink calls at call site)
510
548
  const sinks = findSinksUsingTainted(ast, localTaint);
511
549
  for (const sink of sinks) {
550
+ if (flows.length >= MAX_FLOWS) break;
512
551
  const taintInfo = localTaint[sink.taintedVar];
513
552
  flows.push({
514
553
  severity: 'CRITICAL',
@@ -520,6 +559,24 @@ function detectCrossFileFlows(graph, taintedExports, packagePath) {
520
559
  description: `Credential read in ${taintInfo.sourceFile} exported and sent to network in ${relFile}`,
521
560
  });
522
561
  }
562
+
563
+ // Find imported methods that internally contain sinks and receive tainted args
564
+ if (sinkExports && flows.length < MAX_FLOWS) {
565
+ const importedSinkFlows = findImportedSinkMethodCalls(ast, localTaint, relFile, graph, sinkExports, packagePath);
566
+ for (const flow of importedSinkFlows) {
567
+ if (flows.length >= MAX_FLOWS) break;
568
+ // Deduplicate: skip if same sourceFile→sinkFile already found
569
+ const key = `${flow.sourceFile}→${flow.sinkFile}`;
570
+ if (!flows.some(f => `${f.sourceFile}→${f.sinkFile}` === key)) {
571
+ flows.push(flow);
572
+ }
573
+ }
574
+ }
575
+
576
+ if (flows.length >= MAX_FLOWS) {
577
+ debugLog(`[MODULE-GRAPH] Flow limit reached (${MAX_FLOWS}), returning partial results`);
578
+ break;
579
+ }
523
580
  }
524
581
 
525
582
  return flows;
@@ -865,6 +922,67 @@ function collectImportTaint(ast, currentFile, graph, taintedExports, packagePath
865
922
  }
866
923
  });
867
924
 
925
+ // Resolve this.X = new Y() in class constructors, then this.X.method() in methods
926
+ walkAST(ast, (node) => {
927
+ if (node.type !== 'ClassDeclaration' && node.type !== 'ClassExpression') return;
928
+ if (!node.body || node.body.type !== 'ClassBody') return;
929
+
930
+ // Phase 1: scan constructor for this.X = new Y() assignments
931
+ const thisRefs = {}; // propName → __module__ ref
932
+ for (const method of node.body.body) {
933
+ if (method.type !== 'MethodDefinition' || method.kind !== 'constructor') continue;
934
+ const ctorBody = method.value && method.value.body;
935
+ if (!ctorBody) continue;
936
+ walkAST(ctorBody, (n) => {
937
+ // this.reader = new Reader(...)
938
+ if (n.type === 'ExpressionStatement' && n.expression.type === 'AssignmentExpression' &&
939
+ n.expression.left.type === 'MemberExpression' &&
940
+ n.expression.left.object.type === 'ThisExpression' &&
941
+ n.expression.right.type === 'NewExpression' &&
942
+ n.expression.right.callee.type === 'Identifier') {
943
+ const prop = n.expression.left.property.name || n.expression.left.property.value;
944
+ const className = n.expression.right.callee.name;
945
+ const modRef = localTaint['__module__' + className];
946
+ if (prop && modRef) {
947
+ thisRefs[prop] = modRef;
948
+ }
949
+ }
950
+ });
951
+ }
952
+
953
+ if (Object.keys(thisRefs).length === 0) return;
954
+
955
+ // Phase 2: scan all methods for this.X.method() calls that return tainted data
956
+ for (const method of node.body.body) {
957
+ if (method.type !== 'MethodDefinition' || method.kind === 'constructor') continue;
958
+ const methodBody = method.value && method.value.body;
959
+ if (!methodBody) continue;
960
+ walkAST(methodBody, (n) => {
961
+ if (n.type !== 'VariableDeclaration') return;
962
+ for (const decl of n.declarations) {
963
+ if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
964
+ // const data = this.reader.readAll()
965
+ if (decl.init.type === 'CallExpression' &&
966
+ decl.init.callee.type === 'MemberExpression' &&
967
+ decl.init.callee.object.type === 'MemberExpression' &&
968
+ decl.init.callee.object.object.type === 'ThisExpression') {
969
+ const thisProp = decl.init.callee.object.property.name || decl.init.callee.object.property.value;
970
+ const methodName = decl.init.callee.property.name || decl.init.callee.property.value;
971
+ const modRef = thisRefs[thisProp];
972
+ if (modRef && methodName && modRef.modTaint[methodName] && modRef.modTaint[methodName].tainted) {
973
+ const t = modRef.modTaint[methodName];
974
+ localTaint[decl.id.name] = {
975
+ source: t.source,
976
+ detail: t.detail || '',
977
+ sourceFile: t.sourceFile || modRef.resolved,
978
+ };
979
+ }
980
+ }
981
+ }
982
+ });
983
+ }
984
+ });
985
+
868
986
  // Clean up internal markers
869
987
  for (const key of Object.keys(localTaint)) {
870
988
  if (key.startsWith('__module__')) delete localTaint[key];
@@ -884,18 +1002,78 @@ function findSinksUsingTainted(ast, localTaint) {
884
1002
  if (node.type !== 'CallExpression') return;
885
1003
 
886
1004
  const sinkName = getSinkName(node);
887
- if (!sinkName) return;
1005
+ if (sinkName) {
1006
+ // Check if any argument references a tainted variable
1007
+ const taintedArg = findTaintedArgument(node.arguments, taintedNames);
1008
+ if (taintedArg) {
1009
+ sinks.push({ sink: sinkName, taintedVar: taintedArg });
1010
+ }
1011
+ }
888
1012
 
889
- // Check if any argument references a tainted variable
890
- const taintedArg = findTaintedArgument(node.arguments, taintedNames);
891
- if (taintedArg) {
892
- sinks.push({ sink: sinkName, taintedVar: taintedArg });
1013
+ // Pipe chain detection: tainted.pipe(transform).pipe(networkSink)
1014
+ // .pipe() returns the destination, so chains propagate taint.
1015
+ // Walk up to MAX_PIPE_DEPTH steps.
1016
+ if (node.callee && node.callee.type === 'MemberExpression') {
1017
+ const method = node.callee.property.name || node.callee.property.value;
1018
+ if (method === 'pipe') {
1019
+ const pipeSource = resolvePipeChainSource(node, taintedNames, 0);
1020
+ if (pipeSource) {
1021
+ // The final .pipe() destination — check if it's a known sink
1022
+ const destArg = node.arguments && node.arguments[0];
1023
+ if (destArg) {
1024
+ const destSink = getArgSinkName(destArg);
1025
+ if (destSink) {
1026
+ sinks.push({ sink: destSink, taintedVar: pipeSource });
1027
+ }
1028
+ }
1029
+ }
1030
+ }
893
1031
  }
894
1032
  });
895
1033
 
896
1034
  return sinks;
897
1035
  }
898
1036
 
1037
// Upper bound on how many chained .pipe() links are followed when tracing a stream.
const MAX_PIPE_DEPTH = 5;

/**
 * Walk a .pipe() chain leftward to find the tainted variable that feeds it.
 *
 * For `tainted.pipe(a).pipe(b)` the node passed in is the outer `.pipe(b)` call;
 * its receiver is the inner `.pipe(a)` call, whose receiver is the identifier
 * `tainted`, so 'tainted' is returned.
 *
 * @param {object} pipeCallNode - CallExpression AST node of a `.pipe(...)` call.
 * @param {Set<string>} taintedNames - Names of variables known to carry taint.
 * @param {number} [depth=0] - Number of chain links already walked. Optional:
 *   when omitted the walk starts at 0 (previously an omitted depth silently
 *   disabled the MAX_PIPE_DEPTH bound).
 * @returns {string|null} Name of the tainted source variable, or null when the
 *   chain does not start at a tainted identifier, contains a non-pipe call,
 *   or is longer than the depth bound allows.
 */
function resolvePipeChainSource(pipeCallNode, taintedNames, depth = 0) {
  let call = pipeCallNode;

  // `level` mirrors the recursion depth of the original implementation:
  // a node is only inspected while level <= MAX_PIPE_DEPTH.
  for (let level = depth; level <= MAX_PIPE_DEPTH; level++) {
    // A missing node or a callee without a receiver ends the walk.
    const receiver = call && call.callee && call.callee.object;
    if (!receiver) return null;

    // Base case: identifier.pipe(...) — tainted or not, the chain ends here.
    if (receiver.type === 'Identifier') {
      return taintedNames.has(receiver.name) ? receiver.name : null;
    }

    // Recursive case: the receiver must itself be a `<something>.pipe(...)` call.
    const innerCallee = receiver.type === 'CallExpression' ? receiver.callee : null;
    if (!innerCallee || innerCallee.type !== 'MemberExpression') return null;

    const method = innerCallee.property.name || innerCallee.property.value;
    if (method !== 'pipe') return null;

    call = receiver;
  }

  return null;
}
1063
+
1064
/**
 * Resolve the sink name of the destination handed to a .pipe() call.
 *
 * Only call expressions — e.g. net.connect(...) or http.request(...) — are
 * passed on to getSinkName; any other argument shape yields null.
 *
 * @param {object} argNode - AST node of the first .pipe() argument.
 * @returns {string|null} Whatever getSinkName reports for a call expression,
 *   otherwise null.
 */
function getArgSinkName(argNode) {
  // A bare identifier that happens to hold a sink instance is not resolved here.
  const isCall = argNode.type === 'CallExpression';
  return isCall ? getSinkName(argNode) : null;
}
1076
+
899
1077
  function getSinkName(callNode) {
900
1078
  const callee = callNode.callee;
901
1079
 
@@ -972,6 +1150,304 @@ function findTaintedInExpr(node, taintedNames) {
972
1150
  return null;
973
1151
  }
974
1152
 
1153
/**
 * Find calls to imported functions/methods whose bodies contain a sink and
 * that receive a tainted argument.
 *
 * Three call shapes are recognised:
 *   - obj.method(tainted)        where obj is a local module (or an instance
 *                                created with `new` from one)
 *   - this.prop.method(tainted)  where a class constructor did
 *                                `this.prop = new ImportedClass()`
 *   - fn(tainted)                where fn is bound directly to a local module
 *
 * @param {object} ast - Parsed program of the file being inspected.
 * @param {object} localTaint - varName -> { source, detail, sourceFile }.
 * @param {string} relFile - Path of the inspected file, relative to packagePath.
 * @param {object} graph - Module graph (not used by this function).
 * @param {object} sinkExports - resolved module path -> { exportName: { hasSink, sink } }.
 * @param {string} packagePath - Root directory of the package.
 * @returns {Array<object>} cross_file_dataflow findings (may contain duplicates).
 */
function findImportedSinkMethodCalls(ast, localTaint, relFile, graph, sinkExports, packagePath) {
  const flows = [];
  const taintedNames = new Set(Object.keys(localTaint));
  const fileDir = path.dirname(path.resolve(packagePath, relFile));

  // Own-property lookup: names come from the scanned code, so `toString`,
  // `constructor`, ... must not resolve to Object.prototype members.
  const ownEntry = (table, key) =>
    (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);

  // varName -> resolved module path. A Map keeps identifier names such as
  // `constructor` from matching inherited properties.
  const moduleRefs = new Map();

  // ESM: import reporter from './reporter'
  // Registered BEFORE the variable scan so that `const r = new Reporter()`
  // can be resolved when Reporter came from an import declaration.
  for (const node of ast.body) {
    if (node.type !== 'ImportDeclaration' || !node.source || typeof node.source.value !== 'string') continue;
    if (!isLocalImport(node.source.value)) continue;
    const resolved = resolveLocal(fileDir, node.source.value, packagePath);
    if (!resolved) continue;
    for (const spec of node.specifiers) {
      moduleRefs.set(spec.local.name, resolved);
    }
  }

  // CommonJS bindings and instances created from known modules.
  walkAST(ast, (node) => {
    if (node.type !== 'VariableDeclaration') return;
    for (const decl of node.declarations) {
      if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
      // const reporter = require('./reporter')
      if (isRequireCall(decl.init) && isLocalImport(decl.init.arguments[0].value)) {
        const resolved = resolveLocal(fileDir, decl.init.arguments[0].value, packagePath);
        if (resolved) moduleRefs.set(decl.id.name, resolved);
      }
      // const r = new Reporter() where Reporter is already a known module binding
      if (decl.init.type === 'NewExpression' && decl.init.callee.type === 'Identifier') {
        const ctorRef = moduleRefs.get(decl.init.callee.name);
        if (ctorRef) moduleRefs.set(decl.id.name, ctorRef);
      }
    }
  });

  // propName -> resolved module path, from `this.X = new Y()` in class constructors.
  const thisRefs = new Map();
  walkAST(ast, (node) => {
    if (node.type !== 'ClassDeclaration' && node.type !== 'ClassExpression') return;
    if (!node.body || node.body.type !== 'ClassBody') return;
    for (const member of node.body.body) {
      if (member.type !== 'MethodDefinition' || member.kind !== 'constructor') continue;
      const ctorBody = member.value && member.value.body;
      if (!ctorBody) continue;
      walkAST(ctorBody, (n) => {
        if (n.type !== 'ExpressionStatement' || n.expression.type !== 'AssignmentExpression') return;
        const { left, right } = n.expression;
        if (left.type !== 'MemberExpression' || left.object.type !== 'ThisExpression') return;
        if (right.type !== 'NewExpression' || right.callee.type !== 'Identifier') return;
        const prop = left.property.name || left.property.value;
        const resolved = moduleRefs.get(right.callee.name);
        if (prop && resolved) thisRefs.set(prop, resolved);
      });
    }
  });

  // Look up the sink descriptor of `resolved`, trying firstKey then secondKey.
  // Returns null unless the chosen entry is flagged hasSink.
  const resolveSink = (resolved, firstKey, secondKey) => {
    const moduleSinks = sinkExports[resolved];
    if (!moduleSinks) return null;
    const info = ownEntry(moduleSinks, firstKey) || ownEntry(moduleSinks, secondKey);
    return info && info.hasSink ? info : null;
  };

  // Record a finding when one of the call's arguments is tainted.
  // `how` is the middle part of the description that names the call shape.
  const reportIfTainted = (callNode, sinkInfo, how) => {
    const taintedArg = findTaintedArgument(callNode.arguments, taintedNames);
    if (!taintedArg) return;
    const taintInfo = localTaint[taintedArg];
    if (!taintInfo) return;
    flows.push({
      severity: 'CRITICAL',
      type: 'cross_file_dataflow',
      sourceFile: taintInfo.sourceFile,
      source: `${taintInfo.source}${taintInfo.detail ? '(' + taintInfo.detail + ')' : ''}`,
      sinkFile: relFile,
      sink: sinkInfo.sink,
      description: `Credential read in ${taintInfo.sourceFile} ${how} (${sinkInfo.sink}) in ${relFile}`,
    });
  };

  walkAST(ast, (node) => {
    if (node.type !== 'CallExpression') return;
    const callee = node.callee;

    // Pattern: sinkFn(taintedArg) where sinkFn is bound directly to a module
    if (callee.type === 'Identifier') {
      const resolved = moduleRefs.get(callee.name);
      if (!resolved) return;
      const sinkInfo = resolveSink(resolved, 'default', callee.name);
      if (sinkInfo) reportIfTainted(node, sinkInfo, `flows to imported sink function ${callee.name}()`);
      return;
    }

    if (callee.type !== 'MemberExpression') return;
    const methodName = callee.property.name || callee.property.value;
    if (!methodName) return;
    const target = callee.object;

    // Pattern: obj.method(taintedArg)
    if (target.type === 'Identifier') {
      const resolved = moduleRefs.get(target.name);
      if (!resolved) return;
      const sinkInfo = resolveSink(resolved, methodName, 'default');
      if (sinkInfo) reportIfTainted(node, sinkInfo, `flows to imported sink method ${target.name}.${methodName}()`);
      return;
    }

    // Pattern: this.transport.report(taintedArg)
    if (target.type === 'MemberExpression' && target.object.type === 'ThisExpression') {
      const thisProp = target.property.name || target.property.value;
      const resolved = thisRefs.get(thisProp);
      if (!resolved) return;
      const sinkInfo = resolveSink(resolved, methodName, 'default');
      if (sinkInfo) reportIfTainted(node, sinkInfo, `flows via this.${thisProp}.${methodName}()`);
    }
  });

  return flows;
}
1317
+
1318
/**
 * Detect cross-file flows through .pipe() chains involving imported module instances.
 * Pattern: reader.stream().pipe(transform).pipe(sink.createWritable())
 * where reader and sink are local modules or instances of imported module classes.
 *
 * @param {object} ast - Parsed program of the file being inspected.
 * @param {string} relFile - Path of the inspected file, relative to packagePath.
 * @param {object} graph - Module graph (not used by this function).
 * @param {object} taintedExports - resolved module path -> { exportName: { tainted, source, detail, sourceFile } }.
 * @param {object} sinkExports - resolved module path -> { exportName: { hasSink, sink } }.
 * @param {string} packagePath - Root directory of the package.
 * @returns {Array<object>} cross_file_dataflow findings. Every .pipe() call in a
 *   chain is inspected on its own, so one chain can yield duplicate findings.
 */
function findPipeChainCrossFileFlows(ast, relFile, graph, taintedExports, sinkExports, packagePath) {
  const flows = [];
  const fileDir = path.dirname(path.resolve(packagePath, relFile));

  // Own-property lookup: method names come from the scanned code, so names like
  // `toString` must not resolve to Object.prototype members.
  const ownEntry = (table, key) =>
    (table && Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);
  const isMethodCall = (n) =>
    Boolean(n) && n.type === 'CallExpression' && Boolean(n.callee) && n.callee.type === 'MemberExpression';
  const calledMethod = (callNode) => callNode.callee.property.name || callNode.callee.property.value;

  // varName -> resolved module path (Map: identifier names are untrusted input).
  const moduleRefs = new Map();

  // ESM imports, registered first so `new ImportedClass()` below can be resolved.
  // Mirrors the import handling of findImportedSinkMethodCalls.
  for (const node of Array.isArray(ast.body) ? ast.body : []) {
    if (node.type !== 'ImportDeclaration' || !node.source || typeof node.source.value !== 'string') continue;
    if (!isLocalImport(node.source.value)) continue;
    const resolved = resolveLocal(fileDir, node.source.value, packagePath);
    if (!resolved) continue;
    for (const spec of node.specifiers) {
      moduleRefs.set(spec.local.name, resolved);
    }
  }

  // CommonJS bindings and instances created from known modules.
  walkAST(ast, (node) => {
    if (node.type !== 'VariableDeclaration') return;
    for (const decl of node.declarations) {
      if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
      if (isRequireCall(decl.init) && isLocalImport(decl.init.arguments[0].value)) {
        const resolved = resolveLocal(fileDir, decl.init.arguments[0].value, packagePath);
        if (resolved) moduleRefs.set(decl.id.name, resolved);
      }
      if (decl.init.type === 'NewExpression' && decl.init.callee.type === 'Identifier') {
        const ctorRef = moduleRefs.get(decl.init.callee.name);
        if (ctorRef) moduleRefs.set(decl.id.name, ctorRef);
      }
    }
  });

  // Classify the destination of one .pipe() call. Returns { sink } or null.
  const checkPipeDest = (pipeNode) => {
    const destArg = pipeNode.arguments && pipeNode.arguments[0];
    if (!destArg) return null;

    // Direct sink: net.connect(), https.request().
    // getSinkName reads `.callee`, so plain identifiers such as an intermediate
    // `transform` stream are never handed to it.
    const directSink = destArg.callee ? getSinkName(destArg) : null;
    if (directSink) return { sink: directSink };

    // Module method sink: sink.createWritable() where sink's module has sink exports
    if (isMethodCall(destArg) && destArg.callee.object.type === 'Identifier') {
      const destResolved = moduleRefs.get(destArg.callee.object.name);
      const moduleSinks = destResolved ? sinkExports[destResolved] : undefined;
      if (moduleSinks) {
        const info = ownEntry(moduleSinks, calledMethod(destArg)) || ownEntry(moduleSinks, 'default');
        if (info && info.hasSink) return { sink: info.sink };
      }
    }
    return null;
  };

  walkAST(ast, (node) => {
    if (!isMethodCall(node) || calledMethod(node) !== 'pipe') return;

    // Walk leftward through chained .pipe() calls to the call that starts the chain.
    let sourceInfo = null;
    let current = node;
    for (let depth = 0; depth < MAX_PIPE_DEPTH; depth++) {
      const receiver = current.callee && current.callee.object;
      if (!isMethodCall(receiver)) break;

      const innerMethod = calledMethod(receiver);
      if (innerMethod === 'pipe') {
        // Intermediate step — keep walking.
        current = receiver;
        continue;
      }

      // instance.method() where the instance's module exports a tainted `method`
      if (receiver.callee.object.type === 'Identifier') {
        const resolved = moduleRefs.get(receiver.callee.object.name);
        const exported = resolved ? ownEntry(taintedExports[resolved], innerMethod) : undefined;
        if (exported && exported.tainted) {
          sourceInfo = {
            source: `${exported.source}${exported.detail ? '(' + exported.detail + ')' : ''}`,
            sourceFile: exported.sourceFile || resolved,
          };
        }
      }
      break;
    }

    if (!sourceInfo) return;

    // Check this .pipe() destination first, then the destinations of the
    // intermediate .pipe() steps to its left, stopping at the first sink.
    let sinkResult = checkPipeDest(node);
    let inner = node.callee.object;
    for (let d = 0; !sinkResult && d < MAX_PIPE_DEPTH; d++) {
      if (!isMethodCall(inner) || calledMethod(inner) !== 'pipe') break;
      sinkResult = checkPipeDest(inner);
      inner = inner.callee.object;
    }

    if (!sinkResult) return;

    // The finding is attributed to the file that contains the pipe chain.
    flows.push({
      severity: 'CRITICAL',
      type: 'cross_file_dataflow',
      sourceFile: sourceInfo.sourceFile,
      source: sourceInfo.source,
      sinkFile: relFile,
      sink: sinkResult.sink,
      description: `${sourceInfo.source} in ${sourceInfo.sourceFile} piped to ${sinkResult.sink} in ${relFile}`,
    });
  });

  return flows;
}
1450
+
975
1451
  // =============================================================================
976
1452
  // Shared helpers
977
1453
  // =============================================================================
@@ -1139,7 +1615,7 @@ function analyzeSinkExports(filePath) {
1139
1615
  if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
1140
1616
  if (isRequireCall(decl.init)) {
1141
1617
  const mod = decl.init.arguments[0].value;
1142
- if (mod === 'http' || mod === 'https' || mod === 'net' || mod === 'dgram') {
1618
+ if (mod === 'http' || mod === 'https' || mod === 'net' || mod === 'dgram' || mod === 'dns') {
1143
1619
  sinkModuleVars[decl.id.name] = mod;
1144
1620
  }
1145
1621
  }
@@ -1147,7 +1623,7 @@ function analyzeSinkExports(filePath) {
1147
1623
  }
1148
1624
  if (node.type === 'ImportDeclaration' && node.source && typeof node.source.value === 'string') {
1149
1625
  const mod = node.source.value;
1150
- if (mod === 'http' || mod === 'https' || mod === 'net' || mod === 'dgram') {
1626
+ if (mod === 'http' || mod === 'https' || mod === 'net' || mod === 'dgram' || mod === 'dns') {
1151
1627
  for (const spec of node.specifiers) {
1152
1628
  sinkModuleVars[spec.local.name] = mod;
1153
1629
  }
@@ -1175,7 +1651,7 @@ function analyzeSinkExports(filePath) {
1175
1651
  // Variable-based: const h = require('https'); h.request()
1176
1652
  if (node.callee.object.type === 'Identifier' && sinkModuleVars[node.callee.object.name]) {
1177
1653
  const method = node.callee.property.name || node.callee.property.value;
1178
- if (method === 'request' || method === 'get') {
1654
+ if (method === 'request' || method === 'get' || method === 'resolveTxt' || method === 'resolve' || method === 'resolve4' || method === 'resolve6') {
1179
1655
  found = sinkModuleVars[node.callee.object.name] + '.' + method + '()';
1180
1656
  return;
1181
1657
  }
@@ -1215,6 +1691,32 @@ function analyzeSinkExports(filePath) {
1215
1691
  }
1216
1692
  }
1217
1693
  }
1694
+ } else if ((value.type === 'ClassExpression' || value.type === 'Identifier') && exportName === 'default') {
1695
+ // module.exports = ClassName — resolve class from local declarations
1696
+ let classNode = value;
1697
+ if (value.type === 'Identifier') {
1698
+ // Find class declaration in AST
1699
+ walkAST(ast, (n) => {
1700
+ if (n.type === 'ClassDeclaration' && n.id && n.id.name === value.name) {
1701
+ classNode = n;
1702
+ }
1703
+ });
1704
+ }
1705
+ if (classNode.body && classNode.body.type === 'ClassBody') {
1706
+ for (const method of classNode.body.body) {
1707
+ if (method.type !== 'MethodDefinition' || method.kind === 'constructor') continue;
1708
+ const methodName = method.key && (method.key.name || method.key.value);
1709
+ const funcBody = method.value && method.value.body;
1710
+ if (!methodName || !funcBody) continue;
1711
+ const body = funcBody.type === 'BlockStatement' ? funcBody.body : null;
1712
+ if (body) {
1713
+ const sink = bodyHasSink(body);
1714
+ if (sink) {
1715
+ sinkExports[methodName] = { hasSink: true, sink };
1716
+ }
1717
+ }
1718
+ }
1719
+ }
1218
1720
  } else {
1219
1721
  const funcBody = getFunctionBody(value);
1220
1722
  if (funcBody) {
@@ -1370,9 +1872,217 @@ function detectCallbackCrossFileFlows(graph, taintedExports, sinkExports, packag
1370
1872
  return flows;
1371
1873
  }
1372
1874
 
1875
// =============================================================================
// STEP 5 — EventEmitter cross-module detection
// =============================================================================

// Standard Node.js event names that are NOT indicative of malicious intent
const BENIGN_EVENT_NAMES = new Set([
  'error', 'end', 'close', 'data', 'finish', 'readable', 'drain',
  'connect', 'listening', 'message', 'timeout', 'response', 'request',
  'open', 'pause', 'resume', 'pipe', 'unpipe', 'exit', 'disconnect',
]);

const MAX_EMITTER_FLOWS = 2; // Cap per package to prevent explosion on event-heavy libs

// Listener-registration methods that are all treated like emitter.on()
const EMITTER_LISTENER_METHODS = new Set(['on', 'addListener', 'once']);

/**
 * Return the event name of an emit()/on() call when it is a non-empty string
 * literal that is not in BENIGN_EVENT_NAMES; otherwise return null.
 * @param {object} argNode - first argument node of the call
 * @returns {string|null}
 */
function emitterStaticEventName(argNode) {
  if (!argNode || argNode.type !== 'Literal' || typeof argNode.value !== 'string') return null;
  if (!argNode.value || BENIGN_EVENT_NAMES.has(argNode.value)) return null;
  return argNode.value;
}

/**
 * Walk a list of statements/expressions and return the first sink name
 * reported by getSinkName() for a CallExpression, or null when there is none.
 * @param {object[]} statements
 * @returns {string|null}
 */
function emitterFindSinkInBody(statements) {
  let sinkFound = null;
  walkAST({ type: 'Program', body: statements }, (inner) => {
    if (sinkFound || inner.type !== 'CallExpression') return;
    const sinkName = getSinkName(inner);
    if (sinkName) sinkFound = sinkName;
  });
  return sinkFound;
}

/**
 * Find a sink reachable from a listener body: first directly, then through
 * this.method() calls resolved (by method name only) against the class
 * methods declared in the same file.
 * @param {object[]} handlerBody - statements (or single expression) of the handler
 * @param {Object<string, object[][]>} classMethodBodies - method name -> list of bodies
 * @returns {string|null}
 */
function emitterResolveHandlerSink(handlerBody, classMethodBodies) {
  const direct = emitterFindSinkInBody(handlerBody);
  if (direct || Object.keys(classMethodBodies).length === 0) return direct;

  let sinkFound = null;
  walkAST({ type: 'Program', body: handlerBody }, (inner) => {
    if (sinkFound) return;
    if (inner.type !== 'CallExpression' || !inner.callee ||
        inner.callee.type !== 'MemberExpression' ||
        inner.callee.object.type !== 'ThisExpression') return;
    const methodName = inner.callee.property.name || inner.callee.property.value;
    const bodies = methodName ? classMethodBodies[methodName] : null;
    if (!bodies) return;
    // Several classes in the file may declare the same method name; check them all.
    for (const methodBody of bodies) {
      sinkFound = emitterFindSinkInBody(methodBody);
      if (sinkFound) return;
    }
  });
  return sinkFound;
}

/**
 * Index the block bodies of every non-constructor class method in a file by
 * method name, so this.method() calls inside listeners can be followed.
 * @param {object} ast
 * @returns {Object<string, object[][]>} prototype-less map: name -> bodies
 */
function emitterCollectClassMethods(ast) {
  // Prototype-less so method names such as "toString" cannot hit Object.prototype.
  const classMethodBodies = Object.create(null);
  walkAST(ast, (n) => {
    if (n.type !== 'ClassDeclaration' && n.type !== 'ClassExpression') return;
    if (!n.body || n.body.type !== 'ClassBody') return;
    for (const member of n.body.body) {
      if (member.type !== 'MethodDefinition') continue;
      const name = member.key && (member.key.name || member.key.value);
      if (!name || name === 'constructor') continue;
      const body = member.value && member.value.body;
      if (!body || body.type !== 'BlockStatement') continue;
      if (!classMethodBodies[name]) classMethodBodies[name] = [];
      classMethodBodies[name].push(body.body);
    }
  });
  return classMethodBodies;
}

/**
 * Add intra-file taint sources to `localTaint` (mutated in place).
 * collectImportTaint only handles cross-file imports; this covers variables
 * initialised from a local source as judged by checkNodeTaint(), plus object
 * literals with at least one tainted property value (the whole object is
 * then considered tainted). Entries already present in `localTaint` win.
 * @param {object} ast
 * @param {string} relFile - file recorded as sourceFile on new entries
 * @param {Object<string, object>} localTaint
 */
function emitterSeedLocalTaint(ast, relFile, localTaint) {
  const moduleVars = {};
  walkAST(ast, (n) => {
    if (n.type !== 'VariableDeclaration') return;
    for (const decl of n.declarations) {
      if (!decl.init || !decl.id || decl.id.type !== 'Identifier') continue;
      const varName = decl.id.name;
      if (isRequireCall(decl.init) && SENSITIVE_MODULES.has(decl.init.arguments[0].value)) {
        moduleVars[varName] = decl.init.arguments[0].value;
      }
      const direct = checkNodeTaint(decl.init, moduleVars);
      if (localTaint[varName]) continue;
      if (direct) {
        localTaint[varName] = { source: direct.source, detail: direct.detail || '', sourceFile: relFile };
        continue;
      }
      // ObjectExpression: const metrics = { hostname: os.hostname(), ... }
      if (decl.init.type !== 'ObjectExpression') continue;
      for (const prop of decl.init.properties) {
        if (!prop.value) continue;
        const propTaint = checkNodeTaint(prop.value, moduleVars);
        if (propTaint) {
          localTaint[varName] = { source: propTaint.source, detail: propTaint.detail || '', sourceFile: relFile };
          break;
        }
      }
    }
  });
}

/**
 * Detect cross-file EventEmitter flows.
 * Pattern: file A does emitter.emit('event', taintedData),
 *          file B does emitter.on('event', (data) => networkSink(data)).
 * Emitters are matched by event name only; the emitter variable is recorded
 * on each call but is not compared.
 *
 * @param {Object<string, *>} graph - module graph; only its keys (package-relative files) are read here
 * @param {object} taintedExports - per-file tainted exports, expanded through re-exports
 * @param {object} sinkExports - not read by this function; kept so the signature
 *   matches detectCallbackCrossFileFlows
 * @param {string} packagePath - package root used to resolve the graph's relative files
 * @returns {object[]} at most MAX_EMITTER_FLOWS findings of type 'cross_file_dataflow',
 *   at most one per event name
 */
function detectEventEmitterFlows(graph, taintedExports, sinkExports, packagePath) {
  const expandedTaint = expandTaintThroughReexports(graph, taintedExports, packagePath);

  // Phase 1: collect all emit() and on() calls across files
  const emitCalls = []; // { file, eventName, taintedSource, emitterVar }
  const onCalls = [];   // { file, eventName, hasSink, sinkName, emitterVar }

  for (const relFile of Object.keys(graph)) {
    const ast = parseFile(path.resolve(packagePath, relFile));
    if (!ast) continue;

    // Taint map for this file: imports first, then local sources, then propagation
    const localTaint = collectImportTaint(ast, relFile, graph, expandedTaint, packagePath);
    emitterSeedLocalTaint(ast, relFile, localTaint);
    propagateLocalTaint(ast, localTaint);
    const taintedNames = new Set(Object.keys(localTaint));

    const classMethodBodies = emitterCollectClassMethods(ast);

    walkAST(ast, (node) => {
      if (node.type !== 'CallExpression' || !node.callee || node.callee.type !== 'MemberExpression') return;
      const method = node.callee.property.name || node.callee.property.value;
      // Both emit(name, data) and on(name, handler) need at least two arguments
      if (!method || node.arguments.length < 2) return;

      // emitter.emit('eventName', data)
      if (method === 'emit') {
        const eventName = emitterStaticEventName(node.arguments[0]);
        if (!eventName) return;

        // Check if any data argument (2nd+) is tainted
        const taintedArg = findTaintedArgument(node.arguments.slice(1), taintedNames);
        if (!taintedArg) return;

        // Phase 2 dereferences this record; skip instead of crashing the whole
        // scan if the reported argument has no entry in the taint map.
        const taintedSource = localTaint[taintedArg];
        if (!taintedSource) return;

        emitCalls.push({
          file: relFile,
          eventName,
          taintedSource,
          emitterVar: getEmitterVarName(node.callee.object),
        });
        return;
      }

      // emitter.on('eventName', handler) — check if handler has a network sink
      if (!EMITTER_LISTENER_METHODS.has(method)) return;
      const eventName = emitterStaticEventName(node.arguments[0]);
      if (!eventName) return;

      // Only inline handlers are inspected; named function references are not resolved
      const handler = node.arguments[1];
      if (handler.type !== 'FunctionExpression' && handler.type !== 'ArrowFunctionExpression') return;

      // Concise arrow bodies are a single expression; wrap so both shapes walk alike
      const handlerBody = handler.body.type === 'BlockStatement' ? handler.body.body : [handler.body];
      const sinkName = emitterResolveHandlerSink(handlerBody, classMethodBodies);
      if (!sinkName) return;

      onCalls.push({
        file: relFile,
        eventName,
        hasSink: true,
        sinkName,
        emitterVar: getEmitterVarName(node.callee.object),
      });
    });
  }

  // Phase 2: match emit + on by event name (cross-file only)
  const flows = [];
  // Exact-match dedup. A substring test against the description would also hit
  // the fixed wording, file names and sink names (e.g. an event called "emitted").
  const reportedEvents = new Set();

  for (const emit of emitCalls) {
    if (flows.length >= MAX_EMITTER_FLOWS) break;
    if (reportedEvents.has(emit.eventName)) continue;

    // intra-file pairs are handled by the dataflow scanner
    const listener = onCalls.find((on) => on.eventName === emit.eventName && on.file !== emit.file);
    if (!listener) continue;

    const taintInfo = emit.taintedSource;
    flows.push({
      severity: 'CRITICAL',
      type: 'cross_file_dataflow',
      sourceFile: taintInfo.sourceFile || emit.file,
      source: `${taintInfo.source}${taintInfo.detail ? '(' + taintInfo.detail + ')' : ''}`,
      sinkFile: listener.file,
      sink: listener.sinkName,
      description: `Credential emitted via EventEmitter '${emit.eventName}' in ${emit.file} → handler with ${listener.sinkName} in ${listener.file}`,
    });
    reportedEvents.add(emit.eventName);
  }

  return flows;
}
2069
+
2070
/**
 * Extract a printable variable name from the object part of an emitter call.
 * Handles: emitter.emit() -> 'emitter', this.emitter.emit() -> 'this.emitter',
 * this['emitter'].emit() -> 'this.emitter'.
 *
 * @param {object} node - callee object node of the emit()/on() call
 * @returns {string|null} the name, or null when it cannot be determined
 *   statically (other expression shapes, or a computed access such as
 *   this[key] / this[a + b] whose property is not a string literal)
 */
function getEmitterVarName(node) {
  if (!node) return null;
  if (node.type === 'Identifier') return node.name;
  if (node.type !== 'MemberExpression' || !node.object || node.object.type !== 'ThisExpression') {
    return null;
  }

  const prop = node.property;
  if (!prop) return null;

  if (node.computed) {
    // this[key] names a runtime value, not a property called "key"; only a
    // string literal key is a static name.
    return prop.type === 'Literal' && typeof prop.value === 'string' ? 'this.' + prop.value : null;
  }

  // Never build the bogus name 'this.undefined' from a property without a name
  return prop.name ? 'this.' + prop.name : null;
}
2081
+
1373
2082
  module.exports = {
1374
2083
  buildModuleGraph, annotateTaintedExports, detectCrossFileFlows,
1375
- annotateSinkExports, detectCallbackCrossFileFlows,
2084
+ annotateSinkExports, detectCallbackCrossFileFlows, detectEventEmitterFlows,
1376
2085
  resolveLocal, extractLocalImports, parseFile, isLocalImport, toRel, isFileExists,
1377
- tryResolveConcatRequire
2086
+ tryResolveConcatRequire,
2087
+ MAX_GRAPH_NODES, MAX_GRAPH_EDGES, MAX_FLOWS
1378
2088
  };