circle-ir 3.6.0 → 3.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9211,9 +9211,7 @@ var DEFAULT_SINKS = [
9211
9211
  { method: "resolveURI", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9212
9212
  { method: "resolve", class: "SourceResolver", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9213
9213
  { method: "getSource", class: "SourceResolver", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9214
- // URL-based resource loading
9215
- { method: "URL", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [0] },
9216
- { method: "openStream", class: "URL", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [] },
9214
+ // NOTE: new URL(userInput) is SSRF (CWE-918), not path traversal — see ssrf section below
9217
9215
  // Servlet context resource loading
9218
9216
  { method: "getResource", class: "ServletContext", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9219
9217
  { method: "getResourceAsStream", class: "ServletContext", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
@@ -9250,8 +9248,7 @@ var DEFAULT_SINKS = [
9250
9248
  { method: "extract", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
9251
9249
  { method: "extractAll", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
9252
9250
  { method: "unjar", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
9253
- // Additional file constructors
9254
- { method: "BufferedReader", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9251
+ // Additional file constructors — BufferedReader(Reader) is NOT a path traversal sink; it wraps a Reader, not a file path
9255
9252
  { method: "PrintWriter", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9256
9253
  { method: "Scanner", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9257
9254
  // Topic/queue names (for message queue systems - can be exploited for path traversal)
@@ -9774,9 +9771,12 @@ var DEFAULT_SINKS = [
9774
9771
  { method: "execSync", class: "child_process", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
9775
9772
  { method: "spawn", class: "child_process", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
9776
9773
  { method: "spawnSync", class: "child_process", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
9777
- // Also match without receiver (destructured imports)
9774
+ // Also match without receiver (destructured imports: const { exec } = require('child_process'))
9778
9775
  { method: "exec", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9779
9776
  { method: "execSync", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9777
+ { method: "spawn", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9778
+ { method: "spawnSync", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9779
+ { method: "execFile", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9780
9780
  // Node.js File System (path traversal)
9781
9781
  { method: "readFile", class: "fs", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0] },
9782
9782
  { method: "readFileSync", class: "fs", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0] },
@@ -9819,7 +9819,9 @@ var DEFAULT_SINKS = [
9819
9819
  { method: "request", class: "axios", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9820
9820
  { method: "fetch", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9821
9821
  { method: "request", class: "http", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9822
+ { method: "get", class: "http", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9822
9823
  { method: "request", class: "https", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9824
+ { method: "get", class: "https", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9823
9825
  // needle library (used in NodeGoat)
9824
9826
  { method: "get", class: "needle", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9825
9827
  { method: "post", class: "needle", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
@@ -10239,6 +10241,21 @@ function getDefaultConfig() {
10239
10241
  }
10240
10242
 
10241
10243
  // src/analysis/taint-matcher.ts
10244
// Request-object accessors that carry user-controlled data when they appear
// inside a call argument expression. Each entry pairs a regular expression
// with the source type recorded for a match. Order matters: consumers stop at
// the first pattern that matches an expression, so the original entries keep
// their positions and new ones are appended.
var PYTHON_TAINTED_PATTERNS = [
  { pattern: /\brequest\.args\b/, sourceType: "http_param" },
  { pattern: /\brequest\.form\b/, sourceType: "http_body" },
  { pattern: /\brequest\.json\b/, sourceType: "http_body" },
  { pattern: /\brequest\.data\b/, sourceType: "http_body" },
  { pattern: /\brequest\.files?\b/, sourceType: "file_input" },
  { pattern: /\brequest\.headers?\b/, sourceType: "http_header" },
  { pattern: /\brequest\.cookies\b/, sourceType: "http_cookie" },
  { pattern: /\brequest\.GET\b/, sourceType: "http_param" },
  { pattern: /\brequest\.POST\b/, sourceType: "http_body" },
  { pattern: /\brequest\.META\b/, sourceType: "http_header" },
  { pattern: /\brequest\.FILES\b/, sourceType: "file_input" },
  { pattern: /\brequest\.query_params\b/, sourceType: "http_param" },
  { pattern: /\brequest\.path_params\b/, sourceType: "http_param" },
  // Raw query/body accessors and request-wrapper helpers. These mirror the
  // entries of the assignment-level table (PYTHON_TAINTED_PATTERNS2) so the
  // same expression is a source whether it is assigned or passed directly.
  { pattern: /\brequest\.query_string\b/, sourceType: "http_param" },
  { pattern: /\brequest\.get_data\s*\(/, sourceType: "http_body" },
  { pattern: /\bget_form_parameter\s*\(/, sourceType: "http_body" },
  { pattern: /\bget_query_parameter\s*\(/, sourceType: "http_param" },
  { pattern: /\bget_header_value\s*\(/, sourceType: "http_header" },
  { pattern: /\bget_cookie_value\s*\(/, sourceType: "http_cookie" },
  // Further request accessors: combined args+form, parsed JSON body, raw body.
  { pattern: /\brequest\.values\b/, sourceType: "http_param" },
  { pattern: /\brequest\.get_json\s*\(/, sourceType: "http_body" },
  { pattern: /\brequest\.body\b/, sourceType: "http_body" }
];
10242
10259
  function analyzeTaint(calls, types, config = getDefaultConfig()) {
10243
10260
  const sources = findSources(calls, types, config.sources);
10244
10261
  const sinks = findSinks(calls, config.sinks);
@@ -10321,6 +10338,28 @@ function findSources(calls, types, patterns) {
10321
10338
  }
10322
10339
  }
10323
10340
  }
10341
+ for (const call of calls) {
10342
+ for (const arg of call.arguments) {
10343
+ if (!arg.expression) continue;
10344
+ for (const { pattern, sourceType } of PYTHON_TAINTED_PATTERNS) {
10345
+ if (pattern.test(arg.expression)) {
10346
+ const alreadyExists = sources.some(
10347
+ (s) => s.line === call.location.line && s.type === sourceType
10348
+ );
10349
+ if (!alreadyExists) {
10350
+ sources.push({
10351
+ type: sourceType,
10352
+ location: `${arg.expression} in ${call.in_method || "anonymous"}`,
10353
+ severity: "high",
10354
+ line: call.location.line,
10355
+ confidence: 1
10356
+ });
10357
+ }
10358
+ break;
10359
+ }
10360
+ }
10361
+ }
10362
+ }
10324
10363
  const sourceMap = /* @__PURE__ */ new Map();
10325
10364
  for (const source of sources) {
10326
10365
  const key = `${source.line}:${source.type}`;
@@ -11274,7 +11313,20 @@ function analyzeInterprocedural(types, calls, dfg, sources, sinks, sanitizers, o
11274
11313
  "hashCode",
11275
11314
  "equals",
11276
11315
  "clone",
11277
- "clear"
11316
+ "clear",
11317
+ // StringBuilder / StringBuffer / Writer accumulator methods — taint propagates through these
11318
+ // but the CWE-668 sink check should not fire on pure string accumulation
11319
+ "append",
11320
+ "insert",
11321
+ "prepend",
11322
+ "concat",
11323
+ "delete",
11324
+ "deleteCharAt",
11325
+ "replace",
11326
+ "reverse",
11327
+ "write",
11328
+ "writeln",
11329
+ "println"
11278
11330
  ]);
11279
11331
  const safeUtilityMethods = /* @__PURE__ */ new Set([
11280
11332
  // Path validation and normalization
@@ -11305,7 +11357,25 @@ function analyzeInterprocedural(types, calls, dfg, sources, sinks, sanitizers, o
11305
11357
  "validate",
11306
11358
  "validateInput",
11307
11359
  "check",
11308
- "verify"
11360
+ "verify",
11361
+ // I/O stream wrappers — pure decorators that wrap a stream, not security sinks
11362
+ // e.g. new InputStreamReader(proc.getInputStream()) is safe; the underlying stream is the source
11363
+ "InputStreamReader",
11364
+ "OutputStreamWriter",
11365
+ "BufferedInputStream",
11366
+ "BufferedOutputStream",
11367
+ "ByteArrayInputStream",
11368
+ "ByteArrayOutputStream",
11369
+ "DataInputStream",
11370
+ "DataOutputStream",
11371
+ "PushbackInputStream",
11372
+ "SequenceInputStream",
11373
+ "BufferedReader",
11374
+ "BufferedWriter",
11375
+ "PrintStream",
11376
+ "PrintWriter",
11377
+ "ObjectOutputStream"
11378
+ // ObjectInputStream IS a sink (deserialization), keep it out
11309
11379
  ]);
11310
11380
  const sanitizerMethods = /* @__PURE__ */ new Set();
11311
11381
  for (const san of sanitizers) {
@@ -16509,6 +16579,29 @@ var JS_TAINTED_PATTERNS = [
16509
16579
  { pattern: /\bdocument\.querySelector\b/, type: "dom_input" },
16510
16580
  { pattern: /\.value\b/, type: "dom_input" }
16511
16581
  ];
16582
// Expressions that yield user-controlled data in Python web handlers, used by
// the line-based Python scanners in this module. Each entry pairs a regular
// expression with the source type reported for a match. Consumers stop at the
// first matching entry, so existing entries keep their order and additions are
// appended at the end.
var PYTHON_TAINTED_PATTERNS2 = [
  { pattern: /\brequest\.args\b/, type: "http_param" },
  { pattern: /\brequest\.form\b/, type: "http_body" },
  { pattern: /\brequest\.json\b/, type: "http_body" },
  { pattern: /\brequest\.data\b/, type: "http_body" },
  { pattern: /\brequest\.files?\b/, type: "file_input" },
  { pattern: /\brequest\.headers?\b/, type: "http_header" },
  { pattern: /\brequest\.cookies\b/, type: "http_cookie" },
  { pattern: /\brequest\.GET\b/, type: "http_param" },
  { pattern: /\brequest\.POST\b/, type: "http_body" },
  { pattern: /\brequest\.META\b/, type: "http_header" },
  { pattern: /\brequest\.FILES\b/, type: "file_input" },
  { pattern: /\brequest\.query_params\b/, type: "http_param" },
  { pattern: /\brequest\.path_params\b/, type: "http_param" },
  // Flask raw query/body strings
  { pattern: /\brequest\.query_string\b/, type: "http_param" },
  { pattern: /\brequest\.get_data\s*\(/, type: "http_body" },
  // Request wrapper helper methods (common in OWASP-style benchmarks and real wrappers)
  { pattern: /\bget_form_parameter\s*\(/, type: "http_body" },
  { pattern: /\bget_query_parameter\s*\(/, type: "http_param" },
  { pattern: /\bget_header_value\s*\(/, type: "http_header" },
  { pattern: /\bget_cookie_value\s*\(/, type: "http_cookie" },
  // Further request accessors: combined args+form, parsed JSON body, raw body.
  // request.get_json() is not covered by the request.json entry above.
  { pattern: /\brequest\.values\b/, type: "http_param" },
  { pattern: /\brequest\.get_json\s*\(/, type: "http_body" },
  { pattern: /\brequest\.body\b/, type: "http_body" }
];
16512
16605
  function findJavaScriptAssignmentSources(sourceCode, language) {
16513
16606
  const sources = [];
16514
16607
  if (!["javascript", "typescript"].includes(language)) {
@@ -16544,6 +16637,212 @@ function findJavaScriptAssignmentSources(sourceCode, language) {
16544
16637
  }
16545
16638
  return sources;
16546
16639
  }
16640
+ function findPythonAssignmentSources(sourceCode, language) {
16641
+ const sources = [];
16642
+ if (language !== "python") {
16643
+ return sources;
16644
+ }
16645
+ const lines = sourceCode.split("\n");
16646
+ for (let lineNum = 0; lineNum < lines.length; lineNum++) {
16647
+ const line = lines[lineNum];
16648
+ const lineNumber = lineNum + 1;
16649
+ if (line.trimStart().startsWith("#")) continue;
16650
+ const assignmentMatch = line.match(/^(\s*\w[\w.]*)\s*(?::\s*\w[\w\[\], .]*)?\s*=\s*(.+)/);
16651
+ if (assignmentMatch) {
16652
+ const rhs = assignmentMatch[2];
16653
+ for (const { pattern, type } of PYTHON_TAINTED_PATTERNS2) {
16654
+ if (pattern.test(rhs)) {
16655
+ const varMatch = line.match(/^\s*(\w+)\s*/);
16656
+ const varName = varMatch ? varMatch[1] : "unknown";
16657
+ const alreadyExists = sources.some(
16658
+ (s) => s.line === lineNumber && s.type === type
16659
+ );
16660
+ if (!alreadyExists) {
16661
+ sources.push({
16662
+ type,
16663
+ location: `${varName} = ${rhs.trim().substring(0, 50)}${rhs.length > 50 ? "..." : ""}`,
16664
+ severity: "high",
16665
+ line: lineNumber,
16666
+ confidence: 0.95,
16667
+ variable: varName
16668
+ });
16669
+ }
16670
+ break;
16671
+ }
16672
+ }
16673
+ }
16674
+ }
16675
+ return sources;
16676
+ }
16677
/**
 * Builds a line-based approximation of which Python variables hold
 * user-controlled data.
 *
 * The source is read top to bottom, one physical line at a time. A variable
 * becomes tainted when it is assigned from a request accessor (see
 * PYTHON_TAINTED_PATTERNS2) or from an expression mentioning an already
 * tainted variable. Handled statement shapes, checked in this order:
 *   1. `container['key'] = value`    -> remembers the tainted container slot
 *   2. `obj.set('section', 'key', v)` -> remembers the tainted config slot
 *   3. `name += value`                -> taints (or keeps tainted) `name`
 *   4. `for name in iterable`         -> taints the loop variable
 *   5. `name = value`                 -> taints, or clears taint on overwrite
 * Lines starting with `#` are ignored. Both LF and CRLF line endings are
 * accepted.
 *
 * @param {string} sourceCode - Full text of the Python file.
 * @returns {Map<string, number>} Tainted variable name -> 1-based line recorded
 *   for it (the first tainting line for `+=`, the latest one otherwise).
 */
function buildPythonTaintedVars(sourceCode) {
  const tainted = new Map();
  const containerTainted = new Map();
  // Split on LF or CRLF: the `(.+)$` anchors below cannot match a line that
  // still ends in "\r", which would silently disable the analysis.
  const lines = sourceCode.split(/\r?\n/);

  // Whole-word matchers are compiled once per variable instead of per line.
  const matcherCache = new Map();
  const wordMatcher = (name) => {
    let matcher = matcherCache.get(name);
    if (matcher === undefined) {
      matcher = new RegExp(`\\b${name}\\b`);
      matcherCache.set(name, matcher);
    }
    return matcher;
  };
  // First tainted variable (in insertion order) mentioned by `expr`, if any.
  const firstTaintedIn = (expr) => {
    for (const name of tainted.keys()) {
      if (wordMatcher(name).test(expr)) return name;
    }
    return undefined;
  };
  const isDirectSource = (expr) => PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(expr));
  const carriesTaint = (expr) => isDirectSource(expr) || firstTaintedIn(expr) !== undefined;
  const isCodeLine = (text) => text.trim() !== "" && !text.trimStart().startsWith("#");
  // Nearest preceding line that is neither blank nor a comment.
  const previousCodeLine = (index) => {
    for (let k = index - 1; k >= 0; k--) {
      if (isCodeLine(lines[k])) return lines[k];
    }
    return undefined;
  };

  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    const lineNumber = index + 1;
    if (line.trimStart().startsWith("#")) continue;

    // 1. container['key'] = value
    const subscriptAssign = line.match(/^\s*(\w+)\[(['"])([^'"]+)\2\]\s*=(?!=)\s*(.+)$/);
    if (subscriptAssign) {
      const [, container, , key, value] = subscriptAssign;
      if (carriesTaint(value)) {
        containerTainted.set(`${container}['${key}']`, lineNumber);
      }
      continue;
    }

    // 2. obj.set('section', 'key', value)
    const setCall = line.match(/^\s*(\w+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/);
    if (setCall) {
      const [, obj, , section, , key, value] = setCall;
      if (carriesTaint(value)) {
        containerTainted.set(`${obj}['${section}']['${key}']`, lineNumber);
      }
      continue;
    }

    // 3. name += value — appending never removes existing taint, and the
    //    originally recorded line is kept.
    const augAssign = line.match(/^\s*(\w+)\s*\+=\s*(.+)$/);
    if (augAssign) {
      const [, target, value] = augAssign;
      if (carriesTaint(value) || tainted.has(target)) {
        tainted.set(target, tainted.get(target) ?? lineNumber);
      }
      continue;
    }

    // 4. for name in iterable:
    const forLoop = line.match(/^\s*for\s+(\w+)\s+in\s+(.+?)(?:\s*:\s*)?$/);
    if (forLoop) {
      const [, loopVar, iterable] = forLoop;
      if (carriesTaint(iterable)) {
        tainted.set(loopVar, lineNumber);
      }
      continue;
    }

    // 5. name = value (`=(?!=)` keeps `==` comparisons out)
    const assign = line.match(/^\s*(\w+)\s*=(?!=)\s*(.+)$/);
    if (!assign) continue;
    const [, lhs, rhs] = assign;
    const rhsText = rhs.trim();

    let propagatedFrom;
    // Read back from a tainted container slot: name = container['key']
    const dictAccess = rhsText.match(/^(\w+)\[(['"])([^'"]+)\2\]$/);
    if (dictAccess) {
      const slot = `${dictAccess[1]}['${dictAccess[3]}']`;
      if (containerTainted.has(slot)) propagatedFrom = slot;
    }
    // Read back from a tainted config slot: name = obj.get('section', 'key')
    if (propagatedFrom === undefined) {
      const confGet = rhsText.match(/^(\w+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/);
      if (confGet) {
        const slot = `${confGet[1]}['${confGet[3]}']['${confGet[5]}']`;
        if (containerTainted.has(slot)) propagatedFrom = slot;
      }
    }
    // Plain propagation from a tainted variable. Environment reads are treated
    // as safe even when their argument mentions a tainted name.
    if (propagatedFrom === undefined) {
      const isSafeEnvRead = /\bos\.environ\.get\s*\(/.test(rhs) || /\bos\.getenv\s*\(/.test(rhs);
      if (!isSafeEnvRead) {
        propagatedFrom = firstTaintedIn(rhs);
      }
    }

    if (isDirectSource(rhs) || propagatedFrom !== undefined) {
      tainted.set(lhs, lineNumber);
    } else if (tainted.has(lhs)) {
      // Overwritten with a clean value: drop the taint, unless the overwrite is
      // the default branch of a null guard (`if not x:` / `if x is None:`), in
      // which case the tainted value still flows on the other path.
      const guardLine = previousCodeLine(index);
      const isNullGuard = guardLine !== undefined && (
        new RegExp(`^\\s*if\\s+not\\s+${lhs}\\s*:`).test(guardLine) ||
        new RegExp(`^\\s*if\\s+${lhs}\\s+is\\s+None\\s*:`).test(guardLine)
      );
      if (!isNullGuard) {
        tainted.delete(lhs);
      }
    }
  }
  return tainted;
}
16762
/**
 * Builds a line-based approximation of which JavaScript/TypeScript variables
 * hold user-controlled data.
 *
 * For every line containing an assignment, the assigned name becomes tainted
 * when the right-hand side matches a known source (see JS_TAINTED_PATTERNS) or
 * mentions an already tainted variable. Taint is never removed. Lines starting
 * with `//` or `*` are treated as comments and skipped.
 *
 * @param {string} sourceCode - Full text of the file.
 * @param {string} language - Language id; only "javascript" and "typescript" are analysed.
 * @returns {Map<string, number>} Tainted variable name -> 1-based line of its
 *   most recent tainting assignment. Empty for other languages.
 */
function buildJavaScriptTaintedVars(sourceCode, language) {
  if (!["javascript", "typescript"].includes(language)) return new Map();
  // Words that can precede `=` without being an assignment target.
  const NON_TARGET_WORDS = new Set(["if", "while", "for", "return", "true", "false", "null", "undefined", "case"]);
  // `=(?!=)` rejects `==` / `===`, so `if (a == tainted)` no longer taints `a`.
  const ASSIGNMENT = /(?:(?:var|let|const)\s+)?(\w+)\s*=(?!=)\s*(.+)/;
  const tainted = new Map();
  // One whole-word matcher per tainted name, compiled when the name is first
  // tainted rather than once per line.
  const taintedMatchers = [];
  const lines = sourceCode.split("\n");
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    const trimmed = line.trimStart();
    if (trimmed.startsWith("//") || trimmed.startsWith("*")) continue;
    const assignment = line.match(ASSIGNMENT);
    if (!assignment) continue;
    const [, lhs, rhs] = assignment;
    if (NON_TARGET_WORDS.has(lhs)) continue;
    const isDirectSource = JS_TAINTED_PATTERNS.some((p) => p.pattern.test(rhs));
    const isPropagated = taintedMatchers.some((matcher) => matcher.test(rhs));
    if (!isDirectSource && !isPropagated) continue;
    if (!tainted.has(lhs)) {
      taintedMatchers.push(new RegExp(`\\b${lhs}\\b`));
    }
    tainted.set(lhs, index + 1);
  }
  return tainted;
}
16782
/**
 * Finds Python variables that are guarded by a "reject if it contains this
 * string" check, e.g.
 *
 *     if "'" in value:
 *         return error()
 *
 * A variable counts as sanitized when an `if <string literal> in <name>:` line
 * is followed by an exit statement (return / raise / abort / continue / break)
 * inside the guarded block. The block is searched up to five lines below the
 * `if`, stopping at the first non-blank line indented no deeper than the `if`.
 * An exit statement written on the same line as the `if` is recognised too.
 *
 * @param {string} sourceCode - Full text of the Python file.
 * @returns {Set<string>} Names of the guarded variables.
 */
function findPythonQuoteSanitizedVars(sourceCode) {
  // Group 1: indentation of the `if`. Group 2: guarded variable.
  // Group 3: anything after the colon on the same line (inline body or comment).
  const QUOTE_GUARD = /^(\s*)if\s+(?:'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")\s+in\s+(\w+)\s*:\s*(.*)/;
  const EXIT_STATEMENT = /^(return|raise|abort|continue|break)\b/;
  const LOOKAHEAD_LINES = 5;
  const sanitized = new Set();
  const lines = sourceCode.split("\n");
  for (let index = 0; index < lines.length; index++) {
    const guard = lines[index].match(QUOTE_GUARD);
    if (!guard) continue;
    const [, guardIndent, variable, inlineBody] = guard;
    // Single-line form: if "'" in value: return ...
    if (EXIT_STATEMENT.test(inlineBody)) {
      sanitized.add(variable);
      continue;
    }
    const lastCandidate = Math.min(index + LOOKAHEAD_LINES, lines.length - 1);
    for (let next = index + 1; next <= lastCandidate; next++) {
      const candidate = lines[next];
      const statement = candidate.trim();
      if (!statement) continue;
      const candidateIndent = candidate.match(/^\s*/)[0].length;
      // Dedented (or same-level) code means the guarded block has ended.
      if (candidateIndent <= guardIndent.length) break;
      if (EXIT_STATEMENT.test(statement)) {
        sanitized.add(variable);
        break;
      }
    }
  }
  return sanitized;
}
16806
/**
 * Finds writes of user-controlled data into the session
 * (`session[key] = value` or `flask.session[key] = value`) in Python source.
 *
 * A line is reported when either the subscript key or the assigned value
 * mentions one of the tainted variable names as a whole word. Comment lines
 * (starting with `#`) are skipped, comparisons such as `session[k] == v` are
 * not treated as writes, and both LF and CRLF line endings are accepted.
 *
 * @param {string} sourceCode - Full text of the file.
 * @param {string} language - Language id; anything other than "python" yields [].
 * @param {Map<string, number>} taintedVars - Tainted variable name -> line where
 *   it was tainted.
 * @returns {Array<{sourceLine: number, sinkLine: number}>} One entry per
 *   offending line. `sinkLine` is the 1-based line of the session write;
 *   `sourceLine` is the smallest line number in `taintedVars` (the same value
 *   for every entry, not the specific variable involved).
 */
function findPythonTrustBoundaryViolations(sourceCode, language, taintedVars) {
  if (language !== "python" || taintedVars.size === 0) return [];
  // `=(?!=)` accepts an assignment but rejects the `==` comparison operator.
  const SESSION_WRITE = /(?:flask\.)?session\[([^\]]+)\]\s*=(?!=)\s*(.+)$/;
  const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Compiled once up front instead of once per variable per line.
  const taintedMatchers = [...taintedVars.keys()].map(
    (name) => new RegExp(`\\b${escapeRegExp(name)}\\b`)
  );
  const mentionsTainted = (expr) => taintedMatchers.some((matcher) => matcher.test(expr));
  const earliestSourceLine = Math.min(...taintedVars.values());
  const violations = [];
  // Split on LF or CRLF: with a trailing "\r" left on the line, `(.+)$` in
  // SESSION_WRITE can never match and every write would be missed.
  const lines = sourceCode.split(/\r?\n/);
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    if (line.trimStart().startsWith("#")) continue;
    const write = line.match(SESSION_WRITE);
    if (!write) continue;
    const [, keyExpr, valueExpr] = write;
    if (mentionsTainted(keyExpr) || mentionsTainted(valueExpr)) {
      violations.push({ sourceLine: earliestSourceLine, sinkLine: index + 1 });
    }
  }
  return violations;
}
16827
/**
 * Finds Python `return` / `yield` statements that appear to build HTML from
 * user-controlled data.
 *
 * A line is reported when the returned expression mentions a tainted variable
 * as a whole word AND looks like markup or string building: it contains `<`,
 * concatenates with a quoted string via `+`, or is an f-string with a `{`
 * placeholder. Comment lines (starting with `#`) are skipped; both LF and CRLF
 * line endings are accepted.
 *
 * @param {string} sourceCode - Full text of the file.
 * @param {string} language - Language id; anything other than "python" yields [].
 * @param {Map<string, number>} taintedVars - Tainted variable name -> line; only
 *   the names are used here.
 * @returns {Array<{sinkLine: number}>} 1-based line numbers of the suspicious
 *   return/yield statements, in file order.
 */
function findPythonReturnXSSSinks(sourceCode, language, taintedVars) {
  if (language !== "python" || taintedVars.size === 0) return [];
  const RETURN_OR_YIELD = /^\s*(?:return|yield)\s+(.+)$/;
  const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Compiled once up front instead of once per variable per line.
  const taintedMatchers = [...taintedVars.keys()].map(
    (name) => new RegExp(`\\b${escapeRegExp(name)}\\b`)
  );
  const looksLikeHTML = (expr) =>
    expr.includes("<") ||
    /['"]\s*\+/.test(expr) ||
    /\+\s*['"]/.test(expr) ||
    /f['"][^'"]*\{/.test(expr);
  const sinks = [];
  // Split on LF or CRLF: with a trailing "\r" left on the line, `(.+)$` in
  // RETURN_OR_YIELD can never match and every sink would be missed.
  const lines = sourceCode.split(/\r?\n/);
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    if (line.trimStart().startsWith("#")) continue;
    const statement = line.match(RETURN_OR_YIELD);
    if (!statement) continue;
    const expr = statement[1];
    if (!taintedMatchers.some((matcher) => matcher.test(expr))) continue;
    if (!looksLikeHTML(expr)) continue;
    sinks.push({ sinkLine: index + 1 });
  }
  return sinks;
}
16547
16846
  function findJavaScriptDOMSinks(sourceCode, language) {
16548
16847
  const sinks = [];
16549
16848
  if (!["javascript", "typescript"].includes(language)) {
@@ -16783,6 +17082,8 @@ async function analyze(code, filePath, language, options = {}) {
16783
17082
  taint.sources.push(...getterSources);
16784
17083
  const jsAssignmentSources = findJavaScriptAssignmentSources(code, language);
16785
17084
  taint.sources.push(...jsAssignmentSources);
17085
+ const pythonAssignmentSources = findPythonAssignmentSources(code, language);
17086
+ taint.sources.push(...pythonAssignmentSources);
16786
17087
  const jsDOMSinks = findJavaScriptDOMSinks(code, language);
16787
17088
  for (const domSink of jsDOMSinks) {
16788
17089
  const alreadyExists = taint.sinks.some(
@@ -16823,6 +17124,82 @@ async function analyze(code, filePath, language, options = {}) {
16823
17124
  constPropResult.synchronizedLines
16824
17125
  );
16825
17126
  taint.sinks = filterSanitizedSinks(taint.sinks, taint.sanitizers ?? [], calls);
17127
+ if (language === "python") {
17128
+ const pyTaintedVars = buildPythonTaintedVars(code);
17129
+ const pySanitizedVars = findPythonQuoteSanitizedVars(code);
17130
+ for (const line of code.split("\n")) {
17131
+ const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
17132
+ if (!am) continue;
17133
+ const [, lhs, rhs] = am;
17134
+ if ([...pySanitizedVars].some((v) => new RegExp(`\\b${v}\\b`).test(rhs))) {
17135
+ pySanitizedVars.add(lhs);
17136
+ }
17137
+ }
17138
+ for (const line of code.split("\n")) {
17139
+ const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
17140
+ if (!am) continue;
17141
+ const [, lhs, rhs] = am;
17142
+ const hasReplaceOnTainted = [...pyTaintedVars.keys()].some(
17143
+ (v) => new RegExp(`\\b${v}\\.replace\\s*\\(`).test(rhs)
17144
+ );
17145
+ if (hasReplaceOnTainted) pySanitizedVars.add(lhs);
17146
+ }
17147
+ const pySourceLines = code.split("\n");
17148
+ taint.sinks = taint.sinks.filter((sink) => {
17149
+ if (sink.type !== "xpath_injection") return true;
17150
+ const sinkLineText = pySourceLines[sink.line - 1] ?? "";
17151
+ const taintedVarOnLine = [...pyTaintedVars.keys()].find(
17152
+ (v) => new RegExp(`\\b${v}\\b`).test(sinkLineText)
17153
+ );
17154
+ if (!taintedVarOnLine) return false;
17155
+ if (pySanitizedVars.has(taintedVarOnLine)) return false;
17156
+ if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText)) return false;
17157
+ return true;
17158
+ });
17159
+ const trustViolations = findPythonTrustBoundaryViolations(code, language, pyTaintedVars);
17160
+ for (const v of trustViolations) {
17161
+ const alreadyExists = taint.sinks.some(
17162
+ (s) => s.line === v.sinkLine && s.type === "trust_boundary"
17163
+ );
17164
+ if (!alreadyExists) {
17165
+ taint.sinks.push({
17166
+ type: "trust_boundary",
17167
+ cwe: "CWE-501",
17168
+ line: v.sinkLine,
17169
+ location: `session write at line ${v.sinkLine}`,
17170
+ confidence: 0.85
17171
+ });
17172
+ }
17173
+ }
17174
+ const pyReturnXSS = findPythonReturnXSSSinks(code, language, pyTaintedVars);
17175
+ for (const r of pyReturnXSS) {
17176
+ const alreadyExists = taint.sinks.some(
17177
+ (s) => s.line === r.sinkLine && s.type === "xss"
17178
+ );
17179
+ if (!alreadyExists) {
17180
+ taint.sinks.push({
17181
+ type: "xss",
17182
+ cwe: "CWE-79",
17183
+ line: r.sinkLine,
17184
+ location: `return HTML with user input at line ${r.sinkLine}`,
17185
+ confidence: 0.9
17186
+ });
17187
+ }
17188
+ }
17189
+ }
17190
+ if (["javascript", "typescript"].includes(language)) {
17191
+ const jsTaintedVars = buildJavaScriptTaintedVars(code, language);
17192
+ if (jsTaintedVars.size > 0) {
17193
+ const jsSourceLines = code.split("\n");
17194
+ taint.sinks = taint.sinks.filter((sink) => {
17195
+ if (sink.type !== "xss") return true;
17196
+ const sinkLineText = jsSourceLines[sink.line - 1] ?? "";
17197
+ if ([...jsTaintedVars.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(sinkLineText))) return true;
17198
+ if (JS_TAINTED_PATTERNS.some((p) => p.pattern.test(sinkLineText))) return true;
17199
+ return false;
17200
+ });
17201
+ }
17202
+ }
16826
17203
  if (taint.sources.length > 0 && taint.sinks.length > 0) {
16827
17204
  const propagationResult = propagateTaint(
16828
17205
  dfg,
@@ -16942,6 +17319,7 @@ async function analyze(code, filePath, language, options = {}) {
16942
17319
  }
16943
17320
  );
16944
17321
  for (const sink of interProc.propagatedSinks) {
17322
+ if (sink.type === "external_taint_escape") continue;
16945
17323
  if (!taint.sinks.some((s) => s.line === sink.line)) {
16946
17324
  taint.sinks.push(sink);
16947
17325
  }
@@ -17152,7 +17530,59 @@ async function analyzeForAPI(code, filePath, language, options = {}) {
17152
17530
  constPropResult.synchronizedLines
17153
17531
  );
17154
17532
  filteredSinks = filterSanitizedSinks(filteredSinks, taint.sanitizers ?? [], calls);
17533
+ let pythonTaintedVars = /* @__PURE__ */ new Map();
17534
+ if (language === "python") {
17535
+ pythonTaintedVars = buildPythonTaintedVars(code);
17536
+ const pythonSanitizedVars = findPythonQuoteSanitizedVars(code);
17537
+ for (const line of code.split("\n")) {
17538
+ const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
17539
+ if (!am) continue;
17540
+ const [, lhs, rhs] = am;
17541
+ if ([...pythonSanitizedVars].some((v) => new RegExp(`\\b${v}\\b`).test(rhs))) {
17542
+ pythonSanitizedVars.add(lhs);
17543
+ }
17544
+ }
17545
+ for (const line of code.split("\n")) {
17546
+ const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
17547
+ if (!am) continue;
17548
+ const [, lhs, rhs] = am;
17549
+ const hasReplaceOnTainted = [...pythonTaintedVars.keys()].some(
17550
+ (v) => new RegExp(`\\b${v}\\.replace\\s*\\(`).test(rhs)
17551
+ );
17552
+ if (hasReplaceOnTainted) pythonSanitizedVars.add(lhs);
17553
+ }
17554
+ const sourceLines = code.split("\n");
17555
+ filteredSinks = filteredSinks.filter((sink) => {
17556
+ if (sink.type !== "xpath_injection") return true;
17557
+ const sinkLineText = sourceLines[sink.line - 1] ?? "";
17558
+ const taintedVarOnLine = [...pythonTaintedVars.keys()].find(
17559
+ (v) => new RegExp(`\\b${v}\\b`).test(sinkLineText)
17560
+ );
17561
+ if (!taintedVarOnLine) return false;
17562
+ if (pythonSanitizedVars.has(taintedVarOnLine)) return false;
17563
+ if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText)) return false;
17564
+ return true;
17565
+ });
17566
+ }
17155
17567
  const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
17568
+ if (language === "python") {
17569
+ const trustViolations = findPythonTrustBoundaryViolations(code, language, pythonTaintedVars);
17570
+ for (const v of trustViolations) {
17571
+ const alreadyReported = vulnerabilities.some(
17572
+ (existing) => existing.sink.line === v.sinkLine && existing.type === "trust_boundary"
17573
+ );
17574
+ if (!alreadyReported) {
17575
+ vulnerabilities.push({
17576
+ type: "trust_boundary",
17577
+ cwe: "CWE-501",
17578
+ severity: "medium",
17579
+ source: { line: v.sourceLine, type: "http_param" },
17580
+ sink: { line: v.sinkLine, type: "trust_boundary" },
17581
+ confidence: 0.85
17582
+ });
17583
+ }
17584
+ }
17585
+ }
17156
17586
  const analysisTime = performance.now() - analysisStart;
17157
17587
  const totalTime = performance.now() - startTime;
17158
17588
  return {
@@ -9319,9 +9319,7 @@ var DEFAULT_SINKS = [
9319
9319
  { method: "resolveURI", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9320
9320
  { method: "resolve", class: "SourceResolver", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9321
9321
  { method: "getSource", class: "SourceResolver", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9322
- // URL-based resource loading
9323
- { method: "URL", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [0] },
9324
- { method: "openStream", class: "URL", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [] },
9322
+ // NOTE: new URL(userInput) is SSRF (CWE-918), not path traversal — see ssrf section below
9325
9323
  // Servlet context resource loading
9326
9324
  { method: "getResource", class: "ServletContext", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9327
9325
  { method: "getResourceAsStream", class: "ServletContext", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
@@ -9358,8 +9356,7 @@ var DEFAULT_SINKS = [
9358
9356
  { method: "extract", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
9359
9357
  { method: "extractAll", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
9360
9358
  { method: "unjar", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
9361
- // Additional file constructors
9362
- { method: "BufferedReader", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9359
+ // Additional file constructors — BufferedReader(Reader) is NOT a path traversal sink; it wraps a Reader, not a file path
9363
9360
  { method: "PrintWriter", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9364
9361
  { method: "Scanner", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
9365
9362
  // Topic/queue names (for message queue systems - can be exploited for path traversal)
@@ -9882,9 +9879,12 @@ var DEFAULT_SINKS = [
9882
9879
  { method: "execSync", class: "child_process", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
9883
9880
  { method: "spawn", class: "child_process", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
9884
9881
  { method: "spawnSync", class: "child_process", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
9885
- // Also match without receiver (destructured imports)
9882
+ // Also match without receiver (destructured imports: const { exec } = require('child_process'))
9886
9883
  { method: "exec", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9887
9884
  { method: "execSync", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9885
+ { method: "spawn", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9886
+ { method: "spawnSync", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9887
+ { method: "execFile", type: "command_injection", cwe: "CWE-78", severity: "high", arg_positions: [0] },
9888
9888
  // Node.js File System (path traversal)
9889
9889
  { method: "readFile", class: "fs", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0] },
9890
9890
  { method: "readFileSync", class: "fs", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0] },
@@ -9927,7 +9927,9 @@ var DEFAULT_SINKS = [
9927
9927
  { method: "request", class: "axios", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9928
9928
  { method: "fetch", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9929
9929
  { method: "request", class: "http", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9930
+ { method: "get", class: "http", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9930
9931
  { method: "request", class: "https", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9932
+ { method: "get", class: "https", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9931
9933
  // needle library (used in NodeGoat)
9932
9934
  { method: "get", class: "needle", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
9933
9935
  { method: "post", class: "needle", type: "ssrf", cwe: "CWE-918", severity: "high", arg_positions: [0] },
@@ -10347,6 +10349,21 @@ function getDefaultConfig() {
10347
10349
  }
10348
10350
 
10349
10351
  // src/analysis/taint-matcher.ts
10352
// Request-object accessors that carry user-controlled data when they appear
// inside a call argument expression. Each entry pairs a regular expression
// with the source type recorded for a match. Order matters: consumers stop at
// the first pattern that matches an expression, so the original entries keep
// their positions and new ones are appended.
var PYTHON_TAINTED_PATTERNS = [
  { pattern: /\brequest\.args\b/, sourceType: "http_param" },
  { pattern: /\brequest\.form\b/, sourceType: "http_body" },
  { pattern: /\brequest\.json\b/, sourceType: "http_body" },
  { pattern: /\brequest\.data\b/, sourceType: "http_body" },
  { pattern: /\brequest\.files?\b/, sourceType: "file_input" },
  { pattern: /\brequest\.headers?\b/, sourceType: "http_header" },
  { pattern: /\brequest\.cookies\b/, sourceType: "http_cookie" },
  { pattern: /\brequest\.GET\b/, sourceType: "http_param" },
  { pattern: /\brequest\.POST\b/, sourceType: "http_body" },
  { pattern: /\brequest\.META\b/, sourceType: "http_header" },
  { pattern: /\brequest\.FILES\b/, sourceType: "file_input" },
  { pattern: /\brequest\.query_params\b/, sourceType: "http_param" },
  { pattern: /\brequest\.path_params\b/, sourceType: "http_param" },
  // Raw query/body accessors and request-wrapper helpers: expressions that are
  // user input regardless of whether they are assigned first or passed
  // directly as a call argument.
  { pattern: /\brequest\.query_string\b/, sourceType: "http_param" },
  { pattern: /\brequest\.get_data\s*\(/, sourceType: "http_body" },
  { pattern: /\bget_form_parameter\s*\(/, sourceType: "http_body" },
  { pattern: /\bget_query_parameter\s*\(/, sourceType: "http_param" },
  { pattern: /\bget_header_value\s*\(/, sourceType: "http_header" },
  { pattern: /\bget_cookie_value\s*\(/, sourceType: "http_cookie" },
  // Further request accessors: combined args+form, parsed JSON body, raw body.
  { pattern: /\brequest\.values\b/, sourceType: "http_param" },
  { pattern: /\brequest\.get_json\s*\(/, sourceType: "http_body" },
  { pattern: /\brequest\.body\b/, sourceType: "http_body" }
];
10350
10367
  function analyzeTaint(calls, types, config = getDefaultConfig()) {
10351
10368
  const sources = findSources(calls, types, config.sources);
10352
10369
  const sinks = findSinks(calls, config.sinks);
@@ -10429,6 +10446,28 @@ function findSources(calls, types, patterns) {
10429
10446
  }
10430
10447
  }
10431
10448
  }
10449
+ for (const call of calls) {
10450
+ for (const arg of call.arguments) {
10451
+ if (!arg.expression) continue;
10452
+ for (const { pattern, sourceType } of PYTHON_TAINTED_PATTERNS) {
10453
+ if (pattern.test(arg.expression)) {
10454
+ const alreadyExists = sources.some(
10455
+ (s) => s.line === call.location.line && s.type === sourceType
10456
+ );
10457
+ if (!alreadyExists) {
10458
+ sources.push({
10459
+ type: sourceType,
10460
+ location: `${arg.expression} in ${call.in_method || "anonymous"}`,
10461
+ severity: "high",
10462
+ line: call.location.line,
10463
+ confidence: 1
10464
+ });
10465
+ }
10466
+ break;
10467
+ }
10468
+ }
10469
+ }
10470
+ }
10432
10471
  const sourceMap = /* @__PURE__ */ new Map();
10433
10472
  for (const source of sources) {
10434
10473
  const key = `${source.line}:${source.type}`;