circle-ir 3.59.0 → 3.64.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/analysis/config-loader.d.ts.map +1 -1
  2. package/dist/analysis/config-loader.js +58 -17
  3. package/dist/analysis/config-loader.js.map +1 -1
  4. package/dist/analysis/html/html-merge.d.ts.map +1 -1
  5. package/dist/analysis/html/html-merge.js +10 -0
  6. package/dist/analysis/html/html-merge.js.map +1 -1
  7. package/dist/analysis/interprocedural.d.ts.map +1 -1
  8. package/dist/analysis/interprocedural.js +44 -11
  9. package/dist/analysis/interprocedural.js.map +1 -1
  10. package/dist/analysis/passes/language-sources-pass.d.ts +7 -1
  11. package/dist/analysis/passes/language-sources-pass.d.ts.map +1 -1
  12. package/dist/analysis/passes/language-sources-pass.js +112 -15
  13. package/dist/analysis/passes/language-sources-pass.js.map +1 -1
  14. package/dist/analysis/passes/missing-public-doc-pass.d.ts.map +1 -1
  15. package/dist/analysis/passes/missing-public-doc-pass.js +2 -1
  16. package/dist/analysis/passes/missing-public-doc-pass.js.map +1 -1
  17. package/dist/analysis/passes/sink-filter-pass.d.ts.map +1 -1
  18. package/dist/analysis/passes/sink-filter-pass.js +4 -1
  19. package/dist/analysis/passes/sink-filter-pass.js.map +1 -1
  20. package/dist/analysis/passes/taint-propagation-pass.d.ts.map +1 -1
  21. package/dist/analysis/passes/taint-propagation-pass.js +222 -10
  22. package/dist/analysis/passes/taint-propagation-pass.js.map +1 -1
  23. package/dist/analysis/passes/weak-random-pass.d.ts.map +1 -1
  24. package/dist/analysis/passes/weak-random-pass.js +2 -1
  25. package/dist/analysis/passes/weak-random-pass.js.map +1 -1
  26. package/dist/analysis/taint-matcher.d.ts.map +1 -1
  27. package/dist/analysis/taint-matcher.js +83 -7
  28. package/dist/analysis/taint-matcher.js.map +1 -1
  29. package/dist/analysis/taint-propagation.d.ts.map +1 -1
  30. package/dist/analysis/taint-propagation.js +32 -0
  31. package/dist/analysis/taint-propagation.js.map +1 -1
  32. package/dist/analyzer.d.ts.map +1 -1
  33. package/dist/analyzer.js +19 -2
  34. package/dist/analyzer.js.map +1 -1
  35. package/dist/browser/circle-ir.js +526 -67
  36. package/dist/core/circle-ir-core.cjs +270 -33
  37. package/dist/core/circle-ir-core.js +270 -33
  38. package/dist/core/extractors/calls.js +181 -1
  39. package/dist/core/extractors/calls.js.map +1 -1
  40. package/dist/core/extractors/cfg.js +1 -1
  41. package/dist/core/extractors/cfg.js.map +1 -1
  42. package/dist/core/extractors/dfg.js +29 -3
  43. package/dist/core/extractors/dfg.js.map +1 -1
  44. package/dist/core/extractors/imports.js +1 -1
  45. package/dist/core/extractors/imports.js.map +1 -1
  46. package/dist/core/extractors/runtime-registrations.js +1 -1
  47. package/dist/core/extractors/runtime-registrations.js.map +1 -1
  48. package/dist/core/extractors/types.js +1 -1
  49. package/dist/core/extractors/types.js.map +1 -1
  50. package/dist/core/parser.d.ts +1 -1
  51. package/dist/core/parser.d.ts.map +1 -1
  52. package/dist/graph/scope-graph.d.ts.map +1 -1
  53. package/dist/graph/scope-graph.js +1 -0
  54. package/dist/graph/scope-graph.js.map +1 -1
  55. package/dist/languages/plugins/bash.d.ts.map +1 -1
  56. package/dist/languages/plugins/bash.js +17 -0
  57. package/dist/languages/plugins/bash.js.map +1 -1
  58. package/dist/languages/registry.d.ts.map +1 -1
  59. package/dist/languages/registry.js +6 -0
  60. package/dist/languages/registry.js.map +1 -1
  61. package/dist/languages/types.d.ts +1 -1
  62. package/dist/languages/types.d.ts.map +1 -1
  63. package/dist/types/index.d.ts +9 -1
  64. package/dist/types/index.d.ts.map +1 -1
  65. package/dist/wasm/tree-sitter-tsx.wasm +0 -0
  66. package/package.json +2 -1
@@ -4341,7 +4341,7 @@ function detectLanguage(tree) {
4341
4341
  }
4342
4342
  function extractTypes(tree, cache, language) {
4343
4343
  const effectiveLanguage = language ?? detectLanguage(tree);
4344
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
4344
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
4345
4345
  const isPython = effectiveLanguage === "python";
4346
4346
  const isRust = effectiveLanguage === "rust";
4347
4347
  if (effectiveLanguage === "go") {
@@ -5727,7 +5727,7 @@ function detectLanguageFromTree(tree, cache) {
5727
5727
  function extractCalls(tree, cache, language) {
5728
5728
  const calls = [];
5729
5729
  const detectedLanguage = language ?? detectLanguageFromTree(tree, cache);
5730
- const isJavaScript = detectedLanguage === "javascript" || detectedLanguage === "typescript";
5730
+ const isJavaScript = detectedLanguage === "javascript" || detectedLanguage === "typescript" || detectedLanguage === "tsx";
5731
5731
  const isPython = detectedLanguage === "python";
5732
5732
  const isRust = detectedLanguage === "rust";
5733
5733
  if (detectedLanguage === "go") {
@@ -5776,8 +5776,137 @@ function extractJavaScriptCalls(tree, cache) {
5776
5776
  calls.push(callInfo);
5777
5777
  }
5778
5778
  }
5779
+ const jsxAttributes = getNodesFromCache(tree.rootNode, "jsx_attribute", cache);
5780
+ for (const attr of jsxAttributes) {
5781
+ const callInfo = extractJSXAttributeSink(attr);
5782
+ if (callInfo) {
5783
+ calls.push(callInfo);
5784
+ }
5785
+ }
5786
+ const assignments = getNodesFromCache(tree.rootNode, "assignment_expression", cache);
5787
+ for (const assign of assignments) {
5788
+ const callInfo = extractDomPropertyAssignmentSink(assign);
5789
+ if (callInfo) {
5790
+ calls.push(callInfo);
5791
+ }
5792
+ }
5779
5793
  return calls;
5780
5794
  }
5795
+ var DOM_XSS_ASSIGNMENT_PROPERTIES = /* @__PURE__ */ new Set([
5796
+ "innerHTML",
5797
+ "outerHTML"
5798
+ ]);
5799
+ function extractDomPropertyAssignmentSink(node) {
5800
+ const leftNode = node.childForFieldName("left");
5801
+ const rightNode = node.childForFieldName("right");
5802
+ if (!leftNode || !rightNode) return null;
5803
+ if (leftNode.type !== "member_expression") return null;
5804
+ const propertyNode = leftNode.childForFieldName("property");
5805
+ const objectNode = leftNode.childForFieldName("object");
5806
+ if (!propertyNode) return null;
5807
+ const propertyName = getNodeText(propertyNode);
5808
+ if (!DOM_XSS_ASSIGNMENT_PROPERTIES.has(propertyName)) return null;
5809
+ const receiver = objectNode ? getNodeText(objectNode) : null;
5810
+ const expression = getNodeText(rightNode);
5811
+ const { variable, literal } = analyzeJSArgument(rightNode);
5812
+ const enclosingFunc = findJSEnclosingFunction(node);
5813
+ return {
5814
+ method_name: propertyName,
5815
+ receiver,
5816
+ arguments: [
5817
+ {
5818
+ position: 0,
5819
+ expression,
5820
+ variable,
5821
+ literal
5822
+ }
5823
+ ],
5824
+ location: {
5825
+ line: node.startPosition.row + 1,
5826
+ column: node.startPosition.column
5827
+ },
5828
+ in_method: enclosingFunc,
5829
+ resolved: true,
5830
+ resolution: {
5831
+ status: "resolved",
5832
+ target: `DOM.${propertyName}`
5833
+ }
5834
+ };
5835
+ }
5836
+ function extractJSXAttributeSink(attr) {
5837
+ let nameNode = null;
5838
+ for (let i2 = 0; i2 < attr.childCount; i2++) {
5839
+ const child = attr.child(i2);
5840
+ if (child && child.type === "property_identifier") {
5841
+ nameNode = child;
5842
+ break;
5843
+ }
5844
+ }
5845
+ if (!nameNode) return null;
5846
+ const attrName = getNodeText(nameNode);
5847
+ if (attrName !== "dangerouslySetInnerHTML") return null;
5848
+ let valueExpr = null;
5849
+ for (let i2 = 0; i2 < attr.childCount; i2++) {
5850
+ const child = attr.child(i2);
5851
+ if (child && child.type === "jsx_expression") {
5852
+ valueExpr = child;
5853
+ break;
5854
+ }
5855
+ }
5856
+ if (!valueExpr) return null;
5857
+ let htmlValue = null;
5858
+ for (let i2 = 0; i2 < valueExpr.childCount; i2++) {
5859
+ const inner = valueExpr.child(i2);
5860
+ if (!inner || inner.type !== "object") continue;
5861
+ for (let j = 0; j < inner.childCount; j++) {
5862
+ const pair = inner.child(j);
5863
+ if (!pair || pair.type !== "pair") continue;
5864
+ const keyNode = pair.childForFieldName("key");
5865
+ if (!keyNode) continue;
5866
+ const keyText = getNodeText(keyNode).replace(/^["']|["']$/g, "");
5867
+ if (keyText === "__html") {
5868
+ htmlValue = pair.childForFieldName("value");
5869
+ break;
5870
+ }
5871
+ }
5872
+ if (htmlValue) break;
5873
+ }
5874
+ if (!htmlValue) {
5875
+ for (let i2 = 0; i2 < valueExpr.childCount; i2++) {
5876
+ const inner = valueExpr.child(i2);
5877
+ if (inner && inner.type !== "{" && inner.type !== "}") {
5878
+ htmlValue = inner;
5879
+ break;
5880
+ }
5881
+ }
5882
+ }
5883
+ if (!htmlValue) return null;
5884
+ const expression = getNodeText(htmlValue);
5885
+ const { variable, literal } = analyzeJSArgument(htmlValue);
5886
+ const enclosingFunc = findJSEnclosingFunction(attr);
5887
+ return {
5888
+ method_name: "dangerouslySetInnerHTML",
5889
+ receiver: null,
5890
+ arguments: [
5891
+ {
5892
+ position: 0,
5893
+ expression,
5894
+ variable,
5895
+ literal
5896
+ }
5897
+ ],
5898
+ location: {
5899
+ line: attr.startPosition.row + 1,
5900
+ column: attr.startPosition.column
5901
+ },
5902
+ in_method: enclosingFunc,
5903
+ resolved: true,
5904
+ resolution: {
5905
+ status: "resolved",
5906
+ target: "react.dangerouslySetInnerHTML"
5907
+ }
5908
+ };
5909
+ }
5781
5910
  function buildJSResolutionContext(tree, cache) {
5782
5911
  const context = {
5783
5912
  functionNames: /* @__PURE__ */ new Set(),
@@ -7285,7 +7414,7 @@ function detectLanguage2(tree) {
7285
7414
  }
7286
7415
  function extractImports(tree, language) {
7287
7416
  const effectiveLanguage = language ?? detectLanguage2(tree);
7288
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
7417
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
7289
7418
  const isPython = effectiveLanguage === "python";
7290
7419
  const isRust = effectiveLanguage === "rust";
7291
7420
  if (effectiveLanguage === "go") {
@@ -7977,7 +8106,7 @@ function detectLanguage3(tree) {
7977
8106
  }
7978
8107
  function buildCFG(tree, language) {
7979
8108
  const effectiveLanguage = language ?? detectLanguage3(tree);
7980
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
8109
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
7981
8110
  const allBlocks = [];
7982
8111
  const allEdges = [];
7983
8112
  let blockIdCounter = 0;
@@ -8552,7 +8681,7 @@ function detectLanguage4(tree) {
8552
8681
  }
8553
8682
  function buildDFG(tree, cache, language) {
8554
8683
  const effectiveLanguage = language ?? detectLanguage4(tree);
8555
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
8684
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
8556
8685
  if (isJavaScript) {
8557
8686
  return buildJavaScriptDFG(tree, cache);
8558
8687
  }
@@ -9360,7 +9489,18 @@ function buildBashDFG(tree) {
9360
9489
  if (varNameNode) {
9361
9490
  const varName = getNodeText(varNameNode);
9362
9491
  if (varName && !varName.startsWith("?") && !varName.startsWith("#")) {
9363
- const reachingDef = findReachingDef(varName, scopeStack);
9492
+ let reachingDef = findReachingDef(varName, scopeStack);
9493
+ if (reachingDef === null && !positionalParams.includes(varName)) {
9494
+ const def = {
9495
+ id: defIdCounter++,
9496
+ variable: varName,
9497
+ line: 0,
9498
+ kind: "param"
9499
+ };
9500
+ defs.push(def);
9501
+ scopeStack[0].set(varName, def.id);
9502
+ reachingDef = def.id;
9503
+ }
9364
9504
  uses.push({
9365
9505
  id: useIdCounter++,
9366
9506
  variable: varName,
@@ -9373,7 +9513,18 @@ function buildBashDFG(tree) {
9373
9513
  const varNameNode = node.namedChildCount > 0 ? node.namedChild(0) : null;
9374
9514
  if (varNameNode && varNameNode.type === "variable_name") {
9375
9515
  const varName = getNodeText(varNameNode);
9376
- const reachingDef = findReachingDef(varName, scopeStack);
9516
+ let reachingDef = findReachingDef(varName, scopeStack);
9517
+ if (reachingDef === null && !positionalParams.includes(varName)) {
9518
+ const def = {
9519
+ id: defIdCounter++,
9520
+ variable: varName,
9521
+ line: 0,
9522
+ kind: "param"
9523
+ };
9524
+ defs.push(def);
9525
+ scopeStack[0].set(varName, def.id);
9526
+ reachingDef = def.id;
9527
+ }
9377
9528
  uses.push({
9378
9529
  id: useIdCounter++,
9379
9530
  variable: varName,
@@ -9835,7 +9986,7 @@ var FRAMEWORK_MODULE_PATTERNS = [
9835
9986
  /^@nestjs\/core$/
9836
9987
  ];
9837
9988
  function extractRuntimeRegistrations(tree, cache, language, imports) {
9838
- if (language === "javascript" || language === "typescript") {
9989
+ if (language === "javascript" || language === "typescript" || language === "tsx") {
9839
9990
  return extractJSRuntimeRegistrations(tree, cache, imports);
9840
9991
  }
9841
9992
  if (language === "python") {
@@ -10786,8 +10937,12 @@ var DEFAULT_SOURCES = [
10786
10937
  { method: "get", class: "cookies", type: "http_cookie", severity: "high", return_tainted: true },
10787
10938
  { property: "json", object: "request", type: "http_body", severity: "high", property_tainted: true },
10788
10939
  { property: "data", object: "request", type: "http_body", severity: "high", property_tainted: true },
10940
+ { property: "stream", object: "request", type: "http_body", severity: "high", property_tainted: true },
10789
10941
  { property: "path", object: "request", type: "http_path", severity: "medium", property_tainted: true },
10790
10942
  { property: "query_string", object: "request", type: "http_query", severity: "high", property_tainted: true },
10943
+ // Flask request.get_data() — raw request bytes (method form, parallel to request.data property)
10944
+ { method: "get_data", class: "request", type: "http_body", severity: "high", return_tainted: true },
10945
+ { method: "get_json", class: "request", type: "http_body", severity: "high", return_tainted: true },
10791
10946
  // Django request object
10792
10947
  { method: "get", class: "GET", type: "http_param", severity: "high", return_tainted: true },
10793
10948
  { method: "get", class: "POST", type: "http_param", severity: "high", return_tainted: true },
@@ -10999,14 +11154,19 @@ var DEFAULT_SINKS = [
10999
11154
  { method: "setExecutable", class: "ExecTask", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11000
11155
  { method: "setCommand", class: "ExecTask", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11001
11156
  { method: "execute", class: "Java", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11002
- // Shell/Bash utilities
11003
- { method: "bash", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11004
- { method: "shell", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11005
- { method: "sh", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11006
- { method: "spawn", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11007
- { method: "fork", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11008
- { method: "popen", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11009
- { method: "system", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11157
+ // Shell/Bash utilities — these are method-call sinks in host languages
11158
+ // (Java Runtime/ProcessBuilder, JS child_process spawn/exec, Python subprocess, etc.).
11159
+ // When the analyzed file IS a bash/shell script, the bash plugin's per-flag entries
11160
+ // (argPositions: [1] for `bash -c <cmd>`) MUST win. Restrict these generic entries
11161
+ // to non-shell languages so they don't collide on the dedup key
11162
+ // `${location}:${call.location.line}:${pattern.cwe}`.
11163
+ { method: "bash", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11164
+ { method: "shell", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11165
+ { method: "sh", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11166
+ { method: "spawn", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11167
+ { method: "fork", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11168
+ { method: "popen", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11169
+ { method: "system", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11010
11170
  // Apache Commons Exec
11011
11171
  // Note: bare class 'Executor' removed (see comment above) — DefaultExecutor matched explicitly.
11012
11172
  { method: "setCommandline", class: "DefaultExecutor", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
@@ -11096,6 +11256,12 @@ var DEFAULT_SINKS = [
11096
11256
  { method: "unzip", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11097
11257
  { method: "extract", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11098
11258
  { method: "extractAll", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11259
+ // Python zipfile/tarfile use lowercase extractall (PEP 8 naming)
11260
+ { method: "extractall", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0], languages: ["python"] },
11261
+ // Python zipfile.ZipFile(path) — tainted archive path enables Zip-Slip via malicious archive
11262
+ { method: "ZipFile", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["python"] },
11263
+ // Flask send_from_directory: untrusted filename can escape directory via ../
11264
+ { method: "send_from_directory", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [1], languages: ["python"] },
11099
11265
  { method: "unjar", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11100
11266
  // Additional file constructors — BufferedReader(Reader) is NOT a path traversal sink; it wraps a Reader, not a file path
11101
11267
  { method: "PrintWriter", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
@@ -12180,16 +12346,42 @@ var DEFAULT_SINKS = [
12180
12346
  // value position so a tainted variable is detected.
12181
12347
  { method: "Set", class: "Header", type: "crlf", cwe: "CWE-113", severity: "medium", arg_positions: [1], languages: ["go"] },
12182
12348
  { method: "Add", class: "Header", type: "crlf", cwe: "CWE-113", severity: "medium", arg_positions: [1], languages: ["go"] },
12183
- // Mass-assignment (CWE-915) — Sprint 6, #86.
12184
- // JS Object.assign(target, ...sources) — sources are arg 1..N, and if any
12185
- // source is request-tainted, every key gets written onto the target. We
12186
- // flag the source positions; the analyzer only needs one tainted to fire.
12187
- { method: "assign", class: "Object", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12188
- // Lodash bulk-merge helpers behave identically.
12189
- { method: "merge", class: "_", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12190
- { method: "extend", class: "_", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12349
+ // Mass-assignment (CWE-915 / CWE-1321) — Sprint 6, #86; cognium-dev #68 Sprint 10.
12350
+ // JS Object.assign(target, ...sources), `_.merge`, `_.extend`, `$.extend`,
12351
+ // `Object.defineProperty` when fed an attacker-controlled bag, they write
12352
+ // arbitrary keys onto the target (or, for `__proto__`/`constructor.prototype`,
12353
+ // pollute the prototype chain). The CWE is CWE-1321 (Prototype Pollution),
12354
+ // which subsumes mass assignment for JS sinks operating on plain Objects.
12355
+ // We keep the existing `mass_assignment` SinkType so consumers route the
12356
+ // findings the same way; only the CWE shifts to flag prototype-pollution.
12357
+ { method: "assign", class: "Object", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12358
+ { method: "defineProperty", class: "Object", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2], languages: ["javascript", "typescript"] },
12359
+ { method: "defineProperties", class: "Object", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1], languages: ["javascript", "typescript"] },
12360
+ // Lodash bulk-merge helpers behave identically. `_.merge` and `lodash.merge`
12361
+ // are aliases — match both receivers.
12362
+ { method: "merge", class: "_", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12363
+ { method: "merge", class: "lodash", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12364
+ { method: "extend", class: "_", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12365
+ { method: "extend", class: "lodash", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12366
+ { method: "defaultsDeep", class: "_", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12367
+ { method: "defaultsDeep", class: "lodash", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12191
12368
  // jQuery $.extend(target, source) (legacy).
12192
- { method: "extend", class: "$", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] }
12369
+ { method: "extend", class: "$", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12370
+ { method: "extend", class: "jQuery", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12371
+ // DOM-XSS via property assignment (CWE-79) — cognium-dev #68 Sprint 10.
12372
+ // `el.innerHTML = tainted` / `el.outerHTML = tainted`. The JS call extractor
12373
+ // emits a synthetic CallInfo with method=`innerHTML`/`outerHTML` for each
12374
+ // matching assignment_expression. These classless entries catch them.
12375
+ { method: "innerHTML", type: "xss", cwe: "CWE-79", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12376
+ { method: "outerHTML", type: "xss", cwe: "CWE-79", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12377
+ // node-serialize.unserialize (CWE-502) — cognium-dev #68 Sprint 10.
12378
+ // The node-serialize package evaluates `_$$ND_FUNC$$_` IIFE payloads on
12379
+ // decode, turning untrusted input into RCE. Match both receiver-bound
12380
+ // calls (`serialize.unserialize(x)`) and destructured imports
12381
+ // (`const { unserialize } = require('node-serialize')`).
12382
+ { method: "unserialize", class: "serialize", type: "deserialization", cwe: "CWE-502", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12383
+ { method: "unserialize", class: "node-serialize", type: "deserialization", cwe: "CWE-502", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12384
+ { method: "unserialize", type: "deserialization", cwe: "CWE-502", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] }
12193
12385
  ];
12194
12386
  var DEFAULT_SANITIZERS = [
12195
12387
  // SQL Injection - proper parameter binding sanitizes input
@@ -12551,7 +12743,8 @@ function findSources(calls, types, patterns, sourceLines, language) {
12551
12743
  location: formatCallLocation(call),
12552
12744
  severity: pattern.severity,
12553
12745
  line: call.location.line,
12554
- confidence: 1
12746
+ confidence: 1,
12747
+ in_method: call.in_method ?? void 0
12555
12748
  });
12556
12749
  }
12557
12750
  }
@@ -12568,7 +12761,8 @@ function findSources(calls, types, patterns, sourceLines, language) {
12568
12761
  location: `@${pattern.annotation} ${param.name} in ${method.name}`,
12569
12762
  severity: pattern.severity,
12570
12763
  line: paramLine,
12571
- confidence: 1
12764
+ confidence: 1,
12765
+ in_method: method.name
12572
12766
  });
12573
12767
  }
12574
12768
  }
@@ -12588,7 +12782,8 @@ function findSources(calls, types, patterns, sourceLines, language) {
12588
12782
  location: `@${pattern.method_annotation} ${param.name} in ${method.name}`,
12589
12783
  severity: pattern.severity,
12590
12784
  line: paramLine,
12591
- confidence: 1
12785
+ confidence: 1,
12786
+ in_method: method.name
12592
12787
  });
12593
12788
  }
12594
12789
  }
@@ -12615,7 +12810,8 @@ function findSources(calls, types, patterns, sourceLines, language) {
12615
12810
  severity: "high",
12616
12811
  line: paramLine,
12617
12812
  confidence: 1,
12618
- variable: param.name
12813
+ variable: param.name,
12814
+ in_method: method.name
12619
12815
  });
12620
12816
  }
12621
12817
  }
@@ -12634,8 +12830,9 @@ function findSources(calls, types, patterns, sourceLines, language) {
12634
12830
  location: `${param.type || "any"} ${param.name} in ${method.name}`,
12635
12831
  severity: "medium",
12636
12832
  line: paramLine,
12637
- confidence: param.type ? 0.7 : 0.5
12833
+ confidence: param.type ? 0.7 : 0.5,
12638
12834
  // Lower confidence for untyped params
12835
+ in_method: method.name
12639
12836
  });
12640
12837
  }
12641
12838
  }
@@ -12655,7 +12852,8 @@ function findSources(calls, types, patterns, sourceLines, language) {
12655
12852
  location: `${arg.expression} in ${call.in_method || "anonymous"}`,
12656
12853
  severity: "high",
12657
12854
  line: call.location.line,
12658
- confidence: 1
12855
+ confidence: 1,
12856
+ in_method: call.in_method ?? void 0
12659
12857
  });
12660
12858
  }
12661
12859
  }
@@ -12676,7 +12874,8 @@ function findSources(calls, types, patterns, sourceLines, language) {
12676
12874
  location: `${arg.expression} in ${call.in_method || "anonymous"}`,
12677
12875
  severity: "high",
12678
12876
  line: call.location.line,
12679
- confidence: 1
12877
+ confidence: 1,
12878
+ in_method: call.in_method ?? void 0
12680
12879
  });
12681
12880
  }
12682
12881
  break;
@@ -12707,6 +12906,18 @@ function findSources(calls, types, patterns, sourceLines, language) {
12707
12906
  if (m) s.variable = m[1];
12708
12907
  }
12709
12908
  }
12909
+ if (language === "java" && sourceLines) {
12910
+ const JAVA_ASSIGN_LHS = /^\s*(?:(?:final|public|private|protected|static|synchronized|volatile|transient)\s+)*(?:[A-Za-z_][\w.]*(?:\s*<[^=]*>)?(?:\s*\[\s*\])*\s+)?([A-Za-z_]\w*)\s*=(?!=)/;
12911
+ for (const s of result) {
12912
+ if (s.variable && s.variable.length > 0) continue;
12913
+ const lineText = sourceLines[s.line - 1] ?? "";
12914
+ const m = JAVA_ASSIGN_LHS.exec(lineText);
12915
+ if (!m) continue;
12916
+ const rhs = lineText.slice(m[0].length).trimStart();
12917
+ if (/^new\b/.test(rhs)) continue;
12918
+ s.variable = m[1];
12919
+ }
12920
+ }
12710
12921
  return result;
12711
12922
  }
12712
12923
  function isInterproceduralTaintableType(typeName) {
@@ -12883,7 +13094,12 @@ function matchesSourcePattern(call, pattern) {
12883
13094
  if (call.receiver_type && call.receiver_type === pattern.class) {
12884
13095
  } else if (call.receiver_type_fqn && call.receiver_type_fqn.endsWith("." + pattern.class)) {
12885
13096
  } else if (!call.receiver) {
12886
- return false;
13097
+ const target = call.resolution?.target;
13098
+ const expectedTail = `${pattern.class}.${pattern.method}`;
13099
+ if (target && (target === expectedTail || target.endsWith("." + expectedTail))) {
13100
+ } else {
13101
+ return false;
13102
+ }
12887
13103
  } else if (!receiverMightBeClass(call.receiver, pattern.class)) {
12888
13104
  return false;
12889
13105
  }
@@ -13150,7 +13366,12 @@ function matchesSinkPattern(call, pattern, typeHierarchy, language) {
13150
13366
  }
13151
13367
  return false;
13152
13368
  } else if (!call.receiver && !call.receiver_type) {
13153
- return false;
13369
+ const target = call.resolution?.target;
13370
+ const expectedTail = `${pattern.class}.${pattern.method}`;
13371
+ if (target && (target === expectedTail || target.endsWith("." + expectedTail))) {
13372
+ } else {
13373
+ return false;
13374
+ }
13154
13375
  }
13155
13376
  }
13156
13377
  if (!pattern.class && call.receiver) {
@@ -14928,6 +15149,7 @@ function findInitialTaint(sources, callsByLine, defsByLine) {
14928
15149
  for (const def of defsNextLine) {
14929
15150
  const callsOnSourceLine = callsByLine.get(source.line) ?? [];
14930
15151
  if (callsOnSourceLine.length > 0) {
15152
+ if (source.variable && def.variable !== source.variable) continue;
14931
15153
  tainted.push({
14932
15154
  variable: def.variable,
14933
15155
  defId: def.id,
@@ -14939,6 +15161,21 @@ function findInitialTaint(sources, callsByLine, defsByLine) {
14939
15161
  });
14940
15162
  }
14941
15163
  }
15164
+ if (source.variable) {
15165
+ const paramDefs = defsByLine.get(0) ?? [];
15166
+ for (const def of paramDefs) {
15167
+ if (def.kind === "param" && def.variable === source.variable) {
15168
+ tainted.push({
15169
+ variable: def.variable,
15170
+ defId: def.id,
15171
+ line: def.line,
15172
+ sourceType: source.type,
15173
+ sourceLine: source.line,
15174
+ confidence: source.confidence
15175
+ });
15176
+ }
15177
+ }
15178
+ }
14942
15179
  }
14943
15180
  return tainted;
14944
15181
  }
@@ -15107,6 +15344,13 @@ function analyzeInterprocedural(graphOrTypes, callsOrSources, dfgOrSinks, source
15107
15344
  for (const def of graph.defsAtLine(source.line)) {
15108
15345
  seedIds.add(def.id);
15109
15346
  }
15347
+ if (source.variable) {
15348
+ for (const def of graph.defsAtLine(0)) {
15349
+ if (def.kind === "param" && def.variable === source.variable) {
15350
+ seedIds.add(def.id);
15351
+ }
15352
+ }
15353
+ }
15110
15354
  }
15111
15355
  const taintedDefIds = graph.propagateTaintedDefIds(seedIds);
15112
15356
  const taintedVarsFromCP = options.taintedVariables ?? /* @__PURE__ */ new Set();
@@ -15258,7 +15502,36 @@ function analyzeInterprocedural(graphOrTypes, callsOrSources, dfgOrSinks, source
15258
15502
  const targetMethod = getMethodNode(methodNodes, call.method_name);
15259
15503
  if (!targetMethod) {
15260
15504
  if (taintedArgPositions.length > 0 && !collectionMethods.has(call.method_name) && !sanitizerMethods.has(call.method_name) && !safeUtilityMethods.has(call.method_name)) {
15261
- const sink = {
15505
+ const isBash = graph.ir.meta.language === "bash";
15506
+ const bashSafeBuiltins = /* @__PURE__ */ new Set([
15507
+ "echo",
15508
+ "printf",
15509
+ "test",
15510
+ "[",
15511
+ "[[",
15512
+ "true",
15513
+ "false",
15514
+ ":",
15515
+ "declare",
15516
+ "local",
15517
+ "export",
15518
+ "readonly",
15519
+ "typeset"
15520
+ ]);
15521
+ if (isBash && bashSafeBuiltins.has(call.method_name)) {
15522
+ continue;
15523
+ }
15524
+ const sink = isBash ? {
15525
+ type: "command_injection",
15526
+ cwe: "CWE-78",
15527
+ location: `Tainted data (${taintedArgVars.join(", ")}) passed unquoted to shell utility ${call.method_name}`,
15528
+ line: call.location.line,
15529
+ confidence: 0.6,
15530
+ method: call.method_name,
15531
+ argPositions: taintedArgPositions
15532
+ } : {
15533
+ // Create an "external_taint_escape" sink for this call
15534
+ // This represents tainted data being passed to code we can't analyze
15262
15535
  type: "external_taint_escape",
15263
15536
  cwe: "CWE-668",
15264
15537
  // Exposure of Resource to Wrong Sphere
@@ -18154,6 +18427,9 @@ var DefaultLanguageRegistry = class {
18154
18427
  }
18155
18428
  }
18156
18429
  get(language) {
18430
+ if (language === "tsx") {
18431
+ return this.plugins.get("javascript");
18432
+ }
18157
18433
  return this.plugins.get(language);
18158
18434
  }
18159
18435
  getForFile(filePath) {
@@ -20590,6 +20866,23 @@ var BashPlugin = class extends BaseLanguagePlugin {
20590
20866
  cwe: "CWE-918",
20591
20867
  severity: "high",
20592
20868
  argPositions: [0]
20869
+ },
20870
+ // File inclusion — `source` and POSIX `.` execute arbitrary shell code
20871
+ // from the file at the supplied path. With user-controlled input this is
20872
+ // an RCE primitive equivalent to eval() on file contents (CWE-98).
20873
+ {
20874
+ method: "source",
20875
+ type: "path_traversal",
20876
+ cwe: "CWE-98",
20877
+ severity: "critical",
20878
+ argPositions: [0]
20879
+ },
20880
+ {
20881
+ method: ".",
20882
+ type: "path_traversal",
20883
+ cwe: "CWE-98",
20884
+ severity: "critical",
20885
+ argPositions: [0]
20593
20886
  }
20594
20887
  ];
20595
20888
  }
@@ -21739,6 +22032,7 @@ function mergeHtmlResults(htmlMeta, scriptResults, attributeFindings) {
21739
22032
  const allSources = [];
21740
22033
  const allSinks = [];
21741
22034
  const allSanitizers = [];
22035
+ const allFlows = [];
21742
22036
  const allImports = [];
21743
22037
  const allExports = [];
21744
22038
  const allFindings = [];
@@ -21824,6 +22118,14 @@ function mergeHtmlResults(htmlMeta, scriptResults, attributeFindings) {
21824
22118
  line: sanitizer.line + lineShift
21825
22119
  });
21826
22120
  }
22121
+ for (const flow of ir.taint.flows ?? []) {
22122
+ allFlows.push({
22123
+ ...flow,
22124
+ source_line: flow.source_line + lineShift,
22125
+ sink_line: flow.sink_line + lineShift,
22126
+ path: flow.path.map((step) => ({ ...step, line: step.line + lineShift }))
22127
+ });
22128
+ }
21827
22129
  for (const imp of ir.imports) {
21828
22130
  allImports.push({
21829
22131
  ...imp,
@@ -21843,7 +22145,8 @@ function mergeHtmlResults(htmlMeta, scriptResults, attributeFindings) {
21843
22145
  const taint = {
21844
22146
  sources: allSources,
21845
22147
  sinks: allSinks,
21846
- sanitizers: allSanitizers.length > 0 ? allSanitizers : void 0
22148
+ sanitizers: allSanitizers.length > 0 ? allSanitizers : void 0,
22149
+ flows: allFlows.length > 0 ? allFlows : void 0
21847
22150
  };
21848
22151
  const cfg = {
21849
22152
  blocks: allCfgBlocks,
@@ -22044,6 +22347,7 @@ var LanguageSourcesPass = class {
22044
22347
  const constProp = ctx.getResult("constant-propagation");
22045
22348
  const additionalSources = [];
22046
22349
  const additionalSinks = [];
22350
+ const additionalSanitizers = [];
22047
22351
  additionalSources.push(...findGetterSources(types, constProp.instanceFieldTaint, code));
22048
22352
  additionalSources.push(...findOopFieldReadSources(types, code, language));
22049
22353
  additionalSources.push(...findJavaScriptAssignmentSources(code, language));
@@ -22097,9 +22401,10 @@ var LanguageSourcesPass = class {
22097
22401
  for (const finding of bashFindings) {
22098
22402
  ctx.addFinding(finding);
22099
22403
  }
22404
+ additionalSanitizers.push(...findBashRegexAllowlistSanitizers(code));
22100
22405
  }
22101
22406
  attachSourceLineCode(additionalSources, additionalSinks, code);
22102
- return { additionalSources, additionalSinks, pyTaintedVars, pySanitizedVars, jsTaintedVars };
22407
+ return { additionalSources, additionalSinks, additionalSanitizers, pyTaintedVars, pySanitizedVars, jsTaintedVars };
22103
22408
  }
22104
22409
  };
22105
22410
  function findGetterSources(types, instanceFieldTaint, _sourceCode) {
@@ -22327,55 +22632,55 @@ function buildPythonTaintedVars(sourceCode) {
22327
22632
  for (let i2 = 0; i2 < lines.length; i2++) {
22328
22633
  const line = lines[i2];
22329
22634
  if (line.trimStart().startsWith("#")) continue;
22330
- const subscriptAssign = line.match(/^\s*(\w+)\[(['"])([^'"]+)\2\]\s*=\s*(.+)$/);
22635
+ const subscriptAssign = line.match(/^\s*([\p{L}\p{N}_]+)\[(['"])([^'"]+)\2\]\s*=\s*(.+)$/u);
22331
22636
  if (subscriptAssign) {
22332
22637
  const [, container, , key, rhs2] = subscriptAssign;
22333
- const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(rhs2));
22638
+ const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(rhs2));
22334
22639
  if (isTaintedRhs) containerTainted.set(`${container}['${key}']`, i2 + 1);
22335
22640
  continue;
22336
22641
  }
22337
- const setCallMatch = line.match(/^\s*(\w+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/);
22642
+ const setCallMatch = line.match(/^\s*([\p{L}\p{N}_]+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/u);
22338
22643
  if (setCallMatch) {
22339
22644
  const [, obj, , section, , key, rhs2] = setCallMatch;
22340
- const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(rhs2));
22645
+ const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(rhs2));
22341
22646
  if (isTaintedRhs) containerTainted.set(`${obj}['${section}']['${key}']`, i2 + 1);
22342
22647
  continue;
22343
22648
  }
22344
- const containerAppendMatch = line.match(/^\s*(\w+)\.(append|extend|insert|add|push|put|appendleft)\s*\(\s*(.+?)\s*\)\s*$/);
22649
+ const containerAppendMatch = line.match(/^\s*([\p{L}\p{N}_]+)\.(append|extend|insert|add|push|put|appendleft)\s*\(\s*(.+?)\s*\)\s*$/u);
22345
22650
  if (containerAppendMatch) {
22346
22651
  const [, receiver, , argExpr] = containerAppendMatch;
22347
- const argIsTainted = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(argExpr));
22652
+ const argIsTainted = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(argExpr));
22348
22653
  const argIsDirectSource = PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(argExpr));
22349
22654
  if (argIsTainted || argIsDirectSource) tainted.set(receiver, tainted.get(receiver) ?? i2 + 1);
22350
22655
  continue;
22351
22656
  }
22352
- const augAssign = line.match(/^\s*(\w+)\s*\+=\s*(.+)$/);
22657
+ const augAssign = line.match(/^\s*([\p{L}\p{N}_]+)\s*\+=\s*(.+)$/u);
22353
22658
  if (augAssign) {
22354
22659
  const [, augLhs, augRhs] = augAssign;
22355
- const rhsTainted = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(augRhs));
22660
+ const rhsTainted = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(augRhs));
22356
22661
  if (rhsTainted || tainted.has(augLhs)) tainted.set(augLhs, tainted.get(augLhs) ?? i2 + 1);
22357
22662
  continue;
22358
22663
  }
22359
- const forLoopMatch = line.match(/^\s*for\s+(\w+)\s+in\s+(.+?)(?:\s*:\s*)?$/);
22664
+ const forLoopMatch = line.match(/^\s*for\s+([\p{L}\p{N}_]+)\s+in\s+(.+?)(?:\s*:\s*)?$/u);
22360
22665
  if (forLoopMatch) {
22361
22666
  const [, iterVar, iterExpr] = forLoopMatch;
22362
22667
  const isDirectSource2 = PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(iterExpr));
22363
- const isPropagated = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(iterExpr));
22668
+ const isPropagated = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(iterExpr));
22364
22669
  if (isDirectSource2 || isPropagated) tainted.set(iterVar, i2 + 1);
22365
22670
  continue;
22366
22671
  }
22367
- const assignMatch = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
22672
+ const assignMatch = line.match(/^\s*([\p{L}\p{N}_]+)\s*=\s*(.+)$/u);
22368
22673
  if (!assignMatch) continue;
22369
22674
  const [, lhs, rhs] = assignMatch;
22370
22675
  const isDirectSource = PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(rhs));
22371
22676
  let propagatedFrom;
22372
- const dictAccessMatch = rhs.trim().match(/^(\w+)\[(['"])([^'"]+)\2\]$/);
22677
+ const dictAccessMatch = rhs.trim().match(/^([\p{L}\p{N}_]+)\[(['"])([^'"]+)\2\]$/u);
22373
22678
  if (dictAccessMatch) {
22374
22679
  const [, container, , key] = dictAccessMatch;
22375
22680
  if (containerTainted.has(`${container}['${key}']`)) propagatedFrom = `${container}['${key}']`;
22376
22681
  }
22377
22682
  if (!propagatedFrom) {
22378
- const confGetMatch = rhs.trim().match(/^(\w+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/);
22683
+ const confGetMatch = rhs.trim().match(/^([\p{L}\p{N}_]+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/u);
22379
22684
  if (confGetMatch) {
22380
22685
  const [, obj, , section, , key] = confGetMatch;
22381
22686
  if (containerTainted.has(`${obj}['${section}']['${key}']`)) propagatedFrom = `${obj}['${section}']['${key}']`;
@@ -22383,7 +22688,7 @@ function buildPythonTaintedVars(sourceCode) {
22383
22688
  }
22384
22689
  if (!propagatedFrom) {
22385
22690
  const isSafeEnvRead = /\bos\.environ\.get\s*\(/.test(rhs) || /\bos\.getenv\s*\(/.test(rhs);
22386
- if (!isSafeEnvRead) propagatedFrom = [...tainted.keys()].find((v) => new RegExp(`\\b${v}\\b`).test(rhs));
22691
+ if (!isSafeEnvRead) propagatedFrom = [...tainted.keys()].find((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(rhs));
22387
22692
  }
22388
22693
  if (isDirectSource) {
22389
22694
  tainted.set(lhs, i2 + 1);
@@ -22744,6 +23049,52 @@ function findBashPatternFindings(sourceCode, file) {
22744
23049
  }
22745
23050
  return findings;
22746
23051
  }
23052
/**
 * Derive line-based sanitizer entries from Bash regex allowlist guards.
 *
 * A guard is a single line of the form
 *   if [[ ! "$var" =~ ^...$ ]]; then exit|return|die ...
 * whose regex is accepted by `isSafeBashAllowlistRegex`. Every line after
 * such a guard (through the last line of the script) is reported as
 * sanitized.
 *
 * The guarded variable name is captured by the pattern but is not part of
 * the emitted entries: sanitizers are keyed by line only. Because of that,
 * several guards would yield identical entries for the overlapping lines, so
 * only the earliest valid guard matters and each line is emitted exactly once.
 *
 * @param {string} code - Full Bash source text.
 * @returns {Array<{type: string, method: string, line: number, sanitizes: string[]}>}
 *   One entry per 1-indexed line following the earliest valid guard; empty
 *   when no valid guard exists.
 */
function findBashRegexAllowlistSanitizers(code) {
  const guardRe = /^\s*if\s+\[\[\s*!\s*"?\$\{?(\w+)\}?"?\s*=~\s*(\S+)\s*\]\]\s*;\s*then\s+(exit|return|die)\b/;
  const sanitizedTypes = [
    "command_injection",
    "path_traversal",
    "sql_injection",
    "code_injection",
    "ssrf",
    "xss",
    "open_redirect",
    "log_injection"
  ];
  const lines = code.split("\n");

  // Index (0-based) of the first line holding a guard with a safe regex.
  const firstGuardIndex = lines.findIndex((text) => {
    const m = guardRe.exec(text);
    return m !== null && isSafeBashAllowlistRegex(m[2]);
  });
  if (firstGuardIndex === -1) return [];

  const sanitizers = [];
  // The guard sits on 1-indexed line firstGuardIndex + 1; coverage starts on
  // the line after it.
  for (let line = firstGuardIndex + 2; line <= lines.length; line++) {
    sanitizers.push({
      type: "regex_allowlist",
      method: "=~",
      line,
      // Fresh array per entry so consumers cannot mutate a shared list.
      sanitizes: [...sanitizedTypes]
    });
  }
  return sanitizers;
}
23082
/**
 * Decide whether a Bash `=~` regex literal is a strict, fully anchored
 * allowlist that is safe to treat as input validation.
 *
 * The literal must start with `^` and end with `$`, and the text between the
 * anchors must consist solely of:
 *   - positive bracket expressions (`[...]`), optionally quantified by + * ?
 *   - backslash-escaped single characters
 *   - the literal characters A-Z a-z 0-9 _ - . /
 *   - the quantifiers + * ?
 *
 * Rejected outright: an empty body, `.*` / `.+` wildcards, alternation (`|`),
 * backreferences (`\1` ...), and negated bracket expressions (`[^...]`).
 * A negated bracket expression is a denylist: `^[^a-z]+$` happily matches
 * `;`, `$(` and other shell metacharacters, so it must not count as an
 * allowlist.
 *
 * NOTE(review): a bare unescaped `.` is still accepted as a token even though
 * it matches any single character; only `.*` and `.+` are rejected.
 *
 * @param {string} literal - Regex text exactly as it appears after `=~`.
 * @returns {boolean} true when the literal is a recognised safe allowlist.
 */
function isSafeBashAllowlistRegex(literal) {
  if (!literal.startsWith("^") || !literal.endsWith("$")) return false;
  const body = literal.slice(1, -1);
  if (body.length === 0) return false;
  if (body.includes(".*") || body.includes(".+")) return false;
  if (body.includes("|")) return false;
  if (/\\\d/.test(body)) return false;

  // Sticky matching forces each token to begin exactly where the previous one
  // ended, so any unrecognised character stops the scan immediately.
  // `(?!\^)` excludes negated bracket expressions.
  const safeToken = /\[(?!\^)[^\]]+\][+*?]?|\\.|[A-Za-z0-9_\-./]|[+*?]/y;
  let consumed = 0;
  while (consumed < body.length) {
    safeToken.lastIndex = consumed;
    const match = safeToken.exec(body);
    if (match === null) return false;
    consumed += match[0].length;
  }
  return true;
}
22747
23098
 
22748
23099
  // src/analysis/passes/sink-filter-pass.ts
22749
23100
  var JS_XSS_SANITIZERS = [
@@ -22783,7 +23134,10 @@ var SinkFilterPass = class {
22783
23134
  sinks.push(s);
22784
23135
  }
22785
23136
  }
22786
- const sanitizers = taintMatcher.sanitizers;
23137
+ const sanitizers = [
23138
+ ...taintMatcher.sanitizers,
23139
+ ...langSources.additionalSanitizers ?? []
23140
+ ];
22787
23141
  let filtered = sinks.filter((sink) => !constProp.unreachableLines.has(sink.line));
22788
23142
  filtered = filterCleanArraySinks(filtered, calls, constProp.taintedArrayElements, constProp.symbols);
22789
23143
  filtered = filterCleanVariableSinks(
@@ -23140,13 +23494,13 @@ var TaintPropagationPass = class {
23140
23494
  confidence: flow.confidence,
23141
23495
  sanitized: flow.sanitized
23142
23496
  }));
23143
- const arrayFlows = detectArrayElementFlows(calls, sources, sinks, constProp.taintedArrayElements, constProp.unreachableLines) ?? [];
23497
+ const arrayFlows = detectArrayElementFlows(calls, sources, sinks, constProp.taintedArrayElements, constProp.unreachableLines, types) ?? [];
23144
23498
  for (const f of arrayFlows) {
23145
23499
  if (!flows.some((x) => x.source_line === f.source_line && x.sink_line === f.sink_line)) {
23146
23500
  flows.push(f);
23147
23501
  }
23148
23502
  }
23149
- const collectionFlows = detectCollectionFlows(calls, sources, sinks, constProp.tainted, constProp.unreachableLines, ctx.code) ?? [];
23503
+ const collectionFlows = detectCollectionFlows(calls, sources, sinks, constProp.tainted, constProp.unreachableLines, ctx.code, types) ?? [];
23150
23504
  for (const f of collectionFlows) {
23151
23505
  if (flows.some((x) => x.source_line === f.source_line && x.sink_line === f.sink_line)) continue;
23152
23506
  const flowForCheck = {
@@ -23193,7 +23547,7 @@ var TaintPropagationPass = class {
23193
23547
  flows.push(f);
23194
23548
  }
23195
23549
  const sanitizedNames = constProp.sanitizedVars;
23196
- const finalFlows = sanitizedNames.size === 0 ? flows : flows.filter((f) => {
23550
+ let finalFlows = sanitizedNames.size === 0 ? flows : flows.filter((f) => {
23197
23551
  if (f.path.length === 0) return true;
23198
23552
  const sourceVar = f.path[0].variable;
23199
23553
  if (!sourceVar) return true;
@@ -23203,10 +23557,48 @@ var TaintPropagationPass = class {
23203
23557
  }
23204
23558
  return true;
23205
23559
  });
23560
+ if (ctx.language === "java" && typeof ctx.code === "string") {
23561
+ finalFlows = finalFlows.filter((f) => {
23562
+ if (f.sink_type !== "path_traversal" && f.sink_type !== "xxe") return true;
23563
+ if (!isInJavaSanitizedMethod(ctx.code, types, f.sink_line, f.sink_type)) return true;
23564
+ return false;
23565
+ });
23566
+ }
23206
23567
  return { flows: finalFlows };
23207
23568
  }
23208
23569
  };
23209
- function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLines, code) {
23570
/**
 * Heuristically decide whether the Java method that contains a sink also
 * contains a recognised mitigation for that sink's vulnerability class.
 *
 * The enclosing method is the first method (in `types` order) whose
 * [start_line, end_line] range contains `sinkLine`; its source text is then
 * searched with regular expressions. This is a text-level check only: it does
 * not verify that the mitigation is applied to the object used at the sink.
 *
 * Recognised mitigations:
 *   - "path_traversal": the method calls `.getCanonicalPath(`, passes a
 *     `getCanonicalPath` expression to `.startsWith(`, and contains
 *     `throw new`.
 *   - "xxe": the method hardens the parser, i.e. `setFeature` with
 *     `disallow-doctype-decl` set to true, or with `external-general-entities`,
 *     `external-parameter-entities` or `load-external-dtd` set to false; or
 *     `setProperty(... SUPPORT_DTD ..., false)`.
 *     The boolean argument is checked on purpose: enabling external entities
 *     or re-allowing DOCTYPE is not a mitigation. A non-literal value is not
 *     treated as a mitigation.
 *
 * @param {string} code - Full source text of the file.
 * @param {Array<{methods: Array<{start_line: number, end_line: number}>}>} types
 *   Types extracted from the file, with 1-indexed inclusive method ranges.
 * @param {number} sinkLine - 1-indexed line of the sink.
 * @param {string} sinkType - Sink category; only "path_traversal" and "xxe"
 *   can yield true.
 * @returns {boolean} true when a matching mitigation is found in the
 *   enclosing method; false otherwise (including when no method encloses
 *   the sink).
 */
function isInJavaSanitizedMethod(code, types, sinkLine, sinkType) {
  if (!types || types.length === 0) return false;

  let enclosing;
  for (const t of types) {
    enclosing = (t.methods ?? []).find(
      (m) => sinkLine >= m.start_line && sinkLine <= m.end_line
    );
    if (enclosing) break;
  }
  if (!enclosing) return false;

  const methodText = code
    .split("\n")
    .slice(Math.max(enclosing.start_line - 1, 0), enclosing.end_line)
    .join("\n");

  if (sinkType === "path_traversal") {
    return (
      /\.getCanonicalPath\s*\(/.test(methodText) &&
      /\.startsWith\s*\([^)]*getCanonicalPath/.test(methodText) &&
      /\bthrow\s+new\b/.test(methodText)
    );
  }

  if (sinkType === "xxe") {
    const setFeatureRe = /\.setFeature\s*\(\s*"[^"]*(disallow-doctype-decl|external-general-entities|external-parameter-entities|load-external-dtd)"\s*,\s*(true|false|Boolean\.TRUE|Boolean\.FALSE)\s*\)/g;
    for (const [, feature, value] of methodText.matchAll(setFeatureRe)) {
      const enabled = value === "true" || value === "Boolean.TRUE";
      // disallow-doctype-decl protects when switched on; the other three
      // features protect when switched off.
      const hardens = feature === "disallow-doctype-decl" ? enabled : !enabled;
      if (hardens) return true;
    }
    return /\.setProperty\s*\([^,]*SUPPORT_DTD[^,]*,\s*(?:false|Boolean\.FALSE)\s*\)/.test(methodText);
  }

  return false;
}
23601
+ function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLines, code, types) {
23210
23602
  const flows = [];
23211
23603
  const callsByLine = /* @__PURE__ */ new Map();
23212
23604
  for (const call of calls) {
@@ -23226,8 +23618,11 @@ function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLi
23226
23618
  const varName = arg.variable;
23227
23619
  const scopedName = call.in_method ? `${call.in_method}:${varName}` : varName;
23228
23620
  if (taintedVars.has(varName) || taintedVars.has(scopedName)) {
23229
- const source = sources[0];
23621
+ const source = pickScopedSource(sources, sink.line, call.in_method ?? null, types, varName);
23230
23622
  if (source) {
23623
+ if (source.variable && source.variable !== varName && source.in_method && call.in_method && source.in_method !== call.in_method) {
23624
+ continue;
23625
+ }
23231
23626
  if (typeof code === "string" && isReassignedToLiteralBetween(code, varName, source.line, sink.line)) {
23232
23627
  continue;
23233
23628
  }
@@ -23263,8 +23658,11 @@ function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLi
23263
23658
  const collectionVar = match[1];
23264
23659
  const scopedCollection = call.in_method ? `${call.in_method}:${collectionVar}` : collectionVar;
23265
23660
  if (taintedVars.has(collectionVar) || taintedVars.has(scopedCollection)) {
23266
- const source = sources[0];
23661
+ const source = pickScopedSource(sources, sink.line, call.in_method ?? null, types, collectionVar);
23267
23662
  if (source) {
23663
+ if (source.variable && source.variable !== collectionVar && source.in_method && call.in_method && source.in_method !== call.in_method) {
23664
+ continue;
23665
+ }
23268
23666
  if (typeof code === "string" && isReassignedToLiteralBetween(code, collectionVar, source.line, sink.line)) {
23269
23667
  continue;
23270
23668
  }
@@ -23290,7 +23688,7 @@ function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLi
23290
23688
  }
23291
23689
  return flows;
23292
23690
  }
23293
- function detectArrayElementFlows(calls, sources, sinks, taintedArrayElements, unreachableLines) {
23691
+ function detectArrayElementFlows(calls, sources, sinks, taintedArrayElements, unreachableLines, types) {
23294
23692
  const flows = [];
23295
23693
  const callsByLine = /* @__PURE__ */ new Map();
23296
23694
  for (const call of calls) {
@@ -23314,7 +23712,7 @@ function detectArrayElementFlows(calls, sources, sinks, taintedArrayElements, un
23314
23712
  if (taintedIndices) {
23315
23713
  const isTainted = taintedIndices.has(indexStr) || taintedIndices.has("*");
23316
23714
  if (isTainted) {
23317
- const source = sources[0];
23715
+ const source = pickScopedSource(sources, sink.line, call.in_method ?? null, types, arrayName);
23318
23716
  if (source) {
23319
23717
  flows.push({
23320
23718
  source_line: source.line,
@@ -23416,10 +23814,13 @@ function isReassignedToLiteralBetween(code, variable, srcLine, sinkLine) {
23416
23814
  const reGuarded = new RegExp(
23417
23815
  `^\\s*if\\b.*\\b${variable}\\s*=\\s*${strLit}\\s*;?\\s*$`
23418
23816
  );
23817
+ const reSwitchCase = new RegExp(
23818
+ `^\\s*(?:case\\b.*?|default\\s*):\\s*${variable}\\s*=\\s*${strLit}\\s*;?\\s*(?:break\\s*;?)?\\s*$`
23819
+ );
23419
23820
  for (let i2 = lo; i2 < hi; i2++) {
23420
23821
  const line = lines[i2];
23421
23822
  if (!line) continue;
23422
- if (reNaked.test(line) || reGuarded.test(line)) return true;
23823
+ if (reNaked.test(line) || reGuarded.test(line) || reSwitchCase.test(line)) return true;
23423
23824
  }
23424
23825
  return false;
23425
23826
  }
@@ -23513,7 +23914,7 @@ function detectExpressionScanFlows(calls, sources, sinks, sanitizers, unreachabl
23513
23914
  for (const s of sourcesWithVar) {
23514
23915
  if (reCache.has(s.variable)) continue;
23515
23916
  const escaped = s.variable.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
23516
- reCache.set(s.variable, new RegExp(`\\b${escaped}\\b`));
23917
+ reCache.set(s.variable, new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, "u"));
23517
23918
  }
23518
23919
  const callsByLine = /* @__PURE__ */ new Map();
23519
23920
  for (const call of calls) {
@@ -23533,6 +23934,9 @@ function detectExpressionScanFlows(calls, sources, sinks, sanitizers, unreachabl
23533
23934
  if (!expr) continue;
23534
23935
  for (const source of sourcesWithVar) {
23535
23936
  if (source.line >= sink.line) continue;
23937
+ if (source.in_method && call.in_method && source.in_method !== call.in_method) {
23938
+ continue;
23939
+ }
23536
23940
  const re = reCache.get(source.variable);
23537
23941
  if (!re || !re.test(expr)) continue;
23538
23942
  if (flows.some(
@@ -23596,6 +24000,44 @@ function detectExpressionScanFlows(calls, sources, sinks, sanitizers, unreachabl
23596
24000
  }
23597
24001
  return flows;
23598
24002
  }
24003
/**
 * Choose the taint source that most plausibly feeds a given sink.
 *
 * Candidates are tried in decreasing order of specificity; within each tier
 * the source with the greatest line number strictly before `sinkLine` wins:
 *   1. sources whose `variable` equals `taintedVar`;
 *   2. sources located inside the method named `methodName`;
 *   3. all sources.
 * If no source precedes the sink at all, the first source is returned.
 *
 * For tier 2 the method is resolved by name. When several methods share the
 * name (overloads, or same-named methods in different types), the one whose
 * line range contains `sinkLine` is preferred so that sources from an
 * unrelated method are not attributed to this sink; otherwise the first
 * same-named method with a valid range is used.
 *
 * @param {Array<{line: number, variable?: string}>} sources - Known sources.
 * @param {number} sinkLine - 1-indexed line of the sink.
 * @param {string|null} methodName - Name of the method containing the sink
 *   call, if known.
 * @param {Array<{methods: Array<{name: string, start_line: number, end_line: number}>}>} [types]
 *   Types extracted from the file.
 * @param {string} [taintedVar] - Variable observed as tainted at the sink.
 * @returns {object|undefined} The selected source, or undefined when
 *   `sources` is empty.
 */
function pickScopedSource(sources, sinkLine, methodName, types, taintedVar) {
  if (sources.length === 0) return void 0;

  // Latest source strictly before the sink; on equal lines the first one wins.
  const closestPreceding = (cands) => {
    let best;
    for (const s of cands) {
      if (s.line >= sinkLine) continue;
      if (!best || s.line > best.line) best = s;
    }
    return best;
  };

  if (taintedVar) {
    const pick = closestPreceding(sources.filter((s) => s.variable === taintedVar));
    if (pick) return pick;
  }

  if (methodName && types && types.length > 0) {
    const sameName = types
      .flatMap((t) => t.methods ?? [])
      .filter((m) => m.name === methodName && m.start_line > 0 && m.end_line >= m.start_line);
    const scope =
      sameName.find((m) => sinkLine >= m.start_line && sinkLine <= m.end_line) ?? sameName[0];
    if (scope) {
      const pick = closestPreceding(
        sources.filter((s) => s.line >= scope.start_line && s.line <= scope.end_line)
      );
      if (pick) return pick;
    }
  }

  return closestPreceding(sources) ?? sources[0];
}
23599
24041
 
23600
24042
  // src/analysis/passes/interprocedural-pass.ts
23601
24043
  var InterproceduralPass = class {
@@ -24037,6 +24479,7 @@ function isPublicMethod(method, language) {
24037
24479
  return method.modifiers.includes("public");
24038
24480
  case "javascript":
24039
24481
  case "typescript":
24482
+ case "tsx":
24040
24483
  return !method.modifiers.includes("private") && !method.modifiers.includes("protected");
24041
24484
  case "python":
24042
24485
  return !method.name.startsWith("_");
@@ -24055,7 +24498,7 @@ var MissingPublicDocPass = class {
24055
24498
  if (UTIL_DIR_RE.test(graph.ir.meta.file)) {
24056
24499
  return { missingDocMethods: [], missingDocTypes: [] };
24057
24500
  }
24058
- if (!["java", "javascript", "typescript", "python"].includes(language)) {
24501
+ if (!["java", "javascript", "typescript", "tsx", "python"].includes(language)) {
24059
24502
  return { missingDocMethods: [], missingDocTypes: [] };
24060
24503
  }
24061
24504
  const lines = code.split("\n");
@@ -24558,6 +25001,7 @@ function hasDeclKeyword(lineText, language) {
24558
25001
  return /\b(?:int|long|float|double|boolean|byte|char|short|var|final)\b/.test(lineText) || /\b[A-Z]\w*(?:<[^>]*>)?\s+\w/.test(lineText);
24559
25002
  case "javascript":
24560
25003
  case "typescript":
25004
+ case "tsx":
24561
25005
  return /\b(?:let|const|var)\s+[\w{[]/.test(lineText);
24562
25006
  case "rust":
24563
25007
  return /\blet\s+(?:mut\s+)?\w/.test(lineText);
@@ -28496,6 +28940,7 @@ var WeakRandomPass = class {
28496
28940
  return "Use the `secrets` module (`secrets.token_bytes`, `secrets.token_hex`, `secrets.choice`, `secrets.randbelow`).";
28497
28941
  case "javascript":
28498
28942
  case "typescript":
28943
+ case "tsx":
28499
28944
  return "Use `crypto.randomBytes(n)` (Node.js) or `crypto.getRandomValues(typedArray)` (browser).";
28500
28945
  case "go":
28501
28946
  return "Use `crypto/rand` instead of `math/rand`. Example: `b := make([]byte, 32); _, _ = rand.Read(b)` (where `rand` is `crypto/rand`).";
@@ -28532,7 +28977,7 @@ var WeakRandomPass = class {
28532
28977
  }
28533
28978
  return null;
28534
28979
  }
28535
- if (language === "javascript" || language === "typescript") {
28980
+ if (language === "javascript" || language === "typescript" || language === "tsx") {
28536
28981
  if (method === "random" && (receiver === "Math" || receiver.endsWith(".Math"))) {
28537
28982
  return "Math.random";
28538
28983
  }
@@ -29940,6 +30385,7 @@ function getNodeTypesForLanguage(language) {
29940
30385
  ]);
29941
30386
  case "javascript":
29942
30387
  case "typescript":
30388
+ case "tsx":
29943
30389
  return /* @__PURE__ */ new Set([
29944
30390
  "call_expression",
29945
30391
  "new_expression",
@@ -29952,7 +30398,13 @@ function getNodeTypesForLanguage(language) {
29952
30398
  "import_statement",
29953
30399
  "export_statement",
29954
30400
  "member_expression",
29955
- "assignment_expression"
30401
+ "assignment_expression",
30402
+ // JSX node types — tree-sitter-tsx grammar (.tsx/.jsx routing)
30403
+ "jsx_element",
30404
+ "jsx_self_closing_element",
30405
+ "jsx_opening_element",
30406
+ "jsx_attribute",
30407
+ "jsx_expression"
29956
30408
  ]);
29957
30409
  case "bash":
29958
30410
  return /* @__PURE__ */ new Set([
@@ -30016,8 +30468,15 @@ async function analyze(code, filePath, language, options = {}) {
30016
30468
  if (language === "html") {
30017
30469
  return analyzeHtmlFile(code, filePath, options);
30018
30470
  }
30019
- logger.debug("Analyzing file", { filePath, language, codeLength: code.length });
30020
- const tree = await parse(code, language);
30471
+ let parseGrammar = language;
30472
+ if (language === "javascript" || language === "typescript") {
30473
+ const lower = filePath.toLowerCase();
30474
+ if (lower.endsWith(".tsx") || lower.endsWith(".jsx")) {
30475
+ parseGrammar = "tsx";
30476
+ }
30477
+ }
30478
+ logger.debug("Analyzing file", { filePath, language, parseGrammar, codeLength: code.length });
30479
+ const tree = await parse(code, parseGrammar);
30021
30480
  try {
30022
30481
  logger.trace("Parsed AST", { rootNodeType: tree.rootNode.type });
30023
30482
  const parseStatus = extractParseStatus(tree);