circle-ir 3.58.0 → 3.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/dist/analysis/config-loader.d.ts.map +1 -1
  2. package/dist/analysis/config-loader.js +58 -17
  3. package/dist/analysis/config-loader.js.map +1 -1
  4. package/dist/analysis/html/html-merge.d.ts.map +1 -1
  5. package/dist/analysis/html/html-merge.js +10 -0
  6. package/dist/analysis/html/html-merge.js.map +1 -1
  7. package/dist/analysis/interprocedural.d.ts.map +1 -1
  8. package/dist/analysis/interprocedural.js +44 -11
  9. package/dist/analysis/interprocedural.js.map +1 -1
  10. package/dist/analysis/passes/language-sources-pass.d.ts +7 -1
  11. package/dist/analysis/passes/language-sources-pass.d.ts.map +1 -1
  12. package/dist/analysis/passes/language-sources-pass.js +283 -15
  13. package/dist/analysis/passes/language-sources-pass.js.map +1 -1
  14. package/dist/analysis/passes/missing-public-doc-pass.d.ts.map +1 -1
  15. package/dist/analysis/passes/missing-public-doc-pass.js +2 -1
  16. package/dist/analysis/passes/missing-public-doc-pass.js.map +1 -1
  17. package/dist/analysis/passes/sink-filter-pass.d.ts.map +1 -1
  18. package/dist/analysis/passes/sink-filter-pass.js +4 -1
  19. package/dist/analysis/passes/sink-filter-pass.js.map +1 -1
  20. package/dist/analysis/passes/taint-propagation-pass.js +2 -1
  21. package/dist/analysis/passes/taint-propagation-pass.js.map +1 -1
  22. package/dist/analysis/passes/weak-random-pass.d.ts.map +1 -1
  23. package/dist/analysis/passes/weak-random-pass.js +2 -1
  24. package/dist/analysis/passes/weak-random-pass.js.map +1 -1
  25. package/dist/analysis/taint-matcher.d.ts.map +1 -1
  26. package/dist/analysis/taint-matcher.js +29 -7
  27. package/dist/analysis/taint-matcher.js.map +1 -1
  28. package/dist/analysis/taint-propagation.d.ts.map +1 -1
  29. package/dist/analysis/taint-propagation.js +20 -0
  30. package/dist/analysis/taint-propagation.js.map +1 -1
  31. package/dist/analyzer.d.ts.map +1 -1
  32. package/dist/analyzer.js +19 -2
  33. package/dist/analyzer.js.map +1 -1
  34. package/dist/browser/circle-ir.js +512 -51
  35. package/dist/core/circle-ir-core.cjs +243 -26
  36. package/dist/core/circle-ir-core.js +243 -26
  37. package/dist/core/extractors/calls.js +181 -1
  38. package/dist/core/extractors/calls.js.map +1 -1
  39. package/dist/core/extractors/cfg.js +1 -1
  40. package/dist/core/extractors/cfg.js.map +1 -1
  41. package/dist/core/extractors/dfg.js +29 -3
  42. package/dist/core/extractors/dfg.js.map +1 -1
  43. package/dist/core/extractors/imports.js +1 -1
  44. package/dist/core/extractors/imports.js.map +1 -1
  45. package/dist/core/extractors/runtime-registrations.js +1 -1
  46. package/dist/core/extractors/runtime-registrations.js.map +1 -1
  47. package/dist/core/extractors/types.js +1 -1
  48. package/dist/core/extractors/types.js.map +1 -1
  49. package/dist/core/parser.d.ts +1 -1
  50. package/dist/core/parser.d.ts.map +1 -1
  51. package/dist/graph/scope-graph.d.ts.map +1 -1
  52. package/dist/graph/scope-graph.js +1 -0
  53. package/dist/graph/scope-graph.js.map +1 -1
  54. package/dist/languages/plugins/bash.d.ts.map +1 -1
  55. package/dist/languages/plugins/bash.js +17 -0
  56. package/dist/languages/plugins/bash.js.map +1 -1
  57. package/dist/languages/registry.d.ts.map +1 -1
  58. package/dist/languages/registry.js +6 -0
  59. package/dist/languages/registry.js.map +1 -1
  60. package/dist/languages/types.d.ts +1 -1
  61. package/dist/languages/types.d.ts.map +1 -1
  62. package/dist/types/index.d.ts +1 -1
  63. package/dist/types/index.d.ts.map +1 -1
  64. package/dist/wasm/tree-sitter-tsx.wasm +0 -0
  65. package/package.json +2 -1
@@ -4341,7 +4341,7 @@ function detectLanguage(tree) {
4341
4341
  }
4342
4342
  function extractTypes(tree, cache, language) {
4343
4343
  const effectiveLanguage = language ?? detectLanguage(tree);
4344
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
4344
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
4345
4345
  const isPython = effectiveLanguage === "python";
4346
4346
  const isRust = effectiveLanguage === "rust";
4347
4347
  if (effectiveLanguage === "go") {
@@ -5727,7 +5727,7 @@ function detectLanguageFromTree(tree, cache) {
5727
5727
  function extractCalls(tree, cache, language) {
5728
5728
  const calls = [];
5729
5729
  const detectedLanguage = language ?? detectLanguageFromTree(tree, cache);
5730
- const isJavaScript = detectedLanguage === "javascript" || detectedLanguage === "typescript";
5730
+ const isJavaScript = detectedLanguage === "javascript" || detectedLanguage === "typescript" || detectedLanguage === "tsx";
5731
5731
  const isPython = detectedLanguage === "python";
5732
5732
  const isRust = detectedLanguage === "rust";
5733
5733
  if (detectedLanguage === "go") {
@@ -5776,8 +5776,137 @@ function extractJavaScriptCalls(tree, cache) {
5776
5776
  calls.push(callInfo);
5777
5777
  }
5778
5778
  }
5779
+ const jsxAttributes = getNodesFromCache(tree.rootNode, "jsx_attribute", cache);
5780
+ for (const attr of jsxAttributes) {
5781
+ const callInfo = extractJSXAttributeSink(attr);
5782
+ if (callInfo) {
5783
+ calls.push(callInfo);
5784
+ }
5785
+ }
5786
+ const assignments = getNodesFromCache(tree.rootNode, "assignment_expression", cache);
5787
+ for (const assign of assignments) {
5788
+ const callInfo = extractDomPropertyAssignmentSink(assign);
5789
+ if (callInfo) {
5790
+ calls.push(callInfo);
5791
+ }
5792
+ }
5779
5793
  return calls;
5780
5794
  }
5795
+ var DOM_XSS_ASSIGNMENT_PROPERTIES = /* @__PURE__ */ new Set([
5796
+ "innerHTML",
5797
+ "outerHTML"
5798
+ ]);
5799
+ function extractDomPropertyAssignmentSink(node) {
5800
+ const leftNode = node.childForFieldName("left");
5801
+ const rightNode = node.childForFieldName("right");
5802
+ if (!leftNode || !rightNode) return null;
5803
+ if (leftNode.type !== "member_expression") return null;
5804
+ const propertyNode = leftNode.childForFieldName("property");
5805
+ const objectNode = leftNode.childForFieldName("object");
5806
+ if (!propertyNode) return null;
5807
+ const propertyName = getNodeText(propertyNode);
5808
+ if (!DOM_XSS_ASSIGNMENT_PROPERTIES.has(propertyName)) return null;
5809
+ const receiver = objectNode ? getNodeText(objectNode) : null;
5810
+ const expression = getNodeText(rightNode);
5811
+ const { variable, literal } = analyzeJSArgument(rightNode);
5812
+ const enclosingFunc = findJSEnclosingFunction(node);
5813
+ return {
5814
+ method_name: propertyName,
5815
+ receiver,
5816
+ arguments: [
5817
+ {
5818
+ position: 0,
5819
+ expression,
5820
+ variable,
5821
+ literal
5822
+ }
5823
+ ],
5824
+ location: {
5825
+ line: node.startPosition.row + 1,
5826
+ column: node.startPosition.column
5827
+ },
5828
+ in_method: enclosingFunc,
5829
+ resolved: true,
5830
+ resolution: {
5831
+ status: "resolved",
5832
+ target: `DOM.${propertyName}`
5833
+ }
5834
+ };
5835
+ }
5836
+ function extractJSXAttributeSink(attr) {
5837
+ let nameNode = null;
5838
+ for (let i2 = 0; i2 < attr.childCount; i2++) {
5839
+ const child = attr.child(i2);
5840
+ if (child && child.type === "property_identifier") {
5841
+ nameNode = child;
5842
+ break;
5843
+ }
5844
+ }
5845
+ if (!nameNode) return null;
5846
+ const attrName = getNodeText(nameNode);
5847
+ if (attrName !== "dangerouslySetInnerHTML") return null;
5848
+ let valueExpr = null;
5849
+ for (let i2 = 0; i2 < attr.childCount; i2++) {
5850
+ const child = attr.child(i2);
5851
+ if (child && child.type === "jsx_expression") {
5852
+ valueExpr = child;
5853
+ break;
5854
+ }
5855
+ }
5856
+ if (!valueExpr) return null;
5857
+ let htmlValue = null;
5858
+ for (let i2 = 0; i2 < valueExpr.childCount; i2++) {
5859
+ const inner = valueExpr.child(i2);
5860
+ if (!inner || inner.type !== "object") continue;
5861
+ for (let j = 0; j < inner.childCount; j++) {
5862
+ const pair = inner.child(j);
5863
+ if (!pair || pair.type !== "pair") continue;
5864
+ const keyNode = pair.childForFieldName("key");
5865
+ if (!keyNode) continue;
5866
+ const keyText = getNodeText(keyNode).replace(/^["']|["']$/g, "");
5867
+ if (keyText === "__html") {
5868
+ htmlValue = pair.childForFieldName("value");
5869
+ break;
5870
+ }
5871
+ }
5872
+ if (htmlValue) break;
5873
+ }
5874
+ if (!htmlValue) {
5875
+ for (let i2 = 0; i2 < valueExpr.childCount; i2++) {
5876
+ const inner = valueExpr.child(i2);
5877
+ if (inner && inner.type !== "{" && inner.type !== "}") {
5878
+ htmlValue = inner;
5879
+ break;
5880
+ }
5881
+ }
5882
+ }
5883
+ if (!htmlValue) return null;
5884
+ const expression = getNodeText(htmlValue);
5885
+ const { variable, literal } = analyzeJSArgument(htmlValue);
5886
+ const enclosingFunc = findJSEnclosingFunction(attr);
5887
+ return {
5888
+ method_name: "dangerouslySetInnerHTML",
5889
+ receiver: null,
5890
+ arguments: [
5891
+ {
5892
+ position: 0,
5893
+ expression,
5894
+ variable,
5895
+ literal
5896
+ }
5897
+ ],
5898
+ location: {
5899
+ line: attr.startPosition.row + 1,
5900
+ column: attr.startPosition.column
5901
+ },
5902
+ in_method: enclosingFunc,
5903
+ resolved: true,
5904
+ resolution: {
5905
+ status: "resolved",
5906
+ target: "react.dangerouslySetInnerHTML"
5907
+ }
5908
+ };
5909
+ }
5781
5910
  function buildJSResolutionContext(tree, cache) {
5782
5911
  const context = {
5783
5912
  functionNames: /* @__PURE__ */ new Set(),
@@ -7285,7 +7414,7 @@ function detectLanguage2(tree) {
7285
7414
  }
7286
7415
  function extractImports(tree, language) {
7287
7416
  const effectiveLanguage = language ?? detectLanguage2(tree);
7288
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
7417
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
7289
7418
  const isPython = effectiveLanguage === "python";
7290
7419
  const isRust = effectiveLanguage === "rust";
7291
7420
  if (effectiveLanguage === "go") {
@@ -7977,7 +8106,7 @@ function detectLanguage3(tree) {
7977
8106
  }
7978
8107
  function buildCFG(tree, language) {
7979
8108
  const effectiveLanguage = language ?? detectLanguage3(tree);
7980
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
8109
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
7981
8110
  const allBlocks = [];
7982
8111
  const allEdges = [];
7983
8112
  let blockIdCounter = 0;
@@ -8552,7 +8681,7 @@ function detectLanguage4(tree) {
8552
8681
  }
8553
8682
  function buildDFG(tree, cache, language) {
8554
8683
  const effectiveLanguage = language ?? detectLanguage4(tree);
8555
- const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript";
8684
+ const isJavaScript = effectiveLanguage === "javascript" || effectiveLanguage === "typescript" || effectiveLanguage === "tsx";
8556
8685
  if (isJavaScript) {
8557
8686
  return buildJavaScriptDFG(tree, cache);
8558
8687
  }
@@ -9360,7 +9489,18 @@ function buildBashDFG(tree) {
9360
9489
  if (varNameNode) {
9361
9490
  const varName = getNodeText(varNameNode);
9362
9491
  if (varName && !varName.startsWith("?") && !varName.startsWith("#")) {
9363
- const reachingDef = findReachingDef(varName, scopeStack);
9492
+ let reachingDef = findReachingDef(varName, scopeStack);
9493
+ if (reachingDef === null && !positionalParams.includes(varName)) {
9494
+ const def = {
9495
+ id: defIdCounter++,
9496
+ variable: varName,
9497
+ line: 0,
9498
+ kind: "param"
9499
+ };
9500
+ defs.push(def);
9501
+ scopeStack[0].set(varName, def.id);
9502
+ reachingDef = def.id;
9503
+ }
9364
9504
  uses.push({
9365
9505
  id: useIdCounter++,
9366
9506
  variable: varName,
@@ -9373,7 +9513,18 @@ function buildBashDFG(tree) {
9373
9513
  const varNameNode = node.namedChildCount > 0 ? node.namedChild(0) : null;
9374
9514
  if (varNameNode && varNameNode.type === "variable_name") {
9375
9515
  const varName = getNodeText(varNameNode);
9376
- const reachingDef = findReachingDef(varName, scopeStack);
9516
+ let reachingDef = findReachingDef(varName, scopeStack);
9517
+ if (reachingDef === null && !positionalParams.includes(varName)) {
9518
+ const def = {
9519
+ id: defIdCounter++,
9520
+ variable: varName,
9521
+ line: 0,
9522
+ kind: "param"
9523
+ };
9524
+ defs.push(def);
9525
+ scopeStack[0].set(varName, def.id);
9526
+ reachingDef = def.id;
9527
+ }
9377
9528
  uses.push({
9378
9529
  id: useIdCounter++,
9379
9530
  variable: varName,
@@ -9835,7 +9986,7 @@ var FRAMEWORK_MODULE_PATTERNS = [
9835
9986
  /^@nestjs\/core$/
9836
9987
  ];
9837
9988
  function extractRuntimeRegistrations(tree, cache, language, imports) {
9838
- if (language === "javascript" || language === "typescript") {
9989
+ if (language === "javascript" || language === "typescript" || language === "tsx") {
9839
9990
  return extractJSRuntimeRegistrations(tree, cache, imports);
9840
9991
  }
9841
9992
  if (language === "python") {
@@ -10786,8 +10937,12 @@ var DEFAULT_SOURCES = [
10786
10937
  { method: "get", class: "cookies", type: "http_cookie", severity: "high", return_tainted: true },
10787
10938
  { property: "json", object: "request", type: "http_body", severity: "high", property_tainted: true },
10788
10939
  { property: "data", object: "request", type: "http_body", severity: "high", property_tainted: true },
10940
+ { property: "stream", object: "request", type: "http_body", severity: "high", property_tainted: true },
10789
10941
  { property: "path", object: "request", type: "http_path", severity: "medium", property_tainted: true },
10790
10942
  { property: "query_string", object: "request", type: "http_query", severity: "high", property_tainted: true },
10943
+ // Flask request.get_data() — raw request bytes (method form, parallel to request.data property)
10944
+ { method: "get_data", class: "request", type: "http_body", severity: "high", return_tainted: true },
10945
+ { method: "get_json", class: "request", type: "http_body", severity: "high", return_tainted: true },
10791
10946
  // Django request object
10792
10947
  { method: "get", class: "GET", type: "http_param", severity: "high", return_tainted: true },
10793
10948
  { method: "get", class: "POST", type: "http_param", severity: "high", return_tainted: true },
@@ -10999,14 +11154,19 @@ var DEFAULT_SINKS = [
10999
11154
  { method: "setExecutable", class: "ExecTask", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11000
11155
  { method: "setCommand", class: "ExecTask", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11001
11156
  { method: "execute", class: "Java", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11002
- // Shell/Bash utilities
11003
- { method: "bash", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11004
- { method: "shell", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11005
- { method: "sh", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11006
- { method: "spawn", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11007
- { method: "fork", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11008
- { method: "popen", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11009
- { method: "system", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11157
+ // Shell/Bash utilities — these are method-call sinks in host languages
11158
+ // (Java Runtime/ProcessBuilder, JS child_process spawn/exec, Python subprocess, etc.).
11159
+ // When the analyzed file IS a bash/shell script, the bash plugin's per-flag entries
11160
+ // (argPositions: [1] for `bash -c <cmd>`) MUST win. Restrict these generic entries
11161
+ // to non-shell languages so they don't collide on the dedup key
11162
+ // `${location}:${call.location.line}:${pattern.cwe}`.
11163
+ { method: "bash", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11164
+ { method: "shell", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11165
+ { method: "sh", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11166
+ { method: "spawn", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11167
+ { method: "fork", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11168
+ { method: "popen", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11169
+ { method: "system", languages: ["java", "javascript", "typescript", "python", "go", "rust"], type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
11010
11170
  // Apache Commons Exec
11011
11171
  // Note: bare class 'Executor' removed (see comment above) — DefaultExecutor matched explicitly.
11012
11172
  { method: "setCommandline", class: "DefaultExecutor", type: "command_injection", cwe: "CWE-78", severity: "critical", arg_positions: [0] },
@@ -11096,6 +11256,12 @@ var DEFAULT_SINKS = [
11096
11256
  { method: "unzip", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11097
11257
  { method: "extract", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11098
11258
  { method: "extractAll", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11259
+ // Python zipfile/tarfile use lowercase extractall (PEP 8 naming)
11260
+ { method: "extractall", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0], languages: ["python"] },
11261
+ // Python zipfile.ZipFile(path) — tainted archive path enables Zip-Slip via malicious archive
11262
+ { method: "ZipFile", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["python"] },
11263
+ // Flask send_from_directory: untrusted filename can escape directory via ../
11264
+ { method: "send_from_directory", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [1], languages: ["python"] },
11099
11265
  { method: "unjar", type: "path_traversal", cwe: "CWE-22", severity: "critical", arg_positions: [0, 1] },
11100
11266
  // Additional file constructors — BufferedReader(Reader) is NOT a path traversal sink; it wraps a Reader, not a file path
11101
11267
  { method: "PrintWriter", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
@@ -12180,16 +12346,42 @@ var DEFAULT_SINKS = [
12180
12346
  // value position so a tainted variable is detected.
12181
12347
  { method: "Set", class: "Header", type: "crlf", cwe: "CWE-113", severity: "medium", arg_positions: [1], languages: ["go"] },
12182
12348
  { method: "Add", class: "Header", type: "crlf", cwe: "CWE-113", severity: "medium", arg_positions: [1], languages: ["go"] },
12183
- // Mass-assignment (CWE-915) — Sprint 6, #86.
12184
- // JS Object.assign(target, ...sources) — sources are arg 1..N, and if any
12185
- // source is request-tainted, every key gets written onto the target. We
12186
- // flag the source positions; the analyzer only needs one tainted to fire.
12187
- { method: "assign", class: "Object", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12188
- // Lodash bulk-merge helpers behave identically.
12189
- { method: "merge", class: "_", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12190
- { method: "extend", class: "_", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12349
+ // Mass-assignment (CWE-915 / CWE-1321) — Sprint 6, #86; cognium-dev #68 Sprint 10.
12350
+ // JS Object.assign(target, ...sources), `_.merge`, `_.extend`, `$.extend`,
12351
+ // `Object.defineProperty`: when fed an attacker-controlled bag, they write
12352
+ // arbitrary keys onto the target (or, for `__proto__`/`constructor.prototype`,
12353
+ // pollute the prototype chain). The CWE is CWE-1321 (Prototype Pollution),
12354
+ // which subsumes mass assignment for JS sinks operating on plain Objects.
12355
+ // We keep the existing `mass_assignment` SinkType so consumers route the
12356
+ // findings the same way; only the CWE shifts to flag prototype-pollution.
12357
+ { method: "assign", class: "Object", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12358
+ { method: "defineProperty", class: "Object", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2], languages: ["javascript", "typescript"] },
12359
+ { method: "defineProperties", class: "Object", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1], languages: ["javascript", "typescript"] },
12360
+ // Lodash bulk-merge helpers behave identically. `_.merge` and `lodash.merge`
12361
+ // are aliases — match both receivers.
12362
+ { method: "merge", class: "_", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12363
+ { method: "merge", class: "lodash", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12364
+ { method: "extend", class: "_", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12365
+ { method: "extend", class: "lodash", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12366
+ { method: "defaultsDeep", class: "_", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12367
+ { method: "defaultsDeep", class: "lodash", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12191
12368
  // jQuery $.extend(target, source) (legacy).
12192
- { method: "extend", class: "$", type: "mass_assignment", cwe: "CWE-915", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] }
12369
+ { method: "extend", class: "$", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12370
+ { method: "extend", class: "jQuery", type: "mass_assignment", cwe: "CWE-1321", severity: "high", arg_positions: [1, 2, 3], languages: ["javascript", "typescript"] },
12371
+ // DOM-XSS via property assignment (CWE-79) — cognium-dev #68 Sprint 10.
12372
+ // `el.innerHTML = tainted` / `el.outerHTML = tainted`. The JS call extractor
12373
+ // emits a synthetic CallInfo with method=`innerHTML`/`outerHTML` for each
12374
+ // matching assignment_expression. These classless entries catch them.
12375
+ { method: "innerHTML", type: "xss", cwe: "CWE-79", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12376
+ { method: "outerHTML", type: "xss", cwe: "CWE-79", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12377
+ // node-serialize.unserialize (CWE-502) — cognium-dev #68 Sprint 10.
12378
+ // The node-serialize package evaluates `_$$ND_FUNC$$_` IIFE payloads on
12379
+ // decode, turning untrusted input into RCE. Match both receiver-bound
12380
+ // calls (`serialize.unserialize(x)`) and destructured imports
12381
+ // (`const { unserialize } = require('node-serialize')`).
12382
+ { method: "unserialize", class: "serialize", type: "deserialization", cwe: "CWE-502", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12383
+ { method: "unserialize", class: "node-serialize", type: "deserialization", cwe: "CWE-502", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] },
12384
+ { method: "unserialize", type: "deserialization", cwe: "CWE-502", severity: "critical", arg_positions: [0], languages: ["javascript", "typescript"] }
12193
12385
  ];
12194
12386
  var DEFAULT_SANITIZERS = [
12195
12387
  // SQL Injection - proper parameter binding sanitizes input
@@ -12883,7 +13075,12 @@ function matchesSourcePattern(call, pattern) {
12883
13075
  if (call.receiver_type && call.receiver_type === pattern.class) {
12884
13076
  } else if (call.receiver_type_fqn && call.receiver_type_fqn.endsWith("." + pattern.class)) {
12885
13077
  } else if (!call.receiver) {
12886
- return false;
13078
+ const target = call.resolution?.target;
13079
+ const expectedTail = `${pattern.class}.${pattern.method}`;
13080
+ if (target && (target === expectedTail || target.endsWith("." + expectedTail))) {
13081
+ } else {
13082
+ return false;
13083
+ }
12887
13084
  } else if (!receiverMightBeClass(call.receiver, pattern.class)) {
12888
13085
  return false;
12889
13086
  }
@@ -13150,7 +13347,12 @@ function matchesSinkPattern(call, pattern, typeHierarchy, language) {
13150
13347
  }
13151
13348
  return false;
13152
13349
  } else if (!call.receiver && !call.receiver_type) {
13153
- return false;
13350
+ const target = call.resolution?.target;
13351
+ const expectedTail = `${pattern.class}.${pattern.method}`;
13352
+ if (target && (target === expectedTail || target.endsWith("." + expectedTail))) {
13353
+ } else {
13354
+ return false;
13355
+ }
13154
13356
  }
13155
13357
  }
13156
13358
  if (!pattern.class && call.receiver) {
@@ -14939,6 +15141,21 @@ function findInitialTaint(sources, callsByLine, defsByLine) {
14939
15141
  });
14940
15142
  }
14941
15143
  }
15144
+ if (source.variable) {
15145
+ const paramDefs = defsByLine.get(0) ?? [];
15146
+ for (const def of paramDefs) {
15147
+ if (def.kind === "param" && def.variable === source.variable) {
15148
+ tainted.push({
15149
+ variable: def.variable,
15150
+ defId: def.id,
15151
+ line: def.line,
15152
+ sourceType: source.type,
15153
+ sourceLine: source.line,
15154
+ confidence: source.confidence
15155
+ });
15156
+ }
15157
+ }
15158
+ }
14942
15159
  }
14943
15160
  return tainted;
14944
15161
  }
@@ -15107,6 +15324,13 @@ function analyzeInterprocedural(graphOrTypes, callsOrSources, dfgOrSinks, source
15107
15324
  for (const def of graph.defsAtLine(source.line)) {
15108
15325
  seedIds.add(def.id);
15109
15326
  }
15327
+ if (source.variable) {
15328
+ for (const def of graph.defsAtLine(0)) {
15329
+ if (def.kind === "param" && def.variable === source.variable) {
15330
+ seedIds.add(def.id);
15331
+ }
15332
+ }
15333
+ }
15110
15334
  }
15111
15335
  const taintedDefIds = graph.propagateTaintedDefIds(seedIds);
15112
15336
  const taintedVarsFromCP = options.taintedVariables ?? /* @__PURE__ */ new Set();
@@ -15258,7 +15482,36 @@ function analyzeInterprocedural(graphOrTypes, callsOrSources, dfgOrSinks, source
15258
15482
  const targetMethod = getMethodNode(methodNodes, call.method_name);
15259
15483
  if (!targetMethod) {
15260
15484
  if (taintedArgPositions.length > 0 && !collectionMethods.has(call.method_name) && !sanitizerMethods.has(call.method_name) && !safeUtilityMethods.has(call.method_name)) {
15261
- const sink = {
15485
+ const isBash = graph.ir.meta.language === "bash";
15486
+ const bashSafeBuiltins = /* @__PURE__ */ new Set([
15487
+ "echo",
15488
+ "printf",
15489
+ "test",
15490
+ "[",
15491
+ "[[",
15492
+ "true",
15493
+ "false",
15494
+ ":",
15495
+ "declare",
15496
+ "local",
15497
+ "export",
15498
+ "readonly",
15499
+ "typeset"
15500
+ ]);
15501
+ if (isBash && bashSafeBuiltins.has(call.method_name)) {
15502
+ continue;
15503
+ }
15504
+ const sink = isBash ? {
15505
+ type: "command_injection",
15506
+ cwe: "CWE-78",
15507
+ location: `Tainted data (${taintedArgVars.join(", ")}) passed unquoted to shell utility ${call.method_name}`,
15508
+ line: call.location.line,
15509
+ confidence: 0.6,
15510
+ method: call.method_name,
15511
+ argPositions: taintedArgPositions
15512
+ } : {
15513
+ // Create an "external_taint_escape" sink for this call
15514
+ // This represents tainted data being passed to code we can't analyze
15262
15515
  type: "external_taint_escape",
15263
15516
  cwe: "CWE-668",
15264
15517
  // Exposure of Resource to Wrong Sphere
@@ -18154,6 +18407,9 @@ var DefaultLanguageRegistry = class {
18154
18407
  }
18155
18408
  }
18156
18409
  get(language) {
18410
+ if (language === "tsx") {
18411
+ return this.plugins.get("javascript");
18412
+ }
18157
18413
  return this.plugins.get(language);
18158
18414
  }
18159
18415
  getForFile(filePath) {
@@ -20590,6 +20846,23 @@ var BashPlugin = class extends BaseLanguagePlugin {
20590
20846
  cwe: "CWE-918",
20591
20847
  severity: "high",
20592
20848
  argPositions: [0]
20849
+ },
20850
+ // File inclusion — `source` and POSIX `.` execute arbitrary shell code
20851
+ // from the file at the supplied path. With user-controlled input this is
20852
+ // an RCE primitive equivalent to eval() on file contents (CWE-98).
20853
+ {
20854
+ method: "source",
20855
+ type: "path_traversal",
20856
+ cwe: "CWE-98",
20857
+ severity: "critical",
20858
+ argPositions: [0]
20859
+ },
20860
+ {
20861
+ method: ".",
20862
+ type: "path_traversal",
20863
+ cwe: "CWE-98",
20864
+ severity: "critical",
20865
+ argPositions: [0]
20593
20866
  }
20594
20867
  ];
20595
20868
  }
@@ -21739,6 +22012,7 @@ function mergeHtmlResults(htmlMeta, scriptResults, attributeFindings) {
21739
22012
  const allSources = [];
21740
22013
  const allSinks = [];
21741
22014
  const allSanitizers = [];
22015
+ const allFlows = [];
21742
22016
  const allImports = [];
21743
22017
  const allExports = [];
21744
22018
  const allFindings = [];
@@ -21824,6 +22098,14 @@ function mergeHtmlResults(htmlMeta, scriptResults, attributeFindings) {
21824
22098
  line: sanitizer.line + lineShift
21825
22099
  });
21826
22100
  }
22101
+ for (const flow of ir.taint.flows ?? []) {
22102
+ allFlows.push({
22103
+ ...flow,
22104
+ source_line: flow.source_line + lineShift,
22105
+ sink_line: flow.sink_line + lineShift,
22106
+ path: flow.path.map((step) => ({ ...step, line: step.line + lineShift }))
22107
+ });
22108
+ }
21827
22109
  for (const imp of ir.imports) {
21828
22110
  allImports.push({
21829
22111
  ...imp,
@@ -21843,7 +22125,8 @@ function mergeHtmlResults(htmlMeta, scriptResults, attributeFindings) {
21843
22125
  const taint = {
21844
22126
  sources: allSources,
21845
22127
  sinks: allSinks,
21846
- sanitizers: allSanitizers.length > 0 ? allSanitizers : void 0
22128
+ sanitizers: allSanitizers.length > 0 ? allSanitizers : void 0,
22129
+ flows: allFlows.length > 0 ? allFlows : void 0
21847
22130
  };
21848
22131
  const cfg = {
21849
22132
  blocks: allCfgBlocks,
@@ -22044,7 +22327,9 @@ var LanguageSourcesPass = class {
22044
22327
  const constProp = ctx.getResult("constant-propagation");
22045
22328
  const additionalSources = [];
22046
22329
  const additionalSinks = [];
22330
+ const additionalSanitizers = [];
22047
22331
  additionalSources.push(...findGetterSources(types, constProp.instanceFieldTaint, code));
22332
+ additionalSources.push(...findOopFieldReadSources(types, code, language));
22048
22333
  additionalSources.push(...findJavaScriptAssignmentSources(code, language));
22049
22334
  const jsDOMSinks = findJavaScriptDOMSinks(code, language);
22050
22335
  for (const s of jsDOMSinks) {
@@ -22096,9 +22381,10 @@ var LanguageSourcesPass = class {
22096
22381
  for (const finding of bashFindings) {
22097
22382
  ctx.addFinding(finding);
22098
22383
  }
22384
+ additionalSanitizers.push(...findBashRegexAllowlistSanitizers(code));
22099
22385
  }
22100
22386
  attachSourceLineCode(additionalSources, additionalSinks, code);
22101
- return { additionalSources, additionalSinks, pyTaintedVars, pySanitizedVars, jsTaintedVars };
22387
+ return { additionalSources, additionalSinks, additionalSanitizers, pyTaintedVars, pySanitizedVars, jsTaintedVars };
22102
22388
  }
22103
22389
  };
22104
22390
  function findGetterSources(types, instanceFieldTaint, _sourceCode) {
@@ -22149,6 +22435,115 @@ function findGetterSources(types, instanceFieldTaint, _sourceCode) {
22149
22435
  }
22150
22436
  return sources;
22151
22437
  }
22438
/**
 * Derives taint sources from fields that a class constructor fills with
 * tainted data, and from zero-argument methods that return such a field.
 *
 * Only Java and Python are handled; any other language yields an empty list.
 * The scan is line-based: it reads the constructor's line range for
 * `this.x = ...` / `self.x = ...` assignments, then inspects every other
 * method that takes no explicit parameters for a single `return this.x` /
 * `return self.x` statement.
 *
 * @param {Array<object>} types - Type records; each needs `kind`, `name` and
 *   `methods`, where a method has `name`, `parameters`, `start_line`, `end_line`.
 * @param {string} sourceCode - Full text of the file the line numbers refer to.
 * @param {string} language - Language identifier of the file.
 * @returns {Array<object>} Source records (`type`, `location`, `severity`,
 *   `line`, `confidence`, `variable`): per class, the tainted fields first,
 *   followed by the getters that expose them.
 */
function findOopFieldReadSources(types, sourceCode, language) {
  const isPython = language === "python";
  if (!isPython && language !== "java") return [];

  const receiver = isPython ? "self" : "this";
  const commentMarker = isPython ? "#" : "//";
  const textLines = sourceCode.split("\n");
  const javaHttpPattern = /\b(?:req|request|httpRequest|servletRequest|httpServletRequest)\.(?:getParameter|getParameterValues|getParameterMap|getHeader|getHeaders|getCookies|getQueryString|getPathInfo|getRequestURI|getRequestURL|getInputStream|getReader)\b/;
  const fieldAssignRe = new RegExp(`^\\s*${receiver}\\.([A-Za-z_]\\w*)\\s*=\\s*(.+?)(?:;\\s*)?$`);
  // Neither regex carries a global/sticky flag, so sharing them across
  // iterations keeps matching stateless.
  const returnRe = new RegExp(`\\breturn\\s+${receiver}\\.([A-Za-z_]\\w*)\\s*[;}]?`);

  const isExplicitParam = (param) => param.name !== "self" && param.name !== "this";

  // Decides which source type (if any) an assignment's right-hand side carries.
  const classifyRhs = (rhs, ctorParams) => {
    if (ctorParams.has(rhs)) return "interprocedural_param";
    if (!isPython) return javaHttpPattern.test(rhs) ? "http_param" : null;
    const hit = PYTHON_TAINTED_PATTERNS2.find(({ pattern }) => pattern.test(rhs));
    return hit ? hit.type : null;
  };

  const found = [];
  for (const cls of types) {
    if (cls.kind !== "class" || cls.name === "<module>") continue;

    // Python constructors are `__init__`; Java constructors share the class name.
    const ctorName = isPython ? "__init__" : cls.name;
    const ctor = cls.methods.find((method) => method.name === ctorName);
    if (!ctor) continue;

    const ctorParams = new Set(ctor.parameters.filter(isExplicitParam).map((param) => param.name));

    // Pass 1: collect fields assigned from tainted values inside the constructor.
    const taintedFields = new Map();
    const ctorStop = Math.min(ctor.end_line, textLines.length);
    for (let idx = ctor.start_line - 1; idx < ctorStop; idx += 1) {
      const text = textLines[idx] ?? "";
      if (text.trim().startsWith(commentMarker)) continue;
      const assign = text.match(fieldAssignRe);
      if (assign === null) continue;
      const [, fieldName, rawRhs] = assign;
      const kind = classifyRhs(rawRhs.trim().replace(/;\s*$/, ""), ctorParams);
      if (kind) {
        taintedFields.set(fieldName, { line: idx + 1, type: kind });
      }
    }
    if (taintedFields.size === 0) continue;

    for (const [fieldName, info] of taintedFields) {
      found.push({
        type: info.type,
        location: `${cls.name}.${receiver}.${fieldName} (constructor-injected field, #78)`,
        severity: "high",
        line: info.line,
        confidence: 0.85,
        variable: `${receiver}.${fieldName}`
      });
    }

    // Pass 2: parameterless methods whose only return statement hands back a
    // tainted field are sources themselves.
    for (const method of cls.methods) {
      if (method === ctor || method.parameters.some(isExplicitParam)) continue;

      let returnedField = null;
      let fieldReturns = 0;
      let sawOtherReturn = false;
      const methodStop = Math.min(method.end_line, textLines.length);
      for (let idx = method.start_line - 1; idx < methodStop; idx += 1) {
        const stmt = (textLines[idx] ?? "").trim();
        if (stmt === "" || stmt.startsWith(commentMarker)) continue;
        const direct = stmt.match(returnRe);
        if (direct !== null) {
          returnedField = direct[1];
          fieldReturns += 1;
          continue;
        }
        if (/\breturn\b/.test(stmt)) {
          // Any return that is not a plain field read disqualifies the method.
          sawOtherReturn = true;
          break;
        }
      }

      if (sawOtherReturn || fieldReturns !== 1) continue;
      if (!returnedField || !taintedFields.has(returnedField)) continue;

      found.push({
        type: taintedFields.get(returnedField).type,
        location: `${cls.name}.${method.name} returns tainted field '${returnedField}' (#78)`,
        severity: "high",
        line: method.start_line,
        confidence: 0.85,
        variable: isPython ? `${receiver}.${method.name}` : method.name
      });
    }
  }
  return found;
}
22152
22547
  function findJavaScriptAssignmentSources(sourceCode, language) {
22153
22548
  if (!["javascript", "typescript"].includes(language)) return [];
22154
22549
  const sources = [];
@@ -22217,55 +22612,55 @@ function buildPythonTaintedVars(sourceCode) {
22217
22612
  for (let i2 = 0; i2 < lines.length; i2++) {
22218
22613
  const line = lines[i2];
22219
22614
  if (line.trimStart().startsWith("#")) continue;
22220
- const subscriptAssign = line.match(/^\s*(\w+)\[(['"])([^'"]+)\2\]\s*=\s*(.+)$/);
22615
+ const subscriptAssign = line.match(/^\s*([\p{L}\p{N}_]+)\[(['"])([^'"]+)\2\]\s*=\s*(.+)$/u);
22221
22616
  if (subscriptAssign) {
22222
22617
  const [, container, , key, rhs2] = subscriptAssign;
22223
- const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(rhs2));
22618
+ const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(rhs2));
22224
22619
  if (isTaintedRhs) containerTainted.set(`${container}['${key}']`, i2 + 1);
22225
22620
  continue;
22226
22621
  }
22227
- const setCallMatch = line.match(/^\s*(\w+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/);
22622
+ const setCallMatch = line.match(/^\s*([\p{L}\p{N}_]+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/u);
22228
22623
  if (setCallMatch) {
22229
22624
  const [, obj, , section, , key, rhs2] = setCallMatch;
22230
- const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(rhs2));
22625
+ const isTaintedRhs = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(rhs2));
22231
22626
  if (isTaintedRhs) containerTainted.set(`${obj}['${section}']['${key}']`, i2 + 1);
22232
22627
  continue;
22233
22628
  }
22234
- const containerAppendMatch = line.match(/^\s*(\w+)\.(append|extend|insert|add|push|put|appendleft)\s*\(\s*(.+?)\s*\)\s*$/);
22629
+ const containerAppendMatch = line.match(/^\s*([\p{L}\p{N}_]+)\.(append|extend|insert|add|push|put|appendleft)\s*\(\s*(.+?)\s*\)\s*$/u);
22235
22630
  if (containerAppendMatch) {
22236
22631
  const [, receiver, , argExpr] = containerAppendMatch;
22237
- const argIsTainted = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(argExpr));
22632
+ const argIsTainted = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(argExpr));
22238
22633
  const argIsDirectSource = PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(argExpr));
22239
22634
  if (argIsTainted || argIsDirectSource) tainted.set(receiver, tainted.get(receiver) ?? i2 + 1);
22240
22635
  continue;
22241
22636
  }
22242
- const augAssign = line.match(/^\s*(\w+)\s*\+=\s*(.+)$/);
22637
+ const augAssign = line.match(/^\s*([\p{L}\p{N}_]+)\s*\+=\s*(.+)$/u);
22243
22638
  if (augAssign) {
22244
22639
  const [, augLhs, augRhs] = augAssign;
22245
- const rhsTainted = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(augRhs));
22640
+ const rhsTainted = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(augRhs));
22246
22641
  if (rhsTainted || tainted.has(augLhs)) tainted.set(augLhs, tainted.get(augLhs) ?? i2 + 1);
22247
22642
  continue;
22248
22643
  }
22249
- const forLoopMatch = line.match(/^\s*for\s+(\w+)\s+in\s+(.+?)(?:\s*:\s*)?$/);
22644
+ const forLoopMatch = line.match(/^\s*for\s+([\p{L}\p{N}_]+)\s+in\s+(.+?)(?:\s*:\s*)?$/u);
22250
22645
  if (forLoopMatch) {
22251
22646
  const [, iterVar, iterExpr] = forLoopMatch;
22252
22647
  const isDirectSource2 = PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(iterExpr));
22253
- const isPropagated = [...tainted.keys()].some((v) => new RegExp(`\\b${v}\\b`).test(iterExpr));
22648
+ const isPropagated = [...tainted.keys()].some((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(iterExpr));
22254
22649
  if (isDirectSource2 || isPropagated) tainted.set(iterVar, i2 + 1);
22255
22650
  continue;
22256
22651
  }
22257
- const assignMatch = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
22652
+ const assignMatch = line.match(/^\s*([\p{L}\p{N}_]+)\s*=\s*(.+)$/u);
22258
22653
  if (!assignMatch) continue;
22259
22654
  const [, lhs, rhs] = assignMatch;
22260
22655
  const isDirectSource = PYTHON_TAINTED_PATTERNS2.some((p) => p.pattern.test(rhs));
22261
22656
  let propagatedFrom;
22262
- const dictAccessMatch = rhs.trim().match(/^(\w+)\[(['"])([^'"]+)\2\]$/);
22657
+ const dictAccessMatch = rhs.trim().match(/^([\p{L}\p{N}_]+)\[(['"])([^'"]+)\2\]$/u);
22263
22658
  if (dictAccessMatch) {
22264
22659
  const [, container, , key] = dictAccessMatch;
22265
22660
  if (containerTainted.has(`${container}['${key}']`)) propagatedFrom = `${container}['${key}']`;
22266
22661
  }
22267
22662
  if (!propagatedFrom) {
22268
- const confGetMatch = rhs.trim().match(/^(\w+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/);
22663
+ const confGetMatch = rhs.trim().match(/^([\p{L}\p{N}_]+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/u);
22269
22664
  if (confGetMatch) {
22270
22665
  const [, obj, , section, , key] = confGetMatch;
22271
22666
  if (containerTainted.has(`${obj}['${section}']['${key}']`)) propagatedFrom = `${obj}['${section}']['${key}']`;
@@ -22273,7 +22668,7 @@ function buildPythonTaintedVars(sourceCode) {
22273
22668
  }
22274
22669
  if (!propagatedFrom) {
22275
22670
  const isSafeEnvRead = /\bos\.environ\.get\s*\(/.test(rhs) || /\bos\.getenv\s*\(/.test(rhs);
22276
- if (!isSafeEnvRead) propagatedFrom = [...tainted.keys()].find((v) => new RegExp(`\\b${v}\\b`).test(rhs));
22671
+ if (!isSafeEnvRead) propagatedFrom = [...tainted.keys()].find((v) => new RegExp(`(?<![\\p{L}\\p{N}_])${v}(?![\\p{L}\\p{N}_])`, "u").test(rhs));
22277
22672
  }
22278
22673
  if (isDirectSource) {
22279
22674
  tainted.set(lhs, i2 + 1);
@@ -22634,6 +23029,52 @@ function findBashPatternFindings(sourceCode, file) {
22634
23029
  }
22635
23030
  return findings;
22636
23031
  }
23032
/**
 * Finds single-line Bash guards of the form
 * `if [[ ! "$var" =~ REGEX ]]; then exit|return|die ...` whose REGEX passes
 * `isSafeBashAllowlistRegex`, and reports a sanitizer record for every line
 * that follows such a guard, up to the last line of the script.
 *
 * Several guards in one script each contribute their own run of records, so
 * lines after the second guard are reported once per preceding guard.
 *
 * @param {string} code - Bash source text.
 * @returns {Array<object>} Sanitizer records (`type`, `method`, `line`,
 *   `sanitizes`) with 1-indexed line numbers.
 */
function findBashRegexAllowlistSanitizers(code) {
  const guardPattern = /^\s*if\s+\[\[\s*!\s*"?\$\{?(\w+)\}?"?\s*=~\s*(\S+)\s*\]\]\s*;\s*then\s+(exit|return|die)\b/;
  const coveredSinkTypes = [
    "command_injection",
    "path_traversal",
    "sql_injection",
    "code_injection",
    "ssrf",
    "xss",
    "open_redirect",
    "log_injection"
  ];

  const scriptLines = code.split("\n");
  const lastLine = scriptLines.length;
  const collected = [];

  scriptLines.forEach((text, idx) => {
    const guard = guardPattern.exec(text);
    if (guard === null || !isSafeBashAllowlistRegex(guard[2])) return;

    // idx is 0-based, so the guard sits on line idx + 1 and coverage starts
    // on the line after it.
    for (let lineNo = idx + 2; lineNo <= lastLine; lineNo += 1) {
      collected.push({
        type: "regex_allowlist",
        method: "=~",
        line: lineNo,
        // Each record gets its own array so callers cannot alias them.
        sanitizes: [...coveredSinkTypes]
      });
    }
  });

  return collected;
}
23062
/**
 * Decides whether a Bash `=~` regex literal is a strict allowlist, i.e. it is
 * fully anchored and built only from constructs that enumerate the accepted
 * characters.
 *
 * Accepted building blocks: non-negated bracket expressions (optionally
 * quantified), backslash escapes of a single character, the literal
 * characters `A-Z a-z 0-9 _ - /`, and the quantifiers `+ * ?`.
 *
 * Rejected because they match by exclusion or without bound, and so can let
 * shell metacharacters through:
 *   - missing `^` / `$` anchors, or an empty body;
 *   - alternation (`|`) and back-references (`\1`);
 *   - an unescaped `.` anywhere outside a bracket expression (including the
 *     `.*` / `.+` forms);
 *   - negated bracket expressions (`[^...]`);
 *   - the negated shorthand escapes `\S`, `\W`, `\D`.
 *
 * @param {string} literal - The regex text as written after `=~`.
 * @returns {boolean} `true` only when the whole literal is made of accepted
 *   building blocks.
 */
function isSafeBashAllowlistRegex(literal) {
  if (!literal.startsWith("^") || !literal.endsWith("$")) return false;
  const body = literal.slice(1, -1);
  if (body.length === 0) return false;
  // Kept as a conservative early-out: also refuses `\.*` / `\.+`.
  if (body.includes(".*") || body.includes(".+")) return false;
  if (body.includes("|")) return false;
  if (/\\\d/.test(body)) return false;

  // Sticky matching forces every token to start exactly where the previous
  // one ended, so any unrecognised character fails the scan immediately.
  //   \[(?!\^)[^\]]+\][+*?]?  bracket expression that is not negated
  //   \\[^SWD]                escaped character, except negated shorthands
  //   [A-Za-z0-9_\-/]         literal characters (a bare `.` is a wildcard
  //                           and is deliberately absent)
  //   [+*?]                   quantifiers
  const safeToken = /\[(?!\^)[^\]]+\][+*?]?|\\[^SWD]|[A-Za-z0-9_\-/]|[+*?]/y;
  let consumed = 0;
  while (consumed < body.length) {
    safeToken.lastIndex = consumed;
    const token = safeToken.exec(body);
    if (token === null) return false;
    consumed += token[0].length;
  }
  return true;
}
22637
23078
 
22638
23079
  // src/analysis/passes/sink-filter-pass.ts
22639
23080
  var JS_XSS_SANITIZERS = [
@@ -22673,7 +23114,10 @@ var SinkFilterPass = class {
22673
23114
  sinks.push(s);
22674
23115
  }
22675
23116
  }
22676
- const sanitizers = taintMatcher.sanitizers;
23117
+ const sanitizers = [
23118
+ ...taintMatcher.sanitizers,
23119
+ ...langSources.additionalSanitizers ?? []
23120
+ ];
22677
23121
  let filtered = sinks.filter((sink) => !constProp.unreachableLines.has(sink.line));
22678
23122
  filtered = filterCleanArraySinks(filtered, calls, constProp.taintedArrayElements, constProp.symbols);
22679
23123
  filtered = filterCleanVariableSinks(
@@ -23403,7 +23847,7 @@ function detectExpressionScanFlows(calls, sources, sinks, sanitizers, unreachabl
23403
23847
  for (const s of sourcesWithVar) {
23404
23848
  if (reCache.has(s.variable)) continue;
23405
23849
  const escaped = s.variable.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
23406
- reCache.set(s.variable, new RegExp(`\\b${escaped}\\b`));
23850
+ reCache.set(s.variable, new RegExp(`(?<![\\p{L}\\p{N}_])${escaped}(?![\\p{L}\\p{N}_])`, "u"));
23407
23851
  }
23408
23852
  const callsByLine = /* @__PURE__ */ new Map();
23409
23853
  for (const call of calls) {
@@ -23927,6 +24371,7 @@ function isPublicMethod(method, language) {
23927
24371
  return method.modifiers.includes("public");
23928
24372
  case "javascript":
23929
24373
  case "typescript":
24374
+ case "tsx":
23930
24375
  return !method.modifiers.includes("private") && !method.modifiers.includes("protected");
23931
24376
  case "python":
23932
24377
  return !method.name.startsWith("_");
@@ -23945,7 +24390,7 @@ var MissingPublicDocPass = class {
23945
24390
  if (UTIL_DIR_RE.test(graph.ir.meta.file)) {
23946
24391
  return { missingDocMethods: [], missingDocTypes: [] };
23947
24392
  }
23948
- if (!["java", "javascript", "typescript", "python"].includes(language)) {
24393
+ if (!["java", "javascript", "typescript", "tsx", "python"].includes(language)) {
23949
24394
  return { missingDocMethods: [], missingDocTypes: [] };
23950
24395
  }
23951
24396
  const lines = code.split("\n");
@@ -24448,6 +24893,7 @@ function hasDeclKeyword(lineText, language) {
24448
24893
  return /\b(?:int|long|float|double|boolean|byte|char|short|var|final)\b/.test(lineText) || /\b[A-Z]\w*(?:<[^>]*>)?\s+\w/.test(lineText);
24449
24894
  case "javascript":
24450
24895
  case "typescript":
24896
+ case "tsx":
24451
24897
  return /\b(?:let|const|var)\s+[\w{[]/.test(lineText);
24452
24898
  case "rust":
24453
24899
  return /\blet\s+(?:mut\s+)?\w/.test(lineText);
@@ -28386,6 +28832,7 @@ var WeakRandomPass = class {
28386
28832
  return "Use the `secrets` module (`secrets.token_bytes`, `secrets.token_hex`, `secrets.choice`, `secrets.randbelow`).";
28387
28833
  case "javascript":
28388
28834
  case "typescript":
28835
+ case "tsx":
28389
28836
  return "Use `crypto.randomBytes(n)` (Node.js) or `crypto.getRandomValues(typedArray)` (browser).";
28390
28837
  case "go":
28391
28838
  return "Use `crypto/rand` instead of `math/rand`. Example: `b := make([]byte, 32); _, _ = rand.Read(b)` (where `rand` is `crypto/rand`).";
@@ -28422,7 +28869,7 @@ var WeakRandomPass = class {
28422
28869
  }
28423
28870
  return null;
28424
28871
  }
28425
- if (language === "javascript" || language === "typescript") {
28872
+ if (language === "javascript" || language === "typescript" || language === "tsx") {
28426
28873
  if (method === "random" && (receiver === "Math" || receiver.endsWith(".Math"))) {
28427
28874
  return "Math.random";
28428
28875
  }
@@ -29830,6 +30277,7 @@ function getNodeTypesForLanguage(language) {
29830
30277
  ]);
29831
30278
  case "javascript":
29832
30279
  case "typescript":
30280
+ case "tsx":
29833
30281
  return /* @__PURE__ */ new Set([
29834
30282
  "call_expression",
29835
30283
  "new_expression",
@@ -29842,7 +30290,13 @@ function getNodeTypesForLanguage(language) {
29842
30290
  "import_statement",
29843
30291
  "export_statement",
29844
30292
  "member_expression",
29845
- "assignment_expression"
30293
+ "assignment_expression",
30294
+ // JSX node types — tree-sitter-tsx grammar (.tsx/.jsx routing)
30295
+ "jsx_element",
30296
+ "jsx_self_closing_element",
30297
+ "jsx_opening_element",
30298
+ "jsx_attribute",
30299
+ "jsx_expression"
29846
30300
  ]);
29847
30301
  case "bash":
29848
30302
  return /* @__PURE__ */ new Set([
@@ -29906,8 +30360,15 @@ async function analyze(code, filePath, language, options = {}) {
29906
30360
  if (language === "html") {
29907
30361
  return analyzeHtmlFile(code, filePath, options);
29908
30362
  }
29909
- logger.debug("Analyzing file", { filePath, language, codeLength: code.length });
29910
- const tree = await parse(code, language);
30363
+ let parseGrammar = language;
30364
+ if (language === "javascript" || language === "typescript") {
30365
+ const lower = filePath.toLowerCase();
30366
+ if (lower.endsWith(".tsx") || lower.endsWith(".jsx")) {
30367
+ parseGrammar = "tsx";
30368
+ }
30369
+ }
30370
+ logger.debug("Analyzing file", { filePath, language, parseGrammar, codeLength: code.length });
30371
+ const tree = await parse(code, parseGrammar);
29911
30372
  try {
29912
30373
  logger.trace("Parsed AST", { rootNodeType: tree.rootNode.type });
29913
30374
  const parseStatus = extractParseStatus(tree);