circle-ir 3.57.0 → 3.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/configs/sinks/golang.json +61 -0
  2. package/configs/sinks/nodejs.json +11 -6
  3. package/configs/sinks/python.json +24 -0
  4. package/configs/sinks/rust.json +30 -0
  5. package/configs/sinks/sql.yaml +53 -0
  6. package/dist/analysis/config-loader.d.ts.map +1 -1
  7. package/dist/analysis/config-loader.js +57 -9
  8. package/dist/analysis/config-loader.js.map +1 -1
  9. package/dist/analysis/constant-propagation/patterns.d.ts.map +1 -1
  10. package/dist/analysis/constant-propagation/patterns.js +12 -0
  11. package/dist/analysis/constant-propagation/patterns.js.map +1 -1
  12. package/dist/analysis/constant-propagation/propagator.d.ts +62 -0
  13. package/dist/analysis/constant-propagation/propagator.d.ts.map +1 -1
  14. package/dist/analysis/constant-propagation/propagator.js +275 -7
  15. package/dist/analysis/constant-propagation/propagator.js.map +1 -1
  16. package/dist/analysis/passes/language-sources-pass.d.ts.map +1 -1
  17. package/dist/analysis/passes/language-sources-pass.js +226 -14
  18. package/dist/analysis/passes/language-sources-pass.js.map +1 -1
  19. package/dist/analysis/passes/security-headers-pass.d.ts.map +1 -1
  20. package/dist/analysis/passes/security-headers-pass.js +93 -0
  21. package/dist/analysis/passes/security-headers-pass.js.map +1 -1
  22. package/dist/analysis/passes/sink-filter-pass.d.ts.map +1 -1
  23. package/dist/analysis/passes/sink-filter-pass.js +16 -1
  24. package/dist/analysis/passes/sink-filter-pass.js.map +1 -1
  25. package/dist/analysis/passes/taint-propagation-pass.d.ts.map +1 -1
  26. package/dist/analysis/passes/taint-propagation-pass.js +153 -9
  27. package/dist/analysis/passes/taint-propagation-pass.js.map +1 -1
  28. package/dist/analysis/taint-matcher.d.ts.map +1 -1
  29. package/dist/analysis/taint-matcher.js +116 -2
  30. package/dist/analysis/taint-matcher.js.map +1 -1
  31. package/dist/analysis/taint-propagation.d.ts.map +1 -1
  32. package/dist/analysis/taint-propagation.js +25 -1
  33. package/dist/analysis/taint-propagation.js.map +1 -1
  34. package/dist/browser/circle-ir.js +610 -45
  35. package/dist/core/circle-ir-core.cjs +368 -21
  36. package/dist/core/circle-ir-core.js +368 -21
  37. package/dist/types/config.d.ts +7 -0
  38. package/dist/types/config.d.ts.map +1 -1
  39. package/package.json +1 -1
@@ -10245,11 +10245,14 @@ var DEFAULT_SOURCES = [
10245
10245
  // Rocket
10246
10246
  { method: "param", class: "Request", type: "http_param", severity: "high", return_tainted: true },
10247
10247
  { method: "cookies", class: "Request", type: "http_cookie", severity: "high", return_tainted: true },
10248
- // Axum extractors
10249
- { method: "Json", type: "http_body", severity: "high", return_tainted: true },
10250
- { method: "Query", type: "http_param", severity: "high", return_tainted: true },
10251
- { method: "Path", type: "http_path", severity: "high", return_tainted: true },
10252
- { method: "Form", type: "http_param", severity: "high", return_tainted: true },
10248
+ // Axum extractors — Rust-only. The simple names `Json`/`Query`/`Path`/`Form`
10249
+ // collide with stdlib types in other ecosystems (notably Python's
10250
+ // `pathlib.Path` constructor and `flask.Form`), so they MUST be
10251
+ // language-scoped to Rust to avoid spurious source matches.
10252
+ { method: "Json", type: "http_body", severity: "high", return_tainted: true, languages: ["rust"] },
10253
+ { method: "Query", type: "http_param", severity: "high", return_tainted: true, languages: ["rust"] },
10254
+ { method: "Path", type: "http_path", severity: "high", return_tainted: true, languages: ["rust"] },
10255
+ { method: "Form", type: "http_param", severity: "high", return_tainted: true, languages: ["rust"] },
10253
10256
  // Rust std library
10254
10257
  { method: "var", class: "env", type: "env_input", severity: "medium", return_tainted: true },
10255
10258
  { method: "var_os", class: "env", type: "env_input", severity: "medium", return_tainted: true },
@@ -10452,10 +10455,15 @@ var DEFAULT_SINKS = [
10452
10455
  { method: "PathResource", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
10453
10456
  // Additional resource/file patterns
10454
10457
  { method: "forFile", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
10455
- { method: "resolve", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
10456
- { method: "resolve", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
10457
- { method: "resolveSibling", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
10458
- { method: "relativize", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [0] },
10458
+ // Java NIO `Path.resolve(other)` joining with an untrusted `other` can
10459
+ // escape the parent directory. Language-scoped to Java because the simple
10460
+ // name `resolve` collides with Python `pathlib.Path.resolve()`
10461
+ // (a canonicalization SANITIZER, no argument), JS `Promise.resolve(...)`,
10462
+ // and Rust `Path::canonicalize` variants. Sprint 9 #48.2.
10463
+ { method: "resolve", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["java"] },
10464
+ { method: "resolve", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["java"] },
10465
+ { method: "resolveSibling", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["java"] },
10466
+ { method: "relativize", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [0], languages: ["java"] },
10459
10467
  // Static file configuration
10460
10468
  { method: "staticFiles", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
10461
10469
  { method: "setRoot", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
@@ -11612,6 +11620,16 @@ var DEFAULT_SANITIZERS = [
11612
11620
  // Returns just filename, strips path
11613
11621
  { method: "canonicalize", removes: ["path_traversal"] },
11614
11622
  // Resolves symlinks and normalizes
11623
+ // Go path sanitizers (#51) — filepath.Base strips directory components
11624
+ // (fully sanitizes), filepath.Clean / path.Clean normalize away ../ segments
11625
+ // (defense-in-depth — mirrors Java getCanonicalPath in this table; the
11626
+ // stricter Clean+HasPrefix guard recognition is tracked separately).
11627
+ // EvalSymlinks is the Go equivalent of Java's Path.toRealPath.
11628
+ { method: "Base", class: "filepath", removes: ["path_traversal"] },
11629
+ { method: "Base", class: "path", removes: ["path_traversal"] },
11630
+ { method: "Clean", class: "filepath", removes: ["path_traversal"] },
11631
+ { method: "Clean", class: "path", removes: ["path_traversal"] },
11632
+ { method: "EvalSymlinks", class: "filepath", removes: ["path_traversal"] },
11615
11633
  // Log Injection sanitizers
11616
11634
  { method: "replace", removes: ["log_injection"] },
11617
11635
  // Used to remove newlines/control chars
@@ -11706,6 +11724,8 @@ var DEFAULT_SANITIZERS = [
11706
11724
  { method: "abspath", class: "os.path", removes: ["path_traversal"] },
11707
11725
  { method: "realpath", class: "path", removes: ["path_traversal"] },
11708
11726
  { method: "abspath", class: "path", removes: ["path_traversal"] },
11727
+ // pathlib.Path.resolve() — canonicalizes path, resolves symlinks (Python 3)
11728
+ { method: "resolve", class: "Path", removes: ["path_traversal"] },
11709
11729
  // Python Type coercion
11710
11730
  { method: "int", removes: ["sql_injection", "command_injection", "xss"] },
11711
11731
  { method: "float", removes: ["sql_injection", "command_injection"] },
@@ -11738,8 +11758,36 @@ var DEFAULT_SANITIZERS = [
11738
11758
  { method: "encode_attribute", class: "html_escape", removes: ["xss"] },
11739
11759
  { method: "escape_html", removes: ["xss"] },
11740
11760
  // Rust Type coercion (parsing)
11741
- { method: "parse", removes: ["sql_injection", "command_injection", "xss"] }
11761
+ { method: "parse", removes: ["sql_injection", "command_injection", "xss"] },
11742
11762
  // str.parse::<i32>()
11763
+ // =========================================================================
11764
+ // Type-cast taint barriers (#57)
11765
+ // Numeric/UUID casts cannot carry a string-injection payload.
11766
+ // =========================================================================
11767
+ // Java numeric parse — Integer.parseInt, Long.parseLong, etc.
11768
+ { method: "parseInt", class: "Integer", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11769
+ { method: "parseLong", class: "Long", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11770
+ { method: "parseFloat", class: "Float", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11771
+ { method: "parseDouble", class: "Double", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11772
+ { method: "parseShort", class: "Short", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11773
+ { method: "parseByte", class: "Byte", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11774
+ // Java UUID parse — UUID.fromString rejects non-UUID strings
11775
+ { method: "fromString", class: "UUID", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11776
+ // JavaScript numeric coercion (Number/parseInt/parseFloat already covered above; add path_traversal/code_injection)
11777
+ { method: "BigInt", removes: ["sql_injection", "nosql_injection", "command_injection", "path_traversal", "code_injection"] },
11778
+ // Go numeric parse — strconv.Atoi, ParseInt, ParseFloat, ParseUint, ParseBool
11779
+ { method: "Atoi", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11780
+ { method: "ParseInt", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11781
+ { method: "ParseFloat", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11782
+ { method: "ParseUint", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11783
+ { method: "ParseBool", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11784
+ // Go UUID parse
11785
+ { method: "Parse", class: "uuid", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11786
+ { method: "MustParse", class: "uuid", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11787
+ // Python — int/float already covered above; add bool + UUID/Decimal casts
11788
+ { method: "bool", removes: ["sql_injection", "command_injection", "xss", "code_injection"] },
11789
+ { method: "UUID", class: "uuid", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
11790
+ { method: "Decimal", class: "decimal", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] }
11743
11791
  ];
11744
11792
  function getDefaultConfig() {
11745
11793
  return {
@@ -11769,7 +11817,7 @@ function analyzeTaint(calls, types, config = getDefaultConfig(), typeHierarchy,
11769
11817
  const sourceLines = code !== void 0 ? code.split("\n") : void 0;
11770
11818
  const sources = findSources(calls, types, config.sources, sourceLines, language);
11771
11819
  const sinks = findSinks(calls, config.sinks, typeHierarchy, language, sourceLines);
11772
- const sanitizers = findSanitizers(calls, types, config.sanitizers);
11820
+ const sanitizers = findSanitizers(calls, types, config.sanitizers, sourceLines);
11773
11821
  return { sources, sinks, sanitizers };
11774
11822
  }
11775
11823
  function attachSourceLineCode(sources, sinks, code) {
@@ -11789,6 +11837,9 @@ function findSources(calls, types, patterns, sourceLines, language) {
11789
11837
  const sources = [];
11790
11838
  for (const call of calls) {
11791
11839
  for (const pattern of patterns) {
11840
+ if (pattern.languages && pattern.languages.length > 0 && language !== void 0 && !pattern.languages.includes(language)) {
11841
+ continue;
11842
+ }
11792
11843
  if (matchesSourcePattern(call, pattern)) {
11793
11844
  sources.push({
11794
11845
  type: pattern.type,
@@ -12425,6 +12476,15 @@ function receiverMightBeClass(receiver, className) {
12425
12476
  if (receiver === className) {
12426
12477
  return true;
12427
12478
  }
12479
+ if (receiver.endsWith(")")) {
12480
+ const ctorMatch = receiver.match(/^(\w+)\(/);
12481
+ if (ctorMatch) {
12482
+ const ctorName = ctorMatch[1];
12483
+ if (ctorName === className || ctorName.toLowerCase() === className.toLowerCase()) {
12484
+ return true;
12485
+ }
12486
+ }
12487
+ }
12428
12488
  if (receiver.includes("::")) {
12429
12489
  const scopePrefix = receiver.match(/^(\w+)::/);
12430
12490
  if (scopePrefix) {
@@ -12692,7 +12752,7 @@ function calculateSinkConfidence(call, pattern) {
12692
12752
  }
12693
12753
  return Math.min(confidence, 1);
12694
12754
  }
12695
- function findSanitizers(calls, types, patterns) {
12755
+ function findSanitizers(calls, types, patterns, sourceLines) {
12696
12756
  const sanitizers = [];
12697
12757
  const sanitizerMethods = /* @__PURE__ */ new Set();
12698
12758
  for (const type of types) {
@@ -12702,6 +12762,66 @@ function findSanitizers(calls, types, patterns) {
12702
12762
  }
12703
12763
  }
12704
12764
  }
12765
+ const wrapperSanitizers = /* @__PURE__ */ new Map();
12766
+ for (const type of types) {
12767
+ for (const method of type.methods) {
12768
+ const bodySize = method.end_line - method.start_line;
12769
+ if (bodySize < 0 || bodySize > 2) continue;
12770
+ const paramNames = new Set(method.parameters.map((p) => p.name));
12771
+ if (paramNames.size === 0) continue;
12772
+ const inside = [];
12773
+ for (const c of calls) {
12774
+ if (c.location.line < method.start_line || c.location.line > method.end_line) continue;
12775
+ if (c.method_name === method.name) continue;
12776
+ inside.push(c);
12777
+ }
12778
+ if (inside.length !== 1) continue;
12779
+ const innerCall = inside[0];
12780
+ let matched;
12781
+ for (const pattern of patterns) {
12782
+ if (matchesSanitizerPattern(innerCall, pattern)) {
12783
+ matched = pattern;
12784
+ break;
12785
+ }
12786
+ }
12787
+ if (!matched || !matched.removes || matched.removes.length === 0) continue;
12788
+ let argOk = false;
12789
+ for (const arg of innerCall.arguments) {
12790
+ if (arg.variable && paramNames.has(arg.variable)) {
12791
+ argOk = true;
12792
+ break;
12793
+ }
12794
+ }
12795
+ if (!argOk) continue;
12796
+ if (sourceLines) {
12797
+ const lineText = sourceLines[innerCall.location.line - 1] ?? "";
12798
+ const stripped = lineText.trim();
12799
+ const returnMatch = stripped.match(/^return\s+(?:await\s+)?(.*)$/);
12800
+ if (!returnMatch) continue;
12801
+ const after = returnMatch[1].replace(/;\s*$/, "").trimEnd();
12802
+ const callPrefix = innerCall.receiver ? `${innerCall.receiver}.${innerCall.method_name}(` : `${innerCall.method_name}(`;
12803
+ if (!after.startsWith(callPrefix)) continue;
12804
+ if (!after.endsWith(")")) continue;
12805
+ }
12806
+ const existing = wrapperSanitizers.get(method.name);
12807
+ if (existing) {
12808
+ const set = /* @__PURE__ */ new Set([...existing, ...matched.removes]);
12809
+ wrapperSanitizers.set(method.name, Array.from(set));
12810
+ } else {
12811
+ wrapperSanitizers.set(method.name, [...matched.removes]);
12812
+ }
12813
+ }
12814
+ }
12815
+ for (const call of calls) {
12816
+ const removes = wrapperSanitizers.get(call.method_name);
12817
+ if (!removes) continue;
12818
+ sanitizers.push({
12819
+ type: "derived_wrapper",
12820
+ method: formatSanitizerMethod(call),
12821
+ line: call.location.line,
12822
+ sanitizes: removes
12823
+ });
12824
+ }
12705
12825
  for (const call of calls) {
12706
12826
  if (sanitizerMethods.has(call.method_name)) {
12707
12827
  sanitizers.push({
@@ -13024,6 +13144,15 @@ var CodeGraph = class {
13024
13144
  };
13025
13145
 
13026
13146
  // src/analysis/taint-propagation.ts
13147
+ function buildSanitizersByLine(sanitizers) {
13148
+ const out2 = /* @__PURE__ */ new Map();
13149
+ for (const san of sanitizers) {
13150
+ const existing = out2.get(san.line);
13151
+ if (existing) existing.push(san);
13152
+ else out2.set(san.line, [san]);
13153
+ }
13154
+ return out2;
13155
+ }
13027
13156
  function propagateTaint(graphOrDfg, callsOrSources, sourcesOrSinks, sinksOrSanitizers, sanitizersArg) {
13028
13157
  let graph;
13029
13158
  let sources;
@@ -13059,7 +13188,7 @@ function propagateTaint(graphOrDfg, callsOrSources, sourcesOrSinks, sinksOrSanit
13059
13188
  const defsByLine = graph.defsByLine;
13060
13189
  const usesByLine = graph.usesByLine;
13061
13190
  const callsByLine = graph.callsByLine;
13062
- const sanitizersByLine = graph.sanitizersByLine;
13191
+ const sanitizersByLine = sanitizers.length > 0 ? buildSanitizersByLine(sanitizers) : graph.sanitizersByLine;
13063
13192
  const defById = graph.defById;
13064
13193
  const rawInitialTaint = findInitialTaint(sources, callsByLine, defsByLine);
13065
13194
  const initialTaint = rawInitialTaint.filter((tv) => {
@@ -13732,7 +13861,32 @@ var SANITIZER_METHODS = /* @__PURE__ */ new Set([
13732
13861
  "validatePath",
13733
13862
  "validateCityName",
13734
13863
  "validateInput",
13735
- "sanitizeInput"
13864
+ "sanitizeInput",
13865
+ // Type-cast barriers (#57) — numeric/boolean casts cannot carry a string
13866
+ // injection payload. Conservative whitelist; ambiguous names like `valueOf`
13867
+ // and `Parse` are intentionally excluded. NOTE: `fromString` IS listed below (for UUID.fromString).
13868
+ // Java
13869
+ "parseInt",
13870
+ "parseLong",
13871
+ "parseFloat",
13872
+ "parseDouble",
13873
+ "parseShort",
13874
+ "parseByte",
13875
+ "fromString",
13876
+ // UUID.fromString — parses strict UUID format, rejects injection
13877
+ // JS/TS (parseInt/parseFloat covered above)
13878
+ "Number",
13879
+ "BigInt",
13880
+ // Go
13881
+ "Atoi",
13882
+ "ParseInt",
13883
+ "ParseFloat",
13884
+ "ParseUint",
13885
+ "ParseBool",
13886
+ // Python
13887
+ "int",
13888
+ "float",
13889
+ "bool"
13736
13890
  ]);
13737
13891
  var ANTI_SANITIZER_METHODS = /* @__PURE__ */ new Set([
13738
13892
  // URL decoding (reverses URL encoding)
@@ -13862,6 +14016,10 @@ var ConstantPropagator = class _ConstantPropagator {
13862
14016
  inConstructor = false;
13863
14017
  // Map constructor parameter names to their positions (0-indexed)
13864
14018
  constructorParamPositions = /* @__PURE__ */ new Map();
14019
+ // Sprint 9 #58.1 — names of `static final Pattern` fields whose compiled
14020
+ // regex is strict-anchored (provably matches a bounded character set).
14021
+ // Populated lazily on first access via `getSafePatternFields()`.
14022
+ safePatternFieldsCache = null;
13865
14023
  /**
13866
14024
  * Analyze source code and build constant propagation state.
13867
14025
  */
@@ -13895,6 +14053,7 @@ var ConstantPropagator = class _ConstantPropagator {
13895
14053
  this.currentClassName = null;
13896
14054
  this.inConstructor = false;
13897
14055
  this.constructorParamPositions.clear();
14056
+ this.safePatternFieldsCache = null;
13898
14057
  this.collectClassFields(tree.rootNode);
13899
14058
  for (const methodName of sanitizerMethods) {
13900
14059
  this.methodReturnsSanitized.add(methodName);
@@ -13904,6 +14063,7 @@ var ConstantPropagator = class _ConstantPropagator {
13904
14063
  (name2) => this.lookupSymbol(name2)
13905
14064
  );
13906
14065
  this.analyzeMethodReturns(tree.rootNode);
14066
+ this.seedPythonModuleConstants(tree.rootNode);
13907
14067
  this.visit(tree.rootNode);
13908
14068
  this.refineTaintFromConstants();
13909
14069
  const resultTainted = new Set(this.tainted);
@@ -14264,6 +14424,162 @@ var ConstantPropagator = class _ConstantPropagator {
14264
14424
  }
14265
14425
  }
14266
14426
  }
14427
+ /**
14428
+ * Sprint 9 #55 — `seedPythonModuleConstants` (defined further below): seed the symbol table with Python module-level constant
14429
+ * assignments. Walks only direct children of the `module` root and adds
14430
+ * `IDENT = <primitive literal>` to `symbols` so `if IDENT:` guards inside
14431
+ * downstream functions can be folded to dead code.
14432
+ *
14433
+ * Recognized literal RHS kinds: `true`/`false` (booleans), `none`, integer/float
14434
+ * literals, string literals. The ExpressionEvaluator already understands
14435
+ * each via the same lookup callback; we just need the symbol present.
14436
+ */
14437
+ /**
14438
+ * Sprint 9 #55 — gate `field_declaration` folding to primitive literals.
14439
+ *
14440
+ * The deep-nesting regression (cognium-ai#88) constructs a Java
14441
+ * `static final String hyphenData = "a" + "b" + ... (10k segments)` at the
14442
+ * class level. `handleVariableDeclaration` would otherwise dispatch
14443
+ * `evaluateExpression` on the deeply nested binary AST and blow the V8
14444
+ * stack. The dead-code-by-const-guard pattern (`if (DEBUG)`) only requires
14445
+ * `boolean`/`integer`/`string` (single-literal) RHS folding, so restrict
14446
+ * to those node types.
14447
+ */
14448
+ fieldDeclHasPrimitiveLiteralValue(node) {
14449
+ const primitive = /* @__PURE__ */ new Set([
14450
+ // Java literal node types
14451
+ "true",
14452
+ "false",
14453
+ "null_literal",
14454
+ "decimal_integer_literal",
14455
+ "hex_integer_literal",
14456
+ "octal_integer_literal",
14457
+ "binary_integer_literal",
14458
+ "decimal_floating_point_literal",
14459
+ "hex_floating_point_literal",
14460
+ "character_literal",
14461
+ "string_literal",
14462
+ // JS/TS literal node types (defensive, in case other langs reuse it)
14463
+ "number",
14464
+ "string"
14465
+ ]);
14466
+ for (const child of node.children) {
14467
+ if (child.type !== "variable_declarator") continue;
14468
+ const value = child.childForFieldName("value");
14469
+ if (!value) continue;
14470
+ if (!primitive.has(value.type)) return false;
14471
+ }
14472
+ return true;
14473
+ }
14474
+ /**
14475
+ * Sprint 9 #58.1 — collect the set of class-level `Pattern` field names
14476
+ * whose compiled regex is strict-anchored, i.e. provably matches a
14477
+ * bounded character set with no wildcard escape. A subsequent
14478
+ * `if (!FIELD.matcher(var).matches()) throw ...;` guard then proves
14479
+ * `var` is sanitized after the if.
14480
+ *
14481
+ * Recognized initializer shapes (scanned via source-text regex to avoid
14482
+ * threading another AST walk):
14483
+ * `static final Pattern FIELD = Pattern.compile("regex");`
14484
+ *
14485
+ * Strict-anchored regex criteria:
14486
+ * - starts with `^` and ends with `$`
14487
+ * - after stripping `[...]` character classes and backslash escapes, must not contain `.` or
14488
+ * `|` (a `.` could match anything; `|` admits an arbitrary alternative)
14489
+ */
14490
+ getSafePatternFields() {
14491
+ if (this.safePatternFieldsCache !== null) return this.safePatternFieldsCache;
14492
+ const set = /* @__PURE__ */ new Set();
14493
+ const re = /\b(?:public\s+|private\s+|protected\s+)?(?:static\s+final|final\s+static)\s+(?:java\.util\.regex\.)?Pattern\s+(\w+)\s*=\s*(?:java\.util\.regex\.)?Pattern\s*\.\s*compile\s*\(\s*"((?:[^"\\]|\\.)*)"/g;
14494
+ let m;
14495
+ while ((m = re.exec(this.source)) !== null) {
14496
+ const name2 = m[1];
14497
+ const regex = m[2];
14498
+ if (this.isStrictAnchoredRegex(regex)) set.add(name2);
14499
+ }
14500
+ this.safePatternFieldsCache = set;
14501
+ return set;
14502
+ }
14503
+ isStrictAnchoredRegex(re) {
14504
+ if (!re.startsWith("^") || !re.endsWith("$")) return false;
14505
+ const stripped = re.replace(/\[(?:[^\]\\]|\\.)*\]/g, "");
14506
+ const cleaned = stripped.replace(/\\./g, "");
14507
+ if (cleaned.includes(".")) return false;
14508
+ if (cleaned.includes("|")) return false;
14509
+ return true;
14510
+ }
14511
+ /**
14512
+ * Sprint 9 #58.1 — detect the regex-allowlist guard pattern.
14513
+ *
14514
+ * if (!SAFE_NAME.matcher(var).matches()) { throw ...; }
14515
+ *
14516
+ * Returns the guarded variable name if the pattern matches AND
14517
+ * `SAFE_NAME` is a recognized strict-anchored Pattern field, otherwise
14518
+ * null. Caller drops the variable from `tainted` after the if-block.
14519
+ */
14520
+ detectRegexAllowlistGuard(condition, consequence) {
14521
+ if (!consequence) return null;
14522
+ let condText = getNodeText2(condition, this.source).replace(/\s+/g, "");
14523
+ while (condText.startsWith("(") && condText.endsWith(")")) {
14524
+ const inner = condText.slice(1, -1);
14525
+ let depth = 0;
14526
+ let balanced = true;
14527
+ for (let i2 = 0; i2 < inner.length; i2++) {
14528
+ if (inner[i2] === "(") depth++;
14529
+ else if (inner[i2] === ")") depth--;
14530
+ if (depth < 0) {
14531
+ balanced = false;
14532
+ break;
14533
+ }
14534
+ }
14535
+ if (!balanced || depth !== 0) break;
14536
+ condText = inner;
14537
+ }
14538
+ const m = condText.match(/^!(\w+)\.matcher\((\w+)\)\.matches\(\)$/);
14539
+ if (!m) return null;
14540
+ const patternName = m[1];
14541
+ const varName = m[2];
14542
+ if (!this.getSafePatternFields().has(patternName)) return null;
14543
+ if (!this.consequenceContainsThrow(consequence)) return null;
14544
+ return varName;
14545
+ }
14546
+ consequenceContainsThrow(node) {
14547
+ if (node.type === "throw_statement") return true;
14548
+ const stack = [node];
14549
+ while (stack.length > 0) {
14550
+ const n = stack.pop();
14551
+ if (!n) continue;
14552
+ if (n.type === "throw_statement") return true;
14553
+ if (n.type === "if_statement" || n.type === "switch_statement") continue;
14554
+ for (const c of n.children) stack.push(c);
14555
+ }
14556
+ return false;
14557
+ }
14558
+ seedPythonModuleConstants(root) {
14559
+ if (root.type !== "module") return;
14560
+ for (const child of root.children) {
14561
+ const target = child.type === "assignment" ? child : child.type === "expression_statement" && child.children.length > 0 ? child.children[0] : null;
14562
+ if (!target || target.type !== "assignment") continue;
14563
+ const left = target.childForFieldName("left");
14564
+ const right = target.childForFieldName("right");
14565
+ if (!left || !right) continue;
14566
+ if (left.type !== "identifier") continue;
14567
+ const allowed = /* @__PURE__ */ new Set([
14568
+ "true",
14569
+ "false",
14570
+ "none",
14571
+ "integer",
14572
+ "float",
14573
+ "string"
14574
+ ]);
14575
+ if (!allowed.has(right.type)) continue;
14576
+ const name2 = getNodeText2(left, this.source);
14577
+ if (!name2) continue;
14578
+ const value = this.evaluateExpression(right);
14579
+ if (!isKnown(value)) continue;
14580
+ this.symbols.set(name2, value);
14581
+ }
14582
+ }
14267
14583
  findAllMethods(node) {
14268
14584
  const methods = [];
14269
14585
  const stack = [node];
@@ -14358,6 +14674,11 @@ var ConstantPropagator = class _ConstantPropagator {
14358
14674
  case "local_variable_declaration":
14359
14675
  this.handleVariableDeclaration(node);
14360
14676
  return false;
14677
+ case "field_declaration":
14678
+ if (this.fieldDeclHasPrimitiveLiteralValue(node)) {
14679
+ this.handleVariableDeclaration(node);
14680
+ }
14681
+ return false;
14361
14682
  case "assignment_expression":
14362
14683
  this.handleAssignment(node);
14363
14684
  return false;
@@ -14848,6 +15169,16 @@ var ConstantPropagator = class _ConstantPropagator {
14848
15169
  }
14849
15170
  this.inConditionalBranch = wasInConditional;
14850
15171
  this.tainted = /* @__PURE__ */ new Set([...taintedBefore, ...taintedAfterThen, ...taintedAfterElse]);
15172
+ const guardedVar = this.detectRegexAllowlistGuard(condition, consequence);
15173
+ if (guardedVar) {
15174
+ this.tainted.delete(guardedVar);
15175
+ this.sanitizedVars.add(guardedVar);
15176
+ const scoped = this.getScopedName(guardedVar);
15177
+ if (scoped !== guardedVar) {
15178
+ this.tainted.delete(scoped);
15179
+ this.sanitizedVars.add(scoped);
15180
+ }
15181
+ }
14851
15182
  }
14852
15183
  }
14853
15184
  /**
@@ -15038,17 +15369,33 @@ var ConstantPropagator = class _ConstantPropagator {
15038
15369
  /**
15039
15370
  * Check if an expression is a call to a sanitizer method.
15040
15371
  * This includes both built-in sanitizers and @sanitizer annotated methods.
15372
+ * Handles Java (`method_invocation`), Go/JS/TS (`call_expression`), and
15373
+ * Python (`call`) AST shapes.
15041
15374
  */
15042
15375
  isSanitizerMethodCall(node) {
15043
- if (node.type !== "method_invocation") {
15044
- return false;
15376
+ const methodName = this.extractCallName(node);
15377
+ if (!methodName) return false;
15378
+ return SANITIZER_METHODS.has(methodName) || this.methodReturnsSanitized.has(methodName);
15379
+ }
15380
+ /**
15381
+ * Extract the trailing method/function name from any call node shape:
15382
+ * Java `method_invocation` — name field
15383
+ * Go/JS `call_expression` — function field (identifier or selector/member)
15384
+ * Python `call` — function field (identifier or attribute)
15385
+ */
15386
+ extractCallName(node) {
15387
+ let fnNode = null;
15388
+ if (node.type === "method_invocation") {
15389
+ fnNode = node.childForFieldName("name");
15390
+ } else if (node.type === "call_expression" || node.type === "call") {
15391
+ fnNode = node.childForFieldName("function");
15045
15392
  }
15046
- const nameNode = node.childForFieldName("name");
15047
- if (!nameNode) {
15048
- return false;
15393
+ if (!fnNode) return null;
15394
+ if (fnNode.type === "selector_expression" || fnNode.type === "member_expression" || fnNode.type === "attribute") {
15395
+ const tail = fnNode.childForFieldName("field") || fnNode.childForFieldName("property") || fnNode.childForFieldName("attribute");
15396
+ if (tail) return getNodeText2(tail, this.source);
15049
15397
  }
15050
- const methodName = getNodeText2(nameNode, this.source);
15051
- return SANITIZER_METHODS.has(methodName) || this.methodReturnsSanitized.has(methodName);
15398
+ return getNodeText2(fnNode, this.source);
15052
15399
  }
15053
15400
  /**
15054
15401
  * Check if an expression is a call to an anti-sanitizer method.