circle-ir 3.57.0 → 3.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/configs/sinks/golang.json +61 -0
- package/configs/sinks/nodejs.json +11 -6
- package/configs/sinks/python.json +24 -0
- package/configs/sinks/rust.json +30 -0
- package/configs/sinks/sql.yaml +53 -0
- package/dist/analysis/config-loader.d.ts.map +1 -1
- package/dist/analysis/config-loader.js +57 -9
- package/dist/analysis/config-loader.js.map +1 -1
- package/dist/analysis/constant-propagation/patterns.d.ts.map +1 -1
- package/dist/analysis/constant-propagation/patterns.js +12 -0
- package/dist/analysis/constant-propagation/patterns.js.map +1 -1
- package/dist/analysis/constant-propagation/propagator.d.ts +62 -0
- package/dist/analysis/constant-propagation/propagator.d.ts.map +1 -1
- package/dist/analysis/constant-propagation/propagator.js +275 -7
- package/dist/analysis/constant-propagation/propagator.js.map +1 -1
- package/dist/analysis/passes/language-sources-pass.d.ts.map +1 -1
- package/dist/analysis/passes/language-sources-pass.js +226 -14
- package/dist/analysis/passes/language-sources-pass.js.map +1 -1
- package/dist/analysis/passes/security-headers-pass.d.ts.map +1 -1
- package/dist/analysis/passes/security-headers-pass.js +93 -0
- package/dist/analysis/passes/security-headers-pass.js.map +1 -1
- package/dist/analysis/passes/sink-filter-pass.d.ts.map +1 -1
- package/dist/analysis/passes/sink-filter-pass.js +16 -1
- package/dist/analysis/passes/sink-filter-pass.js.map +1 -1
- package/dist/analysis/passes/taint-propagation-pass.d.ts.map +1 -1
- package/dist/analysis/passes/taint-propagation-pass.js +153 -9
- package/dist/analysis/passes/taint-propagation-pass.js.map +1 -1
- package/dist/analysis/taint-matcher.d.ts.map +1 -1
- package/dist/analysis/taint-matcher.js +116 -2
- package/dist/analysis/taint-matcher.js.map +1 -1
- package/dist/analysis/taint-propagation.d.ts.map +1 -1
- package/dist/analysis/taint-propagation.js +25 -1
- package/dist/analysis/taint-propagation.js.map +1 -1
- package/dist/browser/circle-ir.js +610 -45
- package/dist/core/circle-ir-core.cjs +368 -21
- package/dist/core/circle-ir-core.js +368 -21
- package/dist/types/config.d.ts +7 -0
- package/dist/types/config.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -10245,11 +10245,14 @@ var DEFAULT_SOURCES = [
|
|
|
10245
10245
|
// Rocket
|
|
10246
10246
|
{ method: "param", class: "Request", type: "http_param", severity: "high", return_tainted: true },
|
|
10247
10247
|
{ method: "cookies", class: "Request", type: "http_cookie", severity: "high", return_tainted: true },
|
|
10248
|
-
// Axum extractors
|
|
10249
|
-
|
|
10250
|
-
|
|
10251
|
-
|
|
10252
|
-
{ method: "
|
|
10248
|
+
// Axum extractors — Rust-only. The simple names `Json`/`Query`/`Path`/`Form`
|
|
10249
|
+
// collide with stdlib types in other ecosystems (notably Python's
|
|
10250
|
+
// `pathlib.Path` constructor and `flask.Form`), so they MUST be
|
|
10251
|
+
// language-scoped to Rust to avoid spurious source matches.
|
|
10252
|
+
{ method: "Json", type: "http_body", severity: "high", return_tainted: true, languages: ["rust"] },
|
|
10253
|
+
{ method: "Query", type: "http_param", severity: "high", return_tainted: true, languages: ["rust"] },
|
|
10254
|
+
{ method: "Path", type: "http_path", severity: "high", return_tainted: true, languages: ["rust"] },
|
|
10255
|
+
{ method: "Form", type: "http_param", severity: "high", return_tainted: true, languages: ["rust"] },
|
|
10253
10256
|
// Rust std library
|
|
10254
10257
|
{ method: "var", class: "env", type: "env_input", severity: "medium", return_tainted: true },
|
|
10255
10258
|
{ method: "var_os", class: "env", type: "env_input", severity: "medium", return_tainted: true },
|
|
@@ -10452,10 +10455,15 @@ var DEFAULT_SINKS = [
|
|
|
10452
10455
|
{ method: "PathResource", class: "constructor", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
|
|
10453
10456
|
// Additional resource/file patterns
|
|
10454
10457
|
{ method: "forFile", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
|
|
10455
|
-
|
|
10456
|
-
|
|
10457
|
-
|
|
10458
|
-
|
|
10458
|
+
// Java NIO `Path.resolve(other)` — joining with an untrusted `other` can
|
|
10459
|
+
// escape the parent directory. Language-scoped to Java because the simple
|
|
10460
|
+
// name `resolve` collides with Python `pathlib.Path.resolve()`
|
|
10461
|
+
// (a canonicalization SANITIZER, no argument), JS `Promise.resolve(...)`,
|
|
10462
|
+
// and Rust `Path::canonicalize` variants. Sprint 9 #48.2.
|
|
10463
|
+
{ method: "resolve", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["java"] },
|
|
10464
|
+
{ method: "resolve", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["java"] },
|
|
10465
|
+
{ method: "resolveSibling", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0], languages: ["java"] },
|
|
10466
|
+
{ method: "relativize", class: "Path", type: "path_traversal", cwe: "CWE-22", severity: "medium", arg_positions: [0], languages: ["java"] },
|
|
10459
10467
|
// Static file configuration
|
|
10460
10468
|
{ method: "staticFiles", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
|
|
10461
10469
|
{ method: "setRoot", type: "path_traversal", cwe: "CWE-22", severity: "high", arg_positions: [0] },
|
|
@@ -11612,6 +11620,16 @@ var DEFAULT_SANITIZERS = [
|
|
|
11612
11620
|
// Returns just filename, strips path
|
|
11613
11621
|
{ method: "canonicalize", removes: ["path_traversal"] },
|
|
11614
11622
|
// Resolves symlinks and normalizes
|
|
11623
|
+
// Go path sanitizers (#51) — filepath.Base strips directory components
|
|
11624
|
+
// (fully sanitizes), filepath.Clean / path.Clean normalize away ../ segments
|
|
11625
|
+
// (defense-in-depth — mirrors Java getCanonicalPath in this table; the
|
|
11626
|
+
// stricter Clean+HasPrefix guard recognition is tracked separately).
|
|
11627
|
+
// EvalSymlinks is the Go equivalent of Java's Path.toRealPath.
|
|
11628
|
+
{ method: "Base", class: "filepath", removes: ["path_traversal"] },
|
|
11629
|
+
{ method: "Base", class: "path", removes: ["path_traversal"] },
|
|
11630
|
+
{ method: "Clean", class: "filepath", removes: ["path_traversal"] },
|
|
11631
|
+
{ method: "Clean", class: "path", removes: ["path_traversal"] },
|
|
11632
|
+
{ method: "EvalSymlinks", class: "filepath", removes: ["path_traversal"] },
|
|
11615
11633
|
// Log Injection sanitizers
|
|
11616
11634
|
{ method: "replace", removes: ["log_injection"] },
|
|
11617
11635
|
// Used to remove newlines/control chars
|
|
@@ -11706,6 +11724,8 @@ var DEFAULT_SANITIZERS = [
|
|
|
11706
11724
|
{ method: "abspath", class: "os.path", removes: ["path_traversal"] },
|
|
11707
11725
|
{ method: "realpath", class: "path", removes: ["path_traversal"] },
|
|
11708
11726
|
{ method: "abspath", class: "path", removes: ["path_traversal"] },
|
|
11727
|
+
// pathlib.Path.resolve() — canonicalizes path, resolves symlinks (Python 3)
|
|
11728
|
+
{ method: "resolve", class: "Path", removes: ["path_traversal"] },
|
|
11709
11729
|
// Python Type coercion
|
|
11710
11730
|
{ method: "int", removes: ["sql_injection", "command_injection", "xss"] },
|
|
11711
11731
|
{ method: "float", removes: ["sql_injection", "command_injection"] },
|
|
@@ -11738,8 +11758,36 @@ var DEFAULT_SANITIZERS = [
|
|
|
11738
11758
|
{ method: "encode_attribute", class: "html_escape", removes: ["xss"] },
|
|
11739
11759
|
{ method: "escape_html", removes: ["xss"] },
|
|
11740
11760
|
// Rust Type coercion (parsing)
|
|
11741
|
-
{ method: "parse", removes: ["sql_injection", "command_injection", "xss"] }
|
|
11761
|
+
{ method: "parse", removes: ["sql_injection", "command_injection", "xss"] },
|
|
11742
11762
|
// str.parse::<i32>()
|
|
11763
|
+
// =========================================================================
|
|
11764
|
+
// Type-cast taint barriers (#57)
|
|
11765
|
+
// Numeric/UUID casts cannot carry a string-injection payload.
|
|
11766
|
+
// =========================================================================
|
|
11767
|
+
// Java numeric parse — Integer.parseInt, Long.parseLong, etc.
|
|
11768
|
+
{ method: "parseInt", class: "Integer", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11769
|
+
{ method: "parseLong", class: "Long", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11770
|
+
{ method: "parseFloat", class: "Float", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11771
|
+
{ method: "parseDouble", class: "Double", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11772
|
+
{ method: "parseShort", class: "Short", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11773
|
+
{ method: "parseByte", class: "Byte", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11774
|
+
// Java UUID parse — UUID.fromString rejects non-UUID strings
|
|
11775
|
+
{ method: "fromString", class: "UUID", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11776
|
+
// JavaScript numeric coercion (Number/parseInt/parseFloat already covered above; add path_traversal/code_injection)
|
|
11777
|
+
{ method: "BigInt", removes: ["sql_injection", "nosql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11778
|
+
// Go numeric parse — strconv.Atoi, ParseInt, ParseFloat, ParseUint, ParseBool
|
|
11779
|
+
{ method: "Atoi", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11780
|
+
{ method: "ParseInt", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11781
|
+
{ method: "ParseFloat", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11782
|
+
{ method: "ParseUint", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11783
|
+
{ method: "ParseBool", class: "strconv", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11784
|
+
// Go UUID parse
|
|
11785
|
+
{ method: "Parse", class: "uuid", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11786
|
+
{ method: "MustParse", class: "uuid", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11787
|
+
// Python — int/float already covered above; add bool + UUID/Decimal casts
|
|
11788
|
+
{ method: "bool", removes: ["sql_injection", "command_injection", "xss", "code_injection"] },
|
|
11789
|
+
{ method: "UUID", class: "uuid", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] },
|
|
11790
|
+
{ method: "Decimal", class: "decimal", removes: ["sql_injection", "command_injection", "path_traversal", "code_injection"] }
|
|
11743
11791
|
];
|
|
11744
11792
|
function getDefaultConfig() {
|
|
11745
11793
|
return {
|
|
@@ -11769,7 +11817,7 @@ function analyzeTaint(calls, types, config = getDefaultConfig(), typeHierarchy,
|
|
|
11769
11817
|
const sourceLines = code !== void 0 ? code.split("\n") : void 0;
|
|
11770
11818
|
const sources = findSources(calls, types, config.sources, sourceLines, language);
|
|
11771
11819
|
const sinks = findSinks(calls, config.sinks, typeHierarchy, language, sourceLines);
|
|
11772
|
-
const sanitizers = findSanitizers(calls, types, config.sanitizers);
|
|
11820
|
+
const sanitizers = findSanitizers(calls, types, config.sanitizers, sourceLines);
|
|
11773
11821
|
return { sources, sinks, sanitizers };
|
|
11774
11822
|
}
|
|
11775
11823
|
function attachSourceLineCode(sources, sinks, code) {
|
|
@@ -11789,6 +11837,9 @@ function findSources(calls, types, patterns, sourceLines, language) {
|
|
|
11789
11837
|
const sources = [];
|
|
11790
11838
|
for (const call of calls) {
|
|
11791
11839
|
for (const pattern of patterns) {
|
|
11840
|
+
if (pattern.languages && pattern.languages.length > 0 && language !== void 0 && !pattern.languages.includes(language)) {
|
|
11841
|
+
continue;
|
|
11842
|
+
}
|
|
11792
11843
|
if (matchesSourcePattern(call, pattern)) {
|
|
11793
11844
|
sources.push({
|
|
11794
11845
|
type: pattern.type,
|
|
@@ -12425,6 +12476,15 @@ function receiverMightBeClass(receiver, className) {
|
|
|
12425
12476
|
if (receiver === className) {
|
|
12426
12477
|
return true;
|
|
12427
12478
|
}
|
|
12479
|
+
if (receiver.endsWith(")")) {
|
|
12480
|
+
const ctorMatch = receiver.match(/^(\w+)\(/);
|
|
12481
|
+
if (ctorMatch) {
|
|
12482
|
+
const ctorName = ctorMatch[1];
|
|
12483
|
+
if (ctorName === className || ctorName.toLowerCase() === className.toLowerCase()) {
|
|
12484
|
+
return true;
|
|
12485
|
+
}
|
|
12486
|
+
}
|
|
12487
|
+
}
|
|
12428
12488
|
if (receiver.includes("::")) {
|
|
12429
12489
|
const scopePrefix = receiver.match(/^(\w+)::/);
|
|
12430
12490
|
if (scopePrefix) {
|
|
@@ -12692,7 +12752,7 @@ function calculateSinkConfidence(call, pattern) {
|
|
|
12692
12752
|
}
|
|
12693
12753
|
return Math.min(confidence, 1);
|
|
12694
12754
|
}
|
|
12695
|
-
function findSanitizers(calls, types, patterns) {
|
|
12755
|
+
function findSanitizers(calls, types, patterns, sourceLines) {
|
|
12696
12756
|
const sanitizers = [];
|
|
12697
12757
|
const sanitizerMethods = /* @__PURE__ */ new Set();
|
|
12698
12758
|
for (const type of types) {
|
|
@@ -12702,6 +12762,66 @@ function findSanitizers(calls, types, patterns) {
|
|
|
12702
12762
|
}
|
|
12703
12763
|
}
|
|
12704
12764
|
}
|
|
12765
|
+
const wrapperSanitizers = /* @__PURE__ */ new Map();
|
|
12766
|
+
for (const type of types) {
|
|
12767
|
+
for (const method of type.methods) {
|
|
12768
|
+
const bodySize = method.end_line - method.start_line;
|
|
12769
|
+
if (bodySize < 0 || bodySize > 2) continue;
|
|
12770
|
+
const paramNames = new Set(method.parameters.map((p) => p.name));
|
|
12771
|
+
if (paramNames.size === 0) continue;
|
|
12772
|
+
const inside = [];
|
|
12773
|
+
for (const c of calls) {
|
|
12774
|
+
if (c.location.line < method.start_line || c.location.line > method.end_line) continue;
|
|
12775
|
+
if (c.method_name === method.name) continue;
|
|
12776
|
+
inside.push(c);
|
|
12777
|
+
}
|
|
12778
|
+
if (inside.length !== 1) continue;
|
|
12779
|
+
const innerCall = inside[0];
|
|
12780
|
+
let matched;
|
|
12781
|
+
for (const pattern of patterns) {
|
|
12782
|
+
if (matchesSanitizerPattern(innerCall, pattern)) {
|
|
12783
|
+
matched = pattern;
|
|
12784
|
+
break;
|
|
12785
|
+
}
|
|
12786
|
+
}
|
|
12787
|
+
if (!matched || !matched.removes || matched.removes.length === 0) continue;
|
|
12788
|
+
let argOk = false;
|
|
12789
|
+
for (const arg of innerCall.arguments) {
|
|
12790
|
+
if (arg.variable && paramNames.has(arg.variable)) {
|
|
12791
|
+
argOk = true;
|
|
12792
|
+
break;
|
|
12793
|
+
}
|
|
12794
|
+
}
|
|
12795
|
+
if (!argOk) continue;
|
|
12796
|
+
if (sourceLines) {
|
|
12797
|
+
const lineText = sourceLines[innerCall.location.line - 1] ?? "";
|
|
12798
|
+
const stripped = lineText.trim();
|
|
12799
|
+
const returnMatch = stripped.match(/^return\s+(?:await\s+)?(.*)$/);
|
|
12800
|
+
if (!returnMatch) continue;
|
|
12801
|
+
const after = returnMatch[1].replace(/;\s*$/, "").trimEnd();
|
|
12802
|
+
const callPrefix = innerCall.receiver ? `${innerCall.receiver}.${innerCall.method_name}(` : `${innerCall.method_name}(`;
|
|
12803
|
+
if (!after.startsWith(callPrefix)) continue;
|
|
12804
|
+
if (!after.endsWith(")")) continue;
|
|
12805
|
+
}
|
|
12806
|
+
const existing = wrapperSanitizers.get(method.name);
|
|
12807
|
+
if (existing) {
|
|
12808
|
+
const set = /* @__PURE__ */ new Set([...existing, ...matched.removes]);
|
|
12809
|
+
wrapperSanitizers.set(method.name, Array.from(set));
|
|
12810
|
+
} else {
|
|
12811
|
+
wrapperSanitizers.set(method.name, [...matched.removes]);
|
|
12812
|
+
}
|
|
12813
|
+
}
|
|
12814
|
+
}
|
|
12815
|
+
for (const call of calls) {
|
|
12816
|
+
const removes = wrapperSanitizers.get(call.method_name);
|
|
12817
|
+
if (!removes) continue;
|
|
12818
|
+
sanitizers.push({
|
|
12819
|
+
type: "derived_wrapper",
|
|
12820
|
+
method: formatSanitizerMethod(call),
|
|
12821
|
+
line: call.location.line,
|
|
12822
|
+
sanitizes: removes
|
|
12823
|
+
});
|
|
12824
|
+
}
|
|
12705
12825
|
for (const call of calls) {
|
|
12706
12826
|
if (sanitizerMethods.has(call.method_name)) {
|
|
12707
12827
|
sanitizers.push({
|
|
@@ -13024,6 +13144,15 @@ var CodeGraph = class {
|
|
|
13024
13144
|
};
|
|
13025
13145
|
|
|
13026
13146
|
// src/analysis/taint-propagation.ts
|
|
13147
|
+
/**
 * Group sanitizer records by their `line` property.
 *
 * @param {Iterable<{line: *}>} sanitizers - records exposing a `line` key.
 * @returns {Map<*, Array>} map from each distinct `line` value to the records
 *   that carry it, in the order they were encountered.
 */
function buildSanitizersByLine(sanitizers) {
  const byLine = new Map();
  for (const entry of sanitizers) {
    const bucket = byLine.get(entry.line);
    if (bucket) {
      bucket.push(entry);
    } else {
      // First record seen for this line: start a new bucket.
      byLine.set(entry.line, [entry]);
    }
  }
  return byLine;
}
|
|
13027
13156
|
function propagateTaint(graphOrDfg, callsOrSources, sourcesOrSinks, sinksOrSanitizers, sanitizersArg) {
|
|
13028
13157
|
let graph;
|
|
13029
13158
|
let sources;
|
|
@@ -13059,7 +13188,7 @@ function propagateTaint(graphOrDfg, callsOrSources, sourcesOrSinks, sinksOrSanit
|
|
|
13059
13188
|
const defsByLine = graph.defsByLine;
|
|
13060
13189
|
const usesByLine = graph.usesByLine;
|
|
13061
13190
|
const callsByLine = graph.callsByLine;
|
|
13062
|
-
const sanitizersByLine = graph.sanitizersByLine;
|
|
13191
|
+
const sanitizersByLine = sanitizers.length > 0 ? buildSanitizersByLine(sanitizers) : graph.sanitizersByLine;
|
|
13063
13192
|
const defById = graph.defById;
|
|
13064
13193
|
const rawInitialTaint = findInitialTaint(sources, callsByLine, defsByLine);
|
|
13065
13194
|
const initialTaint = rawInitialTaint.filter((tv) => {
|
|
@@ -13732,7 +13861,32 @@ var SANITIZER_METHODS = /* @__PURE__ */ new Set([
|
|
|
13732
13861
|
"validatePath",
|
|
13733
13862
|
"validateCityName",
|
|
13734
13863
|
"validateInput",
|
|
13735
|
-
"sanitizeInput"
|
|
13864
|
+
"sanitizeInput",
|
|
13865
|
+
// Type-cast barriers (#57) — numeric/boolean casts cannot carry a string
|
|
13866
|
+
// injection payload. Conservative whitelist; ambiguous names like `valueOf`,
|
|
13867
|
+
// `Parse` are intentionally excluded; `fromString` is listed below for `UUID.fromString`.
|
|
13868
|
+
// Java
|
|
13869
|
+
"parseInt",
|
|
13870
|
+
"parseLong",
|
|
13871
|
+
"parseFloat",
|
|
13872
|
+
"parseDouble",
|
|
13873
|
+
"parseShort",
|
|
13874
|
+
"parseByte",
|
|
13875
|
+
"fromString",
|
|
13876
|
+
// UUID.fromString — parses strict UUID format, rejects injection
|
|
13877
|
+
// JS/TS (parseInt/parseFloat covered above)
|
|
13878
|
+
"Number",
|
|
13879
|
+
"BigInt",
|
|
13880
|
+
// Go
|
|
13881
|
+
"Atoi",
|
|
13882
|
+
"ParseInt",
|
|
13883
|
+
"ParseFloat",
|
|
13884
|
+
"ParseUint",
|
|
13885
|
+
"ParseBool",
|
|
13886
|
+
// Python
|
|
13887
|
+
"int",
|
|
13888
|
+
"float",
|
|
13889
|
+
"bool"
|
|
13736
13890
|
]);
|
|
13737
13891
|
var ANTI_SANITIZER_METHODS = /* @__PURE__ */ new Set([
|
|
13738
13892
|
// URL decoding (reverses URL encoding)
|
|
@@ -13862,6 +14016,10 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
13862
14016
|
inConstructor = false;
|
|
13863
14017
|
// Map constructor parameter names to their positions (0-indexed)
|
|
13864
14018
|
constructorParamPositions = /* @__PURE__ */ new Map();
|
|
14019
|
+
// Sprint 9 #58.1 — names of `static final Pattern` fields whose compiled
|
|
14020
|
+
// regex is strict-anchored (provably matches a bounded character set).
|
|
14021
|
+
// Populated lazily on first access via `getSafePatternFields()`.
|
|
14022
|
+
safePatternFieldsCache = null;
|
|
13865
14023
|
/**
|
|
13866
14024
|
* Analyze source code and build constant propagation state.
|
|
13867
14025
|
*/
|
|
@@ -13895,6 +14053,7 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
13895
14053
|
this.currentClassName = null;
|
|
13896
14054
|
this.inConstructor = false;
|
|
13897
14055
|
this.constructorParamPositions.clear();
|
|
14056
|
+
this.safePatternFieldsCache = null;
|
|
13898
14057
|
this.collectClassFields(tree.rootNode);
|
|
13899
14058
|
for (const methodName of sanitizerMethods) {
|
|
13900
14059
|
this.methodReturnsSanitized.add(methodName);
|
|
@@ -13904,6 +14063,7 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
13904
14063
|
(name2) => this.lookupSymbol(name2)
|
|
13905
14064
|
);
|
|
13906
14065
|
this.analyzeMethodReturns(tree.rootNode);
|
|
14066
|
+
this.seedPythonModuleConstants(tree.rootNode);
|
|
13907
14067
|
this.visit(tree.rootNode);
|
|
13908
14068
|
this.refineTaintFromConstants();
|
|
13909
14069
|
const resultTainted = new Set(this.tainted);
|
|
@@ -14264,6 +14424,162 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
14264
14424
|
}
|
|
14265
14425
|
}
|
|
14266
14426
|
}
|
|
14427
|
+
/**
|
|
14428
|
+
* Sprint 9 #55 (documents `seedPythonModuleConstants`, defined further below) — seed the symbol table with Python module-level constant
|
|
14429
|
+
* assignments. Walks only direct children of the `module` root and adds
|
|
14430
|
+
* `IDENT = <primitive literal>` to `symbols` so `if IDENT:` guards inside
|
|
14431
|
+
* downstream functions can be folded to dead code.
|
|
14432
|
+
*
|
|
14433
|
+
* Recognized literal RHS kinds: `true`/`false` (booleans), integer/float
|
|
14434
|
+
* literals, string literals. The ExpressionEvaluator already understands
|
|
14435
|
+
* each via the same lookup callback; we just need the symbol present.
|
|
14436
|
+
*/
|
|
14437
|
+
/**
|
|
14438
|
+
* Sprint 9 #55 — gate `field_declaration` folding to primitive literals.
|
|
14439
|
+
*
|
|
14440
|
+
* The deep-nesting regression (cognium-ai#88) constructs a Java
|
|
14441
|
+
* `static final String hyphenData = "a" + "b" + ... (10k segments)` at the
|
|
14442
|
+
* class level. `handleVariableDeclaration` would otherwise dispatch
|
|
14443
|
+
* `evaluateExpression` on the deeply nested binary AST and blow the V8
|
|
14444
|
+
* stack. The dead-code-by-const-guard pattern (`if (DEBUG)`) only requires
|
|
14445
|
+
* `boolean`/`integer`/`string` (single-literal) RHS folding, so restrict
|
|
14446
|
+
* to those node types.
|
|
14447
|
+
*/
|
|
14448
|
+
fieldDeclHasPrimitiveLiteralValue(node) {
|
|
14449
|
+
const primitive = /* @__PURE__ */ new Set([
|
|
14450
|
+
// Java literal node types
|
|
14451
|
+
"true",
|
|
14452
|
+
"false",
|
|
14453
|
+
"null_literal",
|
|
14454
|
+
"decimal_integer_literal",
|
|
14455
|
+
"hex_integer_literal",
|
|
14456
|
+
"octal_integer_literal",
|
|
14457
|
+
"binary_integer_literal",
|
|
14458
|
+
"decimal_floating_point_literal",
|
|
14459
|
+
"hex_floating_point_literal",
|
|
14460
|
+
"character_literal",
|
|
14461
|
+
"string_literal",
|
|
14462
|
+
// JS/TS literal node types (defensive, in case other langs reuse it)
|
|
14463
|
+
"number",
|
|
14464
|
+
"string"
|
|
14465
|
+
]);
|
|
14466
|
+
for (const child of node.children) {
|
|
14467
|
+
if (child.type !== "variable_declarator") continue;
|
|
14468
|
+
const value = child.childForFieldName("value");
|
|
14469
|
+
if (!value) continue;
|
|
14470
|
+
if (!primitive.has(value.type)) return false;
|
|
14471
|
+
}
|
|
14472
|
+
return true;
|
|
14473
|
+
}
|
|
14474
|
+
/**
|
|
14475
|
+
* Sprint 9 #58.1 — collect the set of class-level `Pattern` field names
|
|
14476
|
+
* whose compiled regex is strict-anchored, i.e. provably matches a
|
|
14477
|
+
* bounded character set with no wildcard escape. A subsequent
|
|
14478
|
+
* `if (!FIELD.matcher(var).matches()) throw ...;` guard then proves
|
|
14479
|
+
* `var` is sanitized after the if.
|
|
14480
|
+
*
|
|
14481
|
+
* Recognized initializer shapes (scanned via source-text regex to avoid
|
|
14482
|
+
* threading another AST walk):
|
|
14483
|
+
* `static final Pattern FIELD = Pattern.compile("regex");`
|
|
14484
|
+
*
|
|
14485
|
+
* Strict-anchored regex criteria:
|
|
14486
|
+
* - starts with `^` and ends with `$`
|
|
14487
|
+
* - after stripping `[...]` character classes, must not contain `.` or
|
|
14488
|
+
* `|` (a `.` could match anything; `|` admits an arbitrary alternative)
|
|
14489
|
+
*/
|
|
14490
|
+
getSafePatternFields() {
|
|
14491
|
+
if (this.safePatternFieldsCache !== null) return this.safePatternFieldsCache;
|
|
14492
|
+
const set = /* @__PURE__ */ new Set();
|
|
14493
|
+
const re = /\b(?:public\s+|private\s+|protected\s+)?(?:static\s+final|final\s+static)\s+(?:java\.util\.regex\.)?Pattern\s+(\w+)\s*=\s*(?:java\.util\.regex\.)?Pattern\s*\.\s*compile\s*\(\s*"((?:[^"\\]|\\.)*)"/g;
|
|
14494
|
+
let m;
|
|
14495
|
+
while ((m = re.exec(this.source)) !== null) {
|
|
14496
|
+
const name2 = m[1];
|
|
14497
|
+
const regex = m[2];
|
|
14498
|
+
if (this.isStrictAnchoredRegex(regex)) set.add(name2);
|
|
14499
|
+
}
|
|
14500
|
+
this.safePatternFieldsCache = set;
|
|
14501
|
+
return set;
|
|
14502
|
+
}
|
|
14503
|
+
isStrictAnchoredRegex(re) {
|
|
14504
|
+
if (!re.startsWith("^") || !re.endsWith("$")) return false;
|
|
14505
|
+
const stripped = re.replace(/\[(?:[^\]\\]|\\.)*\]/g, "");
|
|
14506
|
+
const cleaned = stripped.replace(/\\./g, "");
|
|
14507
|
+
if (cleaned.includes(".")) return false;
|
|
14508
|
+
if (cleaned.includes("|")) return false;
|
|
14509
|
+
return true;
|
|
14510
|
+
}
|
|
14511
|
+
/**
|
|
14512
|
+
* Sprint 9 #58.1 — detect the regex-allowlist guard pattern.
|
|
14513
|
+
*
|
|
14514
|
+
* if (!SAFE_NAME.matcher(var).matches()) { throw ...; }
|
|
14515
|
+
*
|
|
14516
|
+
* Returns the guarded variable name if the pattern matches AND
|
|
14517
|
+
* `SAFE_NAME` is a recognized strict-anchored Pattern field, otherwise
|
|
14518
|
+
* null. Caller drops the variable from `tainted` after the if-block.
|
|
14519
|
+
*/
|
|
14520
|
+
detectRegexAllowlistGuard(condition, consequence) {
|
|
14521
|
+
if (!consequence) return null;
|
|
14522
|
+
let condText = getNodeText2(condition, this.source).replace(/\s+/g, "");
|
|
14523
|
+
while (condText.startsWith("(") && condText.endsWith(")")) {
|
|
14524
|
+
const inner = condText.slice(1, -1);
|
|
14525
|
+
let depth = 0;
|
|
14526
|
+
let balanced = true;
|
|
14527
|
+
for (let i2 = 0; i2 < inner.length; i2++) {
|
|
14528
|
+
if (inner[i2] === "(") depth++;
|
|
14529
|
+
else if (inner[i2] === ")") depth--;
|
|
14530
|
+
if (depth < 0) {
|
|
14531
|
+
balanced = false;
|
|
14532
|
+
break;
|
|
14533
|
+
}
|
|
14534
|
+
}
|
|
14535
|
+
if (!balanced || depth !== 0) break;
|
|
14536
|
+
condText = inner;
|
|
14537
|
+
}
|
|
14538
|
+
const m = condText.match(/^!(\w+)\.matcher\((\w+)\)\.matches\(\)$/);
|
|
14539
|
+
if (!m) return null;
|
|
14540
|
+
const patternName = m[1];
|
|
14541
|
+
const varName = m[2];
|
|
14542
|
+
if (!this.getSafePatternFields().has(patternName)) return null;
|
|
14543
|
+
if (!this.consequenceContainsThrow(consequence)) return null;
|
|
14544
|
+
return varName;
|
|
14545
|
+
}
|
|
14546
|
+
consequenceContainsThrow(node) {
|
|
14547
|
+
if (node.type === "throw_statement") return true;
|
|
14548
|
+
const stack = [node];
|
|
14549
|
+
while (stack.length > 0) {
|
|
14550
|
+
const n = stack.pop();
|
|
14551
|
+
if (!n) continue;
|
|
14552
|
+
if (n.type === "throw_statement") return true;
|
|
14553
|
+
if (n.type === "if_statement" || n.type === "switch_statement") continue;
|
|
14554
|
+
for (const c of n.children) stack.push(c);
|
|
14555
|
+
}
|
|
14556
|
+
return false;
|
|
14557
|
+
}
|
|
14558
|
+
seedPythonModuleConstants(root) {
|
|
14559
|
+
if (root.type !== "module") return;
|
|
14560
|
+
for (const child of root.children) {
|
|
14561
|
+
const target = child.type === "assignment" ? child : child.type === "expression_statement" && child.children.length > 0 ? child.children[0] : null;
|
|
14562
|
+
if (!target || target.type !== "assignment") continue;
|
|
14563
|
+
const left = target.childForFieldName("left");
|
|
14564
|
+
const right = target.childForFieldName("right");
|
|
14565
|
+
if (!left || !right) continue;
|
|
14566
|
+
if (left.type !== "identifier") continue;
|
|
14567
|
+
const allowed = /* @__PURE__ */ new Set([
|
|
14568
|
+
"true",
|
|
14569
|
+
"false",
|
|
14570
|
+
"none",
|
|
14571
|
+
"integer",
|
|
14572
|
+
"float",
|
|
14573
|
+
"string"
|
|
14574
|
+
]);
|
|
14575
|
+
if (!allowed.has(right.type)) continue;
|
|
14576
|
+
const name2 = getNodeText2(left, this.source);
|
|
14577
|
+
if (!name2) continue;
|
|
14578
|
+
const value = this.evaluateExpression(right);
|
|
14579
|
+
if (!isKnown(value)) continue;
|
|
14580
|
+
this.symbols.set(name2, value);
|
|
14581
|
+
}
|
|
14582
|
+
}
|
|
14267
14583
|
findAllMethods(node) {
|
|
14268
14584
|
const methods = [];
|
|
14269
14585
|
const stack = [node];
|
|
@@ -14358,6 +14674,11 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
14358
14674
|
case "local_variable_declaration":
|
|
14359
14675
|
this.handleVariableDeclaration(node);
|
|
14360
14676
|
return false;
|
|
14677
|
+
case "field_declaration":
|
|
14678
|
+
if (this.fieldDeclHasPrimitiveLiteralValue(node)) {
|
|
14679
|
+
this.handleVariableDeclaration(node);
|
|
14680
|
+
}
|
|
14681
|
+
return false;
|
|
14361
14682
|
case "assignment_expression":
|
|
14362
14683
|
this.handleAssignment(node);
|
|
14363
14684
|
return false;
|
|
@@ -14848,6 +15169,16 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
14848
15169
|
}
|
|
14849
15170
|
this.inConditionalBranch = wasInConditional;
|
|
14850
15171
|
this.tainted = /* @__PURE__ */ new Set([...taintedBefore, ...taintedAfterThen, ...taintedAfterElse]);
|
|
15172
|
+
const guardedVar = this.detectRegexAllowlistGuard(condition, consequence);
|
|
15173
|
+
if (guardedVar) {
|
|
15174
|
+
this.tainted.delete(guardedVar);
|
|
15175
|
+
this.sanitizedVars.add(guardedVar);
|
|
15176
|
+
const scoped = this.getScopedName(guardedVar);
|
|
15177
|
+
if (scoped !== guardedVar) {
|
|
15178
|
+
this.tainted.delete(scoped);
|
|
15179
|
+
this.sanitizedVars.add(scoped);
|
|
15180
|
+
}
|
|
15181
|
+
}
|
|
14851
15182
|
}
|
|
14852
15183
|
}
|
|
14853
15184
|
/**
|
|
@@ -15038,17 +15369,33 @@ var ConstantPropagator = class _ConstantPropagator {
|
|
|
15038
15369
|
/**
|
|
15039
15370
|
* Check if an expression is a call to a sanitizer method.
|
|
15040
15371
|
* This includes both built-in sanitizers and @sanitizer annotated methods.
|
|
15372
|
+
* Handles Java (`method_invocation`), Go/JS/TS (`call_expression`), and
|
|
15373
|
+
* Python (`call`) AST shapes.
|
|
15041
15374
|
*/
|
|
15042
15375
|
isSanitizerMethodCall(node) {
|
|
15043
|
-
|
|
15044
|
-
|
|
15376
|
+
const methodName = this.extractCallName(node);
|
|
15377
|
+
if (!methodName) return false;
|
|
15378
|
+
return SANITIZER_METHODS.has(methodName) || this.methodReturnsSanitized.has(methodName);
|
|
15379
|
+
}
|
|
15380
|
+
/**
|
|
15381
|
+
* Extract the trailing method/function name from any call node shape:
|
|
15382
|
+
* Java `method_invocation` — name field
|
|
15383
|
+
* Go/JS `call_expression` — function field (identifier or selector/member)
|
|
15384
|
+
* Python `call` — function field (identifier or attribute)
|
|
15385
|
+
*/
|
|
15386
|
+
extractCallName(node) {
|
|
15387
|
+
let fnNode = null;
|
|
15388
|
+
if (node.type === "method_invocation") {
|
|
15389
|
+
fnNode = node.childForFieldName("name");
|
|
15390
|
+
} else if (node.type === "call_expression" || node.type === "call") {
|
|
15391
|
+
fnNode = node.childForFieldName("function");
|
|
15045
15392
|
}
|
|
15046
|
-
|
|
15047
|
-
if (
|
|
15048
|
-
|
|
15393
|
+
if (!fnNode) return null;
|
|
15394
|
+
if (fnNode.type === "selector_expression" || fnNode.type === "member_expression" || fnNode.type === "attribute") {
|
|
15395
|
+
const tail = fnNode.childForFieldName("field") || fnNode.childForFieldName("property") || fnNode.childForFieldName("attribute");
|
|
15396
|
+
if (tail) return getNodeText2(tail, this.source);
|
|
15049
15397
|
}
|
|
15050
|
-
|
|
15051
|
-
return SANITIZER_METHODS.has(methodName) || this.methodReturnsSanitized.has(methodName);
|
|
15398
|
+
return getNodeText2(fnNode, this.source);
|
|
15052
15399
|
}
|
|
15053
15400
|
/**
|
|
15054
15401
|
* Check if an expression is a call to an anti-sanitizer method.
|