circle-ir 3.3.1 → 3.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3998,6 +3998,7 @@ var parserInitializing = null;
3998
3998
  var loadedLanguages = /* @__PURE__ */ new Map();
3999
3999
  var loadingLanguages = /* @__PURE__ */ new Map();
4000
4000
  var configuredLanguagePaths = {};
4001
+ var configuredLanguageModules = {};
4001
4002
  async function initParser(options = {}) {
4002
4003
  if (parserInitialized) {
4003
4004
  return;
@@ -4005,14 +4006,28 @@ async function initParser(options = {}) {
4005
4006
  if (parserInitializing) {
4006
4007
  return parserInitializing;
4007
4008
  }
4008
- const wasmPath = options.wasmPath ?? await getDefaultWasmPath();
4009
4009
  if (options.languagePaths) {
4010
4010
  configuredLanguagePaths = options.languagePaths;
4011
4011
  }
4012
+ if (options.languageModules) {
4013
+ configuredLanguageModules = options.languageModules;
4014
+ }
4012
4015
  parserInitializing = (async () => {
4013
- await Parser.init({
4014
- locateFile: () => wasmPath
4015
- });
4016
+ if (options.wasmModule) {
4017
+ await Parser.init({
4018
+ locateFile: () => "web-tree-sitter.wasm",
4019
+ instantiateWasm(imports, callback) {
4020
+ const instance2 = new WebAssembly.Instance(options.wasmModule, imports);
4021
+ callback(instance2, options.wasmModule);
4022
+ return instance2.exports;
4023
+ }
4024
+ });
4025
+ } else {
4026
+ const wasmPath = options.wasmPath ?? await getDefaultWasmPath();
4027
+ await Parser.init({
4028
+ locateFile: () => wasmPath
4029
+ });
4030
+ }
4016
4031
  parserInitialized = true;
4017
4032
  parserInitializing = null;
4018
4033
  })();
@@ -4030,6 +4045,17 @@ async function loadLanguage(language, wasmPath) {
4030
4045
  if (loading) {
4031
4046
  return loading;
4032
4047
  }
4048
+ const grammarName = language === "typescript" ? "javascript" : language;
4049
+ const wasmModule = configuredLanguageModules[language] ?? configuredLanguageModules[grammarName];
4050
+ if (wasmModule) {
4051
+ const loadPromise2 = (async () => {
4052
+ const lang = await Language.load(wasmModule);
4053
+ loadedLanguages.set(language, lang);
4054
+ return lang;
4055
+ })();
4056
+ loadingLanguages.set(language, loadPromise2);
4057
+ return loadPromise2;
4058
+ }
4033
4059
  const path = wasmPath ?? configuredLanguagePaths[language] ?? await getDefaultLanguagePath(language);
4034
4060
  const loadPromise = (async () => {
4035
4061
  const lang = await Language.load(path);
@@ -10904,7 +10930,12 @@ function propagateTaint(dfg, calls, sources, sinks, sanitizers) {
10904
10930
  existing.push(san);
10905
10931
  sanitizersByLine.set(san.line, existing);
10906
10932
  }
10907
- const initialTaint = findInitialTaint(sources, dfg, callsByLine, defsByLine);
10933
+ const rawInitialTaint = findInitialTaint(sources, dfg, callsByLine, defsByLine);
10934
+ const initialTaint = rawInitialTaint.filter((tv) => {
10935
+ if (tv.line === tv.sourceLine) return true;
10936
+ const sanCheck = checkSanitized(tv.sourceLine, tv.line, tv.sourceType, sanitizersByLine);
10937
+ return !sanCheck.sanitized;
10938
+ });
10908
10939
  taintedVars.push(...initialTaint);
10909
10940
  const propagatedTaint = propagateThroughChains(
10910
10941
  initialTaint,
@@ -11046,7 +11077,36 @@ function propagateThroughChains(initialTaint, chains, defById, sanitizersByLine)
11046
11077
  }
11047
11078
  return propagated;
11048
11079
  }
11049
- function checkSanitized(_fromLine, _toLine, _sinkType, _sanitizersByLine) {
11080
+ var KNOWN_SINK_TYPES = /* @__PURE__ */ new Set([
11081
+ "sql_injection",
11082
+ "xss",
11083
+ "path_traversal",
11084
+ "command_injection",
11085
+ "ssrf",
11086
+ "ldap_injection",
11087
+ "xpath_injection",
11088
+ "log_injection",
11089
+ "xxe",
11090
+ "deserialization",
11091
+ "code_injection"
11092
+ ]);
11093
+ function checkSanitized(_fromLine, toLine, sinkType, sanitizersByLine) {
11094
+ const sanitizersAtTarget = sanitizersByLine.get(toLine);
11095
+ if (!sanitizersAtTarget || sanitizersAtTarget.length === 0) {
11096
+ return { sanitized: false };
11097
+ }
11098
+ const isKnownSinkType = KNOWN_SINK_TYPES.has(sinkType);
11099
+ for (const san of sanitizersAtTarget) {
11100
+ if (isKnownSinkType) {
11101
+ if (san.sanitizes.includes(sinkType)) {
11102
+ return { sanitized: true, sanitizer: san };
11103
+ }
11104
+ } else {
11105
+ if (san.sanitizes.length > 0) {
11106
+ return { sanitized: true, sanitizer: san };
11107
+ }
11108
+ }
11109
+ }
11050
11110
  return { sanitized: false };
11051
11111
  }
11052
11112
  function buildTaintFlow(source, sink, taintInfo, dfg, defById) {
@@ -11131,6 +11191,37 @@ function analyzeInterprocedural(types, calls, dfg, sources, sinks, sanitizers, o
11131
11191
  "clone",
11132
11192
  "clear"
11133
11193
  ]);
11194
+ const safeUtilityMethods = /* @__PURE__ */ new Set([
11195
+ // Path validation and normalization
11196
+ "normalizePath",
11197
+ "normalizeLineEndings",
11198
+ "isPathWithin",
11199
+ "isPathWithinAllowedDirectories",
11200
+ "isPathAllowed",
11201
+ "validatePath",
11202
+ "resolvePath",
11203
+ "resolve",
11204
+ "relative",
11205
+ "join",
11206
+ // File utilities (reading/processing, not writing)
11207
+ "tailFile",
11208
+ "headFile",
11209
+ "readFileContent",
11210
+ "readFile",
11211
+ "read",
11212
+ // Pattern matching (used in validation)
11213
+ "minimatch",
11214
+ "match",
11215
+ "test",
11216
+ "includes",
11217
+ "startsWith",
11218
+ "endsWith",
11219
+ // General validation
11220
+ "validate",
11221
+ "validateInput",
11222
+ "check",
11223
+ "verify"
11224
+ ]);
11134
11225
  const sanitizerMethods = /* @__PURE__ */ new Set();
11135
11226
  for (const san of sanitizers) {
11136
11227
  sanitizerMethods.add(san.method);
@@ -11155,7 +11246,7 @@ function analyzeInterprocedural(types, calls, dfg, sources, sinks, sanitizers, o
11155
11246
  }
11156
11247
  const targetMethod = getMethodNode(methodNodes, call.method_name);
11157
11248
  if (!targetMethod) {
11158
- if (taintedArgPositions.length > 0 && !collectionMethods.has(call.method_name) && !sanitizerMethods.has(call.method_name)) {
11249
+ if (taintedArgPositions.length > 0 && !collectionMethods.has(call.method_name) && !sanitizerMethods.has(call.method_name) && !safeUtilityMethods.has(call.method_name)) {
11159
11250
  const sink = {
11160
11251
  type: "external_taint_escape",
11161
11252
  cwe: "CWE-668",
@@ -11839,13 +11930,33 @@ var SANITIZER_METHODS = /* @__PURE__ */ new Set([
11839
11930
  "getCanonicalPath",
11840
11931
  "normalize",
11841
11932
  "toRealPath",
11933
+ // JavaScript/TypeScript URL Encoding
11934
+ "encodeURIComponent",
11935
+ "encodeURI",
11936
+ // JavaScript/TypeScript String Validation
11937
+ "match",
11938
+ "test",
11939
+ "startsWith",
11940
+ "includes",
11941
+ // Path Validation and Normalization
11942
+ "normalizePath",
11943
+ "normalizeLineEndings",
11944
+ "isPathWithin",
11945
+ "isPathWithinAllowedDirectories",
11946
+ "isPathAllowed",
11947
+ "relative",
11948
+ "join",
11842
11949
  // General
11843
11950
  "sanitize",
11844
11951
  "encode",
11845
11952
  "escape",
11846
11953
  "clean",
11847
11954
  "filter",
11848
- "validate"
11955
+ "validate",
11956
+ "validatePath",
11957
+ "validateCityName",
11958
+ "validateInput",
11959
+ "sanitizeInput"
11849
11960
  ]);
11850
11961
  var ANTI_SANITIZER_METHODS = /* @__PURE__ */ new Set([
11851
11962
  // URL decoding (reverses URL encoding)
@@ -16070,7 +16181,9 @@ async function initAnalyzer(options = {}) {
16070
16181
  registerBuiltinPlugins();
16071
16182
  await initParser({
16072
16183
  wasmPath: options.wasmPath,
16073
- languagePaths: options.languagePaths
16184
+ wasmModule: options.wasmModule,
16185
+ languagePaths: options.languagePaths,
16186
+ languageModules: options.languageModules
16074
16187
  });
16075
16188
  initialized = true;
16076
16189
  }
@@ -16576,7 +16689,17 @@ async function analyzeForAPI(code, filePath, language, options = {}) {
16576
16689
  const constPropResult = analyzeConstantPropagation(tree, code);
16577
16690
  const config = options.taintConfig ?? getDefaultConfig();
16578
16691
  const taint = analyzeTaint(calls, types, config);
16579
- const filteredSinks = taint.sinks.filter((sink) => !constPropResult.unreachableLines.has(sink.line));
16692
+ let filteredSinks = taint.sinks.filter((sink) => !constPropResult.unreachableLines.has(sink.line));
16693
+ filteredSinks = filterCleanVariableSinks(
16694
+ filteredSinks,
16695
+ calls,
16696
+ constPropResult.tainted,
16697
+ constPropResult.symbols,
16698
+ void 0,
16699
+ constPropResult.sanitizedVars,
16700
+ constPropResult.synchronizedLines
16701
+ );
16702
+ filteredSinks = filterSanitizedSinks(filteredSinks, taint.sanitizers ?? [], calls);
16580
16703
  const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
16581
16704
  const analysisTime = performance.now() - analysisStart;
16582
16705
  const totalTime = performance.now() - startTime;
@@ -16780,6 +16903,12 @@ function filterCleanVariableSinks(sinks, calls, taintedVars, symbols, dfg, sanit
16780
16903
  }
16781
16904
  allArgsAreClean = false;
16782
16905
  } else {
16906
+ if (arg.literal != null) {
16907
+ continue;
16908
+ }
16909
+ if (arg.expression && !arg.variable && isStringLiteralExpression(arg.expression)) {
16910
+ continue;
16911
+ }
16783
16912
  allArgsAreClean = false;
16784
16913
  }
16785
16914
  }
@@ -16790,6 +16919,10 @@ function filterCleanVariableSinks(sinks, calls, taintedVars, symbols, dfg, sanit
16790
16919
  return true;
16791
16920
  });
16792
16921
  }
16922
+ function isStringLiteralExpression(expr) {
16923
+ const trimmed = expr.trim();
16924
+ return trimmed.startsWith('"') && trimmed.endsWith('"') || trimmed.startsWith("'") && trimmed.endsWith("'");
16925
+ }
16793
16926
  function filterSanitizedSinks(sinks, sanitizers, calls) {
16794
16927
  if (!sanitizers || sanitizers.length === 0) {
16795
16928
  return sinks;
@@ -17024,10 +17157,28 @@ function isAnalyzerInitialized() {
17024
17157
 
17025
17158
  // src/browser.ts
17026
17159
  async function init2(options) {
17027
- await initAnalyzer({
17028
- wasmPath: options.wasmUrl,
17160
+ const initOptions = {
17029
17161
  taintConfig: options.taintConfig
17030
- });
17162
+ };
17163
+ if (typeof options.wasmUrl === "string") {
17164
+ initOptions.wasmPath = options.wasmUrl;
17165
+ } else {
17166
+ initOptions.wasmModule = options.wasmUrl;
17167
+ }
17168
+ if (options.languageUrls) {
17169
+ const paths = {};
17170
+ const modules = {};
17171
+ for (const [lang, value] of Object.entries(options.languageUrls)) {
17172
+ if (typeof value === "string") {
17173
+ paths[lang] = value;
17174
+ } else if (value) {
17175
+ modules[lang] = value;
17176
+ }
17177
+ }
17178
+ if (Object.keys(paths).length > 0) initOptions.languagePaths = paths;
17179
+ if (Object.keys(modules).length > 0) initOptions.languageModules = modules;
17180
+ }
17181
+ await initAnalyzer(initOptions);
17031
17182
  }
17032
17183
  async function analyzeCode(code, options = {}) {
17033
17184
  const filePath = options.filePath ?? "input.java";
package/dist/browser.d.ts CHANGED
@@ -8,17 +8,18 @@ import type { CircleIR, AnalysisResponse } from './types/index.js';
8
8
  import type { SupportedLanguage } from './core/index.js';
9
9
  export interface BrowserAnalyzerOptions extends AnalyzerOptions {
10
10
  /**
11
- * URL to the tree-sitter.wasm file.
12
- * Required for browser usage.
11
+ * URL to the tree-sitter.wasm file, or a pre-compiled WebAssembly.Module.
12
+ * String URL for browser usage, WebAssembly.Module for Cloudflare Workers.
13
13
  */
14
- wasmUrl: string;
14
+ wasmUrl: string | WebAssembly.Module;
15
15
  /**
16
- * URLs to language grammar WASM files.
16
+ * URLs to language grammar WASM files, or pre-compiled WebAssembly.Modules.
17
+ * String URLs for browser usage, WebAssembly.Modules for Cloudflare Workers.
17
18
  */
18
- languageUrls?: Partial<Record<SupportedLanguage, string>>;
19
+ languageUrls?: Partial<Record<SupportedLanguage, string | WebAssembly.Module>>;
19
20
  }
20
21
  /**
21
- * Initialize the analyzer for browser usage.
22
+ * Initialize the analyzer for browser/worker usage.
22
23
  */
23
24
  export declare function init(options: BrowserAnalyzerOptions): Promise<void>;
24
25
  /**
package/dist/browser.js CHANGED
@@ -5,13 +5,35 @@
5
5
  */
6
6
  import { initAnalyzer, analyze, analyzeForAPI, isAnalyzerInitialized, } from './analyzer.js';
7
7
  /**
8
- * Initialize the analyzer for browser usage.
8
+ * Initialize the analyzer for browser/worker usage.
9
9
  */
10
10
  export async function init(options) {
11
- await initAnalyzer({
12
- wasmPath: options.wasmUrl,
11
+ const initOptions = {
13
12
  taintConfig: options.taintConfig,
14
- });
13
+ };
14
+ if (typeof options.wasmUrl === 'string') {
15
+ initOptions.wasmPath = options.wasmUrl;
16
+ }
17
+ else {
18
+ initOptions.wasmModule = options.wasmUrl;
19
+ }
20
+ if (options.languageUrls) {
21
+ const paths = {};
22
+ const modules = {};
23
+ for (const [lang, value] of Object.entries(options.languageUrls)) {
24
+ if (typeof value === 'string') {
25
+ paths[lang] = value;
26
+ }
27
+ else if (value) {
28
+ modules[lang] = value;
29
+ }
30
+ }
31
+ if (Object.keys(paths).length > 0)
32
+ initOptions.languagePaths = paths;
33
+ if (Object.keys(modules).length > 0)
34
+ initOptions.languageModules = modules;
35
+ }
36
+ await initAnalyzer(initOptions);
15
37
  }
16
38
  /**
17
39
  * Analyze source code and return full Circle-IR output.
@@ -1 +1 @@
1
- {"version":3,"file":"browser.js","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EACL,YAAY,EACZ,OAAO,EACP,aAAa,EACb,qBAAqB,GAEtB,MAAM,eAAe,CAAC;AAiBvB;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,OAA+B;IACxD,MAAM,YAAY,CAAC;QACjB,QAAQ,EAAE,OAAO,CAAC,OAAO;QACzB,WAAW,EAAE,OAAO,CAAC,WAAW;KACjC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAAY,EACZ,UAGI,EAAE;IAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,YAAY,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC;IAE5C,IAAI,CAAC,qBAAqB,EAAE,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,UAGI,EAAE;IAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,YAAY,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC;IAE5C,IAAI,CAAC,qBAAqB,EAAE,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;AACjD,CAAC"}
1
+ {"version":3,"file":"browser.js","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EACL,YAAY,EACZ,OAAO,EACP,aAAa,EACb,qBAAqB,GAEtB,MAAM,eAAe,CAAC;AAkBvB;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,OAA+B;IACxD,MAAM,WAAW,GAAuC;QACtD,WAAW,EAAE,OAAO,CAAC,WAAW;KACjC,CAAC;IAEF,IAAI,OAAO,OAAO,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACxC,WAAW,CAAC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC;IACzC,CAAC;SAAM,CAAC;QACN,WAAW,CAAC,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC;IAC3C,CAAC;IAED,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QACzB,MAAM,KAAK,GAA+C,EAAE,CAAC;QAC7D,MAAM,OAAO,GAA2D,EAAE,CAAC;QAE3E,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;YACjE,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC9B,KAAK,CAAC,IAAyB,CAAC,GAAG,KAAK,CAAC;YAC3C,CAAC;iBAAM,IAAI,KAAK,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAyB,CAAC,GAAG,KAAK,CAAC;YAC7C,CAAC;QACH,CAAC;QAED,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,WAAW,CAAC,aAAa,GAAG,KAAK,CAAC;QACrE,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,WAAW,CAAC,eAAe,GAAG,OAAO,CAAC;IAC7E,CAAC;IAED,MAAM,YAAY,CAAC,WAAW,CAAC,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAAY,EACZ,UAGI,EAAE;IAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,YAAY,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC;IAE5C,IAAI,CAAC,qBAAqB,EAAE,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,UAGI,EAAE;IAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,YAAY,CAAC;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC;IAE5C,IAAI,CAAC,qBAAqB,EAAE,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;AACjD,CAAC"}
@@ -4063,6 +4063,7 @@ var parserInitializing = null;
4063
4063
  var loadedLanguages = /* @__PURE__ */ new Map();
4064
4064
  var loadingLanguages = /* @__PURE__ */ new Map();
4065
4065
  var configuredLanguagePaths = {};
4066
+ var configuredLanguageModules = {};
4066
4067
  async function initParser(options = {}) {
4067
4068
  if (parserInitialized) {
4068
4069
  return;
@@ -4070,14 +4071,28 @@ async function initParser(options = {}) {
4070
4071
  if (parserInitializing) {
4071
4072
  return parserInitializing;
4072
4073
  }
4073
- const wasmPath = options.wasmPath ?? await getDefaultWasmPath();
4074
4074
  if (options.languagePaths) {
4075
4075
  configuredLanguagePaths = options.languagePaths;
4076
4076
  }
4077
+ if (options.languageModules) {
4078
+ configuredLanguageModules = options.languageModules;
4079
+ }
4077
4080
  parserInitializing = (async () => {
4078
- await Parser.init({
4079
- locateFile: () => wasmPath
4080
- });
4081
+ if (options.wasmModule) {
4082
+ await Parser.init({
4083
+ locateFile: () => "web-tree-sitter.wasm",
4084
+ instantiateWasm(imports, callback) {
4085
+ const instance2 = new WebAssembly.Instance(options.wasmModule, imports);
4086
+ callback(instance2, options.wasmModule);
4087
+ return instance2.exports;
4088
+ }
4089
+ });
4090
+ } else {
4091
+ const wasmPath = options.wasmPath ?? await getDefaultWasmPath();
4092
+ await Parser.init({
4093
+ locateFile: () => wasmPath
4094
+ });
4095
+ }
4081
4096
  parserInitialized = true;
4082
4097
  parserInitializing = null;
4083
4098
  })();
@@ -4095,6 +4110,17 @@ async function loadLanguage(language, wasmPath) {
4095
4110
  if (loading) {
4096
4111
  return loading;
4097
4112
  }
4113
+ const grammarName = language === "typescript" ? "javascript" : language;
4114
+ const wasmModule = configuredLanguageModules[language] ?? configuredLanguageModules[grammarName];
4115
+ if (wasmModule) {
4116
+ const loadPromise2 = (async () => {
4117
+ const lang = await Language.load(wasmModule);
4118
+ loadedLanguages.set(language, lang);
4119
+ return lang;
4120
+ })();
4121
+ loadingLanguages.set(language, loadPromise2);
4122
+ return loadPromise2;
4123
+ }
4098
4124
  const path = wasmPath ?? configuredLanguagePaths[language] ?? await getDefaultLanguagePath(language);
4099
4125
  const loadPromise = (async () => {
4100
4126
  const lang = await Language.load(path);
@@ -10844,7 +10870,12 @@ function propagateTaint(dfg, calls, sources, sinks, sanitizers) {
10844
10870
  existing.push(san);
10845
10871
  sanitizersByLine.set(san.line, existing);
10846
10872
  }
10847
- const initialTaint = findInitialTaint(sources, dfg, callsByLine, defsByLine);
10873
+ const rawInitialTaint = findInitialTaint(sources, dfg, callsByLine, defsByLine);
10874
+ const initialTaint = rawInitialTaint.filter((tv) => {
10875
+ if (tv.line === tv.sourceLine) return true;
10876
+ const sanCheck = checkSanitized(tv.sourceLine, tv.line, tv.sourceType, sanitizersByLine);
10877
+ return !sanCheck.sanitized;
10878
+ });
10848
10879
  taintedVars.push(...initialTaint);
10849
10880
  const propagatedTaint = propagateThroughChains(
10850
10881
  initialTaint,
@@ -10986,7 +11017,36 @@ function propagateThroughChains(initialTaint, chains, defById, sanitizersByLine)
10986
11017
  }
10987
11018
  return propagated;
10988
11019
  }
10989
- function checkSanitized(_fromLine, _toLine, _sinkType, _sanitizersByLine) {
11020
+ var KNOWN_SINK_TYPES = /* @__PURE__ */ new Set([
11021
+ "sql_injection",
11022
+ "xss",
11023
+ "path_traversal",
11024
+ "command_injection",
11025
+ "ssrf",
11026
+ "ldap_injection",
11027
+ "xpath_injection",
11028
+ "log_injection",
11029
+ "xxe",
11030
+ "deserialization",
11031
+ "code_injection"
11032
+ ]);
11033
+ function checkSanitized(_fromLine, toLine, sinkType, sanitizersByLine) {
11034
+ const sanitizersAtTarget = sanitizersByLine.get(toLine);
11035
+ if (!sanitizersAtTarget || sanitizersAtTarget.length === 0) {
11036
+ return { sanitized: false };
11037
+ }
11038
+ const isKnownSinkType = KNOWN_SINK_TYPES.has(sinkType);
11039
+ for (const san of sanitizersAtTarget) {
11040
+ if (isKnownSinkType) {
11041
+ if (san.sanitizes.includes(sinkType)) {
11042
+ return { sanitized: true, sanitizer: san };
11043
+ }
11044
+ } else {
11045
+ if (san.sanitizes.length > 0) {
11046
+ return { sanitized: true, sanitizer: san };
11047
+ }
11048
+ }
11049
+ }
10990
11050
  return { sanitized: false };
10991
11051
  }
10992
11052
  function buildTaintFlow(source, sink, taintInfo, dfg, defById) {
@@ -11435,13 +11495,33 @@ var SANITIZER_METHODS = /* @__PURE__ */ new Set([
11435
11495
  "getCanonicalPath",
11436
11496
  "normalize",
11437
11497
  "toRealPath",
11498
+ // JavaScript/TypeScript URL Encoding
11499
+ "encodeURIComponent",
11500
+ "encodeURI",
11501
+ // JavaScript/TypeScript String Validation
11502
+ "match",
11503
+ "test",
11504
+ "startsWith",
11505
+ "includes",
11506
+ // Path Validation and Normalization
11507
+ "normalizePath",
11508
+ "normalizeLineEndings",
11509
+ "isPathWithin",
11510
+ "isPathWithinAllowedDirectories",
11511
+ "isPathAllowed",
11512
+ "relative",
11513
+ "join",
11438
11514
  // General
11439
11515
  "sanitize",
11440
11516
  "encode",
11441
11517
  "escape",
11442
11518
  "clean",
11443
11519
  "filter",
11444
- "validate"
11520
+ "validate",
11521
+ "validatePath",
11522
+ "validateCityName",
11523
+ "validateInput",
11524
+ "sanitizeInput"
11445
11525
  ]);
11446
11526
  var ANTI_SANITIZER_METHODS = /* @__PURE__ */ new Set([
11447
11527
  // URL decoding (reverses URL encoding)
@@ -3998,6 +3998,7 @@ var parserInitializing = null;
3998
3998
  var loadedLanguages = /* @__PURE__ */ new Map();
3999
3999
  var loadingLanguages = /* @__PURE__ */ new Map();
4000
4000
  var configuredLanguagePaths = {};
4001
+ var configuredLanguageModules = {};
4001
4002
  async function initParser(options = {}) {
4002
4003
  if (parserInitialized) {
4003
4004
  return;
@@ -4005,14 +4006,28 @@ async function initParser(options = {}) {
4005
4006
  if (parserInitializing) {
4006
4007
  return parserInitializing;
4007
4008
  }
4008
- const wasmPath = options.wasmPath ?? await getDefaultWasmPath();
4009
4009
  if (options.languagePaths) {
4010
4010
  configuredLanguagePaths = options.languagePaths;
4011
4011
  }
4012
+ if (options.languageModules) {
4013
+ configuredLanguageModules = options.languageModules;
4014
+ }
4012
4015
  parserInitializing = (async () => {
4013
- await Parser.init({
4014
- locateFile: () => wasmPath
4015
- });
4016
+ if (options.wasmModule) {
4017
+ await Parser.init({
4018
+ locateFile: () => "web-tree-sitter.wasm",
4019
+ instantiateWasm(imports, callback) {
4020
+ const instance2 = new WebAssembly.Instance(options.wasmModule, imports);
4021
+ callback(instance2, options.wasmModule);
4022
+ return instance2.exports;
4023
+ }
4024
+ });
4025
+ } else {
4026
+ const wasmPath = options.wasmPath ?? await getDefaultWasmPath();
4027
+ await Parser.init({
4028
+ locateFile: () => wasmPath
4029
+ });
4030
+ }
4016
4031
  parserInitialized = true;
4017
4032
  parserInitializing = null;
4018
4033
  })();
@@ -4030,6 +4045,17 @@ async function loadLanguage(language, wasmPath) {
4030
4045
  if (loading) {
4031
4046
  return loading;
4032
4047
  }
4048
+ const grammarName = language === "typescript" ? "javascript" : language;
4049
+ const wasmModule = configuredLanguageModules[language] ?? configuredLanguageModules[grammarName];
4050
+ if (wasmModule) {
4051
+ const loadPromise2 = (async () => {
4052
+ const lang = await Language.load(wasmModule);
4053
+ loadedLanguages.set(language, lang);
4054
+ return lang;
4055
+ })();
4056
+ loadingLanguages.set(language, loadPromise2);
4057
+ return loadPromise2;
4058
+ }
4033
4059
  const path = wasmPath ?? configuredLanguagePaths[language] ?? await getDefaultLanguagePath(language);
4034
4060
  const loadPromise = (async () => {
4035
4061
  const lang = await Language.load(path);
@@ -10779,7 +10805,12 @@ function propagateTaint(dfg, calls, sources, sinks, sanitizers) {
10779
10805
  existing.push(san);
10780
10806
  sanitizersByLine.set(san.line, existing);
10781
10807
  }
10782
- const initialTaint = findInitialTaint(sources, dfg, callsByLine, defsByLine);
10808
+ const rawInitialTaint = findInitialTaint(sources, dfg, callsByLine, defsByLine);
10809
+ const initialTaint = rawInitialTaint.filter((tv) => {
10810
+ if (tv.line === tv.sourceLine) return true;
10811
+ const sanCheck = checkSanitized(tv.sourceLine, tv.line, tv.sourceType, sanitizersByLine);
10812
+ return !sanCheck.sanitized;
10813
+ });
10783
10814
  taintedVars.push(...initialTaint);
10784
10815
  const propagatedTaint = propagateThroughChains(
10785
10816
  initialTaint,
@@ -10921,7 +10952,36 @@ function propagateThroughChains(initialTaint, chains, defById, sanitizersByLine)
10921
10952
  }
10922
10953
  return propagated;
10923
10954
  }
10924
- function checkSanitized(_fromLine, _toLine, _sinkType, _sanitizersByLine) {
10955
+ var KNOWN_SINK_TYPES = /* @__PURE__ */ new Set([
10956
+ "sql_injection",
10957
+ "xss",
10958
+ "path_traversal",
10959
+ "command_injection",
10960
+ "ssrf",
10961
+ "ldap_injection",
10962
+ "xpath_injection",
10963
+ "log_injection",
10964
+ "xxe",
10965
+ "deserialization",
10966
+ "code_injection"
10967
+ ]);
10968
+ function checkSanitized(_fromLine, toLine, sinkType, sanitizersByLine) {
10969
+ const sanitizersAtTarget = sanitizersByLine.get(toLine);
10970
+ if (!sanitizersAtTarget || sanitizersAtTarget.length === 0) {
10971
+ return { sanitized: false };
10972
+ }
10973
+ const isKnownSinkType = KNOWN_SINK_TYPES.has(sinkType);
10974
+ for (const san of sanitizersAtTarget) {
10975
+ if (isKnownSinkType) {
10976
+ if (san.sanitizes.includes(sinkType)) {
10977
+ return { sanitized: true, sanitizer: san };
10978
+ }
10979
+ } else {
10980
+ if (san.sanitizes.length > 0) {
10981
+ return { sanitized: true, sanitizer: san };
10982
+ }
10983
+ }
10984
+ }
10925
10985
  return { sanitized: false };
10926
10986
  }
10927
10987
  function buildTaintFlow(source, sink, taintInfo, dfg, defById) {
@@ -11370,13 +11430,33 @@ var SANITIZER_METHODS = /* @__PURE__ */ new Set([
11370
11430
  "getCanonicalPath",
11371
11431
  "normalize",
11372
11432
  "toRealPath",
11433
+ // JavaScript/TypeScript URL Encoding
11434
+ "encodeURIComponent",
11435
+ "encodeURI",
11436
+ // JavaScript/TypeScript String Validation
11437
+ "match",
11438
+ "test",
11439
+ "startsWith",
11440
+ "includes",
11441
+ // Path Validation and Normalization
11442
+ "normalizePath",
11443
+ "normalizeLineEndings",
11444
+ "isPathWithin",
11445
+ "isPathWithinAllowedDirectories",
11446
+ "isPathAllowed",
11447
+ "relative",
11448
+ "join",
11373
11449
  // General
11374
11450
  "sanitize",
11375
11451
  "encode",
11376
11452
  "escape",
11377
11453
  "clean",
11378
11454
  "filter",
11379
- "validate"
11455
+ "validate",
11456
+ "validatePath",
11457
+ "validateCityName",
11458
+ "validateInput",
11459
+ "sanitizeInput"
11380
11460
  ]);
11381
11461
  var ANTI_SANITIZER_METHODS = /* @__PURE__ */ new Set([
11382
11462
  // URL decoding (reverses URL encoding)
@@ -15,11 +15,23 @@ interface ParserOptions {
15
15
  * In browsers/workers, must be provided.
16
16
  */
17
17
  wasmPath?: string;
18
+ /**
19
+ * Pre-compiled WebAssembly.Module for tree-sitter.wasm.
20
+ * Use this for Cloudflare Workers where dynamic WASM compilation is blocked.
21
+ * Takes precedence over wasmPath when provided.
22
+ */
23
+ wasmModule?: WebAssembly.Module;
18
24
  /**
19
25
  * Custom paths/URLs to language grammar WASM files.
20
26
  * Key is the language name, value is the path/URL.
21
27
  */
22
28
  languagePaths?: Partial<Record<SupportedLanguage, string>>;
29
+ /**
30
+ * Pre-compiled WebAssembly.Module for language grammars.
31
+ * Use this for Cloudflare Workers where dynamic WASM compilation is blocked.
32
+ * Takes precedence over languagePaths when provided.
33
+ */
34
+ languageModules?: Partial<Record<SupportedLanguage, WebAssembly.Module>>;
23
35
  }
24
36
  /**
25
37
  * Initialize the Tree-sitter parser runtime.