circle-ir 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +200 -0
  3. package/configs/sinks/code_injection.yaml +672 -0
  4. package/configs/sinks/command.yaml +917 -0
  5. package/configs/sinks/deserialization.yaml +105 -0
  6. package/configs/sinks/ldap.yaml +136 -0
  7. package/configs/sinks/nodejs.json +629 -0
  8. package/configs/sinks/path.yaml +715 -0
  9. package/configs/sinks/python.json +501 -0
  10. package/configs/sinks/rust.json +339 -0
  11. package/configs/sinks/sql.yaml +233 -0
  12. package/configs/sinks/ssrf.yaml +160 -0
  13. package/configs/sinks/xpath.yaml +121 -0
  14. package/configs/sinks/xss.yaml +727 -0
  15. package/configs/sources/db_sources.yaml +90 -0
  16. package/configs/sources/env_sources.yaml +94 -0
  17. package/configs/sources/express.json +197 -0
  18. package/configs/sources/file_sources.yaml +164 -0
  19. package/configs/sources/http_sources.yaml +379 -0
  20. package/configs/sources/io_sources.yaml +519 -0
  21. package/configs/sources/network_sources.yaml +99 -0
  22. package/configs/sources/python.json +230 -0
  23. package/configs/sources/rust.json +286 -0
  24. package/configs/sources/spring.yaml +70 -0
  25. package/dist/analysis/advisory-db.d.ts +86 -0
  26. package/dist/analysis/advisory-db.js +104 -0
  27. package/dist/analysis/advisory-db.js.map +1 -0
  28. package/dist/analysis/cargo-parser.d.ts +42 -0
  29. package/dist/analysis/cargo-parser.js +102 -0
  30. package/dist/analysis/cargo-parser.js.map +1 -0
  31. package/dist/analysis/config-loader.d.ts +37 -0
  32. package/dist/analysis/config-loader.js +1561 -0
  33. package/dist/analysis/config-loader.js.map +1 -0
  34. package/dist/analysis/constant-propagation/ast-utils.d.ts +25 -0
  35. package/dist/analysis/constant-propagation/ast-utils.js +34 -0
  36. package/dist/analysis/constant-propagation/ast-utils.js.map +1 -0
  37. package/dist/analysis/constant-propagation/evaluator.d.ts +32 -0
  38. package/dist/analysis/constant-propagation/evaluator.js +296 -0
  39. package/dist/analysis/constant-propagation/evaluator.js.map +1 -0
  40. package/dist/analysis/constant-propagation/index.d.ts +62 -0
  41. package/dist/analysis/constant-propagation/index.js +152 -0
  42. package/dist/analysis/constant-propagation/index.js.map +1 -0
  43. package/dist/analysis/constant-propagation/patterns.d.ts +8 -0
  44. package/dist/analysis/constant-propagation/patterns.js +126 -0
  45. package/dist/analysis/constant-propagation/patterns.js.map +1 -0
  46. package/dist/analysis/constant-propagation/propagator.d.ts +180 -0
  47. package/dist/analysis/constant-propagation/propagator.js +1985 -0
  48. package/dist/analysis/constant-propagation/propagator.js.map +1 -0
  49. package/dist/analysis/constant-propagation/types.d.ts +63 -0
  50. package/dist/analysis/constant-propagation/types.js +5 -0
  51. package/dist/analysis/constant-propagation/types.js.map +1 -0
  52. package/dist/analysis/constant-propagation.d.ts +9 -0
  53. package/dist/analysis/constant-propagation.js +18 -0
  54. package/dist/analysis/constant-propagation.js.map +1 -0
  55. package/dist/analysis/dependency-scanner.d.ts +79 -0
  56. package/dist/analysis/dependency-scanner.js +122 -0
  57. package/dist/analysis/dependency-scanner.js.map +1 -0
  58. package/dist/analysis/dfg-verifier.d.ts +116 -0
  59. package/dist/analysis/dfg-verifier.js +399 -0
  60. package/dist/analysis/dfg-verifier.js.map +1 -0
  61. package/dist/analysis/findings.d.ts +11 -0
  62. package/dist/analysis/findings.js +228 -0
  63. package/dist/analysis/findings.js.map +1 -0
  64. package/dist/analysis/index.d.ts +16 -0
  65. package/dist/analysis/index.js +18 -0
  66. package/dist/analysis/index.js.map +1 -0
  67. package/dist/analysis/interprocedural.d.ts +99 -0
  68. package/dist/analysis/interprocedural.js +526 -0
  69. package/dist/analysis/interprocedural.js.map +1 -0
  70. package/dist/analysis/path-finder.d.ts +133 -0
  71. package/dist/analysis/path-finder.js +354 -0
  72. package/dist/analysis/path-finder.js.map +1 -0
  73. package/dist/analysis/rules.d.ts +75 -0
  74. package/dist/analysis/rules.js +332 -0
  75. package/dist/analysis/rules.js.map +1 -0
  76. package/dist/analysis/semver.d.ts +27 -0
  77. package/dist/analysis/semver.js +127 -0
  78. package/dist/analysis/semver.js.map +1 -0
  79. package/dist/analysis/taint-matcher.d.ts +15 -0
  80. package/dist/analysis/taint-matcher.js +634 -0
  81. package/dist/analysis/taint-matcher.js.map +1 -0
  82. package/dist/analysis/taint-propagation.d.ts +67 -0
  83. package/dist/analysis/taint-propagation.js +298 -0
  84. package/dist/analysis/taint-propagation.js.map +1 -0
  85. package/dist/analysis/unresolved.d.ts +14 -0
  86. package/dist/analysis/unresolved.js +202 -0
  87. package/dist/analysis/unresolved.js.map +1 -0
  88. package/dist/analyzer.d.ts +43 -0
  89. package/dist/analyzer.js +1010 -0
  90. package/dist/analyzer.js.map +1 -0
  91. package/dist/browser/circle-ir.js +16576 -0
  92. package/dist/browser.d.ts +38 -0
  93. package/dist/browser.js +38 -0
  94. package/dist/browser.js.map +1 -0
  95. package/dist/core/circle-ir-core.cjs +13626 -0
  96. package/dist/core/circle-ir-core.d.ts +59 -0
  97. package/dist/core/circle-ir-core.js +13591 -0
  98. package/dist/core/extractors/calls.d.ts +13 -0
  99. package/dist/core/extractors/calls.js +1429 -0
  100. package/dist/core/extractors/calls.js.map +1 -0
  101. package/dist/core/extractors/cfg.d.ts +9 -0
  102. package/dist/core/extractors/cfg.js +519 -0
  103. package/dist/core/extractors/cfg.js.map +1 -0
  104. package/dist/core/extractors/dfg.d.ts +12 -0
  105. package/dist/core/extractors/dfg.js +1081 -0
  106. package/dist/core/extractors/dfg.js.map +1 -0
  107. package/dist/core/extractors/exports.d.ts +14 -0
  108. package/dist/core/extractors/exports.js +80 -0
  109. package/dist/core/extractors/exports.js.map +1 -0
  110. package/dist/core/extractors/imports.d.ts +9 -0
  111. package/dist/core/extractors/imports.js +739 -0
  112. package/dist/core/extractors/imports.js.map +1 -0
  113. package/dist/core/extractors/index.d.ts +10 -0
  114. package/dist/core/extractors/index.js +11 -0
  115. package/dist/core/extractors/index.js.map +1 -0
  116. package/dist/core/extractors/meta.d.ts +10 -0
  117. package/dist/core/extractors/meta.js +109 -0
  118. package/dist/core/extractors/meta.js.map +1 -0
  119. package/dist/core/extractors/types.d.ts +10 -0
  120. package/dist/core/extractors/types.js +1479 -0
  121. package/dist/core/extractors/types.js.map +1 -0
  122. package/dist/core/index.d.ts +5 -0
  123. package/dist/core/index.js +8 -0
  124. package/dist/core/index.js.map +1 -0
  125. package/dist/core/parser.d.ts +84 -0
  126. package/dist/core/parser.js +250 -0
  127. package/dist/core/parser.js.map +1 -0
  128. package/dist/core-lib.d.ts +59 -0
  129. package/dist/core-lib.js +62 -0
  130. package/dist/core-lib.js.map +1 -0
  131. package/dist/index.d.ts +15 -0
  132. package/dist/index.js +20 -0
  133. package/dist/index.js.map +1 -0
  134. package/dist/languages/index.d.ts +11 -0
  135. package/dist/languages/index.js +14 -0
  136. package/dist/languages/index.js.map +1 -0
  137. package/dist/languages/plugins/base.d.ts +44 -0
  138. package/dist/languages/plugins/base.js +82 -0
  139. package/dist/languages/plugins/base.js.map +1 -0
  140. package/dist/languages/plugins/index.d.ts +14 -0
  141. package/dist/languages/plugins/index.js +25 -0
  142. package/dist/languages/plugins/index.js.map +1 -0
  143. package/dist/languages/plugins/java.d.ts +49 -0
  144. package/dist/languages/plugins/java.js +402 -0
  145. package/dist/languages/plugins/java.js.map +1 -0
  146. package/dist/languages/plugins/javascript.d.ts +48 -0
  147. package/dist/languages/plugins/javascript.js +445 -0
  148. package/dist/languages/plugins/javascript.js.map +1 -0
  149. package/dist/languages/plugins/python.d.ts +47 -0
  150. package/dist/languages/plugins/python.js +480 -0
  151. package/dist/languages/plugins/python.js.map +1 -0
  152. package/dist/languages/plugins/rust.d.ts +47 -0
  153. package/dist/languages/plugins/rust.js +405 -0
  154. package/dist/languages/plugins/rust.js.map +1 -0
  155. package/dist/languages/registry.d.ts +30 -0
  156. package/dist/languages/registry.js +80 -0
  157. package/dist/languages/registry.js.map +1 -0
  158. package/dist/languages/types.d.ts +184 -0
  159. package/dist/languages/types.js +8 -0
  160. package/dist/languages/types.js.map +1 -0
  161. package/dist/resolution/cross-file.d.ts +146 -0
  162. package/dist/resolution/cross-file.js +439 -0
  163. package/dist/resolution/cross-file.js.map +1 -0
  164. package/dist/resolution/index.d.ts +12 -0
  165. package/dist/resolution/index.js +10 -0
  166. package/dist/resolution/index.js.map +1 -0
  167. package/dist/resolution/symbol-table.d.ts +136 -0
  168. package/dist/resolution/symbol-table.js +336 -0
  169. package/dist/resolution/symbol-table.js.map +1 -0
  170. package/dist/resolution/type-hierarchy.d.ts +124 -0
  171. package/dist/resolution/type-hierarchy.js +515 -0
  172. package/dist/resolution/type-hierarchy.js.map +1 -0
  173. package/dist/types/config.d.ts +45 -0
  174. package/dist/types/config.js +5 -0
  175. package/dist/types/config.js.map +1 -0
  176. package/dist/types/index.d.ts +392 -0
  177. package/dist/types/index.js +7 -0
  178. package/dist/types/index.js.map +1 -0
  179. package/dist/utils/logger.d.ts +85 -0
  180. package/dist/utils/logger.js +198 -0
  181. package/dist/utils/logger.js.map +1 -0
  182. package/dist/wasm/tree-sitter-java.wasm +0 -0
  183. package/dist/wasm/tree-sitter-javascript.wasm +0 -0
  184. package/dist/wasm/tree-sitter-python.wasm +0 -0
  185. package/dist/wasm/tree-sitter-rust.wasm +0 -0
  186. package/dist/wasm/web-tree-sitter.wasm +0 -0
  187. package/docs/SPEC.md +1021 -0
  188. package/examples/browser-example.html +610 -0
  189. package/examples/node-example.ts +215 -0
  190. package/package.json +107 -0
  191. package/wasm/tree-sitter-java.wasm +0 -0
  192. package/wasm/tree-sitter-javascript.wasm +0 -0
  193. package/wasm/tree-sitter-python.wasm +0 -0
  194. package/wasm/tree-sitter-rust.wasm +0 -0
@@ -0,0 +1,1985 @@
1
+ /**
2
+ * Main Constant Propagator class.
3
+ *
4
+ * Tracks constant values through variable assignments and evaluates expressions
5
+ * to detect dead code and reduce false positives in taint analysis.
6
+ */
7
+ import { isKnown, createUnknown, getNodeText, getNodeLine } from './ast-utils.js';
8
+ import { ExpressionEvaluator } from './evaluator.js';
9
+ import { TAINT_PATTERN_REGEX, SANITIZER_METHODS, PROPAGATOR_METHODS, ANTI_SANITIZER_METHODS } from './patterns.js';
10
+ /**
11
+ * Constant Propagator for taint analysis.
12
+ *
13
+ * Key features:
14
+ * - Tracks variable → constant value mappings
15
+ * - Evaluates arithmetic, comparison, and string expressions
16
+ * - Detects dead/unreachable code via if/switch/ternary evaluation
17
+ * - Integrates with taint analysis to skip false positives
18
+ */
19
+ export class ConstantPropagator {
20
+ symbols = new Map();
21
+ tainted = new Set();
22
+ unreachableLines = new Set();
23
+ taintedCollections = new Map();
24
+ // Track variables explicitly assigned from sanitizer calls
25
+ sanitizedVars = new Set();
26
+ source = '';
27
+ evaluator;
28
+ // Track the expression node that defined each variable (for refinement)
29
+ definitionNodes = new Map();
30
+ // Track if we're inside a conditional branch (for conservative taint handling)
31
+ inConditionalBranch = false;
32
+ // Track which methods always return constants (inter-procedural analysis)
33
+ methodReturnsConstant = new Set();
34
+ // Track which methods always return sanitized values (inter-procedural analysis)
35
+ methodReturnsSanitized = new Set();
36
+ // Track which methods return a specific parameter (index) - for taint propagation
37
+ methodReturnsParameter = new Map();
38
+ // Track which methods return safe (non-tainted) values even with tainted input
39
+ methodReturnsSafeValue = new Set();
40
+ // Additional taint patterns (for test harnesses to inject custom patterns)
41
+ additionalTaintPatterns = [];
42
+ // Track list elements by index for precise list taint tracking
43
+ listElements = new Map();
44
+ // Track loop variables (should not be overwritten with constant values)
45
+ loopVariables = new Set();
46
+ // Track tainted array elements: array name → set of tainted indices (or '*' for whole array)
47
+ taintedArrayElements = new Map();
48
+ // Track current method name for scoping local variables
49
+ currentMethod = null;
50
+ // Track conditional taints: which variables were tainted under which conditions
51
+ // Maps condition expression string → set of variables tainted under that condition
52
+ conditionalTaints = new Map();
53
+ // Stack of condition expressions we're currently inside (for nested ifs)
54
+ conditionStack = [];
55
+ // Track which lines are under which conditions
56
+ lineConditions = new Map();
57
+ // Track lines that are inside synchronized blocks (where field strong updates are safe)
58
+ synchronizedLines = new Set();
59
+ // Track if we're currently inside a synchronized block
60
+ inSynchronizedBlock = false;
61
+ // Track iterator sources: iterator variable name → collection name it was created from
62
+ iteratorSources = new Map();
63
+ // Track class field names (declared at class level, not local variables)
64
+ classFields = new Set();
65
+ // Track tainted method parameters for inter-procedural analysis
66
+ taintedParametersList = [];
67
+ // Track instance fields assigned from tainted constructor parameters
68
+ instanceFieldTaint = new Map();
69
+ // Track current class name for field taint tracking
70
+ currentClassName = null;
71
+ // Track if we're currently inside a constructor (vs regular method)
72
+ inConstructor = false;
73
+ // Map constructor parameter names to their positions (0-indexed)
74
+ constructorParamPositions = new Map();
75
+ /**
76
+ * Analyze source code and build constant propagation state.
77
+ */
78
+ analyze(tree, sourceCode, additionalTaintPatterns = [], sanitizerMethods = [], taintedParameters = []) {
79
+ this.source = sourceCode;
80
+ this.additionalTaintPatterns = additionalTaintPatterns;
81
+ this.taintedParametersList = taintedParameters;
82
+ this.symbols.clear();
83
+ this.tainted.clear();
84
+ this.unreachableLines.clear();
85
+ this.taintedCollections.clear();
86
+ this.definitionNodes.clear();
87
+ this.inConditionalBranch = false;
88
+ this.methodReturnsConstant.clear();
89
+ this.methodReturnsSanitized.clear();
90
+ this.methodReturnsParameter.clear();
91
+ this.methodReturnsSafeValue.clear();
92
+ this.listElements.clear();
93
+ this.loopVariables.clear();
94
+ this.taintedArrayElements.clear();
95
+ this.sanitizedVars.clear();
96
+ this.currentMethod = null;
97
+ this.conditionalTaints.clear();
98
+ this.conditionStack = [];
99
+ this.lineConditions.clear();
100
+ this.synchronizedLines.clear();
101
+ this.inSynchronizedBlock = false;
102
+ this.iteratorSources.clear();
103
+ this.classFields.clear();
104
+ this.instanceFieldTaint.clear();
105
+ this.currentClassName = null;
106
+ this.inConstructor = false;
107
+ this.constructorParamPositions.clear();
108
+ // Pre-pass: identify class fields
109
+ this.collectClassFields(tree.rootNode);
110
+ // Pre-populate methodReturnsSanitized with methods marked with @sanitizer annotation
111
+ for (const methodName of sanitizerMethods) {
112
+ this.methodReturnsSanitized.add(methodName);
113
+ }
114
+ // Create evaluator with symbol lookup that handles scoped names
115
+ this.evaluator = new ExpressionEvaluator(this.source, (name) => this.lookupSymbol(name));
116
+ // Pre-pass: identify methods that always return constants or sanitized values
117
+ this.analyzeMethodReturns(tree.rootNode);
118
+ // First pass: collect symbols, taint, and unreachable lines
119
+ this.visit(tree.rootNode);
120
+ // Second pass: refine taint for variables derived from constants
121
+ this.refineTaintFromConstants();
122
+ // Build result with both scoped and unscoped names for backward compatibility
123
+ // Unscoped names are needed for legacy code and tests
124
+ const resultTainted = new Set(this.tainted);
125
+ const resultSanitized = new Set(this.sanitizedVars);
126
+ const resultSymbols = new Map(this.symbols);
127
+ // Add unscoped versions of scoped names for backward compatibility
128
+ for (const name of this.tainted) {
129
+ if (name.includes(':')) {
130
+ const unscoped = name.substring(name.indexOf(':') + 1);
131
+ resultTainted.add(unscoped);
132
+ }
133
+ }
134
+ for (const name of this.sanitizedVars) {
135
+ if (name.includes(':')) {
136
+ const unscoped = name.substring(name.indexOf(':') + 1);
137
+ resultSanitized.add(unscoped);
138
+ }
139
+ }
140
+ // Add unscoped symbols for backward compatibility with tests
141
+ // The scoped versions take priority in filterCleanVariableSinks
142
+ for (const [name, value] of this.symbols) {
143
+ if (name.includes(':')) {
144
+ const unscoped = name.substring(name.indexOf(':') + 1);
145
+ // Only add if not already present (scoped version wins on conflict)
146
+ if (!resultSymbols.has(unscoped)) {
147
+ resultSymbols.set(unscoped, value);
148
+ }
149
+ }
150
+ }
151
+ return {
152
+ symbols: resultSymbols,
153
+ tainted: resultTainted,
154
+ unreachableLines: new Set(this.unreachableLines),
155
+ taintedCollections: new Map(this.taintedCollections),
156
+ taintedArrayElements: new Map(this.taintedArrayElements),
157
+ sanitizedVars: resultSanitized,
158
+ conditionalTaints: new Map(this.conditionalTaints),
159
+ lineConditions: new Map(this.lineConditions),
160
+ synchronizedLines: new Set(this.synchronizedLines),
161
+ instanceFieldTaint: new Map(this.instanceFieldTaint),
162
+ };
163
+ }
164
+ /**
165
+ * Evaluate an expression to determine its constant value.
166
+ */
167
+ evaluateExpression(node) {
168
+ return this.evaluator.evaluate(node);
169
+ }
170
+ /**
171
+ * Check if a variable has a known constant value.
172
+ */
173
+ getValue(varName) {
174
+ return this.symbols.get(varName);
175
+ }
176
+ /**
177
+ * Check if a variable is tainted.
178
+ */
179
+ isTainted(varName) {
180
+ return this.tainted.has(varName);
181
+ }
182
+ /**
183
+ * Check if a line is reachable (not dead code).
184
+ */
185
+ isLineReachable(line) {
186
+ return !this.unreachableLines.has(line);
187
+ }
188
+ // ===========================================================================
189
+ // Inter-procedural Analysis
190
+ // ===========================================================================
191
+ /**
192
+ * Pre-pass: Analyze all methods to detect those that always return constants or sanitized values.
193
+ */
194
+ analyzeMethodReturns(root) {
195
+ const methods = this.findAllMethods(root);
196
+ for (const method of methods) {
197
+ const methodName = this.getMethodName(method);
198
+ if (!methodName)
199
+ continue;
200
+ const body = method.childForFieldName('body');
201
+ if (!body)
202
+ continue;
203
+ // Find all return statements
204
+ const returns = [];
205
+ const findReturns = (n) => {
206
+ if (n.type === 'return_statement') {
207
+ returns.push(n);
208
+ }
209
+ for (const child of n.children) {
210
+ findReturns(child);
211
+ }
212
+ };
213
+ findReturns(body);
214
+ if (returns.length === 0)
215
+ continue;
216
+ // Create temp propagator for analysis
217
+ const tempPropagator = new ConstantPropagator();
218
+ tempPropagator.source = this.source;
219
+ tempPropagator.additionalTaintPatterns = this.additionalTaintPatterns;
220
+ // Mark parameters as tainted for parameter-return tracking
221
+ const params = this.getMethodParameters(method);
222
+ for (const paramName of params) {
223
+ tempPropagator.tainted.add(paramName);
224
+ }
225
+ // Initialize evaluator for temp propagator
226
+ tempPropagator.evaluator = new ExpressionEvaluator(tempPropagator.source, (name) => tempPropagator.symbols.get(name));
227
+ tempPropagator.visit(body);
228
+ // Analyze returns
229
+ let allReturnsConstant = true;
230
+ let allReturnsSanitized = true;
231
+ let allReturnsSafe = true; // Track if all returns are non-tainted
232
+ let returnedParamIndex = -1;
233
+ let hasReachableReturn = false;
234
+ for (const ret of returns) {
235
+ const retLine = getNodeLine(ret);
236
+ if (tempPropagator.unreachableLines.has(retLine)) {
237
+ continue;
238
+ }
239
+ hasReachableReturn = true;
240
+ const valueNode = ret.children.find(c => c.type !== 'return' && c.type !== ';' && c.type !== 'comment');
241
+ if (!valueNode) {
242
+ allReturnsConstant = false;
243
+ allReturnsSanitized = false;
244
+ allReturnsSafe = false; // void return or unknown - not safe
245
+ continue;
246
+ }
247
+ const value = tempPropagator.evaluateExpression(valueNode);
248
+ if (!isKnown(value)) {
249
+ allReturnsConstant = false;
250
+ }
251
+ if (!this.isSanitizerCall(valueNode, body)) {
252
+ allReturnsSanitized = false;
253
+ }
254
+ // Check if return value is tainted
255
+ if (tempPropagator.isTaintedExpression(valueNode)) {
256
+ allReturnsSafe = false;
257
+ }
258
+ if (params.length > 0) {
259
+ const returnExpr = getNodeText(valueNode, this.source);
260
+ const directParamIdx = params.indexOf(returnExpr);
261
+ if (directParamIdx >= 0) {
262
+ if (returnedParamIndex >= 0 && returnedParamIndex !== directParamIdx) {
263
+ returnedParamIndex = -2;
264
+ }
265
+ else if (returnedParamIndex !== -2) {
266
+ returnedParamIndex = directParamIdx;
267
+ }
268
+ }
269
+ else if (valueNode.type === 'identifier' && tempPropagator.tainted.has(returnExpr)) {
270
+ const derivedFrom = this.findParameterSource(body, returnExpr, params);
271
+ if (derivedFrom >= 0) {
272
+ if (returnedParamIndex >= 0 && returnedParamIndex !== derivedFrom) {
273
+ returnedParamIndex = -2;
274
+ }
275
+ else if (returnedParamIndex !== -2) {
276
+ returnedParamIndex = derivedFrom;
277
+ }
278
+ }
279
+ }
280
+ }
281
+ }
282
+ if (hasReachableReturn) {
283
+ if (allReturnsConstant) {
284
+ this.methodReturnsConstant.add(methodName);
285
+ }
286
+ if (allReturnsSanitized) {
287
+ this.methodReturnsSanitized.add(methodName);
288
+ }
289
+ if (returnedParamIndex >= 0) {
290
+ this.methodReturnsParameter.set(methodName, returnedParamIndex);
291
+ }
292
+ // Track methods that return safe values even with tainted input
293
+ // Only add if it's not already covered by constant/sanitized returns
294
+ if (allReturnsSafe && !allReturnsConstant && !allReturnsSanitized) {
295
+ this.methodReturnsSafeValue.add(methodName);
296
+ }
297
+ }
298
+ }
299
+ }
300
+ findParameterSource(body, varName, params) {
301
+ const visited = new Set();
302
+ const queue = [varName];
303
+ while (queue.length > 0) {
304
+ const current = queue.shift();
305
+ if (visited.has(current))
306
+ continue;
307
+ visited.add(current);
308
+ const paramIdx = params.indexOf(current);
309
+ if (paramIdx >= 0) {
310
+ return paramIdx;
311
+ }
312
+ const findAssignment = (n) => {
313
+ if (n.type === 'local_variable_declaration') {
314
+ const declarator = n.children.find(c => c.type === 'variable_declarator');
315
+ if (declarator) {
316
+ const nameNode = declarator.childForFieldName('name');
317
+ const valueNode = declarator.childForFieldName('value');
318
+ if (nameNode && valueNode) {
319
+ const name = getNodeText(nameNode, this.source);
320
+ if (name === current) {
321
+ return this.extractSourceVariable(valueNode, params);
322
+ }
323
+ }
324
+ }
325
+ }
326
+ if (n.type === 'assignment_expression') {
327
+ const left = n.childForFieldName('left');
328
+ const right = n.childForFieldName('right');
329
+ if (left && right) {
330
+ const name = getNodeText(left, this.source);
331
+ if (name === current) {
332
+ return this.extractSourceVariable(right, params);
333
+ }
334
+ }
335
+ }
336
+ for (const child of n.children) {
337
+ const result = findAssignment(child);
338
+ if (result)
339
+ return result;
340
+ }
341
+ return null;
342
+ };
343
+ const source = findAssignment(body);
344
+ if (source) {
345
+ queue.push(source);
346
+ }
347
+ }
348
+ return -1;
349
+ }
350
+ extractSourceVariable(node, params) {
351
+ if (node.type === 'identifier') {
352
+ return getNodeText(node, this.source);
353
+ }
354
+ if (node.type === 'method_invocation' || node.type === 'object_creation_expression') {
355
+ const argsNode = node.childForFieldName('arguments');
356
+ if (argsNode) {
357
+ for (const arg of argsNode.children) {
358
+ if (arg.type === 'identifier') {
359
+ const argName = getNodeText(arg, this.source);
360
+ if (params.includes(argName)) {
361
+ return argName;
362
+ }
363
+ }
364
+ }
365
+ }
366
+ const obj = node.childForFieldName('object');
367
+ if (obj && obj.type === 'identifier') {
368
+ const objName = getNodeText(obj, this.source);
369
+ if (params.includes(objName)) {
370
+ return objName;
371
+ }
372
+ }
373
+ }
374
+ if (node.type === 'ternary_expression') {
375
+ const consequence = node.childForFieldName('consequence');
376
+ const alternative = node.childForFieldName('alternative');
377
+ if (consequence) {
378
+ const consVar = this.extractSourceVariable(consequence, params);
379
+ if (consVar && params.includes(consVar))
380
+ return consVar;
381
+ }
382
+ if (alternative) {
383
+ const altVar = this.extractSourceVariable(alternative, params);
384
+ if (altVar && params.includes(altVar))
385
+ return altVar;
386
+ }
387
+ }
388
+ if (node.type === 'binary_expression') {
389
+ const left = node.childForFieldName('left');
390
+ const right = node.childForFieldName('right');
391
+ if (left) {
392
+ const leftVar = this.extractSourceVariable(left, params);
393
+ if (leftVar && params.includes(leftVar))
394
+ return leftVar;
395
+ }
396
+ if (right) {
397
+ const rightVar = this.extractSourceVariable(right, params);
398
+ if (rightVar && params.includes(rightVar))
399
+ return rightVar;
400
+ }
401
+ }
402
+ return null;
403
+ }
404
+ getMethodParameters(method) {
405
+ const params = [];
406
+ const paramsNode = method.childForFieldName('parameters');
407
+ if (!paramsNode)
408
+ return params;
409
+ for (const child of paramsNode.children) {
410
+ if (child.type === 'formal_parameter' || child.type === 'spread_parameter') {
411
+ const nameNode = child.childForFieldName('name');
412
+ if (nameNode) {
413
+ params.push(getNodeText(nameNode, this.source));
414
+ }
415
+ }
416
+ }
417
+ return params;
418
+ }
419
+ isSanitizerCall(node, methodBody) {
420
+ if (node.type === 'method_invocation') {
421
+ const nameNode = node.childForFieldName('name');
422
+ if (nameNode) {
423
+ const methodName = getNodeText(nameNode, this.source);
424
+ if (SANITIZER_METHODS.has(methodName)) {
425
+ return true;
426
+ }
427
+ }
428
+ }
429
+ if (node.type === 'identifier' && methodBody) {
430
+ const varName = getNodeText(node, this.source);
431
+ if (this.variableIsAssignedFromSanitizer(varName, methodBody)) {
432
+ return true;
433
+ }
434
+ }
435
+ return false;
436
+ }
437
+ variableIsAssignedFromSanitizer(varName, methodBody) {
438
+ const findAssignments = (n) => {
439
+ if (n.type === 'local_variable_declaration') {
440
+ const declarator = n.children.find(c => c.type === 'variable_declarator');
441
+ if (declarator) {
442
+ const nameNode = declarator.childForFieldName('name');
443
+ const valueNode = declarator.childForFieldName('value');
444
+ if (nameNode && valueNode) {
445
+ const name = getNodeText(nameNode, this.source);
446
+ if (name === varName) {
447
+ return this.isSanitizerCall(valueNode);
448
+ }
449
+ }
450
+ }
451
+ }
452
+ if (n.type === 'assignment_expression') {
453
+ const leftNode = n.childForFieldName('left');
454
+ const rightNode = n.childForFieldName('right');
455
+ if (leftNode && rightNode) {
456
+ const name = getNodeText(leftNode, this.source);
457
+ if (name === varName) {
458
+ return this.isSanitizerCall(rightNode);
459
+ }
460
+ }
461
+ }
462
+ for (const child of n.children) {
463
+ if (findAssignments(child)) {
464
+ return true;
465
+ }
466
+ }
467
+ return false;
468
+ };
469
+ return findAssignments(methodBody);
470
+ }
471
+ /**
472
+ * Collect all class field names (instance/static variables declared at class level).
473
+ * These are variables declared directly in the class body, not inside methods.
474
+ */
475
+ collectClassFields(root) {
476
+ const traverse = (n, inClass, inMethod) => {
477
+ if (!n)
478
+ return;
479
+ // Track when we enter a class body
480
+ if (n.type === 'class_body') {
481
+ for (const child of n.children) {
482
+ // Field declarations are direct children of class_body
483
+ if (child.type === 'field_declaration') {
484
+ // Find the variable declarator(s) in this field declaration
485
+ for (const declarator of child.children) {
486
+ if (declarator.type === 'variable_declarator') {
487
+ const nameNode = declarator.childForFieldName('name');
488
+ if (nameNode) {
489
+ const fieldName = getNodeText(nameNode, this.source);
490
+ this.classFields.add(fieldName);
491
+ }
492
+ }
493
+ }
494
+ }
495
+ // Recurse into methods without marking them as fields
496
+ if (child.type === 'method_declaration' || child.type === 'constructor_declaration') {
497
+ traverse(child, true, true);
498
+ }
499
+ else {
500
+ traverse(child, true, false);
501
+ }
502
+ }
503
+ return;
504
+ }
505
+ for (const child of n.children) {
506
+ traverse(child, inClass, inMethod);
507
+ }
508
+ };
509
+ traverse(root, false, false);
510
+ }
511
+ findAllMethods(node) {
512
+ const methods = [];
513
+ const traverse = (n) => {
514
+ if (!n)
515
+ return;
516
+ if (n.type === 'method_declaration' || n.type === 'function_declaration') {
517
+ methods.push(n);
518
+ }
519
+ for (const child of n.children) {
520
+ if (child)
521
+ traverse(child);
522
+ }
523
+ };
524
+ traverse(node);
525
+ return methods;
526
+ }
527
+ getMethodName(method) {
528
+ const nameNode = method.childForFieldName('name');
529
+ if (nameNode) {
530
+ return getNodeText(nameNode, this.source);
531
+ }
532
+ return null;
533
+ }
534
+ // ===========================================================================
535
+ // Taint Refinement
536
+ // ===========================================================================
537
+ refineTaintFromConstants() {
538
+ let changed = true;
539
+ let iterations = 0;
540
+ const maxIterations = 10;
541
+ while (changed && iterations < maxIterations) {
542
+ changed = false;
543
+ iterations++;
544
+ const toRemove = [];
545
+ for (const varName of this.tainted) {
546
+ const symbol = this.symbols.get(varName);
547
+ if (symbol && symbol.type !== 'unknown') {
548
+ toRemove.push(varName);
549
+ continue;
550
+ }
551
+ const defNode = this.definitionNodes.get(varName);
552
+ if (defNode) {
553
+ // Extract method name from scoped variable name for proper taint lookup
554
+ // e.g., "handleRequest:param" -> currentMethod = "handleRequest"
555
+ const prevMethod = this.currentMethod;
556
+ if (varName.includes(':')) {
557
+ this.currentMethod = varName.substring(0, varName.indexOf(':'));
558
+ }
559
+ const isTainted = this.isTaintedExpression(defNode);
560
+ // Restore previous method context
561
+ this.currentMethod = prevMethod;
562
+ if (!isTainted) {
563
+ toRemove.push(varName);
564
+ }
565
+ }
566
+ }
567
+ for (const varName of toRemove) {
568
+ this.tainted.delete(varName);
569
+ this.definitionNodes.delete(varName);
570
+ changed = true;
571
+ }
572
+ }
573
+ }
574
+ // ===========================================================================
575
+ // AST Visitor
576
+ // ===========================================================================
577
+ visit(node) {
578
+ const line = getNodeLine(node);
579
+ if (this.unreachableLines.has(line)) {
580
+ return;
581
+ }
582
+ // Track which condition this line is under for correlated predicate analysis
583
+ if (this.conditionStack.length > 0 && !this.lineConditions.has(line)) {
584
+ // Use the innermost (most recent) condition
585
+ this.lineConditions.set(line, this.conditionStack[this.conditionStack.length - 1]);
586
+ }
587
+ switch (node.type) {
588
+ case 'method_declaration':
589
+ case 'constructor_declaration':
590
+ this.handleMethodDeclaration(node);
591
+ return; // Don't visit children directly, handleMethodDeclaration does it
592
+ case 'local_variable_declaration':
593
+ this.handleVariableDeclaration(node);
594
+ break;
595
+ case 'assignment_expression':
596
+ this.handleAssignment(node);
597
+ break;
598
+ case 'update_expression':
599
+ this.handleUpdateExpression(node);
600
+ break;
601
+ case 'if_statement':
602
+ this.handleIfStatement(node);
603
+ return;
604
+ case 'switch_expression':
605
+ case 'switch_statement':
606
+ this.handleSwitch(node);
607
+ return;
608
+ case 'ternary_expression':
609
+ this.handleTernary(node);
610
+ break;
611
+ case 'expression_statement':
612
+ this.handleExpressionStatement(node);
613
+ break;
614
+ case 'for_statement':
615
+ case 'enhanced_for_statement':
616
+ case 'while_statement':
617
+ case 'do_statement':
618
+ this.handleLoopStatement(node);
619
+ return;
620
+ case 'synchronized_statement':
621
+ this.handleSynchronizedStatement(node);
622
+ return;
623
+ default:
624
+ for (const child of node.children) {
625
+ this.visit(child);
626
+ }
627
+ }
628
+ }
629
+ /**
630
+ * Handle method declarations - scope local variables to this method.
631
+ * This prevents local variables from one method bleeding into another.
632
+ */
633
+ handleMethodDeclaration(node) {
634
+ const nameNode = node.childForFieldName('name');
635
+ const methodName = nameNode ? getNodeText(nameNode, this.source) : null;
636
+ // Save the previous method context
637
+ const prevMethod = this.currentMethod;
638
+ const prevInConstructor = this.inConstructor;
639
+ const prevClassName = this.currentClassName;
640
+ this.currentMethod = methodName;
641
+ // Detect if this is a constructor
642
+ this.inConstructor = node.type === 'constructor_declaration';
643
+ this.constructorParamPositions.clear();
644
+ // For constructors, find the parent class name
645
+ if (this.inConstructor) {
646
+ let parent = node.parent;
647
+ while (parent) {
648
+ if (parent.type === 'class_declaration' || parent.type === 'class_body') {
649
+ if (parent.type === 'class_declaration') {
650
+ const classNameNode = parent.childForFieldName('name');
651
+ if (classNameNode) {
652
+ this.currentClassName = getNodeText(classNameNode, this.source);
653
+ }
654
+ break;
655
+ }
656
+ else {
657
+ // class_body - look at parent for class_declaration
658
+ parent = parent.parent;
659
+ }
660
+ }
661
+ else {
662
+ parent = parent.parent;
663
+ }
664
+ }
665
+ }
666
+ // Mark inter-procedural tainted parameters and track positions for constructors
667
+ const parameters = node.childForFieldName('parameters');
668
+ if (parameters) {
669
+ let paramPosition = 0;
670
+ for (const param of parameters.children) {
671
+ if (param.type === 'formal_parameter' || param.type === 'spread_parameter') {
672
+ const paramNameNode = param.childForFieldName('name');
673
+ if (paramNameNode) {
674
+ const paramName = getNodeText(paramNameNode, this.source);
675
+ // Track constructor parameter positions
676
+ if (this.inConstructor) {
677
+ this.constructorParamPositions.set(paramName, paramPosition);
678
+ }
679
+ // Check if this parameter should be marked as tainted
680
+ if (methodName) {
681
+ for (const tp of this.taintedParametersList) {
682
+ if (tp.methodName === methodName && tp.paramName === paramName) {
683
+ const scopedName = this.getScopedName(paramName);
684
+ this.tainted.add(scopedName);
685
+ this.tainted.add(paramName); // Also add unscoped for flexibility
686
+ }
687
+ }
688
+ }
689
+ paramPosition++;
690
+ }
691
+ }
692
+ }
693
+ }
694
+ // Visit the method body
695
+ const body = node.childForFieldName('body');
696
+ if (body) {
697
+ this.visit(body);
698
+ }
699
+ // Restore the previous method context
700
+ this.currentMethod = prevMethod;
701
+ this.inConstructor = prevInConstructor;
702
+ this.currentClassName = prevClassName;
703
+ }
704
+ /**
705
+ * Get the scoped name for a variable (includes method name if in a method).
706
+ * This ensures local variables from different methods don't conflict.
707
+ */
708
+ getScopedName(varName) {
709
+ // If the variable already has scope indicators (contains . or :), use as-is
710
+ if (varName.includes('.') || varName.includes(':')) {
711
+ return varName;
712
+ }
713
+ // Scope local variables by method name
714
+ if (this.currentMethod) {
715
+ return `${this.currentMethod}:${varName}`;
716
+ }
717
+ return varName;
718
+ }
719
+ /**
720
+ * Look up a variable value, checking both scoped and unscoped names.
721
+ * This handles cases where we need to find a variable that might be
722
+ * either local (scoped) or global (unscoped, like class fields).
723
+ */
724
+ lookupSymbol(varName) {
725
+ // First try the scoped name (local variable in current method)
726
+ if (this.currentMethod && !varName.includes('.') && !varName.includes(':')) {
727
+ const scopedName = `${this.currentMethod}:${varName}`;
728
+ const scopedValue = this.symbols.get(scopedName);
729
+ if (scopedValue) {
730
+ return scopedValue;
731
+ }
732
+ }
733
+ // Fall back to unscoped name (class fields, etc.)
734
+ return this.symbols.get(varName);
735
+ }
736
+ handleLoopStatement(node) {
737
+ // For loops: mark the loop variable as unknown since it changes during iteration
738
+ // This prevents false dead code detection for conditions depending on loop variables
739
+ const loopVarNames = new Set();
740
+ if (node.type === 'for_statement') {
741
+ // Find the init part and extract variable names
742
+ const initNode = node.childForFieldName('init');
743
+ if (initNode) {
744
+ this.collectLoopVariableNames(initNode, loopVarNames);
745
+ }
746
+ // Also check update expression for variables that are modified
747
+ const updateNode = node.childForFieldName('update');
748
+ if (updateNode) {
749
+ this.collectLoopVariableNames(updateNode, loopVarNames);
750
+ }
751
+ }
752
+ else if (node.type === 'enhanced_for_statement') {
753
+ // Enhanced for: for (Type item : collection)
754
+ const nameNode = node.childForFieldName('name');
755
+ if (nameNode) {
756
+ const varName = getNodeText(nameNode, this.source);
757
+ loopVarNames.add(varName);
758
+ }
759
+ }
760
+ // Mark all loop variables as unknown BEFORE visiting children
761
+ // Also add to loopVariables set so they're not overwritten in handleVariableDeclaration
762
+ for (const varName of loopVarNames) {
763
+ this.symbols.set(varName, createUnknown(getNodeLine(node)));
764
+ this.loopVariables.add(varName);
765
+ }
766
+ // Track iterator assignments in for-loop init (e.g., for(Iterator iter = list.iterator(); ...))
767
+ if (node.type === 'for_statement') {
768
+ const initNode = node.childForFieldName('init');
769
+ if (initNode) {
770
+ this.trackIteratorsInNode(initNode);
771
+ }
772
+ }
773
+ // Visit all children (condition, body, etc.)
774
+ for (const child of node.children) {
775
+ this.visit(child);
776
+ }
777
+ // After visiting children, ensure loop variables stay unknown (they may have been overwritten)
778
+ for (const varName of loopVarNames) {
779
+ this.symbols.set(varName, createUnknown(getNodeLine(node)));
780
+ }
781
+ }
782
+ collectLoopVariableNames(node, names) {
783
+ // Find all variable names that are defined/modified in this node
784
+ if (node.type === 'local_variable_declaration') {
785
+ for (const child of node.children) {
786
+ if (child.type === 'variable_declarator') {
787
+ const nameNode = child.childForFieldName('name');
788
+ if (nameNode) {
789
+ names.add(getNodeText(nameNode, this.source));
790
+ }
791
+ }
792
+ }
793
+ }
794
+ else if (node.type === 'assignment_expression' || node.type === 'update_expression') {
795
+ // Find the variable being assigned/updated
796
+ const leftNode = node.childForFieldName('left') || node.childForFieldName('operand');
797
+ if (leftNode && leftNode.type === 'identifier') {
798
+ names.add(getNodeText(leftNode, this.source));
799
+ }
800
+ }
801
+ // Recurse into children
802
+ for (const child of node.children) {
803
+ if (child)
804
+ this.collectLoopVariableNames(child, names);
805
+ }
806
+ }
807
+ /**
808
+ * Handle synchronized statements.
809
+ * Operations inside synchronized blocks are atomic, so field strong updates are safe.
810
+ */
811
+ handleSynchronizedStatement(node) {
812
+ const wasInSyncBlock = this.inSynchronizedBlock;
813
+ this.inSynchronizedBlock = true;
814
+ // Visit all children and track their lines as synchronized
815
+ for (const child of node.children) {
816
+ this.collectSynchronizedLines(child);
817
+ this.visit(child);
818
+ }
819
+ this.inSynchronizedBlock = wasInSyncBlock;
820
+ }
821
+ /**
822
+ * Recursively collect line numbers that are inside a synchronized block.
823
+ */
824
+ collectSynchronizedLines(node) {
825
+ const line = getNodeLine(node);
826
+ if (line > 0) {
827
+ this.synchronizedLines.add(line);
828
+ }
829
+ for (const child of node.children) {
830
+ if (child) {
831
+ this.collectSynchronizedLines(child);
832
+ }
833
+ }
834
+ }
835
+ markLoopVariables(node) {
836
+ // Find all variable declarations or assignments and mark them as unknown
837
+ if (node.type === 'local_variable_declaration') {
838
+ for (const child of node.children) {
839
+ if (child.type === 'variable_declarator') {
840
+ const nameNode = child.childForFieldName('name');
841
+ if (nameNode) {
842
+ const varName = getNodeText(nameNode, this.source);
843
+ this.symbols.set(varName, createUnknown(getNodeLine(node)));
844
+ }
845
+ }
846
+ }
847
+ }
848
+ else if (node.type === 'assignment_expression' || node.type === 'update_expression') {
849
+ // Find the variable being assigned/updated
850
+ const leftNode = node.childForFieldName('left') || node.childForFieldName('operand');
851
+ if (leftNode) {
852
+ const varName = getNodeText(leftNode, this.source);
853
+ this.symbols.set(varName, createUnknown(getNodeLine(node)));
854
+ }
855
+ }
856
+ // Recurse into children
857
+ for (const child of node.children) {
858
+ this.markLoopVariables(child);
859
+ }
860
+ }
861
+ // ===========================================================================
862
+ // Variable Tracking
863
+ // ===========================================================================
864
+ handleVariableDeclaration(node) {
865
+ for (const child of node.children) {
866
+ if (child.type === 'variable_declarator') {
867
+ const nameNode = child.childForFieldName('name');
868
+ const valueNode = child.childForFieldName('value');
869
+ if (nameNode) {
870
+ const varName = getNodeText(nameNode, this.source);
871
+ const scopedName = this.getScopedName(varName);
872
+ const line = getNodeLine(node);
873
+ // Skip loop variables - they should stay unknown
874
+ if (this.loopVariables.has(varName) || this.loopVariables.has(scopedName)) {
875
+ continue;
876
+ }
877
+ if (valueNode) {
878
+ // Track iterator assignments: iter = collection.iterator()
879
+ this.trackIteratorAssignment(scopedName, valueNode);
880
+ const isTainted = this.isTaintedExpression(valueNode);
881
+ if (isTainted) {
882
+ this.tainted.add(scopedName);
883
+ this.sanitizedVars.delete(scopedName); // No longer sanitized if receiving tainted value
884
+ this.definitionNodes.set(scopedName, valueNode);
885
+ this.symbols.set(scopedName, createUnknown(line));
886
+ }
887
+ else {
888
+ if (this.inConditionalBranch) {
889
+ this.symbols.set(scopedName, createUnknown(line));
890
+ continue;
891
+ }
892
+ this.tainted.delete(scopedName);
893
+ this.definitionNodes.delete(scopedName);
894
+ const value = this.evaluateExpression(valueNode);
895
+ this.symbols.set(scopedName, value);
896
+ // Track if this variable was explicitly assigned from a sanitizer call
897
+ if (this.isSanitizerMethodCall(valueNode)) {
898
+ this.sanitizedVars.add(scopedName);
899
+ }
900
+ // Check if this is an anti-sanitizer call that reintroduces taint
901
+ // e.g., URLDecoder.decode(sanitizedVar) produces tainted output
902
+ if (this.antiSanitizerReintroducesTaint(valueNode)) {
903
+ this.tainted.add(scopedName);
904
+ this.sanitizedVars.delete(scopedName);
905
+ }
906
+ }
907
+ }
908
+ else {
909
+ this.symbols.set(scopedName, createUnknown(line));
910
+ }
911
+ }
912
+ }
913
+ }
914
+ }
915
+ handleAssignment(node) {
916
+ const left = node.childForFieldName('left');
917
+ const right = node.childForFieldName('right');
918
+ if (!left || !right)
919
+ return;
920
+ // Handle chained assignments like o1 = o2 = o3 = value
921
+ // Process the right side first if it's also an assignment
922
+ if (right.type === 'assignment_expression') {
923
+ this.handleAssignment(right);
924
+ }
925
+ // Check if this is an array element assignment: array[index] = value
926
+ if (left.type === 'array_access' || left.type === 'subscript_expression') {
927
+ this.handleArrayElementAssignment(left, right, node);
928
+ return;
929
+ }
930
+ const varName = getNodeText(left, this.source);
931
+ // Only scope simple variable names, not field access like this.field
932
+ const scopedName = varName.includes('.') ? varName : this.getScopedName(varName);
933
+ const line = getNodeLine(node);
934
+ // Track iterator assignments: iter = collection.iterator()
935
+ this.trackIteratorAssignment(scopedName, right);
936
+ if (this.isTaintedExpression(right)) {
937
+ this.tainted.add(scopedName);
938
+ this.sanitizedVars.delete(scopedName); // No longer sanitized if receiving tainted value
939
+ this.definitionNodes.set(scopedName, right);
940
+ this.symbols.set(scopedName, createUnknown(line));
941
+ // Track constructor field assignments: this.field = taintedParam
942
+ if (this.inConstructor && varName.startsWith('this.')) {
943
+ const fieldName = varName.substring(5); // Remove 'this.' prefix
944
+ const rightText = getNodeText(right, this.source);
945
+ // Check if right side is a constructor parameter
946
+ if (this.constructorParamPositions.has(rightText)) {
947
+ const paramPosition = this.constructorParamPositions.get(rightText);
948
+ const taintType = this.getTaintTypeForVariable(rightText);
949
+ this.instanceFieldTaint.set(fieldName, {
950
+ fieldName,
951
+ className: this.currentClassName || 'Unknown',
952
+ sourceParam: rightText,
953
+ paramPosition,
954
+ taintType: taintType || 'interprocedural_param',
955
+ assignmentLine: line,
956
+ });
957
+ }
958
+ }
959
+ }
960
+ else {
961
+ if (this.inConditionalBranch) {
962
+ this.symbols.set(scopedName, createUnknown(line));
963
+ return;
964
+ }
965
+ // Check if this is a class field assignment outside synchronized block
966
+ // Class fields are shared across threads, so strong updates are unsafe
967
+ // unless we're in a synchronized block
968
+ const baseVarName = varName.includes('.') ? varName.split('.').pop() : varName;
969
+ const isClassField = this.classFields.has(baseVarName);
970
+ if (isClassField && !this.inSynchronizedBlock) {
971
+ // Don't remove taint from class fields outside synchronized blocks
972
+ // Another thread could have set the field to a tainted value
973
+ // Mark as unknown since we can't guarantee the value
974
+ this.symbols.set(scopedName, createUnknown(line));
975
+ // Keep the variable tainted if it was previously tainted
976
+ // (don't call this.tainted.delete)
977
+ }
978
+ else {
979
+ this.tainted.delete(scopedName);
980
+ this.definitionNodes.delete(scopedName);
981
+ const value = this.evaluateExpression(right);
982
+ this.symbols.set(scopedName, value);
983
+ // Track if this variable was explicitly assigned from a sanitizer call
984
+ if (this.isSanitizerMethodCall(right)) {
985
+ this.sanitizedVars.add(scopedName);
986
+ }
987
+ }
988
+ // Check if this is an anti-sanitizer call that reintroduces taint
989
+ // e.g., URLDecoder.decode(sanitizedVar) produces tainted output
990
+ if (this.antiSanitizerReintroducesTaint(right)) {
991
+ this.tainted.add(scopedName);
992
+ this.sanitizedVars.delete(scopedName);
993
+ }
994
+ }
995
+ }
996
+ handleArrayElementAssignment(left, right, node) {
997
+ // Extract array name and index from array[index]
998
+ const arrayNode = left.childForFieldName('array') || left.child(0);
999
+ const indexNode = left.childForFieldName('index') || left.child(2);
1000
+ if (!arrayNode)
1001
+ return;
1002
+ const arrayName = getNodeText(arrayNode, this.source);
1003
+ // Determine the index key (numeric or '*' for unknown)
1004
+ let indexKey = '*';
1005
+ if (indexNode) {
1006
+ const indexValue = this.evaluateExpression(indexNode);
1007
+ if (isKnown(indexValue) && (indexValue.type === 'int' || indexValue.type === 'string')) {
1008
+ indexKey = String(indexValue.value);
1009
+ }
1010
+ }
1011
+ const isTainted = this.isTaintedExpression(right);
1012
+ if (isTainted) {
1013
+ // Mark this array element as tainted
1014
+ if (!this.taintedArrayElements.has(arrayName)) {
1015
+ this.taintedArrayElements.set(arrayName, new Set());
1016
+ }
1017
+ this.taintedArrayElements.get(arrayName).add(indexKey);
1018
+ }
1019
+ else {
1020
+ // Mark this array element as clean (remove from tainted set)
1021
+ const taintedIndices = this.taintedArrayElements.get(arrayName);
1022
+ if (taintedIndices) {
1023
+ taintedIndices.delete(indexKey);
1024
+ // If we're assigning to a specific index and '*' is in the set,
1025
+ // we can't remove '*' because other indices might still be tainted
1026
+ }
1027
+ }
1028
+ }
1029
+ handleUpdateExpression(node) {
1030
+ // Handle x++, ++x, x--, --x
1031
+ // The operand is a positional child (identifier), not a named field
1032
+ const operand = node.children.find(c => c.type === 'identifier');
1033
+ if (!operand) {
1034
+ return;
1035
+ }
1036
+ const varName = getNodeText(operand, this.source);
1037
+ const scopedName = this.getScopedName(varName);
1038
+ const line = getNodeLine(node);
1039
+ // Skip loop variables
1040
+ if (this.loopVariables.has(varName) || this.loopVariables.has(scopedName)) {
1041
+ return;
1042
+ }
1043
+ const currentValue = this.symbols.get(scopedName);
1044
+ if (!currentValue || !isKnown(currentValue) || currentValue.type !== 'int') {
1045
+ // If not a known integer, mark as unknown
1046
+ this.symbols.set(scopedName, createUnknown(line));
1047
+ return;
1048
+ }
1049
+ // Determine operator (++ or --)
1050
+ const operatorNode = node.children.find(c => c.type === '++' || c.type === '--');
1051
+ if (!operatorNode) {
1052
+ this.symbols.set(scopedName, createUnknown(line));
1053
+ return;
1054
+ }
1055
+ const op = operatorNode.type;
1056
+ const currentInt = currentValue.value;
1057
+ const newValue = op === '++' ? currentInt + 1 : currentInt - 1;
1058
+ this.symbols.set(scopedName, {
1059
+ value: newValue,
1060
+ type: 'int',
1061
+ sourceLine: line,
1062
+ });
1063
+ }
1064
+ // ===========================================================================
1065
+ // Control Flow Analysis (Dead Code Detection)
1066
+ // ===========================================================================
1067
+ handleIfStatement(node) {
1068
+ const condition = node.childForFieldName('condition');
1069
+ const consequence = node.childForFieldName('consequence');
1070
+ const alternative = node.childForFieldName('alternative');
1071
+ if (!condition) {
1072
+ for (const child of node.children) {
1073
+ this.visit(child);
1074
+ }
1075
+ return;
1076
+ }
1077
+ const condValue = this.evaluateExpression(condition);
1078
+ if (isKnown(condValue) && condValue.type === 'bool') {
1079
+ if (condValue.value === true) {
1080
+ if (alternative) {
1081
+ this.markUnreachable(alternative);
1082
+ }
1083
+ if (consequence) {
1084
+ this.visit(consequence);
1085
+ }
1086
+ }
1087
+ else {
1088
+ if (consequence) {
1089
+ this.markUnreachable(consequence);
1090
+ }
1091
+ if (alternative) {
1092
+ this.visit(alternative);
1093
+ }
1094
+ }
1095
+ }
1096
+ else {
1097
+ const taintedBefore = new Set(this.tainted);
1098
+ const wasInConditional = this.inConditionalBranch;
1099
+ this.inConditionalBranch = true;
1100
+ // Get the condition expression string for tracking
1101
+ const condExpr = getNodeText(condition, this.source);
1102
+ const normalizedCond = this.normalizeCondition(condExpr);
1103
+ // Check if we're entering a block with a negated condition
1104
+ // If so, temporarily remove taints that were added under the positive condition
1105
+ const negatedCond = this.getNegatedCondition(normalizedCond);
1106
+ const taintsToExclude = this.conditionalTaints.get(negatedCond) || new Set();
1107
+ // Visit then branch with condition context
1108
+ this.conditionStack.push(normalizedCond);
1109
+ if (consequence) {
1110
+ this.visit(consequence);
1111
+ }
1112
+ this.conditionStack.pop();
1113
+ const taintedAfterThen = new Set(this.tainted);
1114
+ // Track which variables were newly tainted in the then branch
1115
+ const newlyTaintedInThen = new Set();
1116
+ for (const v of taintedAfterThen) {
1117
+ if (!taintedBefore.has(v)) {
1118
+ newlyTaintedInThen.add(v);
1119
+ }
1120
+ }
1121
+ // Record conditional taints for this condition
1122
+ if (newlyTaintedInThen.size > 0) {
1123
+ if (!this.conditionalTaints.has(normalizedCond)) {
1124
+ this.conditionalTaints.set(normalizedCond, new Set());
1125
+ }
1126
+ for (const v of newlyTaintedInThen) {
1127
+ this.conditionalTaints.get(normalizedCond).add(v);
1128
+ }
1129
+ }
1130
+ // Visit else branch
1131
+ this.tainted = new Set(taintedBefore);
1132
+ this.conditionStack.push(negatedCond);
1133
+ if (alternative) {
1134
+ this.visit(alternative);
1135
+ }
1136
+ this.conditionStack.pop();
1137
+ const taintedAfterElse = new Set(this.tainted);
1138
+ // Track which variables were newly tainted in the else branch
1139
+ const newlyTaintedInElse = new Set();
1140
+ for (const v of taintedAfterElse) {
1141
+ if (!taintedBefore.has(v)) {
1142
+ newlyTaintedInElse.add(v);
1143
+ }
1144
+ }
1145
+ // Record conditional taints for the negated condition
1146
+ if (newlyTaintedInElse.size > 0) {
1147
+ if (!this.conditionalTaints.has(negatedCond)) {
1148
+ this.conditionalTaints.set(negatedCond, new Set());
1149
+ }
1150
+ for (const v of newlyTaintedInElse) {
1151
+ this.conditionalTaints.get(negatedCond).add(v);
1152
+ }
1153
+ }
1154
+ this.inConditionalBranch = wasInConditional;
1155
+ this.tainted = new Set([...taintedBefore, ...taintedAfterThen, ...taintedAfterElse]);
1156
+ }
1157
+ }
1158
+ /**
1159
+ * Normalize a condition expression for comparison.
1160
+ * Strips parentheses and whitespace for consistent matching.
1161
+ */
1162
+ normalizeCondition(cond) {
1163
+ // Remove outer parentheses from parenthesized expressions
1164
+ let normalized = cond.trim();
1165
+ while (normalized.startsWith('(') && normalized.endsWith(')')) {
1166
+ // Check if the parens are balanced (not something like "(a) && (b)")
1167
+ let depth = 0;
1168
+ let balanced = true;
1169
+ for (let i = 0; i < normalized.length - 1; i++) {
1170
+ if (normalized[i] === '(')
1171
+ depth++;
1172
+ else if (normalized[i] === ')')
1173
+ depth--;
1174
+ if (depth === 0 && i > 0) {
1175
+ balanced = false;
1176
+ break;
1177
+ }
1178
+ }
1179
+ if (balanced) {
1180
+ normalized = normalized.slice(1, -1).trim();
1181
+ }
1182
+ else {
1183
+ break;
1184
+ }
1185
+ }
1186
+ return normalized;
1187
+ }
1188
+ /**
1189
+ * Get the negated form of a condition expression.
1190
+ * "x" -> "!x"
1191
+ * "!x" -> "x"
1192
+ */
1193
+ getNegatedCondition(cond) {
1194
+ const normalized = this.normalizeCondition(cond);
1195
+ if (normalized.startsWith('!')) {
1196
+ // !x -> x
1197
+ return this.normalizeCondition(normalized.slice(1));
1198
+ }
1199
+ else {
1200
+ // x -> !x
1201
+ return '!' + normalized;
1202
+ }
1203
+ }
1204
+ /**
1205
+ * Check if a variable's taint should be excluded in the current condition context.
1206
+ * Returns true if the variable was tainted under a condition that is mutually
1207
+ * exclusive with the current condition context.
1208
+ */
1209
+ isExcludedByCondition(varName) {
1210
+ if (this.conditionStack.length === 0) {
1211
+ return false;
1212
+ }
1213
+ // Check if any current condition is the negation of a condition where varName was tainted
1214
+ for (const currentCond of this.conditionStack) {
1215
+ const negatedCond = this.getNegatedCondition(currentCond);
1216
+ const taintsUnderNegated = this.conditionalTaints.get(negatedCond);
1217
+ if (taintsUnderNegated && taintsUnderNegated.has(varName)) {
1218
+ // The variable was tainted under the negated condition,
1219
+ // and we're currently under the opposite condition,
1220
+ // so the taint doesn't apply here
1221
+ return true;
1222
+ }
1223
+ }
1224
+ return false;
1225
+ }
1226
+ handleSwitch(node) {
1227
+ let switchValue = null;
1228
+ for (const child of node.children) {
1229
+ if (child.type === 'parenthesized_expression') {
1230
+ const inner = child.children.find((c) => c.type !== '(' && c.type !== ')');
1231
+ if (inner) {
1232
+ switchValue = this.evaluateExpression(inner);
1233
+ }
1234
+ break;
1235
+ }
1236
+ }
1237
+ const switchBlock = node.children.find((c) => c.type === 'switch_block');
1238
+ if (!switchBlock) {
1239
+ for (const child of node.children) {
1240
+ this.visit(child);
1241
+ }
1242
+ return;
1243
+ }
1244
+ const caseGroups = switchBlock.children.filter((c) => c.type === 'switch_block_statement_group' || c.type === 'switch_rule');
1245
+ if (switchValue && isKnown(switchValue)) {
1246
+ let matchingIdx = -1;
1247
+ let defaultIdx = -1;
1248
+ for (let i = 0; i < caseGroups.length; i++) {
1249
+ const caseGroup = caseGroups[i];
1250
+ for (const child of caseGroup.children) {
1251
+ if (child.type === 'switch_label') {
1252
+ const labelText = getNodeText(child, this.source);
1253
+ if (labelText.includes('default')) {
1254
+ defaultIdx = i;
1255
+ }
1256
+ else {
1257
+ const caseValue = this.extractCaseValue(child);
1258
+ if (caseValue !== null && caseValue === switchValue.value) {
1259
+ matchingIdx = i;
1260
+ }
1261
+ }
1262
+ }
1263
+ }
1264
+ }
1265
+ const startIdx = matchingIdx >= 0 ? matchingIdx : defaultIdx;
1266
+ for (let i = 0; i < startIdx && startIdx >= 0; i++) {
1267
+ this.markUnreachable(caseGroups[i]);
1268
+ }
1269
+ if (startIdx >= 0) {
1270
+ for (let i = startIdx; i < caseGroups.length; i++) {
1271
+ this.visit(caseGroups[i]);
1272
+ const hasBreak = this.hasBreakStatement(caseGroups[i]);
1273
+ if (hasBreak) {
1274
+ for (let j = i + 1; j < caseGroups.length; j++) {
1275
+ this.markUnreachable(caseGroups[j]);
1276
+ }
1277
+ break;
1278
+ }
1279
+ }
1280
+ }
1281
+ }
1282
+ else {
1283
+ for (const caseGroup of caseGroups) {
1284
+ this.visit(caseGroup);
1285
+ }
1286
+ }
1287
+ }
1288
+ handleTernary(node) {
1289
+ const condition = node.childForFieldName('condition');
1290
+ const consequence = node.childForFieldName('consequence');
1291
+ const alternative = node.childForFieldName('alternative');
1292
+ if (condition) {
1293
+ const condValue = this.evaluateExpression(condition);
1294
+ if (isKnown(condValue) && condValue.type === 'bool') {
1295
+ if (condValue.value === true && alternative) {
1296
+ this.markUnreachable(alternative);
1297
+ }
1298
+ else if (condValue.value === false && consequence) {
1299
+ this.markUnreachable(consequence);
1300
+ }
1301
+ }
1302
+ }
1303
+ for (const child of node.children) {
1304
+ if (!this.unreachableLines.has(getNodeLine(child))) {
1305
+ this.visit(child);
1306
+ }
1307
+ }
1308
+ }
1309
+ handleExpressionStatement(node) {
1310
+ for (const child of node.children) {
1311
+ if (child.type === 'method_invocation') {
1312
+ this.checkCollectionTaint(child);
1313
+ }
1314
+ }
1315
+ for (const child of node.children) {
1316
+ this.visit(child);
1317
+ }
1318
+ }
1319
+ markUnreachable(node) {
1320
+ const startLine = node.startPosition.row + 1;
1321
+ const endLine = node.endPosition.row + 1;
1322
+ for (let line = startLine; line <= endLine; line++) {
1323
+ this.unreachableLines.add(line);
1324
+ }
1325
+ }
1326
+ hasBreakStatement(node) {
1327
+ if (node.type === 'break_statement') {
1328
+ return true;
1329
+ }
1330
+ for (const child of node.children) {
1331
+ if (this.hasBreakStatement(child)) {
1332
+ return true;
1333
+ }
1334
+ }
1335
+ return false;
1336
+ }
1337
+ extractCaseValue(labelNode) {
1338
+ for (const child of labelNode.children) {
1339
+ if (child.type === 'decimal_integer_literal') {
1340
+ return parseInt(getNodeText(child, this.source), 10);
1341
+ }
1342
+ if (child.type === 'character_literal') {
1343
+ const text = getNodeText(child, this.source);
1344
+ return text.slice(1, -1);
1345
+ }
1346
+ if (child.type === 'string_literal') {
1347
+ const text = getNodeText(child, this.source);
1348
+ return text.slice(1, -1);
1349
+ }
1350
+ }
1351
+ return null;
1352
+ }
1353
+ // ===========================================================================
1354
+ // Taint Analysis Integration
1355
+ // ===========================================================================
1356
+ /**
1357
+ * Check if an expression is a call to a sanitizer method.
1358
+ * This includes both built-in sanitizers and @sanitizer annotated methods.
1359
+ */
1360
+ isSanitizerMethodCall(node) {
1361
+ if (node.type !== 'method_invocation') {
1362
+ return false;
1363
+ }
1364
+ const nameNode = node.childForFieldName('name');
1365
+ if (!nameNode) {
1366
+ return false;
1367
+ }
1368
+ const methodName = getNodeText(nameNode, this.source);
1369
+ return SANITIZER_METHODS.has(methodName) || this.methodReturnsSanitized.has(methodName);
1370
+ }
1371
+ /**
1372
+ * Check if an expression is a call to an anti-sanitizer method.
1373
+ * Anti-sanitizers reverse the effect of sanitization (e.g., URLDecoder.decode reverses URLEncoder.encode).
1374
+ * If an argument to the anti-sanitizer was previously sanitized, the result is tainted again.
1375
+ */
1376
+ isAntiSanitizerCall(node) {
1377
+ if (node.type !== 'method_invocation') {
1378
+ return false;
1379
+ }
1380
+ const nameNode = node.childForFieldName('name');
1381
+ if (!nameNode) {
1382
+ return false;
1383
+ }
1384
+ const methodName = getNodeText(nameNode, this.source);
1385
+ return ANTI_SANITIZER_METHODS.has(methodName);
1386
+ }
1387
+ /**
1388
+ * Check if an anti-sanitizer call has a sanitized argument (which means the result should be tainted).
1389
+ * For example: URLDecoder.decode(sanitizedVar) should produce tainted output.
1390
+ */
1391
+ antiSanitizerReintroducesTaint(node) {
1392
+ if (!this.isAntiSanitizerCall(node)) {
1393
+ return false;
1394
+ }
1395
+ const argsNode = node.childForFieldName('arguments');
1396
+ if (!argsNode) {
1397
+ return false;
1398
+ }
1399
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1400
+ // Check if any argument is a sanitized variable
1401
+ for (const arg of args) {
1402
+ if (arg.type === 'identifier') {
1403
+ const varName = getNodeText(arg, this.source);
1404
+ const scopedName = this.getScopedName(varName);
1405
+ if (this.sanitizedVars.has(scopedName) || this.sanitizedVars.has(varName)) {
1406
+ return true;
1407
+ }
1408
+ }
1409
+ // Also check if any argument was originally tainted (even if currently not in tainted set)
1410
+ // This handles cases where taint flows through multiple variables
1411
+ if (this.isTaintedExpression(arg)) {
1412
+ return true;
1413
+ }
1414
+ }
1415
+ return false;
1416
+ }
1417
+ /**
1418
+ * Recursively track iterator assignments in a node (for handling for-loop init).
1419
+ */
1420
+ trackIteratorsInNode(node) {
1421
+ if (node.type === 'local_variable_declaration') {
1422
+ for (const child of node.children) {
1423
+ if (child.type === 'variable_declarator') {
1424
+ const nameNode = child.childForFieldName('name');
1425
+ const valueNode = child.childForFieldName('value');
1426
+ if (nameNode && valueNode) {
1427
+ const varName = getNodeText(nameNode, this.source);
1428
+ const scopedName = this.getScopedName(varName);
1429
+ this.trackIteratorAssignment(scopedName, valueNode);
1430
+ }
1431
+ }
1432
+ }
1433
+ }
1434
+ // Recurse into children
1435
+ for (const child of node.children) {
1436
+ if (child)
1437
+ this.trackIteratorsInNode(child);
1438
+ }
1439
+ }
1440
+ /**
1441
+ * Track iterator assignments: when iter = collection.iterator() is called,
1442
+ * record that 'iter' was created from 'collection' so we can propagate taint
1443
+ * through iter.next() calls.
1444
+ */
1445
+ trackIteratorAssignment(varName, valueNode) {
1446
+ if (valueNode.type !== 'method_invocation')
1447
+ return;
1448
+ const nameNode = valueNode.childForFieldName('name');
1449
+ const objectNode = valueNode.childForFieldName('object');
1450
+ if (!nameNode || !objectNode)
1451
+ return;
1452
+ const methodName = getNodeText(nameNode, this.source);
1453
+ // Track iterator() calls
1454
+ if (methodName === 'iterator' || methodName === 'listIterator') {
1455
+ const collectionName = getNodeText(objectNode, this.source);
1456
+ this.iteratorSources.set(varName, collectionName);
1457
+ }
1458
+ }
1459
+ /**
1460
+ * Check if a collection is tainted (has any tainted elements).
1461
+ */
1462
+ isCollectionTainted(collectionName) {
1463
+ // Check if the collection has tainted elements via list tracking
1464
+ const listElems = this.listElements.get(collectionName);
1465
+ if (listElems) {
1466
+ for (const elem of listElems) {
1467
+ if (elem === '__TAINTED__')
1468
+ return true;
1469
+ if (elem !== null) {
1470
+ const scopedElem = this.currentMethod ? `${this.currentMethod}:${elem}` : elem;
1471
+ if (this.tainted.has(elem) || this.tainted.has(scopedElem)) {
1472
+ return true;
1473
+ }
1474
+ }
1475
+ }
1476
+ }
1477
+ // Check if the collection has tainted keys (for maps)
1478
+ const taintedKeys = this.taintedCollections.get(collectionName);
1479
+ if (taintedKeys && taintedKeys.size > 0) {
1480
+ return true;
1481
+ }
1482
+ // Check if the collection variable itself is tainted
1483
+ const scopedCollection = this.currentMethod ? `${this.currentMethod}:${collectionName}` : collectionName;
1484
+ if (this.tainted.has(collectionName) || this.tainted.has(scopedCollection)) {
1485
+ return true;
1486
+ }
1487
+ return false;
1488
+ }
1489
+ /**
1490
+ * Get the taint type for a variable based on how it was tainted.
1491
+ * Returns the taint type (e.g., 'http_param', 'io_input') or null if not found.
1492
+ */
1493
+ getTaintTypeForVariable(varName) {
1494
+ // Check if it's a tainted parameter from the list
1495
+ for (const tp of this.taintedParametersList) {
1496
+ if (tp.paramName === varName) {
1497
+ // For now, return a generic type - the actual type would come from source matching
1498
+ return 'interprocedural_param';
1499
+ }
1500
+ }
1501
+ // If the variable is tainted but we don't know the type, return generic
1502
+ if (this.tainted.has(varName) || this.tainted.has(this.getScopedName(varName))) {
1503
+ return 'interprocedural_param';
1504
+ }
1505
+ return null;
1506
+ }
1507
+ isTaintedExpression(node) {
1508
+ const text = getNodeText(node, this.source);
1509
+ if (node.type === 'method_invocation') {
1510
+ const nameNode = node.childForFieldName('name');
1511
+ const objectNode = node.childForFieldName('object');
1512
+ if (nameNode) {
1513
+ const methodName = getNodeText(nameNode, this.source);
1514
+ if (SANITIZER_METHODS.has(methodName)) {
1515
+ return false;
1516
+ }
1517
+ if (this.methodReturnsConstant.has(methodName)) {
1518
+ return false;
1519
+ }
1520
+ if (this.methodReturnsSanitized.has(methodName)) {
1521
+ return false;
1522
+ }
1523
+ // Method returns safe value even with tainted input
1524
+ if (this.methodReturnsSafeValue.has(methodName)) {
1525
+ return false;
1526
+ }
1527
+ const returnedParamIdx = this.methodReturnsParameter.get(methodName);
1528
+ if (returnedParamIdx !== undefined && returnedParamIdx >= 0) {
1529
+ const argsNode = node.childForFieldName('arguments');
1530
+ if (argsNode) {
1531
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1532
+ if (args.length > returnedParamIdx) {
1533
+ const argNode = args[returnedParamIdx];
1534
+ if (this.isTaintedExpression(argNode)) {
1535
+ return true;
1536
+ }
1537
+ }
1538
+ }
1539
+ }
1540
+ if (PROPAGATOR_METHODS.has(methodName)) {
1541
+ const argsNode = node.childForFieldName('arguments');
1542
+ if (argsNode) {
1543
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1544
+ for (const argNode of args) {
1545
+ if (this.isTaintedExpression(argNode)) {
1546
+ return true;
1547
+ }
1548
+ }
1549
+ }
1550
+ }
1551
+ // IMPORTANT: Handle list.get() BEFORE generic object taint check
1552
+ // This allows precise index tracking to work correctly
1553
+ if (methodName === 'get') {
1554
+ const argsNode = node.childForFieldName('arguments');
1555
+ if (objectNode && argsNode) {
1556
+ const collectionName = getNodeText(objectNode, this.source);
1557
+ const listElems = this.listElements.get(collectionName);
1558
+ if (listElems) {
1559
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1560
+ if (args.length > 0) {
1561
+ // First, check if ANY element in the collection is tainted
1562
+ // This is a conservative approach for safety analysis
1563
+ const hasAnyTainted = listElems.some(e => {
1564
+ if (e === null)
1565
+ return false;
1566
+ if (e === '__TAINTED__')
1567
+ return true;
1568
+ const scopedE = this.currentMethod ? `${this.currentMethod}:${e}` : e;
1569
+ return this.tainted.has(e) || this.tainted.has(scopedE);
1570
+ });
1571
+ const indexValue = this.evaluateExpression(args[0]);
1572
+ if (isKnown(indexValue) && indexValue.type === 'int') {
1573
+ const index = indexValue.value;
1574
+ if (index >= 0 && index < listElems.length) {
1575
+ const elem = listElems[index];
1576
+ if (elem === null) {
1577
+ // Index points to a literal value (like "safe" or "moresafe")
1578
+ // which was added directly, not via a variable - this is clean
1579
+ return false;
1580
+ }
1581
+ if (elem === '__TAINTED__') {
1582
+ return true;
1583
+ }
1584
+ // Check both scoped and unscoped name for taint
1585
+ const scopedElem = this.currentMethod ? `${this.currentMethod}:${elem}` : elem;
1586
+ if (this.tainted.has(elem) || this.tainted.has(scopedElem)) {
1587
+ return true;
1588
+ }
1589
+ // Specific element at known index is clean (not tainted variable)
1590
+ return false;
1591
+ }
1592
+ }
1593
+ // Unknown index - return true if any element is tainted
1594
+ return hasAnyTainted;
1595
+ }
1596
+ }
1597
+ const taintedKeys = this.taintedCollections.get(collectionName);
1598
+ if (taintedKeys) {
1599
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1600
+ if (args.length > 0) {
1601
+ const keyValue = this.evaluateExpression(args[0]);
1602
+ if (isKnown(keyValue) && keyValue.type === 'string') {
1603
+ const keyStr = String(keyValue.value);
1604
+ if (!taintedKeys.has(keyStr) && !taintedKeys.has('*')) {
1605
+ return false;
1606
+ }
1607
+ if (taintedKeys.has(keyStr)) {
1608
+ return true;
1609
+ }
1610
+ }
1611
+ return true;
1612
+ }
1613
+ }
1614
+ else if (!listElems) {
1615
+ // No key tracking and no list element tracking
1616
+ // Fall back to checking if the collection itself is tainted
1617
+ if (this.isCollectionTainted(collectionName)) {
1618
+ return true;
1619
+ }
1620
+ // Collection is not tainted - safe to return false
1621
+ return false;
1622
+ }
1623
+ }
1624
+ }
1625
+ // Handle getLast(), getFirst() - return tainted if list has any tainted element
1626
+ if (methodName === 'getLast' || methodName === 'getFirst' || methodName === 'peek' ||
1627
+ methodName === 'peekFirst' || methodName === 'peekLast' || methodName === 'poll' ||
1628
+ methodName === 'pollFirst' || methodName === 'pollLast' || methodName === 'element') {
1629
+ if (objectNode) {
1630
+ const collectionName = getNodeText(objectNode, this.source);
1631
+ if (this.isCollectionTainted(collectionName)) {
1632
+ return true;
1633
+ }
1634
+ }
1635
+ }
1636
+ // Handle toArray() - return tainted if collection is tainted
1637
+ if (methodName === 'toArray') {
1638
+ if (objectNode) {
1639
+ const collectionName = getNodeText(objectNode, this.source);
1640
+ if (this.isCollectionTainted(collectionName)) {
1641
+ return true;
1642
+ }
1643
+ }
1644
+ }
1645
+ // Handle iterator.next() - return tainted if the iterator's source collection is tainted
1646
+ if (methodName === 'next') {
1647
+ if (objectNode) {
1648
+ const iteratorName = getNodeText(objectNode, this.source);
1649
+ // Check if this iterator was created from a tainted collection
1650
+ const sourceCollection = this.iteratorSources.get(iteratorName);
1651
+ if (sourceCollection) {
1652
+ // We know this is an iterator.next() call - return based on collection taint
1653
+ return this.isCollectionTainted(sourceCollection);
1654
+ }
1655
+ // Also check with scoped name
1656
+ const scopedIterator = this.currentMethod ? `${this.currentMethod}:${iteratorName}` : iteratorName;
1657
+ const scopedSourceCollection = this.iteratorSources.get(scopedIterator);
1658
+ if (scopedSourceCollection) {
1659
+ // We know this is an iterator.next() call - return based on collection taint
1660
+ return this.isCollectionTainted(scopedSourceCollection);
1661
+ }
1662
+ // If we have no record of this iterator, fall through to other checks
1663
+ }
1664
+ }
1665
+ // Generic object taint check - applies to methods NOT handled above
1666
+ // Skip for 'get' since it has precise index tracking
1667
+ if (methodName !== 'get' && objectNode) {
1668
+ const objectText = getNodeText(objectNode, this.source);
1669
+ if (objectNode.type === 'identifier' && this.tainted.has(objectText)) {
1670
+ return true;
1671
+ }
1672
+ if (this.isTaintedExpression(objectNode)) {
1673
+ return true;
1674
+ }
1675
+ }
1676
+ }
1677
+ }
1678
+ if (node.type === 'array_access' || node.type === 'subscript_expression') {
1679
+ const arrayNode = node.childForFieldName('array') || node.child(0);
1680
+ const indexNode = node.childForFieldName('index') || node.child(2);
1681
+ if (arrayNode) {
1682
+ const arrayName = getNodeText(arrayNode, this.source);
1683
+ // Check if the whole array is tainted
1684
+ if (arrayNode.type === 'identifier' && this.tainted.has(arrayName)) {
1685
+ return true;
1686
+ }
1687
+ // Check element-level taint tracking
1688
+ const taintedIndices = this.taintedArrayElements.get(arrayName);
1689
+ if (taintedIndices) {
1690
+ // If '*' is in the set, the whole array has tainted elements
1691
+ if (taintedIndices.has('*')) {
1692
+ return true;
1693
+ }
1694
+ // Check specific index
1695
+ if (indexNode) {
1696
+ const indexValue = this.evaluateExpression(indexNode);
1697
+ if (isKnown(indexValue) && (indexValue.type === 'int' || indexValue.type === 'string')) {
1698
+ const indexKey = String(indexValue.value);
1699
+ if (taintedIndices.has(indexKey)) {
1700
+ return true;
1701
+ }
1702
+ // Specific index is NOT tainted - return false for this access
1703
+ return false;
1704
+ }
1705
+ // Unknown index - check if ANY element is tainted
1706
+ if (taintedIndices.size > 0) {
1707
+ return true;
1708
+ }
1709
+ }
1710
+ }
1711
+ // Recursively check if arrayNode itself is tainted
1712
+ if (this.isTaintedExpression(arrayNode)) {
1713
+ return true;
1714
+ }
1715
+ }
1716
+ }
1717
+ if (node.type === 'ternary_expression') {
1718
+ const condition = node.childForFieldName('condition');
1719
+ const consequence = node.childForFieldName('consequence');
1720
+ const alternative = node.childForFieldName('alternative');
1721
+ if (condition) {
1722
+ const condValue = this.evaluateExpression(condition);
1723
+ if (isKnown(condValue) && condValue.type === 'bool') {
1724
+ if (condValue.value === true && consequence) {
1725
+ return this.isTaintedExpression(consequence);
1726
+ }
1727
+ else if (condValue.value === false && alternative) {
1728
+ return this.isTaintedExpression(alternative);
1729
+ }
1730
+ }
1731
+ }
1732
+ return ((consequence ? this.isTaintedExpression(consequence) : false) ||
1733
+ (alternative ? this.isTaintedExpression(alternative) : false));
1734
+ }
1735
+ // Handle cast expressions - evaluate the inner expression, not the full text
1736
+ // This prevents false positives from patterns like ".next(" matching "(String) iter.next()"
1737
+ if (node.type === 'cast_expression') {
1738
+ const value = node.childForFieldName('value');
1739
+ if (value) {
1740
+ return this.isTaintedExpression(value);
1741
+ }
1742
+ }
1743
+ // Handle object creation expressions - check for collection copy constructors
1744
+ // e.g., new ArrayList(taintedList), new HashMap(taintedMap), List.copyOf(tainted)
1745
+ if (node.type === 'object_creation_expression') {
1746
+ const typeNode = node.childForFieldName('type');
1747
+ const argsNode = node.childForFieldName('arguments');
1748
+ if (typeNode && argsNode) {
1749
+ const typeName = getNodeText(typeNode, this.source);
1750
+ // Check if this is a known collection type
1751
+ const collectionTypes = ['ArrayList', 'LinkedList', 'HashSet', 'TreeSet', 'HashMap', 'TreeMap', 'LinkedHashMap', 'LinkedHashSet', 'Vector', 'CopyOnWriteArrayList', 'ConcurrentHashMap'];
1752
+ const isCollectionType = collectionTypes.some(t => typeName.includes(t));
1753
+ if (isCollectionType) {
1754
+ // Check if any argument is a tainted collection (copy constructor)
1755
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1756
+ for (const arg of args) {
1757
+ if (arg.type === 'identifier') {
1758
+ const argName = getNodeText(arg, this.source);
1759
+ if (this.isCollectionTainted(argName)) {
1760
+ return true;
1761
+ }
1762
+ }
1763
+ if (this.isTaintedExpression(arg)) {
1764
+ return true;
1765
+ }
1766
+ }
1767
+ }
1768
+ }
1769
+ }
1770
+ // Check taint patterns, but exclude known-safe iterator.next() calls
1771
+ // The .next( pattern is meant for Scanner.next(), not Iterator.next()
1772
+ if (TAINT_PATTERN_REGEX.test(text)) {
1773
+ // If this matches .next( but is a known iterator, skip this pattern
1774
+ if (text.includes('.next(') && node.type === 'method_invocation') {
1775
+ const objectNode = node.childForFieldName('object');
1776
+ if (objectNode) {
1777
+ const iteratorName = getNodeText(objectNode, this.source);
1778
+ const scopedIterator = this.currentMethod ? `${this.currentMethod}:${iteratorName}` : iteratorName;
1779
+ // If we've tracked this as an iterator, don't match the .next( pattern
1780
+ if (this.iteratorSources.has(iteratorName) || this.iteratorSources.has(scopedIterator)) {
1781
+ // Fall through to check other patterns and iterator-specific handling
1782
+ }
1783
+ else {
1784
+ return true;
1785
+ }
1786
+ }
1787
+ else {
1788
+ return true;
1789
+ }
1790
+ }
1791
+ else {
1792
+ return true;
1793
+ }
1794
+ }
1795
+ for (const pattern of this.additionalTaintPatterns) {
1796
+ if (text.includes(pattern)) {
1797
+ return true;
1798
+ }
1799
+ }
1800
+ if (node.type === 'identifier') {
1801
+ // Check both scoped and unscoped taint
1802
+ // Scoped: methodName:varName (for local variables)
1803
+ // Unscoped: varName (for class fields or when not in a method)
1804
+ const scopedName = this.currentMethod ? `${this.currentMethod}:${text}` : text;
1805
+ const isTainted = this.tainted.has(scopedName) || this.tainted.has(text);
1806
+ if (isTainted) {
1807
+ // Check if this taint is excluded by correlated predicate analysis
1808
+ // If the variable was tainted under condition C, and we're now under !C,
1809
+ // the taint doesn't apply in this context
1810
+ if (this.isExcludedByCondition(scopedName) || this.isExcludedByCondition(text)) {
1811
+ return false;
1812
+ }
1813
+ }
1814
+ return isTainted;
1815
+ }
1816
+ for (const child of node.children) {
1817
+ if (this.isTaintedExpression(child)) {
1818
+ return true;
1819
+ }
1820
+ }
1821
+ return false;
1822
+ }
1823
+ checkCollectionTaint(node) {
1824
+ const objectNode = node.childForFieldName('object');
1825
+ const nameNode = node.childForFieldName('name');
1826
+ const argsNode = node.childForFieldName('arguments');
1827
+ if (!objectNode || !nameNode || !argsNode)
1828
+ return;
1829
+ const methodName = getNodeText(nameNode, this.source);
1830
+ const collectionName = getNodeText(objectNode, this.source);
1831
+ if (methodName === 'put') {
1832
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1833
+ if (args.length >= 2) {
1834
+ const valueArg = args[1];
1835
+ if (this.isTaintedExpression(valueArg)) {
1836
+ const keyValue = this.evaluateExpression(args[0]);
1837
+ const keyStr = isKnown(keyValue) && keyValue.type === 'string'
1838
+ ? String(keyValue.value)
1839
+ : '*';
1840
+ if (!this.taintedCollections.has(collectionName)) {
1841
+ this.taintedCollections.set(collectionName, new Set());
1842
+ }
1843
+ this.taintedCollections.get(collectionName).add(keyStr);
1844
+ }
1845
+ }
1846
+ }
1847
+ if (methodName === 'add' || methodName === 'addLast') {
1848
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1849
+ if (args.length >= 1) {
1850
+ if (!this.listElements.has(collectionName)) {
1851
+ this.listElements.set(collectionName, []);
1852
+ }
1853
+ const list = this.listElements.get(collectionName);
1854
+ const valueArg = args[0];
1855
+ let isTainted = false;
1856
+ if (valueArg.type === 'identifier') {
1857
+ const varName = getNodeText(valueArg, this.source);
1858
+ // Check both scoped and unscoped name for taint
1859
+ const scopedName = this.currentMethod ? `${this.currentMethod}:${varName}` : varName;
1860
+ isTainted = this.tainted.has(scopedName) || this.tainted.has(varName);
1861
+ list.push(isTainted ? varName : null);
1862
+ }
1863
+ else if (this.isTaintedExpression(valueArg)) {
1864
+ list.push('__TAINTED__');
1865
+ isTainted = true;
1866
+ }
1867
+ else {
1868
+ list.push(null);
1869
+ }
1870
+ // Also mark the collection variable itself as tainted if it contains tainted elements
1871
+ if (isTainted) {
1872
+ const scopedCollection = this.currentMethod ? `${this.currentMethod}:${collectionName}` : collectionName;
1873
+ this.tainted.add(scopedCollection);
1874
+ }
1875
+ }
1876
+ }
1877
+ // Handle addAll() - copy all elements from source collection to target
1878
+ if (methodName === 'addAll') {
1879
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1880
+ if (args.length >= 1) {
1881
+ const sourceArg = args[0];
1882
+ if (sourceArg.type === 'identifier') {
1883
+ const sourceName = getNodeText(sourceArg, this.source);
1884
+ // Check if the source collection is tainted
1885
+ if (this.isCollectionTainted(sourceName)) {
1886
+ if (!this.listElements.has(collectionName)) {
1887
+ this.listElements.set(collectionName, []);
1888
+ }
1889
+ // Add a tainted marker to indicate the collection now has tainted elements
1890
+ this.listElements.get(collectionName).push('__TAINTED__');
1891
+ // Also mark the target collection as tainted
1892
+ const scopedCollection = this.currentMethod ? `${this.currentMethod}:${collectionName}` : collectionName;
1893
+ this.tainted.add(scopedCollection);
1894
+ }
1895
+ }
1896
+ else if (this.isTaintedExpression(sourceArg)) {
1897
+ if (!this.listElements.has(collectionName)) {
1898
+ this.listElements.set(collectionName, []);
1899
+ }
1900
+ this.listElements.get(collectionName).push('__TAINTED__');
1901
+ const scopedCollection = this.currentMethod ? `${this.currentMethod}:${collectionName}` : collectionName;
1902
+ this.tainted.add(scopedCollection);
1903
+ }
1904
+ }
1905
+ }
1906
+ // Handle putAll() - copy all entries from source map to target
1907
+ if (methodName === 'putAll') {
1908
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1909
+ if (args.length >= 1) {
1910
+ const sourceArg = args[0];
1911
+ if (sourceArg.type === 'identifier') {
1912
+ const sourceName = getNodeText(sourceArg, this.source);
1913
+ // Check if the source map has any tainted keys
1914
+ const sourceTaintedKeys = this.taintedCollections.get(sourceName);
1915
+ if (sourceTaintedKeys && sourceTaintedKeys.size > 0) {
1916
+ if (!this.taintedCollections.has(collectionName)) {
1917
+ this.taintedCollections.set(collectionName, new Set());
1918
+ }
1919
+ // Copy all tainted keys from source to target
1920
+ for (const key of sourceTaintedKeys) {
1921
+ this.taintedCollections.get(collectionName).add(key);
1922
+ }
1923
+ }
1924
+ // Also check if source collection itself is tainted
1925
+ if (this.isCollectionTainted(sourceName)) {
1926
+ const scopedCollection = this.currentMethod ? `${this.currentMethod}:${collectionName}` : collectionName;
1927
+ this.tainted.add(scopedCollection);
1928
+ }
1929
+ }
1930
+ else if (this.isTaintedExpression(sourceArg)) {
1931
+ if (!this.taintedCollections.has(collectionName)) {
1932
+ this.taintedCollections.set(collectionName, new Set());
1933
+ }
1934
+ this.taintedCollections.get(collectionName).add('*');
1935
+ const scopedCollection = this.currentMethod ? `${this.currentMethod}:${collectionName}` : collectionName;
1936
+ this.tainted.add(scopedCollection);
1937
+ }
1938
+ }
1939
+ }
1940
+ if (methodName === 'addFirst') {
1941
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1942
+ if (args.length >= 1) {
1943
+ if (!this.listElements.has(collectionName)) {
1944
+ this.listElements.set(collectionName, []);
1945
+ }
1946
+ const list = this.listElements.get(collectionName);
1947
+ const valueArg = args[0];
1948
+ if (valueArg.type === 'identifier') {
1949
+ const varName = getNodeText(valueArg, this.source);
1950
+ // Check both scoped and unscoped name for taint
1951
+ const scopedName = this.currentMethod ? `${this.currentMethod}:${varName}` : varName;
1952
+ const isTainted = this.tainted.has(scopedName) || this.tainted.has(varName);
1953
+ list.unshift(isTainted ? varName : null);
1954
+ }
1955
+ else if (this.isTaintedExpression(valueArg)) {
1956
+ list.unshift('__TAINTED__');
1957
+ }
1958
+ else {
1959
+ list.unshift(null);
1960
+ }
1961
+ }
1962
+ }
1963
+ // retainAll() does NOT transfer taint - it only keeps elements that already exist in the target collection
1964
+ // c2.retainAll(c1) keeps only elements in c2 that are in c1 - the values come from c2, not c1
1965
+ // So if c2 has "abc" and c1 has tainted_value, c2 either has "abc" or is empty - never tainted
1966
+ if (methodName === 'retainAll') {
1967
+ // retainAll doesn't introduce taint, so we don't need to do anything
1968
+ // The target collection keeps its own (non-tainted) values
1969
+ }
1970
+ if (methodName === 'remove') {
1971
+ const args = argsNode.children.filter((c) => c.type !== '(' && c.type !== ')' && c.type !== ',');
1972
+ if (args.length >= 1 && this.listElements.has(collectionName)) {
1973
+ const list = this.listElements.get(collectionName);
1974
+ const indexValue = this.evaluateExpression(args[0]);
1975
+ if (isKnown(indexValue) && indexValue.type === 'int') {
1976
+ const index = indexValue.value;
1977
+ if (index >= 0 && index < list.length) {
1978
+ list.splice(index, 1);
1979
+ }
1980
+ }
1981
+ }
1982
+ }
1983
+ }
1984
+ }
1985
+ //# sourceMappingURL=propagator.js.map