circle-ir 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +200 -0
  3. package/configs/sinks/code_injection.yaml +672 -0
  4. package/configs/sinks/command.yaml +917 -0
  5. package/configs/sinks/deserialization.yaml +105 -0
  6. package/configs/sinks/ldap.yaml +136 -0
  7. package/configs/sinks/nodejs.json +629 -0
  8. package/configs/sinks/path.yaml +715 -0
  9. package/configs/sinks/python.json +501 -0
  10. package/configs/sinks/rust.json +339 -0
  11. package/configs/sinks/sql.yaml +233 -0
  12. package/configs/sinks/ssrf.yaml +160 -0
  13. package/configs/sinks/xpath.yaml +121 -0
  14. package/configs/sinks/xss.yaml +727 -0
  15. package/configs/sources/db_sources.yaml +90 -0
  16. package/configs/sources/env_sources.yaml +94 -0
  17. package/configs/sources/express.json +197 -0
  18. package/configs/sources/file_sources.yaml +164 -0
  19. package/configs/sources/http_sources.yaml +379 -0
  20. package/configs/sources/io_sources.yaml +519 -0
  21. package/configs/sources/network_sources.yaml +99 -0
  22. package/configs/sources/python.json +230 -0
  23. package/configs/sources/rust.json +286 -0
  24. package/configs/sources/spring.yaml +70 -0
  25. package/dist/analysis/advisory-db.d.ts +86 -0
  26. package/dist/analysis/advisory-db.js +104 -0
  27. package/dist/analysis/advisory-db.js.map +1 -0
  28. package/dist/analysis/cargo-parser.d.ts +42 -0
  29. package/dist/analysis/cargo-parser.js +102 -0
  30. package/dist/analysis/cargo-parser.js.map +1 -0
  31. package/dist/analysis/config-loader.d.ts +37 -0
  32. package/dist/analysis/config-loader.js +1561 -0
  33. package/dist/analysis/config-loader.js.map +1 -0
  34. package/dist/analysis/constant-propagation/ast-utils.d.ts +25 -0
  35. package/dist/analysis/constant-propagation/ast-utils.js +34 -0
  36. package/dist/analysis/constant-propagation/ast-utils.js.map +1 -0
  37. package/dist/analysis/constant-propagation/evaluator.d.ts +32 -0
  38. package/dist/analysis/constant-propagation/evaluator.js +296 -0
  39. package/dist/analysis/constant-propagation/evaluator.js.map +1 -0
  40. package/dist/analysis/constant-propagation/index.d.ts +62 -0
  41. package/dist/analysis/constant-propagation/index.js +152 -0
  42. package/dist/analysis/constant-propagation/index.js.map +1 -0
  43. package/dist/analysis/constant-propagation/patterns.d.ts +8 -0
  44. package/dist/analysis/constant-propagation/patterns.js +126 -0
  45. package/dist/analysis/constant-propagation/patterns.js.map +1 -0
  46. package/dist/analysis/constant-propagation/propagator.d.ts +180 -0
  47. package/dist/analysis/constant-propagation/propagator.js +1985 -0
  48. package/dist/analysis/constant-propagation/propagator.js.map +1 -0
  49. package/dist/analysis/constant-propagation/types.d.ts +63 -0
  50. package/dist/analysis/constant-propagation/types.js +5 -0
  51. package/dist/analysis/constant-propagation/types.js.map +1 -0
  52. package/dist/analysis/constant-propagation.d.ts +9 -0
  53. package/dist/analysis/constant-propagation.js +18 -0
  54. package/dist/analysis/constant-propagation.js.map +1 -0
  55. package/dist/analysis/dependency-scanner.d.ts +79 -0
  56. package/dist/analysis/dependency-scanner.js +122 -0
  57. package/dist/analysis/dependency-scanner.js.map +1 -0
  58. package/dist/analysis/dfg-verifier.d.ts +116 -0
  59. package/dist/analysis/dfg-verifier.js +399 -0
  60. package/dist/analysis/dfg-verifier.js.map +1 -0
  61. package/dist/analysis/findings.d.ts +11 -0
  62. package/dist/analysis/findings.js +228 -0
  63. package/dist/analysis/findings.js.map +1 -0
  64. package/dist/analysis/index.d.ts +16 -0
  65. package/dist/analysis/index.js +18 -0
  66. package/dist/analysis/index.js.map +1 -0
  67. package/dist/analysis/interprocedural.d.ts +99 -0
  68. package/dist/analysis/interprocedural.js +526 -0
  69. package/dist/analysis/interprocedural.js.map +1 -0
  70. package/dist/analysis/path-finder.d.ts +133 -0
  71. package/dist/analysis/path-finder.js +354 -0
  72. package/dist/analysis/path-finder.js.map +1 -0
  73. package/dist/analysis/rules.d.ts +75 -0
  74. package/dist/analysis/rules.js +332 -0
  75. package/dist/analysis/rules.js.map +1 -0
  76. package/dist/analysis/semver.d.ts +27 -0
  77. package/dist/analysis/semver.js +127 -0
  78. package/dist/analysis/semver.js.map +1 -0
  79. package/dist/analysis/taint-matcher.d.ts +15 -0
  80. package/dist/analysis/taint-matcher.js +634 -0
  81. package/dist/analysis/taint-matcher.js.map +1 -0
  82. package/dist/analysis/taint-propagation.d.ts +67 -0
  83. package/dist/analysis/taint-propagation.js +298 -0
  84. package/dist/analysis/taint-propagation.js.map +1 -0
  85. package/dist/analysis/unresolved.d.ts +14 -0
  86. package/dist/analysis/unresolved.js +202 -0
  87. package/dist/analysis/unresolved.js.map +1 -0
  88. package/dist/analyzer.d.ts +43 -0
  89. package/dist/analyzer.js +1010 -0
  90. package/dist/analyzer.js.map +1 -0
  91. package/dist/browser/circle-ir.js +16576 -0
  92. package/dist/browser.d.ts +38 -0
  93. package/dist/browser.js +38 -0
  94. package/dist/browser.js.map +1 -0
  95. package/dist/core/circle-ir-core.cjs +13626 -0
  96. package/dist/core/circle-ir-core.d.ts +59 -0
  97. package/dist/core/circle-ir-core.js +13591 -0
  98. package/dist/core/extractors/calls.d.ts +13 -0
  99. package/dist/core/extractors/calls.js +1429 -0
  100. package/dist/core/extractors/calls.js.map +1 -0
  101. package/dist/core/extractors/cfg.d.ts +9 -0
  102. package/dist/core/extractors/cfg.js +519 -0
  103. package/dist/core/extractors/cfg.js.map +1 -0
  104. package/dist/core/extractors/dfg.d.ts +12 -0
  105. package/dist/core/extractors/dfg.js +1081 -0
  106. package/dist/core/extractors/dfg.js.map +1 -0
  107. package/dist/core/extractors/exports.d.ts +14 -0
  108. package/dist/core/extractors/exports.js +80 -0
  109. package/dist/core/extractors/exports.js.map +1 -0
  110. package/dist/core/extractors/imports.d.ts +9 -0
  111. package/dist/core/extractors/imports.js +739 -0
  112. package/dist/core/extractors/imports.js.map +1 -0
  113. package/dist/core/extractors/index.d.ts +10 -0
  114. package/dist/core/extractors/index.js +11 -0
  115. package/dist/core/extractors/index.js.map +1 -0
  116. package/dist/core/extractors/meta.d.ts +10 -0
  117. package/dist/core/extractors/meta.js +109 -0
  118. package/dist/core/extractors/meta.js.map +1 -0
  119. package/dist/core/extractors/types.d.ts +10 -0
  120. package/dist/core/extractors/types.js +1479 -0
  121. package/dist/core/extractors/types.js.map +1 -0
  122. package/dist/core/index.d.ts +5 -0
  123. package/dist/core/index.js +8 -0
  124. package/dist/core/index.js.map +1 -0
  125. package/dist/core/parser.d.ts +84 -0
  126. package/dist/core/parser.js +250 -0
  127. package/dist/core/parser.js.map +1 -0
  128. package/dist/core-lib.d.ts +59 -0
  129. package/dist/core-lib.js +62 -0
  130. package/dist/core-lib.js.map +1 -0
  131. package/dist/index.d.ts +15 -0
  132. package/dist/index.js +20 -0
  133. package/dist/index.js.map +1 -0
  134. package/dist/languages/index.d.ts +11 -0
  135. package/dist/languages/index.js +14 -0
  136. package/dist/languages/index.js.map +1 -0
  137. package/dist/languages/plugins/base.d.ts +44 -0
  138. package/dist/languages/plugins/base.js +82 -0
  139. package/dist/languages/plugins/base.js.map +1 -0
  140. package/dist/languages/plugins/index.d.ts +14 -0
  141. package/dist/languages/plugins/index.js +25 -0
  142. package/dist/languages/plugins/index.js.map +1 -0
  143. package/dist/languages/plugins/java.d.ts +49 -0
  144. package/dist/languages/plugins/java.js +402 -0
  145. package/dist/languages/plugins/java.js.map +1 -0
  146. package/dist/languages/plugins/javascript.d.ts +48 -0
  147. package/dist/languages/plugins/javascript.js +445 -0
  148. package/dist/languages/plugins/javascript.js.map +1 -0
  149. package/dist/languages/plugins/python.d.ts +47 -0
  150. package/dist/languages/plugins/python.js +480 -0
  151. package/dist/languages/plugins/python.js.map +1 -0
  152. package/dist/languages/plugins/rust.d.ts +47 -0
  153. package/dist/languages/plugins/rust.js +405 -0
  154. package/dist/languages/plugins/rust.js.map +1 -0
  155. package/dist/languages/registry.d.ts +30 -0
  156. package/dist/languages/registry.js +80 -0
  157. package/dist/languages/registry.js.map +1 -0
  158. package/dist/languages/types.d.ts +184 -0
  159. package/dist/languages/types.js +8 -0
  160. package/dist/languages/types.js.map +1 -0
  161. package/dist/resolution/cross-file.d.ts +146 -0
  162. package/dist/resolution/cross-file.js +439 -0
  163. package/dist/resolution/cross-file.js.map +1 -0
  164. package/dist/resolution/index.d.ts +12 -0
  165. package/dist/resolution/index.js +10 -0
  166. package/dist/resolution/index.js.map +1 -0
  167. package/dist/resolution/symbol-table.d.ts +136 -0
  168. package/dist/resolution/symbol-table.js +336 -0
  169. package/dist/resolution/symbol-table.js.map +1 -0
  170. package/dist/resolution/type-hierarchy.d.ts +124 -0
  171. package/dist/resolution/type-hierarchy.js +515 -0
  172. package/dist/resolution/type-hierarchy.js.map +1 -0
  173. package/dist/types/config.d.ts +45 -0
  174. package/dist/types/config.js +5 -0
  175. package/dist/types/config.js.map +1 -0
  176. package/dist/types/index.d.ts +392 -0
  177. package/dist/types/index.js +7 -0
  178. package/dist/types/index.js.map +1 -0
  179. package/dist/utils/logger.d.ts +85 -0
  180. package/dist/utils/logger.js +198 -0
  181. package/dist/utils/logger.js.map +1 -0
  182. package/dist/wasm/tree-sitter-java.wasm +0 -0
  183. package/dist/wasm/tree-sitter-javascript.wasm +0 -0
  184. package/dist/wasm/tree-sitter-python.wasm +0 -0
  185. package/dist/wasm/tree-sitter-rust.wasm +0 -0
  186. package/dist/wasm/web-tree-sitter.wasm +0 -0
  187. package/docs/SPEC.md +1021 -0
  188. package/examples/browser-example.html +610 -0
  189. package/examples/node-example.ts +215 -0
  190. package/package.json +107 -0
  191. package/wasm/tree-sitter-java.wasm +0 -0
  192. package/wasm/tree-sitter-javascript.wasm +0 -0
  193. package/wasm/tree-sitter-python.wasm +0 -0
  194. package/wasm/tree-sitter-rust.wasm +0 -0
@@ -0,0 +1,1010 @@
1
+ /**
2
+ * Circle-IR Analyzer
3
+ *
4
+ * Main entry point for analyzing source code and producing Circle-IR output.
5
+ * This is the core analyzer - for LLM-enhanced analysis, use circle-ir-ai.
6
+ */
7
+ import { initParser, parse, extractMeta, extractTypes, extractCalls, extractImports, extractExports, buildCFG, buildDFG, collectAllNodes, } from './core/index.js';
8
+ import { analyzeTaint, getDefaultConfig, detectUnresolved, propagateTaint, analyzeInterprocedural, findTaintBridges, analyzeConstantPropagation, isFalsePositive, isCorrelatedPredicateFP } from './analysis/index.js';
9
+ import { registerBuiltinPlugins } from './languages/index.js';
10
+ import { logger } from './utils/logger.js';
/**
 * Find getter methods that return tainted fields from constructor assignments.
 * This enables detection of taint through: constructor param → field → getter return.
 *
 * A zero-parameter method is reported when a tainted field of the same class
 * matches either the name derived from the `getXxx` / `isXxx` convention or
 * the method name itself (e.g. `name()` returning `this.name`). At most one
 * source is emitted per method; the convention-derived name is tried first.
 *
 * @param types - Extracted types; each entry exposes `name` and `methods`
 *   (each method exposes `name`, `parameters` and `start_line`).
 * @param instanceFieldTaint - Map from field name to `{ className, sourceParam }`.
 * @param sourceCode - Unused; kept so existing call sites keep working.
 * @returns Array of `constructor_field` source descriptors (possibly empty).
 */
function findGetterSources(types, instanceFieldTaint, sourceCode) {
    const sources = [];
    if (instanceFieldTaint.size === 0) {
        return sources;
    }
    for (const type of types) {
        for (const method of type.methods) {
            // Only parameterless methods are treated as getters.
            if (method.parameters.length !== 0) {
                continue;
            }
            const methodName = method.name;
            // Derive the field name from the getter naming convention, if any.
            let conventionalField = null;
            if (methodName.startsWith('get') && methodName.length > 3) {
                // getField -> field (lowercase first letter)
                conventionalField = methodName.charAt(3).toLowerCase() + methodName.substring(4);
            }
            else if (methodName.startsWith('is') && methodName.length > 2) {
                // isField -> field
                conventionalField = methodName.charAt(2).toLowerCase() + methodName.substring(3);
            }
            // The method name itself is always a candidate, which covers the
            // "name() returns this.name" pattern in the same single lookup pass.
            // A separate scan over every tainted field is not needed: map keys
            // are unique, so it could only rediscover what this pass finds.
            const candidates = conventionalField
                ? [conventionalField, methodName]
                : [methodName];
            for (const fieldName of candidates) {
                const fieldTaint = instanceFieldTaint.get(fieldName);
                if (fieldTaint && fieldTaint.className === type.name) {
                    sources.push({
                        type: 'constructor_field',
                        location: `${type.name}.${methodName}() returns tainted field '${fieldName}' (from constructor param '${fieldTaint.sourceParam}')`,
                        severity: 'high',
                        line: method.start_line,
                        confidence: 0.95,
                    });
                    break; // One source per method is enough
                }
            }
        }
    }
    return sources;
}
let initialized = false;
// In-flight initialization shared by concurrent callers; null when idle.
let initializing = null;
/**
 * Initialize the analyzer. Must be called before analyze().
 *
 * Registers the built-in language plugins and initializes the parser with
 * `options.wasmPath` and `options.languagePaths`. Calls made after a
 * successful initialization return immediately. Calls made while an
 * initialization is still running wait for that same attempt instead of
 * starting a second one. If the attempt fails, the error is propagated to
 * every waiter and a later call may retry.
 *
 * @param options - Optional settings; `wasmPath` and `languagePaths` are
 *   forwarded to `initParser`.
 * @returns A promise that resolves once initialization has completed.
 */
export async function initAnalyzer(options = {}) {
    if (initialized) {
        return;
    }
    if (initializing === null) {
        initializing = (async () => {
            // Register built-in language plugins
            registerBuiltinPlugins();
            await initParser({
                wasmPath: options.wasmPath,
                languagePaths: options.languagePaths,
            });
            initialized = true;
        })();
    }
    const attempt = initializing;
    try {
        await attempt;
    }
    catch (err) {
        // Clear only the attempt that failed so a newer retry is not discarded.
        if (initializing === attempt) {
            initializing = null;
        }
        throw err;
    }
}
/**
 * Build enriched metadata section from analysis results.
 *
 * Each method is given a role (`controller`, `repository`, `service` or
 * `utility`) from its annotations and the enclosing type's name, and a risk
 * level from whether any sink (`high`) or source (`medium`) carries the same
 * `method` name. Methods that end up as low-risk utilities are omitted.
 *
 * @param types - Extracted types; each exposes `name` and `methods`
 *   (each method exposes `name` and, optionally, `annotations`).
 * @param _calls - Unused; kept for signature compatibility.
 * @param sources - Taint sources; only their `method` property is read.
 * @param sinks - Taint sinks; only their `method` property is read.
 * @returns `{ functions }`, where `functions` is the list of classified
 *   methods, or `undefined` when nothing qualified.
 */
function buildEnriched(types, _calls, sources, sinks) {
    // Index method names once instead of scanning both lists for every method.
    const sourceMethods = new Set(sources.map(s => s.method));
    const sinkMethods = new Set(sinks.map(s => s.method));
    const functions = [];
    for (const type of types) {
        // Loop-invariant: the lowered type name is the same for all methods.
        const loweredTypeName = type.name.toLowerCase();
        const isRepositoryType = loweredTypeName.includes('repository') || loweredTypeName.includes('dao');
        const isServiceType = loweredTypeName.includes('service');
        for (const method of type.methods) {
            const annotations = method.annotations ?? [];
            const annotatedWith = (...markers) => annotations.some(a => markers.some(marker => a.includes(marker)));
            // Determine role based on annotations and naming
            let role = 'utility';
            let trustBoundary = 'internal';
            // 'Controller' also matches 'RestController', so one marker covers both.
            if (annotatedWith('RequestMapping', 'GetMapping', 'PostMapping', 'Controller')) {
                role = 'controller';
                trustBoundary = 'entry_point';
            }
            else if (isRepositoryType || annotatedWith('Repository')) {
                role = 'repository';
            }
            else if (isServiceType || annotatedWith('Service')) {
                role = 'service';
            }
            // Sinks outrank sources when determining risk.
            let risk = 'low';
            if (sinkMethods.has(method.name)) {
                risk = 'high';
            }
            else if (sourceMethods.has(method.name)) {
                risk = 'medium';
            }
            // Only include functions with meaningful roles
            if (role !== 'utility' || risk !== 'low') {
                functions.push({
                    method_name: `${type.name}.${method.name}`,
                    role,
                    risk,
                    trust_boundary: trustBoundary,
                    summary: `${role} method in ${type.name}`,
                });
            }
        }
    }
    return {
        functions: functions.length > 0 ? functions : undefined,
    };
}
/**
 * Pick the AST node types to collect in the single pre-pass traversal.
 * Rust, Python and JavaScript/TypeScript have dedicated sets; every other
 * language value falls back to the Java set.
 */
function collectableNodeTypes(language) {
    if (language === 'rust') {
        return new Set([
            // Rust AST nodes
            'call_expression',
            'macro_invocation',
            'function_item',
            'struct_item',
            'impl_item',
            'enum_item',
            'trait_item',
            'mod_item',
            'use_declaration',
            'let_declaration',
            'field_expression',
            'scoped_identifier',
        ]);
    }
    if (language === 'python') {
        return new Set([
            // Python AST nodes
            'call',
            'function_definition',
            'class_definition',
            'import_statement',
            'import_from_statement',
            'assignment',
            'attribute',
            'subscript',
        ]);
    }
    if (language === 'javascript' || language === 'typescript') {
        return new Set([
            // JavaScript/TypeScript AST nodes
            'call_expression',
            'new_expression',
            'class_declaration',
            'function_declaration',
            'arrow_function',
            'method_definition',
            'variable_declaration',
            'lexical_declaration',
            'import_statement',
            'export_statement',
            'member_expression',
            'assignment_expression',
        ]);
    }
    return new Set([
        // Java AST nodes
        'method_invocation',
        'object_creation_expression',
        'class_declaration',
        'method_declaration',
        'constructor_declaration',
        'field_declaration',
        'import_declaration',
        'interface_declaration',
        'enum_declaration',
    ]);
}
/**
 * Convert an inter-procedural analysis result into the `taint.interprocedural`
 * output shape (tainted methods, taint bridges, and call edges into tainted methods).
 */
function describeInterprocedural(interProc) {
    const taintBridges = findTaintBridges(interProc);
    return {
        tainted_methods: Array.from(interProc.taintedMethods),
        taint_bridges: taintBridges,
        method_flows: interProc.callEdges
            .filter(edge => interProc.taintedMethods.has(edge.calleeMethod))
            .map(edge => ({
            caller: edge.callerMethod,
            callee: edge.calleeMethod,
            call_line: edge.callLine,
            tainted_args: edge.taintedArgs,
            returns_taint: interProc.taintedReturns.has(edge.calleeMethod),
        })),
    };
}
/**
 * Append candidate flows to `flows`, skipping any whose (source_line, sink_line)
 * pair is already present and any rejected by the optional `accept` predicate.
 */
function appendNewFlows(flows, candidates, accept = () => true) {
    if (!candidates) {
        return;
    }
    for (const candidate of candidates) {
        const duplicate = flows.some(f => f.source_line === candidate.source_line && f.sink_line === candidate.sink_line);
        if (!duplicate && accept(candidate)) {
            flows.push(candidate);
        }
    }
}
/**
 * Append inter-procedurally propagated sinks to `sinks`, skipping sinks on
 * unreachable lines and lines that already carry a sink.
 */
function appendPropagatedSinks(sinks, propagatedSinks, unreachableLines) {
    for (const sink of propagatedSinks) {
        if (!unreachableLines.has(sink.line) &&
            !sinks.some(s => s.line === sink.line)) {
            sinks.push(sink);
        }
    }
}
/**
 * Analyze source code and produce Circle-IR output.
 *
 * Pipeline: parse → extract meta/types/calls/imports/exports/CFG/DFG →
 * constant propagation → taint sources/sinks → sink filtering → flow
 * detection (DFG, array element, collection, direct parameter) →
 * inter-procedural analysis → unresolved items and enriched metadata.
 *
 * @param code - Source text to analyze.
 * @param filePath - Path of the file, passed to metadata extraction and logging.
 * @param language - Language identifier; selects the parser and node types.
 * @param options - Optional settings. `taintConfig` overrides the default
 *   taint configuration; the whole object is forwarded to `initAnalyzer`
 *   when the analyzer is not yet initialized.
 * @returns An object with `meta`, `types`, `calls`, `cfg`, `dfg`, `taint`,
 *   `imports`, `exports`, `unresolved` and `enriched`.
 */
export async function analyze(code, filePath, language, options = {}) {
    if (!initialized) {
        await initAnalyzer(options);
    }
    logger.debug('Analyzing file', { filePath, language, codeLength: code.length });
    // Parse the code
    const tree = await parse(code, language);
    logger.trace('Parsed AST', { rootNodeType: tree.rootNode.type });
    // Collect all node types in a single traversal for better performance
    const nodeCache = collectAllNodes(tree.rootNode, collectableNodeTypes(language));
    // Extract all components using the cached nodes
    const meta = extractMeta(code, tree, filePath, language);
    const types = extractTypes(tree, nodeCache, language);
    const calls = extractCalls(tree, nodeCache, language);
    const imports = extractImports(tree, language);
    const exports = extractExports(types);
    const cfg = buildCFG(tree, language);
    const dfg = buildDFG(tree, nodeCache, language);
    // Extract @sanitizer annotated method names (from Javadoc comments)
    const sanitizerMethods = [];
    for (const type of types) {
        for (const method of type.methods) {
            if (method.annotations.includes('sanitizer')) {
                sanitizerMethods.push(method.name);
            }
        }
    }
    // First, do a preliminary taint analysis to find inter-procedural parameter sources
    // These need to be passed to constant propagation so it can track taint from method parameters
    const baseConfig = options.taintConfig ?? getDefaultConfig();
    const preliminaryTaint = analyzeTaint(calls, types, baseConfig);
    const taintedParameters = [];
    for (const source of preliminaryTaint.sources) {
        if (source.type !== 'interprocedural_param') {
            continue;
        }
        // Location format: "ParamType paramName in methodName"
        const match = source.location.match(/(\S+)\s+(\S+)\s+in\s+(\S+)/);
        if (match) {
            taintedParameters.push({
                methodName: match[3],
                paramName: match[2],
            });
        }
    }
    // Run constant propagation with tainted parameters
    const constPropResult = analyzeConstantPropagation(tree, code, {
        sanitizerMethods,
        taintedParameters,
    });
    // Analyze taint with config
    // NOTE(review): same inputs as the preliminary pass above; the result could
    // probably be reused — confirm analyzeTaint has no side effects first.
    const taint = analyzeTaint(calls, types, baseConfig);
    // Add sources for getters that return tainted constructor fields
    const getterSources = findGetterSources(types, constPropResult.instanceFieldTaint, code);
    taint.sources.push(...getterSources);
    logger.debug('Initial taint analysis', {
        sources: taint.sources.length,
        sinks: taint.sinks.length,
        sanitizers: taint.sanitizers?.length ?? 0,
        getterSources: getterSources.length,
    });
    // Filter sinks that are in dead code (unreachable)
    taint.sinks = taint.sinks.filter(sink => !constPropResult.unreachableLines.has(sink.line));
    // Filter sinks that use clean array elements (strong updates)
    taint.sinks = filterCleanArraySinks(taint.sinks, calls, constPropResult.taintedArrayElements, constPropResult.symbols);
    // Filter sinks that use variables proven clean by constant propagation (strong updates)
    taint.sinks = filterCleanVariableSinks(taint.sinks, calls, constPropResult.tainted, constPropResult.symbols, dfg, constPropResult.sanitizedVars, constPropResult.synchronizedLines);
    // Filter sinks that are wrapped by sanitizers on the same line
    taint.sinks = filterSanitizedSinks(taint.sinks, taint.sanitizers ?? [], calls);
    // True when constant propagation marks any step of the path as a false positive.
    const hasFalsePositiveStep = (path) => path.some(step => isFalsePositive(constPropResult, step.line, step.variable).isFalsePositive);
    // Propagate taint through dataflow to find verified flows
    if (taint.sources.length > 0 && taint.sinks.length > 0) {
        const propagationResult = propagateTaint(dfg, calls, taint.sources, taint.sinks, taint.sanitizers ?? []);
        // Filter flows using constant propagation (eliminate false positives)
        const verifiedFlows = propagationResult.flows.filter(flow => {
            // Check if the sink line is in dead code
            if (constPropResult.unreachableLines.has(flow.sink.line)) {
                return false;
            }
            // Check each step in the path - if any variable has a constant value, skip
            if (hasFalsePositiveStep(flow.path)) {
                return false;
            }
            // Check for correlated predicates: if the sink is under condition !C
            // and the taint was added under condition C, they're mutually exclusive
            return !isCorrelatedPredicateFP(constPropResult, flow);
        });
        // Convert flows to TaintFlowInfo format
        taint.flows = verifiedFlows.map(flow => ({
            source_line: flow.source.line,
            sink_line: flow.sink.line,
            source_type: flow.source.type,
            sink_type: flow.sink.type,
            path: flow.path.map(step => ({
                variable: step.variable,
                line: step.line,
                type: step.type,
            })),
            confidence: flow.confidence,
            sanitized: flow.sanitized,
        }));
        // Add array element flows that DFG-based analysis might miss
        appendNewFlows(taint.flows, detectArrayElementFlows(calls, taint.sources, taint.sinks, constPropResult.taintedArrayElements, constPropResult.unreachableLines));
        // Add collection/iterator flows that DFG-based analysis might miss,
        // applying the same filtering as DFG-based flows
        appendNewFlows(taint.flows, detectCollectionFlows(calls, taint.sources, taint.sinks, constPropResult.tainted, constPropResult.unreachableLines), flow => {
            const flowForCheck = {
                source: { line: flow.source_line, type: flow.source_type },
                sink: { line: flow.sink_line, type: flow.sink_type },
                path: flow.path.map(p => ({ variable: p.variable, line: p.line })),
            };
            if (isCorrelatedPredicateFP(constPropResult, flowForCheck)) {
                return false;
            }
            return !hasFalsePositiveStep(flow.path);
        });
        // Add direct parameter-to-sink flows that DFG might miss
        appendNewFlows(taint.flows, detectParameterSinkFlows(types, calls, taint.sources, taint.sinks, constPropResult.unreachableLines));
        // Perform inter-procedural analysis
        const interProc = analyzeInterprocedural(types, calls, dfg, taint.sources, taint.sinks, taint.sanitizers ?? [], {
            taintedVariables: constPropResult.tainted,
        });
        // Add inter-procedural sinks to the taint sinks. Dead-code sinks were
        // removed above, so they must not be reintroduced here; this matches
        // the handling in the no-initial-sinks branch below.
        appendPropagatedSinks(taint.sinks, interProc.propagatedSinks, constPropResult.unreachableLines);
        // Build inter-procedural info
        taint.interprocedural = describeInterprocedural(interProc);
    }
    // Perform inter-procedural analysis even when no initial sinks (can detect external taint escapes)
    if (taint.sources.length > 0 && taint.sinks.length === 0) {
        const interProc = analyzeInterprocedural(types, calls, dfg, taint.sources, [], // No initial sinks
        taint.sanitizers ?? [], {
            taintedVariables: constPropResult.tainted,
        });
        // Add inter-procedural sinks (e.g., external_taint_escape)
        appendPropagatedSinks(taint.sinks, interProc.propagatedSinks, constPropResult.unreachableLines);
        // Build inter-procedural info
        if (interProc.taintedMethods.size > 0 || interProc.propagatedSinks.length > 0) {
            taint.interprocedural = describeInterprocedural(interProc);
        }
        // If we found new sinks, create flows from sources.
        // Every such flow is attributed to the first source.
        if (taint.sinks.length > 0) {
            const primarySource = taint.sources[0];
            taint.flows = taint.sinks.map(sink => ({
                source_line: primarySource.line,
                sink_line: sink.line,
                source_type: primarySource.type,
                sink_type: sink.type,
                path: [
                    { variable: 'input', line: primarySource.line, type: 'source' },
                    { variable: 'input', line: sink.line, type: 'sink' },
                ],
                confidence: primarySource.confidence * sink.confidence,
                sanitized: false,
            }));
        }
    }
    // Detect unresolved items
    const unresolved = detectUnresolved(calls, types, dfg);
    // Build enriched section
    const enriched = buildEnriched(types, calls, taint.sources, taint.sinks);
    logger.debug('Analysis complete', {
        filePath,
        finalSources: taint.sources.length,
        finalSinks: taint.sinks.length,
        flows: taint.flows?.length ?? 0,
        unresolvedItems: unresolved.length,
    });
    return {
        meta,
        types,
        calls,
        cfg,
        dfg,
        taint,
        imports,
        exports,
        unresolved,
        enriched,
    };
}
/**
 * Analyze code and return a simplified API response format.
 *
 * Parses the code, extracts types and calls, runs constant propagation and
 * taint analysis, drops sinks on unreachable lines, and pairs the remaining
 * sinks with sources via `findVulnerabilities`. Parse, analysis and total
 * durations are reported in whole milliseconds.
 *
 * @param code - Source text to analyze.
 * @param filePath - Path of the file (not read by this function).
 * @param language - Language identifier; selects the parser and node types.
 * @param options - Optional settings. `taintConfig` overrides the default
 *   taint configuration; the whole object is forwarded to `initAnalyzer`
 *   when the analyzer is not yet initialized.
 * @returns `{ success: true, analysis: { sources, sinks, vulnerabilities },
 *   meta: { parseTimeMs, analysisTimeMs, totalTimeMs } }`.
 */
export async function analyzeForAPI(code, filePath, language, options = {}) {
    const startTime = performance.now();
    if (!initialized) {
        await initAnalyzer(options);
    }
    const parseStart = performance.now();
    const tree = await parse(code, language);
    const parseTime = performance.now() - parseStart;
    const analysisStart = performance.now();
    // Collect all node types in a single traversal for better performance.
    // These sets mirror the ones used by analyze() so that both entry points
    // hand the extractors the same node cache contents.
    let nodeTypesToCollect;
    if (language === 'rust') {
        nodeTypesToCollect = new Set([
            'call_expression', 'macro_invocation', 'function_item', 'struct_item',
            'impl_item', 'enum_item', 'trait_item', 'mod_item', 'use_declaration',
            'let_declaration', 'field_expression', 'scoped_identifier',
        ]);
    }
    else if (language === 'python') {
        nodeTypesToCollect = new Set([
            'call', 'function_definition', 'class_definition', 'import_statement',
            'import_from_statement', 'assignment', 'attribute', 'subscript',
        ]);
    }
    else if (language === 'javascript' || language === 'typescript') {
        nodeTypesToCollect = new Set([
            'call_expression', 'new_expression', 'class_declaration', 'function_declaration',
            'arrow_function', 'method_definition', 'variable_declaration', 'lexical_declaration',
            'import_statement', 'export_statement', 'member_expression', 'assignment_expression',
        ]);
    }
    else {
        // Any other language value is treated as Java.
        nodeTypesToCollect = new Set([
            'method_invocation', 'object_creation_expression', 'class_declaration',
            'method_declaration', 'constructor_declaration', 'field_declaration',
            'import_declaration', 'interface_declaration', 'enum_declaration',
        ]);
    }
    const nodeCache = collectAllNodes(tree.rootNode, nodeTypesToCollect);
    const types = extractTypes(tree, nodeCache, language);
    const calls = extractCalls(tree, nodeCache, language);
    // Run constant propagation
    const constPropResult = analyzeConstantPropagation(tree, code);
    const config = options.taintConfig ?? getDefaultConfig();
    const taint = analyzeTaint(calls, types, config);
    // Filter sinks in dead code
    const filteredSinks = taint.sinks.filter(sink => !constPropResult.unreachableLines.has(sink.line));
    // Generate vulnerabilities from source-sink pairs
    const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
    const analysisTime = performance.now() - analysisStart;
    const totalTime = performance.now() - startTime;
    return {
        success: true,
        analysis: {
            sources: taint.sources,
            sinks: filteredSinks,
            vulnerabilities,
        },
        meta: {
            parseTimeMs: Math.round(parseTime),
            analysisTimeMs: Math.round(analysisTime),
            totalTimeMs: Math.round(totalTime),
        },
    };
}
553
/**
 * Find potential vulnerabilities by matching sources to sinks.
 *
 * A source/sink pair is reported when the sink's type is listed for the
 * source's type in the compatibility table below. When both `calls` and
 * `constPropResult` are supplied, SQL sinks whose first argument is a
 * constant, untainted string or a placeholder-style query are skipped.
 * Results are de-duplicated per (source line, sink line, type), keeping the
 * highest-confidence entry, and returned sorted by descending confidence.
 *
 * @param {Array<object>} sources - Taint sources; `line` and `type` are read here.
 * @param {Array<object>} sinks - Taint sinks; `line`, `type`, `cwe` and `confidence` are read here.
 * @param {Array<object>} [calls] - Extracted calls; `location.line` and `arguments` are read here.
 * @param {object} [constPropResult] - Constant-propagation result exposing a `symbols` Map and a `tainted` Set.
 * @returns {Array<object>} Vulnerability records `{ type, cwe, severity, source, sink, confidence }`.
 */
function findVulnerabilities(sources, sinks, calls, constPropResult) {
    // A Map rather than an object literal: a source type such as "constructor"
    // or "__proto__" must not resolve to an inherited Object.prototype member,
    // which would make the `.includes` call below throw.
    const sourceToSinkMapping = new Map([
        ['http_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'xpath_injection', 'ldap_injection', 'ssrf']],
        ['http_body', ['sql_injection', 'command_injection', 'deserialization', 'xxe', 'xss', 'code_injection']],
        ['http_header', ['sql_injection', 'xss', 'ssrf']],
        ['http_cookie', ['sql_injection', 'xss']],
        ['http_path', ['path_traversal', 'sql_injection', 'ssrf']],
        ['http_query', ['sql_injection', 'command_injection', 'xss', 'ssrf']],
        ['io_input', ['command_injection', 'path_traversal', 'deserialization', 'xxe', 'code_injection', 'xss']],
        ['env_input', ['command_injection', 'path_traversal']],
        ['db_input', ['xss', 'sql_injection']],
        ['file_input', ['deserialization', 'xxe', 'path_traversal', 'command_injection', 'code_injection', 'xss']],
        ['network_input', ['sql_injection', 'command_injection', 'xss', 'ssrf']],
        ['config_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'ssrf']],
        ['interprocedural_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'xpath_injection', 'ldap_injection', 'ssrf', 'code_injection']],
        ['plugin_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'code_injection']],
        ['constructor_field', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'xpath_injection', 'ldap_injection', 'ssrf', 'code_injection', 'deserialization', 'xxe']],
    ]);
    // The SQL check needs both the call list and constant-propagation data.
    const canVerifySql = Boolean(calls && constPropResult);
    // The verdict depends only on the sink, so it is computed at most once per
    // sink instead of once per (source, sink) pair.
    const sqlVerdicts = new Map();
    const isProvablySafeSqlSink = (sink) => {
        const cached = sqlVerdicts.get(sink);
        if (cached !== undefined) {
            return cached;
        }
        const verdict = computeSqlVerdict(sink);
        sqlVerdicts.set(sink, verdict);
        return verdict;
    };
    const computeSqlVerdict = (sink) => {
        // Only the first call found on the sink's line is inspected.
        const sinkCall = calls.find((c) => c.location.line === sink.line);
        if (!sinkCall || sinkCall.arguments.length === 0) {
            return false;
        }
        const queryArg = sinkCall.arguments[0];
        const { symbols, tainted } = constPropResult;
        if (queryArg.variable) {
            // The query is a known string constant that was never tainted.
            const isConstant = symbols.get(queryArg.variable)?.type === 'string';
            if (isConstant && !tainted.has(queryArg.variable)) {
                return true;
            }
        }
        if (queryArg.expression && !queryArg.expression.includes('+')) {
            // No concatenation in the argument and the resolved query text
            // contains a placeholder character ('?', '$' or ':').
            const queryValue = symbols.get(queryArg.variable || '')?.value;
            return typeof queryValue === 'string' && /[?$:]/.test(queryValue);
        }
        return false;
    };
    // Keyed by "sourceLine:sinkLine:type"; keeps the highest-confidence record.
    const bestByKey = new Map();
    for (const source of sources) {
        const potentialSinks = sourceToSinkMapping.get(source.type) ?? [];
        for (const sink of sinks) {
            if (!potentialSinks.includes(sink.type)) {
                continue;
            }
            if (canVerifySql && sink.type === 'sql_injection' && isProvablySafeSqlSink(sink)) {
                continue;
            }
            const confidence = calculateVulnConfidence(source, sink);
            const key = `${source.line}:${sink.line}:${sink.type}`;
            const existing = bestByKey.get(key);
            if (!existing || confidence > existing.confidence) {
                bestByKey.set(key, {
                    type: sink.type,
                    cwe: sink.cwe,
                    severity: sink.confidence > 0.9 ? 'critical' : 'high',
                    source: {
                        line: source.line,
                        type: source.type,
                    },
                    sink: {
                        line: sink.line,
                        type: sink.type,
                    },
                    confidence,
                });
            }
        }
    }
    const dedupedVulns = Array.from(bestByKey.values());
    dedupedVulns.sort((a, b) => b.confidence - a.confidence);
    return dedupedVulns;
}
640
/**
 * Score a source/sink pair.
 *
 * Starts from a base of 0.5, adds a proximity bonus (0.3 when the two lines
 * are fewer than 10 apart, 0.15 when fewer than 50 apart), adds 0.1 when the
 * source's `severity` is 'high', scales the sum by the sink's own
 * `confidence`, and caps the result at 1.0.
 *
 * @param {object} source - Reads `line` and `severity`.
 * @param {object} sink - Reads `line` and `confidence`.
 * @returns {number} Combined confidence, at most 1.0.
 */
function calculateVulnConfidence(source, sink) {
    const BASE_SCORE = 0.5;
    const distance = Math.abs(source.line - sink.line);
    let proximityBonus = 0;
    if (distance < 10) {
        proximityBonus = 0.3;
    }
    else if (distance < 50) {
        proximityBonus = 0.15;
    }
    const severityBonus = source.severity === 'high' ? 0.1 : 0;
    const scaled = (BASE_SCORE + proximityBonus + severityBonus) * sink.confidence;
    return Math.min(scaled, 1.0);
}
655
/**
 * Best-effort evaluation of a simple arithmetic expression.
 *
 * Every symbol whose type is 'int' or 'float' is substituted into `expr` by
 * name. If the result then consists solely of digits, whitespace, the
 * operators `+ - * /`, parentheses and dots, it is evaluated and the floored
 * value is returned as a string. In every other case (unknown identifiers,
 * syntax errors, non-finite results) the original `expr` is returned unchanged.
 *
 * @param {string} expr - Expression text.
 * @param {Map<string, {type: string, value: *}>} symbols - Known symbol values.
 * @returns {string} The floored numeric result as a string, or `expr` unchanged.
 */
function evaluateSimpleExpression(expr, symbols) {
    let evaluated = expr;
    for (const [name, val] of symbols) {
        if (!name || (val.type !== 'int' && val.type !== 'float')) {
            continue;
        }
        // Escape regex metacharacters: identifiers may legally contain '$'.
        const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Lookarounds instead of \b so that names starting or ending with '$'
        // still match, and so that `i` is not replaced inside `$i` or `idx`.
        const identifier = new RegExp(`(?<![\\w$])${escapedName}(?![\\w$])`, 'g');
        // Parenthesize the value so a negative number following '-' does not
        // turn into the invalid token sequence `--`.
        const literal = `(${String(val.value)})`;
        evaluated = evaluated.replace(identifier, () => literal);
    }
    // Anything other than plain arithmetic is left untouched.
    if (!/^[\d\s+\-*/().]+$/.test(evaluated)) {
        return expr;
    }
    try {
        // NOTE(review): dynamic evaluation via the Function constructor. The
        // character whitelist above is the only guard between the analyzed
        // text and this call; consider replacing it with a small arithmetic
        // parser.
        const result = Function('"use strict"; return (' + evaluated + ')')();
        // Reject NaN and +/-Infinity (e.g. division by zero): they are not
        // usable as an index.
        if (typeof result === 'number' && Number.isFinite(result)) {
            return String(Math.floor(result));
        }
    }
    catch {
        // Deliberate best-effort: a malformed expression simply stays unevaluated.
    }
    return expr;
}
676
/**
 * Drop sinks that read an array element known not to be tainted.
 *
 * A sink is removed when any call on its line has an argument of the form
 * `name[index]` where `name` has an entry in `taintedArrayElements` and that
 * entry contains neither the (evaluated) index nor the wildcard '*'.
 *
 * @param {Array<object>} sinks - Sinks; `line` is read here.
 * @param {Array<object>} calls - Calls; `location.line` and `arguments` are read here.
 * @param {Map<string, Set<string>>} taintedArrayElements - Tainted indices per array name.
 * @param {Map<string, object>} symbols - Forwarded to evaluateSimpleExpression to resolve index expressions.
 * @returns {Array<object>} The sinks that were not ruled out.
 */
function filterCleanArraySinks(sinks, calls, taintedArrayElements, symbols) {
    // Group calls by the line they appear on.
    const callsByLine = new Map();
    for (const call of calls) {
        const line = call.location.line;
        const bucket = callsByLine.get(line);
        if (bucket) {
            bucket.push(call);
        }
        else {
            callsByLine.set(line, [call]);
        }
    }
    // True when the argument is an array access into a tracked array at an
    // index that is not tainted.
    const readsCleanElement = (arg) => {
        const access = arg.expression?.match(/^(\w+)\[(\d+|[^[\]]+)\]$/);
        if (!access) {
            return false;
        }
        const [, arrayName, rawIndex] = access;
        const resolvedIndex = evaluateSimpleExpression(rawIndex, symbols);
        const taintedIndices = taintedArrayElements.get(arrayName);
        if (taintedIndices === undefined) {
            return false;
        }
        return !(taintedIndices.has(resolvedIndex) || taintedIndices.has('*'));
    };
    return sinks.filter((sink) => {
        const callsAtSink = callsByLine.get(sink.line) ?? [];
        const hasCleanRead = callsAtSink.some((call) => call.arguments.some((arg) => readsCleanElement(arg)));
        return !hasCleanRead;
    });
}
705
/**
 * Drop sinks for which some call on the sink's line has at least one argument
 * and every argument is a plain variable judged clean.
 *
 * An argument counts as clean only when it is a variable reference without an
 * array access and, checked in this order:
 *   1. it is not a field (unless the sink line is in `synchronizedLines`),
 *   2. it is listed in `sanitizedVars` (clean immediately), otherwise
 *   3. it is not in `taintedVars`, and
 *   4. `symbols` holds a value for it whose type is not 'unknown'.
 * Variables are looked up both as `method:name` and as the bare name.
 *
 * @param {Array<object>} sinks - Sinks; `line` is read here.
 * @param {Array<object>} calls - Calls; `location.line`, `in_method` and `arguments` are read here.
 * @param {Set<string>} taintedVars - Tainted variable names.
 * @param {Map<string, object>} symbols - Known symbol values.
 * @param {object} [dfg] - Data-flow graph; definitions with kind 'field' are collected from `defs`.
 * @param {Set<string>} [sanitizedVars] - Sanitized variable names.
 * @param {Set<number>} [synchronizedLines] - Lines treated as inside a synchronized block.
 * @returns {Array<object>} The sinks that were not ruled out.
 */
function filterCleanVariableSinks(sinks, calls, taintedVars, symbols, dfg, sanitizedVars, synchronizedLines) {
    const fieldNames = new Set();
    if (dfg) {
        dfg.defs
            .filter((def) => def.kind === 'field')
            .forEach((def) => fieldNames.add(def.variable));
    }
    // Group calls by the line they appear on.
    const callsByLine = new Map();
    for (const call of calls) {
        const line = call.location.line;
        const bucket = callsByLine.get(line);
        if (bucket) {
            bucket.push(call);
        }
        else {
            callsByLine.set(line, [call]);
        }
    }
    const isCleanArgument = (arg, methodName, inSynchronizedBlock) => {
        // Only plain variable references (no array access) can be proven clean.
        if (!arg.variable || arg.expression?.includes('[')) {
            return false;
        }
        const varName = arg.variable;
        const scopedName = methodName ? `${methodName}:${varName}` : varName;
        // Fields are never considered clean outside a synchronized block.
        if (fieldNames.has(varName) && !inSynchronizedBlock) {
            return false;
        }
        if (sanitizedVars?.has(scopedName) || sanitizedVars?.has(varName)) {
            return true;
        }
        if (taintedVars.has(scopedName) || taintedVars.has(varName)) {
            return false;
        }
        const symbolValue = symbols.get(scopedName) ?? symbols.get(varName);
        return Boolean(symbolValue && symbolValue.type !== 'unknown');
    };
    return sinks.filter((sink) => {
        const callsAtSink = callsByLine.get(sink.line) ?? [];
        const inSynchronizedBlock = synchronizedLines?.has(sink.line) ?? false;
        const provenClean = callsAtSink.some((call) => call.arguments.length > 0 &&
            call.arguments.every((arg) => isCleanArgument(arg, call.in_method, inSynchronizedBlock)));
        return !provenClean;
    });
}
758
/**
 * Drop sinks whose argument is wrapped in a matching sanitizer call on the
 * same line.
 *
 * A sink is removed when a sanitizer recorded on the sink's line lists the
 * sink's type in `sanitizes` and the text of some call argument on that line
 * contains the sanitizer's method call (`Class.method(` when the sanitizer
 * names a class, otherwise `method(`).
 *
 * @param {Array<object>} sinks - Sinks; `line` and `type` are read here.
 * @param {Array<object>} [sanitizers] - Sanitizers; `line`, `sanitizes` and `method` are read here.
 * @param {Array<object>} calls - Calls; `location.line` and `arguments` are read here.
 * @returns {Array<object>} The input array itself when there are no sanitizers, otherwise the remaining sinks.
 */
function filterSanitizedSinks(sinks, sanitizers, calls) {
    if (!sanitizers || sanitizers.length === 0) {
        return sinks;
    }
    const groupByLine = (items, lineOf) => {
        const grouped = new Map();
        for (const item of items) {
            const line = lineOf(item);
            const bucket = grouped.get(line);
            if (bucket) {
                bucket.push(item);
            }
            else {
                grouped.set(line, [item]);
            }
        }
        return grouped;
    };
    const sanitizersByLine = groupByLine(sanitizers, (san) => san.line);
    const callsByLine = groupByLine(calls, (call) => call.location.line);
    // True when `expr` textually contains a call to the sanitizer's method.
    const invokesSanitizer = (san, expr) => {
        const parsed = san.method.match(/(?:(\w+)\.)?(\w+)\(\)/);
        if (!parsed) {
            return false;
        }
        const [, className, methodName] = parsed;
        const needle = className ? `${className}.${methodName}(` : `${methodName}(`;
        return expr.includes(needle);
    };
    return sinks.filter((sink) => {
        const lineSanitizers = sanitizersByLine.get(sink.line);
        if (!lineSanitizers || lineSanitizers.length === 0) {
            return true;
        }
        const isSanitized = lineSanitizers.some((san) => {
            if (!san.sanitizes.includes(sink.type)) {
                return false;
            }
            const lineCalls = callsByLine.get(sink.line) ?? [];
            return lineCalls.some((call) => call.arguments.some((arg) => invokesSanitizer(san, arg.expression || '')));
        });
        return !isSanitized;
    });
}
805
/**
 * Report flows where a tainted variable, or an element read from a tainted
 * collection, is passed to a reachable sink.
 *
 * For every argument of every call on a sink's line:
 *   - a tainted argument variable yields a flow with confidence 0.8;
 *   - an argument expression containing `<name>.<accessor>(` for one of the
 *     listed accessor methods, with `<name>` tainted, yields a flow with
 *     confidence 0.75 (one per matching accessor).
 * Taint is looked up under the bare name and under `method:name`.
 * Every flow is attributed to the first entry of `sources`; with no sources,
 * nothing is reported.
 *
 * @param {Array<object>} calls - Calls; `location.line`, `in_method` and `arguments` are read here.
 * @param {Array<object>} sources - Taint sources; only `sources[0]` is used.
 * @param {Array<object>} sinks - Sinks; `line` and `type` are read here.
 * @param {Set<string>} taintedVars - Tainted variable names.
 * @param {Set<number>} unreachableLines - Lines whose sinks are ignored.
 * @returns {Array<object>} Flow records.
 */
function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLines) {
    const flows = [];
    // Group calls by the line they appear on.
    const callsByLine = new Map();
    for (const call of calls) {
        const line = call.location.line;
        const bucket = callsByLine.get(line);
        if (bucket) {
            bucket.push(call);
        }
        else {
            callsByLine.set(line, [call]);
        }
    }
    // One pattern per accessor, each capturing the receiver name.
    const accessorPatterns = ['getLast', 'getFirst', 'get', 'next', 'poll', 'peek', 'toArray']
        .map((method) => new RegExp(`(\\w+)\\.${method}\\(`));
    const isTainted = (call, name) => {
        const scopedName = call.in_method ? `${call.in_method}:${name}` : name;
        return taintedVars.has(name) || taintedVars.has(scopedName);
    };
    const recordFlow = (sink, variable, confidence) => {
        const source = sources[0];
        if (!source) {
            return;
        }
        flows.push({
            source_line: source.line,
            sink_line: sink.line,
            source_type: source.type,
            sink_type: sink.type,
            path: [
                { variable, line: source.line, type: 'source' },
                { variable, line: sink.line, type: 'sink' },
            ],
            confidence,
            sanitized: false,
        });
    };
    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const call of callsByLine.get(sink.line) ?? []) {
            for (const arg of call.arguments) {
                // Tainted variable passed directly.
                if (arg.variable && isTainted(call, arg.variable)) {
                    recordFlow(sink, arg.variable, 0.8);
                }
                if (!arg.expression) {
                    continue;
                }
                // Element read out of a tainted collection.
                for (const pattern of accessorPatterns) {
                    const hit = arg.expression.match(pattern);
                    if (hit && isTainted(call, hit[1])) {
                        recordFlow(sink, hit[1], 0.75);
                    }
                }
            }
        }
    }
    return flows;
}
875
/**
 * Report flows where a tainted array element is passed to a reachable sink.
 *
 * An argument of the form `name[index]` produces a flow (confidence 0.85)
 * when `taintedArrayElements` has an entry for `name` containing either the
 * index text exactly as written or the wildcard '*'. Every flow is attributed
 * to the first entry of `sources`; with no sources, nothing is reported.
 *
 * @param {Array<object>} calls - Calls; `location.line` and `arguments` are read here.
 * @param {Array<object>} sources - Taint sources; only `sources[0]` is used.
 * @param {Array<object>} sinks - Sinks; `line` and `type` are read here.
 * @param {Map<string, Set<string>>} taintedArrayElements - Tainted indices per array name.
 * @param {Set<number>} unreachableLines - Lines whose sinks are ignored.
 * @returns {Array<object>} Flow records.
 */
function detectArrayElementFlows(calls, sources, sinks, taintedArrayElements, unreachableLines) {
    const flows = [];
    // Group calls by the line they appear on.
    const callsByLine = new Map();
    for (const call of calls) {
        const line = call.location.line;
        const bucket = callsByLine.get(line);
        if (bucket) {
            bucket.push(call);
        }
        else {
            callsByLine.set(line, [call]);
        }
    }
    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const call of callsByLine.get(sink.line) ?? []) {
            for (const arg of call.arguments) {
                const access = arg.expression?.match(/^(\w+)\[(\d+|[^[\]]+)\]$/);
                if (!access) {
                    continue;
                }
                const [, arrayName, indexStr] = access;
                const taintedIndices = taintedArrayElements.get(arrayName);
                if (!taintedIndices) {
                    continue;
                }
                if (!(taintedIndices.has(indexStr) || taintedIndices.has('*'))) {
                    continue;
                }
                const source = sources[0];
                if (!source) {
                    continue;
                }
                flows.push({
                    source_line: source.line,
                    sink_line: sink.line,
                    source_type: source.type,
                    sink_type: sink.type,
                    path: [
                        { variable: arrayName, line: source.line, type: 'source' },
                        { variable: `${arrayName}[${indexStr}]`, line: sink.line, type: 'sink' },
                    ],
                    confidence: 0.85,
                    sanitized: false,
                });
            }
        }
    }
    return flows;
}
921
/**
 * Detect flows from a method parameter straight into a sink.
 *
 * Covers the case where a tainted parameter is handed to a sink without any
 * intermediate assignment (which DFG chains might miss). Only sources of type
 * 'interprocedural_param' are considered; their `location` text is parsed as
 * "<type> <paramName> in <methodName>". A flow is emitted when a call on a
 * reachable sink's line sits inside that method and passes the parameter as an
 * argument. At most one flow is emitted per (source line, sink line).
 *
 * @param {*} types - Not used by this function.
 * @param {Array<object>} calls - Calls; `location.line`, `in_method` and `arguments` are read here.
 * @param {Array<object>} sources - Taint sources; `type`, `location` and `line` are read here.
 * @param {Array<object>} sinks - Sinks; `line` and `type` are read here.
 * @param {Set<number>} unreachableLines - Lines whose sinks are ignored.
 * @returns {Array<object>} Flow records with confidence 0.75.
 */
function detectParameterSinkFlows(types, calls, sources, sinks, unreachableLines) {
    const flows = [];
    // methodName -> (paramName -> source)
    const paramSourcesByMethod = new Map();
    for (const source of sources) {
        if (source.type !== 'interprocedural_param') {
            continue;
        }
        // Location looks like "String paramName in methodName".
        const parsed = source.location.match(/(\S+)\s+(\S+)\s+in\s+(\S+)/);
        if (!parsed) {
            continue;
        }
        const paramName = parsed[2];
        const methodName = parsed[3];
        if (!paramSourcesByMethod.has(methodName)) {
            paramSourcesByMethod.set(methodName, new Map());
        }
        paramSourcesByMethod.get(methodName).set(paramName, source);
    }
    if (paramSourcesByMethod.size === 0) {
        return flows;
    }
    // Group calls by the line they appear on.
    const callsByLine = new Map();
    for (const call of calls) {
        const line = call.location.line;
        const bucket = callsByLine.get(line);
        if (bucket) {
            bucket.push(call);
        }
        else {
            callsByLine.set(line, [call]);
        }
    }
    const alreadyRecorded = (sourceLine, sinkLine) => flows.some((flow) => flow.source_line === sourceLine && flow.sink_line === sinkLine);
    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const call of callsByLine.get(sink.line) ?? []) {
            // Calls outside a known method, or in a method without parameter
            // sources, cannot contribute.
            const methodParamSources = call.in_method
                ? paramSourcesByMethod.get(call.in_method)
                : undefined;
            if (!methodParamSources) {
                continue;
            }
            for (const arg of call.arguments) {
                if (!arg.variable) {
                    continue;
                }
                const paramSource = methodParamSources.get(arg.variable);
                if (!paramSource || alreadyRecorded(paramSource.line, sink.line)) {
                    continue;
                }
                flows.push({
                    source_line: paramSource.line,
                    sink_line: sink.line,
                    source_type: paramSource.type,
                    sink_type: sink.type,
                    path: [
                        { variable: arg.variable, line: paramSource.line, type: 'source' },
                        { variable: arg.variable, line: sink.line, type: 'sink' },
                    ],
                    confidence: 0.75, // Lower confidence for interprocedural
                    sanitized: false,
                });
            }
        }
    }
    return flows;
}
998
/**
 * Report whether the analyzer is initialized.
 *
 * @returns {boolean} The current value of the module-level `initialized` flag
 *   (declared and set elsewhere in this file).
 */
export function isAnalyzerInitialized() {
    const ready = initialized;
    return ready;
}
1004
/**
 * Reset the analyzer (mainly for testing).
 *
 * Clears the module-level `initialized` flag and does nothing else.
 *
 * @returns {void}
 */
export function resetAnalyzer() {
    const NOT_INITIALIZED = false;
    initialized = NOT_INITIALIZED;
}
1010
+ //# sourceMappingURL=analyzer.js.map