circle-ir 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +200 -0
- package/configs/sinks/code_injection.yaml +672 -0
- package/configs/sinks/command.yaml +917 -0
- package/configs/sinks/deserialization.yaml +105 -0
- package/configs/sinks/ldap.yaml +136 -0
- package/configs/sinks/nodejs.json +629 -0
- package/configs/sinks/path.yaml +715 -0
- package/configs/sinks/python.json +501 -0
- package/configs/sinks/rust.json +339 -0
- package/configs/sinks/sql.yaml +233 -0
- package/configs/sinks/ssrf.yaml +160 -0
- package/configs/sinks/xpath.yaml +121 -0
- package/configs/sinks/xss.yaml +727 -0
- package/configs/sources/db_sources.yaml +90 -0
- package/configs/sources/env_sources.yaml +94 -0
- package/configs/sources/express.json +197 -0
- package/configs/sources/file_sources.yaml +164 -0
- package/configs/sources/http_sources.yaml +379 -0
- package/configs/sources/io_sources.yaml +519 -0
- package/configs/sources/network_sources.yaml +99 -0
- package/configs/sources/python.json +230 -0
- package/configs/sources/rust.json +286 -0
- package/configs/sources/spring.yaml +70 -0
- package/dist/analysis/advisory-db.d.ts +86 -0
- package/dist/analysis/advisory-db.js +104 -0
- package/dist/analysis/advisory-db.js.map +1 -0
- package/dist/analysis/cargo-parser.d.ts +42 -0
- package/dist/analysis/cargo-parser.js +102 -0
- package/dist/analysis/cargo-parser.js.map +1 -0
- package/dist/analysis/config-loader.d.ts +37 -0
- package/dist/analysis/config-loader.js +1561 -0
- package/dist/analysis/config-loader.js.map +1 -0
- package/dist/analysis/constant-propagation/ast-utils.d.ts +25 -0
- package/dist/analysis/constant-propagation/ast-utils.js +34 -0
- package/dist/analysis/constant-propagation/ast-utils.js.map +1 -0
- package/dist/analysis/constant-propagation/evaluator.d.ts +32 -0
- package/dist/analysis/constant-propagation/evaluator.js +296 -0
- package/dist/analysis/constant-propagation/evaluator.js.map +1 -0
- package/dist/analysis/constant-propagation/index.d.ts +62 -0
- package/dist/analysis/constant-propagation/index.js +152 -0
- package/dist/analysis/constant-propagation/index.js.map +1 -0
- package/dist/analysis/constant-propagation/patterns.d.ts +8 -0
- package/dist/analysis/constant-propagation/patterns.js +126 -0
- package/dist/analysis/constant-propagation/patterns.js.map +1 -0
- package/dist/analysis/constant-propagation/propagator.d.ts +180 -0
- package/dist/analysis/constant-propagation/propagator.js +1985 -0
- package/dist/analysis/constant-propagation/propagator.js.map +1 -0
- package/dist/analysis/constant-propagation/types.d.ts +63 -0
- package/dist/analysis/constant-propagation/types.js +5 -0
- package/dist/analysis/constant-propagation/types.js.map +1 -0
- package/dist/analysis/constant-propagation.d.ts +9 -0
- package/dist/analysis/constant-propagation.js +18 -0
- package/dist/analysis/constant-propagation.js.map +1 -0
- package/dist/analysis/dependency-scanner.d.ts +79 -0
- package/dist/analysis/dependency-scanner.js +122 -0
- package/dist/analysis/dependency-scanner.js.map +1 -0
- package/dist/analysis/dfg-verifier.d.ts +116 -0
- package/dist/analysis/dfg-verifier.js +399 -0
- package/dist/analysis/dfg-verifier.js.map +1 -0
- package/dist/analysis/findings.d.ts +11 -0
- package/dist/analysis/findings.js +228 -0
- package/dist/analysis/findings.js.map +1 -0
- package/dist/analysis/index.d.ts +16 -0
- package/dist/analysis/index.js +18 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/interprocedural.d.ts +99 -0
- package/dist/analysis/interprocedural.js +526 -0
- package/dist/analysis/interprocedural.js.map +1 -0
- package/dist/analysis/path-finder.d.ts +133 -0
- package/dist/analysis/path-finder.js +354 -0
- package/dist/analysis/path-finder.js.map +1 -0
- package/dist/analysis/rules.d.ts +75 -0
- package/dist/analysis/rules.js +332 -0
- package/dist/analysis/rules.js.map +1 -0
- package/dist/analysis/semver.d.ts +27 -0
- package/dist/analysis/semver.js +127 -0
- package/dist/analysis/semver.js.map +1 -0
- package/dist/analysis/taint-matcher.d.ts +15 -0
- package/dist/analysis/taint-matcher.js +634 -0
- package/dist/analysis/taint-matcher.js.map +1 -0
- package/dist/analysis/taint-propagation.d.ts +67 -0
- package/dist/analysis/taint-propagation.js +298 -0
- package/dist/analysis/taint-propagation.js.map +1 -0
- package/dist/analysis/unresolved.d.ts +14 -0
- package/dist/analysis/unresolved.js +202 -0
- package/dist/analysis/unresolved.js.map +1 -0
- package/dist/analyzer.d.ts +43 -0
- package/dist/analyzer.js +1010 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/browser/circle-ir.js +16576 -0
- package/dist/browser.d.ts +38 -0
- package/dist/browser.js +38 -0
- package/dist/browser.js.map +1 -0
- package/dist/core/circle-ir-core.cjs +13626 -0
- package/dist/core/circle-ir-core.d.ts +59 -0
- package/dist/core/circle-ir-core.js +13591 -0
- package/dist/core/extractors/calls.d.ts +13 -0
- package/dist/core/extractors/calls.js +1429 -0
- package/dist/core/extractors/calls.js.map +1 -0
- package/dist/core/extractors/cfg.d.ts +9 -0
- package/dist/core/extractors/cfg.js +519 -0
- package/dist/core/extractors/cfg.js.map +1 -0
- package/dist/core/extractors/dfg.d.ts +12 -0
- package/dist/core/extractors/dfg.js +1081 -0
- package/dist/core/extractors/dfg.js.map +1 -0
- package/dist/core/extractors/exports.d.ts +14 -0
- package/dist/core/extractors/exports.js +80 -0
- package/dist/core/extractors/exports.js.map +1 -0
- package/dist/core/extractors/imports.d.ts +9 -0
- package/dist/core/extractors/imports.js +739 -0
- package/dist/core/extractors/imports.js.map +1 -0
- package/dist/core/extractors/index.d.ts +10 -0
- package/dist/core/extractors/index.js +11 -0
- package/dist/core/extractors/index.js.map +1 -0
- package/dist/core/extractors/meta.d.ts +10 -0
- package/dist/core/extractors/meta.js +109 -0
- package/dist/core/extractors/meta.js.map +1 -0
- package/dist/core/extractors/types.d.ts +10 -0
- package/dist/core/extractors/types.js +1479 -0
- package/dist/core/extractors/types.js.map +1 -0
- package/dist/core/index.d.ts +5 -0
- package/dist/core/index.js +8 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/parser.d.ts +84 -0
- package/dist/core/parser.js +250 -0
- package/dist/core/parser.js.map +1 -0
- package/dist/core-lib.d.ts +59 -0
- package/dist/core-lib.js +62 -0
- package/dist/core-lib.js.map +1 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/languages/index.d.ts +11 -0
- package/dist/languages/index.js +14 -0
- package/dist/languages/index.js.map +1 -0
- package/dist/languages/plugins/base.d.ts +44 -0
- package/dist/languages/plugins/base.js +82 -0
- package/dist/languages/plugins/base.js.map +1 -0
- package/dist/languages/plugins/index.d.ts +14 -0
- package/dist/languages/plugins/index.js +25 -0
- package/dist/languages/plugins/index.js.map +1 -0
- package/dist/languages/plugins/java.d.ts +49 -0
- package/dist/languages/plugins/java.js +402 -0
- package/dist/languages/plugins/java.js.map +1 -0
- package/dist/languages/plugins/javascript.d.ts +48 -0
- package/dist/languages/plugins/javascript.js +445 -0
- package/dist/languages/plugins/javascript.js.map +1 -0
- package/dist/languages/plugins/python.d.ts +47 -0
- package/dist/languages/plugins/python.js +480 -0
- package/dist/languages/plugins/python.js.map +1 -0
- package/dist/languages/plugins/rust.d.ts +47 -0
- package/dist/languages/plugins/rust.js +405 -0
- package/dist/languages/plugins/rust.js.map +1 -0
- package/dist/languages/registry.d.ts +30 -0
- package/dist/languages/registry.js +80 -0
- package/dist/languages/registry.js.map +1 -0
- package/dist/languages/types.d.ts +184 -0
- package/dist/languages/types.js +8 -0
- package/dist/languages/types.js.map +1 -0
- package/dist/resolution/cross-file.d.ts +146 -0
- package/dist/resolution/cross-file.js +439 -0
- package/dist/resolution/cross-file.js.map +1 -0
- package/dist/resolution/index.d.ts +12 -0
- package/dist/resolution/index.js +10 -0
- package/dist/resolution/index.js.map +1 -0
- package/dist/resolution/symbol-table.d.ts +136 -0
- package/dist/resolution/symbol-table.js +336 -0
- package/dist/resolution/symbol-table.js.map +1 -0
- package/dist/resolution/type-hierarchy.d.ts +124 -0
- package/dist/resolution/type-hierarchy.js +515 -0
- package/dist/resolution/type-hierarchy.js.map +1 -0
- package/dist/types/config.d.ts +45 -0
- package/dist/types/config.js +5 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +392 -0
- package/dist/types/index.js +7 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/logger.d.ts +85 -0
- package/dist/utils/logger.js +198 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/wasm/tree-sitter-java.wasm +0 -0
- package/dist/wasm/tree-sitter-javascript.wasm +0 -0
- package/dist/wasm/tree-sitter-python.wasm +0 -0
- package/dist/wasm/tree-sitter-rust.wasm +0 -0
- package/dist/wasm/web-tree-sitter.wasm +0 -0
- package/docs/SPEC.md +1021 -0
- package/examples/browser-example.html +610 -0
- package/examples/node-example.ts +215 -0
- package/package.json +107 -0
- package/wasm/tree-sitter-java.wasm +0 -0
- package/wasm/tree-sitter-javascript.wasm +0 -0
- package/wasm/tree-sitter-python.wasm +0 -0
- package/wasm/tree-sitter-rust.wasm +0 -0
package/dist/analyzer.js
ADDED
|
@@ -0,0 +1,1010 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circle-IR Analyzer
|
|
3
|
+
*
|
|
4
|
+
* Main entry point for analyzing source code and producing Circle-IR output.
|
|
5
|
+
* This is the core analyzer - for LLM-enhanced analysis, use circle-ir-ai.
|
|
6
|
+
*/
|
|
7
|
+
import { initParser, parse, extractMeta, extractTypes, extractCalls, extractImports, extractExports, buildCFG, buildDFG, collectAllNodes, } from './core/index.js';
|
|
8
|
+
import { analyzeTaint, getDefaultConfig, detectUnresolved, propagateTaint, analyzeInterprocedural, findTaintBridges, analyzeConstantPropagation, isFalsePositive, isCorrelatedPredicateFP } from './analysis/index.js';
|
|
9
|
+
import { registerBuiltinPlugins } from './languages/index.js';
|
|
10
|
+
import { logger } from './utils/logger.js';
|
|
11
|
+
/**
 * Find getter methods that return tainted fields from constructor assignments.
 * This enables detection of taint through: constructor param → field → getter return.
 *
 * A method is reported when it takes no parameters and a tainted field of the
 * same class is found under one of these names (checked in this order):
 *   1. the name derived from the getter convention (`getFoo` / `isFoo` → `foo`)
 *   2. the method name itself (`foo()` returning field `foo`)
 * At most one source is emitted per method.
 *
 * @param {Iterable<object>} types - Extracted types; each needs `name` and
 *   `methods` (entries with `name`, `parameters`, `start_line`).
 * @param {Map<string, object>} instanceFieldTaint - Field name → taint record
 *   carrying `className` and `sourceParam`.
 * @param {string} sourceCode - Unused; kept so existing callers stay valid.
 * @returns {Array<object>} Source records of type 'constructor_field'.
 */
function findGetterSources(types, instanceFieldTaint, sourceCode) {
    const sources = [];
    if (instanceFieldTaint.size === 0) {
        return sources;
    }
    for (const type of types) {
        for (const method of type.methods) {
            // Only parameterless methods are treated as getters.
            if (method.parameters.length !== 0) {
                continue;
            }
            const methodName = method.name;
            // Derive the field name implied by the getter naming convention, if any.
            let conventionalField = null;
            if (methodName.startsWith('get') && methodName.length > 3) {
                // getField -> field (lowercase first letter)
                conventionalField = methodName.charAt(3).toLowerCase() + methodName.substring(4);
            }
            else if (methodName.startsWith('is') && methodName.length > 2) {
                // isField -> field
                conventionalField = methodName.charAt(2).toLowerCase() + methodName.substring(3);
            }
            // The method name itself is always a candidate, which covers the
            // "name() returns this.name" pattern. No separate pass over the map
            // is needed: map keys are unique, so a lookup by method name finds
            // the only entry such a pass could match.
            const candidates = conventionalField
                ? [conventionalField, methodName]
                : [methodName];
            for (const fieldName of candidates) {
                const fieldTaint = instanceFieldTaint.get(fieldName);
                if (fieldTaint && fieldTaint.className === type.name) {
                    sources.push({
                        type: 'constructor_field',
                        location: `${type.name}.${methodName}() returns tainted field '${fieldName}' (from constructor param '${fieldTaint.sourceParam}')`,
                        severity: 'high',
                        line: method.start_line,
                        confidence: 0.95,
                    });
                    break; // One source per method is enough
                }
            }
        }
    }
    return sources;
}
|
|
79
|
+
// Set to true once initAnalyzer() has completed successfully.
let initialized = false;
/**
 * Initialize the analyzer: register the built-in language plugins and
 * initialize the parser.
 *
 * Calls made after a successful initialization return immediately. If parser
 * initialization rejects, the flag stays false, so a later call tries again.
 * analyze() invokes this itself when the analyzer has not been initialized yet.
 *
 * @param {object} [options] - Only `wasmPath` and `languagePaths` are read;
 *   both are forwarded to initParser().
 * @returns {Promise<void>}
 */
export async function initAnalyzer(options = {}) {
    if (!initialized) {
        // Register built-in language plugins
        registerBuiltinPlugins();
        const { wasmPath, languagePaths } = options;
        await initParser({ wasmPath, languagePaths });
        initialized = true;
    }
}
|
|
94
|
+
/**
 * Build enriched metadata section from analysis results.
 *
 * Every method is classified by role and risk:
 *   - role: 'controller' (method annotation contains a web-mapping/controller
 *     marker), else 'repository' (class name contains "repository"/"dao" or a
 *     method annotation contains "Repository"), else 'service' (class name
 *     contains "service" or a method annotation contains "Service"), else
 *     'utility'. Class-name checks are case-insensitive.
 *   - risk: 'high' if any sink has `method` equal to the method name, else
 *     'medium' if any source does, else 'low'.
 * Methods that are both 'utility' and 'low' are omitted.
 *
 * @param {Iterable<object>} types - Extracted types with `name` and `methods`
 *   (each method has `name` and `annotations`, an array of strings).
 * @param {*} _calls - Unused.
 * @param {Array<object>} sources - Taint sources; only `method` is read.
 * @param {Array<object>} sinks - Taint sinks; only `method` is read.
 * @returns {{functions: (Array<object>|undefined)}} `functions` is undefined
 *   when nothing was classified.
 */
function buildEnriched(types, _calls, sources, sinks) {
    // 'RestController' is already matched by 'Controller'; it is listed for readability.
    const controllerMarkers = ['RequestMapping', 'GetMapping', 'PostMapping', 'RestController', 'Controller'];
    // Build the lookup sets once instead of scanning sources/sinks for every method.
    const sourceMethodNames = new Set(sources.map(s => s.method));
    const sinkMethodNames = new Set(sinks.map(s => s.method));
    const functions = [];
    for (const type of types) {
        // Class-level facts do not change per method, so compute them once.
        // A missing name simply matches none of the patterns.
        const lowerTypeName = (type.name ?? '').toLowerCase();
        const isRepositoryType = lowerTypeName.includes('repository') || lowerTypeName.includes('dao');
        const isServiceType = lowerTypeName.includes('service');
        for (const method of type.methods) {
            const hasAnnotation = marker => method.annotations.some(a => a.includes(marker));
            // Determine role based on annotations and naming
            let role = 'utility';
            let trustBoundary = 'internal';
            if (controllerMarkers.some(marker => hasAnnotation(marker))) {
                role = 'controller';
                trustBoundary = 'entry_point';
            }
            else if (isRepositoryType || hasAnnotation('Repository')) {
                role = 'repository';
            }
            else if (isServiceType || hasAnnotation('Service')) {
                role = 'service';
            }
            // Determine risk level: a sink outranks a source.
            let risk = 'low';
            if (sinkMethodNames.has(method.name)) {
                risk = 'high';
            }
            else if (sourceMethodNames.has(method.name)) {
                risk = 'medium';
            }
            // Only include functions with meaningful roles
            if (role !== 'utility' || risk !== 'low') {
                functions.push({
                    method_name: `${type.name}.${method.name}`,
                    role,
                    risk,
                    trust_boundary: trustBoundary,
                    summary: `${role} method in ${type.name}`,
                });
            }
        }
    }
    return {
        functions: functions.length > 0 ? functions : undefined,
    };
}
|
|
149
|
+
/**
 * Analyze source code and produce Circle-IR output.
 *
 * Steps, in order:
 *   1. Lazily initialize the analyzer, parse the code, and collect the AST
 *      node types relevant to the language in one traversal.
 *   2. Extract meta, types, calls, imports, exports, CFG and DFG.
 *   3. Run a preliminary taint pass to find inter-procedural parameter sources,
 *      feed them (plus methods annotated 'sanitizer') to constant propagation,
 *      then run the taint pass whose result is returned.
 *   4. Add getter sources for tainted constructor fields, and drop sinks that
 *      are unreachable, use clean array elements/variables, or are sanitized.
 *   5. With sources and sinks: compute verified flows, add array / collection /
 *      parameter flows, and run inter-procedural analysis.
 *      With sources but no sinks: run inter-procedural analysis only and, if it
 *      yields sinks, pair each with the first source.
 *   6. Detect unresolved items and build the enriched section.
 *
 * @param {string} code - Source text to analyze.
 * @param {string} filePath - Passed to extractMeta() and included in logs.
 * @param {string} language - 'rust', 'python', 'javascript' or 'typescript'
 *   select their own node-type sets; any other value uses the Java set.
 * @param {object} [options] - `taintConfig` overrides getDefaultConfig(); the
 *   whole object is forwarded to initAnalyzer() on first use.
 * @returns {Promise<object>} `{ meta, types, calls, cfg, dfg, taint, imports,
 *   exports, unresolved, enriched }`.
 */
export async function analyze(code, filePath, language, options = {}) {
    if (!initialized) {
        await initAnalyzer(options);
    }
    logger.debug('Analyzing file', { filePath, language, codeLength: code.length });
    // Parse the code
    const tree = await parse(code, language);
    logger.trace('Parsed AST', { rootNodeType: tree.rootNode.type });
    // Collect all node types in a single traversal for better performance.
    // Different languages have different AST node types.
    let nodeTypesToCollect;
    if (language === 'rust') {
        nodeTypesToCollect = new Set([
            'call_expression',
            'macro_invocation',
            'function_item',
            'struct_item',
            'impl_item',
            'enum_item',
            'trait_item',
            'mod_item',
            'use_declaration',
            'let_declaration',
            'field_expression',
            'scoped_identifier',
        ]);
    }
    else if (language === 'python') {
        nodeTypesToCollect = new Set([
            'call',
            'function_definition',
            'class_definition',
            'import_statement',
            'import_from_statement',
            'assignment',
            'attribute',
            'subscript',
        ]);
    }
    else if (language === 'javascript' || language === 'typescript') {
        nodeTypesToCollect = new Set([
            'call_expression',
            'new_expression',
            'class_declaration',
            'function_declaration',
            'arrow_function',
            'method_definition',
            'variable_declaration',
            'lexical_declaration',
            'import_statement',
            'export_statement',
            'member_expression',
            'assignment_expression',
        ]);
    }
    else {
        // Java AST nodes (also the fallback for any other language value)
        nodeTypesToCollect = new Set([
            'method_invocation',
            'object_creation_expression',
            'class_declaration',
            'method_declaration',
            'constructor_declaration',
            'field_declaration',
            'import_declaration',
            'interface_declaration',
            'enum_declaration',
        ]);
    }
    const nodeCache = collectAllNodes(tree.rootNode, nodeTypesToCollect);
    // Extract all components using the cached nodes
    const meta = extractMeta(code, tree, filePath, language);
    const types = extractTypes(tree, nodeCache, language);
    const calls = extractCalls(tree, nodeCache, language);
    const imports = extractImports(tree, language);
    const exports = extractExports(types);
    const cfg = buildCFG(tree, language);
    const dfg = buildDFG(tree, nodeCache, language);
    // Collect names of methods carrying the 'sanitizer' annotation
    const sanitizerMethods = [];
    for (const type of types) {
        for (const method of type.methods) {
            if (method.annotations.includes('sanitizer')) {
                sanitizerMethods.push(method.name);
            }
        }
    }
    // First, do a preliminary taint analysis to find inter-procedural parameter sources.
    // These are passed to constant propagation so it can track taint from method parameters.
    const baseConfig = options.taintConfig ?? getDefaultConfig();
    const preliminaryTaint = analyzeTaint(calls, types, baseConfig);
    const taintedParameters = [];
    for (const source of preliminaryTaint.sources) {
        if (source.type !== 'interprocedural_param') {
            continue;
        }
        // Location format: "ParamType paramName in methodName"
        const match = source.location.match(/(\S+)\s+(\S+)\s+in\s+(\S+)/);
        if (match) {
            taintedParameters.push({
                methodName: match[3],
                paramName: match[2],
            });
        }
    }
    // Run constant propagation with tainted parameters
    const constPropResult = analyzeConstantPropagation(tree, code, {
        sanitizerMethods,
        taintedParameters,
    });
    // Analyze taint with config
    const taint = analyzeTaint(calls, types, baseConfig);
    // Local helpers. They are nested so they can close over `taint` and
    // `constPropResult` without adding module-level names.
    // True when any step of the path is flagged by constant propagation.
    const pathHasFalsePositive = (path) => path.some(step => isFalsePositive(constPropResult, step.line, step.variable).isFalsePositive);
    // Append candidate flows that are not already present (same source and
    // sink line) and that pass the optional acceptance check.
    const appendNewFlows = (candidates, isAcceptable) => {
        for (const flow of candidates || []) {
            const isDuplicate = taint.flows.some(f => f.source_line === flow.source_line && f.sink_line === flow.sink_line);
            if (!isDuplicate && (!isAcceptable || isAcceptable(flow))) {
                taint.flows.push(flow);
            }
        }
    };
    // Add inter-procedural sinks that are reachable and not already recorded
    // on the same line. The dead-code check is applied on both call paths so
    // that the earlier unreachable-sink filter cannot be bypassed.
    const addPropagatedSinks = (propagatedSinks) => {
        for (const sink of propagatedSinks) {
            if (!constPropResult.unreachableLines.has(sink.line) &&
                !taint.sinks.some(s => s.line === sink.line)) {
                taint.sinks.push(sink);
            }
        }
    };
    // Shape the inter-procedural result for the output.
    const describeInterprocedural = (interProc) => {
        const taintBridges = findTaintBridges(interProc);
        return {
            tainted_methods: Array.from(interProc.taintedMethods),
            taint_bridges: taintBridges,
            method_flows: interProc.callEdges
                .filter(edge => interProc.taintedMethods.has(edge.calleeMethod))
                .map(edge => ({
                    caller: edge.callerMethod,
                    callee: edge.calleeMethod,
                    call_line: edge.callLine,
                    tainted_args: edge.taintedArgs,
                    returns_taint: interProc.taintedReturns.has(edge.calleeMethod),
                })),
        };
    };
    // Add sources for getters that return tainted constructor fields
    const getterSources = findGetterSources(types, constPropResult.instanceFieldTaint, code);
    taint.sources.push(...getterSources);
    logger.debug('Initial taint analysis', {
        sources: taint.sources.length,
        sinks: taint.sinks.length,
        sanitizers: taint.sanitizers?.length ?? 0,
        getterSources: getterSources.length,
    });
    // Filter sinks that are in dead code (unreachable)
    taint.sinks = taint.sinks.filter(sink => !constPropResult.unreachableLines.has(sink.line));
    // Filter sinks that use clean array elements (strong updates)
    taint.sinks = filterCleanArraySinks(taint.sinks, calls, constPropResult.taintedArrayElements, constPropResult.symbols);
    // Filter sinks that use variables proven clean by constant propagation (strong updates)
    taint.sinks = filterCleanVariableSinks(taint.sinks, calls, constPropResult.tainted, constPropResult.symbols, dfg, constPropResult.sanitizedVars, constPropResult.synchronizedLines);
    // Filter sinks that are wrapped by sanitizers on the same line
    taint.sinks = filterSanitizedSinks(taint.sinks, taint.sanitizers ?? [], calls);
    if (taint.sources.length > 0 && taint.sinks.length > 0) {
        // Propagate taint through dataflow to find verified flows
        const propagationResult = propagateTaint(dfg, calls, taint.sources, taint.sinks, taint.sanitizers ?? []);
        // Filter flows using constant propagation (eliminate false positives)
        const verifiedFlows = propagationResult.flows.filter(flow => {
            // Sink in dead code
            if (constPropResult.unreachableLines.has(flow.sink.line)) {
                return false;
            }
            // Any step flagged as a false positive rules the flow out
            if (pathHasFalsePositive(flow.path)) {
                return false;
            }
            // Correlated predicates: if the sink is under condition !C and the
            // taint was added under condition C, they are mutually exclusive
            return !isCorrelatedPredicateFP(constPropResult, flow);
        });
        // Convert flows to TaintFlowInfo format
        taint.flows = verifiedFlows.map(flow => ({
            source_line: flow.source.line,
            sink_line: flow.sink.line,
            source_type: flow.source.type,
            sink_type: flow.sink.type,
            path: flow.path.map(step => ({
                variable: step.variable,
                line: step.line,
                type: step.type,
            })),
            confidence: flow.confidence,
            sanitized: flow.sanitized,
        }));
        // Add array element flows that DFG-based analysis might miss
        appendNewFlows(detectArrayElementFlows(calls, taint.sources, taint.sinks, constPropResult.taintedArrayElements, constPropResult.unreachableLines));
        // Add collection/iterator flows that DFG-based analysis might miss,
        // applying the same filtering as DFG-based flows
        appendNewFlows(detectCollectionFlows(calls, taint.sources, taint.sinks, constPropResult.tainted, constPropResult.unreachableLines), flow => {
            const flowForCheck = {
                source: { line: flow.source_line, type: flow.source_type },
                sink: { line: flow.sink_line, type: flow.sink_type },
                path: flow.path.map(p => ({ variable: p.variable, line: p.line })),
            };
            if (isCorrelatedPredicateFP(constPropResult, flowForCheck)) {
                return false;
            }
            return !pathHasFalsePositive(flow.path);
        });
        // Add direct parameter-to-sink flows that DFG might miss
        appendNewFlows(detectParameterSinkFlows(types, calls, taint.sources, taint.sinks, constPropResult.unreachableLines));
        // Perform inter-procedural analysis
        const interProc = analyzeInterprocedural(types, calls, dfg, taint.sources, taint.sinks, taint.sanitizers ?? [], {
            taintedVariables: constPropResult.tainted,
        });
        addPropagatedSinks(interProc.propagatedSinks);
        taint.interprocedural = describeInterprocedural(interProc);
    }
    else if (taint.sources.length > 0 && taint.sinks.length === 0) {
        // Perform inter-procedural analysis even when no initial sinks
        // (can detect external taint escapes)
        const interProc = analyzeInterprocedural(types, calls, dfg, taint.sources, [], // No initial sinks
        taint.sanitizers ?? [], {
            taintedVariables: constPropResult.tainted,
        });
        // Add inter-procedural sinks (e.g., external_taint_escape)
        addPropagatedSinks(interProc.propagatedSinks);
        if (interProc.taintedMethods.size > 0 || interProc.propagatedSinks.length > 0) {
            taint.interprocedural = describeInterprocedural(interProc);
        }
        // If we found new sinks, create flows from the first source to each of them
        if (taint.sinks.length > 0) {
            const [firstSource] = taint.sources;
            taint.flows = taint.sinks.map(sink => ({
                source_line: firstSource.line,
                sink_line: sink.line,
                source_type: firstSource.type,
                sink_type: sink.type,
                path: [
                    { variable: 'input', line: firstSource.line, type: 'source' },
                    { variable: 'input', line: sink.line, type: 'sink' },
                ],
                confidence: firstSource.confidence * sink.confidence,
                sanitized: false,
            }));
        }
    }
    // Detect unresolved items
    const unresolved = detectUnresolved(calls, types, dfg);
    // Build enriched section
    const enriched = buildEnriched(types, calls, taint.sources, taint.sinks);
    logger.debug('Analysis complete', {
        filePath,
        finalSources: taint.sources.length,
        finalSinks: taint.sinks.length,
        flows: taint.flows?.length ?? 0,
        unresolvedItems: unresolved.length,
    });
    return {
        meta,
        types,
        calls,
        cfg,
        dfg,
        taint,
        imports,
        exports,
        unresolved,
        enriched,
    };
}
|
|
482
|
+
/**
 * Run the analysis pipeline over one source text and return the compact
 * API response (sources, sinks, vulnerabilities and timing information).
 *
 * The analyzer is initialized with `options` when it has not been initialized
 * yet. Sinks located on lines that constant propagation reports as unreachable
 * are removed before vulnerabilities are computed.
 *
 * @param {string} code - Source text to analyze.
 * @param {string} filePath - Path of the analyzed file (not read by this function).
 * @param {string} language - 'rust', 'python', 'javascript' and 'typescript'
 *   select dedicated node-type sets; any other value selects the fallback set.
 * @param {object} [options] - Forwarded to `initAnalyzer`; `options.taintConfig`
 *   replaces the default taint configuration when it is not null/undefined.
 * @returns {Promise<object>} `{ success, analysis: { sources, sinks, vulnerabilities },
 *   meta: { parseTimeMs, analysisTimeMs, totalTimeMs } }` with times rounded to whole milliseconds.
 */
export async function analyzeForAPI(code, filePath, language, options = {}) {
    const startTime = performance.now();
    if (!initialized) {
        await initAnalyzer(options);
    }

    const parseStart = performance.now();
    const tree = await parse(code, language);
    const parseTime = performance.now() - parseStart;

    const analysisStart = performance.now();

    // Pick the node types to gather so the tree is walked only once.
    let nodeTypesToCollect;
    switch (language) {
        case 'rust':
            nodeTypesToCollect = new Set([
                'call_expression', 'macro_invocation', 'function_item', 'struct_item',
                'impl_item', 'enum_item', 'trait_item', 'mod_item', 'use_declaration',
                'let_declaration', 'field_expression', 'scoped_identifier',
            ]);
            break;
        case 'python':
            nodeTypesToCollect = new Set([
                'call', 'function_definition', 'class_definition', 'import_statement',
                'import_from_statement', 'assignment', 'attribute', 'subscript',
            ]);
            break;
        case 'javascript':
        case 'typescript':
            nodeTypesToCollect = new Set([
                'call_expression', 'new_expression', 'class_declaration', 'function_declaration',
                'arrow_function', 'method_definition', 'variable_declaration', 'lexical_declaration',
                'import_statement', 'export_statement',
            ]);
            break;
        default:
            nodeTypesToCollect = new Set([
                'method_invocation', 'object_creation_expression', 'class_declaration',
                'method_declaration', 'field_declaration', 'import_declaration',
                'interface_declaration', 'enum_declaration',
            ]);
            break;
    }

    const nodeCache = collectAllNodes(tree.rootNode, nodeTypesToCollect);
    const types = extractTypes(tree, nodeCache, language);
    const calls = extractCalls(tree, nodeCache, language);

    // Constant propagation supplies the unreachable lines and symbol data used below.
    const constPropResult = analyzeConstantPropagation(tree, code);
    const config = options.taintConfig ?? getDefaultConfig();
    const taint = analyzeTaint(calls, types, config);

    // Sinks that sit in dead code are not reported.
    const isReachable = (sink) => !constPropResult.unreachableLines.has(sink.line);
    const filteredSinks = taint.sinks.filter(isReachable);

    const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);

    const analysisTime = performance.now() - analysisStart;
    const totalTime = performance.now() - startTime;

    const analysis = {
        sources: taint.sources,
        sinks: filteredSinks,
        vulnerabilities,
    };
    const meta = {
        parseTimeMs: Math.round(parseTime),
        analysisTimeMs: Math.round(analysisTime),
        totalTimeMs: Math.round(totalTime),
    };
    return { success: true, analysis, meta };
}
|
|
553
|
+
/**
 * Find potential vulnerabilities by matching sources to sinks.
 *
 * Every source is paired with every sink whose type is listed as compatible
 * with the source's type; unknown source types are compatible with nothing.
 * When both `calls` and `constPropResult` are supplied, a `sql_injection` sink
 * is dropped if the first call found on the sink's line passes, as its first
 * argument, either
 *  - a variable that constant propagation knows as a string and that is not tainted, or
 *  - a concatenation-free expression whose variable has a known string value
 *    containing '?', '$' or ':' (presumably a parameterized-query placeholder).
 *
 * Results are de-duplicated on (source line, sink line, type), keeping the
 * entry with the highest confidence, and sorted by descending confidence.
 *
 * @param {Array<object>} sources - Taint sources; `type`, `line` are read here.
 * @param {Array<object>} sinks - Taint sinks; `type`, `line`, `cwe`, `confidence` are read here.
 * @param {Array<object>} [calls] - Call records with `location.line` and `arguments`.
 * @param {object} [constPropResult] - Object exposing `symbols` (Map) and `tainted` (Set).
 * @returns {Array<object>} Vulnerability records `{ type, cwe, severity, source, sink, confidence }`.
 */
function findVulnerabilities(sources, sinks, calls, constPropResult) {
    // A Map (rather than an object literal) so that source types such as
    // 'constructor' or '__proto__' cannot resolve to inherited members.
    const sinkTypesBySourceType = new Map([
        ['http_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'xpath_injection', 'ldap_injection', 'ssrf']],
        ['http_body', ['sql_injection', 'command_injection', 'deserialization', 'xxe', 'xss', 'code_injection']],
        ['http_header', ['sql_injection', 'xss', 'ssrf']],
        ['http_cookie', ['sql_injection', 'xss']],
        ['http_path', ['path_traversal', 'sql_injection', 'ssrf']],
        ['http_query', ['sql_injection', 'command_injection', 'xss', 'ssrf']],
        ['io_input', ['command_injection', 'path_traversal', 'deserialization', 'xxe', 'code_injection', 'xss']],
        ['env_input', ['command_injection', 'path_traversal']],
        ['db_input', ['xss', 'sql_injection']],
        ['file_input', ['deserialization', 'xxe', 'path_traversal', 'command_injection', 'code_injection', 'xss']],
        ['network_input', ['sql_injection', 'command_injection', 'xss', 'ssrf']],
        ['config_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'ssrf']],
        ['interprocedural_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'xpath_injection', 'ldap_injection', 'ssrf', 'code_injection']],
        ['plugin_param', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'code_injection']],
        ['constructor_field', ['sql_injection', 'command_injection', 'path_traversal', 'xss', 'xpath_injection', 'ldap_injection', 'ssrf', 'code_injection', 'deserialization', 'xxe']],
    ]);

    // Decides whether constant propagation shows the SQL sink's query to be safe.
    const isSafeSqlSink = (sink) => {
        if (sink.type !== 'sql_injection') {
            return false;
        }
        const sinkCall = calls.find((c) => c.location.line === sink.line);
        if (!sinkCall || sinkCall.arguments.length === 0) {
            return false;
        }
        const queryArg = sinkCall.arguments[0];
        const { symbols, tainted } = constPropResult;
        if (queryArg.variable) {
            const isConstantString = symbols.get(queryArg.variable)?.type === 'string';
            if (isConstantString && !tainted.has(queryArg.variable)) {
                return true;
            }
        }
        // NOTE(review): this branch does not consult `tainted`; the original
        // computed an "any argument tainted" flag here but its guard was always true.
        if (queryArg.expression && !queryArg.expression.includes('+')) {
            const queryValue = symbols.get(queryArg.variable || '')?.value;
            if (typeof queryValue === 'string' && /[?$:]/.test(queryValue)) {
                return true;
            }
        }
        return false;
    };

    // The verdict depends only on the sink, so compute it lazily once per sink
    // instead of once per source/sink pair.
    const safeVerdicts = new Map();
    const isProvablySafe = (sink) => {
        if (!calls || !constPropResult) {
            return false;
        }
        let verdict = safeVerdicts.get(sink);
        if (verdict === undefined) {
            verdict = isSafeSqlSink(sink);
            safeVerdicts.set(sink, verdict);
        }
        return verdict;
    };

    const vulnerabilities = [];
    for (const source of sources) {
        const compatibleSinkTypes = sinkTypesBySourceType.get(source.type) ?? [];
        for (const sink of sinks) {
            if (!compatibleSinkTypes.includes(sink.type) || isProvablySafe(sink)) {
                continue;
            }
            const confidence = calculateVulnConfidence(source, sink);
            vulnerabilities.push({
                type: sink.type,
                cwe: sink.cwe,
                severity: sink.confidence > 0.9 ? 'critical' : 'high',
                source: {
                    line: source.line,
                    type: source.type,
                },
                sink: {
                    line: sink.line,
                    type: sink.type,
                },
                confidence,
            });
        }
    }

    // Deduplicate: keep the most confident report per (source line, sink line, type).
    const bestByKey = new Map();
    for (const vuln of vulnerabilities) {
        const key = `${vuln.source.line}:${vuln.sink.line}:${vuln.type}`;
        const existing = bestByKey.get(key);
        if (!existing || vuln.confidence > existing.confidence) {
            bestByKey.set(key, vuln);
        }
    }
    const dedupedVulns = Array.from(bestByKey.values());
    dedupedVulns.sort((a, b) => b.confidence - a.confidence);
    return dedupedVulns;
}
|
|
640
|
+
/**
 * Score how likely a source/sink pair is a real vulnerability.
 *
 * Starts from 0.5, adds 0.3 when the two lines are fewer than 10 apart
 * (0.15 when fewer than 50 apart), adds 0.1 when `source.severity` is 'high',
 * multiplies by `sink.confidence` and caps the result at 1.0.
 *
 * @param {{line: number, severity?: string}} source
 * @param {{line: number, confidence: number}} sink
 * @returns {number} Confidence score, at most 1.0.
 */
function calculateVulnConfidence(source, sink) {
    const distance = Math.abs(source.line - sink.line);
    const proximityBonus = distance < 10 ? 0.3 : (distance < 50 ? 0.15 : 0);
    const severityBonus = source.severity === 'high' ? 0.1 : 0;
    const scaled = (0.5 + proximityBonus + severityBonus) * sink.confidence;
    return Math.min(scaled, 1.0);
}
|
|
655
|
+
/**
 * Try to reduce a simple arithmetic expression to an integer string.
 *
 * Identifiers that `symbols` knows as 'int' or 'float' are replaced by their
 * values; if the result consists only of digits, whitespace, `+ - * / ( ) .`
 * it is evaluated and the floor of the result is returned as a string.
 * In every other case (unknown identifiers, syntax errors, non-finite or
 * non-numeric results) the original `expr` is returned unchanged.
 *
 * @param {string} expr - Expression text, e.g. an array index such as `i + 1`.
 * @param {Map<string, {type: string, value: *}>} symbols - Known symbol values.
 * @returns {string} The evaluated integer as a string, or `expr` itself.
 */
function evaluateSimpleExpression(expr, symbols) {
    let evaluated = expr;
    for (const [name, val] of symbols) {
        if (val.type !== 'int' && val.type !== 'float') {
            continue;
        }
        // Escape the name so regex metacharacters in it are matched literally
        // instead of throwing or changing the pattern.
        const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Only whole identifiers are substituted ('i' must not match inside 'idx' or '$i').
        const wholeIdentifier = new RegExp(`(?<![\\w$])${escapedName}(?![\\w$])`, 'g');
        // Parenthesize so negative values stay valid ("2 - (-1)", not "2--1");
        // a replacer function avoids '$' pattern expansion in the value text.
        evaluated = evaluated.replace(wholeIdentifier, () => `(${String(val.value)})`);
    }
    try {
        // Function() is only reached for strings made of digits, whitespace,
        // arithmetic operators, parentheses and dots.
        if (/^[\d\s+\-*/().]+$/.test(evaluated)) {
            const result = Function('"use strict"; return (' + evaluated + ')')();
            // Infinity / NaN (e.g. division by zero) are not usable results.
            if (typeof result === 'number' && Number.isFinite(result)) {
                return String(Math.floor(result));
            }
        }
    }
    catch {
        // Best effort: an expression that cannot be evaluated is returned as is.
    }
    return expr;
}
|
|
676
|
+
/**
 * Remove sinks that are fed from an array element known to be clean.
 *
 * A sink is dropped when any call on its line has an argument of the form
 * `name[index]` where `name` has an entry in `taintedArrayElements` and
 * neither the (evaluated) index nor the wildcard '*' is in that entry.
 * Arrays without an entry never cause a sink to be dropped.
 *
 * @param {Array<object>} sinks - Sinks with a `line`.
 * @param {Array<object>} calls - Calls with `location.line` and `arguments`.
 * @param {Map<string, Set<string>>} taintedArrayElements - Tainted indices per array name.
 * @param {Map<string, object>} symbols - Passed to `evaluateSimpleExpression` to resolve indices.
 * @returns {Array<object>} The sinks that were not dropped.
 */
function filterCleanArraySinks(sinks, calls, taintedArrayElements, symbols) {
    const callsByLine = new Map();
    for (const call of calls) {
        const { line } = call.location;
        const bucket = callsByLine.get(line);
        if (bucket == null) {
            callsByLine.set(line, [call]);
        }
        else {
            bucket.push(call);
        }
    }

    // True when the argument reads an element of a tracked array that is not tainted.
    const readsCleanElement = (arg) => {
        const access = arg.expression?.match(/^(\w+)\[(\d+|[^[\]]+)\]$/);
        if (!access) {
            return false;
        }
        const [, arrayName, rawIndex] = access;
        const index = evaluateSimpleExpression(rawIndex, symbols);
        const taintedIndices = taintedArrayElements.get(arrayName);
        if (taintedIndices === undefined) {
            return false;
        }
        return !taintedIndices.has(index) && !taintedIndices.has('*');
    };

    return sinks.filter((sink) => {
        const callsAtSink = callsByLine.get(sink.line) ?? [];
        const hasCleanElementArg = callsAtSink.some((call) => call.arguments.some(readsCleanElement));
        return !hasCleanElementArg;
    });
}
|
|
705
|
+
/**
 * Remove sinks for which some call on the sink's line receives only
 * arguments that are known to be clean.
 *
 * An argument counts as clean when it is a plain variable (no '[' in its
 * expression) and, checked in this order:
 *  - it is not a field (per `dfg.defs`), unless the sink line is in `synchronizedLines`;
 *  - it is listed in `sanitizedVars` (clean), otherwise
 *  - it is not listed in `taintedVars`, and
 *  - `symbols` holds a value for it whose type is not 'unknown'.
 * Variables are looked up both as `method:name` and as plain `name`.
 * Calls without arguments never cause a sink to be dropped.
 *
 * @param {Array<object>} sinks
 * @param {Array<object>} calls - Calls with `location.line`, `in_method`, `arguments`.
 * @param {Set<string>} taintedVars
 * @param {Map<string, object>} symbols
 * @param {object} [dfg] - Data-flow graph; only `defs` (`kind`, `variable`) is read.
 * @param {Set<string>} [sanitizedVars]
 * @param {Set<number>} [synchronizedLines]
 * @returns {Array<object>} The sinks that were not dropped.
 */
function filterCleanVariableSinks(sinks, calls, taintedVars, symbols, dfg, sanitizedVars, synchronizedLines) {
    const fieldNames = new Set();
    for (const def of (dfg ? dfg.defs : [])) {
        if (def.kind === 'field') {
            fieldNames.add(def.variable);
        }
    }

    const callsByLine = new Map();
    for (const call of calls) {
        const { line } = call.location;
        const bucket = callsByLine.get(line);
        if (bucket == null) {
            callsByLine.set(line, [call]);
        }
        else {
            bucket.push(call);
        }
    }

    const isCleanArgument = (arg, methodName, inSynchronizedBlock) => {
        if (!arg.variable || arg.expression?.includes('[')) {
            return false;
        }
        const varName = arg.variable;
        const scopedName = methodName ? `${methodName}:${varName}` : varName;
        // Fields are only trusted inside a synchronized block.
        if (fieldNames.has(varName) && !inSynchronizedBlock) {
            return false;
        }
        if (sanitizedVars?.has(scopedName) || sanitizedVars?.has(varName)) {
            return true;
        }
        if (taintedVars.has(scopedName) || taintedVars.has(varName)) {
            return false;
        }
        const known = symbols.get(scopedName) ?? symbols.get(varName);
        return Boolean(known && known.type !== 'unknown');
    };

    return sinks.filter((sink) => {
        const callsAtSink = callsByLine.get(sink.line) ?? [];
        const inSynchronizedBlock = synchronizedLines?.has(sink.line) ?? false;
        for (const call of callsAtSink) {
            // Every argument is examined, as in a plain loop over the arguments.
            const suspectArgs = call.arguments.filter((arg) => !isCleanArgument(arg, call.in_method, inSynchronizedBlock));
            if (suspectArgs.length === 0 && call.arguments.length > 0) {
                return false;
            }
        }
        return true;
    });
}
|
|
758
|
+
/**
 * Remove sinks whose argument is wrapped in a matching sanitizer call on the
 * same line.
 *
 * A sink is dropped when a sanitizer recorded on the sink's line lists the
 * sink's type in `sanitizes` and some argument expression of a call on that
 * line contains the sanitizer invocation text: `Class.method(` when the
 * sanitizer's `method` is written as `Class.method()`, otherwise `method(`.
 * With no sanitizers the input array itself is returned.
 *
 * @param {Array<object>} sinks - Sinks with `line` and `type`.
 * @param {Array<object>} [sanitizers] - Entries with `line`, `method`, `sanitizes`.
 * @param {Array<object>} calls - Calls with `location.line` and `arguments`.
 * @returns {Array<object>} The sinks that were not dropped.
 */
function filterSanitizedSinks(sinks, sanitizers, calls) {
    if (!sanitizers || sanitizers.length === 0) {
        return sinks;
    }

    // Append `item` to the list stored under `key`, creating the list on first use.
    const appendTo = (index, key, item) => {
        const bucket = index.get(key);
        if (bucket == null) {
            index.set(key, [item]);
        }
        else {
            bucket.push(item);
        }
    };
    const sanitizersByLine = new Map();
    for (const san of sanitizers) {
        appendTo(sanitizersByLine, san.line, san);
    }
    const callsByLine = new Map();
    for (const call of calls) {
        appendTo(callsByLine, call.location.line, call);
    }

    // True when the argument's expression text contains a call to the sanitizer.
    const invokesSanitizer = (arg, san) => {
        const expr = arg.expression || '';
        const parsed = san.method.match(/(?:(\w+)\.)?(\w+)\(\)/);
        if (!parsed) {
            return false;
        }
        const [, sanClassName, sanMethodName] = parsed;
        const needle = sanClassName ? `${sanClassName}.${sanMethodName}(` : `${sanMethodName}(`;
        return expr.includes(needle);
    };

    return sinks.filter((sink) => {
        const lineSanitizers = sanitizersByLine.get(sink.line) ?? [];
        const lineCalls = callsByLine.get(sink.line) ?? [];
        const isSanitized = lineSanitizers.some((san) => san.sanitizes.includes(sink.type) &&
            lineCalls.some((call) => call.arguments.some((arg) => invokesSanitizer(arg, san))));
        return !isSanitized;
    });
}
|
|
805
|
+
/**
 * Report flows into sinks whose call arguments use a tainted variable or read
 * from a tainted collection.
 *
 * For every reachable sink and every argument of the calls on its line:
 *  - a tainted argument variable yields a flow with confidence 0.8;
 *  - each accessor (`getLast`, `getFirst`, `get`, `next`, `poll`, `peek`,
 *    `toArray`) found in the argument expression as `name.accessor(` with a
 *    tainted `name` yields a flow with confidence 0.75.
 * Taint is looked up by plain name and by `method:name`. Every flow is
 * attributed to the first entry of `sources`; without one, nothing is reported.
 *
 * @param {Array<object>} calls - Calls with `location.line`, `in_method`, `arguments`.
 * @param {Array<object>} sources - Taint sources; only `sources[0]` is used.
 * @param {Array<object>} sinks - Sinks with `line` and `type`.
 * @param {Set<string>} taintedVars
 * @param {Set<number>} unreachableLines - Sinks on these lines are ignored.
 * @returns {Array<object>} Flow records.
 */
function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLines) {
    const collectionMethods = ['getLast', 'getFirst', 'get', 'next', 'poll', 'peek', 'toArray'];
    const accessorPatterns = collectionMethods.map((method) => new RegExp(`(\\w+)\\.${method}\\(`));

    const callsByLine = new Map();
    for (const call of calls) {
        const { line } = call.location;
        const bucket = callsByLine.get(line);
        if (bucket == null) {
            callsByLine.set(line, [call]);
        }
        else {
            bucket.push(call);
        }
    }

    const isTaintedWithin = (call, name) => {
        const scopedName = call.in_method ? `${call.in_method}:${name}` : name;
        return taintedVars.has(name) || taintedVars.has(scopedName);
    };
    const describeFlow = (source, sink, variable, confidence) => ({
        source_line: source.line,
        sink_line: sink.line,
        source_type: source.type,
        sink_type: sink.type,
        path: [
            { variable, line: source.line, type: 'source' },
            { variable, line: sink.line, type: 'sink' },
        ],
        confidence,
        sanitized: false,
    });

    const flows = [];
    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const call of callsByLine.get(sink.line) ?? []) {
            for (const arg of call.arguments) {
                // Direct use of a tainted variable.
                if (arg.variable && isTaintedWithin(call, arg.variable)) {
                    const source = sources[0];
                    if (source) {
                        flows.push(describeFlow(source, sink, arg.variable, 0.8));
                    }
                }
                if (!arg.expression) {
                    continue;
                }
                // Element read from a tainted collection.
                for (const pattern of accessorPatterns) {
                    const hit = arg.expression.match(pattern);
                    if (!hit) {
                        continue;
                    }
                    const collectionVar = hit[1];
                    if (!isTaintedWithin(call, collectionVar)) {
                        continue;
                    }
                    const source = sources[0];
                    if (source) {
                        flows.push(describeFlow(source, sink, collectionVar, 0.75));
                    }
                }
            }
        }
    }
    return flows;
}
|
|
875
|
+
/**
 * Report flows into sinks that are passed a tainted array element.
 *
 * For every reachable sink, each call argument whose expression is exactly
 * `name[index]` is checked against `taintedArrayElements`: when the index
 * text (taken verbatim) or the wildcard '*' is recorded for `name`, a flow
 * with confidence 0.85 is reported. Every flow is attributed to the first
 * entry of `sources`; without one, nothing is reported.
 *
 * @param {Array<object>} calls - Calls with `location.line` and `arguments`.
 * @param {Array<object>} sources - Taint sources; only `sources[0]` is used.
 * @param {Array<object>} sinks - Sinks with `line` and `type`.
 * @param {Map<string, Set<string>>} taintedArrayElements - Tainted indices per array name.
 * @param {Set<number>} unreachableLines - Sinks on these lines are ignored.
 * @returns {Array<object>} Flow records.
 */
function detectArrayElementFlows(calls, sources, sinks, taintedArrayElements, unreachableLines) {
    const callsByLine = new Map();
    for (const call of calls) {
        const { line } = call.location;
        const bucket = callsByLine.get(line);
        if (bucket == null) {
            callsByLine.set(line, [call]);
        }
        else {
            bucket.push(call);
        }
    }

    const flows = [];
    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const call of callsByLine.get(sink.line) ?? []) {
            for (const arg of call.arguments) {
                const access = arg.expression?.match(/^(\w+)\[(\d+|[^[\]]+)\]$/);
                if (!access) {
                    continue;
                }
                const [, arrayName, indexStr] = access;
                const taintedIndices = taintedArrayElements.get(arrayName);
                if (!taintedIndices) {
                    continue;
                }
                if (!(taintedIndices.has(indexStr) || taintedIndices.has('*'))) {
                    continue;
                }
                const source = sources[0];
                if (!source) {
                    continue;
                }
                flows.push({
                    source_line: source.line,
                    sink_line: sink.line,
                    source_type: source.type,
                    sink_type: sink.type,
                    path: [
                        { variable: arrayName, line: source.line, type: 'source' },
                        { variable: `${arrayName}[${indexStr}]`, line: sink.line, type: 'sink' },
                    ],
                    confidence: 0.85,
                    sanitized: false,
                });
            }
        }
    }
    return flows;
}
|
|
921
|
+
/**
 * Detect flows where a tainted method parameter is passed straight to a sink.
 *
 * Sources of type 'interprocedural_param' are indexed by the method and
 * parameter names parsed from their `location` text ("<type> <param> in <method>").
 * For every reachable sink, each call on the sink's line that belongs to such
 * a method (`call.in_method`) is inspected; an argument variable equal to a
 * tainted parameter name produces one flow per (source line, sink line) pair
 * with confidence 0.75.
 *
 * @param {*} types - Not used by this function.
 * @param {Array<object>} calls - Calls with `location.line`, `in_method`, `arguments`.
 * @param {Array<object>} sources - Taint sources (`type`, `location`, `line`).
 * @param {Array<object>} sinks - Sinks with `line` and `type`.
 * @param {Set<number>} unreachableLines - Sinks on these lines are ignored.
 * @returns {Array<object>} Flow records.
 */
function detectParameterSinkFlows(types, calls, sources, sinks, unreachableLines) {
    const flows = [];

    // method name -> (parameter name -> source)
    const paramSourcesByMethod = new Map();
    for (const source of sources) {
        if (source.type !== 'interprocedural_param') {
            continue;
        }
        const parsed = source.location.match(/(\S+)\s+(\S+)\s+in\s+(\S+)/);
        if (!parsed) {
            continue;
        }
        const [, , paramName, methodName] = parsed;
        if (!paramSourcesByMethod.get(methodName)) {
            paramSourcesByMethod.set(methodName, new Map());
        }
        paramSourcesByMethod.get(methodName).set(paramName, source);
    }
    if (paramSourcesByMethod.size === 0) {
        return flows;
    }

    const callsByLine = new Map();
    for (const call of calls) {
        const { line } = call.location;
        const bucket = callsByLine.get(line);
        if (bucket == null) {
            callsByLine.set(line, [call]);
        }
        else {
            bucket.push(call);
        }
    }

    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const call of callsByLine.get(sink.line) ?? []) {
            const methodParamSources = call.in_method ? paramSourcesByMethod.get(call.in_method) : undefined;
            if (!methodParamSources) {
                continue;
            }
            for (const arg of call.arguments) {
                const paramSource = arg.variable ? methodParamSources.get(arg.variable) : undefined;
                if (!paramSource) {
                    continue;
                }
                // Report each source-line / sink-line pair only once.
                const alreadyReported = flows.some((flow) => flow.source_line === paramSource.line && flow.sink_line === sink.line);
                if (alreadyReported) {
                    continue;
                }
                flows.push({
                    source_line: paramSource.line,
                    sink_line: sink.line,
                    source_type: paramSource.type,
                    sink_type: sink.type,
                    path: [
                        { variable: arg.variable, line: paramSource.line, type: 'source' },
                        { variable: arg.variable, line: sink.line, type: 'sink' },
                    ],
                    confidence: 0.75, // Lower confidence for interprocedural
                    sanitized: false,
                });
            }
        }
    }
    return flows;
}
|
|
998
|
+
/**
 * Report whether the analyzer has been initialized.
 *
 * @returns {boolean} The current value of the module-level `initialized` flag.
 */
export function isAnalyzerInitialized() {
    return initialized;
}
|
|
1004
|
+
/**
 * Mark the analyzer as not initialized (mainly for testing).
 *
 * Only clears the module-level `initialized` flag; no other state is touched here.
 */
export function resetAnalyzer() {
    initialized = false;
}
|
|
1010
|
+
//# sourceMappingURL=analyzer.js.map
|