circle-ir 3.8.4 → 3.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -5
- package/dist/analysis/dfg-verifier.d.ts +3 -14
- package/dist/analysis/dfg-verifier.js +43 -74
- package/dist/analysis/dfg-verifier.js.map +1 -1
- package/dist/analysis/interprocedural.d.ts +5 -1
- package/dist/analysis/interprocedural.js +62 -60
- package/dist/analysis/interprocedural.js.map +1 -1
- package/dist/analysis/metrics/index.d.ts +2 -0
- package/dist/analysis/metrics/index.js +2 -0
- package/dist/analysis/metrics/index.js.map +1 -0
- package/dist/analysis/metrics/metric-pass.d.ts +27 -0
- package/dist/analysis/metrics/metric-pass.js +2 -0
- package/dist/analysis/metrics/metric-pass.js.map +1 -0
- package/dist/analysis/metrics/metric-runner.d.ts +21 -0
- package/dist/analysis/metrics/metric-runner.js +47 -0
- package/dist/analysis/metrics/metric-runner.js.map +1 -0
- package/dist/analysis/metrics/passes/cohesion-metrics-pass.d.ts +21 -0
- package/dist/analysis/metrics/passes/cohesion-metrics-pass.js +100 -0
- package/dist/analysis/metrics/passes/cohesion-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/complexity-metrics-pass.d.ts +15 -0
- package/dist/analysis/metrics/passes/complexity-metrics-pass.js +76 -0
- package/dist/analysis/metrics/passes/complexity-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/composite-metrics-pass.d.ts +17 -0
- package/dist/analysis/metrics/passes/composite-metrics-pass.js +77 -0
- package/dist/analysis/metrics/passes/composite-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/coupling-metrics-pass.d.ts +19 -0
- package/dist/analysis/metrics/passes/coupling-metrics-pass.js +94 -0
- package/dist/analysis/metrics/passes/coupling-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/data-flow-metrics-pass.d.ts +14 -0
- package/dist/analysis/metrics/passes/data-flow-metrics-pass.js +25 -0
- package/dist/analysis/metrics/passes/data-flow-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/documentation-metrics-pass.d.ts +15 -0
- package/dist/analysis/metrics/passes/documentation-metrics-pass.js +64 -0
- package/dist/analysis/metrics/passes/documentation-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/halstead-metrics-pass.d.ts +16 -0
- package/dist/analysis/metrics/passes/halstead-metrics-pass.js +95 -0
- package/dist/analysis/metrics/passes/halstead-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/inheritance-metrics-pass.d.ts +18 -0
- package/dist/analysis/metrics/passes/inheritance-metrics-pass.js +73 -0
- package/dist/analysis/metrics/passes/inheritance-metrics-pass.js.map +1 -0
- package/dist/analysis/metrics/passes/size-metrics-pass.d.ts +11 -0
- package/dist/analysis/metrics/passes/size-metrics-pass.js +64 -0
- package/dist/analysis/metrics/passes/size-metrics-pass.js.map +1 -0
- package/dist/analysis/passes/circular-dependency-pass.d.ts +18 -0
- package/dist/analysis/passes/circular-dependency-pass.js +39 -0
- package/dist/analysis/passes/circular-dependency-pass.js.map +1 -0
- package/dist/analysis/passes/constant-propagation-pass.d.ts +22 -0
- package/dist/analysis/passes/constant-propagation-pass.js +44 -0
- package/dist/analysis/passes/constant-propagation-pass.js.map +1 -0
- package/dist/analysis/passes/cross-file-pass.d.ts +27 -0
- package/dist/analysis/passes/cross-file-pass.js +102 -0
- package/dist/analysis/passes/cross-file-pass.js.map +1 -0
- package/dist/analysis/passes/dead-code-pass.d.ts +25 -0
- package/dist/analysis/passes/dead-code-pass.js +117 -0
- package/dist/analysis/passes/dead-code-pass.js.map +1 -0
- package/dist/analysis/passes/dependency-fan-out-pass.d.ts +19 -0
- package/dist/analysis/passes/dependency-fan-out-pass.js +35 -0
- package/dist/analysis/passes/dependency-fan-out-pass.js.map +1 -0
- package/dist/analysis/passes/interprocedural-pass.d.ts +29 -0
- package/dist/analysis/passes/interprocedural-pass.js +169 -0
- package/dist/analysis/passes/interprocedural-pass.js.map +1 -0
- package/dist/analysis/passes/language-sources-pass.d.ts +76 -0
- package/dist/analysis/passes/language-sources-pass.js +491 -0
- package/dist/analysis/passes/language-sources-pass.js.map +1 -0
- package/dist/analysis/passes/leaked-global-pass.d.ts +34 -0
- package/dist/analysis/passes/leaked-global-pass.js +108 -0
- package/dist/analysis/passes/leaked-global-pass.js.map +1 -0
- package/dist/analysis/passes/missing-await-pass.d.ts +29 -0
- package/dist/analysis/passes/missing-await-pass.js +90 -0
- package/dist/analysis/passes/missing-await-pass.js.map +1 -0
- package/dist/analysis/passes/missing-public-doc-pass.d.ts +35 -0
- package/dist/analysis/passes/missing-public-doc-pass.js +148 -0
- package/dist/analysis/passes/missing-public-doc-pass.js.map +1 -0
- package/dist/analysis/passes/n-plus-one-pass.d.ts +29 -0
- package/dist/analysis/passes/n-plus-one-pass.js +100 -0
- package/dist/analysis/passes/n-plus-one-pass.js.map +1 -0
- package/dist/analysis/passes/null-deref-pass.d.ts +32 -0
- package/dist/analysis/passes/null-deref-pass.js +130 -0
- package/dist/analysis/passes/null-deref-pass.js.map +1 -0
- package/dist/analysis/passes/orphan-module-pass.d.ts +21 -0
- package/dist/analysis/passes/orphan-module-pass.js +38 -0
- package/dist/analysis/passes/orphan-module-pass.js.map +1 -0
- package/dist/analysis/passes/resource-leak-pass.d.ts +43 -0
- package/dist/analysis/passes/resource-leak-pass.js +156 -0
- package/dist/analysis/passes/resource-leak-pass.js.map +1 -0
- package/dist/analysis/passes/sink-filter-pass.d.ts +39 -0
- package/dist/analysis/passes/sink-filter-pass.js +231 -0
- package/dist/analysis/passes/sink-filter-pass.js.map +1 -0
- package/dist/analysis/passes/stale-doc-ref-pass.d.ts +21 -0
- package/dist/analysis/passes/stale-doc-ref-pass.js +96 -0
- package/dist/analysis/passes/stale-doc-ref-pass.js.map +1 -0
- package/dist/analysis/passes/string-concat-loop-pass.d.ts +26 -0
- package/dist/analysis/passes/string-concat-loop-pass.js +87 -0
- package/dist/analysis/passes/string-concat-loop-pass.js.map +1 -0
- package/dist/analysis/passes/sync-io-async-pass.d.ts +28 -0
- package/dist/analysis/passes/sync-io-async-pass.js +80 -0
- package/dist/analysis/passes/sync-io-async-pass.js.map +1 -0
- package/dist/analysis/passes/taint-matcher-pass.d.ts +24 -0
- package/dist/analysis/passes/taint-matcher-pass.js +71 -0
- package/dist/analysis/passes/taint-matcher-pass.js.map +1 -0
- package/dist/analysis/passes/taint-propagation-pass.d.ts +22 -0
- package/dist/analysis/passes/taint-propagation-pass.js +266 -0
- package/dist/analysis/passes/taint-propagation-pass.js.map +1 -0
- package/dist/analysis/passes/todo-in-prod-pass.d.ts +28 -0
- package/dist/analysis/passes/todo-in-prod-pass.js +71 -0
- package/dist/analysis/passes/todo-in-prod-pass.js.map +1 -0
- package/dist/analysis/passes/unchecked-return-pass.d.ts +34 -0
- package/dist/analysis/passes/unchecked-return-pass.js +106 -0
- package/dist/analysis/passes/unchecked-return-pass.js.map +1 -0
- package/dist/analysis/passes/unused-variable-pass.d.ts +36 -0
- package/dist/analysis/passes/unused-variable-pass.js +150 -0
- package/dist/analysis/passes/unused-variable-pass.js.map +1 -0
- package/dist/analysis/passes/variable-shadowing-pass.d.ts +41 -0
- package/dist/analysis/passes/variable-shadowing-pass.js +211 -0
- package/dist/analysis/passes/variable-shadowing-pass.js.map +1 -0
- package/dist/analysis/path-finder.d.ts +3 -13
- package/dist/analysis/path-finder.js +48 -63
- package/dist/analysis/path-finder.js.map +1 -1
- package/dist/analysis/taint-matcher.js +8 -1
- package/dist/analysis/taint-matcher.js.map +1 -1
- package/dist/analysis/taint-propagation.d.ts +5 -1
- package/dist/analysis/taint-propagation.js +44 -41
- package/dist/analysis/taint-propagation.js.map +1 -1
- package/dist/analyzer.d.ts +42 -1
- package/dist/analyzer.js +234 -1476
- package/dist/analyzer.js.map +1 -1
- package/dist/browser/circle-ir.js +3413 -1271
- package/dist/core/circle-ir-core.cjs +360 -106
- package/dist/core/circle-ir-core.js +360 -106
- package/dist/core/extractors/imports.js +18 -0
- package/dist/core/extractors/imports.js.map +1 -1
- package/dist/graph/analysis-pass.d.ts +68 -0
- package/dist/graph/analysis-pass.js +51 -0
- package/dist/graph/analysis-pass.js.map +1 -0
- package/dist/graph/code-graph.d.ts +92 -0
- package/dist/graph/code-graph.js +262 -0
- package/dist/graph/code-graph.js.map +1 -0
- package/dist/graph/import-graph.d.ts +33 -0
- package/dist/graph/import-graph.js +170 -0
- package/dist/graph/import-graph.js.map +1 -0
- package/dist/graph/index.d.ts +4 -0
- package/dist/graph/index.js +5 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/graph/project-graph.d.ts +43 -0
- package/dist/graph/project-graph.js +80 -0
- package/dist/graph/project-graph.js.map +1 -0
- package/dist/graph/scope-graph.d.ts +63 -0
- package/dist/graph/scope-graph.js +89 -0
- package/dist/graph/scope-graph.js.map +1 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/resolution/cross-file.js +52 -19
- package/dist/resolution/cross-file.js.map +1 -1
- package/dist/types/index.d.ts +151 -0
- package/docs/SPEC.md +10 -6
- package/package.json +1 -1
package/dist/analyzer.js
CHANGED
|
@@ -3,532 +3,66 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Main entry point for analyzing source code and producing Circle-IR output.
|
|
5
5
|
* This is the core static analyzer. LLM-based verification and discovery are out of scope for this library.
|
|
6
|
+
*
|
|
7
|
+
* The analysis pipeline runs twenty-one sequential passes over a shared CodeGraph:
|
|
8
|
+
* 1. TaintMatcherPass — config-based source/sink extraction
|
|
9
|
+
* 2. ConstantPropagationPass — dead-code detection, symbol table, field taint
|
|
10
|
+
* 3. LanguageSourcesPass — language-specific sources/sinks (JS, Python, getters)
|
|
11
|
+
* 4. SinkFilterPass — four-stage false-positive elimination
|
|
12
|
+
* 5. TaintPropagationPass — DFG-based flow verification
|
|
13
|
+
* 6. InterproceduralPass — cross-method taint propagation
|
|
14
|
+
* 7. DeadCodePass — CFG blocks unreachable from entry (CWE-561)
|
|
15
|
+
* 8. MissingAwaitPass — unawaited async calls in JS/TS (CWE-252)
|
|
16
|
+
* 9. NPlusOnePass — DB/HTTP calls inside loop bodies (CWE-1049)
|
|
17
|
+
* 10. MissingPublicDocPass — public methods/types without doc comments
|
|
18
|
+
* 11. TodoInProdPass — TODO/FIXME/HACK markers in production code
|
|
19
|
+
* 12. StringConcatLoopPass — string += inside loops, O(n²) allocations (CWE-1046)
|
|
20
|
+
* 13. SyncIoAsyncPass — blocking *Sync calls inside async functions (CWE-1050)
|
|
21
|
+
* 14. UncheckedReturnPass — ignored boolean return from File.delete etc. (CWE-252)
|
|
22
|
+
* 15. NullDerefPass — null-assigned var dereferenced without guard (CWE-476)
|
|
23
|
+
* 16. ResourceLeakPass — stream/connection opened but never closed (CWE-772)
|
|
24
|
+
* 17. VariableShadowingPass — inner scope re-declares outer name (CWE-1109)
|
|
25
|
+
* 18. LeakedGlobalPass — assignment without declaration in JS/TS (CWE-1109)
|
|
26
|
+
* 19. UnusedVariablePass — local variable declared but value never read (CWE-561)
|
|
27
|
+
* 20. DependencyFanOutPass — module imports 20+ other modules (architecture smell)
|
|
28
|
+
* 21. StaleDocRefPass — doc comment references unknown symbol (CWE: none)
|
|
6
29
|
*/
|
|
7
30
|
import { initParser, parse, extractMeta, extractTypes, extractCalls, extractImports, extractExports, buildCFG, buildDFG, collectAllNodes, } from './core/index.js';
|
|
8
|
-
import { analyzeTaint, getDefaultConfig, detectUnresolved,
|
|
9
|
-
import { registerBuiltinPlugins
|
|
31
|
+
import { analyzeTaint, getDefaultConfig, detectUnresolved, analyzeConstantPropagation, isFalsePositive, } from './analysis/index.js';
|
|
32
|
+
import { registerBuiltinPlugins } from './languages/index.js';
|
|
10
33
|
import { logger } from './utils/logger.js';
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
const fieldTaint = instanceFieldTaint.get(fieldName);
|
|
44
|
-
if (fieldTaint && fieldTaint.className === type.name) {
|
|
45
|
-
sources.push({
|
|
46
|
-
type: 'constructor_field',
|
|
47
|
-
location: `${type.name}.${methodName}() returns tainted field '${fieldName}' (from constructor param '${fieldTaint.sourceParam}')`,
|
|
48
|
-
severity: 'high',
|
|
49
|
-
line: method.start_line,
|
|
50
|
-
confidence: 0.95,
|
|
51
|
-
});
|
|
52
|
-
break; // Found a match, no need to check more fields
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
// Also check for direct field name match (e.g., method name() returns this.name)
|
|
57
|
-
for (const [fieldName, fieldTaint] of instanceFieldTaint) {
|
|
58
|
-
if (fieldTaint.className === type.name) {
|
|
59
|
-
// Check if method name matches field name directly (common pattern)
|
|
60
|
-
if (methodName === fieldName && method.parameters.length === 0) {
|
|
61
|
-
// Avoid duplicates
|
|
62
|
-
const alreadyAdded = sources.some(s => s.location.includes(`${type.name}.${methodName}()`));
|
|
63
|
-
if (!alreadyAdded) {
|
|
64
|
-
sources.push({
|
|
65
|
-
type: 'constructor_field',
|
|
66
|
-
location: `${type.name}.${methodName}() returns tainted field '${fieldName}' (from constructor param '${fieldTaint.sourceParam}')`,
|
|
67
|
-
severity: 'high',
|
|
68
|
-
line: method.start_line,
|
|
69
|
-
confidence: 0.95,
|
|
70
|
-
});
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
return sources;
|
|
78
|
-
}
|
|
79
|
-
/**
|
|
80
|
-
* DOM XSS sink property patterns.
|
|
81
|
-
* Used to detect sinks in property assignments like: element.innerHTML = value
|
|
82
|
-
*/
|
|
83
|
-
const JS_DOM_XSS_SINKS = [
|
|
84
|
-
{ pattern: /\.innerHTML\s*=/, type: 'xss', cwe: 'CWE-79', severity: 'critical' },
|
|
85
|
-
{ pattern: /\.outerHTML\s*=/, type: 'xss', cwe: 'CWE-79', severity: 'critical' },
|
|
86
|
-
{ pattern: /document\.write\s*\(/, type: 'xss', cwe: 'CWE-79', severity: 'critical' },
|
|
87
|
-
{ pattern: /document\.writeln\s*\(/, type: 'xss', cwe: 'CWE-79', severity: 'critical' },
|
|
88
|
-
{ pattern: /\.insertAdjacentHTML\s*\(/, type: 'xss', cwe: 'CWE-79', severity: 'critical' },
|
|
89
|
-
{ pattern: /\.src\s*=/, type: 'xss', cwe: 'CWE-79', severity: 'high' },
|
|
90
|
-
{ pattern: /\.href\s*=/, type: 'xss', cwe: 'CWE-79', severity: 'high' },
|
|
91
|
-
];
|
|
92
|
-
/**
|
|
93
|
-
* Tainted JavaScript property access patterns.
|
|
94
|
-
* Used to detect sources in variable assignments like: var x = req.query.id
|
|
95
|
-
*/
|
|
96
|
-
const JS_TAINTED_PATTERNS = [
|
|
97
|
-
{ pattern: /\breq\.query\b/, type: 'http_param' },
|
|
98
|
-
{ pattern: /\breq\.params\b/, type: 'http_param' },
|
|
99
|
-
{ pattern: /\breq\.body\b/, type: 'http_body' },
|
|
100
|
-
{ pattern: /\breq\.headers\b/, type: 'http_header' },
|
|
101
|
-
{ pattern: /\breq\.cookies\b/, type: 'http_cookie' },
|
|
102
|
-
{ pattern: /\breq\.url\b/, type: 'http_path' },
|
|
103
|
-
{ pattern: /\breq\.path\b/, type: 'http_path' },
|
|
104
|
-
{ pattern: /\breq\.originalUrl\b/, type: 'http_path' },
|
|
105
|
-
{ pattern: /\breq\.files?\b/, type: 'file_input' },
|
|
106
|
-
{ pattern: /\brequest\.query\b/, type: 'http_param' },
|
|
107
|
-
{ pattern: /\brequest\.params\b/, type: 'http_param' },
|
|
108
|
-
{ pattern: /\brequest\.body\b/, type: 'http_body' },
|
|
109
|
-
{ pattern: /\brequest\.headers\b/, type: 'http_header' },
|
|
110
|
-
{ pattern: /\bctx\.query\b/, type: 'http_param' },
|
|
111
|
-
{ pattern: /\bctx\.params\b/, type: 'http_param' },
|
|
112
|
-
{ pattern: /\bctx\.request\b/, type: 'http_body' },
|
|
113
|
-
{ pattern: /\bprocess\.env\b/, type: 'env_input' },
|
|
114
|
-
{ pattern: /\bprocess\.argv\b/, type: 'io_input' },
|
|
115
|
-
{ pattern: /\blocation\.search\b/, type: 'http_param' },
|
|
116
|
-
{ pattern: /\blocation\.hash\b/, type: 'http_param' },
|
|
117
|
-
{ pattern: /\blocation\.href\b/, type: 'http_path' },
|
|
118
|
-
{ pattern: /\bdocument\.getElementById\b/, type: 'dom_input' },
|
|
119
|
-
{ pattern: /\bdocument\.querySelector\b/, type: 'dom_input' },
|
|
120
|
-
{ pattern: /\.value\b/, type: 'dom_input' },
|
|
121
|
-
];
|
|
122
|
-
/**
|
|
123
|
-
* Python/Flask/Django tainted request access patterns.
|
|
124
|
-
* Used to detect sources in assignments like: user_id = request.args.get('id')
|
|
125
|
-
* Also covers subscript access: user_id = request.args['id']
|
|
126
|
-
*/
|
|
127
|
-
const PYTHON_TAINTED_PATTERNS = [
|
|
128
|
-
{ pattern: /\brequest\.args\b/, type: 'http_param' },
|
|
129
|
-
{ pattern: /\brequest\.form\b/, type: 'http_body' },
|
|
130
|
-
{ pattern: /\brequest\.json\b/, type: 'http_body' },
|
|
131
|
-
{ pattern: /\brequest\.data\b/, type: 'http_body' },
|
|
132
|
-
{ pattern: /\brequest\.files?\b/, type: 'file_input' },
|
|
133
|
-
{ pattern: /\brequest\.headers?\b/, type: 'http_header' },
|
|
134
|
-
{ pattern: /\brequest\.cookies\b/, type: 'http_cookie' },
|
|
135
|
-
{ pattern: /\brequest\.GET\b/, type: 'http_param' },
|
|
136
|
-
{ pattern: /\brequest\.POST\b/, type: 'http_body' },
|
|
137
|
-
{ pattern: /\brequest\.META\b/, type: 'http_header' },
|
|
138
|
-
{ pattern: /\brequest\.FILES\b/, type: 'file_input' },
|
|
139
|
-
{ pattern: /\brequest\.query_params\b/, type: 'http_param' },
|
|
140
|
-
{ pattern: /\brequest\.path_params\b/, type: 'http_param' },
|
|
141
|
-
// Flask raw query/body strings
|
|
142
|
-
{ pattern: /\brequest\.query_string\b/, type: 'http_param' },
|
|
143
|
-
{ pattern: /\brequest\.get_data\s*\(/, type: 'http_body' },
|
|
144
|
-
// Request wrapper helper methods (common in OWASP-style benchmarks and real wrappers)
|
|
145
|
-
{ pattern: /\bget_form_parameter\s*\(/, type: 'http_body' },
|
|
146
|
-
{ pattern: /\bget_query_parameter\s*\(/, type: 'http_param' },
|
|
147
|
-
{ pattern: /\bget_header_value\s*\(/, type: 'http_header' },
|
|
148
|
-
{ pattern: /\bget_cookie_value\s*\(/, type: 'http_cookie' },
|
|
149
|
-
];
|
|
150
|
-
/**
|
|
151
|
-
* Find JavaScript taint sources from variable assignments.
|
|
152
|
-
* Detects patterns like: var userId = req.query.id
|
|
153
|
-
*/
|
|
154
|
-
function findJavaScriptAssignmentSources(sourceCode, language) {
|
|
155
|
-
const sources = [];
|
|
156
|
-
// Only apply to JavaScript/TypeScript
|
|
157
|
-
if (!['javascript', 'typescript'].includes(language)) {
|
|
158
|
-
return sources;
|
|
159
|
-
}
|
|
160
|
-
const lines = sourceCode.split('\n');
|
|
161
|
-
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
|
|
162
|
-
const line = lines[lineNum];
|
|
163
|
-
const lineNumber = lineNum + 1;
|
|
164
|
-
// Look for variable assignments: var/let/const x = ...
|
|
165
|
-
// or simple assignments: x = ...
|
|
166
|
-
const assignmentMatch = line.match(/(?:(?:var|let|const)\s+)?(\w+)\s*=\s*(.+)/);
|
|
167
|
-
if (assignmentMatch) {
|
|
168
|
-
const varName = assignmentMatch[1];
|
|
169
|
-
const rhs = assignmentMatch[2];
|
|
170
|
-
// Check if RHS contains a tainted pattern
|
|
171
|
-
for (const { pattern, type } of JS_TAINTED_PATTERNS) {
|
|
172
|
-
if (pattern.test(rhs)) {
|
|
173
|
-
// Don't add duplicates
|
|
174
|
-
const alreadyExists = sources.some(s => s.line === lineNumber && s.type === type);
|
|
175
|
-
if (!alreadyExists) {
|
|
176
|
-
sources.push({
|
|
177
|
-
type,
|
|
178
|
-
location: `${varName} = ${rhs.trim().substring(0, 50)}${rhs.length > 50 ? '...' : ''}`,
|
|
179
|
-
severity: 'high',
|
|
180
|
-
line: lineNumber,
|
|
181
|
-
confidence: 1.0,
|
|
182
|
-
variable: varName,
|
|
183
|
-
});
|
|
184
|
-
}
|
|
185
|
-
break; // Found a match, no need to check more patterns
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
return sources;
|
|
191
|
-
}
|
|
192
|
-
/**
|
|
193
|
-
* Find Python taint sources from variable assignments and subscript access.
|
|
194
|
-
* Detects patterns like: user_id = request.args.get('id') or request.args['id']
|
|
195
|
-
*/
|
|
196
|
-
function findPythonAssignmentSources(sourceCode, language) {
|
|
197
|
-
const sources = [];
|
|
198
|
-
if (language !== 'python') {
|
|
199
|
-
return sources;
|
|
200
|
-
}
|
|
201
|
-
const lines = sourceCode.split('\n');
|
|
202
|
-
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
|
|
203
|
-
const line = lines[lineNum];
|
|
204
|
-
const lineNumber = lineNum + 1;
|
|
205
|
-
// Skip comment lines
|
|
206
|
-
if (line.trimStart().startsWith('#'))
|
|
207
|
-
continue;
|
|
208
|
-
// Look for assignments: x = ... or x: type = ...
|
|
209
|
-
const assignmentMatch = line.match(/^(\s*\w[\w.]*)\s*(?::\s*\w[\w\[\], .]*)?\s*=\s*(.+)/);
|
|
210
|
-
if (assignmentMatch) {
|
|
211
|
-
const rhs = assignmentMatch[2];
|
|
212
|
-
for (const { pattern, type } of PYTHON_TAINTED_PATTERNS) {
|
|
213
|
-
if (pattern.test(rhs)) {
|
|
214
|
-
const varMatch = line.match(/^\s*(\w+)\s*/);
|
|
215
|
-
const varName = varMatch ? varMatch[1] : 'unknown';
|
|
216
|
-
const alreadyExists = sources.some(s => s.line === lineNumber && s.type === type);
|
|
217
|
-
if (!alreadyExists) {
|
|
218
|
-
sources.push({
|
|
219
|
-
type,
|
|
220
|
-
location: `${varName} = ${rhs.trim().substring(0, 50)}${rhs.length > 50 ? '...' : ''}`,
|
|
221
|
-
severity: 'high',
|
|
222
|
-
line: lineNumber,
|
|
223
|
-
confidence: 0.95,
|
|
224
|
-
variable: varName,
|
|
225
|
-
});
|
|
226
|
-
}
|
|
227
|
-
break;
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
return sources;
|
|
233
|
-
}
|
|
234
|
-
/**
|
|
235
|
-
* Build a map of tainted variable names → source line via simple forward
|
|
236
|
-
* line-by-line taint propagation for Python.
|
|
237
|
-
*
|
|
238
|
-
* Seeds from PYTHON_TAINTED_PATTERNS; propagates through assignments where the
|
|
239
|
-
* RHS contains a tainted variable. Uses per-key container taint to distinguish
|
|
240
|
-
* map['tainted_key'] from map['safe_key'] and conf.get(s,tainted_k) vs conf.get(s,safe_k).
|
|
241
|
-
*/
|
|
242
|
-
function buildPythonTaintedVars(sourceCode) {
|
|
243
|
-
const tainted = new Map();
|
|
244
|
-
// Per-key container taint: "map['key']" or "conf['section']['key']" → line number
|
|
245
|
-
const containerTainted = new Map();
|
|
246
|
-
const lines = sourceCode.split('\n');
|
|
247
|
-
for (let i = 0; i < lines.length; i++) {
|
|
248
|
-
const line = lines[i];
|
|
249
|
-
if (line.trimStart().startsWith('#'))
|
|
250
|
-
continue;
|
|
251
|
-
// Subscript assignment: container['key'] = value
|
|
252
|
-
// Tracks taint per-key so map['keyA']='safe' and map['keyB']=param are distinguished.
|
|
253
|
-
const subscriptAssign = line.match(/^\s*(\w+)\[(['"])([^'"]+)\2\]\s*=\s*(.+)$/);
|
|
254
|
-
if (subscriptAssign) {
|
|
255
|
-
const [, container, , key, rhs2] = subscriptAssign;
|
|
256
|
-
const isTaintedRhs = [...tainted.keys()].some(v => new RegExp(`\\b${v}\\b`).test(rhs2));
|
|
257
|
-
if (isTaintedRhs) {
|
|
258
|
-
containerTainted.set(`${container}['${key}']`, i + 1);
|
|
259
|
-
}
|
|
260
|
-
continue; // subscript assignments don't match simple variable regex below
|
|
261
|
-
}
|
|
262
|
-
// ConfigParser set: obj.set('section', 'key', value)
|
|
263
|
-
// Tracks per (section, key) so conf.get('s','keyA') and conf.get('s','keyB') are distinct.
|
|
264
|
-
const setCallMatch = line.match(/^\s*(\w+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/);
|
|
265
|
-
if (setCallMatch) {
|
|
266
|
-
const [, obj, , section, , key, rhs2] = setCallMatch;
|
|
267
|
-
const isTaintedRhs = [...tainted.keys()].some(v => new RegExp(`\\b${v}\\b`).test(rhs2));
|
|
268
|
-
if (isTaintedRhs) {
|
|
269
|
-
containerTainted.set(`${obj}['${section}']['${key}']`, i + 1);
|
|
270
|
-
}
|
|
271
|
-
continue;
|
|
272
|
-
}
|
|
273
|
-
// Augmented assignment: var += expr — taint if either side is tainted
|
|
274
|
-
const augAssign = line.match(/^\s*(\w+)\s*\+=\s*(.+)$/);
|
|
275
|
-
if (augAssign) {
|
|
276
|
-
const [, augLhs, augRhs] = augAssign;
|
|
277
|
-
const rhsTainted = [...tainted.keys()].some(v => new RegExp(`\\b${v}\\b`).test(augRhs));
|
|
278
|
-
if (rhsTainted || tainted.has(augLhs)) {
|
|
279
|
-
tainted.set(augLhs, tainted.get(augLhs) ?? (i + 1));
|
|
280
|
-
}
|
|
281
|
-
continue;
|
|
282
|
-
}
|
|
283
|
-
// For loop: for var in tainted_source — seed loop variable as tainted
|
|
284
|
-
const forLoopMatch = line.match(/^\s*for\s+(\w+)\s+in\s+(.+?)(?:\s*:\s*)?$/);
|
|
285
|
-
if (forLoopMatch) {
|
|
286
|
-
const [, iterVar, iterExpr] = forLoopMatch;
|
|
287
|
-
const isDirectSource = PYTHON_TAINTED_PATTERNS.some(p => p.pattern.test(iterExpr));
|
|
288
|
-
const isPropagated = [...tainted.keys()].some(v => new RegExp(`\\b${v}\\b`).test(iterExpr));
|
|
289
|
-
if (isDirectSource || isPropagated) {
|
|
290
|
-
tainted.set(iterVar, i + 1);
|
|
291
|
-
}
|
|
292
|
-
continue;
|
|
293
|
-
}
|
|
294
|
-
// Regular assignment: var = expr
|
|
295
|
-
const assignMatch = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
296
|
-
if (!assignMatch)
|
|
297
|
-
continue;
|
|
298
|
-
const [, lhs, rhs] = assignMatch;
|
|
299
|
-
const isDirectSource = PYTHON_TAINTED_PATTERNS.some(p => p.pattern.test(rhs));
|
|
300
|
-
let propagatedFrom;
|
|
301
|
-
// Per-key dict access: bar = container['key']
|
|
302
|
-
const dictAccessMatch = rhs.trim().match(/^(\w+)\[(['"])([^'"]+)\2\]$/);
|
|
303
|
-
if (dictAccessMatch) {
|
|
304
|
-
const [, container, , key] = dictAccessMatch;
|
|
305
|
-
if (containerTainted.has(`${container}['${key}']`)) {
|
|
306
|
-
propagatedFrom = `${container}['${key}']`;
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
// Per-key configparser get: bar = conf.get('section', 'key')
|
|
310
|
-
if (!propagatedFrom) {
|
|
311
|
-
const confGetMatch = rhs.trim().match(/^(\w+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/);
|
|
312
|
-
if (confGetMatch) {
|
|
313
|
-
const [, obj, , section, , key] = confGetMatch;
|
|
314
|
-
if (containerTainted.has(`${obj}['${section}']['${key}']`)) {
|
|
315
|
-
propagatedFrom = `${obj}['${section}']['${key}']`;
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
// Standard variable propagation (skip os.environ/os.getenv — safe env reads)
|
|
320
|
-
if (!propagatedFrom) {
|
|
321
|
-
const isSafeEnvRead = /\bos\.environ\.get\s*\(/.test(rhs) || /\bos\.getenv\s*\(/.test(rhs);
|
|
322
|
-
if (!isSafeEnvRead) {
|
|
323
|
-
propagatedFrom = [...tainted.keys()].find(v => new RegExp(`\\b${v}\\b`).test(rhs));
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
if (isDirectSource) {
|
|
327
|
-
tainted.set(lhs, i + 1);
|
|
328
|
-
}
|
|
329
|
-
else if (propagatedFrom !== undefined) {
|
|
330
|
-
tainted.set(lhs, i + 1);
|
|
331
|
-
}
|
|
332
|
-
else if (tainted.has(lhs)) {
|
|
333
|
-
// Variable overwritten — preserve taint for null-guard patterns like:
|
|
334
|
-
// if not param:
|
|
335
|
-
// param = ""
|
|
336
|
-
const prevNonBlank = lines.slice(0, i).reverse().find(l => l.trim() && !l.trimStart().startsWith('#'));
|
|
337
|
-
const isNullGuard = prevNonBlank !== undefined && (new RegExp(`^\\s*if\\s+not\\s+${lhs}\\s*:`).test(prevNonBlank) ||
|
|
338
|
-
new RegExp(`^\\s*if\\s+${lhs}\\s+is\\s+None\\s*:`).test(prevNonBlank));
|
|
339
|
-
if (!isNullGuard) {
|
|
340
|
-
tainted.delete(lhs);
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
return tainted;
|
|
345
|
-
}
|
|
346
|
-
/**
|
|
347
|
-
* Forward taint propagation for JavaScript/TypeScript.
|
|
348
|
-
* Tracks which local variables are tainted from HTTP request sources.
|
|
349
|
-
* Used to filter spurious XSS sinks where the argument is NOT actually tainted
|
|
350
|
-
* (e.g., res.send(stdout) where stdout is a callback param from exec(), not user input).
|
|
351
|
-
*/
|
|
352
|
-
function buildJavaScriptTaintedVars(sourceCode, language) {
|
|
353
|
-
if (!['javascript', 'typescript'].includes(language))
|
|
354
|
-
return new Map();
|
|
355
|
-
const tainted = new Map();
|
|
356
|
-
const lines = sourceCode.split('\n');
|
|
357
|
-
for (let i = 0; i < lines.length; i++) {
|
|
358
|
-
const line = lines[i];
|
|
359
|
-
// Skip comment lines
|
|
360
|
-
const trimmed = line.trimStart();
|
|
361
|
-
if (trimmed.startsWith('//') || trimmed.startsWith('*'))
|
|
362
|
-
continue;
|
|
363
|
-
// Match variable assignments: var/let/const x = rhs OR x = rhs
|
|
364
|
-
const assignMatch = line.match(/(?:(?:var|let|const)\s+)?(\w+)\s*=\s*(.+)/);
|
|
365
|
-
if (!assignMatch)
|
|
366
|
-
continue;
|
|
367
|
-
const [, lhs, rhs] = assignMatch;
|
|
368
|
-
// Skip keywords that look like assignments but aren't variable names
|
|
369
|
-
if (['if', 'while', 'for', 'return', 'true', 'false', 'null', 'undefined', 'case'].includes(lhs))
|
|
370
|
-
continue;
|
|
371
|
-
// Seed from direct source patterns (req.query.x, req.body, etc.)
|
|
372
|
-
const isDirectSource = JS_TAINTED_PATTERNS.some(p => p.pattern.test(rhs));
|
|
373
|
-
// Propagate from existing tainted variables
|
|
374
|
-
const isTaintedPropagation = tainted.size > 0 &&
|
|
375
|
-
[...tainted.keys()].some(v => new RegExp(`\\b${v}\\b`).test(rhs));
|
|
376
|
-
if (isDirectSource || isTaintedPropagation) {
|
|
377
|
-
tainted.set(lhs, i + 1);
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
return tainted;
|
|
381
|
-
}
|
|
382
|
-
/**
|
|
383
|
-
* Detect Python apostrophe-check sanitizer guards, e.g.:
|
|
384
|
-
* if "'" in bar:
|
|
385
|
-
* return # or raise / abort
|
|
386
|
-
* Returns the set of variable names that are guarded this way.
|
|
387
|
-
*/
|
|
388
|
-
function findPythonQuoteSanitizedVars(sourceCode) {
|
|
389
|
-
const sanitized = new Set();
|
|
390
|
-
const lines = sourceCode.split('\n');
|
|
391
|
-
for (let i = 0; i < lines.length - 1; i++) {
|
|
392
|
-
// Match any apostrophe/quote check: if "'" in var:, if '\'' in var:, if '"' in var:
|
|
393
|
-
// Uses full quoted-string pattern to handle Python's various literal forms.
|
|
394
|
-
const m = lines[i].match(/^\s*if\s+(?:'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")\s+in\s+(\w+)\s*:/);
|
|
395
|
-
if (!m)
|
|
396
|
-
continue;
|
|
397
|
-
// Look ahead up to 5 lines for a return/raise/abort/continue/break
|
|
398
|
-
// The guard body may be multi-line (e.g. RESPONSE += (...) \n return).
|
|
399
|
-
// Stop early if we encounter a line at the same or lesser indentation as the if (block exit).
|
|
400
|
-
const ifIndent = (lines[i].match(/^(\s*)/) ?? ['', ''])[1].length;
|
|
401
|
-
let foundExit = false;
|
|
402
|
-
for (let j = i + 1; j <= Math.min(i + 5, lines.length - 1); j++) {
|
|
403
|
-
const jLine = lines[j] ?? '';
|
|
404
|
-
if (!jLine.trim())
|
|
405
|
-
continue; // skip blank lines
|
|
406
|
-
const jIndent = (jLine.match(/^(\s*)/) ?? ['', ''])[1].length;
|
|
407
|
-
if (jIndent <= ifIndent)
|
|
408
|
-
break; // left the if-block
|
|
409
|
-
if (/^(return|raise|abort|continue|break)\b/.test(jLine.trim())) {
|
|
410
|
-
foundExit = true;
|
|
411
|
-
break;
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
if (foundExit) {
|
|
415
|
-
sanitized.add(m[1]);
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
return sanitized;
|
|
419
|
-
}
|
|
420
|
-
/**
 * Detect Python trust boundary violations:
 *   flask.session[key] = value   (or session[key] = value)
 * where the key or the value expression references a tainted variable.
 *
 * Comment lines (first non-blank character `#`) are ignored, and comparisons
 * such as `session[key] == value` are not treated as writes.
 *
 * @param {string} sourceCode - Python source text, scanned line by line (LF or CRLF).
 * @param {string} language - Language identifier; anything but 'python' yields [].
 * @param {Map<string, number>} taintedVars - Tainted variable names mapped to a
 *   line number. The smallest value is reported as the source line of every violation.
 * @returns {Array<{sourceLine: number, sinkLine: number}>} One entry per offending
 *   line; `sinkLine` is 1-based.
 */
function findPythonTrustBoundaryViolations(sourceCode, language, taintedVars) {
    if (language !== 'python' || taintedVars.size === 0) {
        return [];
    }
    // `=(?!=)` accepts an assignment but rejects the first `=` of an `==` comparison.
    const SESSION_WRITE = /(?:flask\.)?session\[([^\]]+)\]\s*=(?!=)\s*(.+)$/;
    // Escape regex metacharacters so a variable name is always matched literally.
    const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Build one whole-word matcher per tainted name up front instead of per line.
    const taintedMatchers = [...taintedVars.keys()].map((name) => new RegExp(`\\b${escapeRegExp(name)}\\b`));
    const referencesTainted = (expr) => taintedMatchers.some((matcher) => matcher.test(expr));
    const earliestSourceLine = [...taintedVars.values()].reduce((lowest, current) => Math.min(lowest, current), Infinity);

    const violations = [];
    // Split on CRLF as well as LF: a trailing '\r' would stop `(.+)$` from matching.
    const lines = sourceCode.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.trimStart().startsWith('#')) {
            continue;
        }
        const m = SESSION_WRITE.exec(line);
        if (!m) {
            continue;
        }
        const [, keyExpr, valueExpr] = m;
        if (referencesTainted(keyExpr) || referencesTainted(valueExpr)) {
            violations.push({ sourceLine: earliestSourceLine, sinkLine: i + 1 });
        }
    }
    return violations;
}
|
|
449
|
-
/**
 * Find Python XSS sinks in return/yield statements.
 * Flask/Django routes often return HTML strings directly:
 *   return '<h1>' + user_input + '</h1>'
 *   return f'<html>{user_input}</html>'
 * These are not call nodes, so call-based sink detection never sees them.
 *
 * A line is reported when it is a `return`/`yield` statement whose expression
 * (a) mentions a tainted variable as a whole word and (b) looks like HTML
 * building: contains `<`, concatenates with a string literal, or is an
 * f-string with a `{` placeholder. Comment lines are skipped.
 *
 * @param {string} sourceCode - Python source text, scanned line by line (LF or CRLF).
 * @param {string} language - Language identifier; anything but 'python' yields [].
 * @param {Map<string, number>} taintedVars - Map keyed by tainted variable name;
 *   only the keys and the size are used here.
 * @returns {Array<{sinkLine: number}>} 1-based line numbers of the detected sinks.
 */
function findPythonReturnXSSSinks(sourceCode, language, taintedVars) {
    if (language !== 'python' || taintedVars.size === 0) {
        return [];
    }
    const RETURN_OR_YIELD = /^\s*(?:return|yield)\s+(.+)$/;
    // Escape regex metacharacters so a variable name is always matched literally.
    const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Build one whole-word matcher per tainted name up front instead of per line.
    const taintedMatchers = [...taintedVars.keys()].map((name) => new RegExp(`\\b${escapeRegExp(name)}\\b`));

    const sinks = [];
    // Split on CRLF as well as LF: a trailing '\r' would stop `(.+)$` from matching.
    const lines = sourceCode.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.trimStart().startsWith('#')) {
            continue;
        }
        // Match return/yield statements that carry an expression.
        const returnMatch = RETURN_OR_YIELD.exec(line);
        if (!returnMatch) {
            continue;
        }
        const expr = returnMatch[1];
        // Must contain a tainted variable.
        if (!taintedMatchers.some((matcher) => matcher.test(expr))) {
            continue;
        }
        // Must look like HTML (contains '<', or is a string concatenation, or an f-string placeholder).
        const looksLikeHTML = expr.includes('<')
            || /['"]\s*\+/.test(expr)
            || /\+\s*['"]/.test(expr)
            || /f['"][^'"]*\{/.test(expr);
        if (looksLikeHTML) {
            sinks.push({ sinkLine: i + 1 });
        }
    }
    return sinks;
}
|
|
483
|
-
/**
 * Locate DOM XSS sinks in JavaScript/TypeScript source by scanning each line
 * against the JS_DOM_XSS_SINKS pattern table (e.g. element.innerHTML = userInput).
 *
 * Only the first matching table entry is used for a line. The reported
 * `method` is derived from substrings of the line and defaults to 'innerHTML'.
 *
 * @param {string} sourceCode - Source text, split on '\n'.
 * @param {string} language - Only 'javascript' and 'typescript' are scanned.
 * @returns {Array<{type, cwe, severity, line: number, location: string, method: string}>}
 *   Detected sinks; `line` is 1-based and `location` is the trimmed line, capped at 80 characters.
 */
function findJavaScriptDOMSinks(sourceCode, language) {
    const found = [];
    // Anything other than JavaScript/TypeScript is left untouched.
    if (language !== 'javascript' && language !== 'typescript') {
        return found;
    }
    // Ordered substring -> method name table; the first substring present wins.
    const METHOD_MARKERS = [
        ['.outerHTML', 'outerHTML'],
        ['document.write(', 'document.write'],
        ['document.writeln(', 'document.writeln'],
        ['.insertAdjacentHTML', 'insertAdjacentHTML'],
        ['.src', 'src'],
        ['.href', 'href'],
    ];
    for (const [index, text] of sourceCode.split('\n').entries()) {
        // Find the first sink definition whose pattern matches this line.
        let hit = null;
        for (const candidate of JS_DOM_XSS_SINKS) {
            if (candidate.pattern.test(text)) {
                hit = candidate;
                break;
            }
        }
        if (hit === null) {
            continue;
        }
        const { type, cwe, severity } = hit;
        const lineNumber = index + 1;
        // Work out which property/method the line assigns or calls.
        const marker = METHOD_MARKERS.find(([needle]) => text.includes(needle));
        const method = marker === undefined ? 'innerHTML' : marker[1];
        // Never record the same line/CWE pair twice.
        const isDuplicate = found.some((entry) => entry.line === lineNumber && entry.cwe === cwe);
        if (isDuplicate) {
            continue;
        }
        found.push({
            type,
            cwe,
            severity,
            line: lineNumber,
            location: text.trim().substring(0, 80),
            method,
        });
    }
    return found;
}
|
|
34
|
+
import { CodeGraph, AnalysisPipeline, ProjectGraph } from './graph/index.js';
|
|
35
|
+
import { CrossFilePass } from './analysis/passes/cross-file-pass.js';
|
|
36
|
+
// Pass classes
|
|
37
|
+
import { TaintMatcherPass } from './analysis/passes/taint-matcher-pass.js';
|
|
38
|
+
import { ConstantPropagationPass } from './analysis/passes/constant-propagation-pass.js';
|
|
39
|
+
import { LanguageSourcesPass } from './analysis/passes/language-sources-pass.js';
|
|
40
|
+
import { SinkFilterPass, filterCleanVariableSinks, filterSanitizedSinks } from './analysis/passes/sink-filter-pass.js';
|
|
41
|
+
import { TaintPropagationPass } from './analysis/passes/taint-propagation-pass.js';
|
|
42
|
+
import { InterproceduralPass } from './analysis/passes/interprocedural-pass.js';
|
|
43
|
+
import { DeadCodePass } from './analysis/passes/dead-code-pass.js';
|
|
44
|
+
import { MissingAwaitPass } from './analysis/passes/missing-await-pass.js';
|
|
45
|
+
import { NPlusOnePass } from './analysis/passes/n-plus-one-pass.js';
|
|
46
|
+
import { MissingPublicDocPass } from './analysis/passes/missing-public-doc-pass.js';
|
|
47
|
+
import { TodoInProdPass } from './analysis/passes/todo-in-prod-pass.js';
|
|
48
|
+
import { StringConcatLoopPass } from './analysis/passes/string-concat-loop-pass.js';
|
|
49
|
+
import { SyncIoAsyncPass } from './analysis/passes/sync-io-async-pass.js';
|
|
50
|
+
import { UncheckedReturnPass } from './analysis/passes/unchecked-return-pass.js';
|
|
51
|
+
import { NullDerefPass } from './analysis/passes/null-deref-pass.js';
|
|
52
|
+
import { ResourceLeakPass } from './analysis/passes/resource-leak-pass.js';
|
|
53
|
+
import { VariableShadowingPass } from './analysis/passes/variable-shadowing-pass.js';
|
|
54
|
+
import { LeakedGlobalPass } from './analysis/passes/leaked-global-pass.js';
|
|
55
|
+
import { UnusedVariablePass } from './analysis/passes/unused-variable-pass.js';
|
|
56
|
+
import { DependencyFanOutPass } from './analysis/passes/dependency-fan-out-pass.js';
|
|
57
|
+
import { StaleDocRefPass } from './analysis/passes/stale-doc-ref-pass.js';
|
|
58
|
+
// Project-level pass imports
|
|
59
|
+
import { ImportGraph } from './graph/import-graph.js';
|
|
60
|
+
import { CircularDependencyPass } from './analysis/passes/circular-dependency-pass.js';
|
|
61
|
+
import { OrphanModulePass } from './analysis/passes/orphan-module-pass.js';
|
|
62
|
+
// Metrics
|
|
63
|
+
import { MetricRunner } from './analysis/metrics/index.js';
|
|
64
|
+
// Helpers used by analyzeForAPI
|
|
65
|
+
import { buildPythonTaintedVars, buildPythonSanitizedVars, findPythonTrustBoundaryViolations, } from './analysis/passes/language-sources-pass.js';
|
|
532
66
|
// Module-level flag, false when the module is loaded.
// NOTE(review): presumably set once analyzer initialization has run — the code that writes it is outside this view; confirm.
let initialized = false;
|
|
533
67
|
/**
|
|
534
68
|
* Initialize the analyzer. Must be called before analyze().
|
|
@@ -601,6 +135,45 @@ function buildEnriched(types, _calls, sources, sinks) {
|
|
|
601
135
|
functions: functions.length > 0 ? functions : undefined,
|
|
602
136
|
};
|
|
603
137
|
}
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
// Node type collection — shared by analyze() and analyzeForAPI()
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
/**
 * Return the set of AST node type names to collect for a language.
 *
 * @param {string} language - 'rust', 'python', 'javascript', 'typescript' or 'bash';
 *   any other value gets the fallback list (method_invocation, class_declaration, ...).
 * @returns {Set<string>} A freshly created Set on every call.
 */
function getNodeTypesForLanguage(language) {
    // JavaScript and TypeScript share one list.
    const ecmaScriptNodes = [
        'call_expression', 'new_expression', 'class_declaration', 'function_declaration',
        'arrow_function', 'method_definition', 'variable_declaration', 'lexical_declaration',
        'import_statement', 'export_statement', 'member_expression', 'assignment_expression',
    ];
    const nodesByLanguage = new Map([
        ['rust', [
            'call_expression', 'macro_invocation', 'function_item', 'struct_item',
            'impl_item', 'enum_item', 'trait_item', 'mod_item', 'use_declaration',
            'let_declaration', 'field_expression', 'scoped_identifier',
        ]],
        ['python', [
            'call', 'function_definition', 'class_definition', 'import_statement',
            'import_from_statement', 'assignment', 'attribute', 'subscript',
        ]],
        ['javascript', ecmaScriptNodes],
        ['typescript', ecmaScriptNodes],
        ['bash', [
            'command', 'function_definition', 'variable_assignment', 'declaration_command',
            'if_statement', 'for_statement', 'c_style_for_statement', 'while_statement',
        ]],
    ]);
    // Used for every language without an entry above.
    const fallbackNodes = [
        'method_invocation', 'object_creation_expression', 'class_declaration',
        'method_declaration', 'constructor_declaration', 'field_declaration',
        'import_declaration', 'interface_declaration', 'enum_declaration',
    ];
    return new Set(nodesByLanguage.get(language) ?? fallbackNodes);
}
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
// Main analysis function
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
604
177
|
/**
|
|
605
178
|
* Analyze source code and produce Circle-IR output.
|
|
606
179
|
*/
|
|
@@ -613,87 +186,8 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
613
186
|
const tree = await parse(code, language);
|
|
614
187
|
logger.trace('Parsed AST', { rootNodeType: tree.rootNode.type });
|
|
615
188
|
// Collect all node types in a single traversal for better performance
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
const isRust = language === 'rust';
|
|
619
|
-
const isPython = language === 'python';
|
|
620
|
-
let nodeTypesToCollect;
|
|
621
|
-
if (isRust) {
|
|
622
|
-
nodeTypesToCollect = new Set([
|
|
623
|
-
// Rust AST nodes
|
|
624
|
-
'call_expression',
|
|
625
|
-
'macro_invocation',
|
|
626
|
-
'function_item',
|
|
627
|
-
'struct_item',
|
|
628
|
-
'impl_item',
|
|
629
|
-
'enum_item',
|
|
630
|
-
'trait_item',
|
|
631
|
-
'mod_item',
|
|
632
|
-
'use_declaration',
|
|
633
|
-
'let_declaration',
|
|
634
|
-
'field_expression',
|
|
635
|
-
'scoped_identifier',
|
|
636
|
-
]);
|
|
637
|
-
}
|
|
638
|
-
else if (isPython) {
|
|
639
|
-
nodeTypesToCollect = new Set([
|
|
640
|
-
// Python AST nodes
|
|
641
|
-
'call',
|
|
642
|
-
'function_definition',
|
|
643
|
-
'class_definition',
|
|
644
|
-
'import_statement',
|
|
645
|
-
'import_from_statement',
|
|
646
|
-
'assignment',
|
|
647
|
-
'attribute',
|
|
648
|
-
'subscript',
|
|
649
|
-
]);
|
|
650
|
-
}
|
|
651
|
-
else if (isJavaScript) {
|
|
652
|
-
nodeTypesToCollect = new Set([
|
|
653
|
-
// JavaScript/TypeScript AST nodes
|
|
654
|
-
'call_expression',
|
|
655
|
-
'new_expression',
|
|
656
|
-
'class_declaration',
|
|
657
|
-
'function_declaration',
|
|
658
|
-
'arrow_function',
|
|
659
|
-
'method_definition',
|
|
660
|
-
'variable_declaration',
|
|
661
|
-
'lexical_declaration',
|
|
662
|
-
'import_statement',
|
|
663
|
-
'export_statement',
|
|
664
|
-
'member_expression',
|
|
665
|
-
'assignment_expression',
|
|
666
|
-
]);
|
|
667
|
-
}
|
|
668
|
-
else if (language === 'bash') {
|
|
669
|
-
nodeTypesToCollect = new Set([
|
|
670
|
-
// Bash AST nodes
|
|
671
|
-
'command',
|
|
672
|
-
'function_definition',
|
|
673
|
-
'variable_assignment',
|
|
674
|
-
'declaration_command',
|
|
675
|
-
'if_statement',
|
|
676
|
-
'for_statement',
|
|
677
|
-
'c_style_for_statement',
|
|
678
|
-
'while_statement',
|
|
679
|
-
]);
|
|
680
|
-
}
|
|
681
|
-
else {
|
|
682
|
-
nodeTypesToCollect = new Set([
|
|
683
|
-
// Java AST nodes
|
|
684
|
-
'method_invocation',
|
|
685
|
-
'object_creation_expression',
|
|
686
|
-
'class_declaration',
|
|
687
|
-
'method_declaration',
|
|
688
|
-
'constructor_declaration',
|
|
689
|
-
'field_declaration',
|
|
690
|
-
'import_declaration',
|
|
691
|
-
'interface_declaration',
|
|
692
|
-
'enum_declaration',
|
|
693
|
-
]);
|
|
694
|
-
}
|
|
695
|
-
const nodeCache = collectAllNodes(tree.rootNode, nodeTypesToCollect);
|
|
696
|
-
// Extract all components using the cached nodes
|
|
189
|
+
const nodeCache = collectAllNodes(tree.rootNode, getNodeTypesForLanguage(language));
|
|
190
|
+
// Extract all IR components
|
|
697
191
|
const meta = extractMeta(code, tree, filePath, language);
|
|
698
192
|
const types = extractTypes(tree, nodeCache, language);
|
|
699
193
|
const calls = extractCalls(tree, nodeCache, language);
|
|
@@ -701,455 +195,51 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
701
195
|
const exports = extractExports(types);
|
|
702
196
|
const cfg = buildCFG(tree, language);
|
|
703
197
|
const dfg = buildDFG(tree, nodeCache, language);
|
|
704
|
-
//
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
}
|
|
713
|
-
// First, do a preliminary taint analysis to find inter-procedural parameter sources
|
|
714
|
-
// These need to be passed to constant propagation so it can track taint from method parameters
|
|
715
|
-
let baseConfig = options.taintConfig ?? getDefaultConfig();
|
|
716
|
-
// Merge language plugin built-in sources/sinks into the config.
|
|
717
|
-
// This handles languages (e.g. Bash) whose patterns are defined on the plugin
|
|
718
|
-
// rather than in YAML config files loaded by getDefaultConfig().
|
|
719
|
-
if (!options.taintConfig) {
|
|
720
|
-
const plugin = getLanguagePlugin(language);
|
|
721
|
-
if (plugin) {
|
|
722
|
-
const pluginSources = plugin.getBuiltinSources();
|
|
723
|
-
const pluginSinks = plugin.getBuiltinSinks();
|
|
724
|
-
if (pluginSources.length > 0 || pluginSinks.length > 0) {
|
|
725
|
-
baseConfig = {
|
|
726
|
-
...baseConfig,
|
|
727
|
-
sources: [
|
|
728
|
-
...baseConfig.sources,
|
|
729
|
-
...pluginSources.map(s => ({
|
|
730
|
-
method: s.method,
|
|
731
|
-
class: s.class,
|
|
732
|
-
annotation: s.annotation,
|
|
733
|
-
type: s.type,
|
|
734
|
-
severity: s.severity,
|
|
735
|
-
return_tainted: s.returnTainted ?? false,
|
|
736
|
-
})),
|
|
737
|
-
],
|
|
738
|
-
sinks: [
|
|
739
|
-
...baseConfig.sinks,
|
|
740
|
-
...pluginSinks.map(s => ({
|
|
741
|
-
method: s.method,
|
|
742
|
-
class: s.class,
|
|
743
|
-
type: s.type,
|
|
744
|
-
cwe: s.cwe,
|
|
745
|
-
severity: s.severity,
|
|
746
|
-
arg_positions: s.argPositions,
|
|
747
|
-
})),
|
|
748
|
-
],
|
|
749
|
-
};
|
|
750
|
-
}
|
|
751
|
-
}
|
|
752
|
-
}
|
|
753
|
-
const preliminaryTaint = analyzeTaint(calls, types, baseConfig);
|
|
754
|
-
// Extract inter-procedural parameter sources
|
|
755
|
-
const taintedParameters = [];
|
|
756
|
-
for (const source of preliminaryTaint.sources) {
|
|
757
|
-
if (source.type === 'interprocedural_param') {
|
|
758
|
-
// Location format: "ParamType paramName in methodName"
|
|
759
|
-
const match = source.location.match(/(\S+)\s+(\S+)\s+in\s+(\S+)/);
|
|
760
|
-
if (match) {
|
|
761
|
-
taintedParameters.push({
|
|
762
|
-
methodName: match[3],
|
|
763
|
-
paramName: match[2],
|
|
764
|
-
});
|
|
765
|
-
}
|
|
766
|
-
}
|
|
767
|
-
}
|
|
768
|
-
// Run constant propagation with tainted parameters
|
|
769
|
-
const constPropResult = analyzeConstantPropagation(tree, code, {
|
|
770
|
-
sanitizerMethods,
|
|
771
|
-
taintedParameters,
|
|
772
|
-
});
|
|
773
|
-
// Analyze taint with config
|
|
774
|
-
const taint = analyzeTaint(calls, types, baseConfig);
|
|
775
|
-
// Add sources for getters that return tainted constructor fields
|
|
776
|
-
const getterSources = findGetterSources(types, constPropResult.instanceFieldTaint, code);
|
|
777
|
-
taint.sources.push(...getterSources);
|
|
778
|
-
// Add sources for JavaScript variable assignments with tainted patterns
|
|
779
|
-
const jsAssignmentSources = findJavaScriptAssignmentSources(code, language);
|
|
780
|
-
taint.sources.push(...jsAssignmentSources);
|
|
781
|
-
// Add sources for Python variable assignments with tainted request patterns
|
|
782
|
-
const pythonAssignmentSources = findPythonAssignmentSources(code, language);
|
|
783
|
-
taint.sources.push(...pythonAssignmentSources);
|
|
784
|
-
// Add sinks for JavaScript DOM XSS patterns (innerHTML, document.write, etc.)
|
|
785
|
-
const jsDOMSinks = findJavaScriptDOMSinks(code, language);
|
|
786
|
-
for (const domSink of jsDOMSinks) {
|
|
787
|
-
// Avoid duplicates
|
|
788
|
-
const alreadyExists = taint.sinks.some(s => s.line === domSink.line && s.cwe === domSink.cwe);
|
|
789
|
-
if (!alreadyExists) {
|
|
790
|
-
taint.sinks.push({
|
|
791
|
-
type: 'xss',
|
|
792
|
-
cwe: domSink.cwe,
|
|
793
|
-
line: domSink.line,
|
|
794
|
-
location: domSink.location,
|
|
795
|
-
method: domSink.method,
|
|
796
|
-
confidence: 1.0,
|
|
797
|
-
});
|
|
798
|
-
}
|
|
799
|
-
}
|
|
800
|
-
logger.debug('Initial taint analysis', {
|
|
801
|
-
sources: taint.sources.length,
|
|
802
|
-
sinks: taint.sinks.length,
|
|
803
|
-
sanitizers: taint.sanitizers?.length ?? 0,
|
|
804
|
-
getterSources: getterSources.length,
|
|
805
|
-
jsDOMSinks: jsDOMSinks.length,
|
|
198
|
+
// Build CodeGraph once — shared across all passes.
|
|
199
|
+
// Taint is empty at construction time; sources/sinks/sanitizers are populated by passes.
|
|
200
|
+
const graph = new CodeGraph({
|
|
201
|
+
meta, types, calls, cfg, dfg,
|
|
202
|
+
taint: { sources: [], sinks: [], sanitizers: [] },
|
|
203
|
+
imports, exports, unresolved: [], enriched: {},
|
|
806
204
|
});
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
}
|
|
842
|
-
const pySourceLines = code.split('\n');
|
|
843
|
-
// Filter XPath sinks: keep only if a tainted var is used at the sink line
|
|
844
|
-
taint.sinks = taint.sinks.filter(sink => {
|
|
845
|
-
if (sink.type !== 'xpath_injection')
|
|
846
|
-
return true;
|
|
847
|
-
const sinkLineText = pySourceLines[sink.line - 1] ?? '';
|
|
848
|
-
const taintedVarOnLine = [...pyTaintedVars.keys()].find(v => new RegExp(`\\b${v}\\b`).test(sinkLineText));
|
|
849
|
-
if (!taintedVarOnLine)
|
|
850
|
-
return false;
|
|
851
|
-
if (pySanitizedVars.has(taintedVarOnLine))
|
|
852
|
-
return false;
|
|
853
|
-
// Suppress parameterized XPath: root.xpath(query, name=bar) where bar is a keyword arg
|
|
854
|
-
if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText))
|
|
855
|
-
return false;
|
|
856
|
-
return true;
|
|
857
|
-
});
|
|
858
|
-
// Add trust boundary sinks from session subscript assignments
|
|
859
|
-
const trustViolations = findPythonTrustBoundaryViolations(code, language, pyTaintedVars);
|
|
860
|
-
for (const v of trustViolations) {
|
|
861
|
-
const alreadyExists = taint.sinks.some(s => s.line === v.sinkLine && s.type === 'trust_boundary');
|
|
862
|
-
if (!alreadyExists) {
|
|
863
|
-
taint.sinks.push({
|
|
864
|
-
type: 'trust_boundary',
|
|
865
|
-
cwe: 'CWE-501',
|
|
866
|
-
line: v.sinkLine,
|
|
867
|
-
location: `session write at line ${v.sinkLine}`,
|
|
868
|
-
confidence: 0.85,
|
|
869
|
-
});
|
|
870
|
-
}
|
|
871
|
-
}
|
|
872
|
-
// Add XSS sinks from return/yield statements (Flask/Django routes return HTML directly)
|
|
873
|
-
const pyReturnXSS = findPythonReturnXSSSinks(code, language, pyTaintedVars);
|
|
874
|
-
for (const r of pyReturnXSS) {
|
|
875
|
-
const alreadyExists = taint.sinks.some(s => s.line === r.sinkLine && s.type === 'xss');
|
|
876
|
-
if (!alreadyExists) {
|
|
877
|
-
taint.sinks.push({
|
|
878
|
-
type: 'xss',
|
|
879
|
-
cwe: 'CWE-79',
|
|
880
|
-
line: r.sinkLine,
|
|
881
|
-
location: `return HTML with user input at line ${r.sinkLine}`,
|
|
882
|
-
confidence: 0.9,
|
|
883
|
-
});
|
|
884
|
-
}
|
|
885
|
-
}
|
|
886
|
-
}
|
|
887
|
-
// JavaScript/TypeScript: filter XSS sinks where the argument variable is NOT actually
|
|
888
|
-
// tainted by user input (e.g., res.send(stdout) — stdout is a callback param from exec(),
|
|
889
|
-
// not a variable derived from req.query/req.body). This prevents FP pairs like:
|
|
890
|
-
// CWE-78 (correct) + CWE-79 (spurious) for the same source when the cmd output is sent.
|
|
891
|
-
if (['javascript', 'typescript'].includes(language)) {
|
|
892
|
-
const jsTaintedVars = buildJavaScriptTaintedVars(code, language);
|
|
893
|
-
if (jsTaintedVars.size > 0) {
|
|
894
|
-
const jsSourceLines = code.split('\n');
|
|
895
|
-
taint.sinks = taint.sinks.filter(sink => {
|
|
896
|
-
if (sink.type !== 'xss')
|
|
897
|
-
return true;
|
|
898
|
-
const sinkLineText = jsSourceLines[sink.line - 1] ?? '';
|
|
899
|
-
// Keep if any known-tainted variable appears on this sink line
|
|
900
|
-
if ([...jsTaintedVars.keys()].some(v => new RegExp(`\\b${v}\\b`).test(sinkLineText)))
|
|
901
|
-
return true;
|
|
902
|
-
// Also keep if the sink line directly references a taint source (inline use, no assignment)
|
|
903
|
-
if (JS_TAINTED_PATTERNS.some(p => p.pattern.test(sinkLineText)))
|
|
904
|
-
return true;
|
|
905
|
-
return false;
|
|
906
|
-
});
|
|
907
|
-
}
|
|
908
|
-
}
|
|
909
|
-
// Propagate taint through dataflow to find verified flows
|
|
910
|
-
if (taint.sources.length > 0 && taint.sinks.length > 0) {
|
|
911
|
-
const propagationResult = propagateTaint(dfg, calls, taint.sources, taint.sinks, taint.sanitizers ?? []);
|
|
912
|
-
// Filter flows using constant propagation (eliminate false positives)
|
|
913
|
-
const verifiedFlows = propagationResult.flows.filter(flow => {
|
|
914
|
-
// Check if the sink line is in dead code
|
|
915
|
-
if (constPropResult.unreachableLines.has(flow.sink.line)) {
|
|
916
|
-
return false;
|
|
917
|
-
}
|
|
918
|
-
// Check each step in the path - if any variable has a constant value, skip
|
|
919
|
-
for (const step of flow.path) {
|
|
920
|
-
const fpCheck = isFalsePositive(constPropResult, step.line, step.variable);
|
|
921
|
-
if (fpCheck.isFalsePositive) {
|
|
922
|
-
return false;
|
|
923
|
-
}
|
|
924
|
-
}
|
|
925
|
-
// Check for correlated predicates: if the sink is under condition !C
|
|
926
|
-
// and the taint was added under condition C, they're mutually exclusive
|
|
927
|
-
if (isCorrelatedPredicateFP(constPropResult, flow)) {
|
|
928
|
-
return false;
|
|
929
|
-
}
|
|
930
|
-
return true;
|
|
931
|
-
});
|
|
932
|
-
// Convert flows to TaintFlowInfo format
|
|
933
|
-
taint.flows = verifiedFlows.map(flow => ({
|
|
934
|
-
source_line: flow.source.line,
|
|
935
|
-
sink_line: flow.sink.line,
|
|
936
|
-
source_type: flow.source.type,
|
|
937
|
-
sink_type: flow.sink.type,
|
|
938
|
-
path: flow.path.map(step => ({
|
|
939
|
-
variable: step.variable,
|
|
940
|
-
line: step.line,
|
|
941
|
-
type: step.type,
|
|
942
|
-
})),
|
|
943
|
-
confidence: flow.confidence,
|
|
944
|
-
sanitized: flow.sanitized,
|
|
945
|
-
}));
|
|
946
|
-
// Add array element flows that DFG-based analysis might miss
|
|
947
|
-
const arrayFlows = detectArrayElementFlows(calls, taint.sources, taint.sinks, constPropResult.taintedArrayElements, constPropResult.unreachableLines);
|
|
948
|
-
if (arrayFlows && arrayFlows.length > 0) {
|
|
949
|
-
if (!taint.flows) {
|
|
950
|
-
taint.flows = [];
|
|
951
|
-
}
|
|
952
|
-
for (const flow of arrayFlows) {
|
|
953
|
-
// Avoid duplicates
|
|
954
|
-
if (!taint.flows.some(f => f.source_line === flow.source_line && f.sink_line === flow.sink_line)) {
|
|
955
|
-
taint.flows.push(flow);
|
|
956
|
-
}
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
// Add collection/iterator flows that DFG-based analysis might miss
|
|
960
|
-
const collectionFlows = detectCollectionFlows(calls, taint.sources, taint.sinks, constPropResult.tainted, constPropResult.unreachableLines);
|
|
961
|
-
if (collectionFlows && collectionFlows.length > 0) {
|
|
962
|
-
if (!taint.flows) {
|
|
963
|
-
taint.flows = [];
|
|
964
|
-
}
|
|
965
|
-
for (const flow of collectionFlows) {
|
|
966
|
-
// Avoid duplicates
|
|
967
|
-
if (taint.flows.some(f => f.source_line === flow.source_line && f.sink_line === flow.sink_line)) {
|
|
968
|
-
continue;
|
|
969
|
-
}
|
|
970
|
-
// Apply the same filtering as DFG-based flows
|
|
971
|
-
const flowForCheck = {
|
|
972
|
-
source: { line: flow.source_line, type: flow.source_type },
|
|
973
|
-
sink: { line: flow.sink_line, type: flow.sink_type },
|
|
974
|
-
path: flow.path.map(p => ({ variable: p.variable, line: p.line })),
|
|
975
|
-
};
|
|
976
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
977
|
-
if (isCorrelatedPredicateFP(constPropResult, flowForCheck)) {
|
|
978
|
-
continue;
|
|
979
|
-
}
|
|
980
|
-
// Check if any step in the path is a false positive
|
|
981
|
-
let isFP = false;
|
|
982
|
-
for (const step of flow.path) {
|
|
983
|
-
const fpCheck = isFalsePositive(constPropResult, step.line, step.variable);
|
|
984
|
-
if (fpCheck.isFalsePositive) {
|
|
985
|
-
isFP = true;
|
|
986
|
-
break;
|
|
987
|
-
}
|
|
988
|
-
}
|
|
989
|
-
if (isFP) {
|
|
990
|
-
continue;
|
|
991
|
-
}
|
|
992
|
-
taint.flows.push(flow);
|
|
993
|
-
}
|
|
994
|
-
}
|
|
995
|
-
// Add direct parameter-to-sink flows that DFG might miss
|
|
996
|
-
const paramFlows = detectParameterSinkFlows(types, calls, taint.sources, taint.sinks, constPropResult.unreachableLines);
|
|
997
|
-
if (paramFlows && paramFlows.length > 0) {
|
|
998
|
-
if (!taint.flows) {
|
|
999
|
-
taint.flows = [];
|
|
1000
|
-
}
|
|
1001
|
-
for (const flow of paramFlows) {
|
|
1002
|
-
// Avoid duplicates
|
|
1003
|
-
if (!taint.flows.some(f => f.source_line === flow.source_line && f.sink_line === flow.sink_line)) {
|
|
1004
|
-
taint.flows.push(flow);
|
|
1005
|
-
}
|
|
1006
|
-
}
|
|
1007
|
-
}
|
|
1008
|
-
// Perform inter-procedural analysis
|
|
1009
|
-
const interProc = analyzeInterprocedural(types, calls, dfg, taint.sources, taint.sinks, taint.sanitizers ?? [], {
|
|
1010
|
-
taintedVariables: constPropResult.tainted,
|
|
1011
|
-
});
|
|
1012
|
-
// Add inter-procedural sinks to the taint sinks and generate flows
|
|
1013
|
-
// Skip external_taint_escape (CWE-668) here: they are only used as a last resort
|
|
1014
|
-
// in the fallback path below when no other sinks exist. Adding them when proper sinks
|
|
1015
|
-
// already exist creates duplicate/spurious findings (e.g., http.get already reported
|
|
1016
|
-
// as CWE-918 SSRF; also getting CWE-668 for the same call chain is a FP).
|
|
1017
|
-
for (const sink of interProc.propagatedSinks) {
|
|
1018
|
-
if (sink.type === 'external_taint_escape')
|
|
1019
|
-
continue;
|
|
1020
|
-
if (!taint.sinks.some(s => s.line === sink.line)) {
|
|
1021
|
-
taint.sinks.push(sink);
|
|
1022
|
-
}
|
|
1023
|
-
}
|
|
1024
|
-
// Generate flows for inter-procedural propagated sinks
|
|
1025
|
-
// These sinks are inside called methods where tainted args were passed
|
|
1026
|
-
if (interProc.propagatedSinks.length > 0 && taint.sources.length > 0) {
|
|
1027
|
-
if (!taint.flows) {
|
|
1028
|
-
taint.flows = [];
|
|
1029
|
-
}
|
|
1030
|
-
// Build set of sanitizer method names to skip (methods with @sanitizer annotation)
|
|
1031
|
-
const sanitizerMethodNames = new Set();
|
|
1032
|
-
for (const san of taint.sanitizers ?? []) {
|
|
1033
|
-
if (san.type === 'javadoc_sanitizer') {
|
|
1034
|
-
// Extract method name from "methodName()" format
|
|
1035
|
-
const match = san.method.match(/^(\w+)\(\)$/);
|
|
1036
|
-
if (match)
|
|
1037
|
-
sanitizerMethodNames.add(match[1]);
|
|
1038
|
-
else
|
|
1039
|
-
sanitizerMethodNames.add(san.method);
|
|
1040
|
-
}
|
|
1041
|
-
}
|
|
1042
|
-
for (const sink of interProc.propagatedSinks) {
|
|
1043
|
-
// Skip external taint escape sinks (not real vulnerability sinks)
|
|
1044
|
-
if (sink.type === 'external_taint_escape')
|
|
1045
|
-
continue;
|
|
1046
|
-
// Find which call edge brought taint to this sink's method
|
|
1047
|
-
for (const edge of interProc.callEdges) {
|
|
1048
|
-
if (!interProc.taintedMethods.has(edge.calleeMethod))
|
|
1049
|
-
continue;
|
|
1050
|
-
const method = interProc.methodNodes.get(edge.calleeMethod);
|
|
1051
|
-
if (!method)
|
|
1052
|
-
continue;
|
|
1053
|
-
if (sink.line < method.startLine || sink.line > method.endLine)
|
|
1054
|
-
continue;
|
|
1055
|
-
// Skip sinks inside sanitizer methods (@sanitizer annotation)
|
|
1056
|
-
if (sanitizerMethodNames.has(method.name))
|
|
1057
|
-
continue;
|
|
1058
|
-
// Find the source connected to this call
|
|
1059
|
-
for (const source of taint.sources) {
|
|
1060
|
-
// Source should be in the caller's scope, at or before the call line
|
|
1061
|
-
if (source.line > edge.callLine)
|
|
1062
|
-
continue;
|
|
1063
|
-
// Skip low-confidence interprocedural_param sources
|
|
1064
|
-
if (source.type === 'interprocedural_param' && source.confidence < 0.6)
|
|
1065
|
-
continue;
|
|
1066
|
-
if (taint.flows.some(f => f.source_line === source.line && f.sink_line === sink.line))
|
|
1067
|
-
continue;
|
|
1068
|
-
taint.flows.push({
|
|
1069
|
-
source_line: source.line,
|
|
1070
|
-
sink_line: sink.line,
|
|
1071
|
-
source_type: source.type,
|
|
1072
|
-
sink_type: sink.type,
|
|
1073
|
-
path: [
|
|
1074
|
-
{ variable: source.location, line: source.line, type: 'source' },
|
|
1075
|
-
{ variable: `call to ${method.name}()`, line: edge.callLine, type: 'use' },
|
|
1076
|
-
{ variable: sink.location, line: sink.line, type: 'sink' },
|
|
1077
|
-
],
|
|
1078
|
-
confidence: sink.confidence * source.confidence * 0.85,
|
|
1079
|
-
sanitized: false,
|
|
1080
|
-
});
|
|
1081
|
-
break; // One source per sink is enough
|
|
1082
|
-
}
|
|
1083
|
-
break; // One call edge per sink is enough
|
|
1084
|
-
}
|
|
1085
|
-
}
|
|
1086
|
-
}
|
|
1087
|
-
// Build inter-procedural info
|
|
1088
|
-
const taintBridges = findTaintBridges(interProc);
|
|
1089
|
-
taint.interprocedural = {
|
|
1090
|
-
tainted_methods: Array.from(interProc.taintedMethods),
|
|
1091
|
-
taint_bridges: taintBridges,
|
|
1092
|
-
method_flows: interProc.callEdges
|
|
1093
|
-
.filter(edge => interProc.taintedMethods.has(edge.calleeMethod))
|
|
1094
|
-
.map(edge => ({
|
|
1095
|
-
caller: edge.callerMethod,
|
|
1096
|
-
callee: edge.calleeMethod,
|
|
1097
|
-
call_line: edge.callLine,
|
|
1098
|
-
tainted_args: edge.taintedArgs,
|
|
1099
|
-
returns_taint: interProc.taintedReturns.has(edge.calleeMethod),
|
|
1100
|
-
})),
|
|
1101
|
-
};
|
|
1102
|
-
}
|
|
1103
|
-
// Perform inter-procedural analysis even when no initial sinks (can detect external taint escapes)
|
|
1104
|
-
if (taint.sources.length > 0 && taint.sinks.length === 0) {
|
|
1105
|
-
const interProc = analyzeInterprocedural(types, calls, dfg, taint.sources, [], // No initial sinks
|
|
1106
|
-
taint.sanitizers ?? [], {
|
|
1107
|
-
taintedVariables: constPropResult.tainted,
|
|
1108
|
-
});
|
|
1109
|
-
// Add inter-procedural sinks (e.g., external_taint_escape)
|
|
1110
|
-
for (const sink of interProc.propagatedSinks) {
|
|
1111
|
-
if (!constPropResult.unreachableLines.has(sink.line) &&
|
|
1112
|
-
!taint.sinks.some(s => s.line === sink.line)) {
|
|
1113
|
-
taint.sinks.push(sink);
|
|
1114
|
-
}
|
|
1115
|
-
}
|
|
1116
|
-
// Build inter-procedural info
|
|
1117
|
-
if (interProc.taintedMethods.size > 0 || interProc.propagatedSinks.length > 0) {
|
|
1118
|
-
const taintBridges = findTaintBridges(interProc);
|
|
1119
|
-
taint.interprocedural = {
|
|
1120
|
-
tainted_methods: Array.from(interProc.taintedMethods),
|
|
1121
|
-
taint_bridges: taintBridges,
|
|
1122
|
-
method_flows: interProc.callEdges
|
|
1123
|
-
.filter(edge => interProc.taintedMethods.has(edge.calleeMethod))
|
|
1124
|
-
.map(edge => ({
|
|
1125
|
-
caller: edge.callerMethod,
|
|
1126
|
-
callee: edge.calleeMethod,
|
|
1127
|
-
call_line: edge.callLine,
|
|
1128
|
-
tainted_args: edge.taintedArgs,
|
|
1129
|
-
returns_taint: interProc.taintedReturns.has(edge.calleeMethod),
|
|
1130
|
-
})),
|
|
1131
|
-
};
|
|
1132
|
-
}
|
|
1133
|
-
// If we found new sinks, create flows from sources
|
|
1134
|
-
if (taint.sinks.length > 0) {
|
|
1135
|
-
taint.flows = taint.sinks.map(sink => ({
|
|
1136
|
-
source_line: taint.sources[0].line,
|
|
1137
|
-
sink_line: sink.line,
|
|
1138
|
-
source_type: taint.sources[0].type,
|
|
1139
|
-
sink_type: sink.type,
|
|
1140
|
-
path: [
|
|
1141
|
-
{ variable: 'input', line: taint.sources[0].line, type: 'source' },
|
|
1142
|
-
{ variable: 'input', line: sink.line, type: 'sink' },
|
|
1143
|
-
],
|
|
1144
|
-
confidence: taint.sources[0].confidence * sink.confidence,
|
|
1145
|
-
sanitized: false,
|
|
1146
|
-
}));
|
|
1147
|
-
}
|
|
1148
|
-
}
|
|
1149
|
-
// Detect unresolved items
|
|
205
|
+
const config = options.taintConfig ?? getDefaultConfig();
|
|
206
|
+
// Run the analysis pipeline
|
|
207
|
+
const { results, findings } = new AnalysisPipeline()
|
|
208
|
+
.add(new TaintMatcherPass())
|
|
209
|
+
.add(new ConstantPropagationPass(tree))
|
|
210
|
+
.add(new LanguageSourcesPass())
|
|
211
|
+
.add(new SinkFilterPass())
|
|
212
|
+
.add(new TaintPropagationPass())
|
|
213
|
+
.add(new InterproceduralPass())
|
|
214
|
+
.add(new DeadCodePass())
|
|
215
|
+
.add(new MissingAwaitPass())
|
|
216
|
+
.add(new NPlusOnePass())
|
|
217
|
+
.add(new MissingPublicDocPass())
|
|
218
|
+
.add(new TodoInProdPass())
|
|
219
|
+
.add(new StringConcatLoopPass())
|
|
220
|
+
.add(new SyncIoAsyncPass())
|
|
221
|
+
.add(new UncheckedReturnPass())
|
|
222
|
+
.add(new NullDerefPass())
|
|
223
|
+
.add(new ResourceLeakPass())
|
|
224
|
+
.add(new VariableShadowingPass())
|
|
225
|
+
.add(new LeakedGlobalPass())
|
|
226
|
+
.add(new UnusedVariablePass())
|
|
227
|
+
.add(new DependencyFanOutPass())
|
|
228
|
+
.add(new StaleDocRefPass())
|
|
229
|
+
.run(graph, code, language, config);
|
|
230
|
+
const sinkFilter = results.get('sink-filter');
|
|
231
|
+
const interProc = results.get('interprocedural');
|
|
232
|
+
const taint = {
|
|
233
|
+
sources: sinkFilter.sources,
|
|
234
|
+
sinks: [...sinkFilter.sinks, ...interProc.additionalSinks],
|
|
235
|
+
sanitizers: sinkFilter.sanitizers,
|
|
236
|
+
flows: interProc.additionalFlows,
|
|
237
|
+
interprocedural: interProc.interprocedural,
|
|
238
|
+
};
|
|
1150
239
|
const unresolved = detectUnresolved(calls, types, dfg);
|
|
1151
|
-
// Build enriched section
|
|
1152
240
|
const enriched = buildEnriched(types, calls, taint.sources, taint.sinks);
|
|
241
|
+
// Compute software metrics (CK suite, Halstead, composite scores)
|
|
242
|
+
const metricValues = new MetricRunner().run({ meta, types, calls, cfg, dfg, taint, imports, exports, unresolved, enriched }, code, language);
|
|
1153
243
|
logger.debug('Analysis complete', {
|
|
1154
244
|
filePath,
|
|
1155
245
|
finalSources: taint.sources.length,
|
|
@@ -1158,18 +248,14 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
1158
248
|
unresolvedItems: unresolved.length,
|
|
1159
249
|
});
|
|
1160
250
|
return {
|
|
1161
|
-
meta,
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
cfg,
|
|
1165
|
-
dfg,
|
|
1166
|
-
taint,
|
|
1167
|
-
imports,
|
|
1168
|
-
exports,
|
|
1169
|
-
unresolved,
|
|
1170
|
-
enriched,
|
|
251
|
+
meta, types, calls, cfg, dfg, taint, imports, exports, unresolved, enriched,
|
|
252
|
+
findings: findings.length > 0 ? findings : undefined,
|
|
253
|
+
metrics: { file: filePath, metrics: metricValues },
|
|
1171
254
|
};
|
|
1172
255
|
}
|
|
256
|
+
// ---------------------------------------------------------------------------
|
|
257
|
+
// Simplified API response format
|
|
258
|
+
// ---------------------------------------------------------------------------
|
|
1173
259
|
/**
|
|
1174
260
|
* Analyze code and return a simplified API response format.
|
|
1175
261
|
*/
|
|
@@ -1182,39 +268,7 @@ export async function analyzeForAPI(code, filePath, language, options = {}) {
|
|
|
1182
268
|
const tree = await parse(code, language);
|
|
1183
269
|
const parseTime = performance.now() - parseStart;
|
|
1184
270
|
const analysisStart = performance.now();
|
|
1185
|
-
|
|
1186
|
-
const isJavaScript = language === 'javascript' || language === 'typescript';
|
|
1187
|
-
const isRust = language === 'rust';
|
|
1188
|
-
const isPython = language === 'python';
|
|
1189
|
-
let nodeTypesToCollect;
|
|
1190
|
-
if (isRust) {
|
|
1191
|
-
nodeTypesToCollect = new Set([
|
|
1192
|
-
'call_expression', 'macro_invocation', 'function_item', 'struct_item',
|
|
1193
|
-
'impl_item', 'enum_item', 'trait_item', 'mod_item', 'use_declaration',
|
|
1194
|
-
'let_declaration', 'field_expression', 'scoped_identifier',
|
|
1195
|
-
]);
|
|
1196
|
-
}
|
|
1197
|
-
else if (isPython) {
|
|
1198
|
-
nodeTypesToCollect = new Set([
|
|
1199
|
-
'call', 'function_definition', 'class_definition', 'import_statement',
|
|
1200
|
-
'import_from_statement', 'assignment', 'attribute', 'subscript',
|
|
1201
|
-
]);
|
|
1202
|
-
}
|
|
1203
|
-
else if (isJavaScript) {
|
|
1204
|
-
nodeTypesToCollect = new Set([
|
|
1205
|
-
'call_expression', 'new_expression', 'class_declaration', 'function_declaration',
|
|
1206
|
-
'arrow_function', 'method_definition', 'variable_declaration', 'lexical_declaration',
|
|
1207
|
-
'import_statement', 'export_statement',
|
|
1208
|
-
]);
|
|
1209
|
-
}
|
|
1210
|
-
else {
|
|
1211
|
-
nodeTypesToCollect = new Set([
|
|
1212
|
-
'method_invocation', 'object_creation_expression', 'class_declaration',
|
|
1213
|
-
'method_declaration', 'field_declaration', 'import_declaration',
|
|
1214
|
-
'interface_declaration', 'enum_declaration',
|
|
1215
|
-
]);
|
|
1216
|
-
}
|
|
1217
|
-
const nodeCache = collectAllNodes(tree.rootNode, nodeTypesToCollect);
|
|
271
|
+
const nodeCache = collectAllNodes(tree.rootNode, getNodeTypesForLanguage(language));
|
|
1218
272
|
const types = extractTypes(tree, nodeCache, language);
|
|
1219
273
|
const calls = extractCalls(tree, nodeCache, language);
|
|
1220
274
|
// Run constant propagation
|
|
@@ -1232,40 +286,17 @@ export async function analyzeForAPI(code, filePath, language, options = {}) {
|
|
|
1232
286
|
let pythonTaintedVars = new Map();
|
|
1233
287
|
if (language === 'python') {
|
|
1234
288
|
pythonTaintedVars = buildPythonTaintedVars(code);
|
|
1235
|
-
const pythonSanitizedVars =
|
|
1236
|
-
// Propagate sanitization: if bar is sanitized and query = f"...{bar}...", query is also sanitized
|
|
1237
|
-
for (const line of code.split('\n')) {
|
|
1238
|
-
const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
1239
|
-
if (!am)
|
|
1240
|
-
continue;
|
|
1241
|
-
const [, lhs, rhs] = am;
|
|
1242
|
-
if ([...pythonSanitizedVars].some(v => new RegExp(`\\b${v}\\b`).test(rhs))) {
|
|
1243
|
-
pythonSanitizedVars.add(lhs);
|
|
1244
|
-
}
|
|
1245
|
-
}
|
|
1246
|
-
// Detect inline .replace() sanitizers: query = f"...{bar.replace('\'', ''')}..."
|
|
1247
|
-
for (const line of code.split('\n')) {
|
|
1248
|
-
const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
1249
|
-
if (!am)
|
|
1250
|
-
continue;
|
|
1251
|
-
const [, lhs, rhs] = am;
|
|
1252
|
-
const hasReplaceOnTainted = [...pythonTaintedVars.keys()].some(v => new RegExp(`\\b${v}\\.replace\\s*\\(`).test(rhs));
|
|
1253
|
-
if (hasReplaceOnTainted)
|
|
1254
|
-
pythonSanitizedVars.add(lhs);
|
|
1255
|
-
}
|
|
289
|
+
const pythonSanitizedVars = buildPythonSanitizedVars(code, pythonTaintedVars);
|
|
1256
290
|
const sourceLines = code.split('\n');
|
|
1257
291
|
filteredSinks = filteredSinks.filter(sink => {
|
|
1258
292
|
if (sink.type !== 'xpath_injection')
|
|
1259
293
|
return true;
|
|
1260
|
-
// Keep XPath sink only if a tainted variable is used at the sink line
|
|
1261
294
|
const sinkLineText = sourceLines[sink.line - 1] ?? '';
|
|
1262
295
|
const taintedVarOnLine = [...pythonTaintedVars.keys()].find(v => new RegExp(`\\b${v}\\b`).test(sinkLineText));
|
|
1263
296
|
if (!taintedVarOnLine)
|
|
1264
297
|
return false;
|
|
1265
|
-
// Kill if the variable is protected by an apostrophe guard
|
|
1266
298
|
if (pythonSanitizedVars.has(taintedVarOnLine))
|
|
1267
299
|
return false;
|
|
1268
|
-
// Suppress parameterized XPath: root.xpath(query, name=bar) where bar is a keyword arg
|
|
1269
300
|
if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText))
|
|
1270
301
|
return false;
|
|
1271
302
|
return true;
|
|
@@ -1275,9 +306,8 @@ export async function analyzeForAPI(code, filePath, language, options = {}) {
|
|
|
1275
306
|
const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
|
|
1276
307
|
// Python: detect trust boundary violations (flask.session[key] = taintedVal)
|
|
1277
308
|
if (language === 'python') {
|
|
1278
|
-
const trustViolations = findPythonTrustBoundaryViolations(code,
|
|
309
|
+
const trustViolations = findPythonTrustBoundaryViolations(code, pythonTaintedVars);
|
|
1279
310
|
for (const v of trustViolations) {
|
|
1280
|
-
// Avoid duplicate: only add if no existing vulnerability for same sink line
|
|
1281
311
|
const alreadyReported = vulnerabilities.some(existing => existing.sink.line === v.sinkLine && existing.type === 'trust_boundary');
|
|
1282
312
|
if (!alreadyReported) {
|
|
1283
313
|
vulnerabilities.push({
|
|
@@ -1307,6 +337,9 @@ export async function analyzeForAPI(code, filePath, language, options = {}) {
|
|
|
1307
337
|
},
|
|
1308
338
|
};
|
|
1309
339
|
}
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
// Vulnerability matching (used by analyzeForAPI)
|
|
342
|
+
// ---------------------------------------------------------------------------
|
|
1310
343
|
/**
|
|
1311
344
|
* Find potential vulnerabilities by matching sources to sinks.
|
|
1312
345
|
*/
|
|
@@ -1409,363 +442,9 @@ function calculateVulnConfidence(source, sink) {
|
|
|
1409
442
|
confidence = confidence * sink.confidence;
|
|
1410
443
|
return Math.min(confidence, 1.0);
|
|
1411
444
|
}
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
if (val.type === 'int' || val.type === 'float') {
|
|
1416
|
-
const regex = new RegExp(`\\b${name}\\b`, 'g');
|
|
1417
|
-
evaluated = evaluated.replace(regex, String(val.value));
|
|
1418
|
-
}
|
|
1419
|
-
}
|
|
1420
|
-
try {
|
|
1421
|
-
if (/^[\d\s+\-*/().]+$/.test(evaluated)) {
|
|
1422
|
-
const result = Function('"use strict"; return (' + evaluated + ')')();
|
|
1423
|
-
if (typeof result === 'number' && !isNaN(result)) {
|
|
1424
|
-
return String(Math.floor(result));
|
|
1425
|
-
}
|
|
1426
|
-
}
|
|
1427
|
-
}
|
|
1428
|
-
catch {
|
|
1429
|
-
// Evaluation failed
|
|
1430
|
-
}
|
|
1431
|
-
return expr;
|
|
1432
|
-
}
|
|
1433
|
-
/**
 * Drop sinks whose call passes an array element that is known to be untainted.
 *
 * A sink is removed as soon as one argument of a call on the sink's line is a
 * plain `name[index]` access, `name` has an entry in `taintedArrayElements`,
 * and neither the resolved index nor the wildcard `'*'` is in that entry.
 * Arrays without an entry never cause a sink to be removed.
 *
 * @param sinks Sink records; only `line` is read.
 * @param calls Call records; `location.line` and `arguments[].expression` are read.
 * @param taintedArrayElements Map from array name to a set of tainted index strings.
 * @param symbols Handed to `evaluateSimpleExpression` to resolve the index text.
 * @returns The sinks that were not proven clean, in their original order.
 */
function filterCleanArraySinks(sinks, calls, taintedArrayElements, symbols) {
    // Group calls by the line they appear on.
    const byLine = new Map();
    for (const entry of calls) {
        const lineNo = entry.location.line;
        if (!byLine.has(lineNo)) {
            byLine.set(lineNo, []);
        }
        byLine.get(lineNo).push(entry);
    }
    // True when the argument reads an element of a tracked array that is not tainted.
    const readsCleanElement = (arg) => {
        const access = arg.expression?.match(/^(\w+)\[(\d+|[^[\]]+)\]$/);
        if (!access) {
            return false;
        }
        const [, arrayName, rawIndex] = access;
        const index = evaluateSimpleExpression(rawIndex, symbols);
        const taintedIndices = taintedArrayElements.get(arrayName);
        if (taintedIndices === undefined) {
            return false;
        }
        return !(taintedIndices.has(index) || taintedIndices.has('*'));
    };
    return sinks.filter((sink) => {
        const callsHere = byLine.get(sink.line) ?? [];
        return !callsHere.some((entry) => entry.arguments.some(readsCleanElement));
    });
}
|
|
1462
|
-
/**
 * Drop sinks for which some call on the sink's line has at least one argument
 * and every one of its arguments is judged clean.
 *
 * An argument with a `variable` and no `[` in its expression is clean when it
 * is listed in `sanitizedVars`, or when it is not in `taintedVars` and
 * `symbols` holds a value for it whose `type` is not `'unknown'`. A variable
 * that is a DFG field definition is never clean unless the sink line is in
 * `synchronizedLines`. Any other argument is clean only when it carries a
 * `literal` or its expression is a quoted string literal.
 *
 * Variable lookups try the method-scoped key `method:name` first, then `name`.
 *
 * @param sinks Sink records; only `line` is read.
 * @param calls Call records (`location.line`, `in_method`, `arguments`).
 * @param taintedVars Collection queried with `has` for tainted names.
 * @param symbols Map from variable name to a value record with a `type`.
 * @param dfg Optional data-flow graph; `defs` of kind `'field'` are collected.
 * @param sanitizedVars Optional collection queried with `has` for sanitized names.
 * @param synchronizedLines Optional collection of line numbers, queried with `has`.
 * @returns The sinks that were not proven clean, in their original order.
 */
function filterCleanVariableSinks(sinks, calls, taintedVars, symbols, dfg, sanitizedVars, synchronizedLines) {
    // Names defined as fields in the DFG.
    const fieldNames = new Set();
    for (const def of dfg ? dfg.defs : []) {
        if (def.kind === 'field') {
            fieldNames.add(def.variable);
        }
    }
    // Group calls by the line they appear on.
    const byLine = new Map();
    for (const entry of calls) {
        const lineNo = entry.location.line;
        if (!byLine.has(lineNo)) {
            byLine.set(lineNo, []);
        }
        byLine.get(lineNo).push(entry);
    }
    // Decide whether a single argument is clean.
    const argIsClean = (arg, owner, guarded) => {
        const plainVariable = arg.variable && !arg.expression?.includes('[');
        if (!plainVariable) {
            // Literals carry no tainted data.
            if (arg.literal != null) {
                return true;
            }
            return Boolean(arg.expression && !arg.variable && isStringLiteralExpression(arg.expression));
        }
        const name = arg.variable;
        const scoped = owner ? `${owner}:${name}` : name;
        if (fieldNames.has(name) && !guarded) {
            return false;
        }
        if (sanitizedVars?.has(scoped) || sanitizedVars?.has(name)) {
            return true;
        }
        if (taintedVars.has(scoped) || taintedVars.has(name)) {
            return false;
        }
        const known = symbols.get(scoped) ?? symbols.get(name);
        return Boolean(known && known.type !== 'unknown');
    };
    return sinks.filter((sink) => {
        const guarded = synchronizedLines?.has(sink.line) ?? false;
        const callsHere = byLine.get(sink.line) ?? [];
        const provenClean = callsHere.some((entry) => entry.arguments.length > 0 &&
            entry.arguments.every((arg) => argIsClean(arg, entry.in_method, guarded)));
        return !provenClean;
    });
}
|
|
1524
|
-
/**
 * Report whether an expression is a single quoted string literal.
 *
 * The trimmed text must open and close with the same quote character (`"` or
 * `'`) and must not contain an unescaped occurrence of that quote in between.
 * That rules out concatenations such as `"a" + x + "b"`, which merely start
 * and end with a quote but can still carry a variable, and a lone quote
 * character. Triple-quoted literals (`"""..."""`, `'''...'''`) are accepted as
 * long as the delimiter does not reappear inside.
 *
 * @param expr Expression source text.
 * @returns `true` when the whole expression is one string literal.
 */
function isStringLiteralExpression(expr) {
    const trimmed = expr.trim();
    if (trimmed.length < 2) {
        return false;
    }
    const quote = trimmed[0];
    if (quote !== '"' && quote !== "'") {
        return false;
    }
    const last = trimmed.length - 1;
    if (trimmed[last] !== quote) {
        return false;
    }
    // Triple-quoted form: the body may contain single quote characters.
    const triple = quote.repeat(3);
    if (trimmed.length >= 6 && trimmed.startsWith(triple) && trimmed.endsWith(triple)) {
        return !trimmed.slice(3, -3).includes(triple);
    }
    // Walk the interior; a backslash escapes the character that follows it.
    let pos = 1;
    while (pos < last) {
        const ch = trimmed[pos];
        if (ch === '\\') {
            pos += 2;
            continue;
        }
        if (ch === quote) {
            // The literal closed early, so more expression follows it.
            return false;
        }
        pos += 1;
    }
    // If an escape swallowed the final quote, the literal is unterminated.
    return pos === last;
}
|
|
1529
|
-
/**
 * Drop sinks whose argument is wrapped by a matching sanitizer on the same line.
 *
 * A sink is removed when a sanitizer on the sink's line lists the sink's type
 * in `sanitizes`, and some argument expression of a call on that line contains
 * the sanitizer invocation. The sanitizer's `method` is parsed as
 * `Class.method()` or `method()`; the text searched for is `Class.method(` when
 * a class is present, otherwise `method(`.
 *
 * @param sinks Sink records (`line`, `type`).
 * @param sanitizers Sanitizer records (`line`, `sanitizes`, `method`); may be empty or missing.
 * @param calls Call records (`location.line`, `arguments[].expression`).
 * @returns `sinks` itself when there are no sanitizers, otherwise the filtered list.
 */
function filterSanitizedSinks(sinks, sanitizers, calls) {
    if (!sanitizers || sanitizers.length === 0) {
        return sinks;
    }
    // Group records by line number, preserving encounter order.
    const groupByLine = (records, lineOf) => {
        const grouped = new Map();
        for (const record of records) {
            const lineNo = lineOf(record);
            if (!grouped.has(lineNo)) {
                grouped.set(lineNo, []);
            }
            grouped.get(lineNo).push(record);
        }
        return grouped;
    };
    const sanitizersByLine = groupByLine(sanitizers, (san) => san.line);
    const callsByLine = groupByLine(calls, (entry) => entry.location.line);
    // True when the expression text contains a call to the sanitizer.
    const invokesSanitizer = (san, expr) => {
        const parsed = san.method.match(/(?:(\w+)\.)?(\w+)\(\)/);
        if (!parsed) {
            return false;
        }
        const [, owner, fn] = parsed;
        const needle = owner ? `${owner}.${fn}(` : `${fn}(`;
        return expr.includes(needle);
    };
    return sinks.filter((sink) => {
        const local = sanitizersByLine.get(sink.line);
        if (!local || local.length === 0) {
            return true;
        }
        const callsHere = callsByLine.get(sink.line) ?? [];
        const neutralised = local.some((san) => san.sanitizes.includes(sink.type) &&
            callsHere.some((entry) => entry.arguments.some((arg) => invokesSanitizer(san, arg.expression || ''))));
        return !neutralised;
    });
}
|
|
1576
|
-
/**
 * Build taint flows for sinks that receive a tainted variable or read from a
 * tainted collection.
 *
 * For every reachable sink and every argument of a call on the sink's line:
 *  - a tainted `variable` yields a flow with confidence 0.8;
 *  - an `expression` containing `<name>.<accessor>(` for one of the accessor
 *    methods listed below, where `<name>` is tainted, yields a flow with
 *    confidence 0.75 (one per matching accessor).
 * Taint is looked up under the bare name and under `method:name`. Every flow
 * is attributed to `sources[0]`; with no sources nothing is emitted.
 *
 * @param calls Call records (`location.line`, `in_method`, `arguments`).
 * @param sources Source records; only the first one is used.
 * @param sinks Sink records (`line`, `type`).
 * @param taintedVars Collection queried with `has` for tainted names.
 * @param unreachableLines Collection of line numbers to skip, queried with `has`.
 * @returns The detected flows, in discovery order.
 */
function detectCollectionFlows(calls, sources, sinks, taintedVars, unreachableLines) {
    const accessorPatterns = ['getLast', 'getFirst', 'get', 'next', 'poll', 'peek', 'toArray']
        .map((accessor) => new RegExp(`(\\w+)\\.${accessor}\\(`));
    const byLine = new Map();
    for (const entry of calls) {
        const lineNo = entry.location.line;
        if (!byLine.has(lineNo)) {
            byLine.set(lineNo, []);
        }
        byLine.get(lineNo).push(entry);
    }
    const flows = [];
    const isTainted = (name, owner) => taintedVars.has(name) || taintedVars.has(owner ? `${owner}:${name}` : name);
    // Append a two-step source -> sink flow attributed to the first source.
    const record = (variable, sink, confidence) => {
        const source = sources[0];
        if (!source) {
            return;
        }
        flows.push({
            source_line: source.line,
            sink_line: sink.line,
            source_type: source.type,
            sink_type: sink.type,
            path: [
                { variable, line: source.line, type: 'source' },
                { variable, line: sink.line, type: 'sink' },
            ],
            confidence,
            sanitized: false,
        });
    };
    for (const sink of sinks) {
        if (!unreachableLines.has(sink.line)) {
            for (const entry of byLine.get(sink.line) ?? []) {
                for (const arg of entry.arguments) {
                    if (arg.variable && isTainted(arg.variable, entry.in_method)) {
                        record(arg.variable, sink, 0.8);
                    }
                    if (arg.expression) {
                        for (const pattern of accessorPatterns) {
                            const hit = arg.expression.match(pattern);
                            if (hit && isTainted(hit[1], entry.in_method)) {
                                record(hit[1], sink, 0.75);
                            }
                        }
                    }
                }
            }
        }
    }
    return flows;
}
|
|
1646
|
-
/**
 * Build taint flows for sinks that receive a tainted array element.
 *
 * For every reachable sink, each argument of a call on the sink's line whose
 * expression is exactly `name[index]` is checked against
 * `taintedArrayElements`: when the set for `name` contains the index text as
 * written, or the wildcard `'*'`, a flow with confidence 0.85 is emitted. All
 * flows are attributed to `sources[0]`; with no sources nothing is emitted.
 *
 * @param calls Call records (`location.line`, `arguments[].expression`).
 * @param sources Source records; only the first one is used.
 * @param sinks Sink records (`line`, `type`).
 * @param taintedArrayElements Map from array name to a set of tainted index strings.
 * @param unreachableLines Collection of line numbers to skip, queried with `has`.
 * @returns The detected flows, in discovery order.
 */
function detectArrayElementFlows(calls, sources, sinks, taintedArrayElements, unreachableLines) {
    const byLine = new Map();
    for (const entry of calls) {
        const lineNo = entry.location.line;
        if (!byLine.has(lineNo)) {
            byLine.set(lineNo, []);
        }
        byLine.get(lineNo).push(entry);
    }
    const flows = [];
    for (const sink of sinks) {
        if (!unreachableLines.has(sink.line)) {
            for (const entry of byLine.get(sink.line) ?? []) {
                for (const arg of entry.arguments) {
                    const access = arg.expression?.match(/^(\w+)\[(\d+|[^[\]]+)\]$/);
                    if (!access) {
                        continue;
                    }
                    const [, arrayName, indexStr] = access;
                    const taintedIndices = taintedArrayElements.get(arrayName);
                    if (!taintedIndices) {
                        continue;
                    }
                    if (!(taintedIndices.has(indexStr) || taintedIndices.has('*'))) {
                        continue;
                    }
                    const source = sources[0];
                    if (!source) {
                        continue;
                    }
                    flows.push({
                        source_line: source.line,
                        sink_line: sink.line,
                        source_type: source.type,
                        sink_type: sink.type,
                        path: [
                            { variable: arrayName, line: source.line, type: 'source' },
                            { variable: `${arrayName}[${indexStr}]`, line: sink.line, type: 'sink' },
                        ],
                        confidence: 0.85,
                        sanitized: false,
                    });
                }
            }
        }
    }
    return flows;
}
|
|
1692
|
-
/**
 * Find flows where a tainted method parameter is passed straight into a sink.
 *
 * Only sources of type `interprocedural_param` are considered. Their
 * `location` text is expected to read like "String paramName in methodName";
 * the second and third words give the parameter and method names. A flow is
 * emitted when a call on a reachable sink's line sits inside that method and
 * one of its arguments is that parameter. At most one flow is kept per
 * (source line, sink line) pair.
 *
 * @param types Not read by this function.
 * @param calls Call records (`location.line`, `in_method`, `arguments[].variable`).
 * @param sources Source records (`type`, `location`, `line`).
 * @param sinks Sink records (`line`, `type`).
 * @param unreachableLines Collection of line numbers to skip, queried with `has`.
 * @returns The detected flows, in discovery order.
 */
function detectParameterSinkFlows(types, calls, sources, sinks, unreachableLines) {
    const flows = [];
    // method name -> (parameter name -> source record)
    const paramSourcesByMethod = new Map();
    for (const source of sources) {
        if (source.type !== 'interprocedural_param') {
            continue;
        }
        const parsed = source.location.match(/(\S+)\s+(\S+)\s+in\s+(\S+)/);
        if (!parsed) {
            continue;
        }
        const [, , paramName, methodName] = parsed;
        if (!paramSourcesByMethod.has(methodName)) {
            paramSourcesByMethod.set(methodName, new Map());
        }
        paramSourcesByMethod.get(methodName).set(paramName, source);
    }
    if (paramSourcesByMethod.size === 0) {
        return flows;
    }
    const byLine = new Map();
    for (const entry of calls) {
        const lineNo = entry.location.line;
        if (!byLine.has(lineNo)) {
            byLine.set(lineNo, []);
        }
        byLine.get(lineNo).push(entry);
    }
    for (const sink of sinks) {
        if (unreachableLines.has(sink.line)) {
            continue;
        }
        for (const entry of byLine.get(sink.line) ?? []) {
            const known = entry.in_method ? paramSourcesByMethod.get(entry.in_method) : undefined;
            if (!known) {
                continue;
            }
            for (const arg of entry.arguments) {
                const paramSource = arg.variable ? known.get(arg.variable) : undefined;
                if (!paramSource) {
                    continue;
                }
                // Skip pairs that were already reported.
                const duplicate = flows.some((f) => f.source_line === paramSource.line && f.sink_line === sink.line);
                if (duplicate) {
                    continue;
                }
                flows.push({
                    source_line: paramSource.line,
                    sink_line: sink.line,
                    source_type: paramSource.type,
                    sink_type: sink.type,
                    path: [
                        { variable: arg.variable, line: paramSource.line, type: 'source' },
                        { variable: arg.variable, line: sink.line, type: 'sink' },
                    ],
                    confidence: 0.75, // Lower confidence for interprocedural
                    sanitized: false,
                });
            }
        }
    }
    return flows;
}
|
|
445
|
+
// ---------------------------------------------------------------------------
|
|
446
|
+
// Lifecycle
|
|
447
|
+
// ---------------------------------------------------------------------------
|
|
1769
448
|
/**
|
|
1770
449
|
* Check if the analyzer is initialized.
|
|
1771
450
|
*/
|
|
@@ -1778,4 +457,83 @@ export function isAnalyzerInitialized() {
|
|
|
1778
457
|
/**
 * Clear the module-level `initialized` flag.
 */
export function resetAnalyzer() {
    initialized = false;
}
|
|
460
|
+
// ---------------------------------------------------------------------------
|
|
461
|
+
// Project-level analysis (multi-file)
|
|
462
|
+
// ---------------------------------------------------------------------------
|
|
463
|
+
/**
 * Analyze several files together as one project.
 *
 * Each file is run through the single-file `analyze()` one after another, in
 * the order given, and registered in a `ProjectGraph`. `CrossFilePass` then
 * supplies the type hierarchy, cross-file calls and taint paths, and the
 * circular-dependency and orphan-module passes run over an `ImportGraph` of
 * the project. Findings from those two passes are appended to the `findings`
 * of the per-file analysis whose path matches `finding.file`; findings for a
 * path that was not analyzed are dropped.
 *
 * The project-level `findings` array in the result is always empty — it
 * requires LLM enrichment which is out of scope for this library (see
 * CLAUDE.md and SPEC.md section 11).
 *
 * @param files Entries of the form `{ code, filePath, language }`.
 * @param options Forwarded unchanged to every `analyze()` call.
 * @returns Project meta, per-file analyses, and the cross-file results.
 */
export async function analyzeProject(files, options = {}) {
    const projectGraph = new ProjectGraph();
    const sourceLinesByFile = new Map();
    const fileAnalyses = [];
    // 1. Per-file analysis (sequential, in input order)
    for (const entry of files) {
        const { filePath } = entry;
        const analysis = await analyze(entry.code, filePath, entry.language, options);
        fileAnalyses.push({ file: filePath, analysis });
        projectGraph.addFile(filePath, new CodeGraph(analysis));
        sourceLinesByFile.set(filePath, entry.code.split('\n'));
    }
    // 2. Cross-file analysis
    const crossFileResult = new CrossFilePass().run(projectGraph, sourceLinesByFile);
    // 3. Import-graph analysis (circular deps + orphan modules)
    const importGraph = new ImportGraph(projectGraph);
    const circularFindings = new CircularDependencyPass().run(projectGraph, importGraph);
    const orphanFindings = new OrphanModulePass().run(projectGraph, importGraph);
    // Hand each project-level finding to the analysis of the file it names.
    for (const finding of [...circularFindings, ...orphanFindings]) {
        const owner = fileAnalyses.find((candidate) => candidate.file === finding.file);
        if (!owner) {
            continue;
        }
        const previous = owner.analysis.findings ?? [];
        owner.analysis.findings = [...previous, finding];
    }
    // 4. Assemble ProjectMeta
    const filePaths = files.map(({ filePath }) => filePath);
    let totalLoc = 0;
    for (const { analysis } of fileAnalyses) {
        totalLoc += analysis.meta.loc ?? 0;
    }
    return {
        meta: {
            name: deriveProjectName(filePaths),
            root: deriveProjectRoot(filePaths),
            language: files[0]?.language ?? 'java',
            total_files: files.length,
            total_loc: totalLoc,
            analyzed_at: new Date().toISOString(),
        },
        files: fileAnalyses,
        type_hierarchy: crossFileResult.typeHierarchy,
        cross_file_calls: crossFileResult.crossFileCalls,
        taint_paths: crossFileResult.taintPaths,
        findings: [],
    };
}
|
|
518
|
+
/**
 * Derive a project name: the last non-empty segment of the common root
 * directory of `paths`, or `'unknown'` when there are no paths or the root
 * has no named segment.
 */
function deriveProjectName(paths) {
    if (paths.length === 0) {
        return 'unknown';
    }
    const named = deriveProjectRoot(paths)
        .split('/')
        .filter((part) => part.length > 0);
    const leaf = named[named.length - 1];
    return leaf ?? 'unknown';
}
|
|
525
|
+
/**
 * Derive the common ancestor directory from a list of file paths.
 *
 * Paths are split on `/` and the last segment of every path is treated as the
 * file name. The result is the longest run of leading directory segments
 * shared by all paths, joined with `/`. When the paths share no named
 * directory (or the list is empty) the result is `'/'`.
 *
 * @param paths File paths using `/` as the separator.
 * @returns The shared leading directory, or `'/'` when there is none.
 */
function deriveProjectRoot(paths) {
    if (paths.length === 0)
        return '/';
    // Seed with the first path's directory segments (file name stripped).
    let common = paths[0].split('/').slice(0, -1);
    for (const p of paths.slice(1)) {
        const dirs = p.split('/').slice(0, -1);
        // Count the matching leading segments. A positional filter would also
        // keep matches that come after the first mismatch, which is not a prefix.
        let shared = 0;
        while (shared < common.length && shared < dirs.length && common[shared] === dirs[shared]) {
            shared += 1;
        }
        common = common.slice(0, shared);
    }
    return common.join('/') || '/';
}
|
|
537
|
+
// Re-export isFalsePositive for consumers that use it directly
|
|
538
|
+
export { isFalsePositive };
|
|
1781
539
|
//# sourceMappingURL=analyzer.js.map
|