circle-ir 3.4.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/configs/sinks/python.json +9 -0
- package/configs/sources/python.json +57 -17
- package/dist/analysis/taint-matcher.js +43 -0
- package/dist/analysis/taint-matcher.js.map +1 -1
- package/dist/analyzer.js +424 -2
- package/dist/analyzer.js.map +1 -1
- package/dist/browser/circle-ir.js +693 -1
- package/dist/core/circle-ir-core.cjs +122 -0
- package/dist/core/circle-ir-core.js +122 -0
- package/dist/core/extractors/calls.js +119 -0
- package/dist/core/extractors/calls.js.map +1 -1
- package/dist/core/parser.d.ts +1 -1
- package/dist/languages/plugins/bash.d.ts +51 -0
- package/dist/languages/plugins/bash.js +243 -0
- package/dist/languages/plugins/bash.js.map +1 -0
- package/dist/languages/plugins/index.d.ts +1 -0
- package/dist/languages/plugins/index.js +3 -0
- package/dist/languages/plugins/index.js.map +1 -1
- package/dist/languages/types.d.ts +1 -1
- package/dist/types/index.d.ts +1 -1
- package/dist/wasm/tree-sitter-bash.wasm +0 -0
- package/package.json +2 -1
- package/wasm/tree-sitter-bash.wasm +0 -0
package/dist/analyzer.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { initParser, parse, extractMeta, extractTypes, extractCalls, extractImports, extractExports, buildCFG, buildDFG, collectAllNodes, } from './core/index.js';
|
|
8
8
|
import { analyzeTaint, getDefaultConfig, detectUnresolved, propagateTaint, analyzeInterprocedural, findTaintBridges, analyzeConstantPropagation, isFalsePositive, isCorrelatedPredicateFP } from './analysis/index.js';
|
|
9
|
-
import { registerBuiltinPlugins } from './languages/index.js';
|
|
9
|
+
import { registerBuiltinPlugins, getLanguagePlugin } from './languages/index.js';
|
|
10
10
|
import { logger } from './utils/logger.js';
|
|
11
11
|
/**
|
|
12
12
|
* Find getter methods that return tainted fields from constructor assignments.
|
|
@@ -119,6 +119,34 @@ const JS_TAINTED_PATTERNS = [
|
|
|
119
119
|
{ pattern: /\bdocument\.querySelector\b/, type: 'dom_input' },
|
|
120
120
|
{ pattern: /\.value\b/, type: 'dom_input' },
|
|
121
121
|
];
|
|
122
|
+
/**
 * Python/Flask/Django tainted request access patterns.
 * Used to detect sources in assignments like: user_id = request.args.get('id')
 * Also covers subscript access: user_id = request.args['id']
 *
 * Each entry pairs a regular expression (tested against an expression's text)
 * with the source type to report when it matches. Order matters for callers
 * that stop at the first matching entry, so new entries are appended at the
 * end to keep existing classifications unchanged. None of the expressions use
 * the `g`/`y` flags, which keeps repeated `.test()` calls stateless.
 */
const PYTHON_TAINTED_PATTERNS = [
    { pattern: /\brequest\.args\b/, type: 'http_param' },
    { pattern: /\brequest\.form\b/, type: 'http_body' },
    { pattern: /\brequest\.json\b/, type: 'http_body' },
    { pattern: /\brequest\.data\b/, type: 'http_body' },
    { pattern: /\brequest\.files?\b/, type: 'file_input' },
    { pattern: /\brequest\.headers?\b/, type: 'http_header' },
    { pattern: /\brequest\.cookies\b/, type: 'http_cookie' },
    { pattern: /\brequest\.GET\b/, type: 'http_param' },
    { pattern: /\brequest\.POST\b/, type: 'http_body' },
    { pattern: /\brequest\.META\b/, type: 'http_header' },
    { pattern: /\brequest\.FILES\b/, type: 'file_input' },
    { pattern: /\brequest\.query_params\b/, type: 'http_param' },
    { pattern: /\brequest\.path_params\b/, type: 'http_param' },
    // Flask raw query/body strings
    { pattern: /\brequest\.query_string\b/, type: 'http_param' },
    { pattern: /\brequest\.get_data\s*\(/, type: 'http_body' },
    // Request wrapper helper methods (common in OWASP-style benchmarks and real wrappers)
    { pattern: /\bget_form_parameter\s*\(/, type: 'http_body' },
    { pattern: /\bget_query_parameter\s*\(/, type: 'http_param' },
    { pattern: /\bget_header_value\s*\(/, type: 'http_header' },
    { pattern: /\bget_cookie_value\s*\(/, type: 'http_cookie' },
    // Further request accessors: parsed JSON body (Flask get_json()),
    // combined query + form data (Flask request.values), raw body (request.body).
    { pattern: /\brequest\.get_json\s*\(/, type: 'http_body' },
    { pattern: /\brequest\.values\b/, type: 'http_param' },
    { pattern: /\brequest\.body\b/, type: 'http_body' },
];
|
|
122
150
|
/**
|
|
123
151
|
* Find JavaScript taint sources from variable assignments.
|
|
124
152
|
* Detects patterns like: var userId = req.query.id
|
|
@@ -161,6 +189,227 @@ function findJavaScriptAssignmentSources(sourceCode, language) {
|
|
|
161
189
|
}
|
|
162
190
|
return sources;
|
|
163
191
|
}
|
|
192
|
+
/**
 * Find Python taint sources from variable assignments and subscript access.
 * Detects patterns like: user_id = request.args.get('id') or request.args['id']
 *
 * The scan is line-based: every non-comment line that looks like
 * `target = expr` or `target: annotation = expr` is checked against
 * PYTHON_TAINTED_PATTERNS, and the first matching pattern decides the
 * reported source type (at most one source per line).
 *
 * @param {string} sourceCode - Full text of the file being analyzed.
 * @param {string} language - Language identifier; anything other than 'python' yields [].
 * @returns {Array<{type: string, location: string, severity: string, line: number, confidence: number, variable: string}>}
 *   One entry per matching assignment, with 1-based line numbers.
 */
function findPythonAssignmentSources(sourceCode, language) {
    const sources = [];
    if (language !== 'python') {
        return sources;
    }
    // `=(?!=)` keeps comparisons such as `x == request.args.get('id')` from
    // being mistaken for assignments.
    const ASSIGNMENT = /^\s*(\w[\w.]*)\s*(?::\s*\w[\w\[\], .]*)?\s*=(?!=)\s*(.+)/;
    const MAX_PREVIEW_LENGTH = 50;
    const lines = sourceCode.split(/\r?\n/);
    for (let index = 0; index < lines.length; index++) {
        const line = lines[index];
        // Skip comment lines
        if (line.trimStart().startsWith('#')) {
            continue;
        }
        const assignmentMatch = line.match(ASSIGNMENT);
        if (!assignmentMatch) {
            continue;
        }
        // Use the full captured target so `self.user = ...` is reported as
        // `self.user` rather than just `self`.
        const [, target, rhs] = assignmentMatch;
        const matched = PYTHON_TAINTED_PATTERNS.find(({ pattern }) => pattern.test(rhs));
        if (!matched) {
            continue;
        }
        // Decide on the ellipsis from the trimmed text that is actually shown,
        // so trailing whitespace cannot produce a spurious "...".
        const expression = rhs.trim();
        const preview = expression.length > MAX_PREVIEW_LENGTH
            ? `${expression.substring(0, MAX_PREVIEW_LENGTH)}...`
            : expression;
        sources.push({
            type: matched.type,
            location: `${target} = ${preview}`,
            severity: 'high',
            line: index + 1,
            confidence: 0.95,
            variable: target,
        });
    }
    return sources;
}
|
|
234
|
+
/**
 * Build a map of tainted variable names → source line via simple forward
 * line-by-line taint propagation for Python.
 *
 * Seeds from PYTHON_TAINTED_PATTERNS; propagates through assignments where the
 * RHS contains a tainted variable. Uses per-key container taint to distinguish
 * map['tainted_key'] from map['safe_key'] and conf.get(s,tainted_k) vs conf.get(s,safe_k).
 *
 * Handled statement shapes, checked in this order for each non-comment line:
 *   1. container['key'] = value
 *   2. obj.set('section', 'key', value)
 *   3. var += expr
 *   4. for var in expr
 *   5. var = expr
 *
 * Both LF and CRLF line endings are accepted.
 *
 * @param {string} sourceCode - Full text of the Python file.
 * @returns {Map<string, number>} Tainted variable name → 1-based line where it
 *   was (most recently) recorded as tainted.
 */
function buildPythonTaintedVars(sourceCode) {
    const tainted = new Map();
    // Per-key container taint: "map['key']" or "conf['section']['key']" → line number
    const containerTainted = new Map();
    const isComment = (text) => text.trimStart().startsWith('#');
    const isDirectSource = (expr) => PYTHON_TAINTED_PATTERNS.some(({ pattern }) => pattern.test(expr));
    // Keys come from \w+ captures below, so they are safe to embed in a RegExp.
    const findTaintedRef = (expr) => [...tainted.keys()].find((name) => new RegExp(`\\b${name}\\b`).test(expr));
    const carriesTaint = (expr) => isDirectSource(expr) || findTaintedRef(expr) !== undefined;
    // Splitting on /\r?\n/ matters: with a trailing "\r" none of the `(.+)$`
    // expressions below can match, because `.` excludes line terminators.
    const lines = sourceCode.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const lineNumber = i + 1;
        if (isComment(line)) {
            continue;
        }
        // Subscript assignment: container['key'] = value
        // Tracks taint per-key so map['keyA']='safe' and map['keyB']=param are distinguished.
        // `=(?!=)` excludes comparisons like container['key'] == value.
        const subscriptAssign = line.match(/^\s*(\w+)\[(['"])([^'"]+)\2\]\s*=(?!=)\s*(.+)$/);
        if (subscriptAssign) {
            const [, container, , key, value] = subscriptAssign;
            if (carriesTaint(value)) {
                containerTainted.set(`${container}['${key}']`, lineNumber);
            }
            continue; // subscript assignments don't match simple variable regex below
        }
        // ConfigParser set: obj.set('section', 'key', value)
        // Tracks per (section, key) so conf.get('s','keyA') and conf.get('s','keyB') are distinct.
        const setCallMatch = line.match(/^\s*(\w+)\.set\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*,\s*(.+?)\s*\)$/);
        if (setCallMatch) {
            const [, obj, , section, , key, value] = setCallMatch;
            if (carriesTaint(value)) {
                containerTainted.set(`${obj}['${section}']['${key}']`, lineNumber);
            }
            continue;
        }
        // Augmented assignment: var += expr — taint if either side is tainted
        const augAssign = line.match(/^\s*(\w+)\s*\+=\s*(.+)$/);
        if (augAssign) {
            const [, target, operand] = augAssign;
            if (carriesTaint(operand) || tainted.has(target)) {
                // Keep the original line if the target was already tainted.
                tainted.set(target, tainted.get(target) ?? lineNumber);
            }
            continue;
        }
        // For loop: for var in tainted_source — seed loop variable as tainted
        const forLoopMatch = line.match(/^\s*for\s+(\w+)\s+in\s+(.+?)(?:\s*:\s*)?$/);
        if (forLoopMatch) {
            const [, iterVar, iterExpr] = forLoopMatch;
            if (carriesTaint(iterExpr)) {
                tainted.set(iterVar, lineNumber);
            }
            continue;
        }
        // Regular assignment: var = expr (but not the comparison var == expr)
        const assignMatch = line.match(/^\s*(\w+)\s*=(?!=)\s*(.+)$/);
        if (!assignMatch) {
            continue;
        }
        const [, lhs, rhs] = assignMatch;
        const expr = rhs.trim();
        let propagatedFrom;
        // Per-key dict access: bar = container['key']
        const dictAccessMatch = expr.match(/^(\w+)\[(['"])([^'"]+)\2\]$/);
        if (dictAccessMatch) {
            const slot = `${dictAccessMatch[1]}['${dictAccessMatch[3]}']`;
            if (containerTainted.has(slot)) {
                propagatedFrom = slot;
            }
        }
        // Per-key configparser get: bar = conf.get('section', 'key')
        if (propagatedFrom === undefined) {
            const confGetMatch = expr.match(/^(\w+)\.get\s*\(\s*(['"])([^'"]+)\2\s*,\s*(['"])([^'"]+)\4\s*\)$/);
            if (confGetMatch) {
                const slot = `${confGetMatch[1]}['${confGetMatch[3]}']['${confGetMatch[5]}']`;
                if (containerTainted.has(slot)) {
                    propagatedFrom = slot;
                }
            }
        }
        // Standard variable propagation (skip os.environ/os.getenv — safe env reads)
        if (propagatedFrom === undefined) {
            const isSafeEnvRead = /\bos\.environ\.get\s*\(/.test(rhs) || /\bos\.getenv\s*\(/.test(rhs);
            if (!isSafeEnvRead) {
                propagatedFrom = findTaintedRef(rhs);
            }
        }
        if (isDirectSource(rhs) || propagatedFrom !== undefined) {
            tainted.set(lhs, lineNumber);
            continue;
        }
        if (!tainted.has(lhs)) {
            continue;
        }
        // Variable overwritten — preserve taint for null-guard patterns like:
        //   if not param:
        //       param = ""
        let prevNonBlank;
        for (let k = i - 1; k >= 0; k--) {
            if (lines[k].trim() && !isComment(lines[k])) {
                prevNonBlank = lines[k];
                break;
            }
        }
        const isNullGuard = prevNonBlank !== undefined && (
            new RegExp(`^\\s*if\\s+not\\s+${lhs}\\s*:`).test(prevNonBlank) ||
            new RegExp(`^\\s*if\\s+${lhs}\\s+is\\s+None\\s*:`).test(prevNonBlank));
        if (!isNullGuard) {
            tainted.delete(lhs);
        }
    }
    return tainted;
}
|
|
346
|
+
/**
 * Detect Python apostrophe-check sanitizer guards, e.g.:
 *   if "'" in bar:
 *       return  # or raise / abort
 * Returns the set of variable names that are guarded this way.
 *
 * A guard only counts when
 *   - the tested string literal actually contains a quote character
 *     (' or "), so unrelated membership tests such as `if 'admin' in role:`
 *     are not mistaken for quote sanitizers, and
 *   - an exit statement (return/raise/abort/continue/break) appears inside
 *     the if-body within the next 5 lines.
 *
 * @param {string} sourceCode - Full text of the Python file.
 * @returns {Set<string>} Names of variables protected by such a guard.
 */
function findPythonQuoteSanitizedVars(sourceCode) {
    // Group 1: the quoted literal (single- or double-quoted, escapes allowed).
    // Group 2: the variable being tested.
    const QUOTE_GUARD = /^\s*if\s+('(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")\s+in\s+(\w+)\s*:/;
    const EXIT_STATEMENT = /^(return|raise|abort|continue|break)\b/;
    const LOOKAHEAD_LINES = 5;
    const indentOf = (text) => text.length - text.trimStart().length;
    const sanitized = new Set();
    const lines = sourceCode.split('\n');
    // A guard on the very last line has no body to inspect, hence length - 1.
    for (let i = 0; i < lines.length - 1; i++) {
        const guard = lines[i].match(QUOTE_GUARD);
        if (!guard) {
            continue;
        }
        const [, literal, variable] = guard;
        // Strip the delimiters and require a quote character in the content:
        // "'", '\'', '"' and "\"" qualify; "admin" does not.
        if (!/['"]/.test(literal.slice(1, -1))) {
            continue;
        }
        // Look ahead up to 5 lines for a return/raise/abort/continue/break.
        // The guard body may be multi-line (e.g. RESPONSE += (...) \n return).
        const ifIndent = indentOf(lines[i]);
        const lastLine = Math.min(i + LOOKAHEAD_LINES, lines.length - 1);
        for (let j = i + 1; j <= lastLine; j++) {
            const bodyLine = lines[j];
            if (!bodyLine.trim()) {
                continue; // skip blank lines
            }
            if (indentOf(bodyLine) <= ifIndent) {
                break; // left the if-block
            }
            if (EXIT_STATEMENT.test(bodyLine.trim())) {
                sanitized.add(variable);
                break;
            }
        }
    }
    return sanitized;
}
|
|
384
|
+
/**
 * Detect Python trust boundary violations:
 *   flask.session[key] = value  (or session[key] = value)
 * where key or value references a tainted variable.
 *
 * Only real writes are reported: comparisons (`session[key] == value`) and
 * identifiers that merely end in "session" (`my_session[...]`) are ignored.
 * Both LF and CRLF line endings are accepted.
 *
 * @param {string} sourceCode - Full text of the Python file.
 * @param {string} language - Language identifier; anything other than 'python' yields [].
 * @param {Map<string, number>} taintedVars - Tainted variable name → line number.
 *   Names are embedded in regular expressions as-is, so they are expected to
 *   be plain identifiers.
 * @returns {Array<{sourceLine: number, sinkLine: number}>} One entry per
 *   offending session write; `sourceLine` is the smallest line number found
 *   in `taintedVars`, `sinkLine` is the 1-based line of the write.
 */
function findPythonTrustBoundaryViolations(sourceCode, language, taintedVars) {
    if (language !== 'python' || taintedVars.size === 0) {
        return [];
    }
    // \b      → do not match inside longer identifiers such as my_session
    // =(?!=)  → assignment only, never the == comparison operator
    const SESSION_WRITE = /\b(?:flask\.)?session\[([^\]]+)\]\s*=(?!=)\s*(.+)$/;
    // Compile one matcher per tainted name up front; without the g/y flags
    // they can be reused safely across lines.
    const taintedMatchers = [...taintedVars.keys()].map((name) => new RegExp(`\\b${name}\\b`));
    const mentionsTainted = (expr) => taintedMatchers.some((matcher) => matcher.test(expr));
    const earliestSourceLine = Math.min(...taintedVars.values());
    const violations = [];
    // Split on /\r?\n/ so a trailing "\r" cannot defeat the `(.+)$` capture.
    const lines = sourceCode.split(/\r?\n/);
    lines.forEach((line, index) => {
        if (line.trimStart().startsWith('#')) {
            return;
        }
        const write = line.match(SESSION_WRITE);
        if (!write) {
            return;
        }
        const [, keyExpr, valueExpr] = write;
        if (mentionsTainted(keyExpr) || mentionsTainted(valueExpr)) {
            violations.push({ sourceLine: earliestSourceLine, sinkLine: index + 1 });
        }
    });
    return violations;
}
|
|
164
413
|
/**
|
|
165
414
|
* Find DOM XSS sinks from property assignments in JavaScript.
|
|
166
415
|
* Detects patterns like: element.innerHTML = userInput
|
|
@@ -346,6 +595,19 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
346
595
|
'assignment_expression',
|
|
347
596
|
]);
|
|
348
597
|
}
|
|
598
|
+
else if (language === 'bash') {
|
|
599
|
+
nodeTypesToCollect = new Set([
|
|
600
|
+
// Bash AST nodes
|
|
601
|
+
'command',
|
|
602
|
+
'function_definition',
|
|
603
|
+
'variable_assignment',
|
|
604
|
+
'declaration_command',
|
|
605
|
+
'if_statement',
|
|
606
|
+
'for_statement',
|
|
607
|
+
'c_style_for_statement',
|
|
608
|
+
'while_statement',
|
|
609
|
+
]);
|
|
610
|
+
}
|
|
349
611
|
else {
|
|
350
612
|
nodeTypesToCollect = new Set([
|
|
351
613
|
// Java AST nodes
|
|
@@ -380,7 +642,44 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
380
642
|
}
|
|
381
643
|
// First, do a preliminary taint analysis to find inter-procedural parameter sources
|
|
382
644
|
// These need to be passed to constant propagation so it can track taint from method parameters
|
|
383
|
-
|
|
645
|
+
let baseConfig = options.taintConfig ?? getDefaultConfig();
|
|
646
|
+
// Merge language plugin built-in sources/sinks into the config.
|
|
647
|
+
// This handles languages (e.g. Bash) whose patterns are defined on the plugin
|
|
648
|
+
// rather than in YAML config files loaded by getDefaultConfig().
|
|
649
|
+
if (!options.taintConfig) {
|
|
650
|
+
const plugin = getLanguagePlugin(language);
|
|
651
|
+
if (plugin) {
|
|
652
|
+
const pluginSources = plugin.getBuiltinSources();
|
|
653
|
+
const pluginSinks = plugin.getBuiltinSinks();
|
|
654
|
+
if (pluginSources.length > 0 || pluginSinks.length > 0) {
|
|
655
|
+
baseConfig = {
|
|
656
|
+
...baseConfig,
|
|
657
|
+
sources: [
|
|
658
|
+
...baseConfig.sources,
|
|
659
|
+
...pluginSources.map(s => ({
|
|
660
|
+
method: s.method,
|
|
661
|
+
class: s.class,
|
|
662
|
+
annotation: s.annotation,
|
|
663
|
+
type: s.type,
|
|
664
|
+
severity: s.severity,
|
|
665
|
+
return_tainted: s.returnTainted ?? false,
|
|
666
|
+
})),
|
|
667
|
+
],
|
|
668
|
+
sinks: [
|
|
669
|
+
...baseConfig.sinks,
|
|
670
|
+
...pluginSinks.map(s => ({
|
|
671
|
+
method: s.method,
|
|
672
|
+
class: s.class,
|
|
673
|
+
type: s.type,
|
|
674
|
+
cwe: s.cwe,
|
|
675
|
+
severity: s.severity,
|
|
676
|
+
arg_positions: s.argPositions,
|
|
677
|
+
})),
|
|
678
|
+
],
|
|
679
|
+
};
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
}
|
|
384
683
|
const preliminaryTaint = analyzeTaint(calls, types, baseConfig);
|
|
385
684
|
// Extract inter-procedural parameter sources
|
|
386
685
|
const taintedParameters = [];
|
|
@@ -409,6 +708,9 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
409
708
|
// Add sources for JavaScript variable assignments with tainted patterns
|
|
410
709
|
const jsAssignmentSources = findJavaScriptAssignmentSources(code, language);
|
|
411
710
|
taint.sources.push(...jsAssignmentSources);
|
|
711
|
+
// Add sources for Python variable assignments with tainted request patterns
|
|
712
|
+
const pythonAssignmentSources = findPythonAssignmentSources(code, language);
|
|
713
|
+
taint.sources.push(...pythonAssignmentSources);
|
|
412
714
|
// Add sinks for JavaScript DOM XSS patterns (innerHTML, document.write, etc.)
|
|
413
715
|
const jsDOMSinks = findJavaScriptDOMSinks(code, language);
|
|
414
716
|
for (const domSink of jsDOMSinks) {
|
|
@@ -440,6 +742,64 @@ export async function analyze(code, filePath, language, options = {}) {
|
|
|
440
742
|
taint.sinks = filterCleanVariableSinks(taint.sinks, calls, constPropResult.tainted, constPropResult.symbols, dfg, constPropResult.sanitizedVars, constPropResult.synchronizedLines);
|
|
441
743
|
// Filter sinks that are wrapped by sanitizers on the same line
|
|
442
744
|
taint.sinks = filterSanitizedSinks(taint.sinks, taint.sanitizers ?? [], calls);
|
|
745
|
+
// Python: reduce XPath false-positives using forward taint propagation +
|
|
746
|
+
// apostrophe-guard sanitizer detection; also detect trust boundary violations
|
|
747
|
+
// (flask.session[key] = value) which are subscript assignments, not call nodes.
|
|
748
|
+
if (language === 'python') {
|
|
749
|
+
const pyTaintedVars = buildPythonTaintedVars(code);
|
|
750
|
+
const pySanitizedVars = findPythonQuoteSanitizedVars(code);
|
|
751
|
+
// Propagate sanitization: if bar is sanitized and query = f"...{bar}...", query is also sanitized
|
|
752
|
+
for (const line of code.split('\n')) {
|
|
753
|
+
const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
754
|
+
if (!am)
|
|
755
|
+
continue;
|
|
756
|
+
const [, lhs, rhs] = am;
|
|
757
|
+
if ([...pySanitizedVars].some(v => new RegExp(`\\b${v}\\b`).test(rhs))) {
|
|
758
|
+
pySanitizedVars.add(lhs);
|
|
759
|
+
}
|
|
760
|
+
}
|
|
761
|
+
// Detect inline .replace() sanitizers: query = f"...{bar.replace('\'', '&apos;')}..."
|
|
762
|
+
// The tainted var appears with .replace() in the rhs — treat lhs as XPath-safe
|
|
763
|
+
for (const line of code.split('\n')) {
|
|
764
|
+
const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
765
|
+
if (!am)
|
|
766
|
+
continue;
|
|
767
|
+
const [, lhs, rhs] = am;
|
|
768
|
+
const hasReplaceOnTainted = [...pyTaintedVars.keys()].some(v => new RegExp(`\\b${v}\\.replace\\s*\\(`).test(rhs));
|
|
769
|
+
if (hasReplaceOnTainted)
|
|
770
|
+
pySanitizedVars.add(lhs);
|
|
771
|
+
}
|
|
772
|
+
const pySourceLines = code.split('\n');
|
|
773
|
+
// Filter XPath sinks: keep only if a tainted var is used at the sink line
|
|
774
|
+
taint.sinks = taint.sinks.filter(sink => {
|
|
775
|
+
if (sink.type !== 'xpath_injection')
|
|
776
|
+
return true;
|
|
777
|
+
const sinkLineText = pySourceLines[sink.line - 1] ?? '';
|
|
778
|
+
const taintedVarOnLine = [...pyTaintedVars.keys()].find(v => new RegExp(`\\b${v}\\b`).test(sinkLineText));
|
|
779
|
+
if (!taintedVarOnLine)
|
|
780
|
+
return false;
|
|
781
|
+
if (pySanitizedVars.has(taintedVarOnLine))
|
|
782
|
+
return false;
|
|
783
|
+
// Suppress parameterized XPath: root.xpath(query, name=bar) where bar is a keyword arg
|
|
784
|
+
if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText))
|
|
785
|
+
return false;
|
|
786
|
+
return true;
|
|
787
|
+
});
|
|
788
|
+
// Add trust boundary sinks from session subscript assignments
|
|
789
|
+
const trustViolations = findPythonTrustBoundaryViolations(code, language, pyTaintedVars);
|
|
790
|
+
for (const v of trustViolations) {
|
|
791
|
+
const alreadyExists = taint.sinks.some(s => s.line === v.sinkLine && s.type === 'trust_boundary');
|
|
792
|
+
if (!alreadyExists) {
|
|
793
|
+
taint.sinks.push({
|
|
794
|
+
type: 'trust_boundary',
|
|
795
|
+
cwe: 'CWE-501',
|
|
796
|
+
line: v.sinkLine,
|
|
797
|
+
location: `session write at line ${v.sinkLine}`,
|
|
798
|
+
confidence: 0.85,
|
|
799
|
+
});
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
}
|
|
443
803
|
// Propagate taint through dataflow to find verified flows
|
|
444
804
|
if (taint.sources.length > 0 && taint.sinks.length > 0) {
|
|
445
805
|
const propagationResult = propagateTaint(dfg, calls, taint.sources, taint.sinks, taint.sanitizers ?? []);
|
|
@@ -755,8 +1115,70 @@ export async function analyzeForAPI(code, filePath, language, options = {}) {
|
|
|
755
1115
|
filteredSinks = filterCleanVariableSinks(filteredSinks, calls, constPropResult.tainted, constPropResult.symbols, undefined, constPropResult.sanitizedVars, constPropResult.synchronizedLines);
|
|
756
1116
|
// Filter sinks wrapped by sanitizers on the same line
|
|
757
1117
|
filteredSinks = filterSanitizedSinks(filteredSinks, taint.sanitizers ?? [], calls);
|
|
1118
|
+
// Python: reduce XPath false-positives using forward taint propagation +
|
|
1119
|
+
// apostrophe-guard sanitizer detection.
|
|
1120
|
+
let pythonTaintedVars = new Map();
|
|
1121
|
+
if (language === 'python') {
|
|
1122
|
+
pythonTaintedVars = buildPythonTaintedVars(code);
|
|
1123
|
+
const pythonSanitizedVars = findPythonQuoteSanitizedVars(code);
|
|
1124
|
+
// Propagate sanitization: if bar is sanitized and query = f"...{bar}...", query is also sanitized
|
|
1125
|
+
for (const line of code.split('\n')) {
|
|
1126
|
+
const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
1127
|
+
if (!am)
|
|
1128
|
+
continue;
|
|
1129
|
+
const [, lhs, rhs] = am;
|
|
1130
|
+
if ([...pythonSanitizedVars].some(v => new RegExp(`\\b${v}\\b`).test(rhs))) {
|
|
1131
|
+
pythonSanitizedVars.add(lhs);
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
// Detect inline .replace() sanitizers: query = f"...{bar.replace('\'', '&apos;')}..."
|
|
1135
|
+
for (const line of code.split('\n')) {
|
|
1136
|
+
const am = line.match(/^\s*(\w+)\s*=\s*(.+)$/);
|
|
1137
|
+
if (!am)
|
|
1138
|
+
continue;
|
|
1139
|
+
const [, lhs, rhs] = am;
|
|
1140
|
+
const hasReplaceOnTainted = [...pythonTaintedVars.keys()].some(v => new RegExp(`\\b${v}\\.replace\\s*\\(`).test(rhs));
|
|
1141
|
+
if (hasReplaceOnTainted)
|
|
1142
|
+
pythonSanitizedVars.add(lhs);
|
|
1143
|
+
}
|
|
1144
|
+
const sourceLines = code.split('\n');
|
|
1145
|
+
filteredSinks = filteredSinks.filter(sink => {
|
|
1146
|
+
if (sink.type !== 'xpath_injection')
|
|
1147
|
+
return true;
|
|
1148
|
+
// Keep XPath sink only if a tainted variable is used at the sink line
|
|
1149
|
+
const sinkLineText = sourceLines[sink.line - 1] ?? '';
|
|
1150
|
+
const taintedVarOnLine = [...pythonTaintedVars.keys()].find(v => new RegExp(`\\b${v}\\b`).test(sinkLineText));
|
|
1151
|
+
if (!taintedVarOnLine)
|
|
1152
|
+
return false;
|
|
1153
|
+
// Kill if the variable is protected by an apostrophe guard
|
|
1154
|
+
if (pythonSanitizedVars.has(taintedVarOnLine))
|
|
1155
|
+
return false;
|
|
1156
|
+
// Suppress parameterized XPath: root.xpath(query, name=bar) where bar is a keyword arg
|
|
1157
|
+
if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText))
|
|
1158
|
+
return false;
|
|
1159
|
+
return true;
|
|
1160
|
+
});
|
|
1161
|
+
}
|
|
758
1162
|
// Generate vulnerabilities from source-sink pairs
|
|
759
1163
|
const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
|
|
1164
|
+
// Python: detect trust boundary violations (flask.session[key] = taintedVal)
|
|
1165
|
+
if (language === 'python') {
|
|
1166
|
+
const trustViolations = findPythonTrustBoundaryViolations(code, language, pythonTaintedVars);
|
|
1167
|
+
for (const v of trustViolations) {
|
|
1168
|
+
// Avoid duplicate: only add if no existing vulnerability for same sink line
|
|
1169
|
+
const alreadyReported = vulnerabilities.some(existing => existing.sink.line === v.sinkLine && existing.type === 'trust_boundary');
|
|
1170
|
+
if (!alreadyReported) {
|
|
1171
|
+
vulnerabilities.push({
|
|
1172
|
+
type: 'trust_boundary',
|
|
1173
|
+
cwe: 'CWE-501',
|
|
1174
|
+
severity: 'medium',
|
|
1175
|
+
source: { line: v.sourceLine, type: 'http_param' },
|
|
1176
|
+
sink: { line: v.sinkLine, type: 'trust_boundary' },
|
|
1177
|
+
confidence: 0.85,
|
|
1178
|
+
});
|
|
1179
|
+
}
|
|
1180
|
+
}
|
|
1181
|
+
}
|
|
760
1182
|
const analysisTime = performance.now() - analysisStart;
|
|
761
1183
|
const totalTime = performance.now() - startTime;
|
|
762
1184
|
return {
|