@ps-neko/nekowork 0.2.0-alpha.6 → 0.2.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +27 -8
- package/package.json +9 -3
- package/scripts/cli.js +18 -8
- package/scripts/core/git-mutation-guard.js +65 -0
- package/scripts/lib/acceptance-criteria.js +2 -9
- package/scripts/lib/ast/analyze.js +686 -0
- package/scripts/lib/ast/parse.js +131 -0
- package/scripts/lib/decision.js +4 -58
- package/scripts/lib/diff-parser.js +75 -4
- package/scripts/lib/risk-classifier.js +1 -0
- package/scripts/lib/rules/_helpers.js +90 -10
- package/scripts/lib/rules/ast-dataflow.js +103 -0
- package/scripts/lib/rules/auto-apply-commit-push.js +44 -0
- package/scripts/lib/rules/command-injection.js +72 -0
- package/scripts/lib/rules/cors-wildcard.js +84 -0
- package/scripts/lib/rules/eval-usage.js +102 -0
- package/scripts/lib/rules/hardcoded-credential.js +134 -2
- package/scripts/lib/rules/insecure-tls.js +86 -0
- package/scripts/lib/rules/package-lockfile-risk.js +23 -0
- package/scripts/lib/rules/secret-fallback.js +206 -24
- package/scripts/lib/rules/sql-injection.js +68 -0
- package/scripts/lib/rules/test-or-security-disable.js +102 -0
- package/scripts/lib/session-constants.js +30 -0
- package/scripts/lib/session-io.js +81 -0
- package/scripts/lib/session-resolver.js +17 -0
- package/scripts/lib/verify-helpers.js +442 -0
- package/scripts/orchestrators/_handoff-utils.js +45 -0
- package/scripts/orchestrators/apply.js +33 -17
- package/scripts/orchestrators/gate.js +17 -18
- package/scripts/orchestrators/report.js +4 -48
- package/scripts/orchestrators/verify-pr.js +49 -313
- package/scripts/benchmark/capture-live-ai-diff.js +0 -230
- package/scripts/benchmark/rules.js +0 -214
- package/scripts/benchmark/scrape-oss-positives.js +0 -237
- package/scripts/benchmark/verify-candidates.js +0 -110
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
// Intraprocedural const/taint propagation + dangerous-sink detection.
|
|
2
|
+
//
|
|
3
|
+
// Goal: catch the variable-mediated injection forms the line-oriented regex
|
|
4
|
+
// rules provably miss, WITHOUT introducing a single false positive. A naive
|
|
5
|
+
// taint analyzer over-flags (every Identifier looks "tainted"); that would
|
|
6
|
+
// regress the benchmark. So the rule is inverted and conservative:
|
|
7
|
+
//
|
|
8
|
+
// A value is flagged ONLY when it is provably DYNAMIC (not a compile-time
// constant string) AND it flows into a dangerous sink. A bare identifier with
// no visible binding (an import or a global) is NOT flagged on its own, and
// the FP guard is the const-propagation: a variable bound only to constant
// strings is CONST-SAFE and is never flagged.
|
|
13
|
+
//
|
|
14
|
+
// Const-propagation (the prototype's FP fix):
|
|
15
|
+
// const q = `SELECT 1`; db.query(q); // q is CONST-SAFE → NOT flagged
|
|
16
|
+
// const q = "SELECT " + x; db.query(q); // q is DYNAMIC → flagged
|
|
17
|
+
//
|
|
18
|
+
// Scope model: a binding map per function/program scope (lexical chain). A
|
|
19
|
+
// binding is CONST-SAFE iff EVERY assignment to it (declarator init +
|
|
20
|
+
// reassignments) is a const-safe string; any non-const-safe assignment, or a
|
|
21
|
+
// reassignment we can't see as const-safe, makes it DYNAMIC. Function PARAMETERS
|
|
22
|
+
// are always dynamic. Analysis is strictly intraprocedural: a value returned
|
|
23
|
+
// from another function call is dynamic (we never chase across calls — that is
|
|
24
|
+
// where FPs come from).
|
|
25
|
+
|
|
26
|
+
import { parseToAst, walk } from './parse.js';
|
|
27
|
+
|
|
28
|
+
// AST node types that open a new function scope (each gets its own binding map).
const FN_TYPES = new Set(['FunctionDeclaration', 'FunctionExpression', 'ArrowFunctionExpression']);

// SQL-ish sink methods executed against a connection/ORM raw escape hatch.
const SQL_SINKS = new Set(['query', 'execute', 'raw']);

// SQL DML/DDL keyword — a dynamic string only counts as a SQL-injection sink if
// the surrounding code actually looks like SQL. This keeps a generic
// `emitter.query(dynamic)` or `cache.execute(fn)` out (huge FP source).
// Case-insensitive and deliberately without the `g` flag, so `.test()` keeps no
// state between calls.
const SQL_KW_RE = /\b(SELECT|INSERT\s+INTO|INSERT|UPDATE|DELETE\s+FROM|DELETE|REPLACE|MERGE|UNION|DROP\s+TABLE|DROP|ALTER\s+TABLE|ALTER|TRUNCATE|CREATE\s+TABLE|FROM|WHERE)\b/i;

// child_process methods that run a SHELL command string (injectable directly).
const CP_SHELL_EXEC = new Set(['exec', 'execSync']);
// child_process methods that take (command, args[]) and only become injectable
// when shell:true is set AND the command is dynamic.
const CP_SPAWN = new Set(['spawn', 'spawnSync', 'execFile', 'execFileSync']);
|
|
43
|
+
|
|
44
|
+
/**
 * Create an empty scope record.
 *
 * A scope is a plain object holding a link to its enclosing scope plus a
 * `bindings` Map keyed by variable name. The stored values are classification
 * records written by the binding collector (`constSafe`, `dynamic` and,
 * for merged bindings, `initExprs`).
 *
 * @param {object|null} parent enclosing scope, or null for the outermost one
 * @returns {{ parent: object|null, bindings: Map<string, object> }}
 */
function makeScope(parent) {
  const bindings = new Map();
  return { parent, bindings };
}
|
|
51
|
+
|
|
52
|
+
/**
 * Resolve `name` lexically: search `scope`, then each enclosing scope in turn.
 *
 * @param {object|null} scope innermost scope to start from
 * @param {string} name variable name to resolve
 * @returns {object|null} the nearest binding record, or null when no scope in
 *   the chain declares the name
 */
function lookup(scope, name) {
  for (let current = scope; current; current = current.parent) {
    if (current.bindings.has(name)) {
      return current.bindings.get(name);
    }
  }
  return null;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Is `node` a compile-time-constant string expression?
 *   Literal                       → const-safe (any literal)
 *   TemplateLiteral, 0 expressions → const-safe (`SELECT 1`)
 *   BinaryExpression '+'          → const-safe iff BOTH sides const-safe
 *   ParenthesizedExpression       → same as the wrapped expression
 *   Identifier                    → const-safe iff its binding is const-safe
 *   anything else                 → NOT const-safe
 *
 * A missing node (null/undefined) is reported as const-safe.
 *
 * @param {object|null|undefined} node expression node to classify
 * @param {object|null} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function isConstSafe(node, scope) {
  if (!node) return true;
  switch (node.type) {
    case 'Literal':
      return true;
    case 'TemplateLiteral':
      return node.expressions.length === 0;
    case 'BinaryExpression':
      if (node.operator !== '+') return false;
      return isConstSafe(node.left, scope) && isConstSafe(node.right, scope);
    case 'ParenthesizedExpression':
      // Parsers that preserve parentheses wrap the real expression; unwrap it
      // the same way isDynamic() does so both classifiers agree.
      return isConstSafe(node.expression, scope);
    case 'Identifier': {
      const b = lookup(scope, node.name);
      // Unknown identifier (e.g. an import or global) → NOT const-safe: we are
      // simply not certain it is a constant. That does not make it dynamic;
      // the sink checks consult isDynamic() for that.
      return b ? b.constSafe === true : false;
    }
    default:
      return false;
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Is `node` a DYNAMIC value (runtime-assembled / external), i.e. the thing we
 * flag when it flows into a sink?
 *   Literal                        → false
 *   TemplateLiteral with ${...}    → true  (interpolation)
 *   TemplateLiteral, no expr       → false
 *   BinaryExpression '+'           → either side dynamic
 *   other BinaryExpression         → false (yields a boolean/number)
 *   ConditionalExpression          → either branch dynamic (the test is not
 *                                    part of the resulting value)
 *   LogicalExpression              → either operand dynamic
 *   Identifier                     → its binding's `dynamic` flag; false when
 *                                    no binding is visible
 *   TaggedTemplate / Parenthesized → same as the wrapped template/expression
 *   MemberExpression/CallExpr/...  → true
 *
 * @param {object|null|undefined} node expression node to classify
 * @param {object|null} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function isDynamic(node, scope) {
  if (!node) return false;
  switch (node.type) {
    case 'Literal':
      return false;
    case 'TemplateLiteral':
      return node.expressions.length > 0;
    case 'BinaryExpression':
      if (node.operator === '+') return isDynamic(node.left, scope) || isDynamic(node.right, scope);
      // Other binary ops (e.g. comparisons) yield booleans/numbers, not an
      // injectable command/query string — not the dynamic-string shape.
      return false;
    case 'ConditionalExpression':
      // `cond ? 'dir' : 'ls'` can only evaluate to one of its branches, so it
      // is dynamic only when a branch is. Treating every ternary as dynamic
      // flagged pure-literal selections.
      return isDynamic(node.consequent, scope) || isDynamic(node.alternate, scope);
    case 'LogicalExpression':
      // `a || b`, `a && b`, `a ?? b` evaluate to one of the two operands.
      return isDynamic(node.left, scope) || isDynamic(node.right, scope);
    case 'Identifier': {
      const b = lookup(scope, node.name);
      if (b) return b.dynamic === true;
      // Unknown identifier: a bare top-level/imported name passed straight to a
      // sink. We do NOT flag this — it is not a clear assembled dynamic value
      // and flagging bare identifiers is the #1 FP source. Conservative: false.
      return false;
    }
    case 'TaggedTemplateExpression':
      return isDynamic(node.quasi, scope);
    case 'ParenthesizedExpression':
      return isDynamic(node.expression, scope);
    default:
      // MemberExpression (req.body.x), CallExpression, AwaitExpression-wrapped,
      // etc. These are runtime values. Selectivity is applied at the SINK
      // level (a SQL sink also requires a SQL keyword); here we report the
      // structural truth.
      return true;
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Classify the value produced by an init/assignment expression.
 *
 * The two flags are computed independently, so a value can be neither (for
 * example a bare identifier with no visible binding): not a proven constant
 * string and not a flaggable dynamic value either.
 *
 * @param {object} node expression node
 * @param {object|null} scope scope used to resolve identifiers
 * @returns {{ constSafe: boolean, dynamic: boolean }}
 */
function classifyValue(node, scope) {
  const constSafe = isConstSafe(node, scope);
  const dynamic = isDynamic(node, scope);
  return { constSafe, dynamic };
}
|
|
152
|
+
|
|
153
|
+
/**
 * Collect bindings declared/assigned directly in a scope body, WITHOUT
 * descending into nested function scopes (those get their own scope).
 *
 *   0. every parameter name of a function scope is seeded as dynamic
 *   1. each `VariableDeclaration` declarator and each assignment to a plain
 *      identifier is classified and merged into the binding via mergeBinding()
 *
 * Because merging only ever moves a binding toward "unsafe", a reassignment
 * that is not const-safe demotes a previously const-safe binding (so
 * `let q="SELECT 1"; q=q+x; query(q)` is dynamic).
 *
 * Compound `+=` combines the right-hand side with the variable's CURRENT
 * classification, resolved lexically. The variable may live in an enclosing
 * scope; reading only the local map made `cmd += "x"` inside a nested function
 * shadow an outer dynamic `cmd` with a non-dynamic local entry.
 *
 * @param {object} scopeNode  Program | Function node
 * @param {object} scope      the scope whose bindings we fill
 */
function collectBindings(scopeNode, scope) {
  // Phase 0: parameters of a function scope are always dynamic.
  if (FN_TYPES.has(scopeNode.type) && Array.isArray(scopeNode.params)) {
    for (const p of scopeNode.params) {
      for (const name of patternNames(p)) {
        scope.bindings.set(name, { constSafe: false, dynamic: true });
      }
    }
  }

  // Phase 1+2: walk the scope body but DO NOT cross into nested functions.
  const body = scopeNode.type === 'Program' ? scopeNode : scopeNode.body;
  walkScopeLocal(body, scopeNode, (node) => {
    if (node.type === 'VariableDeclaration') {
      for (const decl of node.declarations) {
        // Only simple `name = expr` bindings carry a recoverable init expr. A
        // destructuring pattern gives every bound name the classification of
        // the whole initializer, but records no init expression for it.
        const simple = decl.id.type === 'Identifier';
        for (const name of patternNames(decl.id)) {
          // A declarator without an initializer is neither constant nor dynamic.
          const cls = decl.init ? classifyValue(decl.init, scope) : { constSafe: false, dynamic: false };
          if (simple && decl.init) cls.initExpr = decl.init;
          mergeBinding(scope, name, cls);
        }
      }
    } else if (node.type === 'AssignmentExpression' && node.left.type === 'Identifier') {
      const name = node.left.name;
      let cls;
      if (node.operator === '=') {
        cls = classifyValue(node.right, scope);
        cls.initExpr = node.right;
      } else if (node.operator === '+=') {
        // existing const-safe AND RHS const-safe → still const-safe;
        // existing dynamic OR RHS dynamic → dynamic.
        const rhsSafe = isConstSafe(node.right, scope);
        // Lexical lookup: the nearest binding (local first, then enclosing
        // scopes) is the value this `+=` appends to.
        const existing = lookup(scope, name);
        const existingSafe = existing ? existing.constSafe === true : false;
        cls = {
          constSafe: rhsSafe && existingSafe,
          dynamic: isDynamic(node.right, scope) || (existing ? existing.dynamic : false),
        };
        // A reassignment with += loses a single recoverable init expr; clear it
        // (the SQL-text recovery becomes best-effort, which only risks a MISS,
        // never an FP).
        cls.initExpr = null;
      } else {
        // Other compound ops produce numbers — not a string sink concern.
        cls = { constSafe: false, dynamic: false };
      }
      mergeBinding(scope, name, cls);
    }
  });
}
|
|
216
|
+
|
|
217
|
+
/**
 * Fold one more assignment classification into the binding for `name`.
 *
 * The merge only ever moves toward "unsafe":
 *   - `dynamic` is OR-ed, so once dynamic it stays dynamic;
 *   - `constSafe` is AND-ed, so a single non-const-safe assignment clears it.
 * A first sighting simply stores the classification as given.
 *
 * Every assigned expression that was recorded (`cls.initExpr`) is appended to
 * `initExprs`, so the SQL-text recovery can scan the whole assignment history
 * (`let q="SELECT 1"; q=q+x` still surfaces the SELECT keyword).
 *
 * @param {object} scope scope whose `bindings` Map is updated
 * @param {string} name  variable name
 * @param {{ constSafe: boolean, dynamic: boolean, initExpr?: object|null }} cls
 */
function mergeBinding(scope, name, cls) {
  const { bindings } = scope;
  const previous = bindings.get(name);
  const added = cls.initExpr ? [cls.initExpr] : [];

  const merged = previous
    ? {
        constSafe: previous.constSafe && cls.constSafe,
        dynamic: previous.dynamic || cls.dynamic,
        // Seeded parameter bindings carry no initExprs yet, hence the fallback.
        initExprs: [...(previous.initExprs || []), ...added],
      }
    : { constSafe: cls.constSafe, dynamic: cls.dynamic, initExprs: added };

  bindings.set(name, merged);
}
|
|
241
|
+
|
|
242
|
+
/**
 * List every identifier name bound by a binding pattern.
 *
 * Handles plain identifiers, default values (`a = 1`), rest elements, array
 * patterns (holes are skipped) and object patterns. Any other node type
 * contributes nothing.
 *
 * @param {object|null|undefined} pat pattern node
 * @param {string[]} [out] accumulator; names are appended to it in place
 * @returns {string[]} the accumulator
 */
function patternNames(pat, out = []) {
  if (!pat) return out;

  const { type } = pat;
  if (type === 'Identifier') {
    out.push(pat.name);
  } else if (type === 'AssignmentPattern') {
    patternNames(pat.left, out);
  } else if (type === 'RestElement') {
    patternNames(pat.argument, out);
  } else if (type === 'ArrayPattern') {
    for (const element of pat.elements) {
      if (element) patternNames(element, out);
    }
  } else if (type === 'ObjectPattern') {
    for (const prop of pat.properties) {
      const target = prop.type === 'RestElement' ? prop.argument : prop.value;
      patternNames(target, out);
    }
  }
  return out;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Visit every node of one scope's body in pre-order, stopping at nested
 * function nodes (they are separate scopes collected on their own). The
 * `scopeNode` itself is exempt from that stop, so it can be walked when it is
 * reached.
 *
 * @param {object|null|undefined} root a Program, a function's BlockStatement
 *   body, or the expression body of an arrow function
 * @param {object} scopeNode the Program/Function node that owns this scope
 * @param {(node: object) => void} visit callback invoked for each visited node
 */
function walkScopeLocal(root, scopeNode, visit) {
  // Position metadata and parent back-links are not child nodes.
  const ignoredKeys = new Set(['loc', 'start', 'end', 'range', 'parent', '__parent']);
  const isAstNode = (value) => Boolean(value) && typeof value.type === 'string';

  function descend(node) {
    if (!isAstNode(node)) return;
    // Do not cross into a nested function scope.
    if (node !== scopeNode && FN_TYPES.has(node.type)) return;
    visit(node);
    for (const key of Object.keys(node)) {
      if (ignoredKeys.has(key)) continue;
      const value = node[key];
      const candidates = Array.isArray(value) ? value : [value];
      for (const child of candidates) {
        if (isAstNode(child)) descend(child);
      }
    }
  }

  if (!root) return;
  if (root.type === 'Program' || root.type === 'BlockStatement') {
    // Statement containers: walk their statements, not the container itself.
    for (const stmt of root.body) descend(stmt);
    return;
  }
  // Arrow with expression body: `() => expr`
  descend(root);
}
|
|
302
|
+
|
|
303
|
+
/**
 * Build one scope for the program and one for every function node, and return
 * them as a Map from node → scope so sink detection can find the right
 * binding map.
 *
 * The program scope is collected first. Function nodes are then handled in the
 * order `walk` reports them; each gets a child scope whose parent is the
 * nearest ancestor already present in the Map, or the program scope when none
 * is found.
 * NOTE(review): assumes `walk` visits an enclosing function before the
 * functions nested in it (pre-order) — confirm against parse.js.
 *
 * @param {object} ast Program node with `__parent` links already attached
 * @returns {Map<object, object>} node → scope
 */
function buildScopes(ast) {
  const programScope = makeScope(null);
  collectBindings(ast, programScope);
  const scopeOf = new Map([[ast, programScope]]);

  walk(ast, (node, parent) => {
    if (!FN_TYPES.has(node.type)) return;
    const enclosing = nearestScope(scopeOf, parent) || programScope;
    const fnScope = makeScope(enclosing);
    collectBindings(node, fnScope);
    scopeOf.set(node, fnScope);
  });

  return scopeOf;
}
|
|
327
|
+
|
|
328
|
+
/**
 * Starting at `node` itself, follow `__parent` links upward and return the
 * scope of the first node that is registered in `scopeOf`.
 *
 * @param {Map<object, object>} scopeOf node → scope
 * @param {object|null|undefined} node where to start climbing
 * @returns {object|null} the scope found, or null when the chain ends first
 */
function nearestScope(scopeOf, node) {
  for (let current = node; current; current = current.__parent || null) {
    if (scopeOf.has(current)) return scopeOf.get(current);
  }
  return null;
}
|
|
337
|
+
|
|
338
|
+
/**
 * Analyze source code for variable-mediated injection sinks.
 *
 * Parses the code, links every node to its parent, builds the scope map and
 * then inspects each call / `new` expression. Findings are de-duplicated
 * before being returned. When the source cannot be parsed the result is
 * `{ parsed: false, findings: [] }`.
 *
 * @param {string} code
 * @param {string} file  reported in findings
 * @param {{ ts?: boolean }} [opts]
 * @returns {{ parsed: boolean, findings: Array }}
 */
export function analyze(code, file, opts = {}) {
  const ast = parseToAst(code, { ts: opts.ts });
  if (!ast) {
    return { parsed: false, findings: [] };
  }

  // Parent links let a sink resolve the scope that encloses it.
  annotateParents(ast);
  const scopeOf = buildScopes(ast);

  const collected = [];
  // Nodes without location info report line 0.
  function lineOf(n) {
    return n.loc ? n.loc.start.line : 0;
  }

  walk(ast, (node) => {
    switch (node.type) {
      case 'CallExpression':
        handleCall(node, scopeOf, file, lineOf, collected);
        break;
      case 'NewExpression':
        handleNew(node, scopeOf, file, lineOf, collected);
        break;
      default:
        break;
    }
  });

  return { parsed: true, findings: dedupe(collected) };
}
|
|
367
|
+
|
|
368
|
+
/**
 * Find the scope that ENCLOSES `node`: the scope registered for its nearest
 * function or Program ancestor. The search starts at the node's parent, so a
 * function node resolves to the scope outside itself.
 *
 * If no ancestor qualifies, the scope of any Program key in `scopeOf` is used;
 * failing that, a fresh empty scope is returned.
 *
 * @param {Map<object, object>} scopeOf node → scope
 * @param {object} node node carrying `__parent` links
 * @returns {object} a scope (never null)
 */
function scopeForNode(scopeOf, node) {
  for (let ancestor = node.__parent; ancestor; ancestor = ancestor.__parent) {
    const opensScope = FN_TYPES.has(ancestor.type) || ancestor.type === 'Program';
    if (opensScope && scopeOf.has(ancestor)) {
      return scopeOf.get(ancestor);
    }
  }

  // Fallback: program scope.
  for (const [key, scope] of scopeOf) {
    if (key.type === 'Program') return scope;
  }
  return makeScope(null);
}
|
|
381
|
+
|
|
382
|
+
/**
 * Inspect one CallExpression and push a finding when a dynamic value reaches a
 * dangerous sink:
 *   - `eval(x)` and `window|globalThis|self|global.eval(x)`  → eval finding
 *   - `<obj>.query|execute|raw(x)` with SQL-shaped, non-parameterized x
 *                                                             → SQL finding
 *   - `<obj>.exec|execSync(x)` and bare `exec|execSync(x)`    → command finding
 *   - `spawn|spawnSync|execFile|execFileSync(x, …)` (member or bare) with a
 *     literal `shell: true` option                            → command finding
 *
 * `<obj>.exec(x)` is skipped when `<obj>` is provably a regular expression
 * (`/re/.exec(s)`, `new RegExp(p).exec(s)`, or a variable only ever assigned
 * such values): `RegExp.prototype.exec` shares the method name but is not a
 * shell sink.
 *
 * @param {object} node CallExpression node
 * @param {Map<object, object>} scopeOf node → scope
 * @param {string} file file name reported in findings
 * @param {(node: object) => number} line maps a node to its source line
 * @param {Array} findings output array, appended to in place
 */
function handleCall(node, scopeOf, file, line, findings) {
  const callee = node.callee;
  const scope = scopeForNode(scopeOf, node);
  const args = node.arguments || [];
  const arg0 = args[0];

  // eval(dynamic)
  if (callee.type === 'Identifier' && callee.name === 'eval') {
    if (arg0 && isDynamic(arg0, scope)) {
      findings.push(evalFinding(file, line(node), node));
    }
    return;
  }

  // window/globalThis/self/global .eval(dynamic) — indirect eval
  if (
    callee.type === 'MemberExpression' &&
    callee.property.type === 'Identifier' &&
    callee.property.name === 'eval' &&
    callee.object.type === 'Identifier' &&
    /^(window|globalThis|self|global)$/.test(callee.object.name)
  ) {
    if (arg0 && isDynamic(arg0, scope)) {
      findings.push(evalFinding(file, line(node), node));
    }
    return;
  }

  if (callee.type === 'MemberExpression' && callee.property.type === 'Identifier') {
    const method = callee.property.name;

    // SQL sink: .query / .execute / .raw with a dynamic, NON-parameterized,
    // SQL-shaped argument.
    if (SQL_SINKS.has(method)) {
      if (arg0 && isDynamic(arg0, scope) && looksLikeSql(arg0, scope) && !isParameterized(node, arg0, scope)) {
        findings.push(sqlFinding(file, line(node), node));
        return;
      }
    }

    // child_process exec / execSync with a dynamic command string. A receiver
    // that is provably a RegExp is RegExp.prototype.exec, not a shell sink.
    if (CP_SHELL_EXEC.has(method) && !isRegExpReceiver(callee.object, scope)) {
      if (arg0 && isDynamic(arg0, scope)) {
        findings.push(cmdFinding(file, line(node), node, 'critical'));
        return;
      }
    }

    // spawn / execFile family: only injectable with shell:true AND dynamic cmd.
    if (CP_SPAWN.has(method)) {
      if (arg0 && isDynamic(arg0, scope) && hasShellTrue(args, scope)) {
        findings.push(cmdFinding(file, line(node), node, 'critical'));
        return;
      }
    }
  }

  // Bare exec/execSync identifier (destructured from child_process):
  //   const { exec } = require('child_process'); exec(cmd);
  if (callee.type === 'Identifier' && CP_SHELL_EXEC.has(callee.name)) {
    if (arg0 && isDynamic(arg0, scope)) {
      findings.push(cmdFinding(file, line(node), node, 'critical'));
      return;
    }
  }
  if (callee.type === 'Identifier' && CP_SPAWN.has(callee.name)) {
    if (arg0 && isDynamic(arg0, scope) && hasShellTrue(args, scope)) {
      findings.push(cmdFinding(file, line(node), node, 'critical'));
    }
  }
}

/**
 * Is the receiver expression provably a regular expression? True for a regex
 * literal, `new RegExp(...)` / `RegExp(...)`, a parenthesized form of either,
 * or an identifier whose recorded assignments are all such expressions.
 * Anything uncertain returns false so the sink check still runs.
 */
function isRegExpReceiver(obj, scope, seen = new Set()) {
  if (!obj) return false;
  switch (obj.type) {
    case 'Literal':
      // ESTree marks regex literals with a `regex` property.
      return Boolean(obj.regex) || obj.value instanceof RegExp;
    case 'RegExpLiteral':
      return true;
    case 'ParenthesizedExpression':
      return isRegExpReceiver(obj.expression, scope, seen);
    case 'NewExpression':
    case 'CallExpression':
      return Boolean(obj.callee) && obj.callee.type === 'Identifier' && obj.callee.name === 'RegExp';
    case 'Identifier': {
      // Guard against cyclic initializers such as `a = b; b = a`.
      if (seen.has(obj.name)) return false;
      seen.add(obj.name);
      const binding = lookup(scope, obj.name);
      if (!binding || !Array.isArray(binding.initExprs) || binding.initExprs.length === 0) return false;
      return binding.initExprs.every((expr) => isRegExpReceiver(expr, scope, seen));
    }
    default:
      return false;
  }
}
|
|
457
|
+
|
|
458
|
+
/**
 * Inspect one NewExpression and push an eval finding for `new Function(...)`
 * whose body is dynamic.
 *
 * The Function constructor compiles its LAST argument as the function body,
 * so only that argument is examined. A constant body produces no finding
 * here.
 *
 * @param {object} node NewExpression node
 * @param {Map<object, object>} scopeOf node → scope
 * @param {string} file file name reported in findings
 * @param {(node: object) => number} line maps a node to its source line
 * @param {Array} findings output array, appended to in place
 */
function handleNew(node, scopeOf, file, line, findings) {
  const { callee } = node;
  const isFunctionCtor = callee.type === 'Identifier' && callee.name === 'Function';
  if (!isFunctionCtor) return;

  const scope = scopeForNode(scopeOf, node);
  const ctorArgs = node.arguments || [];
  const body = ctorArgs[ctorArgs.length - 1];
  if (!body || !isDynamic(body, scope)) return;

  findings.push(evalFinding(file, line(node), node));
}
|
|
473
|
+
|
|
474
|
+
/**
 * Does the static text reachable from `node` contain a SQL keyword?
 *
 * This is the guard that keeps generic `.query(dynamic)` / `.execute(dynamic)`
 * calls on non-SQL objects from being reported: a SQL sink argument only
 * counts when its literal parts actually look like SQL.
 *
 * @param {object} node argument expression
 * @param {object|null} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function looksLikeSql(node, scope) {
  const visitedNames = new Set();
  const staticText = collectStaticText(node, scope, visitedNames);
  return SQL_KW_RE.test(staticText);
}
|
|
482
|
+
|
|
483
|
+
/**
 * Gather the static (literal) text contributed by an expression, so the caller
 * can test it for SQL keywords.
 *
 *   Literal                  → its value when it is a string, else ''
 *   TemplateLiteral          → its quasis joined by a space (cooked text,
 *                              falling back to raw)
 *   BinaryExpression '+'     → text of both sides joined by a space
 *   Identifier               → text of every recorded init/assignment
 *                              expression of its binding, followed
 *                              recursively (each name at most once)
 *   TaggedTemplateExpression → text of its template
 *   ParenthesizedExpression  → text of the wrapped expression
 *   anything else            → ''
 *
 * Following identifier initializers is what lets the cross-statement shape
 * `const q = "SELECT..." + x; db.query(q)` surface the SELECT keyword.
 *
 * @param {object|null|undefined} node expression node
 * @param {object|null} scope scope used to resolve identifiers
 * @param {Set<string>} seen identifier names already expanded (cycle guard)
 * @returns {string}
 */
function collectStaticText(node, scope, seen) {
  if (!node) return '';
  switch (node.type) {
    case 'Literal':
      return typeof node.value === 'string' ? node.value : '';
    case 'TemplateLiteral':
      return node.quasis
        .map((q) => {
          // A quasi without a `value` record contributes nothing (previously
          // this dereferenced `q.value.raw` and threw).
          const v = q.value;
          if (!v) return '';
          return v.cooked != null ? v.cooked : v.raw || '';
        })
        .join(' ');
    case 'BinaryExpression':
      if (node.operator === '+') {
        return collectStaticText(node.left, scope, seen) + ' ' + collectStaticText(node.right, scope, seen);
      }
      return '';
    case 'Identifier': {
      if (seen.has(node.name)) return '';
      seen.add(node.name);
      const b = lookup(scope, node.name);
      if (!b || !Array.isArray(b.initExprs)) return '';
      return b.initExprs.map((e) => collectStaticText(e, scope, seen)).join(' ');
    }
    case 'TaggedTemplateExpression':
      return collectStaticText(node.quasi, scope, seen);
    case 'ParenthesizedExpression':
      // Unwrap preserved parentheses, matching isDynamic().
      return collectStaticText(node.expression, scope, seen);
    default:
      return '';
  }
}
|
|
526
|
+
|
|
527
|
+
/**
 * Is the call in the canonical parameterized shape, i.e. does it pass an array
 * literal as its SECOND argument (pg `$1` + [..], mysql2 `?` + [..])?
 *
 * Only a literal array counts. A params array passed through a variable is
 * deliberately not recognised, to avoid guessing in either direction.
 *
 * @param {object} callNode CallExpression node
 * @param {object} arg0 first argument (currently unused)
 * @param {object|null} scope enclosing scope (currently unused)
 * @returns {boolean}
 */
function isParameterized(callNode, arg0, scope) {
  // Kept in the signature for callers; intentionally unused.
  void arg0;
  void scope;

  const [, paramsArg] = callNode.arguments || [];
  return Boolean(paramsArg) && paramsArg.type === 'ArrayExpression';
}
|
|
546
|
+
|
|
547
|
+
/**
 * Does any argument of the call contain an object literal with `shell: true`?
 *
 * The key may be written as an identifier or a string literal, but must not be
 * computed. Only the literal value `true` counts; any other value (including
 * a variable that might be truthy) is conservatively ignored.
 *
 * @param {Array<object>} args call arguments
 * @param {object|null} scope enclosing scope (currently unused)
 * @returns {boolean}
 */
function hasShellTrue(args, scope) {
  // Kept in the signature for callers; intentionally unused.
  void scope;

  const isShellKey = (key) =>
    (key.type === 'Identifier' && key.name === 'shell') ||
    (key.type === 'Literal' && key.value === 'shell');

  const enablesShell = (prop) =>
    prop.type === 'Property' &&
    !prop.computed &&
    isShellKey(prop.key) &&
    prop.value.type === 'Literal' &&
    prop.value.value === true;

  return args.some(
    (arg) => Boolean(arg) && arg.type === 'ObjectExpression' && arg.properties.some(enablesShell),
  );
}
|
|
568
|
+
|
|
569
|
+
/**
 * Attach a `__parent` back-link to every node of the tree (null on the root)
 * so a sink can later resolve the scope that encloses it. Single pass; the
 * tree is mutated in place.
 *
 * Keys holding position metadata or back-links are never followed. That
 * includes a parser-provided `parent` property, matching walkScopeLocal():
 * following it would walk back up the tree and recurse without end.
 *
 * @param {object} ast root node
 */
function annotateParents(ast) {
  const SKIP_KEYS = new Set(['loc', 'start', 'end', 'range', 'parent', '__parent']);
  const recurse = (node, parent) => {
    if (!node || typeof node.type !== 'string') return;
    node.__parent = parent;
    for (const key of Object.keys(node)) {
      if (SKIP_KEYS.has(key)) continue;
      const value = node[key];
      if (Array.isArray(value)) {
        for (const child of value) if (child && typeof child.type === 'string') recurse(child, node);
      } else if (value && typeof value.type === 'string') {
        recurse(value, node);
      }
    }
  };
  recurse(ast, null);
}
|
|
588
|
+
|
|
589
|
+
// ---- Finding constructors (match the regex-rule finding shape) ----
|
|
590
|
+
|
|
591
|
+
/**
 * Assemble a finding object in the shape used by the regex rules.
 *
 * The `id` is `RULE_<RULE>_<PATTERN>` with both parts upper-cased and dashes
 * turned into underscores. `blocks_apply` is true only for critical severity;
 * `match` is a short structural marker derived from the node.
 *
 * @param {object} spec
 * @param {string} spec.rule rule name (dash-separated)
 * @param {string} spec.pattern pattern name (dash-separated)
 * @param {string} spec.severity severity label, e.g. 'high' or 'critical'
 * @param {string} spec.file file the finding belongs to
 * @param {number} spec.line source line of the sink
 * @param {string} spec.title short title
 * @param {string} spec.description longer explanation
 * @param {string} spec.recommendation suggested remediation
 * @param {object} spec.node AST node of the sink
 * @returns {object} the finding
 */
function baseFinding({ rule, pattern, severity, file, line, title, description, recommendation, node }) {
  const toIdPart = (text) => text.toUpperCase().replace(/-/g, '_');
  // Match regex-rule convention: blocks_apply iff critical. (HIGH → human
  // review but does not hard-block apply.)
  const blocksApply = severity === 'critical';

  const finding = {
    id: `RULE_${toIdPart(rule)}_${toIdPart(pattern)}`,
    rule,
    pattern,
    severity,
    category: 'code-injection',
    file,
    line,
    title,
    description,
    recommendation,
    blocks_apply: blocksApply,
    match: snippet(node),
  };
  return finding;
}
|
|
609
|
+
|
|
610
|
+
/**
 * Build the high-severity finding reported when a dynamic value reaches
 * eval() or the Function constructor.
 *
 * @param {string} file file the finding belongs to
 * @param {number} line source line of the sink
 * @param {object} node AST node of the sink
 * @returns {object} the finding
 */
function evalFinding(file, line, node) {
  const title = 'eval()/Function() with a dynamically-built value (dataflow)';
  const description =
    'Dataflow analysis found a runtime-assembled (non-constant) value flowing into eval()/new Function(). Executing assembled strings is a code-injection / RCE vector the line-by-line scanner misses when the value is built across statements.';
  const recommendation = 'Remove eval()/new Function(). Use JSON.parse for data or a lookup table for dispatch.';

  const spec = {
    rule: 'ast-eval-injection',
    pattern: 'ast-eval',
    severity: 'high',
    file,
    line,
    title,
    description,
    recommendation,
    node,
  };
  return baseFinding(spec);
}
|
|
624
|
+
|
|
625
|
+
/**
 * Build the high-severity finding reported when a dynamic, SQL-shaped string
 * reaches a query sink without a params array.
 *
 * @param {string} file file the finding belongs to
 * @param {number} line source line of the sink
 * @param {object} node AST node of the sink
 * @returns {object} the finding
 */
function sqlFinding(file, line, node) {
  const title = 'SQL sink fed a dynamically-built query (dataflow)';
  const description =
    'Dataflow analysis found a runtime-assembled (non-constant) SQL string flowing into .query()/.execute()/.raw() without a params array. This is a SQL-injection vector the line scanner misses when the query is built in a separate statement.';
  const recommendation = 'Use parameterized queries (placeholders + a params array), not an assembled query string.';

  const spec = {
    rule: 'ast-sql-injection',
    pattern: 'ast-sql',
    severity: 'high',
    file,
    line,
    title,
    description,
    recommendation,
    node,
  };
  return baseFinding(spec);
}
|
|
639
|
+
|
|
640
|
+
/**
 * Build the finding reported when a dynamic command string reaches a shell
 * sink. The severity is chosen by the caller.
 *
 * @param {string} file file the finding belongs to
 * @param {number} line source line of the sink
 * @param {object} node AST node of the sink
 * @param {string} severity severity label to report
 * @returns {object} the finding
 */
function cmdFinding(file, line, node, severity) {
  const title = 'Shell sink fed a dynamically-built command (dataflow)';
  const description =
    'Dataflow analysis found a runtime-assembled (non-constant) command string flowing into child_process exec/execSync (or spawn with shell:true). This is an OS-command-injection / RCE vector the line scanner misses when the command is assembled across statements.';
  const recommendation = 'Use execFile/spawn with an argument array (no shell), or strictly validate the input.';

  const spec = {
    rule: 'ast-command-injection',
    pattern: 'ast-cmd',
    severity,
    file,
    line,
    title,
    description,
    recommendation,
    node,
  };
  return baseFinding(spec);
}
|
|
654
|
+
|
|
655
|
+
/**
 * Produce a short structural marker for a sink node, built from its callee
 * because no raw source text is available on the node:
 *   call with identifier callee      → `name(...)`
 *   call with member callee          → `obj.method(...)` (`…` when the
 *                                      receiver is not a plain identifier)
 *   `new` with identifier callee     → `new Name(...)`
 * Anything else, or any error while reading the node, yields ''.
 *
 * @param {object} node AST node
 * @returns {string}
 */
function snippet(node) {
  try {
    const { type, callee } = node;
    if (type === 'CallExpression') {
      if (callee.type === 'Identifier') {
        return `${callee.name}(...)`;
      }
      if (callee.type === 'MemberExpression' && callee.property.type === 'Identifier') {
        const receiver = callee.object.type === 'Identifier' ? callee.object.name : '…';
        return `${receiver}.${callee.property.name}(...)`;
      }
    } else if (type === 'NewExpression' && callee.type === 'Identifier') {
      return `new ${callee.name}(...)`;
    }
  } catch {
    /* best-effort */
  }
  return '';
}
|
|
674
|
+
|
|
675
|
+
/**
 * Keep the first finding for each {rule, line} pair and return the survivors
 * ordered by ascending line. The input array is left untouched.
 *
 * @param {Array<{ rule: string, line: number }>} findings
 * @returns {Array} a new, de-duplicated and sorted array
 */
function dedupe(findings) {
  const seenKeys = new Set();
  const unique = findings.filter((finding) => {
    const key = `${finding.rule}:${finding.line}`;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
  return unique.sort((a, b) => a.line - b.line);
}
|