@ps-neko/nekowork 0.2.0-alpha.6 → 0.2.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +27 -8
  3. package/package.json +9 -3
  4. package/scripts/cli.js +18 -8
  5. package/scripts/core/git-mutation-guard.js +65 -0
  6. package/scripts/lib/acceptance-criteria.js +2 -9
  7. package/scripts/lib/ast/analyze.js +986 -0
  8. package/scripts/lib/ast/parse.js +131 -0
  9. package/scripts/lib/decision.js +4 -58
  10. package/scripts/lib/diff-parser.js +75 -4
  11. package/scripts/lib/risk-classifier.js +1 -0
  12. package/scripts/lib/rules/_helpers.js +90 -10
  13. package/scripts/lib/rules/ast-dataflow.js +103 -0
  14. package/scripts/lib/rules/auto-apply-commit-push.js +44 -0
  15. package/scripts/lib/rules/command-injection.js +72 -0
  16. package/scripts/lib/rules/cors-wildcard.js +84 -0
  17. package/scripts/lib/rules/eval-usage.js +102 -0
  18. package/scripts/lib/rules/hardcoded-credential.js +134 -2
  19. package/scripts/lib/rules/insecure-tls.js +86 -0
  20. package/scripts/lib/rules/package-lockfile-risk.js +23 -0
  21. package/scripts/lib/rules/secret-fallback.js +206 -24
  22. package/scripts/lib/rules/sql-injection.js +68 -0
  23. package/scripts/lib/rules/test-or-security-disable.js +102 -0
  24. package/scripts/lib/session-constants.js +30 -0
  25. package/scripts/lib/session-io.js +81 -0
  26. package/scripts/lib/session-resolver.js +17 -0
  27. package/scripts/lib/verify-helpers.js +442 -0
  28. package/scripts/orchestrators/_handoff-utils.js +45 -0
  29. package/scripts/orchestrators/apply.js +33 -17
  30. package/scripts/orchestrators/gate.js +17 -18
  31. package/scripts/orchestrators/report.js +4 -48
  32. package/scripts/orchestrators/verify-pr.js +49 -313
  33. package/scripts/benchmark/capture-live-ai-diff.js +0 -230
  34. package/scripts/benchmark/rules.js +0 -214
  35. package/scripts/benchmark/scrape-oss-positives.js +0 -237
  36. package/scripts/benchmark/verify-candidates.js +0 -110
@@ -0,0 +1,986 @@
1
+ // Inter-procedural (intra-module) const/taint propagation + dangerous-sink
2
+ // detection.
3
+ //
4
+ // Goal: catch the variable-mediated injection forms the line-oriented regex
5
+ // rules provably miss, WITHOUT introducing a single false positive. A naive
6
+ // taint analyzer over-flags (every Identifier looks "tainted"); that would
7
+ // regress the benchmark. So the rule is inverted and conservative:
8
+ //
9
+ // A value is flagged ONLY when it is provably DYNAMIC (not a compile-time
10
+ // constant string) AND it flows into a dangerous sink. An identifier whose
11
+ // binding can't be resolved is NOT treated as dynamic — and the main FP guard
12
+ // is the const-propagation: a variable bound only to constant strings is
13
+ // CONST-SAFE and is never flagged.
14
+ //
15
+ // Const-propagation (the prototype's FP fix):
16
+ // const q = `SELECT 1`; db.query(q); // q is CONST-SAFE → NOT flagged
17
+ // const q = "SELECT " + x; db.query(q); // q is DYNAMIC → flagged
18
+ //
19
+ // Scope model: a binding map per function/program scope (lexical chain). A
20
+ // binding is CONST-SAFE iff EVERY assignment to it (declarator init +
21
+ // reassignments) is a const-safe string; any non-const-safe assignment, or a
22
+ // reassignment we can't see as const-safe, makes it DYNAMIC. Function PARAMETERS
23
+ // are always dynamic.
24
+ //
25
+ // Inter-procedural upgrade (intra-module only — never crosses files):
26
+ // 1. Arg-sensitive local-function return-taint resolution. When a sink
27
+ // argument is a CallExpression to a function DEFINED in this module
28
+ // (FunctionDeclaration or const = FunctionExpression/Arrow), the function's
29
+ // return expression(s) are evaluated with its params BOUND to the call
30
+ // site's argument classifications, recovering both the dynamic flag and the
31
+ // static SQL text. This makes
32
+ // function build(x){ return "SELECT "+x } db.query(build(req.id)) // FLAG
33
+ // while keeping
34
+ // function build(){ return "SELECT 1" } db.query(build()) // clean
35
+ // function id(x){ return x } db.query(id("SELECT 1")) // clean
36
+ // The resolver is guarded by a visited-set (cycle guard) and a depth limit
37
+ // (~6). Unknown / non-local calls stay structurally dynamic with NO
38
+ // recovered text, so the SQL-keyword gate still protects against FPs. The
39
+ // resolution is ADDITIVE: it can only turn a clean SQL sink into a finding
40
+ // (by recovering SQL text from a helper) — it never clears an existing one.
41
+ // 2. Sink-alias resolution. A module binding `const X = <obj>.<sinkMethod>`
42
+ // (query/execute/raw → sql alias; exec/execSync → shell alias), where X is a
43
+ // simple const not reassigned, makes a later `X(arg)` call get the same
44
+ // dynamic + SQL-keyword + parameterized treatment as the underlying sink.
45
+ // `const run = console.log; run(...)` is NOT a sink (console.log is not a
46
+ // tracked sink method).
47
+ //
48
+ // Both upgrades inherit the same FP guards (const-propagation, SQL-keyword gate,
49
+ // params-array exemption), so they hold the FP=0 benchmark gate.
50
+
51
+ import { parseToAst, walk } from './parse.js';
52
+
53
// AST node types that open a new function scope.
const FN_TYPES = new Set([
  'FunctionDeclaration',
  'FunctionExpression',
  'ArrowFunctionExpression',
]);

// Method names treated as SQL sinks (connection / ORM raw-query entry points).
const SQL_SINKS = new Set(['query', 'execute', 'raw']);

// SQL keyword gate: a dynamic string only counts toward a SQL-injection finding
// when the recovered text actually contains SQL. Without it, generic calls such
// as `emitter.query(dynamic)` or `cache.execute(fn)` would be flagged.
const SQL_KW_RE = /\b(SELECT|INSERT\s+INTO|INSERT|UPDATE|DELETE\s+FROM|DELETE|REPLACE|MERGE|UNION|DROP\s+TABLE|DROP|ALTER\s+TABLE|ALTER|TRUNCATE|CREATE\s+TABLE|FROM|WHERE)\b/i;

// child_process methods whose first argument is a shell command string.
const CP_SHELL_EXEC = new Set(['exec', 'execSync']);

// child_process methods taking (command, args[]); these are only treated as
// injectable when shell:true is set AND the command is dynamic.
const CP_SPAWN = new Set(['spawn', 'spawnSync', 'execFile', 'execFileSync']);

// Upper bound on local-call resolution depth (cycle / runaway guard).
const IP_DEPTH_LIMIT = 6;
71
+
72
/**
 * Index every function defined in this module by its name.
 *
 * Two shapes are recognised: a named FunctionDeclaration, and a variable
 * declarator whose initializer is a FunctionExpression or arrow function
 * (`const f = () => …`). Nested scopes are included; when a name is defined
 * more than once the definition visited last overwrites the earlier one.
 *
 * @param {object} ast Program node
 * @returns {Map<string, object>} name → function node
 */
function collectLocalFns(ast) {
  const byName = new Map();
  const isFunctionValue = (init) =>
    Boolean(init) && (init.type === 'FunctionExpression' || init.type === 'ArrowFunctionExpression');

  walk(ast, (node) => {
    if (node.type === 'FunctionDeclaration') {
      if (node.id && node.id.type === 'Identifier') byName.set(node.id.name, node);
      return;
    }
    if (node.type !== 'VariableDeclaration') return;
    for (const decl of node.declarations) {
      if (decl.id.type === 'Identifier' && isFunctionValue(decl.init)) {
        byName.set(decl.id.name, decl.init);
      }
    }
  });

  return byName;
}
100
+
101
/**
 * Find module bindings that alias a tracked sink method.
 *
 * A binding qualifies when it is declared with `const`, binds a plain
 * Identifier, and is initialised from a non-computed member access whose
 * property is one of query/execute/raw (sql alias) or exec/execSync (shell
 * alias). Any name that is the target of an assignment expression anywhere in
 * the tree is dropped afterwards, since it can no longer be trusted to refer
 * to the sink. `const run = console.log` is not recorded because `log` is not
 * a tracked sink method.
 *
 * @param {object} ast Program node
 * @returns {Map<string, {kind:'sql'|'shell', method:string}>}
 */
function collectSinkAliases(ast) {
  const aliasByName = new Map();
  const assignedNames = new Set();

  const sinkKindOf = (method) => {
    if (SQL_SINKS.has(method)) return 'sql';
    if (CP_SHELL_EXEC.has(method)) return 'shell';
    return null;
  };

  walk(ast, (node) => {
    if (node.type === 'AssignmentExpression') {
      if (node.left.type === 'Identifier') assignedNames.add(node.left.name);
      return;
    }
    // let/var bindings can be rebound to a non-sink, so only const is trusted.
    if (node.type !== 'VariableDeclaration' || node.kind !== 'const') return;

    for (const decl of node.declarations) {
      const init = decl.init;
      if (decl.id.type !== 'Identifier' || !init || init.type !== 'MemberExpression') continue;
      if (init.property.type !== 'Identifier' || init.computed) continue;

      const method = init.property.name;
      const kind = sinkKindOf(method);
      if (kind) aliasByName.set(decl.id.name, { kind, method });
    }
  });

  for (const name of assignedNames) aliasByName.delete(name);
  return aliasByName;
}
140
+
141
/**
 * List the expressions a function can return.
 *
 * An arrow function with an expression body returns that body. Otherwise the
 * body is searched depth-first for ReturnStatements with an argument. The
 * search stops at nested functions, because their returns belong to the nested
 * closure rather than to `fn`.
 *
 * @param {object} fn FunctionDeclaration | FunctionExpression | ArrowFunctionExpression
 * @returns {object[]} return-value expressions, in traversal order
 */
function returnsOf(fn) {
  const hasExpressionBody = fn.type === 'ArrowFunctionExpression' && fn.body.type !== 'BlockStatement';
  if (hasExpressionBody) return [fn.body];

  // Position / back-link properties that must not be traversed as children.
  const metaKeys = new Set(['loc', 'start', 'end', 'range', '__parent']);
  const isNode = (value) => Boolean(value) && typeof value.type === 'string';
  const found = [];

  const visit = (node) => {
    if (!isNode(node)) return;
    if (node.type === 'ReturnStatement') {
      if (node.argument) found.push(node.argument);
      return;
    }
    // A nested function's returns are not this function's returns.
    if (FN_TYPES.has(node.type)) return;

    for (const key of Object.keys(node)) {
      if (metaKeys.has(key)) continue;
      const value = node[key];
      const children = Array.isArray(value) ? value : [value];
      for (const child of children) {
        if (isNode(child)) visit(child);
      }
    }
  };

  visit(fn.body);
  return found;
}
176
+
177
/**
 * Arg-sensitive evaluator: classify an expression as { dynamic, text }, where
 * `text` is the static string recovered from literals (fed to the SQL-keyword
 * gate by callers). A CallExpression whose callee is a function in `fns` is
 * resolved by binding that function's simple Identifier parameters to the
 * classifications of the call-site arguments and evaluating each of its return
 * expressions.
 *
 * Conservative leaves: an Identifier that is not in `env` is reported as not
 * dynamic with no text; an unknown, non-local, or already-active (recursive)
 * call and any other node type (MemberExpression, AwaitExpression, …) are
 * reported as dynamic with no text.
 *
 * `depth` counts resolved LOCAL CALLS only. Structural recursion into operands
 * of `+`, template expressions, and call arguments does not consume the
 * budget: it is bounded by the size of the AST, and charging it would make a
 * long constant concatenation (e.g. seven or more literals joined with `+`)
 * hit the limit and be misreported as dynamic.
 *
 * @param {object} node expression to classify
 * @param {Map<string,{dynamic:boolean,text:string}>} env param bindings
 * @param {Map<string,object>} fns local-function map
 * @param {number} depth number of local calls resolved so far
 * @param {Set<string>} seen function names on the active call stack (cycle guard)
 * @returns {{dynamic:boolean, text:string}}
 */
function evalExpr(node, env, fns, depth, seen) {
  if (!node) return { dynamic: false, text: '' };
  // Call-resolution budget exhausted: treat like an unresolved call.
  if (depth > IP_DEPTH_LIMIT) return { dynamic: true, text: '' };

  // Sub-expressions of the current function body stay at the same call depth.
  const evalChild = (child) => evalExpr(child, env, fns, depth, seen);

  switch (node.type) {
    case 'Literal':
      return { dynamic: false, text: typeof node.value === 'string' ? node.value : '' };

    case 'TemplateLiteral': {
      const text = node.quasis
        .map((q) => {
          // Guard a missing `value` so a malformed quasi cannot throw.
          const value = q.value || {};
          return value.cooked != null ? value.cooked : value.raw || '';
        })
        .join(' ');
      const dynamic = node.expressions.some((e) => evalChild(e).dynamic);
      return { dynamic, text };
    }

    case 'BinaryExpression': {
      // Only string concatenation can assemble a query/command string.
      if (node.operator !== '+') return { dynamic: false, text: '' };
      const left = evalChild(node.left);
      const right = evalChild(node.right);
      return { dynamic: left.dynamic || right.dynamic, text: left.text + ' ' + right.text };
    }

    case 'TaggedTemplateExpression':
      return evalChild(node.quasi);

    case 'ParenthesizedExpression':
      return evalChild(node.expression);

    case 'Identifier':
      // Unknown bare identifier: conservative — not dynamic-flaggable, no text.
      return env.has(node.name) ? env.get(node.name) : { dynamic: false, text: '' };

    case 'CallExpression': {
      const name = node.callee.type === 'Identifier' ? node.callee.name : null;
      if (!name || !fns.has(name) || seen.has(name)) {
        // Unknown / non-local / recursive call → structurally dynamic, no text.
        return { dynamic: true, text: '' };
      }

      const fn = fns.get(name);
      // Bind each simple parameter to its argument's classification, evaluated
      // in the CALLER's environment. A missing argument binds as not dynamic.
      const argEnv = new Map();
      (fn.params || []).forEach((param, index) => {
        if (param.type !== 'Identifier') return;
        const arg = node.arguments[index];
        argEnv.set(param.name, arg ? evalChild(arg) : { dynamic: false, text: '' });
      });

      const active = new Set(seen);
      active.add(name);

      // Union over every return path: dynamic if any is, text concatenated.
      let dynamic = false;
      let text = '';
      for (const ret of returnsOf(fn)) {
        const value = evalExpr(ret, argEnv, fns, depth + 1, active);
        dynamic = dynamic || value.dynamic;
        text += ' ' + value.text;
      }
      return { dynamic, text };
    }

    default:
      // MemberExpression (req.body.x), AwaitExpression, etc. — runtime value,
      // but no statically recoverable text.
      return { dynamic: true, text: '' };
  }
}
258
+
259
/**
 * Seed an evaluation environment from the functions that enclose `node`.
 *
 * Walks the `__parent` chain and records every parameter name of every
 * enclosing function as dynamic with no recoverable text. When the same name
 * occurs at several levels, the innermost occurrence is the one recorded.
 * This lets a sink argument such as `build(req.id)` inside `function h(req)`
 * see `req` as dynamic.
 *
 * @param {object} node a CallExpression sink node (must carry `__parent` links)
 * @returns {Map<string,{dynamic:boolean,text:string}>}
 */
function enclosingParamEnv(node) {
  const env = new Map();
  for (let ancestor = node.__parent; ancestor; ancestor = ancestor.__parent) {
    if (!FN_TYPES.has(ancestor.type) || !Array.isArray(ancestor.params)) continue;
    const names = ancestor.params.flatMap((param) => patternNames(param));
    for (const name of names) {
      // Inner scopes are visited first, so never overwrite an existing entry.
      if (!env.has(name)) env.set(name, { dynamic: true, text: '' });
    }
  }
  return env;
}
283
+
284
/**
 * Create an empty scope record: a link to the enclosing scope plus a map of
 * the bindings declared in this scope. Binding records stored by the
 * collectors in this file carry `constSafe` and `dynamic` flags (and, when
 * merged, an `initExprs` list). A name that is absent from the whole chain
 * makes `lookup` return null.
 *
 * @param {object|null} parent enclosing scope, or null for the outermost one
 * @returns {{parent: object|null, bindings: Map<string, object>}}
 */
function makeScope(parent) {
  const bindings = new Map();
  return { parent, bindings };
}
291
+
292
/**
 * Resolve `name` through the lexical scope chain, innermost scope first.
 *
 * @param {object|null} scope scope to start from
 * @param {string} name identifier to resolve
 * @returns {object|null} the binding record, or null when no scope declares it
 */
function lookup(scope, name) {
  for (let current = scope; current; current = current.parent) {
    if (current.bindings.has(name)) return current.bindings.get(name);
  }
  return null;
}
300
+
301
/**
 * Decide whether `node` is a compile-time-constant string expression.
 *
 *   missing node                 → true
 *   Literal                      → true (any literal)
 *   TemplateLiteral              → true only with zero `${}` expressions
 *   BinaryExpression             → true only for `+` with both operands const-safe
 *   Identifier                   → true only if its resolved binding is const-safe
 *   anything else                → false
 *
 * An identifier with no binding in the scope chain (an import or global, for
 * example) is reported as not const-safe; that means "not provably constant",
 * not "dynamic".
 *
 * @param {object} node expression node
 * @param {object} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function isConstSafe(node, scope) {
  if (!node) return true;

  const { type } = node;
  if (type === 'Literal') return true;
  if (type === 'TemplateLiteral') return node.expressions.length === 0;
  if (type === 'BinaryExpression') {
    return node.operator === '+' && isConstSafe(node.left, scope) && isConstSafe(node.right, scope);
  }
  if (type === 'Identifier') {
    const binding = lookup(scope, node.name);
    return Boolean(binding) && binding.constSafe === true;
  }
  return false;
}
333
+
334
/**
 * Decide whether `node` is a DYNAMIC value — runtime-assembled or external —
 * i.e. the kind of value that is reported when it reaches a sink.
 *
 *   missing node / Literal        → false
 *   TemplateLiteral               → true when it has `${}` expressions
 *   BinaryExpression `+`          → true when either operand is dynamic
 *   other BinaryExpression        → false (yields a number/boolean, not a
 *                                   command or query string)
 *   Identifier                    → the resolved binding's `dynamic` flag;
 *                                   false when the name has no binding
 *   TaggedTemplate / Parenthesized → classification of the wrapped expression
 *   anything else                 → true (MemberExpression, CallExpression, …)
 *
 * Unbound identifiers are deliberately not flagged: a bare imported/global
 * name is not a clearly assembled value and flagging it would cause false
 * positives. Sinks apply their own extra gates on top of this structural
 * answer.
 *
 * @param {object} node expression node
 * @param {object} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function isDynamic(node, scope) {
  if (!node) return false;

  const { type } = node;
  if (type === 'Literal') return false;
  if (type === 'TemplateLiteral') return node.expressions.length > 0;
  if (type === 'BinaryExpression') {
    return node.operator === '+' && (isDynamic(node.left, scope) || isDynamic(node.right, scope));
  }
  if (type === 'Identifier') {
    const binding = lookup(scope, node.name);
    return binding ? binding.dynamic === true : false;
  }
  if (type === 'TaggedTemplateExpression') return isDynamic(node.quasi, scope);
  if (type === 'ParenthesizedExpression') return isDynamic(node.expression, scope);

  // Every remaining node type is a runtime value.
  return true;
}
377
+
378
/**
 * Classify the value produced by an initializer or assignment right-hand side.
 *
 * The two flags are independent: `constSafe` says the value is provably a
 * constant string, `dynamic` says it is a clearly assembled/external value.
 * Both can be false at once (a bare unbound identifier, for example).
 *
 * @param {object} node expression node
 * @param {object} scope scope used to resolve identifiers
 * @returns {{constSafe:boolean, dynamic:boolean}}
 */
function classifyValue(node, scope) {
  const constSafe = isConstSafe(node, scope);
  const dynamic = isDynamic(node, scope);
  return { constSafe, dynamic };
}
392
+
393
/**
 * Fill `scope.bindings` with the names declared or assigned directly inside
 * `scopeNode`, without entering nested functions (each gets its own scope).
 *
 * - Function parameters are seeded as dynamic and not const-safe.
 * - Each variable declarator is classified from its initializer; a declarator
 *   without an initializer is neither const-safe nor dynamic.
 * - `name = expr` is classified from `expr`; `name += expr` combines `expr`
 *   with the binding already recorded in THIS scope; any other compound
 *   operator is recorded as neither const-safe nor dynamic.
 *
 * All classifications go through mergeBinding, so a binding stays const-safe
 * only while every assignment seen for it is const-safe
 * (`let q = "SELECT 1"; q = q + x` ends up dynamic).
 *
 * @param {object} scopeNode Program | Function node
 * @param {object} scope the scope whose bindings we fill
 */
function collectBindings(scopeNode, scope) {
  // Parameters are always dynamic.
  if (FN_TYPES.has(scopeNode.type) && Array.isArray(scopeNode.params)) {
    for (const param of scopeNode.params) {
      for (const name of patternNames(param)) {
        scope.bindings.set(name, { constSafe: false, dynamic: true });
      }
    }
  }

  const recordDeclaration = (declaration) => {
    for (const decl of declaration.declarations) {
      // Only a plain `name = expr` declarator has an init expression worth
      // keeping for later text recovery; destructured names get none.
      const keepsInit = decl.id.type === 'Identifier' && Boolean(decl.init);
      for (const name of patternNames(decl.id)) {
        const cls = decl.init
          ? classifyValue(decl.init, scope)
          : { constSafe: false, dynamic: false };
        if (keepsInit) cls.initExpr = decl.init;
        mergeBinding(scope, name, cls);
      }
    }
  };

  const recordAssignment = (assignment) => {
    const name = assignment.left.name;
    let cls;
    switch (assignment.operator) {
      case '=':
        cls = classifyValue(assignment.right, scope);
        cls.initExpr = assignment.right;
        break;
      case '+=': {
        // Appending: const-safe only if both the existing value and the
        // appended value are; dynamic if either is.
        const rhsSafe = isConstSafe(assignment.right, scope);
        const prior = scope.bindings.get(name);
        const priorSafe = prior ? prior.constSafe === true : false;
        cls = {
          constSafe: rhsSafe && priorSafe,
          dynamic: isDynamic(assignment.right, scope) || (prior ? prior.dynamic : false),
        };
        // No single init expression survives a `+=`; dropping it can only
        // cost a detection, never add one.
        cls.initExpr = null;
        break;
      }
      default:
        // Other compound operators produce numbers — not a string-sink concern.
        cls = { constSafe: false, dynamic: false };
    }
    mergeBinding(scope, name, cls);
  };

  const body = scopeNode.type === 'Program' ? scopeNode : scopeNode.body;
  walkScopeLocal(body, scopeNode, (node) => {
    if (node.type === 'VariableDeclaration') {
      recordDeclaration(node);
    } else if (node.type === 'AssignmentExpression' && node.left.type === 'Identifier') {
      recordAssignment(node);
    }
  });
}
456
+
457
/**
 * Fold one more classification into the binding for `name`.
 *
 * The merge only ever moves toward "unsafe": `dynamic` is OR-ed, `constSafe`
 * is AND-ed. The assigned expressions are accumulated in `initExprs`, so the
 * binding keeps its full assignment history (a falsy `cls.initExpr`
 * contributes nothing to the list).
 *
 * @param {object} scope scope owning the binding
 * @param {string} name binding name
 * @param {{constSafe:boolean, dynamic:boolean, initExpr?:object|null}} cls
 */
function mergeBinding(scope, name, cls) {
  const added = cls.initExpr ? [cls.initExpr] : [];
  const existing = scope.bindings.get(name);

  const merged = existing
    ? {
        constSafe: existing.constSafe && cls.constSafe,
        dynamic: existing.dynamic || cls.dynamic,
        // Parameter bindings are seeded without initExprs, hence the fallback.
        initExprs: [...(existing.initExprs || []), ...added],
      }
    : { constSafe: cls.constSafe, dynamic: cls.dynamic, initExprs: added };

  scope.bindings.set(name, merged);
}
481
+
482
/**
 * Collect the identifier names bound by a binding pattern.
 *
 * Handles plain identifiers, defaults (`a = 1`), rest elements, and array /
 * object destructuring, recursively. Any other node type binds nothing.
 *
 * @param {object|null} pat pattern node
 * @param {string[]} [out] accumulator; names are appended to it in place
 * @returns {string[]} the accumulator
 */
function patternNames(pat, out = []) {
  if (!pat) return out;

  if (pat.type === 'Identifier') {
    out.push(pat.name);
  } else if (pat.type === 'AssignmentPattern') {
    patternNames(pat.left, out);
  } else if (pat.type === 'RestElement') {
    patternNames(pat.argument, out);
  } else if (pat.type === 'ArrayPattern') {
    // Holes in an array pattern (`[a, , b]`) appear as null elements.
    pat.elements.forEach((element) => {
      if (element) patternNames(element, out);
    });
  } else if (pat.type === 'ObjectPattern') {
    pat.properties.forEach((prop) => {
      patternNames(prop.type === 'RestElement' ? prop.argument : prop.value, out);
    });
  }
  return out;
}
509
+
510
/**
 * Visit every node that belongs to one scope's body, depth-first and parent
 * before children, without entering nested functions (those are separate
 * scopes). A nested function node itself is not visited either.
 *
 * @param {object|null} root Program, a function's BlockStatement body, or the
 *   expression body of an arrow function
 * @param {object} scopeNode the node owning the scope; never treated as "nested"
 * @param {(node: object) => void} visit callback invoked for each visited node
 */
function walkScopeLocal(root, scopeNode, visit) {
  if (!root) return;

  // Position / back-link properties that are not child nodes.
  const ignoredKeys = new Set(['loc', 'start', 'end', 'range', 'parent', '__parent']);
  const isNode = (value) => Boolean(value) && typeof value.type === 'string';

  const descend = (node) => {
    if (!isNode(node)) return;
    // Do not cross into a nested function scope.
    if (node !== scopeNode && FN_TYPES.has(node.type)) return;

    visit(node);
    for (const key of Object.keys(node)) {
      if (ignoredKeys.has(key)) continue;
      const value = node[key];
      const children = Array.isArray(value) ? value : [value];
      for (const child of children) {
        if (isNode(child)) descend(child);
      }
    }
  };

  const isStatementList = root.type === 'Program' || root.type === 'BlockStatement';
  if (isStatementList) {
    // Start below the container: only its statements are visited.
    for (const statement of root.body) descend(statement);
  } else {
    // Arrow with expression body: `() => expr`
    descend(root);
  }
}
542
+
543
/**
 * Build one scope per Program/function node and return the node → scope map
 * used by sink detection to find the right bindings.
 *
 * The program scope is created first. Every function node then gets a child
 * scope whose parent is the nearest enclosing node already present in the map
 * (falling back to the program scope). This relies on `walk` reaching an
 * enclosing function before the functions nested inside it.
 *
 * @param {object} ast Program node (with `__parent` links annotated)
 * @returns {Map<object, object>} node → scope
 */
function buildScopes(ast) {
  const programScope = makeScope(null);
  collectBindings(ast, programScope);
  const scopeOf = new Map([[ast, programScope]]);

  walk(ast, (node, parent) => {
    if (!FN_TYPES.has(node.type)) return;
    const enclosing = nearestScope(scopeOf, parent) || programScope;
    const fnScope = makeScope(enclosing);
    collectBindings(node, fnScope);
    scopeOf.set(node, fnScope);
  });

  return scopeOf;
}
567
+
568
/**
 * Starting at `node` itself, climb the `__parent` chain and return the scope
 * of the first node that has one registered in `scopeOf`.
 *
 * @param {Map<object, object>} scopeOf node → scope map
 * @param {object|null} node starting node
 * @returns {object|null} the scope found, or null if the chain has none
 */
function nearestScope(scopeOf, node) {
  for (let current = node; current; current = current.__parent || null) {
    if (scopeOf.has(current)) return scopeOf.get(current);
  }
  return null;
}
577
+
578
/**
 * Analyze source code for variable-mediated injection sinks.
 *
 * Parses `code`, annotates parent links, builds the scope map and the
 * intra-module maps (local functions, sink aliases), then hands every call
 * and `new` expression to its handler. Findings are de-duplicated before
 * being returned. When the parser yields no AST the result is
 * `{ parsed: false, findings: [] }`.
 *
 * @param {string} code source text
 * @param {string} file file name reported in findings
 * @param {{ ts?: boolean }} [opts] `ts` is forwarded to the parser
 * @returns {{ parsed: boolean, findings: Array }}
 */
export function analyze(code, file, opts = {}) {
  const ast = parseToAst(code, { ts: opts.ts });
  if (!ast) return { parsed: false, findings: [] };

  // Parent links are needed to resolve the enclosing scope of any node.
  annotateParents(ast);
  const scopeOf = buildScopes(ast);

  // Local functions (for arg-sensitive return resolution) and sink aliases
  // (`const X = obj.query`), both scoped to this module only.
  const ipCtx = {
    fns: collectLocalFns(ast),
    aliases: collectSinkAliases(ast),
  };

  const findings = [];
  // Line number of a node, or 0 when it carries no location.
  const line = (n) => (n.loc ? n.loc.start.line : 0);

  walk(ast, (node) => {
    switch (node.type) {
      case 'CallExpression':
        handleCall(node, scopeOf, file, line, findings, ipCtx);
        break;
      case 'NewExpression':
        handleNew(node, scopeOf, file, line, findings);
        break;
      default:
        break;
    }
  });

  return { parsed: true, findings: dedupe(findings) };
}
611
+
612
/**
 * Resolve a sink argument that is a direct call to a function defined in this
 * module.
 *
 * When `arg` is such a call, it is evaluated arg-sensitively with the
 * parameters of the functions enclosing the sink seeded as dynamic, and the
 * recovered { dynamic, text } is returned so the caller can apply the same
 * gates it uses for other values. For any other argument the result is null
 * and the caller keeps its own classification.
 *
 * @param {object|undefined} arg the sink's argument node
 * @param {object} node the sink CallExpression (used to find enclosing params)
 * @param {{fns: Map<string, object>}} ipCtx inter-procedural context
 * @returns {{dynamic:boolean, text:string}|null}
 */
function resolveLocalCallArg(arg, node, ipCtx) {
  const isLocalCall =
    Boolean(arg) &&
    arg.type === 'CallExpression' &&
    arg.callee.type === 'Identifier' &&
    ipCtx.fns.has(arg.callee.name);
  if (!isLocalCall) return null;

  const paramEnv = enclosingParamEnv(node);
  return evalExpr(arg, paramEnv, ipCtx.fns, 0, new Set());
}
625
+
626
/**
 * Find the binding scope that encloses `node`: the scope of its nearest
 * ancestor function, or of the Program, that is registered in `scopeOf`.
 *
 * If the parent chain yields nothing, the scope of any Program key in the map
 * is used; if there is none, a fresh empty scope is returned.
 *
 * @param {Map<object, object>} scopeOf node → scope map
 * @param {object} node node whose enclosing scope is wanted
 * @returns {object} a scope (never null)
 */
function scopeForNode(scopeOf, node) {
  for (let ancestor = node.__parent; ancestor; ancestor = ancestor.__parent) {
    const opensScope = FN_TYPES.has(ancestor.type) || ancestor.type === 'Program';
    if (opensScope && scopeOf.has(ancestor)) return scopeOf.get(ancestor);
  }

  // Fallback: program scope.
  for (const [key, scope] of scopeOf) {
    if (key.type === 'Program') return scope;
  }
  return makeScope(null);
}
639
+
640
+ function handleCall(node, scopeOf, file, line, findings, ipCtx) {
641
+ const callee = node.callee;
642
+ const scope = scopeForNode(scopeOf, node);
643
+ const args = node.arguments || [];
644
+
645
+ // eval(dynamic)
646
+ if (callee.type === 'Identifier' && callee.name === 'eval') {
647
+ if (args[0] && isDynamic(args[0], scope)) {
648
+ findings.push(evalFinding(file, line(node), node));
649
+ }
650
+ return;
651
+ }
652
+
653
+ // window/globalThis/self/global .eval(dynamic) — indirect eval
654
+ if (
655
+ callee.type === 'MemberExpression' &&
656
+ callee.property.type === 'Identifier' &&
657
+ callee.property.name === 'eval' &&
658
+ callee.object.type === 'Identifier' &&
659
+ /^(window|globalThis|self|global)$/.test(callee.object.name)
660
+ ) {
661
+ if (args[0] && isDynamic(args[0], scope)) {
662
+ findings.push(evalFinding(file, line(node), node));
663
+ }
664
+ return;
665
+ }
666
+
667
+ if (callee.type === 'MemberExpression' && callee.property.type === 'Identifier') {
668
+ const method = callee.property.name;
669
+
670
+ // SQL sink: .query / .execute / .raw with a dynamic, NON-parameterized,
671
+ // SQL-shaped argument.
672
+ if (SQL_SINKS.has(method)) {
673
+ const arg0 = args[0];
674
+ if (arg0 && isDynamic(arg0, scope) && looksLikeSql(arg0, scope) && !isParameterized(node, arg0, scope)) {
675
+ findings.push(sqlFinding(file, line(node), node));
676
+ return;
677
+ }
678
+ // INTER-PROCEDURAL (additive): the intraprocedural path above recovers NO
679
+ // SQL text from a CallExpression arg. If arg0 is a call to a LOCAL helper,
680
+ // resolve its return arg-sensitively; flag only when the recovered value
681
+ // is dynamic AND carries a real SQL keyword AND the call is not
682
+ // parameterized. A const-returning helper or an identity-fn(constant) stays
683
+ // clean (no dynamic / no recovered keyword); a non-SQL helper stays clean
684
+ // (keyword gate).
685
+ if (ipCtx && arg0) {
686
+ const ip = resolveLocalCallArg(arg0, node, ipCtx);
687
+ if (ip && ip.dynamic && SQL_KW_RE.test(ip.text) && !isParameterized(node, arg0, scope)) {
688
+ findings.push(sqlFinding(file, line(node), node));
689
+ return;
690
+ }
691
+ }
692
+ }
693
+
694
+ // child_process exec / execSync with a dynamic command string.
695
+ if (CP_SHELL_EXEC.has(method)) {
696
+ const arg0 = args[0];
697
+ if (arg0 && isDynamic(arg0, scope)) {
698
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
699
+ return;
700
+ }
701
+ }
702
+
703
+ // spawn / execFile family: only injectable with shell:true AND dynamic cmd.
704
+ if (CP_SPAWN.has(method)) {
705
+ const arg0 = args[0];
706
+ if (arg0 && isDynamic(arg0, scope) && hasShellTrue(args, scope)) {
707
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
708
+ return;
709
+ }
710
+ }
711
+ }
712
+
713
+ // SINK ALIAS (inter-procedural): `const X = obj.query` / `const X = cp.execSync`
714
+ // makes a later `X(arg)` call the same sink. Apply the SAME guards as the
715
+ // underlying member sink (dynamic + SQL-keyword + parameterized for sql;
716
+ // dynamic for shell). The arg may itself be a local-function call, so reuse the
717
+ // inter-procedural resolver. `const run=console.log; run(...)` is not an alias
718
+ // (console.log is not a tracked sink method) and never reaches here.
719
+ if (callee.type === 'Identifier' && ipCtx && ipCtx.aliases.has(callee.name)) {
720
+ const alias = ipCtx.aliases.get(callee.name);
721
+ const arg0 = args[0];
722
+ if (arg0) {
723
+ const ip = resolveLocalCallArg(arg0, node, ipCtx);
724
+ const dynamic = ip ? ip.dynamic : isDynamic(arg0, scope);
725
+ if (alias.kind === 'shell') {
726
+ if (dynamic) {
727
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
728
+ return;
729
+ }
730
+ } else {
731
+ // sql alias: dynamic + real SQL keyword + not parameterized.
732
+ const text = ip ? ip.text : collectStaticText(arg0, scope, new Set());
733
+ if (dynamic && SQL_KW_RE.test(text) && !isParameterized(node, arg0, scope)) {
734
+ findings.push(sqlFinding(file, line(node), node));
735
+ return;
736
+ }
737
+ }
738
+ }
739
+ }
740
+
741
+ // Bare exec/execSync identifier (destructured from child_process):
742
+ // const { exec } = require('child_process'); exec(cmd);
743
+ if (callee.type === 'Identifier' && CP_SHELL_EXEC.has(callee.name)) {
744
+ const arg0 = args[0];
745
+ if (arg0 && isDynamic(arg0, scope)) {
746
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
747
+ return;
748
+ }
749
+ }
750
+ if (callee.type === 'Identifier' && CP_SPAWN.has(callee.name)) {
751
+ const arg0 = args[0];
752
+ if (arg0 && isDynamic(arg0, scope) && hasShellTrue(args, scope)) {
753
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
754
+ }
755
+ }
756
+ }
757
+
758
/**
 * Inspect a `new X(...)` node for the `new Function(...)` code-compilation sink.
 *
 * @param {object} node      NewExpression node being visited.
 * @param {*} scopeOf        Scope index handed to `scopeForNode` to resolve the enclosing scope.
 * @param {string} file      File path recorded on any finding.
 * @param {Function} line    Maps a node to the line number recorded on the finding.
 * @param {Array} findings   Output array; an eval finding is pushed onto it when the sink is hit.
 */
function handleNew(node, scopeOf, file, line, findings) {
  const ctor = node.callee;
  // Only the bare `Function` identifier is tracked as a constructor sink.
  if (ctor.type !== 'Identifier' || ctor.name !== 'Function') return;

  const scope = scopeForNode(scopeOf, node);
  const ctorArgs = node.arguments || [];
  // The function body is the LAST argument. A finding is raised only when that
  // body is dynamic; a call whose body is not dynamic produces nothing here
  // (the original note says the regex rule covers `new Function` outright, so
  // this avoids duplicate findings).
  const bodyArg = ctorArgs[ctorArgs.length - 1];
  if (!bodyArg) return;
  if (isDynamic(bodyArg, scope)) {
    findings.push(evalFinding(file, line(node), node));
  }
}
773
+
774
/**
 * False-positive guard for SQL sinks: a generic `.query(dynamic)` /
 * `.execute(dynamic)` on a non-SQL emitter should not be reported, so the
 * argument only counts when the static text recoverable from it (via
 * collectStaticText) matches SQL_KW_RE.
 *
 * @param {object} node   Argument expression passed to the sink.
 * @param {*} scope       Scope used to follow identifier bindings.
 * @returns {boolean}     True when the recovered text matches SQL_KW_RE.
 */
function looksLikeSql(node, scope) {
  return SQL_KW_RE.test(collectStaticText(node, scope, new Set()));
}
782
+
783
/**
 * Gather the static (literal) text that an expression contributes to the string
 * it builds, so callers can test it for SQL keywords.
 *
 * Text is collected from:
 *  - string literals;
 *  - template literals: the quasis AND any text recoverable from the `${...}`
 *    expressions (so `${base} WHERE id = ${id}` surfaces the text bound to
 *    `base`, exactly as `base + " WHERE id = " + id` does);
 *  - `+` concatenations (both operands);
 *  - identifiers, by following the binding's recorded init expression(s);
 *  - tagged templates (their quasi).
 * Anything else contributes the empty string. Pieces are joined with a single
 * space, so the result is suitable for keyword matching, not for reconstructing
 * the exact runtime string.
 *
 * @param {object|null|undefined} node  Expression to inspect.
 * @param {*} scope                     Scope passed to `lookup` for identifier bindings.
 * @param {Set<string>} seen            Identifier names already visited (cycle guard); mutated.
 * @returns {string}                    The recovered static text, possibly empty.
 */
function collectStaticText(node, scope, seen) {
  if (!node) return '';
  switch (node.type) {
    case 'Literal':
      return typeof node.value === 'string' ? node.value : '';
    case 'TemplateLiteral': {
      const expressions = node.expressions || [];
      const parts = [];
      node.quasis.forEach((q, i) => {
        // Tolerate a quasi without a `value` record instead of throwing on `.raw`.
        const v = q.value || {};
        parts.push(v.cooked != null ? v.cooked : v.raw || '');
        // Interleave the text recoverable from the i-th interpolation. Nothing is
        // appended when the expression yields no text, so templates whose
        // interpolations are opaque produce exactly the quasis joined by ' '.
        if (i < expressions.length) {
          const exprText = collectStaticText(expressions[i], scope, seen);
          if (exprText.trim() !== '') parts.push(exprText);
        }
      });
      return parts.join(' ');
    }
    case 'BinaryExpression':
      if (node.operator === '+') {
        return collectStaticText(node.left, scope, seen) + ' ' + collectStaticText(node.right, scope, seen);
      }
      return '';
    case 'Identifier': {
      // Each identifier is expanded at most once per traversal (cycle guard).
      if (seen.has(node.name)) return '';
      seen.add(node.name);
      // Recover the variable's assembled text by following its binding's init
      // expression(s), recursively. This is how the cross-statement SQL shape
      // `const q = "SELECT..." + x; db.query(q)` surfaces the SELECT keyword
      // even though the literal lives in a different statement.
      const b = lookup(scope, node.name);
      if (!b || !Array.isArray(b.initExprs)) return '';
      return b.initExprs.map((e) => collectStaticText(e, scope, seen)).join(' ');
    }
    case 'TaggedTemplateExpression':
      return collectStaticText(node.quasi, scope, seen);
    default:
      return '';
  }
}
826
+
827
/**
 * Decide whether a sink call uses the canonical parameterized shape: a second
 * argument that is an array LITERAL of bound values (pg `$1` + [..],
 * mysql2 `?` + [..]).
 *
 * Only an inline array literal counts. A params array passed through a
 * variable is deliberately not resolved, so `arg0` and `scope` are accepted for
 * signature parity with the other guards but are not consulted.
 *
 * @param {object} callNode  CallExpression node of the sink call.
 * @param {object} arg0      First argument of the call (unused).
 * @param {*} scope          Enclosing scope (unused).
 * @returns {boolean}        True when the second argument is an ArrayExpression.
 */
function isParameterized(callNode, arg0, scope) {
  // Referenced only so the unused parameters are explicit.
  void arg0;
  void scope;
  const params = (callNode.arguments || [])[1];
  return Boolean(params) && params.type === 'ArrayExpression';
}
846
+
847
/**
 * Does the call carry an inline options object that enables a shell?
 *
 * Every argument that is an object literal is scanned for a non-computed
 * `shell` property (identifier or string-literal key). The shell counts as
 * enabled when the value is:
 *  - the literal `true`, or
 *  - a non-empty string literal, since Node's child_process `shell` option
 *    also accepts the path/name of the shell to use (e.g. `shell: '/bin/sh'`).
 * Non-literal values (identifiers, expressions) are conservatively ignored, as
 * are options objects passed through a variable.
 *
 * @param {Array<object|null>} args  Argument nodes of the call.
 * @param {*} scope                  Enclosing scope (currently unused).
 * @returns {boolean}                True when a literal shell-enabling option is present.
 */
function hasShellTrue(args, scope) {
  // Referenced only so the unused parameter is explicit.
  void scope;
  for (const arg of args) {
    if (!arg || arg.type !== 'ObjectExpression') continue;
    for (const prop of arg.properties) {
      // Spread elements and computed keys cannot be resolved statically.
      if (prop.type !== 'Property' || prop.computed) continue;
      const { key, value } = prop;
      const isShellKey =
        (key.type === 'Identifier' && key.name === 'shell') ||
        (key.type === 'Literal' && key.value === 'shell');
      if (!isShellKey || value.type !== 'Literal') continue;
      if (value.value === true) return true;
      // A string names the shell binary; the empty string is falsy and leaves
      // the shell disabled.
      if (typeof value.value === 'string' && value.value !== '') return true;
    }
  }
  return false;
}
868
+
869
/**
 * Walk the AST once and stamp every node with a `__parent` back-link, so a
 * sink can later climb to the scope that encloses it. The root's `__parent` is
 * null. The tree is mutated in place; nothing is returned.
 *
 * A value counts as a node when it is truthy and has a string `type`.
 * Position metadata (`loc`, `start`, `end`, `range`) and the back-link itself
 * are never descended into.
 *
 * @param {object} ast  Root node of the tree to annotate.
 */
function annotateParents(ast) {
  const ignoredFields = new Set(['loc', 'start', 'end', 'range', '__parent']);
  const isAstNode = (candidate) => Boolean(candidate) && typeof candidate.type === 'string';

  function visit(current, owner) {
    if (!isAstNode(current)) return;
    current.__parent = owner;
    for (const [field, held] of Object.entries(current)) {
      if (ignoredFields.has(field)) continue;
      // Treat a single child like a one-element list so both shapes share a loop.
      const children = Array.isArray(held) ? held : [held];
      for (const kid of children) {
        if (isAstNode(kid)) visit(kid, current);
      }
    }
  }

  visit(ast, null);
}
888
+
889
+ // ---- Finding constructors (match the regex-rule finding shape) ----
890
+
891
/**
 * Assemble a finding object in the same shape the regex rules emit.
 *
 * @param {object} spec
 * @param {string} spec.rule            Rule name (kebab-case); also folded into the id.
 * @param {string} spec.pattern         Pattern name (kebab-case); also folded into the id.
 * @param {string} spec.severity        Severity label; 'critical' makes the finding block apply.
 * @param {string} spec.file            File the finding belongs to.
 * @param {number} spec.line            Line recorded on the finding.
 * @param {string} spec.title           Short headline.
 * @param {string} spec.description     Longer explanation.
 * @param {string} spec.recommendation  Suggested remediation.
 * @param {object} spec.node            AST node used to build the `match` marker.
 * @returns {object} The finding record.
 */
function baseFinding({ rule, pattern, severity, file, line, title, description, recommendation, node }) {
  // Upper-case a kebab-case name and swap dashes for underscores for the id.
  const toIdPart = (name) => name.toUpperCase().replace(/-/g, '_');
  const id = `RULE_${toIdPart(rule)}_${toIdPart(pattern)}`;
  // Regex-rule convention: only a critical finding hard-blocks apply; lower
  // severities do not set the flag.
  const blocksApply = severity === 'critical';
  return {
    id,
    rule,
    pattern,
    severity,
    category: 'code-injection',
    file,
    line,
    title,
    description,
    recommendation,
    blocks_apply: blocksApply,
    match: snippet(node),
  };
}
909
+
910
/**
 * Build the finding reported when a dynamic value reaches eval()/new Function().
 * Severity is fixed at 'high'.
 *
 * @param {string} file  File the sink lives in.
 * @param {number} line  Line of the sink.
 * @param {object} node  Sink node, used for the `match` marker.
 * @returns {object} Finding produced by baseFinding.
 */
function evalFinding(file, line, node) {
  const title = 'eval()/Function() with a dynamically-built value (dataflow)';
  const description =
    'Dataflow analysis found a runtime-assembled (non-constant) value flowing into eval()/new Function(). Executing assembled strings is a code-injection / RCE vector the line-by-line scanner misses when the value is built across statements.';
  const recommendation = 'Remove eval()/new Function(). Use JSON.parse for data or a lookup table for dispatch.';
  const spec = {
    rule: 'ast-eval-injection',
    pattern: 'ast-eval',
    severity: 'high',
    file,
    line,
    title,
    description,
    recommendation,
    node,
  };
  return baseFinding(spec);
}
924
+
925
/**
 * Build the finding reported when a dynamically assembled query reaches a SQL
 * sink without a params array. Severity is fixed at 'high'.
 *
 * @param {string} file  File the sink lives in.
 * @param {number} line  Line of the sink.
 * @param {object} node  Sink node, used for the `match` marker.
 * @returns {object} Finding produced by baseFinding.
 */
function sqlFinding(file, line, node) {
  const title = 'SQL sink fed a dynamically-built query (dataflow)';
  const description =
    'Dataflow analysis found a runtime-assembled (non-constant) SQL string flowing into .query()/.execute()/.raw() without a params array. This is a SQL-injection vector the line scanner misses when the query is built in a separate statement.';
  const recommendation = 'Use parameterized queries (placeholders + a params array), not an assembled query string.';
  const spec = {
    rule: 'ast-sql-injection',
    pattern: 'ast-sql',
    severity: 'high',
    file,
    line,
    title,
    description,
    recommendation,
    node,
  };
  return baseFinding(spec);
}
939
+
940
/**
 * Build the finding reported when a dynamically assembled command reaches a
 * shell sink. Unlike the eval/SQL constructors, the severity is chosen by the
 * caller.
 *
 * @param {string} file      File the sink lives in.
 * @param {number} line      Line of the sink.
 * @param {object} node      Sink node, used for the `match` marker.
 * @param {string} severity  Severity label to record on the finding.
 * @returns {object} Finding produced by baseFinding.
 */
function cmdFinding(file, line, node, severity) {
  const title = 'Shell sink fed a dynamically-built command (dataflow)';
  const description =
    'Dataflow analysis found a runtime-assembled (non-constant) command string flowing into child_process exec/execSync (or spawn with shell:true). This is an OS-command-injection / RCE vector the line scanner misses when the command is assembled across statements.';
  const recommendation = 'Use execFile/spawn with an argument array (no shell), or strictly validate the input.';
  const spec = {
    rule: 'ast-command-injection',
    pattern: 'ast-cmd',
    severity,
    file,
    line,
    title,
    description,
    recommendation,
    node,
  };
  return baseFinding(spec);
}
954
+
955
/**
 * Produce a short structural marker for a sink node, derived from its callee
 * because the node does not carry raw source text:
 *  - `name(...)` for a call through an identifier,
 *  - `obj.method(...)` for a member call (the object is shown as `…` when it is
 *    not a plain identifier),
 *  - `new Name(...)` for a constructor call through an identifier.
 * Best-effort: any other shape, or any error while reading the node, yields ''.
 *
 * @param {object} node  CallExpression or NewExpression node.
 * @returns {string} The marker, or '' when none can be built.
 */
function snippet(node) {
  try {
    switch (node.type) {
      case 'CallExpression': {
        const { callee } = node;
        if (callee.type === 'Identifier') return `${callee.name}(...)`;
        if (callee.type === 'MemberExpression' && callee.property.type === 'Identifier') {
          const owner = callee.object.type === 'Identifier' ? callee.object.name : '…';
          return `${owner}.${callee.property.name}(...)`;
        }
        break;
      }
      case 'NewExpression':
        if (node.callee.type === 'Identifier') return `new ${node.callee.name}(...)`;
        break;
      default:
        break;
    }
  } catch {
    /* best-effort */
  }
  return '';
}
974
+
975
/**
 * Collapse findings that share the same rule and line, keeping the first one
 * encountered, and return the survivors ordered by ascending line.
 *
 * @param {Array<object>} findings  Findings carrying `rule` and `line`.
 * @returns {Array<object>} New array of unique findings sorted by line.
 */
function dedupe(findings) {
  const firstByKey = new Map();
  for (const finding of findings) {
    const key = `${finding.rule}:${finding.line}`;
    // First occurrence wins; later duplicates are discarded.
    if (!firstByKey.has(key)) firstByKey.set(key, finding);
  }
  // Map iteration follows insertion order, so ties on line keep input order.
  return [...firstByKey.values()].sort((left, right) => left.line - right.line);
}