@ps-neko/nekowork 0.2.0-alpha.6 → 0.2.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +27 -8
  3. package/package.json +9 -3
  4. package/scripts/cli.js +18 -8
  5. package/scripts/core/git-mutation-guard.js +65 -0
  6. package/scripts/lib/acceptance-criteria.js +2 -9
  7. package/scripts/lib/ast/analyze.js +686 -0
  8. package/scripts/lib/ast/parse.js +131 -0
  9. package/scripts/lib/decision.js +4 -58
  10. package/scripts/lib/diff-parser.js +75 -4
  11. package/scripts/lib/risk-classifier.js +1 -0
  12. package/scripts/lib/rules/_helpers.js +90 -10
  13. package/scripts/lib/rules/ast-dataflow.js +103 -0
  14. package/scripts/lib/rules/auto-apply-commit-push.js +44 -0
  15. package/scripts/lib/rules/command-injection.js +72 -0
  16. package/scripts/lib/rules/cors-wildcard.js +84 -0
  17. package/scripts/lib/rules/eval-usage.js +102 -0
  18. package/scripts/lib/rules/hardcoded-credential.js +134 -2
  19. package/scripts/lib/rules/insecure-tls.js +86 -0
  20. package/scripts/lib/rules/package-lockfile-risk.js +23 -0
  21. package/scripts/lib/rules/secret-fallback.js +206 -24
  22. package/scripts/lib/rules/sql-injection.js +68 -0
  23. package/scripts/lib/rules/test-or-security-disable.js +102 -0
  24. package/scripts/lib/session-constants.js +30 -0
  25. package/scripts/lib/session-io.js +81 -0
  26. package/scripts/lib/session-resolver.js +17 -0
  27. package/scripts/lib/verify-helpers.js +442 -0
  28. package/scripts/orchestrators/_handoff-utils.js +45 -0
  29. package/scripts/orchestrators/apply.js +33 -17
  30. package/scripts/orchestrators/gate.js +17 -18
  31. package/scripts/orchestrators/report.js +4 -48
  32. package/scripts/orchestrators/verify-pr.js +49 -313
  33. package/scripts/benchmark/capture-live-ai-diff.js +0 -230
  34. package/scripts/benchmark/rules.js +0 -214
  35. package/scripts/benchmark/scrape-oss-positives.js +0 -237
  36. package/scripts/benchmark/verify-candidates.js +0 -110
@@ -0,0 +1,686 @@
1
+ // Intraprocedural const/taint propagation + dangerous-sink detection.
2
+ //
3
+ // Goal: catch the variable-mediated injection forms the line-oriented regex
4
+ // rules provably miss, WITHOUT introducing a single false positive. A naive
5
+ // taint analyzer over-flags (every Identifier looks "tainted"); that would
6
+ // regress the benchmark. So the rule is inverted and conservative:
7
+ //
8
+ // A value is flagged ONLY when it is provably DYNAMIC (not a compile-time
9
+ // constant string) AND it flows into a dangerous sink. An identifier with no
10
+ // visible binding (an import or global) is NOT flagged on its own. The FP guard
11
+ // is the const-propagation: a variable bound only to constant strings is
12
+ // CONST-SAFE and is never flagged.
13
+ //
14
+ // Const-propagation (the prototype's FP fix):
15
+ // const q = `SELECT 1`; db.query(q); // q is CONST-SAFE → NOT flagged
16
+ // const q = "SELECT " + x; db.query(q); // q is DYNAMIC → flagged
17
+ //
18
+ // Scope model: a binding map per function/program scope (lexical chain). A
19
+ // binding is CONST-SAFE iff EVERY assignment to it (declarator init +
20
+ // reassignments) is a const-safe string; any non-const-safe assignment, or a
21
+ // reassignment we can't see as const-safe, makes it DYNAMIC. Function PARAMETERS
22
+ // are always dynamic. Analysis is strictly intraprocedural: a value returned
23
+ // from another function call is dynamic (we never chase across calls — that is
24
+ // where FPs come from).
25
+
26
+ import { parseToAst, walk } from './parse.js';
27
+
28
// AST node types that open a new function scope; each one gets its own binding
// map and is never descended into while collecting an enclosing scope.
const FN_TYPES = new Set(['FunctionDeclaration', 'FunctionExpression', 'ArrowFunctionExpression']);

// SQL-ish sink methods executed against a connection/ORM raw escape hatch.
// Only member calls (`x.query(...)`) are checked against this set.
const SQL_SINKS = new Set(['query', 'execute', 'raw']);

// SQL DML/DDL keyword — a dynamic string only counts as a SQL-injection sink if
// the surrounding code actually looks like SQL. This keeps a generic
// `emitter.query(dynamic)` or `cache.execute(fn)` out (huge FP source).
// Matched case-insensitively, on word boundaries, against the static text
// recovered from the sink argument.
const SQL_KW_RE = /\b(SELECT|INSERT\s+INTO|INSERT|UPDATE|DELETE\s+FROM|DELETE|REPLACE|MERGE|UNION|DROP\s+TABLE|DROP|ALTER\s+TABLE|ALTER|TRUNCATE|CREATE\s+TABLE|FROM|WHERE)\b/i;

// child_process methods that run a SHELL command string (injectable directly).
// Checked both as a method name (`cp.exec(...)`) and as a bare identifier.
const CP_SHELL_EXEC = new Set(['exec', 'execSync']);
// child_process methods that take (command, args[]) and only become injectable
// when shell:true is set AND the command is dynamic.
const CP_SPAWN = new Set(['spawn', 'spawnSync', 'execFile', 'execFileSync']);
43
+
44
/**
 * Create an empty lexical scope record.
 *
 * `bindings` maps a variable name to its classification record (the binding
 * collector stores `{ constSafe, dynamic, initExprs }`; parameters are stored as
 * `{ constSafe, dynamic }`). A name that is absent from the whole chain is
 * reported as `null` by `lookup`; each caller decides how to treat it.
 *
 * @param {object|null} parent enclosing scope, or null for the outermost scope
 * @returns {{ parent: object|null, bindings: Map<string, object> }}
 */
function makeScope(parent) {
  const bindings = new Map();
  return { parent, bindings };
}
51
+
52
/**
 * Resolve `name` through the scope chain, innermost scope first.
 *
 * @param {object|null} scope scope to start from (`{ parent, bindings }`)
 * @param {string} name identifier to resolve
 * @returns {object|null} the binding record of the nearest scope declaring
 *   `name`, or null when no scope in the chain declares it
 */
function lookup(scope, name) {
  for (let current = scope; current; current = current.parent) {
    if (current.bindings.has(name)) {
      return current.bindings.get(name);
    }
  }
  return null;
}
60
+
61
/**
 * Decide whether `node` is provably a compile-time-constant expression.
 *
 *   missing node (null/undefined)   → true
 *   Literal (any literal)           → true
 *   TemplateLiteral                 → true only with zero `${}` expressions
 *   BinaryExpression                → true only for `+` with BOTH sides const-safe
 *   Identifier                      → true only if its resolved binding has
 *                                     `constSafe === true`; an unresolved name
 *                                     (import, global) is NOT const-safe
 *   anything else                   → false
 *
 * "Not const-safe" only means "not certainly a constant"; whether a value is
 * flaggable at a sink is decided separately by isDynamic().
 *
 * @param {object|null|undefined} node expression node
 * @param {object} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function isConstSafe(node, scope) {
  if (!node) return true;

  const { type } = node;
  if (type === 'Literal') return true;
  if (type === 'TemplateLiteral') return node.expressions.length === 0;
  if (type === 'BinaryExpression') {
    return node.operator === '+' && isConstSafe(node.left, scope) && isConstSafe(node.right, scope);
  }
  if (type === 'Identifier') {
    const binding = lookup(scope, node.name);
    return Boolean(binding) && binding.constSafe === true;
  }
  return false;
}
93
+
94
/**
 * Decide whether `node` is a DYNAMIC value (runtime-assembled / external), i.e.
 * the thing that gets flagged when it flows into a sink.
 *
 *   missing node                     → false
 *   Literal                          → false
 *   TemplateLiteral                  → true only when it has `${}` expressions
 *   BinaryExpression `+`             → true when either side is dynamic
 *   BinaryExpression (other ops)     → false (booleans/numbers, not a string)
 *   ConditionalExpression            → true when either BRANCH is dynamic
 *   LogicalExpression (|| && ??)     → true when either operand is dynamic
 *   Identifier                       → `binding.dynamic === true`; an unresolved
 *                                      name is NOT flagged
 *   TaggedTemplate / Parenthesized   → follows the wrapped expression
 *   anything else (member, call, …)  → true
 *
 * Conditional and logical expressions are classified by the values they can
 * evaluate to. Without that, `exec(isWin ? 'dir' : 'ls')` or a `'a' || 'b'`
 * default would be reported as dynamic although every possible result is a
 * constant. The condition itself only selects a branch and is ignored.
 *
 * @param {object|null|undefined} node expression node
 * @param {object} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function isDynamic(node, scope) {
  if (!node) return false;
  switch (node.type) {
    case 'Literal':
      return false;
    case 'TemplateLiteral':
      return node.expressions.length > 0;
    case 'BinaryExpression':
      if (node.operator === '+') return isDynamic(node.left, scope) || isDynamic(node.right, scope);
      // Other binary ops (e.g. comparisons) yield booleans/numbers, not an
      // injectable command/query string — not the dynamic-string shape.
      return false;
    case 'ConditionalExpression':
      // The result is one of the two branches; the test never reaches the sink.
      return isDynamic(node.consequent, scope) || isDynamic(node.alternate, scope);
    case 'LogicalExpression':
      // `a || b`, `a && b`, `a ?? b` evaluate to one of their operands.
      return isDynamic(node.left, scope) || isDynamic(node.right, scope);
    case 'Identifier': {
      const binding = lookup(scope, node.name);
      if (binding) return binding.dynamic === true;
      // Unknown identifier: a bare top-level/imported name passed straight to a
      // sink. Not flagged — it is not a clearly assembled dynamic value and
      // flagging bare identifiers is the #1 FP source.
      return false;
    }
    case 'TaggedTemplateExpression':
      return isDynamic(node.quasi, scope);
    case 'ParenthesizedExpression':
      return isDynamic(node.expression, scope);
    default:
      // MemberExpression (req.body.x), CallExpression, AwaitExpression, etc.
      // are runtime values. Sinks apply their own extra gates (a SQL sink also
      // requires a SQL keyword); here we report the structural truth.
      return true;
  }
}
137
+
138
/**
 * Classify the value produced by an initializer or assignment expression.
 *
 * The two flags are computed independently, so a value can be neither: e.g. a
 * bare unresolved identifier is not a proven constant and not a flaggable
 * dynamic string either.
 *
 * @param {object} node expression node being assigned
 * @param {object} scope scope used to resolve identifiers
 * @returns {{ constSafe: boolean, dynamic: boolean }}
 *   constSafe — result of isConstSafe(); dynamic — result of isDynamic()
 */
function classifyValue(node, scope) {
  const constSafe = isConstSafe(node, scope);
  const dynamic = isDynamic(node, scope);
  return { constSafe, dynamic };
}
152
+
153
/**
 * Fill `scope.bindings` with every name declared or assigned directly inside
 * `scopeNode`, WITHOUT descending into nested functions (they get their own
 * scope).
 *
 * Steps:
 *   0. every parameter name of a function scope is recorded as dynamic;
 *   1. every `VariableDeclaration` declarator is classified from its init
 *      (no init → neither const-safe nor dynamic) and merged; only a plain
 *      `name = expr` declarator records its init expression for SQL-text
 *      recovery, destructured names do not;
 *   2. every assignment to a plain identifier is merged:
 *        `=`              → classification of the right-hand side
 *        `+=`             → const-safe only if BOTH the current binding and the
 *                           RHS are; dynamic if either is
 *        `||=` `&&=` `??=`→ same merge as `+=`: the variable ends up holding
 *                           either its old value or the RHS, so the RHS must be
 *                           classified (and is kept for SQL-text recovery)
 *        other operators  → numeric results; neither const-safe nor dynamic
 *
 * Merging is delegated to mergeBinding(), so a binding only gets less safe as
 * more assignments are seen.
 *
 * @param {object} scopeNode Program | Function node
 * @param {object} scope the scope whose bindings we fill
 */
function collectBindings(scopeNode, scope) {
  // Logical assignments keep the old value or take the RHS; they do not produce
  // numbers, so they must not fall into the "numeric compound op" bucket.
  const LOGICAL_ASSIGN_OPS = new Set(['||=', '&&=', '??=']);

  // Classification shared by `+=` and logical assignments: the result depends
  // on BOTH the binding's current state (in this scope) and the RHS.
  const combineWithExisting = (name, right) => {
    const existing = scope.bindings.get(name);
    const existingSafe = existing ? existing.constSafe === true : false;
    return {
      constSafe: isConstSafe(right, scope) && existingSafe,
      dynamic: isDynamic(right, scope) || (existing ? existing.dynamic : false),
    };
  };

  // Phase 0: parameters of a function scope are always dynamic.
  if (FN_TYPES.has(scopeNode.type) && Array.isArray(scopeNode.params)) {
    for (const param of scopeNode.params) {
      for (const name of patternNames(param)) {
        scope.bindings.set(name, { constSafe: false, dynamic: true });
      }
    }
  }

  // Phase 1+2: walk the scope body but DO NOT cross into nested functions.
  const body = scopeNode.type === 'Program' ? scopeNode : scopeNode.body;
  walkScopeLocal(body, scopeNode, (node) => {
    if (node.type === 'VariableDeclaration') {
      for (const decl of node.declarations) {
        // Only simple `name = expr` bindings carry a recoverable init expr.
        const simple = decl.id.type === 'Identifier';
        for (const name of patternNames(decl.id)) {
          const cls = decl.init ? classifyValue(decl.init, scope) : { constSafe: false, dynamic: false };
          if (simple && decl.init) cls.initExpr = decl.init;
          mergeBinding(scope, name, cls);
        }
      }
      return;
    }

    if (node.type !== 'AssignmentExpression' || node.left.type !== 'Identifier') return;

    const name = node.left.name;
    let cls;
    if (node.operator === '=') {
      cls = classifyValue(node.right, scope);
      cls.initExpr = node.right;
    } else if (node.operator === '+=') {
      cls = combineWithExisting(name, node.right);
      // `+=` does not record its RHS for SQL-text recovery; that recovery is
      // best-effort, which only risks a MISS, never an FP.
      cls.initExpr = null;
    } else if (LOGICAL_ASSIGN_OPS.has(node.operator)) {
      cls = combineWithExisting(name, node.right);
      // The RHS is a complete candidate value, so keep it for SQL-text recovery.
      cls.initExpr = node.right;
    } else {
      // Remaining compound ops (-=, *=, …) produce numbers — not a string sink
      // concern.
      cls = { constSafe: false, dynamic: false };
    }
    mergeBinding(scope, name, cls);
  });
}
216
+
217
/**
 * Fold one more classification into the binding for `name` in `scope`.
 *
 * The merge only ever moves a binding toward "unsafe":
 *   - `dynamic` is sticky (old OR new);
 *   - `constSafe` survives only if the old record AND the new value are
 *     const-safe (a first sighting just takes the new value's flags).
 *
 * Every recorded `cls.initExpr` is appended to `initExprs`, so SQL-text
 * recovery can scan the whole assignment history (`let q="SELECT 1"; q=q+x`
 * still surfaces the SELECT keyword). That history feeds only the looksLikeSql
 * gate, which is consulted after the sink argument was already found dynamic.
 *
 * @param {object} scope scope whose `bindings` map is updated in place
 * @param {string} name binding name
 * @param {{ constSafe: boolean, dynamic: boolean, initExpr?: object|null }} cls
 */
function mergeBinding(scope, name, cls) {
  const { bindings } = scope;
  const incoming = cls.initExpr ? [cls.initExpr] : [];
  const previous = bindings.get(name);

  if (previous) {
    // Parameter records carry no initExprs; treat that as an empty history.
    const history = previous.initExprs || [];
    bindings.set(name, {
      constSafe: previous.constSafe && cls.constSafe,
      dynamic: previous.dynamic || cls.dynamic,
      initExprs: history.concat(incoming),
    });
    return;
  }

  bindings.set(name, { constSafe: cls.constSafe, dynamic: cls.dynamic, initExprs: incoming });
}
241
+
242
/**
 * Collect every identifier name bound by a binding pattern.
 *
 * Handles plain identifiers, defaults (`a = 1`), rest elements, array patterns
 * (holes are skipped) and object patterns (including `...rest`). Any other node
 * type contributes nothing.
 *
 * @param {object|null|undefined} pat pattern node
 * @param {string[]} [out] accumulator; names are appended to it
 * @returns {string[]} the accumulator
 */
function patternNames(pat, out = []) {
  if (!pat) return out;

  const { type } = pat;
  if (type === 'Identifier') {
    out.push(pat.name);
  } else if (type === 'AssignmentPattern') {
    patternNames(pat.left, out);
  } else if (type === 'RestElement') {
    patternNames(pat.argument, out);
  } else if (type === 'ArrayPattern') {
    pat.elements.forEach((element) => {
      if (element) patternNames(element, out);
    });
  } else if (type === 'ObjectPattern') {
    pat.properties.forEach((prop) => {
      patternNames(prop.type === 'RestElement' ? prop.argument : prop.value, out);
    });
  }
  return out;
}
269
+
270
/**
 * Visit every node that belongs to one scope, stopping at nested functions.
 *
 * Traversal is pre-order and follows every own property that holds a node (an
 * object with a string `type`) or an array of nodes, except location/parent
 * bookkeeping keys. A function node other than `scopeNode` is neither visited
 * nor entered — it is a separate scope.
 *
 * @param {object|null} root a Program or BlockStatement (its statements are
 *   walked; the container itself is not visited), or any other node such as an
 *   arrow function's expression body (walked starting at the node itself)
 * @param {object} scopeNode the node owning the scope being walked
 * @param {(node: object) => void} visit callback invoked for each visited node
 */
function walkScopeLocal(root, scopeNode, visit) {
  const SKIP_KEYS = new Set(['loc', 'start', 'end', 'range', 'parent', '__parent']);
  const isNode = (value) => Boolean(value) && typeof value.type === 'string';

  const descend = (node) => {
    if (!isNode(node)) return;
    // Do not cross into a nested function scope.
    if (node !== scopeNode && FN_TYPES.has(node.type)) return;

    visit(node);
    for (const key of Object.keys(node)) {
      if (SKIP_KEYS.has(key)) continue;
      const value = node[key];
      const children = Array.isArray(value) ? value : [value];
      for (const child of children) {
        if (isNode(child)) descend(child);
      }
    }
  };

  if (!root) return;
  const isContainer = root.type === 'Program' || root.type === 'BlockStatement';
  // Arrow with expression body (`() => expr`): the body is itself the start node.
  const startNodes = isContainer ? root.body : [root];
  for (const start of startNodes) descend(start);
}
302
+
303
/**
 * Build one scope per Program/function node and return the node → scope map
 * that sink detection uses to find the right binding map.
 *
 * The program scope is collected first. Then every function node gets a child
 * scope whose parent is the scope of the nearest ancestor already present in the
 * map (falling back to the program scope).
 *
 * NOTE(review): this relies on the imported `walk` calling back as
 * `(node, parent)` in pre-order, so an enclosing function is registered before
 * the functions nested in it — confirm against parse.js. It also relies on
 * `__parent` links having been attached before this runs.
 *
 * @param {object} ast Program node
 * @returns {Map<object, object>} map from Program/function node to its scope
 */
function buildScopes(ast) {
  const rootScope = makeScope(null);
  collectBindings(ast, rootScope);
  const scopeOf = new Map([[ast, rootScope]]);

  walk(ast, (node, parent) => {
    if (!FN_TYPES.has(node.type)) return;
    const enclosing = nearestScope(scopeOf, parent) || rootScope;
    const fnScope = makeScope(enclosing);
    collectBindings(node, fnScope);
    scopeOf.set(node, fnScope);
  });

  return scopeOf;
}
327
+
328
/**
 * Starting at `node` itself, climb the `__parent` chain and return the scope of
 * the first node registered in `scopeOf`.
 *
 * @param {Map<object, object>} scopeOf node → scope map
 * @param {object|null|undefined} node node to start from
 * @returns {object|null} the nearest registered scope, or null if none is found
 */
function nearestScope(scopeOf, node) {
  for (let cursor = node; cursor; cursor = cursor.__parent || null) {
    if (scopeOf.has(cursor)) return scopeOf.get(cursor);
  }
  return null;
}
337
+
338
/**
 * Analyze source code for variable-mediated injection sinks.
 *
 * Pipeline: parse → attach `__parent` links → build per-function scopes → visit
 * every call / `new` expression and collect findings → de-duplicate and sort
 * them by line.
 *
 * @param {string} code source text to analyze
 * @param {string} file path reported in findings
 * @param {{ ts?: boolean }} [opts] `ts` is forwarded to the parser
 * @returns {{ parsed: boolean, findings: Array }} `parsed` is false (with no
 *   findings) when the parser returns no AST
 */
export function analyze(code, file, opts = {}) {
  const ast = parseToAst(code, { ts: opts.ts });
  if (!ast) {
    return { parsed: false, findings: [] };
  }

  // Parent links let a sink resolve the scope that encloses it.
  annotateParents(ast);
  const scopeOf = buildScopes(ast);

  const collected = [];
  // Nodes without location info are reported on line 0.
  const lineOf = (n) => (n.loc ? n.loc.start.line : 0);

  walk(ast, (node) => {
    switch (node.type) {
      case 'CallExpression':
        handleCall(node, scopeOf, file, lineOf, collected);
        break;
      case 'NewExpression':
        handleNew(node, scopeOf, file, lineOf, collected);
        break;
      default:
        break;
    }
  });

  return { parsed: true, findings: dedupe(collected) };
}
367
+
368
/**
 * Resolve the binding scope that ENCLOSES `node`.
 *
 * Climbs `__parent` links starting at the node's parent and returns the scope
 * of the first function or Program ancestor registered in `scopeOf`. If the
 * climb finds nothing, the scope of any registered Program node is used; if
 * there is none, a fresh empty scope is returned.
 *
 * @param {Map<object, object>} scopeOf node → scope map
 * @param {object} node node whose enclosing scope is wanted
 * @returns {object} a scope (never null)
 */
function scopeForNode(scopeOf, node) {
  for (let ancestor = node.__parent; ancestor; ancestor = ancestor.__parent) {
    const ownsScope = FN_TYPES.has(ancestor.type) || ancestor.type === 'Program';
    if (ownsScope && scopeOf.has(ancestor)) return scopeOf.get(ancestor);
  }

  // Fallback: program scope.
  for (const [owner, ownerScope] of scopeOf) {
    if (owner.type === 'Program') return ownerScope;
  }
  return makeScope(null);
}
381
+
382
/**
 * True when `expr` is syntactically a regular expression: a regex literal, or a
 * `RegExp(...)` / `new RegExp(...)` construction.
 */
function isRegExpExpression(expr) {
  if (!expr) return false;
  if (expr.type === 'Literal') return expr.regex != null || expr.value instanceof RegExp;
  const constructs = expr.type === 'NewExpression' || expr.type === 'CallExpression';
  return constructs && expr.callee.type === 'Identifier' && expr.callee.name === 'RegExp';
}

/**
 * True when the receiver of a `.exec(...)` call is provably a RegExp: either a
 * regex expression itself, or an identifier whose recorded assignments are ALL
 * regex expressions. Anything unresolved is reported as "not a regex".
 */
function isRegExpReceiver(receiver, scope) {
  if (isRegExpExpression(receiver)) return true;
  if (!receiver || receiver.type !== 'Identifier') return false;
  const binding = lookup(scope, receiver.name);
  const inits = binding && Array.isArray(binding.initExprs) ? binding.initExprs : [];
  return inits.length > 0 && inits.every(isRegExpExpression);
}

/**
 * Inspect one CallExpression and push at most one finding when a dynamic value
 * reaches a dangerous sink:
 *
 *   - `eval(x)` and `window|globalThis|self|global.eval(x)` with a dynamic `x`;
 *   - `<obj>.query/.execute/.raw(x)` with a dynamic, SQL-looking `x` and no
 *     params array;
 *   - `exec/execSync(x)` (bare or as a method) with a dynamic `x`;
 *   - `spawn/spawnSync/execFile/execFileSync(x, …)` (bare or as a method) with
 *     a dynamic `x` AND an options object carrying `shell: true`.
 *
 * `.exec(x)` on a receiver that is provably a RegExp (`/re/.exec(s)`,
 * `new RegExp(p).exec(s)`, or an identifier bound only to such values) is
 * `RegExp.prototype.exec`, not child_process, and is never reported.
 *
 * @param {object} node CallExpression node
 * @param {Map<object, object>} scopeOf node → scope map
 * @param {string} file path reported in findings
 * @param {(node: object) => number} line maps a node to its source line
 * @param {Array} findings output array, appended to in place
 */
function handleCall(node, scopeOf, file, line, findings) {
  const callee = node.callee;
  const scope = scopeForNode(scopeOf, node);
  const args = node.arguments || [];
  const arg0 = args[0];

  const isBare = callee.type === 'Identifier';
  const isMember = callee.type === 'MemberExpression' && callee.property.type === 'Identifier';

  // eval(dynamic), or indirect eval through a global object.
  const isDirectEval = isBare && callee.name === 'eval';
  const isGlobalEval =
    isMember &&
    callee.property.name === 'eval' &&
    callee.object.type === 'Identifier' &&
    /^(window|globalThis|self|global)$/.test(callee.object.name);
  if (isDirectEval || isGlobalEval) {
    if (arg0 && isDynamic(arg0, scope)) {
      findings.push(evalFinding(file, line(node), node));
    }
    return;
  }

  if (!isBare && !isMember) return;
  const name = isMember ? callee.property.name : callee.name;

  // SQL sink: .query / .execute / .raw with a dynamic, NON-parameterized,
  // SQL-shaped argument. Only method calls count as SQL sinks.
  if (isMember && SQL_SINKS.has(name)) {
    if (arg0 && isDynamic(arg0, scope) && looksLikeSql(arg0, scope) && !isParameterized(node, arg0, scope)) {
      findings.push(sqlFinding(file, line(node), node));
      return;
    }
  }

  // exec / execSync with a dynamic command string.
  if (CP_SHELL_EXEC.has(name)) {
    // `regex.exec(input)` matches text; it does not run a shell command.
    if (isMember && name === 'exec' && isRegExpReceiver(callee.object, scope)) return;
    if (arg0 && isDynamic(arg0, scope)) {
      findings.push(cmdFinding(file, line(node), node, 'critical'));
      return;
    }
  }

  // spawn / execFile family: only injectable with shell:true AND dynamic cmd.
  if (CP_SPAWN.has(name)) {
    if (arg0 && isDynamic(arg0, scope) && hasShellTrue(args, scope)) {
      findings.push(cmdFinding(file, line(node), node, 'critical'));
    }
  }
}
457
+
458
/**
 * Inspect one NewExpression and report `new Function(...)` whose code body is
 * dynamic.
 *
 * The Function constructor compiles its LAST argument as the function body, so
 * only that argument is checked. A constant body — whatever the argument count
 * — is not reported here, and `new Function()` with no arguments is ignored.
 *
 * @param {object} node NewExpression node
 * @param {Map<object, object>} scopeOf node → scope map
 * @param {string} file path reported in findings
 * @param {(node: object) => number} line maps a node to its source line
 * @param {Array} findings output array, appended to in place
 */
function handleNew(node, scopeOf, file, line, findings) {
  const { callee } = node;
  if (callee.type !== 'Identifier' || callee.name !== 'Function') return;

  const scope = scopeForNode(scopeOf, node);
  const ctorArgs = node.arguments || [];
  const bodyArg = ctorArgs[ctorArgs.length - 1];
  if (!bodyArg || !isDynamic(bodyArg, scope)) return;

  findings.push(evalFinding(file, line(node), node));
}
473
+
474
/**
 * FP guard for SQL sinks: the argument only counts when the static text that
 * can be recovered from it contains a SQL keyword.
 *
 * The static text comes from string literals and template parts of the
 * argument, plus the recorded assignment expressions of any identifier reachable
 * from it. This keeps generic `.query(dynamic)` / `.execute(dynamic)` calls on
 * non-SQL objects out.
 *
 * @param {object} node the sink's first argument
 * @param {object} scope scope used to resolve identifiers
 * @returns {boolean}
 */
function looksLikeSql(node, scope) {
  const visitedNames = new Set();
  const staticText = collectStaticText(node, scope, visitedNames);
  return SQL_KW_RE.test(staticText);
}
482
+
483
/**
 * Gather the static (literal) text contributed by an expression, for the SQL
 * keyword gate.
 *
 *   Literal                  → its value when it is a string, else ''
 *   TemplateLiteral          → its quasis (cooked text, falling back to raw),
 *                              joined with a space
 *   BinaryExpression `+`     → left text + ' ' + right text
 *   Identifier               → the text of every assignment expression recorded
 *                              on its binding (`initExprs`), joined with a
 *                              space; this is how `const q = "SELECT..." + x;
 *                              db.query(q)` surfaces the keyword although the
 *                              literal lives in another statement
 *   TaggedTemplateExpression → the text of its template
 *   anything else            → ''
 *
 * The result is only meant for keyword matching: pieces are separated by
 * spaces and dynamic parts contribute nothing.
 *
 * @param {object|null|undefined} node expression node
 * @param {object} scope scope used to resolve identifiers
 * @param {Set<string>} seen identifier names already expanded; each name is
 *   expanded at most once per traversal, which also stops self-referential
 *   assignments such as `q = q + x` from recursing forever
 * @returns {string}
 */
function collectStaticText(node, scope, seen) {
  // A template element without a `value` record contributes no text instead of
  // throwing; otherwise prefer the cooked text and fall back to the raw text
  // (cooked is null for escapes that are only valid in tagged templates).
  const quasiText = (quasi) => {
    const value = quasi && quasi.value;
    if (!value) return '';
    if (value.cooked != null) return value.cooked;
    return value.raw || '';
  };

  if (!node) return '';
  switch (node.type) {
    case 'Literal':
      return typeof node.value === 'string' ? node.value : '';
    case 'TemplateLiteral':
      return node.quasis.map(quasiText).join(' ');
    case 'BinaryExpression':
      if (node.operator === '+') {
        return collectStaticText(node.left, scope, seen) + ' ' + collectStaticText(node.right, scope, seen);
      }
      return '';
    case 'Identifier': {
      if (seen.has(node.name)) return '';
      seen.add(node.name);
      const binding = lookup(scope, node.name);
      if (!binding || !Array.isArray(binding.initExprs)) return '';
      return binding.initExprs.map((expr) => collectStaticText(expr, scope, seen)).join(' ');
    }
    case 'TaggedTemplateExpression':
      return collectStaticText(node.quasi, scope, seen);
    default:
      return '';
  }
}
526
+
527
/**
 * A call counts as "parameterized" (safe) when its SECOND argument is an array
 * literal — the canonical params shape (pg `$1` + [..], mysql2 `?` + [..]).
 *
 * Only a literal array is accepted as the safe marker; a params array passed
 * through a variable is deliberately not recognized. `arg0` and `scope` are
 * accepted for signature compatibility and are not consulted.
 *
 * @param {object} callNode CallExpression node
 * @param {object} arg0 first call argument (unused)
 * @param {object} scope resolution scope (unused)
 * @returns {boolean}
 */
function isParameterized(callNode, arg0, scope) {
  const [, paramsArg] = callNode.arguments || [];
  return Boolean(paramsArg) && paramsArg.type === 'ArrayExpression';
}
546
+
547
/**
 * Does any argument of the call carry an options object with `shell: true`?
 *
 * Every object-literal argument is checked, whatever its position. Only a
 * non-computed `shell` key (identifier or string literal) whose value is the
 * literal `true` counts; `shell: someVar` or a truthy non-boolean literal does
 * not. `scope` is accepted for signature compatibility and is not consulted.
 *
 * @param {Array<object>} args call arguments
 * @param {object} scope resolution scope (unused)
 * @returns {boolean}
 */
function hasShellTrue(args, scope) {
  const isShellKey = (key) =>
    (key.type === 'Identifier' && key.name === 'shell') || (key.type === 'Literal' && key.value === 'shell');

  const enablesShell = (prop) =>
    prop.type === 'Property' &&
    !prop.computed &&
    isShellKey(prop.key) &&
    prop.value.type === 'Literal' &&
    prop.value.value === true;

  return args.some((arg) => Boolean(arg) && arg.type === 'ObjectExpression' && arg.properties.some(enablesShell));
}
568
+
569
/**
 * Attach a `__parent` back-link to every node of the tree (the root gets
 * `null`), so a sink can later climb to the scope that encloses it.
 *
 * The tree is mutated in place. Children are found by scanning each node's own
 * properties for nodes (objects with a string `type`) or arrays of nodes;
 * location keys and `__parent` itself are never followed.
 *
 * @param {object} ast root node
 */
function annotateParents(ast) {
  const SKIP_KEYS = new Set(['loc', 'start', 'end', 'range', '__parent']);
  const isNode = (value) => Boolean(value) && typeof value.type === 'string';

  const link = (node, parent) => {
    if (!isNode(node)) return;
    node.__parent = parent;
    for (const key of Object.keys(node)) {
      if (SKIP_KEYS.has(key)) continue;
      const value = node[key];
      const candidates = Array.isArray(value) ? value : [value];
      for (const candidate of candidates) {
        if (isNode(candidate)) link(candidate, node);
      }
    }
  };

  link(ast, null);
}
588
+
589
+ // ---- Finding constructors (match the regex-rule finding shape) ----
590
+
591
/**
 * Build a finding object in the shape used by the regex rules.
 *
 * `id` is `RULE_<RULE>_<PATTERN>` with both parts upper-cased and dashes turned
 * into underscores. `blocks_apply` is true only for `critical` severity (a
 * `high` finding asks for human review but does not hard-block apply). `match`
 * is a short structural marker derived from `node`.
 *
 * @param {object} spec
 * @param {string} spec.rule rule identifier (dash-separated)
 * @param {string} spec.pattern pattern identifier (dash-separated)
 * @param {string} spec.severity e.g. 'high' | 'critical'
 * @param {string} spec.file path reported in the finding
 * @param {number} spec.line source line reported in the finding
 * @param {string} spec.title
 * @param {string} spec.description
 * @param {string} spec.recommendation
 * @param {object} spec.node AST node of the sink
 * @returns {object} the finding
 */
function baseFinding({ rule, pattern, severity, file, line, title, description, recommendation, node }) {
  const toIdPart = (text) => text.toUpperCase().replace(/-/g, '_');
  const id = `RULE_${toIdPart(rule)}_${toIdPart(pattern)}`;
  const blocksApply = severity === 'critical';

  return {
    id,
    rule,
    pattern,
    severity,
    category: 'code-injection',
    file,
    line,
    title,
    description,
    recommendation,
    blocks_apply: blocksApply,
    match: snippet(node),
  };
}
609
+
610
/**
 * Finding for a dynamic value reaching eval() / new Function(). Severity is
 * always 'high'.
 *
 * @param {string} file path reported in the finding
 * @param {number} line source line of the sink
 * @param {object} node AST node of the sink
 * @returns {object} the finding
 */
function evalFinding(file, line, node) {
  const details = {
    rule: 'ast-eval-injection',
    pattern: 'ast-eval',
    severity: 'high',
    title: 'eval()/Function() with a dynamically-built value (dataflow)',
    description:
      'Dataflow analysis found a runtime-assembled (non-constant) value flowing into eval()/new Function(). Executing assembled strings is a code-injection / RCE vector the line-by-line scanner misses when the value is built across statements.',
    recommendation: 'Remove eval()/new Function(). Use JSON.parse for data or a lookup table for dispatch.',
  };
  return baseFinding({ ...details, file, line, node });
}
624
+
625
/**
 * Finding for a dynamically-built SQL string reaching .query()/.execute()/.raw()
 * without a params array. Severity is always 'high'.
 *
 * @param {string} file path reported in the finding
 * @param {number} line source line of the sink
 * @param {object} node AST node of the sink
 * @returns {object} the finding
 */
function sqlFinding(file, line, node) {
  const details = {
    rule: 'ast-sql-injection',
    pattern: 'ast-sql',
    severity: 'high',
    title: 'SQL sink fed a dynamically-built query (dataflow)',
    description:
      'Dataflow analysis found a runtime-assembled (non-constant) SQL string flowing into .query()/.execute()/.raw() without a params array. This is a SQL-injection vector the line scanner misses when the query is built in a separate statement.',
    recommendation: 'Use parameterized queries (placeholders + a params array), not an assembled query string.',
  };
  return baseFinding({ ...details, file, line, node });
}
639
+
640
/**
 * Finding for a dynamically-built command reaching a shell sink (child_process
 * exec/execSync, or spawn with shell:true).
 *
 * @param {string} file path reported in the finding
 * @param {number} line source line of the sink
 * @param {object} node AST node of the sink
 * @param {string} severity severity chosen by the caller (e.g. 'critical')
 * @returns {object} the finding
 */
function cmdFinding(file, line, node, severity) {
  const details = {
    rule: 'ast-command-injection',
    pattern: 'ast-cmd',
    title: 'Shell sink fed a dynamically-built command (dataflow)',
    description:
      'Dataflow analysis found a runtime-assembled (non-constant) command string flowing into child_process exec/execSync (or spawn with shell:true). This is an OS-command-injection / RCE vector the line scanner misses when the command is assembled across statements.',
    recommendation: 'Use execFile/spawn with an argument array (no shell), or strictly validate the input.',
  };
  return baseFinding({ ...details, severity, file, line, node });
}
654
+
655
/**
 * Build a short structural marker for a sink node. Nodes carry no raw source
 * text, so the marker is derived from the callee:
 *
 *   f(x)        → "f(...)"
 *   obj.m(x)    → "obj.m(...)"   (a non-identifier receiver is shown as "…")
 *   new C(x)    → "new C(...)"
 *
 * Any other shape — or any error while reading the node — yields ''.
 *
 * @param {object} node CallExpression or NewExpression node
 * @returns {string}
 */
function snippet(node) {
  try {
    const { type, callee } = node;
    if (type === 'CallExpression') {
      if (callee.type === 'Identifier') return `${callee.name}(...)`;
      if (callee.type === 'MemberExpression' && callee.property.type === 'Identifier') {
        const receiver = callee.object.type === 'Identifier' ? callee.object.name : '…';
        return `${receiver}.${callee.property.name}(...)`;
      }
    } else if (type === 'NewExpression' && callee.type === 'Identifier') {
      return `new ${callee.name}(...)`;
    }
  } catch {
    /* best-effort: a malformed node just yields no marker */
  }
  return '';
}
674
+
675
/**
 * Keep the first finding for each {rule, line} pair and return the survivors
 * sorted by ascending line. The input array is not modified.
 *
 * @param {Array<{ rule: string, line: number }>} findings
 * @returns {Array} a new array of unique findings
 */
function dedupe(findings) {
  const firstByKey = new Map();
  for (const finding of findings) {
    const key = `${finding.rule}:${finding.line}`;
    if (!firstByKey.has(key)) firstByKey.set(key, finding);
  }
  return [...firstByKey.values()].sort((a, b) => a.line - b.line);
}