@ps-neko/nekowork 0.2.0-alpha.7 → 0.2.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ps-neko/nekowork",
3
- "version": "0.2.0-alpha.7",
3
+ "version": "0.2.0-alpha.8",
4
4
  "description": "Local verification gate for AI-written code diffs. Deterministic rules decide the verdict, never the LLM. No auto-commit, push, or deploy — you decide at the Human Gate.",
5
5
  "keywords": [
6
6
  "ai-code-review",
@@ -1,4 +1,5 @@
1
- // Intraprocedural const/taint propagation + dangerous-sink detection.
1
+ // Inter-procedural (intra-module) const/taint propagation + dangerous-sink
2
+ // detection.
2
3
  //
3
4
  // Goal: catch the variable-mediated injection forms the line-oriented regex
4
5
  // rules provably miss, WITHOUT introducing a single false positive. A naive
@@ -19,9 +20,33 @@
19
20
  // binding is CONST-SAFE iff EVERY assignment to it (declarator init +
20
21
  // reassignments) is a const-safe string; any non-const-safe assignment, or a
21
22
  // reassignment we can't see as const-safe, makes it DYNAMIC. Function PARAMETERS
22
- // are always dynamic. Analysis is strictly intraprocedural: a value returned
23
- // from another function call is dynamic (we never chase across calls — that is
24
- // where FPs come from).
23
+ // are always dynamic.
24
+ //
25
+ // Inter-procedural upgrade (intra-module only — never crosses files):
26
+ // 1. Arg-sensitive local-function return-taint resolution. When a sink
27
+ // argument is a CallExpression to a function DEFINED in this module
28
+ // (FunctionDeclaration or const = FunctionExpression/Arrow), the function's
29
+ // return expression(s) are evaluated with its params BOUND to the call
30
+ // site's argument classifications, recovering both the dynamic flag and the
31
+ // static SQL text. This makes
32
+ // function build(x){ return "SELECT "+x } db.query(build(req.id)) // FLAG
33
+ // while keeping
34
+ // function build(){ return "SELECT 1" } db.query(build()) // clean
35
+ // function id(x){ return x } db.query(id("SELECT 1")) // clean
36
+ // The resolver is guarded by a visited-set (cycle guard) and a depth limit
37
+ // (~6). Unknown / non-local calls stay structurally dynamic with NO
38
+ // recovered text, so the SQL-keyword gate still protects against FPs. The
39
+ // resolution is ADDITIVE: it can only turn a clean SQL sink into a finding
40
+ // (by recovering SQL text from a helper) — it never clears an existing one.
41
+ // 2. Sink-alias resolution. A module binding `const X = <obj>.<sinkMethod>`
42
+ // (query/execute/raw → sql alias; exec/execSync → shell alias), where X is a
43
+ // simple const not reassigned, makes a later `X(arg)` call get the same
44
+ // dynamic + SQL-keyword + parameterized treatment as the underlying sink.
45
+ // `const run = console.log; run(...)` is NOT a sink (console.log is not a
46
+ // tracked sink method).
47
+ //
48
+ // Both upgrades inherit the same FP guards (const-propagation, SQL-keyword gate,
49
+ // params-array exemption), so they hold the FP=0 benchmark gate.
25
50
 
26
51
  import { parseToAst, walk } from './parse.js';
27
52
 
@@ -41,6 +66,221 @@ const CP_SHELL_EXEC = new Set(['exec', 'execSync']);
41
66
  // when shell:true is set AND the command is dynamic.
42
67
  const CP_SPAWN = new Set(['spawn', 'spawnSync', 'execFile', 'execFileSync']);
43
68
 
69
+ // Inter-procedural resolution guards.
70
+ const IP_DEPTH_LIMIT = 6; // max local-call resolution depth (cycle/runaway guard)
71
+
72
/**
 * Index the functions defined in this module by the name they are bound to.
 *
 * Two binding forms are recognised while walking the whole tree (so nested
 * scopes are included):
 *   - `function name() {…}`            → the FunctionDeclaration node
 *   - `const|let|var name = function…` → the FunctionExpression node
 *   - `const|let|var name = (…) => …`  → the ArrowFunctionExpression node
 *
 * Names are NOT scope-qualified: when the same name is bound more than once,
 * the binding visited last by `walk` replaces the earlier one.
 *
 * @param {object} ast Program node
 * @returns {Map<string, object>} name → function node
 */
function collectLocalFns(ast) {
  const byName = new Map();
  const isFunctionValue = (init) =>
    Boolean(init) &&
    (init.type === 'FunctionExpression' || init.type === 'ArrowFunctionExpression');

  walk(ast, (node) => {
    switch (node.type) {
      case 'FunctionDeclaration':
        if (node.id && node.id.type === 'Identifier') {
          byName.set(node.id.name, node);
        }
        break;
      case 'VariableDeclaration':
        node.declarations
          .filter((decl) => decl.id.type === 'Identifier' && isFunctionValue(decl.init))
          .forEach((decl) => {
            byName.set(decl.id.name, decl.init);
          });
        break;
      default:
        break;
    }
  });

  return byName;
}
100
+
101
/**
 * Find bindings that alias a tracked sink method, e.g. `const q = db.query`.
 *
 * A declarator qualifies when ALL of the following hold:
 *   - it belongs to a `const` declaration;
 *   - its target is a plain Identifier (no destructuring);
 *   - its initializer is a non-computed MemberExpression whose property name is
 *     in SQL_SINKS (→ kind 'sql') or CP_SHELL_EXEC (→ kind 'shell').
 *
 * Any name that appears as the Identifier target of an AssignmentExpression
 * anywhere in the tree is dropped afterwards. Names are not scope-qualified, so
 * this also discards an alias whose name is reused and assigned elsewhere.
 *
 * @param {object} ast Program node
 * @returns {Map<string, {kind:'sql'|'shell', method:string}>}
 */
function collectSinkAliases(ast) {
  const aliases = new Map(); // name → {kind, method}
  const assignedNames = new Set(); // names assigned anywhere → disqualified

  // Map a member name to the sink family it belongs to, or null if untracked.
  const sinkKindOf = (method) => {
    if (SQL_SINKS.has(method)) return 'sql';
    if (CP_SHELL_EXEC.has(method)) return 'shell';
    return null;
  };

  walk(ast, (node) => {
    if (node.type === 'VariableDeclaration') {
      // Only `const` bindings can be trusted to keep pointing at the sink.
      if (node.kind !== 'const') return;
      for (const decl of node.declarations) {
        const init = decl.init;
        const isPlainMemberInit =
          decl.id.type === 'Identifier' &&
          init &&
          init.type === 'MemberExpression' &&
          init.property.type === 'Identifier' &&
          !init.computed;
        if (!isPlainMemberInit) continue;

        const method = init.property.name;
        const kind = sinkKindOf(method);
        if (kind !== null) aliases.set(decl.id.name, { kind, method });
      }
      return;
    }
    if (node.type === 'AssignmentExpression' && node.left.type === 'Identifier') {
      assignedNames.add(node.left.name);
    }
  });

  assignedNames.forEach((name) => aliases.delete(name));
  return aliases;
}
140
+
141
/**
 * List the expressions a function can return.
 *
 * - Arrow function with an expression body: the body is the single result.
 * - Anything else: the argument of every ReturnStatement reachable from the
 *   body, in source (depth-first, key-order) order. Bare `return;` statements
 *   contribute nothing. Nested functions are skipped entirely because their
 *   returns belong to the nested function, not to `fn`.
 *
 * @param {object} fn FunctionDeclaration | FunctionExpression | ArrowFunctionExpression
 * @returns {object[]} return-value expressions
 */
function returnsOf(fn) {
  const hasExpressionBody =
    fn.type === 'ArrowFunctionExpression' && fn.body.type !== 'BlockStatement';
  if (hasExpressionBody) return [fn.body];

  // Positional / back-link properties that are not child syntax nodes.
  const NON_CHILD_KEYS = new Set(['loc', 'start', 'end', 'range', '__parent']);
  const isAstNode = (value) => Boolean(value) && typeof value.type === 'string';
  const collected = [];

  const visit = (node) => {
    if (!isAstNode(node)) return;
    if (node.type === 'ReturnStatement') {
      if (node.argument) collected.push(node.argument);
      return;
    }
    // A nested function's returns are not ours — stop here.
    if (FN_TYPES.has(node.type)) return;

    for (const [key, value] of Object.entries(node)) {
      if (NON_CHILD_KEYS.has(key)) continue;
      const children = Array.isArray(value) ? value : [value];
      children.forEach(visit);
    }
  };

  visit(fn.body);
  return collected;
}
176
+
177
/**
 * Arg-sensitive evaluator: classify an expression as { dynamic, text }, where
 * `text` is the statically recoverable string content (fed to the SQL-keyword
 * gate by the caller) and `dynamic` says whether a runtime value flows in.
 *
 * `env` maps an identifier name to an already-computed { dynamic, text }. A
 * CallExpression whose callee is an Identifier present in `fns` is resolved by
 * binding the function's plain-Identifier params to the classifications of the
 * call-site arguments and evaluating every expression from `returnsOf(fn)`.
 *
 * Leaf behaviour:
 *   - string Literal              → constant, text = its value
 *   - TemplateLiteral             → text = quasis joined by ' '; dynamic iff any
 *                                   interpolated expression is dynamic
 *   - `+` BinaryExpression        → dynamic if either side is; texts joined
 *   - other BinaryExpression      → constant, no text
 *   - Identifier not in `env`     → NOT dynamic, no text (conservative)
 *   - unknown / recursive call    → dynamic, no text
 *   - everything else             → dynamic, no text
 *
 * `depth` counts LOCAL-CALL resolution levels only: it is incremented when the
 * evaluator enters a resolved function's return expressions, not for ordinary
 * expression nesting. Counting expression nesting would make a long constant
 * concatenation (7+ operands) inside a helper hit the limit and be reported as
 * dynamic with its leading text dropped.
 *
 * @param {object} node expression node (may be null/undefined)
 * @param {Map<string,{dynamic:boolean,text:string}>} env param bindings
 * @param {Map<string,object>} fns local-function map
 * @param {number} depth current local-call resolution depth
 * @param {Set<string>} seen function names on the active call stack (cycle guard)
 * @returns {{dynamic:boolean, text:string}}
 */
function evalExpr(node, env, fns, depth, seen) {
  if (!node) return { dynamic: false, text: '' };
  // Too many nested local calls: give up, treat as an opaque runtime value.
  if (depth > IP_DEPTH_LIMIT) return { dynamic: true, text: '' };

  switch (node.type) {
    case 'Literal':
      return { dynamic: false, text: typeof node.value === 'string' ? node.value : '' };
    case 'TemplateLiteral': {
      const text = node.quasis
        .map((q) => {
          // Guard BOTH reads: a quasi without `value` contributes no text.
          const value = q.value || {};
          if (value.cooked != null) return value.cooked;
          return value.raw || '';
        })
        .join(' ');
      const dynamic = node.expressions.some((e) => evalExpr(e, env, fns, depth, seen).dynamic);
      return { dynamic, text };
    }
    case 'BinaryExpression': {
      if (node.operator !== '+') return { dynamic: false, text: '' };
      const l = evalExpr(node.left, env, fns, depth, seen);
      const r = evalExpr(node.right, env, fns, depth, seen);
      return { dynamic: l.dynamic || r.dynamic, text: l.text + ' ' + r.text };
    }
    case 'TaggedTemplateExpression':
      return evalExpr(node.quasi, env, fns, depth, seen);
    case 'ParenthesizedExpression':
      return evalExpr(node.expression, env, fns, depth, seen);
    case 'Identifier': {
      if (env.has(node.name)) return env.get(node.name);
      // Unknown bare identifier: conservative — not dynamic-flaggable, no text.
      return { dynamic: false, text: '' };
    }
    case 'CallExpression': {
      const callee = node.callee;
      const name = callee.type === 'Identifier' ? callee.name : null;
      if (name && fns.has(name) && !seen.has(name)) {
        const fn = fns.get(name);
        const argEnv = new Map();
        (fn.params || []).forEach((p, i) => {
          if (p.type === 'Identifier') {
            const arg = node.arguments[i];
            // Arguments are call-site expressions: same depth, caller's env.
            argEnv.set(
              p.name,
              arg ? evalExpr(arg, env, fns, depth, seen) : { dynamic: false, text: '' },
            );
          }
        });
        const seen2 = new Set(seen);
        seen2.add(name);
        let dynamic = false;
        let text = '';
        for (const ret of returnsOf(fn)) {
          // Entering the callee body is the only step that consumes depth.
          const v = evalExpr(ret, argEnv, fns, depth + 1, seen2);
          dynamic = dynamic || v.dynamic;
          text += ' ' + v.text;
        }
        return { dynamic, text };
      }
      // Unknown / non-local / recursive call → structurally dynamic, no text.
      return { dynamic: true, text: '' };
    }
    default:
      // MemberExpression (req.body.x), AwaitExpression, etc. — runtime value,
      // but no statically recoverable text.
      return { dynamic: true, text: '' };
  }
}
258
+
259
/**
 * Seed environment for evalExpr: mark every parameter name of every function
 * that encloses `node` as a dynamic value with no recoverable text.
 *
 * The `__parent` chain is followed from `node` outwards. For each ancestor
 * whose type is in FN_TYPES and whose `params` is an array, all names produced
 * by `patternNames(param)` are recorded. The innermost function is visited
 * first and an existing entry is never overwritten.
 *
 * @param {object} node a CallExpression sink node
 * @returns {Map<string,{dynamic:boolean,text:string}>}
 */
function enclosingParamEnv(node) {
  const bindings = new Map();
  for (let ancestor = node.__parent; ancestor; ancestor = ancestor.__parent) {
    const isFnWithParams = FN_TYPES.has(ancestor.type) && Array.isArray(ancestor.params);
    if (!isFnWithParams) continue;
    for (const param of ancestor.params) {
      for (const name of patternNames(param)) {
        if (bindings.has(name)) continue;
        bindings.set(name, { dynamic: true, text: '' });
      }
    }
  }
  return bindings;
}
283
+
44
284
  /**
45
285
  * Scope: a binding map + parent link. `bindings` maps name → { dynamic: bool }.
46
286
  * A name absent from the whole chain resolves to dynamic (unknown = unsafe).
@@ -351,12 +591,16 @@ export function analyze(code, file, opts = {}) {
351
591
  annotateParents(ast);
352
592
  const scopeOf = buildScopes(ast);
353
593
 
594
+ // Inter-procedural (intra-module) maps: local functions for arg-sensitive
595
+ // return-taint resolution, and sink aliases for `const X = obj.query` etc.
596
+ const ipCtx = { fns: collectLocalFns(ast), aliases: collectSinkAliases(ast) };
597
+
354
598
  const findings = [];
355
599
  const line = (n) => (n.loc ? n.loc.start.line : 0);
356
600
 
357
601
  walk(ast, (node) => {
358
602
  if (node.type === 'CallExpression') {
359
- handleCall(node, scopeOf, file, line, findings);
603
+ handleCall(node, scopeOf, file, line, findings, ipCtx);
360
604
  } else if (node.type === 'NewExpression') {
361
605
  handleNew(node, scopeOf, file, line, findings);
362
606
  }
@@ -365,6 +609,20 @@ export function analyze(code, file, opts = {}) {
365
609
  return { parsed: true, findings: dedupe(findings) };
366
610
  }
367
611
 
612
/**
 * Resolve a sink argument that is a direct call to a function defined in this
 * module.
 *
 * When `arg` is a CallExpression whose callee is an Identifier found in
 * `ipCtx.fns`, the call is evaluated with `evalExpr`, seeded with the
 * parameters of the functions enclosing the sink call `node`, and the
 * resulting { dynamic, text } is returned. In every other case the result is
 * `null`, which tells the caller to keep its own classification of the
 * argument.
 *
 * @param {object} arg first argument of the sink call (may be undefined)
 * @param {object} node the sink CallExpression itself
 * @param {{fns: Map<string,object>}} ipCtx inter-procedural context
 * @returns {{dynamic:boolean, text:string}|null}
 */
function resolveLocalCallArg(arg, node, ipCtx) {
  const isCall = Boolean(arg) && arg.type === 'CallExpression';
  if (!isCall) return null;

  const { callee } = arg;
  const targetsLocalFn = callee.type === 'Identifier' && ipCtx.fns.has(callee.name);
  if (!targetsLocalFn) return null;

  const seedEnv = enclosingParamEnv(node);
  return evalExpr(arg, seedEnv, ipCtx.fns, 0, new Set());
}
625
+
368
626
  /** Resolve the binding scope that ENCLOSES a given node (its nearest function
369
627
  * or the program). */
370
628
  function scopeForNode(scopeOf, node) {
@@ -379,7 +637,7 @@ function scopeForNode(scopeOf, node) {
379
637
  return makeScope(null);
380
638
  }
381
639
 
382
- function handleCall(node, scopeOf, file, line, findings) {
640
+ function handleCall(node, scopeOf, file, line, findings, ipCtx) {
383
641
  const callee = node.callee;
384
642
  const scope = scopeForNode(scopeOf, node);
385
643
  const args = node.arguments || [];
@@ -417,6 +675,20 @@ function handleCall(node, scopeOf, file, line, findings) {
417
675
  findings.push(sqlFinding(file, line(node), node));
418
676
  return;
419
677
  }
678
+ // INTER-PROCEDURAL (additive): the intraprocedural path above recovers NO
679
+ // SQL text from a CallExpression arg. If arg0 is a call to a LOCAL helper,
680
+ // resolve its return arg-sensitively; flag only when the recovered value
681
+ // is dynamic AND carries a real SQL keyword AND the call is not
682
+ // parameterized. A const-returning helper or an identity-fn(constant) stays
683
+ // clean (no dynamic / no recovered keyword); a non-SQL helper stays clean
684
+ // (keyword gate).
685
+ if (ipCtx && arg0) {
686
+ const ip = resolveLocalCallArg(arg0, node, ipCtx);
687
+ if (ip && ip.dynamic && SQL_KW_RE.test(ip.text) && !isParameterized(node, arg0, scope)) {
688
+ findings.push(sqlFinding(file, line(node), node));
689
+ return;
690
+ }
691
+ }
420
692
  }
421
693
 
422
694
  // child_process exec / execSync with a dynamic command string.
@@ -438,6 +710,34 @@ function handleCall(node, scopeOf, file, line, findings) {
438
710
  }
439
711
  }
440
712
 
713
+ // SINK ALIAS (inter-procedural): `const X = obj.query` / `const X = cp.execSync`
714
+ // makes a later `X(arg)` call the same sink. Apply the SAME guards as the
715
+ // underlying member sink (dynamic + SQL-keyword + parameterized for sql;
716
+ // dynamic for shell). The arg may itself be a local-function call, so reuse the
717
+ // inter-procedural resolver. `const run=console.log; run(...)` is not an alias
718
+ // (console.log is not a tracked sink method) and never reaches here.
719
+ if (callee.type === 'Identifier' && ipCtx && ipCtx.aliases.has(callee.name)) {
720
+ const alias = ipCtx.aliases.get(callee.name);
721
+ const arg0 = args[0];
722
+ if (arg0) {
723
+ const ip = resolveLocalCallArg(arg0, node, ipCtx);
724
+ const dynamic = ip ? ip.dynamic : isDynamic(arg0, scope);
725
+ if (alias.kind === 'shell') {
726
+ if (dynamic) {
727
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
728
+ return;
729
+ }
730
+ } else {
731
+ // sql alias: dynamic + real SQL keyword + not parameterized.
732
+ const text = ip ? ip.text : collectStaticText(arg0, scope, new Set());
733
+ if (dynamic && SQL_KW_RE.test(text) && !isParameterized(node, arg0, scope)) {
734
+ findings.push(sqlFinding(file, line(node), node));
735
+ return;
736
+ }
737
+ }
738
+ }
739
+ }
740
+
441
741
  // Bare exec/execSync identifier (destructured from child_process):
442
742
  // const { exec } = require('child_process'); exec(cmd);
443
743
  if (callee.type === 'Identifier' && CP_SHELL_EXEC.has(callee.name)) {