@ps-neko/nekowork 0.2.0-alpha.7 → 0.2.0-alpha.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,11 +10,12 @@ hardcoded credentials, auto-push/commit, test/security disables, risky package
10
10
  hooks, eval, insecure TLS, CORS wildcard, basic SQL/command injection, and AST
11
11
  dataflow taint for variable-mediated injection) and routes everything else to a
12
12
  human decision. It is **not an exhaustive security audit** — the AST rule is
13
- intraprocedural (single-function, JS/TS); cross-function and whole-program dataflow
14
- are out of scope. The verdict is deterministic (same diff, same result), and it never
13
+ inter-procedural (intra-module, JS/TS): it follows taint across functions within a
14
+ single file (local-helper returns, sink aliasing); cross-file and whole-program
15
+ dataflow are out of scope. The verdict is deterministic (same diff, same result), and it never
15
16
  commits, pushes, or deploys on its own. **You** make the final call.
16
17
 
17
- > Note: the published `@alpha` (0.2.0-alpha.7) now ships all **11 rules** described
18
+ > Note: the published `@alpha` (0.2.0-alpha.8) now ships all **11 rules** described
18
19
  > above (incl. eval, insecure TLS, CORS wildcard, SQL/command injection, AST dataflow)
19
20
  > and adds **one tiny, well-known dependency** (`acorn`, the JS parser — MIT, zero
20
21
  > transitive dependencies) for the AST engine. Always install with the **`@alpha`**
@@ -85,7 +86,7 @@ step — it is not triggered by `decision.json`.
85
86
 
86
87
  - [Quickstart](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/QUICKSTART.md)
87
88
  - [How verification works](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/SCOPE-1.0.md)
88
- - [Benchmark](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/BENCHMARK.md) — 11 rules, 184/184 (100%) recall, 0/120 FP; 30 real OSS positives on `secret-fallback`, the newer rules (incl. sql/command injection and `ast-dataflow`) are synthetic-only
89
+ - [Benchmark](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/BENCHMARK.md) — 11 rules, 234/234 (100%) recall, 0/130 FP; ~82 real OSS positives across rules (incl. 30 on `secret-fallback`), synthetic share 63%; `hardcoded-credential` stays synthetic-only by design
89
90
  - [Integration](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/INTEGRATION.md)
90
91
 
91
92
  ## License
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ps-neko/nekowork",
3
- "version": "0.2.0-alpha.7",
3
+ "version": "0.2.0-alpha.9",
4
4
  "description": "Local verification gate for AI-written code diffs. Deterministic rules decide the verdict, never the LLM. No auto-commit, push, or deploy — you decide at the Human Gate.",
5
5
  "keywords": [
6
6
  "ai-code-review",
package/scripts/check.js CHANGED
@@ -72,20 +72,73 @@ function checkHasCommit() {
72
72
  }
73
73
  }
74
74
 
75
+ // Mirror scripts/lib/diff-parser.js isSelfOutput: verify-pr drops its own output
76
+ // (REPORT.md + .nekowork/**) from every diff source, so those artifacts must not
77
+ // count as "working-tree changes" here either. Case-insensitive to match the
78
+ // parser (Windows/macOS case-insensitive filesystems resolve REPORT.MD etc. to
79
+ // the same files).
80
+ function isSelfOutput(relPath) {
81
+ const lower = String(relPath).toLowerCase();
82
+ return lower === 'report.md' || lower.startsWith('.nekowork/');
83
+ }
84
+
85
+ // Parse one `git status --porcelain` line into its repo-relative path. Porcelain
86
+ // v1 format is `XY <path>` (2 status chars + space + path); renames use
87
+ // `XY old -> new`, where the post-rename path is what verify-pr would scan.
88
+ function porcelainPath(line) {
89
+ let p = line.slice(3);
90
+ const arrow = p.indexOf(' -> ');
91
+ if (arrow !== -1) p = p.slice(arrow + ' -> '.length);
92
+ // Porcelain quotes paths with special chars; strip surrounding quotes.
93
+ if (p.startsWith('"') && p.endsWith('"')) p = p.slice(1, -1);
94
+ return p.replace(/\\/g, '/');
95
+ }
96
+
75
97
  function checkDiff() {
98
+ // Use `git status --porcelain` (NOT `git diff`): plain `git diff` omits
99
+ // UNTRACKED new files, but verify-pr DOES scan them (synthesizeUntrackedDiff).
100
+ // Reporting "no diff" while verify-pr finds untracked criticals is a misleading
101
+ // false-negative. Porcelain lists untracked with `??`, so it matches verify-pr's
102
+ // diff scope. We then drop nekowork's own output so its artifacts don't count.
76
103
  const r = spawnSync('git', ['status', '--porcelain'], { encoding: 'utf8' });
77
104
  if (r.status !== 0) {
78
105
  record('git-diff', STATUSES.WARN, 'could not check working-tree state');
79
106
  return;
80
107
  }
81
- const lines = r.stdout.split('\n').filter(l => l && !l.startsWith('??'));
82
- if (lines.length > 0) {
83
- record('git-diff', STATUSES.PASS, `${lines.length} modified file(s) — verify-pr will scan these`);
108
+ const changed = r.stdout
109
+ .split('\n')
110
+ .filter(Boolean)
111
+ .map(porcelainPath)
112
+ .filter(p => p && !isSelfOutput(p));
113
+ if (changed.length > 0) {
114
+ record('git-diff', STATUSES.PASS, `working-tree changes detected (${changed.length} file(s)) — verify-pr will scan them`);
84
115
  } else {
85
- record('git-diff', STATUSES.WARN, 'no working-tree diff — `verify-pr` will report no changes');
116
+ record('git-diff', STATUSES.WARN, 'no changes to scan — `verify-pr` will report no changes');
86
117
  }
87
118
  }
88
119
 
120
+ // Gentle, non-blocking hint: verify-pr leaves its evidence output (.nekowork/ and
121
+ // REPORT.md) in the user's repo, which then shows up in `git status`. If those
122
+ // artifacts already exist AND are not gitignored, suggest adding them. Returns a
123
+ // hint string or null. Never a check/failure — just a nudge.
124
+ function gitignoreHint() {
125
+ const artifacts = ['.nekowork/', 'REPORT.md'];
126
+ const present = artifacts.filter(a => {
127
+ try { return fs.existsSync(path.resolve(process.cwd(), a.replace(/\/$/, ''))); } catch { return false; }
128
+ });
129
+ if (present.length === 0) return null;
130
+ // git check-ignore exits 0 if the path IS ignored, 1 if not. Hint only for
131
+ // artifacts that exist but are NOT ignored.
132
+ const notIgnored = present.filter(a => {
133
+ const r = spawnSync('git', ['check-ignore', '-q', a], { encoding: 'utf8' });
134
+ return r.status !== 0;
135
+ });
136
+ if (notIgnored.length === 0) return null;
137
+ return 'Tip: NEKOWORK wrote evidence (.nekowork/, REPORT.md) into this repo. '
138
+ + 'Add them to .gitignore so they don\'t clutter `git status`:\n'
139
+ + ' echo -e ".nekowork/\\nREPORT.md" >> .gitignore';
140
+ }
141
+
89
142
  checkNode();
90
143
  checkGitBinary();
91
144
  checkInsideRepo();
@@ -121,6 +174,15 @@ if (json) {
121
174
  } else {
122
175
  console.log('Ready. Next: `nekowork verify-pr`');
123
176
  }
177
+ // Only meaningful inside a repo (where check-ignore works). git-repo PASS implies that.
178
+ const repoOk = checks.find(c => c.name === 'git-repo')?.status === STATUSES.PASS;
179
+ if (repoOk) {
180
+ const hint = gitignoreHint();
181
+ if (hint) {
182
+ console.log('');
183
+ console.log(` [i] ${hint}`);
184
+ }
185
+ }
124
186
  }
125
187
 
126
188
  process.exit(worstRank);
@@ -1,4 +1,5 @@
1
- // Intraprocedural const/taint propagation + dangerous-sink detection.
1
+ // Inter-procedural (intra-module) const/taint propagation + dangerous-sink
2
+ // detection.
2
3
  //
3
4
  // Goal: catch the variable-mediated injection forms the line-oriented regex
4
5
  // rules provably miss, WITHOUT introducing a single false positive. A naive
@@ -19,9 +20,33 @@
19
20
  // binding is CONST-SAFE iff EVERY assignment to it (declarator init +
20
21
  // reassignments) is a const-safe string; any non-const-safe assignment, or a
21
22
  // reassignment we can't see as const-safe, makes it DYNAMIC. Function PARAMETERS
22
- // are always dynamic. Analysis is strictly intraprocedural: a value returned
23
- // from another function call is dynamic (we never chase across calls — that is
24
- // where FPs come from).
23
+ // are always dynamic.
24
+ //
25
+ // Inter-procedural upgrade (intra-module only — never crosses files):
26
+ // 1. Arg-sensitive local-function return-taint resolution. When a sink
27
+ // argument is a CallExpression to a function DEFINED in this module
28
+ // (FunctionDeclaration or const = FunctionExpression/Arrow), the function's
29
+ // return expression(s) are evaluated with its params BOUND to the call
30
+ // site's argument classifications, recovering both the dynamic flag and the
31
+ // static SQL text. This makes
32
+ // function build(x){ return "SELECT "+x } db.query(build(req.id)) // FLAG
33
+ // while keeping
34
+ // function build(){ return "SELECT 1" } db.query(build()) // clean
35
+ // function id(x){ return x } db.query(id("SELECT 1")) // clean
36
+ // The resolver is guarded by a visited-set (cycle guard) and a depth limit
37
+ // (~6). Unknown / non-local calls stay structurally dynamic with NO
38
+ // recovered text, so the SQL-keyword gate still protects against FPs. The
39
+ // resolution is ADDITIVE: it can only turn a clean SQL sink into a finding
40
+ // (by recovering SQL text from a helper) — it never clears an existing one.
41
+ // 2. Sink-alias resolution. A module binding `const X = <obj>.<sinkMethod>`
42
+ // (query/execute/raw → sql alias; exec/execSync → shell alias), where X is a
43
+ // simple const not reassigned, makes a later `X(arg)` call get the same
44
+ // dynamic + SQL-keyword + parameterized treatment as the underlying sink.
45
+ // `const run = console.log; run(...)` is NOT a sink (console.log is not a
46
+ // tracked sink method).
47
+ //
48
+ // Both upgrades inherit the same FP guards (const-propagation, SQL-keyword gate,
49
+ // params-array exemption), so they hold the FP=0 benchmark gate.
25
50
 
26
51
  import { parseToAst, walk } from './parse.js';
27
52
 
@@ -41,6 +66,221 @@ const CP_SHELL_EXEC = new Set(['exec', 'execSync']);
41
66
  // when shell:true is set AND the command is dynamic.
42
67
  const CP_SPAWN = new Set(['spawn', 'spawnSync', 'execFile', 'execFileSync']);
43
68
 
69
+ // Inter-procedural resolution guards.
70
+ const IP_DEPTH_LIMIT = 6; // max local-call resolution depth (cycle/runaway guard)
71
+
72
+ /**
73
+ * Collect LOCALLY-DEFINED functions by name (module + nested scopes; last wins,
74
+ * matching JS hoisting/redeclaration for our conservative best-effort). A name
75
+ * here resolves to a FunctionDeclaration node, or the FunctionExpression/Arrow
76
+ * bound by `const f = () => …`. Used by the arg-sensitive return-taint resolver.
77
+ *
78
+ * @param {object} ast Program node
79
+ * @returns {Map<string, object>} name → function node
80
+ */
81
+ function collectLocalFns(ast) {
82
+ const fns = new Map();
83
+ walk(ast, (n) => {
84
+ if (n.type === 'FunctionDeclaration' && n.id && n.id.type === 'Identifier') {
85
+ fns.set(n.id.name, n);
86
+ } else if (n.type === 'VariableDeclaration') {
87
+ for (const d of n.declarations) {
88
+ if (
89
+ d.id.type === 'Identifier' &&
90
+ d.init &&
91
+ (d.init.type === 'FunctionExpression' || d.init.type === 'ArrowFunctionExpression')
92
+ ) {
93
+ fns.set(d.id.name, d.init);
94
+ }
95
+ }
96
+ }
97
+ });
98
+ return fns;
99
+ }
100
+
101
+ /**
102
+ * Collect SINK ALIASES: a module binding `const X = <obj>.<sinkMethod>` where
103
+ * sinkMethod ∈ query/execute/raw (→ sql alias) or exec/execSync (→ shell alias).
104
+ * Only a SIMPLE const Identifier binding that is NEVER reassigned qualifies (a
105
+ * reassigned binding cannot be trusted to still point at the sink). A later
106
+ * `X(arg)` call is then treated as the underlying sink. `const run=console.log`
107
+ * is ignored (console.log is not a tracked sink method).
108
+ *
109
+ * @param {object} ast Program node
110
+ * @returns {Map<string, {kind:'sql'|'shell', method:string}>}
111
+ */
112
+ function collectSinkAliases(ast) {
113
+ const candidates = new Map(); // name → {kind, method}
114
+ const reassigned = new Set(); // names reassigned anywhere → disqualified
115
+ walk(ast, (n) => {
116
+ if (n.type === 'VariableDeclaration') {
117
+ for (const d of n.declarations) {
118
+ if (
119
+ d.id.type === 'Identifier' &&
120
+ d.init &&
121
+ d.init.type === 'MemberExpression' &&
122
+ d.init.property.type === 'Identifier' &&
123
+ !d.init.computed
124
+ ) {
125
+ const method = d.init.property.name;
126
+ // Only `const` declarations qualify (let/var can be reassigned to a
127
+ // non-sink; const cannot be rebound).
128
+ if (n.kind !== 'const') continue;
129
+ if (SQL_SINKS.has(method)) candidates.set(d.id.name, { kind: 'sql', method });
130
+ else if (CP_SHELL_EXEC.has(method)) candidates.set(d.id.name, { kind: 'shell', method });
131
+ }
132
+ }
133
+ } else if (n.type === 'AssignmentExpression' && n.left.type === 'Identifier') {
134
+ reassigned.add(n.left.name);
135
+ }
136
+ });
137
+ for (const name of reassigned) candidates.delete(name);
138
+ return candidates;
139
+ }
140
+
141
+ /**
142
+ * Collect the return expressions of a function node. For an arrow with an
143
+ * expression body the body itself is the (single) return. For a block body we
144
+ * gather every ReturnStatement argument, NOT descending into nested functions
145
+ * (a nested closure's return is not this function's return value).
146
+ *
147
+ * @param {object} fn FunctionDeclaration | FunctionExpression | ArrowFunctionExpression
148
+ * @returns {object[]} return-value expressions
149
+ */
150
+ function returnsOf(fn) {
151
+ if (fn.type === 'ArrowFunctionExpression' && fn.body.type !== 'BlockStatement') {
152
+ return [fn.body];
153
+ }
154
+ const out = [];
155
+ const recurse = (node) => {
156
+ if (!node || typeof node.type !== 'string') return;
157
+ if (node.type === 'ReturnStatement') {
158
+ if (node.argument) out.push(node.argument);
159
+ return;
160
+ }
161
+ // Do not descend into a NESTED function — its returns are not ours.
162
+ if (FN_TYPES.has(node.type)) return;
163
+ for (const key of Object.keys(node)) {
164
+ if (key === 'loc' || key === 'start' || key === 'end' || key === 'range' || key === '__parent') continue;
165
+ const v = node[key];
166
+ if (Array.isArray(v)) {
167
+ for (const c of v) if (c && typeof c.type === 'string') recurse(c);
168
+ } else if (v && typeof v.type === 'string') {
169
+ recurse(v);
170
+ }
171
+ }
172
+ };
173
+ recurse(fn.body);
174
+ return out;
175
+ }
176
+
177
+ /**
178
+ * Arg-sensitive evaluator: classify an expression's { dynamic, text } where
179
+ * `text` is the recovered static string (used by the SQL-keyword gate). `env`
180
+ * maps a parameter name → its already-computed { dynamic, text } at the call
181
+ * site. This is the inter-procedural core: a CallExpression to a LOCAL function
182
+ * is resolved by binding its params to the call arguments' classifications and
183
+ * evaluating its return expression(s).
184
+ *
185
+ * Conservative leaves (mirror the prototype): a bare unknown Identifier and a
186
+ * MemberExpression contribute NO text; an unknown/non-local call is structurally
187
+ * dynamic with NO text (so the SQL-keyword gate still guards FPs).
188
+ *
189
+ * @param {object} node
190
+ * @param {Map<string,{dynamic:boolean,text:string}>} env param bindings
191
+ * @param {Map<string,object>} fns local-function map
192
+ * @param {number} depth current recursion depth
193
+ * @param {Set<string>} seen function names on the active call stack (cycle guard)
194
+ * @returns {{dynamic:boolean, text:string}}
195
+ */
196
+ function evalExpr(node, env, fns, depth, seen) {
197
+ if (!node || depth > IP_DEPTH_LIMIT) return { dynamic: depth > IP_DEPTH_LIMIT, text: '' };
198
+ switch (node.type) {
199
+ case 'Literal':
200
+ return { dynamic: false, text: typeof node.value === 'string' ? node.value : '' };
201
+ case 'TemplateLiteral': {
202
+ const text = node.quasis
203
+ .map((q) => (q.value && q.value.cooked != null ? q.value.cooked : q.value.raw || ''))
204
+ .join(' ');
205
+ const dyn = node.expressions.some((e) => evalExpr(e, env, fns, depth + 1, seen).dynamic);
206
+ return { dynamic: node.expressions.length > 0 && dyn, text };
207
+ }
208
+ case 'BinaryExpression': {
209
+ if (node.operator !== '+') return { dynamic: false, text: '' };
210
+ const l = evalExpr(node.left, env, fns, depth + 1, seen);
211
+ const r = evalExpr(node.right, env, fns, depth + 1, seen);
212
+ return { dynamic: l.dynamic || r.dynamic, text: l.text + ' ' + r.text };
213
+ }
214
+ case 'TaggedTemplateExpression':
215
+ return evalExpr(node.quasi, env, fns, depth + 1, seen);
216
+ case 'ParenthesizedExpression':
217
+ return evalExpr(node.expression, env, fns, depth + 1, seen);
218
+ case 'Identifier': {
219
+ if (env.has(node.name)) return env.get(node.name);
220
+ // Unknown bare identifier: conservative — not dynamic-flaggable, no text.
221
+ return { dynamic: false, text: '' };
222
+ }
223
+ case 'CallExpression': {
224
+ const callee = node.callee;
225
+ const name = callee.type === 'Identifier' ? callee.name : null;
226
+ if (name && fns.has(name) && !seen.has(name)) {
227
+ const fn = fns.get(name);
228
+ const argEnv = new Map();
229
+ (fn.params || []).forEach((p, i) => {
230
+ if (p.type === 'Identifier') {
231
+ const arg = node.arguments[i];
232
+ argEnv.set(
233
+ p.name,
234
+ arg ? evalExpr(arg, env, fns, depth + 1, seen) : { dynamic: false, text: '' },
235
+ );
236
+ }
237
+ });
238
+ const seen2 = new Set(seen);
239
+ seen2.add(name);
240
+ let dynamic = false;
241
+ let text = '';
242
+ for (const ret of returnsOf(fn)) {
243
+ const v = evalExpr(ret, argEnv, fns, depth + 1, seen2);
244
+ dynamic = dynamic || v.dynamic;
245
+ text += ' ' + v.text;
246
+ }
247
+ return { dynamic, text };
248
+ }
249
+ // Unknown / non-local / recursive call → structurally dynamic, no text.
250
+ return { dynamic: true, text: '' };
251
+ }
252
+ default:
253
+ // MemberExpression (req.body.x), AwaitExpression, etc. — runtime value,
254
+ // but no statically recoverable text.
255
+ return { dynamic: true, text: '' };
256
+ }
257
+ }
258
+
259
+ /**
260
+ * Build the enclosing-scope param env for a node: every parameter of an
261
+ * enclosing function is dynamic (external/runtime). This seeds evalExpr so a
262
+ * sink-arg call like `db.query(build(req.id))` inside `function h(req){…}` knows
263
+ * `req` is dynamic. Mirrors the prototype's enclosingEnv via the __parent chain.
264
+ *
265
+ * @param {object} node a CallExpression sink node
266
+ * @returns {Map<string,{dynamic:boolean,text:string}>}
267
+ */
268
+ function enclosingParamEnv(node) {
269
+ const env = new Map();
270
+ let n = node.__parent;
271
+ while (n) {
272
+ if (FN_TYPES.has(n.type) && Array.isArray(n.params)) {
273
+ for (const p of n.params) {
274
+ for (const name of patternNames(p)) {
275
+ if (!env.has(name)) env.set(name, { dynamic: true, text: '' });
276
+ }
277
+ }
278
+ }
279
+ n = n.__parent;
280
+ }
281
+ return env;
282
+ }
283
+
44
284
  /**
45
285
  * Scope: a binding map + parent link. `bindings` maps name → { dynamic: bool }.
46
286
  * A name absent from the whole chain resolves to dynamic (unknown = unsafe).
@@ -351,12 +591,16 @@ export function analyze(code, file, opts = {}) {
351
591
  annotateParents(ast);
352
592
  const scopeOf = buildScopes(ast);
353
593
 
594
+ // Inter-procedural (intra-module) maps: local functions for arg-sensitive
595
+ // return-taint resolution, and sink aliases for `const X = obj.query` etc.
596
+ const ipCtx = { fns: collectLocalFns(ast), aliases: collectSinkAliases(ast) };
597
+
354
598
  const findings = [];
355
599
  const line = (n) => (n.loc ? n.loc.start.line : 0);
356
600
 
357
601
  walk(ast, (node) => {
358
602
  if (node.type === 'CallExpression') {
359
- handleCall(node, scopeOf, file, line, findings);
603
+ handleCall(node, scopeOf, file, line, findings, ipCtx);
360
604
  } else if (node.type === 'NewExpression') {
361
605
  handleNew(node, scopeOf, file, line, findings);
362
606
  }
@@ -365,6 +609,20 @@ export function analyze(code, file, opts = {}) {
365
609
  return { parsed: true, findings: dedupe(findings) };
366
610
  }
367
611
 
612
+ /**
613
+ * Arg-sensitive inter-procedural resolution of a sink argument that is a CALL to
614
+ * a local function. Returns the recovered { dynamic, text } so the caller can
615
+ * apply the SAME dynamic + SQL-keyword gate it uses for intraprocedural values.
616
+ * Returns null when the argument is not a local-function call (the caller then
617
+ * keeps the existing intraprocedural classification — purely additive).
618
+ */
619
+ function resolveLocalCallArg(arg, node, ipCtx) {
620
+ if (!arg || arg.type !== 'CallExpression') return null;
621
+ if (!(arg.callee.type === 'Identifier' && ipCtx.fns.has(arg.callee.name))) return null;
622
+ const env = enclosingParamEnv(node);
623
+ return evalExpr(arg, env, ipCtx.fns, 0, new Set());
624
+ }
625
+
368
626
  /** Resolve the binding scope that ENCLOSES a given node (its nearest function
369
627
  * or the program). */
370
628
  function scopeForNode(scopeOf, node) {
@@ -379,7 +637,7 @@ function scopeForNode(scopeOf, node) {
379
637
  return makeScope(null);
380
638
  }
381
639
 
382
- function handleCall(node, scopeOf, file, line, findings) {
640
+ function handleCall(node, scopeOf, file, line, findings, ipCtx) {
383
641
  const callee = node.callee;
384
642
  const scope = scopeForNode(scopeOf, node);
385
643
  const args = node.arguments || [];
@@ -417,6 +675,20 @@ function handleCall(node, scopeOf, file, line, findings) {
417
675
  findings.push(sqlFinding(file, line(node), node));
418
676
  return;
419
677
  }
678
+ // INTER-PROCEDURAL (additive): the intraprocedural path above recovers NO
679
+ // SQL text from a CallExpression arg. If arg0 is a call to a LOCAL helper,
680
+ // resolve its return arg-sensitively; flag only when the recovered value
681
+ // is dynamic AND carries a real SQL keyword AND the call is not
682
+ // parameterized. A const-returning helper or an identity-fn(constant) stays
683
+ // clean (no dynamic / no recovered keyword); a non-SQL helper stays clean
684
+ // (keyword gate).
685
+ if (ipCtx && arg0) {
686
+ const ip = resolveLocalCallArg(arg0, node, ipCtx);
687
+ if (ip && ip.dynamic && SQL_KW_RE.test(ip.text) && !isParameterized(node, arg0, scope)) {
688
+ findings.push(sqlFinding(file, line(node), node));
689
+ return;
690
+ }
691
+ }
420
692
  }
421
693
 
422
694
  // child_process exec / execSync with a dynamic command string.
@@ -438,6 +710,34 @@ function handleCall(node, scopeOf, file, line, findings) {
438
710
  }
439
711
  }
440
712
 
713
+ // SINK ALIAS (inter-procedural): `const X = obj.query` / `const X = cp.execSync`
714
+ // makes a later `X(arg)` call the same sink. Apply the SAME guards as the
715
+ // underlying member sink (dynamic + SQL-keyword + parameterized for sql;
716
+ // dynamic for shell). The arg may itself be a local-function call, so reuse the
717
+ // inter-procedural resolver. `const run=console.log; run(...)` is not an alias
718
+ // (console.log is not a tracked sink method) and never reaches here.
719
+ if (callee.type === 'Identifier' && ipCtx && ipCtx.aliases.has(callee.name)) {
720
+ const alias = ipCtx.aliases.get(callee.name);
721
+ const arg0 = args[0];
722
+ if (arg0) {
723
+ const ip = resolveLocalCallArg(arg0, node, ipCtx);
724
+ const dynamic = ip ? ip.dynamic : isDynamic(arg0, scope);
725
+ if (alias.kind === 'shell') {
726
+ if (dynamic) {
727
+ findings.push(cmdFinding(file, line(node), node, 'critical'));
728
+ return;
729
+ }
730
+ } else {
731
+ // sql alias: dynamic + real SQL keyword + not parameterized.
732
+ const text = ip ? ip.text : collectStaticText(arg0, scope, new Set());
733
+ if (dynamic && SQL_KW_RE.test(text) && !isParameterized(node, arg0, scope)) {
734
+ findings.push(sqlFinding(file, line(node), node));
735
+ return;
736
+ }
737
+ }
738
+ }
739
+ }
740
+
441
741
  // Bare exec/execSync identifier (destructured from child_process):
442
742
  // const { exec } = require('child_process'); exec(cmd);
443
743
  if (callee.type === 'Identifier' && CP_SHELL_EXEC.has(callee.name)) {
@@ -6,6 +6,9 @@
6
6
  // - exec("ls " + userInput)
7
7
  // - execSync(`rm -rf ${path}`)
8
8
  // - spawn(`sh -c ${cmd}`, { shell: true })
9
+ // - subprocess.run(f"git checkout {branch}", shell=True) (Python)
10
+ // - os.system("rm -rf " + path) (Python)
11
+ // - exec.Command("sh", "-c", "tar " + name) (Go)
9
12
  //
10
13
  // SAFE forms that must NOT fire (FP=0 against a diverse negative set):
11
14
  // - array-arg spawn/execFile: spawn('ls', ['-la', dir]) (no shell parsing)
@@ -13,6 +16,12 @@
13
16
  // - exec with a plain variable that is itself the whole command and was
14
17
  // validated elsewhere is out of scope (we only flag interpolation INTO a
15
18
  // command string, which is the unambiguous injection shape).
19
+ // - subprocess.run(["ls", "-la"]) / subprocess.run("ls", shell=False) (Py)
20
+ // - os.system("ls -la") (static literal, Python)
21
+ // - exec.Command("ls", "-la") (arg array, Go)
22
+ //
23
+ // Multi-language coverage mirrors insecure-tls.js: each language gets its own
24
+ // regex pattern; the JS engine never sees the Python/Go forms and vice versa.
16
25
  //
17
26
  // Severity: high (critical when force flags / rm appear is left to other rules).
18
27
 
@@ -25,6 +34,23 @@ import { makeRegexScanner } from './_helpers.js';
25
34
  const CONCAT_STR = '(?:"[^"\\n]*"|\'[^\'\\n]*\')\\s*\\+'; // "lit" + / 'lit' +
26
35
  const TEMPLATE_INTERP = '`[^`\\n]*\\$\\{[^}]+\\}[^`\\n]*`'; // `...${x}...`
27
36
 
37
+ // Python dynamic-command shapes. A command argument is dynamic when it is:
38
+ // - an f-string: f"... {x} ..." / f'... {x} ...'
39
+ // - a concatenation: "..." + x (string literal followed by `+`)
40
+ // - a %-format: "..." % x (string literal followed by `%`)
41
+ // - a bare variable: cmd (identifier, not a quote) — only for
42
+ // the shell=True / os.system / os.popen sinks where a
43
+ // non-literal first arg is the injectable shape.
44
+ // A pure string literal (`"ls -la"`) or a list literal (`["ls", "-la"]`) is
45
+ // the safe shape and must NOT match.
46
+ const PY_FSTRING = 'f(?:"[^"\\n]*\\{[^}]+\\}[^"\\n]*"|\'[^\'\\n]*\\{[^}]+\\}[^\'\\n]*\')';
47
+ const PY_CONCAT = '(?:"[^"\\n]*"|\'[^\'\\n]*\')\\s*[+%]'; // "lit" + x / "lit" % x
48
+ // A non-literal, non-list first argument: an identifier (variable) optionally
49
+ // with attribute/subscript access. Excludes a leading quote (string literal)
50
+ // and a leading `[` (list args).
51
+ const PY_VAR = '[A-Za-z_][\\w.\\[\\]\'"]*';
52
+ const PY_DYNAMIC = `(?:${PY_FSTRING}|${PY_CONCAT})`;
53
+
28
54
  const PATTERNS = [
29
55
  {
30
56
  // child_process exec / execSync with a concatenated command string.
@@ -59,6 +85,63 @@ const PATTERNS = [
59
85
  description: 'spawn/exec with shell:true parses shell metacharacters; the command is built from interpolated/concatenated input — a command-injection / RCE vector.',
60
86
  recommendation: 'Drop shell:true and pass an argument array, or strictly validate the input. Never combine shell:true with assembled command strings.',
61
87
  },
88
+ {
89
+ // Python: subprocess.run / call / Popen with shell=True AND a dynamic
90
+ // command (f-string / concat / %-format / bare variable). shell=True hands
91
+ // the command string to /bin/sh, so an assembled command is injectable.
92
+ // A list-literal first arg (`subprocess.run(["ls","-la"])`) or shell=False
93
+ // never matches — the regex requires a non-list dynamic command followed by
94
+ // shell=True within the same call.
95
+ // subprocess.run(f"git checkout {branch}", shell=True)
96
+ // subprocess.Popen("rm -rf " + path, shell=True)
97
+ // subprocess.call(cmd, shell=True)
98
+ id: 'py-subprocess-shell-true',
99
+ re: new RegExp(`\\bsubprocess\\.(?:run|call|Popen|check_output|check_call)\\s*\\(\\s*(?:${PY_DYNAMIC}|${PY_VAR})[\\s\\S]{0,200}?shell\\s*=\\s*True`, 'g'),
100
+ severity: 'critical',
101
+ title: 'Python subprocess with shell=True and a dynamic command',
102
+ description: 'subprocess.run/call/Popen with shell=True runs the command string through the shell; the command is built from an f-string / concatenation / variable — an OS-command-injection vector.',
103
+ recommendation: 'Drop shell=True and pass an argument list (subprocess.run(["git", "checkout", branch])), or strictly validate the input.',
104
+ },
105
+ {
106
+ // Python: os.system with a dynamic command (f-string / concat / %-format).
107
+ // A static literal (os.system("ls -la")) is the safe shape and is excluded
108
+ // by requiring an f-string or a literal-then-(+/%) concatenation.
109
+ // os.system(f"rm -rf {path}") os.system("tar " + name)
110
+ id: 'py-os-system',
111
+ re: new RegExp(`\\bos\\.system\\s*\\(\\s*(?:${PY_DYNAMIC})`, 'g'),
112
+ severity: 'critical',
113
+ title: 'Python os.system with a dynamic command',
114
+ description: 'os.system runs the string through the shell; the command is assembled from an f-string / concatenation / %-format of a variable — an OS-command-injection vector.',
115
+ recommendation: 'Use subprocess.run with an argument list and shell=False, or strictly validate the input. Never feed os.system an assembled command.',
116
+ },
117
+ {
118
+ // Python: os.popen with a dynamic command (f-string / concat / %-format /
119
+ // bare variable). os.popen always goes through the shell, so any non-literal
120
+ // command is injectable. A static literal first arg does not match.
121
+ // os.popen(f"ls {dir}") os.popen("grep " + pat) os.popen(cmd)
122
+ id: 'py-os-popen',
123
+ re: new RegExp(`\\bos\\.popen\\s*\\(\\s*(?:${PY_DYNAMIC}|${PY_VAR}\\s*[,)])`, 'g'),
124
+ severity: 'high',
125
+ title: 'Python os.popen with a dynamic command',
126
+ description: 'os.popen runs the command through the shell; the command is built from an f-string / concatenation / variable — an OS-command-injection vector.',
127
+ recommendation: 'Use subprocess.run with an argument list (shell=False). Do not pass an assembled command to os.popen.',
128
+ },
129
+ {
130
+ // Go: exec.Command("sh", "-c", <dynamic>) / ("bash", "-c", <dynamic>).
131
+ // Passing a shell with `-c` and a concatenated / Sprintf'd / variable third
132
+ // argument re-introduces shell parsing — the Go command-injection shape.
133
+ // exec.Command("ls", "-la") (a real binary + literal args) does NOT match
134
+ // because the first arg must be a shell (sh/bash) followed by -c.
135
+ // exec.Command("sh", "-c", "tar " + name)
136
+ // exec.Command("bash", "-c", fmt.Sprintf("rm %s", path))
137
+ // exec.Command("sh", "-c", cmd)
138
+ id: 'go-exec-shell-c',
139
+ re: /\bexec\.Command\s*\(\s*"(?:sh|bash|\/bin\/sh|\/bin\/bash)"\s*,\s*"-c"\s*,\s*(?:"[^"\n]*"\s*\+|fmt\.Sprintf\s*\(|[A-Za-z_]\w*\s*[,)])/g,
140
+ severity: 'critical',
141
+ title: 'Go exec.Command with sh -c and a dynamic command',
142
+ description: 'exec.Command("sh", "-c", <dynamic>) runs the third argument through the shell; it is built from concatenation / fmt.Sprintf / a variable — an OS-command-injection vector.',
143
+ recommendation: 'Invoke the target binary directly with separate argument strings (exec.Command("git", "checkout", branch)) instead of routing through sh -c.',
144
+ },
62
145
  ];
63
146
 
64
147
  const SCANNER = makeRegexScanner({
@@ -4,6 +4,15 @@
4
4
  // vector when fed anything that is not a compile-time constant:
5
5
  // - eval(<non-literal>) // string-built code executed at runtime
6
6
  // - new Function(<...>) // the Function constructor = eval by proxy
7
+ // - exec(<non-literal>) // Python builtin exec(); runs a code string
8
+ //
9
+ // Note: the language-agnostic `eval(` token means Python `eval(user_input)` is
10
+ // already caught by the JS eval-call pattern below. Python's SAFE alternative
11
+ // ast.literal_eval(x) does NOT fire because eval-call's `(?<![.\w$])` lookbehind
12
+ // rejects the word character `_` that precedes `eval`. The Python `exec()` builtin gets its own
13
+ // pattern (exec-call) with the same lookbehind + static-literal filter, so
14
+ // member calls like RegExp.exec / cursor.exec / child_process exec("ls") never
15
+ // match.
7
16
  //
8
17
  // Comment-stripping (default in makeRegexScanner) removes the word "eval" in
9
18
  // comments and the disable-directive `// eslint-disable ... no-eval` lines, so
@@ -13,6 +22,17 @@
13
22
 
14
23
  import { makeRegexScanner } from './_helpers.js';
15
24
 
25
// Shared filter: reject a pure single string-literal / static template argument
// (low-signal static eval/exec). Any concatenation / interpolation / variable
// is dynamic and is kept.
//
// @param {ArrayLike<string|undefined>} m - regex match; capture group 1 (m[1])
//   holds the raw argument text that follows the opening parenthesis.
// @returns {boolean} true when the argument is dynamic (the finding is kept),
//   false when it is empty or a single static string / template literal.
const isDynamicArg = (m) => {
  const arg = (m[1] || '').trim();
  if (!arg) return false;
  // "lit" / 'lit': only the OPENING quote kind closes the literal, so the other
  // quote kind may appear inside the body ("it's" is still one static literal).
  // Escapes (\" \' \\ ...) are consumed as a pair; a raw newline ends the match.
  if (/^(["'])(?:\\.|(?!\1)[^\\\n])*\1$/.test(arg)) return false;
  // `lit`: static as long as there is no ${...} interpolation. A bare `$` that
  // is not followed by `{` (e.g. `cost $5`) and an escaped `\${` are literal text.
  if (/^`(?:\\.|[^`\\$]|\$(?!\{))*`$/.test(arg)) return false;
  return true;
};
35
+
16
36
  const PATTERNS = [
17
37
  {
18
38
  // eval( <arg> ) where the first non-space char of the argument is NOT a
@@ -36,6 +56,21 @@ const PATTERNS = [
36
56
  return true;
37
57
  },
38
58
  },
59
+ {
60
+ // Python builtin exec( <arg> ) with a non-literal argument: exec(code),
61
+ // exec("x = " + val), exec(f"...{x}..."). The `(?<![.\w$])` lookbehind keeps
62
+ // member calls out (RegExp.exec, cursor.exec, cp.exec — child_process exec
63
+ // is a command-injection concern, handled by that rule, not eval-usage). A
64
+ // pure static literal (exec("pass")) is filtered as low-signal, matching the
65
+ // eval-call behavior. ast.literal_eval / .execute(...) never match.
66
+ id: 'exec-call',
67
+ re: /(?<![.\w$])exec\s*\(\s*([^)]*)/g,
68
+ severity: 'high',
69
+ title: 'exec() with dynamic input detected',
70
+ description: 'Python exec() runs a string as code. With any runtime-assembled or external input this is a code-injection / RCE vector.',
71
+ recommendation: 'Remove exec(). Use ast.literal_eval for data, a lookup table / getattr for dispatch, or a real parser.',
72
+ filter: isDynamicArg,
73
+ },
39
74
  {
40
75
  // new Function('a','b','return a+b') — the Function constructor compiles a
41
76
  // string body into a function. Always flagged: the body is a string and the
@@ -6,17 +6,24 @@
6
6
  // - db.query("SELECT * FROM users WHERE id = " + userId)
7
7
  // - db.query(`SELECT * FROM t WHERE x = ${req.body.x}`)
8
8
  // - conn.execute("DELETE FROM logs WHERE owner='" + name + "'")
9
+ // - cursor.execute(f"SELECT * FROM users WHERE id = {uid}") (Python f-string)
10
+ // - cur.execute("DELETE FROM t WHERE id = " + uid) (Python concat)
11
+ // - cursor.execute("SELECT ... %s" % uid) (Python %-format)
9
12
  //
10
13
  // SAFE forms that must NOT fire (FP=0 against a diverse negative set):
11
14
  // - parameterized query: query("SELECT ... WHERE id = $1", [id])
12
15
  // - placeholder query: query("SELECT ... WHERE id = ?", [id])
13
16
  // - fully static SQL: query("SELECT 1")
14
17
  // - ORM / builder calls: repo.find({ where: { id } }) / qb.where('id = :id')
18
+ // - Python parameterized: cursor.execute("SELECT ... %s", (id,)) (2-arg form)
15
19
  //
16
20
  // The gate that keeps FP low: the dynamic string must contain a SQL DML/DDL
17
21
  // keyword (SELECT/INSERT/UPDATE/DELETE/...) AND mix in a concatenation or a
18
- // ${...} interpolation. A query() call with a static string + params array is
19
- // the safe shape and is explicitly excluded (no concat / no interpolation).
22
+ // ${...} / f-string interpolation / %-format. A query() call with a static
23
+ // string + params array is the safe shape and is explicitly excluded (no
24
+ // concat / no interpolation). The Python %-format pattern requires the `%` to
25
+ // be a string-format operator (the SQL string literal is its left operand), NOT the safe 2-arg
26
+ // `.execute(sql, params)` call where params follow a comma.
20
27
  //
21
28
  // Severity: high.
22
29
 
@@ -55,6 +62,36 @@ const PATTERNS = [
55
62
  description: 'A SQL query template literal interpolates a variable with ${...} and is passed to query/execute/raw — a SQL-injection vector.',
56
63
  recommendation: 'Use parameterized queries (placeholders + a params array), not template interpolation.',
57
64
  },
65
+ {
66
+ // Python f-string SQL: cursor.execute(f"SELECT ... {x} ...").
67
+ // The f-string must contain a SQL keyword AND a {..} interpolation. The
68
+ // safe Python 2-arg form (cursor.execute("SELECT ... %s", (id,))) uses a
69
+ // plain string literal (no `f` prefix, no {..}) and never matches.
70
+ // cursor.execute(f"SELECT * FROM users WHERE id = {uid}")
71
+ // cur.execute(f'DELETE FROM t WHERE name = {name}')
72
+ id: 'sql-py-fstring',
73
+ re: new RegExp(`\\.${SINK}\\s*\\(\\s*f(?:"[^"\\n]*${SQL_KW}[^"\\n]*\\{[^}]+\\}|'[^'\\n]*${SQL_KW}[^'\\n]*\\{[^}]+\\})`, 'gi'),
74
+ severity: 'high',
75
+ title: 'SQL string built by Python f-string interpolation',
76
+ description: 'A Python f-string SQL query interpolates a variable with {..} and is passed to cursor.execute — a SQL-injection vector.',
77
+ recommendation: 'Use a parameterized query: cursor.execute("SELECT ... WHERE id = %s", (id,)). Never build SQL with an f-string.',
78
+ },
79
+ {
80
+ // Python %-format SQL: cursor.execute("SELECT ... %s ..." % x). The string
81
+ // literal contains a SQL keyword and is followed by a `%` FORMAT operator
82
+ // (string-format), distinct from the safe 2-arg `.execute(sql, params)`
83
+ // where params follow a COMMA. We require the literal to be immediately
84
+ // followed by `%` and then a non-`)` operand (a variable / tuple), so a
85
+ // literal that simply ends the call does not match.
86
+ // cursor.execute("SELECT * FROM users WHERE id = %s" % uid)
87
+ // cur.execute("DELETE FROM t WHERE name = '%s'" % (name,))
88
+ id: 'sql-py-percent',
89
+ re: new RegExp(`\\.${SINK}\\s*\\(\\s*(?:"[^"\\n]*${SQL_KW}[^"\\n]*"|'[^'\\n]*${SQL_KW}[^'\\n]*')\\s*%\\s*(?![\\s)])`, 'gi'),
90
+ severity: 'high',
91
+ title: 'SQL string built by Python %-format',
92
+ description: 'A Python SQL query is assembled with the %-format operator (string % value) and passed to cursor.execute — a SQL-injection vector. This is NOT the safe 2-arg execute(sql, params) form.',
93
+ recommendation: 'Use the 2-argument parameterized form: cursor.execute("SELECT ... %s", (id,)) where the driver binds the params — not Python string formatting.',
94
+ },
58
95
  ];
59
96
 
60
97
  const SCANNER = makeRegexScanner({