npm - @ps-neko/nekowork - Versions diffs - 0.2.0-alpha.7 → 0.2.0-alpha.9 - Mend

@ps-neko/nekowork 0.2.0-alpha.7 → 0.2.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +5 -4
package/package.json +1 -1
package/scripts/check.js +66 -4
package/scripts/lib/ast/analyze.js +306 -6
package/scripts/lib/rules/command-injection.js +83 -0
package/scripts/lib/rules/eval-usage.js +35 -0
package/scripts/lib/rules/sql-injection.js +39 -2

package/README.md CHANGED Viewed

@@ -10,11 +10,12 @@ hardcoded credentials, auto-push/commit, test/security disables, risky package
 hooks, eval, insecure TLS, CORS wildcard, basic SQL/command injection, and AST
 dataflow taint for variable-mediated injection) and routes everything else to a
 human decision. It is **not an exhaustive security audit** — the AST rule is
-intraprocedural (single-function, JS/TS); cross-function and whole-program dataflow
-are out of scope. The verdict is deterministic (same diff, same result), and it never
+inter-procedural (intra-module, JS/TS): it follows taint across functions within a
+single file (local-helper returns, sink aliasing); cross-file and whole-program
+dataflow are out of scope. The verdict is deterministic (same diff, same result), and it never
 commits, pushes, or deploys on its own. **You** make the final call.
-> Note: the published `@alpha` (0.2.0-alpha.7) now ships all **11 rules** described
+> Note: the published `@alpha` (0.2.0-alpha.8) now ships all **11 rules** described
 > above (incl. eval, insecure TLS, CORS wildcard, SQL/command injection, AST dataflow)
 > and adds **one tiny, well-known dependency** (`acorn`, the JS parser — MIT, zero
 > transitive dependencies) for the AST engine. Always install with the **`@alpha`**
@@ -85,7 +86,7 @@ step — it is not triggered by `decision.json`.
 - [Quickstart](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/QUICKSTART.md)
 - [How verification works](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/SCOPE-1.0.md)
-- [Benchmark](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/BENCHMARK.md) — 11 rules, 184/184 (100%) recall, 0/120 FP; 30 real OSS positives on `secret-fallback`, the newer rules (incl. sql/command injection and `ast-dataflow`) are synthetic-only
+- [Benchmark](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/BENCHMARK.md) — 11 rules, 234/234 (100%) recall, 0/130 FP; ~82 real OSS positives across rules (incl. 30 on `secret-fallback`), synthetic share 63%; `hardcoded-credential` stays synthetic-only by design
 - [Integration](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/INTEGRATION.md)
 ## License

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ps-neko/nekowork",
-  "version": "0.2.0-alpha.7",
+  "version": "0.2.0-alpha.9",
   "description": "Local verification gate for AI-written code diffs. Deterministic rules decide the verdict, never the LLM. No auto-commit, push, or deploy — you decide at the Human Gate.",
   "keywords": [
     "ai-code-review",

package/scripts/check.js CHANGED Viewed

@@ -72,20 +72,73 @@ function checkHasCommit() {
   }
 }
+// Counterpart of isSelfOutput in scripts/lib/diff-parser.js. verify-pr removes
+// its own artifacts (REPORT.md and everything under .nekowork/) from every diff
+// source, so they must not be counted as working-tree changes here either. The
+// comparison is lower-cased to match the parser: on case-insensitive filesystems
+// (Windows/macOS) REPORT.MD and report.md name the same file.
+function isSelfOutput(relPath) {
+  const normalized = String(relPath).toLowerCase();
+  if (normalized === 'report.md') return true;
+  return normalized.startsWith('.nekowork/');
+}
+// Parse one `git status --porcelain` (v1) line into the repo-relative path that
+// verify-pr would scan. The format is `XY <path>` (2 status chars + space +
+// path); renames use `XY old -> new`, and the post-rename path is returned.
+// Git wraps a path in double quotes when it contains special characters and
+// backslash-escapes them C-style (`\"`, `\\`, `\t`, octal `\303` bytes for
+// non-ASCII), so a quoted path is decoded rather than merely losing its
+// surrounding quotes, and a ` -> ` inside a quoted name is not mistaken for the
+// rename separator.
+function porcelainPath(line) {
+  const simpleEscapes = new Map([
+    ['a', 7], ['b', 8], ['t', 9], ['n', 10], ['v', 11], ['f', 12], ['r', 13],
+  ]);
+  // Decode the inside of a quoted path. Octal escapes are raw UTF-8 bytes, so
+  // everything is gathered as bytes and decoded once at the end.
+  const decodeQuoted = (inner) => {
+    const chunks = [];
+    const escapeRe = /\\([0-7]{1,3}|[\s\S])/gu;
+    let cursor = 0;
+    for (let m = escapeRe.exec(inner); m !== null; m = escapeRe.exec(inner)) {
+      chunks.push(Buffer.from(inner.slice(cursor, m.index), 'utf8'));
+      const token = m[1];
+      if (/^[0-7]/.test(token)) {
+        chunks.push(Buffer.from([Number.parseInt(token, 8) & 0xff]));
+      } else if (simpleEscapes.has(token)) {
+        chunks.push(Buffer.from([simpleEscapes.get(token)]));
+      } else {
+        // `\"`, `\\`, or an unrecognized escape: keep the escaped char itself.
+        chunks.push(Buffer.from(token, 'utf8'));
+      }
+      cursor = escapeRe.lastIndex;
+    }
+    chunks.push(Buffer.from(inner.slice(cursor), 'utf8'));
+    return Buffer.concat(chunks).toString('utf8');
+  };
+  // Read a leading quoted path; null when `text` does not start with one.
+  const readQuoted = (text) => {
+    const m = /^"((?:[^"\\]|\\[\s\S])*)"/.exec(text);
+    return m ? { path: decodeQuoted(m[1]), length: m[0].length } : null;
+  };
+  // Kept from the original contract: the returned path uses forward slashes.
+  const toForwardSlashes = (p) => p.replace(/\\/g, '/');
+
+  let rest = line.slice(3);
+  const first = readQuoted(rest);
+  if (first) {
+    const tail = rest.slice(first.length);
+    if (!tail.startsWith(' -> ')) return toForwardSlashes(first.path);
+    rest = tail.slice(' -> '.length);
+  } else {
+    const arrow = rest.indexOf(' -> ');
+    if (arrow !== -1) rest = rest.slice(arrow + ' -> '.length);
+  }
+  const target = readQuoted(rest);
+  const resolved = target && target.length === rest.length ? target.path : rest;
+  return toForwardSlashes(resolved);
+}
 function checkDiff() {
+  // Use `git status --porcelain` (NOT `git diff`): plain `git diff` omits
+  // UNTRACKED new files, but verify-pr DOES scan them (synthesizeUntrackedDiff).
+  // Reporting "no diff" while verify-pr finds untracked criticals is a misleading
+  // false-negative. Porcelain lists untracked with `??`, so it matches verify-pr's
+  // diff scope. We then drop nekowork's own output so its artifacts don't count.
   const r = spawnSync('git', ['status', '--porcelain'], { encoding: 'utf8' });
   if (r.status !== 0) {
     record('git-diff', STATUSES.WARN, 'could not check working-tree state');
     return;
   }
-  const lines = r.stdout.split('\n').filter(l => l && !l.startsWith('??'));
-  if (lines.length > 0) {
-    record('git-diff', STATUSES.PASS, `${lines.length} modified file(s) — verify-pr will scan these`);
+  const changed = r.stdout
+    .split('\n')
+    .filter(Boolean)
+    .map(porcelainPath)
+    .filter(p => p && !isSelfOutput(p));
+  if (changed.length > 0) {
+    record('git-diff', STATUSES.PASS, `working-tree changes detected (${changed.length} file(s)) — verify-pr will scan them`);
   } else {
-    record('git-diff', STATUSES.WARN, 'no working-tree diff — `verify-pr` will report no changes');
+    record('git-diff', STATUSES.WARN, 'no changes to scan — `verify-pr` will report no changes');
   }
 }
+// Gentle, non-blocking hint: verify-pr leaves its evidence output (.nekowork/ and
+// REPORT.md) in the user's repo, which then shows up in `git status`. If those
+// artifacts already exist AND git reports them as not ignored, suggest adding
+// them to .gitignore. Returns the hint string, or null when there is nothing to
+// suggest. Never a check/failure — just a nudge.
+function gitignoreHint() {
+  const artifacts = ['.nekowork/', 'REPORT.md'];
+  const present = artifacts.filter(a => {
+    // Best-effort: any failure while probing the filesystem counts as "absent".
+    try { return fs.existsSync(path.resolve(process.cwd(), a.replace(/\/$/, ''))); } catch { return false; }
+  });
+  if (present.length === 0) return null;
+  // git check-ignore exits 0 if the path IS ignored and 1 if it is not; any
+  // other outcome (128 on a fatal error, null when git could not be spawned)
+  // tells us nothing, so only an explicit 1 produces a hint.
+  const notIgnored = present.filter(a => {
+    const r = spawnSync('git', ['check-ignore', '-q', a], { encoding: 'utf8' });
+    return r.status === 1;
+  });
+  if (notIgnored.length === 0) return null;
+  return 'Tip: NEKOWORK wrote evidence (.nekowork/, REPORT.md) into this repo. '
+    + 'Add them to .gitignore so they don\'t clutter `git status`:\n'
+    + '       echo -e ".nekowork/\\nREPORT.md" >> .gitignore';
+}
 checkNode();
 checkGitBinary();
 checkInsideRepo();
@@ -121,6 +174,15 @@ if (json) {
   } else {
     console.log('Ready. Next: `nekowork verify-pr`');
   }
+  // Only meaningful inside a repo (where check-ignore works). git-repo PASS implies that.
+  const repoOk = checks.find(c => c.name === 'git-repo')?.status === STATUSES.PASS;
+  if (repoOk) {
+    const hint = gitignoreHint();
+    if (hint) {
+      console.log('');
+      console.log(`  [i] ${hint}`);
+    }
+  }
 }
 process.exit(worstRank);

package/scripts/lib/ast/analyze.js CHANGED Viewed

@@ -1,4 +1,5 @@
-// Intraprocedural const/taint propagation + dangerous-sink detection.
+// Inter-procedural (intra-module) const/taint propagation + dangerous-sink
+// detection.
 //
 // Goal: catch the variable-mediated injection forms the line-oriented regex
 // rules provably miss, WITHOUT introducing a single false positive. A naive
@@ -19,9 +20,33 @@
 // binding is CONST-SAFE iff EVERY assignment to it (declarator init +
 // reassignments) is a const-safe string; any non-const-safe assignment, or a
 // reassignment we can't see as const-safe, makes it DYNAMIC. Function PARAMETERS
-// are always dynamic. Analysis is strictly intraprocedural: a value returned
-// from another function call is dynamic (we never chase across calls — that is
-// where FPs come from).
+// are always dynamic.
+//
+// Inter-procedural upgrade (intra-module only — never crosses files):
+//   1. Arg-sensitive local-function return-taint resolution. When a sink
+//      argument is a CallExpression to a function DEFINED in this module
+//      (FunctionDeclaration or const = FunctionExpression/Arrow), the function's
+//      return expression(s) are evaluated with its params BOUND to the call
+//      site's argument classifications, recovering both the dynamic flag and the
+//      static SQL text. This makes
+//        function build(x){ return "SELECT "+x } db.query(build(req.id))   // FLAG
+//      while keeping
+//        function build(){ return "SELECT 1" } db.query(build())          // clean
+//        function id(x){ return x }            db.query(id("SELECT 1"))   // clean
+//      The resolver is guarded by a visited-set (cycle guard) and a depth limit
+//      (~6). Unknown / non-local calls stay structurally dynamic with NO
+//      recovered text, so the SQL-keyword gate still protects against FPs. The
+//      resolution is ADDITIVE: it can only turn a clean SQL sink into a finding
+//      (by recovering SQL text from a helper) — it never clears an existing one.
+//   2. Sink-alias resolution. A module binding `const X = <obj>.<sinkMethod>`
+//      (query/execute/raw → sql alias; exec/execSync → shell alias), where X is a
+//      simple const not reassigned, makes a later `X(arg)` call get the same
+//      dynamic + SQL-keyword + parameterized treatment as the underlying sink.
+//      `const run = console.log; run(...)` is NOT a sink (console.log is not a
+//      tracked sink method).
+//
+// Both upgrades inherit the same FP guards (const-propagation, SQL-keyword gate,
+// params-array exemption), so they hold the FP=0 benchmark gate.
 import { parseToAst, walk } from './parse.js';
@@ -41,6 +66,221 @@ const CP_SHELL_EXEC = new Set(['exec', 'execSync']);
 // when shell:true is set AND the command is dynamic.
 const CP_SPAWN = new Set(['spawn', 'spawnSync', 'execFile', 'execFileSync']);
+// Inter-procedural resolution guards.
+const IP_DEPTH_LIMIT = 6; // max local-call resolution depth (cycle/runaway guard)
+/**
+ * Index the functions defined in this module by name, at any nesting level.
+ * Two shapes are recorded: a named FunctionDeclaration (the declaration node is
+ * stored) and a variable declarator whose initializer is a FunctionExpression
+ * or ArrowFunctionExpression (the initializer node is stored). When a name is
+ * seen more than once, the entry visited last by `walk` replaces the earlier
+ * one. The map feeds the arg-sensitive return-taint resolver.
+ *
+ * @param {object} ast Program node
+ * @returns {Map<string, object>} name → function node
+ */
+function collectLocalFns(ast) {
+  const byName = new Map();
+  const isFunctionValue = (init) =>
+    Boolean(init) && (init.type === 'FunctionExpression' || init.type === 'ArrowFunctionExpression');
+  walk(ast, (node) => {
+    switch (node.type) {
+      case 'FunctionDeclaration':
+        if (node.id && node.id.type === 'Identifier') byName.set(node.id.name, node);
+        break;
+      case 'VariableDeclaration':
+        for (const decl of node.declarations) {
+          if (decl.id.type !== 'Identifier' || !isFunctionValue(decl.init)) continue;
+          byName.set(decl.id.name, decl.init);
+        }
+        break;
+      default:
+        break;
+    }
+  });
+  return byName;
+}
+/**
+ * Find SINK ALIASES: `const X = <obj>.<method>` where the non-computed property
+ * name is a member of SQL_SINKS (→ 'sql' alias) or CP_SHELL_EXEC (→ 'shell'
+ * alias). Only a plain Identifier bound by a `const` declaration qualifies, and
+ * any name that appears as the Identifier target of an assignment anywhere in
+ * the tree is dropped again, since it can no longer be trusted to point at the
+ * sink. A later `X(arg)` call is then handled like the underlying sink. A
+ * binding to a method outside both sets (e.g. `const run = console.log`) is
+ * never recorded.
+ *
+ * @param {object} ast Program node
+ * @returns {Map<string, {kind:'sql'|'shell', method:string}>}
+ */
+function collectSinkAliases(ast) {
+  const aliases = new Map(); // name → {kind, method}
+  const assignedNames = new Set(); // names assigned to anywhere → disqualified
+  const sinkKindOf = (method) => {
+    if (SQL_SINKS.has(method)) return 'sql';
+    if (CP_SHELL_EXEC.has(method)) return 'shell';
+    return null;
+  };
+  walk(ast, (node) => {
+    if (node.type === 'AssignmentExpression') {
+      if (node.left.type === 'Identifier') assignedNames.add(node.left.name);
+      return;
+    }
+    // let/var may later be rebound to a non-sink; only const bindings count.
+    if (node.type !== 'VariableDeclaration' || node.kind !== 'const') return;
+    for (const decl of node.declarations) {
+      const init = decl.init;
+      const isPlainMemberBinding =
+        decl.id.type === 'Identifier' &&
+        Boolean(init) &&
+        init.type === 'MemberExpression' &&
+        init.property.type === 'Identifier' &&
+        !init.computed;
+      if (!isPlainMemberBinding) continue;
+      const method = init.property.name;
+      const kind = sinkKindOf(method);
+      if (kind) aliases.set(decl.id.name, { kind, method });
+    }
+  });
+  assignedNames.forEach((name) => aliases.delete(name));
+  return aliases;
+}
+/**
+ * List the expressions a function can return. An arrow function with an
+ * expression body returns exactly that body. Otherwise the body is searched for
+ * ReturnStatement nodes and each non-empty argument is collected in traversal
+ * order. The search stops at nested functions, because a closure's `return`
+ * does not produce the outer function's value.
+ *
+ * @param {object} fn FunctionDeclaration | FunctionExpression | ArrowFunctionExpression
+ * @returns {object[]} return-value expressions
+ */
+function returnsOf(fn) {
+  const hasExpressionBody = fn.type === 'ArrowFunctionExpression' && fn.body.type !== 'BlockStatement';
+  if (hasExpressionBody) return [fn.body];
+
+  // Position metadata and the parent back-link are not child nodes.
+  const skippedKeys = new Set(['loc', 'start', 'end', 'range', '__parent']);
+  const isAstNode = (value) => Boolean(value) && typeof value.type === 'string';
+  const collected = [];
+
+  const visit = (node) => {
+    if (!isAstNode(node)) return;
+    if (node.type === 'ReturnStatement') {
+      if (node.argument) collected.push(node.argument);
+      return;
+    }
+    // A nested function owns its own returns; leave them alone.
+    if (FN_TYPES.has(node.type)) return;
+    for (const [key, child] of Object.entries(node)) {
+      if (skippedKeys.has(key)) continue;
+      if (Array.isArray(child)) {
+        for (const item of child) if (isAstNode(item)) visit(item);
+      } else if (isAstNode(child)) {
+        visit(child);
+      }
+    }
+  };
+
+  visit(fn.body);
+  return collected;
+}
+/**
+ * Arg-sensitive evaluator: classify an expression as { dynamic, text }, where
+ * `text` is the recovered static string (used by the SQL-keyword gate). `env`
+ * maps a parameter name → its already-computed { dynamic, text } at the call
+ * site. This is the inter-procedural core: a CallExpression to a LOCAL function
+ * is resolved by binding its Identifier params to the call arguments'
+ * classifications and evaluating its return expression(s).
+ *
+ * `depth` counts nested LOCAL-CALL resolutions only. Descending into the
+ * operands of a concatenation, a template literal or a call's arguments does
+ * not consume depth, so a long `"SELECT …" + a + b + …` chain inside a helper
+ * keeps its full recovered text instead of being cut off at the limit. Operand
+ * recursion is bounded by the size of the AST; call recursion is bounded by
+ * `seen` and IP_DEPTH_LIMIT.
+ *
+ * Conservative leaves: an Identifier not bound in `env` is treated as
+ * non-dynamic with no text; a call that is not a resolvable local function and
+ * every other node type (MemberExpression, AwaitExpression, …) is dynamic with
+ * no text, so the SQL-keyword gate still guards against false positives.
+ *
+ * @param {object} node
+ * @param {Map<string,{dynamic:boolean,text:string}>} env param bindings
+ * @param {Map<string,object>} fns local-function map
+ * @param {number} depth current local-call resolution depth
+ * @param {Set<string>} seen function names on the active call stack (cycle guard)
+ * @returns {{dynamic:boolean, text:string}}
+ */
+function evalExpr(node, env, fns, depth, seen) {
+  // Past the call-depth limit nothing is trusted: dynamic, no text.
+  if (depth > IP_DEPTH_LIMIT) return { dynamic: true, text: '' };
+  if (!node) return { dynamic: false, text: '' };
+  switch (node.type) {
+    case 'Literal':
+      return { dynamic: false, text: typeof node.value === 'string' ? node.value : '' };
+    case 'TemplateLiteral': {
+      const text = node.quasis
+        .map((q) => {
+          const value = q.value || {};
+          return value.cooked != null ? value.cooked : value.raw || '';
+        })
+        .join(' ');
+      const dynamic = node.expressions.some((e) => evalExpr(e, env, fns, depth, seen).dynamic);
+      return { dynamic, text };
+    }
+    case 'BinaryExpression': {
+      // Only string concatenation can carry text or taint into the result.
+      if (node.operator !== '+') return { dynamic: false, text: '' };
+      const l = evalExpr(node.left, env, fns, depth, seen);
+      const r = evalExpr(node.right, env, fns, depth, seen);
+      return { dynamic: l.dynamic || r.dynamic, text: l.text + ' ' + r.text };
+    }
+    case 'TaggedTemplateExpression':
+      return evalExpr(node.quasi, env, fns, depth, seen);
+    case 'ParenthesizedExpression':
+      return evalExpr(node.expression, env, fns, depth, seen);
+    case 'Identifier': {
+      if (env.has(node.name)) return env.get(node.name);
+      // Unknown bare identifier: conservative — not dynamic-flaggable, no text.
+      return { dynamic: false, text: '' };
+    }
+    case 'CallExpression': {
+      const callee = node.callee;
+      const name = callee.type === 'Identifier' ? callee.name : null;
+      if (name && fns.has(name) && !seen.has(name)) {
+        const fn = fns.get(name);
+        // Bind each Identifier param to the classification of the matching
+        // argument, evaluated in the CALLER's environment. A missing argument
+        // is a non-dynamic empty value.
+        const argEnv = new Map();
+        (fn.params || []).forEach((p, i) => {
+          if (p.type === 'Identifier') {
+            const arg = node.arguments[i];
+            argEnv.set(
+              p.name,
+              arg ? evalExpr(arg, env, fns, depth, seen) : { dynamic: false, text: '' },
+            );
+          }
+        });
+        const seen2 = new Set(seen);
+        seen2.add(name);
+        let dynamic = false;
+        let text = '';
+        // Entering the callee's body is the step that consumes call depth.
+        for (const ret of returnsOf(fn)) {
+          const v = evalExpr(ret, argEnv, fns, depth + 1, seen2);
+          dynamic = dynamic || v.dynamic;
+          text += ' ' + v.text;
+        }
+        return { dynamic, text };
+      }
+      // Unknown / non-local / recursive call → structurally dynamic, no text.
+      return { dynamic: true, text: '' };
+    }
+    default:
+      // MemberExpression (req.body.x), AwaitExpression, etc. — runtime value,
+      // but no statically recoverable text.
+      return { dynamic: true, text: '' };
+  }
+}
+/**
+ * Seed environment for evalExpr at a sink call: walk the `__parent` chain and
+ * mark every name bound by a parameter of each enclosing function as dynamic
+ * with no text. This is how `db.query(build(req.id))` inside
+ * `function h(req){…}` learns that `req` is a runtime value. When a name is
+ * bound by more than one enclosing function, the entry created for the
+ * innermost one is kept.
+ *
+ * @param {object} node a CallExpression sink node
+ * @returns {Map<string,{dynamic:boolean,text:string}>}
+ */
+function enclosingParamEnv(node) {
+  const bindings = new Map();
+  for (let cur = node.__parent; cur; cur = cur.__parent) {
+    if (!FN_TYPES.has(cur.type) || !Array.isArray(cur.params)) continue;
+    for (const param of cur.params) {
+      for (const name of patternNames(param)) {
+        if (bindings.has(name)) continue;
+        bindings.set(name, { dynamic: true, text: '' });
+      }
+    }
+  }
+  return bindings;
+}
 /**
  * Scope: a binding map + parent link. `bindings` maps name → { dynamic: bool }.
  * A name absent from the whole chain resolves to dynamic (unknown = unsafe).
@@ -351,12 +591,16 @@ export function analyze(code, file, opts = {}) {
   annotateParents(ast);
   const scopeOf = buildScopes(ast);
+  // Inter-procedural (intra-module) maps: local functions for arg-sensitive
+  // return-taint resolution, and sink aliases for `const X = obj.query` etc.
+  const ipCtx = { fns: collectLocalFns(ast), aliases: collectSinkAliases(ast) };
   const findings = [];
   const line = (n) => (n.loc ? n.loc.start.line : 0);
   walk(ast, (node) => {
     if (node.type === 'CallExpression') {
-      handleCall(node, scopeOf, file, line, findings);
+      handleCall(node, scopeOf, file, line, findings, ipCtx);
     } else if (node.type === 'NewExpression') {
       handleNew(node, scopeOf, file, line, findings);
     }
@@ -365,6 +609,20 @@ export function analyze(code, file, opts = {}) {
   return { parsed: true, findings: dedupe(findings) };
 }
+/**
+ * Inter-procedural resolution of a sink argument that is itself a call to a
+ * function recorded in `ipCtx.fns`. The call is evaluated with evalExpr,
+ * seeded with the parameters of the functions enclosing the sink `node`, and
+ * the resulting { dynamic, text } is returned so the caller can apply its
+ * usual dynamic + SQL-keyword gate. Returns null when `arg` is missing, is not
+ * a CallExpression, or does not call a known local function by plain name; the
+ * caller then keeps its own classification.
+ */
+function resolveLocalCallArg(arg, node, ipCtx) {
+  const isCall = Boolean(arg) && arg.type === 'CallExpression';
+  if (!isCall) return null;
+  const { callee } = arg;
+  if (callee.type !== 'Identifier' || !ipCtx.fns.has(callee.name)) return null;
+  return evalExpr(arg, enclosingParamEnv(node), ipCtx.fns, 0, new Set());
+}
 /** Resolve the binding scope that ENCLOSES a given node (its nearest function
  * or the program). */
 function scopeForNode(scopeOf, node) {
@@ -379,7 +637,7 @@ function scopeForNode(scopeOf, node) {
   return makeScope(null);
 }
-function handleCall(node, scopeOf, file, line, findings) {
+function handleCall(node, scopeOf, file, line, findings, ipCtx) {
   const callee = node.callee;
   const scope = scopeForNode(scopeOf, node);
   const args = node.arguments || [];
@@ -417,6 +675,20 @@ function handleCall(node, scopeOf, file, line, findings) {
         findings.push(sqlFinding(file, line(node), node));
         return;
       }
+      // INTER-PROCEDURAL (additive): the intraprocedural path above recovers NO
+      // SQL text from a CallExpression arg. If arg0 is a call to a LOCAL helper,
+      // resolve its return arg-sensitively; flag only when the recovered value
+      // is dynamic AND carries a real SQL keyword AND the call is not
+      // parameterized. A const-returning helper or an identity-fn(constant) stays
+      // clean (no dynamic / no recovered keyword); a non-SQL helper stays clean
+      // (keyword gate).
+      if (ipCtx && arg0) {
+        const ip = resolveLocalCallArg(arg0, node, ipCtx);
+        if (ip && ip.dynamic && SQL_KW_RE.test(ip.text) && !isParameterized(node, arg0, scope)) {
+          findings.push(sqlFinding(file, line(node), node));
+          return;
+        }
+      }
     }
     // child_process exec / execSync with a dynamic command string.
@@ -438,6 +710,34 @@ function handleCall(node, scopeOf, file, line, findings) {
     }
   }
+  // SINK ALIAS (inter-procedural): `const X = obj.query` / `const X = cp.execSync`
+  // makes a later `X(arg)` call the same sink. Apply the SAME guards as the
+  // underlying member sink (dynamic + SQL-keyword + parameterized for sql;
+  // dynamic for shell). The arg may itself be a local-function call, so reuse the
+  // inter-procedural resolver. `const run=console.log; run(...)` is not an alias
+  // (console.log is not a tracked sink method) and never reaches here.
+  if (callee.type === 'Identifier' && ipCtx && ipCtx.aliases.has(callee.name)) {
+    const alias = ipCtx.aliases.get(callee.name);
+    const arg0 = args[0];
+    if (arg0) {
+      const ip = resolveLocalCallArg(arg0, node, ipCtx);
+      const dynamic = ip ? ip.dynamic : isDynamic(arg0, scope);
+      if (alias.kind === 'shell') {
+        if (dynamic) {
+          findings.push(cmdFinding(file, line(node), node, 'critical'));
+          return;
+        }
+      } else {
+        // sql alias: dynamic + real SQL keyword + not parameterized.
+        const text = ip ? ip.text : collectStaticText(arg0, scope, new Set());
+        if (dynamic && SQL_KW_RE.test(text) && !isParameterized(node, arg0, scope)) {
+          findings.push(sqlFinding(file, line(node), node));
+          return;
+        }
+      }
+    }
+  }
   // Bare exec/execSync identifier (destructured from child_process):
   //   const { exec } = require('child_process'); exec(cmd);
   if (callee.type === 'Identifier' && CP_SHELL_EXEC.has(callee.name)) {

package/scripts/lib/rules/command-injection.js CHANGED Viewed

@@ -6,6 +6,9 @@
 //   - exec("ls " + userInput)
 //   - execSync(`rm -rf ${path}`)
 //   - spawn(`sh -c ${cmd}`, { shell: true })
+//   - subprocess.run(f"git checkout {branch}", shell=True)   (Python)
+//   - os.system("rm -rf " + path)                            (Python)
+//   - exec.Command("sh", "-c", "tar " + name)                (Go)
 //
 // SAFE forms that must NOT fire (FP=0 against a diverse negative set):
 //   - array-arg spawn/execFile: spawn('ls', ['-la', dir])   (no shell parsing)
@@ -13,6 +16,12 @@
 //   - exec with a plain variable that is itself the whole command and was
 //     validated elsewhere is out of scope (we only flag interpolation INTO a
 //     command string, which is the unambiguous injection shape).
+//   - subprocess.run(["ls", "-la"])  / subprocess.run("ls", shell=False) (Py)
+//   - os.system("ls -la")            (static literal, Python)
+//   - exec.Command("ls", "-la")      (arg array, Go)
+//
+// Multi-language coverage mirrors insecure-tls.js: each language gets its own
+// regex pattern; the JS engine never sees the Python/Go forms and vice versa.
 //
 // Severity: high (critical when force flags / rm appear is left to other rules).
@@ -25,6 +34,23 @@ import { makeRegexScanner } from './_helpers.js';
 const CONCAT_STR = '(?:"[^"\\n]*"|\'[^\'\\n]*\')\\s*\\+'; // "lit" +  / 'lit' +
 const TEMPLATE_INTERP = '`[^`\\n]*\\$\\{[^}]+\\}[^`\\n]*`'; // `...${x}...`
+// Python dynamic-command shapes. A command argument is dynamic when it is:
+//   - an f-string:        f"... {x} ..."  /  f'... {x} ...'
+//   - a concatenation:    "..." + x       (string literal followed by `+`)
+//   - a %-format:         "..." % x       (string literal followed by `%`)
+//   - a bare variable:    cmd             (identifier, not a quote) — matched
+//                         separately via PY_VAR, and only by the subprocess
+//                         shell=True and os.popen patterns; the os.system
+//                         pattern does not match a bare variable.
+// A pure string literal (`"ls -la"`) or a list literal (`["ls", "-la"]`) is
+// the safe shape and must NOT match.
+const PY_FSTRING = 'f(?:"[^"\\n]*\\{[^}]+\\}[^"\\n]*"|\'[^\'\\n]*\\{[^}]+\\}[^\'\\n]*\')';
+const PY_CONCAT = '(?:"[^"\\n]*"|\'[^\'\\n]*\')\\s*[+%]'; // "lit" + x  /  "lit" % x
+// A non-literal, non-list first argument: an identifier (variable) optionally
+// with attribute/subscript access. Excludes a leading quote (string literal)
+// and a leading `[` (list args).
+const PY_VAR = '[A-Za-z_][\\w.\\[\\]\'"]*';
+const PY_DYNAMIC = `(?:${PY_FSTRING}|${PY_CONCAT})`;
 const PATTERNS = [
   {
     // child_process exec / execSync with a concatenated command string.
@@ -59,6 +85,63 @@ const PATTERNS = [
     description: 'spawn/exec with shell:true parses shell metacharacters; the command is built from interpolated/concatenated input — a command-injection / RCE vector.',
     recommendation: 'Drop shell:true and pass an argument array, or strictly validate the input. Never combine shell:true with assembled command strings.',
   },
+  {
+    // Python: subprocess.run / call / Popen with shell=True AND a dynamic
+    // command (f-string / concat / %-format / bare variable). shell=True hands
+    // the command string to /bin/sh, so an assembled command is injectable.
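+    // A list-literal first arg (`subprocess.run(["ls","-la"])`) never matches.
+    // The regex requires a non-list dynamic command and then `shell=True`
+    // within the next 200 characters; it does not verify that both belong to
+    // the same call, so a shell=False call matches only if a `shell=True`
+    // happens to follow inside that window.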
+    //   subprocess.run(f"git checkout {branch}", shell=True)
+    //   subprocess.Popen("rm -rf " + path, shell=True)
+    //   subprocess.call(cmd, shell=True)
+    id: 'py-subprocess-shell-true',
+    re: new RegExp(`\\bsubprocess\\.(?:run|call|Popen|check_output|check_call)\\s*\\(\\s*(?:${PY_DYNAMIC}|${PY_VAR})[\\s\\S]{0,200}?shell\\s*=\\s*True`, 'g'),
+    severity: 'critical',
+    title: 'Python subprocess with shell=True and a dynamic command',
+    description: 'subprocess.run/call/Popen with shell=True runs the command string through the shell; the command is built from an f-string / concatenation / variable — an OS-command-injection vector.',
+    recommendation: 'Drop shell=True and pass an argument list (subprocess.run(["git", "checkout", branch])), or strictly validate the input.',
+  },
+  {
+    // Python: os.system with a dynamic command (f-string / concat / %-format).
+    // A static literal (os.system("ls -la")) is the safe shape and is excluded
+    // by requiring an f-string or a literal-then-(+/%) concatenation.
+    //   os.system(f"rm -rf {path}")   os.system("tar " + name)
+    id: 'py-os-system',
+    re: new RegExp(`\\bos\\.system\\s*\\(\\s*(?:${PY_DYNAMIC})`, 'g'),
+    severity: 'critical',
+    title: 'Python os.system with a dynamic command',
+    description: 'os.system runs the string through the shell; the command is assembled from an f-string / concatenation / %-format of a variable — an OS-command-injection vector.',
+    recommendation: 'Use subprocess.run with an argument list and shell=False, or strictly validate the input. Never feed os.system an assembled command.',
+  },
+  {
+    // Python: os.popen with a dynamic command (f-string / concat / %-format /
+    // bare variable). os.popen always goes through the shell, so any non-literal
+    // command is injectable. A static literal first arg does not match.
+    //   os.popen(f"ls {dir}")   os.popen("grep " + pat)   os.popen(cmd)
+    id: 'py-os-popen',
+    re: new RegExp(`\\bos\\.popen\\s*\\(\\s*(?:${PY_DYNAMIC}|${PY_VAR}\\s*[,)])`, 'g'),
+    severity: 'high',
+    title: 'Python os.popen with a dynamic command',
+    description: 'os.popen runs the command through the shell; the command is built from an f-string / concatenation / variable — an OS-command-injection vector.',
+    recommendation: 'Use subprocess.run with an argument list (shell=False). Do not pass an assembled command to os.popen.',
+  },
+  {
+    // Go: exec.Command("sh", "-c", <dynamic>) / ("bash", "-c", <dynamic>).
+    // Passing a shell with `-c` and a concatenated / Sprintf'd / variable third
+    // argument re-introduces shell parsing — the Go command-injection shape.
+    // exec.Command("ls", "-la") (a real binary + literal args) does NOT match
+    // because the first arg must be a shell (sh/bash) followed by -c.
+    //   exec.Command("sh", "-c", "tar " + name)
+    //   exec.Command("bash", "-c", fmt.Sprintf("rm %s", path))
+    //   exec.Command("sh", "-c", cmd)
+    id: 'go-exec-shell-c',
+    re: /\bexec\.Command\s*\(\s*"(?:sh|bash|\/bin\/sh|\/bin\/bash)"\s*,\s*"-c"\s*,\s*(?:"[^"\n]*"\s*\+|fmt\.Sprintf\s*\(|[A-Za-z_]\w*\s*[,)])/g,
+    severity: 'critical',
+    title: 'Go exec.Command with sh -c and a dynamic command',
+    description: 'exec.Command("sh", "-c", <dynamic>) runs the third argument through the shell; it is built from concatenation / fmt.Sprintf / a variable — an OS-command-injection vector.',
+    recommendation: 'Invoke the target binary directly with separate argument strings (exec.Command("git", "checkout", branch)) instead of routing through sh -c.',
+  },
 ];
 const SCANNER = makeRegexScanner({

package/scripts/lib/rules/eval-usage.js CHANGED Viewed

@@ -4,6 +4,15 @@
 // vector when fed anything that is not a compile-time constant:
 //   - eval(<non-literal>)            // string-built code executed at runtime
 //   - new Function(<...>)            // the Function constructor = eval by proxy
+//   - exec(<non-literal>)            // Python builtin exec(); runs a code string
+//
+// Note: the language-agnostic `eval(` token means Python `eval(user_input)` is
+// already caught by the JS eval-call pattern below. Python's SAFE alternative
+// ast.literal_eval(x) does NOT fire because eval-call's `(?<![.\w$])` lookbehind
+// rejects `eval` when it is preceded by a word character (the `_` in
+// `literal_eval`). The Python `exec()` builtin gets its own pattern (exec-call)
+// with the same lookbehind plus a static-literal filter, so member calls like
+// RegExp.exec / cursor.exec are rejected by the lookbehind and a static
+// child_process exec("ls") is dropped by the filter.
 //
 // Comment-stripping (default in makeRegexScanner) removes the word "eval" in
 // comments and the disable-directive `// eslint-disable ... no-eval` lines, so
@@ -13,6 +22,17 @@
 import { makeRegexScanner } from './_helpers.js';
+// Shared filter for a regex match `m` whose group 1 is the call's argument text:
+// returns false for an empty argument or one static "lit" / 'lit' / `lit` literal
+// (low-signal static eval/exec); concatenation / interpolation / variable -> true.
+const isDynamicArg = (m) => {
+  const arg = (m[1] || '').trim(); // a missing/empty capture group becomes ''
+  if (!arg) return false; // nothing captured: treated as not dynamic
+  if (/^(["'])(?:[^"'\\\n]|\\.)*\1\s*$/.test(arg)) return false; // "lit" / 'lit'
+  if (/^`[^`$]*`\s*$/.test(arg)) return false;                   // `lit` (no ${})
+  return true; // NOTE(review): "it's" (unescaped other-kind quote) also lands here
+};
 const PATTERNS = [
   {
     // eval( <arg> ) where the first non-space char of the argument is NOT a
@@ -36,6 +56,21 @@ const PATTERNS = [
       return true;
     },
   },
+  {
+    // Python builtin exec( <arg> ) with a non-literal argument: exec(code),
+    // exec("x = " + val), exec(f"...{x}..."). The `(?<![.\w$])` lookbehind keeps
+    // member calls and longer names out (RegExp.exec, cursor.exec, cp.exec, my_exec;
+    // child_process exec is a command-injection concern, handled by that rule).
+    // Group 1 holds the argument text up to the first `)`; isDynamicArg drops a pure
+    // static literal (exec("pass")) as low-signal, like eval-call. .execute(...) never matches.
+    id: 'exec-call', // NOTE(review): a bare JS exec(cmd) call has the same shape and also matches
+    re: /(?<![.\w$])exec\s*\(\s*([^)]*)/g,
+    severity: 'high',
+    title: 'exec() with dynamic input detected',
+    description: 'Python exec() runs a string as code. With any runtime-assembled or external input this is a code-injection / RCE vector.',
+    recommendation: 'Remove exec(). Use ast.literal_eval for data, a lookup table / getattr for dispatch, or a real parser.',
+    filter: isDynamicArg,
+  },
   {
     // new Function('a','b','return a+b') — the Function constructor compiles a
     // string body into a function. Always flagged: the body is a string and the

package/scripts/lib/rules/sql-injection.js CHANGED Viewed

@@ -6,17 +6,24 @@
 //   - db.query("SELECT * FROM users WHERE id = " + userId)
 //   - db.query(`SELECT * FROM t WHERE x = ${req.body.x}`)
 //   - conn.execute("DELETE FROM logs WHERE owner='" + name + "'")
+//   - cursor.execute(f"SELECT * FROM users WHERE id = {uid}")   (Python f-string)
+//   - cur.execute("DELETE FROM t WHERE id = " + uid)            (Python concat)
+//   - cursor.execute("SELECT ... %s" % uid)                     (Python %-format)
 //
 // SAFE forms that must NOT fire (FP=0 against a diverse negative set):
 //   - parameterized query: query("SELECT ... WHERE id = $1", [id])
 //   - placeholder query:   query("SELECT ... WHERE id = ?", [id])
 //   - fully static SQL:     query("SELECT 1")
 //   - ORM / builder calls:  repo.find({ where: { id } }) / qb.where('id = :id')
+//   - Python parameterized: cursor.execute("SELECT ... %s", (id,))  (2-arg form)
 //
 // The gate that keeps FP low: the dynamic string must contain a SQL DML/DDL
 // keyword (SELECT/INSERT/UPDATE/DELETE/...) AND mix in a concatenation or a
-// ${...} interpolation. A query() call with a static string + params array is
-// the safe shape and is explicitly excluded (no concat / no interpolation).
+// ${...} / f-string interpolation / %-format. A query() call with a static
+// string + params array is the safe shape and is explicitly excluded (no
+// concat / no interpolation). The Python %-format pattern requires the `%` to
+// be a string-format operator (`"literal" % operand`), NOT the safe 2-arg
+// `.execute(sql, params)` call where params follow a comma.
 //
 // Severity: high.
@@ -55,6 +62,36 @@ const PATTERNS = [
     description: 'A SQL query template literal interpolates a variable with ${...} and is passed to query/execute/raw — a SQL-injection vector.',
     recommendation: 'Use parameterized queries (placeholders + a params array), not template interpolation.',
   },
+  {
+    // Python f-string SQL: cursor.execute(f"SELECT ... {x} ...").
+    // The f-string must contain a SQL keyword followed, before the closing quote
+    // or a newline, by a {..} interpolation (keyword first, then `{`). The safe
+    // 2-arg form (cursor.execute("SELECT ... %s", (id,))) has no `f` prefix and never matches.
+    //   cursor.execute(f"SELECT * FROM users WHERE id = {uid}")
+    //   cur.execute(f'DELETE FROM t WHERE name = {name}')
+    id: 'sql-py-fstring', // NOTE(review): only a bare f/F prefix matches; rf"..." / fr"..." do not
+    re: new RegExp(`\\.${SINK}\\s*\\(\\s*f(?:"[^"\\n]*${SQL_KW}[^"\\n]*\\{[^}]+\\}|'[^'\\n]*${SQL_KW}[^'\\n]*\\{[^}]+\\})`, 'gi'),
+    severity: 'high',
+    title: 'SQL string built by Python f-string interpolation',
+    description: 'A Python f-string SQL query interpolates a variable with {..} and is passed to cursor.execute — a SQL-injection vector.',
+    recommendation: 'Use a parameterized query: cursor.execute("SELECT ... WHERE id = %s", (id,)). Never build SQL with an f-string.',
+  },
+  {
+    // Python %-format SQL: cursor.execute("SELECT ... %s ..." % x). The string
+    // literal (first argument, on one line) contains a SQL keyword and is followed
+    // by a `%` FORMAT operator, distinct from the safe 2-arg `.execute(sql, params)`
+    // where params follow a COMMA. After the `%` and any spaces, the lookahead
+    // rejects whitespace and `)`, so the next character starts an operand (a
+    // variable / tuple); a literal that simply ends the call does not match.
+    //   cursor.execute("SELECT * FROM users WHERE id = %s" % uid)
+    //   cur.execute("DELETE FROM t WHERE name = '%s'" % (name,))
+    id: 'sql-py-percent', // NOTE(review): only a literal first argument is inspected; sql_var % x is not
+    re: new RegExp(`\\.${SINK}\\s*\\(\\s*(?:"[^"\\n]*${SQL_KW}[^"\\n]*"|'[^'\\n]*${SQL_KW}[^'\\n]*')\\s*%\\s*(?![\\s)])`, 'gi'),
+    severity: 'high',
+    title: 'SQL string built by Python %-format',
+    description: 'A Python SQL query is assembled with the %-format operator (string % value) and passed to cursor.execute — a SQL-injection vector. This is NOT the safe 2-arg execute(sql, params) form.',
+    recommendation: 'Use the 2-argument parameterized form: cursor.execute("SELECT ... %s", (id,)) where the driver binds the params — not Python string formatting.',
+  },
 ];
 const SCANNER = makeRegexScanner({