npm - @ps-neko/nekowork - Versions diffs - 0.2.0-alpha.8 → 0.2.0-alpha.9 - Mend

@ps-neko/nekowork 0.2.0-alpha.8 → 0.2.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +5 -4
package/package.json +1 -1
package/scripts/check.js +66 -4
package/scripts/lib/rules/command-injection.js +83 -0
package/scripts/lib/rules/eval-usage.js +35 -0
package/scripts/lib/rules/sql-injection.js +39 -2

package/README.md CHANGED Viewed

@@ -10,11 +10,12 @@ hardcoded credentials, auto-push/commit, test/security disables, risky package
 hooks, eval, insecure TLS, CORS wildcard, basic SQL/command injection, and AST
 dataflow taint for variable-mediated injection) and routes everything else to a
 human decision. It is **not an exhaustive security audit** — the AST rule is
-intraprocedural (single-function, JS/TS); cross-function and whole-program dataflow
-are out of scope. The verdict is deterministic (same diff, same result), and it never
+inter-procedural (intra-module, JS/TS): it follows taint across functions within a
+single file (local-helper returns, sink aliasing); cross-file and whole-program
+dataflow are out of scope. The verdict is deterministic (same diff, same result), and it never
 commits, pushes, or deploys on its own. **You** make the final call.
-> Note: the published `@alpha` (0.2.0-alpha.7) now ships all **11 rules** described
+> Note: the published `@alpha` (0.2.0-alpha.8) now ships all **11 rules** described
 > above (incl. eval, insecure TLS, CORS wildcard, SQL/command injection, AST dataflow)
 > and adds **one tiny, well-known dependency** (`acorn`, the JS parser — MIT, zero
 > transitive dependencies) for the AST engine. Always install with the **`@alpha`**
@@ -85,7 +86,7 @@ step — it is not triggered by `decision.json`.
 - [Quickstart](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/QUICKSTART.md)
 - [How verification works](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/SCOPE-1.0.md)
-- [Benchmark](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/BENCHMARK.md) — 11 rules, 184/184 (100%) recall, 0/120 FP; 30 real OSS positives on `secret-fallback`, the newer rules (incl. sql/command injection and `ast-dataflow`) are synthetic-only
+- [Benchmark](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/BENCHMARK.md) — 11 rules, 234/234 (100%) recall, 0/130 FP; ~82 real OSS positives across rules (incl. 30 on `secret-fallback`), synthetic share 63%; `hardcoded-credential` stays synthetic-only by design
 - [Integration](https://github.com/Ps-Neko/NEKOWORK/blob/main/packages/nekowork-cli/docs/INTEGRATION.md)
 ## License

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ps-neko/nekowork",
-  "version": "0.2.0-alpha.8",
+  "version": "0.2.0-alpha.9",
   "description": "Local verification gate for AI-written code diffs. Deterministic rules decide the verdict, never the LLM. No auto-commit, push, or deploy — you decide at the Human Gate.",
   "keywords": [
     "ai-code-review",

package/scripts/check.js CHANGED Viewed

@@ -72,20 +72,73 @@ function checkHasCommit() {
   }
 }
+// Mirror scripts/lib/diff-parser.js isSelfOutput: verify-pr drops its own output
+// (REPORT.md + .nekowork/**) from every diff source, so those artifacts must not
+// count as "working-tree changes" here either. Case-insensitive to match the
+// parser (Windows/macOS case-insensitive filesystems resolve REPORT.MD etc. to
+// the same files).
+function isSelfOutput(relPath) {
+  const lower = String(relPath).toLowerCase();
+  return lower === 'report.md' || lower.startsWith('.nekowork/');
+}
+// Parse one `git status --porcelain` line into its repo-relative path. Porcelain
+// v1 format is `XY <path>` (2 status chars + space + path); renames use
+// `XY old -> new`, where the post-rename path is what verify-pr would scan.
+function porcelainPath(line) {
+  let p = line.slice(3);
+  const arrow = p.indexOf(' -> ');
+  if (arrow !== -1) p = p.slice(arrow + ' -> '.length);
+  // Porcelain quotes paths with special chars; strip surrounding quotes.
+  if (p.startsWith('"') && p.endsWith('"')) p = p.slice(1, -1);
+  return p.replace(/\\/g, '/');
+}
 function checkDiff() {
+  // Use `git status --porcelain` (NOT `git diff`): plain `git diff` omits
+  // UNTRACKED new files, but verify-pr DOES scan them (synthesizeUntrackedDiff).
+  // Reporting "no diff" while verify-pr finds untracked criticals is a misleading
+  // false-negative. Porcelain lists untracked with `??`, so it matches verify-pr's
+  // diff scope. We then drop nekowork's own output so its artifacts don't count.
   const r = spawnSync('git', ['status', '--porcelain'], { encoding: 'utf8' });
   if (r.status !== 0) {
     record('git-diff', STATUSES.WARN, 'could not check working-tree state');
     return;
   }
-  const lines = r.stdout.split('\n').filter(l => l && !l.startsWith('??'));
-  if (lines.length > 0) {
-    record('git-diff', STATUSES.PASS, `${lines.length} modified file(s) — verify-pr will scan these`);
+  const changed = r.stdout
+    .split('\n')
+    .filter(Boolean)
+    .map(porcelainPath)
+    .filter(p => p && !isSelfOutput(p));
+  if (changed.length > 0) {
+    record('git-diff', STATUSES.PASS, `working-tree changes detected (${changed.length} file(s)) — verify-pr will scan them`);
   } else {
-    record('git-diff', STATUSES.WARN, 'no working-tree diff — `verify-pr` will report no changes');
+    record('git-diff', STATUSES.WARN, 'no changes to scan — `verify-pr` will report no changes');
   }
 }
+// Gentle, non-blocking hint: verify-pr leaves its evidence output (.nekowork/ and
+// REPORT.md) in the user's repo, which then shows up in `git status`. If those
+// artifacts already exist AND are not gitignored, suggest adding them. Returns a
+// hint string or null. Never a check/failure — just a nudge.
+function gitignoreHint() {
+  const artifacts = ['.nekowork/', 'REPORT.md'];
+  const present = artifacts.filter(a => {
+    try { return fs.existsSync(path.resolve(process.cwd(), a.replace(/\/$/, ''))); } catch { return false; }
+  });
+  if (present.length === 0) return null;
+  // git check-ignore exits 0 if the path IS ignored, 1 if not. Hint only for
+  // artifacts that exist but are NOT ignored.
+  const notIgnored = present.filter(a => {
+    const r = spawnSync('git', ['check-ignore', '-q', a], { encoding: 'utf8' });
+    return r.status !== 0;
+  });
+  if (notIgnored.length === 0) return null;
+  return 'Tip: NEKOWORK wrote evidence (.nekowork/, REPORT.md) into this repo. '
+    + 'Add them to .gitignore so they don\'t clutter `git status`:\n'
+    + '       echo -e ".nekowork/\\nREPORT.md" >> .gitignore';
+}
 checkNode();
 checkGitBinary();
 checkInsideRepo();
@@ -121,6 +174,15 @@ if (json) {
   } else {
     console.log('Ready. Next: `nekowork verify-pr`');
   }
+  // Only meaningful inside a repo (where check-ignore works). git-repo PASS implies that.
+  const repoOk = checks.find(c => c.name === 'git-repo')?.status === STATUSES.PASS;
+  if (repoOk) {
+    const hint = gitignoreHint();
+    if (hint) {
+      console.log('');
+      console.log(`  [i] ${hint}`);
+    }
+  }
 }
 process.exit(worstRank);

package/scripts/lib/rules/command-injection.js CHANGED Viewed

@@ -6,6 +6,9 @@
 //   - exec("ls " + userInput)
 //   - execSync(`rm -rf ${path}`)
 //   - spawn(`sh -c ${cmd}`, { shell: true })
+//   - subprocess.run(f"git checkout {branch}", shell=True)   (Python)
+//   - os.system("rm -rf " + path)                            (Python)
+//   - exec.Command("sh", "-c", "tar " + name)                (Go)
 //
 // SAFE forms that must NOT fire (FP=0 against a diverse negative set):
 //   - array-arg spawn/execFile: spawn('ls', ['-la', dir])   (no shell parsing)
@@ -13,6 +16,12 @@
 //   - exec with a plain variable that is itself the whole command and was
 //     validated elsewhere is out of scope (we only flag interpolation INTO a
 //     command string, which is the unambiguous injection shape).
+//   - subprocess.run(["ls", "-la"])  / subprocess.run("ls", shell=False) (Py)
+//   - os.system("ls -la")            (static literal, Python)
+//   - exec.Command("ls", "-la")      (arg array, Go)
+//
+// Multi-language coverage mirrors insecure-tls.js: each language gets its own
+// regex pattern; the JS engine never sees the Python/Go forms and vice versa.
 //
 // Severity: high (critical when force flags / rm appear is left to other rules).
@@ -25,6 +34,23 @@ import { makeRegexScanner } from './_helpers.js';
 const CONCAT_STR = '(?:"[^"\\n]*"|\'[^\'\\n]*\')\\s*\\+'; // "lit" +  / 'lit' +
 const TEMPLATE_INTERP = '`[^`\\n]*\\$\\{[^}]+\\}[^`\\n]*`'; // `...${x}...`
+// Python dynamic-command shapes. A command argument is dynamic when it is:
+//   - an f-string:        f"... {x} ..."  /  f'... {x} ...'
+//   - a concatenation:    "..." + x       (string literal followed by `+`)
+//   - a %-format:         "..." % x       (string literal followed by `%`)
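+//   - a bare variable:    cmd             (identifier, not a quote) — only for
+//                         the subprocess shell=True and os.popen sinks, where a
+//                         non-literal first arg is the injectable shape. The
+//                         os.system pattern matches only the f-string / concat /
+//                         %-format shapes, not a bare variable.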
+// A pure string literal (`"ls -la"`) or a list literal (`["ls", "-la"]`) is
+// the safe shape and must NOT match.
+const PY_FSTRING = 'f(?:"[^"\\n]*\\{[^}]+\\}[^"\\n]*"|\'[^\'\\n]*\\{[^}]+\\}[^\'\\n]*\')';
+const PY_CONCAT = '(?:"[^"\\n]*"|\'[^\'\\n]*\')\\s*[+%]'; // "lit" + x  /  "lit" % x
+// A non-literal, non-list first argument: an identifier (variable) optionally
+// with attribute/subscript access. Excludes a leading quote (string literal)
+// and a leading `[` (list args).
+const PY_VAR = '[A-Za-z_][\\w.\\[\\]\'"]*';
+const PY_DYNAMIC = `(?:${PY_FSTRING}|${PY_CONCAT})`;
 const PATTERNS = [
   {
     // child_process exec / execSync with a concatenated command string.
@@ -59,6 +85,63 @@ const PATTERNS = [
     description: 'spawn/exec with shell:true parses shell metacharacters; the command is built from interpolated/concatenated input — a command-injection / RCE vector.',
     recommendation: 'Drop shell:true and pass an argument array, or strictly validate the input. Never combine shell:true with assembled command strings.',
   },
+  {
+    // Python: subprocess.run / call / Popen with shell=True AND a dynamic
+    // command (f-string / concat / %-format / bare variable). shell=True hands
+    // the command string to /bin/sh, so an assembled command is injectable.
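+    // A list-literal first arg (`subprocess.run(["ls","-la"])`) does not match,
+    // and neither does shell=False on its own — the regex requires a non-list
+    // dynamic command followed by shell=True within the next 200 characters.
+    // That window approximates "the same call"; the regex does not track the
+    // closing parenthesis.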
+    //   subprocess.run(f"git checkout {branch}", shell=True)
+    //   subprocess.Popen("rm -rf " + path, shell=True)
+    //   subprocess.call(cmd, shell=True)
+    id: 'py-subprocess-shell-true',
+    re: new RegExp(`\\bsubprocess\\.(?:run|call|Popen|check_output|check_call)\\s*\\(\\s*(?:${PY_DYNAMIC}|${PY_VAR})[\\s\\S]{0,200}?shell\\s*=\\s*True`, 'g'),
+    severity: 'critical',
+    title: 'Python subprocess with shell=True and a dynamic command',
+    description: 'subprocess.run/call/Popen with shell=True runs the command string through the shell; the command is built from an f-string / concatenation / variable — an OS-command-injection vector.',
+    recommendation: 'Drop shell=True and pass an argument list (subprocess.run(["git", "checkout", branch])), or strictly validate the input.',
+  },
+  {
+    // Python: os.system with a dynamic command (f-string / concat / %-format).
+    // A static literal (os.system("ls -la")) is the safe shape and is excluded
+    // by requiring an f-string or a literal-then-(+/%) concatenation.
+    //   os.system(f"rm -rf {path}")   os.system("tar " + name)
+    id: 'py-os-system',
+    re: new RegExp(`\\bos\\.system\\s*\\(\\s*(?:${PY_DYNAMIC})`, 'g'),
+    severity: 'critical',
+    title: 'Python os.system with a dynamic command',
+    description: 'os.system runs the string through the shell; the command is assembled from an f-string / concatenation / %-format of a variable — an OS-command-injection vector.',
+    recommendation: 'Use subprocess.run with an argument list and shell=False, or strictly validate the input. Never feed os.system an assembled command.',
+  },
+  {
+    // Python: os.popen with a dynamic command (f-string / concat / %-format /
+    // bare variable). os.popen always goes through the shell, so any non-literal
+    // command is injectable. A static literal first arg does not match.
+    //   os.popen(f"ls {dir}")   os.popen("grep " + pat)   os.popen(cmd)
+    id: 'py-os-popen',
+    re: new RegExp(`\\bos\\.popen\\s*\\(\\s*(?:${PY_DYNAMIC}|${PY_VAR}\\s*[,)])`, 'g'),
+    severity: 'high',
+    title: 'Python os.popen with a dynamic command',
+    description: 'os.popen runs the command through the shell; the command is built from an f-string / concatenation / variable — an OS-command-injection vector.',
+    recommendation: 'Use subprocess.run with an argument list (shell=False). Do not pass an assembled command to os.popen.',
+  },
+  {
+    // Go: exec.Command("sh", "-c", <dynamic>) / ("bash", "-c", <dynamic>).
+    // Passing a shell with `-c` and a concatenated / Sprintf'd / variable third
+    // argument re-introduces shell parsing — the Go command-injection shape.
+    // exec.Command("ls", "-la") (a real binary + literal args) does NOT match
+    // because the first arg must be a shell (sh/bash) followed by -c.
+    //   exec.Command("sh", "-c", "tar " + name)
+    //   exec.Command("bash", "-c", fmt.Sprintf("rm %s", path))
+    //   exec.Command("sh", "-c", cmd)
+    id: 'go-exec-shell-c',
+    re: /\bexec\.Command\s*\(\s*"(?:sh|bash|\/bin\/sh|\/bin\/bash)"\s*,\s*"-c"\s*,\s*(?:"[^"\n]*"\s*\+|fmt\.Sprintf\s*\(|[A-Za-z_]\w*\s*[,)])/g,
+    severity: 'critical',
+    title: 'Go exec.Command with sh -c and a dynamic command',
+    description: 'exec.Command("sh", "-c", <dynamic>) runs the third argument through the shell; it is built from concatenation / fmt.Sprintf / a variable — an OS-command-injection vector.',
+    recommendation: 'Invoke the target binary directly with separate argument strings (exec.Command("git", "checkout", branch)) instead of routing through sh -c.',
+  },
 ];
 const SCANNER = makeRegexScanner({

package/scripts/lib/rules/eval-usage.js CHANGED Viewed

@@ -4,6 +4,15 @@
 // vector when fed anything that is not a compile-time constant:
 //   - eval(<non-literal>)            // string-built code executed at runtime
 //   - new Function(<...>)            // the Function constructor = eval by proxy
+//   - exec(<non-literal>)            // Python builtin exec(); runs a code string
+//
+// Note: the language-agnostic `eval(` token means Python `eval(user_input)` is
+// already caught by the JS eval-call pattern below. Python's SAFE alternative
+// ast.literal_eval(x) does NOT fire because eval-call's `(?<![.\w$])` lookbehind
+// rejects the `.eval` member form. The Python `exec()` builtin gets its own
+// pattern (exec-call) with the same lookbehind + static-literal filter: the
+// lookbehind rejects member calls like RegExp.exec / cursor.exec, and the
+// filter drops static-literal calls like child_process exec("ls"), so neither
+// form matches.
 //
 // Comment-stripping (default in makeRegexScanner) removes the word "eval" in
 // comments and the disable-directive `// eslint-disable ... no-eval` lines, so
@@ -13,6 +22,17 @@
 import { makeRegexScanner } from './_helpers.js';
+// Shared filter: reject a pure single string-literal / static template argument
+// (low-signal static eval/exec). Any concatenation / interpolation / variable
+// is dynamic and is kept.
+const isDynamicArg = (m) => {
+  const arg = (m[1] || '').trim();
+  if (!arg) return false;
+  if (/^(["'])(?:[^"'\\\n]|\\.)*\1\s*$/.test(arg)) return false; // "lit" / 'lit'
+  if (/^`[^`$]*`\s*$/.test(arg)) return false;                   // `lit` (no ${})
+  return true;
+};
 const PATTERNS = [
   {
     // eval( <arg> ) where the first non-space char of the argument is NOT a
@@ -36,6 +56,21 @@ const PATTERNS = [
       return true;
     },
   },
+  {
+    // Python builtin exec( <arg> ) with a non-literal argument: exec(code),
+    // exec("x = " + val), exec(f"...{x}..."). The `(?<![.\w$])` lookbehind keeps
+    // member calls out (RegExp.exec, cursor.exec, cp.exec — child_process exec
+    // is a command-injection concern, handled by that rule, not eval-usage). A
+    // pure static literal (exec("pass")) is filtered as low-signal, matching the
+    // eval-call behavior. ast.literal_eval / .execute(...) never match.
+    id: 'exec-call',
+    re: /(?<![.\w$])exec\s*\(\s*([^)]*)/g,
+    severity: 'high',
+    title: 'exec() with dynamic input detected',
+    description: 'Python exec() runs a string as code. With any runtime-assembled or external input this is a code-injection / RCE vector.',
+    recommendation: 'Remove exec(). Use ast.literal_eval for data, a lookup table / getattr for dispatch, or a real parser.',
+    filter: isDynamicArg,
+  },
   {
     // new Function('a','b','return a+b') — the Function constructor compiles a
     // string body into a function. Always flagged: the body is a string and the

package/scripts/lib/rules/sql-injection.js CHANGED Viewed

@@ -6,17 +6,24 @@
 //   - db.query("SELECT * FROM users WHERE id = " + userId)
 //   - db.query(`SELECT * FROM t WHERE x = ${req.body.x}`)
 //   - conn.execute("DELETE FROM logs WHERE owner='" + name + "'")
+//   - cursor.execute(f"SELECT * FROM users WHERE id = {uid}")   (Python f-string)
+//   - cur.execute("DELETE FROM t WHERE id = " + uid)            (Python concat)
+//   - cursor.execute("SELECT ... %s" % uid)                     (Python %-format)
 //
 // SAFE forms that must NOT fire (FP=0 against a diverse negative set):
 //   - parameterized query: query("SELECT ... WHERE id = $1", [id])
 //   - placeholder query:   query("SELECT ... WHERE id = ?", [id])
 //   - fully static SQL:     query("SELECT 1")
 //   - ORM / builder calls:  repo.find({ where: { id } }) / qb.where('id = :id')
+//   - Python parameterized: cursor.execute("SELECT ... %s", (id,))  (2-arg form)
 //
 // The gate that keeps FP low: the dynamic string must contain a SQL DML/DDL
 // keyword (SELECT/INSERT/UPDATE/DELETE/...) AND mix in a concatenation or a
-// ${...} interpolation. A query() call with a static string + params array is
-// the safe shape and is explicitly excluded (no concat / no interpolation).
+// ${...} / f-string interpolation / %-format. A query() call with a static
+// string + params array is the safe shape and is explicitly excluded (no
+// concat / no interpolation). The Python %-format pattern requires the `%` to
+// be a string-format operator (literal `%` operand), NOT the safe 2-arg
+// `.execute(sql, params)` call where params follow a comma.
 //
 // Severity: high.
@@ -55,6 +62,36 @@ const PATTERNS = [
     description: 'A SQL query template literal interpolates a variable with ${...} and is passed to query/execute/raw — a SQL-injection vector.',
     recommendation: 'Use parameterized queries (placeholders + a params array), not template interpolation.',
   },
+  {
+    // Python f-string SQL: cursor.execute(f"SELECT ... {x} ...").
+    // The f-string must contain a SQL keyword AND a {..} interpolation. The
+    // safe Python 2-arg form (cursor.execute("SELECT ... %s", (id,))) uses a
+    // plain string literal (no `f` prefix, no {..}) and never matches.
+    //   cursor.execute(f"SELECT * FROM users WHERE id = {uid}")
+    //   cur.execute(f'DELETE FROM t WHERE name = {name}')
+    id: 'sql-py-fstring',
+    re: new RegExp(`\\.${SINK}\\s*\\(\\s*f(?:"[^"\\n]*${SQL_KW}[^"\\n]*\\{[^}]+\\}|'[^'\\n]*${SQL_KW}[^'\\n]*\\{[^}]+\\})`, 'gi'),
+    severity: 'high',
+    title: 'SQL string built by Python f-string interpolation',
+    description: 'A Python f-string SQL query interpolates a variable with {..} and is passed to cursor.execute — a SQL-injection vector.',
+    recommendation: 'Use a parameterized query: cursor.execute("SELECT ... WHERE id = %s", (id,)). Never build SQL with an f-string.',
+  },
+  {
+    // Python %-format SQL: cursor.execute("SELECT ... %s ..." % x). The string
+    // literal contains a SQL keyword and is followed by a `%` FORMAT operator
+    // (string-format), distinct from the safe 2-arg `.execute(sql, params)`
+    // where params follow a COMMA. We require the literal to be immediately
+    // followed by `%` and then a non-`)` operand (a variable / tuple), so a
+    // literal that simply ends the call does not match.
+    //   cursor.execute("SELECT * FROM users WHERE id = %s" % uid)
+    //   cur.execute("DELETE FROM t WHERE name = '%s'" % (name,))
+    id: 'sql-py-percent',
+    re: new RegExp(`\\.${SINK}\\s*\\(\\s*(?:"[^"\\n]*${SQL_KW}[^"\\n]*"|'[^'\\n]*${SQL_KW}[^'\\n]*')\\s*%\\s*(?![\\s)])`, 'gi'),
+    severity: 'high',
+    title: 'SQL string built by Python %-format',
+    description: 'A Python SQL query is assembled with the %-format operator (string % value) and passed to cursor.execute — a SQL-injection vector. This is NOT the safe 2-arg execute(sql, params) form.',
+    recommendation: 'Use the 2-argument parameterized form: cursor.execute("SELECT ... %s", (id,)) where the driver binds the params — not Python string formatting.',
+  },
 ];
 const SCANNER = makeRegexScanner({