npm - @gotgenes/pi-permission-system - Versions diffs - 5.0.0 → 5.1.0 - Mend

@gotgenes/pi-permission-system 5.0.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md +13 -0
package/package.json +1 -1
package/src/external-directory.ts +238 -14
package/tests/bash-external-directory.test.ts +227 -0

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [5.1.0](https://github.com/gotgenes/pi-permission-system/compare/v5.0.0...v5.1.0) (2026-05-05)
+### Features
+* command-aware path extraction for pattern-first commands ([#91](https://github.com/gotgenes/pi-permission-system/issues/91)) ([befca23](https://github.com/gotgenes/pi-permission-system/commit/befca2341e1b54d9ed7e6ff3c3d465776afcc50d))
+### Documentation
+* plan command-aware path extraction for sed/awk/grep/rg/sd ([#91](https://github.com/gotgenes/pi-permission-system/issues/91)) ([be88a6a](https://github.com/gotgenes/pi-permission-system/commit/be88a6ab66ab386ce5843b3dd12218fc7968ee15))
+* **retro:** add retro notes for issue [#88](https://github.com/gotgenes/pi-permission-system/issues/88) ([453a8ba](https://github.com/gotgenes/pi-permission-system/commit/453a8ba69fb68f24200be7a604f4fac4738c0cfe))
 ## [5.0.0](https://github.com/gotgenes/pi-permission-system/compare/v4.9.0...v5.0.0) (2026-05-05)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@gotgenes/pi-permission-system",
-  "version": "5.0.0",
+  "version": "5.1.0",
   "description": "Permission enforcement extension for the Pi coding agent.",
   "type": "module",
   "files": [

package/src/external-directory.ts CHANGED Viewed

@@ -352,17 +352,252 @@ function resolveNodeText(node: TSNode): string {
   }
 }
+// ── Pattern-first command config ───────────────────────────────────────────
+interface PatternCommandConfig {
+  /**
+   * Flags whose following argument is a filesystem path (for example a
+   * script file) and must be collected as a path candidate.
+   */
+  readonly fileConsumingFlags: ReadonlySet<string>;
+  /**
+   * Flags whose following argument is a plain value (a pattern, a
+   * separator, etc.) and must never be treated as a path.
+   */
+  readonly argConsumingFlags: ReadonlySet<string>;
+  /**
+   * How many leading positional arguments are patterns/scripts rather
+   * than paths. When omitted, 1 is assumed (sed, awk, grep, rg).
+   * sd sets 2 because FIND and REPLACE_WITH are both non-path positionals.
+   */
+  readonly patternPositionals?: number;
+}
+/**
+ * Registry of commands whose leading positional argument(s) are an inline
+ * pattern or script rather than a filesystem path, keyed by command name.
+ * Each entry records which flags swallow the following argument so the
+ * walker can tell flag values apart from true positionals.
+ */
+const PATTERN_FIRST_COMMANDS: ReadonlyMap<string, PatternCommandConfig> =
+  (() => {
+    // awk, gawk and nawk all use the same flag configuration; a factory
+    // keeps each map entry a distinct object.
+    const awkLike = (): PatternCommandConfig => ({
+      argConsumingFlags: new Set(["-e", "-F", "-v"]),
+      fileConsumingFlags: new Set(["-f"]),
+    });
+    // grep, egrep and fgrep likewise share one configuration.
+    const grepLike = (): PatternCommandConfig => ({
+      argConsumingFlags: new Set(["-e", "-A", "-B", "-C", "-m"]),
+      fileConsumingFlags: new Set(["-f"]),
+    });
+    const sedConfig: PatternCommandConfig = {
+      argConsumingFlags: new Set(["-e", "-i"]),
+      fileConsumingFlags: new Set(["-f"]),
+    };
+    const rgValueFlags = [
+      "-e",
+      "-A",
+      "-B",
+      "-C",
+      "-m",
+      "-g",
+      "-t",
+      "-T",
+      "-j",
+      "-M",
+      "-r",
+      "-E",
+    ];
+    const rgConfig: PatternCommandConfig = {
+      argConsumingFlags: new Set(rgValueFlags),
+      fileConsumingFlags: new Set(["-f"]),
+    };
+    // sd takes two pattern positionals and has no file-consuming flags.
+    const sdConfig: PatternCommandConfig = {
+      argConsumingFlags: new Set(["-n", "-f"]),
+      fileConsumingFlags: new Set<string>(),
+      patternPositionals: 2,
+    };
+    return new Map<string, PatternCommandConfig>([
+      ["sed", sedConfig],
+      ["awk", awkLike()],
+      ["gawk", awkLike()],
+      ["nawk", awkLike()],
+      ["grep", grepLike()],
+      ["egrep", grepLike()],
+      ["fgrep", grepLike()],
+      ["rg", rgConfig],
+      ["sd", sdConfig],
+    ]);
+  })();
+/**
+ * AST node types that are treated as argument values when walking the
+ * children of a command.
+ */
+const ARG_NODE_TYPES = new Set<string>(
+  ["word", "concatenation", "string", "raw_string"],
+);
+/**
+ * Determine which program a `command` node invokes.
+ *
+ * Finds the first `command_name` child, resolves its text and reduces it
+ * to its basename, so `/usr/bin/sed` is reported as `sed`.
+ *
+ * @param node - The `command` AST node to inspect.
+ * @returns The command basename, or `undefined` when the node has no
+ *   `command_name` child or that child's text resolves to an empty string.
+ */
+function extractCommandName(node: TSNode): string | undefined {
+  let index = 0;
+  while (index < node.childCount) {
+    const candidate = node.child(index);
+    index += 1;
+    if (!candidate || candidate.type !== "command_name") continue;
+    // Only the first command_name child is ever consulted.
+    const resolved = resolveNodeText(candidate);
+    if (!resolved) return undefined;
+    return basename(resolved);
+  }
+  return undefined;
+}
+/**
+ * Collect path-candidate tokens from a command known to take
+ * pattern/script arguments in its leading positional slots.
+ *
+ * Walks the direct children of a `command` node and classifies each
+ * argument:
+ *
+ * - A flag in `config.argConsumingFlags` consumes the next argument,
+ *   which is skipped. `-e` additionally records that the script/pattern
+ *   was supplied by flag, so no inline positional script is expected.
+ * - A flag in `config.fileConsumingFlags` consumes the next argument,
+ *   which is collected as a path, and likewise records that the script
+ *   was supplied by flag.
+ * - Any other flag is skipped on its own.
+ * - `--` ends flag detection; words starting with `-` after it are
+ *   treated as positionals.
+ * - The first N positionals (N = `config.patternPositionals`, default 1)
+ *   are skipped as inline patterns/scripts unless a script was supplied
+ *   by flag. Every later positional is collected.
+ *
+ * Children that are not plain argument nodes (e.g. `command_substitution`)
+ * are handed to `collectPathCandidateTokens` so nested commands are still
+ * inspected. Unless such a child is a redirect, it also occupies an
+ * argument slot: it satisfies a pending flag value or counts as a
+ * positional. Without that accounting the bookkeeping would shift onto
+ * the following argument and a real file path could be silently dropped.
+ *
+ * @param node - The `command` AST node to walk.
+ * @param tokens - Output array; collected candidates are appended in order.
+ * @param config - Flag and positional configuration for this command.
+ */
+function collectPatternCommandTokens(
+  node: TSNode,
+  tokens: string[],
+  config: PatternCommandConfig,
+): void {
+  const patternPositionals = config.patternPositionals ?? 1;
+  let hasExplicitScript = false;
+  let positionalsSeen = 0;
+  let nextArgAction: "skip" | "extract" | null = null;
+  let pastEndOfFlags = false;
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (!child) continue;
+    // Skip command_name and variable_assignment nodes.
+    if (child.type === "command_name" || child.type === "variable_assignment")
+      continue;
+    // Anything that is not a plain argument node is visited recursively
+    // (e.g. command_substitution) so nested commands are inspected.
+    if (!ARG_NODE_TYPES.has(child.type)) {
+      collectPathCandidateTokens(child, tokens);
+      // NOTE(review): assumes redirect children use node types ending in
+      // "_redirect" (file_redirect, herestring_redirect, heredoc_redirect
+      // in tree-sitter-bash) — confirm against the grammar in use.
+      if (child.type.endsWith("_redirect")) continue;
+      // A dynamic argument still fills one argument slot. Its text is not
+      // resolved here, so nothing is pushed for it; only the position
+      // tracking is advanced.
+      if (nextArgAction !== null) {
+        nextArgAction = null;
+      } else if (!hasExplicitScript && positionalsSeen < patternPositionals) {
+        positionalsSeen++;
+      }
+      continue;
+    }
+    const text = resolveNodeText(child);
+    // Handle consumed argument from previous flag.
+    if (nextArgAction === "skip") {
+      nextArgAction = null;
+      continue;
+    }
+    if (nextArgAction === "extract") {
+      tokens.push(text);
+      nextArgAction = null;
+      continue;
+    }
+    // Flag detection (only before "--" end-of-flags marker). A lone "-"
+    // is not a flag and falls through to positional handling.
+    if (
+      !pastEndOfFlags &&
+      child.type === "word" &&
+      text.startsWith("-") &&
+      text.length > 1
+    ) {
+      if (text === "--") {
+        pastEndOfFlags = true;
+        continue;
+      }
+      if (config.argConsumingFlags.has(text)) {
+        nextArgAction = "skip";
+        // Only `-e` supplies the script/pattern by flag here. A `-f` that
+        // is configured as value-consuming (as sd does) is not a script
+        // source; a file-consuming `-f` is handled by the branch below.
+        if (text === "-e") {
+          hasExplicitScript = true;
+        }
+        continue;
+      }
+      if (config.fileConsumingFlags.has(text)) {
+        nextArgAction = "extract";
+        hasExplicitScript = true;
+        continue;
+      }
+      // Regular flag — skip it.
+      continue;
+    }
+    // Positional argument.
+    if (!hasExplicitScript && positionalsSeen < patternPositionals) {
+      positionalsSeen++;
+      continue; // Skip: this is an inline pattern/script.
+    }
+    // File argument — collect as path candidate.
+    tokens.push(text);
+  }
+}
 /**
  * Recursively visit the AST and collect resolved text of nodes that
  * represent command arguments or redirect destinations.
  *
  * Skips `heredoc_body`, `heredoc_end`, and `comment` subtrees entirely.
+ *
+ * For commands in `PATTERN_FIRST_COMMANDS`, uses position-based
+ * argument skipping to avoid collecting inline patterns/scripts
+ * as path candidates. For all other commands, collects all
+ * arguments generically.
  */
 function collectPathCandidateTokens(node: TSNode, tokens: string[]): void {
   if (SKIP_SUBTREE_TYPES.has(node.type)) return;
-  // Extract arguments from `command` nodes (skip the command name).
+  // Extract arguments from `command` nodes.
   if (node.type === "command") {
+    const commandName = extractCommandName(node);
+    const patternConfig = commandName
+      ? PATTERN_FIRST_COMMANDS.get(commandName)
+      : undefined;
+    if (patternConfig) {
+      collectPatternCommandTokens(node, tokens, patternConfig);
+      return;
+    }
+    // Generic extraction: collect all arguments (skip command name).
     let seenCommandName = false;
     for (let i = 0; i < node.childCount; i++) {
       const child = node.child(i);
@@ -377,24 +612,13 @@ function collectPathCandidateTokens(node: TSNode, tokens: string[]): void {
       // If there was no explicit command_name node, the first word-like
       // child is the command name itself — skip it.
-      if (
-        !seenCommandName &&
-        (child.type === "word" ||
-          child.type === "concatenation" ||
-          child.type === "string" ||
-          child.type === "raw_string")
-      ) {
+      if (!seenCommandName && ARG_NODE_TYPES.has(child.type)) {
         seenCommandName = true;
         continue;
       }
       // Argument nodes: resolve their text and collect.
-      if (
-        child.type === "word" ||
-        child.type === "concatenation" ||
-        child.type === "string" ||
-        child.type === "raw_string"
-      ) {
+      if (ARG_NODE_TYPES.has(child.type)) {
         tokens.push(resolveNodeText(child));
         continue;
       }

package/tests/bash-external-directory.test.ts CHANGED Viewed

@@ -545,6 +545,233 @@ describe("extractExternalPathsFromBashCommand", () => {
     });
   });
+  describe("command-aware extraction", () => {
+    describe("sed", () => {
+      // Shorthand: run one command against the shared cwd. `cwd` is read
+      // at call time, not when the suite is defined.
+      const externalPaths = (command: string) =>
+        extractExternalPathsFromBashCommand(command, cwd);
+      test("issue #91 reproducer: sed address pattern is not flagged", async () => {
+        const paths = await externalPaths(
+          `sed -i '' '/source: "tool",/{/origin:/!s/source: "tool",/source: "tool",\n      origin: "builtin",/;}' tests/tool-input-preview.test.ts`,
+        );
+        expect(paths).toHaveLength(0);
+      });
+      test("sed script is skipped but file argument is extracted", async () => {
+        const paths = await externalPaths("sed 's/foo/bar/g' /etc/hosts");
+        expect(paths).toContain("/etc/hosts");
+      });
+      test("sed address pattern starting with / is skipped", async () => {
+        const paths = await externalPaths("sed '/pattern/d' /etc/hosts");
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+      test("sed with only in-CWD file returns empty", async () => {
+        const paths = await externalPaths("sed 's/foo/bar/' src/index.ts");
+        expect(paths).toHaveLength(0);
+      });
+      test("sed -e: script consumed by flag, file extracted", async () => {
+        const paths = await externalPaths("sed -e 's/foo/bar/' /etc/hosts");
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+      test("sed -n: regular flag does not consume next arg", async () => {
+        const paths = await externalPaths("sed -n '/pattern/p' /etc/hosts");
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+      test("sed -f: script file is extracted as path", async () => {
+        const paths = await externalPaths(
+          "sed -f /etc/sed-script.sed input.txt",
+        );
+        expect(paths).toContain("/etc/sed-script.sed");
+        expect(paths).toHaveLength(1);
+      });
+      test("sed -i '': extension consumed, script skipped, file extracted", async () => {
+        const paths = await externalPaths("sed -i '' 's/foo/bar/' /etc/hosts");
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+    });
+    describe("grep", () => {
+      // Shorthand: run one command against the shared cwd.
+      const externalPaths = (command: string) =>
+        extractExternalPathsFromBashCommand(command, cwd);
+      test("grep: pattern skipped, file extracted", async () => {
+        const paths = await externalPaths("grep '/etc/' /var/log/syslog");
+        expect(paths).toContain("/var/log/syslog");
+        expect(paths).toHaveLength(1);
+      });
+      test("grep -e: pattern consumed by flag, file extracted", async () => {
+        const paths = await externalPaths("grep -e '/etc/' /var/log/syslog");
+        expect(paths).toContain("/var/log/syslog");
+        expect(paths).toHaveLength(1);
+      });
+    });
+    describe("awk", () => {
+      // Shorthand: run one command against the shared cwd.
+      const externalPaths = (command: string) =>
+        extractExternalPathsFromBashCommand(command, cwd);
+      test("awk: program skipped, file extracted", async () => {
+        const paths = await externalPaths("awk '{print}' /etc/hosts");
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+      test("awk -F: separator consumed, program skipped, file extracted", async () => {
+        const paths = await externalPaths("awk -F: '{print $1}' /etc/passwd");
+        expect(paths).toContain("/etc/passwd");
+        expect(paths).toHaveLength(1);
+      });
+    });
+    describe("rg", () => {
+      // Shorthand: run one command against the shared cwd.
+      const externalPaths = (command: string) =>
+        extractExternalPathsFromBashCommand(command, cwd);
+      test("rg: pattern skipped, path extracted", async () => {
+        const paths = await externalPaths("rg '/usr/local' /etc/profile.d/");
+        expect(paths).toContain("/etc/profile.d");
+        expect(paths).toHaveLength(1);
+      });
+      test("rg -e: pattern consumed by flag, path extracted", async () => {
+        const paths = await externalPaths(
+          "rg -e '/usr/local' /etc/profile.d/",
+        );
+        expect(paths).toContain("/etc/profile.d");
+        expect(paths).toHaveLength(1);
+      });
+    });
+    describe("sd", () => {
+      // Shorthand: run one command against the shared cwd.
+      const externalPaths = (command: string) =>
+        extractExternalPathsFromBashCommand(command, cwd);
+      test("sd: both pattern positionals skipped, file extracted", async () => {
+        const paths = await externalPaths(
+          "sd '/usr/local/bin' '/opt/bin' /etc/profile",
+        );
+        expect(paths).toContain("/etc/profile");
+        expect(paths).toHaveLength(1);
+      });
+      test("sd with only in-CWD file returns empty", async () => {
+        const paths = await externalPaths("sd 'foo' 'bar' src/index.ts");
+        expect(paths).toHaveLength(0);
+      });
+    });
+    describe("unknown commands", () => {
+      test("unknown command: all args go through generic extraction", async () => {
+        // A command with no pattern-first configuration.
+        const command = "some-tool /etc/hosts";
+        const paths = await extractExternalPathsFromBashCommand(command, cwd);
+        expect(paths).toContain("/etc/hosts");
+      });
+    });
+    describe("edge cases", () => {
+      // Shorthand: run one command against the shared cwd.
+      const externalPaths = (command: string) =>
+        extractExternalPathsFromBashCommand(command, cwd);
+      test("full-path command invocation: /usr/bin/sed", async () => {
+        const paths = await externalPaths(
+          "/usr/bin/sed 's/foo/bar/' /etc/hosts",
+        );
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+      // Title corrected: "--" only ends flag parsing. The first positional
+      // after it is still the pattern, not a file, as the assertions show.
+      test("-- end-of-flags: pattern positional still skipped, file extracted", async () => {
+        const paths = await externalPaths("grep -- '/etc/' /var/log/syslog");
+        // After --, '/etc/' is the pattern positional, /var/log/syslog is a file
+        expect(paths).toContain("/var/log/syslog");
+        expect(paths).toHaveLength(1);
+      });
+      test("redirect target still extracted for pattern-first command", async () => {
+        const paths = await externalPaths(
+          "sed 's/foo/bar/' input.txt > /tmp/output.txt",
+        );
+        expect(paths).toContain("/tmp/output.txt");
+      });
+      test("pipeline: sed piped to cat with external path", async () => {
+        const paths = await externalPaths(
+          "sed 's/foo/bar/' src/file.ts | cat /etc/hosts",
+        );
+        expect(paths).toContain("/etc/hosts");
+        expect(paths).toHaveLength(1);
+      });
+      test("command substitution inside pattern-first command", async () => {
+        const paths = await externalPaths(
+          "grep 'pattern' $(cat /etc/file-list)",
+        );
+        // /etc/file-list is an argument to cat inside command substitution
+        expect(paths).toContain("/etc/file-list");
+      });
+    });
+    describe("known limitations", () => {
+      test("sed -i without extension (GNU sed): /etc/hosts is missed (false negative)", async () => {
+        // Under GNU sed, -i takes no separate argument, so 's/foo/bar/' is
+        // the inline script and /etc/hosts is the input file. The extractor
+        // treats -i as arg-consuming (right for BSD `sed -i ''`), so the
+        // script is consumed as the -i extension and /etc/hosts lands in
+        // the first positional slot, where it is skipped as the script.
+        // This false negative is known. The bash permission gate still
+        // applies, so external access is not silently allowed.
+        const command = "sed -i 's/foo/bar/' /etc/hosts";
+        const paths = await extractExternalPathsFromBashCommand(command, cwd);
+        // Detecting /etc/hosts would be ideal. Current behavior is pinned
+        // here so a future fix can flip this expectation.
+        expect(paths).toHaveLength(0);
+      });
+    });
+  });
   describe("regex patterns are not mistaken for paths", () => {
     test("grep -v with //.*pattern is not flagged", async () => {
       const result = await extractExternalPathsFromBashCommand(