@aliou/pi-guardrails 0.11.1 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aliou/pi-guardrails",
3
- "version": "0.11.1",
3
+ "version": "0.11.2",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "private": false,
@@ -10,7 +10,7 @@ import {
10
10
  compileFilePatterns,
11
11
  normalizeFilePath,
12
12
  } from "../utils/matching";
13
- import { expandHomePath } from "../utils/path";
13
+ import { expandHomePath, maybePathLike } from "../utils/path";
14
14
  import { walkCommands, wordToString } from "../utils/shell-utils";
15
15
  import { pendingWarnings } from "../utils/warnings";
16
16
 
@@ -108,16 +108,6 @@ function compileRules(rules: PolicyRule[]): CompiledRule[] {
108
108
  return compiled;
109
109
  }
110
110
 
111
- function maybePathLike(token: string): boolean {
112
- return (
113
- token.includes("/") ||
114
- token.includes(".") ||
115
- token.startsWith("~") ||
116
- token.startsWith("./") ||
117
- token.startsWith("../")
118
- );
119
- }
120
-
121
111
  function normalizeTargetForPolicy(filePath: string, cwd: string): string {
122
112
  if (filePath === "~" || filePath.startsWith("~/")) {
123
113
  return normalizeFilePath(filePath);
@@ -6,6 +6,56 @@ const CWD = "/work/project";
6
6
  const HOME = homedir();
7
7
 
8
8
  describe("extractBashPathCandidates", () => {
9
+ describe("when a command has regular expression arguments", () => {
10
+ it("ignores sed expressions and extracts file operands", async () => {
11
+ const result = await extractBashPathCandidates(
12
+ "sed 's/abc/{2,3}/g' ./file",
13
+ CWD,
14
+ );
15
+ expect(result).toEqual(["/work/project/file"]);
16
+ });
17
+
18
+ it("ignores grep patterns and extracts file operands", async () => {
19
+ const result = await extractBashPathCandidates(
20
+ "grep '/api/v1' ./src",
21
+ CWD,
22
+ );
23
+ expect(result).toEqual(["/work/project/src"]);
24
+ });
25
+
26
+ it("ignores ripgrep patterns and extracts search roots", async () => {
27
+ const result = await extractBashPathCandidates("rg '/api/v1' ./src", CWD);
28
+ expect(result).toEqual(["/work/project/src"]);
29
+ });
30
+
31
+ it("ignores jq filters and extracts file operands", async () => {
32
+ const result = await extractBashPathCandidates(
33
+ "jq '.path | test(\"^/tmp/\")' ./data.json",
34
+ CWD,
35
+ );
36
+ expect(result).toEqual(["/work/project/data.json"]);
37
+ });
38
+
39
+ it("ignores interpreter inline code", async () => {
40
+ const result = await extractBashPathCandidates(
41
+ "python3 -c 'open(\"/etc/passwd\").read()'",
42
+ CWD,
43
+ );
44
+ expect(result).toEqual([]);
45
+ });
46
+ });
47
+
48
+ // Regression: github issue #32 — awk regex patterns should not be
49
+ // treated as file paths.
50
+ it("does not extract awk regex patterns as paths", async () => {
51
+ const result = await extractBashPathCandidates(
52
+ "awk '/aaa/{flag=1} flag{print}' test.txt",
53
+ CWD,
54
+ );
55
+ // The awk program should NOT be treated as a path
56
+ expect(result).toEqual([]);
57
+ });
58
+
9
59
  describe("when command has path arguments", () => {
10
60
  it("extracts a single absolute path", async () => {
11
61
  expect(await extractBashPathCandidates("cat /etc/hosts", CWD)).toEqual([
@@ -1,25 +1,10 @@
1
1
  import { resolve } from "node:path";
2
2
  import { parse } from "@aliou/sh";
3
+ import { classifyCommandArgs } from "./command-args";
3
4
  import { expandGlob, hasGlobChars } from "./glob-expander";
4
- import { expandHomePath } from "./path";
5
+ import { expandHomePath, maybePathLike } from "./path";
5
6
  import { walkCommands, wordToString } from "./shell-utils";
6
7
 
7
- /**
8
- * Heuristic: is this token likely a filesystem path?
9
- * Intentionally conservative — only structural signals.
10
- * Known false positives: "application/json", URL paths. These cause
11
- * spurious prompts in ask mode but are safe (better to over-prompt than miss).
12
- * Known false negatives: bare filenames without path separators (e.g. "README.md").
13
- * These are usually cwd-relative and would pass the boundary check anyway.
14
- */
15
- function maybePathLike(token: string): boolean {
16
- if (token.includes("/")) return true;
17
- if (token.includes("\\")) return true;
18
- if (/^[A-Za-z]:[\\/]/.test(token)) return true;
19
- if (token.startsWith("~")) return true;
20
- return false;
21
- }
22
-
23
8
  async function expandCandidate(
24
9
  candidate: string,
25
10
  cwd: string,
@@ -64,8 +49,11 @@ export async function extractBashPathCandidates(
64
49
 
65
50
  walkCommands(ast, (cmd) => {
66
51
  const words = (cmd.words ?? []).map(wordToString);
67
- for (let i = 1; i < words.length; i++) {
68
- pending.push(addCandidate(words[i] as string));
52
+ const commandName = words[0];
53
+ if (commandName) {
54
+ for (const arg of classifyCommandArgs(commandName, words.slice(1))) {
55
+ pending.push(addCandidate(arg.token, arg.forcePath));
56
+ }
69
57
  }
70
58
  for (const redir of cmd.redirects ?? []) {
71
59
  pending.push(addCandidate(wordToString(redir.target), true));
@@ -0,0 +1,83 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { classifyCommandArgs } from "./command-args";
3
+
4
+ const tokens = (command: string, args: string[]) =>
5
+ classifyCommandArgs(command, args).map((arg) => arg.token);
6
+
7
+ describe("classifyCommandArgs", () => {
8
+ it("keeps unknown command arguments unchanged", () => {
9
+ expect(tokens("cat", ["/etc/hosts", "./file"])).toEqual([
10
+ "/etc/hosts",
11
+ "./file",
12
+ ]);
13
+ });
14
+
15
+ it("ignores awk inline program and keeps file operands", () => {
16
+ expect(tokens("awk", ["/aaa/{print}", "./input"])).toEqual(["./input"]);
17
+ });
18
+
19
+ it("keeps awk -f program files", () => {
20
+ expect(tokens("awk", ["-f", "./prog.awk", "./input"])).toEqual([
21
+ "./prog.awk",
22
+ "./input",
23
+ ]);
24
+ });
25
+
26
+ it("ignores sed inline scripts and keeps file operands", () => {
27
+ expect(tokens("sed", ["s#/old#/new#g", "./file"])).toEqual(["./file"]);
28
+ });
29
+
30
+ it("keeps sed -f script files", () => {
31
+ expect(tokens("sed", ["-f", "./script.sed", "./file"])).toEqual([
32
+ "./script.sed",
33
+ "./file",
34
+ ]);
35
+ });
36
+
37
+ it("ignores grep patterns and keeps file operands", () => {
38
+ expect(tokens("grep", ["/api/v1", "./src"])).toEqual(["./src"]);
39
+ });
40
+
41
+ it("keeps grep pattern files", () => {
42
+ expect(tokens("grep", ["-f", "./patterns", "./src"])).toEqual([
43
+ "./patterns",
44
+ "./src",
45
+ ]);
46
+ });
47
+
48
+ it("keeps find roots and ignores expression patterns", () => {
49
+ expect(tokens("find", ["./src", "-regex", ".*/test/.*"])).toEqual([
50
+ "./src",
51
+ ]);
52
+ });
53
+
54
+ it("ignores jq filters and keeps file operands", () => {
55
+ expect(tokens("jq", ['.path | test("^/tmp/")', "./data.json"])).toEqual([
56
+ "./data.json",
57
+ ]);
58
+ });
59
+
60
+ it("keeps jq -f filter files", () => {
61
+ expect(tokens("jq", ["-f", "./filter.jq", "./data.json"])).toEqual([
62
+ "./filter.jq",
63
+ "./data.json",
64
+ ]);
65
+ });
66
+
67
+ it("ignores interpreter inline code", () => {
68
+ expect(tokens("python3", ["-c", 'open("/etc/passwd")'])).toEqual([]);
69
+ });
70
+
71
+ it("keeps interpreter script operands", () => {
72
+ expect(tokens("python3", ["./script.py", "./data.json"])).toEqual([
73
+ "./script.py",
74
+ "./data.json",
75
+ ]);
76
+ });
77
+
78
+ it("ignores delimiter args", () => {
79
+ expect(tokens("cut", ["-d", "/", "./file"])).toEqual(["./file"]);
80
+ expect(tokens("sort", ["-t", "/", "./file"])).toEqual(["./file"]);
81
+ expect(tokens("tr", ["/", ":"])).toEqual([]);
82
+ });
83
+ });
@@ -0,0 +1,226 @@
1
+ import { basename } from "node:path";
2
+
3
/**
 * One command-line argument that should be considered as a path candidate.
 *
 * `forcePath` is forwarded untouched to the caller; nothing in this module
 * sets it.
 */
export type ClassifiedArg = { token: string; forcePath?: boolean };

/** How the value(s) of a value-taking option are treated. */
type ValueKind =
  /** One value that is never a path (pattern, code, number, ...). */
  | "skip"
  /** One value that names a file or directory. */
  | "path"
  /** Two values, neither of which is a path (`jq --arg NAME VALUE`). */
  | "skip-pair"
  /** Two values: a name, then a file (`jq --slurpfile NAME FILE`). */
  | "named-path";

/**
 * `attached` marks a short option whose optional argument must be glued to it
 * (`sed -i.bak`): the rest of the word belongs to the option and the next word
 * is never consumed.
 */
type OptionKind = ValueKind | "attached";

/** Option table for a command whose first operand is a script/pattern. */
interface ScriptCommandSpec {
  readonly valueOptions: ReadonlyMap<string, OptionKind>;
  /** Options that supply the script, so the first operand is already a file. */
  readonly scriptOptions: ReadonlySet<string>;
}

/** Result of recognising a value-taking option inside one argument word. */
interface ParsedOption {
  name: string;
  kind: ValueKind;
  /** Value glued to the option (`-fFILE`, `--file=FILE`), if any. */
  value?: string;
}

/** Flags that introduce inline code for an interpreter. */
interface InterpreterSpec {
  readonly codeFlags: ReadonlySet<string>;
  /** Matches clustered short flags ending in the code flag (`perl -ne`). */
  readonly codeCluster?: RegExp;
}

function defineSpec(
  valueOptions: Record<string, OptionKind>,
  scriptOptions: readonly string[],
): ScriptCommandSpec {
  return {
    valueOptions: new Map(Object.entries(valueOptions)),
    scriptOptions: new Set(scriptOptions),
  };
}

const AWK_COMMANDS = new Set(["awk", "gawk", "mawk", "nawk"]);
const SED_COMMANDS = new Set(["sed", "gsed"]);
const GREP_COMMANDS = new Set(["grep", "egrep", "fgrep", "ggrep"]);
const RIPGREP_COMMANDS = new Set(["rg", "ripgrep"]);
const OTHER_SEARCH_COMMANDS = new Set(["ag", "ack"]);
const FIND_COMMANDS = new Set(["find", "gfind"]);
const FILTER_COMMANDS = new Set(["jq", "yq"]);

const AWK_SPEC = defineSpec(
  {
    "-f": "path",
    "--file": "path",
    "-E": "path",
    "--exec": "path",
    "-i": "path",
    "--include": "path",
    "-e": "skip",
    "--source": "skip",
    "-v": "skip",
    "--assign": "skip",
    "-F": "skip",
    "--field-separator": "skip",
    "-l": "skip",
    "--load": "skip",
  },
  ["-f", "--file", "-E", "--exec", "-e", "--source"],
);

const SED_SPEC = defineSpec(
  {
    "-e": "skip",
    "--expression": "skip",
    "-f": "path",
    "--file": "path",
    // `-i[SUFFIX]`: the suffix is glued on, so letters after `i` are not flags.
    "-i": "attached",
  },
  ["-e", "--expression", "-f", "--file"],
);

const GREP_PATTERN_OPTIONS: Record<string, OptionKind> = {
  "-e": "skip",
  "--regexp": "skip",
  "-f": "path",
  "--file": "path",
};
const GREP_SCRIPT_OPTIONS = ["-e", "--regexp", "-f", "--file"];

// Plain grep has no `-g`/`-t`/`-T` value options; `-T` is a bare flag there.
const GREP_SPEC = defineSpec(
  {
    ...GREP_PATTERN_OPTIONS,
    "-A": "skip",
    "--after-context": "skip",
    "-B": "skip",
    "--before-context": "skip",
    "-m": "skip",
    "--max-count": "skip",
  },
  GREP_SCRIPT_OPTIONS,
);

const TYPE_AND_GLOB_OPTIONS: Record<string, OptionKind> = {
  "-g": "skip",
  "--glob": "skip",
  "-t": "skip",
  "-T": "skip",
  "--type": "skip",
  "--type-not": "skip",
};

const RIPGREP_SPEC = defineSpec(
  {
    ...GREP_PATTERN_OPTIONS,
    ...TYPE_AND_GLOB_OPTIONS,
    "--iglob": "skip",
    "--type-add": "skip",
    "-A": "skip",
    "--after-context": "skip",
    "-B": "skip",
    "--before-context": "skip",
    "-C": "skip",
    "--context": "skip",
    "-m": "skip",
    "--max-count": "skip",
    "-d": "skip",
    "--max-depth": "skip",
    "-j": "skip",
    "--threads": "skip",
    "-M": "skip",
    "--max-columns": "skip",
    "-r": "skip",
    "--replace": "skip",
    "-E": "skip",
    "--encoding": "skip",
    "--sort": "skip",
    "--ignore-file": "path",
  },
  GREP_SCRIPT_OPTIONS,
);

// ag/ack keep the option set they have always been classified with.
const OTHER_SEARCH_SPEC = defineSpec(
  { ...GREP_PATTERN_OPTIONS, ...TYPE_AND_GLOB_OPTIONS },
  GREP_SCRIPT_OPTIONS,
);

const FILTER_SPEC = defineSpec(
  {
    "-f": "path",
    "--from-file": "path",
    "-L": "path",
    "--library-path": "path",
    "--arg": "skip-pair",
    "--argjson": "skip-pair",
    "--slurpfile": "named-path",
    "--rawfile": "named-path",
    "--argfile": "named-path",
    "--indent": "skip",
  },
  ["-f", "--from-file"],
);

const PYTHON_SPEC: InterpreterSpec = {
  codeFlags: new Set(["-c"]),
  codeCluster: /^-[bBdEiIOPqRsSuvx]+c$/,
};

const INTERPRETER_SPECS: ReadonlyMap<string, InterpreterSpec> = new Map([
  [
    "node",
    {
      codeFlags: new Set(["-e", "--eval", "-p", "--print"]),
      codeCluster: /^-pe$/,
    },
  ],
  ["ruby", { codeFlags: new Set(["-e"]), codeCluster: /^-[alnpswW0-9]+e$/ }],
  [
    "perl",
    {
      codeFlags: new Set(["-e", "-E"]),
      codeCluster: /^-[acilnpsStTuUwWX0-9]+[eE]$/,
    },
  ],
  ["php", { codeFlags: new Set(["-r", "-B", "-R", "-E"]) }],
]);

/** Flags accepted before find's starting points (GNU -H/-L/-P/-O, BSD -E/-X/-d/-s/-x). */
const FIND_LEADING_FLAGS = /^-(?:[HLPEXdsx]+|O\d*)$/;

/** find predicates whose value is a pattern, never a path to check. */
const FIND_PATTERN_PREDICATES = new Set([
  "-name",
  "-iname",
  "-path",
  "-ipath",
  "-regex",
  "-iregex",
  "-wholename",
  "-iwholename",
]);

/** find predicates whose value is a file that is read or written. */
const FIND_FILE_PREDICATES = new Set([
  "-newer",
  "-anewer",
  "-cnewer",
  "-samefile",
  "-fprint",
  "-fprint0",
  "-fls",
  "-fprintf",
]);

/** find predicates that run a command terminated by `;` or `{} +`. */
const FIND_EXEC_PREDICATES = new Set(["-exec", "-execdir", "-ok", "-okdir"]);

/** Reduces a command word such as `/usr/bin/GREP` to `grep`. */
function normalizeCommandName(command: string): string {
  return basename(command).toLowerCase();
}

/** True for `-x` / `--long`; false for the operands `-` (stdin) and `--`. */
function isOption(arg: string): boolean {
  return arg.startsWith("-") && arg !== "-" && arg !== "--";
}

/**
 * Filters a command's arguments down to the ones worth checking as paths.
 *
 * Commands whose arguments contain regexes, filters or inline code (awk, sed,
 * grep-likes, find, jq/yq, script interpreters, cut/sort/tr) get a
 * command-aware pass that drops those arguments and keeps file operands.
 * Arguments of every other command are returned unchanged.
 *
 * @param command - The command word; a leading directory and letter case are ignored.
 * @param args - The words following the command.
 * @returns The surviving arguments in their original order.
 */
export function classifyCommandArgs(
  command: string,
  args: string[],
): ClassifiedArg[] {
  const cmd = normalizeCommandName(command);

  if (AWK_COMMANDS.has(cmd)) return classifyScriptCommandArgs(args, AWK_SPEC);
  if (SED_COMMANDS.has(cmd)) return classifyScriptCommandArgs(args, SED_SPEC);
  if (GREP_COMMANDS.has(cmd)) return classifyScriptCommandArgs(args, GREP_SPEC);
  if (RIPGREP_COMMANDS.has(cmd)) {
    return classifyScriptCommandArgs(args, RIPGREP_SPEC);
  }
  if (OTHER_SEARCH_COMMANDS.has(cmd)) {
    return classifyScriptCommandArgs(args, OTHER_SEARCH_SPEC);
  }
  if (FIND_COMMANDS.has(cmd)) return classifyFindArgs(args);
  if (FILTER_COMMANDS.has(cmd)) {
    return classifyScriptCommandArgs(args, FILTER_SPEC);
  }

  const interpreter = interpreterSpecFor(cmd);
  if (interpreter) return classifyInterpreterArgs(args, interpreter);

  if (cmd === "cut") {
    return skipOptionValues(args, new Set(["-d", "--delimiter"]));
  }
  if (cmd === "sort") {
    return skipOptionValues(args, new Set(["-t", "--field-separator"]));
  }
  // tr only takes character sets, never files.
  if (cmd === "tr") return [];

  return args.map((token) => ({ token }));
}

/** Looks up the inline-code flags for `cmd`, accepting versioned pythons (`python3.12`). */
function interpreterSpecFor(cmd: string): InterpreterSpec | undefined {
  if (/^python\d*(?:\.\d+)*$/.test(cmd)) return PYTHON_SPEC;
  return INTERPRETER_SPECS.get(cmd);
}

/**
 * Recognises a value-taking option in `arg`.
 *
 * Handles the exact form (`-f`, `--file`), the long `--file=VALUE` form, and
 * getopt-style short clusters (`-nf`, `-fVALUE`): letters are read left to
 * right and the first one that takes a value claims the rest of the word.
 *
 * @returns The option and any glued-on value, or `undefined` for a plain flag.
 */
function parseValueOption(
  arg: string,
  valueOptions: ReadonlyMap<string, OptionKind>,
): ParsedOption | undefined {
  const exact = valueOptions.get(arg);
  if (exact !== undefined) {
    return exact === "attached" ? undefined : { name: arg, kind: exact };
  }

  if (arg.startsWith("--")) {
    const eq = arg.indexOf("=");
    if (eq === -1) return undefined;
    const name = arg.slice(0, eq);
    const kind = valueOptions.get(name);
    if (kind === undefined || kind === "attached") return undefined;
    return { name, kind, value: arg.slice(eq + 1) };
  }

  for (let j = 1; j < arg.length; j++) {
    const name = `-${arg.charAt(j)}`;
    const kind = valueOptions.get(name);
    if (kind === undefined) continue;
    // The remainder of the word is this option's own argument.
    if (kind === "attached") return undefined;
    const attached = arg.slice(j + 1);
    return attached === "" ? { name, kind } : { name, kind, value: attached };
  }
  return undefined;
}

/**
 * Shared classifier for commands of the shape `cmd [options] SCRIPT [file...]`
 * (awk, sed, grep-likes, jq/yq).
 *
 * The first operand is the script/pattern and is dropped unless an option in
 * `spec.scriptOptions` already supplied it. Option values are dropped or kept
 * according to `spec.valueOptions`. A bare `--` ends option parsing and is
 * never mistaken for the script.
 */
function classifyScriptCommandArgs(
  args: string[],
  spec: ScriptCommandSpec,
): ClassifiedArg[] {
  const out: ClassifiedArg[] = [];
  let scriptSeen = false;
  let optionsEnded = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) break;

    if (!optionsEnded && arg === "--") {
      optionsEnded = true;
      continue;
    }

    if (!optionsEnded && isOption(arg)) {
      const option = parseValueOption(arg, spec.valueOptions);
      if (option === undefined) continue;
      if (spec.scriptOptions.has(option.name)) scriptSeen = true;

      const values: (string | undefined)[] = [];
      if (option.value !== undefined) values.push(option.value);
      const needed =
        option.kind === "skip-pair" || option.kind === "named-path" ? 2 : 1;
      while (values.length < needed) values.push(args[++i]);

      const last = values[needed - 1];
      if ((option.kind === "path" || option.kind === "named-path") && last) {
        out.push({ token: last });
      }
      continue;
    }

    if (!scriptSeen) {
      scriptSeen = true;
      continue;
    }
    out.push({ token: arg });
  }
  return out;
}

/** True when `args[index]` ends a find `-exec` command (`;`, or `+` right after `{}`). */
function isFindExecTerminator(
  args: string[],
  index: number,
  start: number,
): boolean {
  const arg = args[index];
  if (arg === ";") return true;
  return arg === "+" && index > start && args[index - 1] === "{}";
}

/**
 * Classifies find arguments: `find [global options] [root...] [expression]`.
 *
 * Roots are kept. Inside the expression, pattern values are dropped, files
 * named by `-newer`/`-samefile`/`-fprint*`/`-fls` are kept, and the command
 * run by `-exec`/`-ok` is classified recursively so its own path arguments are
 * checked as well (the `{}` placeholder is dropped).
 */
function classifyFindArgs(args: string[]): ClassifiedArg[] {
  const out: ClassifiedArg[] = [];
  let i = 0;

  // Global options come before the roots and must not start the expression.
  for (; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) break;
    if (FIND_LEADING_FLAGS.test(arg)) continue;
    if (arg === "-D") {
      i++; // debug options value
      continue;
    }
    if (arg === "-f") {
      // BSD find: `-f path` names a root explicitly.
      const root = args[++i];
      if (root) out.push({ token: root });
      continue;
    }
    break;
  }
  if (args[i] === "--") i++;

  for (; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) break;
    if (arg.startsWith("-") || arg === "(" || arg === "!") break;
    out.push({ token: arg });
  }

  for (; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) break;

    if (FIND_PATTERN_PREDICATES.has(arg)) {
      i++;
      continue;
    }
    if (FIND_FILE_PREDICATES.has(arg)) {
      const file = args[++i];
      if (file) out.push({ token: file });
      if (arg === "-fprintf") i++; // the format string follows the file
      continue;
    }
    if (FIND_EXEC_PREDICATES.has(arg)) {
      const start = i + 1;
      let end = start;
      while (end < args.length && !isFindExecTerminator(args, end, start)) {
        end++;
      }
      const [execCommand, ...execArgs] = args.slice(start, end);
      if (execCommand !== undefined) {
        for (const nested of classifyCommandArgs(execCommand, execArgs)) {
          if (nested.token !== "{}") out.push(nested);
        }
      }
      i = end;
    }
  }
  return out;
}

/**
 * Classifies interpreter arguments: the word after an inline-code flag
 * (`python -c`, `perl -ne`, `node -p`, `php -r`, ...) is dropped, other options
 * are dropped, and every remaining operand is kept. After a bare `--` all
 * words are kept.
 */
function classifyInterpreterArgs(
  args: string[],
  spec: InterpreterSpec,
): ClassifiedArg[] {
  const out: ClassifiedArg[] = [];
  let optionsEnded = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) break;

    if (!optionsEnded) {
      if (arg === "--") {
        optionsEnded = true;
        continue;
      }
      if (spec.codeFlags.has(arg) || spec.codeCluster?.test(arg)) {
        i++; // skip the inline code itself
        continue;
      }
      if (isOption(arg)) continue;
    }
    out.push({ token: arg });
  }
  return out;
}

/** True when `arg` is one of `options` with its value glued on (`-d/`, `--delimiter=/`). */
function hasAttachedOptionValue(
  arg: string,
  options: ReadonlySet<string>,
): boolean {
  if (arg.startsWith("--")) {
    const eq = arg.indexOf("=");
    return eq !== -1 && options.has(arg.slice(0, eq));
  }
  return arg.length > 2 && arg.startsWith("-") && options.has(arg.slice(0, 2));
}

/**
 * Passes arguments through unchanged, except that the listed options are
 * dropped together with their value, whether the value is the next word or
 * glued to the option.
 */
function skipOptionValues(
  args: string[],
  optionsWithValues: Set<string>,
): ClassifiedArg[] {
  const out: ClassifiedArg[] = [];
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) break;
    if (optionsWithValues.has(arg)) {
      i++;
      continue;
    }
    if (hasAttachedOptionValue(arg, optionsWithValues)) continue;
    out.push({ token: arg });
  }
  return out;
}
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
3
3
  import {
4
4
  expandHomePath,
5
5
  isWithinBoundary,
6
+ maybePathLike,
6
7
  normalizeForDisplay,
7
8
  resolveFromCwd,
8
9
  toStorageForm,
@@ -175,3 +176,118 @@ describe("toStorageForm", () => {
175
176
  expect(toStorageForm(absPath, isDirectory)).toBe(expected);
176
177
  });
177
178
  });
179
+
180
+ describe("maybePathLike", () => {
181
+ it.each([
182
+ // --- True cases: structural path signals ---
183
+ {
184
+ desc: "absolute Unix path",
185
+ input: "/etc/hosts",
186
+ expected: true,
187
+ },
188
+ {
189
+ desc: "relative path with /",
190
+ input: "src/index.ts",
191
+ expected: true,
192
+ },
193
+ {
194
+ desc: "./ prefix",
195
+ input: "./foo",
196
+ expected: true,
197
+ },
198
+ {
199
+ desc: "../ prefix",
200
+ input: "../bar",
201
+ expected: true,
202
+ },
203
+ {
204
+ desc: "backslash path (Windows)",
205
+ input: "foo\\bar",
206
+ expected: true,
207
+ },
208
+ {
209
+ desc: "Windows drive letter",
210
+ input: "C:\\tmp",
211
+ expected: true,
212
+ },
213
+ {
214
+ desc: "Windows drive with forward slash",
215
+ input: "C:/tmp",
216
+ expected: true,
217
+ },
218
+ {
219
+ desc: "tilde home path",
220
+ input: "~/code",
221
+ expected: true,
222
+ },
223
+ {
224
+ desc: "MIME type (has / — safe false positive)",
225
+ input: "application/json",
226
+ expected: true,
227
+ },
228
+ {
229
+ desc: "regular expression with braces (has / — safe false positive)",
230
+ input: "/abc/{2,3}",
231
+ expected: true,
232
+ },
233
+ // --- False cases: non-path tokens ---
234
+ {
235
+ desc: "empty string",
236
+ input: "",
237
+ expected: false,
238
+ },
239
+ {
240
+ desc: "simple command name",
241
+ input: "rm",
242
+ expected: false,
243
+ },
244
+ {
245
+ desc: "flag",
246
+ input: "--force",
247
+ expected: false,
248
+ },
249
+ {
250
+ desc: "short flag",
251
+ input: "-rf",
252
+ expected: false,
253
+ },
254
+ {
255
+ desc: "bare word",
256
+ input: "build",
257
+ expected: false,
258
+ },
259
+ {
260
+ desc: "bare tilde (no slash)",
261
+ input: "~",
262
+ expected: false,
263
+ },
264
+ {
265
+ desc: "version number",
266
+ input: "3.14",
267
+ expected: false,
268
+ },
269
+ {
270
+ desc: "domain name",
271
+ input: "example.com",
272
+ expected: false,
273
+ },
274
+ {
275
+ desc: "bare filename with extension",
276
+ input: "README.md",
277
+ expected: false,
278
+ },
279
+ {
280
+ desc: "dotfile without slash",
281
+ input: ".env",
282
+ expected: false,
283
+ },
284
+ ])("when $desc, returns $expected", ({ input, expected }) => {
285
+ expect(maybePathLike(input)).toBe(expected);
286
+ });
287
+
288
+ // maybePathLike is command-agnostic. Command-specific regex/code args are
289
+ // filtered by extractBashPathCandidates before this fallback heuristic runs.
290
+ it("treats awk-looking regex text as path-like without command context", () => {
291
+ expect(maybePathLike("/aaa/{flag=1} flag{print}")).toBe(true);
292
+ });
293
+ });
package/src/utils/path.ts CHANGED
@@ -72,3 +72,26 @@ export function toStorageForm(absPath: string, isDirectory: boolean): string {
72
72
  if (!isDirectory && stored.endsWith("/")) stored = stored.slice(0, -1);
73
73
  return stored;
74
74
  }
75
+
76
/**
 * Heuristic: is this token likely a filesystem path?
 *
 * The only signal used is a path separator: a token is path-like when it
 * contains `/` or `\`. That single rule already covers absolute paths,
 * `./` and `../` prefixes, `~/` home paths and Windows drive paths
 * (`C:\tmp`, `C:/tmp`), since each of them contains a separator.
 *
 * Known false positives: anything else containing a separator, such as MIME
 * types (`application/json`) or regular expressions (`/abc/{2,3}`).
 *
 * Known false negatives: tokens without a separator — bare filenames
 * (`Makefile`, `README.md`), dotfiles (`.env`), and a bare `~` or `~user`.
 * Version numbers (`3.14`) and domain names (`example.com`) are likewise not
 * path-like.
 *
 * @param token - A single shell word.
 * @returns `true` when the token contains a path separator; `false` for the
 *   empty string and for separator-free tokens.
 */
export function maybePathLike(token: string): boolean {
  return token.includes("/") || token.includes("\\");
}