@gotgenes/pi-permission-system 8.3.2 → 9.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,727 @@
1
+ import { createRequire } from "node:module";
2
+ import { basename, resolve } from "node:path";
3
+
4
+ import {
5
+ classifyTokenAsPathCandidate,
6
+ classifyTokenAsRuleCandidate,
7
+ } from "#src/handlers/gates/bash-token-classification";
8
+ import {
9
+ isPathWithinDirectory,
10
+ isSafeSystemPath,
11
+ normalizePathForComparison,
12
+ } from "#src/path-utils";
13
+
14
+ // ── tree-sitter-bash lazy parser ───────────────────────────────────────────
15
+
16
/**
 * The slice of web-tree-sitter's `SyntaxNode` that the AST walker reads.
 *
 * Declared here so that code working with nodes does not have to import
 * web-tree-sitter's own type definitions.
 */
interface TSNode {
  /** Grammar node type, e.g. `"command"` or `"word"`. */
  readonly type: string;
  /** Source text covered by the node. */
  readonly text: string;
  /** Number of children addressable through `child()`. */
  readonly childCount: number;
  /** Child at `index`, or `null` when there is none. */
  child(index: number): TSNode | null;
}
26
+
27
/**
 * The slice of web-tree-sitter's `Parser` that this module relies on.
 */
interface TSParser {
  /**
   * Parse `input`; yields `null` when no tree is produced. The returned tree
   * exposes its root node and a `delete()` method.
   */
  parse(input: string): { rootNode: TSNode; delete(): void } | null;
  /** Dispose of the parser. */
  delete(): void;
}
34
+
35
/**
 * Memoised parser initialisation. `null` until the parser is first requested,
 * and reset to `null` when an initialisation attempt rejects.
 */
let parserPromise: Promise<TSParser> | null = null;

/**
 * Initialise web-tree-sitter and build a parser configured for bash.
 *
 * Both WASM artefacts are located with `require.resolve` relative to this
 * module, so they are found wherever the package is installed.
 */
async function initParser(): Promise<TSParser> {
  // Use named imports — web-tree-sitter exports Parser as a named class.
  const { Parser, Language } = await import("web-tree-sitter");
  const req = createRequire(import.meta.url);
  const treeSitterWasm = req.resolve("web-tree-sitter/web-tree-sitter.wasm");
  await Parser.init({ locateFile: () => treeSitterWasm });

  const parser = new Parser();
  const bashWasm = req.resolve("tree-sitter-bash/tree-sitter-bash.wasm");
  const bash = await Language.load(bashWasm);
  parser.setLanguage(bash);
  return parser;
}

/**
 * Return the shared parser, starting initialisation on first use.
 *
 * Concurrent callers share one in-flight initialisation. If that attempt
 * rejects, every caller awaiting it sees the rejection, but the cached promise
 * is cleared so that a later call retries instead of replaying the same
 * failure forever.
 */
function getParser(): Promise<TSParser> {
  if (parserPromise !== null) return parserPromise;

  const attempt = initParser();
  parserPromise = attempt;
  // Side-channel handler: only un-caches the failed attempt. Callers still
  // receive the original rejection through `attempt` itself.
  void attempt.catch(() => {
    if (parserPromise === attempt) parserPromise = null;
  });
  return attempt;
}
55
+
56
+ // ── Parsed bash command representation ───────────────────────────────────────
57
+
58
/**
 * A bash command parsed once into a reusable representation.
 *
 * Parsing is the expensive step (tree-sitter WASM); `BashProgram` performs it
 * a single time and exposes typed slices derived from the same AST walk so the
 * bash permission gates do not each re-parse and re-walk the command, and so
 * the slices are guaranteed to agree.
 *
 * Construct via the async `parse()` factory; the constructor is private.
 */
export class BashProgram {
  private constructor(
    private readonly rawTokens: readonly string[],
    private readonly leadingCdTarget: string | undefined,
    private readonly topLevelCommandTexts: readonly string[],
  ) {}

  /**
   * Parse a bash command into a `BashProgram`.
   *
   * Uses tree-sitter-bash to build the full AST, walks command-argument and
   * redirect-destination nodes once into raw candidate tokens, and records the
   * leading `cd` target. Heredoc bodies, comments, and other non-argument
   * content are skipped. An unparseable command yields an empty program.
   *
   * @param command - The raw bash command line(s) to analyse.
   * @returns A program holding the slices extracted from `command`.
   */
  static async parse(command: string): Promise<BashProgram> {
    const parser = await getParser();
    const tree = parser.parse(command);
    if (!tree) return new BashProgram([], undefined, []);

    try {
      const leadingCdTarget = extractLeadingCdTarget(tree.rootNode);
      const rawTokens = collectPathCandidateTokens(tree.rootNode);
      const topLevelCommandTexts = collectTopLevelCommandTexts(tree.rootNode);
      return new BashProgram(rawTokens, leadingCdTarget, topLevelCommandTexts);
    } finally {
      // Every slice is copied out as plain strings above, so the tree can be
      // released on both the success and the failure path.
      tree.delete();
    }
  }

  /**
   * Tokens that may be file paths, using the broader `path`-rule filter.
   *
   * Accepts relative paths (`.env`, `src/foo.ts`, `./build`) and absolute
   * paths; does NOT filter by CWD. Returns deduplicated tokens for rule
   * evaluation, in first-occurrence order.
   */
  // Used by the facades (bash-path-extractor.ts) and tests. Fallow's syntactic
  // analysis cannot resolve the static-factory return type (private ctor), so
  // it reports a false positive here.
  // fallow-ignore-next-line unused-class-member
  pathTokens(): string[] {
    // A Set iterates in insertion order, which gives first-occurrence dedup.
    const unique = new Set<string>();
    for (const token of this.rawTokens) {
      const candidate = classifyTokenAsRuleCandidate(token);
      if (candidate) unique.add(candidate);
    }
    return [...unique];
  }

  /**
   * The top-level simple-commands of the chain, in source order.
   *
   * Splits on the shell chain operators (`&&`, `||`, `;`, `|`, `&`, newlines);
   * quotes, command substitution, and subshells are respected by the parser and
   * are NOT split — a subshell or other compound statement is emitted whole. May
   * be empty (e.g. an empty command or a comment-only line); callers fall back
   * to the whole command so the surface is never evaluated weaker than before.
   *
   * @returns A fresh array; mutating it does not affect this program.
   */
  // Used by resolveBashCommandCheck (bash-command.ts) and tests. Fallow's
  // syntactic analysis cannot resolve the static-factory return type (private
  // ctor), so it reports a false positive here.
  // fallow-ignore-next-line unused-class-member
  topLevelCommands(): string[] {
    return [...this.topLevelCommandTexts];
  }

  /**
   * Deduplicated paths that resolve outside `cwd`.
   *
   * When the command begins with `cd <dir> && …`, relative candidate paths are
   * resolved against `<dir>` (if it stays within CWD) rather than CWD itself,
   * mirroring how the shell would resolve them.
   *
   * @param cwd - The working directory the command would run in.
   * @returns Normalized external paths in first-occurrence order. Empty when
   *   `cwd` itself normalizes to the empty string.
   */
  // Used by the facades (bash-path-extractor.ts) and tests. Fallow's syntactic
  // analysis cannot resolve the static-factory return type (private ctor), so
  // it reports a false positive here.
  // fallow-ignore-next-line unused-class-member
  externalPaths(cwd: string): string[] {
    const normalizedCwd = normalizePathForComparison(cwd, cwd);
    // Nothing can be classified against an empty working directory, so no
    // token is reported in that case.
    // NOTE(review): this reports "no external paths" rather than failing
    // closed — confirm that is the intended behaviour for an unusable cwd.
    if (normalizedCwd === "") return [];

    const resolveBase = computeEffectiveResolveBase(this.leadingCdTarget, cwd);
    const external = new Set<string>();

    for (const token of this.rawTokens) {
      const candidate = classifyTokenAsPathCandidate(token);
      if (!candidate) continue;

      const normalized = normalizePathForComparison(candidate, resolveBase);
      if (!normalized) continue;

      // Paths treated as safe system locations, and paths inside the working
      // directory, are not external.
      if (isSafeSystemPath(normalized)) continue;
      if (isPathWithinDirectory(normalized, normalizedCwd)) continue;

      external.add(normalized);
    }

    return [...external];
  }
}
179
+
180
+ // ── AST walker ─────────────────────────────────────────────────────────────
181
+
182
/**
 * Node types that path extraction never descends into: the text they contain
 * is not a command argument.
 */
const SKIP_SUBTREE_TYPES: ReadonlySet<string> = new Set<string>([
  "heredoc_body",
  "heredoc_end",
  "comment",
]);
187
+
188
/**
 * Remove backslash escapes from an unquoted word.
 *
 * Outside quotes a backslash preserves the next character literally, and a
 * backslash-newline pair is a line continuation that disappears entirely.
 * A trailing lone backslash is left untouched.
 */
function stripUnquotedEscapes(text: string): string {
  if (!text.includes("\\")) return text;
  return text.replace(/\\([\s\S])/g, (_match, escaped: string) =>
    escaped === "\n" ? "" : escaped,
  );
}

/**
 * Remove backslash escapes from double-quoted content.
 *
 * Inside double quotes the backslash is only special before `$`, a backtick,
 * `"`, `\` or a newline; before any other character it is kept literally.
 */
function stripDoubleQuotedEscapes(text: string): string {
  if (!text.includes("\\")) return text;
  return text.replace(/\\([$`"\\\n])/g, (_match, escaped: string) =>
    escaped === "\n" ? "" : escaped,
  );
}

/**
 * Resolve the "shell value" of an argument node — the string the shell
 * would pass to the command after quote removal.
 *
 * - `word` → `.text` with unquoted backslash escapes removed, so that
 *   `\/etc/passwd` or `.e\nv` are seen as the paths the shell really opens
 * - `raw_string` → strip surrounding single quotes (content stays literal)
 * - `string` → concatenate resolved children, skipping the `"` delimiters
 * - `string_content` → `.text` with double-quote escapes removed
 * - `concatenation` → concatenate resolved children
 * - other (expansions, substitutions, …) → `.text` unchanged
 *
 * @param node - The argument node to resolve.
 * @returns The node's text with quoting removed as far as it can be
 *   determined statically; expansions are left unexpanded.
 */
function resolveNodeText(node: TSNode): string {
  switch (node.type) {
    case "word":
      return stripUnquotedEscapes(node.text);
    case "raw_string": {
      // Strip surrounding single quotes: 'content' → content
      const raw = node.text;
      const isQuoted =
        raw.length >= 2 && raw.startsWith("'") && raw.endsWith("'");
      return isQuoted ? raw.slice(1, -1) : raw;
    }
    case "string": {
      // Double-quoted string: join the resolved inner children, skipping the
      // quote-delimiter nodes (literal `"`).
      let joined = "";
      for (let i = 0; i < node.childCount; i++) {
        const child = node.child(i);
        if (!child || child.type === '"') continue;
        joined += resolveNodeText(child);
      }
      return joined;
    }
    case "string_content":
      return stripDoubleQuotedEscapes(node.text);
    case "concatenation": {
      let joined = "";
      for (let i = 0; i < node.childCount; i++) {
        const child = node.child(i);
        if (child) joined += resolveNodeText(child);
      }
      return joined;
    }
    default:
      // simple_expansion, expansion and anything else: cannot be resolved
      // statically, so the raw source text is returned.
      return node.text;
  }
}
240
+
241
+ // ── Pattern-first command config ───────────────────────────────────────────
242
+
243
/**
 * Per-command description of which flags take a value and how many leading
 * positional arguments are patterns rather than paths.
 */
interface PatternCommandConfig {
  /**
   * Flags whose following argument is a non-path value (a pattern, a
   * separator, a count, …).
   */
  readonly argConsumingFlags: ReadonlySet<string>;
  /** Flags whose following argument is a file path. */
  readonly fileConsumingFlags: ReadonlySet<string>;
  /**
   * How many leading positional arguments are patterns/scripts, not paths.
   * Default: 1 (covers sed, awk, grep, rg).
   * sd uses 2 (FIND and REPLACE_WITH are both non-path positionals).
   */
  readonly patternPositionals?: number;
}
255
+
256
/** Build the configuration shared by `awk`, `gawk` and `nawk`. */
const awkFamilyConfig = (): PatternCommandConfig => ({
  argConsumingFlags: new Set(["-e", "-F", "-v"]),
  fileConsumingFlags: new Set(["-f"]),
});

/** Build the configuration shared by `grep`, `egrep` and `fgrep`. */
const grepFamilyConfig = (): PatternCommandConfig => ({
  argConsumingFlags: new Set(["-e", "-A", "-B", "-C", "-m"]),
  fileConsumingFlags: new Set(["-f"]),
});

/**
 * Commands whose first N positional arguments are inline patterns/scripts,
 * not filesystem paths, keyed by command basename. Each entry carries the
 * flag configuration the walker needs to tell flag values apart from
 * positional arguments.
 *
 * NOTE(review): `sed` lists `-i` as consuming the next argument, which matches
 * the `sed -i '' SCRIPT FILE` form. With `sed -i SCRIPT FILE` (suffix attached
 * to the flag or omitted) the script is consumed as the `-i` value and FILE
 * then lands in the pattern slot and is not collected — confirm this is
 * intended.
 */
const PATTERN_FIRST_COMMANDS: ReadonlyMap<string, PatternCommandConfig> =
  new Map<string, PatternCommandConfig>([
    [
      "sed",
      {
        argConsumingFlags: new Set(["-e", "-i"]),
        fileConsumingFlags: new Set(["-f"]),
      },
    ],
    ["awk", awkFamilyConfig()],
    ["gawk", awkFamilyConfig()],
    ["nawk", awkFamilyConfig()],
    ["grep", grepFamilyConfig()],
    ["egrep", grepFamilyConfig()],
    ["fgrep", grepFamilyConfig()],
    [
      "rg",
      {
        argConsumingFlags: new Set([
          "-e",
          "-A",
          "-B",
          "-C",
          "-m",
          "-g",
          "-t",
          "-T",
          "-j",
          "-M",
          "-r",
          "-E",
        ]),
        fileConsumingFlags: new Set(["-f"]),
      },
    ],
    [
      "sd",
      {
        // For sd, `-f` appears among the value-taking flags, not the
        // file-taking ones.
        argConsumingFlags: new Set(["-n", "-f"]),
        fileConsumingFlags: new Set([]),
        patternPositionals: 2,
      },
    ],
  ]);
342
+
343
/**
 * AST node types that stand for a literal argument value: bare words, quoted
 * strings, and concatenations of those.
 */
const ARG_NODE_TYPES: ReadonlySet<string> = new Set<string>([
  "word",
  "concatenation",
  "string",
  "raw_string",
]);
350
+
351
/**
 * Determine the command name of a `command` node.
 *
 * Looks for the first `command_name` child and returns the basename of its
 * resolved text (e.g. `/usr/bin/sed` → `sed`).
 *
 * @param node - A `command` node.
 * @returns The basename, or `undefined` when the node has no `command_name`
 *   child or that child resolves to an empty string.
 */
function extractCommandName(node: TSNode): string | undefined {
  let index = 0;
  while (index < node.childCount) {
    const candidate = node.child(index);
    index += 1;
    if (candidate === null || candidate.type !== "command_name") continue;

    // Only the first command_name child is ever considered.
    const resolved = resolveNodeText(candidate);
    if (!resolved) return undefined;
    return basename(resolved);
  }
  return undefined;
}
367
+
368
/**
 * Describes what the walker should do when it encounters a flag word inside
 * a pattern-first command. Using a discriminated union lets the `switch` in
 * `collectPatternCommandTokens` narrow `nextArgAction` without a non-null
 * assertion (which would trigger the Biome/ESLint assertion conflict).
 */
type PatternCommandFlagDirective =
  | { kind: "end-of-flags" }
  | { kind: "regular-flag" }
  | {
      kind: "consume-arg";
      nextArgAction: "skip" | "extract";
      setsExplicitScript: boolean;
    };

/**
 * Classify a flag word from a pattern-first command into a directive that
 * tells the walker how to handle the flag and its following argument.
 *
 * - `--` ends flag parsing.
 * - A flag in `argConsumingFlags` consumes the next argument, which is
 *   skipped. Only `-e` additionally marks the script/pattern as explicitly
 *   supplied. A `-f` that a command lists as arg-consuming is not a script
 *   file and must not do so: for `sd` it would turn both the FIND and the
 *   REPLACE_WITH positionals into path candidates.
 * - A flag in `fileConsumingFlags` consumes the next argument as a file
 *   path and always marks the script as explicitly supplied.
 * - Anything else is a regular flag with no argument.
 *
 * @param text - The resolved flag word, e.g. `-e`.
 * @param config - Flag configuration of the command being walked.
 * @returns The directive for this flag.
 */
function classifyPatternCommandFlag(
  text: string,
  config: PatternCommandConfig,
): PatternCommandFlagDirective {
  if (text === "--") return { kind: "end-of-flags" };
  // Checked before fileConsumingFlags, so a flag listed in both sets is
  // treated as a skipped value.
  if (config.argConsumingFlags.has(text)) {
    return {
      kind: "consume-arg",
      nextArgAction: "skip",
      setsExplicitScript: text === "-e",
    };
  }
  if (config.fileConsumingFlags.has(text)) {
    return {
      kind: "consume-arg",
      nextArgAction: "extract",
      setsExplicitScript: true,
    };
  }
  return { kind: "regular-flag" };
}
408
+
409
/**
 * Children of a `command` node that are not arguments at all and therefore
 * never fill a flag-value or positional slot.
 */
const PATTERN_COMMAND_NON_ARGUMENT_TYPES: ReadonlySet<string> = new Set([
  "file_redirect",
  "heredoc_redirect",
  "herestring_redirect",
  "comment",
]);

/**
 * Collect path-candidate tokens from a command known to have
 * pattern/script arguments in leading positional slots.
 *
 * Uses position-based skipping: the first N positional arguments
 * (where N = patternPositionals, default 1) are assumed to be
 * inline patterns/scripts and are skipped. Remaining positional
 * arguments are collected as path candidates.
 *
 * Flags listed in `argConsumingFlags` consume the next argument
 * (skipped). Flags in `fileConsumingFlags` consume the next
 * argument as a file path (collected). A flag whose directive has
 * `setsExplicitScript` signals that the script was provided via flag,
 * so no inline positional script is expected.
 *
 * Arguments that are not literal nodes (`$VAR`, `${VAR}`, `$(…)`, numbers, …)
 * cannot be resolved to a path, but they still occupy an argument slot: in
 * `grep $PAT /etc/passwd` the expansion is the pattern and the file must be
 * collected, and in `grep -e $PAT file` the expansion is the value of `-e`.
 * Their subtrees are still searched for nested commands.
 *
 * @param node - The `command` node to walk.
 * @param config - Flag/positional configuration for that command.
 * @returns Resolved argument texts that may be paths, in source order.
 */
function collectPatternCommandTokens(
  node: TSNode,
  config: PatternCommandConfig,
): string[] {
  const patternPositionals = config.patternPositionals ?? 1;
  let hasExplicitScript = false;
  let positionalsSeen = 0;
  let nextArgAction: "skip" | "extract" | null = null;
  let pastEndOfFlags = false;
  const tokens: string[] = [];

  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (!child) continue;

    // Skip command_name and variable_assignment nodes.
    if (child.type === "command_name" || child.type === "variable_assignment")
      continue;

    // Non-literal children: recurse for nested commands / redirect targets.
    if (!ARG_NODE_TYPES.has(child.type)) {
      tokens.push(...collectPathCandidateTokens(child));
      // Redirects and comments are not arguments; slot state is unaffected.
      if (PATTERN_COMMAND_NON_ARGUMENT_TYPES.has(child.type)) continue;

      // Anything else (expansion, substitution, number, …) is an argument
      // whose value is unknown. It fills the pending flag-value slot, or
      // else the next pattern slot, so later arguments keep their position.
      if (nextArgAction !== null) {
        nextArgAction = null;
      } else if (!hasExplicitScript && positionalsSeen < patternPositionals) {
        positionalsSeen++;
      }
      continue;
    }

    const text = resolveNodeText(child);

    // Handle consumed argument from previous flag.
    if (nextArgAction === "skip") {
      nextArgAction = null;
      continue;
    }
    if (nextArgAction === "extract") {
      tokens.push(text);
      nextArgAction = null;
      continue;
    }

    // Flag detection (only before "--" end-of-flags marker). A lone "-" is
    // not a flag and falls through to positional handling.
    if (
      !pastEndOfFlags &&
      child.type === "word" &&
      text.startsWith("-") &&
      text.length > 1
    ) {
      const directive = classifyPatternCommandFlag(text, config);
      switch (directive.kind) {
        case "end-of-flags":
          pastEndOfFlags = true;
          break;
        case "consume-arg":
          nextArgAction = directive.nextArgAction;
          if (directive.setsExplicitScript) hasExplicitScript = true;
          break;
        case "regular-flag":
          break;
      }
      continue;
    }

    // Positional argument.
    if (!hasExplicitScript && positionalsSeen < patternPositionals) {
      positionalsSeen++;
      continue; // Skip: this is an inline pattern/script.
    }

    // File argument — collect as path candidate.
    tokens.push(text);
  }

  return tokens;
}
497
+
498
/**
 * Gather every argument token of an ordinary (non-pattern-first) command.
 *
 * The command name and any variable assignments are left out. Literal
 * argument nodes contribute their resolved text; every other child is
 * searched recursively (e.g. a command substitution nested in the arguments).
 *
 * @param node - The `command` node to walk.
 * @returns Argument texts in source order.
 */
function collectGenericCommandTokens(node: TSNode): string[] {
  const collected: string[] = [];
  let nameConsumed = false;

  for (let idx = 0; idx < node.childCount; idx += 1) {
    const part = node.child(idx);
    if (part === null) continue;

    if (part.type === "command_name") {
      nameConsumed = true;
      continue;
    }
    // Assignments such as FOO=/bar are not arguments.
    if (part.type === "variable_assignment") continue;

    if (!ARG_NODE_TYPES.has(part.type)) {
      collected.push(...collectPathCandidateTokens(part));
      continue;
    }

    if (nameConsumed) {
      collected.push(resolveNodeText(part));
    } else {
      // Without an explicit command_name node, the first word-like child is
      // the command name itself and is dropped.
      nameConsumed = true;
    }
  }

  return collected;
}
536
+
537
/**
 * Gather the destination tokens of a `file_redirect` node: the resolved text
 * of each direct child that is a literal argument node.
 */
function collectRedirectTokens(node: TSNode): string[] {
  return Array.from({ length: node.childCount }, (_unused, index) =>
    node.child(index),
  )
    .filter(
      (part): part is TSNode =>
        part !== null && ARG_NODE_TYPES.has(part.type),
    )
    .map((part) => resolveNodeText(part));
}
551
+
552
/**
 * Pick the collection strategy for a `command` node.
 *
 * A command whose name has an entry in `PATTERN_FIRST_COMMANDS` is walked by
 * `collectPatternCommandTokens` with that entry; every other command,
 * including one whose name cannot be determined, goes through
 * `collectGenericCommandTokens`.
 */
function collectCommandTokens(node: TSNode): string[] {
  const name = extractCommandName(node);
  if (name) {
    const patternConfig = PATTERN_FIRST_COMMANDS.get(name);
    if (patternConfig) return collectPatternCommandTokens(node, patternConfig);
  }
  return collectGenericCommandTokens(node);
}
566
+
567
/**
 * Walk the AST depth-first and gather the resolved text of every node that
 * is a command argument or a redirect destination.
 *
 * Subtrees rooted at a type in `SKIP_SUBTREE_TYPES` contribute nothing.
 * `command` nodes are delegated to `collectCommandTokens` and `file_redirect`
 * nodes to `collectRedirectTokens`; every other node simply forwards to its
 * children.
 *
 * @param node - Root of the subtree to search.
 * @returns Candidate tokens in source order (duplicates preserved).
 */
function collectPathCandidateTokens(node: TSNode): string[] {
  if (SKIP_SUBTREE_TYPES.has(node.type)) return [];

  switch (node.type) {
    case "command":
      return collectCommandTokens(node);
    case "file_redirect":
      return collectRedirectTokens(node);
    default:
      break;
  }

  const gathered: string[] = [];
  for (let idx = 0; idx < node.childCount; idx += 1) {
    const descendant = node.child(idx);
    if (descendant === null) continue;
    for (const token of collectPathCandidateTokens(descendant)) {
      gathered.push(token);
    }
  }
  return gathered;
}
590
+
591
+ // Token classification is delegated to bash-token-classification.ts,
592
+ // which exports classifyTokenAsPathCandidate and classifyTokenAsRuleCandidate
593
+ // with a shared rejectNonPathToken predicate eliminating the prior clone.
594
+
595
+ // ── Top-level command enumeration ───────────────────────────────────────────
596
+
597
/**
 * Container node types that top-level command enumeration looks inside.
 * Subshells and compound statements are deliberately not listed: a `cd` or
 * `rm` inside one of them is NOT a top-level command.
 */
const TOP_LEVEL_COMMAND_DESCEND: ReadonlySet<string> = new Set<string>([
  "program",
  "list",
  "pipeline",
  "redirected_statement",
]);

/**
 * Node types that top-level command enumeration ignores: chain-operator and
 * separator tokens, redirect targets, comments, and heredoc bodies. None of
 * them is a command to evaluate.
 */
const TOP_LEVEL_COMMAND_SKIP: ReadonlySet<string> = new Set<string>([
  "&&",
  "||",
  ";",
  "&",
  "|",
  "|&",
  "\n",
  "file_redirect",
  "heredoc_redirect",
  "herestring_redirect",
  "comment",
  "heredoc_body",
  "heredoc_end",
]);

/**
 * List the text of each top-level simple-command in the program.
 *
 * Container nodes (`program`, `list`, `pipeline`, `redirected_statement`) are
 * opened up and each `command` node inside contributes its text. Operator
 * tokens and redirect targets contribute nothing. Any other node (subshell,
 * compound statement, control-flow, …) is reported whole without looking
 * inside, so its inner commands are matched as part of the enclosing
 * statement's text rather than independently.
 *
 * @param node - Root of the subtree to enumerate.
 * @returns Command texts in source order.
 */
function collectTopLevelCommandTexts(node: TSNode): string[] {
  const texts: string[] = [];

  const visit = (current: TSNode): void => {
    if (current.type === "command") {
      texts.push(current.text);
      return;
    }
    if (TOP_LEVEL_COMMAND_SKIP.has(current.type)) return;
    if (!TOP_LEVEL_COMMAND_DESCEND.has(current.type)) {
      // Some other statement node (subshell, compound_statement,
      // if/while/for, function_definition, …): report whole, do not open.
      texts.push(current.text);
      return;
    }
    for (let idx = 0; idx < current.childCount; idx += 1) {
      const inner = current.child(idx);
      if (inner !== null) visit(inner);
    }
  };

  visit(node);
  return texts;
}
655
+
656
+ // ── Leading cd detection ───────────────────────────────────────────────────
657
+
658
/**
 * Follow the chain of first children from the root until a `command` node is
 * reached.
 *
 * Only `program` and `list` nodes are stepped through. Reaching any other
 * node type — a subshell, a pipeline, another compound statement — ends the
 * search, because a `cd` inside those does not affect the outer shell's
 * working directory.
 *
 * @param node - The node to start from.
 * @returns The leading `command` node, or `null` when there is none.
 */
function findFirstCommand(node: TSNode): TSNode | null {
  let cursor: TSNode | null = node;
  while (cursor !== null) {
    if (cursor.type === "command") return cursor;
    if (cursor.type !== "program" && cursor.type !== "list") return null;
    cursor = cursor.child(0);
  }
  return null;
}
673
+
674
/** Option words accepted by the `cd` builtin (`-L`, `-P`, `-e`, `-@`, combinable). */
const CD_OPTION_PATTERN = /^-[LPe@]+$/;

/**
 * Extract the target directory of a leading `cd` command from the parsed AST.
 *
 * When a bash command begins with `cd <dir> && …`, the shell resolves
 * subsequent relative paths against `<dir>`, not the original working
 * directory. The external-directory guard must do the same, otherwise a
 * path that the shell keeps inside the working directory can appear to
 * escape it and trigger a spurious permission prompt.
 *
 * Option words before the directory (`cd -P dir`, `cd -L -- dir`) are skipped
 * so that the option itself is never mistaken for the target. Once `--` has
 * been seen, the next literal argument is taken as the directory even if it
 * starts with `-`.
 *
 * @param rootNode - Root node of the parsed command.
 * @returns The resolved target text, or `undefined` when the first command is
 *   not `cd`, or when the target cannot be meaningfully resolved (`cd -`,
 *   bare `cd`, `cd ~…`, or a non-literal first argument followed by nothing).
 */
function extractLeadingCdTarget(rootNode: TSNode): string | undefined {
  const firstCmd = findFirstCommand(rootNode);
  if (!firstCmd) return undefined;

  const cmdName = extractCommandName(firstCmd);
  if (cmdName !== "cd") return undefined;

  let optionsEnded = false;
  for (let i = 0; i < firstCmd.childCount; i++) {
    const child = firstCmd.child(i);
    if (!child) continue;
    if (child.type === "command_name" || child.type === "variable_assignment")
      continue;
    if (!ARG_NODE_TYPES.has(child.type)) continue;

    const text = resolveNodeText(child);
    if (!optionsEnded) {
      // `--` ends option parsing; only the first one is a marker.
      if (text === "--") {
        optionsEnded = true;
        continue;
      }
      // Only bare words are treated as options.
      if (child.type === "word" && CD_OPTION_PATTERN.test(text)) continue;
    }
    // `cd -` jumps to $OLDPWD; `cd ~…` is home-relative — neither can be
    // resolved against the working directory.
    if (text === "-" || text.startsWith("~")) return undefined;
    return text;
  }
  return undefined;
}
710
+
711
/**
 * Decide which directory relative path candidates are resolved against.
 *
 * A leading `cd` target that stays inside the working directory becomes the
 * base. A target that escapes it is itself an external access (reported via
 * its own candidate token) and must never silence checks on later paths, so
 * `cwd` is used instead. With no target at all, `cwd` is returned as given.
 *
 * @param cdTarget - Target of the leading `cd`, if any.
 * @param cwd - The working directory of the command.
 * @returns The resolved `cd` target when it lies within `cwd`; otherwise
 *   `cwd` unchanged.
 */
function computeEffectiveResolveBase(
  cdTarget: string | undefined,
  cwd: string,
): string {
  if (cdTarget === undefined) return cwd;

  const candidateBase = resolve(cwd, cdTarget);
  if (isPathWithinDirectory(candidateBase, resolve(cwd))) {
    return candidateBase;
  }
  return cwd;
}