@pugi/cli 0.1.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +172 -0
  3. package/bin/run.js +2 -0
  4. package/dist/commands/jobs.js +245 -0
  5. package/dist/core/agents/loader.js +104 -0
  6. package/dist/core/agents/registry.js +69 -0
  7. package/dist/core/auto-open-browser.js +128 -0
  8. package/dist/core/bash-classifier.js +1001 -0
  9. package/dist/core/clipboard.js +70 -0
  10. package/dist/core/context/builder.js +114 -0
  11. package/dist/core/context/compaction-events.js +99 -0
  12. package/dist/core/context/compaction.js +602 -0
  13. package/dist/core/context/invariants.js +250 -0
  14. package/dist/core/context/markdown-loader.js +270 -0
  15. package/dist/core/credentials.js +355 -0
  16. package/dist/core/engine/adapter-runner.js +8 -0
  17. package/dist/core/engine/anvil-client.js +156 -0
  18. package/dist/core/engine/compaction-hook.js +154 -0
  19. package/dist/core/engine/index.js +12 -0
  20. package/dist/core/engine/native-pugi.js +369 -0
  21. package/dist/core/engine/noop.js +27 -0
  22. package/dist/core/engine/prompts.js +118 -0
  23. package/dist/core/engine/tool-bridge.js +313 -0
  24. package/dist/core/file-cache.js +29 -0
  25. package/dist/core/hooks.js +415 -0
  26. package/dist/core/index-store.js +260 -0
  27. package/dist/core/jobs/registry.js +462 -0
  28. package/dist/core/mcp/client.js +316 -0
  29. package/dist/core/mcp/registry.js +171 -0
  30. package/dist/core/mcp/trust.js +91 -0
  31. package/dist/core/path-security.js +63 -0
  32. package/dist/core/permission.js +309 -0
  33. package/dist/core/repl/cap-warning.js +91 -0
  34. package/dist/core/repl/clipboard-read.js +174 -0
  35. package/dist/core/repl/history-search.js +175 -0
  36. package/dist/core/repl/history.js +172 -0
  37. package/dist/core/repl/kill-ring.js +138 -0
  38. package/dist/core/repl/session.js +618 -0
  39. package/dist/core/repl/slash-commands.js +227 -0
  40. package/dist/core/repl/workspace-context.js +113 -0
  41. package/dist/core/session.js +258 -0
  42. package/dist/core/settings.js +59 -0
  43. package/dist/core/skills/loader.js +454 -0
  44. package/dist/core/skills/sources.js +480 -0
  45. package/dist/core/skills/trust.js +172 -0
  46. package/dist/core/subagents/dispatcher.js +258 -0
  47. package/dist/core/subagents/index.js +26 -0
  48. package/dist/core/subagents/spawn.js +86 -0
  49. package/dist/core/trust.js +109 -0
  50. package/dist/index.js +8 -0
  51. package/dist/runtime/cli.js +3405 -0
  52. package/dist/runtime/commands/agents.js +385 -0
  53. package/dist/runtime/commands/budget.js +192 -0
  54. package/dist/runtime/commands/config.js +231 -0
  55. package/dist/runtime/commands/privacy.js +107 -0
  56. package/dist/runtime/commands/skills.js +401 -0
  57. package/dist/runtime/commands/undo.js +329 -0
  58. package/dist/runtime/update-check.js +294 -0
  59. package/dist/tools/bash.js +660 -0
  60. package/dist/tools/file-tools.js +346 -0
  61. package/dist/tools/registry.js +25 -0
  62. package/dist/tools/web-fetch.js +535 -0
  63. package/dist/tui/agent-tree.js +66 -0
  64. package/dist/tui/conversation-pane.js +45 -0
  65. package/dist/tui/device-flow.js +142 -0
  66. package/dist/tui/input-box.js +474 -0
  67. package/dist/tui/login-picker.js +69 -0
  68. package/dist/tui/render.js +125 -0
  69. package/dist/tui/repl-render.js +240 -0
  70. package/dist/tui/repl-splash-art.js +64 -0
  71. package/dist/tui/repl-splash.js +111 -0
  72. package/dist/tui/repl.js +214 -0
  73. package/dist/tui/slash-palette.js +106 -0
  74. package/dist/tui/splash-data.js +61 -0
  75. package/dist/tui/splash.js +31 -0
  76. package/dist/tui/status-bar.js +71 -0
  77. package/dist/tui/update-banner.js +8 -0
  78. package/dist/tui/workspace-context.js +105 -0
  79. package/package.json +71 -0
@@ -0,0 +1,1001 @@
1
+ /**
2
+ * Bash command classifier — Sprint α5.2 (ADR-0056 PR-PUGI-CLI-M1-GAP-B).
3
+ *
4
+ * Splits a shell command into a 7-class taxonomy so the permission
5
+ * engine can apply class-aware policy instead of the prior bool gate
6
+ * (`destructiveBashPatterns ? deny : ask`).
7
+ *
8
+ * Design notes:
9
+ * - The classifier is a conservative pattern matcher, not a full
10
+ * bash AST parser. M2 will replace it with a real parser (see
11
+ * bash-security.md §4). For M1 the rules are explicit-substring +
12
+ * simple tokenization, which is good enough to gate every command
13
+ * the engine loop currently emits.
14
+ * - Compound commands (`a && b`, `a || b`, `a ; b`, `a | b`) are
15
+ * split on the four separators and every component is classified
16
+ * individually. The overall class is the most dangerous component.
17
+ * - The `destructive` patterns originally lived in
18
+ * `permission.ts::destructiveBashPatterns`. They are now the
19
+ * single source of truth here; `permission.ts` re-exports the
20
+ * hard-deny check through `classifyBash`.
21
+ * - The `unknown` class fires on parse failure (`eval`, deep
22
+ * `$(...)` nesting, `curl | sh` install pipes) so the permission
23
+ * engine can fail closed in interactive modes.
24
+ */
25
/**
 * Severity rank of each command class. Compound commands are reduced
 * to the component with the highest rank.
 *
 * `unknown` deliberately sits above `read` and `build_test`: a chain
 * such as `pwd && bash ./payload.sh` must not let the harmless `read`
 * component win and disarm the fail-closed `unknown` gate. Genuine
 * `write_workspace`, `network`, `write_protected` and `destructive`
 * components still outrank it.
 *
 * Code Reviewer P0 retro 2026-05-24: `unknown` used to be rank 0, so
 * `read` (rank 1) won the reduction and parse failures were no longer
 * fail-closed.
 */
const CLASS_RANK = (() => {
    // Listed from most to least dangerous; the rank counts down from
    // the list length (7) to 1.
    const bySeverity = [
        'destructive',
        'write_protected',
        'write_workspace',
        'network',
        'unknown',
        'build_test',
        'read',
    ];
    return Object.fromEntries(
        bySeverity.map((name, position) => [name, bySeverity.length - position]),
    );
})();
51
/**
 * Hard-deny substring patterns, in match-priority order. Each entry is
 * `{ pattern }`, optionally with `caseInsensitive: true`.
 *
 * NOTE(review): `rm -rf .git` and `rm -r .git` can only be reported
 * when matching does not stop at the first hit, because `rm -rf .`
 * precedes the former and is a substring of it — confirm whether the
 * shadowed entry is still wanted.
 */
const DESTRUCTIVE_PATTERNS = (() => {
    const exact = (pattern) => ({ pattern });
    const anyCase = (pattern) => ({ pattern, caseInsensitive: true });
    const guardedBranches = ['main', 'master', 'production'];
    const sshdConfigPaths = ['sshd_config', '/etc/ssh/sshd_config'];

    const filesystemWipe = [
        'rm -rf /',
        'rm -rf ~',
        'rm -rf .',
        'rm -rf *',
        'rm -rf "/',
        'rm -rf "~',
        'rm -rf .git',
        'rm -r /',
        'rm -r ~',
        'rm -r .git',
        'sudo rm -rf',
        'sudo rm -r',
        'dd if=/dev/zero',
        'dd if=/dev/random',
        'dd of=/dev/',
        'mkfs',
        'shred ',
        'wipefs',
        '> /dev/sda',
        '> /dev/disk',
    ];
    const permissionWipe = [
        'chmod 777 /',
        'chmod -R 777 /',
        'chmod -R 777 ~',
        'chown -R root /',
        'chown -R / ',
    ];
    const shellTricks = [':(){ :|:& };:', 'eval "$', "eval '$"];
    const gitHistoryLoss = [
        'git reset --hard',
        'git clean -fdx',
        // Force pushes to each guarded branch: long flag first, then short.
        ...guardedBranches.flatMap((branch) => [
            `git push --force origin ${branch}`,
            `git push -f origin ${branch}`,
        ]),
    ];
    const containerInfra = [
        'docker system prune',
        'docker rm -f $(docker',
        'kubectl delete --all',
        'kubectl delete namespace',
        'terraform destroy',
    ];
    // SQL is matched case-insensitively — the model can emit any case.
    const sqlDestructive = ['DROP DATABASE', 'DROP TABLE', 'TRUNCATE TABLE'];
    const firewall = ['ufw disable', 'iptables -F', 'iptables --flush'];
    const credentialExfil = [
        'cat ~/.ssh/id_rsa',
        'cat ~/.ssh/id_ed25519',
        'gpg --export-secret',
    ];
    // SSH config writes only (redirections / tee); reads are fine.
    const sshdConfigWrites = [
        ...sshdConfigPaths.flatMap((path) => [`> ${path}`, `>> ${path}`]),
        ...['tee', 'tee -a'].flatMap((tee) => sshdConfigPaths.map((path) => `${tee} ${path}`)),
    ];
    const historyDestruction = ['history -c', ' >/dev/null 2>&1; rm'];

    return [
        ...filesystemWipe.map(exact),
        ...permissionWipe.map(exact),
        ...shellTricks.map(exact),
        ...gitHistoryLoss.map(exact),
        ...containerInfra.map(exact),
        ...sqlDestructive.map(anyCase),
        ...firewall.map(exact),
        ...credentialExfil.map(exact),
        ...sshdConfigWrites.map(exact),
        ...historyDestruction.map(exact),
    ];
})();
123
/**
 * Regex for the compound separators `&&`, `||`, `;` and `|`, together
 * with any whitespace around them. A lone `&` (background fork) is
 * deliberately absent: backgrounding changes when a command runs, not
 * which command runs.
 */
const COMPOUND_SEPARATORS = new RegExp(String.raw`\s*(?:&&|\|\||;|\|)\s*`);
130
/**
 * Split a shell command into its compound components while respecting
 * quoted strings (`'...'`, `"..."`, `` `...` ``), so script bodies
 * passed to `awk`, `sed`, `perl` or `python -c` are not mis-split on a
 * bare `;` or `|` inside the quotes.
 *
 * Separators recognised outside quotes:
 *   - `&&`, `||`, `;` (but not the `;;` case terminator), `|`
 *   - `|&` — a pipe that also forwards stderr; what follows is still a
 *     separate command and must be classified on its own
 *   - an unescaped newline — the shell treats it like `;`
 *
 * A lone `&` (background fork) is not a separator.
 *
 * Backslash handling follows the shell: outside single quotes a
 * backslash protects the next character (so `\;` and backslash-newline
 * never split). Inside single quotes a backslash is an ordinary
 * character, so `'a\'` closes the quote. Treating it as an escape there
 * would keep the quote "open" and hide every later command from
 * classification.
 *
 * @param {string} cmd Raw command line.
 * @returns {string[]} Trimmed, non-empty components in source order.
 */
function splitCompoundRespectingQuotes(cmd) {
    const out = [];
    let buf = '';
    // Currently open quote character, or '' when outside any quote.
    let quote = '';
    const flush = () => {
        out.push(buf.trim());
        buf = '';
    };
    for (let i = 0; i < cmd.length; i += 1) {
        const ch = cmd[i];
        const next = cmd[i + 1];
        // Backslash escapes the following character everywhere except
        // inside single quotes, where it is literal.
        if (ch === '\\' && quote !== "'") {
            buf += ch;
            if (next !== undefined) {
                buf += next;
                i += 1;
            }
            continue;
        }
        if (quote !== '') {
            if (ch === quote) {
                quote = '';
            }
            buf += ch;
            continue;
        }
        if (ch === "'" || ch === '"' || ch === '`') {
            quote = ch;
            buf += ch;
            continue;
        }
        // Two-character separators: `&&`, `||`, `|&`.
        const isDoubleSeparator = (ch === '&' && next === '&') ||
            (ch === '|' && (next === '|' || next === '&'));
        if (isDoubleSeparator) {
            flush();
            i += 1;
            continue;
        }
        // Bare pipe or newline.
        if (ch === '|' || ch === '\n') {
            flush();
            continue;
        }
        // Single `;` only — `;;` terminates a `case` arm and is kept.
        if (ch === ';' && next !== ';' && cmd[i - 1] !== ';') {
            flush();
            continue;
        }
        buf += ch;
    }
    flush();
    return out.filter((s) => s !== '');
}
210
/** Executables that always count as a network operation. */
const NETWORK_TOKENS = new Set('curl wget ssh scp rsync nc netcat'.split(' '));
220
/**
 * Command prefixes that reach the network (remote git operations and
 * package installers). `npm i ` / `pnpm i ` carry a trailing space.
 */
const NETWORK_PREFIXES = (() => {
    const under = (tool, subcommands) => subcommands.map((sub) => `${tool} ${sub}`);
    return [
        ...under('git', ['clone', 'fetch', 'pull', 'push']),
        ...under('npm', ['install', 'i ', 'ci']),
        ...under('pnpm', ['install', 'i ', 'add']),
        ...under('yarn', ['install', 'add']),
        'pip install',
        'pip3 install',
        ...under('brew', ['install', 'upgrade']),
        'apt-get',
        'apt install',
        'yum install',
        'dnf install',
        ...under('docker', ['pull', 'push']),
        'cargo install',
        ...under('go', ['get', 'install']),
    ];
})();
247
/**
 * Build / test runner prefixes. These are common enough that the
 * permission engine grants them automatically in `acceptEdits`/`auto`
 * modes ("ask first time, then allow rule", bash-security.md §3).
 *
 * IMPORTANT: no prefix here may also match a network installer —
 * `npm install` / `pnpm install` are handled before this list.
 */
const BUILD_TEST_PREFIXES = (() => {
    const under = (runner, tasks) => tasks.map((task) => `${runner} ${task}`);
    const pnpmTasks = ['test', 'build', 'typecheck', 'lint'];
    return [
        ...under('pnpm', pnpmTasks),
        ...under('pnpm run', pnpmTasks),
        'npm test',
        ...under('npm run', ['build', 'lint', 'typecheck', 'test']),
        ...under('yarn', ['test', 'build', 'lint']),
        ...under('cargo', ['test', 'build', 'check']),
        ...under('go', ['test', 'build', 'vet']),
        'pytest',
        'jest',
        'vitest',
        ...under('make', ['test', 'build', 'check']),
        ...under('mvn', ['test', 'package']),
        ...under('gradle', ['test', 'build']),
        ...under('tsc', ['--noEmit', '-p']),
        'eslint',
        'prettier --check',
    ];
})();
293
/** Executables treated as read-only when they are the first token. */
const READ_TOKENS = new Set([
    // Inspection of files, the filesystem and the environment.
    ...'pwd ls cat head tail wc which whereis file stat du df'.split(' '),
    ...'echo printenv env date uname hostname id whoami true false'.split(' '),
    ...'basename dirname realpath'.split(' '),
    // `sleep` has no FS/network/proc impact beyond a timer, so it is
    // treated as read and background jobs can use it without tripping
    // the unknown gate. The same reasoning covers the coreutils below.
    ...'sleep yes seq tr cut sort uniq'.split(' '),
]);
331
/**
 * Read-only command prefixes: git inspection subcommands, pagers and
 * search tools (stored with a trailing space), and `tree`.
 */
const READ_PREFIXES = [
    ...[
        'status',
        'log',
        'diff',
        'show',
        'branch',
        'remote',
        'rev-parse',
        'ls-files',
        'config --get',
    ].map((subcommand) => `git ${subcommand}`),
    ...['less', 'more', 'grep', 'rg', 'fd'].map((tool) => `${tool} `),
    'tree',
];
348
/**
 * Prefixes of commands that write inside the workspace. Whether the
 * destination actually stays inside the workspace is checked separately.
 */
const WRITE_WORKSPACE_PREFIXES = [
    'mkdir',
    'touch',
    ...['cp', 'mv', 'ln'].map((tool) => `${tool} `),
    ...[
        'commit',
        'add',
        'checkout',
        'switch',
        'restore',
        'stash',
        'tag',
        'rebase',
        'merge',
    ].map((subcommand) => `git ${subcommand}`),
];
365
/**
 * Protected-path triggers. A command that touches any of these paths
 * is classified `write_protected` regardless of the operation type.
 *
 * Entries are plain substring matches (e.g. `/.ssh/` matches both
 * `~/.ssh/foo` and `/Users/x/.ssh/bar`).
 *
 * Shell history files are listed because they routinely contain
 * pasted secrets; the protected-read check is documented as gating
 * `tail -f ~/.bash_history`, which no earlier entry matched.
 */
const PROTECTED_PATH_SUBSTRINGS = [
    '/.ssh/',
    '/.aws/',
    '/.gnupg/',
    '/.config/',
    '~/.ssh/',
    '~/.aws/',
    '~/.gnupg/',
    '~/.config/',
    '~/.npmrc',
    '~/.pypirc',
    '~/.bashrc',
    '~/.zshrc',
    '~/.profile',
    '~/.bash_profile',
    '/etc/',
    '/usr/',
    '/var/',
    // Shell history (appended so existing match priority is unchanged).
    '~/.bash_history',
    '~/.zsh_history',
];
391
/**
 * Remote-fetch tools that, combined with a pipe into a shell, force
 * the `unknown` class so the permission engine can fail closed.
 */
const OBFUSCATION_TRIGGERS = ['curl', 'wget'].map((needle) => ({
    needle,
    reason: `${needle} piped into shell installer`,
}));
399
/**
 * Classify a single (non-compound) command component.
 *
 * Checks run most-specific first; the first hit wins:
 *   1.  destructive substring (hard-deny path)
 *   2.  obfuscation (eval with expansion, base64 decode, deep nesting,
 *       fetch piped into a shell)
 *   3.  `find` with an action that deletes or runs a command
 *       (`-delete`, `-exec`, `-execdir`, `-ok`, `-okdir`)
 *   4.  write into a protected path or outside the workspace
 *   4a. read from a protected path (reported as `write_protected`)
 *   4b. write touching a `.env` file
 *   5.  workspace write prefixes (mkdir / touch / cp / mv / git-write)
 *   5b. shell redirection into a workspace target
 *   5c. any other extracted write target (`sed -i`, `sort -o`, two-arg
 *       `uniq`, awk redirection, or a redirect that sits next to a
 *       `/dev/null` redirect)
 *   6.  network
 *   7.  build / test runners, bare `make`, `cd <path>`
 *   8.  read-only commands (`env` only while it does not wrap a command)
 *   9.  unknown (default)
 *
 * @param {string} cmd One component of a compound command.
 * @param {object} ctx Classification context, passed through to
 *   `detectProtectedWrite`.
 * @returns {{class: string, reason: string, matched: string}}
 */
function classifyComponent(cmd, ctx) {
    const trimmed = cmd.trim();
    if (trimmed === '') {
        return { class: 'unknown', reason: 'empty component', matched: '' };
    }
    // 1. Destructive hard-deny patterns.
    const destructive = findDestructiveMatch(trimmed);
    if (destructive) {
        return {
            class: 'destructive',
            reason: `Destructive command pattern matched: ${destructive}`,
            matched: destructive,
        };
    }
    // 2. Obfuscation.
    const obfuscation = detectObfuscation(trimmed);
    if (obfuscation) {
        return {
            class: 'unknown',
            reason: obfuscation.reason,
            matched: obfuscation.matched,
        };
    }
    // 3. find with a deleting / command-running action. `-execdir`,
    // `-ok` and `-okdir` run commands exactly like `-exec`, so they
    // must not fall through to the read-only `find` branch below.
    if (/\bfind\b[^|;]*\s-(?:delete|exec(?:dir)?\b|ok(?:dir)?\b)/.test(trimmed)) {
        return {
            class: 'destructive',
            reason: 'find with -delete or -exec is treated as destructive',
            matched: 'find ... -delete|-exec',
        };
    }
    // 4. Protected-write check (redirection OR write op into protected path).
    const protectedWrite = detectProtectedWrite(trimmed, ctx);
    if (protectedWrite) {
        return {
            class: 'write_protected',
            reason: protectedWrite.reason,
            matched: protectedWrite.matched,
        };
    }
    // 4a. Reads from credential / config paths are reported as
    // `write_protected` so the permission matrix gates them. The
    // hard-coded destructive entries for the SSH private keys still
    // win because step 1 runs first.
    const protectedRead = detectProtectedRead(trimmed);
    if (protectedRead) {
        return {
            class: 'write_protected',
            reason: protectedRead.reason,
            matched: protectedRead.matched,
        };
    }
    // 4b. .env writes are always protected, even inside the workspace
    // (CEO directive feedback_never_delete_untracked_env.md).
    const envWrite = detectEnvWrite(trimmed);
    if (envWrite) {
        return {
            class: 'write_protected',
            reason: envWrite.reason,
            matched: envWrite.matched,
        };
    }
    // 5. Workspace write ops (mkdir / touch / cp / mv / git commit / etc).
    for (const prefix of WRITE_WORKSPACE_PREFIXES) {
        if (trimmed.startsWith(prefix)) {
            return {
                class: 'write_workspace',
                reason: `Workspace write op: ${prefix.trim()}`,
                matched: prefix.trim(),
            };
        }
    }
    // 5b. Shell redirection (`>` / `>>`, with or without whitespace)
    // that is not a file-descriptor redirect (`>&1`, `2>&1`).
    if (/(^|[^0-9&])>>?\s*[^&\s|;<>]/.test(trimmed) &&
        !trimmed.includes('/dev/null')) {
        return {
            class: 'write_workspace',
            reason: 'Shell redirection into a workspace target',
            matched: '>',
        };
    }
    // 5c. Remaining write targets. Step 5b gives up as soon as
    // `/dev/null` appears anywhere, so `echo x > f 2>/dev/null` used to
    // be classified by its first token (`echo` → read). In-place writes
    // (`sed -i`, `sort -o`, two-arg `uniq`, awk redirection) were also
    // classified as reads. Every concrete target is a workspace write.
    const writeTargets = extractWriteTargets(trimmed);
    if (writeTargets.length > 0) {
        return {
            class: 'write_workspace',
            reason: `Command writes to workspace target: ${writeTargets[0]}`,
            matched: writeTargets[0],
        };
    }
    // 6. Network commands.
    const network = detectNetwork(trimmed);
    if (network) {
        return {
            class: 'network',
            reason: network.reason,
            matched: network.matched,
        };
    }
    // 7. Build/test runners.
    for (const prefix of BUILD_TEST_PREFIXES) {
        if (trimmed === prefix || trimmed.startsWith(`${prefix} `)) {
            return {
                class: 'build_test',
                reason: `Build/test runner: ${prefix}`,
                matched: prefix,
            };
        }
    }
    // 7b. Bare `make` (with or without a target) is build-class.
    if (trimmed === 'make' || trimmed.startsWith('make ')) {
        return { class: 'build_test', reason: 'make runner', matched: 'make' };
    }
    // 7c. `cd` with at most one argument. Per the original note, the
    // cwd-escape detector elsewhere upgrades targets outside the workspace.
    if (/^cd(\s+\S+)?\s*$/.test(trimmed)) {
        return { class: 'read', reason: 'cd inside workspace', matched: 'cd' };
    }
    // 8. Read-only commands.
    const firstToken = trimmed.split(/\s+/)[0] ?? '';
    if (READ_TOKENS.has(firstToken)) {
        // `env` prints the environment only when every argument is a
        // flag or a NAME=value assignment; any other argument is a
        // command that `env` will execute, so fail closed. Arguments
        // consumed by flags (e.g. `-u NAME`) are conservatively counted
        // as commands.
        if (firstToken === 'env') {
            const wrapsCommand = trimmed
                .split(/\s+/)
                .slice(1)
                .some((arg) => !arg.startsWith('-') && !/^[A-Za-z_][A-Za-z0-9_]*=/.test(arg));
            if (wrapsCommand) {
                return {
                    class: 'unknown',
                    reason: 'env wrapper executes another command',
                    matched: 'env',
                };
            }
        }
        return { class: 'read', reason: `Read-only command: ${firstToken}`, matched: firstToken };
    }
    for (const prefix of READ_PREFIXES) {
        if (trimmed === prefix.trim() || trimmed.startsWith(prefix)) {
            return {
                class: 'read',
                reason: `Read-only command: ${prefix.trim()}`,
                matched: prefix.trim(),
            };
        }
    }
    // sed/awk are not READ_TOKENS members; they count as reads only
    // because every write form was caught by steps 5b/5c above.
    if (firstToken === 'sed' || firstToken === 'awk') {
        return { class: 'read', reason: `Stream editor as read: ${firstToken}`, matched: firstToken };
    }
    // `find` without a deleting / command-running action is a read.
    if (firstToken === 'find') {
        return { class: 'read', reason: 'find (no -delete/-exec)', matched: 'find' };
    }
    // 9. Default: unknown.
    return {
        class: 'unknown',
        reason: `Unrecognized command: ${firstToken || trimmed}`,
        matched: firstToken || trimmed,
    };
}
569
/**
 * Return the first entry of `DESTRUCTIVE_PATTERNS` found in `cmd`, or
 * `null` when none matches.
 *
 * Matching is a substring test against both the raw command and a
 * copy with every whitespace run collapsed to one space, so padding
 * such as `rm   -rf\t/` cannot dodge a pattern written with single
 * spaces. For `caseInsensitive` entries both sides are upper-cased;
 * the pattern no longer has to be stored in upper case to match.
 *
 * @param {string} cmd Command component to inspect.
 * @returns {string|null} The matching pattern exactly as listed.
 */
function findDestructiveMatch(cmd) {
    const collapsed = cmd.replace(/\s+/g, ' ');
    const haystacks = collapsed === cmd ? [cmd] : [cmd, collapsed];
    // Upper-case once up front rather than once per pattern.
    const upperHaystacks = haystacks.map((text) => text.toUpperCase());
    for (const { pattern, caseInsensitive } of DESTRUCTIVE_PATTERNS) {
        const needle = caseInsensitive ? pattern.toUpperCase() : pattern;
        const candidates = caseInsensitive ? upperHaystacks : haystacks;
        if (candidates.some((text) => text.includes(needle))) {
            return pattern;
        }
    }
    return null;
}
582
/**
 * Detect command shapes that cannot be classified reliably and must
 * therefore be reported as `unknown`. Checked in this order:
 *   - `eval` followed by a backtick, `$`, or a quote
 *   - a `base64 -d` / `base64 --decode` step
 *   - `$(...)` nesting deeper than 3
 *   - more than 3 backtick pairs
 *   - a fetch tool from `OBFUSCATION_TRIGGERS` together with a pipe
 *     into `sh` / `bash` / `zsh` / `fish`
 *
 * @param {string} cmd Command component to inspect.
 * @returns {{reason: string, matched: string}|null}
 */
function detectObfuscation(cmd) {
    const evalWithExpansion = /(^|\s)eval\s+[`$"']/;
    if (evalWithExpansion.test(cmd)) {
        return { reason: 'eval with shell expansion is treated as unknown', matched: 'eval' };
    }

    // Decoded payloads are a common way to smuggle a script past review.
    const base64Decoders = [/\bbase64\s+-d\b/, /\bbase64\s+--decode\b/];
    if (base64Decoders.some((decoder) => decoder.test(cmd))) {
        return { reason: 'base64 decode pipeline is treated as unknown', matched: 'base64 -d' };
    }

    const maxDepth = 3;
    if (nestingDepth(cmd, '$(', ')') > maxDepth) {
        return { reason: 'deeply nested command substitution', matched: '$(...)' };
    }
    if (nestingDepth(cmd, '`', '`') > maxDepth) {
        return { reason: 'deeply nested backtick substitution', matched: '`...`' };
    }

    // Remote installer pipe: the command must contain both the fetcher
    // and a pipe into a shell.
    const pipesIntoShell = /\|\s*(?:sh|bash|zsh|fish)\b/.test(cmd);
    const fetcher = pipesIntoShell
        ? OBFUSCATION_TRIGGERS.find((trigger) => cmd.includes(trigger.needle))
        : undefined;
    if (fetcher === undefined) {
        return null;
    }
    return {
        reason: `${fetcher.reason}: ${fetcher.needle} | <shell>`,
        matched: `${fetcher.needle} | sh`,
    };
}
616
/**
 * Estimate how deeply a delimiter pair is nested in `cmd`.
 *
 * When `open` and `close` are the same string (backticks) real depth
 * cannot be tracked, so the number of complete pairs is returned as an
 * approximation. Otherwise the command is scanned left to right and
 * the maximum number of simultaneously open delimiters is returned;
 * a `close` seen while nothing is open is ignored.
 *
 * @param {string} cmd Command text.
 * @param {string} open Opening delimiter, e.g. `$(`.
 * @param {string} close Closing delimiter, e.g. `)`.
 * @returns {number}
 */
function nestingDepth(cmd, open, close) {
    if (open === close) {
        const occurrences = cmd.split(open).length - 1;
        return Math.floor(occurrences / 2);
    }
    let current = 0;
    let deepest = 0;
    let pos = 0;
    while (pos < cmd.length) {
        if (cmd.startsWith(open, pos)) {
            current += 1;
            if (current > deepest) {
                deepest = current;
            }
            // Jump past the whole opening delimiter.
            pos += open.length;
            continue;
        }
        if (current > 0 && cmd.startsWith(close, pos)) {
            current -= 1;
        }
        pos += 1;
    }
    return deepest;
}
637
/**
 * Backslash-escape every regular-expression metacharacter in `s` so
 * the result matches `s` literally when compiled into a RegExp.
 *
 * @param {string} s Literal text.
 * @returns {string}
 */
function escapeRegex(s) {
    const metacharacters = new Set('.*+?^${}()|[]\\');
    let escaped = '';
    for (const ch of s) {
        escaped += metacharacters.has(ch) ? `\\${ch}` : ch;
    }
    return escaped;
}
640
/**
 * Detect a write into a protected path or outside the workspace.
 *
 * Two passes:
 *   1. If the command looks like a write at all (an extracted write
 *      target, a redirection, or one of the write-capable tools below)
 *      and mentions a protected path anywhere, report the first
 *      protected path in list order.
 *   2. Otherwise examine each extracted write target on its own: a
 *      protected substring in the target, or a target that
 *      `looksAbsoluteOutsideWorkspace`, is reported with the target as
 *      the match.
 *
 * @param {string} cmd Command component.
 * @param {object} ctx Passed through to `looksAbsoluteOutsideWorkspace`.
 * @returns {{reason: string, matched: string}|null}
 */
function detectProtectedWrite(cmd, ctx) {
    const writeTargets = extractWriteTargets(cmd);

    // Redirection plus every tool that can create, move, delete or
    // re-permission a file.
    const writeSignals = [
        /(^|\s)>>?\s*\S/,
        /\btee\b/,
        /\bcp\b/,
        /\bmv\b/,
        /\bmkdir\b/,
        /\btouch\b/,
        /\bchmod\b/,
        /\bchown\b/,
        /\bln\b/,
        /\brm\b/,
        /\brsync\b/,
        /\bscp\b/,
    ];
    const looksLikeWrite = writeTargets.length > 0 ||
        writeSignals.some((signal) => signal.test(cmd));
    if (looksLikeWrite) {
        const mentioned = PROTECTED_PATH_SUBSTRINGS.find((needle) => cmd.includes(needle));
        if (mentioned !== undefined) {
            return {
                reason: `Write into protected path: ${mentioned}`,
                matched: mentioned,
            };
        }
    }

    // Per-target pass: covers targets whose protected part is not a
    // substring of the raw command, and absolute targets outside the
    // workspace.
    for (const target of writeTargets) {
        const protectedPart = PROTECTED_PATH_SUBSTRINGS.find((needle) => target.includes(needle));
        if (protectedPart !== undefined) {
            return {
                reason: `Write into protected path: ${protectedPart}`,
                matched: target,
            };
        }
        if (looksAbsoluteOutsideWorkspace(target, ctx)) {
            return {
                reason: `Write target outside workspace: ${target}`,
                matched: target,
            };
        }
    }
    return null;
}
696
/**
 * Collect every path the command appears to write to, in this order:
 *   - shell redirection `> file` / `>> file` (whitespace optional;
 *     file-descriptor redirects such as `>&1` / `2>&1` and the literal
 *     target `/dev/null` are skipped)
 *   - `sort -o file`
 *   - `uniq <input> <output>` (second positional argument)
 *   - `sed -i ... <file>...` (each token after `-i` that is neither a
 *     flag nor starts with a quote)
 *   - `awk '... > "file"'` (quoted redirection inside the script)
 *
 * Conservative by design: shell variables and globs are not resolved,
 * and one path may be reported more than once when two rules match it.
 *
 * @param {string} cmd Command component.
 * @returns {string[]} Targets with surrounding quotes stripped.
 */
function extractWriteTargets(cmd) {
    const found = [];
    const keep = (raw) => {
        found.push(stripQuotes(raw));
    };

    // Shell redirection.
    for (const hit of cmd.matchAll(/(^|[^0-9&])>>?\s*([^\s|;&<>]+)/g)) {
        const candidate = hit[2];
        const ignorable = !candidate || candidate.startsWith('&') || candidate === '/dev/null';
        if (!ignorable) {
            keep(candidate);
        }
    }

    // sort -o <file>
    const sortOutput = /\bsort\b[^|;]*\s-o\s+(\S+)/.exec(cmd)?.[1];
    if (sortOutput) {
        keep(sortOutput);
    }

    // uniq <input> <output>: the second non-flag argument is the output.
    const uniqOutput = /\buniq\b(?:\s+-[^\s]+)*\s+(\S+)\s+(\S+)/.exec(cmd)?.[2];
    if (uniqOutput && !uniqOutput.startsWith('-')) {
        keep(uniqOutput);
    }

    // sed -i [SUFFIX] <expr> <file>...: take the text after `-i` and
    // keep each token that is neither a flag nor a quoted expression.
    const sedTail = /\bsed\b[^|;]*\s-i\b([^|;]*)/.exec(cmd)?.[1];
    if (sedTail) {
        const skippedLeads = ['-', "'", '"'];
        for (const token of sedTail.trim().split(/\s+/)) {
            const skipped = token === '' || skippedLeads.some((lead) => token.startsWith(lead));
            if (!skipped) {
                keep(token);
            }
        }
    }

    // awk '... > "file"' or awk "... > \"file\""
    for (const hit of cmd.matchAll(/\bawk\b[^|;]*?['"][^'"]*?>\s*['"]([^'"]+)['"]/g)) {
        if (hit[1]) {
            keep(hit[1]);
        }
    }
    return found;
}
756
/**
 * Remove at most one leading and one trailing quote character (`"` or
 * `'`) from `s`. The two ends are handled independently, so they need
 * not be the same kind of quote.
 *
 * @param {string} s Possibly quoted token.
 * @returns {string}
 */
function stripQuotes(s) {
    const isQuote = (ch) => ch === '"' || ch === "'";
    const start = isQuote(s[0]) ? 1 : 0;
    // A single quote character is consumed as the leading quote only.
    const end = s.length > start && isQuote(s[s.length - 1]) ? s.length - 1 : s.length;
    return s.slice(start, end);
}
759
/**
 * First tokens of the read tools that the read-prefix list stores with
 * a trailing space, so `detectProtectedRead` can recognise them by
 * their leading token alone.
 */
const READ_PREFIX_TOKENS = new Set('grep rg less more fd tree'.split(' '));
/**
 * Detect a READ aimed at a protected path.
 *
 * Only commands whose leading token is a known read tool are
 * considered (`READ_TOKENS`, `READ_PREFIX_TOKENS`, or `sed` / `awk` /
 * `find`), so writes — which `detectProtectedWrite` covers — are not
 * flagged twice. The caller maps a hit to the `write_protected` class,
 * which keeps the permission matrix simple: that class is already
 * denied in plan/dontAsk and asked elsewhere.
 *
 * @param {string} cmd Command component.
 * @returns {{reason: string, matched: string}|null}
 */
function detectProtectedRead(cmd) {
    const [leadingToken = ''] = cmd.split(/\s+/);
    const streamAndSearchTools = ['sed', 'awk', 'find'];
    const isReadTool = READ_TOKENS.has(leadingToken) ||
        READ_PREFIX_TOKENS.has(leadingToken) ||
        streamAndSearchTools.includes(leadingToken);
    if (!isReadTool) {
        return null;
    }
    const touched = PROTECTED_PATH_SUBSTRINGS.find((needle) => cmd.includes(needle));
    if (touched === undefined) {
        return null;
    }
    return {
        reason: `Read from protected path: ${touched}`,
        matched: touched,
    };
}
790
/**
 * Detect a write that touches a `.env` / `.env.<suffix>` file.
 *
 * The command must (1) name a `.env` file as its own path segment and
 * (2) contain a write operation: an output redirection, `tee`, or
 * `cp` / `mv` / `rm` followed by `.env`. Plain reads such as
 * `cat .env` return null here.
 *
 * The file name is recognised after whitespace, `/`, a redirection
 * operator or a quote, and before whitespace, end of input, `>` or a
 * quote, so `echo x >.env` and `echo x > ".env"` are caught as well as
 * `echo x > .env`. Redirections are recognised with or without
 * whitespace after the operator; file-descriptor duplication (`>&1`,
 * `2>&1`) is not treated as a write.
 *
 * @param {string} cmd Command component.
 * @returns {{reason: string, matched: string}|null}
 */
function detectEnvWrite(cmd) {
    const namesEnvFile = /(^|[\s/>"'])\.env(\.[a-zA-Z0-9_-]+)?(\s|$|[>"'])/m.test(cmd);
    if (!namesEnvFile) {
        return null;
    }
    const writeSignals = [
        // `>` or `>>` followed by anything except `&` (fd duplication).
        />>?[^&]/,
        /\btee\b/,
        /\bcp\b.*\.env/,
        /\bmv\b.*\.env/,
        /\brm\b.*\.env/,
    ];
    if (writeSignals.some((signal) => signal.test(cmd))) {
        return { reason: 'Write touches .env file', matched: '.env' };
    }
    return null;
}
807
/**
 * Decide whether a path-like token points outside the allowed roots
 * (`ctx.workspaceRoot` plus `ctx.additionalDirectories`).
 *
 * - `~`-prefixed targets are always treated as outside (conservative:
 *   the home directory is not resolved here).
 * - Non-absolute targets (workspace-relative paths, shell variables)
 *   are allowed at this layer; the classifier cannot resolve shell
 *   variables, and traversal for the file tools is gated elsewhere.
 * - Absolute targets are normalized lexically (`.`, `..` and empty
 *   segments collapsed, no symlink resolution) before being compared
 *   against the allowed roots, so `/<root>/../../etc` cannot pass as
 *   in-workspace on the strength of its prefix alone.
 *
 * @param {string} target - Path token, optionally wrapped in quotes.
 * @param {{workspaceRoot: string, additionalDirectories?: string[]}} ctx
 *   Allowed-root configuration.
 * @returns {boolean} True when the target is outside every allowed root.
 */
function looksAbsoluteOutsideWorkspace(target, ctx) {
    // Strip surrounding quotes and shell expansion artifacts.
    const cleaned = target.replace(/^["']|["']$/g, '');
    if (cleaned.startsWith('~')) {
        return true;
    }
    if (!cleaned.startsWith('/')) {
        return false;
    }
    // Lexical normalization: `..` above the filesystem root is a no-op.
    const segments = [];
    for (const segment of cleaned.split('/')) {
        if (segment === '' || segment === '.')
            continue;
        if (segment === '..') {
            segments.pop();
            continue;
        }
        segments.push(segment);
    }
    // Trailing slash on both sides so `/work/repo` does not match
    // `/work/repository`.
    const resolved = segments.length > 0 ? `/${segments.join('/')}/` : '/';
    const allowedRoots = [ctx.workspaceRoot, ...(ctx.additionalDirectories ?? [])]
        .map((root) => (root.endsWith('/') ? root : `${root}/`));
    return !allowedRoots.some((root) => resolved.startsWith(root));
}
827
/**
 * Detect network activity in a command.
 *
 * First pass: every entry of NETWORK_PREFIXES is checked as a literal
 * prefix of the command, or as a substring preceded by a single space.
 * Second pass: the command's first whitespace-delimited token is looked
 * up in NETWORK_TOKENS.
 *
 * @param {string} cmd - Command text to inspect.
 * @returns {{reason: string, matched: string} | null} A hit descriptor,
 *   or null when nothing network-related is recognized.
 */
function detectNetwork(cmd) {
    for (const prefix of NETWORK_PREFIXES) {
        const leadsCommand = cmd.startsWith(prefix);
        const followsSpace = cmd.includes(` ${prefix}`);
        if (!leadsCommand && !followsSpace) {
            continue;
        }
        const label = prefix.trim();
        return { reason: `Network operation: ${label}`, matched: label };
    }
    const [leadingToken = ''] = cmd.split(/\s+/);
    return NETWORK_TOKENS.has(leadingToken)
        ? { reason: `Network tool: ${leadingToken}`, matched: leadingToken }
        : null;
}
839
/**
 * Lexically normalize a POSIX-style path without touching the
 * filesystem (no symlink resolution, no `~` expansion). Used by the
 * `cd ... && rest` boundary check to decide whether a destination is
 * inside `workspaceRoot ∪ additionalDirectories` before a shell is
 * ever spawned.
 *
 * Rules:
 * - One leading and/or trailing quote character is stripped.
 * - Relative inputs are resolved against `baseDir`.
 * - `.` and empty segments are dropped; `..` pops the previous
 *   segment and is a no-op at the filesystem root.
 * - `~`-prefixed inputs are NOT expanded (that would force a HOME
 *   read). The tilde segment is kept as an opaque anchor, so the
 *   result starts with `/~` and is treated as outside the workspace by
 *   callers. A `..` that would climb above the anchor is preserved
 *   literally instead of discarding the anchor; otherwise
 *   `~/../work/repo` would normalize to `/work/repo` and look like an
 *   in-workspace path even though its real location depends on HOME.
 *
 * @param {string} input - Path token, optionally quoted.
 * @param {string} baseDir - Absolute directory used for relative inputs.
 * @returns {string} Normalized absolute-looking path beginning with `/`.
 */
function normalizePosix(input, baseDir) {
    const cleaned = input.replace(/^["']|["']$/g, '');
    const isHomeRelative = cleaned.startsWith('~');
    const isAbsolute = isHomeRelative || cleaned.startsWith('/');
    const start = isAbsolute ? cleaned : `${baseDir.replace(/\/$/, '')}/${cleaned}`;
    const stack = [];
    // Number of leading segments that `..` is not allowed to remove:
    // the tilde anchor plus any `..` already preserved above it.
    let floor = isHomeRelative ? 1 : 0;
    for (const part of start.split('/')) {
        if (part === '' || part === '.')
            continue;
        if (part === '..') {
            if (stack.length > floor) {
                stack.pop();
            }
            else if (isHomeRelative) {
                stack.push(part);
                floor = stack.length;
            }
            continue;
        }
        stack.push(part);
    }
    return `/${stack.join('/')}`;
}
867
/**
 * Report whether `absPath` equals, or lies underneath, one of the
 * allowed roots (`ctx.workspaceRoot` plus `ctx.additionalDirectories`).
 * A single trailing slash on a root is ignored, and the comparison is
 * segment-aware: `/a/b` does not contain `/a/bc`.
 *
 * @param {string} absPath - Already-normalized absolute path.
 * @param {{workspaceRoot: string, additionalDirectories: string[]}} ctx
 *   Allowed-root configuration.
 * @returns {boolean} True when the path is within an allowed root.
 */
function isInsideAllowedRoot(absPath, ctx) {
    const { workspaceRoot, additionalDirectories } = ctx;
    return [workspaceRoot, ...additionalDirectories].some((candidate) => {
        const root = candidate.endsWith('/') ? candidate.slice(0, -1) : candidate;
        return absPath === root || absPath.startsWith(`${root}/`);
    });
}
876
/**
 * Detect `cd <path>` components and decide whether the resulting
 * working directory escapes the workspace boundary.
 *
 * Per spec, for a command shaped like `cd <path> && <rest>` the cwd
 * target is classified: if `<path>` lands outside
 * `workspaceRoot ∪ additionalDirectories`, the overall class becomes
 * `write_protected` regardless of `<rest>`.
 *
 * Every component is walked in order while a simulated cwd (starting
 * at `ctx.workspaceRoot`) is threaded through the chain. Only pure
 * `cd` components update it; anything else leaves it untouched.
 * Subshells (`(cd foo && rest)`) are out of scope for M1.
 *
 * Recognized `cd` shapes:
 * - `cd`, `cd -`, `cd ~`: always an escape, because the resulting
 *   directory (HOME / previous dir) is not under our control.
 * - `cd <target>` where the target is one bare word or one fully
 *   quoted string (which may contain spaces).
 * - Leading builtin options (`-L`, `-P`, `-e`, `-@`, combinable) and the
 *   `--` end-of-options marker are skipped, so `cd -- /etc` and
 *   `cd -P /etc` are checked against their real target instead of
 *   being ignored or treated as a directory name.
 *
 * Targets are normalized lexically only; shell expansions inside the
 * target are not evaluated here.
 *
 * @param {string[]} components - Compound-command components in order.
 * @param {{workspaceRoot: string, additionalDirectories: string[]}} ctx
 *   Allowed-root configuration.
 * @returns {{class: string, reason: string, matched: string} | null}
 *   A `write_protected` classification for the first escaping hop, or
 *   null when every hop stays inside the allowed roots.
 */
function detectCwdEscape(components, ctx) {
    if (components.length === 0)
        return null;
    // Group 1 is the optional target: a double-quoted string, a
    // single-quoted string, or a single whitespace-free word.
    const cdComponent = /^cd(?:\s+-[LPe@]+)*(?:\s+--)?(?:\s+("[^"]*"|'[^']*'|\S+))?\s*$/;
    const escapeAt = (hop, matched) => ({
        class: 'write_protected',
        reason: `cd chain escapes workspace boundary at hop ${hop}`,
        matched,
    });
    let cwd = ctx.workspaceRoot;
    for (let i = 0; i < components.length; i += 1) {
        const trimmed = components[i]?.trim() ?? '';
        const cdMatch = trimmed.match(cdComponent);
        if (!cdMatch)
            continue;
        const target = cdMatch[1];
        if (target === undefined || target === '-' || target === '~') {
            return escapeAt(i + 1, `cd${target ? ` ${target}` : ''}`);
        }
        // normalizePosix strips the surrounding quotes of a quoted target.
        cwd = normalizePosix(target, cwd);
        if (!isInsideAllowedRoot(cwd, ctx)) {
            return escapeAt(i + 1, `cd ${target}`);
        }
    }
    return null;
}
923
/**
 * Classify a bash command string.
 *
 * Order of checks:
 *   1. Empty input                      -> `unknown`.
 *   2. Whole-command destructive match  -> `destructive`. Done on the
 *      raw text because patterns such as the fork bomb or SQL-in-pipe
 *      look benign once split on `|` / `;`.
 *   3. Whole-command obfuscation match  -> `unknown`. For example
 *      `curl ... | sh`, whose halves look harmless in isolation; the
 *      caller is expected to fail closed.
 *   4. Split into components, classify each one, and keep the
 *      highest-ranked class (first one wins on ties).
 *   5. A cwd escape detected over the component list replaces the
 *      result when it outranks the worst component.
 *
 * @param {string} cmd - Raw command text.
 * @param {object} ctx - Classifier context passed through to the
 *   per-component and cwd-escape checks.
 * @returns {object} Classification with `class`, `reason`, `matched`,
 *   plus `components` when the command had several components or the
 *   cwd-escape verdict was used.
 */
export function classifyBash(cmd, ctx) {
    const normalized = cmd.trim();
    if (normalized === '') {
        return { class: 'unknown', reason: 'empty command', matched: '' };
    }
    const destructiveHit = findDestructiveMatch(normalized);
    if (destructiveHit) {
        return {
            class: 'destructive',
            reason: `Destructive command pattern matched: ${destructiveHit}`,
            matched: destructiveHit,
        };
    }
    const obfuscationHit = detectObfuscation(normalized);
    if (obfuscationHit) {
        const { reason, matched } = obfuscationHit;
        return { class: 'unknown', reason, matched };
    }
    const components = splitCompoundRespectingQuotes(normalized);
    // The cd verdict is computed over the raw component list so it can
    // override a benign `<rest>` such as `ls`.
    const cwdEscape = detectCwdEscape(components, ctx);
    const classified = components.map((component) => classifyComponent(component, ctx));
    // Strict `>` keeps the earliest component among equally ranked ones.
    const worst = classified.reduce((leader, candidate) => {
        const outranks = !leader || CLASS_RANK[candidate.class] > CLASS_RANK[leader.class];
        return outranks ? candidate : leader;
    }, undefined);
    if (!worst) {
        return {
            class: 'unknown',
            reason: 'no recognizable component',
            matched: normalized,
        };
    }
    // A cwd escape lifts the result to at least `write_protected`; a
    // higher-ranked component (e.g. destructive) still wins.
    const escapeOutranks = Boolean(cwdEscape) &&
        CLASS_RANK[cwdEscape.class] > CLASS_RANK[worst.class];
    if (escapeOutranks) {
        return {
            class: cwdEscape.class,
            reason: cwdEscape.reason,
            matched: cwdEscape.matched,
            components: classified,
        };
    }
    return classified.length === 1 ? worst : { ...worst, components: classified };
}
991
/**
 * Expose the destructive-pattern sources held in this module.
 *
 * The permission engine used to keep its own `destructiveBashPatterns`
 * array; this module is now the single source of truth. Callers that
 * need to audit the list (e.g. doctor output) should read it from here
 * rather than duplicating the regex set.
 *
 * @returns {Array} The `pattern` field of every DESTRUCTIVE_PATTERNS
 *   entry, in declaration order.
 */
export function listDestructivePatterns() {
    return DESTRUCTIVE_PATTERNS.map(({ pattern }) => pattern);
}
1001
+ //# sourceMappingURL=bash-classifier.js.map