@pugi/cli 0.1.0-alpha.3 → 0.1.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/dist/commands/jobs.js +245 -0
- package/dist/core/agents/registry.js +69 -0
- package/dist/core/bash-classifier.js +1001 -0
- package/dist/core/context/builder.js +114 -0
- package/dist/core/context/compaction-events.js +99 -0
- package/dist/core/context/compaction.js +602 -0
- package/dist/core/context/invariants.js +250 -0
- package/dist/core/context/markdown-loader.js +270 -0
- package/dist/core/engine/compaction-hook.js +154 -0
- package/dist/core/engine/index.js +5 -0
- package/dist/core/engine/prompts.js +42 -0
- package/dist/core/engine/tool-bridge.js +159 -61
- package/dist/core/hooks.js +415 -0
- package/dist/core/jobs/registry.js +462 -0
- package/dist/core/mcp/client.js +316 -0
- package/dist/core/mcp/registry.js +171 -0
- package/dist/core/mcp/trust.js +91 -0
- package/dist/core/permission.js +221 -116
- package/dist/core/repl/cap-warning.js +91 -0
- package/dist/core/repl/session.js +399 -0
- package/dist/core/repl/slash-commands.js +116 -0
- package/dist/core/session.js +168 -0
- package/dist/core/subagents/dispatcher.js +258 -0
- package/dist/core/subagents/index.js +26 -0
- package/dist/core/subagents/spawn.js +86 -0
- package/dist/core/trust.js +109 -0
- package/dist/runtime/cli.js +157 -45
- package/dist/runtime/commands/budget.js +192 -0
- package/dist/runtime/commands/config.js +231 -0
- package/dist/runtime/commands/privacy.js +107 -0
- package/dist/runtime/commands/undo.js +329 -0
- package/dist/tools/bash.js +660 -0
- package/dist/tui/agent-tree.js +66 -0
- package/dist/tui/conversation-pane.js +45 -0
- package/dist/tui/input-box.js +91 -0
- package/dist/tui/login-picker.js +69 -0
- package/dist/tui/render.js +68 -0
- package/dist/tui/repl-render.js +218 -0
- package/dist/tui/repl.js +152 -0
- package/dist/tui/splash-data.js +61 -0
- package/dist/tui/splash.js +31 -0
- package/dist/tui/status-bar.js +58 -0
- package/package.json +11 -5
|
@@ -0,0 +1,1001 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bash command classifier — Sprint α5.2 (ADR-0056 PR-PUGI-CLI-M1-GAP-B).
|
|
3
|
+
*
|
|
4
|
+
* Splits a shell command into a 7-class taxonomy so the permission
|
|
5
|
+
* engine can apply class-aware policy instead of the prior bool gate
|
|
6
|
+
* (`destructiveBashPatterns ? deny : ask`).
|
|
7
|
+
*
|
|
8
|
+
* Design notes:
|
|
9
|
+
* - The classifier is a conservative pattern matcher, not a full
|
|
10
|
+
* bash AST parser. M2 will replace it with a real parser (see
|
|
11
|
+
* bash-security.md §4). For M1 the rules are explicit-substring +
|
|
12
|
+
* simple tokenization, which is good enough to gate every command
|
|
13
|
+
* the engine loop currently emits.
|
|
14
|
+
* - Compound commands (`a && b`, `a || b`, `a ; b`, `a | b`) are
|
|
15
|
+
* split on the four separators and every component is classified
|
|
16
|
+
* individually. The overall class is the most dangerous component.
|
|
17
|
+
* - The `destructive` patterns originally lived in
|
|
18
|
+
* `permission.ts::destructiveBashPatterns`. They are now the
|
|
19
|
+
* single source of truth here; `permission.ts` re-exports the
|
|
20
|
+
* hard-deny check through `classifyBash`.
|
|
21
|
+
* - The `unknown` class fires on parse failure (`eval`, deep
|
|
22
|
+
* `$(...)` nesting, `curl | sh` install pipes) so the permission
|
|
23
|
+
* engine can fail closed in interactive modes.
|
|
24
|
+
*/
|
|
25
|
+
/**
 * Danger rank of every command class; a higher number means more dangerous.
 *
 * The header comment of this file describes the overall class of a compound
 * command as that of its most dangerous component; this table supplies the
 * ordering for that reduction.
 *
 * `unknown` ranks ABOVE `read` and `build_test` so that a chain like
 * `pwd && bash ./payload.sh` does not silently disarm the fail-closed
 * unknown gate when the worst-component loop reduces over the
 * components. The matrix in `permission.ts` treats `unknown` as
 * `deny` in `plan`/`dontAsk` and `ask` everywhere else; this rank
 * placement preserves that fail-closed posture for compounds while
 * still letting genuine `write_workspace`, `network`, `write_protected`
 * and `destructive` components win when they appear.
 *
 * Code Reviewer P0 retro 2026-05-24: previously `unknown: 0` meant
 * `read` (rank 1) won over `unknown` (rank 0) in the worst-component
 * reduction. That bypassed the file-level promise of fail-closed on
 * parse failure.
 */
const CLASS_RANK = {
    destructive: 7,
    write_protected: 6,
    write_workspace: 5,
    network: 4,
    unknown: 3,
    build_test: 2,
    read: 1,
};
|
|
51
|
+
/**
 * Hard-deny patterns, consumed by `findDestructiveMatch`.
 *
 * Each entry is matched as a plain SUBSTRING of the command, so an entry
 * such as `rm -rf .` also matches `rm -rf ./build`. Entries are scanned in
 * order and the first hit is the one reported.
 *
 * Entries flagged `caseInsensitive` are compared against the upper-cased
 * command, so their `pattern` text must itself be written in upper case.
 * Whitespace inside a pattern is significant (note the trailing space in
 * `shred ` and the leading space in the last entry).
 */
const DESTRUCTIVE_PATTERNS = [
    // Filesystem wipe
    { pattern: 'rm -rf /' },
    { pattern: 'rm -rf ~' },
    { pattern: 'rm -rf .' },
    { pattern: 'rm -rf *' },
    { pattern: 'rm -rf "/' },
    { pattern: 'rm -rf "~' },
    { pattern: 'rm -rf .git' },
    { pattern: 'rm -r /' },
    { pattern: 'rm -r ~' },
    { pattern: 'rm -r .git' },
    { pattern: 'sudo rm -rf' },
    { pattern: 'sudo rm -r' },
    { pattern: 'dd if=/dev/zero' },
    { pattern: 'dd if=/dev/random' },
    { pattern: 'dd of=/dev/' },
    { pattern: 'mkfs' },
    { pattern: 'shred ' },
    { pattern: 'wipefs' },
    { pattern: '> /dev/sda' },
    { pattern: '> /dev/disk' },
    // Permission wipe
    { pattern: 'chmod 777 /' },
    { pattern: 'chmod -R 777 /' },
    { pattern: 'chmod -R 777 ~' },
    { pattern: 'chown -R root /' },
    { pattern: 'chown -R / ' },
    // Shell tricks
    { pattern: ':(){ :|:& };:' },
    { pattern: 'eval "$' },
    { pattern: "eval '$" },
    // Git history loss
    { pattern: 'git reset --hard' },
    { pattern: 'git clean -fdx' },
    { pattern: 'git push --force origin main' },
    { pattern: 'git push -f origin main' },
    { pattern: 'git push --force origin master' },
    { pattern: 'git push -f origin master' },
    { pattern: 'git push --force origin production' },
    { pattern: 'git push -f origin production' },
    // Container / infra
    { pattern: 'docker system prune' },
    { pattern: 'docker rm -f $(docker' },
    { pattern: 'kubectl delete --all' },
    { pattern: 'kubectl delete namespace' },
    { pattern: 'terraform destroy' },
    // SQL destructive (case-insensitive — model can emit any case).
    { pattern: 'DROP DATABASE', caseInsensitive: true },
    { pattern: 'DROP TABLE', caseInsensitive: true },
    { pattern: 'TRUNCATE TABLE', caseInsensitive: true },
    // Firewall / network
    { pattern: 'ufw disable' },
    { pattern: 'iptables -F' },
    { pattern: 'iptables --flush' },
    // Credential exfil
    { pattern: 'cat ~/.ssh/id_rsa' },
    { pattern: 'cat ~/.ssh/id_ed25519' },
    { pattern: 'gpg --export-secret' },
    // SSH config write paths (reads are OK; only redirections/tee block)
    { pattern: '> sshd_config' },
    { pattern: '>> sshd_config' },
    { pattern: '> /etc/ssh/sshd_config' },
    { pattern: '>> /etc/ssh/sshd_config' },
    { pattern: 'tee sshd_config' },
    { pattern: 'tee /etc/ssh/sshd_config' },
    { pattern: 'tee -a sshd_config' },
    { pattern: 'tee -a /etc/ssh/sshd_config' },
    // History destruction
    { pattern: 'history -c' },
    { pattern: ' >/dev/null 2>&1; rm' },
];
|
|
123
|
+
/**
 * Compound separators as a regular expression: `&&`, `||`, `;` or `|`,
 * together with any whitespace around them.
 *
 * We split on these to classify each component, then pick the most
 * dangerous. `&` (background fork) is intentionally NOT a separator —
 * backgrounding does not change what runs, only when.
 *
 * This regex is not quote-aware; `splitCompoundRespectingQuotes` below is
 * the quote-aware splitter.
 * NOTE(review): no use of this constant is visible in this part of the
 * file — confirm it is still referenced before relying on it.
 */
const COMPOUND_SEPARATORS = /\s*(?:&&|\|\||;|\|)\s*/;
|
|
130
|
+
/**
 * Split a shell command on the compound separators `&&`, `||`, `;`, `|`
 * and `|&` while RESPECTING quoted strings (`'...'`, `"..."`, `` `...` ``)
 * so that script bodies passed to `awk`, `sed`, `perl`, `python -c` are
 * not mis-split when they contain bare `;` or `|` glyphs.
 *
 * Code Reviewer P0 retro 2026-05-24: a naive regex split on
 * `awk 'BEGIN { for (i=0;i<5000;i++) ... }'` produces 3 components that
 * are each classified `unknown` and escalate the overall verdict,
 * breaking legitimate read-class scripts.
 *
 * Quoting rules applied:
 *   - Inside single quotes NOTHING is special, including the backslash.
 *     `'a\'` is therefore a complete string. Treating `\'` as an escaped
 *     quote would leave the scanner "inside" the string and hide every
 *     later separator (`echo 'a\' ; bash x.sh` would stay one component).
 *   - Everywhere else a backslash protects the following character, so
 *     `\;`, `\|` and `\"` never act as separators or quote toggles.
 *
 * `;;` (case-arm terminator) is deliberately not split. `|&` is a
 * pipeline operator (shorthand for `2>&1 |`) and is split like `|`.
 *
 * NOTE(review): a lone `&` and an unquoted newline also terminate a
 * command in bash, but are still not treated as separators here. That
 * is a deliberate, documented choice elsewhere in this file; revisit
 * it, because `pwd & other-cmd` is currently a single component.
 *
 * @param {string} cmd Raw shell command line.
 * @returns {string[]} Trimmed, non-empty components in source order.
 */
function splitCompoundRespectingQuotes(cmd) {
    const out = [];
    let buf = '';
    let inSingle = false;
    let inDouble = false;
    let inBacktick = false;
    // Close the component collected so far and start a new one.
    const flush = () => {
        out.push(buf.trim());
        buf = '';
    };
    for (let i = 0; i < cmd.length; i += 1) {
        const ch = cmd[i];
        const next = cmd[i + 1];
        // Backslash escapes the next character, except inside single
        // quotes where bash treats it as an ordinary character.
        if (ch === '\\' && !inSingle) {
            buf += ch;
            if (next !== undefined) {
                buf += next;
                i += 1;
            }
            continue;
        }
        if (ch === "'" && !inDouble && !inBacktick) {
            inSingle = !inSingle;
            buf += ch;
            continue;
        }
        if (ch === '"' && !inSingle && !inBacktick) {
            inDouble = !inDouble;
            buf += ch;
            continue;
        }
        if (ch === '`' && !inSingle && !inDouble) {
            inBacktick = !inBacktick;
            buf += ch;
            continue;
        }
        if (!inSingle && !inDouble && !inBacktick) {
            // `&&`
            if (ch === '&' && next === '&') {
                flush();
                i += 1;
                continue;
            }
            // `||`, `|&` and bare `|` — consume the second glyph of the
            // two-character operators so it does not leak into the next
            // component.
            if (ch === '|') {
                flush();
                if (next === '|' || next === '&') {
                    i += 1;
                }
                continue;
            }
            // `;` (single semicolon, not part of `;;`)
            if (ch === ';' && next !== ';' && cmd[i - 1] !== ';') {
                flush();
                continue;
            }
        }
        buf += ch;
    }
    flush();
    // Adjacent separators and leading/trailing separators yield empty
    // fragments; drop them.
    return out.filter((part) => part !== '');
}
|
|
210
|
+
/**
 * Network commands.
 *
 * `NETWORK_TOKENS` lists bare command names; `NETWORK_PREFIXES` lists
 * multi-word command prefixes (package installers, remote git and docker
 * operations). Note the trailing space in `npm i ` and `pnpm i `.
 *
 * NOTE(review): the consumer (`detectNetwork`, called from
 * `classifyComponent`) is not visible in this part of the file, so the
 * exact matching rule (first token vs. prefix) should be confirmed there.
 */
const NETWORK_TOKENS = new Set([
    'curl',
    'wget',
    'ssh',
    'scp',
    'rsync',
    'nc',
    'netcat',
]);
const NETWORK_PREFIXES = [
    'git clone',
    'git fetch',
    'git pull',
    'git push',
    'npm install',
    'npm i ',
    'npm ci',
    'pnpm install',
    'pnpm i ',
    'pnpm add',
    'yarn install',
    'yarn add',
    'pip install',
    'pip3 install',
    'brew install',
    'brew upgrade',
    'apt-get',
    'apt install',
    'yum install',
    'dnf install',
    'docker pull',
    'docker push',
    'cargo install',
    'go get',
    'go install',
];
|
|
247
|
+
/**
 * Build / test prefixes. These are common enough that the permission
 * engine grants them auto in `acceptEdits`/`auto` modes (the rule of
 * thumb is "ask first time, then allow rule" per bash-security.md §3).
 *
 * `classifyComponent` accepts an entry when the component equals it
 * exactly or starts with it followed by a space, so `eslint` matches
 * `eslint src` but not `eslint-config`.
 *
 * IMPORTANT: every prefix here must NOT also match a network installer
 * (we handle `npm install` / `pnpm install` before this list).
 */
const BUILD_TEST_PREFIXES = [
    'pnpm test',
    'pnpm build',
    'pnpm typecheck',
    'pnpm lint',
    'pnpm run test',
    'pnpm run build',
    'pnpm run typecheck',
    'pnpm run lint',
    'npm test',
    'npm run build',
    'npm run lint',
    'npm run typecheck',
    'npm run test',
    'yarn test',
    'yarn build',
    'yarn lint',
    'cargo test',
    'cargo build',
    'cargo check',
    'go test',
    'go build',
    'go vet',
    'pytest',
    'jest',
    'vitest',
    'make test',
    'make build',
    'make check',
    'mvn test',
    'mvn package',
    'gradle test',
    'gradle build',
    'tsc --noEmit',
    'tsc -p',
    'eslint',
    'prettier --check',
];
|
|
293
|
+
/**
 * Single-token read-only commands.
 *
 * `classifyComponent` and `detectProtectedRead` look up only the FIRST
 * whitespace-delimited token of a component in this set; arguments are
 * not inspected by the lookup itself.
 *
 * NOTE(review): `env` can launch an arbitrary program (`env bash x.sh`).
 * Any consumer that checks just the first token will treat that as a
 * read — confirm the callers guard against it.
 */
const READ_TOKENS = new Set([
    'pwd',
    'ls',
    'cat',
    'head',
    'tail',
    'wc',
    'which',
    'whereis',
    'file',
    'stat',
    'du',
    'df',
    'echo',
    'printenv',
    'env',
    'date',
    'uname',
    'hostname',
    'id',
    'whoami',
    'true',
    'false',
    'basename',
    'dirname',
    'realpath',
    // `sleep` has no FS/network/proc impact beyond a timer; treated as
    // read so background jobs can use it without tripping the unknown
    // gate. Same logic for the no-op coreutils below.
    'sleep',
    'yes',
    'seq',
    'tr',
    'cut',
    'sort',
    'uniq',
]);
|
|
331
|
+
/**
 * Multi-word (or argument-taking) read-only command prefixes.
 *
 * `classifyComponent` accepts an entry when the component equals the
 * entry with surrounding whitespace trimmed, or starts with the entry
 * verbatim. Entries ending in a space (`grep `, `rg `, ...) therefore
 * require an argument unless the command is given bare; entries without
 * one (`git branch`, `tree`) match any continuation, e.g. `git branch -D x`.
 */
const READ_PREFIXES = [
    'git status',
    'git log',
    'git diff',
    'git show',
    'git branch',
    'git remote',
    'git rev-parse',
    'git ls-files',
    'git config --get',
    'less ',
    'more ',
    'grep ',
    'rg ',
    'fd ',
    'tree',
];
|
|
348
|
+
/**
 * Write_workspace prefixes. Destination boundary is checked separately.
 *
 * `classifyComponent` tests these with a plain `startsWith`, so entries
 * without a trailing space (`mkdir`, `touch`, the `git ...` entries) also
 * match longer words that merely begin with them.
 */
const WRITE_WORKSPACE_PREFIXES = [
    'mkdir',
    'touch',
    'cp ',
    'mv ',
    'ln ',
    'git commit',
    'git add',
    'git checkout',
    'git switch',
    'git restore',
    'git stash',
    'git tag',
    'git rebase',
    'git merge',
];
|
|
365
|
+
/**
 * Protected-write triggers. If a command writes to any of these paths
 * the class is `write_protected` regardless of the operation type.
 *
 * Entries are matched as plain substrings by `detectProtectedWrite`,
 * both against the raw command and against each extracted write target
 * (e.g. `/.ssh/` matches `~/.ssh/foo` and `/Users/x/.ssh/bar`). The list
 * is scanned in order and the first matching entry is the one reported.
 */
const PROTECTED_PATH_SUBSTRINGS = [
    '/.ssh/',
    '/.aws/',
    '/.gnupg/',
    '/.config/',
    '~/.ssh/',
    '~/.aws/',
    '~/.gnupg/',
    '~/.config/',
    '~/.npmrc',
    '~/.pypirc',
    '~/.bashrc',
    '~/.zshrc',
    '~/.profile',
    '~/.bash_profile',
    '/etc/',
    '/usr/',
    '/var/',
];
|
|
391
|
+
/**
 * Obfuscation triggers — any of these forces the `unknown` class so
 * the permission engine can fail closed.
 *
 * `detectObfuscation` fires a trigger when the command contains the
 * `needle` as a substring AND also pipes into `sh`/`bash`/`zsh`/`fish`;
 * `reason` is the prefix of the message it reports.
 */
const OBFUSCATION_TRIGGERS = [
    { needle: 'curl', reason: 'curl piped into shell installer' },
    { needle: 'wget', reason: 'wget piped into shell installer' },
];
|
|
399
|
+
/**
 * Classify a single (non-compound) command component.
 *
 * Checks run in this order and the first hit wins:
 *   1.  destructive substring                         → destructive
 *   2.  obfuscation (raw eval, base64 decode, deep
 *       substitution nesting, curl|sh)                → unknown
 *   3.  `find` with -delete / -exec                   → destructive
 *   4.  write into a protected path or outside the
 *       workspace (`detectProtectedWrite`)            → write_protected
 *   4a. read of a protected path                      → write_protected
 *   4b. `.env` write                                  → write_protected
 *   5.  workspace write prefix (mkdir/touch/cp/...)   → write_workspace
 *   5b. any other write target (redirection,
 *       `sort -o`, `sed -i`, ...)                     → write_workspace
 *   6.  network                                       → network
 *   7.  build/test runner, bare `make`                → build_test
 *   7c. `cd`                                          → read
 *   8.  read-only command (with an `env` guard)       → read
 *   9.  anything else                                 → unknown
 *
 * @param {string} cmd One component of a (possibly compound) command.
 * @param {*} ctx Classifier context; only forwarded to
 *   `detectProtectedWrite`, never inspected here.
 * @returns {{class: string, reason: string, matched: string}} The class
 *   plus a human-readable reason and the fragment that triggered it.
 */
function classifyComponent(cmd, ctx) {
    const trimmed = cmd.trim();
    if (trimmed === '') {
        return { class: 'unknown', reason: 'empty component', matched: '' };
    }
    // 1. Destructive hard-deny patterns.
    const destructive = findDestructiveMatch(trimmed);
    if (destructive) {
        return {
            class: 'destructive',
            reason: `Destructive command pattern matched: ${destructive}`,
            matched: destructive,
        };
    }
    // 2. Obfuscation — curl|sh, wget|bash, deep $() nesting, raw eval.
    const obfuscation = detectObfuscation(trimmed);
    if (obfuscation) {
        return {
            class: 'unknown',
            reason: obfuscation.reason,
            matched: obfuscation.matched,
        };
    }
    // 3. find with -delete / -exec is destructive-adjacent.
    if (/\bfind\b[^|;]*\s-(?:delete|exec\b)/.test(trimmed)) {
        return {
            class: 'destructive',
            reason: 'find with -delete or -exec is treated as destructive',
            matched: 'find ... -delete|-exec',
        };
    }
    // 4. Protected-write check (redirection OR write op into protected path).
    const protectedWrite = detectProtectedWrite(trimmed, ctx);
    if (protectedWrite) {
        return {
            class: 'write_protected',
            reason: protectedWrite.reason,
            matched: protectedWrite.matched,
        };
    }
    // 4a. Protected-read check. Reads from credential / config paths
    // (`cat ~/.aws/credentials`, `head ~/.npmrc`, ...) classify as
    // `write_protected` so the permission matrix gates them in
    // plan/dontAsk and asks elsewhere. The hard-coded DESTRUCTIVE entries
    // for `cat ~/.ssh/id_rsa` / `cat ~/.ssh/id_ed25519` still win because
    // step 1 runs first.
    //
    // Code Reviewer P0 retro 2026-05-24: previously these reads fell
    // through to READ_TOKENS and were allowed in every mode.
    const protectedRead = detectProtectedRead(trimmed);
    if (protectedRead) {
        return {
            class: 'write_protected',
            reason: protectedRead.reason,
            matched: protectedRead.matched,
        };
    }
    // 4b. .env writes are always protected, even inside the workspace
    // (CEO directive feedback_never_delete_untracked_env.md).
    const envWrite = detectEnvWrite(trimmed);
    if (envWrite) {
        return {
            class: 'write_protected',
            reason: envWrite.reason,
            matched: envWrite.matched,
        };
    }
    // 5. Write_workspace ops (mkdir / touch / cp / mv / git commit / etc).
    for (const prefix of WRITE_WORKSPACE_PREFIXES) {
        if (trimmed.startsWith(prefix)) {
            return {
                class: 'write_workspace',
                reason: `Workspace write op: ${prefix.trim()}`,
                matched: prefix.trim(),
            };
        }
    }
    // 5b. Any remaining write target is a workspace write (protected and
    // out-of-workspace targets already returned in step 4). The targets
    // come from `extractWriteTargets`, which ignores `/dev/null` per
    // target. The previous check bailed out whenever `/dev/null` appeared
    // ANYWHERE in the component, so `echo x > out.txt 2>/dev/null` was
    // classified as a read; it also never saw `sort -o` / `sed -i`.
    const writeTargets = extractWriteTargets(trimmed);
    if (writeTargets.length > 0) {
        // Keep the historical reason/matched pair for plain redirections
        // so existing consumers of those fields see no change.
        if (/(^|[^0-9&])>>?\s*[^&\s|;<>]/.test(trimmed)) {
            return {
                class: 'write_workspace',
                reason: 'Shell redirection into a workspace target',
                matched: '>',
            };
        }
        return {
            class: 'write_workspace',
            reason: `Workspace write target: ${writeTargets[0]}`,
            matched: writeTargets[0],
        };
    }
    // 6. Network commands.
    const network = detectNetwork(trimmed);
    if (network) {
        return {
            class: 'network',
            reason: network.reason,
            matched: network.matched,
        };
    }
    // 7. Build/test runners.
    for (const prefix of BUILD_TEST_PREFIXES) {
        if (trimmed === prefix || trimmed.startsWith(`${prefix} `)) {
            return {
                class: 'build_test',
                reason: `Build/test runner: ${prefix}`,
                matched: prefix,
            };
        }
    }
    // 7b. Bare `make` (no subcommand) is build-class.
    if (trimmed === 'make' || trimmed.startsWith('make ')) {
        return { class: 'build_test', reason: 'make runner', matched: 'make' };
    }
    // 7c. Bare `cd <path>` (inside workspace — the cwd-escape detector
    // upgrades the class to write_protected when the target is
    // outside). Standalone `cd` (HOME) is escape, also handled by the
    // cwd-escape detector.
    if (/^cd(\s+\S+)?\s*$/.test(trimmed)) {
        return { class: 'read', reason: 'cd inside workspace', matched: 'cd' };
    }
    // 8. Read-only commands.
    const firstToken = trimmed.split(/\s+/)[0] ?? '';
    // `env` only prints the environment when every argument is an option
    // or a NAME=value assignment. Any other argument is a program that
    // `env` will execute, which must not inherit the `read` class.
    if (firstToken === 'env') {
        const launchesCommand = trimmed
            .split(/\s+/)
            .slice(1)
            .some((arg) => !arg.startsWith('-') && !/^[A-Za-z_][A-Za-z0-9_]*=/.test(arg));
        if (launchesCommand) {
            return {
                class: 'unknown',
                reason: 'env used to launch another command',
                matched: 'env',
            };
        }
    }
    if (READ_TOKENS.has(firstToken)) {
        return { class: 'read', reason: `Read-only command: ${firstToken}`, matched: firstToken };
    }
    for (const prefix of READ_PREFIXES) {
        if (trimmed === prefix.trim() || trimmed.startsWith(prefix)) {
            return {
                class: 'read',
                reason: `Read-only command: ${prefix.trim()}`,
                matched: prefix.trim(),
            };
        }
    }
    // sed/awk: read-only once step 5b has ruled out a write target
    // (redirection, `sed -i`, awk-internal redirect).
    if (firstToken === 'sed' || firstToken === 'awk') {
        return { class: 'read', reason: `Stream editor as read: ${firstToken}`, matched: firstToken };
    }
    // `find` without -delete / -exec is a read (step 3 caught the rest).
    if (firstToken === 'find') {
        return { class: 'read', reason: 'find (no -delete/-exec)', matched: 'find' };
    }
    // 9. Default: unknown.
    return {
        class: 'unknown',
        reason: `Unrecognized command: ${firstToken || trimmed}`,
        matched: firstToken || trimmed,
    };
}
|
|
569
|
+
/**
 * Return the first entry of `DESTRUCTIVE_PATTERNS` that occurs in `cmd`.
 *
 * Matching is a substring test. The command is checked both verbatim and
 * with every run of whitespace collapsed to one space, so spacing tricks
 * such as `rm   -rf /` or a tab between words cannot slip past a pattern
 * that is written with single spaces. Entries flagged `caseInsensitive`
 * are compared upper-case to upper-case, whatever case the table uses.
 *
 * @param {string} cmd Command (or component) to inspect.
 * @returns {string|null} The matching pattern text exactly as stored in
 *   the table, or `null` when nothing matches.
 */
function findDestructiveMatch(cmd) {
    const collapsed = cmd.replace(/\s+/g, ' ');
    const exactForms = [cmd, collapsed];
    const upperForms = exactForms.map((form) => form.toUpperCase());
    for (const { pattern, caseInsensitive } of DESTRUCTIVE_PATTERNS) {
        const needle = caseInsensitive ? pattern.toUpperCase() : pattern;
        const haystacks = caseInsensitive ? upperForms : exactForms;
        if (haystacks.some((form) => form.includes(needle))) {
            return pattern;
        }
    }
    return null;
}
|
|
582
|
+
/**
 * Detect command shapes that hide what will actually run. A hit makes the
 * caller classify the command as `unknown` so the permission engine can
 * fail closed.
 *
 * Checks, in order:
 *   1. `eval` followed by a quote, backtick or `$` expansion.
 *   2. A base64 decode: `--decode`, `-d`, the BSD/macOS spelling `-D`, or
 *      a short-flag cluster containing either (`-di`).
 *   3. More than 3 levels of `$(...)` nesting.
 *   4. More than 3 backtick pairs (see `nestingDepth`).
 *   5. A pipe into `sh`/`bash`/`zsh`/`fish` in a command that also
 *      mentions one of the `OBFUSCATION_TRIGGERS` needles.
 *
 * NOTE(review): check 5 needs a `|` inside `cmd`. Components produced by
 * the compound splitter no longer contain one, so this check can only
 * fire when the caller passes the un-split command — confirm that call
 * site exists.
 *
 * @param {string} cmd Command text to inspect.
 * @returns {{reason: string, matched: string}|null} Description of the
 *   first trigger that fired, or `null`.
 */
function detectObfuscation(cmd) {
    // Raw `eval` with shell expansion. (`eval "$VAR"` is already in
    // DESTRUCTIVE_PATTERNS — this catches the more general case of
    // `eval $(...)`, `eval `...``, etc.)
    if (/(^|\s)eval\s+[`$"']/.test(cmd)) {
        return { reason: 'eval with shell expansion is treated as unknown', matched: 'eval' };
    }
    // Base64-decoded payloads are a common obfuscation. GNU spells the
    // flag `-d`, BSD/macOS historically `-D`; both accept `--decode`.
    if (/\bbase64\s+(?:-[A-Za-z]*[dD][A-Za-z]*|--decode)\b/.test(cmd)) {
        return { reason: 'base64 decode pipeline is treated as unknown', matched: 'base64 -d' };
    }
    // Deep nested `$(...)` — more than 3 levels of nesting is treated
    // as obfuscation.
    if (nestingDepth(cmd, '$(', ')') > 3) {
        return { reason: 'deeply nested command substitution', matched: '$(...)' };
    }
    if (nestingDepth(cmd, '`', '`') > 3) {
        return { reason: 'deeply nested backtick substitution', matched: '`...`' };
    }
    // `curl ... | sh`, `wget ... | bash` — remote installer pipe. The
    // command must contain both the shell receiver and a network fetcher.
    if (/\|\s*(?:sh|bash|zsh|fish)\b/.test(cmd)) {
        const trigger = OBFUSCATION_TRIGGERS.find(({ needle }) => cmd.includes(needle));
        if (trigger) {
            return {
                reason: `${trigger.reason}: ${trigger.needle} | <shell>`,
                matched: `${trigger.needle} | sh`,
            };
        }
    }
    return null;
}
|
|
616
|
+
/**
 * Measure how deeply `open ... close` constructs nest inside `cmd`.
 *
 * Symmetric delimiters (`open === close`, i.e. backticks) cannot express
 * direction, so the result is only an approximation: the number of
 * delimiter PAIRS in the command.
 *
 * For asymmetric delimiters the scan keeps a stack. When `open` is longer
 * than one character, its final character on its own (`(` for `$(`) is
 * tracked as a "bare" group that shares the closer. Without that, the `)`
 * of a plain subshell or arithmetic group such as `(c)` would be taken
 * for the end of a substitution and the reported depth would come out
 * too low.
 *
 * The scan is not quote-aware: a delimiter inside a quoted string is
 * counted like any other.
 *
 * @param {string} cmd Command text to scan.
 * @param {string} open Opening delimiter, e.g. `$(` or a backtick.
 * @param {string} close Closing delimiter, e.g. `)` or a backtick.
 * @returns {number} Maximum nesting depth (pair count when symmetric).
 */
function nestingDepth(cmd, open, close) {
    if (open === close) {
        // Backtick pair — count occurrences / 2 for matched pairs and
        // approximate "depth" as the number of pairs.
        const count = (cmd.match(new RegExp(escapeRegex(open), 'g')) ?? []).length;
        return Math.floor(count / 2);
    }
    const bareOpen = open.length > 1 ? open.slice(-1) : '';
    const trackBare = bareOpen !== '' && bareOpen !== close;
    // One entry per unclosed group: true = `open`, false = bare group.
    const frames = [];
    let depth = 0;
    let max = 0;
    for (let i = 0; i < cmd.length; i += 1) {
        if (cmd.startsWith(open, i)) {
            frames.push(true);
            depth += 1;
            max = Math.max(max, depth);
            // Skip the rest of the opener so its last character is not
            // re-read as a bare group.
            i += open.length - 1;
        }
        else if (trackBare && cmd.startsWith(bareOpen, i)) {
            frames.push(false);
        }
        else if (cmd.startsWith(close, i)) {
            // An unmatched closer pops nothing and leaves depth alone.
            if (frames.pop() === true) {
                depth -= 1;
            }
        }
    }
    return max;
}
|
|
637
|
+
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the
 * result can be embedded in a `RegExp` source and match `s` literally.
 *
 * @param {string} s Text to escape.
 * @returns {string} `s` with `. * + ? ^ $ { } ( ) | [ ] \` each prefixed
 *   by a backslash.
 */
function escapeRegex(s) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    // `$&` re-inserts the matched character after the added backslash.
    const escaped = s.replace(metaCharacters, '\\$&');
    return escaped;
}
|
|
640
|
+
/**
 * Decide whether `cmd` writes somewhere it must not: into one of the
 * `PROTECTED_PATH_SUBSTRINGS` locations, or to a target that
 * `looksAbsoluteOutsideWorkspace` rejects.
 *
 * Two passes:
 *   1. Command-level: if the command performs any write at all (an
 *      extracted write target, a whitespace-preceded `>`/`>>`, or one of
 *      the write verbs listed below) and mentions a protected path
 *      anywhere, report the first such path in table order.
 *   2. Target-level: check every extracted write target against the
 *      protected list and then against the workspace boundary.
 *
 * Reading from a protected path is not flagged here.
 *
 * @param {string} cmd Command component to inspect.
 * @param {*} ctx Forwarded untouched to `looksAbsoluteOutsideWorkspace`.
 * @returns {{reason: string, matched: string}|null} The violation, or
 *   `null` when none was found.
 */
function detectProtectedWrite(cmd, ctx) {
    // Every write target this command produces: `sort -o`, two-arg
    // `uniq`, `sed -i` files, awk-internal redirects and `>` / `>>`.
    const writeTargets = extractWriteTargets(cmd);
    // Verbs that modify the filesystem; any of them counts as a write.
    const writeVerbs = [
        /\btee\b/,
        /\bcp\b/,
        /\bmv\b/,
        /\bmkdir\b/,
        /\btouch\b/,
        /\bchmod\b/,
        /\bchown\b/,
        /\bln\b/,
        /\brm\b/,
        /\brsync\b/,
        /\bscp\b/,
    ];
    // This verdict does not depend on which protected path is being
    // examined, so it is computed once up front.
    const performsWrite = writeTargets.length > 0 ||
        /(^|\s)>>?\s*\S/.test(cmd) ||
        writeVerbs.some((verb) => verb.test(cmd));
    if (performsWrite) {
        const mentioned = PROTECTED_PATH_SUBSTRINGS.find((needle) => cmd.includes(needle));
        if (mentioned !== undefined) {
            return {
                reason: `Write into protected path: ${mentioned}`,
                matched: mentioned,
            };
        }
    }
    // Per-target protected-path / outside-workspace check. Catches both
    // `sort -o ~/.ssh/config` and `echo x > /tmp/other`.
    for (const target of writeTargets) {
        const protectedHit = PROTECTED_PATH_SUBSTRINGS.find((needle) => target.includes(needle));
        if (protectedHit !== undefined) {
            return {
                reason: `Write into protected path: ${protectedHit}`,
                matched: target,
            };
        }
        if (looksAbsoluteOutsideWorkspace(target, ctx)) {
            return {
                reason: `Write target outside workspace: ${target}`,
                matched: target,
            };
        }
    }
    return null;
}
|
|
696
|
+
/**
 * Extract every write-target path the command produces. Covers:
 *   - shell redirection in all its file-writing spellings: `> f`, `>> f`,
 *     `2> f`, `&> f`, `&>> f`, `>& f` (file-descriptor duplication and
 *     closing — `>&1`, `2>&1`, `>&-` — is NOT a file write and is skipped)
 *   - `sort -o file` / `sort --output=file`
 *   - `uniq <input> <output>` (the two-operand form)
 *   - `sed -i` / `-I` / `--in-place`, alone or in a short-flag cluster
 *     such as `-ni` (every trailing file is a write target)
 *   - `awk '... > "file"'` (quoted redirection inside an awk script)
 *
 * `/dev/null` is never reported, quoted or not, and each target is
 * reported once even when two rules find it.
 *
 * Conservative — we do not try to resolve shell vars or globs, and the
 * redirection scan is not quote-aware (a `>` inside a quoted string is
 * still treated as a redirect). The caller still gates absolute paths via
 * `looksAbsoluteOutsideWorkspace`.
 *
 * @param {string} cmd Command component to inspect.
 * @returns {string[]} Distinct targets in discovery order, quotes stripped.
 */
function extractWriteTargets(cmd) {
    const targets = [];
    // Normalise, filter and de-duplicate in one place.
    const addTarget = (raw) => {
        const path = stripQuotes(raw);
        if (path !== '' && path !== '/dev/null' && !targets.includes(path)) {
            targets.push(path);
        }
    };
    // Shell redirection. The operator may carry an fd or `&` prefix
    // (`2>`, `&>`) — those still create the file, so no prefix is
    // excluded. `>&` counts only when NOT followed by a digit or `-`,
    // which would make it an fd duplication/close. The target class
    // excludes `&`, so `>&1` can never yield a target.
    const redirRegex = />>?(?:&(?![\d-]))?\s*([^\s|;&<>]+)/g;
    let match;
    while ((match = redirRegex.exec(cmd)) !== null) {
        addTarget(match[1]);
    }
    // sort -o <file> / sort --output=<file> / sort --output <file>
    const sortMatch = cmd.match(/\bsort\b[^|;]*\s(?:-o\s+|--output(?:=|\s+))(\S+)/);
    if (sortMatch) {
        addTarget(sortMatch[1]);
    }
    // uniq [options] <input> <output>. Options are skipped, together with
    // the separate value taken by -f / -s / -w, so only real operands are
    // counted; the second operand is the output file. Parsing stops at
    // the first redirection or separator character.
    const uniqMatch = cmd.match(/(?:^|\s)uniq(?=\s)([^|;&<>]*)/);
    if (uniqMatch) {
        const args = uniqMatch[1].split(/\s+/).filter((arg) => arg !== '');
        const operands = [];
        for (let i = 0; i < args.length; i += 1) {
            const arg = args[i];
            if (/^-[fsw]$/.test(arg)) {
                i += 1; // skip the option's value
                continue;
            }
            if (arg.startsWith('-') && arg !== '-') {
                continue;
            }
            operands.push(arg);
        }
        // `-` as output means stdout, not a file.
        if (operands.length >= 2 && operands[1] !== '-') {
            addTarget(operands[1]);
        }
    }
    // sed in-place edit. `\S*` swallows an attached backup suffix
    // (`-i.bak`, `--in-place=.bak`) so it is not mistaken for a file.
    // Every following token that is neither a flag nor the start of a
    // quoted script is treated as an edited file.
    const sedMatch = cmd.match(/\bsed\b[^|;]*\s(?:-[A-Za-z]*[iI]|--in-place)\S*([^|;]*)/);
    if (sedMatch) {
        for (const token of sedMatch[1].trim().split(/\s+/)) {
            if (token === '' || token.startsWith('-')) {
                continue;
            }
            if (token.startsWith("'") || token.startsWith('"')) {
                continue;
            }
            addTarget(token);
        }
    }
    // awk '... > "file"' or awk "... > \"file\""
    const awkQuoteRegex = /\bawk\b[^|;]*?['"][^'"]*?>\s*['"]([^'"]+)['"]/g;
    while ((match = awkQuoteRegex.exec(cmd)) !== null) {
        addTarget(match[1]);
    }
    return targets;
}
/**
 * Remove at most one leading and one trailing quote character (`"` or
 * `'`) from `s`. The two ends are handled independently, so a lone or
 * mismatched quote is stripped as well.
 *
 * @param {string} s Token that may be wrapped in quotes.
 * @returns {string} The token without its outer quote characters.
 */
function stripQuotes(s) {
    return s.replace(/^["']|["']$/g, '');
}
|
|
759
|
+
/**
 * Detect READ operations targeted at protected paths. Intended to run
 * after `detectProtectedWrite` and before the READ_TOKENS fallback.
 *
 * Only the leading whitespace-delimited token is inspected: it must be
 * a member of READ_TOKENS or READ_PREFIX_TOKENS, or be one of `sed`,
 * `awk`, `find`. Anything else returns null so that writes (which
 * `detectProtectedWrite` already covers) are not double-flagged.
 *
 * The caller re-uses the `write_protected` class for a hit to keep the
 * permission matrix simple — that class is denied in plan/dontAsk and
 * asked elsewhere, which is the intended fail-closed posture for
 * unrestricted credential reads.
 *
 * @param {string} cmd - A single command component.
 * @returns {{reason: string, matched: string} | null} The first
 *   protected-path substring found in `cmd`, or null.
 */
const READ_PREFIX_TOKENS = new Set(['grep', 'rg', 'less', 'more', 'fd', 'tree']);
function detectProtectedRead(cmd) {
    // `split` always yields at least one element; the default only
    // mirrors the defensive fallback.
    const [leadingToken = ''] = cmd.split(/\s+/);
    const knownReader = READ_TOKENS.has(leadingToken) || READ_PREFIX_TOKENS.has(leadingToken);
    const scriptedReader = leadingToken === 'sed' || leadingToken === 'awk' || leadingToken === 'find';
    if (!(knownReader || scriptedReader)) {
        return null;
    }
    for (const protectedFragment of PROTECTED_PATH_SUBSTRINGS) {
        if (!cmd.includes(protectedFragment)) {
            continue;
        }
        return {
            reason: `Read from protected path: ${protectedFragment}`,
            matched: protectedFragment,
        };
    }
    return null;
}
|
|
790
|
+
/**
 * Detect commands that write to, move, copy over or delete a `.env` /
 * `.env.<suffix>` file.
 *
 * Two conditions must both hold:
 *   1. the command mentions a `.env` file as a path component. The
 *      name may be preceded by start-of-line, whitespace, `/`, `>` or a
 *      quote, and followed by end-of-line, whitespace, `>`, a quote or
 *      a command separator, so `>.env`, `".env"` and `rm .env;` count.
 *   2. the command contains a write-like operation: an output
 *      redirection (with or without whitespace after `>`; descriptor
 *      duplication such as `2>&1` is ignored), `tee`, or `cp` / `mv` /
 *      `rm` followed by `.env`.
 *
 * The check is deliberately coarse: any redirection in a command that
 * mentions `.env` is reported, even if the redirection target is a
 * different file. Reading `.env` in shell (`cat .env`) is gated by the
 * permission engine, not classified here.
 *
 * @param {string} cmd - Command text to inspect.
 * @returns {{reason: string, matched: string} | null}
 */
function detectEnvWrite(cmd) {
    const envHit = /(^|[\s\/>'"])\.env(\.[a-zA-Z0-9_-]+)?($|[\s>'";|&])/m.test(cmd);
    if (!envHit) {
        return null;
    }
    // `(?!&)` keeps `2>&1` / `>&2` from counting as a file write while
    // still matching `>file` written without a space.
    const hasRedirect = />>?(?!&)/.test(cmd);
    const hasTee = /\btee\b/.test(cmd);
    const hasFileOp = /\b(?:cp|mv|rm)\b.*\.env/.test(cmd);
    if (hasRedirect || hasTee || hasFileOp) {
        return { reason: 'Write touches .env file', matched: '.env' };
    }
    return null;
}
|
|
807
|
+
/**
 * Decide whether a write target is an absolute (or home-relative) path
 * that falls outside `workspaceRoot ∪ additionalDirectories`.
 *
 * - `~...` is always treated as outside; no HOME expansion is done.
 * - Anything not starting with `/` (workspace-relative paths, shell
 *   variables) returns false: the bash classifier cannot resolve those,
 *   so they are trusted at this layer.
 * - Absolute paths have their `.`, `..` and empty segments collapsed
 *   lexically (symlinks are not resolved) before being compared with
 *   the allowed roots, so `/ws/../etc/passwd` is reported as outside.
 *
 * @param {string} target - Path token, optionally wrapped in quotes.
 * @param {{workspaceRoot: string, additionalDirectories?: string[]}} ctx
 * @returns {boolean} true when the target lies outside every allowed root.
 */
function looksAbsoluteOutsideWorkspace(target, ctx) {
    // Strip one surrounding quote from each end.
    const cleaned = target.replace(/^["']|["']$/g, '');
    if (cleaned.startsWith('~')) {
        return true;
    }
    if (!cleaned.startsWith('/')) {
        return false;
    }
    // Collapse dot segments so traversal cannot smuggle a path past the
    // prefix comparison below.
    const segments = [];
    for (const segment of cleaned.split('/')) {
        if (segment === '' || segment === '.') {
            continue;
        }
        if (segment === '..') {
            segments.pop();
            continue;
        }
        segments.push(segment);
    }
    const resolved = `/${segments.join('/')}`;
    const allowedRoots = [ctx.workspaceRoot, ...(ctx.additionalDirectories ?? [])];
    return !allowedRoots.some((rootRaw) => {
        const root = rootRaw.endsWith('/') ? rootRaw.slice(0, -1) : rootRaw;
        // Compare on a segment boundary so `/ws` does not admit `/wsx`.
        return resolved === root || resolved.startsWith(`${root}/`);
    });
}
|
|
827
|
+
/**
 * Detect network activity in a command.
 *
 * First every entry of NETWORK_PREFIXES is tried: a hit is the command
 * starting with the prefix, or containing the prefix preceded by a
 * single space. Failing that, the leading whitespace-delimited token is
 * looked up in NETWORK_TOKENS.
 *
 * @param {string} cmd - Command text to inspect.
 * @returns {{reason: string, matched: string} | null}
 */
function detectNetwork(cmd) {
    for (const prefix of NETWORK_PREFIXES) {
        const leadsCommand = cmd.startsWith(prefix);
        if (!leadsCommand && !cmd.includes(` ${prefix}`)) {
            continue;
        }
        const label = prefix.trim();
        return { reason: `Network operation: ${label}`, matched: label };
    }
    const [head = ''] = cmd.split(/\s+/);
    return NETWORK_TOKENS.has(head)
        ? { reason: `Network tool: ${head}`, matched: head }
        : null;
}
|
|
839
|
+
/**
 * Normalize a POSIX-ish path without resolving symlinks. Used by the
 * `cd ... && rest` boundary check so we can decide whether the
 * destination is inside `workspaceRoot ∪ additionalDirectories`
 * before we ever spawn /bin/sh.
 *
 * One surrounding quote is stripped from each end of `input`. Relative
 * inputs are joined onto `baseDir`; then empty and `.` segments are
 * dropped and `..` pops the previous segment (never above the root).
 *
 * `~` expansion is intentionally not done (it would force a HOME read).
 * A `~`-prefixed input is treated as already anchored, so `~/x` comes
 * back as `/~/x` — which callers see as outside the workspace, since
 * the home directory generally is.
 *
 * @param {string} input - Path token, optionally quoted.
 * @param {string} baseDir - Directory that relative inputs resolve against.
 * @returns {string} Normalized path, always starting with `/`.
 */
function normalizePosix(input, baseDir) {
    const cleaned = input.replace(/^["']|["']$/g, '');
    const isAnchored = cleaned.startsWith('/') || cleaned.startsWith('~');
    const start = isAnchored ? cleaned : `${baseDir.replace(/\/$/, '')}/${cleaned}`;
    const stack = [];
    for (const part of start.split('/')) {
        if (part === '' || part === '.') {
            continue;
        }
        if (part === '..') {
            // `pop` on an empty stack is a no-op, so `..` at the root stays at the root.
            stack.pop();
            continue;
        }
        stack.push(part);
    }
    return `/${stack.join('/')}`;
}
|
|
867
|
+
/**
 * Report whether `absPath` equals, or is nested under, the workspace
 * root or one of the additional directories in `ctx`.
 *
 * A single trailing `/` on a root is ignored, and nesting is checked on
 * a segment boundary so `/ws` does not admit `/wsx`. The comparison is
 * purely textual; `absPath` is expected to be normalized already.
 *
 * @param {string} absPath - Normalized absolute path.
 * @param {{workspaceRoot: string, additionalDirectories: string[]}} ctx
 * @returns {boolean}
 */
function isInsideAllowedRoot(absPath, ctx) {
    return [ctx.workspaceRoot, ...ctx.additionalDirectories].some((candidateRoot) => {
        const trimmedRoot = candidateRoot.endsWith('/')
            ? candidateRoot.slice(0, -1)
            : candidateRoot;
        return absPath === trimmedRoot || absPath.startsWith(`${trimmedRoot}/`);
    });
}
|
|
876
|
+
/**
 * Detect `cd <path>` components and decide whether the destination
 * escapes the workspace boundary. Returns a classification when an
 * escape is detected; otherwise null.
 *
 * Per spec: a command of shape `cd <path> && <rest>` should classify
 * the cwd target — if `<path>` resolves outside the workspace, the
 * overall class becomes `write_protected` regardless of `<rest>`.
 *
 * Bare `cd` (HOME), `cd -` (last dir) and `cd ~` are always treated as
 * escapes since the resulting cwd is not under our control.
 *
 * Only components that consist solely of `cd` plus at most one
 * whitespace-free argument are recognised; everything else leaves the
 * simulated cwd untouched. Subshells (`(cd foo && rest)`) are out of
 * scope for M1 — the parent cwd is unaffected there, and the classifier
 * already treats parentheses as part of the component.
 *
 * @param {string[]} components - Command components in execution order.
 * @param {{workspaceRoot: string, additionalDirectories: string[]}} ctx
 * @returns {{class: string, reason: string, matched: string} | null}
 */
function detectCwdEscape(components, ctx) {
    if (components.length === 0) {
        return null;
    }
    // Builds the verdict for an escape found at the given 1-based hop.
    const escapeAt = (hop, target) => ({
        class: 'write_protected',
        reason: `cd chain escapes workspace boundary at hop ${hop}`,
        matched: target === undefined ? 'cd' : `cd ${target}`,
    });
    // Thread a simulated cwd through the chain, starting at the
    // workspace root; each pure `cd` component moves it.
    let simulatedCwd = ctx.workspaceRoot;
    for (const [index, component] of components.entries()) {
        const text = component?.trim() ?? '';
        const parsed = text.match(/^cd(?:\s+(\S+))?\s*$/);
        if (!parsed) {
            continue;
        }
        const destination = parsed[1];
        const uncontrolled = destination === undefined || destination === '-' || destination === '~';
        if (uncontrolled) {
            return escapeAt(index + 1, destination);
        }
        simulatedCwd = normalizePosix(destination, simulatedCwd);
        if (!isInsideAllowedRoot(simulatedCwd, ctx)) {
            return escapeAt(index + 1, destination);
        }
    }
    return null;
}
|
|
923
|
+
/**
 * Classify a bash command string.
 *
 * Order of checks:
 *   1. Empty command -> `unknown`.
 *   2. Destructive patterns against the raw command. Patterns like the
 *      fork bomb (`:(){ :|:& };:`) and SQL-in-pipe
 *      (`echo X | mysql -e 'DROP ...'`) would otherwise be split on `|`
 *      or `;` into components that each look benign.
 *   3. Obfuscation against the raw command. `curl ... | sh` splits into
 *      two benign-looking components, but together they form the
 *      remote-installer pattern, reported as `unknown` so the engine
 *      can fail closed.
 *   4. Split into components, classify each, and keep the one with the
 *      highest CLASS_RANK (the earliest wins a tie).
 *   5. A cwd escape upgrades the result when it outranks the worst
 *      component; a more dangerous component still wins.
 *
 * @param {string} cmd - Raw command text.
 * @param {object} ctx - Classification context passed through to the
 *   component classifier and the cwd-escape check.
 * @returns {object} `{ class, reason, matched }`, plus `components`
 *   when more than one component was classified or a cwd escape won.
 */
export function classifyBash(cmd, ctx) {
    const command = cmd.trim();
    if (command === '') {
        return { class: 'unknown', reason: 'empty command', matched: '' };
    }
    const destructiveHit = findDestructiveMatch(command);
    if (destructiveHit) {
        return {
            class: 'destructive',
            reason: `Destructive command pattern matched: ${destructiveHit}`,
            matched: destructiveHit,
        };
    }
    const obfuscationHit = detectObfuscation(command);
    if (obfuscationHit) {
        const { reason, matched } = obfuscationHit;
        return { class: 'unknown', reason, matched };
    }
    const components = splitCompoundRespectingQuotes(command);
    // Evaluated over the raw component list so the `cd` verdict can
    // trump `<rest>` even when `<rest>` is a benign `ls`.
    const cwdEscape = detectCwdEscape(components, ctx);
    const classified = components.map((component) => classifyComponent(component, ctx));
    // Strict `>` keeps the earliest component among equally ranked ones.
    const worst = classified.reduce((current, candidate) => (!current || CLASS_RANK[candidate.class] > CLASS_RANK[current.class]
        ? candidate
        : current), undefined);
    if (!worst) {
        return {
            class: 'unknown',
            reason: 'no recognizable component',
            matched: command,
        };
    }
    if (cwdEscape && CLASS_RANK[cwdEscape.class] > CLASS_RANK[worst.class]) {
        const { class: escapeClass, reason, matched } = cwdEscape;
        return { class: escapeClass, reason, matched, components: classified };
    }
    return classified.length === 1 ? worst : { ...worst, components: classified };
}
|
|
991
|
+
/**
 * Re-exported destructive-pattern source. The permission engine used
 * to keep its own `destructiveBashPatterns` array; that list now lives
 * here as the single source of truth. Callers that need to audit it
 * (e.g. doctor output) should read this export instead of duplicating
 * the regex set.
 *
 * @returns {Array} The `pattern` field of every DESTRUCTIVE_PATTERNS
 *   entry, in declaration order.
 */
export function listDestructivePatterns() {
    return DESTRUCTIVE_PATTERNS.map(({ pattern }) => pattern);
}
|
|
1001
|
+
//# sourceMappingURL=bash-classifier.js.map
|