claude-dev-env 1.57.2 → 1.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -2
- package/_shared/pr-loop/scripts/code_rules_gate.py +36 -3
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/code_rules_gate_constants.py +6 -0
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/reviews_disabled_constants.py +1 -0
- package/_shared/pr-loop/scripts/reviews_disabled.py +12 -0
- package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +265 -0
- package/_shared/pr-loop/scripts/tests/test_reviews_disabled.py +29 -0
- package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +1 -1
- package/bin/install.mjs +317 -54
- package/bin/install.test.mjs +478 -3
- package/docs/CODE_RULES.md +3 -3
- package/hooks/blocking/code_rules_annotations_length.py +153 -0
- package/hooks/blocking/code_rules_dead_dataclass_field.py +319 -0
- package/hooks/blocking/code_rules_duplicate_body.py +287 -0
- package/hooks/blocking/code_rules_enforcer.py +175 -21
- package/hooks/blocking/code_rules_magic_values.py +98 -0
- package/hooks/blocking/code_rules_shared.py +41 -0
- package/hooks/blocking/destructive_command_blocker.py +1027 -12
- package/hooks/blocking/hook_prose_detector_consistency.py +150 -0
- package/hooks/blocking/intent_only_ending_blocker.py +155 -0
- package/hooks/blocking/session_handoff_blocker.py +190 -0
- package/hooks/blocking/subprocess_budget_completeness.py +380 -0
- package/hooks/blocking/test_code_rules_enforcer_annotations.py +225 -0
- package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +1 -0
- package/hooks/blocking/test_code_rules_enforcer_dead_dataclass_field.py +467 -0
- package/hooks/blocking/test_code_rules_enforcer_duplicate_body.py +330 -0
- package/hooks/blocking/test_code_rules_enforcer_duplicate_body_hook_routing.py +179 -0
- package/hooks/blocking/test_code_rules_enforcer_magic_slice_bounds.py +133 -0
- package/hooks/blocking/test_destructive_command_blocker.py +622 -3
- package/hooks/blocking/test_hook_prose_detector_consistency.py +265 -0
- package/hooks/blocking/test_intent_only_ending_blocker.py +175 -0
- package/hooks/blocking/test_session_handoff_blocker.py +312 -0
- package/hooks/blocking/test_subprocess_budget_completeness.py +588 -0
- package/hooks/blocking/test_workflow_substitution_slot_blocker.py +242 -0
- package/hooks/blocking/workflow_substitution_slot_blocker.py +159 -0
- package/hooks/hooks.json +25 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +16 -0
- package/hooks/hooks_constants/dead_dataclass_field_constants.py +25 -0
- package/hooks/hooks_constants/destructive_command_segment_constants.py +178 -0
- package/hooks/hooks_constants/duplicate_function_body_constants.py +17 -0
- package/hooks/hooks_constants/hook_prose_detector_consistency_constants.py +30 -0
- package/hooks/hooks_constants/messages.py +4 -0
- package/hooks/hooks_constants/session_handoff_blocker_constants.py +10 -0
- package/hooks/hooks_constants/subprocess_budget_completeness_constants.py +5 -0
- package/hooks/hooks_constants/workflow_substitution_slot_blocker_constants.py +22 -0
- package/hooks/workflow/auto_formatter.py +26 -1
- package/hooks/workflow/test_auto_formatter.py +134 -0
- package/package.json +1 -1
- package/rules/conservative-action.md +1 -0
- package/rules/docstring-prose-matches-implementation.md +43 -0
- package/rules/hook-prose-matches-detector.md +26 -0
- package/rules/long-horizon-autonomy.md +43 -0
- package/rules/no-inline-destructive-literals.md +11 -0
- package/rules/workflow-substitution-slots.md +7 -0
- package/skills/autoconverge/SKILL.md +68 -6
- package/skills/autoconverge/reference/closing-report.md +44 -0
- package/skills/autoconverge/reference/convergence.md +7 -3
- package/skills/autoconverge/reference/stop-conditions.md +7 -2
- package/skills/autoconverge/workflow/autoconverge_report_constants/__init__.py +0 -0
- package/skills/autoconverge/workflow/autoconverge_report_constants/render_report_constants.py +105 -0
- package/skills/autoconverge/workflow/converge.contract.test.mjs +30 -1
- package/skills/autoconverge/workflow/converge.copilot-gate.test.mjs +265 -0
- package/skills/autoconverge/workflow/converge.mjs +106 -38
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-a11d903476b803493.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-a26213978adeef6fb.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-a3def0d15ed9d9110.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-a41f41b1b708ee3b7.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-a758b880abecc3ff7.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-a8897b89656b1bd16.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-abd463d744a1437bc.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/subagents/workflows/wf_881252e6-700/agent-ad19d027ae8ee1816.jsonl +2 -0
- package/skills/autoconverge/workflow/fixtures/wf_run/workflows/wf_881252e6-700.json +259 -0
- package/skills/autoconverge/workflow/render_report.py +903 -0
- package/skills/autoconverge/workflow/test_render_report.py +484 -0
- package/skills/pr-converge/scripts/check_convergence.py +195 -64
- package/skills/pr-converge/scripts/test_check_convergence.py +173 -2
- package/skills/update/SKILL.md +37 -5
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
import datetime
|
|
3
|
+
import enum
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import re
|
|
@@ -18,6 +19,32 @@ from hooks_constants.convergence_branch_constants import ( # noqa: E402
|
|
|
18
19
|
CONVERGENCE_BRANCH_SUFFIX_PATTERN,
|
|
19
20
|
CONVERGENCE_FORCE_PUSH_DETECTION_PATTERN,
|
|
20
21
|
)
|
|
22
|
+
from hooks_constants.destructive_command_segment_constants import ( # noqa: E402
|
|
23
|
+
ALL_BENIGN_COMPOUND_SEGMENT_COMMANDS,
|
|
24
|
+
ALL_COMMAND_LAUNCHER_WRAPPER_COMMANDS,
|
|
25
|
+
ALL_FILE_WRITING_OUTPUT_FLAGS_BY_BENIGN_PROGRAM,
|
|
26
|
+
ALL_GH_API_GLUED_REQUEST_BODY_FIELD_FLAG_PREFIXES,
|
|
27
|
+
ALL_GH_API_REQUEST_BODY_FIELD_FLAGS,
|
|
28
|
+
ALL_GH_HTTP_WRITE_METHOD_FLAGS,
|
|
29
|
+
ALL_GH_HTTP_WRITE_METHODS,
|
|
30
|
+
ALL_GIT_CONFIG_READ_ONLY_FLAGS,
|
|
31
|
+
ALL_GIT_FETCH_FORCE_FLAGS,
|
|
32
|
+
ALL_GIT_REMOTE_READ_ONLY_VERBS,
|
|
33
|
+
ALL_INTERPRETER_AND_WRAPPER_COMMANDS,
|
|
34
|
+
ALL_LAUNCHER_OPTIONS_TAKING_SEPARATE_VALUE,
|
|
35
|
+
ALL_LAUNCHERS_REQUIRING_A_POSITIONAL_VALUE,
|
|
36
|
+
ALL_READ_ONLY_SUBCOMMANDS_BY_DISPATCHING_PROGRAM,
|
|
37
|
+
ALL_REMOTE_AND_PROGRAM_STRING_EXECUTORS,
|
|
38
|
+
ALL_SHELL_CONTROL_OPERATOR_TOKENS,
|
|
39
|
+
ALL_STRING_ARGUMENT_EXECUTION_FLAGS,
|
|
40
|
+
ALL_SUBSHELL_GROUPING_CHARACTERS,
|
|
41
|
+
GH_HTTP_READ_ONLY_METHOD,
|
|
42
|
+
GH_LONG_METHOD_FLAG_EQUALS_PREFIX,
|
|
43
|
+
GH_SHORT_METHOD_FLAG_PREFIX,
|
|
44
|
+
ALL_READ_ONLY_SUBCOMMAND_POSITION_DEPTHS_BY_DISPATCHING_PROGRAM,
|
|
45
|
+
LAUNCHER_POSITIONAL_VALUE_SHAPE_PATTERN,
|
|
46
|
+
OUTPUT_REDIRECTION_OPERATOR_PATTERN,
|
|
47
|
+
)
|
|
21
48
|
|
|
22
49
|
CLAUDE_DIRECTORY_PATH = os.path.normpath(os.path.expanduser("~/.claude"))
|
|
23
50
|
GH_REDIRECT_ACTIVE_ENV_VAR = "CLAUDE_GH_REDIRECT_ACTIVE"
|
|
@@ -245,11 +272,14 @@ def rm_targets_only_ephemeral_paths(command: str) -> bool:
|
|
|
245
272
|
"""Return True when command is a single rm invocation whose every target is inside an ephemeral directory.
|
|
246
273
|
|
|
247
274
|
Refuses compound commands so operators like && / || / ; / | / backticks /
|
|
248
|
-
$(...) cannot piggy-back non-rm work on the ephemeral auto-allow.
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
275
|
+
$(...) cannot piggy-back non-rm work on the ephemeral auto-allow. Refuses an
|
|
276
|
+
output redirection (``rm -rf /tmp/x>/etc/passwd`` truncates ``/etc/passwd``
|
|
277
|
+
even though the deletion targets an ephemeral path; shlex keeps the ``>`` glued
|
|
278
|
+
to the target token when no whitespace separates them). Rejects bare ephemeral
|
|
279
|
+
roots (/tmp, system temp dir) and bare directories named worktrees/worktree so
|
|
280
|
+
we never auto-approve wiping those roots. Only allows common short flags and a
|
|
281
|
+
small set of long options before ``--``; tokens with ``=`` or unknown long
|
|
282
|
+
options disable auto-allow.
|
|
253
283
|
"""
|
|
254
284
|
compound_shell_operator_pattern = re.compile(r'(?:&&|\|\||;|\||`|\$\()')
|
|
255
285
|
if compound_shell_operator_pattern.search(command):
|
|
@@ -258,6 +288,8 @@ def rm_targets_only_ephemeral_paths(command: str) -> bool:
|
|
|
258
288
|
all_command_tokens = _split_command_preserving_windows_backslashes(command)
|
|
259
289
|
except ValueError:
|
|
260
290
|
return False
|
|
291
|
+
if _segment_redirects_output_to_a_file(all_command_tokens):
|
|
292
|
+
return False
|
|
261
293
|
if len(all_command_tokens) < 2 or all_command_tokens[0] != "rm":
|
|
262
294
|
return False
|
|
263
295
|
tokens_after_rm = all_command_tokens[1:]
|
|
@@ -279,6 +311,979 @@ def rm_targets_only_ephemeral_paths(command: str) -> bool:
|
|
|
279
311
|
return True
|
|
280
312
|
|
|
281
313
|
|
|
314
|
+
def _destructive_match_is_rm_family(matched_description: str) -> bool:
|
|
315
|
+
"""Return True when the matched destructive pattern is one of the rm-family deletes.
|
|
316
|
+
|
|
317
|
+
The rm-family descriptions all begin with the same prefix; the compound
|
|
318
|
+
ephemeral auto-allow and the quoted-mention guard act only on these, never on
|
|
319
|
+
git, database, or device patterns.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
matched_description: A description from DESTRUCTIVE_BASH_PATTERNS.
|
|
323
|
+
|
|
324
|
+
Returns:
|
|
325
|
+
True when the description names an rm deletion.
|
|
326
|
+
"""
|
|
327
|
+
rm_family_description_prefix = "rm "
|
|
328
|
+
return matched_description.startswith(rm_family_description_prefix)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _command_contains_shell_expansion(command: str) -> bool:
|
|
332
|
+
"""Return True when the command contains shell parameter or command expansion.
|
|
333
|
+
|
|
334
|
+
Any ``$`` (variable reference or ``$(...)`` command substitution) or backtick
|
|
335
|
+
subshell means a token could expand at runtime to ``rm`` or to an arbitrary
|
|
336
|
+
destructive command that the hook cannot resolve statically. The quoted-mention
|
|
337
|
+
guard and the compound ephemeral auto-allow both fail closed on this so they
|
|
338
|
+
never grant on a command whose effective program list is unknown until the
|
|
339
|
+
shell runs.
|
|
340
|
+
|
|
341
|
+
Args:
|
|
342
|
+
command: The raw Bash command string from the tool input.
|
|
343
|
+
|
|
344
|
+
Returns:
|
|
345
|
+
True when the command contains a ``$`` or backtick expansion character.
|
|
346
|
+
"""
|
|
347
|
+
return "$" in command or "`" in command
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _split_tokens_into_shell_segments(all_command_tokens: list[str]) -> list[list[str]]:
    """Partition shlex tokens into simple-command segments at control operators.

    A token found in ALL_SHELL_CONTROL_OPERATOR_TOKENS closes the current
    segment and opens a new one; the operator token itself is dropped. An
    operator glued to a neighbour without whitespace is not a standalone token
    here, so it stays inside its segment unless the caller exploded it first.

    Args:
        all_command_tokens: Tokens produced by shlex tokenization.

    Returns:
        The segments in order, each a list of tokens without operators. The
        result always holds at least one (possibly empty) segment, and adjacent
        operators yield empty segments.
    """
    all_segments: list[list[str]] = [[]]
    for each_token in all_command_tokens:
        if each_token in ALL_SHELL_CONTROL_OPERATOR_TOKENS:
            # An operator ends the open segment; start collecting the next one.
            all_segments.append([])
        else:
            all_segments[-1].append(each_token)
    return all_segments
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _leading_command_token(all_command_tokens: list[str]) -> str | None:
|
|
378
|
+
"""Return the program token that leads the command, skipping VAR=value prefixes.
|
|
379
|
+
|
|
380
|
+
A shell command may begin with one or more ``NAME=value`` environment
|
|
381
|
+
assignments (``FOO=bar rm -rf x``); the first token that is not such an
|
|
382
|
+
assignment is the program the shell executes. Returns None when every token is
|
|
383
|
+
an assignment or the list is empty.
|
|
384
|
+
|
|
385
|
+
Args:
|
|
386
|
+
all_command_tokens: Tokens produced by shlex tokenization.
|
|
387
|
+
|
|
388
|
+
Returns:
|
|
389
|
+
The leading program token, or None when there is no program token.
|
|
390
|
+
"""
|
|
391
|
+
leading_assignment_pattern = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
|
|
392
|
+
for each_token in all_command_tokens:
|
|
393
|
+
if leading_assignment_pattern.match(each_token):
|
|
394
|
+
continue
|
|
395
|
+
return each_token
|
|
396
|
+
return None
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _strip_leading_launcher_wrapper(all_command_tokens: list[str]) -> list[str] | None:
    """Peel a leading launcher wrapper off a segment and return what it wraps.

    A launcher wrapper (a program whose lowercased basename is in
    ALL_COMMAND_LAUNCHER_WRAPPER_COMMANDS, e.g. ``timeout`` or ``nohup``)
    forwards the rest of its argv to another program. To find that program:

    * leading ``NAME=value`` assignments are skipped;
    * the launcher token is consumed;
    * a token in ALL_LAUNCHER_OPTIONS_TAKING_SEPARATE_VALUE is consumed
      together with the token after it, so an option value such as a signal
      name is never taken for the wrapped program;
    * every other dash-prefixed token is consumed;
    * a positional whose lowercased basename matches
      LAUNCHER_POSITIONAL_VALUE_SHAPE_PATTERN is consumed for any launcher;
    * a launcher in ALL_LAUNCHERS_REQUIRING_A_POSITIONAL_VALUE also consumes
      its first positional when no positional has been consumed yet, even if
      the shape is unrecognized (``timeout inf``).

    Args:
        all_command_tokens: Tokens of one shell segment.

    Returns:
        The tokens starting at the wrapped program, an empty list when the
        launcher and its options use up every token, or None when the segment
        does not lead with a launcher wrapper.
    """
    assignment_prefix_pattern = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    positional_value_shape = re.compile(LAUNCHER_POSITIONAL_VALUE_SHAPE_PATTERN)

    launcher_index = None
    for each_position, each_candidate in enumerate(all_command_tokens):
        if not assignment_prefix_pattern.match(each_candidate):
            launcher_index = each_position
            break
    if launcher_index is None:
        return None

    launcher_name = Path(all_command_tokens[launcher_index]).name.lower()
    if launcher_name not in ALL_COMMAND_LAUNCHER_WRAPPER_COMMANDS:
        return None

    # True while a launcher that demands a positional value has not consumed one.
    still_owes_positional_value = launcher_name in ALL_LAUNCHERS_REQUIRING_A_POSITIONAL_VALUE
    token_count = len(all_command_tokens)
    cursor = launcher_index + 1
    while cursor < token_count:
        each_token = all_command_tokens[cursor]
        if each_token in ALL_LAUNCHER_OPTIONS_TAKING_SEPARATE_VALUE:
            # Skip the flag and its separate value token in one step.
            cursor += 2
            continue
        if each_token.startswith("-"):
            cursor += 1
            continue
        has_known_value_shape = positional_value_shape.match(Path(each_token).name.lower())
        if has_known_value_shape or still_owes_positional_value:
            still_owes_positional_value = False
            cursor += 1
            continue
        return all_command_tokens[cursor:]
    return []
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _command_executes_a_string_argument(all_command_tokens: list[str]) -> bool:
    """Return True when the command's leading program runs a string argument as code.

    Shell interpreters and wrappers (``bash``, ``sh``, ``eval``, ``sudo``,
    ``xargs`` and the rest of ALL_INTERPRETER_AND_WRAPPER_COMMANDS) and remote
    runners such as ``ssh`` execute a following quoted token as a command line, so
    ``bash -c 'rm -rf /etc'`` and ``ssh host 'rm -rf /etc'`` run ``rm`` even though
    no token's basename is ``rm``. Language interpreters (``python``, ``perl``,
    ``ruby``, ``node`` and the rest of ALL_REMOTE_AND_PROGRAM_STRING_EXECUTORS) run
    a string only with one of ALL_STRING_ARGUMENT_EXECUTION_FLAGS, so those qualify
    only when such a flag is present anywhere in the segment.

    A pure command-launcher wrapper (``timeout``, ``nohup``, ``nice`` and the rest
    of ALL_COMMAND_LAUNCHER_WRAPPER_COMMANDS) forwards argv to a following program.
    The wrapper and its own flags are stripped and the wrapped program is
    re-evaluated, recursively through stacked wrappers, so a launcher in front of
    an interpreter is caught while a launcher in front of a plain program still
    reports False.

    Subshell and brace grouping is removed before the leading program is
    identified. shlex leaves ``(bash -c 'rm -rf /etc')`` as ``['(bash', '-c',
    ...]`` and ``( bash -c '...' )`` as ``['(', 'bash', ...]``; without the
    stripping the leader would be ``(bash`` or ``(``, match no executor set, and
    the executed string would be misread as a passive mention. Tokens made only
    of grouping characters are dropped and the first remaining token loses its
    leading grouping characters.

    NOTE(review): shell reserved words in front of the program (``then``, ``do``,
    ``!``) are not skipped here unless the constant sets happen to list them --
    confirm whether that path needs the same treatment.

    Args:
        all_command_tokens: Tokens produced by shlex tokenization.

    Returns:
        True when the leading program executes a quoted string argument as code.
    """
    # Expose the real program hidden behind leading "(" / "{" grouping.
    all_ungrouped_tokens: list[str] = []
    for each_index, each_token in enumerate(all_command_tokens):
        each_ungrouped_token = _strip_leading_subshell_grouping_characters(each_token)
        if each_ungrouped_token:
            all_ungrouped_tokens = [
                each_ungrouped_token,
                *all_command_tokens[each_index + 1 :],
            ]
            break

    leading_command_token = _leading_command_token(all_ungrouped_tokens)
    if leading_command_token is None:
        return False
    leading_command_basename = Path(leading_command_token).name.lower()
    if leading_command_basename in ALL_INTERPRETER_AND_WRAPPER_COMMANDS:
        return True
    if leading_command_basename in ALL_COMMAND_LAUNCHER_WRAPPER_COMMANDS:
        wrapped_program_tokens = _strip_leading_launcher_wrapper(all_ungrouped_tokens)
        if not wrapped_program_tokens:
            # Nothing follows the launcher and its options, so nothing runs.
            return False
        return _command_executes_a_string_argument(wrapped_program_tokens)
    if leading_command_basename not in ALL_REMOTE_AND_PROGRAM_STRING_EXECUTORS:
        return False
    if leading_command_basename == "ssh":
        # ssh runs its trailing argument remotely without needing a flag.
        return True
    return any(
        each_token in ALL_STRING_ARGUMENT_EXECUTION_FLAGS for each_token in all_ungrouped_tokens
    )
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _explode_glued_shell_control_operators(all_command_tokens: list[str]) -> list[str]:
|
|
529
|
+
"""Split control operators off shlex tokens that glue them to a program name.
|
|
530
|
+
|
|
531
|
+
shlex keeps a control operator joined to an adjacent program when no whitespace
|
|
532
|
+
separates them, so ``true; eval 'x'`` tokenizes to ``['true;', 'eval', 'x']``
|
|
533
|
+
with the ``;`` hidden inside ``true;``. This re-splits each token on the
|
|
534
|
+
unquoted control operators ``&&`` / ``||`` / ``;`` / ``|&`` / ``|`` / ``&`` and
|
|
535
|
+
on the POSIX command terminators newline and carriage return, so the operator
|
|
536
|
+
becomes its own token and segment boundaries are visible. The ``|&`` pipe (stdout
|
|
537
|
+
and stderr both into the next command) is matched before the single ``|`` so a
|
|
538
|
+
glued ``cat foo|&tee x`` splits on ``|&`` rather than leaving ``&tee`` joined. The
|
|
539
|
+
lone background ``&`` is split only when it neighbors no ``>`` redirection
|
|
540
|
+
character, so a file-descriptor duplication such as a stderr-to-stdout redirect
|
|
541
|
+
stays one token and is left for the redirection guard rather than torn into a
|
|
542
|
+
dangling redirect fragment that would misread as a hidden segment boundary. shlex
|
|
543
|
+
has already removed quoting, so any operator character still present in a token
|
|
544
|
+
came from unquoted shell source and is a real boundary.
|
|
545
|
+
|
|
546
|
+
Args:
|
|
547
|
+
all_command_tokens: Tokens produced by shlex tokenization.
|
|
548
|
+
|
|
549
|
+
Returns:
|
|
550
|
+
Tokens with glued control operators separated into standalone tokens.
|
|
551
|
+
"""
|
|
552
|
+
control_operator_split_pattern = re.compile(r"(&&|\|\||;|\|&|\||(?<!>)&(?!>)|\n|\r)")
|
|
553
|
+
all_exploded_tokens: list[str] = []
|
|
554
|
+
for each_token in all_command_tokens:
|
|
555
|
+
for each_fragment in control_operator_split_pattern.split(each_token):
|
|
556
|
+
if each_fragment:
|
|
557
|
+
all_exploded_tokens.append(each_fragment)
|
|
558
|
+
return all_exploded_tokens
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _strip_leading_subshell_grouping_characters(token: str) -> str:
    """Drop subshell and brace-group openers from the front of a token.

    shlex keeps an opening ``(`` or ``{`` attached to the program that follows
    it when no whitespace separates them, so ``(rm -rf /etc)`` produces the
    token ``(rm``. Removing every leading character found in
    ALL_SUBSHELL_GROUPING_CHARACTERS exposes the program name for later checks.

    Args:
        token: One token produced by shlex tokenization.

    Returns:
        The token without its leading grouping characters; an empty string
        when the token consists only of such characters.
    """
    ungrouped_token = token.lstrip(ALL_SUBSHELL_GROUPING_CHARACTERS)
    return ungrouped_token
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _any_shell_segment_executes_a_string_argument(all_command_tokens: list[str]) -> bool:
    """Check every simple-command segment for a leader that runs a string as code.

    Glued control operators are first separated into standalone tokens, then the
    token list is cut into segments at those operators. Each non-empty segment is
    tested on its own, so a harmless first program
    (``echo hi && bash -c 'rm -rf /etc'``) cannot hide an interpreter that
    appears in a later segment.

    Args:
        all_command_tokens: Tokens produced by shlex tokenization.

    Returns:
        True when at least one segment's leading program executes a quoted
        string argument as code.
    """
    all_segments = _split_tokens_into_shell_segments(
        _explode_glued_shell_control_operators(all_command_tokens)
    )
    return any(
        _command_executes_a_string_argument(each_segment)
        for each_segment in all_segments
        if each_segment
    )
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def command_has_no_real_rm_invocation(command: str) -> bool:
    """Decide whether a matched ``rm`` pattern is only a passive mention.

    A destructive-pattern match can land inside a quoted argument
    (``grep 'rm -rf foo' log``, ``git commit -m "rm -rf cleanup"``). Such a
    mention tokenizes to one token whose path basename is not ``rm``, so it is
    reported as "no real invocation".

    The function fails closed, returning False ("treat as real, keep
    prompting"), when:

    * the command contains ``$`` or a backtick, since a token could expand to
      ``rm`` at runtime;
    * tokenizing any line raises ValueError (for example unbalanced quotes);
    * any shell segment's leading program executes a string argument as code
      (``bash -c '...'``, ``eval '...'``, ``ssh host '...'``);
    * any token has the basename ``rm`` once glued control operators are split
      off and leading subshell/brace grouping characters are stripped, which
      covers ``/bin/rm``, ``(rm -rf /etc)``, ``;rm -rf /etc`` and
      ``echo|rm -rf /etc``.

    The command is split on newline and carriage-return runs before
    tokenizing, because shlex would treat those terminators as whitespace and
    merge a later line into the first segment.

    Args:
        command: The raw Bash command string from the tool input.

    Returns:
        True when the command contains no real ``rm`` invocation.
    """
    if _command_contains_shell_expansion(command):
        return False
    for each_command_line in re.split(r"[\n\r]+", command):
        try:
            all_line_tokens = _split_command_preserving_windows_backslashes(each_command_line)
        except ValueError:
            return False
        if _any_shell_segment_executes_a_string_argument(all_line_tokens):
            return False
        line_invokes_rm = any(
            Path(_strip_leading_subshell_grouping_characters(each_token)).name == "rm"
            for each_token in _explode_glued_shell_control_operators(all_line_tokens)
        )
        if line_invokes_rm:
            return False
    return True
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _find_non_rm_destructive_pattern(command: str) -> str | None:
    """Look for a destructive pattern in the command that is not an rm deletion.

    Used once the quoted-mention guard has judged an rm-family match to be a
    false positive: the command is scanned again, skipping rm-family patterns,
    so a real hazard next to the quoted mention
    (``grep 'rm -rf' f && git push --force origin main``) still prompts.

    Args:
        command: The raw Bash command string from the tool input.

    Returns:
        The description of the first non-rm-family pattern in
        DESTRUCTIVE_BASH_PATTERNS that matches the command, or None.
    """
    return next(
        (
            each_description
            for each_regex, each_description in DESTRUCTIVE_BASH_PATTERNS
            if not _destructive_match_is_rm_family(each_description)
            and each_regex.search(command)
        ),
        None,
    )
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def _find_non_force_push_destructive_hazard(command: str) -> str | None:
    """Find a destructive pattern that accompanies a convergence force-push.

    Used while a force-push to a convergence branch is being auto-allowed. The
    command is rescanned so that a co-resident hazard
    (``git push --force origin claude/x && git reset --hard``) still prompts.
    Two kinds of pattern are skipped:

    * force-push patterns, recognised by a description containing ``git push``
      plus ``force`` or ``-f``, because they are what the exemption grants;
    * rm-family patterns when the command holds no real ``rm`` invocation, so a
      quoted ``rm`` string does not re-block a legitimate push.

    Args:
        command: The raw Bash command string from the tool input.

    Returns:
        The description of the first remaining matching pattern, or None.
    """
    for each_regex, each_description in DESTRUCTIVE_BASH_PATTERNS:
        is_force_push_pattern = "git push" in each_description and (
            "force" in each_description or "-f" in each_description
        )
        if is_force_push_pattern or not each_regex.search(command):
            continue
        is_passive_rm_mention = _destructive_match_is_rm_family(
            each_description
        ) and command_has_no_real_rm_invocation(command)
        if not is_passive_rm_mention:
            return each_description
    return None
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _command_contains_non_rm_family_destructive_pattern(command: str) -> bool:
    """Report whether the command matches any destructive pattern outside the rm family.

    The compound ephemeral auto-allow is only meant for chains whose sole
    destructive content is ``rm`` deletion. Anything else in
    DESTRUCTIVE_BASH_PATTERNS (git reset --hard, force push, git clean, mkfs,
    dd, DROP/TRUNCATE, ...) is not bounded by the ephemeral rm targets, so a
    single such match is enough to decline the whole auto-allow.

    Args:
        command: The raw Bash command string from the tool input.

    Returns:
        True when at least one matching destructive pattern is not rm-family.
    """
    return any(
        each_pattern_regex.search(command)
        and not _destructive_match_is_rm_family(each_pattern_description)
        for each_pattern_regex, each_pattern_description in DESTRUCTIVE_BASH_PATTERNS
    )
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def _rm_segment_targets_only_absolute_ephemeral_paths(all_rm_segment_tokens: list[str]) -> bool:
    """Decide whether one ``rm`` segment deletes nothing but absolute ephemeral paths.

    The segment is rejected (False) as soon as any of these holds:

    * it carries an output redirection (``rm -rf /tmp/x>/etc/passwd`` truncates
      ``/etc/passwd``; shlex leaves the ``>`` glued to the target token);
    * an unsafe flag precedes ``--``;
    * it names no targets at all;
    * a target is relative (no trusted working directory is available to
      resolve it against);
    * a target's basename is a glob wildcard;
    * a target is a bare ephemeral root or a bare worktrees container;
    * a target does not live inside an ephemeral directory.

    Args:
        all_rm_segment_tokens: Shlex tokens of a single ``rm`` segment, the first
            token being the ``rm`` command.

    Returns:
        True when every target is an absolute ephemeral path safe to auto-allow.
    """

    def _target_is_safe_to_delete(target_token: str) -> bool:
        # Expand ``~`` first so a home-relative spelling is judged as absolute.
        expanded_target = os.path.expanduser(target_token)
        looks_absolute = os.path.isabs(expanded_target) or expanded_target.replace(
            "\\", "/"
        ).startswith("/")
        if not looks_absolute:
            return False
        resolved_target = os.path.normpath(expanded_target)
        return not (
            _path_basename_is_shell_glob_wildcard(resolved_target)
            or _path_is_bare_ephemeral_root(resolved_target)
            or _path_is_bare_named_worktrees_container(resolved_target)
            or not directory_is_ephemeral(resolved_target)
        )

    if _segment_redirects_output_to_a_file(all_rm_segment_tokens):
        return False
    tokens_after_rm = all_rm_segment_tokens[1:]
    if _rm_flags_before_double_dash_are_unsafe(tokens_after_rm):
        return False
    all_target_tokens = _collect_rm_target_tokens(tokens_after_rm)
    # An rm with no targets is never auto-allowed.
    return bool(all_target_tokens) and all(
        _target_is_safe_to_delete(each_target_token)
        for each_target_token in all_target_tokens
    )
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
def _segment_redirects_output_to_a_file(all_segment_tokens: list[str]) -> bool:
    """Detect a shell output redirection anywhere in a segment's tokens.

    Redirecting output truncates or rewrites the redirect target, so a
    read-only program becomes destructive (``cat /dev/null >
    /etc/important.conf``). shlex does not split a redirect operator away from
    a neighbouring word when no whitespace separates them (``echo
    pwned>/etc/passwd``), which is why every token is searched with
    OUTPUT_REDIRECTION_OPERATOR_PATTERN instead of being compared for equality.

    Args:
        all_segment_tokens: Shlex tokens of one shell segment.

    Returns:
        True when any token contains an output-redirection operator.
    """
    output_redirection_pattern = re.compile(OUTPUT_REDIRECTION_OPERATOR_PATTERN)
    for each_token in all_segment_tokens:
        if output_redirection_pattern.search(each_token):
            return True
    return False
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _all_positional_tokens_after_leader(all_segment_tokens: list[str]) -> list[str]:
|
|
813
|
+
"""Return the non-flag tokens that follow a segment's leading program.
|
|
814
|
+
|
|
815
|
+
Skips leading ``VAR=value`` assignments, the program token itself, every
|
|
816
|
+
dash-prefixed flag, and any ``key=value`` flag value, leaving the positional
|
|
817
|
+
words that name a subcommand chain (``repo``, ``delete`` in ``gh repo delete``;
|
|
818
|
+
``stash``, ``drop`` in ``git stash drop``).
|
|
819
|
+
|
|
820
|
+
Args:
|
|
821
|
+
all_segment_tokens: Shlex tokens of one shell segment.
|
|
822
|
+
|
|
823
|
+
Returns:
|
|
824
|
+
The positional tokens after the leading program, in order.
|
|
825
|
+
"""
|
|
826
|
+
leading_assignment_pattern = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
|
|
827
|
+
first_program_index = next(
|
|
828
|
+
(
|
|
829
|
+
index
|
|
830
|
+
for index, token in enumerate(all_segment_tokens)
|
|
831
|
+
if not leading_assignment_pattern.match(token)
|
|
832
|
+
),
|
|
833
|
+
None,
|
|
834
|
+
)
|
|
835
|
+
if first_program_index is None:
|
|
836
|
+
return []
|
|
837
|
+
return [
|
|
838
|
+
each_token
|
|
839
|
+
for each_token in all_segment_tokens[first_program_index + 1:]
|
|
840
|
+
if not each_token.startswith("-") and "=" not in each_token
|
|
841
|
+
]
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def _gh_segment_names_an_explicit_method(
    all_segment_tokens: list[str], target_method: str
) -> bool:
    """Check whether a ``gh`` segment's method flag names ``target_method``.

    Both spellings are understood: the glued forms, where the method is part
    of the flag token (``-XGET``, ``--method=GET``), and the space-separated
    form, where the flag is its own token and the following token carries the
    method (``-X GET``). Token text is uppercased before the comparison, so
    ``target_method`` is expected to be uppercase already.

    Args:
        all_segment_tokens: Shlex tokens of one shell segment.
        target_method: The HTTP method name to match, uppercased.

    Returns:
        True when an ``-X``/``--method`` flag names ``target_method``.
    """
    all_glued_flag_prefixes = (
        GH_SHORT_METHOD_FLAG_PREFIX,
        GH_LONG_METHOD_FLAG_EQUALS_PREFIX,
    )
    for each_index, each_token in enumerate(all_segment_tokens):
        # Glued form: take whatever follows the first matching flag prefix.
        for each_glued_prefix in all_glued_flag_prefixes:
            if each_token.startswith(each_glued_prefix):
                inline_method = each_token[len(each_glued_prefix) :]
                break
        else:
            inline_method = ""
        if inline_method.upper() == target_method:
            return True
        # Space-separated form: the method is the token right after the flag.
        if each_token in ALL_GH_HTTP_WRITE_METHOD_FLAGS:
            all_following_tokens = all_segment_tokens[each_index + 1 : each_index + 2]
            if all_following_tokens and all_following_tokens[0].upper() == target_method:
                return True
    return False
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
def _gh_segment_carries_a_request_body_field(all_segment_tokens: list[str]) -> bool:
    """Check whether a ``gh api`` segment supplies a request-body field.

    Body parameters arrive through ``-f``/``--raw-field``, ``-F``/``--field``
    or ``--input``. A flag may stand alone as its own token (``-f title=x``,
    ``--field a=b``) or be glued to its value (``-ftitle=x``, ``--field=a=b``,
    ``--input=body.json``); the first case is an exact membership test, the
    second a prefix test.

    Args:
        all_segment_tokens: Shlex tokens of one shell segment.

    Returns:
        True when any token is a request-body field flag.
    """
    return any(
        each_token in ALL_GH_API_REQUEST_BODY_FIELD_FLAGS
        or each_token.startswith(ALL_GH_API_GLUED_REQUEST_BODY_FIELD_FLAG_PREFIXES)
        for each_token in all_segment_tokens
    )
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def _gh_segment_runs_an_http_write_method(all_segment_tokens: list[str]) -> bool:
    """Decide whether a ``gh`` segment issues an HTTP write through ``gh api``.

    Two routes lead to a write:

    * An ``-X``/``--method`` flag, space-separated or glued (``-X DELETE``,
      ``-XDELETE``, ``--method=DELETE``), names one of
      ALL_GH_HTTP_WRITE_METHODS. The raw tokens are scanned because the
      dash-prefixed flag is absent from the positional-token list the
      read-only check looks at.
    * A request-body field flag (``-f``/``--raw-field``, ``-F``/``--field``,
      ``--input``) is present and no explicit GET is named. ``gh api`` then
      defaults to POST, so ``gh api repos/foo -f title=x`` is an implicit
      write, while an explicit ``-X GET``/``--method GET`` keeps it read-only.

    Args:
        all_segment_tokens: Shlex tokens of one shell segment.

    Returns:
        True when the segment names an HTTP write method via ``gh api``.
    """
    for each_write_method in ALL_GH_HTTP_WRITE_METHODS:
        if _gh_segment_names_an_explicit_method(all_segment_tokens, each_write_method):
            return True
    if not _gh_segment_carries_a_request_body_field(all_segment_tokens):
        return False
    # Body fields without an explicit GET mean an implicit POST.
    names_read_only_method = _gh_segment_names_an_explicit_method(
        all_segment_tokens, GH_HTTP_READ_ONLY_METHOD
    )
    return not names_read_only_method
|
|
938
|
+
|
|
939
|
+
|
|
940
|
+
def _git_fetch_segment_forces_a_local_ref_update(
    all_positional_tokens: list[str], all_segment_tokens: list[str]
) -> bool:
    """Return True when a ``git fetch`` segment force-updates a local ref.

    ``git fetch`` is read-only in normal use, but two spellings force-update the
    local destination ref even when the update is not a fast-forward, discarding
    local commits:

    * A ``+``-prefixed refspec forces only the refs it names (``git fetch origin
      +refs/heads/main:refs/heads/main``). It is detected from a positional token
      that begins with ``+`` or contains ``+refs/``.
    * A force flag from ALL_GIT_FETCH_FORCE_FLAGS (``-f``/``--force``) forces
      every named refspec at once. The flag is dash-prefixed and therefore
      missing from the positional-token list, so the raw segment tokens are
      scanned for it.

    Git accepts bundled short options (``git fetch -fp``) and unique
    abbreviations of long options (``--forc``), so the force flag is recognized
    in those spellings too, not only as an exact token. The extra matching is
    deliberately generous: any single-dash token containing a short force letter
    and any ``--`` token that is a prefix of a long force flag counts as a force.
    A false positive only declines the auto-allow.

    Args:
        all_positional_tokens: The non-flag tokens after the leading ``git`` program.
        all_segment_tokens: Shlex tokens of the whole ``git`` segment.

    Returns:
        True when any positional refspec or a force flag force-updates a local ref.
    """
    # Letters of the two-character short force flags, e.g. "f" for "-f".
    all_short_force_letters = {
        each_flag[1]
        for each_flag in ALL_GIT_FETCH_FORCE_FLAGS
        if len(each_flag) == 2 and each_flag[0] == "-" and each_flag[1] != "-"
    }
    all_long_force_flags = [
        each_flag
        for each_flag in ALL_GIT_FETCH_FORCE_FLAGS
        if each_flag.startswith("--") and len(each_flag) > 2
    ]
    for each_token in all_segment_tokens:
        if each_token in ALL_GIT_FETCH_FORCE_FLAGS:
            return True
        if each_token.startswith("--"):
            # Abbreviated long option: "--forc" resolves to "--force". A bare
            # "--" (end of options) is excluded by the length guard.
            each_flag_name = each_token.split("=", 1)[0]
            if len(each_flag_name) > 2 and any(
                each_long_flag.startswith(each_flag_name)
                for each_long_flag in all_long_force_flags
            ):
                return True
            continue
        # Bundled short options: "-fp" carries "-f".
        if each_token.startswith("-") and any(
            each_letter in each_token[1:] for each_letter in all_short_force_letters
        ):
            return True
    return any(
        each_token.startswith("+") or "+refs/" in each_token
        for each_token in all_positional_tokens
    )
|
|
969
|
+
|
|
970
|
+
|
|
971
|
+
def _git_config_segment_runs_a_read_only_mode(all_segment_tokens: list[str]) -> bool:
    """Check that a ``git config`` segment is in a query mode.

    A read-only mode flag (``--get``, ``--list``, ``-l`` and the other members
    of ALL_GIT_CONFIG_READ_ONLY_FLAGS) only selects a mode while it stands
    before the first positional key. After a key has appeared, later
    dash-prefixed tokens are the value being stored: ``git config core.editor
    --get`` writes the literal string ``--get``. For that reason only the run
    of dash-prefixed tokens directly after ``config`` is examined, and the scan
    stops at the first token that is not dash-prefixed.

    Args:
        all_segment_tokens: Shlex tokens of the whole ``git config`` segment.

    Returns:
        True when a read-only flag precedes the first ``config`` key positional;
        False otherwise, including when no ``config`` token exists.
    """
    all_lowercased_tokens = [each_token.lower() for each_token in all_segment_tokens]
    if "config" not in all_lowercased_tokens:
        return False
    first_token_after_config = all_lowercased_tokens.index("config") + 1
    for each_token in all_segment_tokens[first_token_after_config:]:
        if each_token.startswith("-"):
            if each_token in ALL_GIT_CONFIG_READ_ONLY_FLAGS:
                return True
            continue
        # First positional key reached: anything after it is a value, not a mode.
        break
    return False
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def _git_segment_runs_a_mutating_mode(all_positional_tokens: list[str], all_segment_tokens: list[str]) -> bool:
    """Detect the write modes of ``git config``, ``git remote`` and ``git fetch``.

    All three subcommands are on the read-only allowlist because of their query
    modes (``git config --list``, ``git remote -v``, plain ``git fetch``), yet
    each can also write:

    * ``config`` mutates unless a read-only flag precedes the first positional
      key (``git config key value`` and ``git config --global key value`` set
      a value).
    * ``remote`` mutates unless the first positional after ``remote`` is one of
      ALL_GIT_REMOTE_READ_ONLY_VERBS (``show``, ``get-url``); a bare ``git
      remote`` with no verb is a query.
    * ``fetch`` mutates when it carries a ``+``-prefixed force refspec or a
      ``-f``/``--force`` flag.

    The config mode and the remote verb are read by position. A whole-segment
    scan for any read-only token would let ``git config core.editor --get``
    (stores ``--get``) or ``git remote -v add evil url`` hide the write.

    Args:
        all_positional_tokens: The non-flag tokens after the leading ``git`` program.
        all_segment_tokens: Shlex tokens of the whole ``git`` segment.

    Returns:
        True when the segment runs a mutating ``config``, ``remote`` or ``fetch`` mode.
    """
    all_lowercased_positional_tokens = list(map(str.lower, all_positional_tokens))
    if "config" in all_lowercased_positional_tokens:
        is_config_query = _git_config_segment_runs_a_read_only_mode(all_segment_tokens)
        return not is_config_query
    if "remote" in all_lowercased_positional_tokens:
        first_verb_position = all_lowercased_positional_tokens.index("remote") + 1
        all_remote_verbs = all_lowercased_positional_tokens[first_verb_position:]
        # No verb at all means a plain listing, which does not mutate.
        return bool(all_remote_verbs) and (
            all_remote_verbs[0] not in ALL_GIT_REMOTE_READ_ONLY_VERBS
        )
    if "fetch" in all_lowercased_positional_tokens:
        return _git_fetch_segment_forces_a_local_ref_update(
            all_positional_tokens, all_segment_tokens
        )
    return False
|
|
1047
|
+
|
|
1048
|
+
|
|
1049
|
+
def _subcommand_dispatching_segment_is_read_only(
    leading_program_basename: str,
    all_read_only_subcommands: frozenset[str],
    all_segment_tokens: list[str],
) -> bool:
    """Accept a ``git``/``gh`` segment only when it runs a read-only verb in a read-only mode.

    ``git`` and ``gh`` reach destructive operations through subcommands that
    DESTRUCTIVE_BASH_PATTERNS does not list one by one (``gh repo delete``,
    ``git checkout -- .``, ``git stash drop``, ``git branch -D``). The check
    therefore fails closed: the segment is benign only if a read-only verb
    appears inside the program's leading subcommand window and no known
    mutating mode is in play.

    The window depth comes from
    ALL_READ_ONLY_SUBCOMMAND_POSITION_DEPTHS_BY_DISPATCHING_PROGRAM: the first
    positional for ``git`` (``git status``), the first two for ``gh`` (``gh
    api``, ``gh pr view``). Limiting the search to that window prevents a
    read-only word used as a deeper argument (``gh repo delete status``,
    ``git push origin log``, ``git branch -D log``) from passing the check.

    Mutating modes that decline the segment: the write modes of ``git
    config``, ``git remote`` and ``git fetch``, and a ``gh api`` HTTP write.

    Args:
        leading_program_basename: The dispatching program (``git`` or ``gh``).
        all_read_only_subcommands: The read-only subcommand verbs for the dispatching
            program.
        all_segment_tokens: Shlex tokens of one shell segment.

    Returns:
        True when the segment's subcommand is on the read-only allowlist and the
        segment runs no mutating mode.
    """
    all_positional_tokens = _all_positional_tokens_after_leader(all_segment_tokens)
    subcommand_window_depth = ALL_READ_ONLY_SUBCOMMAND_POSITION_DEPTHS_BY_DISPATCHING_PROGRAM[
        leading_program_basename
    ]
    for each_token in all_positional_tokens[:subcommand_window_depth]:
        if each_token.lower() in all_read_only_subcommands:
            break
    else:
        # No read-only verb inside the window: fail closed.
        return False
    if leading_program_basename == "git":
        return not _git_segment_runs_a_mutating_mode(
            all_positional_tokens, all_segment_tokens
        )
    if leading_program_basename == "gh":
        return not _gh_segment_runs_an_http_write_method(all_segment_tokens)
    return True
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
def _benign_program_writes_a_file_via_output_flag(
    leading_program_basename: str, all_segment_tokens: list[str]
) -> bool:
    """Return True when a benign program writes a file through its own output flag.

    Some allowlisted reporting commands overwrite an arbitrary file without a
    shell redirection: ``sort -o FILE`` truncates and rewrites ``FILE`` the same
    way ``cat ... > FILE`` does. A segment whose leading program is a key of
    ALL_FILE_WRITING_OUTPUT_FLAGS_BY_BENIGN_PROGRAM and that carries one of that
    program's file-writing output flags is reported as a write so it declines
    the ephemeral ``rm`` auto-allow.

    The flag is recognized in every spelling a getopt-style parser accepts, not
    only as an exact token:

    * its own token (``-o FILE``, ``--output FILE``);
    * long flag glued to its value with ``=`` (``--output=FILE``);
    * short flag glued to its value (``-oFILE``) or bundled behind other short
      options (``-ro FILE``);
    * an abbreviated long flag (``--outp=FILE``).

    The bundled and abbreviated matches are deliberately generous (any
    single-dash token containing the short flag letter, any ``--`` token that is
    a prefix of a long output flag). A false positive only declines the
    auto-allow.

    Args:
        leading_program_basename: The segment's leading program basename, lowercased.
        all_segment_tokens: Shlex tokens of one shell segment.

    Returns:
        True when the segment writes a file through the program's output flag.
    """
    all_file_writing_output_flags = ALL_FILE_WRITING_OUTPUT_FLAGS_BY_BENIGN_PROGRAM.get(
        leading_program_basename
    )
    if all_file_writing_output_flags is None:
        return False
    # Letters of the two-character short output flags, e.g. "o" for "-o".
    all_short_output_letters = {
        each_flag[1]
        for each_flag in all_file_writing_output_flags
        if len(each_flag) == 2 and each_flag[0] == "-" and each_flag[1] != "-"
    }
    all_long_output_flags = [
        each_flag
        for each_flag in all_file_writing_output_flags
        if each_flag.startswith("--") and len(each_flag) > 2
    ]
    for each_token in all_segment_tokens:
        each_flag_name = each_token.split("=", 1)[0]
        if (
            each_token in all_file_writing_output_flags
            or each_flag_name in all_file_writing_output_flags
        ):
            return True
        if each_flag_name.startswith("--"):
            # Abbreviated long flag; the length guard excludes a bare "--".
            if len(each_flag_name) > 2 and any(
                each_long_flag.startswith(each_flag_name)
                for each_long_flag in all_long_output_flags
            ):
                return True
            continue
        # Glued value ("-oFILE") or bundled short options ("-ro").
        if each_token.startswith("-") and any(
            each_letter in each_token[1:] for each_letter in all_short_output_letters
        ):
            return True
    return False
|
|
1136
|
+
|
|
1137
|
+
|
|
1138
|
+
def _segment_leading_program_is_benign(all_segment_tokens: list[str]) -> bool:
    """Decide whether a non-rm segment is a harmless reporting command.

    For a compound chain to be auto-allowed, each segment that is not an
    ``rm`` deletion has to lead with a program from
    ALL_BENIGN_COMPOUND_SEGMENT_COMMANDS (``echo``, ``gh``, ``head``, ``cat``,
    ``grep``, ...). Any other leading program — ``shred``, ``truncate``,
    ``find ... -delete``, ``chmod -R``, ``mv`` and whatever else
    DESTRUCTIVE_BASH_PATTERNS does not cover — makes the segment non-benign,
    so the chain falls through to the prompt.

    An allowlisted program is still rejected when:

    * the segment redirects output (``cat /dev/null > /etc/important.conf``);
    * the program writes a file through its own output flag (``sort -o
      /etc/important.conf``);
    * it is ``git`` or ``gh`` and does not run a read-only subcommand in a
      read-only mode (``gh repo delete``, ``git stash drop``, ``git config
      --global key value``, ``git remote add evil url``, ``gh api -X DELETE``).

    Args:
        all_segment_tokens: Shlex tokens of one shell segment, possibly led by
            ``VAR=value`` assignments before the program token.

    Returns:
        True when the leading program is allowlisted and none of the rejecting
        conditions applies.
    """
    leading_command_token = _leading_command_token(all_segment_tokens)
    if leading_command_token is None:
        return False
    leading_program_basename = Path(leading_command_token).name.lower()
    is_disqualified = (
        leading_program_basename not in ALL_BENIGN_COMPOUND_SEGMENT_COMMANDS
        or _segment_redirects_output_to_a_file(all_segment_tokens)
        or _benign_program_writes_a_file_via_output_flag(
            leading_program_basename, all_segment_tokens
        )
    )
    if is_disqualified:
        return False
    all_read_only_subcommands = ALL_READ_ONLY_SUBCOMMANDS_BY_DISPATCHING_PROGRAM.get(
        leading_program_basename
    )
    if all_read_only_subcommands is None:
        # Not a subcommand-dispatching program: the allowlist hit is enough.
        return True
    return _subcommand_dispatching_segment_is_read_only(
        leading_program_basename, all_read_only_subcommands, all_segment_tokens
    )
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
@enum.unique
class CompoundSegmentVerdict(enum.Enum):
    """Outcome of classifying a single segment of a compound ``rm`` chain.

    The members keep their declaration order so ``enum.auto()`` assigns the
    same values as before.
    """

    # The segment blocks the auto-allow for the whole chain.
    DECLINES_AUTO_ALLOW = enum.auto()
    # The segment is an ``rm`` whose every target is an absolute ephemeral path.
    IS_EPHEMERAL_RM = enum.auto()
    # The segment is empty or a benign non-``rm`` command.
    IS_BENIGN = enum.auto()
|
|
1193
|
+
|
|
1194
|
+
|
|
1195
|
+
def _compound_segment_auto_allow_verdict(
    all_segment_tokens: list[str],
) -> CompoundSegmentVerdict:
    """Classify one compound-chain segment for the ephemeral ``rm`` auto-allow.

    Returns DECLINES_AUTO_ALLOW when the segment's leading program executes a
    quoted string argument as code, when an ``rm`` token is not the segment's
    command, when an ``rm`` segment targets a non-ephemeral path, or when a
    non-``rm`` segment is not a benign reporting command. Returns
    IS_EPHEMERAL_RM when the segment is an ``rm`` deletion whose every target is
    an absolute ephemeral path. Returns IS_BENIGN for an empty segment or a
    benign non-``rm`` segment.

    The target check only sees the tokens from ``rm`` onward, so an ``rm`` token
    is accepted as the command only when nothing but ``VAR=value`` assignments
    precedes it. Otherwise a different leading program and its arguments
    (``<program> /etc/passwd rm -rf /tmp/a/b``) would be sliced away and never
    vetted, letting that program ride the auto-allow.

    Args:
        all_segment_tokens: Shlex tokens of one shell segment with control operators
            removed.

    Returns:
        The CompoundSegmentVerdict for the segment.
    """
    if not all_segment_tokens:
        return CompoundSegmentVerdict.IS_BENIGN
    if _command_executes_a_string_argument(all_segment_tokens):
        return CompoundSegmentVerdict.DECLINES_AUTO_ALLOW
    rm_token_index = next(
        (
            each_index
            for each_index, each_token in enumerate(all_segment_tokens)
            if Path(each_token).name == "rm"
        ),
        None,
    )
    if rm_token_index is None:
        if _segment_leading_program_is_benign(all_segment_tokens):
            return CompoundSegmentVerdict.IS_BENIGN
        return CompoundSegmentVerdict.DECLINES_AUTO_ALLOW
    # Fail closed unless ``rm`` is the segment's command: every token before it
    # must be a leading environment assignment.
    leading_assignment_pattern = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    if not all(
        leading_assignment_pattern.match(each_token)
        for each_token in all_segment_tokens[:rm_token_index]
    ):
        return CompoundSegmentVerdict.DECLINES_AUTO_ALLOW
    if _rm_segment_targets_only_absolute_ephemeral_paths(
        all_segment_tokens[rm_token_index:]
    ):
        return CompoundSegmentVerdict.IS_EPHEMERAL_RM
    return CompoundSegmentVerdict.DECLINES_AUTO_ALLOW
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
def rm_compound_targets_only_absolute_ephemeral_paths(command: str) -> bool:
    """Decide whether a compound cleanup chain can be auto-allowed as a whole.

    Covers chains that declare no ephemeral working directory, for example
    ``rm -rf /tmp/pr136 /tmp/difftest && echo 'cleaned'``. The command is
    split on newlines, tokenized, and cut into shell segments. The grant
    requires all of the following:

    * at least one segment is an ``rm`` whose targets are all absolute
      ephemeral paths, and no ``rm`` segment fails that check;
    * every non-``rm`` segment leads with a benign reporting command from
      ALL_BENIGN_COMPOUND_SEGMENT_COMMANDS, so ``shred``, ``truncate``,
      ``find ... -delete``, ``chmod -R`` or ``mv`` decline the grant;
    * no segment's leading program runs a quoted string as code (a shell
      interpreter, ``eval``, ``exec``, ``source``, ``sudo``, ``su``, ``env``,
      ``xargs``, or a launcher such as ``timeout bash -c '...'``);
    * the command matches no destructive pattern outside the rm family (force
      push, git clean, git reset --hard, mkfs, dd, DROP/TRUNCATE, signing
      bypass);
    * the command contains no shell expansion (``$`` or backtick).

    Every doubt resolves to False, including a tokenization error, so only
    chains that can be fully bounded are granted.

    Args:
        command: The raw Bash command string from the tool input.

    Returns:
        True when every ``rm`` segment targets only absolute ephemeral paths and no
        other hazard is present.
    """
    if _command_contains_shell_expansion(
        command
    ) or _command_contains_non_rm_family_destructive_pattern(command):
        return False
    has_seen_rm_segment = False
    for each_command_line in re.split(r"[\n\r]+", command):
        try:
            all_command_tokens = _split_command_preserving_windows_backslashes(each_command_line)
        except ValueError:
            # Untokenizable input cannot be bounded, so it is never granted.
            return False
        all_line_segments = _split_tokens_into_shell_segments(
            _explode_glued_shell_control_operators(all_command_tokens)
        )
        for each_segment in all_line_segments:
            each_verdict = _compound_segment_auto_allow_verdict(each_segment)
            if each_verdict is CompoundSegmentVerdict.DECLINES_AUTO_ALLOW:
                return False
            if each_verdict is CompoundSegmentVerdict.IS_EPHEMERAL_RM:
                has_seen_rm_segment = True
    return has_seen_rm_segment
|
|
1285
|
+
|
|
1286
|
+
|
|
282
1287
|
def targets_only_claude_directory(command: str) -> bool:
|
|
283
1288
|
"""Check if rm command targets only paths under ~/.claude/."""
|
|
284
1289
|
all_rm_target_paths = re.findall(
|
|
@@ -564,6 +1569,13 @@ def main() -> None:
|
|
|
564
1569
|
|
|
565
1570
|
matched_description = find_destructive_pattern(command)
|
|
566
1571
|
|
|
1572
|
+
if (
|
|
1573
|
+
matched_description is not None
|
|
1574
|
+
and _destructive_match_is_rm_family(matched_description)
|
|
1575
|
+
and command_has_no_real_rm_invocation(command)
|
|
1576
|
+
):
|
|
1577
|
+
matched_description = _find_non_rm_destructive_pattern(command)
|
|
1578
|
+
|
|
567
1579
|
if matched_description is not None and targets_only_claude_directory(command):
|
|
568
1580
|
sys.exit(0)
|
|
569
1581
|
|
|
@@ -579,6 +1591,13 @@ def main() -> None:
|
|
|
579
1591
|
if matched_description is not None and _ephemeral_recursive_rm_auto_allow_granted(command, matched_description):
|
|
580
1592
|
sys.exit(0)
|
|
581
1593
|
|
|
1594
|
+
if (
|
|
1595
|
+
matched_description is not None
|
|
1596
|
+
and _destructive_match_is_rm_family(matched_description)
|
|
1597
|
+
and rm_compound_targets_only_absolute_ephemeral_paths(command)
|
|
1598
|
+
):
|
|
1599
|
+
sys.exit(0)
|
|
1600
|
+
|
|
582
1601
|
if matched_description is not None and "git reset --hard" in matched_description:
|
|
583
1602
|
if _git_reset_hard_allowed_for_command(command, os.getcwd()):
|
|
584
1603
|
sys.exit(0)
|
|
@@ -589,14 +1608,10 @@ def main() -> None:
|
|
|
589
1608
|
and ("force" in matched_description or "-f" in matched_description)
|
|
590
1609
|
and _force_push_targets_convergence_branch(command)
|
|
591
1610
|
):
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
continue
|
|
595
|
-
if each_pattern.search(command):
|
|
596
|
-
matched_description = each_description
|
|
597
|
-
break
|
|
598
|
-
else:
|
|
1611
|
+
co_resident_hazard_description = _find_non_force_push_destructive_hazard(command)
|
|
1612
|
+
if co_resident_hazard_description is None:
|
|
599
1613
|
sys.exit(0)
|
|
1614
|
+
matched_description = co_resident_hazard_description
|
|
600
1615
|
|
|
601
1616
|
if matched_description is not None:
|
|
602
1617
|
ask_response = {
|