claude-dev-env 1.40.0 → 1.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +1 -1
- package/_shared/pr-loop/scripts/grant_project_claude_permissions.py +53 -3
- package/_shared/pr-loop/scripts/post_audit_thread.py +2 -2
- package/_shared/pr-loop/scripts/revoke_project_claude_permissions.py +68 -3
- package/_shared/pr-loop/scripts/tests/test_grant_project_claude_permissions.py +1 -1
- package/_shared/pr-loop/scripts/tests/test_revoke_project_claude_permissions.py +1 -1
- package/hooks/_gh_pr_author_swap_utils.py +1211 -0
- package/hooks/blocking/gh_body_arg_blocker.py +9 -6
- package/hooks/blocking/gh_pr_author_enforcer.py +480 -0
- package/hooks/blocking/gh_pr_author_restore.py +100 -0
- package/hooks/blocking/pr_converge_bugteam_enforcer.py +170 -0
- package/hooks/blocking/pr_description_enforcer.py +1 -3
- package/hooks/blocking/test_gh_body_arg_blocker.py +25 -3
- package/hooks/blocking/test_gh_pr_author_enforcer.py +1166 -0
- package/hooks/blocking/test_gh_pr_author_restore.py +512 -0
- package/hooks/blocking/test_gh_pr_author_swap_utils.py +910 -0
- package/hooks/blocking/test_pr_converge_bugteam_enforcer.py +311 -0
- package/hooks/config/gh_pr_author_swap_constants.py +76 -0
- package/hooks/config/pr_converge_bugteam_enforcer_constants.py +55 -0
- package/hooks/config/pr_converge_bugteam_enforcer_state.py +67 -0
- package/hooks/config/pr_description_enforcer_constants.py +5 -0
- package/hooks/config/test_pr_description_enforcer_constants.py +82 -0
- package/hooks/hooks.json +40 -0
- package/hooks/lifecycle/pr_converge_bugteam_skill_tracker.py +204 -0
- package/hooks/lifecycle/test_pr_converge_bugteam_skill_tracker.py +283 -0
- package/hooks/session/gh_pr_author_session_cleanup.py +171 -0
- package/hooks/session/test_gh_pr_author_session_cleanup.py +575 -0
- package/hooks/test__gh_pr_author_swap_utils.py +333 -0
- package/package.json +1 -1
- package/skills/_shared/pr-loop/scripts/write_audit_outcomes.py +2 -2
- package/skills/_shared/pr-loop/scripts/write_fix_outcomes.py +2 -2
- package/skills/bugteam/reference/audit-contract.md +22 -0
- package/skills/bugteam/reference/github-pr-reviews.md +1 -1
- package/skills/bugteam/scripts/bugteam_fix_hookspath.py +8 -2
- package/skills/bugteam/scripts/test__claude_permissions_common.py +48 -0
- package/skills/bugteam/scripts/test_claude_permissions_common.py +18 -10
- package/skills/pr-converge/SKILL.md +8 -2
- package/skills/pr-converge/config/constants.py +2 -1
- package/skills/pr-converge/reference/state-schema.md +36 -8
|
@@ -0,0 +1,1211 @@
|
|
|
1
|
+
"""Shared utilities for the gh-pr-author swap hook trio.
|
|
2
|
+
|
|
3
|
+
The PreToolUse enforcer (``hooks/blocking/gh_pr_author_enforcer.py``), the
|
|
4
|
+
PostToolUse restore (``hooks/blocking/gh_pr_author_restore.py``), and the
|
|
5
|
+
SessionStart cleanup (``hooks/session/gh_pr_author_session_cleanup.py``)
|
|
6
|
+
all share a small set of helpers: write a line to a stream, build the
|
|
7
|
+
per-session state-file path, run ``gh auth switch``, read the
|
|
8
|
+
original-account login from a state file, delete a state file, and
|
|
9
|
+
detect a ``gh pr create`` invocation while ignoring quoted regions.
|
|
10
|
+
|
|
11
|
+
Centralising these helpers keeps the three hooks' contracts in
|
|
12
|
+
lock-step — a fix in the shared ``_command_invokes_gh_pr_create_in_stripped``
|
|
13
|
+
detector lands in the enforcer and the restore hook from a single edit,
|
|
14
|
+
and the state-file path and gh subprocess shape stay uniform across the
|
|
15
|
+
trio so a file written by the enforcer is always resolvable by the
|
|
16
|
+
restore and cleanup hooks.
|
|
17
|
+
|
|
18
|
+
Layout: a leading underscore marks the module as internal to the swap
|
|
19
|
+
feature, and the file lives directly under ``hooks/`` so both
|
|
20
|
+
``hooks/blocking/`` and ``hooks/session/`` consumers can import it
|
|
21
|
+
without a per-directory path shim.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import stat
|
|
29
|
+
import subprocess
|
|
30
|
+
import sys
|
|
31
|
+
import tempfile
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import TextIO
|
|
34
|
+
|
|
35
|
+
from config.gh_pr_author_swap_constants import (
|
|
36
|
+
ALL_GH_AUTH_SWITCH_COMMAND_HEAD,
|
|
37
|
+
ALL_SHELL_QUOTE_CHARACTERS,
|
|
38
|
+
BASH_COMMENT_INTRODUCER_CHARACTER,
|
|
39
|
+
COMMAND_SEPARATOR_PATTERN,
|
|
40
|
+
COMMAND_SUBSTITUTION_OPENER_LENGTH,
|
|
41
|
+
GH_AUTH_SWITCH_TIMEOUT_SECONDS,
|
|
42
|
+
GH_PR_CREATE_PATTERN,
|
|
43
|
+
HEREDOC_OPENER_TAG_PATTERN,
|
|
44
|
+
HEREDOC_OPENER_TOKEN_LENGTH,
|
|
45
|
+
SESSION_ID_UNSAFE_CHARACTERS_PATTERN,
|
|
46
|
+
SHELL_BACKSLASH_CHARACTER,
|
|
47
|
+
SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH,
|
|
48
|
+
SHELL_BACKTICK_CHARACTER,
|
|
49
|
+
SHELL_DOLLAR_CHARACTER,
|
|
50
|
+
SHELL_LESS_THAN_CHARACTER,
|
|
51
|
+
SHELL_NEWLINE_CHARACTER,
|
|
52
|
+
SHELL_PAREN_CLOSE_CHARACTER,
|
|
53
|
+
SHELL_PAREN_OPEN_CHARACTER,
|
|
54
|
+
SHELL_QUOTE_REPLACEMENT_CHARACTER,
|
|
55
|
+
STATE_FILE_DEFAULT_SESSION_ID,
|
|
56
|
+
STATE_FILE_ORIGINAL_ACCOUNT_KEY,
|
|
57
|
+
STATE_FILE_PERMISSION_MODE,
|
|
58
|
+
STATE_FILE_PREFIX,
|
|
59
|
+
STATE_FILE_SUFFIX,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _write_line(message: str, into_stream: TextIO) -> None:
    """Emit ``message`` as one newline-terminated line on ``into_stream``.

    The destination is always passed in explicitly, so this helper never
    chooses between stdout and stderr itself and the hook scripts do not
    need the ``logging`` module.

    Args:
        message: Text of the line, without a trailing newline (one is
            appended here). Any prefix is the caller's responsibility.
        into_stream: Writable text stream that receives the line. The
            stream is flushed before returning so the output is not left
            sitting in a buffer.
    """
    terminated_line = message + "\n"
    into_stream.write(terminated_line)
    into_stream.flush()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _sanitize_session_id(session_id: str) -> str:
    """Remove every filename-unsafe character from a raw session id.

    The session id arrives in the hook input JSON and must be treated as
    untrusted. Everything matched by
    ``SESSION_ID_UNSAFE_CHARACTERS_PATTERN`` is deleted before the value
    is used to build a filename, so separators and similar characters
    cannot steer the resulting path out of the temp directory.

    Args:
        session_id: Session id exactly as received.

    Returns:
        ``session_id`` with all pattern matches deleted. The result may
        be an empty string; callers are expected to substitute a default
        in that case.
    """
    cleaned_session_id = SESSION_ID_UNSAFE_CHARACTERS_PATTERN.sub("", session_id)
    return cleaned_session_id
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _state_file_path(session_id: str) -> Path:
    """Build the swap-state file path for one session.

    Every hook in the trio derives the path through this function, so a
    file written under one session id is found again by the others.

    Args:
        session_id: ``session_id`` from the hook input JSON. It is passed
            through ``_sanitize_session_id`` first; when nothing is left
            after sanitizing (including the empty-string input),
            ``STATE_FILE_DEFAULT_SESSION_ID`` is used instead.

    Returns:
        A path directly inside ``tempfile.gettempdir()`` whose filename is
        ``STATE_FILE_PREFIX`` + effective session id + ``STATE_FILE_SUFFIX``.
    """
    safe_session_id = _sanitize_session_id(session_id)
    if not safe_session_id:
        safe_session_id = STATE_FILE_DEFAULT_SESSION_ID
    temp_directory = Path(tempfile.gettempdir())
    return temp_directory / f"{STATE_FILE_PREFIX}{safe_session_id}{STATE_FILE_SUFFIX}"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _switch_gh_account(to_account: str) -> bool:
    """Switch the active gh CLI account and report whether it worked.

    The command is ``ALL_GH_AUTH_SWITCH_COMMAND_HEAD`` followed by
    ``to_account``, executed as an argument list (no shell). Nothing is
    printed on failure; each caller decides whether a failed switch
    deserves a diagnostic.

    Args:
        to_account: Login that should become the active gh account.

    Returns:
        True only when the process ran and exited with status zero. False
        for a non-zero exit and for any failure to run the process at all:
        ``OSError`` covers spawn problems such as a missing or
        non-executable binary, and ``subprocess.SubprocessError`` covers
        the timeout raised after ``GH_AUTH_SWITCH_TIMEOUT_SECONDS``.
    """
    argument_vector = [*ALL_GH_AUTH_SWITCH_COMMAND_HEAD, to_account]
    try:
        outcome = subprocess.run(
            argument_vector,
            check=False,
            text=True,
            capture_output=True,
            timeout=GH_AUTH_SWITCH_TIMEOUT_SECONDS,
        )
    except (OSError, subprocess.SubprocessError):
        # Never let a broken gh install crash the hook; report failure instead.
        return False
    else:
        return outcome.returncode == 0
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _read_original_account(state_file: Path) -> str | None:
|
|
159
|
+
"""Read the original-account login from a swap-state file.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
state_file: Path produced by ``_state_file_path``.
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
The original account login when the file exists and parses to a
|
|
166
|
+
JSON object with a non-empty string ``original_account`` value.
|
|
167
|
+
None when the file is absent, unreadable, malformed JSON, the
|
|
168
|
+
wrong shape, missing the key, holds a non-string value, or holds
|
|
169
|
+
a blank value. Diagnostics for unreadable or malformed files are
|
|
170
|
+
written to stderr so the caller can see why a state file was not
|
|
171
|
+
consumed.
|
|
172
|
+
"""
|
|
173
|
+
try:
|
|
174
|
+
raw_contents = state_file.read_text(encoding="utf-8")
|
|
175
|
+
except FileNotFoundError:
|
|
176
|
+
return None
|
|
177
|
+
except OSError as os_error:
|
|
178
|
+
_write_line(
|
|
179
|
+
f"[gh-pr-author-utils] failed to read state file {state_file}: {os_error}",
|
|
180
|
+
sys.stderr,
|
|
181
|
+
)
|
|
182
|
+
return None
|
|
183
|
+
try:
|
|
184
|
+
parsed_state = json.loads(raw_contents)
|
|
185
|
+
except json.JSONDecodeError as decode_error:
|
|
186
|
+
_write_line(
|
|
187
|
+
f"[gh-pr-author-utils] malformed state file {state_file}: {decode_error}",
|
|
188
|
+
sys.stderr,
|
|
189
|
+
)
|
|
190
|
+
return None
|
|
191
|
+
if not isinstance(parsed_state, dict):
|
|
192
|
+
return None
|
|
193
|
+
original_account = parsed_state.get(STATE_FILE_ORIGINAL_ACCOUNT_KEY, "")
|
|
194
|
+
if not isinstance(original_account, str):
|
|
195
|
+
return None
|
|
196
|
+
stripped_original_account = original_account.strip()
|
|
197
|
+
return stripped_original_account or None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _delete_state_file(state_file: Path) -> None:
    """Unlink a swap-state file on a best-effort basis.

    A file that is already gone is treated as success and produces no
    output. Any other ``OSError`` from the unlink is reported as a single
    diagnostic line on stderr and then swallowed, so a failed delete
    never raises out of the hook.

    Args:
        state_file: Path produced by ``_state_file_path``.
    """
    try:
        state_file.unlink()
    except OSError as unlink_error:
        if isinstance(unlink_error, FileNotFoundError):
            # Already absent: nothing to clean up and nothing worth reporting.
            return
        _write_line(
            f"[gh-pr-author-utils] failed to delete state file {state_file}: {unlink_error}",
            sys.stderr,
        )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _state_file_is_attacker_planted(state_file: Path) -> bool:
    """Report whether the file at ``state_file`` looks like it was not written by the enforcer.

    The path is examined with ``lstat`` so a symlink sitting at the
    predictable state-file location is judged on its own metadata rather
    than on whatever it points to. The actual screening of the metadata
    is delegated to ``_lstat_indicates_attacker_planted``.

    On platforms without ``os.getuid`` (Windows) the check is skipped
    entirely and the function answers False without touching the file.

    Callers that already hold an ``lstat`` result for the path should call
    ``_lstat_indicates_attacker_planted`` directly: that saves a syscall
    and removes the window in which the inode could change between two
    separate ``lstat`` calls.

    Args:
        state_file: Path produced by ``_state_file_path``.

    Returns:
        False when the platform has no ``os.getuid``, when the file does
        not exist, or when the metadata passes the screening. True when
        ``lstat`` fails with any ``OSError`` other than
        ``FileNotFoundError``, or when the screening rejects the metadata.
    """
    if not hasattr(os, "getuid"):
        return False
    try:
        candidate_metadata = state_file.lstat()
    except OSError as lstat_error:
        # A missing file is the ordinary no-state case. Any other lstat
        # failure is treated as suspicious and fails closed.
        return not isinstance(lstat_error, FileNotFoundError)
    return _lstat_indicates_attacker_planted(candidate_metadata)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _lstat_indicates_attacker_planted(file_lstat_result: os.stat_result) -> bool:
    """Screen an ``lstat`` result against the shape of an enforcer-written state file.

    This is the metadata-only half of ``_state_file_is_attacker_planted``.
    It performs no filesystem access, so a caller that already holds a
    fresh ``lstat`` result can reuse it instead of stat-ing the path again.

    Three properties are checked, in this order, and the first mismatch
    decides the answer:

    1. the entry is a regular file (not a symlink, FIFO, device, ...);
    2. its permission bits equal ``STATE_FILE_PERMISSION_MODE``;
    3. its owner uid equals the current process uid.

    Args:
        file_lstat_result: Result of ``os.lstat`` / ``Path.lstat`` for the
            candidate path. It must come from ``lstat`` rather than
            ``stat``, otherwise a symlink would be judged by its target.

    Returns:
        True when any of the three properties does not hold. False when
        all hold, and always False on platforms without ``os.getuid``
        (Windows), where mode bits and uids are not meaningful.
    """
    if not hasattr(os, "getuid"):
        return False
    entry_mode = file_lstat_result.st_mode
    # Short-circuiting keeps the original evaluation order: file type first,
    # then permission bits, then ownership.
    return (
        not stat.S_ISREG(entry_mode)
        or stat.S_IMODE(entry_mode) != STATE_FILE_PERMISSION_MODE
        or file_lstat_result.st_uid != os.getuid()
    )
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _index_after_command_substitution(all_scanned_characters: list[str], opener_index: int) -> int:
    """Find the end of the ``$(...)`` substitution that opens at ``opener_index``.

    The scan starts just after the ``$(`` opener with a paren depth of one
    and stops as soon as the depth returns to zero. While walking the body:

    * a nested ``$(`` raises the depth and is skipped as a unit;
    * a backtick hands off to ``_index_after_backtick_substitution``, so a
      ``)`` inside the backtick body cannot change the depth;
    * a single or double quote hands off to ``_blank_quoted_region``, which
      overwrites the quoted text in place so that literal data such as
      ``$(printf 'gh pr create')`` is not later mistaken for a command;
    * a bare ``(`` raises and a bare ``)`` lowers the depth, so subshells
      and array literals inside the body balance out before the real closer.

    Unquoted body characters and the substitution's own opener and closer
    are left untouched so later matchers can still scan the body.

    Args:
        all_scanned_characters: Mutable character buffer of the command.
            Quoted regions inside the substitution body are blanked in
            place by the helpers this function calls.
        opener_index: Index of the ``$`` that begins ``$(``.

    Returns:
        The index one past the ``)`` that balances the opener. When the
        substitution is never closed, the scan runs off the end and the
        buffer length is returned.
    """
    buffer = all_scanned_characters
    total_length = len(buffer)
    open_parens = 1
    position = opener_index + COMMAND_SUBSTITUTION_OPENER_LENGTH
    while position < total_length and open_parens > 0:
        character = buffer[position]
        opens_nested_substitution = (
            character == SHELL_DOLLAR_CHARACTER
            and position + 1 < total_length
            and buffer[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        )
        if opens_nested_substitution:
            open_parens += 1
            position += COMMAND_SUBSTITUTION_OPENER_LENGTH
        elif character == SHELL_BACKTICK_CHARACTER:
            position = _index_after_backtick_substitution(buffer, position, total_length)
        elif character in ALL_SHELL_QUOTE_CHARACTERS:
            position = _blank_quoted_region(buffer, position, total_length, character)
        elif character == SHELL_PAREN_OPEN_CHARACTER:
            open_parens += 1
            position += 1
        elif character == SHELL_PAREN_CLOSE_CHARACTER:
            open_parens -= 1
            position += 1
        else:
            position += 1
    return position
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _index_after_backtick_substitution(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
) -> int:
    """Find the end of the backtick substitution that opens at ``opener_index``.

    The body of a backtick substitution is executed by bash, so it is kept
    scannable. Only single- and double-quoted regions inside it are
    overwritten, via ``_blank_quoted_region``, so a quoted literal inside
    the body cannot later be mistaken for a command. The first backtick
    met outside such a quoted region is taken as the closer.

    Args:
        all_scanned_characters: Mutable character buffer of the command.
            Quoted regions inside the body are blanked in place.
        opener_index: Index of the opening backtick.
        buffer_length: Length of ``all_scanned_characters``, supplied by
            the caller so it is not recomputed on every call.

    Returns:
        The index one past the closing backtick, or ``buffer_length`` when
        no closing backtick is found.
    """
    buffer = all_scanned_characters
    position = opener_index + 1
    while position < buffer_length:
        character = buffer[position]
        if character == SHELL_BACKTICK_CHARACTER:
            return position + 1
        if character in ALL_SHELL_QUOTE_CHARACTERS:
            position = _blank_quoted_region(buffer, position, buffer_length, character)
        else:
            position += 1
    return position
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _blank_quoted_region(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
    quote_character: str,
) -> int:
    """Overwrite a ``'...'`` or ``"..."`` region with the replacement character, in place.

    The opening quote, the quoted text, and the closing quote are each
    replaced by ``SHELL_QUOTE_REPLACEMENT_CHARACTER``. Nothing is inserted
    or removed, so every index in the buffer keeps its meaning.

    Single-quoted regions are taken literally: everything up to the next
    ``'`` is blanked. Double-quoted regions get three extra rules, checked
    in this order before the ordinary handling:

    * a backslash followed by another character is blanked as a pair, so
      an escaped ``"`` does not end the region (a backslash that is the
      very last character of the buffer is blanked on its own);
    * a ``$(`` opener is handed to ``_index_after_command_substitution``
      and the substitution body is NOT blanked, because bash still
      executes it inside double quotes;
    * a backtick is handed to ``_index_after_backtick_substitution`` for
      the same reason.

    Args:
        all_scanned_characters: Mutable character buffer of the command,
            modified in place.
        opener_index: Index of the opening quote.
        buffer_length: Length of ``all_scanned_characters``, supplied by
            the caller so it is not recomputed on every call.
        quote_character: ``'`` or ``"``; the character that closes the
            region.

    Returns:
        The index one past the closing quote, or the index at which the
        scan left the buffer when the region is never closed.
    """
    buffer = all_scanned_characters
    blank = SHELL_QUOTE_REPLACEMENT_CHARACTER
    expands_substitutions = quote_character == '"'
    buffer[opener_index] = blank
    position = opener_index + 1
    while position < buffer_length:
        character = buffer[position]
        has_following_character = position + 1 < buffer_length
        if expands_substitutions and has_following_character and character == "\\":
            buffer[position] = blank
            buffer[position + 1] = blank
            position += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
        elif (
            expands_substitutions
            and has_following_character
            and character == SHELL_DOLLAR_CHARACTER
            and buffer[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        ):
            position = _index_after_command_substitution(buffer, position)
        elif expands_substitutions and character == SHELL_BACKTICK_CHARACTER:
            position = _index_after_backtick_substitution(buffer, position, buffer_length)
        else:
            # Ordinary character or the closing quote: blank it either way,
            # and stop once the closer has been consumed.
            buffer[position] = blank
            if character == quote_character:
                return position + 1
            position += 1
    return position
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def _strip_quoted_regions(command: str) -> str:
    """Return ``command`` with inert quoted text overwritten, offsets preserved.

    The command is copied into a character buffer and scanned left to
    right. At each position exactly one of four things happens:

    * ``$(`` starts a command substitution, and the scan jumps past it via
      ``_index_after_command_substitution``;
    * a backtick starts a backtick substitution, and the scan jumps past
      it via ``_index_after_backtick_substitution``;
    * a single or double quote starts a quoted region, which
      ``_blank_quoted_region`` overwrites in place;
    * any other character is left alone.

    Substitution bodies are executed by bash, so their unquoted characters
    stay visible to later matchers; a ``gh pr create`` inside
    ``echo "$(gh pr create --title T)"`` must still be detectable. Quoted
    arguments nested inside a substitution body are blanked by the
    helpers, so ``echo $(printf 'gh pr create')`` does not leak the
    literal text. An unterminated quote or substitution consumes the rest
    of the command.

    Args:
        command: Raw bash command string from the hook input.

    Returns:
        A string of the same length as ``command`` in which quoted regions
        (including those nested in substitution bodies) are replaced
        character-for-character and everything else is unchanged.
    """
    buffer = list(command)
    total_length = len(command)
    position = 0
    while position < total_length:
        character = buffer[position]
        opens_command_substitution = (
            character == SHELL_DOLLAR_CHARACTER
            and position + 1 < total_length
            and buffer[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        )
        if opens_command_substitution:
            position = _index_after_command_substitution(buffer, position)
        elif character == SHELL_BACKTICK_CHARACTER:
            position = _index_after_backtick_substitution(buffer, position, total_length)
        elif character in ALL_SHELL_QUOTE_CHARACTERS:
            position = _blank_quoted_region(buffer, position, total_length, character)
        else:
            position += 1
    return "".join(buffer)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _strip_bash_comments(quote_stripped_command: str) -> str:
    """Blank every bash comment in the command while keeping offsets stable.

    A hash opens a comment only when it is the first character of the
    command or directly follows whitespace; a hash embedded inside a
    token (for example a ``--body-file`` path that carries a fragment)
    stays literal. Hashes inside quoted text are not expected to reach
    this helper because the caller runs ``_strip_quoted_regions`` first.

    The scan itself is delegated to ``_walk_and_blank_comments``, which
    descends into ``$(...)`` and backtick substitution bodies. A comment
    found inside a substitution stops at the next newline or at the
    substitution closer, whichever comes first, so the closer and every
    byte after it survive. A top-level comment runs to the next newline.
    Substitution opener and closer characters are never blanked, which
    keeps paren-depth tracking on the result intact.

    Args:
        quote_stripped_command: Output of ``_strip_quoted_regions``.

    Returns:
        A string of the same length as the input in which each comment
        has been overwritten with spaces. The newline that ends a
        commented line is kept so command boundaries stay detectable.
    """
    comment_buffer = list(quote_stripped_command)
    # Top-level walk: the whole buffer, with no substitution closer to stop at.
    _walk_and_blank_comments(
        comment_buffer,
        cursor_start_index=0,
        end_index=len(quote_stripped_command),
        bounded_by_substitution_closer=None,
    )
    return "".join(comment_buffer)
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def _walk_and_blank_comments(
    all_scanned_characters: list[str],
    cursor_start_index: int,
    end_index: int,
    bounded_by_substitution_closer: str | None,
) -> int:
    """Scan part of the buffer and overwrite each comment with blanks.

    The scan recurses into substitution bodies. On meeting a ``$(`` or a
    backtick opener it calls itself on the body, telling the nested call
    which closer ends it, and resumes from wherever that nested call
    stopped. Opener and closer characters are never overwritten, so
    helpers that track paren depth on the blanked string
    (``_index_after_command_substitution``,
    ``_strip_substitution_bodies``) still see balanced delimiters.

    Args:
        all_scanned_characters: Mutable character list of the command.
            Comment text is blanked IN PLACE.
        cursor_start_index: First index to examine.
        end_index: Index at which scanning stops. Nested calls receive
            the same value as the outer call; each call watches for its
            own closer independently.
        bounded_by_substitution_closer: ``None`` for the top-level scan.
            ``")"`` inside a ``$(...)`` body; paren depth is tracked so
            a bare ``(`` inside the body pairs with its own ``)``
            instead of ending the scan early. ``"`"`` inside a backtick
            body; no depth tracking is done there.

    Returns:
        The index just past the closer when the bounded scan finds it,
        otherwise the cursor position reached when the scan ran out of
        buffer.
    """
    # The closer never changes during one call, so classify it once.
    closes_on_paren = bounded_by_substitution_closer == SHELL_PAREN_CLOSE_CHARACTER
    closes_on_backtick = bounded_by_substitution_closer == SHELL_BACKTICK_CHARACTER
    # A ``$(`` body starts one level deep: the opener was consumed by the caller.
    open_paren_count = 1 if closes_on_paren else 0
    position = cursor_start_index
    while position < end_index:
        symbol = all_scanned_characters[position]
        if closes_on_paren and symbol == SHELL_PAREN_CLOSE_CHARACTER:
            open_paren_count -= 1
            if open_paren_count == 0:
                return position + 1
            position += 1
        elif closes_on_backtick and symbol == SHELL_BACKTICK_CHARACTER:
            return position + 1
        elif (
            symbol == SHELL_DOLLAR_CHARACTER
            and position + 1 < end_index
            and all_scanned_characters[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        ):
            # Nested ``$(...)``: skip the two-character opener and recurse.
            position = _walk_and_blank_comments(
                all_scanned_characters,
                cursor_start_index=position + COMMAND_SUBSTITUTION_OPENER_LENGTH,
                end_index=end_index,
                bounded_by_substitution_closer=SHELL_PAREN_CLOSE_CHARACTER,
            )
        elif symbol == SHELL_BACKTICK_CHARACTER:
            # Backtick substitution: recurse on the body after the opener.
            position = _walk_and_blank_comments(
                all_scanned_characters,
                cursor_start_index=position + 1,
                end_index=end_index,
                bounded_by_substitution_closer=SHELL_BACKTICK_CHARACTER,
            )
        elif closes_on_paren and symbol == SHELL_PAREN_OPEN_CHARACTER:
            # Bare ``(`` inside a ``$(...)`` body needs its own ``)``.
            open_paren_count += 1
            position += 1
        elif symbol == BASH_COMMENT_INTRODUCER_CHARACTER and _is_comment_introducer_position(
            all_scanned_characters, position
        ):
            # Resume exactly where the comment stopped so a closer or
            # newline that ended it is handled by the next iteration.
            position = _blank_bounded_comment(
                all_scanned_characters,
                position,
                end_index,
                bounded_by_substitution_closer,
            )
        else:
            position += 1
    return position
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def _blank_bounded_comment(
    all_scanned_characters: list[str],
    hash_index: int,
    end_index: int,
    bounded_by_substitution_closer: str | None,
) -> int:
    """Overwrite one comment with blanks, stopping at its terminator.

    Blanking starts on the hash itself and continues until a newline is
    met. Inside a substitution body the comment additionally stops at
    the first closer character (``)`` for ``$(...)``, a backtick for a
    backtick body). The terminating character is never overwritten.

    Args:
        all_scanned_characters: Mutable character list of the command.
            The comment text is blanked IN PLACE.
        hash_index: Index of the hash that introduces the comment.
        end_index: Index at which blanking must stop.
        bounded_by_substitution_closer: ``None`` at the top level,
            ``")"`` inside a ``$(...)`` body, ``"`"`` inside a backtick
            body.

    Returns:
        The index of the newline or closer that ended the comment, so
        the caller processes that character itself on its next step; or
        the position reached when the buffer ran out first.
    """
    stop_characters: tuple[str, ...] = ("\n",)
    if bounded_by_substitution_closer in (
        SHELL_PAREN_CLOSE_CHARACTER,
        SHELL_BACKTICK_CHARACTER,
    ):
        # Inside a substitution the closer also ends the comment.
        stop_characters += (bounded_by_substitution_closer,)
    for position in range(hash_index, end_index):
        if all_scanned_characters[position] in stop_characters:
            return position
        all_scanned_characters[position] = SHELL_QUOTE_REPLACEMENT_CHARACTER
    # No terminator found: report where the scan ended. If the range was
    # empty, that is the starting index.
    return max(hash_index, end_index)
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def _is_comment_introducer_position(
|
|
802
|
+
all_scanned_characters: list[str], hash_index: int
|
|
803
|
+
) -> bool:
|
|
804
|
+
"""Return True when the hash at ``hash_index`` introduces a bash comment.
|
|
805
|
+
|
|
806
|
+
Bash treats a hash as a comment introducer only at the start of the
|
|
807
|
+
command or immediately after whitespace. This mirrors the lookbehind
|
|
808
|
+
branch of the bash comment rule (``(?<=\\s)|^``) while operating on
|
|
809
|
+
a mutable character list so the substitution-aware walker can
|
|
810
|
+
consult it inline.
|
|
811
|
+
|
|
812
|
+
Args:
|
|
813
|
+
all_scanned_characters: Character buffer being walked.
|
|
814
|
+
hash_index: Index of the hash character under test.
|
|
815
|
+
|
|
816
|
+
Returns:
|
|
817
|
+
True when ``hash_index`` is zero or the prior character is a
|
|
818
|
+
Python ``str.isspace`` whitespace character.
|
|
819
|
+
"""
|
|
820
|
+
if hash_index == 0:
|
|
821
|
+
return True
|
|
822
|
+
prior_character = all_scanned_characters[hash_index - 1]
|
|
823
|
+
return prior_character.isspace()
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def _strip_substitution_bodies(quote_stripped_command: str) -> str:
    """Overwrite whole ``$(...)`` and backtick substitutions with spaces.

    Detecting ``gh pr create`` needs substitution bodies to stay
    readable, so that ``echo $(gh pr create)`` is caught. Detecting the
    web flag needs the opposite: a ``--web`` inside a substitution is an
    argument of whatever the subshell runs, not a flag of the outer
    ``gh pr create``. For instance
    ``gh pr create --title "$(echo --web)"`` must still trip the
    enforcer because ``--web`` belongs to ``echo``.

    Each substitution found while scanning left to right is blanked as a
    unit: opener, body and closer. Nested substitutions disappear with
    the outer one. Length and offsets are unchanged, so
    ``_all_gh_pr_create_segments`` keeps working on the result.

    Args:
        quote_stripped_command: Output of ``_strip_quoted_regions``.
            Quotes must already be blanked because this helper does no
            quote tracking of its own.

    Returns:
        A string of the same length with every substitution replaced by
        spaces.
    """
    buffer = list(quote_stripped_command)
    total_length = len(quote_stripped_command)
    position = 0
    while position < total_length:
        symbol = buffer[position]
        starts_dollar_paren = (
            symbol == SHELL_DOLLAR_CHARACTER
            and position + 1 < total_length
            and buffer[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        )
        # Work out where the substitution starting here ends, if one does.
        if starts_dollar_paren:
            resume_at = _index_after_command_substitution(buffer, position)
        elif symbol == SHELL_BACKTICK_CHARACTER:
            resume_at = _index_after_backtick_substitution(
                buffer, position, total_length
            )
        else:
            position += 1
            continue
        # Blank opener, body and closer in one sweep.
        for blank_at in range(position, resume_at):
            buffer[blank_at] = SHELL_QUOTE_REPLACEMENT_CHARACTER
        position = resume_at
    return "".join(buffer)
|
|
880
|
+
|
|
881
|
+
|
|
882
|
+
def _strip_heredoc_bodies(command: str) -> str:
    """Overwrite here-document bodies with spaces so their text is inert.

    An opener such as ``<<TAG``, ``<<'TAG'``, ``<<"TAG"`` or ``<<-TAG``
    followed by a newline starts a body that bash treats as data, not as
    commands. The body ends at the first later line consisting only of
    the tag (for ``<<-``, leading TAB characters may precede it). A
    ``gh pr create`` inside such a body is input for ``cat``, ``ssh`` or
    similar and must stay invisible to the matcher.

    Single- and double-quoted regions are skipped so that a literal
    ``<<EOF`` inside a string (``echo "use <<EOF in your script"``) is
    not mistaken for an opener. Here-strings (``<<<``) are rejected by
    the opener check because they carry no body.

    Only the characters between the newline ending the opener line and
    the start of the closing tag line are replaced, using
    ``SHELL_QUOTE_REPLACEMENT_CHARACTER``; the opener line and the
    closing tag line are left untouched and every offset is preserved.

    When no closing tag is found the buffer is left as-is for that
    opener and scanning moves on by one character. That conservative
    choice avoids erasing a real ``gh pr create`` that follows an
    apparent opener with no matching closer.

    After a successful blanking the scan resumes from the end of the
    closing tag line, so later heredocs in the same command are handled
    independently.

    Args:
        command: Raw bash command string from the PreToolUse or
            PostToolUse hook input. This pass must run BEFORE
            ``_strip_quoted_regions`` so a quoted tag (``<<'EOF'``) is
            still readable for closing-tag matching.

    Returns:
        A string of the same length as ``command`` with every matched
        heredoc body blanked.
    """
    buffer = list(command)
    total_length = len(command)
    position = 0
    while position < total_length:
        symbol = buffer[position]
        if symbol == "'":
            position = _advance_past_single_quoted_region(
                buffer, position, total_length
            )
        elif symbol == "\"":
            position = _advance_past_double_quoted_region(
                buffer, position, total_length
            )
        elif _is_heredoc_opener_position(buffer, position, total_length):
            resume_at = _try_blank_one_heredoc_body(buffer, position, total_length)
            # ``None`` means nothing was blanked; step past this character.
            position = position + 1 if resume_at is None else resume_at
        else:
            position += 1
    return "".join(buffer)
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
def _advance_past_single_quoted_region(
|
|
959
|
+
all_scanned_characters: list[str],
|
|
960
|
+
opener_index: int,
|
|
961
|
+
buffer_length: int,
|
|
962
|
+
) -> int:
|
|
963
|
+
"""Return the index one past the closing ``'`` without mutating the buffer."""
|
|
964
|
+
cursor_index = opener_index + 1
|
|
965
|
+
while cursor_index < buffer_length:
|
|
966
|
+
if all_scanned_characters[cursor_index] == "'":
|
|
967
|
+
return cursor_index + 1
|
|
968
|
+
cursor_index += 1
|
|
969
|
+
return cursor_index
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
def _advance_past_double_quoted_region(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
) -> int:
    """Locate the end of a double-quoted region, honoring ``\\`` escapes.

    A backslash that has a character after it is skipped together with
    that character, so an escaped ``"`` does not close the region. The
    buffer is only read.

    Args:
        all_scanned_characters: Character buffer being scanned.
        opener_index: Index of the opening ``"``.
        buffer_length: Index at which scanning stops.

    Returns:
        The index one past the closing ``"``, or the position where the
        scan ran out of buffer when the quote is never closed.
    """
    position = opener_index + 1
    while position < buffer_length:
        symbol = all_scanned_characters[position]
        escapes_next_character = (
            symbol == SHELL_BACKSLASH_CHARACTER and position + 1 < buffer_length
        )
        if escapes_next_character:
            # Jump over the backslash and the character it escapes.
            position += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
        elif symbol == "\"":
            return position + 1
        else:
            position += 1
    return position
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
def _is_heredoc_opener_position(
    all_scanned_characters: list[str],
    cursor_index: int,
    buffer_length: int,
) -> bool:
    """Return True when the cursor sits at a ``<<`` (but not ``<<<``) heredoc opener.

    A here-string (``<<<``) carries no body and must never be treated as
    a heredoc. The ``<<`` pair is therefore rejected both when a third
    ``<`` follows it and when a ``<`` precedes it. The backward check
    matters because the scanning caller advances one character at a
    time: after the first ``<`` of ``<<<`` is rejected, the cursor lands
    on the second ``<``, where the last two characters of the
    here-string would otherwise look like a plain ``<<`` opener.

    Args:
        all_scanned_characters: Character buffer being scanned.
        cursor_index: Index to test for the first ``<`` of an opener.
        buffer_length: Length of the scannable part of the buffer.

    Returns:
        True only when the characters at ``cursor_index`` and
        ``cursor_index + 1`` are both ``<`` and neither neighbor of that
        pair is another ``<``.
    """
    if cursor_index + 1 >= buffer_length:
        return False
    if all_scanned_characters[cursor_index] != SHELL_LESS_THAN_CHARACTER:
        return False
    if all_scanned_characters[cursor_index + 1] != SHELL_LESS_THAN_CHARACTER:
        return False
    # Tail of a here-string: this ``<<`` is the 2nd and 3rd ``<`` of ``<<<``.
    if (
        cursor_index > 0
        and all_scanned_characters[cursor_index - 1] == SHELL_LESS_THAN_CHARACTER
    ):
        return False
    # Head of a here-string: a third ``<`` follows the pair.
    if (
        cursor_index + HEREDOC_OPENER_TOKEN_LENGTH < buffer_length
        and all_scanned_characters[cursor_index + HEREDOC_OPENER_TOKEN_LENGTH]
        == SHELL_LESS_THAN_CHARACTER
    ):
        return False
    return True
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
def _try_blank_one_heredoc_body(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
) -> int | None:
    """Blank the body of the heredoc opened at ``opener_index``, if it closes.

    The tag is parsed with ``HEREDOC_OPENER_TAG_PATTERN`` starting right
    after the ``<<`` token. The body begins after the newline that ends
    the opener line and runs up to the start of the closing tag line
    found by ``_find_closing_heredoc_tag_line``.

    Args:
        all_scanned_characters: Mutable character list of the command.
            Body characters are overwritten IN PLACE with
            ``SHELL_QUOTE_REPLACEMENT_CHARACTER``.
        opener_index: Index of the first ``<`` of the ``<<`` opener.
        buffer_length: Length of ``all_scanned_characters``, supplied by
            the caller.

    Returns:
        The end index of the closing tag line, as reported by
        ``_find_closing_heredoc_tag_line``, when a body was blanked.
        ``None`` when no tag could be parsed, the opener line has no
        terminating newline, or no closing tag line exists. In those
        cases the buffer is untouched and the caller can step forward by
        one character.
    """
    # The pattern needs a string; rebuild it from the current buffer state.
    snapshot = "".join(all_scanned_characters)
    opener_tag = HEREDOC_OPENER_TAG_PATTERN.match(
        snapshot, opener_index + HEREDOC_OPENER_TOKEN_LENGTH
    )
    if opener_tag is None:
        return None
    # First non-empty capture wins: single-quoted, double-quoted, then bare.
    tag_text = next(
        (
            captured
            for captured in map(opener_tag.group, ("sq_tag", "dq_tag", "bare_tag"))
            if captured
        ),
        None,
    )
    if not tag_text:
        return None
    strips_leading_tabs = opener_tag.group("dash") == "-"
    opener_newline_index = _index_of_next_newline(
        all_scanned_characters, opener_tag.end(), buffer_length
    )
    if opener_newline_index >= buffer_length:
        # The opener line is the last line, so there is no body to blank.
        return None
    first_body_index = opener_newline_index + 1
    closer_line_start, closer_line_end = _find_closing_heredoc_tag_line(
        all_scanned_characters,
        first_body_index,
        buffer_length,
        tag_text,
        strips_leading_tabs,
    )
    if closer_line_start is None:
        return None
    for blank_at in range(first_body_index, closer_line_start):
        all_scanned_characters[blank_at] = SHELL_QUOTE_REPLACEMENT_CHARACTER
    return closer_line_end
|
|
1067
|
+
|
|
1068
|
+
|
|
1069
|
+
def _index_of_next_newline(
    all_scanned_characters: list[str],
    start_index: int,
    buffer_length: int,
) -> int:
    """Find the first newline located at or after ``start_index``.

    Args:
        all_scanned_characters: Character buffer being scanned.
        start_index: First index to examine.
        buffer_length: Index at which scanning stops.

    Returns:
        The index of the newline, or the position where the scan ran out
        of buffer when there is none.
    """
    for position in range(start_index, buffer_length):
        if all_scanned_characters[position] == SHELL_NEWLINE_CHARACTER:
            return position
    # No newline: report where the scan stopped.
    return max(start_index, buffer_length)
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
def _find_closing_heredoc_tag_line(
    all_scanned_characters: list[str],
    body_start_index: int,
    buffer_length: int,
    expected_tag: str,
    tag_allows_leading_tabs: bool,
) -> tuple[int | None, int]:
    """Search the heredoc body line by line for the line that closes it.

    A line closes the heredoc when it equals the tag after trailing
    spaces, tabs and carriage returns have been dropped, which lets
    CRLF-authored heredocs match. When the opener used ``<<-``, leading
    TAB characters are dropped as well.

    Args:
        all_scanned_characters: Character buffer being scanned.
        body_start_index: Index of the first body character (the one
            right after the newline that ended the opener line).
        buffer_length: Length of ``all_scanned_characters``.
        expected_tag: Tag text taken from the opener.
        tag_allows_leading_tabs: True when the opener was ``<<-``.

    Returns:
        ``(line_start, line_end)`` of the closing line when one exists,
        where ``line_end`` is the value ``_index_of_next_newline``
        returned for that line. ``(None, body_start_index)`` when no line
        matches.
    """
    line_start = body_start_index
    while line_start < buffer_length:
        line_end = _index_of_next_newline(
            all_scanned_characters, line_start, buffer_length
        )
        trimmed_line = "".join(all_scanned_characters[line_start:line_end]).rstrip(
            " \t\r"
        )
        candidate_tag = (
            trimmed_line.lstrip("\t") if tag_allows_leading_tabs else trimmed_line
        )
        if candidate_tag == expected_tag:
            return line_start, line_end
        # Move to the character after this line's newline.
        line_start = line_end + 1
    return None, body_start_index
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def _preprocess_command_for_matching(command: str) -> str:
    """Run the shared blanking pipeline every command-shape matcher relies on.

    The enforcer, the restore hook and the web-flag detector all go
    through this one helper, so a new preprocessing pass added here
    reaches every consumer at once. The passes run in a fixed order:

    1. Heredoc bodies, so a literal ``gh pr create`` inside heredoc data
       cannot surface as a command.
    2. Inert quoted regions.
    3. Bash comments.

    Heredocs go first because a heredoc tag may itself be quoted
    (``<<'EOF'``); blanking quotes earlier would erase the tag and break
    closing-tag matching. The heredoc pass does its own minimal quote
    skipping so a literal ``<<EOF`` inside a string is not taken for an
    opener.

    Args:
        command: Raw bash command string from the PreToolUse or
            PostToolUse hook input.

    Returns:
        The command with heredoc bodies, quoted regions and comments
        blanked, substitution bodies still scannable, and all original
        offsets preserved.
    """
    without_heredoc_bodies = _strip_heredoc_bodies(command)
    without_quoted_text = _strip_quoted_regions(without_heredoc_bodies)
    return _strip_bash_comments(without_quoted_text)
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
def _all_gh_pr_create_segments(quote_stripped_command: str) -> list[str]:
    """Collect the argument tail of each ``gh pr create`` in the command.

    A segment starts where a ``GH_PR_CREATE_PATTERN`` match ends and
    stops at the next match of ``COMMAND_SEPARATOR_PATTERN`` (shell
    separators such as ``&&``, ``||``, ``;``, ``|``, ``&`` or a
    newline), or at the end of the string when no separator follows.
    Web-flag detection inspects each segment on its own, so in
    ``gh pr create --web && gh pr create --title T`` the second
    invocation cannot borrow the first one's ``--web``.

    Args:
        quote_stripped_command: Output of ``_strip_quoted_regions``.
            The caller must blank inert quoted regions before calling.

    Returns:
        One segment string per ``gh pr create`` invocation, in order of
        appearance; an empty list when the command has none.
    """
    segments: list[str] = []
    total_length = len(quote_stripped_command)
    for invocation in GH_PR_CREATE_PATTERN.finditer(quote_stripped_command):
        tail_start = invocation.end()
        boundary = COMMAND_SEPARATOR_PATTERN.search(quote_stripped_command, tail_start)
        if boundary is None:
            tail_end = total_length
        else:
            tail_end = boundary.start()
        segments.append(quote_stripped_command[tail_start:tail_end])
    return segments
|
|
1186
|
+
|
|
1187
|
+
|
|
1188
|
+
def _command_invokes_gh_pr_create_in_stripped(quote_stripped_command: str) -> bool:
    """Report whether the (quote-stripped) command invokes ``gh pr create``.

    The enforcer's PreToolUse gate and the restore hook's PostToolUse
    gate both call this function, so the two ends of the swap-restore
    pair cannot drift apart. A literal ``gh pr create`` inside
    ``echo "..."`` or another quoted argument is not reported, because
    the caller has already blanked quoted text with
    ``_strip_quoted_regions``.

    Args:
        quote_stripped_command: Output of ``_strip_quoted_regions``.
            The caller computes it once and shares it with
            ``_command_uses_web_flag_in_stripped`` so the quote walk is
            not repeated.

    Returns:
        True when ``GH_PR_CREATE_PATTERN`` matches anywhere in the
        stripped command, whether ``gh`` starts the command or sits
        inside a chained pipeline.
    """
    first_invocation = GH_PR_CREATE_PATTERN.search(quote_stripped_command)
    return first_invocation is not None
|