claude-dev-env 1.40.0 → 1.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CLAUDE.md +1 -1
  2. package/_shared/pr-loop/scripts/grant_project_claude_permissions.py +53 -3
  3. package/_shared/pr-loop/scripts/post_audit_thread.py +2 -2
  4. package/_shared/pr-loop/scripts/revoke_project_claude_permissions.py +68 -3
  5. package/_shared/pr-loop/scripts/tests/test_grant_project_claude_permissions.py +1 -1
  6. package/_shared/pr-loop/scripts/tests/test_revoke_project_claude_permissions.py +1 -1
  7. package/hooks/_gh_pr_author_swap_utils.py +1211 -0
  8. package/hooks/blocking/gh_body_arg_blocker.py +9 -6
  9. package/hooks/blocking/gh_pr_author_enforcer.py +480 -0
  10. package/hooks/blocking/gh_pr_author_restore.py +100 -0
  11. package/hooks/blocking/pr_converge_bugteam_enforcer.py +170 -0
  12. package/hooks/blocking/pr_description_enforcer.py +1 -3
  13. package/hooks/blocking/test_gh_body_arg_blocker.py +25 -3
  14. package/hooks/blocking/test_gh_pr_author_enforcer.py +1166 -0
  15. package/hooks/blocking/test_gh_pr_author_restore.py +512 -0
  16. package/hooks/blocking/test_gh_pr_author_swap_utils.py +910 -0
  17. package/hooks/blocking/test_pr_converge_bugteam_enforcer.py +311 -0
  18. package/hooks/config/gh_pr_author_swap_constants.py +76 -0
  19. package/hooks/config/pr_converge_bugteam_enforcer_constants.py +55 -0
  20. package/hooks/config/pr_converge_bugteam_enforcer_state.py +67 -0
  21. package/hooks/config/pr_description_enforcer_constants.py +5 -0
  22. package/hooks/config/test_pr_description_enforcer_constants.py +82 -0
  23. package/hooks/hooks.json +40 -0
  24. package/hooks/lifecycle/pr_converge_bugteam_skill_tracker.py +204 -0
  25. package/hooks/lifecycle/test_pr_converge_bugteam_skill_tracker.py +283 -0
  26. package/hooks/session/gh_pr_author_session_cleanup.py +171 -0
  27. package/hooks/session/test_gh_pr_author_session_cleanup.py +575 -0
  28. package/hooks/test__gh_pr_author_swap_utils.py +333 -0
  29. package/package.json +1 -1
  30. package/skills/_shared/pr-loop/scripts/write_audit_outcomes.py +2 -2
  31. package/skills/_shared/pr-loop/scripts/write_fix_outcomes.py +2 -2
  32. package/skills/bugteam/reference/audit-contract.md +22 -0
  33. package/skills/bugteam/reference/github-pr-reviews.md +1 -1
  34. package/skills/bugteam/scripts/bugteam_fix_hookspath.py +8 -2
  35. package/skills/bugteam/scripts/test__claude_permissions_common.py +48 -0
  36. package/skills/bugteam/scripts/test_claude_permissions_common.py +18 -10
  37. package/skills/pr-converge/SKILL.md +8 -2
  38. package/skills/pr-converge/config/constants.py +2 -1
  39. package/skills/pr-converge/reference/state-schema.md +36 -8
@@ -0,0 +1,1211 @@
1
+ """Shared utilities for the gh-pr-author swap hook trio.
2
+
3
+ The PreToolUse enforcer (``hooks/blocking/gh_pr_author_enforcer.py``), the
4
+ PostToolUse restore (``hooks/blocking/gh_pr_author_restore.py``), and the
5
+ SessionStart cleanup (``hooks/session/gh_pr_author_session_cleanup.py``)
6
+ all share a small set of helpers: write a line to a stream, build the
7
+ per-session state-file path, run ``gh auth switch``, read the
8
+ original-account login from a state file, delete a state file, and
9
+ detect a ``gh pr create`` invocation while ignoring quoted regions.
10
+
11
+ Centralising these helpers keeps the three hooks' contracts in
12
+ lock-step — a fix in the shared ``_command_invokes_gh_pr_create_in_stripped``
13
+ detector lands in the enforcer and the restore hook from a single edit,
14
+ and the state-file path and gh subprocess shape stay uniform across the
15
+ trio so a file written by the enforcer is always resolvable by the
16
+ restore and cleanup hooks.
17
+
18
+ Layout: a leading underscore marks the module as internal to the swap
19
+ feature, and the file lives directly under ``hooks/`` so both
20
+ ``hooks/blocking/`` and ``hooks/session/`` consumers can import it
21
+ without a per-directory path shim.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ import os
28
+ import stat
29
+ import subprocess
30
+ import sys
31
+ import tempfile
32
+ from pathlib import Path
33
+ from typing import TextIO
34
+
35
+ from config.gh_pr_author_swap_constants import (
36
+ ALL_GH_AUTH_SWITCH_COMMAND_HEAD,
37
+ ALL_SHELL_QUOTE_CHARACTERS,
38
+ BASH_COMMENT_INTRODUCER_CHARACTER,
39
+ COMMAND_SEPARATOR_PATTERN,
40
+ COMMAND_SUBSTITUTION_OPENER_LENGTH,
41
+ GH_AUTH_SWITCH_TIMEOUT_SECONDS,
42
+ GH_PR_CREATE_PATTERN,
43
+ HEREDOC_OPENER_TAG_PATTERN,
44
+ HEREDOC_OPENER_TOKEN_LENGTH,
45
+ SESSION_ID_UNSAFE_CHARACTERS_PATTERN,
46
+ SHELL_BACKSLASH_CHARACTER,
47
+ SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH,
48
+ SHELL_BACKTICK_CHARACTER,
49
+ SHELL_DOLLAR_CHARACTER,
50
+ SHELL_LESS_THAN_CHARACTER,
51
+ SHELL_NEWLINE_CHARACTER,
52
+ SHELL_PAREN_CLOSE_CHARACTER,
53
+ SHELL_PAREN_OPEN_CHARACTER,
54
+ SHELL_QUOTE_REPLACEMENT_CHARACTER,
55
+ STATE_FILE_DEFAULT_SESSION_ID,
56
+ STATE_FILE_ORIGINAL_ACCOUNT_KEY,
57
+ STATE_FILE_PERMISSION_MODE,
58
+ STATE_FILE_PREFIX,
59
+ STATE_FILE_SUFFIX,
60
+ )
61
+
62
+
63
def _write_line(message: str, into_stream: TextIO) -> None:
    """Emit ``message`` plus a newline on ``into_stream`` and flush immediately.

    The destination stream is always supplied by the caller, so this
    helper never decides on its own where output goes and the hook
    scripts can stay free of the ``logging`` module.

    Args:
        message: Text of the line, without a trailing newline. Any
            prefix is the caller's responsibility.
        into_stream: Text stream that receives the line (callers in this
            module pass ``sys.stderr`` for diagnostics).
    """
    terminated_line = "".join((message, "\n"))
    into_stream.write(terminated_line)
    # Flush right away so the line is not left sitting in a buffer when
    # the hook process exits.
    into_stream.flush()
79
+
80
+
81
def _sanitize_session_id(session_id: str) -> str:
    """Drop every unsafe character from a raw session id.

    The session id arrives in the hook input JSON and later becomes part
    of a filename, so anything matched by
    ``SESSION_ID_UNSAFE_CHARACTERS_PATTERN`` is deleted first. The pattern
    lives in the config module; it is expected to match everything
    outside ``[A-Za-z0-9_-]``, which covers path separators, dots, NUL
    bytes and shell metacharacters.

    Args:
        session_id: Session id exactly as received.

    Returns:
        ``session_id`` with all pattern matches removed. The result may
        be empty, in which case the caller substitutes a default id.
    """
    safe_session_id = SESSION_ID_UNSAFE_CHARACTERS_PATTERN.sub("", session_id)
    return safe_session_id
98
+
99
+
100
def _state_file_path(session_id: str) -> Path:
    """Build the swap-state file path for one session.

    All hooks that write, read, delete or sweep swap-state files derive
    the location from this single function so they always agree on it.

    Args:
        session_id: ``session_id`` value from the hook input JSON. It is
            passed through ``_sanitize_session_id`` first; when nothing
            survives sanitisation (including an empty input),
            ``STATE_FILE_DEFAULT_SESSION_ID`` is used instead.

    Returns:
        ``<system temp dir>/<STATE_FILE_PREFIX><id><STATE_FILE_SUFFIX>``.
    """
    usable_session_id = _sanitize_session_id(session_id)
    if not usable_session_id:
        # Nothing safe is left to name the file with; use the shared default.
        usable_session_id = STATE_FILE_DEFAULT_SESSION_ID
    temp_directory = tempfile.gettempdir()
    return Path(
        temp_directory,
        f"{STATE_FILE_PREFIX}{usable_session_id}{STATE_FILE_SUFFIX}",
    )
122
+
123
+
124
def _switch_gh_account(to_account: str) -> bool:
    """Switch the active gh CLI account and report whether it worked.

    The command line is ``ALL_GH_AUTH_SWITCH_COMMAND_HEAD`` followed by
    ``to_account`` (the head is expected to be the
    ``gh auth switch --user`` prefix). The command runs without a shell,
    with output captured and a timeout of
    ``GH_AUTH_SWITCH_TIMEOUT_SECONDS``.

    This function never writes diagnostics. Each caller decides whether
    a failed switch deserves a message.

    Args:
        to_account: Login that should become the active gh account.

    Returns:
        True only when the process ran and exited with status zero.
        False when the process could not be spawned (``OSError``, which
        includes a missing or non-executable gh binary), when it timed
        out or raised another ``subprocess.SubprocessError``, or when it
        exited non-zero.
    """
    argument_vector = [*ALL_GH_AUTH_SWITCH_COMMAND_HEAD, to_account]
    try:
        switch_result = subprocess.run(
            argument_vector,
            capture_output=True,
            text=True,
            timeout=GH_AUTH_SWITCH_TIMEOUT_SECONDS,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        # Spawn failure or timeout: report failure rather than crash the hook.
        return False
    else:
        return switch_result.returncode == 0
156
+
157
+
158
def _read_original_account(state_file: Path) -> str | None:
    """Read the original-account login from a swap-state file.

    The function never raises for a bad state file: every failure mode
    maps to ``None`` so the calling hook can continue on its
    non-blocking path.

    Args:
        state_file: Path produced by ``_state_file_path``.

    Returns:
        The stripped login when the file exists, decodes as UTF-8,
        parses to a JSON object, and holds a non-blank string under
        ``STATE_FILE_ORIGINAL_ACCOUNT_KEY``. ``None`` when the file is
        absent, unreadable, not valid UTF-8, malformed or excessively
        nested JSON, not a JSON object, missing the key, or holds a
        non-string or blank value. Unreadable, undecodable and malformed
        files also produce a diagnostic line on stderr; the other cases
        are silent.
    """
    try:
        raw_contents = state_file.read_text(encoding="utf-8")
    except FileNotFoundError:
        # An absent state file is the normal "no swap pending" case.
        return None
    except (OSError, UnicodeDecodeError) as read_error:
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # listed explicitly; otherwise a state file holding bytes that are
        # not valid UTF-8 would crash the hook.
        _write_line(
            f"[gh-pr-author-utils] failed to read state file {state_file}: {read_error}",
            sys.stderr,
        )
        return None
    try:
        parsed_state = json.loads(raw_contents)
    except (json.JSONDecodeError, RecursionError) as decode_error:
        # Pathologically nested JSON exhausts the decoder's recursion limit
        # and surfaces as RecursionError; treat it like any malformed file.
        _write_line(
            f"[gh-pr-author-utils] malformed state file {state_file}: {decode_error}",
            sys.stderr,
        )
        return None
    if not isinstance(parsed_state, dict):
        return None
    original_account = parsed_state.get(STATE_FILE_ORIGINAL_ACCOUNT_KEY, "")
    if not isinstance(original_account, str):
        return None
    stripped_original_account = original_account.strip()
    # A blank login is as useless as a missing one.
    return stripped_original_account or None
198
+
199
+
200
def _delete_state_file(state_file: Path) -> None:
    """Unlink a swap-state file on a best-effort basis.

    A file that is already gone counts as success. Any other ``OSError``
    is reported on stderr and swallowed, so a failed delete never raises
    into the calling hook.

    Args:
        state_file: Path produced by ``_state_file_path``.
    """
    try:
        state_file.unlink()
    except FileNotFoundError:
        # Already deleted: nothing to do and nothing to report.
        pass
    except OSError as unlink_error:
        failure_message = (
            f"[gh-pr-author-utils] failed to delete state file {state_file}: {unlink_error}"
        )
        _write_line(failure_message, sys.stderr)
215
+
216
+
217
def _state_file_is_attacker_planted(state_file: Path) -> bool:
    """Report whether a state file looks like it was not written by the enforcer.

    The path is inspected with ``lstat`` so a symlink is judged on its
    own metadata rather than its target's. The metadata is then handed
    to ``_lstat_indicates_attacker_planted``, which checks file type,
    permission bits and owner.

    The check exists to stop another local user from pre-creating a
    file at the predictable state path and steering a later
    ``gh auth switch`` to a login of their choosing. On platforms
    without ``os.getuid`` (Windows) ownership cannot be compared, so the
    function always answers False there.

    Callers that already hold an ``lstat`` result should call
    ``_lstat_indicates_attacker_planted`` directly. That avoids a second
    syscall and the window in which the path could change between two
    stats.

    Args:
        state_file: Path produced by ``_state_file_path``.

    Returns:
        False when the platform has no ``os.getuid`` or the file does
        not exist. True when ``lstat`` fails with any other ``OSError``.
        Otherwise the verdict of ``_lstat_indicates_attacker_planted``
        for the file's metadata.
    """
    posix_ownership_available = hasattr(os, "getuid")
    if not posix_ownership_available:
        return False
    try:
        candidate_metadata = state_file.lstat()
    except FileNotFoundError:
        # A missing file is the ordinary "no state" case, not an attack.
        return False
    except OSError:
        # The path cannot even be inspected: refuse to trust it.
        return True
    else:
        return _lstat_indicates_attacker_planted(candidate_metadata)
268
+
269
+
270
def _lstat_indicates_attacker_planted(file_lstat_result: os.stat_result) -> bool:
    """Judge an ``lstat`` result against the enforcer's state-file write contract.

    A candidate passes only when it is a regular file, its permission
    bits equal ``STATE_FILE_PERMISSION_MODE``, and it is owned by the
    current user. The checks run in that order and stop at the first
    mismatch.

    On platforms without ``os.getuid`` the comparison is skipped
    entirely and the function answers False.

    Args:
        file_lstat_result: Metadata for the candidate path. The caller
            must obtain it with ``lstat`` (not ``stat``) so a symlink is
            reported as a symlink and fails the regular-file test.

    Returns:
        True when ``os.getuid`` exists and the candidate is not a
        regular file, has different permission bits, or has a different
        owner. False otherwise.
    """
    if not hasattr(os, "getuid"):
        return False
    raw_mode = file_lstat_result.st_mode
    return (
        not stat.S_ISREG(raw_mode)
        or stat.S_IMODE(raw_mode) != STATE_FILE_PERMISSION_MODE
        or file_lstat_result.st_uid != os.getuid()
    )
309
+
310
+
311
def _index_after_command_substitution(all_scanned_characters: list[str], opener_index: int) -> int:
    """Return the index one past the closing ``)`` of a ``$(...)`` substitution.

    Walks from the opening ``$(`` to the ``)`` that actually balances
    it. Bash executes the substitution body as its own command, so
    unquoted body text is left intact for the matchers while quoted
    arguments inside the body are BLANKED in place. For example,
    ``$(printf 'gh pr create')`` runs ``printf`` against literal data,
    and that data must not be mistaken for a real ``gh pr create``
    invocation.

    Handling of each construct inside the body:

    * An unquoted backslash escapes the next character, so the pair is
      skipped as a unit and left unblanked. ``\\"`` and ``\\'`` therefore
      do not open a quoted region, and ``\\)`` / ``\\(`` do not change the
      paren depth.
    * A nested ``$(`` raises the paren depth.
    * A backtick substitution is skipped via
      ``_index_after_backtick_substitution`` so a ``)`` inside it does not
      change the paren depth. Its unquoted body stays scannable.
    * A single- or double-quoted region is blanked via
      ``_blank_quoted_region``. Single quotes have no escape mechanism,
      so every character between the quotes is blanked; double quotes
      honor backslash escapes and keep embedded substitutions scannable.
    * Bare ``(`` and ``)`` (subshells, array assignments, function
      definitions) adjust the paren depth so they cancel out before a
      bare ``)`` can close the outer substitution early.

    Unterminated quotes and backticks consume to the end of the buffer.

    Args:
        all_scanned_characters: Mutable list view of the command string.
            The walker MUTATES the buffer to blank quoted regions inside
            the substitution body. The ``$(`` opener, the ``)`` closer and
            all unquoted body characters remain intact.
        opener_index: Index of the ``$`` that begins ``$(``.

    Returns:
        The index just past the matching ``)``, or the buffer length when
        the substitution is unterminated.
    """
    paren_depth = 1
    interior_index = opener_index + COMMAND_SUBSTITUTION_OPENER_LENGTH
    buffer_length = len(all_scanned_characters)
    while interior_index < buffer_length and paren_depth > 0:
        interior_character = all_scanned_characters[interior_index]
        has_following_character = interior_index + 1 < buffer_length
        if interior_character == SHELL_BACKSLASH_CHARACTER and has_following_character:
            # Outside quotes a backslash makes the next character literal,
            # so neither character may be read as a quote, paren, backtick
            # or ``$(`` opener. Without this, ``\"`` would open a phantom
            # quoted region and blank real commands that follow it.
            interior_index += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
            continue
        if (
            interior_character == SHELL_DOLLAR_CHARACTER
            and has_following_character
            and all_scanned_characters[interior_index + 1] == SHELL_PAREN_OPEN_CHARACTER
        ):
            paren_depth += 1
            interior_index += COMMAND_SUBSTITUTION_OPENER_LENGTH
            continue
        if interior_character == SHELL_BACKTICK_CHARACTER:
            interior_index = _index_after_backtick_substitution(
                all_scanned_characters, interior_index, buffer_length
            )
            continue
        if interior_character in ALL_SHELL_QUOTE_CHARACTERS:
            interior_index = _blank_quoted_region(
                all_scanned_characters, interior_index, buffer_length, interior_character
            )
            continue
        if interior_character == SHELL_PAREN_OPEN_CHARACTER:
            paren_depth += 1
            interior_index += 1
            continue
        if interior_character == SHELL_PAREN_CLOSE_CHARACTER:
            # When the depth reaches zero the loop condition ends the walk
            # with ``interior_index`` already one past the closer.
            paren_depth -= 1
            interior_index += 1
            continue
        interior_index += 1
    return interior_index
399
+
400
+
401
def _index_after_backtick_substitution(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
) -> int:
    """Return the index one past the closing backtick of a ``` `...` ``` region.

    Bash executes the backtick body, so unquoted body text is left
    intact for the matchers. A single- or double-quoted region inside
    the body is blanked via ``_blank_quoted_region`` so a literal token
    inside a quoted argument (for example
    ``` `printf ';gh pr create'` ```) cannot be mistaken for a real
    command.

    A backslash followed by another character is skipped as a
    two-character unit and left unblanked. This keeps an escaped
    backtick (the nested-backtick form) from being taken as the closer,
    and keeps an escaped quote from opening a quoted region.

    Args:
        all_scanned_characters: Mutable list view of the command string.
            The walker MUTATES the buffer to blank quoted regions inside
            the substitution body.
        opener_index: Index of the opening backtick.
        buffer_length: Length of ``all_scanned_characters``, supplied by
            the caller.

    Returns:
        The index just past the matching unescaped backtick, or
        ``buffer_length`` when the backtick region is unterminated.
    """
    interior_index = opener_index + 1
    while interior_index < buffer_length:
        interior_character = all_scanned_characters[interior_index]
        if (
            interior_character == SHELL_BACKSLASH_CHARACTER
            and interior_index + 1 < buffer_length
        ):
            # Escaped character: it can neither close the region nor open
            # a quote. The pair stays visible because the body is executed.
            interior_index += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
            continue
        if interior_character == SHELL_BACKTICK_CHARACTER:
            return interior_index + 1
        if interior_character in ALL_SHELL_QUOTE_CHARACTERS:
            interior_index = _blank_quoted_region(
                all_scanned_characters, interior_index, buffer_length, interior_character
            )
            continue
        interior_index += 1
    return interior_index
439
+
440
+
441
def _blank_quoted_region(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
    quote_character: str,
) -> int:
    """Overwrite a ``'...'`` or ``"..."`` region with replacement characters in place.

    The opening quote, the quoted text and the closing quote are each
    replaced by ``SHELL_QUOTE_REPLACEMENT_CHARACTER``. Nothing is
    inserted or removed, so every offset in the buffer keeps its meaning.

    Rules that apply only when ``quote_character`` is ``"``:

    * A backslash followed by another character is blanked together with
      that character, so ``\\"`` does not end the region.
    * ``$(`` hands control to ``_index_after_command_substitution`` and a
      backtick hands control to ``_index_after_backtick_substitution``.
      Bash still expands both inside double quotes, so their unquoted
      bodies are left readable rather than blanked.

    Inside ``'...'`` none of these rules apply: every character up to the
    next ``'`` is blanked, because ``$``, backtick and backslash are
    literal there.

    Args:
        all_scanned_characters: Mutable list view of the command string;
            MUTATED in place.
        opener_index: Index of the opening quote.
        buffer_length: Length of ``all_scanned_characters``, supplied by
            the caller.
        quote_character: ``'`` or ``"``; the character that closes the
            region.

    Returns:
        The index just past the closing quote, or the index at which
        scanning stopped when the region is unterminated.
    """
    blank = SHELL_QUOTE_REPLACEMENT_CHARACTER
    expands_substitutions = quote_character == '"'
    all_scanned_characters[opener_index] = blank
    position = opener_index + 1
    while position < buffer_length:
        character = all_scanned_characters[position]
        if expands_substitutions:
            has_following_character = position + 1 < buffer_length
            if character == "\\" and has_following_character:
                # Escape pair: blank both halves and step over them together.
                all_scanned_characters[position] = blank
                all_scanned_characters[position + 1] = blank
                position += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
                continue
            if (
                character == SHELL_DOLLAR_CHARACTER
                and has_following_character
                and all_scanned_characters[position + 1] == SHELL_PAREN_OPEN_CHARACTER
            ):
                # ``$(...)`` is live inside double quotes: skip, do not blank.
                position = _index_after_command_substitution(
                    all_scanned_characters, position
                )
                continue
            if character == SHELL_BACKTICK_CHARACTER:
                # Backtick substitution is live inside double quotes as well.
                position = _index_after_backtick_substitution(
                    all_scanned_characters, position, buffer_length
                )
                continue
        all_scanned_characters[position] = blank
        if character == quote_character:
            # That was the closing quote; resume just after it.
            return position + 1
        position += 1
    return position
519
+
520
+
521
def _strip_quoted_regions(command: str) -> str:
    """Replace inert quoted regions with spaces, leaving substitutions scannable.

    Single quotes (``'...'``) and double quotes (``"..."``) wrap inert
    text in bash, so those regions are overwritten with the replacement
    character. ``$(...)`` and backtick substitutions execute their
    bodies, so their opener, closer and unquoted body characters are
    left intact. Any ``gh pr create`` token sitting unquoted inside
    either form must stay visible to the matchers, otherwise
    ``echo "$(gh pr create --title T)"`` would go undetected.

    Quoted arguments INSIDE a substitution body are blanked the same way
    as top-level quoted regions, so ``echo $(printf 'gh pr create')``
    does not leak ``printf``'s literal argument to the matchers.

    At the top level an unquoted backslash escapes the next character.
    The pair is skipped as a unit and left unblanked, so ``\\"`` and
    ``\\'`` are literal characters rather than quote openers. Without
    this, ``echo \\" ; gh pr create ; echo \\"`` would have its real
    ``gh pr create`` blanked as if it were quoted text.

    Inside a double-quoted region, backslash escapes (``\\"``) do not end
    the region, and ``$(...)`` / backtick substitutions remain scannable.
    An unterminated quote consumes the rest of the string.

    Args:
        command: Raw bash command string from PreToolUse hook input.

    Returns:
        A string of identical length to ``command`` in which every
        single- and double-quoted region (including those nested in
        substitution bodies) has been replaced by the replacement
        character, with all other characters unchanged.
    """
    all_scanned_characters = list(command)
    cursor_index = 0
    command_length = len(command)
    while cursor_index < command_length:
        current_character = all_scanned_characters[cursor_index]
        has_following_character = cursor_index + 1 < command_length
        if current_character == SHELL_BACKSLASH_CHARACTER and has_following_character:
            # Escaped character: literal, so it must not be read as a quote,
            # backtick or ``$(`` opener. A trailing lone backslash falls
            # through to the ordinary single-step advance below.
            cursor_index += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
            continue
        if (
            current_character == SHELL_DOLLAR_CHARACTER
            and has_following_character
            and all_scanned_characters[cursor_index + 1] == SHELL_PAREN_OPEN_CHARACTER
        ):
            cursor_index = _index_after_command_substitution(all_scanned_characters, cursor_index)
            continue
        if current_character == SHELL_BACKTICK_CHARACTER:
            cursor_index = _index_after_backtick_substitution(
                all_scanned_characters, cursor_index, command_length
            )
            continue
        if current_character not in ALL_SHELL_QUOTE_CHARACTERS:
            cursor_index += 1
            continue
        cursor_index = _blank_quoted_region(
            all_scanned_characters, cursor_index, command_length, current_character
        )
    return "".join(all_scanned_characters)
590
+
591
+
592
def _strip_bash_comments(quote_stripped_command: str) -> str:
    """Blank out bash comments in an already quote-stripped command.

    The input must come from ``_strip_quoted_regions``. Quoted text has
    then already been blanked, so any remaining hash character is either
    a comment introducer or part of an unquoted token.

    The work is delegated to ``_walk_and_blank_comments``, started at
    index 0 over the whole buffer with no substitution closer. That
    walker edits the buffer in place and, per its own documentation,
    descends into ``$(...)`` and backtick bodies while keeping their
    openers and closers.

    Args:
        quote_stripped_command: Output of ``_strip_quoted_regions``.

    Returns:
        The buffer joined back into a string after the walker has
        blanked comment text in place.
    """
    editable_characters = list(quote_stripped_command)
    _walk_and_blank_comments(
        editable_characters,
        cursor_start_index=0,
        end_index=len(editable_characters),
        # Top-level walk: no enclosing substitution bounds a comment.
        bounded_by_substitution_closer=None,
    )
    return "".join(editable_characters)
643
+
644
+
645
def _walk_and_blank_comments(
    all_scanned_characters: list[str],
    cursor_start_index: int,
    end_index: int,
    bounded_by_substitution_closer: str | None,
) -> int:
    """Blank every bash comment in a buffer region, descending into substitutions.

    The scan recurses whenever it meets a ``$(`` or backtick opener: the
    nested call walks the substitution body with the matching closer in
    hand and hands back the index just past that closer. Only comment
    text is overwritten; opener and closer characters are never touched
    by this function.

    Args:
        all_scanned_characters: Mutable character buffer for the command.
            Comment text is overwritten IN PLACE.
        cursor_start_index: First index to examine.
        end_index: Exclusive upper bound of the scan. Nested calls are
            given the same bound as their parent and stop on their own
            closer instead.
        bounded_by_substitution_closer: ``None`` for the top-level scan.
            ``SHELL_PAREN_CLOSE_CHARACTER`` while inside a ``$(...)``
            body; in that mode bare ``(`` / ``)`` pairs are depth-counted
            so an inner ``)`` does not end the body early.
            ``SHELL_BACKTICK_CHARACTER`` while inside a backtick body;
            the first backtick met ends the body.

    Returns:
        The index just past the closer when the bounded body was closed,
        otherwise the index at which the scan ran out of buffer.
    """
    inside_paren_body = bounded_by_substitution_closer == SHELL_PAREN_CLOSE_CHARACTER
    inside_backtick_body = bounded_by_substitution_closer == SHELL_BACKTICK_CHARACTER
    # The ``$(`` that opened this body counts as the first open paren.
    open_paren_count = 1 if inside_paren_body else 0
    position = cursor_start_index
    while position < end_index:
        character = all_scanned_characters[position]
        if inside_paren_body and character == SHELL_PAREN_CLOSE_CHARACTER:
            open_paren_count -= 1
            if open_paren_count == 0:
                return position + 1
            position += 1
        elif inside_backtick_body and character == SHELL_BACKTICK_CHARACTER:
            return position + 1
        elif (
            character == SHELL_DOLLAR_CHARACTER
            and position + 1 < end_index
            and all_scanned_characters[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        ):
            # Nested ``$(``: the recursive call resumes us past its closer.
            position = _walk_and_blank_comments(
                all_scanned_characters,
                cursor_start_index=position + COMMAND_SUBSTITUTION_OPENER_LENGTH,
                end_index=end_index,
                bounded_by_substitution_closer=SHELL_PAREN_CLOSE_CHARACTER,
            )
        elif character == SHELL_BACKTICK_CHARACTER:
            # Backtick opener (a closing backtick was handled above).
            position = _walk_and_blank_comments(
                all_scanned_characters,
                cursor_start_index=position + 1,
                end_index=end_index,
                bounded_by_substitution_closer=SHELL_BACKTICK_CHARACTER,
            )
        elif inside_paren_body and character == SHELL_PAREN_OPEN_CHARACTER:
            open_paren_count += 1
            position += 1
        elif (
            character == BASH_COMMENT_INTRODUCER_CHARACTER
            and _is_comment_introducer_position(all_scanned_characters, position)
        ):
            position = _blank_bounded_comment(
                all_scanned_characters,
                position,
                end_index,
                bounded_by_substitution_closer,
            )
        else:
            position += 1
    return position
746
+
747
+
748
def _blank_bounded_comment(
    all_scanned_characters: list[str],
    hash_index: int,
    end_index: int,
    bounded_by_substitution_closer: str | None,
) -> int:
    """Overwrite one comment in place, stopping at a newline or the closer.

    Blanking starts at the hash character itself and continues until a
    newline is met. When the comment sits inside a substitution body the
    substitution closer is a second stop character. The stop character
    is left untouched so the caller can process it on its next step.

    Args:
        all_scanned_characters: Mutable character buffer for the command.
            The comment text is overwritten IN PLACE with
            ``SHELL_QUOTE_REPLACEMENT_CHARACTER``.
        hash_index: Index of the hash that introduces the comment.
        end_index: Exclusive upper bound for the blanking.
        bounded_by_substitution_closer: ``None`` at the top level, or the
            closer character (``SHELL_PAREN_CLOSE_CHARACTER`` /
            ``SHELL_BACKTICK_CHARACTER``) of the enclosing substitution.

    Returns:
        The index of the newline or closer that ended the comment (that
        character is NOT blanked), or the index at which the buffer ran
        out when no stop character was found.
    """
    stop_characters = ["\n"]
    if bounded_by_substitution_closer in (
        SHELL_PAREN_CLOSE_CHARACTER,
        SHELL_BACKTICK_CHARACTER,
    ):
        stop_characters.append(bounded_by_substitution_closer)
    for position in range(hash_index, end_index):
        if all_scanned_characters[position] in stop_characters:
            return position
        all_scanned_characters[position] = SHELL_QUOTE_REPLACEMENT_CHARACTER
    # Either the region was empty or everything up to end_index was blanked.
    return max(hash_index, end_index)
799
+
800
+
801
+ def _is_comment_introducer_position(
802
+ all_scanned_characters: list[str], hash_index: int
803
+ ) -> bool:
804
+ """Return True when the hash at ``hash_index`` introduces a bash comment.
805
+
806
+ Bash treats a hash as a comment introducer only at the start of the
807
+ command or immediately after whitespace. This mirrors the lookbehind
808
+ branch of the bash comment rule (``(?<=\\s)|^``) while operating on
809
+ a mutable character list so the substitution-aware walker can
810
+ consult it inline.
811
+
812
+ Args:
813
+ all_scanned_characters: Character buffer being walked.
814
+ hash_index: Index of the hash character under test.
815
+
816
+ Returns:
817
+ True when ``hash_index`` is zero or the prior character is a
818
+ Python ``str.isspace`` whitespace character.
819
+ """
820
+ if hash_index == 0:
821
+ return True
822
+ prior_character = all_scanned_characters[hash_index - 1]
823
+ return prior_character.isspace()
824
+
825
+
826
def _strip_substitution_bodies(quote_stripped_command: str) -> str:
    """Overwrite every top-level ``$(...)`` and backtick substitution with blanks.

    Opener, body, and closer are all replaced with
    ``SHELL_QUOTE_REPLACEMENT_CHARACTER``, so a token that lives inside a
    substitution (for example a ``--web`` that is an argument to the
    subshell's own command) cannot be mistaken for a flag on the outer
    command. The replacement is one-for-one, so string offsets are
    unchanged.

    Args:
        quote_stripped_command: Command text whose quoted regions have
            already been blanked by the caller; this helper does no
            quote tracking of its own.

    Returns:
        A string of the same length as the input with each substitution
        span replaced by the blanking character.
    """
    characters = list(quote_stripped_command)
    total_length = len(quote_stripped_command)
    position = 0
    while position < total_length:
        character = characters[position]
        opens_dollar_paren = (
            character == SHELL_DOLLAR_CHARACTER
            and position + 1 < total_length
            and characters[position + 1] == SHELL_PAREN_OPEN_CHARACTER
        )
        if opens_dollar_paren:
            span_end_index = _index_after_command_substitution(characters, position)
        elif character == SHELL_BACKTICK_CHARACTER:
            span_end_index = _index_after_backtick_substitution(
                characters, position, total_length
            )
        else:
            position += 1
            continue
        # Blank opener through closer, then resume right after the span.
        for blank_index in range(position, span_end_index):
            characters[blank_index] = SHELL_QUOTE_REPLACEMENT_CHARACTER
        position = span_end_index
    return "".join(characters)
880
+
881
+
882
def _strip_heredoc_bodies(command: str) -> str:
    """Replace heredoc bodies with spaces so embedded text is inert.

    A here-document opener (``<<TAG``, ``<<'TAG'``, ``<<"TAG"``, or
    ``<<-TAG``) followed by a newline begins a body that bash treats as
    literal data. The body ends at the first later line consisting only
    of the tag (for ``<<-``, optionally preceded by TAB characters). A
    ``gh pr create`` token inside such a body is data, not a command,
    so it is blanked before the matchers run.

    The opener detector must fire only on syntactically real openers,
    because a false opener would blank real commands that follow it.
    The walker therefore steps over every region in which ``<<`` is
    inert:

    * single- and double-quoted regions (``echo "use <<EOF here"``);
    * backslash-escaped characters outside quotes (``\\<<EOF`` is a
      literal ``<`` followed by an input redirect, and ``\\'`` does not
      open a quoted region);
    * bash comments (``echo hi # <<EOF``): everything from the hash to
      the end of the line is ignored by bash, so the lines that follow
      are ordinary commands and must stay visible.

    Here-strings are rejected by ``_is_heredoc_opener_position``.

    Body characters between the newline that ends the opener line and
    the start of the closing tag line are replaced with
    ``SHELL_QUOTE_REPLACEMENT_CHARACTER`` so offsets are preserved. The
    opener line and the closing tag line are left intact.

    When no closing tag line is found, nothing is blanked for that
    opener and scanning continues one character further on. Several
    heredocs in one command are each handled in turn: after a
    successful blanking the walker resumes from the position returned
    by ``_try_blank_one_heredoc_body``.

    Args:
        command: Raw bash command string. This helper is intended to run
            before quote stripping so that a quoted tag (``<<'EOF'``) is
            still visible for closing-tag matching.

    Returns:
        A string of identical length to ``command`` with every
        recognized heredoc body blanked.
    """
    all_scanned_characters = list(command)
    command_length = len(command)
    cursor_index = 0
    while cursor_index < command_length:
        current_character = all_scanned_characters[cursor_index]
        if (
            current_character == SHELL_BACKSLASH_CHARACTER
            and cursor_index + 1 < command_length
        ):
            # An unquoted backslash escapes the next character, so that
            # character cannot open a quote, a comment, or a heredoc.
            cursor_index += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
            continue
        if current_character == "'":
            cursor_index = _advance_past_single_quoted_region(
                all_scanned_characters, cursor_index, command_length
            )
            continue
        if current_character == "\"":
            cursor_index = _advance_past_double_quoted_region(
                all_scanned_characters, cursor_index, command_length
            )
            continue
        if (
            current_character == BASH_COMMENT_INTRODUCER_CHARACTER
            and _is_comment_introducer_position(all_scanned_characters, cursor_index)
        ):
            # A comment runs to the end of the line; a ``<<TAG`` inside it
            # is not an opener, and the lines after it are real commands.
            cursor_index = _index_of_next_newline(
                all_scanned_characters, cursor_index, command_length
            )
            continue
        if not _is_heredoc_opener_position(
            all_scanned_characters, cursor_index, command_length
        ):
            cursor_index += 1
            continue
        advance_after_blanking = _try_blank_one_heredoc_body(
            all_scanned_characters, cursor_index, command_length
        )
        if advance_after_blanking is None:
            # No parsable tag or no closing line: leave the buffer alone.
            cursor_index += 1
            continue
        cursor_index = advance_after_blanking
    return "".join(all_scanned_characters)
956
+
957
+
958
+ def _advance_past_single_quoted_region(
959
+ all_scanned_characters: list[str],
960
+ opener_index: int,
961
+ buffer_length: int,
962
+ ) -> int:
963
+ """Return the index one past the closing ``'`` without mutating the buffer."""
964
+ cursor_index = opener_index + 1
965
+ while cursor_index < buffer_length:
966
+ if all_scanned_characters[cursor_index] == "'":
967
+ return cursor_index + 1
968
+ cursor_index += 1
969
+ return cursor_index
970
+
971
+
972
def _advance_past_double_quoted_region(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
) -> int:
    """Locate the end of a double-quoted region, honoring backslash escapes.

    A backslash that has a following character inside the buffer is
    skipped together with that character, so an escaped ``"`` does not
    close the region. The buffer is only read.

    Args:
        all_scanned_characters: Character buffer being walked.
        opener_index: Index of the opening ``"``.
        buffer_length: Exclusive upper bound of the search.

    Returns:
        The index one past the closing ``"``. When no closing quote is
        found, the index at which the search ran out of buffer.
    """
    position = opener_index + 1
    while position < buffer_length:
        character = all_scanned_characters[position]
        escapes_next_character = (
            character == SHELL_BACKSLASH_CHARACTER and position + 1 < buffer_length
        )
        if escapes_next_character:
            position += SHELL_BACKSLASH_ESCAPE_PAIR_LENGTH
        elif character == "\"":
            return position + 1
        else:
            position += 1
    return position
991
+
992
+
993
def _is_heredoc_opener_position(
    all_scanned_characters: list[str],
    cursor_index: int,
    buffer_length: int,
) -> bool:
    """Return True when the cursor sits at a ``<<`` (but not ``<<<``) heredoc opener.

    A here-string (``<<<word``) carries no body, so no position inside a
    ``<<<`` run may be reported as an opener. Checking only the character
    AFTER the pair is not enough: callers advance one character at a time
    after a rejection, so the second ``<`` of ``<<<EOF`` would then look
    like the start of ``<<EOF``. The character BEFORE the pair is checked
    as well to close that gap.

    Args:
        all_scanned_characters: Character buffer being walked.
        cursor_index: Index under test (candidate first ``<``).
        buffer_length: Length of ``all_scanned_characters``.

    Returns:
        True only when the two characters at ``cursor_index`` are ``<<``
        and neither the preceding nor the following character is ``<``.
    """
    if cursor_index + 1 >= buffer_length:
        return False
    if all_scanned_characters[cursor_index] != SHELL_LESS_THAN_CHARACTER:
        return False
    if all_scanned_characters[cursor_index + 1] != SHELL_LESS_THAN_CHARACTER:
        return False
    if (
        cursor_index > 0
        and all_scanned_characters[cursor_index - 1] == SHELL_LESS_THAN_CHARACTER
    ):
        # Second (or later) ``<`` of a longer run such as ``<<<``.
        return False
    after_token_index = cursor_index + HEREDOC_OPENER_TOKEN_LENGTH
    if (
        after_token_index < buffer_length
        and all_scanned_characters[after_token_index] == SHELL_LESS_THAN_CHARACTER
    ):
        # First ``<`` of a ``<<<`` here-string.
        return False
    return True
1012
+
1013
+
1014
def _try_blank_one_heredoc_body(
    all_scanned_characters: list[str],
    opener_index: int,
    buffer_length: int,
) -> int | None:
    """Blank a single heredoc body in place if its closing tag line exists.

    The tag is parsed with ``HEREDOC_OPENER_TAG_PATTERN`` starting right
    after the ``<<`` token. The body is taken to begin after the newline
    that ends the opener line and to end where the closing tag line
    starts.

    Args:
        all_scanned_characters: Mutable character buffer for the command.
            Body characters are overwritten IN PLACE with
            ``SHELL_QUOTE_REPLACEMENT_CHARACTER``.
        opener_index: Index of the first ``<`` of the ``<<`` opener.
        buffer_length: Length of ``all_scanned_characters``.

    Returns:
        The end index of the closing tag line as reported by
        ``_find_closing_heredoc_tag_line`` when a body was blanked.
        ``None`` when the tag cannot be parsed, the opener line has no
        terminating newline, or no closing tag line is found; the buffer
        is untouched in all of those cases.
    """
    buffer_text = "".join(all_scanned_characters)
    tag_match = HEREDOC_OPENER_TAG_PATTERN.match(
        buffer_text, opener_index + HEREDOC_OPENER_TOKEN_LENGTH
    )
    if tag_match is None:
        return None

    # The first non-empty capture among the three tag spellings wins.
    parsed_tag = next(
        (
            tag_match.group(group_name)
            for group_name in ("sq_tag", "dq_tag", "bare_tag")
            if tag_match.group(group_name)
        ),
        None,
    )
    if not parsed_tag:
        return None

    opener_line_end_index = _index_of_next_newline(
        all_scanned_characters, tag_match.end(), buffer_length
    )
    if opener_line_end_index >= buffer_length:
        # The opener line is the last line, so there is no body.
        return None

    first_body_index = opener_line_end_index + 1
    closing_line_start, closing_line_end = _find_closing_heredoc_tag_line(
        all_scanned_characters,
        first_body_index,
        buffer_length,
        parsed_tag,
        tag_match.group("dash") == "-",
    )
    if closing_line_start is None:
        return None

    for blank_index in range(first_body_index, closing_line_start):
        all_scanned_characters[blank_index] = SHELL_QUOTE_REPLACEMENT_CHARACTER
    return closing_line_end
1067
+
1068
+
1069
def _index_of_next_newline(
    all_scanned_characters: list[str],
    start_index: int,
    buffer_length: int,
) -> int:
    """Find the first newline at or after ``start_index``.

    Args:
        all_scanned_characters: Character buffer being searched.
        start_index: First index to examine.
        buffer_length: Exclusive upper bound of the search.

    Returns:
        The index of the first ``SHELL_NEWLINE_CHARACTER`` found, or the
        index at which the search ran out of buffer when there is none.
    """
    for position in range(start_index, buffer_length):
        if all_scanned_characters[position] == SHELL_NEWLINE_CHARACTER:
            return position
    # No newline in range: report where the scan stopped.
    return max(start_index, buffer_length)
1081
+
1082
+
1083
def _find_closing_heredoc_tag_line(
    all_scanned_characters: list[str],
    body_start_index: int,
    buffer_length: int,
    expected_tag: str,
    tag_allows_leading_tabs: bool,
) -> tuple[int | None, int]:
    """Search line by line for the line that closes a heredoc.

    A line closes the heredoc when, after trailing spaces, tabs, and
    carriage returns are removed (and, for ``<<-`` openers, leading tabs
    as well), what remains equals the tag exactly.

    Args:
        all_scanned_characters: Character buffer being walked.
        body_start_index: Index of the first character of the heredoc
            body.
        buffer_length: Length of ``all_scanned_characters``.
        expected_tag: Tag text taken from the opener.
        tag_allows_leading_tabs: True when leading TAB characters on the
            closing line are to be ignored (``<<-`` form).

    Returns:
        ``(line_start, line_end)`` of the closing line, where
        ``line_end`` is the index of that line's newline or the end of
        the buffer. ``(None, body_start_index)`` when no line matches.
    """
    candidate_start_index = body_start_index
    while candidate_start_index < buffer_length:
        candidate_end_index = _index_of_next_newline(
            all_scanned_characters, candidate_start_index, buffer_length
        )
        candidate_text = "".join(
            all_scanned_characters[candidate_start_index:candidate_end_index]
        ).rstrip(" \t\r")
        if tag_allows_leading_tabs:
            candidate_text = candidate_text.lstrip("\t")
        if candidate_text == expected_tag:
            return candidate_start_index, candidate_end_index
        # Step over the newline to the start of the following line.
        candidate_start_index = candidate_end_index + 1
    return None, body_start_index
1126
+
1127
+
1128
def _preprocess_command_for_matching(command: str) -> str:
    """Run the shared blanking pipeline over a raw command string.

    Three passes are applied in a fixed order:

    1. ``_strip_heredoc_bodies``: first, because a heredoc tag may
       itself be quoted (``<<'EOF'``) and must still be readable when
       the closing tag is searched for.
    2. ``_strip_quoted_regions``
    3. ``_strip_bash_comments``

    Args:
        command: Raw bash command string.

    Returns:
        The result of feeding ``command`` through the three passes in
        the order listed above.
    """
    heredoc_blanked_command = _strip_heredoc_bodies(command)
    quote_blanked_command = _strip_quoted_regions(heredoc_blanked_command)
    return _strip_bash_comments(quote_blanked_command)
1155
+
1156
+
1157
def _all_gh_pr_create_segments(quote_stripped_command: str) -> list[str]:
    """Collect the argument segment that follows each ``gh pr create`` match.

    For every match of ``GH_PR_CREATE_PATTERN`` the segment runs from the
    end of the match to the start of the next
    ``COMMAND_SEPARATOR_PATTERN`` match, or to the end of the string
    when no separator follows. Each invocation therefore gets its own
    segment, so callers can inspect its flags in isolation.

    Args:
        quote_stripped_command: Command text that the caller has already
            preprocessed (quoted regions blanked).

    Returns:
        One segment string per ``gh pr create`` match, in match order.
        An empty list when there is no match.
    """

    def _segment_starting_at(invocation_end_index: int) -> str:
        next_separator = COMMAND_SEPARATOR_PATTERN.search(
            quote_stripped_command, invocation_end_index
        )
        if next_separator is None:
            return quote_stripped_command[invocation_end_index:]
        return quote_stripped_command[invocation_end_index:next_separator.start()]

    return [
        _segment_starting_at(invocation_match.end())
        for invocation_match in GH_PR_CREATE_PATTERN.finditer(quote_stripped_command)
    ]
1186
+
1187
+
1188
def _command_invokes_gh_pr_create_in_stripped(quote_stripped_command: str) -> bool:
    """Report whether the preprocessed command contains ``gh pr create``.

    The check is a single ``GH_PR_CREATE_PATTERN`` search anywhere in
    the string. Text inside quoted arguments is expected to have been
    blanked by the caller beforehand, so it cannot match here.

    Args:
        quote_stripped_command: Command text that the caller has already
            preprocessed (quoted regions blanked).

    Returns:
        True when ``GH_PR_CREATE_PATTERN`` matches somewhere in the
        string, False otherwise.
    """
    first_invocation_match = GH_PR_CREATE_PATTERN.search(quote_stripped_command)
    return first_invocation_match is not None