claude-dev-env 1.49.1 → 1.50.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/audit-rubrics/category_rubrics/category-a-api-contracts.md +17 -3
- package/audit-rubrics/prompts/category-a-api-contracts.md +17 -2
- package/docs/CODE_RULES.md +6 -1
- package/hooks/blocking/_gh_body_arg_utils.py +67 -11
- package/hooks/blocking/_md_to_html_blocker_test_support.py +65 -0
- package/hooks/blocking/code_rules_enforcer.py +386 -32
- package/hooks/blocking/conftest.py +30 -0
- package/hooks/blocking/md_to_html_blocker.py +2 -2
- package/hooks/blocking/pr_description_body_audit.py +148 -0
- package/hooks/blocking/pr_description_command_parser.py +233 -0
- package/hooks/blocking/pr_description_enforcer.py +36 -825
- package/hooks/blocking/pr_description_pr_number.py +153 -0
- package/hooks/blocking/pr_description_readability.py +366 -0
- package/hooks/blocking/test_code_rules_enforcer.py +65 -0
- package/hooks/blocking/test_code_rules_enforcer_docstring_args_signature.py +256 -0
- package/hooks/blocking/test_code_rules_enforcer_function_length.py +136 -5
- package/hooks/blocking/test_code_rules_enforcer_ignored_must_check_return.py +256 -0
- package/hooks/blocking/test_code_rules_enforcer_naming_pattern.py +137 -1
- package/hooks/blocking/test_md_to_html_blocker_exemptions.py +368 -0
- package/hooks/blocking/test_md_to_html_blocker_extensions.py +157 -0
- package/hooks/blocking/test_md_to_html_blocker_path_resolution.py +336 -0
- package/hooks/blocking/test_pr_description_enforcer.py +13 -1499
- package/hooks/blocking/test_pr_description_enforcer_body_audit.py +247 -0
- package/hooks/blocking/test_pr_description_enforcer_body_rules.py +493 -0
- package/hooks/blocking/test_pr_description_enforcer_command_parser.py +366 -0
- package/hooks/blocking/test_pr_description_enforcer_pr_number.py +159 -0
- package/hooks/blocking/test_pr_description_enforcer_readability.py +443 -0
- package/hooks/hooks_constants/blocking_check_limits.py +2 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +15 -1
- package/hooks/hooks_constants/md_to_html_blocker_constants.py +1 -1
- package/hooks/hooks_constants/pr_description_enforcer_constants.py +7 -0
- package/hooks/hooks_constants/test_md_to_html_blocker_constants.py +11 -4
- package/package.json +1 -1
- package/hooks/blocking/test_md_to_html_blocker.py +0 -772
|
@@ -1,784 +1,60 @@
|
|
|
1
|
+
"""PreToolUse hook gating gh pr create/edit/comment body content.
|
|
2
|
+
|
|
3
|
+
Reads a PreToolUse JSON payload on stdin, recognises the body-carrying
|
|
4
|
+
gh pr create/edit/comment Bash invocations, audits the PR body against the
|
|
5
|
+
Anthropic claude-code style rules, and denies the command when the body fails.
|
|
6
|
+
Readability-management CLI flags short-circuit the stdin path to adjust the
|
|
7
|
+
persisted readability state.
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
import json
|
|
2
|
-
import math
|
|
3
11
|
import os
|
|
4
|
-
import re
|
|
5
|
-
import shlex
|
|
6
12
|
import sys
|
|
7
13
|
from pathlib import Path
|
|
8
|
-
from typing import TextIO
|
|
9
14
|
|
|
10
15
|
_hooks_dir = str(Path(__file__).resolve().parent.parent)
|
|
11
16
|
if _hooks_dir not in sys.path:
|
|
12
17
|
sys.path.insert(0, _hooks_dir)
|
|
13
18
|
|
|
14
|
-
from blocking.
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
19
|
+
from blocking.pr_description_body_audit import ( # noqa: E402
|
|
20
|
+
_body_contains_any_header,
|
|
21
|
+
_compute_pr_body_shape,
|
|
22
|
+
_count_substantive_prose_chars,
|
|
23
|
+
_extract_vague_scan_text,
|
|
24
|
+
_iter_section_headers,
|
|
25
|
+
_matches_self_closing_reference,
|
|
26
|
+
_opens_with_this_pr_phrase,
|
|
27
|
+
)
|
|
28
|
+
from blocking.pr_description_command_parser import ( # noqa: E402
|
|
29
|
+
extract_body_from_command,
|
|
30
|
+
)
|
|
31
|
+
from blocking.pr_description_pr_number import ( # noqa: E402
|
|
32
|
+
_command_carries_body_flag,
|
|
33
|
+
_extract_pr_number_from_command,
|
|
34
|
+
)
|
|
35
|
+
from blocking.pr_description_readability import ( # noqa: E402
|
|
36
|
+
_build_readability_escape_hatch_message,
|
|
37
|
+
_dispatch_cli_flag,
|
|
38
|
+
_evaluate_readability_metrics,
|
|
39
|
+
_extract_readability_target_text,
|
|
40
|
+
_increment_strike_count,
|
|
41
|
+
_is_readability_enabled,
|
|
42
|
+
_load_readability_thresholds,
|
|
26
43
|
)
|
|
27
|
-
|
|
28
|
-
|
|
29
44
|
from hooks_constants.pr_description_enforcer_constants import ( # noqa: E402
|
|
30
45
|
ALL_HEAVY_OPENING_HEADERS,
|
|
31
46
|
ALL_HEAVY_TESTING_HEADERS,
|
|
32
47
|
ALL_READABILITY_CLI_FLAG_TOKENS,
|
|
33
|
-
ATOMIC_WRITE_TEMP_SUFFIX,
|
|
34
|
-
BLOCKQUOTE_LINE_PATTERN,
|
|
35
|
-
BLOCKQUOTE_MARKER_PATTERN,
|
|
36
|
-
BOLD_PAIR_PATTERN,
|
|
37
|
-
BULLET_MARKER_PATTERN,
|
|
38
|
-
DEFAULT_READABILITY_THRESHOLDS,
|
|
39
|
-
FENCED_CODE_BLOCK_PATTERN,
|
|
40
|
-
FLESCH_BASE_SCORE,
|
|
41
|
-
FLESCH_PERFECT_SCORE,
|
|
42
|
-
FLESCH_SYLLABLES_PER_WORD_COEFFICIENT,
|
|
43
|
-
FLESCH_WORDS_PER_SENTENCE_COEFFICIENT,
|
|
44
|
-
GH_PR_COMMAND_MIN_TOKEN_COUNT,
|
|
45
|
-
HEADING_LINE_PATTERN,
|
|
46
|
-
HEAVY_MIN_BODY_CHARS_FOR_CLASSIFICATION,
|
|
47
48
|
HEAVY_SHAPE,
|
|
48
|
-
INLINE_CODE_PATTERN,
|
|
49
|
-
LINK_TEXT_PATTERN,
|
|
50
49
|
MINIMUM_SUBSTANTIVE_PROSE_CHARS,
|
|
51
50
|
PR_GUIDE_PATH,
|
|
52
|
-
READABILITY_AVG_SENTENCE_WORDS_CEILING,
|
|
53
|
-
READABILITY_ENABLED_STATE_FILE,
|
|
54
|
-
READABILITY_FLESCH_LOOSEN_FACTOR,
|
|
55
|
-
READABILITY_LOOSEN_CAP,
|
|
56
|
-
READABILITY_MAX_SENTENCE_WORDS_CEILING,
|
|
57
|
-
READABILITY_MIN_FLESCH_FLOOR,
|
|
58
|
-
READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR,
|
|
59
|
-
READABILITY_STATE_FILE,
|
|
60
51
|
READABILITY_STRIKE_THRESHOLD,
|
|
61
|
-
READABILITY_THRESHOLD_OVERRIDE_FILE,
|
|
62
|
-
ReadabilityThresholds,
|
|
63
52
|
SELF_CLOSING_REFERENCE_MESSAGE_PREFIX,
|
|
64
53
|
SELF_CLOSING_REFERENCE_MESSAGE_SUFFIX,
|
|
65
|
-
SELF_REFERENCE_PATTERN_TEMPLATE,
|
|
66
|
-
STANDARD_SHAPE,
|
|
67
|
-
TABLE_ROW_LINE_PATTERN,
|
|
68
|
-
THIS_PR_OPENING_PATTERN,
|
|
69
54
|
TRIVIAL_BODY_CHAR_THRESHOLD,
|
|
70
|
-
|
|
71
|
-
WHITESPACE_RUN_PATTERN,
|
|
55
|
+
VAGUE_LANGUAGE_PATTERN,
|
|
72
56
|
)
|
|
73
57
|
|
|
74
|
-
VAGUE_LANGUAGE_PATTERN = re.compile(
|
|
75
|
-
r'\b(fix(?:ed)? (?:bug|issue|it)|update(?:d)? code|minor changes|various (?:fixes|updates|improvements))\b',
|
|
76
|
-
re.IGNORECASE,
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
shell_variable_sigil: str = "$"
|
|
81
|
-
body_file_stdin_sentinel: str = "-"
|
|
82
|
-
all_quote_characters: frozenset[str] = frozenset({'"', "'"})
|
|
83
|
-
file_encoding_utf8: str = "utf-8"
|
|
84
|
-
|
|
85
|
-
_non_body_value_flags: frozenset[str] = all_value_flags - {body_file_flag, body_file_short_flag}
|
|
86
|
-
|
|
87
|
-
_non_body_value_flag_equals_prefixes: tuple[str, ...] = tuple(
|
|
88
|
-
sorted(
|
|
89
|
-
(
|
|
90
|
-
prefix for prefix in all_value_flag_equals_prefixes
|
|
91
|
-
if not prefix.startswith("--body")
|
|
92
|
-
and not prefix.startswith("-b=")
|
|
93
|
-
and not prefix.startswith("-F=")
|
|
94
|
-
),
|
|
95
|
-
key=len,
|
|
96
|
-
reverse=True,
|
|
97
|
-
)
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
class PathTraversalError(Exception):
|
|
102
|
-
pass
|
|
103
|
-
|
|
104
|
-
def _is_flag_shaped_token(token: str) -> bool:
|
|
105
|
-
if len(token) < 2:
|
|
106
|
-
return False
|
|
107
|
-
if not token.startswith("-"):
|
|
108
|
-
return False
|
|
109
|
-
return token[1] == "-" or token[1].isalpha()
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def _strip_surrounding_quotes(token: str) -> str:
|
|
113
|
-
if len(token) < 2:
|
|
114
|
-
return token
|
|
115
|
-
first_character = token[0]
|
|
116
|
-
last_character = token[-1]
|
|
117
|
-
if first_character in all_quote_characters and first_character == last_character:
|
|
118
|
-
return token[1:-1]
|
|
119
|
-
return token
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def _is_unresolvable_shell_value(token: str) -> bool:
|
|
123
|
-
return token.startswith(shell_variable_sigil)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def _read_body_file_contents(file_path: str) -> str | None:
|
|
127
|
-
given_path = Path(file_path)
|
|
128
|
-
allowed_root = Path.cwd().resolve()
|
|
129
|
-
if given_path.is_symlink():
|
|
130
|
-
resolved_target = given_path.resolve()
|
|
131
|
-
try:
|
|
132
|
-
resolved_target.relative_to(allowed_root)
|
|
133
|
-
except ValueError:
|
|
134
|
-
raise PathTraversalError("symlink target resolves outside allowed root")
|
|
135
|
-
resolved_path = given_path.resolve()
|
|
136
|
-
if not given_path.is_absolute():
|
|
137
|
-
try:
|
|
138
|
-
resolved_path.relative_to(allowed_root)
|
|
139
|
-
except ValueError:
|
|
140
|
-
raise PathTraversalError("relative path resolves outside allowed root")
|
|
141
|
-
try:
|
|
142
|
-
with open(resolved_path, "r", encoding=file_encoding_utf8, errors="replace") as body_file:
|
|
143
|
-
return body_file.read()
|
|
144
|
-
except (FileNotFoundError, IsADirectoryError, PermissionError, OSError):
|
|
145
|
-
return None
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def _resolve_body_file_value(raw_value_token: str) -> str | None:
|
|
149
|
-
"""Return file contents, or None when the body cannot be audited.
|
|
150
|
-
|
|
151
|
-
None means body is present but unauditable -- skip enforcement.
|
|
152
|
-
This covers: stdin sentinel, unresolvable shell variables, and path-traversal-rejected paths.
|
|
153
|
-
"""
|
|
154
|
-
stripped_value = _strip_surrounding_quotes(raw_value_token)
|
|
155
|
-
if not stripped_value:
|
|
156
|
-
return None
|
|
157
|
-
if stripped_value == body_file_stdin_sentinel:
|
|
158
|
-
return None
|
|
159
|
-
if _is_unresolvable_shell_value(stripped_value):
|
|
160
|
-
return None
|
|
161
|
-
try:
|
|
162
|
-
return _read_body_file_contents(stripped_value)
|
|
163
|
-
except PathTraversalError:
|
|
164
|
-
return None
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def _resolve_body_string_value(raw_value_token: str) -> str | None:
|
|
168
|
-
"""Return the literal body string, or None when the value is an
|
|
169
|
-
unresolvable shell variable.
|
|
170
|
-
|
|
171
|
-
Distinguishing the two cases lets `main()` skip enforcement only for
|
|
172
|
-
unauditable bodies; a literal `--body ""` still returns `""` and flows
|
|
173
|
-
into `validate_pr_body` so the substantive-prose check blocks it.
|
|
174
|
-
"""
|
|
175
|
-
stripped_value = _strip_surrounding_quotes(raw_value_token)
|
|
176
|
-
if _is_unresolvable_shell_value(stripped_value):
|
|
177
|
-
return None
|
|
178
|
-
return stripped_value
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
def _reassemble_split_quoted_value(first_value_token: str, remaining_tokens: list[str]) -> str | None:
|
|
182
|
-
extra_tokens_consumed = count_extra_tokens_to_skip_for_split_quoted_value(
|
|
183
|
-
remaining_tokens,
|
|
184
|
-
first_value_token,
|
|
185
|
-
)
|
|
186
|
-
if extra_tokens_consumed is None:
|
|
187
|
-
return None
|
|
188
|
-
if extra_tokens_consumed == 0:
|
|
189
|
-
return first_value_token
|
|
190
|
-
continuation_tokens = remaining_tokens[:extra_tokens_consumed]
|
|
191
|
-
return " ".join([first_value_token, *continuation_tokens])
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def _match_body_flag_equals_prefix(token: str) -> str | None:
|
|
195
|
-
for each_prefix in all_body_flag_prefixes:
|
|
196
|
-
if token.startswith(each_prefix):
|
|
197
|
-
return each_prefix
|
|
198
|
-
return None
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
def _match_body_file_equals_prefix(token: str) -> str | None:
|
|
202
|
-
for each_prefix in (body_file_flag_prefix, body_file_short_flag_prefix):
|
|
203
|
-
if token.startswith(each_prefix):
|
|
204
|
-
return each_prefix
|
|
205
|
-
return None
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
def _match_non_body_value_flag_equals_prefix(token: str) -> str | None:
|
|
209
|
-
for each_prefix in _non_body_value_flag_equals_prefixes:
|
|
210
|
-
if token.startswith(each_prefix):
|
|
211
|
-
return each_prefix
|
|
212
|
-
return None
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
def _scan_raw_tokens_for_body(all_raw_tokens: list[str]) -> str | None | bool:
|
|
216
|
-
"""Return the body value from a raw token list, or False if no body flag found.
|
|
217
|
-
|
|
218
|
-
Returns False when no body/body-file flag is present (caller should continue).
|
|
219
|
-
Returns None when a body-file flag is present but malformed (no value
|
|
220
|
-
follows), OR when the body value is an unresolvable shell variable (e.g.
|
|
221
|
-
`--body "$VAR"`) — in either case the body is unauditable and the caller
|
|
222
|
-
skips enforcement.
|
|
223
|
-
Returns str for resolved body string values. An empty string `""` is a
|
|
224
|
-
literal-empty body (e.g. `--body ""`) and must still flow into
|
|
225
|
-
`validate_pr_body` so the substantive-prose check blocks it.
|
|
226
|
-
"""
|
|
227
|
-
token_index = 0
|
|
228
|
-
while token_index < len(all_raw_tokens):
|
|
229
|
-
current_token = all_raw_tokens[token_index]
|
|
230
|
-
remaining_raw = all_raw_tokens[token_index + 1:]
|
|
231
|
-
non_body_equals_prefix = _match_non_body_value_flag_equals_prefix(current_token)
|
|
232
|
-
if non_body_equals_prefix is not None:
|
|
233
|
-
first_value_token = current_token[len(non_body_equals_prefix):]
|
|
234
|
-
extra_skip = count_extra_tokens_to_skip_for_split_quoted_value(remaining_raw, first_value_token)
|
|
235
|
-
token_index += 1 + (extra_skip or 0)
|
|
236
|
-
continue
|
|
237
|
-
if current_token in _non_body_value_flags:
|
|
238
|
-
if remaining_raw and not _is_flag_shaped_token(remaining_raw[0]):
|
|
239
|
-
first_value_token = remaining_raw[0]
|
|
240
|
-
extra_skip = count_extra_tokens_to_skip_for_split_quoted_value(remaining_raw[1:], first_value_token)
|
|
241
|
-
token_index += 1 + 1 + (extra_skip or 0)
|
|
242
|
-
continue
|
|
243
|
-
token_index += 1
|
|
244
|
-
continue
|
|
245
|
-
body_equals_prefix = _match_body_flag_equals_prefix(current_token)
|
|
246
|
-
if body_equals_prefix is not None:
|
|
247
|
-
first_value_token = current_token[len(body_equals_prefix):]
|
|
248
|
-
full_value_token = _reassemble_split_quoted_value(first_value_token, remaining_raw)
|
|
249
|
-
if full_value_token is None:
|
|
250
|
-
return None
|
|
251
|
-
return _resolve_body_string_value(full_value_token)
|
|
252
|
-
body_file_equals_prefix = _match_body_file_equals_prefix(current_token)
|
|
253
|
-
if body_file_equals_prefix is not None:
|
|
254
|
-
first_value_token = current_token[len(body_file_equals_prefix):]
|
|
255
|
-
full_value_token = _reassemble_split_quoted_value(first_value_token, remaining_raw)
|
|
256
|
-
if full_value_token is None:
|
|
257
|
-
return None
|
|
258
|
-
return _resolve_body_file_value(full_value_token)
|
|
259
|
-
if current_token in all_body_flags:
|
|
260
|
-
if not remaining_raw or _is_flag_shaped_token(remaining_raw[0]):
|
|
261
|
-
return None
|
|
262
|
-
first_value_token = remaining_raw[0]
|
|
263
|
-
full_value_token = _reassemble_split_quoted_value(first_value_token, remaining_raw[1:])
|
|
264
|
-
if full_value_token is None:
|
|
265
|
-
return None
|
|
266
|
-
return _resolve_body_string_value(full_value_token)
|
|
267
|
-
if current_token in {body_file_flag, body_file_short_flag}:
|
|
268
|
-
if not remaining_raw or _is_flag_shaped_token(remaining_raw[0]):
|
|
269
|
-
return None
|
|
270
|
-
first_value_token = remaining_raw[0]
|
|
271
|
-
full_value_token = _reassemble_split_quoted_value(first_value_token, remaining_raw[1:])
|
|
272
|
-
if full_value_token is None:
|
|
273
|
-
return None
|
|
274
|
-
return _resolve_body_file_value(full_value_token)
|
|
275
|
-
token_index += 1
|
|
276
|
-
return False
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
def extract_body_from_command(
|
|
280
|
-
command: str,
|
|
281
|
-
pre_tokenized: tuple[str, list[str]] | None = None,
|
|
282
|
-
) -> str | None:
|
|
283
|
-
"""Return the PR body content for validation, or None if unextractable.
|
|
284
|
-
|
|
285
|
-
Uses iter_significant_tokens to skip values of non-body value-taking flags
|
|
286
|
-
so that --body/--body-file embedded in a quoted --title value never false-matches.
|
|
287
|
-
For space-form body-file flags, scans the raw token list directly because
|
|
288
|
-
iter_significant_tokens consumes the value token (yielding remaining-after-value).
|
|
289
|
-
|
|
290
|
-
If pre_tokenized is provided as (logical_line, raw_tokens), reuses those instead
|
|
291
|
-
of recomputing the logical line and shlex split a second time.
|
|
292
|
-
"""
|
|
293
|
-
if pre_tokenized is not None:
|
|
294
|
-
logical_line, all_raw_tokens = pre_tokenized
|
|
295
|
-
else:
|
|
296
|
-
logical_line = get_logical_first_line(command)
|
|
297
|
-
if not logical_line:
|
|
298
|
-
return None
|
|
299
|
-
try:
|
|
300
|
-
all_raw_tokens = shlex.split(logical_line, posix=False)
|
|
301
|
-
except ValueError:
|
|
302
|
-
return None
|
|
303
|
-
try:
|
|
304
|
-
all_significant_tokens = list(
|
|
305
|
-
iter_significant_tokens(command, pre_tokenized=(logical_line, all_raw_tokens))
|
|
306
|
-
)
|
|
307
|
-
except ValueError:
|
|
308
|
-
return None
|
|
309
|
-
|
|
310
|
-
significant_token_set = {each_token for each_token, _ in all_significant_tokens}
|
|
311
|
-
body_flag_found_in_significant = (
|
|
312
|
-
any(each_token in all_body_flags for each_token in significant_token_set)
|
|
313
|
-
or any(_match_body_flag_equals_prefix(each_token) is not None for each_token in significant_token_set)
|
|
314
|
-
or any(_match_body_file_equals_prefix(each_token) is not None for each_token in significant_token_set)
|
|
315
|
-
or any(each_token in {body_file_flag, body_file_short_flag} for each_token in significant_token_set)
|
|
316
|
-
)
|
|
317
|
-
if not body_flag_found_in_significant:
|
|
318
|
-
return None
|
|
319
|
-
|
|
320
|
-
scan_outcome = _scan_raw_tokens_for_body(all_raw_tokens)
|
|
321
|
-
if isinstance(scan_outcome, bool):
|
|
322
|
-
return None
|
|
323
|
-
return scan_outcome
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
def _strip_markdown_ceremony(body: str) -> str:
|
|
327
|
-
"""Return the body with Markdown ceremony stripped to leave underlying prose.
|
|
328
|
-
|
|
329
|
-
Removes fenced code, inline code, heading lines, blockquote markers,
|
|
330
|
-
bullet list markers, bold/emphasis markers, and Markdown link targets.
|
|
331
|
-
Whitespace is preserved so callers can collapse or measure it as needed.
|
|
332
|
-
"""
|
|
333
|
-
body_without_fences = FENCED_CODE_BLOCK_PATTERN.sub("", body)
|
|
334
|
-
body_without_inline_code = INLINE_CODE_PATTERN.sub("", body_without_fences)
|
|
335
|
-
body_without_blockquotes = BLOCKQUOTE_MARKER_PATTERN.sub("", body_without_inline_code)
|
|
336
|
-
body_without_headings = HEADING_LINE_PATTERN.sub("", body_without_blockquotes)
|
|
337
|
-
body_without_bullets = BULLET_MARKER_PATTERN.sub("", body_without_headings)
|
|
338
|
-
body_without_bold = BOLD_PAIR_PATTERN.sub(r"\1", body_without_bullets)
|
|
339
|
-
body_without_emphasis = body_without_bold.replace("*", "")
|
|
340
|
-
body_without_links = LINK_TEXT_PATTERN.sub(r"\1", body_without_emphasis)
|
|
341
|
-
return body_without_links
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
def _count_substantive_prose_chars(body: str) -> int:
|
|
345
|
-
"""Return the count of prose characters after stripping Markdown ceremony.
|
|
346
|
-
|
|
347
|
-
Collapses internal whitespace so a body of only headers and bullets --
|
|
348
|
-
no real WHY paragraph -- registers as effectively empty.
|
|
349
|
-
"""
|
|
350
|
-
stripped_body = _strip_markdown_ceremony(body)
|
|
351
|
-
body_collapsed = WHITESPACE_RUN_PATTERN.sub(' ', stripped_body).strip()
|
|
352
|
-
return len(body_collapsed)
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
def _extract_vague_scan_text(body: str) -> str:
|
|
356
|
-
"""Return the prose to scan for vague language, with non-prose regions removed.
|
|
357
|
-
|
|
358
|
-
Drops whole blockquote lines and whole pipe-delimited table rows, then strips
|
|
359
|
-
the same Markdown ceremony as the prose-count path -- which removes fenced
|
|
360
|
-
code, inline code, and whole heading lines. This exempts vague phrases that
|
|
361
|
-
appear only inside code fences, inline code, Markdown headings, quoted
|
|
362
|
-
reviewer text, or pipe-delimited example tables -- those are not the author's
|
|
363
|
-
own prose. A pipe-delimited row carries at least two pipes; a line with a
|
|
364
|
-
single leading pipe, or a borderless table row with no leading pipe, stays in
|
|
365
|
-
scope.
|
|
366
|
-
"""
|
|
367
|
-
without_blockquote_lines = BLOCKQUOTE_LINE_PATTERN.sub("", body)
|
|
368
|
-
without_table_rows = TABLE_ROW_LINE_PATTERN.sub("", without_blockquote_lines)
|
|
369
|
-
return _strip_markdown_ceremony(without_table_rows)
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
def _iter_section_headers(body: str) -> list[str]:
|
|
373
|
-
"""Return every ATX heading line in the body, preserving canonical form.
|
|
374
|
-
|
|
375
|
-
HEADING_LINE_PATTERN matches the leading hash run (one or more hash
|
|
376
|
-
characters at line start), so the result spans every ATX level.
|
|
377
|
-
Downstream callers in this module only test specific two-hash header
|
|
378
|
-
strings, so matching every heading level keeps the parser permissive
|
|
379
|
-
without changing behaviour for the canonical two-hash header shape.
|
|
380
|
-
|
|
381
|
-
Fenced code blocks are stripped first so example markdown nested inside ``` fences
|
|
382
|
-
(a PR body that demonstrates the Heavy shape, for instance) is not counted as a
|
|
383
|
-
structural header. This keeps the shape classifier and Heavy required-header check
|
|
384
|
-
aligned with `_strip_markdown_ceremony`, which already strips fences before measuring.
|
|
385
|
-
"""
|
|
386
|
-
body_without_fences = FENCED_CODE_BLOCK_PATTERN.sub("", body)
|
|
387
|
-
all_headers: list[str] = []
|
|
388
|
-
for each_match in HEADING_LINE_PATTERN.finditer(body_without_fences):
|
|
389
|
-
header_text = each_match.group(0).strip()
|
|
390
|
-
all_headers.append(header_text)
|
|
391
|
-
return all_headers
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
def _compute_pr_body_shape(body: str) -> str:
|
|
395
|
-
"""Classify a PR body as `trivial`, `standard`, or `heavy` from content alone.
|
|
396
|
-
|
|
397
|
-
Uses substantive prose chars (post-Markdown-strip) rather than raw length so the
|
|
398
|
-
classifier and the ceremony-on-Trivial check both measure the same metric against
|
|
399
|
-
TRIVIAL_BODY_CHAR_THRESHOLD; otherwise a body can be classified Standard by shape
|
|
400
|
-
while simultaneously being flagged as Trivial-sized by the ceremony check.
|
|
401
|
-
"""
|
|
402
|
-
substantive_length = _count_substantive_prose_chars(body)
|
|
403
|
-
header_count = len(_iter_section_headers(body))
|
|
404
|
-
|
|
405
|
-
if substantive_length < TRIVIAL_BODY_CHAR_THRESHOLD and header_count == 0:
|
|
406
|
-
return TRIVIAL_SHAPE
|
|
407
|
-
|
|
408
|
-
if substantive_length >= HEAVY_MIN_BODY_CHARS_FOR_CLASSIFICATION:
|
|
409
|
-
return HEAVY_SHAPE
|
|
410
|
-
|
|
411
|
-
return STANDARD_SHAPE
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
def _body_contains_any_header(body: str, all_candidate_headers: frozenset[str]) -> bool:
|
|
415
|
-
body_headers_lower = {each_header.lower() for each_header in _iter_section_headers(body)}
|
|
416
|
-
for each_candidate in all_candidate_headers:
|
|
417
|
-
candidate_lower = each_candidate.lower()
|
|
418
|
-
for each_present in body_headers_lower:
|
|
419
|
-
if each_present == candidate_lower:
|
|
420
|
-
return True
|
|
421
|
-
if each_present.startswith(candidate_lower):
|
|
422
|
-
character_after_candidate = each_present[len(candidate_lower)]
|
|
423
|
-
if not (character_after_candidate.isalnum() or character_after_candidate == "_"):
|
|
424
|
-
return True
|
|
425
|
-
return False
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
def _matches_self_closing_reference(body: str, pr_number: int) -> bool:
|
|
429
|
-
pattern_source = SELF_REFERENCE_PATTERN_TEMPLATE.format(pr_number=pr_number)
|
|
430
|
-
compiled_pattern = re.compile(pattern_source, re.IGNORECASE)
|
|
431
|
-
return compiled_pattern.search(body) is not None
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
def _opens_with_this_pr_phrase(body: str) -> bool:
|
|
435
|
-
return THIS_PR_OPENING_PATTERN.search(body) is not None
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
def _atomic_write_json(target_path: Path, all_payload_fields: dict[str, object]) -> None:
|
|
439
|
-
target_path.parent.mkdir(parents=True, exist_ok=True)
|
|
440
|
-
temporary_path = target_path.with_suffix(target_path.suffix + ATOMIC_WRITE_TEMP_SUFFIX)
|
|
441
|
-
with open(temporary_path, "w", encoding=file_encoding_utf8) as write_handle:
|
|
442
|
-
json.dump(all_payload_fields, write_handle)
|
|
443
|
-
os.replace(temporary_path, target_path)
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
def _read_json_or_default(target_path: Path, all_default_payload_fields: dict[str, object]) -> dict[str, object]:
|
|
447
|
-
if not target_path.exists():
|
|
448
|
-
return dict(all_default_payload_fields)
|
|
449
|
-
try:
|
|
450
|
-
with open(target_path, "r", encoding=file_encoding_utf8) as read_handle:
|
|
451
|
-
loaded_payload = json.load(read_handle)
|
|
452
|
-
except (FileNotFoundError, PermissionError, OSError, json.JSONDecodeError):
|
|
453
|
-
return dict(all_default_payload_fields)
|
|
454
|
-
if not isinstance(loaded_payload, dict):
|
|
455
|
-
return dict(all_default_payload_fields)
|
|
456
|
-
return loaded_payload
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
def _read_strike_count() -> int:
|
|
460
|
-
payload = _read_json_or_default(READABILITY_STATE_FILE, {"strikes": 0})
|
|
461
|
-
raw_count = payload.get("strikes", 0)
|
|
462
|
-
if isinstance(raw_count, int) and not isinstance(raw_count, bool):
|
|
463
|
-
return max(raw_count, 0)
|
|
464
|
-
return 0
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
def _increment_strike_count() -> int:
|
|
468
|
-
payload = _read_json_or_default(READABILITY_STATE_FILE, {"strikes": 0})
|
|
469
|
-
raw_count = payload.get("strikes", 0)
|
|
470
|
-
is_valid_integer = isinstance(raw_count, int) and not isinstance(raw_count, bool)
|
|
471
|
-
starting_count = max(raw_count, 0) if is_valid_integer else 0
|
|
472
|
-
new_count = starting_count + 1
|
|
473
|
-
_atomic_write_json(READABILITY_STATE_FILE, {"strikes": new_count})
|
|
474
|
-
return new_count
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
def _reset_strike_count() -> None:
|
|
478
|
-
_atomic_write_json(READABILITY_STATE_FILE, {"strikes": 0})
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
def _load_readability_thresholds() -> ReadabilityThresholds:
|
|
482
|
-
payload = _read_json_or_default(READABILITY_THRESHOLD_OVERRIDE_FILE, {})
|
|
483
|
-
flesch_min_value = payload.get("flesch_min", DEFAULT_READABILITY_THRESHOLDS.flesch_min)
|
|
484
|
-
max_sentence_value = payload.get(
|
|
485
|
-
"max_sentence_words", DEFAULT_READABILITY_THRESHOLDS.max_sentence_words
|
|
486
|
-
)
|
|
487
|
-
avg_sentence_value = payload.get(
|
|
488
|
-
"avg_sentence_words", DEFAULT_READABILITY_THRESHOLDS.avg_sentence_words
|
|
489
|
-
)
|
|
490
|
-
flesch_is_int = isinstance(flesch_min_value, int) and not isinstance(flesch_min_value, bool)
|
|
491
|
-
max_is_int = isinstance(max_sentence_value, int) and not isinstance(max_sentence_value, bool)
|
|
492
|
-
avg_is_int = isinstance(avg_sentence_value, int) and not isinstance(avg_sentence_value, bool)
|
|
493
|
-
resolved_flesch = flesch_min_value if flesch_is_int else DEFAULT_READABILITY_THRESHOLDS.flesch_min
|
|
494
|
-
resolved_max = max_sentence_value if max_is_int else DEFAULT_READABILITY_THRESHOLDS.max_sentence_words
|
|
495
|
-
resolved_avg = avg_sentence_value if avg_is_int else DEFAULT_READABILITY_THRESHOLDS.avg_sentence_words
|
|
496
|
-
return ReadabilityThresholds(
|
|
497
|
-
flesch_min=resolved_flesch,
|
|
498
|
-
max_sentence_words=resolved_max,
|
|
499
|
-
avg_sentence_words=resolved_avg,
|
|
500
|
-
)
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
def _read_loosens_used() -> int:
|
|
504
|
-
payload = _read_json_or_default(READABILITY_THRESHOLD_OVERRIDE_FILE, {})
|
|
505
|
-
raw_count = payload.get("loosens_used", 0)
|
|
506
|
-
if isinstance(raw_count, int) and not isinstance(raw_count, bool):
|
|
507
|
-
return max(raw_count, 0)
|
|
508
|
-
return 0
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
def _is_readability_enabled() -> bool:
|
|
512
|
-
payload = _read_json_or_default(READABILITY_ENABLED_STATE_FILE, {"enabled": True})
|
|
513
|
-
enabled_value = payload.get("enabled", True)
|
|
514
|
-
if isinstance(enabled_value, bool):
|
|
515
|
-
return enabled_value
|
|
516
|
-
return True
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
def _set_readability_enabled(enabled: bool) -> None:
|
|
520
|
-
_atomic_write_json(READABILITY_ENABLED_STATE_FILE, {"enabled": enabled})
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
def _count_syllables_in_word(word: str) -> int:
|
|
524
|
-
all_vowel_characters: frozenset[str] = frozenset("aeiouy")
|
|
525
|
-
cleaned_word = "".join(each_character for each_character in word.lower() if each_character.isalpha())
|
|
526
|
-
if not cleaned_word:
|
|
527
|
-
return 0
|
|
528
|
-
syllable_count = 0
|
|
529
|
-
is_previous_character_vowel = False
|
|
530
|
-
for each_character in cleaned_word:
|
|
531
|
-
is_vowel = each_character in all_vowel_characters
|
|
532
|
-
if is_vowel and not is_previous_character_vowel:
|
|
533
|
-
syllable_count += 1
|
|
534
|
-
is_previous_character_vowel = is_vowel
|
|
535
|
-
if cleaned_word.endswith("e") and syllable_count > 1:
|
|
536
|
-
syllable_count -= 1
|
|
537
|
-
return max(syllable_count, 1)
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
def _split_sentences(text: str) -> list[str]:
|
|
541
|
-
sentence_split_pattern = re.compile(r"[.!?]+\s+")
|
|
542
|
-
cleaned_text = text.strip()
|
|
543
|
-
if not cleaned_text:
|
|
544
|
-
return []
|
|
545
|
-
raw_pieces = sentence_split_pattern.split(cleaned_text)
|
|
546
|
-
all_sentences = [each_piece.strip() for each_piece in raw_pieces if each_piece.strip()]
|
|
547
|
-
return all_sentences
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
def _compute_flesch_reading_ease(text: str) -> float:
|
|
551
|
-
all_sentences = _split_sentences(text)
|
|
552
|
-
if not all_sentences:
|
|
553
|
-
return FLESCH_PERFECT_SCORE
|
|
554
|
-
all_words: list[str] = []
|
|
555
|
-
total_syllables = 0
|
|
556
|
-
for each_sentence in all_sentences:
|
|
557
|
-
sentence_words = [each_token for each_token in re.split(r"\s+", each_sentence) if each_token]
|
|
558
|
-
all_words.extend(sentence_words)
|
|
559
|
-
for each_word in sentence_words:
|
|
560
|
-
total_syllables += _count_syllables_in_word(each_word)
|
|
561
|
-
total_words = len(all_words)
|
|
562
|
-
if total_words == 0:
|
|
563
|
-
return FLESCH_PERFECT_SCORE
|
|
564
|
-
total_sentences = len(all_sentences)
|
|
565
|
-
return (
|
|
566
|
-
FLESCH_BASE_SCORE
|
|
567
|
-
- FLESCH_WORDS_PER_SENTENCE_COEFFICIENT * (total_words / total_sentences)
|
|
568
|
-
- FLESCH_SYLLABLES_PER_WORD_COEFFICIENT * (total_syllables / total_words)
|
|
569
|
-
)
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
def _extract_readability_target_text(body: str) -> str:
    """Select the part of a PR body that the readability check scores.

    Everything matched by ``FENCED_CODE_BLOCK_PATTERN`` is removed and leading
    whitespace is dropped. Two pieces are then joined with a blank line and
    passed through ``_strip_markdown_ceremony``:

    * the intro: text up to the first blank line or the first
      ``HEADING_LINE_PATTERN`` match, whichever comes first (the whole
      remaining body when neither exists);
    * the first section: text between the end of the first heading match and
      the start of the next one (or the end of the body). Empty when the body
      has no heading match.

    Args:
        body: The raw PR body.

    Returns:
        The cleaned intro-plus-first-section text.
    """
    unfenced_body = FENCED_CODE_BLOCK_PATTERN.sub("", body).lstrip()
    first_heading = HEADING_LINE_PATTERN.search(unfenced_body)

    # Collect whichever boundaries exist; str.find reports absence as -1.
    boundary_positions = [unfenced_body.find("\n\n")]
    if first_heading is not None:
        boundary_positions.append(first_heading.start())
    found_positions = [
        each_position for each_position in boundary_positions if each_position != -1
    ]
    if found_positions:
        intro_text = unfenced_body[: min(found_positions)]
    else:
        intro_text = unfenced_body

    section_text = ""
    if first_heading is not None:
        after_first_heading = unfenced_body[first_heading.end():]
        following_heading = HEADING_LINE_PATTERN.search(after_first_heading)
        if following_heading is None:
            section_text = after_first_heading
        else:
            section_text = after_first_heading[: following_heading.start()]

    return _strip_markdown_ceremony(f"{intro_text}\n\n{section_text}")
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
def _evaluate_readability_metrics(
    target_text: str,
    thresholds: ReadabilityThresholds,
) -> list[str]:
    """Check *target_text* against the readability thresholds.

    Three checks run in this order: longest sentence length, average sentence
    length (both in whitespace-separated words), and the score returned by
    ``_compute_flesch_reading_ease``.

    Args:
        target_text: The prose to evaluate.
        thresholds: Supplies ``max_sentence_words``, ``avg_sentence_words``
            and ``flesch_min``.

    Returns:
        One message per failed check; empty when every check passes or the
        text contains no sentences.
    """
    sentences = _split_sentences(target_text)
    if not sentences:
        return []

    words_per_sentence = [
        len([each_token for each_token in re.split(r"\s+", each_sentence) if each_token])
        for each_sentence in sentences
    ]
    longest_sentence_words = max(words_per_sentence)
    mean_sentence_words = sum(words_per_sentence) / len(words_per_sentence)

    violations: list[str] = []
    if longest_sentence_words > thresholds.max_sentence_words:
        violations.append(
            f"Readability: longest sentence is {longest_sentence_words} words "
            f"(maximum {thresholds.max_sentence_words}); "
            "split or rewrite the longest sentence"
        )
    if mean_sentence_words > thresholds.avg_sentence_words:
        violations.append(
            f"Readability: average sentence is {mean_sentence_words:.1f} words "
            f"(maximum {thresholds.avg_sentence_words}); "
            "shorten or split your longest sentences"
        )
    reading_ease = _compute_flesch_reading_ease(target_text)
    if reading_ease < thresholds.flesch_min:
        violations.append(
            f"Readability: Flesch Reading Ease is {reading_ease:.1f} "
            f"(minimum {thresholds.flesch_min}); use shorter words and sentences"
        )
    return violations
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
def _build_readability_escape_hatch_message() -> str:
|
|
644
|
-
return (
|
|
645
|
-
"Readability strike threshold reached. Pick one: "
|
|
646
|
-
"(1) python <enforcer-path> --readability-loosen to widen thresholds 10%, "
|
|
647
|
-
"(2) python <enforcer-path> --readability-disable to skip the readability check, "
|
|
648
|
-
"(3) python <enforcer-path> --readability-reset to zero the strike counter, "
|
|
649
|
-
"(4) reply with the body plus the intended message to report a false positive."
|
|
650
|
-
)
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
def _apply_readability_loosen() -> str:
    """Relax the stored readability thresholds by one step, if allowed.

    Nothing is written unless every guard passes. Guards are checked in this
    order: loosen cap, Flesch floor, sentence-length ceilings.

    Returns:
        ``"cap_reached"`` when the loosen count is at
        ``READABILITY_LOOSEN_CAP``; ``"floor_reached"`` when the Flesch
        minimum is already at its floor; ``"ceiling_reached"`` when either
        sentence-length threshold is already at its ceiling; ``"ok"`` after
        the new thresholds and incremented loosen count have been written to
        ``READABILITY_THRESHOLD_OVERRIDE_FILE``.
    """
    thresholds = _load_readability_thresholds()
    loosens_used = _read_loosens_used()

    if loosens_used >= READABILITY_LOOSEN_CAP:
        return "cap_reached"
    if thresholds.flesch_min <= READABILITY_MIN_FLESCH_FLOOR:
        return "floor_reached"
    is_sentence_limit_at_ceiling = (
        thresholds.max_sentence_words >= READABILITY_MAX_SENTENCE_WORDS_CEILING
        or thresholds.avg_sentence_words >= READABILITY_AVG_SENTENCE_WORDS_CEILING
    )
    if is_sentence_limit_at_ceiling:
        return "ceiling_reached"

    # Scale each threshold, round away from strictness, then clamp to its bound.
    scaled_flesch = math.floor(thresholds.flesch_min * READABILITY_FLESCH_LOOSEN_FACTOR)
    scaled_max_sentence = math.ceil(
        thresholds.max_sentence_words * READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR
    )
    scaled_avg_sentence = math.ceil(
        thresholds.avg_sentence_words * READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR
    )

    override_payload: dict[str, object] = {
        "flesch_min": max(READABILITY_MIN_FLESCH_FLOOR, scaled_flesch),
        "max_sentence_words": min(READABILITY_MAX_SENTENCE_WORDS_CEILING, scaled_max_sentence),
        "avg_sentence_words": min(READABILITY_AVG_SENTENCE_WORDS_CEILING, scaled_avg_sentence),
        "loosens_used": loosens_used + 1,
    }
    _atomic_write_json(READABILITY_THRESHOLD_OVERRIDE_FILE, override_payload)
    return "ok"
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
def _apply_readability_reset() -> None:
    """Reset the strike counter and overwrite the threshold override file.

    The override file is rewritten with a payload that holds only
    ``loosens_used`` set to zero.
    """
    _reset_strike_count()
    cleared_override_payload = {"loosens_used": 0}
    _atomic_write_json(READABILITY_THRESHOLD_OVERRIDE_FILE, cleared_override_payload)
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
def _resolve_positional_pr_number(token: str) -> int | None:
    """Return the PR number named by a positional token, or None if it is not one.

    Accepts either a bare integer literal or a GitHub PR URL whose final path
    segment is ``/pull/<number>``. The token may carry surrounding quotes;
    unresolvable shell variables are rejected.

    Only plain ASCII digit runs count as a number. ``int()`` alone would also
    accept signed (``+12``), underscore-grouped (``1_000``) and non-ASCII
    digit strings, none of which are bare integer literals.

    Args:
        token: One shell token taken from the positional slot of the command.

    Returns:
        The PR number, or None when the token is neither a digits-only
        literal nor a matching PR URL.
    """
    stripped_candidate = _strip_surrounding_quotes(token)
    if _is_unresolvable_shell_value(stripped_candidate):
        return None
    url_match = re.match(
        r"^https?://[^/]+/[^/]+/[^/]+/pull/(\d+)(?:[/?#].*)?$",
        stripped_candidate,
    )
    digit_text = url_match.group(1) if url_match is not None else stripped_candidate
    if re.fullmatch(r"[0-9]+", digit_text) is None:
        return None
    try:
        return int(digit_text)
    except ValueError:
        # Still reachable: CPython caps str-to-int conversion length by default.
        return None
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def _extract_pr_number_from_command(command: str) -> int | None:
    """Find the positional PR number in a ``gh pr edit`` or ``gh pr comment`` command.

    Tokens after the subcommand are walked left to right. Value-taking flags
    are skipped together with their values, in both ``--flag=value`` and
    ``--flag value`` forms, so a pair such as ``--repo owner/r`` cannot be
    mistaken for the PR number. Other flag-shaped tokens are skipped alone.
    The first remaining token decides the result: it is resolved as a bare
    integer or a PR URL (``https://github.com/o/r/pull/<n>``).

    Args:
        command: The raw shell command captured by the hook.

    Returns:
        The PR number when the first positional token resolves to one, else
        None. None is also returned when the logical first line is empty,
        cannot be tokenized, is too short, or is not ``gh pr edit|comment``.
    """
    first_logical_line = get_logical_first_line(command)
    if not first_logical_line:
        return None
    try:
        tokens = shlex.split(first_logical_line, posix=False)
    except ValueError:
        return None
    if len(tokens) < GH_PR_COMMAND_MIN_TOKEN_COUNT:
        return None
    if tokens[0] != "gh" or tokens[1] != "pr":
        return None
    if tokens[2] not in {"edit", "comment"}:
        return None

    bare_value_flags: frozenset[str] = (
        _non_body_value_flags | all_body_flags | {body_file_flag, body_file_short_flag}
    )
    token_count = len(tokens)
    cursor = GH_PR_COMMAND_MIN_TOKEN_COUNT
    while cursor < token_count:
        token = tokens[cursor]
        equals_prefix = (
            _match_non_body_value_flag_equals_prefix(token)
            or _match_body_flag_equals_prefix(token)
            or _match_body_file_equals_prefix(token)
        )
        if equals_prefix is not None:
            # A quoted value may have been split across several raw tokens.
            inline_value = token[len(equals_prefix):]
            split_value_token_count = count_extra_tokens_to_skip_for_split_quoted_value(
                tokens[cursor + 1:], inline_value
            ) or 0
            cursor += 1 + split_value_token_count
        elif token in bare_value_flags:
            # Skip the flag and the value token that follows it.
            cursor += 2
        elif _is_flag_shaped_token(token):
            cursor += 1
        else:
            # The first positional token is decisive, whether it resolves or not.
            return _resolve_positional_pr_number(token)
    return None
|
|
781
|
-
|
|
782
58
|
|
|
783
59
|
def validate_pr_body(body: str, pr_number: int | None = None) -> list[str]:
|
|
784
60
|
"""Audit a PR body against the Anthropic claude-code style rules.
|
|
@@ -854,71 +130,6 @@ def validate_pr_body(body: str, pr_number: int | None = None) -> list[str]:
|
|
|
854
130
|
return violations
|
|
855
131
|
|
|
856
132
|
|
|
857
|
-
def _dispatch_cli_flag(
|
|
858
|
-
flag_token: str,
|
|
859
|
-
output_stream: TextIO,
|
|
860
|
-
error_stream: TextIO,
|
|
861
|
-
) -> None:
|
|
862
|
-
"""Handle a single readability-management CLI flag and exit the process."""
|
|
863
|
-
if flag_token == "--readability-loosen":
|
|
864
|
-
outcome = _apply_readability_loosen()
|
|
865
|
-
if outcome == "cap_reached":
|
|
866
|
-
error_stream.write(
|
|
867
|
-
"loosen cap reached; use --readability-disable or --readability-reset\n"
|
|
868
|
-
)
|
|
869
|
-
sys.exit(1)
|
|
870
|
-
if outcome in {"floor_reached", "ceiling_reached"}:
|
|
871
|
-
error_stream.write(
|
|
872
|
-
"thresholds already at floor/ceiling; use --readability-disable or --readability-reset\n"
|
|
873
|
-
)
|
|
874
|
-
sys.exit(1)
|
|
875
|
-
output_stream.write("readability thresholds loosened 10%\n")
|
|
876
|
-
sys.exit(0)
|
|
877
|
-
if flag_token == "--readability-reset":
|
|
878
|
-
_apply_readability_reset()
|
|
879
|
-
output_stream.write("readability strike counter and override thresholds reset\n")
|
|
880
|
-
sys.exit(0)
|
|
881
|
-
if flag_token == "--readability-disable":
|
|
882
|
-
_set_readability_enabled(False)
|
|
883
|
-
output_stream.write("readability check disabled\n")
|
|
884
|
-
sys.exit(0)
|
|
885
|
-
if flag_token == "--readability-enable":
|
|
886
|
-
_set_readability_enabled(True)
|
|
887
|
-
output_stream.write("readability check enabled\n")
|
|
888
|
-
sys.exit(0)
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
def _command_carries_body_flag(command: str) -> bool:
|
|
892
|
-
"""Return True when the command string carries any body or body-file flag.
|
|
893
|
-
|
|
894
|
-
Detects the body/body-file forms accepted by ``gh pr {create,edit,comment}``:
|
|
895
|
-
|
|
896
|
-
- Long flags: a single ``"--body" in command`` substring check catches
|
|
897
|
-
every long form — ``--body``, ``--body=<value>``, ``--body-file``, and
|
|
898
|
-
``--body-file=<value>`` — because ``--body`` is a prefix of
|
|
899
|
-
``--body-file``. No separate ``--body-file`` check is needed.
|
|
900
|
-
- Short flags, space-separated: ``-b <value>``, ``-F <value>`` — matched
|
|
901
|
-
as `` -b `` and `` -F `` so the literal substring cannot collide with a
|
|
902
|
-
surrounding token (e.g. ``-base``, ``-Foo``).
|
|
903
|
-
- Short flags, equal-attached: ``-b=<value>``, ``-F=<value>`` — matched
|
|
904
|
-
as `` -b=`` and `` -F=`` for the same anti-collision reason. The test
|
|
905
|
-
suite relies on this detection path.
|
|
906
|
-
|
|
907
|
-
Args:
|
|
908
|
-
command: The raw shell command captured by the hook.
|
|
909
|
-
|
|
910
|
-
Returns:
|
|
911
|
-
True if any documented body or body-file flag appears in the command.
|
|
912
|
-
"""
|
|
913
|
-
return (
|
|
914
|
-
"--body" in command
|
|
915
|
-
or " -b " in command
|
|
916
|
-
or " -b=" in command
|
|
917
|
-
or " -F " in command
|
|
918
|
-
or " -F=" in command
|
|
919
|
-
)
|
|
920
|
-
|
|
921
|
-
|
|
922
133
|
def main() -> None:
|
|
923
134
|
for each_argv_token in sys.argv[1:]:
|
|
924
135
|
if each_argv_token in ALL_READABILITY_CLI_FLAG_TOKENS:
|
|
@@ -969,14 +180,14 @@ def main() -> None:
|
|
|
969
180
|
f"Use the pr-description-writer agent to author the body in Anthropic claude-code style. "
|
|
970
181
|
f"Guide:{pr_guide_reference}"
|
|
971
182
|
)
|
|
972
|
-
|
|
183
|
+
denial_payload = {
|
|
973
184
|
"hookSpecificOutput": {
|
|
974
185
|
"hookEventName": "PreToolUse",
|
|
975
186
|
"permissionDecision": "deny",
|
|
976
187
|
"permissionDecisionReason": denial_reason,
|
|
977
188
|
}
|
|
978
189
|
}
|
|
979
|
-
print(json.dumps(
|
|
190
|
+
print(json.dumps(denial_payload))
|
|
980
191
|
sys.stdout.flush()
|
|
981
192
|
|
|
982
193
|
sys.exit(0)
|