claude-dev-env 1.50.0 → 1.50.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/blocking/_gh_body_arg_utils.py +67 -11
- package/hooks/blocking/_md_to_html_blocker_test_support.py +65 -0
- package/hooks/blocking/code_rules_enforcer.py +63 -21
- package/hooks/blocking/conftest.py +30 -0
- package/hooks/blocking/pr_description_body_audit.py +148 -0
- package/hooks/blocking/pr_description_command_parser.py +233 -0
- package/hooks/blocking/pr_description_enforcer.py +36 -825
- package/hooks/blocking/pr_description_pr_number.py +153 -0
- package/hooks/blocking/pr_description_readability.py +366 -0
- package/hooks/blocking/test_code_rules_enforcer_function_length.py +136 -5
- package/hooks/blocking/test_md_to_html_blocker_exemptions.py +368 -0
- package/hooks/blocking/test_md_to_html_blocker_extensions.py +157 -0
- package/hooks/blocking/test_md_to_html_blocker_path_resolution.py +336 -0
- package/hooks/blocking/test_pr_description_enforcer.py +13 -1499
- package/hooks/blocking/test_pr_description_enforcer_body_audit.py +247 -0
- package/hooks/blocking/test_pr_description_enforcer_body_rules.py +493 -0
- package/hooks/blocking/test_pr_description_enforcer_command_parser.py +366 -0
- package/hooks/blocking/test_pr_description_enforcer_pr_number.py +159 -0
- package/hooks/blocking/test_pr_description_enforcer_readability.py +443 -0
- package/hooks/hooks_constants/pr_description_enforcer_constants.py +7 -0
- package/package.json +1 -1
- package/hooks/blocking/test_md_to_html_blocker.py +0 -810
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Shared gh body-arg parsing utilities for blocking hooks."""
|
|
1
|
+
"""Shared shell-token and gh body-arg parsing utilities for blocking hooks."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -55,6 +55,63 @@ _all_equals_prefixes_for_skip: tuple[str, ...] = tuple(
|
|
|
55
55
|
bash_continuation_marker: str = "\\"
|
|
56
56
|
powershell_continuation_marker: str = "`"
|
|
57
57
|
|
|
58
|
+
# Leading character of a shell variable reference; is_unresolvable_shell_value
# reports any token that starts with it.
shell_variable_sigil: str = "$"
# Quote characters that strip_surrounding_quotes removes when the same one
# appears at both ends of a token.
all_quote_characters: frozenset[str] = frozenset({'"', "'"})
# Length guard shared by the token helpers: shorter tokens have no second
# character to inspect and cannot hold a pair of surrounding quotes.
minimum_meaningful_token_length: int = 2

# Every flag in all_value_flags except the two body-file flags.
non_body_value_flags: frozenset[str] = all_value_flags - {body_file_flag, body_file_short_flag}

# "<flag>=" prefixes for the non-body value flags, ordered longest first so a
# first-match scan (see _match_prefix) returns the longest matching prefix.
_non_body_value_flag_equals_prefixes: tuple[str, ...] = tuple(
    sorted((f"{each_flag}=" for each_flag in non_body_value_flags), key=len, reverse=True)
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def is_flag_shaped_token(token: str) -> bool:
    """Report whether a token is flag-shaped for body/PR-number extraction.

    A token is flag-shaped when it is at least
    ``minimum_meaningful_token_length`` characters long, starts with "-", and
    its second character is either "-" or a letter. Bare "--" and "--<digit>"
    tokens therefore count as flags, as do single-dash letter flags such as
    "-b", while "-5" does not. `_is_flag_shaped` applies a stricter rule for
    token-stream scanning.
    """
    is_long_enough = len(token) >= minimum_meaningful_token_length
    if not (is_long_enough and token.startswith("-")):
        return False
    second_character = token[1]
    return second_character == "-" or second_character.isalpha()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def strip_surrounding_quotes(token: str) -> str:
    """Return the token without one matching pair of surrounding quotes.

    The token is returned unchanged when it is shorter than
    ``minimum_meaningful_token_length`` or when its first and last characters
    are not the same member of ``all_quote_characters``. Only the outermost
    pair is removed.
    """
    if len(token) < minimum_meaningful_token_length:
        return token
    opening_character, closing_character = token[0], token[-1]
    is_wrapped_in_quotes = (
        opening_character in all_quote_characters
        and opening_character == closing_character
    )
    return token[1:-1] if is_wrapped_in_quotes else token
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def is_unresolvable_shell_value(token: str) -> bool:
    """Report whether the token starts with ``shell_variable_sigil``.

    NOTE(review): presumably such a token is a shell variable reference whose
    value is only known after the shell expands it -- confirm against callers.
    """
    begins_with_sigil = token.startswith(shell_variable_sigil)
    return begins_with_sigil
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _match_prefix(token: str, all_prefixes: tuple[str, ...]) -> str | None:
|
|
98
|
+
for each_prefix in all_prefixes:
|
|
99
|
+
if token.startswith(each_prefix):
|
|
100
|
+
return each_prefix
|
|
101
|
+
return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def match_body_flag_equals_prefix(token: str) -> str | None:
    """Return the entry of ``all_body_flag_prefixes`` that ``token`` starts with, or None."""
    matched_prefix = _match_prefix(token, all_body_flag_prefixes)
    return matched_prefix
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def match_body_file_equals_prefix(token: str) -> str | None:
    """Return the body-file "flag=" prefix that ``token`` starts with, or None.

    The long-form prefix is tried before the short-form one.
    """
    body_file_prefixes = (body_file_flag_prefix, body_file_short_flag_prefix)
    return _match_prefix(token, body_file_prefixes)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def match_non_body_value_flag_equals_prefix(token: str) -> str | None:
    """Return the non-body value-flag "flag=" prefix that ``token`` starts with, or None."""
    matched_prefix = _match_prefix(token, _non_body_value_flag_equals_prefixes)
    return matched_prefix
|
|
114
|
+
|
|
58
115
|
|
|
59
116
|
def _count_trailing_run(text: str, marker_character: str) -> int:
|
|
60
117
|
trailing_run_length = 0
|
|
@@ -91,7 +148,13 @@ def get_logical_first_line(command: str) -> str:
|
|
|
91
148
|
|
|
92
149
|
|
|
93
150
|
def _is_flag_shaped(token: str) -> bool:
|
|
94
|
-
|
|
151
|
+
"""Report whether a token is flag-shaped for token-stream scanning.
|
|
152
|
+
|
|
153
|
+
Requires an alphabetic character after "--", so bare "--" and "--<digit>"
|
|
154
|
+
tokens are not flag-shaped. `is_flag_shaped_token` applies a looser rule
|
|
155
|
+
for body/PR-number extraction.
|
|
156
|
+
"""
|
|
157
|
+
if len(token) < minimum_meaningful_token_length:
|
|
95
158
|
return False
|
|
96
159
|
if not token.startswith("-"):
|
|
97
160
|
return False
|
|
@@ -102,7 +165,7 @@ def _is_flag_shaped(token: str) -> bool:
|
|
|
102
165
|
|
|
103
166
|
|
|
104
167
|
def _quoted_value_starts_split(value_token: str) -> bool:
|
|
105
|
-
if len(value_token) <
|
|
168
|
+
if len(value_token) < minimum_meaningful_token_length:
|
|
106
169
|
return False
|
|
107
170
|
first_character = value_token[0]
|
|
108
171
|
if first_character not in {'"', "'"}:
|
|
@@ -129,13 +192,6 @@ def count_extra_tokens_to_skip_for_split_quoted_value(
|
|
|
129
192
|
return None
|
|
130
193
|
|
|
131
194
|
|
|
132
|
-
def _match_equals_prefix_for_skip(token: str) -> str | None:
|
|
133
|
-
for each_prefix in _all_equals_prefixes_for_skip:
|
|
134
|
-
if token.startswith(each_prefix):
|
|
135
|
-
return each_prefix
|
|
136
|
-
return None
|
|
137
|
-
|
|
138
|
-
|
|
139
195
|
def iter_significant_tokens(
|
|
140
196
|
command: str,
|
|
141
197
|
pre_tokenized: tuple[str, list[str]] | None = None,
|
|
@@ -175,7 +231,7 @@ def iter_significant_tokens(
|
|
|
175
231
|
while token_index < len(all_tokens):
|
|
176
232
|
current_token = all_tokens[token_index]
|
|
177
233
|
remaining_tokens = all_tokens[token_index + 1:]
|
|
178
|
-
matched_equals_prefix =
|
|
234
|
+
matched_equals_prefix = _match_prefix(current_token, _all_equals_prefixes_for_skip)
|
|
179
235
|
if matched_equals_prefix is not None:
|
|
180
236
|
value_token = current_token[len(matched_equals_prefix):]
|
|
181
237
|
split_value_extra_tokens = count_extra_tokens_to_skip_for_split_quoted_value(
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Shared subprocess-invocation helpers for the md_to_html_blocker test suites.
|
|
2
|
+
|
|
3
|
+
Subprocess CWD is rooted in a per-session sandbox created lazily so that
|
|
4
|
+
relative-path test cases canonicalize outside any `.claude-plugin/` ancestor,
|
|
5
|
+
outside the OS temp directory, and outside the exempt home-relative
|
|
6
|
+
subdirectories. The sandbox is a real repo root (it carries a `.git` marker) so
|
|
7
|
+
relative `README.md` / `CHANGELOG.md` writes exercise the repo-root exemption
|
|
8
|
+
path. This keeps the suites independent of where pytest itself is run.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import functools
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
import stat
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
import tempfile
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Path to md_to_html_blocker.py, resolved relative to this support module's
# own directory.
HOOK_SCRIPT_PATH = os.path.join(os.path.dirname(__file__), "md_to_html_blocker.py")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _strip_read_only_and_retry(removal_function, target_path, *_exc_info):
|
|
25
|
+
try:
|
|
26
|
+
os.chmod(target_path, stat.S_IWRITE)
|
|
27
|
+
removal_function(target_path)
|
|
28
|
+
except OSError:
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _force_rmtree(target_path: str) -> None:
    """Recursively delete ``target_path``, retrying entries that fail to delete.

    ``shutil.rmtree`` is given ``_strip_read_only_and_retry`` as its error
    handler, under the keyword ``onexc`` on Python 3.12+ and ``onerror`` on
    older versions. Deletion is best-effort: an OSError that still escapes is
    swallowed.
    """
    retry_handler = _strip_read_only_and_retry
    try:
        if sys.version_info >= (3, 12):
            shutil.rmtree(target_path, onexc=retry_handler)
        else:
            shutil.rmtree(target_path, onerror=retry_handler)
    except OSError:
        pass
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@functools.lru_cache(maxsize=1)
def _get_sandbox_parent_directory() -> str:
    """Create the sandbox directory on first call and return its path.

    The directory is a fresh ``pytest_md_blocker_*`` temp directory placed
    directly under the user's home directory, with an empty ``.git`` marker
    file inside it. The result is cached, so every later call returns the
    same path until the cache is cleared.
    """
    home_directory = str(Path.home())
    sandbox_parent = tempfile.mkdtemp(prefix="pytest_md_blocker_", dir=home_directory)
    Path(sandbox_parent, ".git").touch()
    return sandbox_parent
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _RunHook:
    """Callable that runs the md_to_html_blocker hook script in a subprocess."""

    def __call__(self, tool_name: str, tool_input: dict) -> subprocess.CompletedProcess:
        """Run the hook with a JSON payload on stdin and return the completed process.

        Args:
            tool_name: Value sent under the payload's "tool_name" key.
            tool_input: Value sent under the payload's "tool_input" key.

        Returns:
            The CompletedProcess with stdout/stderr captured as text. A
            non-zero exit status does not raise (``check=False``).
        """
        hook_event = {"tool_name": tool_name, "tool_input": tool_input}
        hook_command = [sys.executable, HOOK_SCRIPT_PATH]
        return subprocess.run(
            hook_command,
            input=json.dumps(hook_event),
            capture_output=True,
            text=True,
            check=False,
            # Run from the shared sandbox so relative paths resolve inside it.
            cwd=_get_sandbox_parent_directory(),
        )


_run_hook = _RunHook()
|
|
@@ -1723,7 +1723,7 @@ def _is_init_file(file_path: str) -> bool:
|
|
|
1723
1723
|
return file_path.replace("\\", "/").rsplit("/", 1)[-1] == "__init__.py"
|
|
1724
1724
|
|
|
1725
1725
|
|
|
1726
|
-
def
|
|
1726
|
+
def _statement_is_docstring(statement_node: ast.stmt) -> bool:
|
|
1727
1727
|
return (
|
|
1728
1728
|
isinstance(statement_node, ast.Expr)
|
|
1729
1729
|
and isinstance(statement_node.value, ast.Constant)
|
|
@@ -1778,7 +1778,7 @@ def check_thin_wrapper_files(content: str, file_path: str) -> list[str]:
|
|
|
1778
1778
|
|
|
1779
1779
|
statements_after_docstring = (
|
|
1780
1780
|
body_statements[1:]
|
|
1781
|
-
if
|
|
1781
|
+
if _statement_is_docstring(body_statements[0])
|
|
1782
1782
|
else body_statements
|
|
1783
1783
|
)
|
|
1784
1784
|
if not statements_after_docstring:
|
|
@@ -1937,11 +1937,7 @@ def _function_body_line_count(
|
|
|
1937
1937
|
if not function_node.body:
|
|
1938
1938
|
return 0
|
|
1939
1939
|
first_body_index = 0
|
|
1940
|
-
if (
|
|
1941
|
-
isinstance(function_node.body[0], ast.Expr)
|
|
1942
|
-
and isinstance(function_node.body[0].value, ast.Constant)
|
|
1943
|
-
and isinstance(function_node.body[0].value.value, str)
|
|
1944
|
-
):
|
|
1940
|
+
if _statement_is_docstring(function_node.body[0]):
|
|
1945
1941
|
if len(function_node.body) == 1:
|
|
1946
1942
|
return 0
|
|
1947
1943
|
first_body_index = 1
|
|
@@ -2355,7 +2351,7 @@ def _statement_is_raise_not_implemented(statement_node: ast.stmt) -> bool:
|
|
|
2355
2351
|
|
|
2356
2352
|
def _function_body_is_stub(function_node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
|
|
2357
2353
|
body_statements = list(function_node.body)
|
|
2358
|
-
if body_statements and
|
|
2354
|
+
if body_statements and _statement_is_docstring(body_statements[0]):
|
|
2359
2355
|
body_statements = body_statements[1:]
|
|
2360
2356
|
if len(body_statements) != 1:
|
|
2361
2357
|
return False
|
|
@@ -5630,6 +5626,37 @@ def _function_definition_line_span(
|
|
|
5630
5626
|
return end_lineno - function_node.lineno + 1
|
|
5631
5627
|
|
|
5632
5628
|
|
|
5629
|
+
def _definition_docstring_line_span(
    definition_node: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef,
) -> int:
    """Return how many source lines the definition's leading docstring occupies.

    The function-length gate measures executable span. The Google Python
    Style Guide prefers small functions as a matter of executable complexity
    (§3.18) while also requiring complete docstrings on public functions and
    classes (§3.8); counting docstring lines toward the gate would penalize
    the documentation §3.8 mandates, so leading docstring statements are
    measured here and excluded by the caller.

    Args:
        definition_node: The function, method, or class definition node to
            inspect.

    Returns:
        The number of source lines covered by the leading docstring
        statement, or zero when the body is empty or does not open with a
        string literal.
    """
    body_statements = definition_node.body
    if not body_statements or not _statement_is_docstring(body_statements[0]):
        return 0
    leading_statement = body_statements[0]
    # Fall back to a one-line span when end_lineno is missing or falsy.
    last_docstring_line = getattr(leading_statement, "end_lineno", None)
    if not last_docstring_line:
        last_docstring_line = leading_statement.lineno
    return last_docstring_line - leading_statement.lineno + 1
|
|
5658
|
+
|
|
5659
|
+
|
|
5633
5660
|
def changed_line_numbers(prior_content: str, post_edit_content: str) -> set[int]:
|
|
5634
5661
|
"""Return the post-edit line numbers an edit added or replaced.
|
|
5635
5662
|
|
|
@@ -5716,18 +5743,23 @@ def check_function_length(
|
|
|
5716
5743
|
all_changed_lines: set[int] | None = None,
|
|
5717
5744
|
defer_scope_to_caller: bool = False,
|
|
5718
5745
|
) -> list[str]:
|
|
5719
|
-
"""Flag functions whose
|
|
5720
|
-
|
|
5721
|
-
Function
|
|
5722
|
-
inclusive)
|
|
5723
|
-
|
|
5724
|
-
|
|
5725
|
-
|
|
5726
|
-
|
|
5727
|
-
|
|
5728
|
-
|
|
5729
|
-
|
|
5730
|
-
|
|
5746
|
+
"""Flag functions whose executable span exceeds cognitive-load thresholds.
|
|
5747
|
+
|
|
5748
|
+
Function executable spans — the definition span (signature line through
|
|
5749
|
+
last body statement, inclusive) minus the leading docstring lines of the
|
|
5750
|
+
function and of every function or class nested within it, per
|
|
5751
|
+
``_definition_docstring_line_span`` summed over the nested definitions —
|
|
5752
|
+
at or above
|
|
5753
|
+
``FUNCTION_LENGTH_BLOCKING_THRESHOLD`` (60 lines) appear in the returned
|
|
5754
|
+
issues list and block the write at the gate. The threshold rests on the
|
|
5755
|
+
small-function guidance in Robert C. Martin, *Clean Code* Ch. 3
|
|
5756
|
+
("Functions") and the Google Python Style Guide's ~40-line function review
|
|
5757
|
+
hint (https://google.github.io/styleguide/pyguide.html) — a measure of
|
|
5758
|
+
executable complexity, paired with the Guide's complete-docstring mandate
|
|
5759
|
+
for public APIs, so documentation lines never count against the gate; this
|
|
5760
|
+
gate blocks on body growth that pushes a function past that span. It does
|
|
5761
|
+
not derive from CODE_RULES §6.5, which governs advisory file-length
|
|
5762
|
+
signals and argues against hard numeric blocks.
|
|
5731
5763
|
|
|
5732
5764
|
The issue message carries ``Function NAME (defined at line X) is Y lines``
|
|
5733
5765
|
precisely so the gate's ``function_length_span_range`` can recover the
|
|
@@ -5776,7 +5808,17 @@ def check_function_length(
|
|
|
5776
5808
|
if not isinstance(each_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
5777
5809
|
continue
|
|
5778
5810
|
line_span = _function_definition_line_span(each_node)
|
|
5779
|
-
if line_span
|
|
5811
|
+
if line_span < FUNCTION_LENGTH_BLOCKING_THRESHOLD:
|
|
5812
|
+
continue
|
|
5813
|
+
docstring_line_total = sum(
|
|
5814
|
+
_definition_docstring_line_span(each_definition)
|
|
5815
|
+
for each_definition in ast.walk(each_node)
|
|
5816
|
+
if isinstance(
|
|
5817
|
+
each_definition, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
|
|
5818
|
+
)
|
|
5819
|
+
)
|
|
5820
|
+
executable_line_span = line_span - docstring_line_total
|
|
5821
|
+
if executable_line_span >= FUNCTION_LENGTH_BLOCKING_THRESHOLD:
|
|
5780
5822
|
span_range = range(each_node.lineno, each_node.lineno + line_span)
|
|
5781
5823
|
message = (
|
|
5782
5824
|
f"Function {each_node.name!r} (defined at line {each_node.lineno}) "
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Session-scoped cleanup fixture for the md_to_html_blocker test suites.
|
|
2
|
+
|
|
3
|
+
The md_to_html_blocker suites share one lazily-created sandbox parent
|
|
4
|
+
directory under the home directory. This fixture tears that sandbox down once
|
|
5
|
+
the session ends so the suites leave no residue regardless of which split file
|
|
6
|
+
pytest collects first.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
_BLOCKING_DIRECTORY = Path(__file__).resolve().parent
|
|
15
|
+
|
|
16
|
+
if str(_BLOCKING_DIRECTORY) not in sys.path:
|
|
17
|
+
sys.path.insert(0, str(_BLOCKING_DIRECTORY))
|
|
18
|
+
|
|
19
|
+
from _md_to_html_blocker_test_support import ( # noqa: E402
|
|
20
|
+
_force_rmtree,
|
|
21
|
+
_get_sandbox_parent_directory,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture(scope="session", autouse=True)
def _cleanup_sandbox_parent_directory():
    """Remove the shared sandbox directory after the test session finishes.

    Nothing happens before the tests run. On teardown the sandbox is deleted
    only if ``_get_sandbox_parent_directory`` has a cached result, so a
    session that never used the sandbox does not create one just to delete it.
    """
    yield
    sandbox_was_created = bool(_get_sandbox_parent_directory.cache_info().currsize)
    if not sandbox_was_created:
        return
    # The call below hits the cache and returns the existing sandbox path.
    _force_rmtree(_get_sandbox_parent_directory())
    _get_sandbox_parent_directory.cache_clear()
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Audit PR body markdown for prose substance, shape, and structural rules.
|
|
2
|
+
|
|
3
|
+
Strips Markdown ceremony to measure substantive prose, classifies the body as
|
|
4
|
+
trivial, standard, or heavy, enumerates section headers, prepares the prose
|
|
5
|
+
scanned for vague language, and flags self-closing references to the PR's own
|
|
6
|
+
number and the discouraged "This PR ..." opening. Vague-language enforcement
|
|
7
|
+
runs in validate_pr_body in pr_description_enforcer.py.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
_hooks_dir = str(Path(__file__).resolve().parent.parent)
|
|
15
|
+
if _hooks_dir not in sys.path:
|
|
16
|
+
sys.path.insert(0, _hooks_dir)
|
|
17
|
+
|
|
18
|
+
from hooks_constants.pr_description_enforcer_constants import ( # noqa: E402
|
|
19
|
+
BLOCKQUOTE_LINE_PATTERN,
|
|
20
|
+
BLOCKQUOTE_MARKER_PATTERN,
|
|
21
|
+
BOLD_PAIR_PATTERN,
|
|
22
|
+
BULLET_MARKER_PATTERN,
|
|
23
|
+
FENCED_CODE_BLOCK_PATTERN,
|
|
24
|
+
HEADING_LINE_PATTERN,
|
|
25
|
+
HEAVY_MIN_BODY_CHARS_FOR_CLASSIFICATION,
|
|
26
|
+
HEAVY_SHAPE,
|
|
27
|
+
INLINE_CODE_PATTERN,
|
|
28
|
+
LINK_TEXT_PATTERN,
|
|
29
|
+
SELF_REFERENCE_PATTERN_TEMPLATE,
|
|
30
|
+
STANDARD_SHAPE,
|
|
31
|
+
TABLE_ROW_LINE_PATTERN,
|
|
32
|
+
THIS_PR_OPENING_PATTERN,
|
|
33
|
+
TRIVIAL_BODY_CHAR_THRESHOLD,
|
|
34
|
+
TRIVIAL_SHAPE,
|
|
35
|
+
WHITESPACE_RUN_PATTERN,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def strip_markdown_ceremony(body: str) -> str:
    """Return the body's underlying prose with Markdown ceremony removed.

    Deletes, in order, fenced code, inline code, blockquote markers, heading
    lines, and bullet list markers. Bold pairs are then unwrapped to their
    inner text, any remaining asterisks are dropped, and Markdown links are
    reduced to their link text. Whitespace is left untouched so callers can
    collapse or measure it as needed.
    """
    all_deleted_patterns = (
        FENCED_CODE_BLOCK_PATTERN,
        INLINE_CODE_PATTERN,
        BLOCKQUOTE_MARKER_PATTERN,
        HEADING_LINE_PATTERN,
        BULLET_MARKER_PATTERN,
    )
    prose = body
    for each_pattern in all_deleted_patterns:
        prose = each_pattern.sub("", prose)
    # Unwrap bold pairs before the remaining emphasis asterisks are removed.
    prose = BOLD_PAIR_PATTERN.sub(r"\1", prose)
    prose = prose.replace("*", "")
    return LINK_TEXT_PATTERN.sub(r"\1", prose)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _count_substantive_prose_chars(body: str) -> int:
    """Return the number of prose characters left after stripping Markdown ceremony.

    Each whitespace run is collapsed to one space and the ends are trimmed
    before counting, so a body made only of headers and bullets -- with no
    real WHY paragraph -- registers as effectively empty.
    """
    prose_only = strip_markdown_ceremony(body)
    single_spaced = WHITESPACE_RUN_PATTERN.sub(" ", prose_only)
    return len(single_spaced.strip())
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _extract_vague_scan_text(body: str) -> str:
    """Return the author's own prose, which is the text scanned for vague language.

    Whole blockquote lines and whole pipe-delimited table rows are dropped
    first. The remainder goes through the same Markdown stripping as the
    prose-count path, which also removes fenced code, inline code, and whole
    heading lines. Vague phrases that appear only in quoted reviewer text,
    example tables, code, or headings are therefore exempt. A pipe-delimited
    row carries at least two pipes; a line with a single leading pipe, or a
    borderless table row with no leading pipe, stays in scope.
    """
    author_prose = body
    for each_line_pattern in (BLOCKQUOTE_LINE_PATTERN, TABLE_ROW_LINE_PATTERN):
        author_prose = each_line_pattern.sub("", author_prose)
    return strip_markdown_ceremony(author_prose)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _iter_section_headers(body: str) -> list[str]:
    """Return every ATX heading line found outside fenced code, in document order.

    HEADING_LINE_PATTERN matches a leading run of one or more hash characters
    at line start, so headings of every ATX level are returned. Callers in
    this module only compare against specific two-hash headers, so the wider
    match keeps the parser permissive without changing their behaviour.

    Fenced code blocks are removed before matching so example Markdown inside
    ``` fences (a PR body that demonstrates the Heavy shape, for instance) is
    not counted as a structural header. This keeps the shape classifier and
    the Heavy required-header check aligned with `strip_markdown_ceremony`,
    which also strips fences before measuring. Each returned heading has its
    surrounding whitespace trimmed.
    """
    unfenced_body = FENCED_CODE_BLOCK_PATTERN.sub("", body)
    return [
        each_match.group(0).strip()
        for each_match in HEADING_LINE_PATTERN.finditer(unfenced_body)
    ]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _compute_pr_body_shape(body: str) -> str:
    """Classify a PR body as `trivial`, `standard`, or `heavy` from its content.

    Length is measured in substantive prose characters (after Markdown is
    stripped), not raw length. The classifier and the ceremony-on-Trivial
    check then compare the same metric against TRIVIAL_BODY_CHAR_THRESHOLD;
    otherwise a body could be classified Standard by shape while also being
    flagged as Trivial-sized by the ceremony check.

    A body is trivial only when it is below the trivial threshold and has no
    headers. Otherwise it is heavy once its prose length reaches
    HEAVY_MIN_BODY_CHARS_FOR_CLASSIFICATION, and standard in every other case.
    """
    prose_length = _count_substantive_prose_chars(body)
    has_headers = bool(_iter_section_headers(body))

    if prose_length < TRIVIAL_BODY_CHAR_THRESHOLD and not has_headers:
        return TRIVIAL_SHAPE

    is_heavy = prose_length >= HEAVY_MIN_BODY_CHARS_FOR_CLASSIFICATION
    return HEAVY_SHAPE if is_heavy else STANDARD_SHAPE
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _body_contains_any_header(body: str, all_candidate_headers: frozenset[str]) -> bool:
    """Report whether the body has a heading matching any candidate header.

    Matching is case-insensitive. A heading matches a candidate when the two
    are equal, or when the heading starts with the candidate and the next
    character is neither alphanumeric nor an underscore.
    """
    present_headers = {each_header.lower() for each_header in _iter_section_headers(body)}
    for each_candidate in all_candidate_headers:
        wanted_header = each_candidate.lower()
        if wanted_header in present_headers:
            return True
        for each_present in present_headers:
            if not each_present.startswith(wanted_header):
                continue
            # An exact match was ruled out above, so this heading is strictly
            # longer than the candidate and the index below is in range.
            following_character = each_present[len(wanted_header)]
            if following_character != "_" and not following_character.isalnum():
                return True
    return False
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _matches_self_closing_reference(body: str, pr_number: int) -> bool:
    """Report whether the body matches the self-reference pattern for ``pr_number``.

    The pattern source is SELF_REFERENCE_PATTERN_TEMPLATE formatted with the
    PR number, and it is searched for case-insensitively anywhere in the body.
    """
    self_reference_regex = SELF_REFERENCE_PATTERN_TEMPLATE.format(pr_number=pr_number)
    return re.search(self_reference_regex, body, re.IGNORECASE) is not None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _opens_with_this_pr_phrase(body: str) -> bool:
    """Report whether THIS_PR_OPENING_PATTERN finds a match in the body."""
    opening_match = THIS_PR_OPENING_PATTERN.search(body)
    return opening_match is not None
|