claude-dev-env 1.49.1 → 1.50.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/audit-rubrics/category_rubrics/category-a-api-contracts.md +17 -3
- package/audit-rubrics/prompts/category-a-api-contracts.md +17 -2
- package/docs/CODE_RULES.md +6 -1
- package/hooks/blocking/_gh_body_arg_utils.py +67 -11
- package/hooks/blocking/_md_to_html_blocker_test_support.py +65 -0
- package/hooks/blocking/code_rules_enforcer.py +386 -32
- package/hooks/blocking/conftest.py +30 -0
- package/hooks/blocking/md_to_html_blocker.py +2 -2
- package/hooks/blocking/pr_description_body_audit.py +148 -0
- package/hooks/blocking/pr_description_command_parser.py +233 -0
- package/hooks/blocking/pr_description_enforcer.py +36 -825
- package/hooks/blocking/pr_description_pr_number.py +153 -0
- package/hooks/blocking/pr_description_readability.py +366 -0
- package/hooks/blocking/test_code_rules_enforcer.py +65 -0
- package/hooks/blocking/test_code_rules_enforcer_docstring_args_signature.py +256 -0
- package/hooks/blocking/test_code_rules_enforcer_function_length.py +136 -5
- package/hooks/blocking/test_code_rules_enforcer_ignored_must_check_return.py +256 -0
- package/hooks/blocking/test_code_rules_enforcer_naming_pattern.py +137 -1
- package/hooks/blocking/test_md_to_html_blocker_exemptions.py +368 -0
- package/hooks/blocking/test_md_to_html_blocker_extensions.py +157 -0
- package/hooks/blocking/test_md_to_html_blocker_path_resolution.py +336 -0
- package/hooks/blocking/test_pr_description_enforcer.py +13 -1499
- package/hooks/blocking/test_pr_description_enforcer_body_audit.py +247 -0
- package/hooks/blocking/test_pr_description_enforcer_body_rules.py +493 -0
- package/hooks/blocking/test_pr_description_enforcer_command_parser.py +366 -0
- package/hooks/blocking/test_pr_description_enforcer_pr_number.py +159 -0
- package/hooks/blocking/test_pr_description_enforcer_readability.py +443 -0
- package/hooks/hooks_constants/blocking_check_limits.py +2 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +15 -1
- package/hooks/hooks_constants/md_to_html_blocker_constants.py +1 -1
- package/hooks/hooks_constants/pr_description_enforcer_constants.py +7 -0
- package/hooks/hooks_constants/test_md_to_html_blocker_constants.py +11 -4
- package/package.json +1 -1
- package/hooks/blocking/test_md_to_html_blocker.py +0 -772
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Detect body flags and recover the positional PR number from a gh command.
|
|
2
|
+
|
|
3
|
+
Reports whether a captured shell command carries any body or body-file flag,
|
|
4
|
+
and extracts the positional PR number (bare integer or GitHub PR URL) from a
|
|
5
|
+
gh pr edit/comment command while skipping value-taking flags and their values.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import shlex
|
|
10
|
+
import sys
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
_hooks_dir = str(Path(__file__).resolve().parent.parent)
|
|
14
|
+
if _hooks_dir not in sys.path:
|
|
15
|
+
sys.path.insert(0, _hooks_dir)
|
|
16
|
+
|
|
17
|
+
from blocking._gh_body_arg_utils import ( # noqa: E402
|
|
18
|
+
all_body_flags,
|
|
19
|
+
body_file_flag,
|
|
20
|
+
body_file_short_flag,
|
|
21
|
+
count_extra_tokens_to_skip_for_split_quoted_value,
|
|
22
|
+
get_logical_first_line,
|
|
23
|
+
is_flag_shaped_token,
|
|
24
|
+
is_unresolvable_shell_value,
|
|
25
|
+
match_body_file_equals_prefix,
|
|
26
|
+
match_body_flag_equals_prefix,
|
|
27
|
+
match_non_body_value_flag_equals_prefix,
|
|
28
|
+
non_body_value_flags,
|
|
29
|
+
strip_surrounding_quotes,
|
|
30
|
+
)
|
|
31
|
+
from hooks_constants.pr_description_enforcer_constants import ( # noqa: E402
|
|
32
|
+
GH_PR_COMMAND_MIN_TOKEN_COUNT,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _resolve_positional_pr_number(token: str) -> int | None:
    """Return the PR number named by a positional token, or None if it is not one.

    Two token shapes are accepted once ``strip_surrounding_quotes`` has been
    applied:

    - a bare run of ASCII digits (``123``);
    - a PR URL of the form ``http(s)://<host>/<owner>/<repo>/pull/<number>``,
      optionally followed by a further path, a query string, or a fragment.

    The bare form is matched against ``[0-9]+`` before conversion because
    ``int()`` alone also accepts underscore separators (``1_000``), a leading
    sign, surrounding whitespace and non-ASCII digits, none of which spell a
    PR number.

    Args:
        token: One raw positional token from the tokenized command.

    Returns:
        The PR number, or None when the token is rejected by
        ``is_unresolvable_shell_value``, matches neither shape, or cannot be
        converted to an integer.
    """
    stripped_candidate = strip_surrounding_quotes(token)
    if is_unresolvable_shell_value(stripped_candidate):
        return None
    url_match = re.match(
        r"^https?://[^/]+/[^/]+/[^/]+/pull/(\d+)(?:[/?#].*)?$",
        stripped_candidate,
    )
    if url_match is not None:
        number_text = url_match.group(1)
    elif re.fullmatch(r"[0-9]+", stripped_candidate):
        number_text = stripped_candidate
    else:
        return None
    try:
        return int(number_text)
    except ValueError:
        # int() can still refuse the text (for example an over-long digit run).
        return None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _extract_pr_number_from_command(command: str) -> int | None:
    """Find the positional PR number in a ``gh pr edit`` / ``gh pr comment`` command.

    Only the logical first line of the command is tokenized. Tokens after the
    subcommand are walked left to right: ``--flag=value`` forms and bare
    value-taking flags are skipped together with their values, any other
    flag-shaped token is skipped on its own, and the first remaining token
    decides the outcome.

    Args:
        command: The raw shell command captured by the hook.

    Returns:
        The PR number when the first positional token is a bare integer or a
        GitHub PR URL (``https://github.com/o/r/pull/<n>``). None when the
        command is empty, cannot be tokenized, is not ``gh pr edit|comment``,
        or has no such positional token.
    """
    first_line = get_logical_first_line(command)
    if not first_line:
        return None
    try:
        # Non-POSIX mode leaves quote characters on the tokens.
        tokens = shlex.split(first_line, posix=False)
    except ValueError:
        return None
    if len(tokens) < GH_PR_COMMAND_MIN_TOKEN_COUNT:
        return None
    if tokens[0] != "gh" or tokens[1] != "pr" or tokens[2] not in {"edit", "comment"}:
        return None

    value_flags: frozenset[str] = (
        non_body_value_flags | all_body_flags | {body_file_flag, body_file_short_flag}
    )
    token_total = len(tokens)
    position = GH_PR_COMMAND_MIN_TOKEN_COUNT
    while position < token_total:
        token = tokens[position]
        equals_prefix = (
            match_non_body_value_flag_equals_prefix(token)
            or match_body_flag_equals_prefix(token)
            or match_body_file_equals_prefix(token)
        )
        if equals_prefix is not None:
            # NOTE(review): presumably the helper counts the follow-on tokens of
            # a quoted value that the tokenizer split apart -- confirm there.
            inline_value = token[len(equals_prefix) :]
            skipped = count_extra_tokens_to_skip_for_split_quoted_value(
                tokens[position + 1 :], inline_value
            )
            position += 1 + (skipped or 0)
            continue
        if token in value_flags:
            # Step over the flag and, when one follows, its value token.
            position = min(position + 2, token_total)
            continue
        if is_flag_shaped_token(token):
            position += 1
            continue
        # First positional token: it either names the PR or nothing does.
        return _resolve_positional_pr_number(token)
    return None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _command_carries_body_flag(command: str) -> bool:
|
|
126
|
+
"""Return True when the command string carries any body or body-file flag.
|
|
127
|
+
|
|
128
|
+
Detects the body/body-file forms accepted by ``gh pr {create,edit,comment}``:
|
|
129
|
+
|
|
130
|
+
- Long flags: a single ``"--body" in command`` substring check catches
|
|
131
|
+
every long form — ``--body``, ``--body=<value>``, ``--body-file``, and
|
|
132
|
+
``--body-file=<value>`` — because ``--body`` is a prefix of
|
|
133
|
+
``--body-file``. No separate ``--body-file`` check is needed.
|
|
134
|
+
- Short flags, space-separated: ``-b <value>``, ``-F <value>`` — matched
|
|
135
|
+
as `` -b `` and `` -F `` so the literal substring cannot collide with a
|
|
136
|
+
surrounding token (e.g. ``-base``, ``-Foo``).
|
|
137
|
+
- Short flags, equal-attached: ``-b=<value>``, ``-F=<value>`` — matched
|
|
138
|
+
as `` -b=`` and `` -F=`` for the same anti-collision reason. The test
|
|
139
|
+
suite relies on this detection path.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
command: The raw shell command captured by the hook.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
True if any documented body or body-file flag appears in the command.
|
|
146
|
+
"""
|
|
147
|
+
return (
|
|
148
|
+
"--body" in command
|
|
149
|
+
or " -b " in command
|
|
150
|
+
or " -b=" in command
|
|
151
|
+
or " -F " in command
|
|
152
|
+
or " -F=" in command
|
|
153
|
+
)
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""Score PR body readability and manage its persisted strike/threshold state.
|
|
2
|
+
|
|
3
|
+
Computes Flesch Reading Ease and sentence-length metrics over the intro and
|
|
4
|
+
first section of a PR body, escalates repeated readability failures through a
|
|
5
|
+
persisted strike counter, applies the loosen/reset/enable/disable threshold
|
|
6
|
+
overrides, and dispatches the readability-management CLI flags.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import math
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import TextIO
|
|
16
|
+
|
|
17
|
+
_hooks_dir = str(Path(__file__).resolve().parent.parent)
|
|
18
|
+
if _hooks_dir not in sys.path:
|
|
19
|
+
sys.path.insert(0, _hooks_dir)
|
|
20
|
+
|
|
21
|
+
from blocking.pr_description_body_audit import strip_markdown_ceremony # noqa: E402
|
|
22
|
+
from hooks_constants.pr_description_enforcer_constants import ( # noqa: E402
|
|
23
|
+
ATOMIC_WRITE_TEMP_SUFFIX,
|
|
24
|
+
DEFAULT_READABILITY_THRESHOLDS,
|
|
25
|
+
FENCED_CODE_BLOCK_PATTERN,
|
|
26
|
+
FLESCH_BASE_SCORE,
|
|
27
|
+
FLESCH_PERFECT_SCORE,
|
|
28
|
+
FLESCH_SYLLABLES_PER_WORD_COEFFICIENT,
|
|
29
|
+
FLESCH_WORDS_PER_SENTENCE_COEFFICIENT,
|
|
30
|
+
HEADING_LINE_PATTERN,
|
|
31
|
+
READABILITY_AVG_SENTENCE_WORDS_CEILING,
|
|
32
|
+
READABILITY_ENABLED_STATE_FILE,
|
|
33
|
+
READABILITY_FLESCH_LOOSEN_FACTOR,
|
|
34
|
+
READABILITY_LOOSEN_CAP,
|
|
35
|
+
READABILITY_MAX_SENTENCE_WORDS_CEILING,
|
|
36
|
+
READABILITY_MIN_FLESCH_FLOOR,
|
|
37
|
+
READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR,
|
|
38
|
+
READABILITY_STATE_FILE,
|
|
39
|
+
READABILITY_THRESHOLD_OVERRIDE_FILE,
|
|
40
|
+
ReadabilityThresholds,
|
|
41
|
+
)
|
|
42
|
+
from hooks_constants.setup_project_paths_constants import UTF8_ENCODING # noqa: E402
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _atomic_write_json(target_path: Path, all_payload_fields: dict[str, object]) -> None:
    """Write a JSON payload to ``target_path`` via a temp file and ``os.replace``.

    The payload is serialized to a sibling file whose name is the target's
    name with ``ATOMIC_WRITE_TEMP_SUFFIX`` appended, then moved over the
    target with a single ``os.replace`` call. If serialization or the move
    fails, the temp file is removed before the error propagates so no stray
    file is left next to the target.

    Args:
        target_path: Destination file; missing parent directories are created.
        all_payload_fields: Mapping to serialize with ``json.dump``.

    Raises:
        OSError: Creating the directory, writing the temp file, or replacing
            the target failed.
        TypeError: The payload holds a value ``json.dump`` cannot serialize.
    """
    target_path.parent.mkdir(parents=True, exist_ok=True)
    temporary_path = target_path.with_suffix(target_path.suffix + ATOMIC_WRITE_TEMP_SUFFIX)
    try:
        with open(temporary_path, "w", encoding=UTF8_ENCODING) as write_handle:
            json.dump(all_payload_fields, write_handle)
        os.replace(temporary_path, target_path)
    except BaseException:
        # Best-effort cleanup: a failure to delete must not mask the real error.
        try:
            temporary_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _read_json_or_default(
    target_path: Path, all_default_payload_fields: dict[str, object]
) -> dict[str, object]:
    """Load a JSON object from ``target_path``, or fall back to the defaults.

    Args:
        target_path: File expected to hold a JSON object.
        all_default_payload_fields: Fallback mapping; it is copied, never
            returned directly.

    Returns:
        The parsed object when the file holds one. Otherwise a fresh copy of
        the defaults: when the file is missing or unreadable, is not valid
        UTF-8, is not valid JSON, is nested too deeply to parse, or its
        top-level value is not an object.
    """
    try:
        with open(target_path, "r", encoding=UTF8_ENCODING) as read_handle:
            loaded_payload = json.load(read_handle)
    except (OSError, ValueError, RecursionError):
        # OSError covers missing and unreadable files. ValueError covers both
        # json.JSONDecodeError and the UnicodeDecodeError raised for bytes
        # that are not valid in the configured encoding.
        return dict(all_default_payload_fields)
    if not isinstance(loaded_payload, dict):
        return dict(all_default_payload_fields)
    return loaded_payload
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _read_strike_count() -> int:
    """Return the persisted readability strike count, never negative.

    Reads the ``"strikes"`` field of ``READABILITY_STATE_FILE``. A missing
    field, a boolean, or any non-integer value yields 0, and a negative
    integer is clamped to 0.
    """
    stored_state = _read_json_or_default(READABILITY_STATE_FILE, {"strikes": 0})
    strikes = stored_state.get("strikes", 0)
    # bool is a subclass of int, so it has to be ruled out explicitly.
    if isinstance(strikes, bool) or not isinstance(strikes, int):
        return 0
    return max(strikes, 0)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _increment_strike_count() -> int:
    """Add one strike to the persisted counter and return the new total.

    The starting value comes from ``_read_strike_count``, so an absent,
    malformed, or negative stored count is treated as 0 in exactly the same
    way as when the counter is only read. The new total is written back to
    ``READABILITY_STATE_FILE``.

    Returns:
        The strike count after the increment (always at least 1).
    """
    new_count = _read_strike_count() + 1
    _atomic_write_json(READABILITY_STATE_FILE, {"strikes": new_count})
    return new_count
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _reset_strike_count() -> None:
    """Persist a strike count of zero to ``READABILITY_STATE_FILE``."""
    zeroed_state: dict[str, object] = {"strikes": 0}
    _atomic_write_json(READABILITY_STATE_FILE, zeroed_state)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _load_readability_thresholds() -> ReadabilityThresholds:
    """Build the active thresholds from the override file, field by field.

    Each of ``flesch_min``, ``max_sentence_words`` and ``avg_sentence_words``
    is taken from ``READABILITY_THRESHOLD_OVERRIDE_FILE`` when the stored
    value is a non-boolean integer. Otherwise the matching field of
    ``DEFAULT_READABILITY_THRESHOLDS`` is used.
    """
    overrides = _read_json_or_default(READABILITY_THRESHOLD_OVERRIDE_FILE, {})

    def _override_or_default(field_name: str):
        # Accept the stored value only when it is a real int (bool excluded).
        fallback = getattr(DEFAULT_READABILITY_THRESHOLDS, field_name)
        candidate = overrides.get(field_name, fallback)
        if isinstance(candidate, int) and not isinstance(candidate, bool):
            return candidate
        return fallback

    return ReadabilityThresholds(
        flesch_min=_override_or_default("flesch_min"),
        max_sentence_words=_override_or_default("max_sentence_words"),
        avg_sentence_words=_override_or_default("avg_sentence_words"),
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _read_loosens_used() -> int:
    """Return how many loosen steps are recorded in the override file.

    Reads ``"loosens_used"`` from ``READABILITY_THRESHOLD_OVERRIDE_FILE``. A
    missing field, a boolean, or any non-integer value yields 0, and a
    negative integer is clamped to 0.
    """
    overrides = _read_json_or_default(READABILITY_THRESHOLD_OVERRIDE_FILE, {})
    recorded = overrides.get("loosens_used", 0)
    # bool is a subclass of int, so it has to be ruled out explicitly.
    if isinstance(recorded, bool) or not isinstance(recorded, int):
        return 0
    return max(recorded, 0)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _is_readability_enabled() -> bool:
    """Report whether the readability check is switched on.

    Reads ``"enabled"`` from ``READABILITY_ENABLED_STATE_FILE``. Anything
    other than a stored boolean, including a missing field, counts as enabled.
    """
    stored_state = _read_json_or_default(READABILITY_ENABLED_STATE_FILE, {"enabled": True})
    switch = stored_state.get("enabled", True)
    return switch if isinstance(switch, bool) else True
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _set_readability_enabled(enabled: bool) -> None:
    """Persist the readability on/off switch to ``READABILITY_ENABLED_STATE_FILE``.

    Args:
        enabled: True to turn the readability check on, False to turn it off.
    """
    switch_state: dict[str, object] = {"enabled": enabled}
    _atomic_write_json(READABILITY_ENABLED_STATE_FILE, switch_state)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _count_syllables_in_word(word: str) -> int:
|
|
139
|
+
all_vowel_characters: frozenset[str] = frozenset("aeiouy")
|
|
140
|
+
cleaned_word = "".join(
|
|
141
|
+
each_character for each_character in word.lower() if each_character.isalpha()
|
|
142
|
+
)
|
|
143
|
+
if not cleaned_word:
|
|
144
|
+
return 0
|
|
145
|
+
syllable_count = 0
|
|
146
|
+
is_previous_character_vowel = False
|
|
147
|
+
for each_character in cleaned_word:
|
|
148
|
+
is_vowel = each_character in all_vowel_characters
|
|
149
|
+
if is_vowel and not is_previous_character_vowel:
|
|
150
|
+
syllable_count += 1
|
|
151
|
+
is_previous_character_vowel = is_vowel
|
|
152
|
+
if cleaned_word.endswith("e") and syllable_count > 1:
|
|
153
|
+
syllable_count -= 1
|
|
154
|
+
return max(syllable_count, 1)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _split_sentences(text: str) -> list[str]:
|
|
158
|
+
sentence_split_pattern = re.compile(r"[.!?]+\s+")
|
|
159
|
+
cleaned_text = text.strip()
|
|
160
|
+
if not cleaned_text:
|
|
161
|
+
return []
|
|
162
|
+
raw_pieces = sentence_split_pattern.split(cleaned_text)
|
|
163
|
+
all_sentences = [each_piece.strip() for each_piece in raw_pieces if each_piece.strip()]
|
|
164
|
+
return all_sentences
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _compute_flesch_reading_ease(text: str) -> float:
    """Score ``text`` with the Flesch Reading Ease formula.

    The score is ``FLESCH_BASE_SCORE`` minus the words-per-sentence ratio and
    the syllables-per-word ratio, each scaled by its coefficient constant.
    Sentences come from ``_split_sentences``, words are the
    whitespace-delimited tokens of each sentence, and syllables come from
    ``_count_syllables_in_word``.

    Args:
        text: Prose to score.

    Returns:
        The computed score, or ``FLESCH_PERFECT_SCORE`` when the text holds no
        sentences or no words.
    """
    sentences = _split_sentences(text)
    if not sentences:
        return FLESCH_PERFECT_SCORE
    words = [word for sentence in sentences for word in sentence.split()]
    if not words:
        return FLESCH_PERFECT_SCORE
    syllable_total = sum(_count_syllables_in_word(word) for word in words)
    words_per_sentence = len(words) / len(sentences)
    syllables_per_word = syllable_total / len(words)
    return (
        FLESCH_BASE_SCORE
        - FLESCH_WORDS_PER_SENTENCE_COEFFICIENT * words_per_sentence
        - FLESCH_SYLLABLES_PER_WORD_COEFFICIENT * syllables_per_word
    )
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _extract_readability_target_text(body: str) -> str:
    """Return the ceremony-stripped prose window scored for readability.

    Fenced code blocks are removed and leading whitespace trimmed. The window
    is then built from two pieces:

    - the intro paragraph, which ends at whichever comes first of the first
      blank line and the first heading match (the whole text when neither
      exists);
    - the first section, which runs from the end of the first heading match
      to the start of the next one (or to the end of the text). It is empty
      when the text has no heading.

    Args:
        body: The raw PR body markdown text.

    Returns:
        The intro and first section joined by a blank line and passed through
        ``strip_markdown_ceremony``.
    """
    prose = FENCED_CODE_BLOCK_PATTERN.sub("", body).lstrip()
    first_header = HEADING_LINE_PATTERN.search(prose)

    # Gather the candidate intro boundaries that are actually present.
    candidate_boundaries = [prose.find("\n\n")]
    if first_header is not None:
        candidate_boundaries.append(first_header.start())
    found_boundaries = [each for each in candidate_boundaries if each != -1]
    intro = prose[: min(found_boundaries)] if found_boundaries else prose

    first_section = ""
    if first_header is not None:
        after_header = prose[first_header.end() :]
        following_header = HEADING_LINE_PATTERN.search(after_header)
        if following_header is None:
            first_section = after_header
        else:
            first_section = after_header[: following_header.start()]

    return strip_markdown_ceremony(f"{intro}\n\n{first_section}")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _evaluate_readability_metrics(
    target_text: str,
    thresholds: ReadabilityThresholds,
) -> list[str]:
    """Check the target text against each readability threshold.

    Three metrics are checked in order: the longest sentence's word count,
    the average words per sentence, and the Flesch Reading Ease score.

    Args:
        target_text: The prose window to score.
        thresholds: Limits for the three metrics.

    Returns:
        One message per violated threshold, in the order above; an empty list
        when the text has no sentences or every metric is within limits.
    """
    sentences = _split_sentences(target_text)
    if not sentences:
        return []
    words_per_sentence = [len(sentence.split()) for sentence in sentences]
    longest_sentence_words = max(words_per_sentence)
    mean_sentence_words = sum(words_per_sentence) / len(words_per_sentence)

    violations: list[str] = []
    if longest_sentence_words > thresholds.max_sentence_words:
        violations.append(
            f"Readability: longest sentence is {longest_sentence_words} words "
            f"(maximum {thresholds.max_sentence_words}); "
            "split or rewrite the longest sentence"
        )
    if mean_sentence_words > thresholds.avg_sentence_words:
        violations.append(
            f"Readability: average sentence is {mean_sentence_words:.1f} words "
            f"(maximum {thresholds.avg_sentence_words}); "
            "shorten or split your longest sentences"
        )
    reading_ease = _compute_flesch_reading_ease(target_text)
    if reading_ease < thresholds.flesch_min:
        violations.append(
            f"Readability: Flesch Reading Ease is {reading_ease:.1f} "
            f"(minimum {thresholds.flesch_min}); use shorter words and sentences"
        )
    return violations
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _build_readability_escape_hatch_message() -> str:
|
|
282
|
+
return (
|
|
283
|
+
"Readability strike threshold reached. Pick one: "
|
|
284
|
+
"(1) python <enforcer-path> --readability-loosen to widen thresholds 10%, "
|
|
285
|
+
"(2) python <enforcer-path> --readability-disable to skip the readability check, "
|
|
286
|
+
"(3) python <enforcer-path> --readability-reset to zero the strike counter, "
|
|
287
|
+
"(4) reply with the body plus the intended message to report a false positive."
|
|
288
|
+
)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _apply_readability_loosen() -> str:
    """Loosen every readability threshold by one step and persist the result.

    The Flesch minimum is scaled by ``READABILITY_FLESCH_LOOSEN_FACTOR`` and
    rounded down; both sentence-length limits are scaled by
    ``READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR`` and rounded up. Each result
    is clamped to its floor or ceiling constant before being written to
    ``READABILITY_THRESHOLD_OVERRIDE_FILE`` with the incremented loosen count.

    Returns:
        ``"cap_reached"`` when ``READABILITY_LOOSEN_CAP`` loosens were already
        used; ``"floor_reached"`` when the Flesch minimum is already at or
        below its floor; ``"ceiling_reached"`` when either sentence-length
        limit is already at or above its ceiling; ``"ok"`` after the new
        thresholds were written. Nothing is written for the first three.
    """
    thresholds = _load_readability_thresholds()
    used_so_far = _read_loosens_used()

    if used_so_far >= READABILITY_LOOSEN_CAP:
        return "cap_reached"
    if thresholds.flesch_min <= READABILITY_MIN_FLESCH_FLOOR:
        return "floor_reached"
    if (
        thresholds.max_sentence_words >= READABILITY_MAX_SENTENCE_WORDS_CEILING
        or thresholds.avg_sentence_words >= READABILITY_AVG_SENTENCE_WORDS_CEILING
    ):
        return "ceiling_reached"

    loosened_flesch = math.floor(thresholds.flesch_min * READABILITY_FLESCH_LOOSEN_FACTOR)
    loosened_max_words = math.ceil(
        thresholds.max_sentence_words * READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR
    )
    loosened_avg_words = math.ceil(
        thresholds.avg_sentence_words * READABILITY_SENTENCE_WORDS_LOOSEN_FACTOR
    )
    loosened_overrides: dict[str, object] = {
        "flesch_min": max(READABILITY_MIN_FLESCH_FLOOR, loosened_flesch),
        "max_sentence_words": min(READABILITY_MAX_SENTENCE_WORDS_CEILING, loosened_max_words),
        "avg_sentence_words": min(READABILITY_AVG_SENTENCE_WORDS_CEILING, loosened_avg_words),
        "loosens_used": used_so_far + 1,
    }
    _atomic_write_json(READABILITY_THRESHOLD_OVERRIDE_FILE, loosened_overrides)
    return "ok"
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _apply_readability_reset() -> None:
    """Zero the strike counter and clear the persisted threshold overrides.

    The override file is rewritten to hold only ``{"loosens_used": 0}``, so it
    no longer carries any threshold fields.
    """
    _reset_strike_count()
    cleared_overrides: dict[str, object] = {"loosens_used": 0}
    _atomic_write_json(READABILITY_THRESHOLD_OVERRIDE_FILE, cleared_overrides)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _dispatch_cli_flag(
    flag_token: str,
    output_stream: TextIO,
    error_stream: TextIO,
) -> None:
    """Run one readability-management CLI flag, report the outcome, and exit.

    Every recognized flag ends in ``sys.exit``: status 1 when a loosen request
    is refused (cap, floor, or ceiling reached), status 0 otherwise. An
    unrecognized ``flag_token`` matches no branch and the function returns
    normally.

    Args:
        flag_token: One of ``--readability-loosen``, ``--readability-reset``,
            ``--readability-disable`` or ``--readability-enable``.
        output_stream: Receives the success message.
        error_stream: Receives the refusal message for a rejected loosen.

    Raises:
        SystemExit: For every recognized flag.
    """

    def _report_and_exit(stream: TextIO, message: str, exit_status: int) -> None:
        # Write the outcome line, then leave the process with the given status.
        stream.write(message)
        sys.exit(exit_status)

    if flag_token == "--readability-loosen":
        loosen_outcome = _apply_readability_loosen()
        if loosen_outcome == "cap_reached":
            _report_and_exit(
                error_stream,
                "loosen cap reached; use --readability-disable or --readability-reset\n",
                1,
            )
        if loosen_outcome in ("floor_reached", "ceiling_reached"):
            _report_and_exit(
                error_stream,
                "thresholds already at floor/ceiling; use --readability-disable or --readability-reset\n",
                1,
            )
        _report_and_exit(output_stream, "readability thresholds loosened 10%\n", 0)
    if flag_token == "--readability-reset":
        _apply_readability_reset()
        _report_and_exit(
            output_stream, "readability strike counter and override thresholds reset\n", 0
        )
    if flag_token == "--readability-disable":
        _set_readability_enabled(False)
        _report_and_exit(output_stream, "readability check disabled\n", 0)
    if flag_token == "--readability-enable":
        _set_readability_enabled(True)
        _report_and_exit(output_stream, "readability check enabled\n", 0)
|
|
@@ -2602,3 +2602,68 @@ def test_banned_noun_word_boundary_flags_plural_results_identifier() -> None:
|
|
|
2602
2602
|
"a plural banned-noun identifier must be flagged by the word-boundary "
|
|
2603
2603
|
f"check; got: {issues!r}"
|
|
2604
2604
|
)
|
|
2605
|
+
|
|
2606
|
+
|
|
2607
|
+
def test_ignored_must_check_return_flags_bare_awaited_call() -> None:
    """A bare ``await find_and_click(...)`` statement must be reported exactly once.

    The curated must-check functions are async, so the usual real call site
    wraps the call in ``await``. The check has to unwrap ``ast.Await`` before
    looking for the call, otherwise this statement goes unflagged.
    """
    all_source_lines = [
        "async def step() -> None:",
        " await find_and_click('#x')",
    ]
    source = "".join(f"{each_line}\n" for each_line in all_source_lines)
    issues = code_rules_enforcer.check_ignored_must_check_return(
        source, "/project/src/clicker.py"
    )
    all_matching_issues = [each_issue for each_issue in issues if "find_and_click" in each_issue]
    assert all_matching_issues, (
        f"a bare awaited must-check call must be flagged; got: {issues!r}"
    )
    assert len(issues) == 1
|
|
2622
|
+
|
|
2623
|
+
|
|
2624
|
+
def test_ignored_must_check_return_exempts_consumed_awaited_call() -> None:
    """An awaited must-check call whose result is assigned or branched on is not flagged."""
    assigned = "async def step() -> None:\n clicked = await find_and_click('#x')\n print(clicked)\n"
    branched = "async def step() -> None:\n if await find_and_click('#x'):\n  pass\n"
    assigned_issues = code_rules_enforcer.check_ignored_must_check_return(
        assigned, "/project/src/clicker.py"
    )
    assert assigned_issues == []
    branched_issues = code_rules_enforcer.check_ignored_must_check_return(
        branched, "/project/src/clicker.py"
    )
    assert branched_issues == []
|
|
2636
|
+
|
|
2637
|
+
|
|
2638
|
+
def test_ignored_must_check_return_flags_edited_line_past_a_cap_of_earlier_violations() -> None:
    """The edited-line violation must survive when earlier calls could fill the cap.

    If only a cap's worth of violations is collected in ``ast.walk`` order
    before scoping, the earlier out-of-scope calls use up the cap and the
    edited-line call is discarded, which is the very violation the scoped
    enforcer exists to block. Every violation has to be collected first so
    the edited line is still there when the diff filter runs.
    """
    pre_existing_call_count = 5
    # Line 1 is the ``def``; the earlier calls follow, then the edited call.
    edited_call_line_number = pre_existing_call_count + 2
    all_lines = ["async def step() -> None:"]
    all_lines.extend(
        f" await find_and_click('#x{each_index}')"
        for each_index in range(pre_existing_call_count)
    )
    all_lines.append(" await find_and_click('#edited')")
    source = "".join(f"{each_line}\n" for each_line in all_lines)
    issues = code_rules_enforcer.check_ignored_must_check_return(
        source,
        "/project/src/clicker.py",
        {edited_call_line_number},
        False,
    )
    assert len(issues) == 1, (
        f"the edited-line violation must survive a cap's worth of earlier calls; got: {issues!r}"
    )
    (only_issue,) = issues
    assert f"Line {edited_call_line_number}:" in only_issue, (
        f"the single issue must name the edited line {edited_call_line_number}; got: {issues!r}"
    )
|