claude-dev-env 1.44.0 → 1.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +9 -0
- package/_shared/pr-loop/scripts/code_rules_gate.py +426 -85
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/code_rules_gate_constants.py +20 -0
- package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +625 -21
- package/_shared/pr-loop/scripts/tests/test_code_rules_gate_constants.py +15 -0
- package/agents/clean-coder.md +7 -1
- package/agents/code-quality-agent.md +8 -5
- package/hooks/blocking/code_rules_enforcer.py +1562 -37
- package/hooks/blocking/open_questions_in_plans_blocker.py +249 -0
- package/hooks/blocking/test_code_rules_enforcer.py +1389 -0
- package/hooks/blocking/test_code_rules_enforcer_banned_noun_word.py +292 -0
- package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +46 -8
- package/hooks/blocking/test_code_rules_enforcer_exempt_marker_chained.py +189 -0
- package/hooks/blocking/test_code_rules_enforcer_function_length.py +210 -0
- package/hooks/blocking/test_code_rules_enforcer_tests_isolate_home_temp.py +1512 -0
- package/hooks/blocking/test_code_rules_enforcer_unused_imports.py +9 -5
- package/hooks/blocking/test_open_questions_in_plans_blocker.py +790 -0
- package/hooks/hooks.json +10 -0
- package/hooks/hooks_constants/banned_identifiers_constants.py +19 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +129 -2
- package/hooks/hooks_constants/open_questions_in_plans_blocker_constants.py +35 -0
- package/hooks/hooks_constants/test_open_questions_in_plans_blocker_constants.py +125 -0
- package/package.json +1 -1
- package/skills/_shared/pr-loop/scripts/_path_resolver.py +34 -13
- package/skills/_shared/pr-loop/scripts/init_loop_state.py +1 -2
- package/skills/_shared/pr-loop/scripts/teardown_worktrees.py +1 -4
- package/skills/_shared/pr-loop/scripts/test__path_resolver.py +57 -0
- package/skills/_shared/pr-loop/scripts/test_init_loop_state.py +48 -0
- package/skills/_shared/pr-loop/scripts/test_teardown_worktrees.py +59 -0
- package/skills/bugteam/PROMPTS.md +48 -12
- package/skills/bugteam/reference/team-setup.md +4 -2
- package/skills/bugteam/scripts/bugteam_code_rules_gate.py +487 -76
- package/skills/bugteam/scripts/bugteam_scripts_constants/bugteam_code_rules_gate_constants.py +22 -1
- package/skills/bugteam/scripts/test_bugteam_code_rules_gate.py +597 -12
|
@@ -23,6 +23,7 @@ the suffix variants, so edits to this file include the bypass sentinel
|
|
|
23
23
|
``# pragma: no-tdd-gate`` until the TDD hook learns the suffix convention.
|
|
24
24
|
"""
|
|
25
25
|
import ast
|
|
26
|
+
import difflib
|
|
26
27
|
import io
|
|
27
28
|
import json
|
|
28
29
|
import re
|
|
@@ -42,8 +43,11 @@ if _HOOKS_DIR not in sys.path:
|
|
|
42
43
|
from code_rules_path_utils import is_config_file # noqa: E402
|
|
43
44
|
from hooks_constants.banned_identifiers_constants import ( # noqa: E402
|
|
44
45
|
ALL_BANNED_IDENTIFIERS,
|
|
46
|
+
ALL_BANNED_NOUN_WORDS,
|
|
45
47
|
BANNED_IDENTIFIER_MESSAGE_SUFFIX,
|
|
46
48
|
BANNED_IDENTIFIER_SKIP_ADVISORY,
|
|
49
|
+
BANNED_NOUN_WORD_MESSAGE_SUFFIX,
|
|
50
|
+
CAMEL_CASE_WORD_PATTERN,
|
|
47
51
|
MAX_BANNED_IDENTIFIER_ISSUES,
|
|
48
52
|
)
|
|
49
53
|
from hooks_constants.hardcoded_user_path_constants import ( # noqa: E402
|
|
@@ -100,16 +104,45 @@ from hooks_constants.code_rules_enforcer_constants import ( # noqa: E402
|
|
|
100
104
|
ADVISORY_LINE_THRESHOLD_SOFT,
|
|
101
105
|
ALL_CODE_EXTENSIONS,
|
|
102
106
|
ALL_CAPS_WITH_UNDERSCORE_PATTERN,
|
|
107
|
+
ALL_FILESYSTEM_HOME_PROBE_DOTTED_NAMES,
|
|
108
|
+
ALL_DIR_ACCEPTING_TEMPFILE_FACTORY_DOTTED_NAMES,
|
|
109
|
+
ALL_SHARED_TEMP_SOURCE_PROBE_DOTTED_NAMES,
|
|
110
|
+
TEMPFILE_FACTORY_ISOLATION_DIRECTORY_KEYWORD,
|
|
111
|
+
ALL_HOME_DIRECTORY_ENV_VAR_NAMES,
|
|
112
|
+
ALL_ENVIRONMENT_GETTER_DOTTED_NAMES,
|
|
113
|
+
ALL_PROBE_RELEVANT_MODULE_CANONICAL_NAMES,
|
|
114
|
+
ALL_CANONICAL_DOTTED_NAMES_BY_BARE_IMPORT,
|
|
115
|
+
OS_ENVIRON_DOTTED_NAME,
|
|
116
|
+
ENVIRON_GET_METHOD_NAME,
|
|
117
|
+
ENVIRONMENT_VARIABLE_REFERENCE_PATTERN,
|
|
118
|
+
WINDOWS_PERCENT_VARIABLE_REFERENCE_PATTERN,
|
|
119
|
+
EXPANDVARS_DOTTED_NAME,
|
|
120
|
+
EXPANDUSER_DOTTED_NAME,
|
|
121
|
+
ALL_PATHLIB_STATIC_EXPANDUSER_DOTTED_NAMES,
|
|
122
|
+
PATHLIB_EXPANDUSER_METHOD_NAME,
|
|
123
|
+
ALL_PATHLIB_PATH_CONSTRUCTOR_CANONICAL_NAMES,
|
|
124
|
+
ALL_PROBE_ALIASABLE_CANONICAL_PREFIXES,
|
|
125
|
+
HOME_DIRECTORY_TILDE_PREFIX,
|
|
126
|
+
ALL_PYTEST_FILESYSTEM_ISOLATION_FIXTURE_NAMES,
|
|
127
|
+
PYTEST_USEFIXTURES_MARKER_NAME,
|
|
128
|
+
PYTEST_TEST_CLASS_NAME_PREFIX,
|
|
129
|
+
ALL_DIFF_CHANGED_OPCODE_TAGS,
|
|
130
|
+
FUNCTION_LENGTH_BLOCKING_MESSAGE_SUFFIX,
|
|
131
|
+
FUNCTION_LENGTH_BLOCKING_THRESHOLD,
|
|
132
|
+
BANNED_NOUN_SPAN_FRAGMENT_TEMPLATE,
|
|
103
133
|
BARE_EACH_TOKEN,
|
|
104
134
|
ALL_BOOLEAN_NAME_PREFIXES,
|
|
105
135
|
ALL_BUILTIN_DICT_METHOD_NAMES,
|
|
106
136
|
ALL_CLI_FILE_PATH_MARKERS,
|
|
137
|
+
CHAINED_INLINE_COMMENT_PATTERN,
|
|
107
138
|
COLLECTION_BY_NAME_PATTERN,
|
|
108
139
|
ALL_COLLECTION_TYPE_NAMES,
|
|
109
140
|
ALL_SUBSCRIPT_ONLY_COLLECTION_TYPE_NAMES,
|
|
110
141
|
DOTTED_SEGMENT_PATTERN,
|
|
111
142
|
EACH_PREFIX,
|
|
112
|
-
|
|
143
|
+
ALL_FREE_FORM_EXEMPT_COMMENT_BODIES,
|
|
144
|
+
ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES,
|
|
145
|
+
ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS,
|
|
113
146
|
ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES,
|
|
114
147
|
ALL_JAVASCRIPT_EXEMPT_INLINE_COMMENT_PREFIXES,
|
|
115
148
|
ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS,
|
|
@@ -117,6 +150,7 @@ from hooks_constants.code_rules_enforcer_constants import ( # noqa: E402
|
|
|
117
150
|
ALL_HOOK_INFRASTRUCTURE_PATTERNS,
|
|
118
151
|
ALL_IMPORT_STATEMENT_PREFIXES,
|
|
119
152
|
MAX_COMMENT_ISSUES,
|
|
153
|
+
TEST_ISOLATION_MESSAGE_SUFFIX,
|
|
120
154
|
INLINE_COLLECTION_MIN_LENGTH,
|
|
121
155
|
ALL_JAVASCRIPT_EXTENSIONS,
|
|
122
156
|
LOGGING_FSTRING_PATTERN,
|
|
@@ -146,7 +180,7 @@ def get_file_extension(file_path: str) -> str:
|
|
|
146
180
|
|
|
147
181
|
def is_hook_infrastructure(file_path: str) -> bool:
    """Report whether *file_path* points at Claude Code hook infrastructure.

    The path is lowercased, backslashes are turned into forward slashes, and
    every leading slash is collapsed into exactly one, so a pattern anchored
    with a leading slash can match a path that was given without one. Each
    entry of ``ALL_HOOK_INFRASTRUCTURE_PATTERNS`` has its backslashes turned
    into forward slashes and is then tested as a plain substring of the
    normalized path.

    Args:
        file_path: Path of the file being classified; absolute or relative,
            with either slash style.

    Returns:
        True when at least one infrastructure pattern occurs inside the
        normalized path, False otherwise.
    """
    forward_slash_path = file_path.lower().replace("\\", "/")
    anchored_path = "/" + forward_slash_path.lstrip("/")
    for each_pattern in ALL_HOOK_INFRASTRUCTURE_PATTERNS:
        if each_pattern.replace("\\", "/") in anchored_path:
            return True
    return False
|
|
151
185
|
|
|
152
186
|
|
|
@@ -819,13 +853,28 @@ def _is_exempt_python_comment(comment_token: tokenize.TokenInfo) -> bool:
|
|
|
819
853
|
line 2 or later) is NOT a real shebang and remains subject to the
|
|
820
854
|
no-comments rule.
|
|
821
855
|
|
|
822
|
-
Matches any prefix listed in
|
|
823
|
-
regardless of whether the directive sits flush
|
|
824
|
-
hash character or carries one or more whitespace
|
|
825
|
-
or tab) between the hash and the directive body.
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
856
|
+
Matches any prefix listed in the token-anchored or free-form exempt-
|
|
857
|
+
comment-body sets regardless of whether the directive sits flush
|
|
858
|
+
against the leading hash character or carries one or more whitespace
|
|
859
|
+
characters (space or tab) between the hash and the directive body.
|
|
860
|
+
|
|
861
|
+
Token-anchored markers (``noqa``, ``pylint:``, ``pragma:``) are
|
|
862
|
+
exempt only when the comment carries no chained second comment. Any
|
|
863
|
+
second ``#`` after the directive body — regardless of whitespace
|
|
864
|
+
around the inner hash, so ``# noqa: F401#note``,
|
|
865
|
+
``# noqa: F401 #prose``, and ``# noqa: F401 # imported for re-export``
|
|
866
|
+
all qualify — indicates a second free-form inline comment
|
|
867
|
+
piggybacking on the exempt marker; the trailing prose is not itself
|
|
868
|
+
an exempt directive and therefore must not inherit exemption. A
|
|
869
|
+
token-anchored directive body never legitimately carries a ``#``
|
|
870
|
+
(noqa codes, pylint symbols, and pragma directives contain none), so
|
|
871
|
+
any inner ``#`` reliably marks chained prose. Free-form markers
|
|
872
|
+
(``type:``, ``TODO``, ``FIXME``, ``HACK``, ``XXX``) accept any
|
|
873
|
+
trailing prose:
|
|
874
|
+
``# type:`` participates in the documented justification
|
|
875
|
+
convention enforced by ``check_type_escape_hatches`` (which
|
|
876
|
+
requires a trailing reason), and the TODO-family markers carry
|
|
877
|
+
annotation text by convention.
|
|
829
878
|
"""
|
|
830
879
|
comment_string = comment_token.string
|
|
831
880
|
if comment_string.startswith("#!") and comment_token.start == (1, 0):
|
|
@@ -833,7 +882,44 @@ def _is_exempt_python_comment(comment_token: tokenize.TokenInfo) -> bool:
|
|
|
833
882
|
directive_body = comment_string[1:].lstrip()
|
|
834
883
|
if not directive_body:
|
|
835
884
|
return True
|
|
836
|
-
|
|
885
|
+
if directive_body.startswith(ALL_FREE_FORM_EXEMPT_COMMENT_BODIES):
|
|
886
|
+
return True
|
|
887
|
+
if not _starts_with_bounded_token_anchored_directive(directive_body):
|
|
888
|
+
return False
|
|
889
|
+
return CHAINED_INLINE_COMMENT_PATTERN.search(directive_body) is None
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def _starts_with_bounded_token_anchored_directive(directive_body: str) -> bool:
    """Decide whether *directive_body* begins with a genuine token-anchored directive.

    Every entry of ``ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES`` that prefixes
    the body is examined in turn. A prefix match counts when any of the
    following holds:

    - the marker itself already ends in a character from
      ``ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS``;
    - nothing follows the marker;
    - the character right after the marker is whitespace;
    - the character right after the marker is one of the boundary characters.

    Prose that merely shares the prefix, such as
    ``noqa-but-not-really: explanation``, therefore does not inherit the
    exemption.

    Args:
        directive_body: The comment text after the leading hash with leading
            whitespace removed.

    Returns:
        True when some token-anchored marker sits at a real token boundary at
        the start of the body, False otherwise.
    """
    for each_token in ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES:
        if not directive_body.startswith(each_token):
            continue
        if each_token[-1] in ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS:
            return True
        token_length = len(each_token)
        character_after_token = directive_body[token_length:token_length + 1]
        if not character_after_token:
            return True
        if (
            character_after_token.isspace()
            or character_after_token in ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS
        ):
            return True
    return False
|
|
837
923
|
|
|
838
924
|
|
|
839
925
|
def _extract_python_comment_sets(content: str) -> tuple[set[str], set[str], bool]:
|
|
@@ -1267,6 +1353,208 @@ def check_banned_identifiers(content: str, file_path: str) -> list[str]:
|
|
|
1267
1353
|
return issues
|
|
1268
1354
|
|
|
1269
1355
|
|
|
1356
|
+
def _identifier_word_parts(identifier: str) -> list[str]:
    """Break an identifier into its lowercase word parts.

    The identifier is first cut on underscores, which covers snake_case and
    SCREAMING_SNAKE_CASE. Each non-empty segment is then scanned with
    ``CAMEL_CASE_WORD_PATTERN`` to separate camelCase / PascalCase words. A
    segment in which the pattern finds nothing is kept whole. The lowercased
    parts are meant for membership tests against banned-noun vocabularies.

    Args:
        identifier: A Python identifier (variable, parameter, class, or
            function name).

    Returns:
        The word parts in their original order, lowercased. The list is empty
        when the identifier has no non-empty underscore-separated segment.
    """
    lowered_words: list[str] = []
    for each_segment in identifier.split("_"):
        if not each_segment:
            continue
        all_pieces = CAMEL_CASE_WORD_PATTERN.findall(each_segment) or [each_segment]
        lowered_words.extend(each_piece.lower() for each_piece in all_pieces)
    return lowered_words
|
|
1383
|
+
|
|
1384
|
+
|
|
1385
|
+
def _find_banned_noun_word(identifier: str) -> str | None:
    """Locate the first banned noun word embedded in *identifier*.

    The identifier is split by ``_identifier_word_parts`` and the parts are
    checked, in order, for membership in ``ALL_BANNED_NOUN_WORDS``.

    Args:
        identifier: A Python identifier.

    Returns:
        The lowercased banned noun word that occurs as a word part of the
        identifier (e.g., ``'result'`` for ``'HolidayPeakResult'``), or
        ``None`` when no word part is banned.
    """
    return next(
        (
            each_word
            for each_word in _identifier_word_parts(identifier)
            if each_word in ALL_BANNED_NOUN_WORDS
        ),
        None,
    )
|
|
1400
|
+
|
|
1401
|
+
|
|
1402
|
+
def _is_dunder_name(identifier: str) -> bool:
|
|
1403
|
+
return identifier.startswith("__") and identifier.endswith("__")
|
|
1404
|
+
|
|
1405
|
+
|
|
1406
|
+
def _collect_banned_noun_word_bindings(
    parsed_tree: ast.AST,
) -> list[tuple[str, int, int, str]]:
    """Return ``(identifier, lineno, col_offset, banned_word)`` for each binding.

    Walks every node of *parsed_tree* and inspects the names bound by:

    - assignment targets and annotated-assignment targets;
    - ``for`` / ``async for`` loop targets and comprehension targets;
    - walrus (``:=``) targets that are plain names;
    - ``with ... as`` targets;
    - function/method definitions and the arguments returned by
      ``_collect_annotated_arguments`` (names listed in
      ``ALL_SELF_AND_CLS_PARAMETER_NAMES`` are not inspected);
    - class definitions;
    - ``import ... as`` aliases (imports without ``as`` are not inspected).

    Skips identifiers that already match ``ALL_BANNED_IDENTIFIERS`` exactly
    (those are reported by ``check_banned_identifiers``) and dunder names.

    An import alias is located at the alias's own line and column when the
    running interpreter records a position on ``ast.alias`` nodes, so an alias
    inside a parenthesized multi-line import is attributed to the line it is
    written on rather than to the line of the ``import`` keyword. The position
    of the import statement is the fallback when the alias carries none.

    Args:
        parsed_tree: The parsed syntax tree to scan.

    Returns:
        One tuple per distinct ``(identifier, lineno, col_offset)`` whose
        identifier contains a banned noun word, sorted by line and then by
        column.
    """
    flagged_bindings: list[tuple[str, int, int, str]] = []
    seen_keys: set[tuple[str, int, int]] = set()

    def record(name: str, lineno: int, col_offset: int) -> None:
        if name in ALL_BANNED_IDENTIFIERS or _is_dunder_name(name):
            return
        banned_word = _find_banned_noun_word(name)
        if banned_word is None:
            return
        key = (name, lineno, col_offset)
        if key in seen_keys:
            return
        seen_keys.add(key)
        flagged_bindings.append((name, lineno, col_offset, banned_word))

    def record_target(target: ast.expr) -> None:
        for each_name_node in _collect_target_names(target):
            record(each_name_node.id, each_name_node.lineno, each_name_node.col_offset)

    for each_node in ast.walk(parsed_tree):
        if isinstance(each_node, ast.Assign):
            for each_target in each_node.targets:
                record_target(each_target)
        elif isinstance(each_node, (ast.AnnAssign, ast.For, ast.AsyncFor, ast.comprehension)):
            record_target(each_node.target)
        elif isinstance(each_node, ast.NamedExpr) and isinstance(each_node.target, ast.Name):
            record(each_node.target.id, each_node.target.lineno, each_node.target.col_offset)
        elif isinstance(each_node, ast.withitem) and each_node.optional_vars is not None:
            record_target(each_node.optional_vars)
        elif isinstance(each_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            record(each_node.name, each_node.lineno, each_node.col_offset)
            for each_arg in _collect_annotated_arguments(each_node):
                if each_arg.arg in ALL_SELF_AND_CLS_PARAMETER_NAMES:
                    continue
                record(each_arg.arg, each_arg.lineno, each_arg.col_offset)
        elif isinstance(each_node, ast.ClassDef):
            record(each_node.name, each_node.lineno, each_node.col_offset)
        elif isinstance(each_node, (ast.Import, ast.ImportFrom)):
            for each_alias in each_node.names:
                if each_alias.asname is None:
                    continue
                record(
                    each_alias.asname,
                    getattr(each_alias, "lineno", each_node.lineno),
                    getattr(each_alias, "col_offset", each_node.col_offset),
                )

    flagged_bindings.sort(key=lambda binding: (binding[1], binding[2]))
    return flagged_bindings
|
|
1468
|
+
|
|
1469
|
+
|
|
1470
|
+
def check_banned_noun_word_boundary(
    content: str,
    file_path: str,
    all_changed_lines: set[int] | None = None,
    defer_scope_to_caller: bool = False,
) -> list[str]:
    """Flag identifiers that embed a CODE_RULES §5 banned noun word.

    Companion to ``check_banned_identifiers``, which covers exact matches
    only. This check reports a word from ``ALL_BANNED_NOUN_WORDS`` — the
    singular nouns ``result``, ``data``, ``output``, ``response``, ``value``,
    ``item``, ``temp`` plus the plural forms ``results``, ``outputs``,
    ``responses``, ``values``, ``items`` — that shows up as one snake_case or
    camelCase word part of a longer identifier (``canned_results``,
    ``HolidayPeakResult``, ``OUTPUT_DIR``, ``cached_response``). Identifiers
    that exactly match ``ALL_BANNED_IDENTIFIERS`` are left to
    ``check_banned_identifiers``.

    Test files, hook infrastructure, config files, workflow registries, and
    migrations are exempt and produce no issues. Content that fails to parse
    as Python also produces no issues.

    Scoping goes through the shared ``_scope_violations_to_changed_lines``
    helper, the same way ``check_function_length`` and
    ``check_tests_use_isolated_filesystem_paths`` are scoped. A banned-noun
    binding is a point fact about one identifier, so each violation carries
    its binding line as a one-line ``range`` for terminal diff scoping, and
    its message ends with the ``(binding span at line X, spanning 1 lines)``
    fragment that the commit gate reconstructs through the shared span
    extractor registry. Anchoring to the binding line rather than to the
    enclosing function matches ``check_banned_identifiers`` and keeps a
    pre-existing binding out of scope when an unrelated line of its enclosing
    function is edited. On a terminal Edit only violations whose binding line
    is among ``all_changed_lines`` are returned; on a new-file or full-file
    write every violation is in scope; ``defer_scope_to_caller`` returns every
    violation so the gate scopes by added line.

    Args:
        content: The reconstructed effective file content to analyze (the
            whole post-edit file on an Edit, the whole file at the gate).
        file_path: The path of the file being checked; used only to decide
            whether the file is exempt.
        all_changed_lines: Post-edit line numbers the current edit touched, or
            None to treat the whole file as in scope. When provided, a
            violation blocks only when its binding line is among them.
        defer_scope_to_caller: When True, return every violation so the
            commit/push gate's ``split_violations_by_scope`` can scope by
            added line and report the in-scope set.

    Returns:
        Issue strings, one per offending binding. With
        *defer_scope_to_caller* set every binding is returned for the gate to
        scope; otherwise only the bindings in scope are returned.
    """
    all_exemption_predicates = (
        is_test_file,
        is_hook_infrastructure,
        is_config_file,
        is_workflow_registry_file,
        is_migration_file,
    )
    if any(each_predicate(file_path) for each_predicate in all_exemption_predicates):
        return []

    try:
        parsed_tree = ast.parse(content)
    except SyntaxError:
        return []

    lines_per_binding = 1
    all_violations_in_walk_order: list[tuple[range, str]] = []
    for each_binding in _collect_banned_noun_word_bindings(parsed_tree):
        each_name, each_lineno, _, each_word = each_binding
        span_fragment = BANNED_NOUN_SPAN_FRAGMENT_TEMPLATE.format(
            definition_line=each_lineno,
            line_span=lines_per_binding,
        )
        all_violations_in_walk_order.append(
            (
                range(each_lineno, each_lineno + lines_per_binding),
                f"Line {each_lineno}: Identifier {each_name!r} {BANNED_NOUN_WORD_MESSAGE_SUFFIX} "
                f"(word: {each_word!r}) {span_fragment}",
            )
        )
    return _scope_violations_to_changed_lines(
        all_violations_in_walk_order,
        all_changed_lines,
        defer_scope_to_caller,
    )
|
|
1556
|
+
|
|
1557
|
+
|
|
1270
1558
|
|
|
1271
1559
|
|
|
1272
1560
|
def _string_constant_value(node: ast.expr) -> str | None:
|
|
@@ -2228,6 +2516,999 @@ def check_skip_decorators_in_tests(content: str, file_path: str) -> list[str]:
|
|
|
2228
2516
|
return issues
|
|
2229
2517
|
|
|
2230
2518
|
|
|
2519
|
+
def _dotted_call_attribute_chain(call_node: ast.Call) -> str | None:
|
|
2520
|
+
"""Return the dotted name path of *call_node*'s callee, or None.
|
|
2521
|
+
|
|
2522
|
+
For ``pathlib.Path.home()`` returns ``"pathlib.Path.home"``; for
|
|
2523
|
+
``Path.home()`` returns ``"Path.home"``; for ``tempfile.gettempdir()``
|
|
2524
|
+
returns ``"tempfile.gettempdir"``. Returns ``None`` when the call target
|
|
2525
|
+
is not a pure attribute chain rooted at an ``ast.Name`` (for example,
|
|
2526
|
+
``obj.method()`` where ``obj`` is the result of another expression).
|
|
2527
|
+
"""
|
|
2528
|
+
chain_parts: list[str] = []
|
|
2529
|
+
walker: ast.expr = call_node.func
|
|
2530
|
+
while isinstance(walker, ast.Attribute):
|
|
2531
|
+
chain_parts.append(walker.attr)
|
|
2532
|
+
walker = walker.value
|
|
2533
|
+
if not isinstance(walker, ast.Name):
|
|
2534
|
+
return None
|
|
2535
|
+
chain_parts.append(walker.id)
|
|
2536
|
+
chain_parts.reverse()
|
|
2537
|
+
return ".".join(chain_parts)
|
|
2538
|
+
|
|
2539
|
+
|
|
2540
|
+
def _record_probe_import_aliases(
    import_node: ast.Import | ast.ImportFrom,
    all_canonical_names_by_alias: dict[str, str],
) -> None:
    """Add the probe-relevant bindings of one import statement to the alias map.

    For ``import`` statements, a module is recorded only when its name is in
    ``ALL_PROBE_RELEVANT_MODULE_CANONICAL_NAMES``; the local name (the ``as``
    name when present, otherwise the module name as written) maps to the
    module name. For ``from ... import`` statements, a name is recorded only
    when the ``(module, name)`` pair is a key of
    ``ALL_CANONICAL_DOTTED_NAMES_BY_BARE_IMPORT``; the local name maps to the
    canonical dotted name stored there. A ``from`` import with no module
    name is looked up with an empty module string. Imports outside those sets
    are ignored so unrelated bindings never rewrite a chain. A later entry
    for the same local name overwrites an earlier one.

    NOTE(review): an un-aliased dotted import such as ``import os.path`` is
    keyed by the full dotted text, whereas Python binds only the top-level
    name for that form — confirm consumers of the map account for it.

    Args:
        import_node: A single ``ast.Import`` or ``ast.ImportFrom`` statement.
        all_canonical_names_by_alias: The alias map, mutated in place with any
            probe-relevant local-name to canonical-dotted-prefix entries.
    """
    if isinstance(import_node, ast.Import):
        all_canonical_names_by_alias.update(
            (each_alias.asname or each_alias.name, each_alias.name)
            for each_alias in import_node.names
            if each_alias.name in ALL_PROBE_RELEVANT_MODULE_CANONICAL_NAMES
        )
        return
    imported_from_module = import_node.module or ""
    for each_alias in import_node.names:
        canonical_dotted = ALL_CANONICAL_DOTTED_NAMES_BY_BARE_IMPORT.get(
            (imported_from_module, each_alias.name)
        )
        if canonical_dotted is not None:
            all_canonical_names_by_alias[each_alias.asname or each_alias.name] = canonical_dotted
|
|
2572
|
+
|
|
2573
|
+
|
|
2574
|
+
def _build_alias_canonicalization_map(syntax_tree: ast.Module) -> dict[str, str]:
    """Build the module-wide map from probe import local names to canonical prefixes.

    Both module aliases and bare-imported names are resolved, so a dotted-call
    chain rooted at a module-level binding can be rewritten to the canonical
    form the probe set matches:

    - ``import os as o`` -> ``o`` resolves to ``os`` (so ``o.getenv`` ->
      ``os.getenv`` and ``o.path.expanduser`` -> ``os.path.expanduser``).
    - ``import os.path as op`` -> ``op`` resolves to ``os.path`` (so
      ``op.expanduser`` -> ``os.path.expanduser``).
    - ``import pathlib as pl`` -> ``pl`` resolves to ``pathlib``.
    - ``from pathlib import Path as P`` -> ``P`` resolves to ``Path``.
    - ``from os import path`` -> ``path`` resolves to ``os.path``.
    - ``from os.path import expanduser as e`` -> ``e`` resolves to
      ``os.path.expanduser``; ``from os import getenv`` -> ``getenv``
      resolves to ``os.getenv``; ``from os import environ`` -> ``environ``
      resolves to ``os.environ``.

    Only module-scoped imports enter the map: those with no enclosing
    ``FunctionDef``/``AsyncFunctionDef``/``ClassDef``. Imports nested in
    module-level ``try``/``except``, ``if``, or ``with`` blocks qualify (the
    ``try: import os as o except ImportError:`` optional-import idiom binds
    ``o`` module-wide). Function-local and class-body imports are excluded
    because their names are bound only inside that function or class
    namespace; admitting them would let a probe import inside one test
    canonicalize a same-named reference in a sibling test that never imported
    it. Function-local imports are handled per function by
    ``_collect_local_probe_alias_bindings``.

    Args:
        syntax_tree: The parsed module to scan for module-scoped import
            statements.

    Returns:
        Mapping from module-level local binding name to its canonical dotted
        prefix.
    """
    parent_by_child_id = _build_parent_map(syntax_tree)
    all_module_scoped_imports = [
        each_node
        for each_node in ast.walk(syntax_tree)
        if isinstance(each_node, (ast.Import, ast.ImportFrom))
        and not _node_is_lexically_inside_function_or_class(each_node, parent_by_child_id)
    ]
    all_canonical_names_by_alias: dict[str, str] = {}
    for each_import in all_module_scoped_imports:
        _record_probe_import_aliases(each_import, all_canonical_names_by_alias)
    return all_canonical_names_by_alias
|
|
2624
|
+
|
|
2625
|
+
|
|
2626
|
+
def _node_is_lexically_inside_function_or_class(
|
|
2627
|
+
node: ast.AST, parent_by_child_id: dict[int, ast.AST],
|
|
2628
|
+
) -> bool:
|
|
2629
|
+
"""Return True when *node* is nested inside a function or class body.
|
|
2630
|
+
|
|
2631
|
+
Walks ancestors via *parent_by_child_id*. A node nested only inside
|
|
2632
|
+
module-level ``try``/``if``/``with`` blocks has no enclosing function or
|
|
2633
|
+
class and is module-scoped; a node inside a
|
|
2634
|
+
``FunctionDef``/``AsyncFunctionDef``/``ClassDef`` body is scoped to that
|
|
2635
|
+
enclosing definition and is not module-scoped. A class-body import binds
|
|
2636
|
+
its alias only within the class namespace, so it must not enter the
|
|
2637
|
+
module-wide alias map any more than a function-local import does.
|
|
2638
|
+
|
|
2639
|
+
Args:
|
|
2640
|
+
node: The node whose lexical scope is being classified.
|
|
2641
|
+
parent_by_child_id: Child-``id()``-to-parent map from
|
|
2642
|
+
``_build_parent_map``.
|
|
2643
|
+
|
|
2644
|
+
Returns:
|
|
2645
|
+
True when an enclosing
|
|
2646
|
+
``FunctionDef``/``AsyncFunctionDef``/``ClassDef`` exists.
|
|
2647
|
+
"""
|
|
2648
|
+
current_ancestor = parent_by_child_id.get(id(node))
|
|
2649
|
+
while current_ancestor is not None:
|
|
2650
|
+
if isinstance(
|
|
2651
|
+
current_ancestor,
|
|
2652
|
+
(ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef),
|
|
2653
|
+
):
|
|
2654
|
+
return True
|
|
2655
|
+
current_ancestor = parent_by_child_id.get(id(current_ancestor))
|
|
2656
|
+
return False
|
|
2657
|
+
|
|
2658
|
+
|
|
2659
|
+
def _collect_os_environ_local_binding_names(
    scope_node: ast.FunctionDef | ast.AsyncFunctionDef,
    all_canonical_names_by_alias: dict[str, str],
) -> set[str]:
    """Gather the local names that refer to ``os.environ`` inside *scope_node*.

    The scan is confined to the nodes produced by
    ``_descend_within_test_scope`` for the one function passed in, so a
    binding in one test never attributes a same-named access in a sibling
    test. Two binding forms are recognized:

    - ``from ... import`` names whose ``(module, name)`` pair maps to
      ``OS_ENVIRON_DOTTED_NAME`` in
      ``ALL_CANONICAL_DOTTED_NAMES_BY_BARE_IMPORT`` (``from os import
      environ``); the ``as`` name is used when present.
    - plain assignments whose right-hand side
      ``_attribute_chain_resolves_to_os_environ`` accepts, resolved through
      *all_canonical_names_by_alias* so ``e = o.environ`` with
      ``import os as o`` is recognized. Only targets that are bare names are
      collected.

    Subscript and ``.get(...)`` reads on the returned names are treated by
    callers as ``os.environ`` accesses.

    Args:
        scope_node: The single test function node to scan for bindings.
        all_canonical_names_by_alias: Import-alias map from
            ``_build_alias_canonicalization_map``.

    Returns:
        Set of local variable names that reference ``os.environ``.
    """
    environ_bindings: set[str] = set()
    for each_node in _descend_within_test_scope(scope_node):
        if isinstance(each_node, ast.ImportFrom):
            imported_from_module = each_node.module or ""
            environ_bindings.update(
                each_alias.asname or each_alias.name
                for each_alias in each_node.names
                if ALL_CANONICAL_DOTTED_NAMES_BY_BARE_IMPORT.get(
                    (imported_from_module, each_alias.name)
                )
                == OS_ENVIRON_DOTTED_NAME
            )
        elif isinstance(each_node, ast.Assign) and _attribute_chain_resolves_to_os_environ(
            each_node.value, all_canonical_names_by_alias
        ):
            environ_bindings.update(
                each_target.id
                for each_target in each_node.targets
                if isinstance(each_target, ast.Name)
            )
    return environ_bindings
|
|
2699
|
+
|
|
2700
|
+
|
|
2701
|
+
def _collect_pathlib_path_local_binding_names(
    scope_node: ast.FunctionDef | ast.AsyncFunctionDef,
    all_canonical_names_by_alias: dict[str, str],
) -> set[str]:
    """Collect local names assigned a home-tilde ``pathlib.Path(...)`` value.

    Walks the nodes yielded by ``_descend_within_test_scope`` for
    *scope_node* and keeps every plain ``ast.Assign`` whose right-hand side
    satisfies ``_pathlib_path_construction_uses_home_tilde`` (for example
    ``candidate = Path('~/x')``; aliased and fully qualified constructors are
    resolved through *all_canonical_names_by_alias*). Only bare-name targets
    are recorded, so tuple, attribute, and subscript targets are ignored.
    Because the walk covers a single test function, a binding found in one
    test is never applied to a same-named variable in a sibling test.

    Args:
        scope_node: The single test function node to scan for bindings.
        all_canonical_names_by_alias: Import-alias map from
            ``_build_alias_canonicalization_map``.

    Returns:
        Set of local variable names bound to a home-tilde ``pathlib.Path``
        construction.
    """
    tilde_path_assignments = (
        each_node
        for each_node in _descend_within_test_scope(scope_node)
        if isinstance(each_node, ast.Assign)
        and _pathlib_path_construction_uses_home_tilde(
            each_node.value, all_canonical_names_by_alias
        )
    )
    return {
        each_target.id
        for each_assignment in tilde_path_assignments
        for each_target in each_assignment.targets
        if isinstance(each_target, ast.Name)
    }
|
|
2741
|
+
|
|
2742
|
+
|
|
2743
|
+
def _collect_local_probe_alias_bindings(
    scope_node: ast.FunctionDef | ast.AsyncFunctionDef,
    all_canonical_names_by_alias: dict[str, str],
) -> dict[str, str]:
    """Build the per-test overlay of local names to canonical probe prefixes.

    The scan covers only the nodes yielded by ``_descend_within_test_scope``
    for *scope_node*, so an alias bound in one test never affects a sibling
    test. Two kinds of node contribute to the overlay:

    - ``import`` / ``from ... import`` statements inside the test, which are
      handed to ``_record_probe_import_aliases`` together with the overlay
      so function-local probe imports are recorded.
    - Plain assignments whose right-hand side resolves, through
      ``_canonical_probe_prefix_for_value``, to a probe-aliasable canonical
      prefix (for example ``path_class = Path`` or ``read_env = os.getenv``).
      Every bare-name target of such an assignment maps to that prefix.

    When a name is bound more than once, the binding visited last in the
    traversal order of ``_descend_within_test_scope`` overwrites earlier
    ones.

    Args:
        scope_node: The single test function node to scan for alias bindings.
        all_canonical_names_by_alias: Module-level import-alias map from
            ``_build_alias_canonicalization_map``.

    Returns:
        Mapping from local binding name to its canonical probe prefix.
    """
    overlay_by_local_name: dict[str, str] = {}
    for each_node in _descend_within_test_scope(scope_node):
        if isinstance(each_node, (ast.Import, ast.ImportFrom)):
            _record_probe_import_aliases(each_node, overlay_by_local_name)
        elif isinstance(each_node, ast.Assign):
            resolved_prefix = _canonical_probe_prefix_for_value(
                each_node.value, all_canonical_names_by_alias
            )
            if resolved_prefix is not None:
                overlay_by_local_name.update(
                    (each_target.id, resolved_prefix)
                    for each_target in each_node.targets
                    if isinstance(each_target, ast.Name)
                )
    return overlay_by_local_name
|
|
2794
|
+
|
|
2795
|
+
|
|
2796
|
+
def _canonical_probe_prefix_for_value(
|
|
2797
|
+
node: ast.expr, all_canonical_names_by_alias: dict[str, str],
|
|
2798
|
+
) -> str | None:
|
|
2799
|
+
if isinstance(node, ast.Name):
|
|
2800
|
+
candidate_prefix = all_canonical_names_by_alias.get(node.id, node.id)
|
|
2801
|
+
elif isinstance(node, ast.Attribute):
|
|
2802
|
+
attribute_chain = _dotted_attribute_chain(node)
|
|
2803
|
+
if attribute_chain is None:
|
|
2804
|
+
return None
|
|
2805
|
+
candidate_prefix = _resolve_chain_through_aliases(
|
|
2806
|
+
attribute_chain, all_canonical_names_by_alias
|
|
2807
|
+
)
|
|
2808
|
+
else:
|
|
2809
|
+
return None
|
|
2810
|
+
if candidate_prefix in ALL_PROBE_ALIASABLE_CANONICAL_PREFIXES:
|
|
2811
|
+
return candidate_prefix
|
|
2812
|
+
return None
|
|
2813
|
+
|
|
2814
|
+
|
|
2815
|
+
def _pathlib_path_construction_uses_home_tilde(
    node: ast.expr, all_canonical_names_by_alias: dict[str, str],
) -> bool:
    """Tell whether *node* is a ``pathlib.Path('~...')`` construction.

    Two conditions must both hold: the callee chain, after alias resolution,
    is a member of ``ALL_PATHLIB_PATH_CONSTRUCTOR_CANONICAL_NAMES``, and
    ``_expanduser_argument_references_home`` accepts the call's first
    argument (a literal string with the home-tilde prefix). Non-call
    expressions and calls whose callee chain cannot be flattened are
    rejected.

    Args:
        node: The candidate ``Path(...)`` construction expression.
        all_canonical_names_by_alias: Import-alias map from
            ``_build_alias_canonicalization_map``.

    Returns:
        True when *node* constructs a ``pathlib.Path`` from a leading-tilde
        literal string.
    """
    if not isinstance(node, ast.Call):
        return False
    raw_constructor_chain = _dotted_call_attribute_chain(node)
    if raw_constructor_chain is None:
        return False
    is_path_constructor = (
        _resolve_chain_through_aliases(
            raw_constructor_chain, all_canonical_names_by_alias
        )
        in ALL_PATHLIB_PATH_CONSTRUCTOR_CANONICAL_NAMES
    )
    return is_path_constructor and _expanduser_argument_references_home(node)
|
|
2847
|
+
|
|
2848
|
+
|
|
2849
|
+
def _expanduser_method_call_targets_pathlib_path(
    call_node: ast.Call,
    all_canonical_names_by_alias: dict[str, str],
    all_path_local_bindings: set[str],
) -> bool:
    """Tell whether *call_node* is ``.expanduser()`` on a home-tilde Path.

    The call must be an attribute call whose method name equals
    ``PATHLIB_EXPANDUSER_METHOD_NAME``. Its receiver then qualifies in one
    of two ways: a bare name qualifies only when it is listed in
    *all_path_local_bindings*; any other receiver qualifies when
    ``_pathlib_path_construction_uses_home_tilde`` accepts it (an inline
    ``Path('~...')`` construction).

    Args:
        call_node: The call whose callee attribute is ``expanduser``.
        all_canonical_names_by_alias: Import-alias map from
            ``_build_alias_canonicalization_map``.
        all_path_local_bindings: Local names bound to a home-tilde
            ``pathlib.Path`` construction from
            ``_collect_pathlib_path_local_binding_names``.

    Returns:
        True when the ``expanduser`` receiver resolves to a home-tilde
        ``pathlib.Path``.
    """
    method_reference = call_node.func
    is_expanduser_method = (
        isinstance(method_reference, ast.Attribute)
        and method_reference.attr == PATHLIB_EXPANDUSER_METHOD_NAME
    )
    if not is_expanduser_method:
        return False
    receiver_node = method_reference.value
    if not isinstance(receiver_node, ast.Name):
        return _pathlib_path_construction_uses_home_tilde(
            receiver_node, all_canonical_names_by_alias
        )
    return receiver_node.id in all_path_local_bindings
|
|
2886
|
+
|
|
2887
|
+
|
|
2888
|
+
def _attribute_chain_resolves_to_os_environ(
    node: ast.expr, all_canonical_names_by_alias: dict[str, str],
) -> bool:
    """Tell whether *node* is an attribute chain canonically ``os.environ``.

    Args:
        node: The expression to inspect; anything other than an
            ``ast.Attribute`` is rejected.
        all_canonical_names_by_alias: Import-alias map used to rewrite the
            chain's leading segment (so ``o.environ`` with ``import os as o``
            is recognized).

    Returns:
        True when the flattened, alias-resolved chain equals
        ``OS_ENVIRON_DOTTED_NAME``.
    """
    if isinstance(node, ast.Attribute):
        dotted_chain = _dotted_attribute_chain(node)
        if dotted_chain is not None:
            resolved_chain = _resolve_chain_through_aliases(
                dotted_chain, all_canonical_names_by_alias
            )
            return resolved_chain == OS_ENVIRON_DOTTED_NAME
    return False
|
|
2900
|
+
|
|
2901
|
+
|
|
2902
|
+
def _dotted_attribute_chain(attribute_node: ast.Attribute) -> str | None:
|
|
2903
|
+
chain_parts: list[str] = []
|
|
2904
|
+
walker: ast.expr = attribute_node
|
|
2905
|
+
while isinstance(walker, ast.Attribute):
|
|
2906
|
+
chain_parts.append(walker.attr)
|
|
2907
|
+
walker = walker.value
|
|
2908
|
+
if not isinstance(walker, ast.Name):
|
|
2909
|
+
return None
|
|
2910
|
+
chain_parts.append(walker.id)
|
|
2911
|
+
chain_parts.reverse()
|
|
2912
|
+
return ".".join(chain_parts)
|
|
2913
|
+
|
|
2914
|
+
|
|
2915
|
+
def _resolve_chain_through_aliases(
    chain: str, all_canonical_names_by_alias: dict[str, str],
) -> str:
    """Canonicalize the first segment of a dotted *chain* via the alias map.

    Args:
        chain: A dotted callee chain such as ``"P.home"``,
            ``"op.expanduser"``, or ``"o.path.expanduser"``.
        all_canonical_names_by_alias: Local-binding-to-canonical-prefix
            mapping from ``_build_alias_canonicalization_map``.

    Returns:
        *chain* with its leading segment swapped for the mapped canonical
        prefix (which may itself contain dots); *chain* unchanged when the
        leading segment has no entry in the map.
    """
    # ``split`` with maxsplit=1 yields the head plus at most one remainder.
    leading_segment, *remaining_segments = chain.split(".", 1)
    mapped_prefix = all_canonical_names_by_alias.get(leading_segment)
    if mapped_prefix is None:
        return chain
    return ".".join([mapped_prefix, *remaining_segments])
|
|
2938
|
+
|
|
2939
|
+
|
|
2940
|
+
def _expandvars_argument_references_home_or_temp(call_node: ast.Call) -> bool:
    """Tell whether an ``expandvars`` call expands a home/temp env var.

    Only a literal string first argument is inspected. Variable names are
    extracted from it with both ``ENVIRONMENT_VARIABLE_REFERENCE_PATTERN``
    (dollar-style references) and
    ``WINDOWS_PERCENT_VARIABLE_REFERENCE_PATTERN`` (percent-style
    references); the call counts as a probe when any extracted name is a
    member of ``ALL_HOME_DIRECTORY_ENV_VAR_NAMES``. A missing or non-literal
    argument is conservatively treated as referencing nothing.

    Args:
        call_node: The ``os.path.expandvars(...)`` call node.

    Returns:
        True when at least one expanded variable name is in
        ``ALL_HOME_DIRECTORY_ENV_VAR_NAMES``.
    """
    if not call_node.args:
        return False
    template_argument = call_node.args[0]
    if not isinstance(template_argument, ast.Constant):
        return False
    template_text = template_argument.value
    if not isinstance(template_text, str):
        return False
    reference_patterns = (
        ENVIRONMENT_VARIABLE_REFERENCE_PATTERN,
        WINDOWS_PERCENT_VARIABLE_REFERENCE_PATTERN,
    )
    return any(
        each_name in ALL_HOME_DIRECTORY_ENV_VAR_NAMES
        for each_pattern in reference_patterns
        for each_name in each_pattern.findall(template_text)
    )
|
|
2977
|
+
|
|
2978
|
+
|
|
2979
|
+
def _expanduser_argument_references_home(call_node: ast.Call) -> bool:
    """Tell whether a call's first argument is a home-tilde string literal.

    ``expanduser`` only substitutes a leading tilde, so the call is treated
    as a home-directory probe only when its first positional argument is a
    literal string starting with ``HOME_DIRECTORY_TILDE_PREFIX``. A missing
    or non-literal argument is conservatively treated as not referencing
    home.

    Args:
        call_node: The ``os.path.expanduser(...)`` call node (also used for
            ``Path(...)`` constructions by sibling helpers).

    Returns:
        True when the first string argument begins with the home-directory
        tilde prefix.
    """
    if not call_node.args:
        return False
    path_argument = call_node.args[0]
    is_string_literal = isinstance(path_argument, ast.Constant) and isinstance(
        path_argument.value, str
    )
    return is_string_literal and path_argument.value.startswith(
        HOME_DIRECTORY_TILDE_PREFIX
    )
|
|
3004
|
+
|
|
3005
|
+
|
|
3006
|
+
def _tempfile_factory_call_is_isolated_by_dir(
    call_node: ast.Call,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
) -> bool:
    """Tell whether a tempfile factory's ``dir=`` sandboxes the allocation.

    Looks for the first explicit keyword named
    ``TEMPFILE_FACTORY_ISOLATION_DIRECTORY_KEYWORD``. The call counts as
    isolated only when that keyword exists and
    ``_dir_value_resolves_to_shared_temp`` rejects its value (i.e. the value
    is not ``None`` and not a recognized shared-temp source). A ``**kwargs``
    entry has no keyword name in the AST, so a ``dir`` passed that way is
    never matched and the call is treated as not isolated.

    Args:
        call_node: The tempfile factory call node.
        all_canonical_names_by_alias: Import-alias map used to resolve aliased
            shared-temp sources passed as the ``dir=`` value.
        all_environ_local_bindings: Local names bound to ``os.environ`` within
            the test function, used to recognize aliased ``os.environ`` reads.

    Returns:
        True when an explicit ``dir=`` keyword is present and its value is not
        a recognized shared-temp source.
    """
    directory_keyword = next(
        (
            each_keyword
            for each_keyword in call_node.keywords
            if each_keyword.arg == TEMPFILE_FACTORY_ISOLATION_DIRECTORY_KEYWORD
        ),
        None,
    )
    if directory_keyword is None:
        return False
    points_at_shared_temp = _dir_value_resolves_to_shared_temp(
        directory_keyword.value,
        all_canonical_names_by_alias,
        all_environ_local_bindings,
    )
    return not points_at_shared_temp
|
|
3048
|
+
|
|
3049
|
+
|
|
3050
|
+
def _dir_value_resolves_to_shared_temp(
    dir_value: ast.expr,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
) -> bool:
    """Tell whether a tempfile ``dir=`` value points at the shared temp dir.

    Recognized shared-temp values, by expression kind:

    - a constant: only ``None`` (any other literal is not shared temp);
    - a subscript: an ``os.environ[...]`` read whose literal key is in
      ``ALL_HOME_DIRECTORY_ENV_VAR_NAMES``;
    - a call: an environment getter whose literal key is in
      ``ALL_HOME_DIRECTORY_ENV_VAR_NAMES``, or a callee whose alias-resolved
      chain is in ``ALL_SHARED_TEMP_SOURCE_PROBE_DOTTED_NAMES``.

    Every other expression (for example a plain name such as ``tmp_path``)
    is treated as not shared temp.

    Args:
        dir_value: The expression supplied as the factory's ``dir=`` value.
        all_canonical_names_by_alias: Import-alias map used to resolve aliased
            ``os.getenv`` / ``os.environ`` / ``tempfile`` references.
        all_environ_local_bindings: Local names bound to ``os.environ`` within
            the test function.

    Returns:
        True when the value is a constant ``None`` or a recognized shared-temp
        source that yields the default shared temp directory.
    """
    if isinstance(dir_value, ast.Constant):
        return dir_value.value is None
    if isinstance(dir_value, ast.Subscript):
        subscript_key = _environ_key_string_from_subscript(
            dir_value, all_canonical_names_by_alias, all_environ_local_bindings
        )
        return subscript_key in ALL_HOME_DIRECTORY_ENV_VAR_NAMES
    if not isinstance(dir_value, ast.Call):
        return False
    getter_key = _environ_key_string_from_call(
        dir_value, all_canonical_names_by_alias, all_environ_local_bindings
    )
    if getter_key in ALL_HOME_DIRECTORY_ENV_VAR_NAMES:
        return True
    raw_callee_chain = _dotted_call_attribute_chain(dir_value)
    if raw_callee_chain is None:
        return False
    resolved_callee_chain = _resolve_chain_through_aliases(
        raw_callee_chain, all_canonical_names_by_alias
    )
    return resolved_callee_chain in ALL_SHARED_TEMP_SOURCE_PROBE_DOTTED_NAMES
|
|
3089
|
+
|
|
3090
|
+
|
|
3091
|
+
def _environ_key_string_from_call(
    call_node: ast.Call,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
) -> str | None:
    """Extract the literal env-var key read by an environment getter call.

    Args:
        call_node: The call to inspect.
        all_canonical_names_by_alias: Import-alias map used to recognize
            aliased getters.
        all_environ_local_bindings: Local names bound to ``os.environ`` within
            the test function.

    Returns:
        The first positional argument when *call_node* is accepted by
        ``_call_is_environment_getter`` and that argument is a string
        literal; otherwise ``None``.
    """
    is_environment_getter = _call_is_environment_getter(
        call_node, all_canonical_names_by_alias, all_environ_local_bindings
    )
    if not is_environment_getter or not call_node.args:
        return None
    key_argument = call_node.args[0]
    if not isinstance(key_argument, ast.Constant):
        return None
    key_text = key_argument.value
    return key_text if isinstance(key_text, str) else None
|
|
3104
|
+
|
|
3105
|
+
|
|
3106
|
+
def _call_is_environment_getter(
    call_node: ast.Call,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
) -> bool:
    """Tell whether *call_node* reads an env var via a recognized getter.

    Two forms are accepted: a ``.get(...)`` call on a local name listed in
    *all_environ_local_bindings* (checked by
    ``_call_targets_local_environ_get``), and any call whose alias-resolved
    callee chain is a member of ``ALL_ENVIRONMENT_GETTER_DOTTED_NAMES``.

    Args:
        call_node: The call to inspect.
        all_canonical_names_by_alias: Import-alias map from
            ``_build_alias_canonicalization_map``.
        all_environ_local_bindings: Local names bound to ``os.environ`` within
            the test function being analyzed.

    Returns:
        True when the call is an environment getter whose key argument is
        worth inspecting.
    """
    targets_local_alias = _call_targets_local_environ_get(
        call_node, all_environ_local_bindings
    )
    if not targets_local_alias:
        raw_getter_chain = _dotted_call_attribute_chain(call_node)
        if raw_getter_chain is None:
            return False
        resolved_getter_chain = _resolve_chain_through_aliases(
            raw_getter_chain, all_canonical_names_by_alias
        )
        return resolved_getter_chain in ALL_ENVIRONMENT_GETTER_DOTTED_NAMES
    return True
|
|
3136
|
+
|
|
3137
|
+
|
|
3138
|
+
def _call_targets_local_environ_get(
    call_node: ast.Call, all_environ_local_bindings: set[str],
) -> bool:
    """Tell whether *call_node* is ``<alias>.get(...)`` on an environ alias.

    Args:
        call_node: The call to inspect.
        all_environ_local_bindings: Local names bound to ``os.environ``.

    Returns:
        True when the callee is an attribute named ``ENVIRON_GET_METHOD_NAME``
        whose receiver is a bare name present in
        *all_environ_local_bindings*.
    """
    method_reference = call_node.func
    is_get_method = (
        isinstance(method_reference, ast.Attribute)
        and method_reference.attr == ENVIRON_GET_METHOD_NAME
    )
    if not is_get_method:
        return False
    receiver_node = method_reference.value
    if not isinstance(receiver_node, ast.Name):
        return False
    return receiver_node.id in all_environ_local_bindings
|
|
3148
|
+
|
|
3149
|
+
|
|
3150
|
+
def _environ_key_string_from_subscript(
    subscript_node: ast.Subscript,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
) -> str | None:
    """Extract the literal key of an ``os.environ[...]`` style subscript.

    Args:
        subscript_node: The subscript expression to inspect.
        all_canonical_names_by_alias: Import-alias map used to recognize
            aliased ``os.environ`` references.
        all_environ_local_bindings: Local names bound to ``os.environ`` within
            the test function.

    Returns:
        The subscript key when the subscripted object is accepted by
        ``_subscript_target_is_os_environ`` and the key is a string literal;
        otherwise ``None``.
    """
    reads_os_environ = _subscript_target_is_os_environ(
        subscript_node.value, all_canonical_names_by_alias, all_environ_local_bindings
    )
    if not reads_os_environ:
        return None
    key_expression = subscript_node.slice
    if not isinstance(key_expression, ast.Constant):
        return None
    key_text = key_expression.value
    return key_text if isinstance(key_text, str) else None
|
|
3163
|
+
|
|
3164
|
+
|
|
3165
|
+
def _subscript_target_is_os_environ(
    target_node: ast.expr,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
) -> bool:
    """Tell whether a subscripted expression refers to ``os.environ``.

    Args:
        target_node: The object being subscripted (the ``value`` of an
            ``ast.Subscript``).
        all_canonical_names_by_alias: Import-alias map; a bare name mapped to
            ``OS_ENVIRON_DOTTED_NAME`` counts as ``os.environ``.
        all_environ_local_bindings: Local names bound to ``os.environ`` within
            the test function.

    Returns:
        True for a bare name that is a local environ binding or an alias of
        ``os.environ``, and for an attribute chain accepted by
        ``_attribute_chain_resolves_to_os_environ``; False for every other
        expression kind.
    """
    if isinstance(target_node, ast.Attribute):
        return _attribute_chain_resolves_to_os_environ(
            target_node, all_canonical_names_by_alias
        )
    if not isinstance(target_node, ast.Name):
        return False
    local_name = target_node.id
    return (
        local_name in all_environ_local_bindings
        or all_canonical_names_by_alias.get(local_name) == OS_ENVIRON_DOTTED_NAME
    )
|
|
3177
|
+
|
|
3178
|
+
|
|
3179
|
+
def _collect_pytest_collectable_test_functions(
|
|
3180
|
+
syntax_tree: ast.Module,
|
|
3181
|
+
) -> list[ast.FunctionDef | ast.AsyncFunctionDef]:
|
|
3182
|
+
"""Enumerate the function nodes pytest would actually collect as tests.
|
|
3183
|
+
|
|
3184
|
+
Walks module-level statements and the top-level methods of module-level
|
|
3185
|
+
classes only. Functions nested inside other functions or lambdas are
|
|
3186
|
+
excluded because pytest does not collect nested callables. Module-level
|
|
3187
|
+
classes whose name does not start with the
|
|
3188
|
+
``PYTEST_TEST_CLASS_NAME_PREFIX`` (``Test``) are skipped because the
|
|
3189
|
+
repo's ``pytest.ini`` declares ``python_classes = Test*``; methods on
|
|
3190
|
+
non-``Test*`` helper classes are never collected by pytest.
|
|
3191
|
+
"""
|
|
3192
|
+
collectable: list[ast.FunctionDef | ast.AsyncFunctionDef] = []
|
|
3193
|
+
for each_module_statement in syntax_tree.body:
|
|
3194
|
+
if isinstance(each_module_statement, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
3195
|
+
if (
|
|
3196
|
+
each_module_statement.name.startswith("test_")
|
|
3197
|
+
or each_module_statement.name.startswith("should_")
|
|
3198
|
+
):
|
|
3199
|
+
collectable.append(each_module_statement)
|
|
3200
|
+
elif isinstance(each_module_statement, ast.ClassDef):
|
|
3201
|
+
if not each_module_statement.name.startswith(PYTEST_TEST_CLASS_NAME_PREFIX):
|
|
3202
|
+
continue
|
|
3203
|
+
for each_class_member in each_module_statement.body:
|
|
3204
|
+
if isinstance(each_class_member, (ast.FunctionDef, ast.AsyncFunctionDef)) and (
|
|
3205
|
+
each_class_member.name.startswith("test_")
|
|
3206
|
+
or each_class_member.name.startswith("should_")
|
|
3207
|
+
):
|
|
3208
|
+
collectable.append(each_class_member)
|
|
3209
|
+
return collectable
|
|
3210
|
+
|
|
3211
|
+
|
|
3212
|
+
def _detect_home_or_temp_probes_in_body(
    function_node: ast.FunctionDef | ast.AsyncFunctionDef,
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
    all_path_local_bindings: set[str],
) -> list[tuple[int, str]]:
    """Return ``(line, probe_label)`` pairs for HOME/TMP probes in *function_node*.

    Every node yielded by ``_descend_within_test_scope`` is offered to
    ``_record_home_or_temp_probe``. That traversal descends into the bodies
    of ``ClassDef`` nodes nested in the test (class-level statements run
    when the ``class`` statement executes) but stops at nested function
    definitions, methods, and lambdas, whose bodies do not run merely
    because they are defined.

    Args:
        function_node: The test function whose body is being scanned.
        all_canonical_names_by_alias: Local-binding-to-canonical-prefix mapping used to resolve
            aliased imports before probe membership checks.
        all_environ_local_bindings: Local names bound to ``os.environ`` (scoped
            to *function_node*) used to attribute subscript and ``.get(...)``
            reads to a HOME/TMP env probe.
        all_path_local_bindings: Local names bound to a home-tilde
            ``pathlib.Path`` construction (scoped to *function_node*) used to
            attribute a ``.expanduser()`` method call to a home-directory
            probe.

    Returns:
        A list of ``(line_number, probe_label)`` tuples, one per detected
        probe, sorted by ascending line number. The sort is stable, so
        probes on the same line keep the order in which the traversal
        visited them.
    """
    probes: list[tuple[int, str]] = []
    for each_descendant in _descend_within_test_scope(function_node):
        _record_home_or_temp_probe(
            each_descendant,
            probes,
            all_canonical_names_by_alias,
            all_environ_local_bindings,
            all_path_local_bindings,
        )
    # Traversal is stack-based, so order by line number before reporting.
    probes.sort(key=lambda each_probe: each_probe[0])
    return probes
|
|
3258
|
+
|
|
3259
|
+
|
|
3260
|
+
def _record_home_or_temp_probe(
    node: ast.AST,
    all_probes: list[tuple[int, str]],
    all_canonical_names_by_alias: dict[str, str],
    all_environ_local_bindings: set[str],
    all_path_local_bindings: set[str],
) -> None:
    """Append a ``(line, label)`` entry to *all_probes* if *node* is a probe.

    Subscripts are checked as ``os.environ[...]`` reads. Calls are checked in
    a fixed order and the first matching category decides the outcome:

    1. ``.expanduser()`` on a home-tilde ``pathlib.Path``;
    2. ``expandvars`` with a home/temp variable reference;
    3. ``expanduser`` (``os.path`` or the static pathlib forms) with a
       home-tilde literal;
    4. a filesystem home/temp probe, unless it is a dir-accepting tempfile
       factory isolated by its ``dir=`` keyword;
    5. an environment getter whose literal key is a home/temp variable.

    Nodes of any other kind, and calls whose callee chain cannot be
    flattened, record nothing.

    Args:
        node: The AST node under inspection.
        all_probes: Accumulator list; mutated in place.
        all_canonical_names_by_alias: Import-alias map used to canonicalize
            callee chains.
        all_environ_local_bindings: Local names bound to ``os.environ``.
        all_path_local_bindings: Local names bound to a home-tilde
            ``pathlib.Path`` construction.
    """
    if isinstance(node, ast.Subscript):
        subscript_key = _environ_key_string_from_subscript(
            node, all_canonical_names_by_alias, all_environ_local_bindings
        )
        if subscript_key in ALL_HOME_DIRECTORY_ENV_VAR_NAMES:
            all_probes.append((node.lineno, f"os.environ['{subscript_key}']"))
        return
    if not isinstance(node, ast.Call):
        return

    if _expanduser_method_call_targets_pathlib_path(
        node, all_canonical_names_by_alias, all_path_local_bindings
    ):
        all_probes.append((node.lineno, f"Path.{PATHLIB_EXPANDUSER_METHOD_NAME}()"))
        return

    raw_chain = _dotted_call_attribute_chain(node)
    if raw_chain is None:
        return
    canonical_chain = _resolve_chain_through_aliases(raw_chain, all_canonical_names_by_alias)

    if canonical_chain == EXPANDVARS_DOTTED_NAME:
        if _expandvars_argument_references_home_or_temp(node):
            all_probes.append((node.lineno, f"{canonical_chain}()"))
        return

    # ``os.path.expanduser`` and the static pathlib forms share one argument check.
    if (
        canonical_chain == EXPANDUSER_DOTTED_NAME
        or canonical_chain in ALL_PATHLIB_STATIC_EXPANDUSER_DOTTED_NAMES
    ):
        if _expanduser_argument_references_home(node):
            all_probes.append((node.lineno, f"{canonical_chain}()"))
        return

    if canonical_chain in ALL_FILESYSTEM_HOME_PROBE_DOTTED_NAMES:
        accepts_dir_keyword = (
            canonical_chain in ALL_DIR_ACCEPTING_TEMPFILE_FACTORY_DOTTED_NAMES
        )
        is_sandboxed = accepts_dir_keyword and _tempfile_factory_call_is_isolated_by_dir(
            node, all_canonical_names_by_alias, all_environ_local_bindings
        )
        if not is_sandboxed:
            all_probes.append((node.lineno, f"{canonical_chain}()"))
        return

    getter_key = _environ_key_string_from_call(
        node, all_canonical_names_by_alias, all_environ_local_bindings
    )
    if getter_key in ALL_HOME_DIRECTORY_ENV_VAR_NAMES:
        all_probes.append((node.lineno, f"os env probe '{getter_key}'"))
|
|
3311
|
+
|
|
3312
|
+
|
|
3313
|
+
def _children_to_descend_into(node: ast.AST) -> list[ast.AST]:
    """List the children of *node* that the bounded test-scope walk visits.

    Args:
        node: A node already yielded by the walk.

    Returns:
        An empty list for function definitions (sync or async) and lambdas,
        which are treated as scope boundaries; only the body statements for
        a ``ClassDef`` (its decorators, bases, and keywords are not
        included); every direct child node for anything else.
    """
    callable_scope_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)
    if isinstance(node, callable_scope_types):
        return []
    if isinstance(node, ast.ClassDef):
        return [*node.body]
    return [*ast.iter_child_nodes(node)]
|
|
3319
|
+
|
|
3320
|
+
|
|
3321
|
+
def _descend_within_test_scope(
    function_node: ast.FunctionDef | ast.AsyncFunctionDef,
) -> Iterator[ast.AST]:
    """Yield every descendant of *function_node* on the test's own runtime path.

    The walk starts from the direct children of *function_node* and expands
    each yielded node through ``_children_to_descend_into``, so every caller
    agrees on what is in scope: nested function definitions, methods, and
    lambdas are yielded themselves but their bodies are not entered, while
    the body statements of nested classes are entered.

    Args:
        function_node: The test function whose in-scope descendants to yield.

    Yields:
        Each descendant node within the test's bounded scope. Nodes come off
        a LIFO stack, so siblings are produced in reverse source order.
    """
    pending_nodes: list[ast.AST] = [*ast.iter_child_nodes(function_node)]
    while pending_nodes:
        current_node = pending_nodes.pop()
        yield current_node
        pending_nodes += _children_to_descend_into(current_node)
|
|
3347
|
+
|
|
3348
|
+
|
|
3349
|
+
def _usefixtures_decorator_requests_isolation_fixture(decorator_node: ast.expr) -> bool:
    """Tell whether *decorator_node* is a ``usefixtures`` call naming an isolation fixture.

    Matches both ``@pytest.mark.usefixtures("monkeypatch")`` and the shorter
    ``@mark.usefixtures("monkeypatch")``. The decorator must be an ``ast.Call``
    on an attribute whose dotted chain ends with
    ``PYTEST_USEFIXTURES_MARKER_NAME``, and at least one positional argument
    must be a string constant listed in
    ``ALL_PYTEST_FILESYSTEM_ISOLATION_FIXTURE_NAMES``.

    Args:
        decorator_node: One decorator expression taken from a test's decorator list.

    Returns:
        True when the decorator requests an isolation fixture by name, False
        for every other decorator shape.
    """
    is_attribute_call = isinstance(decorator_node, ast.Call) and isinstance(
        decorator_node.func, ast.Attribute
    )
    if not is_attribute_call:
        return False
    dotted_callee = _dotted_attribute_chain(decorator_node.func)
    if dotted_callee is None or not dotted_callee.endswith(PYTEST_USEFIXTURES_MARKER_NAME):
        return False
    # Only literal string arguments can be matched statically.
    return any(
        isinstance(each_positional, ast.Constant)
        and isinstance(each_positional.value, str)
        and each_positional.value in ALL_PYTEST_FILESYSTEM_ISOLATION_FIXTURE_NAMES
        for each_positional in decorator_node.args
    )
|
|
3380
|
+
|
|
3381
|
+
|
|
3382
|
+
def _function_uses_pytest_isolation_fixture(
    function_node: ast.FunctionDef | ast.AsyncFunctionDef,
) -> bool:
    """Tell whether a test function requests a pytest isolation fixture.

    A fixture counts as requested when any positional-only, regular, or
    keyword-only parameter is named in
    ``ALL_PYTEST_FILESYSTEM_ISOLATION_FIXTURE_NAMES``, or when one of the
    function's decorators is a ``usefixtures`` call naming such a fixture.
    ``*args`` / ``**kwargs`` parameters are not inspected.

    Args:
        function_node: The test function definition to inspect.

    Returns:
        True when the signature or a decorator requests an isolation fixture.
    """
    signature = function_node.args
    all_named_parameters = [
        *signature.posonlyargs,
        *signature.args,
        *signature.kwonlyargs,
    ]
    if any(
        each_parameter.arg in ALL_PYTEST_FILESYSTEM_ISOLATION_FIXTURE_NAMES
        for each_parameter in all_named_parameters
    ):
        return True
    return any(
        _usefixtures_decorator_requests_isolation_fixture(each_decorator)
        for each_decorator in function_node.decorator_list
    )
|
|
3398
|
+
|
|
3399
|
+
|
|
3400
|
+
def check_tests_use_isolated_filesystem_paths(
    content: str,
    file_path: str,
    all_changed_lines: set[int] | None = None,
    defer_scope_to_caller: bool = False,
) -> list[str]:
    """Report test functions that probe HOME or TMP without pytest isolation.

    Pattern class: a test that calls ``Path.home()``,
    ``os.path.expanduser('~')``, ``os.getenv('HOME'|'USERPROFILE'|'TMPDIR'|…)``,
    ``os.environ['HOME'|…]``, or ``tempfile.gettempdir()`` against the real
    environment leaks state across the suite and shows up as an
    environment-coupled bug (audit Theme M).

    Suppression: a test whose signature takes ``monkeypatch`` is treated as
    intentionally isolated and passes, since ``monkeypatch.setenv('HOME', ...)``
    can intercept every env-derived probe; the suppression applies uniformly
    to every probe type listed below. ``tmp_path`` / ``tmp_path_factory`` /
    ``tmpdir`` / ``tmpdir_factory`` only allocate alternative sandbox paths
    and do not intercept env reads, so their presence alone does not suppress
    the check.

    Scope: module-level helpers and fixtures (any function whose name does not
    start with ``test_`` or ``should_``) are ignored. Only pytest-collectable
    ``def test_*`` / ``async def test_*`` / ``def should_*`` functions, at
    module level or as class methods, are scanned.

    Covered forms (API surface × access form):
        Probe API surfaces — ``pathlib.Path.home()``,
        ``pathlib.Path('~...').expanduser()``, ``os.path.expanduser(arg)``,
        ``os.path.expandvars(arg)``, ``os.getenv(name)``,
        ``os.environ[name]``, ``os.environ.get(name)``, and the ``tempfile``
        allocators (``gettempdir``, ``gettempdirb``, ``gettempprefix``,
        ``mkstemp``, ``mkdtemp``, ``mktemp``, ``NamedTemporaryFile``,
        ``TemporaryFile``, ``TemporaryDirectory``, ``SpooledTemporaryFile``).
        Each surface is recognized through four access forms: (1) canonical
        dotted (``os.path.expanduser``), (2) module-level ``from X import
        name`` bare use (``from os import environ; environ['HOME']``),
        (3) module-level aliased import (``import tempfile as tf;
        tf.mkdtemp()``), and (4) a function-local binding tracked per test —
        a function-local import (``def t(): from os import environ;
        environ['HOME']``) or a local rebinding (``path_class = Path;
        path_class.home()``; ``read_env = os.getenv; read_env('HOME')``).
        Function-local bindings stay with their own test: a same-named bare
        reference in a sibling test without its own binding does not fire.
        Gating is symmetric across the two ``expanduser`` forms (flag only on
        a leading-``~`` literal) and across the env getters / subscript (flag
        only on a home/temp env-var name). Probes are reported in source-line
        order for every probe type.

    Out of scope by design (dynamically constructed call targets that no
    AST-level pattern can resolve statically): attribute access through
    ``getattr(os, 'environ')``, callable names assembled at runtime by string
    concatenation, and calls built through ``exec``/``eval``. This keeps the
    detector on a fixed, documented surface instead of an open-ended chase.

    Args:
        content: The Python source to analyze.
        file_path: The path of the file being checked. Non-test files yield
            no issues.
        all_changed_lines: Post-edit line numbers the current edit touched, or
            None to treat the whole file as in scope. When provided, a probe
            blocks when any line of its enclosing test function's declared
            span (signature line through last body line) is among the changed
            lines, so editing the signature to remove an isolation fixture
            brings an unchanged-body probe into scope.
        defer_scope_to_caller: When True, return every probe so the
            commit/push gate's ``split_violations_by_scope`` can scope by
            added line and report the in-scope set.

    Returns:
        One issue string per offending probe call. With
        *defer_scope_to_caller* set, every probe is returned for the gate to
        scope; otherwise only the probes in scope are returned. Source that
        fails to parse yields an empty list.
    """
    if not is_test_file(file_path):
        return []

    try:
        syntax_tree = ast.parse(content)
    except SyntaxError:
        return []

    all_module_aliases = _build_alias_canonicalization_map(syntax_tree)
    all_span_tagged_violations: list[tuple[range, str]] = []
    for each_test_function in _collect_pytest_collectable_test_functions(syntax_tree):
        if _function_uses_pytest_isolation_fixture(each_test_function):
            continue
        # Function-local bindings shadow module-level ones for this test only.
        all_aliases_for_test = dict(all_module_aliases)
        all_aliases_for_test.update(
            _collect_local_probe_alias_bindings(each_test_function, all_module_aliases)
        )
        all_environ_names = _collect_os_environ_local_binding_names(
            each_test_function, all_aliases_for_test
        )
        all_path_names = _collect_pathlib_path_local_binding_names(
            each_test_function, all_aliases_for_test
        )
        declared_line_count = _function_definition_line_span(each_test_function)
        first_line = each_test_function.lineno
        # Every probe is tagged with the whole function span, not its own line.
        declared_span = range(first_line, first_line + declared_line_count)
        all_probes = _detect_home_or_temp_probes_in_body(
            each_test_function, all_aliases_for_test, all_environ_names, all_path_names
        )
        for each_probe_line, each_probe_label in all_probes:
            issue_text = (
                f"Line {each_probe_line}: Test {each_test_function.name!r} "
                f"(defined at line {first_line}, spanning {declared_line_count} lines) "
                f"probes {each_probe_label} - {TEST_ISOLATION_MESSAGE_SUFFIX}"
            )
            all_span_tagged_violations.append((declared_span, issue_text))
    return _scope_violations_to_changed_lines(
        all_span_tagged_violations,
        all_changed_lines,
        defer_scope_to_caller,
    )
|
|
3510
|
+
|
|
3511
|
+
|
|
2231
3512
|
def _collect_assert_nodes_bounded(node: ast.AST) -> list[ast.Assert]:
|
|
2232
3513
|
"""Collect Assert nodes under node without crossing scope boundaries.
|
|
2233
3514
|
|
|
@@ -3982,10 +5263,10 @@ def check_loop_variable_naming(content: str, file_path: str) -> list[str]:
|
|
|
3982
5263
|
except SyntaxError:
|
|
3983
5264
|
return []
|
|
3984
5265
|
issues: list[str] = []
|
|
3985
|
-
for
|
|
3986
|
-
if not isinstance(
|
|
5266
|
+
for each_node in ast.walk(tree):
|
|
5267
|
+
if not isinstance(each_node, (ast.For, ast.AsyncFor)):
|
|
3987
5268
|
continue
|
|
3988
|
-
for each_name_node in _collect_target_names(
|
|
5269
|
+
for each_name_node in _collect_target_names(each_node.target):
|
|
3989
5270
|
target_name = each_name_node.id
|
|
3990
5271
|
if target_name in ALL_LOOP_INDEX_LETTER_EXEMPTIONS:
|
|
3991
5272
|
continue
|
|
@@ -4046,11 +5327,180 @@ def check_return_annotations(content: str, file_path: str) -> list[str]:
|
|
|
4046
5327
|
return issues
|
|
4047
5328
|
|
|
4048
5329
|
|
|
5330
|
+
def _function_definition_line_span(
|
|
5331
|
+
function_node: ast.FunctionDef | ast.AsyncFunctionDef,
|
|
5332
|
+
) -> int:
|
|
5333
|
+
end_lineno = getattr(function_node, "end_lineno", None) or function_node.lineno
|
|
5334
|
+
return end_lineno - function_node.lineno + 1
|
|
5335
|
+
|
|
5336
|
+
|
|
5337
|
+
def changed_line_numbers(prior_content: str, post_edit_content: str) -> set[int]:
    """Return the post-edit line numbers an edit added or replaced.

    Runs a line-level diff of *prior_content* against *post_edit_content* and
    collects the 1-indexed line numbers in *post_edit_content* covered by an
    opcode whose tag is in ``ALL_DIFF_CHANGED_OPCODE_TAGS`` (the ``replace``
    and ``insert`` opcodes). This mirrors the "added lines" notion that
    ``code_rules_gate.parse_added_line_numbers`` derives from
    ``git diff --unified=0``, so the PreToolUse layer and the gate agree on
    which lines the change touched.

    Lines are split only on ``\\n``, ``\\r\\n``, and ``\\r``. ``str.splitlines``
    also breaks on form feed, ``\\x1c``-``\\x1e``, ``\\x85``, ``\\u2028`` and
    ``\\u2029``, which Python does not count as line ends, so using it would
    shift every number after such a character away from the ``ast`` line
    numbers the scoped checks compare against.

    Args:
        prior_content: The file content before the edit.
        post_edit_content: The reconstructed file content after the edit.

    Returns:
        The set of 1-indexed line numbers in *post_edit_content* that the edit
        added or replaced.
    """

    def split_into_source_lines(source_text: str) -> list[str]:
        """Split text on the line endings Python uses for line numbering."""
        all_lines = source_text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
        # A trailing newline ends the last line; it does not open a new one.
        if all_lines[-1] == "":
            all_lines.pop()
        return all_lines

    matcher = difflib.SequenceMatcher(
        a=split_into_source_lines(prior_content),
        b=split_into_source_lines(post_edit_content),
        autojunk=False,
    )
    all_changed_lines: set[int] = set()
    for each_tag, _, _, each_post_start, each_post_end in matcher.get_opcodes():
        if each_tag in ALL_DIFF_CHANGED_OPCODE_TAGS:
            # Opcode indices are 0-based and half-open; line numbers are 1-based.
            all_changed_lines.update(range(each_post_start + 1, each_post_end + 1))
    return all_changed_lines
|
|
5366
|
+
|
|
5367
|
+
|
|
5368
|
+
def _scope_violations_to_changed_lines(
|
|
5369
|
+
all_violations_in_walk_order: list[tuple[range, str]],
|
|
5370
|
+
all_changed_lines: set[int] | None,
|
|
5371
|
+
defer_scope_to_caller: bool = False,
|
|
5372
|
+
) -> list[str]:
|
|
5373
|
+
"""Scope span-tagged violations by diff intersection.
|
|
5374
|
+
|
|
5375
|
+
In-scope violations are always reported; the untouched out-of-scope set is
|
|
5376
|
+
surfaced or dropped according to which caller path is active:
|
|
5377
|
+
|
|
5378
|
+
- ``defer_scope_to_caller`` True (the commit/push gate): every violation is
|
|
5379
|
+
returned in walk order so the gate's ``split_violations_by_scope`` can
|
|
5380
|
+
classify blocking vs advisory by added line. The gate does this scoping,
|
|
5381
|
+
so no scoping happens here.
|
|
5382
|
+
- ``all_changed_lines`` None (a terminal new-file or full-file write): every
|
|
5383
|
+
line was just authored, so every violation is in scope and returned.
|
|
5384
|
+
- ``all_changed_lines`` provided (a terminal diff-scoped Edit): only the
|
|
5385
|
+
in-scope violations whose span intersects the changed lines are returned;
|
|
5386
|
+
the untouched out-of-scope set is dropped, because untouched code must not
|
|
5387
|
+
block a single-file edit.
|
|
5388
|
+
|
|
5389
|
+
Args:
|
|
5390
|
+
all_violations_in_walk_order: ``(span_range, issue_message)`` pairs in
|
|
5391
|
+
``ast.walk`` traversal order, where ``span_range`` covers the
|
|
5392
|
+
violation's source lines.
|
|
5393
|
+
all_changed_lines: Post-edit line numbers the current edit touched, or
|
|
5394
|
+
None to treat every violation as in-scope.
|
|
5395
|
+
defer_scope_to_caller: When True, return every violation message in walk
|
|
5396
|
+
order so the gate scopes by added line. When False, this enforcer is
|
|
5397
|
+
terminal and scopes directly.
|
|
5398
|
+
|
|
5399
|
+
Returns:
|
|
5400
|
+
Every violation message when *defer_scope_to_caller* is True or
|
|
5401
|
+
*all_changed_lines* is None; otherwise only the in-scope messages whose
|
|
5402
|
+
span intersects the changed lines — so an edit that grows a function
|
|
5403
|
+
past the threshold always blocks even when many earlier untouched
|
|
5404
|
+
functions already exceed it.
|
|
5405
|
+
"""
|
|
5406
|
+
if defer_scope_to_caller:
|
|
5407
|
+
return [each_message for _, each_message in all_violations_in_walk_order]
|
|
5408
|
+
if all_changed_lines is None:
|
|
5409
|
+
return [each_message for _, each_message in all_violations_in_walk_order]
|
|
5410
|
+
return [
|
|
5411
|
+
each_message
|
|
5412
|
+
for each_span, each_message in all_violations_in_walk_order
|
|
5413
|
+
if any(each_line in all_changed_lines for each_line in each_span)
|
|
5414
|
+
]
|
|
5415
|
+
|
|
5416
|
+
|
|
5417
|
+
def check_function_length(
    content: str,
    file_path: str,
    all_changed_lines: set[int] | None = None,
    defer_scope_to_caller: bool = False,
) -> list[str]:
    """Report functions whose definition span exceeds cognitive-load thresholds.

    A function's definition span runs from its signature line through its last
    body statement, inclusive. Spans at or above
    ``FUNCTION_LENGTH_BLOCKING_THRESHOLD`` (60 lines) are returned as issues
    and block the write at the gate. The threshold rests on the small-function
    guidance in Robert C. Martin, *Clean Code* Ch. 3 ("Functions") and the
    Google Python Style Guide's ~40-line function review hint
    (https://google.github.io/styleguide/pyguide.html); this gate blocks on
    body growth that pushes a function past that span. It does not derive from
    CODE_RULES §6.5, which governs advisory file-length signals and argues
    against hard numeric blocks.

    The issue message is worded ``Function NAME (defined at line X) is Y
    lines`` so that the gate's ``function_length_span_range`` can recover the
    full declared span (lines ``X`` through ``X + Y - 1``). The gate treats
    the violation as blocking when that span intersects the diff's added
    lines — the body grew in this diff — and as advisory otherwise — a
    pre-existing, untouched long function in a file the diff happened to
    touch. Anchoring to the span rather than to a single ``Line N:``
    definition line lets growth on any interior line block correctly even
    when the ``def`` line itself is untouched.

    Exempt: test files (test bodies are sometimes long by necessity), Django
    migrations (auto-generated), workflow registries (registry entries), and
    hook infrastructure.

    Args:
        content: The Python source to analyze.
        file_path: The path of the file being checked.
        all_changed_lines: Post-edit line numbers the current edit touched, or
            None to treat the whole file as in scope. When provided, a
            violation blocks only when the function's declared span intersects
            the changed lines.
        defer_scope_to_caller: When True, return every violation so the
            commit/push gate's ``split_violations_by_scope`` can scope by
            added line and report the in-scope set.

    Returns:
        Blocking issues. With *defer_scope_to_caller* set, every violation is
        returned for the gate to scope; otherwise only the violations in scope
        are returned. Exempt files and unparsable source yield an empty list.
    """
    is_exempt_file = (
        is_test_file(file_path)
        or is_hook_infrastructure(file_path)
        or is_workflow_registry_file(file_path)
        or is_migration_file(file_path)
    )
    if is_exempt_file:
        return []

    try:
        parsed_tree = ast.parse(content)
    except SyntaxError:
        return []

    all_span_tagged_violations: list[tuple[range, str]] = []
    for each_candidate in ast.walk(parsed_tree):
        if not isinstance(each_candidate, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        declared_line_count = _function_definition_line_span(each_candidate)
        if declared_line_count < FUNCTION_LENGTH_BLOCKING_THRESHOLD:
            continue
        first_line = each_candidate.lineno
        issue_text = (
            f"Function {each_candidate.name!r} (defined at line {first_line}) "
            f"is {declared_line_count} lines - {FUNCTION_LENGTH_BLOCKING_MESSAGE_SUFFIX}"
        )
        all_span_tagged_violations.append(
            (range(first_line, first_line + declared_line_count), issue_text)
        )
    return _scope_violations_to_changed_lines(
        all_span_tagged_violations,
        all_changed_lines,
        defer_scope_to_caller,
    )
|
|
5495
|
+
|
|
5496
|
+
|
|
4049
5497
|
def validate_content(
|
|
4050
5498
|
content: str,
|
|
4051
5499
|
file_path: str,
|
|
4052
5500
|
old_content: str = "",
|
|
4053
5501
|
full_file_content: str | None = None,
|
|
5502
|
+
prior_full_file_content: str = "",
|
|
5503
|
+
defer_scope_to_caller: bool = False,
|
|
4054
5504
|
) -> list[str]:
|
|
4055
5505
|
"""Run all applicable validators on content.
|
|
4056
5506
|
|
|
@@ -4065,10 +5515,31 @@ def validate_content(
|
|
|
4065
5515
|
by ``new_string``). Whole-file checks such as the unused-import
|
|
4066
5516
|
scanner use this to evaluate references across the file rather than
|
|
4067
5517
|
just within the inserted fragment.
|
|
5518
|
+
prior_full_file_content: For Edit operations, the entire file content as
|
|
5519
|
+
it existed before the edit applied. Whole-file span checks
|
|
5520
|
+
(function length, test isolation) diff this against
|
|
5521
|
+
``full_file_content`` to recover the lines the edit touched, then
|
|
5522
|
+
block only on violations whose source span intersects those lines —
|
|
5523
|
+
mirroring the gate's span-intersection scoping. Defaults to the
|
|
5524
|
+
empty string for Write and for gate invocations, which leaves those
|
|
5525
|
+
checks scanning the whole file with no diff scoping.
|
|
5526
|
+
defer_scope_to_caller: The explicit signal that a downstream scoper will
|
|
5527
|
+
run, used to disambiguate the two callers that supply no changed-line
|
|
5528
|
+
set. The commit/push gate passes True: it owns
|
|
5529
|
+
``split_violations_by_scope`` and classifies blocking vs advisory by
|
|
5530
|
+
added line, so the function-length, test-isolation, and banned-noun
|
|
5531
|
+
checks return their violations unscoped for the gate to classify.
|
|
5532
|
+
PreToolUse new-file or full-file writes leave this False: this
|
|
5533
|
+
enforcer is terminal, so it marks every violation in scope.
|
|
4068
5534
|
"""
|
|
4069
5535
|
extension = get_file_extension(file_path)
|
|
4070
5536
|
all_issues = []
|
|
4071
5537
|
effective_content = content if full_file_content is None else full_file_content
|
|
5538
|
+
all_changed_lines = (
|
|
5539
|
+
changed_line_numbers(prior_full_file_content, full_file_content)
|
|
5540
|
+
if full_file_content is not None
|
|
5541
|
+
else None
|
|
5542
|
+
)
|
|
4072
5543
|
|
|
4073
5544
|
if extension in ALL_PYTHON_EXTENSIONS:
|
|
4074
5545
|
if not is_test_file(file_path):
|
|
@@ -4083,6 +5554,14 @@ def validate_content(
|
|
|
4083
5554
|
all_issues.extend(check_file_global_constants_use_count(content, file_path))
|
|
4084
5555
|
all_issues.extend(check_type_escape_hatches(effective_content, file_path))
|
|
4085
5556
|
all_issues.extend(check_banned_identifiers(content, file_path))
|
|
5557
|
+
all_issues.extend(
|
|
5558
|
+
check_banned_noun_word_boundary(
|
|
5559
|
+
effective_content,
|
|
5560
|
+
file_path,
|
|
5561
|
+
all_changed_lines,
|
|
5562
|
+
defer_scope_to_caller,
|
|
5563
|
+
)
|
|
5564
|
+
)
|
|
4086
5565
|
all_issues.extend(check_banned_prefixes(effective_content, file_path))
|
|
4087
5566
|
all_issues.extend(check_stub_implementations(effective_content, file_path))
|
|
4088
5567
|
all_issues.extend(check_typed_dict_encode_decode(effective_content, file_path))
|
|
@@ -4093,6 +5572,14 @@ def validate_content(
|
|
|
4093
5572
|
all_issues.extend(check_docstring_format(effective_content, file_path))
|
|
4094
5573
|
all_issues.extend(check_boolean_naming(content, file_path))
|
|
4095
5574
|
all_issues.extend(check_skip_decorators_in_tests(content, file_path))
|
|
5575
|
+
all_issues.extend(
|
|
5576
|
+
check_tests_use_isolated_filesystem_paths(
|
|
5577
|
+
effective_content,
|
|
5578
|
+
file_path,
|
|
5579
|
+
all_changed_lines,
|
|
5580
|
+
defer_scope_to_caller,
|
|
5581
|
+
)
|
|
5582
|
+
)
|
|
4096
5583
|
all_issues.extend(check_existence_check_tests(content, file_path))
|
|
4097
5584
|
all_issues.extend(check_constant_equality_tests(content, file_path))
|
|
4098
5585
|
all_issues.extend(check_unused_optional_parameters(content, file_path))
|
|
@@ -4106,6 +5593,14 @@ def validate_content(
|
|
|
4106
5593
|
all_issues.extend(check_library_print(content, file_path))
|
|
4107
5594
|
all_issues.extend(check_parameter_annotations(content, file_path))
|
|
4108
5595
|
all_issues.extend(check_return_annotations(content, file_path))
|
|
5596
|
+
all_issues.extend(
|
|
5597
|
+
check_function_length(
|
|
5598
|
+
effective_content,
|
|
5599
|
+
file_path,
|
|
5600
|
+
all_changed_lines,
|
|
5601
|
+
defer_scope_to_caller,
|
|
5602
|
+
)
|
|
5603
|
+
)
|
|
4109
5604
|
all_issues.extend(check_loop_variable_naming(content, file_path))
|
|
4110
5605
|
all_issues.extend(check_inline_literal_collections(content, file_path))
|
|
4111
5606
|
all_issues.extend(check_inline_tuple_string_magic(content, file_path))
|
|
@@ -4124,28 +5619,50 @@ def validate_content(
|
|
|
4124
5619
|
return all_issues
|
|
4125
5620
|
|
|
4126
5621
|
|
|
4127
|
-
def
|
|
4128
|
-
file_path
|
|
4129
|
-
) -> str | None:
|
|
4130
|
-
"""Return the file content as it will look after the Edit applies, or None.
|
|
4131
|
-
|
|
4132
|
-
Reads ``file_path`` from disk and replaces the first occurrence of
|
|
4133
|
-
``old_string`` with ``new_string``, mirroring how the Edit tool itself
|
|
4134
|
-
applies a single replacement. Returns None when the file cannot be read,
|
|
4135
|
-
``old_string`` is empty, or ``old_string`` is not present in the existing
|
|
4136
|
-
file (which means the Edit will fail or has already been applied — neither
|
|
4137
|
-
case yields a well-defined post-edit view).
|
|
4138
|
-
"""
|
|
4139
|
-
if not old_string:
|
|
4140
|
-
return None
|
|
5622
|
+
def _read_existing_file_content(file_path: str) -> str | None:
|
|
5623
|
+
"""Return the on-disk content of *file_path*, or None when it cannot be read."""
|
|
4141
5624
|
try:
|
|
4142
5625
|
with open(file_path, "r", encoding="utf-8") as existing_file:
|
|
4143
|
-
|
|
5626
|
+
return existing_file.read()
|
|
4144
5627
|
except (FileNotFoundError, OSError, UnicodeDecodeError):
|
|
4145
5628
|
return None
|
|
5629
|
+
|
|
5630
|
+
|
|
5631
|
+
def prior_and_post_edit_content(
    file_path: str, old_string: str, new_string: str,
) -> tuple[str | None, str | None]:
    """Build the before and after views of an Edit from one read of the file.

    ``file_path`` is read exactly once and both views come from that read, so
    the prior content and the reconstruction cannot drift apart the way two
    independent reads could. The post-edit view swaps the first occurrence of
    ``old_string`` for ``new_string``, the same single replacement the Edit
    tool itself applies.

    ``(None, None)`` is returned when the file cannot be read, when
    ``old_string`` is empty, or when ``old_string`` does not occur in the
    existing file (the Edit will fail or has already been applied — neither
    case yields a well-defined post-edit view). A failed prior read is never
    turned into an empty string here: an empty prior would diff every line of
    the reconstruction as changed and defeat the diff scoping the scoped
    checks rely on.

    Args:
        file_path: The path of the file the Edit targets.
        old_string: The Edit's ``old_string`` fragment.
        new_string: The Edit's ``new_string`` fragment.

    Returns:
        A ``(prior_content, post_edit_content)`` pair, or ``(None, None)`` when
        no well-defined post-edit view exists.
    """
    if not old_string:
        return None, None
    content_on_disk = _read_existing_file_content(file_path)
    if content_on_disk is None or old_string not in content_on_disk:
        return None, None
    content_after_edit = content_on_disk.replace(old_string, new_string, 1)
    return content_on_disk, content_after_edit
|
|
4149
5666
|
|
|
4150
5667
|
|
|
4151
5668
|
def main() -> None:
|
|
@@ -4169,20 +5686,22 @@ def main() -> None:
|
|
|
4169
5686
|
sys.exit(0)
|
|
4170
5687
|
|
|
4171
5688
|
old_content = ""
|
|
5689
|
+
prior_full_file_content = ""
|
|
4172
5690
|
full_file_content_after_edit: str | None = None
|
|
4173
5691
|
if tool_name == "Edit":
|
|
4174
5692
|
content = tool_input.get("new_string", "")
|
|
4175
5693
|
old_content = tool_input.get("old_string", "")
|
|
4176
|
-
full_file_content_after_edit =
|
|
5694
|
+
prior_content, full_file_content_after_edit = prior_and_post_edit_content(
|
|
4177
5695
|
file_path, old_content, content,
|
|
4178
5696
|
)
|
|
5697
|
+
prior_full_file_content = prior_content or ""
|
|
5698
|
+
if full_file_content_after_edit is None:
|
|
5699
|
+
full_file_content_after_edit = _read_existing_file_content(file_path)
|
|
5700
|
+
if full_file_content_after_edit is None:
|
|
5701
|
+
sys.exit(0)
|
|
4179
5702
|
else:
|
|
4180
5703
|
content = tool_input.get("content", "") or tool_input.get("new_string", "")
|
|
4181
|
-
|
|
4182
|
-
with open(file_path, "r", encoding="utf-8") as existing_file:
|
|
4183
|
-
old_content = existing_file.read()
|
|
4184
|
-
except (FileNotFoundError, OSError, UnicodeDecodeError):
|
|
4185
|
-
old_content = ""
|
|
5704
|
+
old_content = _read_existing_file_content(file_path) or ""
|
|
4186
5705
|
|
|
4187
5706
|
if old_content:
|
|
4188
5707
|
sys.exit(0)
|
|
@@ -4190,7 +5709,13 @@ def main() -> None:
|
|
|
4190
5709
|
if not content:
|
|
4191
5710
|
sys.exit(0)
|
|
4192
5711
|
|
|
4193
|
-
issues = validate_content(
|
|
5712
|
+
issues = validate_content(
|
|
5713
|
+
content,
|
|
5714
|
+
file_path,
|
|
5715
|
+
old_content,
|
|
5716
|
+
full_file_content_after_edit,
|
|
5717
|
+
prior_full_file_content,
|
|
5718
|
+
)
|
|
4194
5719
|
|
|
4195
5720
|
if issues:
|
|
4196
5721
|
issue_list = "; ".join(issues[:10])
|