npm - claude-dev-env - Versions diffs - 1.50.1 → 1.50.3 - Mend

claude-dev-env 1.50.1 → 1.50.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/hooks/blocking/code_rules_boolean_mustcheck.py ADDED Viewed

@@ -0,0 +1,350 @@
+"""Boolean naming-prefix and ignored must-check-return checks."""
+import ast
+import sys
+from pathlib import Path
+_blocking_directory = str(Path(__file__).resolve().parent)
+_hooks_directory = str(Path(__file__).resolve().parent.parent)
+if _blocking_directory not in sys.path:
+    sys.path.insert(0, _blocking_directory)
+if _hooks_directory not in sys.path:
+    sys.path.insert(0, _hooks_directory)
+from code_rules_path_utils import (  # noqa: E402
+    is_config_file,
+)
+from code_rules_shared import (  # noqa: E402
+    _scope_violations_to_changed_lines,
+    is_hook_infrastructure,
+    is_test_file,
+    is_workflow_registry_file,
+)
+from hooks_constants.blocking_check_limits import (  # noqa: E402
+    MAX_IGNORED_MUST_CHECK_RETURN_ISSUES,
+)
+from hooks_constants.code_rules_enforcer_constants import (  # noqa: E402
+    ALL_BOOLEAN_NAME_PREFIXES,
+    ALL_MUST_CHECK_RETURN_FUNCTION_NAMES,
+    ALL_SELF_AND_CLS_PARAMETER_NAMES,
+    UPPER_SNAKE_CONSTANT_PATTERN,
+)
+def _is_bool_constant(node: ast.AST) -> bool:
+    """Return True when *node* is a literal ``True`` or ``False`` constant."""
+    if not isinstance(node, ast.Constant):
+        return False
+    return isinstance(node.value, bool)
+def _rhs_names_if_all_bool(value_node: ast.AST, target_node: ast.AST) -> list[str]:
+    """Return the plain names of a tuple target unpacked from an all-bool tuple.
+    Covers statements such as `valid, permitted = True, False`: both sides must
+    be tuples of equal length and every right-hand element must be a bool
+    constant. Target elements that are not plain names are left out of the
+    result. Any other shape yields an empty list.
+    """
+    is_tuple_pair = isinstance(target_node, ast.Tuple) and isinstance(value_node, ast.Tuple)
+    if not is_tuple_pair:
+        return []
+    if len(target_node.elts) != len(value_node.elts):
+        return []
+    for each_value in value_node.elts:
+        if not _is_bool_constant(each_value):
+            return []
+    return [
+        each_target.id
+        for each_target in target_node.elts
+        if isinstance(each_target, ast.Name)
+    ]
+def _assign_target_names_for_bool(node: ast.Assign) -> list[str]:
+    """Return every name that the assignment *node* binds to a bool constant.
+    A plain-name target counts when the assigned value is itself a bool
+    constant; any other target is handed to ``_rhs_names_if_all_bool`` so that
+    tuple unpacking of bool constants is covered as well. Chained assignments
+    contribute one entry per qualifying target, in target order.
+    """
+    is_value_bool_constant = _is_bool_constant(node.value)
+    collected_names: list[str] = []
+    for each_target in node.targets:
+        if is_value_bool_constant and isinstance(each_target, ast.Name):
+            collected_names.append(each_target.id)
+            continue
+        collected_names += _rhs_names_if_all_bool(node.value, each_target)
+    return collected_names
+def _annassign_target_name_for_bool(node: ast.AnnAssign) -> list[str]:
+    """Return the target name of a ``name: bool = True`` style statement.
+    The result is a one-element list only when the target is a plain name, the
+    annotation is the bare ``bool`` name, and a bool constant is assigned.
+    Every other annotated assignment yields an empty list.
+    """
+    target = node.target
+    if not isinstance(target, ast.Name):
+        return []
+    annotation = node.annotation
+    if not (isinstance(annotation, ast.Name) and annotation.id == "bool"):
+        return []
+    if node.value is None or not _is_bool_constant(node.value):
+        return []
+    return [target.id]
+def _walrus_name_for_bool(node: ast.NamedExpr) -> list[str]:
+    """Return the name bound by a walrus expression whose value is a bool constant.
+    Yields a one-element list for ``(name := True)`` / ``(name := False)`` and
+    an empty list for any other named expression.
+    """
+    is_plain_name_target = isinstance(node.target, ast.Name)
+    if is_plain_name_target and _is_bool_constant(node.value):
+        return [node.target.id]
+    return []
+def _collect_boolean_assignments(tree: ast.Module) -> list[tuple[str, int, bool]]:
+    """Gather bool-constant bindings as (name, line_number, is_upper_snake_scope).
+    Plain assignments, annotated assignments, and walrus expressions are all
+    inspected. The third element is True when the binding statement sits
+    directly in the module body or directly in a class body, which is where
+    UPPER_SNAKE constants are acceptable; bindings inside functions or methods
+    carry False.
+    Invariant: scope membership is tracked through ``id()`` of the statement
+    nodes, so both ``ast.walk`` passes must see the very same node objects. Do
+    not feed this helper a tree rebuilt by an ``ast.NodeTransformer``; replaced
+    nodes would silently lose their scope tag.
+    """
+    upper_snake_scope_ids: set[int] = {id(each_statement) for each_statement in tree.body}
+    for each_node in ast.walk(tree):
+        if isinstance(each_node, ast.ClassDef):
+            upper_snake_scope_ids.update(id(each_member) for each_member in each_node.body)
+    collected: list[tuple[str, int, bool]] = []
+    for each_node in ast.walk(tree):
+        if isinstance(each_node, ast.Assign):
+            names = _assign_target_names_for_bool(each_node)
+        elif isinstance(each_node, ast.AnnAssign):
+            names = _annassign_target_name_for_bool(each_node)
+        elif isinstance(each_node, ast.NamedExpr):
+            names = _walrus_name_for_bool(each_node)
+        else:
+            continue
+        is_in_upper_snake_scope = id(each_node) in upper_snake_scope_ids
+        collected.extend(
+            (each_name, each_node.lineno, is_in_upper_snake_scope) for each_name in names
+        )
+    return collected
+def _argument_is_boolean(argument_node: ast.arg, default_node: ast.expr | None) -> bool:
+    """Return True when a parameter is annotated ``bool`` or defaults to a bool literal.
+    Args:
+        argument_node: The parameter's ``ast.arg`` node.
+        default_node: The parameter's default expression, or None when it has
+            no default.
+    """
+    annotation = argument_node.annotation
+    if isinstance(annotation, ast.Name) and annotation.id == "bool":
+        return True
+    if default_node is None:
+        return False
+    return _is_bool_constant(default_node)
+def _bool_parameters_for_function(
+    function_node: ast.FunctionDef | ast.AsyncFunctionDef,
+) -> list[tuple[str, int]]:
+    """Return (name, line_number) for each boolean parameter of *function_node*.
+    Positional-only and regular parameters come first, followed by
+    keyword-only parameters. Positional defaults align with the trailing
+    positional parameters, so the leading ones are paired with None. Names
+    listed in ``ALL_SELF_AND_CLS_PARAMETER_NAMES`` are skipped.
+    """
+    arguments = function_node.args
+    positional_arguments = arguments.posonlyargs + arguments.args
+    missing_default_count = len(positional_arguments) - len(arguments.defaults)
+    padded_defaults: list[ast.expr | None] = [None] * missing_default_count
+    padded_defaults += arguments.defaults
+    argument_default_pairs = list(zip(positional_arguments, padded_defaults))
+    argument_default_pairs.extend(zip(arguments.kwonlyargs, arguments.kw_defaults))
+    return [
+        (each_argument.arg, each_argument.lineno)
+        for each_argument, each_default in argument_default_pairs
+        if each_argument.arg not in ALL_SELF_AND_CLS_PARAMETER_NAMES
+        and _argument_is_boolean(each_argument, each_default)
+    ]
+def _collect_bool_parameter_names(tree: ast.Module) -> list[tuple[str, int]]:
+    """Gather (name, line_number) pairs for boolean parameters across *tree*.
+    Every sync and async function definition reachable through ``ast.walk`` is
+    inspected; the per-function rules live in ``_bool_parameters_for_function``.
+    Args:
+        tree: The parsed module to inspect.
+    Returns:
+        The boolean parameters of all functions, in walk order.
+    """
+    return [
+        each_parameter
+        for each_node in ast.walk(tree)
+        if isinstance(each_node, (ast.FunctionDef, ast.AsyncFunctionDef))
+        for each_parameter in _bool_parameters_for_function(each_node)
+    ]
+def check_boolean_naming(
+    content: str,
+    file_path: str,
+    all_changed_lines: set[int] | None = None,
+    defer_scope_to_caller: bool = False,
+) -> list[str]:
+    """Report boolean assignments and parameters named without a boolean prefix.
+    *content* is parsed with ``ast.parse``, so the caller supplies a complete
+    module (the reconstructed full file on an Edit) rather than an Edit's
+    ``new_string`` fragment. Test files, hook infrastructure, config files, and
+    workflow registry files are exempt. When parsing fails, an advisory goes to
+    stderr and nothing is reported. Single-character names are ignored, and so
+    are UPPER_SNAKE names assigned at module level or directly in a class body.
+    Args:
+        content: The source text to inspect.
+        file_path: The path the source will be written to, used for exemptions.
+        all_changed_lines: Post-edit line numbers the current edit touched, or
+            None to treat the whole file as in scope.
+        defer_scope_to_caller: When True, every violation is returned so the
+            commit/push gate can scope by added line itself.
+    Returns:
+        One message per unprefixed boolean assignment, then one per unprefixed
+        boolean parameter, scoped to the changed lines unless
+        *defer_scope_to_caller* is True or *all_changed_lines* is None. This
+        check applies no module cap.
+    """
+    is_exempt_path = (
+        is_test_file(file_path)
+        or is_hook_infrastructure(file_path)
+        or is_config_file(file_path)
+        or is_workflow_registry_file(file_path)
+    )
+    if is_exempt_path:
+        return []
+    try:
+        tree = ast.parse(content)
+    except SyntaxError as parse_error:
+        print(
+            f"[CODE_RULES advisory] {file_path}: boolean-naming check skipped - "
+            f"SyntaxError at line {parse_error.lineno}: {parse_error.msg}",
+            file=sys.stderr,
+        )
+        return []
+    boolean_prefix_suffix = "is_/has_/should_/can_/was_/did_"
+    all_violations_in_walk_order: list[tuple[range, str]] = []
+    for each_name, each_line_number, each_is_in_upper_snake_scope in _collect_boolean_assignments(tree):
+        if len(each_name) == 1 or each_name.startswith(ALL_BOOLEAN_NAME_PREFIXES):
+            continue
+        if each_is_in_upper_snake_scope and UPPER_SNAKE_CONSTANT_PATTERN.match(each_name):
+            continue
+        single_line_span = range(each_line_number, each_line_number + 1)
+        all_violations_in_walk_order.append(
+            (
+                single_line_span,
+                f"Line {each_line_number}: Boolean {each_name} - prefix with "
+                f"{boolean_prefix_suffix}",
+            )
+        )
+    for each_name, each_line_number in _collect_bool_parameter_names(tree):
+        if len(each_name) == 1 or each_name.startswith(ALL_BOOLEAN_NAME_PREFIXES):
+            continue
+        single_line_span = range(each_line_number, each_line_number + 1)
+        all_violations_in_walk_order.append(
+            (
+                single_line_span,
+                f"Line {each_line_number}: Boolean parameter {each_name} - prefix with "
+                f"{boolean_prefix_suffix}",
+            )
+        )
+    return _scope_violations_to_changed_lines(
+        all_violations_in_walk_order,
+        all_changed_lines,
+        defer_scope_to_caller,
+    )
+def _called_terminal_name(call_node: ast.Call) -> str | None:
+    """Return the last name segment of the callee, or None when it has none.
+    ``run()`` yields ``run`` and ``page.helper.run()`` yields ``run``; a callee
+    that is neither a plain name nor an attribute access (for example the
+    result of another call) yields None.
+    """
+    callee = call_node.func
+    if isinstance(callee, ast.Attribute):
+        return callee.attr
+    return callee.id if isinstance(callee, ast.Name) else None
+def check_ignored_must_check_return(
+    content: str,
+    file_path: str,
+    all_changed_lines: set[int] | None = None,
+    defer_scope_to_caller: bool = False,
+) -> list[str]:
+    """Flag bare-expression calls whose discarded return is the only failure signal.
+    Functions in ``ALL_MUST_CHECK_RETURN_FUNCTION_NAMES`` report success or failure
+    solely through their return value. A bare-statement call discards that value,
+    so the caller silently proceeds on failure. Bare ``ast.Expr`` calls are flagged,
+    including a bare ``await``-wrapped call (``await find_and_click(...)`` as a
+    statement); an assigned or branched-on call is exempt. The callee is matched
+    by its terminal name, so ``page.find_and_click(...)`` is treated the same as
+    ``find_and_click(...)``.
+    The caller passes the reconstructed full file as *content* so ``ast.parse``
+    sees a complete module rather than an Edit's ``new_string`` fragment, which is
+    rarely valid standalone Python (a bare ``await find_and_click(...)`` line is a
+    SyntaxError on its own). Findings are then scoped to *all_changed_lines* so an
+    Edit blocks on the discarded return it just introduced while a pre-existing
+    violation on an untouched line does not block the edit.
+    When *content* cannot be parsed the check is skipped and, matching
+    ``check_boolean_naming``, an advisory naming the syntax error is written to
+    stderr so the skip is visible rather than silent.
+    Args:
+        content: The source text to inspect — the reconstructed full file on an
+            Edit so the parse succeeds.
+        file_path: The path the source will be written to, used for exemptions.
+        all_changed_lines: Post-edit line numbers the current edit touched, or
+            None to treat the whole file as in scope. When provided, a violation
+            blocks only when the bare call's line span intersects the changed lines.
+        defer_scope_to_caller: When True, return every violation so the
+            commit/push gate's ``split_violations_by_scope`` can scope by added
+            line.
+    Returns:
+        One issue per discarded must-check return, scoped to the changed lines
+        unless *defer_scope_to_caller* is True or *all_changed_lines* is None. When
+        *defer_scope_to_caller* is True every violation is returned uncapped so the
+        gate can scope by added line and apply its own ceiling; otherwise the
+        terminal result is capped at ``MAX_IGNORED_MUST_CHECK_RETURN_ISSUES``.
+        Test files and unparseable content yield an empty list.
+    """
+    if is_test_file(file_path):
+        return []
+    try:
+        tree = ast.parse(content)
+    except SyntaxError as parse_error:
+        print(
+            f"[CODE_RULES advisory] {file_path}: must-check-return check skipped - "
+            f"SyntaxError at line {parse_error.lineno}: {parse_error.msg}",
+            file=sys.stderr,
+        )
+        return []
+    all_violations_in_walk_order: list[tuple[range, str]] = []
+    for each_node in ast.walk(tree):
+        if not isinstance(each_node, ast.Expr):
+            continue
+        expression_value = each_node.value
+        call_node = (
+            expression_value.value
+            if isinstance(expression_value, ast.Await)
+            else expression_value
+        )
+        if not isinstance(call_node, ast.Call):
+            continue
+        called_name = _called_terminal_name(call_node)
+        if called_name is None or called_name not in ALL_MUST_CHECK_RETURN_FUNCTION_NAMES:
+            continue
+        end_line_number = each_node.end_lineno or each_node.lineno
+        line_span = range(each_node.lineno, end_line_number + 1)
+        message = (
+            f"Line {each_node.lineno}: return value of {called_name}() is discarded - "
+            "assign and check it (the boolean/outcome is the only failure signal)"
+        )
+        all_violations_in_walk_order.append((line_span, message))
+    scoped_issues = _scope_violations_to_changed_lines(
+        all_violations_in_walk_order,
+        all_changed_lines,
+        defer_scope_to_caller,
+    )
+    if defer_scope_to_caller:
+        return scoped_issues
+    return scoped_issues[:MAX_IGNORED_MUST_CHECK_RETURN_ISSUES]

package/hooks/blocking/code_rules_comments.py ADDED Viewed

@@ -0,0 +1,337 @@
+"""Comment-presence and comment-change checks for Python and JavaScript sources."""
+import io
+import sys
+import tokenize
+from collections.abc import Iterator
+from pathlib import Path
+_blocking_directory = str(Path(__file__).resolve().parent)
+_hooks_directory = str(Path(__file__).resolve().parent.parent)
+if _blocking_directory not in sys.path:
+    sys.path.insert(0, _blocking_directory)
+if _hooks_directory not in sys.path:
+    sys.path.insert(0, _hooks_directory)
+from code_rules_shared import (  # noqa: E402
+    get_file_extension,
+)
+from hooks_constants.code_rules_enforcer_constants import (  # noqa: E402
+    ALL_FREE_FORM_EXEMPT_COMMENT_BODIES,
+    ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES,
+    ALL_JAVASCRIPT_EXEMPT_INLINE_COMMENT_PREFIXES,
+    ALL_JAVASCRIPT_EXTENSIONS,
+    ALL_PYTHON_EXTENSIONS,
+    ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS,
+    ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS,
+    ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES,
+    CHAINED_INLINE_COMMENT_PATTERN,
+    MAX_COMMENT_ISSUES,
+)
+def check_comments_python(content: str) -> list[str]:
+    """Report non-exempt comments found in Python source.
+    Only real ``COMMENT`` tokens count, as streamed by ``_comment_tokens``, so
+    a hash character inside a string literal (hex colour, URL fragment,
+    f-string text) is never mistaken for a comment. Comments accepted by
+    ``_is_exempt_python_comment`` are skipped.
+    Content that cannot be tokenized (a partial Edit fragment, invalid syntax)
+    produces only the findings collected before the failure, possibly none;
+    there is deliberately no line-scanning fallback, because a missed comment
+    in a broken draft is preferable to flagging string-interior hashes.
+    Reporting stops once ``MAX_COMMENT_ISSUES`` findings have been collected.
+    """
+    issues: list[str] = []
+    for each_comment_token in _comment_tokens(content):
+        if not _is_exempt_python_comment(each_comment_token):
+            comment_row = each_comment_token.start[0]
+            issues.append(
+                f"Line {comment_row}: Comment found - refactor to self-documenting code"
+            )
+            if len(issues) >= MAX_COMMENT_ISSUES:
+                break
+    return issues
+def check_comments_javascript(content: str) -> list[str]:
+    """Report line and block comments found in JavaScript/TypeScript source.
+    The scan is line-based. A line starting with ``/*`` opens a block comment
+    and is reported unless it starts with ``/**`` (doc comments are allowed);
+    the lines that follow are skipped until one contains ``*/``. A line
+    starting with ``//`` is reported unless it starts with one of
+    ``ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES``. Comments that trail code on the
+    same line are not examined here.
+    The ``MAX_COMMENT_ISSUES`` cap is checked right after every finding is
+    recorded, so it limits block-comment findings as well as line-comment
+    findings.
+    Args:
+        content: The JavaScript/TypeScript source text to scan.
+    Returns:
+        One ``Line N: ...`` message per reported comment, in line order.
+    """
+    issues: list[str] = []
+    is_in_multiline_comment = False
+    for each_line_number, each_line in enumerate(content.split("\n"), 1):
+        stripped = each_line.strip()
+        if not stripped:
+            continue
+        if is_in_multiline_comment:
+            if "*/" in stripped:
+                is_in_multiline_comment = False
+            continue
+        finding_label: str | None = None
+        if stripped.startswith("/*"):
+            is_in_multiline_comment = "*/" not in stripped
+            if not stripped.startswith("/**"):
+                finding_label = "Block comment"
+        elif stripped.startswith("//"):
+            if not stripped.startswith(ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES):
+                finding_label = "Comment"
+        if finding_label is None:
+            continue
+        issues.append(
+            f"Line {each_line_number}: {finding_label} found - refactor to self-documenting code"
+        )
+        if len(issues) >= MAX_COMMENT_ISSUES:
+            break
+    return issues
+def extract_comment_texts(content: str, file_path: str) -> tuple[set[str], set[str]]:
+    """Collect the normalized comment strings of *content* for later comparison.
+    Python files are delegated to ``_extract_python_comment_sets``.
+    JavaScript-family files are scanned line by line: a line opening a
+    non-``/**`` block comment or a non-exempt ``//`` line is a standalone
+    comment, and a ``//`` that follows code on the same line is an inline
+    comment unless its text starts with an exempt inline prefix. Lines inside
+    an open block comment are skipped. Any other extension, and empty content,
+    yields two empty sets.
+    Returns:
+        Tuple of (inline_comments, standalone_comments).
+        Inline comments appear after code on the same line.
+        Standalone comments are lines where the entire line is a comment.
+    """
+    extension = get_file_extension(file_path)
+    inline_comments: set[str] = set()
+    standalone_comments: set[str] = set()
+    if not content:
+        return inline_comments, standalone_comments
+    if extension in ALL_PYTHON_EXTENSIONS:
+        python_inline, python_standalone, _ = _extract_python_comment_sets(content)
+        return python_inline, python_standalone
+    if extension not in ALL_JAVASCRIPT_EXTENSIONS:
+        return inline_comments, standalone_comments
+    is_in_multiline = False
+    for each_line in content.split("\n"):
+        stripped = each_line.strip()
+        if not stripped:
+            continue
+        if is_in_multiline:
+            is_in_multiline = "*/" not in stripped
+            continue
+        if stripped.startswith("/*"):
+            is_in_multiline = "*/" not in stripped
+            if not stripped.startswith("/**"):
+                standalone_comments.add(stripped)
+            continue
+        if stripped.startswith("//"):
+            if not stripped.startswith(ALL_JAVASCRIPT_EXEMPT_COMMENT_PREFIXES):
+                standalone_comments.add(stripped)
+            continue
+        comment_start = stripped.find("//")
+        if comment_start < 0:
+            continue
+        if not stripped[:comment_start].strip():
+            continue
+        comment_text = stripped[comment_start + 2 :].strip()
+        if comment_text.startswith(ALL_JAVASCRIPT_EXEMPT_INLINE_COMMENT_PREFIXES):
+            continue
+        inline_comments.add(stripped[comment_start:])
+    return inline_comments, standalone_comments
+def check_comment_changes(old_content: str, new_content: str, file_path: str) -> list[str]:
+    """Check for comment additions or removals between old and new content.
+    Inline comments (after code on same line): BLOCK when added.
+    Standalone comment lines: NUDGE (print advisory to stderr) when added.
+    Existing comments being removed: BLOCK (comment preservation principle),
+    unless the non-blank line count dropped by more than the number of removed
+    comments, which is taken to mean the surrounding code was removed too.
+    When the file is Python and either *old_content* or *new_content* cannot
+    be tokenized (common for mid-edit Edit fragments), the comparison is
+    indeterminate: the per-side tokenize failure would empty one set and
+    misrepresent every comment on the other side as either added or
+    removed. The check returns no issues in that case — false negatives on
+    syntactically-invalid drafts are preferable to false positives that
+    flag legitimate comments as deleted.
+    Each message quotes one sample comment truncated to 60 characters. The
+    sample is the lexicographically smallest member of the relevant set, so
+    the same inputs always produce the same message regardless of set
+    iteration order.
+    Args:
+        old_content: The source text before the change.
+        new_content: The source text after the change.
+        file_path: The path of the file, used to select the comment syntax.
+    Returns:
+        The blocking issues: at most one for added inline comments and at most
+        one for removed comments.
+    """
+    issues: list[str] = []
+    extension = get_file_extension(file_path)
+    if extension in ALL_PYTHON_EXTENSIONS:
+        old_inline, old_standalone, old_tokenize_ok = _extract_python_comment_sets(old_content)
+        new_inline, new_standalone, new_tokenize_ok = _extract_python_comment_sets(new_content)
+        if not (old_tokenize_ok and new_tokenize_ok):
+            return issues
+    else:
+        old_inline, old_standalone = extract_comment_texts(old_content, file_path)
+        new_inline, new_standalone = extract_comment_texts(new_content, file_path)
+    added_inline = new_inline - old_inline
+    if added_inline:
+        sample = min(added_inline)
+        issues.append(f"Inline comment added: {sample[:60]} - refactor to self-documenting code")
+    added_standalone = new_standalone - old_standalone
+    if added_standalone:
+        sample = min(added_standalone)
+        print(f"[CODE_RULES advisory] Standalone comment added: {sample[:60]} - prefer self-documenting code", file=sys.stderr)
+    all_old = old_inline | old_standalone
+    all_new = new_inline | new_standalone
+    removed_comments = all_old - all_new
+    if removed_comments:
+        old_line_count = sum(1 for line in old_content.split("\n") if line.strip())
+        new_line_count = sum(1 for line in new_content.split("\n") if line.strip())
+        code_was_removed = new_line_count < old_line_count - len(removed_comments)
+        if not code_was_removed:
+            sample = min(removed_comments)
+            issues.append(f"Existing comment removed: {sample[:60]} - NEVER delete existing comments")
+    return issues
+def _python_tokens(source: str) -> Iterator[tokenize.TokenInfo]:
+    """Lazily yield the Python tokens of *source*.
+    This is the single place that calls ``tokenize.generate_tokens``, so a
+    change to that entry point only has to be made here. Because the result is
+    a generator, nothing is tokenized until iteration starts; iteration may
+    then raise any of ``ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS`` when *source*
+    is not valid Python (mid-edit fragments, unterminated strings, mismatched
+    indentation). Each caller decides how to handle that failure.
+    """
+    source_reader = io.StringIO(source)
+    for each_token in tokenize.generate_tokens(source_reader.readline):
+        yield each_token
+def _comment_tokens(source: str) -> Iterator[tokenize.TokenInfo]:
+    """Lazily yield only the COMMENT tokens of *source*.
+    Tokens are streamed from ``_python_tokens``, so a consumer that stops
+    early (``check_comments_python`` stops at ``MAX_COMMENT_ISSUES``) never
+    forces the whole token list into memory. A tokenize failure ends the
+    stream quietly: comments found before the failure have already been
+    yielded and no error or indeterminate flag is surfaced. Callers that need
+    to know about the failure (``_extract_python_comment_sets``) read
+    ``_python_tokens`` directly instead.
+    """
+    try:
+        for each_token in _python_tokens(source):
+            if each_token.type != tokenize.COMMENT:
+                continue
+            yield each_token
+    except ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS:
+        return
+def _is_exempt_python_comment(comment_token: tokenize.TokenInfo) -> bool:
+    """Return True when a comment token is exempt from the no-comments rule.
+    Exemptions, checked in this order:
+    A shebang: the comment starts with ``#!`` and the token begins at line 1,
+    column 0. A shebang lookalike anywhere else (after code, or on a later
+    line) is not a real shebang and stays subject to the rule.
+    An empty comment: nothing but whitespace follows the leading hash.
+    A free-form marker: the text after the hash, with leading whitespace
+    dropped, starts with an entry of ``ALL_FREE_FORM_EXEMPT_COMMENT_BODIES``
+    (``type:``, ``TODO``, ``FIXME``, ``HACK``, ``XXX``). Trailing prose is
+    accepted because ``# type:`` carries a justification by convention
+    (enforced by ``check_type_escape_hatches``) and the TODO family carries
+    annotation text.
+    A token-anchored directive (``noqa``, ``pylint:``, ``pragma:``) that ends
+    at a real token boundary and has no chained second comment. Such a
+    directive body never legitimately contains a ``#``, so any inner hash
+    (``# noqa: F401#note``, ``# noqa: F401 #prose``,
+    ``# noqa: F401  # imported for re-export``) marks free-form prose riding
+    on the directive, and that prose does not inherit the exemption.
+    Whitespace (spaces or tabs) between the hash and the marker is allowed in
+    every case.
+    """
+    comment_string = comment_token.string
+    is_first_line_shebang = comment_token.start == (1, 0) and comment_string.startswith("#!")
+    if is_first_line_shebang:
+        return True
+    directive_body = comment_string[1:].lstrip()
+    if not directive_body or directive_body.startswith(ALL_FREE_FORM_EXEMPT_COMMENT_BODIES):
+        return True
+    if _starts_with_bounded_token_anchored_directive(directive_body):
+        return CHAINED_INLINE_COMMENT_PATTERN.search(directive_body) is None
+    return False
+def _starts_with_bounded_token_anchored_directive(directive_body: str) -> bool:
+    """Return True when *directive_body* begins with a genuine exempt directive.
+    Each marker in ``ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES`` (``noqa``,
+    ``pylint:``, ``pragma:``) is tried as a prefix. A marker whose own last
+    character is a boundary character is accepted as soon as it matches.
+    Otherwise the match counts only when the marker is followed by the end of
+    the string, whitespace, or a boundary character, so prose such as
+    ``noqa-but-not-really: explanation`` that merely shares the prefix is
+    rejected.
+    Args:
+        directive_body: The comment text with the leading hash and surrounding
+            whitespace already stripped.
+    Returns:
+        True when a token-anchored exempt directive is present at a real token
+        boundary, False otherwise.
+    """
+    for each_token in ALL_TOKEN_ANCHORED_EXEMPT_COMMENT_BODIES:
+        if not directive_body.startswith(each_token):
+            continue
+        if each_token[-1] in ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS:
+            return True
+        token_length = len(each_token)
+        next_character = directive_body[token_length : token_length + 1]
+        is_at_boundary = (
+            not next_character
+            or next_character.isspace()
+            or next_character in ALL_TOKEN_ANCHORED_DIRECTIVE_BOUNDARY_CHARACTERS
+        )
+        if is_at_boundary:
+            return True
+    return False
+def _extract_python_comment_sets(content: str) -> tuple[set[str], set[str], bool]:
+    """Return (inline_comments, standalone_comments, tokenize_succeeded).
+    *content* is streamed once through ``_python_tokens`` and every non-exempt
+    ``COMMENT`` token is recorded with surrounding whitespace stripped. A
+    comment is standalone when everything before its column on the source line
+    is whitespace, and inline otherwise.
+    A tokenize failure (mid-edit fragment, syntax error) discards anything
+    collected so far and returns two empty sets with ``False``, letting callers
+    treat the outcome as indeterminate rather than as "no comments present".
+    """
+    source_lines = content.split("\n")
+    inline_comments: set[str] = set()
+    standalone_comments: set[str] = set()
+    try:
+        for each_token in _python_tokens(content):
+            if each_token.type != tokenize.COMMENT or _is_exempt_python_comment(each_token):
+                continue
+            comment_row, comment_column = each_token.start
+            source_line = source_lines[comment_row - 1] if comment_row <= len(source_lines) else ""
+            is_standalone = not source_line[:comment_column].strip()
+            destination = standalone_comments if is_standalone else inline_comments
+            destination.add(each_token.string.strip())
+    except ALL_PYTHON_TOKENIZE_FAILURE_EXCEPTIONS:
+        return set(), set(), False
+    return inline_comments, standalone_comments, True