PyPI - flake8-stepdown - Versions diffs - 0.1.0__py3-none-any.whl - Mend

flake8-stepdown 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

flake8_stepdown/__init__.py +5 -0
flake8_stepdown/cli.py +168 -0
flake8_stepdown/core/__init__.py +1 -0
flake8_stepdown/core/bindings.py +148 -0
flake8_stepdown/core/graph.py +156 -0
flake8_stepdown/core/ordering.py +184 -0
flake8_stepdown/core/parser.py +102 -0
flake8_stepdown/core/references.py +260 -0
flake8_stepdown/flake8_plugin.py +42 -0
flake8_stepdown/py.typed +0 -0
flake8_stepdown/reporter.py +75 -0
flake8_stepdown/rewriter.py +99 -0
flake8_stepdown/types.py +78 -0
flake8_stepdown-0.1.0.dist-info/METADATA +126 -0
flake8_stepdown-0.1.0.dist-info/RECORD +17 -0
flake8_stepdown-0.1.0.dist-info/WHEEL +4 -0
flake8_stepdown-0.1.0.dist-info/entry_points.txt +5 -0

flake8_stepdown/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""flake8-stepdown: enforce top-down function ordering in Python."""
+# Version string of the package.
+__version__ = "0.1.0"
+# Imported here so that order_module is reachable directly from the package namespace.
+from flake8_stepdown.core.ordering import order_module

flake8_stepdown/cli.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""CLI entry point for flake8-stepdown."""
+from __future__ import annotations
+import argparse
+import sys
+from fnmatch import fnmatch
+from pathlib import Path
+from flake8_stepdown.core.ordering import order_module
+from flake8_stepdown.reporter import format_diff, format_violations
+# Exit codes returned by main(): 0 = clean, 1 = violations/changes found, 2 = error.
+EXIT_OK = 0
+EXIT_VIOLATIONS = 1
+EXIT_ERROR = 2
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point.
+    Returns:
+        Exit code: 0 (clean), 1 (violations/changes), 2 (error).
+    """
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+    # Handle stdin
+    if args.stdin_filename:
+        source = sys.stdin.read()
+        code, output = _process_source(source, args.stdin_filename, args)
+        if output:
+            _write_output(output)
+        return code
+    if not args.files:
+        sys.stderr.write("Error: no files specified\n")
+        return EXIT_ERROR
+    filepaths = _resolve_paths(args.files, args.exclude)
+    if not filepaths:
+        return EXIT_OK
+    exit_code = EXIT_OK
+    for filepath in filepaths:
+        code = _process_file(filepath, args)
+        if code == EXIT_ERROR:
+            return EXIT_ERROR
+        exit_code = max(exit_code, code)
+    return exit_code
+def _build_parser() -> argparse.ArgumentParser:
+    """Build the argument parser."""
+    parser = argparse.ArgumentParser(
+        prog="stepdown",
+        description="Enforce top-down function ordering in Python",
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    common = argparse.ArgumentParser(add_help=False)
+    common.add_argument("files", nargs="*", help="Files or directories to check")
+    common.add_argument("--exclude", action="append", default=[], help="Glob patterns to exclude")
+    common.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Show debug info (mutual recursion info on stderr)",
+    )
+    common.add_argument(
+        "--stdin-filename",
+        help="Read from stdin, use this filename for output",
+    )
+    check_parser = subparsers.add_parser("check", parents=[common], help="Report violations")
+    check_parser.add_argument(
+        "--format",
+        dest="fmt",
+        choices=["text", "json"],
+        default="text",
+        help="Output format",
+    )
+    subparsers.add_parser("diff", parents=[common], help="Show unified diff")
+    subparsers.add_parser("fix", parents=[common], help="Rewrite files in place")
+    return parser
+def _resolve_paths(paths: list[str], exclude: list[str]) -> list[str]:
+    """Expand directories to .py files and apply exclude patterns."""
+    resolved: list[str] = []
+    for entry in paths:
+        p = Path(entry)
+        if p.is_dir():
+            for py_file in sorted(p.rglob("*.py")):
+                filepath = str(py_file)
+                if not any(fnmatch(filepath, pat) for pat in exclude):
+                    resolved.append(filepath)
+        elif not any(fnmatch(entry, pat) for pat in exclude):
+            resolved.append(entry)
+    return resolved
+def _process_file(filepath: str, args: argparse.Namespace) -> int:
+    """Process a single file and handle output."""
+    path = Path(filepath)
+    if not path.exists():
+        sys.stderr.write(f"Error: {filepath} not found\n")
+        return EXIT_ERROR
+    try:
+        source = path.read_text()
+    except (OSError, UnicodeDecodeError) as e:
+        sys.stderr.write(f"Error reading {filepath}: {e}\n")
+        return EXIT_ERROR
+    code, output = _process_source(source, filepath, args)
+    if args.command == "fix" and code == EXIT_VIOLATIONS and output:
+        path.write_text(output)
+    elif output:
+        _write_output(output)
+    return code
+def _process_source(
+    source: str,
+    filename: str,
+    args: argparse.Namespace,
+) -> tuple[int, str]:
+    """Process a single source file and return (exit_code, output)."""
+    compute_rewrite = args.command != "check"
+    result = order_module(source, compute_rewrite=compute_rewrite)
+    if args.verbose and result.mutual_recursion_groups:
+        for group in result.mutual_recursion_groups:
+            sys.stderr.write(
+                f"{filename}: mutual recursion between {', '.join(group)}; original order preserved\n"
+            )
+    if args.command == "check":
+        output = format_violations(result.violations, filename=filename, fmt=args.fmt)
+        return (EXIT_VIOLATIONS if result.violations else EXIT_OK), output
+    if args.command == "diff":
+        if result.reordered_source is not None:
+            output = format_diff(source, result.reordered_source, filename=filename)
+            return EXIT_VIOLATIONS, output
+        return EXIT_OK, ""
+    # fix command
+    if result.reordered_source is not None:
+        return EXIT_VIOLATIONS, result.reordered_source
+    return EXIT_OK, ""
+def _write_output(output: str) -> None:
+    """Write output to stdout with trailing newline if needed."""
+    sys.stdout.write(output)
+    if not output.endswith("\n"):
+        sys.stdout.write("\n")
+# When run as a script, main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())

flake8_stepdown/core/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Core analysis modules for flake8-stepdown."""

flake8_stepdown/core/bindings.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Extract bindings (defined names) from module-level statements."""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import libcst as cst
+import libcst.matchers as m
+from flake8_stepdown.types import Statement
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+def extract_bindings(
+    statements: list[cst.CSTNode],
+    positions: Mapping[cst.CSTNode, cst.metadata.CodeRange],
+) -> list[Statement]:
+    """Extract bindings from module-level statements.
+    Groups consecutive @overload stubs with their implementation into a single Statement.
+    Args:
+        statements: The module-level CST nodes to analyze (functions, classes,
+            and assignments from the reorderable zone between preamble and postamble).
+        positions: Position mapping from MetadataWrapper.resolve(PositionProvider).
+    Returns:
+        Statement objects in input order, one per node or per merged overload
+        group, with empty refs (to be populated by references module). Line
+        numbers fall back to 0 for nodes that have no entry in ``positions``.
+    """
+    result: list[Statement] = []
+    i = 0
+    nodes = list(statements)
+    while i < len(nodes):
+        node = nodes[i]
+        # Check for @overload grouping
+        if isinstance(node, cst.FunctionDef) and _has_overload_decorator(node):
+            func_name = node.name.value
+            group_nodes: list[cst.CSTNode] = [node]
+            # Collect consecutive same-name functions
+            j = i + 1
+            while j < len(nodes):
+                next_node = nodes[j]
+                if isinstance(next_node, cst.FunctionDef) and next_node.name.value == func_name:
+                    group_nodes.append(next_node)
+                    if not _has_overload_decorator(next_node):
+                        # First undecorated same-name function is the implementation: group ends here
+                        j += 1
+                        break
+                    j += 1
+                else:
+                    break
+            # Merge if stubs + implementation (>1 node and last is not overload)
+            last_node = group_nodes[-1]
+            if (
+                len(group_nodes) > 1
+                and isinstance(last_node, cst.FunctionDef)
+                and not _has_overload_decorator(last_node)
+            ):
+                # The merged statement spans from the first stub to the end of the implementation
+                first_pos = positions.get(group_nodes[0])
+                last_pos = positions.get(last_node)
+                # NOTE(review): only the implementation node is stored on the Statement; the
+                # stub nodes are not kept here. Confirm the rewriter does not rely on
+                # Statement.node alone when moving an overload group.
+                result.append(
+                    Statement(
+                        node=last_node,
+                        start_line=first_pos.start.line if first_pos else 0,
+                        end_line=last_pos.end.line if last_pos else 0,
+                        bindings=frozenset({func_name}),
+                        immediate_refs=frozenset(),
+                        deferred_refs=frozenset(),
+                        is_overload_group=True,
+                    ),
+                )
+                # Skip past every node consumed by the group
+                i = j
+                continue
+            # Not a complete overload group — fall through to normal handling
+            # (only ``node`` is emitted below; the next iteration re-examines nodes[i + 1])
+        # Normal statement
+        pos = positions.get(node)
+        start_line = pos.start.line if pos else 0
+        end_line = pos.end.line if pos else 0
+        bindings = (
+            _extract_binding_names(node) if isinstance(node, cst.BaseStatement) else frozenset()
+        )
+        result.append(
+            Statement(
+                node=node,
+                start_line=start_line,
+                end_line=end_line,
+                bindings=bindings,
+                immediate_refs=frozenset(),
+                deferred_refs=frozenset(),
+                is_overload_group=False,
+            ),
+        )
+        i += 1
+    return result
+def _extract_binding_names(node: cst.BaseStatement) -> frozenset[str]:
+    """Extract the names defined by a single statement."""
+    if isinstance(node, cst.FunctionDef):
+        return frozenset({node.name.value})
+    if isinstance(node, cst.ClassDef):
+        return frozenset({node.name.value})
+    if isinstance(node, cst.SimpleStatementLine):
+        names: set[str] = set()
+        for stmt in node.body:
+            if isinstance(stmt, cst.Assign):
+                for target in stmt.targets:
+                    names |= _collect_names(target.target)
+            elif isinstance(stmt, cst.AnnAssign) and stmt.value is not None:
+                names |= _collect_names(stmt.target)
+        return frozenset(names)
+    return frozenset()
+def _collect_names(target: cst.BaseExpression) -> set[str]:
+    """Recursively collect all Name identifiers from an assignment target."""
+    if isinstance(target, cst.Name):
+        return {target.value}
+    if isinstance(target, cst.Tuple):
+        names: set[str] = set()
+        for element in target.elements:
+            names |= _collect_names(element.value)
+        return names
+    if isinstance(target, cst.StarredElement):
+        return _collect_names(target.value)
+    return set()
+def _has_overload_decorator(node: cst.FunctionDef) -> bool:
+    """Check if a FunctionDef has @typing.overload or @overload."""
+    for decorator in node.decorators:
+        dec = decorator.decorator
+        if m.matches(dec, m.Name("overload")):
+            return True
+        if m.matches(dec, m.Attribute(value=m.Name("typing"), attr=m.Name("overload"))):
+            return True
+    return False

flake8_stepdown/core/graph.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Dependency graph construction, topological sort, and SCC detection."""
+from __future__ import annotations
+import heapq
+from typing import TYPE_CHECKING
+from flake8_stepdown.core.parser import is_docstring, is_simple_assignment
+if TYPE_CHECKING:
+    from flake8_stepdown.types import Statement
+def build_normalized_graph(statements: list[Statement]) -> dict[int, set[int]]:
+    """Build a normalized dependency graph where edge A->B means "A must appear before B".
+    - Deferred ref: A calls B -> edge A->B (caller before callee)
+    - Immediate ref: A uses @B -> edge B->A (dependency before dependent)
+    """
+    # Build name -> index mapping
+    name_to_idx: dict[str, int] = {}
+    for idx, stmt in enumerate(statements):
+        for name in stmt.bindings:
+            name_to_idx[name] = idx
+    graph: dict[int, set[int]] = {i: set() for i in range(len(statements))}
+    for idx, stmt in enumerate(statements):
+        # Deferred refs: caller before callee -> edge idx -> target
+        for ref in stmt.deferred_refs:
+            if ref in name_to_idx:
+                target = name_to_idx[ref]
+                if target != idx:
+                    graph[idx].add(target)
+        # Immediate refs: dependency before dependent -> edge target -> idx
+        for ref in stmt.immediate_refs:
+            if ref in name_to_idx:
+                target = name_to_idx[ref]
+                if target != idx:
+                    graph[target].add(idx)
+    return graph
+def topological_sort(graph: dict[int, set[int]], num_nodes: int) -> list[int] | None:
+    """Kahn's topological sort with min-heap stability tie-breaking.
+    Returns ordered list of node indices, or None if a cycle is detected.
+    """
+    # Compute in-degrees
+    in_degree = [0] * num_nodes
+    for successors in graph.values():
+        for s in successors:
+            in_degree[s] += 1
+    # Initialize min-heap with zero in-degree nodes (keyed by original index for stability)
+    heap: list[int] = [i for i in range(num_nodes) if in_degree[i] == 0]
+    heapq.heapify(heap)
+    result: list[int] = []
+    while heap:
+        node = heapq.heappop(heap)
+        result.append(node)
+        for successor in graph.get(node, set()):
+            in_degree[successor] -= 1
+            if in_degree[successor] == 0:
+                heapq.heappush(heap, successor)
+    if len(result) != num_nodes:
+        return None  # Cycle detected
+    return result
+def find_sccs(graph: dict[int, set[int]], num_nodes: int) -> list[list[int]]:  # noqa: C901
+    """Find strongly connected components with size > 1 using Tarjan's algorithm."""
+    index_counter = [0]
+    stack: list[int] = []
+    on_stack = [False] * num_nodes
+    indices = [-1] * num_nodes
+    lowlinks = [-1] * num_nodes
+    result: list[list[int]] = []
+    def strongconnect(v: int) -> None:
+        indices[v] = index_counter[0]
+        lowlinks[v] = index_counter[0]
+        index_counter[0] += 1
+        stack.append(v)
+        on_stack[v] = True
+        for w in graph.get(v, set()):
+            if indices[w] == -1:
+                strongconnect(w)
+                lowlinks[v] = min(lowlinks[v], lowlinks[w])
+            elif on_stack[w]:
+                lowlinks[v] = min(lowlinks[v], indices[w])
+        if lowlinks[v] == indices[v]:
+            scc: list[int] = []
+            while True:
+                w = stack.pop()
+                on_stack[w] = False
+                scc.append(w)
+                if w == v:
+                    break
+            if len(scc) > 1:
+                result.append(scc)
+    for v in range(num_nodes):
+        if indices[v] == -1:
+            strongconnect(v)
+    return result
+def attach_no_binding_stmts(statements: list[Statement]) -> list[list[Statement]]:
+    """Group statements so that those with no bindings attach to their neighbor.
+    A statement with no bindings attaches to the next statement with bindings.
+    If it's the last statement, it attaches to the preceding one.
+    Returns a list of groups, where each group is one or more statements
+    that move together.
+    """
+    if not statements:
+        return []
+    groups: list[list[Statement]] = []
+    pending: list[Statement] = []
+    for stmt in statements:
+        if stmt.bindings:
+            # This statement has bindings — flush pending no-binding stmts as prefix
+            groups.append([*pending, stmt])
+            pending = []
+        elif (
+            groups
+            and not pending
+            and is_docstring(stmt.node)
+            and is_simple_assignment(groups[-1][-1].node)
+        ):
+            # Docstring immediately after a constant — attach to the constant's group
+            groups[-1].append(stmt)
+        else:
+            pending.append(stmt)
+    # Handle trailing no-binding statements: attach to last group
+    if pending:
+        if groups:
+            groups[-1].extend(pending)
+        else:
+            # All statements have no bindings — just return them as one group
+            groups.append(pending)
+    return groups

flake8_stepdown/core/ordering.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""Orchestrator: parse -> segment -> bindings -> refs -> graph -> sort -> violations."""
+from __future__ import annotations
+import libcst as cst
+from flake8_stepdown.core.bindings import extract_bindings
+from flake8_stepdown.core.graph import (
+    attach_no_binding_stmts,
+    build_normalized_graph,
+    find_sccs,
+    topological_sort,
+)
+from flake8_stepdown.core.parser import parse_source, segment
+from flake8_stepdown.core.references import detect_future_annotations, extract_refs
+from flake8_stepdown.rewriter import rewrite
+from flake8_stepdown.types import OrderingResult, Statement, Violation
+# Result returned by order_module when the source is blank or has no interstitial statements.
+# NOTE(review): the same instance (and its list objects) is returned every time; confirm no caller mutates it.
+_EMPTY_RESULT = OrderingResult(violations=[], reordered_source=None, mutual_recursion_groups=[])
+def order_module(source: str, *, compute_rewrite: bool = True) -> OrderingResult:
+    """Analyze and determine the correct ordering for a Python module.
+    Pipeline: parse -> segment -> bindings -> refs -> group -> graph -> SCCs ->
+    topological sort -> violations (and, optionally, the rewritten source).
+    Args:
+        source: Python source code.
+        compute_rewrite: Whether to compute the reordered source (default True).
+            Set to False when only violations are needed (e.g. flake8 plugin, check command).
+    Returns:
+        OrderingResult with violations and optionally reordered source.
+        ``reordered_source`` is None when the order is unchanged or when
+        ``compute_rewrite`` is False.
+    """
+    # Whitespace-only input: nothing to analyze
+    if not source.strip():
+        return _EMPTY_RESULT
+    module = parse_source(source)
+    wrapper = cst.metadata.MetadataWrapper(module)
+    positions = wrapper.resolve(cst.metadata.PositionProvider)
+    # Segment wrapper.module so the nodes line up with the resolved positions
+    seg = segment(wrapper.module)
+    if not seg.interstitials:
+        return _EMPTY_RESULT
+    # Extract bindings
+    statements = extract_bindings(seg.interstitials, positions)
+    # Extract references
+    has_future = detect_future_annotations(seg.preamble)
+    statements = extract_refs(statements, has_future_annotations=has_future)
+    # Attach no-binding statements
+    groups = attach_no_binding_stmts(statements)
+    # Build merged statements for graph (one per group)
+    merged: list[Statement] = []
+    for group in groups:
+        # Merge bindings and refs from all statements in the group
+        all_bindings: frozenset[str] = frozenset().union(*(s.bindings for s in group))
+        all_immediate: frozenset[str] = frozenset().union(*(s.immediate_refs for s in group))
+        all_deferred: frozenset[str] = frozenset().union(*(s.deferred_refs for s in group))
+        # The merged statement spans from the group's first line to its last line
+        merged.append(
+            Statement(
+                node=group[0].node,
+                start_line=group[0].start_line,
+                end_line=group[-1].end_line,
+                bindings=all_bindings,
+                immediate_refs=all_immediate,
+                deferred_refs=all_deferred,
+                is_overload_group=group[0].is_overload_group,
+            ),
+        )
+    # Build graph and sort
+    graph = build_normalized_graph(merged)
+    num_nodes = len(merged)
+    # Detect SCCs
+    sccs = find_sccs(graph, num_nodes)
+    # For SCCs: remove internal edges and preserve original order
+    for scc in sccs:
+        scc_set = set(scc)
+        for node in scc:
+            graph[node] = {s for s in graph[node] if s not in scc_set}
+    # Topological sort
+    new_order = topological_sort(graph, num_nodes)
+    if new_order is None:
+        # Remaining cycles after SCC removal — shouldn't happen but handle gracefully
+        new_order = list(range(num_nodes))
+    # Check if order changed
+    changed = new_order != list(range(num_nodes))
+    # Generate violations and mutual recursion info
+    violations = _generate_violations(merged, new_order)
+    mutual_recursion_groups = _extract_mutual_recursion_groups(merged, sccs)
+    # Rewrite source if order changed and rewrite requested
+    reordered_source = None
+    if changed and compute_rewrite:
+        # Expand group order back to individual statement order
+        expanded_order: list[int] = []
+        # offsets[g] is the index of group g's first statement in the flattened statement list
+        offsets = []
+        offset = 0
+        for group in groups:
+            offsets.append(offset)
+            offset += len(group)
+        for group_idx in new_order:
+            group = groups[group_idx]
+            base = offsets[group_idx]
+            expanded_order.extend(base + j for j in range(len(group)))
+        # Flattened nodes in original order; expanded_order indexes into this list
+        all_nodes = [s.node for group in groups for s in group]
+        reordered_source = rewrite(
+            seg.module,
+            seg.preamble,
+            all_nodes,
+            seg.postamble,
+            expanded_order,
+        )
+    return OrderingResult(
+        violations=violations,
+        reordered_source=reordered_source,
+        mutual_recursion_groups=mutual_recursion_groups,
+    )
+def _generate_violations(
+    statements: list[Statement],
+    new_order: list[int],
+) -> list[Violation]:
+    """Generate TDP001 violations from ordering differences."""
+    violations: list[Violation] = []
+    # Map from original index to new position
+    new_position = {orig_idx: new_pos for new_pos, orig_idx in enumerate(new_order)}
+    for orig_idx, stmt in enumerate(statements):
+        new_pos = new_position[orig_idx]
+        if new_pos != orig_idx:
+            # Find what it should come before
+            name = next(iter(stmt.bindings), "<unnamed>")
+            # Find the first statement that this one should precede
+            for other_idx in new_order:
+                if other_idx == orig_idx:
+                    break
+                other = statements[other_idx]
+                other_name = next(iter(other.bindings), "<unnamed>")
+                if new_position[orig_idx] < new_position[other_idx]:
+                    continue
+                violations.append(
+                    Violation(
+                        code="TDP001",
+                        lineno=stmt.start_line,
+                        col_offset=0,
+                        name=name,
+                        message=f"{name} should appear after {other_name}",
+                        dependency=other_name,
+                    ),
+                )
+                break
+    return violations
+def _extract_mutual_recursion_groups(
+    statements: list[Statement],
+    sccs: list[list[int]],
+) -> list[list[str]]:
+    """Extract mutual recursion groups from SCCs as lists of function names."""
+    groups: list[list[str]] = []
+    for scc in sccs:
+        names = sorted(
+            {n for idx in scc for n in statements[idx].bindings},
+        )
+        if names:
+            groups.append(names)
+    return groups