PyPI - pydry-cli - Versions diffs - 0.0.3__py3-none-any.whl - Mend

pydry-cli 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

pydry/__init__.py +5 -0
pydry/__main__.py +5 -0
pydry/analyze.py +319 -0
pydry/builtin_plugins.py +206 -0
pydry/cli.py +646 -0
pydry/engine.py +518 -0
pydry/models.py +51 -0
pydry/normalize.py +154 -0
pydry/plugins.py +105 -0
pydry_cli-0.0.3.dist-info/METADATA +216 -0
pydry_cli-0.0.3.dist-info/RECORD +15 -0
pydry_cli-0.0.3.dist-info/WHEEL +5 -0
pydry_cli-0.0.3.dist-info/entry_points.txt +2 -0
pydry_cli-0.0.3.dist-info/licenses/LICENSE +22 -0
pydry_cli-0.0.3.dist-info/top_level.txt +1 -0

pydry/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .cli import main
+# Version string exposed on the package.
+__version__ = "0.0.3"
+# Names exported by ``from pydry import *``.
+__all__ = ["__version__", "main"]

pydry/__main__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from __future__ import annotations
+from .cli import main
+# Run the CLI entry point and exit with whatever ``main()`` returns.
+raise SystemExit(main())

pydry/analyze.py ADDED Viewed

@@ -0,0 +1,319 @@
+from __future__ import annotations
+import ast
+import os
+from collections import Counter
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+from .models import FunctionOccurrence
+from .normalize import FunctionNormalizer
+if TYPE_CHECKING:
+    from collections.abc import Generator, Iterable
+# Either flavour of function definition node handled by this module.
+_FuncNode = ast.FunctionDef | ast.AsyncFunctionDef
+# Call names treated as potentially side-effecting. ``extract_features``
+# compares the last dotted component of each call name against this set.
+SIDE_EFFECT_CALLS = {
+    "print",
+    "open",
+    "write",
+    "send",
+    "post",
+    "put",
+    "delete",
+    "remove",
+    "unlink",
+    "save",
+    "commit",
+}
+# ``ast.TryStar`` (``try`` / ``except*``) only exists on Python 3.11+, so it is
+# resolved defensively: on older interpreters the tuple is simply empty and the
+# constants below keep their previous contents.
+_TRY_STAR_NODES = (ast.TryStar,) if hasattr(ast, "TryStar") else ()
+# Compound statements that ``extract_features`` counts as control flow.
+CONTROL_FLOW_NODES = (
+    ast.If,
+    ast.For,
+    ast.AsyncFor,
+    ast.While,
+    ast.Try,
+    *_TRY_STAR_NODES,
+    ast.With,
+    ast.AsyncWith,
+    ast.Match,
+)
+# Node types whose class names ``_stmt_sequence`` records. ``ast.Yield`` and
+# ``ast.YieldFrom`` are expression nodes but are tracked here as well.
+STMT_TYPES = (
+    ast.Assign,
+    ast.AnnAssign,
+    ast.AugAssign,
+    ast.Return,
+    ast.Expr,
+    ast.If,
+    ast.For,
+    ast.AsyncFor,
+    ast.While,
+    ast.Try,
+    *_TRY_STAR_NODES,
+    ast.With,
+    ast.AsyncWith,
+    ast.Raise,
+    ast.Assert,
+    ast.Pass,
+    ast.Break,
+    ast.Continue,
+    ast.Import,
+    ast.ImportFrom,
+    ast.Delete,
+    ast.Match,
+    ast.Yield,
+    ast.YieldFrom,
+)
+# Directory names that ``iter_python_files`` prunes from its walk; matching is
+# by exact directory name at any depth.
+DEFAULT_EXCLUDED_DIRS = {
+    "__pycache__",
+    ".git",
+    ".hg",
+    ".mypy_cache",
+    ".pytest_cache",
+    ".ruff_cache",
+    ".tox",
+    ".nox",
+    ".venv",
+    "venv",
+    "site-packages",
+    "build",
+    "dist",
+    ".eggs",
+}
+def iter_python_files(root: Path) -> Iterable[Path]:
+    """Yield the ``.py`` files found beneath *root* in sorted order.
+
+    Directories named in ``DEFAULT_EXCLUDED_DIRS`` are never descended into.
+    Within each directory, sub-directories and file names are visited in
+    sorted order.
+    """
+    for current_dir, subdirs, files in os.walk(root, topdown=True):
+        # Slice assignment edits the list os.walk itself holds, which is what
+        # prunes (and orders) the directories it will descend into next.
+        subdirs[:] = sorted(
+            entry for entry in subdirs if entry not in DEFAULT_EXCLUDED_DIRS
+        )
+        for entry in sorted(files):
+            if not entry.endswith(".py"):
+                continue
+            candidate = Path(current_dir, entry)
+            if candidate.is_file():
+                yield candidate
+def build_qualname(parents: list[str], name: str) -> str:
+    """Return *name* prefixed by its dot-joined *parents*, if there are any."""
+    if not parents:
+        return name
+    return ".".join([*parents, name])
+def iter_functions(
+    module: ast.Module, top_level_only: bool = False
+) -> Generator[tuple[_FuncNode, list[str], bool]]:
+    """Yield ``(function_node, parent_names, is_class_method)`` triples.
+
+    With ``top_level_only`` set, only functions that are direct children of
+    the module body are produced, each with no parents and a ``False`` flag.
+    Otherwise function and class bodies are searched recursively; the flag is
+    ``True`` only when the function's immediate container is a class.
+    """
+    func_types = (ast.FunctionDef, ast.AsyncFunctionDef)
+    if top_level_only:
+        yield from (
+            (stmt, [], False) for stmt in module.body if isinstance(stmt, func_types)
+        )
+        return
+    def descend(
+        body: list[ast.stmt], scope: list[str], inside_class: bool
+    ) -> Generator[tuple[_FuncNode, list[str], bool]]:
+        # Only direct children of a body are inspected; definitions nested in
+        # other statements (if/try/with ...) are not reached.
+        for stmt in body:
+            if isinstance(stmt, ast.ClassDef):
+                yield from descend(stmt.body, [*scope, stmt.name], True)
+            elif isinstance(stmt, func_types):
+                yield stmt, scope, inside_class
+                yield from descend(stmt.body, [*scope, stmt.name], False)
+    yield from descend(module.body, [], False)
+def param_count(fn: ast.FunctionDef | ast.AsyncFunctionDef) -> int:
+    """Count every declared parameter of *fn*, including ``*args``/``**kwargs``."""
+    spec = fn.args
+    named = len(spec.posonlyargs) + len(spec.args) + len(spec.kwonlyargs)
+    variadic = sum(1 for star in (spec.vararg, spec.kwarg) if star is not None)
+    return named + variadic
+def is_method(parents: list[str]) -> bool:
+    """Report whether *parents* is non-empty, i.e. the function is nested."""
+    return True if parents else False
+def canonicalize(fn: _FuncNode, **opts: Any) -> str:
+    """Return an ``ast.dump`` string of a normalized copy of *fn*.
+
+    The copy is made by unparsing and re-parsing *fn*, so the node passed in
+    is not the one handed to ``FunctionNormalizer``. ``opts`` are forwarded
+    unchanged to the normalizer's constructor.
+    """
+    detached = ast.parse(ast.unparse(fn)).body[0]
+    ast.fix_missing_locations(detached)
+    normalizer = FunctionNormalizer(**opts)
+    normalized = normalizer.visit(detached)
+    ast.fix_missing_locations(normalized)
+    return ast.dump(normalized, annotate_fields=True, include_attributes=False)
+def _call_name(node: ast.Call) -> str:
+    """Return a dotted name for the callee of *node*.
+
+    A bare name yields its identifier and an attribute chain yields its
+    attributes joined with dots, prefixed by the base name when the chain is
+    rooted in a plain name. Any other callee yields ``"<dynamic>"``.
+    """
+    target = node.func
+    if isinstance(target, ast.Name):
+        return target.id
+    if not isinstance(target, ast.Attribute):
+        return "<dynamic>"
+    segments: list[str] = []
+    # Peel attributes from the outside in, prepending so the result reads
+    # left to right.
+    while isinstance(target, ast.Attribute):
+        segments.insert(0, target.attr)
+        target = target.value
+    if isinstance(target, ast.Name):
+        segments.insert(0, target.id)
+    return ".".join(segments)
+def _literal_token(value: object) -> str:
+    """Map a constant value to a ``"<kind>:<value>"`` token string.
+
+    ``None`` becomes ``"none"``; values of any unrecognised type become
+    ``"type:<type name>"``.
+    """
+    if value is None:
+        return "none"
+    if isinstance(value, str):
+        return f"str:{value}"
+    # bool is a subclass of int, so it has to be tested first.
+    for kind, label in ((bool, "bool"), (int, "int")):
+        if isinstance(value, kind):
+            return f"{label}:{value}"
+    # These kinds are rendered through repr() rather than str().
+    for kind, label in ((bytes, "bytes"), (float, "float"), (complex, "complex")):
+        if isinstance(value, kind):
+            return f"{label}:{value!r}"
+    return f"type:{type(value).__name__}"
+def _stmt_sequence(fn: _FuncNode) -> list[str]:
+    """List the class names of all ``STMT_TYPES`` nodes in ``ast.walk`` order."""
+    return [
+        type(node).__name__ for node in ast.walk(fn) if isinstance(node, STMT_TYPES)
+    ]
+def _counter_jaccard(a: Counter[str], b: Counter[str]) -> float:
+    """Return the multiset Jaccard similarity of two counters.
+
+    The score is the sum of per-key minimum counts divided by the sum of
+    per-key maximum counts; it is ``1.0`` when there is nothing to compare.
+    """
+    vocabulary = set(a) | set(b)
+    if not vocabulary:
+        return 1.0
+    overlap = 0
+    combined = 0
+    for key in vocabulary:
+        left, right = a[key], b[key]
+        overlap += min(left, right)
+        combined += max(left, right)
+    if not combined:
+        return 1.0
+    return overlap / combined
+def _lcs_ratio(a: list[str], b: list[str]) -> float:
+    """Return ``2 * LCS(a, b) / (len(a) + len(b))`` for two token lists.
+
+    Two empty lists score ``1.0``; exactly one empty list scores ``0.0``.
+    """
+    if not (a or b):
+        return 1.0
+    if not (a and b):
+        return 0.0
+    # Keep the DP row as wide as the shorter input to bound memory use.
+    rows, cols = (a, b) if len(a) >= len(b) else (b, a)
+    width = len(cols) + 1
+    above = [0] * width
+    for row_token in rows:
+        row = [0] * width
+        for idx, col_token in enumerate(cols, start=1):
+            if row_token == col_token:
+                row[idx] = above[idx - 1] + 1
+            else:
+                row[idx] = max(above[idx], row[idx - 1])
+        above = row
+    return (2 * above[-1]) / (len(a) + len(b))
+def extract_features(fn: _FuncNode) -> dict[str, Any]:
+    """Build the feature dictionary describing one function node.
+
+    The result holds node-type, call-name, literal-token and loaded-name
+    counters, the statement sequence, several scalar counts, and two pattern
+    probes: a single-statement "wrapper" check and a returned-lambda
+    ("curry") depth.
+    """
+    # The tree is not modified here, so one cached traversal feeds every metric.
+    nodes = list(ast.walk(fn))
+    def count(kinds: Any) -> int:
+        return sum(1 for node in nodes if isinstance(node, kinds))
+    node_types = Counter(type(node).__name__ for node in nodes)
+    stmt_seq = _stmt_sequence(fn)
+    call_names = Counter(
+        _call_name(node) for node in nodes if isinstance(node, ast.Call)
+    )
+    literal_tokens = Counter(
+        _literal_token(node.value) for node in nodes if isinstance(node, ast.Constant)
+    )
+    external_names = Counter(
+        node.id
+        for node in nodes
+        if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load)
+    )
+    # Only the final dotted component is matched against the side-effect set.
+    side_effect_calls = sorted(
+        {name for name in call_names if name.split(".")[-1] in SIDE_EFFECT_CALLS}
+    )
+    body = getattr(fn, "body", [])
+    # Wrapper probe: the body is exactly one ``return call(...)`` or bare
+    # ``call(...)`` statement.
+    is_wrapper = False
+    wrapper_target = None
+    fixed_args = 0
+    passthrough_args = 0
+    only_stmt = body[0] if len(body) == 1 else None
+    if isinstance(only_stmt, (ast.Return, ast.Expr)) and isinstance(
+        only_stmt.value, ast.Call
+    ):
+        call = only_stmt.value
+        is_wrapper = True
+        wrapper_target = _call_name(call)
+        spec = fn.args
+        arg_names = {
+            param.arg for param in [*spec.posonlyargs, *spec.args, *spec.kwonlyargs]
+        }
+        # Positional call arguments that are plain references to the
+        # function's own parameters count as pass-through; the rest as fixed.
+        passthrough_args = sum(
+            1
+            for arg in call.args
+            if isinstance(arg, ast.Name) and arg.id in arg_names
+        )
+        fixed_args = len(call.args) - passthrough_args
+    # Curry probe: count lambdas nested directly inside top-level returns.
+    returns_lambda = False
+    curry_depth = 0
+    for stmt in body:
+        if not isinstance(stmt, ast.Return):
+            continue
+        inner = stmt.value
+        while isinstance(inner, ast.Lambda):
+            returns_lambda = True
+            curry_depth += 1
+            inner = inner.body
+    return {
+        "node_types": node_types,
+        "stmt_seq": stmt_seq,
+        "call_names": call_names,
+        "external_names": external_names,
+        "param_count": param_count(fn),
+        "has_yield": any(isinstance(node, (ast.Yield, ast.YieldFrom)) for node in nodes),
+        "has_await": any(isinstance(node, ast.Await) for node in nodes),
+        "control_count": count(CONTROL_FLOW_NODES),
+        "returns": count(ast.Return),
+        "raises": count(ast.Raise),
+        "literals": sum(literal_tokens.values()),
+        "literal_tokens": literal_tokens,
+        "side_effect_calls": side_effect_calls,
+        "is_wrapper": is_wrapper,
+        "wrapper_target": wrapper_target,
+        "fixed_args": fixed_args,
+        "passthrough_args": passthrough_args,
+        "returns_lambda": returns_lambda,
+        "curry_depth": curry_depth,
+        "stmt_count": len(stmt_seq),
+    }
+def occurrence_for(
+    path: Path,
+    fn: _FuncNode,
+    parents: list[str],
+    *,
+    is_method_flag: bool | None = None,
+) -> FunctionOccurrence:
+    """Describe where *fn* lives as a ``FunctionOccurrence`` record.
+
+    When ``is_method_flag`` is ``None`` the method flag falls back to
+    ``is_method(parents)``; otherwise the explicit value is used as given.
+    """
+    if is_method_flag is None:
+        method_flag = is_method(parents)
+    else:
+        method_flag = is_method_flag
+    kind = "def"
+    if isinstance(fn, ast.AsyncFunctionDef):
+        kind = "async def"
+    return FunctionOccurrence(
+        path=str(path),
+        lineno=getattr(fn, "lineno", 0),
+        end_lineno=getattr(fn, "end_lineno", None),
+        col_offset=getattr(fn, "col_offset", 0),
+        name=fn.name,
+        qualname=build_qualname(parents, fn.name),
+        kind=kind,
+        param_count=param_count(fn),
+        is_method=method_flag,
+    )

pydry/builtin_plugins.py ADDED Viewed

@@ -0,0 +1,206 @@
+from __future__ import annotations
+from typing import Any
+from .plugins import PairContext, PairPluginResult, register_pair_plugin
+def _literal_token_diff(a: dict[str, Any], b: dict[str, Any]) -> int:
+    """Sum the absolute per-token count differences of two feature dicts.
+
+    Each dict's ``"literal_tokens"`` mapping is compared; a missing mapping
+    or a missing token counts as zero.
+    """
+    left: dict[str, int] = a.get("literal_tokens", {})
+    right: dict[str, int] = b.get("literal_tokens", {})
+    total = 0
+    for token in set(left) | set(right):
+        total += abs(left.get(token, 0) - right.get(token, 0))
+    return total
+@register_pair_plugin
+class WrapperPlugin:
+    """Label a pair as ``wrapper`` when its wrapper score is at least 0.5."""
+    name = "wrapper"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Return wrapper metadata, or an empty result below the threshold."""
+        left = ctx.a.features
+        right = ctx.b.features
+        evidence = ctx.evidence
+        if evidence.wrapper_score < 0.5:
+            return PairPluginResult()
+        left_target = left.get("wrapper_target")
+        right_target = right.get("wrapper_target")
+        differences = []
+        if left_target != right_target:
+            differences.append("wrapper targets differ")
+        return PairPluginResult(
+            pattern_labels=["wrapper"],
+            key_differences=differences,
+            suggested_refactor_kind="merge_into_single_function_with_param",
+            refactorability_delta=0.05,
+            metadata={
+                "a_wrapper_target": left_target,
+                "b_wrapper_target": right_target,
+                "a_fixed_args": left.get("fixed_args", 0),
+                "b_fixed_args": right.get("fixed_args", 0),
+            },
+        )
+@register_pair_plugin
+class CurryingPlugin:
+    """Label a pair ``partial_application`` when its curry score is at least 0.4."""
+    name = "currying"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Return lambda/curry metadata, or an empty result below the threshold."""
+        left = ctx.a.features
+        right = ctx.b.features
+        evidence = ctx.evidence
+        if evidence.curry_score < 0.4:
+            return PairPluginResult()
+        details = {
+            "a_returns_lambda": left.get("returns_lambda"),
+            "b_returns_lambda": right.get("returns_lambda"),
+            "a_curry_depth": left.get("curry_depth"),
+            "b_curry_depth": right.get("curry_depth"),
+        }
+        return PairPluginResult(
+            pattern_labels=["partial_application"],
+            suggested_refactor_kind="introduce_partial",
+            refactorability_delta=0.04,
+            metadata=details,
+        )
+@register_pair_plugin
+class SideEffectRiskPlugin:
+    """Flag a pair when either function records any side-effect call."""
+    name = "side_effects"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Return a risk flag listing the union of both sides' side-effect calls."""
+        left = ctx.a.features
+        right = ctx.b.features
+        combined = set(left.get("side_effect_calls", []))
+        combined |= set(right.get("side_effect_calls", []))
+        calls = sorted(combined)
+        if calls:
+            return PairPluginResult(
+                risk_flags=["possible_side_effects"],
+                refactorability_delta=-0.05,
+                metadata={"calls": calls},
+            )
+        return PairPluginResult()
+@register_pair_plugin
+class AsyncBoundaryPlugin:
+    """Flag pairs whose await, yield, or raise features disagree."""
+    name = "async_boundary"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Return one flag, difference note and penalty per mismatching feature."""
+        left = ctx.a.features
+        right = ctx.b.features
+        # (feature key, risk flag, human-readable difference, penalty)
+        checks = (
+            ("has_await", "async_boundary_diff", "async behavior differs", 0.08),
+            ("has_yield", "return_shape_diff", "generator behavior differs", 0.08),
+            ("raises", "exception_behavior_diff", "exception behavior differs", 0.05),
+        )
+        flags = []
+        diffs = []
+        delta = 0.0
+        for key, flag, note, penalty in checks:
+            if left.get(key) != right.get(key):
+                flags.append(flag)
+                diffs.append(note)
+                delta -= penalty
+        if not (flags or diffs):
+            return PairPluginResult()
+        return PairPluginResult(
+            risk_flags=flags,
+            key_differences=diffs,
+            refactorability_delta=delta,
+            metadata={
+                "a_has_await": left.get("has_await"),
+                "b_has_await": right.get("has_await"),
+                "a_has_yield": left.get("has_yield"),
+                "b_has_yield": right.get("has_yield"),
+                "a_raises": left.get("raises"),
+                "b_raises": right.get("raises"),
+            },
+        )
+@register_pair_plugin
+class LiteralSpecializationPlugin:
+    """Label pairs that look alike but differ in their literal tokens."""
+    name = "literal_specialization"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Suggest ``parameterize_constant`` when all four conditions hold.
+
+        The literal tokens must differ, the literal counts must be within 2
+        of each other, shape similarity must be at least 0.85 and call
+        similarity at least 0.6.
+        """
+        left = ctx.a.features
+        right = ctx.b.features
+        evidence = ctx.evidence
+        literal_token_diff = _literal_token_diff(left, right)
+        qualifies = (
+            literal_token_diff > 0
+            and abs(left.get("literals", 0) - right.get("literals", 0)) <= 2
+            and evidence.shape_similarity >= 0.85
+            and evidence.call_similarity >= 0.6
+        )
+        if not qualifies:
+            return PairPluginResult()
+        template = (
+            "def shared_helper(..., configurable_value):\n"
+            "    # parameterize constant-like variation\n"
+            "    ..."
+        )
+        return PairPluginResult(
+            pattern_labels=["literal_specialization"],
+            suggested_refactor_kind="parameterize_constant",
+            refactorability_delta=0.03,
+            abstract_template=template,
+            metadata={
+                "a_literals": left.get("literals"),
+                "b_literals": right.get("literals"),
+                "literal_token_diff": literal_token_diff,
+            },
+        )
+@register_pair_plugin
+class ExtractHelperPlugin:
+    """Suggest a shared helper for pairs with high shape and statement similarity."""
+    name = "extract_helper"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Return a helper template when both similarities are at least 0.8."""
+        evidence = ctx.evidence
+        similar_enough = (
+            evidence.shape_similarity >= 0.8 and evidence.stmt_similarity >= 0.8
+        )
+        if not similar_enough:
+            return PairPluginResult()
+        # The template names both functions so the suggestion is traceable.
+        template = (
+            "def shared_helper(...):\n"
+            "    # candidate abstraction for "
+            f"{ctx.a.occurrence.qualname}"
+            f" and {ctx.b.occurrence.qualname}\n"
+            "    ..."
+        )
+        return PairPluginResult(
+            pattern_labels=["extract_helper_candidate"],
+            suggested_refactor_kind="extract_common_helper",
+            refactorability_delta=0.05,
+            abstract_template=template,
+            metadata={
+                "shape_similarity": evidence.shape_similarity,
+                "stmt_similarity": evidence.stmt_similarity,
+            },
+        )
+@register_pair_plugin
+class DependencyDivergencePlugin:
+    """Flag pairs whose sets of loaded names differ by six or more entries."""
+    name = "dependency_divergence"
+    def analyze_pair(self, ctx: PairContext) -> PairPluginResult:
+        """Return a dependency-risk flag based on the external-name difference.
+
+        The ``same_shape_different_dependencies`` label is added only when
+        signature similarity is at least 0.8 and call similarity is below 0.5.
+        """
+        left = ctx.a.features
+        right = ctx.b.features
+        evidence = ctx.evidence
+        left_names = set(left.get("external_names", {}))
+        right_names = set(right.get("external_names", {}))
+        ext_diff = len(left_names.symmetric_difference(right_names))
+        if ext_diff < 6:
+            return PairPluginResult()
+        labels = []
+        if evidence.signature_similarity >= 0.8 and evidence.call_similarity < 0.5:
+            labels = ["same_shape_different_dependencies"]
+        return PairPluginResult(
+            risk_flags=["ambient_dependency_diff"],
+            pattern_labels=labels,
+            refactorability_delta=-0.06,
+            metadata={"external_name_symmetric_difference": ext_diff},
+        )