pydry-cli 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydry/normalize.py ADDED
@@ -0,0 +1,154 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import keyword
5
+
6
+
7
+ class LocalNameNormalizer(ast.NodeTransformer):
8
+ def __init__(self, preserve_self_cls: bool = True) -> None:
9
+ self.name_map: dict[str, str] = {}
10
+ self.counter = 0
11
+ self.preserve_self_cls = preserve_self_cls
12
+
13
+ def _preserve(self, name: str) -> bool:
14
+ if keyword.iskeyword(name) or name in {"True", "False", "None"}:
15
+ return True
16
+ return self.preserve_self_cls and name in {"self", "cls"}
17
+
18
+ def _tok(self, name: str) -> str:
19
+ if name not in self.name_map:
20
+ self.name_map[name] = f"v{self.counter}"
21
+ self.counter += 1
22
+ return self.name_map[name]
23
+
24
+ def visit_Name(self, node: ast.Name) -> ast.Name:
25
+ if self._preserve(node.id):
26
+ return node
27
+ return ast.copy_location(ast.Name(id=self._tok(node.id), ctx=node.ctx), node)
28
+
29
+ def visit_arg(self, node: ast.arg) -> ast.arg:
30
+ if not self._preserve(node.arg):
31
+ node.arg = self._tok(node.arg)
32
+ return node
33
+
34
+ def visit_ExceptHandler(self, node: ast.ExceptHandler) -> ast.ExceptHandler:
35
+ self.generic_visit(node)
36
+ if node.name and not self._preserve(node.name):
37
+ node.name = self._tok(node.name)
38
+ return node
39
+
40
+ def visit_Global(self, node: ast.Global) -> ast.Global: # preserve semantics
41
+ return node
42
+
43
+ def visit_Nonlocal(self, node: ast.Nonlocal) -> ast.Nonlocal: # preserve semantics
44
+ return node
45
+
46
+
47
class ConstantNormalizer(ast.NodeTransformer):
    """Replace every literal with a fixed placeholder chosen by its type.

    ``None`` and booleans are kept as they are.  Strings, bytes, ints, floats
    and complex numbers each collapse to a single placeholder for their type;
    any other constant becomes the string ``"__const__"``.
    """

    # (type, placeholder) pairs tried in order.  ``bool`` is handled before
    # this table is consulted because ``bool`` is a subclass of ``int``.
    _PLACEHOLDERS = (
        (str, "__str__"),
        (bytes, b"__bytes__"),
        (int, 0),
        (float, 0.0),
        (complex, 0j),
    )

    def visit_Constant(self, node: ast.Constant) -> ast.Constant:
        value = node.value
        if value is None or isinstance(value, bool):
            placeholder = value
        else:
            placeholder = next(
                (
                    stand_in
                    for kind, stand_in in self._PLACEHOLDERS
                    if isinstance(value, kind)
                ),
                "__const__",
            )
        replacement = ast.Constant(value=placeholder)
        return ast.copy_location(replacement, node)
66
+
67
+
68
class FunctionNormalizer(ast.NodeTransformer):
    """Reduce function definitions to a canonical form for comparison.

    Every ``def`` / ``async def`` reached by the transformer is normalised,
    nested definitions before the definition that contains them.  Nodes are
    modified in place.

    Args:
        strip_docstrings: Drop a leading string-literal statement.
        strip_decorators: Clear the decorator list.
        normalize_arg_names: Rename parameters by position (``arg0``,
            ``arg1``, ..., ``vararg``, ``kwarg``).  Only the parameter nodes
            are renamed, not their uses in the body.  Ignored when
            ``normalize_local_names`` is on, see ``_normalize``.
        strip_annotations: Remove the return annotation, parameter
            annotations and type comments.
        normalize_local_names: Run ``LocalNameNormalizer`` over the function.
        normalize_constants: Run ``ConstantNormalizer`` over the function.
        preserve_function_name: Keep the original name instead of replacing
            it with ``"__func__"``.
    """

    def __init__(
        self,
        *,
        strip_docstrings: bool = True,
        strip_decorators: bool = True,
        normalize_arg_names: bool = True,
        strip_annotations: bool = True,
        normalize_local_names: bool = False,
        normalize_constants: bool = False,
        preserve_function_name: bool = False,
    ) -> None:
        self.strip_docstrings = strip_docstrings
        self.strip_decorators = strip_decorators
        self.normalize_arg_names = normalize_arg_names
        self.strip_annotations = strip_annotations
        self.normalize_local_names = normalize_local_names
        self.normalize_constants = normalize_constants
        self.preserve_function_name = preserve_function_name

    def visit_FunctionDef(
        self, node: ast.FunctionDef
    ) -> ast.FunctionDef | ast.AsyncFunctionDef:
        return self._normalize(node)

    def visit_AsyncFunctionDef(
        self, node: ast.AsyncFunctionDef
    ) -> ast.FunctionDef | ast.AsyncFunctionDef:
        return self._normalize(node)

    def _normalize(
        self, node: ast.FunctionDef | ast.AsyncFunctionDef
    ) -> ast.FunctionDef | ast.AsyncFunctionDef:
        """Apply the enabled normalisation steps to one function node."""
        # Children first: nested definitions are normalised before this one.
        node = self.generic_visit(node)  # type: ignore[assignment]

        if self.strip_docstrings and node.body:
            first = node.body[0]
            if (
                isinstance(first, ast.Expr)
                and isinstance(first.value, ast.Constant)
                and isinstance(first.value.value, str)
            ):
                node.body = node.body[1:]

        if not self.preserve_function_name:
            node.name = "__func__"

        if self.strip_decorators:
            node.decorator_list = []

        if self.strip_annotations:
            node.returns = None
            node.type_comment = None

        ordered_args = (
            list(node.args.posonlyargs)
            + list(node.args.args)
            + list(node.args.kwonlyargs)
        )
        # Positional renaming rewrites the ``ast.arg`` nodes only.  If it ran
        # before LocalNameNormalizer, a parameter ("arg0") and its uses in the
        # body (still "a") would be tokenised as two unrelated names, and
        # ``return a - b`` would normalise exactly like ``return b - a``.
        # LocalNameNormalizer renames parameters and their uses from a single
        # map, so it replaces this step whenever it is enabled.
        if self.normalize_arg_names and not self.normalize_local_names:
            for i, arg in enumerate(ordered_args):
                arg.arg = f"arg{i}"
            if node.args.vararg:
                node.args.vararg.arg = "vararg"
            if node.args.kwarg:
                node.args.kwarg.arg = "kwarg"

        if self.strip_annotations:
            for arg in ordered_args:
                arg.annotation = None
                arg.type_comment = None
            if node.args.vararg:
                node.args.vararg.annotation = None
                node.args.vararg.type_comment = None
            if node.args.kwarg:
                node.args.kwarg.annotation = None
                node.args.kwarg.type_comment = None

        if self.normalize_local_names:
            node = LocalNameNormalizer().visit(node)
            node = ast.fix_missing_locations(node)

        if self.normalize_constants:
            node = ConstantNormalizer().visit(node)
            node = ast.fix_missing_locations(node)

        return node
pydry/plugins.py ADDED
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Protocol, TypeVar
5
+
6
+
7
@dataclass
class PluginContext:
    """Data describing one side of a compared pair (see ``PairContext``)."""

    # NOTE(review): the concrete types are not visible in this module.
    # Presumably ``occurrence`` identifies where the function was found and
    # ``node`` is its AST node; confirm against the code that builds these.
    occurrence: Any
    node: Any
    # Free-form feature values keyed by name.
    features: dict[str, Any]
12
+
13
+
14
@dataclass
class PairContext:
    """Input handed to ``PairPlugin.analyze_pair``: two sides plus evidence."""

    # The two members of the pair.
    a: PluginContext
    b: PluginContext
    # NOTE(review): type not visible here; presumably the comparison data
    # computed for the pair before plugins run. Confirm against the caller.
    evidence: Any
19
+
20
+
21
@dataclass
class PairPluginResult:
    """Findings returned by one plugin, or the merged findings of all plugins.

    Every field has a default, so ``PairPluginResult()`` is an empty result.
    """

    # List-valued findings; ``apply_pair_plugins`` concatenates these across
    # plugins and removes duplicates while keeping first-seen order.
    pattern_labels: list[str] = field(default_factory=list)
    key_differences: list[str] = field(default_factory=list)
    risk_flags: list[str] = field(default_factory=list)
    # Free-form plugin data; in the merged result it is keyed by plugin name.
    metadata: dict[str, Any] = field(default_factory=dict)
    # In the merged result the first plugin to supply a non-None value wins.
    suggested_refactor_kind: str | None = None
    # Summed across plugins in the merged result.
    refactorability_delta: float = 0.0
    # In the merged result the first plugin to supply a non-None value wins.
    abstract_template: str | None = None
30
+
31
+
32
class PairPlugin(Protocol):
    """Structural interface for plugins that analyse a pair of functions."""

    # Label used in error messages and as the plugin's key in merged metadata.
    name: str

    # Return the plugin's findings for the pair described by ``ctx``.
    def analyze_pair(self, ctx: PairContext) -> PairPluginResult: ...
36
+
37
+
38
class PluginRegistry:
    """Ordered collection of pair plugins."""

    def __init__(self) -> None:
        self._pair_plugins: list[PairPlugin] = []

    def register_pair(self, plugin: PairPlugin) -> None:
        """Add *plugin* after the ones already registered."""
        self._pair_plugins += [plugin]

    @property
    def pair_plugins(self) -> list[PairPlugin]:
        """Snapshot of the registered plugins, in registration order.

        A new list is returned each time, so changing it does not affect the
        registry.
        """
        return self._pair_plugins.copy()
48
+
49
+
50
# Module-level registry used by register_pair_plugin() and apply_pair_plugins().
registry = PluginRegistry()


# Lets register_pair_plugin() hand its argument back with the caller's own type.
_T = TypeVar("_T")
54
+
55
+
56
def register_pair_plugin(plugin: _T) -> _T:
    """Register *plugin* with the module-level registry and return it unchanged.

    A class is instantiated with no arguments and the instance is registered;
    any other object is registered as-is.  Because the argument itself is
    returned, the function can be used as a class decorator.
    """
    if isinstance(plugin, type):
        instance: PairPlugin = plugin()  # type: ignore[assignment]
    else:
        instance = plugin  # type: ignore[assignment]
    registry.register_pair(instance)
    return plugin
60
+
61
+
62
+ def _uniq(items: list[str]) -> list[str]:
63
+ seen = set()
64
+ out = []
65
+ for item in items:
66
+ if item not in seen:
67
+ seen.add(item)
68
+ out.append(item)
69
+ return out
70
+
71
+
72
def apply_pair_plugins(
    ctx: PairContext, *, plugin_errors: list[str] | None = None
) -> PairPluginResult:
    """Run every registered pair plugin on *ctx* and merge the results.

    A plugin that raises is skipped; the failure is recorded as
    ``"<plugin>: <ExceptionType>: <message>"``.  A plugin that returns
    ``None`` contributes nothing.

    Args:
        ctx: The pair to analyse.
        plugin_errors: Optional list that also receives each failure message,
            in addition to the per-call record described below.

    Returns:
        One merged ``PairPluginResult``: list fields are concatenated and
        de-duplicated in first-seen order, ``refactorability_delta`` is
        summed, ``metadata`` maps each plugin's label to that plugin's
        metadata, and ``suggested_refactor_kind`` / ``abstract_template``
        take the first non-None value.  If any plugin failed,
        ``metadata["_plugin_errors"]`` lists the messages for this call.
    """
    merged = PairPluginResult()
    pair_errors: list[str] = []
    for plugin in registry.pair_plugins:
        # Resolve the label once, with a fallback.  Reading ``plugin.name``
        # inside the handler would itself raise for an object registered
        # without that attribute and defeat the per-plugin error isolation.
        label = getattr(plugin, "name", type(plugin).__name__)
        try:
            result = plugin.analyze_pair(ctx)
        except Exception as exc:
            msg = f"{label}: {type(exc).__name__}: {exc}"
            pair_errors.append(msg)
            if plugin_errors is not None:
                plugin_errors.append(msg)
            continue
        if result is None:
            continue
        merged.pattern_labels.extend(result.pattern_labels)
        merged.key_differences.extend(result.key_differences)
        merged.risk_flags.extend(result.risk_flags)
        merged.metadata[label] = result.metadata
        merged.refactorability_delta += result.refactorability_delta
        if (
            result.suggested_refactor_kind is not None
            and merged.suggested_refactor_kind is None
        ):
            merged.suggested_refactor_kind = result.suggested_refactor_kind
        if result.abstract_template is not None and merged.abstract_template is None:
            merged.abstract_template = result.abstract_template
    merged.pattern_labels = _uniq(merged.pattern_labels)
    merged.key_differences = _uniq(merged.key_differences)
    merged.risk_flags = _uniq(merged.risk_flags)
    if pair_errors:
        merged.metadata["_plugin_errors"] = pair_errors
    return merged
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: pydry-cli
3
+ Version: 0.0.3
4
+ Summary: AST-based duplicate and structural similarity detector for Python
5
+ Author: Really Him
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/hesreallyhim/pydry
8
+ Project-URL: Repository, https://github.com/hesreallyhim/pydry
9
+ Project-URL: Issues, https://github.com/hesreallyhim/pydry/issues
10
+ Keywords: ast,cli,code-analysis,duplicates,refactoring
11
+ Classifier: Development Status :: 2 - Pre-Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Quality Assurance
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Provides-Extra: dev
24
+ Requires-Dist: build>=1.2; extra == "dev"
25
+ Requires-Dist: pytest>=8.0; extra == "dev"
26
+ Requires-Dist: pytest-cov>=6.0; extra == "dev"
27
+ Requires-Dist: ruff>=0.9; extra == "dev"
28
+ Requires-Dist: mypy>=1.14; extra == "dev"
29
+ Requires-Dist: pre-commit>=4.0; extra == "dev"
30
+ Requires-Dist: twine>=6.0; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # pydry
34
+
35
+ `pydry` is a small Python CLI for finding exact duplicate functions and structurally similar functions in Python code.
36
+
37
+ ## Features
38
+
39
+ - Finds exact duplicate functions using AST normalization.
40
+ - Ranks near matches by structural similarity and refactorability.
41
+ - Flags likely abstraction candidates and common risk signals.
42
+ - Emits text output for quick inspection and JSON output for automation.
43
+ - Runs without third-party runtime dependencies.
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ python -m pip install pydry-cli
49
+ ```
50
+
51
+ For local development from a checkout:
52
+
53
+ ```bash
54
+ make venv
55
+ source venv/bin/activate
56
+ make install
57
+ make check
58
+ ```
59
+
60
+ ## Quick start
61
+
62
+ Run a compact summary for the current directory:
63
+
64
+ ```bash
65
+ pydry showcase
66
+ ```
67
+
68
+ Find exact duplicates:
69
+
70
+ ```bash
71
+ pydry exact ./src --normalize-local-names --normalize-constants
72
+ ```
73
+
74
+ Find near matches:
75
+
76
+ ```bash
77
+ pydry near ./src --threshold 0.85 --top-k 25
78
+ ```
79
+
80
+ Write a full JSON report:
81
+
82
+ ```bash
83
+ pydry report ./src --output reports/pydry-report.json
84
+ ```
85
+
86
+ ## Commands
87
+
88
+ ### `pydry exact`
89
+
90
+ Find exact duplicate functions after AST normalization.
91
+
92
+ ```bash
93
+ pydry exact ./src
94
+ pydry exact ./src --min-count 3
95
+ pydry exact ./src --normalize-local-names --normalize-constants
96
+ pydry exact ./src --format json
97
+ ```
98
+
99
+ Useful options:
100
+
101
+ - `--min-count`: minimum group size, default `2`.
102
+ - `--top-level-only`: ignore nested functions and methods.
103
+ - `--normalize-local-names`: treat local variable renames as equivalent.
104
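+ - `--normalize-constants`: treat literal value changes as equivalent. Strings, bytes, and numbers are each collapsed to one placeholder per type; `True`, `False`, and `None` are kept as-is.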
105
+ - `--include-canonical`: include canonical AST dumps in JSON output.
106
+ - `--strict`: fail on files that cannot be read or parsed.
107
+
108
+ ### `pydry near`
109
+
110
+ Rank structurally similar function pairs.
111
+
112
+ ```bash
113
+ pydry near ./src
114
+ pydry near ./src --threshold 0.85 --top-k 25
115
+ pydry near ./src --format json --output reports/near.json
116
+ ```
117
+
118
+ Useful options:
119
+
120
+ - `--threshold`: similarity threshold from `0` to `1`, default `0.8`.
121
+ - `--top-k`: cap the number of returned pairs.
122
+ - `--top-level-only`: ignore nested functions and methods.
123
+ - `--strict`: fail on files that cannot be read or parsed.
124
+
125
+ ### `pydry abstract`
126
+
127
+ Filter near matches to pairs that look like plausible refactor candidates.
128
+
129
+ ```bash
130
+ pydry abstract ./src
131
+ pydry abstract ./src --threshold 0.86 --format json
132
+ ```
133
+
134
+ ### `pydry report`
135
+
136
+ Generate one JSON document with exact, near, and abstract sections.
137
+
138
+ ```bash
139
+ pydry report ./src
140
+ pydry report ./src --threshold 0.82 --top-k 250 --output reports/pydry-report.json
141
+ ```
142
+
143
+ ### `pydry showcase` and `pydry simulate`
144
+
145
+ Run a compact terminal summary. Both commands use the same analysis pipeline. With no path, they scan the current directory.
146
+
147
+ ```bash
148
+ pydry showcase
149
+ pydry showcase ./src --top-k 10 --threshold 0.8
150
+ pydry showcase ./src --format json
151
+ pydry simulate ./src
152
+ ```
153
+
154
+ ## JSON output
155
+
156
+ JSON-capable commands return an envelope:
157
+
158
+ ```json
159
+ {
160
+ "results": [],
161
+ "diagnostics": {
162
+ "scan_errors_count": 0,
163
+ "scan_error_samples": [],
164
+ "plugin_errors_count": 0,
165
+ "plugin_error_samples": []
166
+ }
167
+ }
168
+ ```
169
+
170
+ Near and abstract entries include:
171
+
172
+ - `similarity_score`
173
+ - `refactorability_score`
174
+ - `pattern_labels`
175
+ - `shared_structure_summary`
176
+ - `key_differences`
177
+ - `risk_flags`
178
+ - `suggested_refactor_kind`
179
+ - `evidence`
180
+
181
+ ## Python API
182
+
183
+ The CLI is the primary interface, but the core functions are importable:
184
+
185
+ ```python
186
+ from pathlib import Path
187
+
188
+ from pydry.engine import exact_groups, near_matches
189
+
190
+ groups = exact_groups(
191
+ Path("src"),
192
+ normalize_local_names=True,
193
+ normalize_constants=True,
194
+ )
195
+ rows = near_matches(Path("src"), threshold=0.85, top_k=25)
196
+ ```
197
+
198
+ ## Limitations
199
+
200
+ - Similarity is heuristic. It does not prove semantic equivalence.
201
+ - Cross-file import resolution is not attempted.
202
+ - Generated abstraction templates are suggestions, not executable patches.
203
+
204
+ ## Development
205
+
206
+ ```bash
207
+ make check
208
+ make coverage
209
+ make check-dist
210
+ ```
211
+
212
+ The package supports Python 3.11 and newer.
213
+
214
+ ## License
215
+
216
+ `pydry` is released under the MIT License. See [LICENSE](LICENSE).
@@ -0,0 +1,15 @@
1
+ pydry/__init__.py,sha256=PTpHrEPXX0XD3mrDKdheFqUx5YwwouTDlmzQldP7Jzk,80
2
+ pydry/__main__.py,sha256=fOlwIdXXr9knJszM70QIFZbOxw41ZpVFpvCdGZ1VsxA,84
3
+ pydry/analyze.py,sha256=_j0rvUHflZwzmagbPLN39ScauKG3hfQ0uzPXHQuW4w4,9114
4
+ pydry/builtin_plugins.py,sha256=U0MivLXCmcyoMKB18IyNBndB-QWpuAYF_Kt2qhUT6Qs,7071
5
+ pydry/cli.py,sha256=4geaO-fRMEdqYD_ykOztc5lHChInHPeZRDznJDFbL1M,22280
6
+ pydry/engine.py,sha256=itI_0vmRuV1pwuf_Q8YPG6G0z9dKpbwhoh9R5cYZjuc,17112
7
+ pydry/models.py,sha256=rs6gpsQkJiIMc5qxldPwKtm9BRxA2A6I23zT9TbCvII,1089
8
+ pydry/normalize.py,sha256=_HxA9QcEGjhhCUdJB3LjTJT-MTqOi0zeOc4kB9ICw9Q,5192
9
+ pydry/plugins.py,sha256=0kVE-4-dUE_X3UGPP99SuIKJZGIywMHoX2NnDudAnoM,3055
10
+ pydry_cli-0.0.3.dist-info/licenses/LICENSE,sha256=4A4mEaxxz9M3y3bLvO5ZpNZTTaKrGrE0bBI7Sa1JX3A,1068
11
+ pydry_cli-0.0.3.dist-info/METADATA,sha256=h3WKDx1rOEHp-x5KPHYgpI3zY_MPpbuVKrf22GvSq74,5244
12
+ pydry_cli-0.0.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
13
+ pydry_cli-0.0.3.dist-info/entry_points.txt,sha256=I0YdAv3mqkz-xEf-1YvyWt2v9w2OG2_XNEUcdcFbY0M,41
14
+ pydry_cli-0.0.3.dist-info/top_level.txt,sha256=9C__-pSPfs_qwX4JUoB8I5Gv28FLqc2tpRaVFuXE4vo,6
15
+ pydry_cli-0.0.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pydry = pydry.cli:main
@@ -0,0 +1,22 @@
1
+
2
+ MIT License
3
+
4
+ Copyright (c) 2026 Really Him
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ pydry