codeclone 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeclone/baseline.py +13 -9
- codeclone/cache.py +12 -6
- codeclone/cfg.py +166 -1
- codeclone/cli.py +305 -75
- codeclone/html_report.py +569 -586
- codeclone/normalize.py +29 -6
- codeclone/report.py +6 -6
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/METADATA +15 -5
- codeclone-1.2.0.dist-info/RECORD +19 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/WHEEL +1 -1
- codeclone-1.1.0.dist-info/RECORD +0 -19
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/entry_points.txt +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.0.dist-info}/top_level.txt +0 -0
codeclone/normalize.py
CHANGED
|
@@ -9,6 +9,8 @@ Licensed under the MIT License.
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import ast
|
|
12
|
+
import copy
|
|
13
|
+
from ast import AST
|
|
12
14
|
from collections.abc import Sequence
|
|
13
15
|
from dataclasses import dataclass
|
|
14
16
|
|
|
@@ -83,12 +85,33 @@ class AstNormalizer(ast.NodeTransformer):
|
|
|
83
85
|
node.value = "_CONST_"
|
|
84
86
|
return node
|
|
85
87
|
|
|
88
|
+
def visit_AugAssign(self, node: ast.AugAssign) -> AST:
|
|
89
|
+
# Normalize x += 1 to x = x + 1
|
|
90
|
+
# This allows detecting clones where one uses += and another uses = +
|
|
91
|
+
# We transform AugAssign(target, op, value) to Assign([target], BinOp(target, op, value))
|
|
92
|
+
|
|
93
|
+
# Deepcopy target to avoid reuse issues in the AST
|
|
94
|
+
target_load = copy.deepcopy(node.target)
|
|
95
|
+
# Ensure context is Load() for the right-hand side usage
|
|
96
|
+
if hasattr(target_load, "ctx"):
|
|
97
|
+
target_load.ctx = ast.Load()
|
|
98
|
+
|
|
99
|
+
new_node = ast.Assign(
|
|
100
|
+
targets=[node.target],
|
|
101
|
+
value=ast.BinOp(left=target_load, op=node.op, right=node.value),
|
|
102
|
+
lineno=node.lineno,
|
|
103
|
+
col_offset=node.col_offset,
|
|
104
|
+
end_lineno=getattr(node, "end_lineno", None),
|
|
105
|
+
end_col_offset=getattr(node, "end_col_offset", None),
|
|
106
|
+
)
|
|
107
|
+
return self.generic_visit(new_node)
|
|
108
|
+
|
|
86
109
|
|
|
87
110
|
def normalized_ast_dump(func_node: ast.AST, cfg: NormalizationConfig) -> str:
|
|
88
111
|
normalizer = AstNormalizer(cfg)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
)
|
|
112
|
+
# Deepcopy to prevent side effects on the original AST
|
|
113
|
+
node_copy = copy.deepcopy(func_node)
|
|
114
|
+
new_node = ast.fix_missing_locations(normalizer.visit(node_copy))
|
|
92
115
|
return ast.dump(new_node, annotate_fields=True, include_attributes=False)
|
|
93
116
|
|
|
94
117
|
|
|
@@ -99,9 +122,9 @@ def normalized_ast_dump_from_list(
|
|
|
99
122
|
dumps: list[str] = []
|
|
100
123
|
|
|
101
124
|
for node in nodes:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
)
|
|
125
|
+
# Deepcopy to prevent side effects
|
|
126
|
+
node_copy = copy.deepcopy(node)
|
|
127
|
+
new_node = ast.fix_missing_locations(normalizer.visit(node_copy))
|
|
105
128
|
dumps.append(ast.dump(new_node, annotate_fields=True, include_attributes=False))
|
|
106
129
|
|
|
107
130
|
return ";".join(dumps)
|
codeclone/report.py
CHANGED
|
@@ -12,8 +12,8 @@ import json
|
|
|
12
12
|
from typing import Any
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict]]:
|
|
16
|
-
groups: dict[str, list[dict]] = {}
|
|
15
|
+
def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
|
|
16
|
+
groups: dict[str, list[dict[str, Any]]] = {}
|
|
17
17
|
for u in units:
|
|
18
18
|
key = f"{u['fingerprint']}|{u['loc_bucket']}"
|
|
19
19
|
groups.setdefault(key, []).append(u)
|
|
@@ -21,13 +21,13 @@ def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict]]:
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def build_block_groups(
|
|
24
|
-
blocks: list[dict], min_functions: int = 2
|
|
25
|
-
) -> dict[str, list[dict]]:
|
|
26
|
-
groups: dict[str, list[dict]] = {}
|
|
24
|
+
blocks: list[dict[str, Any]], min_functions: int = 2
|
|
25
|
+
) -> dict[str, list[dict[str, Any]]]:
|
|
26
|
+
groups: dict[str, list[dict[str, Any]]] = {}
|
|
27
27
|
for b in blocks:
|
|
28
28
|
groups.setdefault(b["block_hash"], []).append(b)
|
|
29
29
|
|
|
30
|
-
filtered: dict[str, list[dict]] = {}
|
|
30
|
+
filtered: dict[str, list[dict[str, Any]]] = {}
|
|
31
31
|
for h, items in groups.items():
|
|
32
32
|
functions = {i["qualname"] for i in items}
|
|
33
33
|
if len(functions) >= min_functions:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -23,11 +23,13 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.11
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.12
|
|
25
25
|
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
26
27
|
Classifier: Operating System :: OS Independent
|
|
27
28
|
Requires-Python: >=3.10
|
|
28
29
|
Description-Content-Type: text/markdown
|
|
29
30
|
License-File: LICENSE
|
|
30
31
|
Requires-Dist: pygments>=2.19.2
|
|
32
|
+
Requires-Dist: rich>=14.3.2
|
|
31
33
|
Provides-Extra: dev
|
|
32
34
|
Requires-Dist: pytest>=9.0.0; extra == "dev"
|
|
33
35
|
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
@@ -102,7 +104,12 @@ Typical use cases:
|
|
|
102
104
|
|
|
103
105
|
- Each function is converted into a **Control Flow Graph**.
|
|
104
106
|
- CFG nodes contain normalized AST statements.
|
|
105
|
-
- CFG edges represent structural control flow
|
|
107
|
+
- CFG edges represent structural control flow:
|
|
108
|
+
- `if` / `else`
|
|
109
|
+
- `for` / `async for` / `while`
|
|
110
|
+
- `try` / `except` / `finally`
|
|
111
|
+
- `with` / `async with`
|
|
112
|
+
- `match` / `case` (Python 3.10+)
|
|
106
113
|
- Current CFG semantics (v1):
|
|
107
114
|
- `break` and `continue` are treated as statements (no jump targets),
|
|
108
115
|
- after-blocks are explicit and always present,
|
|
@@ -154,14 +161,14 @@ Generate reports:
|
|
|
154
161
|
|
|
155
162
|
```bash
|
|
156
163
|
codeclone . \
|
|
157
|
-
--json
|
|
158
|
-
--text
|
|
164
|
+
--json .cache/codeclone/report.json \
|
|
165
|
+
--text .cache/codeclone/report.txt
|
|
159
166
|
```
|
|
160
167
|
|
|
161
168
|
Generate an HTML report:
|
|
162
169
|
|
|
163
170
|
```bash
|
|
164
|
-
codeclone . --html
|
|
171
|
+
codeclone . --html .cache/codeclone/report.html
|
|
165
172
|
```
|
|
166
173
|
|
|
167
174
|
---
|
|
@@ -235,6 +242,9 @@ repos:
|
|
|
235
242
|
5. Detect function-level and block-level clones.
|
|
236
243
|
6. Apply conservative filters to suppress noise.
|
|
237
244
|
|
|
245
|
+
See the architectural overview:
|
|
246
|
+
- [docs/architecture.md](docs/architecture.md)
|
|
247
|
+
|
|
238
248
|
---
|
|
239
249
|
|
|
240
250
|
## Control Flow Graph (CFG)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
codeclone/__init__.py,sha256=_MZuqgioYGn49qw6_OluedgcRvuu8IhAwVgHkM4Kj7s,364
|
|
2
|
+
codeclone/baseline.py,sha256=tV-vgCpyQi-g1AlWJwuUFDeQwUZuhj4tX-BxFDv-LWo,1719
|
|
3
|
+
codeclone/blockhash.py,sha256=jOswW9jqe9ww3y4r-gLTUZjMmn0CHXpU5qTtFndKQ10,594
|
|
4
|
+
codeclone/blocks.py,sha256=Y75BSpzf-zyyeD9-vKnQfJ3QwIjYxMwiN9DbEqnbONg,1735
|
|
5
|
+
codeclone/cache.py,sha256=DexslhZfxj79fnoPna8r_oBCqmsstN2ICRA_o6ZVpGo,1559
|
|
6
|
+
codeclone/cfg.py,sha256=op503zRnew2ZIz5coK_HUaKt1VG1ucJBacXAZ1rBJYQ,11587
|
|
7
|
+
codeclone/cli.py,sha256=c_61l3wSF2nbTZgd6LmvoUvejXatydPAPmJyRknHx78,13229
|
|
8
|
+
codeclone/extractor.py,sha256=crCxgkK1n2fl5FUz6HtbaEZUH5CO8S0zqM0Xj0RSE6E,4558
|
|
9
|
+
codeclone/fingerprint.py,sha256=H0YY209sfGf02VeLlxHNDp7n6es0vLiMmq3TBWDm3SE,545
|
|
10
|
+
codeclone/html_report.py,sha256=e7gYxEHk5ezJtGUYIYsQlcxu_0fP3hmd9jj-zMWMfJY,25574
|
|
11
|
+
codeclone/normalize.py,sha256=bvMoY3VDiZsnFiD50h5XUgwnMUKOyUEx6lJXCBiembg,4290
|
|
12
|
+
codeclone/report.py,sha256=IwblTgZe4liTq3gHagnw6O4ZUkRqJ448XqzhLhJZoWM,1972
|
|
13
|
+
codeclone/scanner.py,sha256=BdJFyaLv1xvimVkJyvvTN0FcG2RQZzRlTlHWi2fRQnU,1050
|
|
14
|
+
codeclone-1.2.0.dist-info/licenses/LICENSE,sha256=ndXAbunvN-jCQjgCaoBOF5AN4FcRlAa0R7hK1lWDuBU,1073
|
|
15
|
+
codeclone-1.2.0.dist-info/METADATA,sha256=RekVw29wHrLX0fHNLBYS9Ky6Ee6oFnntQxeCOcRbMTM,7255
|
|
16
|
+
codeclone-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
17
|
+
codeclone-1.2.0.dist-info/entry_points.txt,sha256=_MI9DVTLOmv3OlxpyogdOmMAduiLVIdHeOlZ_KBsrIY,49
|
|
18
|
+
codeclone-1.2.0.dist-info/top_level.txt,sha256=4tQa_d-4Zle-qV9KmNDkWq0WHYgZsW9vdaeF30rNntg,10
|
|
19
|
+
codeclone-1.2.0.dist-info/RECORD,,
|
codeclone-1.1.0.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
codeclone/__init__.py,sha256=_MZuqgioYGn49qw6_OluedgcRvuu8IhAwVgHkM4Kj7s,364
|
|
2
|
-
codeclone/baseline.py,sha256=VdIh5AeWeAod6MZ1EV2UWMlvlhwHwlbyUJM_RXbhf24,1545
|
|
3
|
-
codeclone/blockhash.py,sha256=jOswW9jqe9ww3y4r-gLTUZjMmn0CHXpU5qTtFndKQ10,594
|
|
4
|
-
codeclone/blocks.py,sha256=Y75BSpzf-zyyeD9-vKnQfJ3QwIjYxMwiN9DbEqnbONg,1735
|
|
5
|
-
codeclone/cache.py,sha256=PZ9XgHc3A-JSGXHYQHJK_Ldu24EIaGL7Spi2cTpbEUo,1334
|
|
6
|
-
codeclone/cfg.py,sha256=e4zCfHeqWRvxIN1E4D-t_717ZftQY7XlwgYBfFaa9oU,4794
|
|
7
|
-
codeclone/cli.py,sha256=Zp6e7PC-9WJIBkqX88V-4L7wryi1c2qPB87Pbpo4BfE,5177
|
|
8
|
-
codeclone/extractor.py,sha256=crCxgkK1n2fl5FUz6HtbaEZUH5CO8S0zqM0Xj0RSE6E,4558
|
|
9
|
-
codeclone/fingerprint.py,sha256=H0YY209sfGf02VeLlxHNDp7n6es0vLiMmq3TBWDm3SE,545
|
|
10
|
-
codeclone/html_report.py,sha256=Es4lYOMm24O2UTsaF59qjvCxLA2nwojwLX-zOpvluro,24337
|
|
11
|
-
codeclone/normalize.py,sha256=AzKE-1kOrOV3LVK8x4gGytulxXVYBkB0W_3oIqiRWS8,3236
|
|
12
|
-
codeclone/report.py,sha256=2Yu9267M2GLYKqsMAD4mrTUWhTbHP6_fIOGqhtDkxL0,1912
|
|
13
|
-
codeclone/scanner.py,sha256=BdJFyaLv1xvimVkJyvvTN0FcG2RQZzRlTlHWi2fRQnU,1050
|
|
14
|
-
codeclone-1.1.0.dist-info/licenses/LICENSE,sha256=ndXAbunvN-jCQjgCaoBOF5AN4FcRlAa0R7hK1lWDuBU,1073
|
|
15
|
-
codeclone-1.1.0.dist-info/METADATA,sha256=J2l_qX6VxJ_WRCK87KJPuzdVlFI5hcFR9lWjMzuQ55w,6984
|
|
16
|
-
codeclone-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
-
codeclone-1.1.0.dist-info/entry_points.txt,sha256=_MI9DVTLOmv3OlxpyogdOmMAduiLVIdHeOlZ_KBsrIY,49
|
|
18
|
-
codeclone-1.1.0.dist-info/top_level.txt,sha256=4tQa_d-4Zle-qV9KmNDkWq0WHYgZsW9vdaeF30rNntg,10
|
|
19
|
-
codeclone-1.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|