codeclone 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeclone/normalize.py CHANGED
@@ -9,6 +9,8 @@ Licensed under the MIT License.
9
9
  from __future__ import annotations
10
10
 
11
11
  import ast
12
+ import copy
13
+ from ast import AST
12
14
  from collections.abc import Sequence
13
15
  from dataclasses import dataclass
14
16
 
@@ -83,12 +85,33 @@ class AstNormalizer(ast.NodeTransformer):
83
85
  node.value = "_CONST_"
84
86
  return node
85
87
 
88
+ def visit_AugAssign(self, node: ast.AugAssign) -> AST:
89
+ # Normalize x += 1 to x = x + 1
90
+ # This allows detecting clones where one uses += and another uses = +
91
+ # We transform AugAssign(target, op, value) to Assign([target], BinOp(target, op, value))
92
+
93
+ # Deepcopy target to avoid reuse issues in the AST
94
+ target_load = copy.deepcopy(node.target)
95
+ # Ensure context is Load() for the right-hand side usage
96
+ if hasattr(target_load, "ctx"):
97
+ target_load.ctx = ast.Load()
98
+
99
+ new_node = ast.Assign(
100
+ targets=[node.target],
101
+ value=ast.BinOp(left=target_load, op=node.op, right=node.value),
102
+ lineno=node.lineno,
103
+ col_offset=node.col_offset,
104
+ end_lineno=getattr(node, "end_lineno", None),
105
+ end_col_offset=getattr(node, "end_col_offset", None),
106
+ )
107
+ return self.generic_visit(new_node)
108
+
86
109
 
87
110
  def normalized_ast_dump(func_node: ast.AST, cfg: NormalizationConfig) -> str:
88
111
  normalizer = AstNormalizer(cfg)
89
- new_node = ast.fix_missing_locations(
90
- normalizer.visit(ast.copy_location(func_node, func_node))
91
- )
112
+ # Deepcopy to prevent side effects on the original AST
113
+ node_copy = copy.deepcopy(func_node)
114
+ new_node = ast.fix_missing_locations(normalizer.visit(node_copy))
92
115
  return ast.dump(new_node, annotate_fields=True, include_attributes=False)
93
116
 
94
117
 
@@ -99,9 +122,9 @@ def normalized_ast_dump_from_list(
99
122
  dumps: list[str] = []
100
123
 
101
124
  for node in nodes:
102
- new_node = ast.fix_missing_locations(
103
- normalizer.visit(ast.copy_location(node, node))
104
- )
125
+ # Deepcopy to prevent side effects
126
+ node_copy = copy.deepcopy(node)
127
+ new_node = ast.fix_missing_locations(normalizer.visit(node_copy))
105
128
  dumps.append(ast.dump(new_node, annotate_fields=True, include_attributes=False))
106
129
 
107
130
  return ";".join(dumps)
codeclone/report.py CHANGED
@@ -12,8 +12,8 @@ import json
12
12
  from typing import Any
13
13
 
14
14
 
15
- def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict]]:
16
- groups: dict[str, list[dict]] = {}
15
+ def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
16
+ groups: dict[str, list[dict[str, Any]]] = {}
17
17
  for u in units:
18
18
  key = f"{u['fingerprint']}|{u['loc_bucket']}"
19
19
  groups.setdefault(key, []).append(u)
@@ -21,13 +21,13 @@ def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict]]:
21
21
 
22
22
 
23
23
  def build_block_groups(
24
- blocks: list[dict], min_functions: int = 2
25
- ) -> dict[str, list[dict]]:
26
- groups: dict[str, list[dict]] = {}
24
+ blocks: list[dict[str, Any]], min_functions: int = 2
25
+ ) -> dict[str, list[dict[str, Any]]]:
26
+ groups: dict[str, list[dict[str, Any]]] = {}
27
27
  for b in blocks:
28
28
  groups.setdefault(b["block_hash"], []).append(b)
29
29
 
30
- filtered: dict[str, list[dict]] = {}
30
+ filtered: dict[str, list[dict[str, Any]]] = {}
31
31
  for h, items in groups.items():
32
32
  functions = {i["qualname"] for i in items}
33
33
  if len(functions) >= min_functions:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeclone
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
5
5
  Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
6
6
  Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
@@ -23,11 +23,13 @@ Classifier: Programming Language :: Python :: 3.10
23
23
  Classifier: Programming Language :: Python :: 3.11
24
24
  Classifier: Programming Language :: Python :: 3.12
25
25
  Classifier: Programming Language :: Python :: 3.13
26
+ Classifier: Programming Language :: Python :: 3.14
26
27
  Classifier: Operating System :: OS Independent
27
28
  Requires-Python: >=3.10
28
29
  Description-Content-Type: text/markdown
29
30
  License-File: LICENSE
30
31
  Requires-Dist: pygments>=2.19.2
32
+ Requires-Dist: rich>=14.3.2
31
33
  Provides-Extra: dev
32
34
  Requires-Dist: pytest>=9.0.0; extra == "dev"
33
35
  Requires-Dist: build>=1.2.0; extra == "dev"
@@ -102,7 +104,12 @@ Typical use cases:
102
104
 
103
105
  - Each function is converted into a **Control Flow Graph**.
104
106
  - CFG nodes contain normalized AST statements.
105
- - CFG edges represent structural control flow (`if`, `for`, `while`).
107
+ - CFG edges represent structural control flow:
108
+ - `if` / `else`
109
+ - `for` / `async for` / `while`
110
+ - `try` / `except` / `finally`
111
+ - `with` / `async with`
112
+ - `match` / `case` (Python 3.10+)
106
113
  - Current CFG semantics (v1):
107
114
  - `break` and `continue` are treated as statements (no jump targets),
108
115
  - after-blocks are explicit and always present,
@@ -154,14 +161,14 @@ Generate reports:
154
161
 
155
162
  ```bash
156
163
  codeclone . \
157
- --json-out .cache/codeclone/report.json \
158
- --text-out .cache/codeclone/report.txt
164
+ --json .cache/codeclone/report.json \
165
+ --text .cache/codeclone/report.txt
159
166
  ```
160
167
 
161
168
  Generate an HTML report:
162
169
 
163
170
  ```bash
164
- codeclone . --html-out .cache/codeclone/report.html
171
+ codeclone . --html .cache/codeclone/report.html
165
172
  ```
166
173
 
167
174
  ---
@@ -235,6 +242,9 @@ repos:
235
242
  5. Detect function-level and block-level clones.
236
243
  6. Apply conservative filters to suppress noise.
237
244
 
245
+ See the architectural overview:
246
+ - [docs/architecture.md](docs/architecture.md)
247
+
238
248
  ---
239
249
 
240
250
  ## Control Flow Graph (CFG)
@@ -0,0 +1,19 @@
1
+ codeclone/__init__.py,sha256=_MZuqgioYGn49qw6_OluedgcRvuu8IhAwVgHkM4Kj7s,364
2
+ codeclone/baseline.py,sha256=tV-vgCpyQi-g1AlWJwuUFDeQwUZuhj4tX-BxFDv-LWo,1719
3
+ codeclone/blockhash.py,sha256=jOswW9jqe9ww3y4r-gLTUZjMmn0CHXpU5qTtFndKQ10,594
4
+ codeclone/blocks.py,sha256=Y75BSpzf-zyyeD9-vKnQfJ3QwIjYxMwiN9DbEqnbONg,1735
5
+ codeclone/cache.py,sha256=DexslhZfxj79fnoPna8r_oBCqmsstN2ICRA_o6ZVpGo,1559
6
+ codeclone/cfg.py,sha256=op503zRnew2ZIz5coK_HUaKt1VG1ucJBacXAZ1rBJYQ,11587
7
+ codeclone/cli.py,sha256=c_61l3wSF2nbTZgd6LmvoUvejXatydPAPmJyRknHx78,13229
8
+ codeclone/extractor.py,sha256=crCxgkK1n2fl5FUz6HtbaEZUH5CO8S0zqM0Xj0RSE6E,4558
9
+ codeclone/fingerprint.py,sha256=H0YY209sfGf02VeLlxHNDp7n6es0vLiMmq3TBWDm3SE,545
10
+ codeclone/html_report.py,sha256=e7gYxEHk5ezJtGUYIYsQlcxu_0fP3hmd9jj-zMWMfJY,25574
11
+ codeclone/normalize.py,sha256=bvMoY3VDiZsnFiD50h5XUgwnMUKOyUEx6lJXCBiembg,4290
12
+ codeclone/report.py,sha256=IwblTgZe4liTq3gHagnw6O4ZUkRqJ448XqzhLhJZoWM,1972
13
+ codeclone/scanner.py,sha256=BdJFyaLv1xvimVkJyvvTN0FcG2RQZzRlTlHWi2fRQnU,1050
14
+ codeclone-1.2.0.dist-info/licenses/LICENSE,sha256=ndXAbunvN-jCQjgCaoBOF5AN4FcRlAa0R7hK1lWDuBU,1073
15
+ codeclone-1.2.0.dist-info/METADATA,sha256=RekVw29wHrLX0fHNLBYS9Ky6Ee6oFnntQxeCOcRbMTM,7255
16
+ codeclone-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
17
+ codeclone-1.2.0.dist-info/entry_points.txt,sha256=_MI9DVTLOmv3OlxpyogdOmMAduiLVIdHeOlZ_KBsrIY,49
18
+ codeclone-1.2.0.dist-info/top_level.txt,sha256=4tQa_d-4Zle-qV9KmNDkWq0WHYgZsW9vdaeF30rNntg,10
19
+ codeclone-1.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,19 +0,0 @@
1
- codeclone/__init__.py,sha256=_MZuqgioYGn49qw6_OluedgcRvuu8IhAwVgHkM4Kj7s,364
2
- codeclone/baseline.py,sha256=VdIh5AeWeAod6MZ1EV2UWMlvlhwHwlbyUJM_RXbhf24,1545
3
- codeclone/blockhash.py,sha256=jOswW9jqe9ww3y4r-gLTUZjMmn0CHXpU5qTtFndKQ10,594
4
- codeclone/blocks.py,sha256=Y75BSpzf-zyyeD9-vKnQfJ3QwIjYxMwiN9DbEqnbONg,1735
5
- codeclone/cache.py,sha256=PZ9XgHc3A-JSGXHYQHJK_Ldu24EIaGL7Spi2cTpbEUo,1334
6
- codeclone/cfg.py,sha256=e4zCfHeqWRvxIN1E4D-t_717ZftQY7XlwgYBfFaa9oU,4794
7
- codeclone/cli.py,sha256=Zp6e7PC-9WJIBkqX88V-4L7wryi1c2qPB87Pbpo4BfE,5177
8
- codeclone/extractor.py,sha256=crCxgkK1n2fl5FUz6HtbaEZUH5CO8S0zqM0Xj0RSE6E,4558
9
- codeclone/fingerprint.py,sha256=H0YY209sfGf02VeLlxHNDp7n6es0vLiMmq3TBWDm3SE,545
10
- codeclone/html_report.py,sha256=Es4lYOMm24O2UTsaF59qjvCxLA2nwojwLX-zOpvluro,24337
11
- codeclone/normalize.py,sha256=AzKE-1kOrOV3LVK8x4gGytulxXVYBkB0W_3oIqiRWS8,3236
12
- codeclone/report.py,sha256=2Yu9267M2GLYKqsMAD4mrTUWhTbHP6_fIOGqhtDkxL0,1912
13
- codeclone/scanner.py,sha256=BdJFyaLv1xvimVkJyvvTN0FcG2RQZzRlTlHWi2fRQnU,1050
14
- codeclone-1.1.0.dist-info/licenses/LICENSE,sha256=ndXAbunvN-jCQjgCaoBOF5AN4FcRlAa0R7hK1lWDuBU,1073
15
- codeclone-1.1.0.dist-info/METADATA,sha256=J2l_qX6VxJ_WRCK87KJPuzdVlFI5hcFR9lWjMzuQ55w,6984
16
- codeclone-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
- codeclone-1.1.0.dist-info/entry_points.txt,sha256=_MI9DVTLOmv3OlxpyogdOmMAduiLVIdHeOlZ_KBsrIY,49
18
- codeclone-1.1.0.dist-info/top_level.txt,sha256=4tQa_d-4Zle-qV9KmNDkWq0WHYgZsW9vdaeF30rNntg,10
19
- codeclone-1.1.0.dist-info/RECORD,,