codeclone 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeclone/__init__.py +16 -0
- codeclone/baseline.py +8 -0
- codeclone/blockhash.py +10 -1
- codeclone/blocks.py +26 -16
- codeclone/cache.py +8 -0
- codeclone/cfg.py +173 -0
- codeclone/cli.py +92 -58
- codeclone/extractor.py +92 -32
- codeclone/fingerprint.py +11 -1
- codeclone/html_report.py +953 -0
- codeclone/normalize.py +50 -26
- codeclone/report.py +25 -9
- codeclone/scanner.py +24 -4
- codeclone-1.1.0.dist-info/METADATA +254 -0
- codeclone-1.1.0.dist-info/RECORD +19 -0
- codeclone-1.0.0.dist-info/METADATA +0 -211
- codeclone-1.0.0.dist-info/RECORD +0 -17
- {codeclone-1.0.0.dist-info → codeclone-1.1.0.dist-info}/WHEEL +0 -0
- {codeclone-1.0.0.dist-info → codeclone-1.1.0.dist-info}/entry_points.txt +0 -0
- {codeclone-1.0.0.dist-info → codeclone-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {codeclone-1.0.0.dist-info → codeclone-1.1.0.dist-info}/top_level.txt +0 -0
codeclone/normalize.py
CHANGED
|
@@ -1,6 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CodeClone — AST and CFG-based code clone detector for Python
|
|
3
|
+
focused on architectural duplication.
|
|
4
|
+
|
|
5
|
+
Copyright (c) 2026 Den Rozhnovskiy
|
|
6
|
+
Licensed under the MIT License.
|
|
7
|
+
"""
|
|
8
|
+
|
|
1
9
|
from __future__ import annotations
|
|
2
10
|
|
|
3
11
|
import ast
|
|
12
|
+
from collections.abc import Sequence
|
|
4
13
|
from dataclasses import dataclass
|
|
5
14
|
|
|
6
15
|
|
|
@@ -8,15 +17,15 @@ from dataclasses import dataclass
|
|
|
8
17
|
class NormalizationConfig:
|
|
9
18
|
ignore_docstrings: bool = True
|
|
10
19
|
ignore_type_annotations: bool = True
|
|
11
|
-
normalize_attributes: bool = True
|
|
12
|
-
normalize_constants: bool = True
|
|
13
|
-
normalize_names: bool = True
|
|
20
|
+
normalize_attributes: bool = True
|
|
21
|
+
normalize_constants: bool = True
|
|
22
|
+
normalize_names: bool = True
|
|
14
23
|
|
|
15
24
|
|
|
16
25
|
class AstNormalizer(ast.NodeTransformer):
|
|
17
26
|
def __init__(self, cfg: NormalizationConfig):
|
|
18
|
-
self.cfg = cfg
|
|
19
27
|
super().__init__()
|
|
28
|
+
self.cfg = cfg
|
|
20
29
|
|
|
21
30
|
def visit_FunctionDef(self, node: ast.FunctionDef):
|
|
22
31
|
return self._visit_func(node)
|
|
@@ -24,28 +33,30 @@ class AstNormalizer(ast.NodeTransformer):
|
|
|
24
33
|
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
|
|
25
34
|
return self._visit_func(node)
|
|
26
35
|
|
|
27
|
-
def _visit_func(self, node):
|
|
28
|
-
# Drop docstring
|
|
36
|
+
def _visit_func(self, node: ast.FunctionDef | ast.AsyncFunctionDef):
|
|
37
|
+
# Drop docstring
|
|
29
38
|
if self.cfg.ignore_docstrings and node.body:
|
|
30
39
|
first = node.body[0]
|
|
31
|
-
if
|
|
32
|
-
|
|
33
|
-
|
|
40
|
+
if (
|
|
41
|
+
isinstance(first, ast.Expr)
|
|
42
|
+
and isinstance(first.value, ast.Constant)
|
|
43
|
+
and isinstance(first.value.value, str)
|
|
44
|
+
):
|
|
45
|
+
node.body = node.body[1:]
|
|
34
46
|
|
|
35
47
|
if self.cfg.ignore_type_annotations:
|
|
36
|
-
|
|
37
|
-
if hasattr(node, "returns"):
|
|
38
|
-
node.returns = None
|
|
48
|
+
node.returns = None
|
|
39
49
|
args = node.args
|
|
50
|
+
|
|
40
51
|
for a in getattr(args, "posonlyargs", []):
|
|
41
52
|
a.annotation = None
|
|
42
|
-
for a in
|
|
53
|
+
for a in args.args:
|
|
43
54
|
a.annotation = None
|
|
44
|
-
for a in
|
|
55
|
+
for a in args.kwonlyargs:
|
|
45
56
|
a.annotation = None
|
|
46
|
-
if
|
|
57
|
+
if args.vararg:
|
|
47
58
|
args.vararg.annotation = None
|
|
48
|
-
if
|
|
59
|
+
if args.kwarg:
|
|
49
60
|
args.kwarg.annotation = None
|
|
50
61
|
|
|
51
62
|
return self.generic_visit(node)
|
|
@@ -60,24 +71,37 @@ class AstNormalizer(ast.NodeTransformer):
|
|
|
60
71
|
node.id = "_VAR_"
|
|
61
72
|
return node
|
|
62
73
|
|
|
63
|
-
def visit_Attribute(self, node: ast.Attribute):
|
|
64
|
-
|
|
74
|
+
def visit_Attribute(self, node: ast.Attribute) -> ast.Attribute:
|
|
75
|
+
new_node = self.generic_visit(node)
|
|
76
|
+
assert isinstance(new_node, ast.Attribute)
|
|
65
77
|
if self.cfg.normalize_attributes:
|
|
66
|
-
|
|
67
|
-
return
|
|
78
|
+
new_node.attr = "_ATTR_"
|
|
79
|
+
return new_node
|
|
68
80
|
|
|
69
81
|
def visit_Constant(self, node: ast.Constant):
|
|
70
82
|
if self.cfg.normalize_constants:
|
|
71
|
-
# Preserve booleans? up to you; default: normalize everything
|
|
72
83
|
node.value = "_CONST_"
|
|
73
84
|
return node
|
|
74
85
|
|
|
75
86
|
|
|
76
87
|
def normalized_ast_dump(func_node: ast.AST, cfg: NormalizationConfig) -> str:
|
|
77
|
-
"""
|
|
78
|
-
Returns stable string representation of normalized AST.
|
|
79
|
-
"""
|
|
80
88
|
normalizer = AstNormalizer(cfg)
|
|
81
|
-
new_node = ast.fix_missing_locations(
|
|
82
|
-
|
|
89
|
+
new_node = ast.fix_missing_locations(
|
|
90
|
+
normalizer.visit(ast.copy_location(func_node, func_node))
|
|
91
|
+
)
|
|
83
92
|
return ast.dump(new_node, annotate_fields=True, include_attributes=False)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def normalized_ast_dump_from_list(
|
|
96
|
+
nodes: Sequence[ast.AST], cfg: NormalizationConfig
|
|
97
|
+
) -> str:
|
|
98
|
+
normalizer = AstNormalizer(cfg)
|
|
99
|
+
dumps: list[str] = []
|
|
100
|
+
|
|
101
|
+
for node in nodes:
|
|
102
|
+
new_node = ast.fix_missing_locations(
|
|
103
|
+
normalizer.visit(ast.copy_location(node, node))
|
|
104
|
+
)
|
|
105
|
+
dumps.append(ast.dump(new_node, annotate_fields=True, include_attributes=False))
|
|
106
|
+
|
|
107
|
+
return ";".join(dumps)
|
codeclone/report.py
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CodeClone — AST and CFG-based code clone detector for Python
|
|
3
|
+
focused on architectural duplication.
|
|
4
|
+
|
|
5
|
+
Copyright (c) 2026 Den Rozhnovskiy
|
|
6
|
+
Licensed under the MIT License.
|
|
7
|
+
"""
|
|
8
|
+
|
|
1
9
|
from __future__ import annotations
|
|
2
10
|
|
|
3
11
|
import json
|
|
@@ -12,7 +20,9 @@ def build_groups(units: list[dict[str, Any]]) -> dict[str, list[dict]]:
|
|
|
12
20
|
return {k: v for k, v in groups.items() if len(v) > 1}
|
|
13
21
|
|
|
14
22
|
|
|
15
|
-
def build_block_groups(
|
|
23
|
+
def build_block_groups(
|
|
24
|
+
blocks: list[dict], min_functions: int = 2
|
|
25
|
+
) -> dict[str, list[dict]]:
|
|
16
26
|
groups: dict[str, list[dict]] = {}
|
|
17
27
|
for b in blocks:
|
|
18
28
|
groups.setdefault(b["block_hash"], []).append(b)
|
|
@@ -27,19 +37,25 @@ def build_block_groups(blocks: list[dict], min_functions: int = 2) -> dict[str,
|
|
|
27
37
|
|
|
28
38
|
|
|
29
39
|
def to_json(groups: dict) -> str:
|
|
30
|
-
return json.dumps(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
40
|
+
return json.dumps(
|
|
41
|
+
{
|
|
42
|
+
"group_count": len(groups),
|
|
43
|
+
"groups": [
|
|
44
|
+
{"key": k, "count": len(v), "items": v}
|
|
45
|
+
for k, v in sorted(
|
|
46
|
+
groups.items(), key=lambda kv: len(kv[1]), reverse=True
|
|
47
|
+
)
|
|
48
|
+
],
|
|
49
|
+
},
|
|
50
|
+
ensure_ascii=False,
|
|
51
|
+
indent=2,
|
|
52
|
+
)
|
|
37
53
|
|
|
38
54
|
|
|
39
55
|
def to_text(groups: dict) -> str:
|
|
40
56
|
lines: list[str] = []
|
|
41
57
|
for i, (_, v) in enumerate(
|
|
42
|
-
|
|
58
|
+
sorted(groups.items(), key=lambda kv: len(kv[1]), reverse=True)
|
|
43
59
|
):
|
|
44
60
|
lines.append(f"\n=== Clone group #{i + 1} (count={len(v)}) ===")
|
|
45
61
|
for item in v:
|
codeclone/scanner.py
CHANGED
|
@@ -1,14 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CodeClone — AST and CFG-based code clone detector for Python
|
|
3
|
+
focused on architectural duplication.
|
|
4
|
+
|
|
5
|
+
Copyright (c) 2026 Den Rozhnovskiy
|
|
6
|
+
Licensed under the MIT License.
|
|
7
|
+
"""
|
|
8
|
+
|
|
1
9
|
from __future__ import annotations
|
|
2
10
|
|
|
3
11
|
from pathlib import Path
|
|
4
12
|
from typing import Iterable
|
|
5
13
|
|
|
6
14
|
DEFAULT_EXCLUDES = (
|
|
7
|
-
".git",
|
|
8
|
-
"
|
|
15
|
+
".git",
|
|
16
|
+
".venv",
|
|
17
|
+
"venv",
|
|
18
|
+
"__pycache__",
|
|
19
|
+
"site-packages",
|
|
20
|
+
"migrations",
|
|
21
|
+
"alembic",
|
|
22
|
+
"dist",
|
|
23
|
+
"build",
|
|
24
|
+
".tox",
|
|
9
25
|
)
|
|
10
26
|
|
|
11
|
-
|
|
27
|
+
|
|
28
|
+
def iter_py_files(
|
|
29
|
+
root: str, excludes: tuple[str, ...] = DEFAULT_EXCLUDES
|
|
30
|
+
) -> Iterable[str]:
|
|
12
31
|
rootp = Path(root)
|
|
13
32
|
for p in rootp.rglob("*.py"):
|
|
14
33
|
parts = set(p.parts)
|
|
@@ -16,6 +35,7 @@ def iter_py_files(root: str, excludes: tuple[str, ...] = DEFAULT_EXCLUDES) -> It
|
|
|
16
35
|
continue
|
|
17
36
|
yield str(p)
|
|
18
37
|
|
|
38
|
+
|
|
19
39
|
def module_name_from_path(root: str, filepath: str) -> str:
|
|
20
40
|
rootp = Path(root).resolve()
|
|
21
41
|
fp = Path(filepath).resolve()
|
|
@@ -25,4 +45,4 @@ def module_name_from_path(root: str, filepath: str) -> str:
|
|
|
25
45
|
# __init__.py -> package name
|
|
26
46
|
if stem.name == "__init__":
|
|
27
47
|
stem = stem.parent
|
|
28
|
-
return ".".join(stem.parts)
|
|
48
|
+
return ".".join(stem.parts)
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codeclone
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
|
+
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
|
+
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/orenlab/codeclone
|
|
9
|
+
Project-URL: Repository, https://github.com/orenlab/codeclone
|
|
10
|
+
Project-URL: Issues, https://github.com/orenlab/codeclone/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/orenlab/codeclone/releases
|
|
12
|
+
Project-URL: Documentation, https://github.com/orenlab/codeclone/tree/main/docs
|
|
13
|
+
Keywords: python,ast,code-clone,duplication,static-analysis,ci,architecture
|
|
14
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
17
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
18
|
+
Classifier: Topic :: Software Development :: Testing
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Classifier: Operating System :: OS Independent
|
|
27
|
+
Requires-Python: >=3.10
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: pygments>=2.19.2
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=9.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
34
|
+
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy>=1.19.1; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# CodeClone
|
|
39
|
+
|
|
40
|
+
[PyPI version](https://pypi.org/project/codeclone/)
|
|
41
|
+
[PyPI downloads](https://pypi.org/project/codeclone/)
|
|
42
|
+
[Supported Python versions](https://pypi.org/project/codeclone/)
|
|
43
|
+
[License: MIT](LICENSE)
|
|
44
|
+
|
|
45
|
+
**CodeClone** is a Python code clone detector based on **normalized AST and control-flow graphs (CFG)**.
|
|
46
|
+
It helps teams discover architectural duplication and prevent new copy-paste from entering the codebase via CI.
|
|
47
|
+
|
|
48
|
+
CodeClone is designed to help teams:
|
|
49
|
+
|
|
50
|
+
- discover **structural and control-flow duplication**,
|
|
51
|
+
- identify architectural hotspots,
|
|
52
|
+
- prevent *new* duplication via CI and pre-commit hooks.
|
|
53
|
+
|
|
54
|
+
Unlike token- or text-based tools, CodeClone operates on **normalized Python AST and CFG**, making it robust against renaming,
|
|
55
|
+
formatting, and minor refactoring.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Why CodeClone?
|
|
60
|
+
|
|
61
|
+
Most existing tools detect *textual* duplication.
|
|
62
|
+
CodeClone detects **structural and block-level duplication**, which usually signals missing abstractions or architectural drift.
|
|
63
|
+
|
|
64
|
+
Typical use cases:
|
|
65
|
+
|
|
66
|
+
- duplicated service or orchestration logic across layers (API ↔ application),
|
|
67
|
+
- repeated validation or guard blocks,
|
|
68
|
+
- copy-pasted request / handler flows,
|
|
69
|
+
- duplicated control-flow logic in routers, handlers, or services.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Features
|
|
74
|
+
|
|
75
|
+
### Function-level clone detection (Type-2, CFG-based)
|
|
76
|
+
|
|
77
|
+
- Detects functions and methods with identical **control-flow structure**.
|
|
78
|
+
- Based on **Control Flow Graph (CFG)** fingerprinting.
|
|
79
|
+
- Robust to:
|
|
80
|
+
- variable renaming,
|
|
81
|
+
- constant changes,
|
|
82
|
+
- attribute renaming,
|
|
83
|
+
- formatting differences,
|
|
84
|
+
- docstrings and type annotations.
|
|
85
|
+
- Ideal for spotting architectural duplication across layers.
|
|
86
|
+
|
|
87
|
+
### Block-level clone detection (Type-3-lite)
|
|
88
|
+
|
|
89
|
+
- Detects repeated **statement blocks** inside larger functions.
|
|
90
|
+
- Uses sliding windows over CFG-normalized statement sequences.
|
|
91
|
+
- Targets:
|
|
92
|
+
- validation blocks,
|
|
93
|
+
- guard clauses,
|
|
94
|
+
- repeated orchestration logic.
|
|
95
|
+
- Carefully filtered to reduce noise:
|
|
96
|
+
- no overlapping windows,
|
|
97
|
+
- no clones inside the same function,
|
|
98
|
+
- no `__init__` noise,
|
|
99
|
+
- size and statement-count thresholds.
|
|
100
|
+
|
|
101
|
+
### Control-Flow Awareness (CFG v1)
|
|
102
|
+
|
|
103
|
+
- Each function is converted into a **Control Flow Graph**.
|
|
104
|
+
- CFG nodes contain normalized AST statements.
|
|
105
|
+
- CFG edges represent structural control flow (`if`, `for`, `while`).
|
|
106
|
+
- Current CFG semantics (v1):
|
|
107
|
+
- `break` and `continue` are treated as statements (no jump targets),
|
|
108
|
+
- after-blocks are explicit and always present,
|
|
109
|
+
- focus is on **structural similarity**, not precise runtime semantics.
|
|
110
|
+
|
|
111
|
+
This design keeps clone detection **stable, deterministic, and low-noise**.
|
|
112
|
+
|
|
113
|
+
### Low-noise by design
|
|
114
|
+
|
|
115
|
+
- AST + CFG normalization instead of token matching.
|
|
116
|
+
- Conservative defaults tuned for real-world Python projects.
|
|
117
|
+
- Explicit thresholds for size and statement count.
|
|
118
|
+
- Focus on *architectural duplication*, not micro-similarities.
|
|
119
|
+
|
|
120
|
+
### CI-friendly baseline mode
|
|
121
|
+
|
|
122
|
+
- Establish a baseline of existing clones.
|
|
123
|
+
- Fail CI **only when new clones are introduced**.
|
|
124
|
+
- Safe for legacy codebases and incremental refactoring.
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Installation
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
pip install codeclone
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Python **3.10+** is required.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Quick Start
|
|
139
|
+
|
|
140
|
+
Run on a project:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
codeclone .
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
This will:
|
|
147
|
+
|
|
148
|
+
- scan Python files,
|
|
149
|
+
- build CFGs for functions,
|
|
150
|
+
- detect function-level and block-level clones,
|
|
151
|
+
- print a summary to stdout.
|
|
152
|
+
|
|
153
|
+
Generate reports:
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
codeclone . \
|
|
157
|
+
--json-out .cache/codeclone/report.json \
|
|
158
|
+
--text-out .cache/codeclone/report.txt
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Generate an HTML report:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
codeclone . --html-out .cache/codeclone/report.html
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Baseline Workflow (Recommended)
|
|
170
|
+
|
|
171
|
+
### 1. Create a baseline
|
|
172
|
+
|
|
173
|
+
Run once on your current codebase:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
codeclone . --update-baseline
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Commit the generated baseline file to the repository.
|
|
180
|
+
|
|
181
|
+
### 2. Use in CI
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
codeclone . --fail-on-new
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
Behavior:
|
|
188
|
+
|
|
189
|
+
- ✅ existing clones are allowed,
|
|
190
|
+
- ❌ build fails if *new* clones appear,
|
|
191
|
+
- ✅ refactoring that removes duplication is always allowed.
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Using with pre-commit
|
|
196
|
+
|
|
197
|
+
```yaml
|
|
198
|
+
repos:
|
|
199
|
+
- repo: local
|
|
200
|
+
hooks:
|
|
201
|
+
- id: codeclone
|
|
202
|
+
name: CodeClone
|
|
203
|
+
entry: codeclone
|
|
204
|
+
language: python
|
|
205
|
+
args: [".", "--fail-on-new"]
|
|
206
|
+
types: [python]
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## What CodeClone Is (and Is Not)
|
|
212
|
+
|
|
213
|
+
### CodeClone **is**
|
|
214
|
+
|
|
215
|
+
- an architectural analysis tool,
|
|
216
|
+
- a duplication radar,
|
|
217
|
+
- a CI guard against copy-paste,
|
|
218
|
+
- a control-flow-aware clone detector.
|
|
219
|
+
|
|
220
|
+
### CodeClone **is not**
|
|
221
|
+
|
|
222
|
+
- a linter,
|
|
223
|
+
- a formatter,
|
|
224
|
+
- a semantic equivalence prover,
|
|
225
|
+
- a runtime analyzer.
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## How It Works (High Level)
|
|
230
|
+
|
|
231
|
+
1. Parse Python source into AST.
|
|
232
|
+
2. Normalize AST (names, constants, attributes, annotations).
|
|
233
|
+
3. Build a **Control Flow Graph (CFG)** per function.
|
|
234
|
+
4. Compute stable CFG fingerprints.
|
|
235
|
+
5. Detect function-level and block-level clones.
|
|
236
|
+
6. Apply conservative filters to suppress noise.
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Control Flow Graph (CFG)
|
|
241
|
+
|
|
242
|
+
Starting from **version 1.1.0**, CodeClone uses a **Control Flow Graph (CFG)**
|
|
243
|
+
to make structural clone detection more robust.
|
|
244
|
+
|
|
245
|
+
The CFG is a **structural abstraction**, not a runtime execution model.
|
|
246
|
+
|
|
247
|
+
See full design and semantics:
|
|
248
|
+
- [docs/cfg.md](https://github.com/orenlab/codeclone/blob/main/docs/cfg.md)
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## License
|
|
253
|
+
|
|
254
|
+
MIT License
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
codeclone/__init__.py,sha256=_MZuqgioYGn49qw6_OluedgcRvuu8IhAwVgHkM4Kj7s,364
|
|
2
|
+
codeclone/baseline.py,sha256=VdIh5AeWeAod6MZ1EV2UWMlvlhwHwlbyUJM_RXbhf24,1545
|
|
3
|
+
codeclone/blockhash.py,sha256=jOswW9jqe9ww3y4r-gLTUZjMmn0CHXpU5qTtFndKQ10,594
|
|
4
|
+
codeclone/blocks.py,sha256=Y75BSpzf-zyyeD9-vKnQfJ3QwIjYxMwiN9DbEqnbONg,1735
|
|
5
|
+
codeclone/cache.py,sha256=PZ9XgHc3A-JSGXHYQHJK_Ldu24EIaGL7Spi2cTpbEUo,1334
|
|
6
|
+
codeclone/cfg.py,sha256=e4zCfHeqWRvxIN1E4D-t_717ZftQY7XlwgYBfFaa9oU,4794
|
|
7
|
+
codeclone/cli.py,sha256=Zp6e7PC-9WJIBkqX88V-4L7wryi1c2qPB87Pbpo4BfE,5177
|
|
8
|
+
codeclone/extractor.py,sha256=crCxgkK1n2fl5FUz6HtbaEZUH5CO8S0zqM0Xj0RSE6E,4558
|
|
9
|
+
codeclone/fingerprint.py,sha256=H0YY209sfGf02VeLlxHNDp7n6es0vLiMmq3TBWDm3SE,545
|
|
10
|
+
codeclone/html_report.py,sha256=Es4lYOMm24O2UTsaF59qjvCxLA2nwojwLX-zOpvluro,24337
|
|
11
|
+
codeclone/normalize.py,sha256=AzKE-1kOrOV3LVK8x4gGytulxXVYBkB0W_3oIqiRWS8,3236
|
|
12
|
+
codeclone/report.py,sha256=2Yu9267M2GLYKqsMAD4mrTUWhTbHP6_fIOGqhtDkxL0,1912
|
|
13
|
+
codeclone/scanner.py,sha256=BdJFyaLv1xvimVkJyvvTN0FcG2RQZzRlTlHWi2fRQnU,1050
|
|
14
|
+
codeclone-1.1.0.dist-info/licenses/LICENSE,sha256=ndXAbunvN-jCQjgCaoBOF5AN4FcRlAa0R7hK1lWDuBU,1073
|
|
15
|
+
codeclone-1.1.0.dist-info/METADATA,sha256=J2l_qX6VxJ_WRCK87KJPuzdVlFI5hcFR9lWjMzuQ55w,6984
|
|
16
|
+
codeclone-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
codeclone-1.1.0.dist-info/entry_points.txt,sha256=_MI9DVTLOmv3OlxpyogdOmMAduiLVIdHeOlZ_KBsrIY,49
|
|
18
|
+
codeclone-1.1.0.dist-info/top_level.txt,sha256=4tQa_d-4Zle-qV9KmNDkWq0WHYgZsW9vdaeF30rNntg,10
|
|
19
|
+
codeclone-1.1.0.dist-info/RECORD,,
|