codeclone 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeclone/__init__.py +1 -1
- codeclone/baseline.py +44 -14
- codeclone/blockhash.py +1 -1
- codeclone/blocks.py +4 -3
- codeclone/cache.py +154 -17
- codeclone/cfg.py +128 -38
- codeclone/cfg_model.py +47 -0
- codeclone/cli.py +524 -100
- codeclone/errors.py +27 -0
- codeclone/extractor.py +101 -24
- codeclone/html_report.py +230 -691
- codeclone/normalize.py +43 -13
- codeclone/py.typed +0 -0
- codeclone/report.py +23 -12
- codeclone/scanner.py +66 -3
- codeclone/templates.py +1262 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/METADATA +62 -34
- codeclone-1.2.1.dist-info/RECORD +23 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/WHEEL +1 -1
- codeclone-1.1.0.dist-info/RECORD +0 -19
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/entry_points.txt +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {codeclone-1.1.0.dist-info → codeclone-1.2.1.dist-info}/top_level.txt +0 -0
codeclone/__init__.py
CHANGED
codeclone/baseline.py
CHANGED
|
@@ -9,32 +9,40 @@ Licensed under the MIT License.
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import json
|
|
12
|
+
from collections.abc import Mapping
|
|
12
13
|
from pathlib import Path
|
|
13
|
-
from typing import
|
|
14
|
+
from typing import Any
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class Baseline:
|
|
17
|
-
|
|
18
|
+
__slots__ = ("blocks", "functions", "path", "python_version")
|
|
19
|
+
|
|
20
|
+
def __init__(self, path: str | Path):
|
|
18
21
|
self.path = Path(path)
|
|
19
|
-
self.functions:
|
|
20
|
-
self.blocks:
|
|
22
|
+
self.functions: set[str] = set()
|
|
23
|
+
self.blocks: set[str] = set()
|
|
24
|
+
self.python_version: str | None = None
|
|
21
25
|
|
|
22
26
|
def load(self) -> None:
|
|
23
27
|
if not self.path.exists():
|
|
24
28
|
return
|
|
25
29
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
30
|
+
try:
|
|
31
|
+
data = json.loads(self.path.read_text("utf-8"))
|
|
32
|
+
self.functions = set(data.get("functions", []))
|
|
33
|
+
self.blocks = set(data.get("blocks", []))
|
|
34
|
+
python_version = data.get("python_version")
|
|
35
|
+
self.python_version = (
|
|
36
|
+
python_version if isinstance(python_version, str) else None
|
|
37
|
+
)
|
|
38
|
+
except json.JSONDecodeError as e:
|
|
39
|
+
raise ValueError(f"Corrupted baseline file at {self.path}: {e}") from e
|
|
29
40
|
|
|
30
41
|
def save(self) -> None:
|
|
31
42
|
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
32
43
|
self.path.write_text(
|
|
33
44
|
json.dumps(
|
|
34
|
-
|
|
35
|
-
"functions": sorted(self.functions),
|
|
36
|
-
"blocks": sorted(self.blocks),
|
|
37
|
-
},
|
|
45
|
+
_baseline_payload(self.functions, self.blocks, self.python_version),
|
|
38
46
|
indent=2,
|
|
39
47
|
ensure_ascii=False,
|
|
40
48
|
),
|
|
@@ -42,13 +50,35 @@ class Baseline:
|
|
|
42
50
|
)
|
|
43
51
|
|
|
44
52
|
@staticmethod
|
|
45
|
-
def from_groups(
|
|
46
|
-
|
|
53
|
+
def from_groups(
|
|
54
|
+
func_groups: Mapping[str, object],
|
|
55
|
+
block_groups: Mapping[str, object],
|
|
56
|
+
path: str | Path = "",
|
|
57
|
+
python_version: str | None = None,
|
|
58
|
+
) -> Baseline:
|
|
59
|
+
bl = Baseline(path)
|
|
47
60
|
bl.functions = set(func_groups.keys())
|
|
48
61
|
bl.blocks = set(block_groups.keys())
|
|
62
|
+
bl.python_version = python_version
|
|
49
63
|
return bl
|
|
50
64
|
|
|
51
|
-
def diff(
|
|
65
|
+
def diff(
|
|
66
|
+
self, func_groups: Mapping[str, object], block_groups: Mapping[str, object]
|
|
67
|
+
) -> tuple[set[str], set[str]]:
|
|
52
68
|
new_funcs = set(func_groups.keys()) - self.functions
|
|
53
69
|
new_blocks = set(block_groups.keys()) - self.blocks
|
|
54
70
|
return new_funcs, new_blocks
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _baseline_payload(
|
|
74
|
+
functions: set[str],
|
|
75
|
+
blocks: set[str],
|
|
76
|
+
python_version: str | None,
|
|
77
|
+
) -> dict[str, Any]:
|
|
78
|
+
payload: dict[str, Any] = {
|
|
79
|
+
"functions": sorted(functions),
|
|
80
|
+
"blocks": sorted(blocks),
|
|
81
|
+
}
|
|
82
|
+
if python_version:
|
|
83
|
+
payload["python_version"] = python_version
|
|
84
|
+
return payload
|
codeclone/blockhash.py
CHANGED
|
@@ -11,7 +11,7 @@ from __future__ import annotations
|
|
|
11
11
|
import ast
|
|
12
12
|
import hashlib
|
|
13
13
|
|
|
14
|
-
from .normalize import
|
|
14
|
+
from .normalize import AstNormalizer, NormalizationConfig
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def stmt_hash(stmt: ast.stmt, cfg: NormalizationConfig) -> str:
|
codeclone/blocks.py
CHANGED
|
@@ -15,7 +15,7 @@ from .blockhash import stmt_hash
|
|
|
15
15
|
from .normalize import NormalizationConfig
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
@dataclass(frozen=True)
|
|
18
|
+
@dataclass(frozen=True, slots=True)
|
|
19
19
|
class BlockUnit:
|
|
20
20
|
block_hash: str
|
|
21
21
|
filepath: str
|
|
@@ -42,7 +42,8 @@ def extract_blocks(
|
|
|
42
42
|
|
|
43
43
|
blocks: list[BlockUnit] = []
|
|
44
44
|
last_start: int | None = None
|
|
45
|
-
|
|
45
|
+
# Allow some overlap (50%), but at least 3 lines apart
|
|
46
|
+
min_line_distance = max(block_size // 2, 3)
|
|
46
47
|
|
|
47
48
|
for i in range(len(stmt_hashes) - block_size + 1):
|
|
48
49
|
start = getattr(body[i], "lineno", None)
|
|
@@ -50,7 +51,7 @@ def extract_blocks(
|
|
|
50
51
|
if not start or not end:
|
|
51
52
|
continue
|
|
52
53
|
|
|
53
|
-
if last_start is not None and start - last_start <
|
|
54
|
+
if last_start is not None and start - last_start < min_line_distance:
|
|
54
55
|
continue
|
|
55
56
|
|
|
56
57
|
bh = "|".join(stmt_hashes[i : i + block_size])
|
codeclone/cache.py
CHANGED
|
@@ -8,41 +8,178 @@ Licensed under the MIT License.
|
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
+
import hashlib
|
|
12
|
+
import hmac
|
|
11
13
|
import json
|
|
12
14
|
import os
|
|
15
|
+
import secrets
|
|
16
|
+
from collections.abc import Mapping
|
|
13
17
|
from dataclasses import asdict
|
|
14
18
|
from pathlib import Path
|
|
15
|
-
from typing import
|
|
19
|
+
from typing import TYPE_CHECKING, Any, TypedDict, cast
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from .blocks import BlockUnit
|
|
23
|
+
from .extractor import Unit
|
|
24
|
+
|
|
25
|
+
from .errors import CacheError
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FileStat(TypedDict):
|
|
29
|
+
mtime_ns: int
|
|
30
|
+
size: int
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class UnitDict(TypedDict):
|
|
34
|
+
qualname: str
|
|
35
|
+
filepath: str
|
|
36
|
+
start_line: int
|
|
37
|
+
end_line: int
|
|
38
|
+
loc: int
|
|
39
|
+
stmt_count: int
|
|
40
|
+
fingerprint: str
|
|
41
|
+
loc_bucket: str
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BlockDict(TypedDict):
|
|
45
|
+
block_hash: str
|
|
46
|
+
filepath: str
|
|
47
|
+
qualname: str
|
|
48
|
+
start_line: int
|
|
49
|
+
end_line: int
|
|
50
|
+
size: int
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class CacheEntry(TypedDict):
|
|
54
|
+
stat: FileStat
|
|
55
|
+
units: list[UnitDict]
|
|
56
|
+
blocks: list[BlockDict]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class CacheData(TypedDict):
|
|
60
|
+
version: str
|
|
61
|
+
files: dict[str, CacheEntry]
|
|
16
62
|
|
|
17
63
|
|
|
18
64
|
class Cache:
|
|
19
|
-
|
|
65
|
+
__slots__ = ("data", "load_warning", "path", "secret")
|
|
66
|
+
CACHE_VERSION = "1.0"
|
|
67
|
+
|
|
68
|
+
def __init__(self, path: str | Path):
|
|
20
69
|
self.path = Path(path)
|
|
21
|
-
self.data:
|
|
70
|
+
self.data: CacheData = {"version": self.CACHE_VERSION, "files": {}}
|
|
71
|
+
self.secret = self._load_secret()
|
|
72
|
+
self.load_warning: str | None = None
|
|
73
|
+
|
|
74
|
+
def _load_secret(self) -> bytes:
|
|
75
|
+
"""Load or create cache signing secret."""
|
|
76
|
+
# Store secret in the same directory as the cache file, named .cache_secret
|
|
77
|
+
# If cache is at ~/.cache/codeclone/cache.json, secret is
|
|
78
|
+
# ~/.cache/codeclone/.cache_secret
|
|
79
|
+
secret_path = self.path.parent / ".cache_secret"
|
|
80
|
+
if secret_path.exists():
|
|
81
|
+
return secret_path.read_bytes()
|
|
82
|
+
else:
|
|
83
|
+
secret = secrets.token_bytes(32)
|
|
84
|
+
try:
|
|
85
|
+
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
86
|
+
secret_path.write_bytes(secret)
|
|
87
|
+
# Set restrictive permissions on secret file (Unix only)
|
|
88
|
+
if os.name == "posix":
|
|
89
|
+
secret_path.chmod(0o600)
|
|
90
|
+
except OSError:
|
|
91
|
+
pass
|
|
92
|
+
return secret
|
|
93
|
+
|
|
94
|
+
def _sign_data(self, data: Mapping[str, Any]) -> str:
|
|
95
|
+
"""Create HMAC signature of cache data."""
|
|
96
|
+
# Sort keys for deterministic JSON serialization
|
|
97
|
+
data_str = json.dumps(data, sort_keys=True)
|
|
98
|
+
return hmac.new(self.secret, data_str.encode(), hashlib.sha256).hexdigest()
|
|
22
99
|
|
|
23
100
|
def load(self) -> None:
|
|
24
|
-
if self.path.exists():
|
|
25
|
-
|
|
101
|
+
if not self.path.exists():
|
|
102
|
+
return
|
|
103
|
+
|
|
104
|
+
try:
|
|
105
|
+
raw = json.loads(self.path.read_text("utf-8"))
|
|
106
|
+
stored_sig = raw.get("_signature")
|
|
107
|
+
|
|
108
|
+
# Extract data without signature for verification
|
|
109
|
+
data = {k: v for k, v in raw.items() if k != "_signature"}
|
|
110
|
+
|
|
111
|
+
# Verify signature
|
|
112
|
+
expected_sig = self._sign_data(data)
|
|
113
|
+
if stored_sig != expected_sig:
|
|
114
|
+
self.load_warning = "Cache signature mismatch; ignoring cache."
|
|
115
|
+
self.data = {"version": self.CACHE_VERSION, "files": {}}
|
|
116
|
+
return
|
|
117
|
+
|
|
118
|
+
if data.get("version") != self.CACHE_VERSION:
|
|
119
|
+
self.load_warning = (
|
|
120
|
+
"Cache version mismatch "
|
|
121
|
+
f"(found {data.get('version')}); ignoring cache."
|
|
122
|
+
)
|
|
123
|
+
self.data = {"version": self.CACHE_VERSION, "files": {}}
|
|
124
|
+
return
|
|
125
|
+
|
|
126
|
+
# Basic structure check
|
|
127
|
+
if not isinstance(data.get("files"), dict):
|
|
128
|
+
self.load_warning = "Cache format invalid; ignoring cache."
|
|
129
|
+
self.data = {"version": self.CACHE_VERSION, "files": {}}
|
|
130
|
+
return
|
|
131
|
+
|
|
132
|
+
self.data = cast(CacheData, data)
|
|
133
|
+
self.load_warning = None
|
|
134
|
+
|
|
135
|
+
except (json.JSONDecodeError, ValueError):
|
|
136
|
+
self.load_warning = "Cache corrupted; ignoring cache."
|
|
137
|
+
self.data = {"version": self.CACHE_VERSION, "files": {}}
|
|
26
138
|
|
|
27
139
|
def save(self) -> None:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
140
|
+
try:
|
|
141
|
+
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
142
|
+
|
|
143
|
+
# Add signature
|
|
144
|
+
data_with_sig = {**self.data, "_signature": self._sign_data(self.data)}
|
|
145
|
+
|
|
146
|
+
self.path.write_text(
|
|
147
|
+
json.dumps(data_with_sig, ensure_ascii=False, indent=2),
|
|
148
|
+
"utf-8",
|
|
149
|
+
)
|
|
150
|
+
except OSError as e:
|
|
151
|
+
raise CacheError(f"Failed to save cache: {e}") from e
|
|
152
|
+
|
|
153
|
+
def get_file_entry(self, filepath: str) -> CacheEntry | None:
|
|
154
|
+
entry = self.data["files"].get(filepath)
|
|
155
|
+
|
|
156
|
+
if entry is None:
|
|
157
|
+
return None
|
|
158
|
+
|
|
159
|
+
if not isinstance(entry, dict):
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
required = {"stat", "units", "blocks"}
|
|
163
|
+
if not required.issubset(entry.keys()):
|
|
164
|
+
return None
|
|
33
165
|
|
|
34
|
-
|
|
35
|
-
return self.data.get("files", {}).get(filepath)
|
|
166
|
+
return entry
|
|
36
167
|
|
|
37
|
-
def put_file_entry(
|
|
38
|
-
self
|
|
168
|
+
def put_file_entry(
|
|
169
|
+
self,
|
|
170
|
+
filepath: str,
|
|
171
|
+
stat_sig: FileStat,
|
|
172
|
+
units: list[Unit],
|
|
173
|
+
blocks: list[BlockUnit],
|
|
174
|
+
) -> None:
|
|
175
|
+
self.data["files"][filepath] = {
|
|
39
176
|
"stat": stat_sig,
|
|
40
|
-
"units": [asdict(u) for u in units],
|
|
41
|
-
"blocks": [asdict(b) for b in blocks],
|
|
177
|
+
"units": cast(list[UnitDict], cast(object, [asdict(u) for u in units])),
|
|
178
|
+
"blocks": cast(list[BlockDict], cast(object, [asdict(b) for b in blocks])),
|
|
42
179
|
}
|
|
43
180
|
|
|
44
181
|
|
|
45
|
-
def file_stat_signature(path: str) ->
|
|
182
|
+
def file_stat_signature(path: str) -> FileStat:
|
|
46
183
|
st = os.stat(path)
|
|
47
184
|
return {
|
|
48
185
|
"mtime_ns": st.st_mtime_ns,
|
codeclone/cfg.py
CHANGED
|
@@ -9,48 +9,21 @@ Licensed under the MIT License.
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import ast
|
|
12
|
-
from
|
|
13
|
-
from typing import
|
|
12
|
+
from collections.abc import Iterable
|
|
13
|
+
from typing import Protocol, cast
|
|
14
14
|
|
|
15
|
+
from .cfg_model import CFG, Block
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
# Core CFG structures
|
|
18
|
-
# =========================
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@dataclass(eq=False)
|
|
22
|
-
class Block:
|
|
23
|
-
id: int
|
|
24
|
-
statements: list[ast.stmt] = field(default_factory=list)
|
|
25
|
-
successors: set["Block"] = field(default_factory=set)
|
|
26
|
-
is_terminated: bool = False
|
|
27
|
-
|
|
28
|
-
def add_successor(self, block: Block) -> None:
|
|
29
|
-
self.successors.add(block)
|
|
30
|
-
|
|
31
|
-
def __hash__(self) -> int:
|
|
32
|
-
return hash(self.id)
|
|
17
|
+
__all__ = ["CFG", "CFGBuilder"]
|
|
33
18
|
|
|
34
|
-
|
|
35
|
-
return isinstance(other, Block) and self.id == other.id
|
|
19
|
+
TryStar = getattr(ast, "TryStar", ast.Try)
|
|
36
20
|
|
|
37
21
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
entry: Block = field(init=False)
|
|
44
|
-
exit: Block = field(init=False)
|
|
45
|
-
|
|
46
|
-
def __post_init__(self) -> None:
|
|
47
|
-
self.entry = self.create_block()
|
|
48
|
-
self.exit = self.create_block()
|
|
49
|
-
|
|
50
|
-
def create_block(self) -> Block:
|
|
51
|
-
block = Block(id=len(self.blocks))
|
|
52
|
-
self.blocks.append(block)
|
|
53
|
-
return block
|
|
22
|
+
class _TryLike(Protocol):
|
|
23
|
+
body: list[ast.stmt]
|
|
24
|
+
handlers: list[ast.ExceptHandler]
|
|
25
|
+
orelse: list[ast.stmt]
|
|
26
|
+
finalbody: list[ast.stmt]
|
|
54
27
|
|
|
55
28
|
|
|
56
29
|
# =========================
|
|
@@ -59,6 +32,8 @@ class CFG:
|
|
|
59
32
|
|
|
60
33
|
|
|
61
34
|
class CFGBuilder:
|
|
35
|
+
__slots__ = ("cfg", "current")
|
|
36
|
+
|
|
62
37
|
def __init__(self) -> None:
|
|
63
38
|
self.cfg: CFG
|
|
64
39
|
self.current: Block
|
|
@@ -107,6 +82,20 @@ class CFGBuilder:
|
|
|
107
82
|
case ast.For():
|
|
108
83
|
self._visit_for(stmt)
|
|
109
84
|
|
|
85
|
+
case ast.AsyncFor():
|
|
86
|
+
self._visit_for(stmt) # Structure is identical to For
|
|
87
|
+
|
|
88
|
+
case ast.Try():
|
|
89
|
+
self._visit_try(cast(_TryLike, stmt))
|
|
90
|
+
case _ if TryStar is not None and isinstance(stmt, TryStar):
|
|
91
|
+
self._visit_try(cast(_TryLike, stmt))
|
|
92
|
+
|
|
93
|
+
case ast.With() | ast.AsyncWith():
|
|
94
|
+
self._visit_with(stmt)
|
|
95
|
+
|
|
96
|
+
case ast.Match():
|
|
97
|
+
self._visit_match(stmt)
|
|
98
|
+
|
|
110
99
|
case _:
|
|
111
100
|
self.current.statements.append(stmt)
|
|
112
101
|
|
|
@@ -153,7 +142,7 @@ class CFGBuilder:
|
|
|
153
142
|
|
|
154
143
|
self.current = after_block
|
|
155
144
|
|
|
156
|
-
def _visit_for(self, stmt: ast.For) -> None:
|
|
145
|
+
def _visit_for(self, stmt: ast.For | ast.AsyncFor) -> None:
|
|
157
146
|
iter_block = self.cfg.create_block()
|
|
158
147
|
body_block = self.cfg.create_block()
|
|
159
148
|
after_block = self.cfg.create_block()
|
|
@@ -171,3 +160,104 @@ class CFGBuilder:
|
|
|
171
160
|
self.current.add_successor(iter_block)
|
|
172
161
|
|
|
173
162
|
self.current = after_block
|
|
163
|
+
|
|
164
|
+
def _visit_with(self, stmt: ast.With | ast.AsyncWith) -> None:
|
|
165
|
+
# Treat WITH as linear flow (enter -> body -> exit), but preserve
|
|
166
|
+
# block structure
|
|
167
|
+
# We record the context manager expression in the current block
|
|
168
|
+
# Then we enter a new block for the body (to separate it structurally)
|
|
169
|
+
# Then we enter a new block for 'after' (exit)
|
|
170
|
+
|
|
171
|
+
# Why new block? Because 'with' implies a scope/context.
|
|
172
|
+
# It helps matching.
|
|
173
|
+
|
|
174
|
+
body_block = self.cfg.create_block()
|
|
175
|
+
after_block = self.cfg.create_block()
|
|
176
|
+
|
|
177
|
+
# Record the 'items' (context managers)
|
|
178
|
+
# We wrap them in Expr to treat them as statements for hashing
|
|
179
|
+
for item in stmt.items:
|
|
180
|
+
self.current.statements.append(ast.Expr(value=item.context_expr))
|
|
181
|
+
|
|
182
|
+
self.current.add_successor(body_block)
|
|
183
|
+
|
|
184
|
+
self.current = body_block
|
|
185
|
+
self._visit_statements(stmt.body)
|
|
186
|
+
if not self.current.is_terminated:
|
|
187
|
+
self.current.add_successor(after_block)
|
|
188
|
+
|
|
189
|
+
self.current = after_block
|
|
190
|
+
|
|
191
|
+
def _visit_try(self, stmt: _TryLike) -> None:
|
|
192
|
+
try_entry = self.cfg.create_block()
|
|
193
|
+
self.current.add_successor(try_entry)
|
|
194
|
+
self.current = try_entry
|
|
195
|
+
|
|
196
|
+
handlers_blocks = [self.cfg.create_block() for _ in stmt.handlers]
|
|
197
|
+
else_block = self.cfg.create_block() if stmt.orelse else None
|
|
198
|
+
final_block = self.cfg.create_block()
|
|
199
|
+
|
|
200
|
+
# Process each statement in try body
|
|
201
|
+
# Link each to exception handlers
|
|
202
|
+
for stmt_node in stmt.body:
|
|
203
|
+
if self.current.is_terminated:
|
|
204
|
+
break
|
|
205
|
+
|
|
206
|
+
# Current statement could raise exception
|
|
207
|
+
for h_block in handlers_blocks:
|
|
208
|
+
self.current.add_successor(h_block)
|
|
209
|
+
|
|
210
|
+
self._visit(stmt_node)
|
|
211
|
+
|
|
212
|
+
# Normal exit from try
|
|
213
|
+
if not self.current.is_terminated:
|
|
214
|
+
if else_block:
|
|
215
|
+
self.current.add_successor(else_block)
|
|
216
|
+
else:
|
|
217
|
+
self.current.add_successor(final_block)
|
|
218
|
+
|
|
219
|
+
# Process handlers
|
|
220
|
+
for handler, h_block in zip(stmt.handlers, handlers_blocks, strict=True):
|
|
221
|
+
self.current = h_block
|
|
222
|
+
if handler.type:
|
|
223
|
+
self.current.statements.append(ast.Expr(value=handler.type))
|
|
224
|
+
|
|
225
|
+
self._visit_statements(handler.body)
|
|
226
|
+
if not self.current.is_terminated:
|
|
227
|
+
self.current.add_successor(final_block)
|
|
228
|
+
|
|
229
|
+
# Process else
|
|
230
|
+
if else_block:
|
|
231
|
+
self.current = else_block
|
|
232
|
+
self._visit_statements(stmt.orelse)
|
|
233
|
+
if not self.current.is_terminated:
|
|
234
|
+
self.current.add_successor(final_block)
|
|
235
|
+
|
|
236
|
+
# Process finally
|
|
237
|
+
self.current = final_block
|
|
238
|
+
if stmt.finalbody:
|
|
239
|
+
self._visit_statements(stmt.finalbody)
|
|
240
|
+
|
|
241
|
+
def _visit_match(self, stmt: ast.Match) -> None:
|
|
242
|
+
self.current.statements.append(ast.Expr(value=stmt.subject))
|
|
243
|
+
|
|
244
|
+
subject_block = self.current
|
|
245
|
+
after_block = self.cfg.create_block()
|
|
246
|
+
|
|
247
|
+
for case_ in stmt.cases:
|
|
248
|
+
case_block = self.cfg.create_block()
|
|
249
|
+
subject_block.add_successor(case_block)
|
|
250
|
+
|
|
251
|
+
self.current = case_block
|
|
252
|
+
|
|
253
|
+
# Record pattern structure
|
|
254
|
+
pattern_repr = ast.dump(case_.pattern, annotate_fields=False)
|
|
255
|
+
self.current.statements.append(
|
|
256
|
+
ast.Expr(value=ast.Constant(value=f"PATTERN:{pattern_repr}"))
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
self._visit_statements(case_.body)
|
|
260
|
+
if not self.current.is_terminated:
|
|
261
|
+
self.current.add_successor(after_block)
|
|
262
|
+
|
|
263
|
+
self.current = after_block
|
codeclone/cfg_model.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CodeClone — AST and CFG-based code clone detector for Python
|
|
3
|
+
focused on architectural duplication.
|
|
4
|
+
|
|
5
|
+
Copyright (c) 2026 Den Rozhnovskiy
|
|
6
|
+
Licensed under the MIT License.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ast
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(eq=False, slots=True)
|
|
16
|
+
class Block:
|
|
17
|
+
id: int
|
|
18
|
+
statements: list[ast.stmt] = field(default_factory=list)
|
|
19
|
+
successors: set[Block] = field(default_factory=set)
|
|
20
|
+
is_terminated: bool = False
|
|
21
|
+
|
|
22
|
+
def add_successor(self, block: Block) -> None:
|
|
23
|
+
self.successors.add(block)
|
|
24
|
+
|
|
25
|
+
def __hash__(self) -> int:
|
|
26
|
+
return hash(self.id)
|
|
27
|
+
|
|
28
|
+
def __eq__(self, other: object) -> bool:
|
|
29
|
+
return isinstance(other, Block) and self.id == other.id
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(slots=True)
|
|
33
|
+
class CFG:
|
|
34
|
+
qualname: str
|
|
35
|
+
blocks: list[Block] = field(default_factory=list)
|
|
36
|
+
|
|
37
|
+
entry: Block = field(init=False)
|
|
38
|
+
exit: Block = field(init=False)
|
|
39
|
+
|
|
40
|
+
def __post_init__(self) -> None:
|
|
41
|
+
self.entry = self.create_block()
|
|
42
|
+
self.exit = self.create_block()
|
|
43
|
+
|
|
44
|
+
def create_block(self) -> Block:
|
|
45
|
+
block = Block(id=len(self.blocks))
|
|
46
|
+
self.blocks.append(block)
|
|
47
|
+
return block
|