codeclone 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeclone-1.1.0 → codeclone-1.2.0}/PKG-INFO +15 -5
- {codeclone-1.1.0 → codeclone-1.2.0}/README.md +12 -4
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/baseline.py +13 -9
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/cache.py +12 -6
- codeclone-1.2.0/codeclone/cfg.py +338 -0
- codeclone-1.2.0/codeclone/cli.py +409 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/html_report.py +569 -586
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/normalize.py +29 -6
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/report.py +6 -6
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone.egg-info/PKG-INFO +15 -5
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone.egg-info/SOURCES.txt +1 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone.egg-info/requires.txt +1 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/pyproject.toml +3 -1
- codeclone-1.2.0/tests/test_baseline.py +62 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/tests/test_cfg.py +43 -0
- codeclone-1.2.0/tests/test_cli_smoke.py +107 -0
- codeclone-1.2.0/tests/test_html_report.py +44 -0
- codeclone-1.1.0/codeclone/cfg.py +0 -173
- codeclone-1.1.0/codeclone/cli.py +0 -179
- codeclone-1.1.0/tests/test_baseline.py +0 -15
- codeclone-1.1.0/tests/test_cli_smoke.py +0 -24
- {codeclone-1.1.0 → codeclone-1.2.0}/LICENSE +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/__init__.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/blockhash.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/blocks.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/extractor.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/fingerprint.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone/scanner.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone.egg-info/dependency_links.txt +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone.egg-info/entry_points.txt +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/codeclone.egg-info/top_level.txt +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/setup.cfg +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/tests/test_blocks.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/tests/test_extractor.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/tests/test_normalize.py +0 -0
- {codeclone-1.1.0 → codeclone-1.2.0}/tests/test_report.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -23,11 +23,13 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.11
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.12
|
|
25
25
|
Classifier: Programming Language :: Python :: 3.13
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
26
27
|
Classifier: Operating System :: OS Independent
|
|
27
28
|
Requires-Python: >=3.10
|
|
28
29
|
Description-Content-Type: text/markdown
|
|
29
30
|
License-File: LICENSE
|
|
30
31
|
Requires-Dist: pygments>=2.19.2
|
|
32
|
+
Requires-Dist: rich>=14.3.2
|
|
31
33
|
Provides-Extra: dev
|
|
32
34
|
Requires-Dist: pytest>=9.0.0; extra == "dev"
|
|
33
35
|
Requires-Dist: build>=1.2.0; extra == "dev"
|
|
@@ -102,7 +104,12 @@ Typical use cases:
|
|
|
102
104
|
|
|
103
105
|
- Each function is converted into a **Control Flow Graph**.
|
|
104
106
|
- CFG nodes contain normalized AST statements.
|
|
105
|
-
- CFG edges represent structural control flow
|
|
107
|
+
- CFG edges represent structural control flow:
|
|
108
|
+
- `if` / `else`
|
|
109
|
+
- `for` / `async for` / `while`
|
|
110
|
+
- `try` / `except` / `finally`
|
|
111
|
+
- `with` / `async with`
|
|
112
|
+
- `match` / `case` (Python 3.10+)
|
|
106
113
|
- Current CFG semantics (v1):
|
|
107
114
|
- `break` and `continue` are treated as statements (no jump targets),
|
|
108
115
|
- after-blocks are explicit and always present,
|
|
@@ -154,14 +161,14 @@ Generate reports:
|
|
|
154
161
|
|
|
155
162
|
```bash
|
|
156
163
|
codeclone . \
|
|
157
|
-
--json
|
|
158
|
-
--text
|
|
164
|
+
--json .cache/codeclone/report.json \
|
|
165
|
+
--text .cache/codeclone/report.txt
|
|
159
166
|
```
|
|
160
167
|
|
|
161
168
|
Generate an HTML report:
|
|
162
169
|
|
|
163
170
|
```bash
|
|
164
|
-
codeclone . --html
|
|
171
|
+
codeclone . --html .cache/codeclone/report.html
|
|
165
172
|
```
|
|
166
173
|
|
|
167
174
|
---
|
|
@@ -235,6 +242,9 @@ repos:
|
|
|
235
242
|
5. Detect function-level and block-level clones.
|
|
236
243
|
6. Apply conservative filters to suppress noise.
|
|
237
244
|
|
|
245
|
+
See the architectural overview:
|
|
246
|
+
- [docs/architecture.md](docs/architecture.md)
|
|
247
|
+
|
|
238
248
|
---
|
|
239
249
|
|
|
240
250
|
## Control Flow Graph (CFG)
|
|
@@ -65,7 +65,12 @@ Typical use cases:
|
|
|
65
65
|
|
|
66
66
|
- Each function is converted into a **Control Flow Graph**.
|
|
67
67
|
- CFG nodes contain normalized AST statements.
|
|
68
|
-
- CFG edges represent structural control flow
|
|
68
|
+
- CFG edges represent structural control flow:
|
|
69
|
+
- `if` / `else`
|
|
70
|
+
- `for` / `async for` / `while`
|
|
71
|
+
- `try` / `except` / `finally`
|
|
72
|
+
- `with` / `async with`
|
|
73
|
+
- `match` / `case` (Python 3.10+)
|
|
69
74
|
- Current CFG semantics (v1):
|
|
70
75
|
- `break` and `continue` are treated as statements (no jump targets),
|
|
71
76
|
- after-blocks are explicit and always present,
|
|
@@ -117,14 +122,14 @@ Generate reports:
|
|
|
117
122
|
|
|
118
123
|
```bash
|
|
119
124
|
codeclone . \
|
|
120
|
-
--json
|
|
121
|
-
--text
|
|
125
|
+
--json .cache/codeclone/report.json \
|
|
126
|
+
--text .cache/codeclone/report.txt
|
|
122
127
|
```
|
|
123
128
|
|
|
124
129
|
Generate an HTML report:
|
|
125
130
|
|
|
126
131
|
```bash
|
|
127
|
-
codeclone . --html
|
|
132
|
+
codeclone . --html .cache/codeclone/report.html
|
|
128
133
|
```
|
|
129
134
|
|
|
130
135
|
---
|
|
@@ -198,6 +203,9 @@ repos:
|
|
|
198
203
|
5. Detect function-level and block-level clones.
|
|
199
204
|
6. Apply conservative filters to suppress noise.
|
|
200
205
|
|
|
206
|
+
See the architectural overview:
|
|
207
|
+
- [docs/architecture.md](docs/architecture.md)
|
|
208
|
+
|
|
201
209
|
---
|
|
202
210
|
|
|
203
211
|
## Control Flow Graph (CFG)
|
|
@@ -10,22 +10,24 @@ from __future__ import annotations
|
|
|
10
10
|
|
|
11
11
|
import json
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import Set
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class Baseline:
|
|
17
|
-
def __init__(self, path: str):
|
|
16
|
+
def __init__(self, path: str | Path):
|
|
18
17
|
self.path = Path(path)
|
|
19
|
-
self.functions:
|
|
20
|
-
self.blocks:
|
|
18
|
+
self.functions: set[str] = set()
|
|
19
|
+
self.blocks: set[str] = set()
|
|
21
20
|
|
|
22
21
|
def load(self) -> None:
|
|
23
22
|
if not self.path.exists():
|
|
24
23
|
return
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
25
|
+
try:
|
|
26
|
+
data = json.loads(self.path.read_text("utf-8"))
|
|
27
|
+
self.functions = set(data.get("functions", []))
|
|
28
|
+
self.blocks = set(data.get("blocks", []))
|
|
29
|
+
except json.JSONDecodeError as e:
|
|
30
|
+
raise ValueError(f"Corrupted baseline file at {self.path}: {e}") from e
|
|
29
31
|
|
|
30
32
|
def save(self) -> None:
|
|
31
33
|
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -42,8 +44,10 @@ class Baseline:
|
|
|
42
44
|
)
|
|
43
45
|
|
|
44
46
|
@staticmethod
|
|
45
|
-
def from_groups(
|
|
46
|
-
|
|
47
|
+
def from_groups(
|
|
48
|
+
func_groups: dict, block_groups: dict, path: str | Path = ""
|
|
49
|
+
) -> "Baseline":
|
|
50
|
+
bl = Baseline(path)
|
|
47
51
|
bl.functions = set(func_groups.keys())
|
|
48
52
|
bl.blocks = set(block_groups.keys())
|
|
49
53
|
return bl
|
|
@@ -12,17 +12,21 @@ import json
|
|
|
12
12
|
import os
|
|
13
13
|
from dataclasses import asdict
|
|
14
14
|
from pathlib import Path
|
|
15
|
-
from typing import Optional
|
|
15
|
+
from typing import Any, Optional
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Cache:
|
|
19
|
-
def __init__(self, path: str):
|
|
19
|
+
def __init__(self, path: str | Path):
|
|
20
20
|
self.path = Path(path)
|
|
21
|
-
self.data: dict = {"files": {}}
|
|
21
|
+
self.data: dict[str, Any] = {"files": {}}
|
|
22
22
|
|
|
23
23
|
def load(self) -> None:
|
|
24
24
|
if self.path.exists():
|
|
25
|
-
|
|
25
|
+
try:
|
|
26
|
+
self.data = json.loads(self.path.read_text("utf-8"))
|
|
27
|
+
except json.JSONDecodeError:
|
|
28
|
+
# If cache is corrupted, start fresh
|
|
29
|
+
self.data = {"files": {}}
|
|
26
30
|
|
|
27
31
|
def save(self) -> None:
|
|
28
32
|
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -31,10 +35,12 @@ class Cache:
|
|
|
31
35
|
"utf-8",
|
|
32
36
|
)
|
|
33
37
|
|
|
34
|
-
def get_file_entry(self, filepath: str) -> Optional[dict]:
|
|
38
|
+
def get_file_entry(self, filepath: str) -> Optional[dict[str, Any]]:
|
|
35
39
|
return self.data.get("files", {}).get(filepath)
|
|
36
40
|
|
|
37
|
-
def put_file_entry(
|
|
41
|
+
def put_file_entry(
|
|
42
|
+
self, filepath: str, stat_sig: dict[str, Any], units: list, blocks: list
|
|
43
|
+
) -> None:
|
|
38
44
|
self.data.setdefault("files", {})[filepath] = {
|
|
39
45
|
"stat": stat_sig,
|
|
40
46
|
"units": [asdict(u) for u in units],
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CodeClone — AST and CFG-based code clone detector for Python
|
|
3
|
+
focused on architectural duplication.
|
|
4
|
+
|
|
5
|
+
Copyright (c) 2026 Den Rozhnovskiy
|
|
6
|
+
Licensed under the MIT License.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ast
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Iterable
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# =========================
|
|
17
|
+
# Core CFG structures
|
|
18
|
+
# =========================
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(eq=False)
class Block:
    """A single node of a control flow graph.

    Identity is defined by ``id`` alone: two blocks with the same ``id``
    compare equal and hash alike, regardless of their statements or
    successors. ``eq=False`` stops ``dataclass`` from generating a
    field-by-field ``__eq__`` so the hand-written one below is used.
    """

    # Numeric identifier; the only field used for equality and hashing.
    id: int
    # AST statements recorded in this block, in insertion order.
    statements: list[ast.stmt] = field(default_factory=list)
    # Blocks that control may flow to from this block.
    successors: set["Block"] = field(default_factory=set)
    # Set once the block ends with a statement that leaves normal flow.
    is_terminated: bool = False

    def __eq__(self, other: object) -> bool:
        """Return True only for another ``Block`` carrying the same ``id``."""
        if not isinstance(other, Block):
            return False
        return self.id == other.id

    def __hash__(self) -> int:
        """Hash on ``id`` so blocks can be stored in sets."""
        return hash(self.id)

    def add_successor(self, block: Block) -> None:
        """Record ``block`` as a possible next block (duplicates collapse)."""
        self.successors |= {block}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class CFG:
    """Control flow graph of one function.

    ``entry`` and ``exit`` are not constructor arguments; they are created
    in ``__post_init__`` as the first two blocks allocated by this graph.
    """

    # Qualified name of the function this graph describes.
    qualname: str
    # Every block allocated through ``create_block``, in creation order.
    blocks: list[Block] = field(default_factory=list)

    entry: Block = field(init=False)
    exit: Block = field(init=False)

    def __post_init__(self) -> None:
        """Allocate the entry block first, then the exit block."""
        # Tuple elements are evaluated left to right, so ``entry`` always
        # receives the lower id of the two.
        self.entry, self.exit = self.create_block(), self.create_block()

    def create_block(self) -> Block:
        """Allocate a new block whose id is the current block count."""
        new_block = Block(id=len(self.blocks))
        self.blocks.append(new_block)
        return new_block
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# =========================
|
|
57
|
+
# CFG Builder
|
|
58
|
+
# =========================
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class CFGBuilder:
|
|
62
|
+
def __init__(self) -> None:
|
|
63
|
+
self.cfg: CFG
|
|
64
|
+
self.current: Block
|
|
65
|
+
|
|
66
|
+
def build(
|
|
67
|
+
self,
|
|
68
|
+
qualname: str,
|
|
69
|
+
node: ast.FunctionDef | ast.AsyncFunctionDef,
|
|
70
|
+
) -> CFG:
|
|
71
|
+
self.cfg = CFG(qualname)
|
|
72
|
+
self.current = self.cfg.entry
|
|
73
|
+
|
|
74
|
+
self._visit_statements(node.body)
|
|
75
|
+
|
|
76
|
+
if not self.current.is_terminated:
|
|
77
|
+
self.current.add_successor(self.cfg.exit)
|
|
78
|
+
|
|
79
|
+
return self.cfg
|
|
80
|
+
|
|
81
|
+
# ---------- Internals ----------
|
|
82
|
+
|
|
83
|
+
def _visit_statements(self, stmts: Iterable[ast.stmt]) -> None:
|
|
84
|
+
for stmt in stmts:
|
|
85
|
+
if self.current.is_terminated:
|
|
86
|
+
break
|
|
87
|
+
self._visit(stmt)
|
|
88
|
+
|
|
89
|
+
def _visit(self, stmt: ast.stmt) -> None:
|
|
90
|
+
match stmt:
|
|
91
|
+
case ast.Return():
|
|
92
|
+
self.current.statements.append(stmt)
|
|
93
|
+
self.current.is_terminated = True
|
|
94
|
+
self.current.add_successor(self.cfg.exit)
|
|
95
|
+
|
|
96
|
+
case ast.Raise():
|
|
97
|
+
self.current.statements.append(stmt)
|
|
98
|
+
self.current.is_terminated = True
|
|
99
|
+
self.current.add_successor(self.cfg.exit)
|
|
100
|
+
|
|
101
|
+
case ast.If():
|
|
102
|
+
self._visit_if(stmt)
|
|
103
|
+
|
|
104
|
+
case ast.While():
|
|
105
|
+
self._visit_while(stmt)
|
|
106
|
+
|
|
107
|
+
case ast.For():
|
|
108
|
+
self._visit_for(stmt)
|
|
109
|
+
|
|
110
|
+
case ast.AsyncFor():
|
|
111
|
+
self._visit_for(stmt) # Structure is identical to For
|
|
112
|
+
|
|
113
|
+
case ast.Try() | ast.TryStar():
|
|
114
|
+
self._visit_try(stmt)
|
|
115
|
+
|
|
116
|
+
case ast.With() | ast.AsyncWith():
|
|
117
|
+
self._visit_with(stmt)
|
|
118
|
+
|
|
119
|
+
case ast.Match():
|
|
120
|
+
self._visit_match(stmt)
|
|
121
|
+
|
|
122
|
+
case _:
|
|
123
|
+
self.current.statements.append(stmt)
|
|
124
|
+
|
|
125
|
+
# ---------- Control Flow ----------
|
|
126
|
+
|
|
127
|
+
def _visit_if(self, stmt: ast.If) -> None:
|
|
128
|
+
self.current.statements.append(ast.Expr(value=stmt.test))
|
|
129
|
+
|
|
130
|
+
then_block = self.cfg.create_block()
|
|
131
|
+
else_block = self.cfg.create_block()
|
|
132
|
+
after_block = self.cfg.create_block()
|
|
133
|
+
|
|
134
|
+
self.current.add_successor(then_block)
|
|
135
|
+
self.current.add_successor(else_block)
|
|
136
|
+
|
|
137
|
+
self.current = then_block
|
|
138
|
+
self._visit_statements(stmt.body)
|
|
139
|
+
if not self.current.is_terminated:
|
|
140
|
+
self.current.add_successor(after_block)
|
|
141
|
+
|
|
142
|
+
self.current = else_block
|
|
143
|
+
self._visit_statements(stmt.orelse)
|
|
144
|
+
if not self.current.is_terminated:
|
|
145
|
+
self.current.add_successor(after_block)
|
|
146
|
+
|
|
147
|
+
self.current = after_block
|
|
148
|
+
|
|
149
|
+
def _visit_while(self, stmt: ast.While) -> None:
|
|
150
|
+
cond_block = self.cfg.create_block()
|
|
151
|
+
body_block = self.cfg.create_block()
|
|
152
|
+
after_block = self.cfg.create_block()
|
|
153
|
+
|
|
154
|
+
self.current.add_successor(cond_block)
|
|
155
|
+
|
|
156
|
+
self.current = cond_block
|
|
157
|
+
self.current.statements.append(ast.Expr(value=stmt.test))
|
|
158
|
+
self.current.add_successor(body_block)
|
|
159
|
+
self.current.add_successor(after_block)
|
|
160
|
+
|
|
161
|
+
self.current = body_block
|
|
162
|
+
self._visit_statements(stmt.body)
|
|
163
|
+
if not self.current.is_terminated:
|
|
164
|
+
self.current.add_successor(cond_block)
|
|
165
|
+
|
|
166
|
+
self.current = after_block
|
|
167
|
+
|
|
168
|
+
def _visit_for(self, stmt: ast.For | ast.AsyncFor) -> None:
|
|
169
|
+
iter_block = self.cfg.create_block()
|
|
170
|
+
body_block = self.cfg.create_block()
|
|
171
|
+
after_block = self.cfg.create_block()
|
|
172
|
+
|
|
173
|
+
self.current.add_successor(iter_block)
|
|
174
|
+
|
|
175
|
+
self.current = iter_block
|
|
176
|
+
self.current.statements.append(ast.Expr(value=stmt.iter))
|
|
177
|
+
self.current.add_successor(body_block)
|
|
178
|
+
self.current.add_successor(after_block)
|
|
179
|
+
|
|
180
|
+
self.current = body_block
|
|
181
|
+
self._visit_statements(stmt.body)
|
|
182
|
+
if not self.current.is_terminated:
|
|
183
|
+
self.current.add_successor(iter_block)
|
|
184
|
+
|
|
185
|
+
self.current = after_block
|
|
186
|
+
|
|
187
|
+
def _visit_with(self, stmt: ast.With | ast.AsyncWith) -> None:
|
|
188
|
+
# Treat WITH as linear flow (enter -> body -> exit), but preserve block structure
|
|
189
|
+
# We record the context manager expression in the current block
|
|
190
|
+
# Then we enter a new block for the body (to separate it structurally)
|
|
191
|
+
# Then we enter a new block for 'after' (exit)
|
|
192
|
+
|
|
193
|
+
# Why new block? Because 'with' implies a scope/context.
|
|
194
|
+
# It helps matching.
|
|
195
|
+
|
|
196
|
+
body_block = self.cfg.create_block()
|
|
197
|
+
after_block = self.cfg.create_block()
|
|
198
|
+
|
|
199
|
+
# Record the 'items' (context managers)
|
|
200
|
+
# We wrap them in Expr to treat them as statements for hashing
|
|
201
|
+
for item in stmt.items:
|
|
202
|
+
self.current.statements.append(ast.Expr(value=item.context_expr))
|
|
203
|
+
|
|
204
|
+
self.current.add_successor(body_block)
|
|
205
|
+
|
|
206
|
+
self.current = body_block
|
|
207
|
+
self._visit_statements(stmt.body)
|
|
208
|
+
if not self.current.is_terminated:
|
|
209
|
+
self.current.add_successor(after_block)
|
|
210
|
+
|
|
211
|
+
self.current = after_block
|
|
212
|
+
|
|
213
|
+
def _visit_try(self, stmt: ast.Try | ast.TryStar) -> None:
|
|
214
|
+
# Simplified Try CFG:
|
|
215
|
+
# Try Body -> [Handlers...] -> Finally/After
|
|
216
|
+
# Try Body -> Else -> Finally/After
|
|
217
|
+
|
|
218
|
+
try_block = self.cfg.create_block()
|
|
219
|
+
self.current.add_successor(try_block)
|
|
220
|
+
|
|
221
|
+
# We don't know WHERE in the try block exception happens, so we assume
|
|
222
|
+
# any point in try block *could* jump to handlers.
|
|
223
|
+
# But for structural hashing, we just process the body.
|
|
224
|
+
# Ideally, we should link the try_block (or its end) to handlers?
|
|
225
|
+
# A simple approximation:
|
|
226
|
+
# 1. Process body.
|
|
227
|
+
# 2. Link entry (or end of body) to handlers?
|
|
228
|
+
# Let's do: Entry -> BodyBlock.
|
|
229
|
+
# Entry -> HandlerBlocks (to represent potential jump).
|
|
230
|
+
|
|
231
|
+
# Actually, let's keep it linear but branched.
|
|
232
|
+
# Current -> TryBody
|
|
233
|
+
# Current -> Handlers (Abstractly representing the jump)
|
|
234
|
+
|
|
235
|
+
handlers_blocks = [self.cfg.create_block() for _ in stmt.handlers]
|
|
236
|
+
else_block = self.cfg.create_block() if stmt.orelse else None
|
|
237
|
+
final_block = self.cfg.create_block() # This is finally or after
|
|
238
|
+
|
|
239
|
+
# Link current to TryBody
|
|
240
|
+
self.current = try_block
|
|
241
|
+
self._visit_statements(stmt.body)
|
|
242
|
+
|
|
243
|
+
# If try body finishes successfully:
|
|
244
|
+
if not self.current.is_terminated:
|
|
245
|
+
if else_block:
|
|
246
|
+
self.current.add_successor(else_block)
|
|
247
|
+
else:
|
|
248
|
+
self.current.add_successor(final_block)
|
|
249
|
+
|
|
250
|
+
# Handle Else
|
|
251
|
+
if else_block:
|
|
252
|
+
self.current = else_block
|
|
253
|
+
self._visit_statements(stmt.orelse)
|
|
254
|
+
if not self.current.is_terminated:
|
|
255
|
+
self.current.add_successor(final_block)
|
|
256
|
+
|
|
257
|
+
# Handle Handlers
|
|
258
|
+
# We assume control flow *could* jump from start of Try to any handler
|
|
259
|
+
# (Technically from inside try, but we model structural containment)
|
|
260
|
+
# To make fingerprints stable, we just need to ensure handlers are visited
|
|
261
|
+
# and linked.
|
|
262
|
+
|
|
263
|
+
# We link the *original* predecessor (before try) or the try_block start to handlers?
|
|
264
|
+
# Let's link the `try_block` (as a container concept) to handlers.
|
|
265
|
+
# But `try_block` was mutated by `_visit_statements`.
|
|
266
|
+
# Let's use the `try_block` (start of try) to link to handlers.
|
|
267
|
+
for h_block in handlers_blocks:
|
|
268
|
+
try_block.add_successor(h_block)
|
|
269
|
+
|
|
270
|
+
for handler, h_block in zip(stmt.handlers, handlers_blocks):
|
|
271
|
+
self.current = h_block
|
|
272
|
+
# Record exception type
|
|
273
|
+
if handler.type:
|
|
274
|
+
self.current.statements.append(ast.Expr(value=handler.type))
|
|
275
|
+
self._visit_statements(handler.body)
|
|
276
|
+
if not self.current.is_terminated:
|
|
277
|
+
self.current.add_successor(final_block)
|
|
278
|
+
|
|
279
|
+
# Finally logic:
|
|
280
|
+
# If there is a finally block, `final_block` IS the finally block.
|
|
281
|
+
# We visit it. Then we create a new `after_finally` block?
|
|
282
|
+
# Or `final_block` is the start of finally.
|
|
283
|
+
|
|
284
|
+
if stmt.finalbody:
|
|
285
|
+
self.current = final_block
|
|
286
|
+
self._visit_statements(stmt.finalbody)
|
|
287
|
+
# And then continue to next code?
|
|
288
|
+
# Yes, finally flows to next statement.
|
|
289
|
+
# Unless terminated.
|
|
290
|
+
|
|
291
|
+
# If no finally, `final_block` is just the merge point (after).
|
|
292
|
+
self.current = final_block
|
|
293
|
+
|
|
294
|
+
def _visit_match(self, stmt: ast.Match) -> None:
|
|
295
|
+
# Match subject -> Cases -> After
|
|
296
|
+
|
|
297
|
+
self.current.statements.append(ast.Expr(value=stmt.subject))
|
|
298
|
+
|
|
299
|
+
after_block = self.cfg.create_block()
|
|
300
|
+
|
|
301
|
+
for case_ in stmt.cases:
|
|
302
|
+
case_block = self.cfg.create_block()
|
|
303
|
+
self.current.add_successor(case_block)
|
|
304
|
+
|
|
305
|
+
# Save current context to restore for next case branching?
|
|
306
|
+
# No, 'current' is the match subject block. It branches to ALL cases.
|
|
307
|
+
|
|
308
|
+
# Visit Case
|
|
309
|
+
# We must set self.current to case_block for visiting body
|
|
310
|
+
# But we lose reference to 'match subject block' to link next case!
|
|
311
|
+
# So we need a variable `subject_block`.
|
|
312
|
+
pass
|
|
313
|
+
|
|
314
|
+
# Re-implementing loop correctly
|
|
315
|
+
subject_block = self.current
|
|
316
|
+
|
|
317
|
+
for case_ in stmt.cases:
|
|
318
|
+
case_block = self.cfg.create_block()
|
|
319
|
+
subject_block.add_successor(case_block)
|
|
320
|
+
|
|
321
|
+
self.current = case_block
|
|
322
|
+
# We could record the pattern here?
|
|
323
|
+
# patterns are complex AST nodes. For now, let's skip pattern structure hash
|
|
324
|
+
# and just hash the body. Or dump pattern as statement?
|
|
325
|
+
# Pattern is not a statement.
|
|
326
|
+
# Let's ignore pattern details for V1, or try to normalize it.
|
|
327
|
+
# If we ignore pattern, then `case []:` and `case {}:` look same.
|
|
328
|
+
# Ideally: `self.current.statements.append(case_.pattern)` but pattern is not stmt.
|
|
329
|
+
# We can wrap in Expr? `ast.Expr(value=case_.pattern)`?
|
|
330
|
+
# Pattern is NOT an Expr subclass in 3.10. It's `ast.pattern`.
|
|
331
|
+
# So we cannot append it to `statements` list which expects `ast.stmt`.
|
|
332
|
+
# We will ignore pattern structure for now (it's structural flow we care about).
|
|
333
|
+
|
|
334
|
+
self._visit_statements(case_.body)
|
|
335
|
+
if not self.current.is_terminated:
|
|
336
|
+
self.current.add_successor(after_block)
|
|
337
|
+
|
|
338
|
+
self.current = after_block
|