codeclone 1.4.3__tar.gz → 1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeclone-1.4.3 → codeclone-1.4.4}/LICENSE +1 -1
- {codeclone-1.4.3 → codeclone-1.4.4}/PKG-INFO +5 -5
- {codeclone-1.4.3 → codeclone-1.4.4}/README.md +4 -4
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_html_snippets.py +50 -24
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_explain.py +93 -23
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone.egg-info/PKG-INFO +5 -5
- {codeclone-1.4.3 → codeclone-1.4.4}/pyproject.toml +1 -1
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/__init__.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_cli_args.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_cli_meta.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_cli_paths.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_cli_summary.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_html_escape.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_blocks.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_explain_contract.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_grouping.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_segments.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_serialize.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/_report_types.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/baseline.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/blockhash.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/blocks.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/cache.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/cfg.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/cfg_model.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/cli.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/contracts.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/errors.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/extractor.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/fingerprint.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/html_report.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/meta_markers.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/normalize.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/py.typed +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/report.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/scanner.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/templates.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone/ui_messages.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone.egg-info/SOURCES.txt +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone.egg-info/dependency_links.txt +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone.egg-info/entry_points.txt +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone.egg-info/requires.txt +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/codeclone.egg-info/top_level.txt +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/setup.cfg +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_baseline.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_blockhash.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_blocks.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cache.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cfg.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cfg_model.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cli_inprocess.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cli_main_guard.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cli_main_guard_runpy.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cli_smoke.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_cli_unit.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_detector_golden.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_extractor.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_fingerprint.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_html_report.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_init.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_normalize.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_report.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_report_explain.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_scanner_extra.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_security.py +0 -0
- {codeclone-1.4.3 → codeclone-1.4.4}/tests/test_segments.py +0 -0
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.3
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -49,7 +49,7 @@ Dynamic: license-file
|
|
|
49
49
|

|
|
50
50
|
[](LICENSE)
|
|
51
51
|
|
|
52
|
-
**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**.
|
|
52
|
+
**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**.
|
|
53
53
|
It discovers architectural duplication and prevents new copy-paste from entering your codebase via CI.
|
|
54
54
|
|
|
55
55
|
---
|
|
@@ -75,13 +75,13 @@ Unlike token-based tools, CodeClone compares **structure and control flow**, mak
|
|
|
75
75
|
|
|
76
76
|
**Three Detection Levels:**
|
|
77
77
|
|
|
78
|
-
1. **Function clones (CFG fingerprint)**
|
|
78
|
+
1. **Function clones (CFG fingerprint)**
|
|
79
79
|
Strong structural signal for cross-layer duplication
|
|
80
80
|
|
|
81
|
-
2. **Block clones (statement windows)**
|
|
81
|
+
2. **Block clones (statement windows)**
|
|
82
82
|
Detects repeated local logic patterns
|
|
83
83
|
|
|
84
|
-
3. **Segment clones (report-only)**
|
|
84
|
+
3. **Segment clones (report-only)**
|
|
85
85
|
Internal function repetition for explainability; not used for baseline gating
|
|
86
86
|
|
|
87
87
|
**CI-Ready Features:**
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|

|
|
9
9
|
[](LICENSE)
|
|
10
10
|
|
|
11
|
-
**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**.
|
|
11
|
+
**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**.
|
|
12
12
|
It discovers architectural duplication and prevents new copy-paste from entering your codebase via CI.
|
|
13
13
|
|
|
14
14
|
---
|
|
@@ -34,13 +34,13 @@ Unlike token-based tools, CodeClone compares **structure and control flow**, mak
|
|
|
34
34
|
|
|
35
35
|
**Three Detection Levels:**
|
|
36
36
|
|
|
37
|
-
1. **Function clones (CFG fingerprint)**
|
|
37
|
+
1. **Function clones (CFG fingerprint)**
|
|
38
38
|
Strong structural signal for cross-layer duplication
|
|
39
39
|
|
|
40
|
-
2. **Block clones (statement windows)**
|
|
40
|
+
2. **Block clones (statement windows)**
|
|
41
41
|
Detects repeated local logic patterns
|
|
42
42
|
|
|
43
|
-
3. **Segment clones (report-only)**
|
|
43
|
+
3. **Segment clones (report-only)**
|
|
44
44
|
Internal function repetition for explainability; not used for baseline gating
|
|
45
45
|
|
|
46
46
|
**CI-Ready Features:**
|
|
@@ -14,6 +14,7 @@ import itertools
|
|
|
14
14
|
from collections.abc import Iterable
|
|
15
15
|
from dataclasses import dataclass
|
|
16
16
|
from functools import lru_cache
|
|
17
|
+
from types import ModuleType
|
|
17
18
|
from typing import NamedTuple, cast
|
|
18
19
|
|
|
19
20
|
from .errors import FileProcessingError
|
|
@@ -34,33 +35,19 @@ class _Snippet:
|
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
class _FileCache:
|
|
37
|
-
__slots__ = ("
|
|
38
|
+
__slots__ = ("_get_file_lines_impl", "maxsize")
|
|
38
39
|
|
|
39
40
|
def __init__(self, maxsize: int = 128) -> None:
|
|
40
41
|
self.maxsize = maxsize
|
|
41
|
-
self.
|
|
42
|
+
self._get_file_lines_impl = lru_cache(maxsize=maxsize)(self._read_file_lines)
|
|
42
43
|
|
|
43
44
|
@staticmethod
|
|
44
|
-
def
|
|
45
|
-
filepath: str, start_line: int, end_line: int
|
|
46
|
-
) -> tuple[str, ...]:
|
|
47
|
-
if start_line < 1:
|
|
48
|
-
start_line = 1
|
|
49
|
-
if end_line < start_line:
|
|
50
|
-
return ()
|
|
51
|
-
|
|
45
|
+
def _read_file_lines(filepath: str) -> tuple[str, ...]:
|
|
52
46
|
try:
|
|
53
47
|
|
|
54
48
|
def _read_with_errors(errors: str) -> tuple[str, ...]:
|
|
55
|
-
lines: list[str] = []
|
|
56
49
|
with open(filepath, encoding="utf-8", errors=errors) as f:
|
|
57
|
-
for
|
|
58
|
-
if lineno < start_line:
|
|
59
|
-
continue
|
|
60
|
-
if lineno > end_line:
|
|
61
|
-
break
|
|
62
|
-
lines.append(line.rstrip("\n"))
|
|
63
|
-
return tuple(lines)
|
|
50
|
+
return tuple(line.rstrip("\n") for line in f)
|
|
64
51
|
|
|
65
52
|
try:
|
|
66
53
|
return _read_with_errors("strict")
|
|
@@ -72,7 +59,16 @@ class _FileCache:
|
|
|
72
59
|
def get_lines_range(
|
|
73
60
|
self, filepath: str, start_line: int, end_line: int
|
|
74
61
|
) -> tuple[str, ...]:
|
|
75
|
-
|
|
62
|
+
if start_line < 1:
|
|
63
|
+
start_line = 1
|
|
64
|
+
if end_line < start_line:
|
|
65
|
+
return ()
|
|
66
|
+
lines = self._get_file_lines_impl(filepath)
|
|
67
|
+
start_index = start_line - 1
|
|
68
|
+
if start_index >= len(lines):
|
|
69
|
+
return ()
|
|
70
|
+
end_index = min(len(lines), end_line)
|
|
71
|
+
return lines[start_index:end_index]
|
|
76
72
|
|
|
77
73
|
class _CacheInfo(NamedTuple):
|
|
78
74
|
hits: int
|
|
@@ -81,10 +77,30 @@ class _FileCache:
|
|
|
81
77
|
currsize: int
|
|
82
78
|
|
|
83
79
|
def cache_info(self) -> _CacheInfo:
|
|
84
|
-
return cast(_FileCache._CacheInfo, self.
|
|
80
|
+
return cast(_FileCache._CacheInfo, self._get_file_lines_impl.cache_info())
|
|
85
81
|
|
|
86
82
|
|
|
87
|
-
|
|
83
|
+
_PYGMENTS_IMPORTER_ID: int | None = None
|
|
84
|
+
_PYGMENTS_API: tuple[ModuleType, ModuleType, ModuleType] | None = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _load_pygments_api() -> tuple[ModuleType, ModuleType, ModuleType] | None:
|
|
88
|
+
"""
|
|
89
|
+
Load pygments modules once per import-function identity.
|
|
90
|
+
|
|
91
|
+
Tests monkeypatch `importlib.import_module`; tracking importer identity keeps
|
|
92
|
+
behavior deterministic and allows import-error branches to stay testable.
|
|
93
|
+
"""
|
|
94
|
+
global _PYGMENTS_IMPORTER_ID
|
|
95
|
+
global _PYGMENTS_API
|
|
96
|
+
|
|
97
|
+
importer_id = id(importlib.import_module)
|
|
98
|
+
if importer_id != _PYGMENTS_IMPORTER_ID:
|
|
99
|
+
_PYGMENTS_IMPORTER_ID = importer_id
|
|
100
|
+
_PYGMENTS_API = None
|
|
101
|
+
if _PYGMENTS_API is not None:
|
|
102
|
+
return _PYGMENTS_API
|
|
103
|
+
|
|
88
104
|
try:
|
|
89
105
|
pygments = importlib.import_module("pygments")
|
|
90
106
|
formatters = importlib.import_module("pygments.formatters")
|
|
@@ -92,6 +108,16 @@ def _try_pygments(code: str) -> str | None:
|
|
|
92
108
|
except ImportError:
|
|
93
109
|
return None
|
|
94
110
|
|
|
111
|
+
_PYGMENTS_API = (pygments, formatters, lexers)
|
|
112
|
+
return _PYGMENTS_API
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _try_pygments(code: str) -> str | None:
|
|
116
|
+
pygments_api = _load_pygments_api()
|
|
117
|
+
if pygments_api is None:
|
|
118
|
+
return None
|
|
119
|
+
pygments, formatters, lexers = pygments_api
|
|
120
|
+
|
|
95
121
|
highlight = pygments.highlight
|
|
96
122
|
formatter_cls = formatters.HtmlFormatter
|
|
97
123
|
lexer_cls = lexers.PythonLexer
|
|
@@ -104,10 +130,10 @@ def _pygments_css(style_name: str) -> str:
|
|
|
104
130
|
Returns CSS for pygments tokens. Scoped to `.codebox` to avoid leaking styles.
|
|
105
131
|
If Pygments is not available or style missing, returns "".
|
|
106
132
|
"""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
except ImportError:
|
|
133
|
+
pygments_api = _load_pygments_api()
|
|
134
|
+
if pygments_api is None:
|
|
110
135
|
return ""
|
|
136
|
+
_, formatters, _ = pygments_api
|
|
111
137
|
|
|
112
138
|
try:
|
|
113
139
|
formatter_cls = formatters.HtmlFormatter
|
|
@@ -9,6 +9,8 @@ Licensed under the MIT License.
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import ast
|
|
12
|
+
from bisect import bisect_left, bisect_right
|
|
13
|
+
from dataclasses import dataclass
|
|
12
14
|
from pathlib import Path
|
|
13
15
|
|
|
14
16
|
from ._report_explain_contract import (
|
|
@@ -23,6 +25,19 @@ from ._report_explain_contract import (
|
|
|
23
25
|
from ._report_types import GroupItem, GroupMap
|
|
24
26
|
|
|
25
27
|
|
|
28
|
+
@dataclass(frozen=True, slots=True)
|
|
29
|
+
class _StatementRecord:
|
|
30
|
+
node: ast.stmt
|
|
31
|
+
start_line: int
|
|
32
|
+
end_line: int
|
|
33
|
+
start_col: int
|
|
34
|
+
end_col: int
|
|
35
|
+
type_name: str
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
_StatementIndex = tuple[tuple[_StatementRecord, ...], tuple[int, ...]]
|
|
39
|
+
|
|
40
|
+
|
|
26
41
|
def _signature_parts(group_key: str) -> list[str]:
|
|
27
42
|
return [part for part in group_key.split("|") if part]
|
|
28
43
|
|
|
@@ -42,6 +57,53 @@ def _parsed_file_tree(
|
|
|
42
57
|
return tree
|
|
43
58
|
|
|
44
59
|
|
|
60
|
+
def _build_statement_index(tree: ast.AST) -> _StatementIndex:
|
|
61
|
+
records = tuple(
|
|
62
|
+
sorted(
|
|
63
|
+
(
|
|
64
|
+
_StatementRecord(
|
|
65
|
+
node=node,
|
|
66
|
+
start_line=int(getattr(node, "lineno", 0)),
|
|
67
|
+
end_line=int(getattr(node, "end_lineno", 0)),
|
|
68
|
+
start_col=int(getattr(node, "col_offset", 0)),
|
|
69
|
+
end_col=int(getattr(node, "end_col_offset", 0)),
|
|
70
|
+
type_name=type(node).__name__,
|
|
71
|
+
)
|
|
72
|
+
for node in ast.walk(tree)
|
|
73
|
+
if isinstance(node, ast.stmt)
|
|
74
|
+
),
|
|
75
|
+
key=lambda record: (
|
|
76
|
+
record.start_line,
|
|
77
|
+
record.end_line,
|
|
78
|
+
record.start_col,
|
|
79
|
+
record.end_col,
|
|
80
|
+
record.type_name,
|
|
81
|
+
),
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
start_lines = tuple(record.start_line for record in records)
|
|
85
|
+
return records, start_lines
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _parsed_statement_index(
|
|
89
|
+
filepath: str,
|
|
90
|
+
*,
|
|
91
|
+
ast_cache: dict[str, ast.AST | None],
|
|
92
|
+
stmt_index_cache: dict[str, _StatementIndex | None],
|
|
93
|
+
) -> _StatementIndex | None:
|
|
94
|
+
if filepath in stmt_index_cache:
|
|
95
|
+
return stmt_index_cache[filepath]
|
|
96
|
+
|
|
97
|
+
tree = _parsed_file_tree(filepath, ast_cache=ast_cache)
|
|
98
|
+
if tree is None:
|
|
99
|
+
stmt_index_cache[filepath] = None
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
index = _build_statement_index(tree)
|
|
103
|
+
stmt_index_cache[filepath] = index
|
|
104
|
+
return index
|
|
105
|
+
|
|
106
|
+
|
|
45
107
|
def _is_assert_like_stmt(stmt: ast.stmt) -> bool:
|
|
46
108
|
if isinstance(stmt, ast.Assert):
|
|
47
109
|
return True
|
|
@@ -64,45 +126,42 @@ def _assert_range_stats(
|
|
|
64
126
|
start_line: int,
|
|
65
127
|
end_line: int,
|
|
66
128
|
ast_cache: dict[str, ast.AST | None],
|
|
129
|
+
stmt_index_cache: dict[str, _StatementIndex | None],
|
|
67
130
|
range_cache: dict[tuple[str, int, int], tuple[int, int, int]],
|
|
68
131
|
) -> tuple[int, int, int]:
|
|
69
132
|
cache_key = (filepath, start_line, end_line)
|
|
70
133
|
if cache_key in range_cache:
|
|
71
134
|
return range_cache[cache_key]
|
|
72
135
|
|
|
73
|
-
|
|
74
|
-
|
|
136
|
+
statement_index = _parsed_statement_index(
|
|
137
|
+
filepath,
|
|
138
|
+
ast_cache=ast_cache,
|
|
139
|
+
stmt_index_cache=stmt_index_cache,
|
|
140
|
+
)
|
|
141
|
+
if statement_index is None:
|
|
75
142
|
range_cache[cache_key] = (0, 0, 0)
|
|
76
143
|
return 0, 0, 0
|
|
77
144
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
for node in ast.walk(tree)
|
|
81
|
-
if isinstance(node, ast.stmt)
|
|
82
|
-
and int(getattr(node, "lineno", 0)) >= start_line
|
|
83
|
-
and int(getattr(node, "end_lineno", 0)) <= end_line
|
|
84
|
-
]
|
|
85
|
-
if not stmts:
|
|
145
|
+
records, start_lines = statement_index
|
|
146
|
+
if not records:
|
|
86
147
|
range_cache[cache_key] = (0, 0, 0)
|
|
87
148
|
return 0, 0, 0
|
|
88
149
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
int(getattr(stmt, "col_offset", 0)),
|
|
95
|
-
int(getattr(stmt, "end_col_offset", 0)),
|
|
96
|
-
type(stmt).__name__,
|
|
97
|
-
),
|
|
98
|
-
)
|
|
150
|
+
left = bisect_left(start_lines, start_line)
|
|
151
|
+
right = bisect_right(start_lines, end_line)
|
|
152
|
+
if left >= right:
|
|
153
|
+
range_cache[cache_key] = (0, 0, 0)
|
|
154
|
+
return 0, 0, 0
|
|
99
155
|
|
|
100
|
-
total =
|
|
156
|
+
total = 0
|
|
101
157
|
assert_like = 0
|
|
102
158
|
max_consecutive = 0
|
|
103
159
|
current_consecutive = 0
|
|
104
|
-
for
|
|
105
|
-
if
|
|
160
|
+
for record in records[left:right]:
|
|
161
|
+
if record.end_line > end_line:
|
|
162
|
+
continue
|
|
163
|
+
total += 1
|
|
164
|
+
if _is_assert_like_stmt(record.node):
|
|
106
165
|
assert_like += 1
|
|
107
166
|
current_consecutive += 1
|
|
108
167
|
if current_consecutive > max_consecutive:
|
|
@@ -110,6 +169,10 @@ def _assert_range_stats(
|
|
|
110
169
|
else:
|
|
111
170
|
current_consecutive = 0
|
|
112
171
|
|
|
172
|
+
if total == 0:
|
|
173
|
+
range_cache[cache_key] = (0, 0, 0)
|
|
174
|
+
return 0, 0, 0
|
|
175
|
+
|
|
113
176
|
stats = (total, assert_like, max_consecutive)
|
|
114
177
|
range_cache[cache_key] = stats
|
|
115
178
|
return stats
|
|
@@ -121,6 +184,7 @@ def _is_assert_only_range(
|
|
|
121
184
|
start_line: int,
|
|
122
185
|
end_line: int,
|
|
123
186
|
ast_cache: dict[str, ast.AST | None],
|
|
187
|
+
stmt_index_cache: dict[str, _StatementIndex | None],
|
|
124
188
|
range_cache: dict[tuple[str, int, int], tuple[int, int, int]],
|
|
125
189
|
) -> bool:
|
|
126
190
|
total, assert_like, _ = _assert_range_stats(
|
|
@@ -128,6 +192,7 @@ def _is_assert_only_range(
|
|
|
128
192
|
start_line=start_line,
|
|
129
193
|
end_line=end_line,
|
|
130
194
|
ast_cache=ast_cache,
|
|
195
|
+
stmt_index_cache=stmt_index_cache,
|
|
131
196
|
range_cache=range_cache,
|
|
132
197
|
)
|
|
133
198
|
return total > 0 and total == assert_like
|
|
@@ -157,6 +222,7 @@ def _enrich_with_assert_facts(
|
|
|
157
222
|
facts: dict[str, str],
|
|
158
223
|
items: list[GroupItem],
|
|
159
224
|
ast_cache: dict[str, ast.AST | None],
|
|
225
|
+
stmt_index_cache: dict[str, _StatementIndex | None],
|
|
160
226
|
range_cache: dict[tuple[str, int, int], tuple[int, int, int]],
|
|
161
227
|
) -> None:
|
|
162
228
|
assert_only = True
|
|
@@ -181,6 +247,7 @@ def _enrich_with_assert_facts(
|
|
|
181
247
|
start_line=start_line,
|
|
182
248
|
end_line=end_line,
|
|
183
249
|
ast_cache=ast_cache,
|
|
250
|
+
stmt_index_cache=stmt_index_cache,
|
|
184
251
|
range_cache=range_cache,
|
|
185
252
|
)
|
|
186
253
|
total_statements += range_total
|
|
@@ -198,6 +265,7 @@ def _enrich_with_assert_facts(
|
|
|
198
265
|
start_line=start_line,
|
|
199
266
|
end_line=end_line,
|
|
200
267
|
ast_cache=ast_cache,
|
|
268
|
+
stmt_index_cache=stmt_index_cache,
|
|
201
269
|
range_cache=range_cache,
|
|
202
270
|
)
|
|
203
271
|
):
|
|
@@ -223,6 +291,7 @@ def build_block_group_facts(block_groups: GroupMap) -> dict[str, dict[str, str]]
|
|
|
223
291
|
Renderers (HTML/TXT/JSON) should only display these facts.
|
|
224
292
|
"""
|
|
225
293
|
ast_cache: dict[str, ast.AST | None] = {}
|
|
294
|
+
stmt_index_cache: dict[str, _StatementIndex | None] = {}
|
|
226
295
|
range_cache: dict[tuple[str, int, int], tuple[int, int, int]] = {}
|
|
227
296
|
facts_by_group: dict[str, dict[str, str]] = {}
|
|
228
297
|
|
|
@@ -232,6 +301,7 @@ def build_block_group_facts(block_groups: GroupMap) -> dict[str, dict[str, str]]
|
|
|
232
301
|
facts=facts,
|
|
233
302
|
items=items,
|
|
234
303
|
ast_cache=ast_cache,
|
|
304
|
+
stmt_index_cache=stmt_index_cache,
|
|
235
305
|
range_cache=range_cache,
|
|
236
306
|
)
|
|
237
307
|
group_arity = len(items)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.3
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -49,7 +49,7 @@ Dynamic: license-file
|
|
|
49
49
|

|
|
50
50
|
[](LICENSE)
|
|
51
51
|
|
|
52
|
-
**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**.
|
|
52
|
+
**CodeClone** is a Python code clone detector based on **normalized AST and Control Flow Graphs (CFG)**.
|
|
53
53
|
It discovers architectural duplication and prevents new copy-paste from entering your codebase via CI.
|
|
54
54
|
|
|
55
55
|
---
|
|
@@ -75,13 +75,13 @@ Unlike token-based tools, CodeClone compares **structure and control flow**, mak
|
|
|
75
75
|
|
|
76
76
|
**Three Detection Levels:**
|
|
77
77
|
|
|
78
|
-
1. **Function clones (CFG fingerprint)**
|
|
78
|
+
1. **Function clones (CFG fingerprint)**
|
|
79
79
|
Strong structural signal for cross-layer duplication
|
|
80
80
|
|
|
81
|
-
2. **Block clones (statement windows)**
|
|
81
|
+
2. **Block clones (statement windows)**
|
|
82
82
|
Detects repeated local logic patterns
|
|
83
83
|
|
|
84
|
-
3. **Segment clones (report-only)**
|
|
84
|
+
3. **Segment clones (report-only)**
|
|
85
85
|
Internal function repetition for explainability; not used for baseline gating
|
|
86
86
|
|
|
87
87
|
**CI-Ready Features:**
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "codeclone"
|
|
7
|
-
version = "1.4.3"
|
|
7
|
+
version = "1.4.4"
|
|
8
8
|
description = "AST and CFG-based code clone detector for Python focused on architectural duplication"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = { text = "MIT" }
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|