codeclone 1.4.1__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeclone-1.4.1 → codeclone-1.4.2}/PKG-INFO +1 -1
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/blocks.py +19 -2
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cache.py +5 -5
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cli.py +39 -62
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/extractor.py +38 -23
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/scanner.py +12 -8
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/PKG-INFO +1 -1
- {codeclone-1.4.1 → codeclone-1.4.2}/pyproject.toml +1 -1
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_unit.py +7 -3
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_extractor.py +105 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_security.py +22 -6
- {codeclone-1.4.1 → codeclone-1.4.2}/LICENSE +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/README.md +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/__init__.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_args.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_meta.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_paths.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_cli_summary.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_html_escape.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_html_snippets.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_blocks.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_explain.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_explain_contract.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_grouping.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_segments.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_serialize.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/_report_types.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/baseline.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/blockhash.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cfg.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/cfg_model.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/contracts.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/errors.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/fingerprint.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/html_report.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/meta_markers.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/normalize.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/py.typed +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/report.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/templates.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone/ui_messages.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/SOURCES.txt +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/dependency_links.txt +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/entry_points.txt +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/requires.txt +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/codeclone.egg-info/top_level.txt +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/setup.cfg +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_baseline.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_blockhash.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_blocks.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cache.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cfg.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cfg_model.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_inprocess.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_main_guard.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_main_guard_runpy.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_cli_smoke.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_detector_golden.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_fingerprint.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_html_report.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_init.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_normalize.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_report.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_report_explain.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_scanner_extra.py +0 -0
- {codeclone-1.4.1 → codeclone-1.4.2}/tests/test_segments.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -9,6 +9,7 @@ Licensed under the MIT License.
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import ast
|
|
12
|
+
from collections.abc import Sequence
|
|
12
13
|
from dataclasses import dataclass
|
|
13
14
|
|
|
14
15
|
from .blockhash import stmt_hash
|
|
@@ -45,12 +46,20 @@ def extract_blocks(
|
|
|
45
46
|
cfg: NormalizationConfig,
|
|
46
47
|
block_size: int,
|
|
47
48
|
max_blocks: int,
|
|
49
|
+
precomputed_hashes: Sequence[str] | None = None,
|
|
48
50
|
) -> list[BlockUnit]:
|
|
49
51
|
body = getattr(func_node, "body", None)
|
|
50
52
|
if not isinstance(body, list) or len(body) < block_size:
|
|
51
53
|
return []
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
if precomputed_hashes is not None:
|
|
56
|
+
assert len(precomputed_hashes) == len(body), (
|
|
57
|
+
f"precomputed_hashes length {len(precomputed_hashes)} "
|
|
58
|
+
f"!= body length {len(body)}"
|
|
59
|
+
)
|
|
60
|
+
stmt_hashes = precomputed_hashes
|
|
61
|
+
else:
|
|
62
|
+
stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
|
|
54
63
|
|
|
55
64
|
blocks: list[BlockUnit] = []
|
|
56
65
|
last_start: int | None = None
|
|
@@ -94,12 +103,20 @@ def extract_segments(
|
|
|
94
103
|
cfg: NormalizationConfig,
|
|
95
104
|
window_size: int,
|
|
96
105
|
max_segments: int,
|
|
106
|
+
precomputed_hashes: Sequence[str] | None = None,
|
|
97
107
|
) -> list[SegmentUnit]:
|
|
98
108
|
body = getattr(func_node, "body", None)
|
|
99
109
|
if not isinstance(body, list) or len(body) < window_size:
|
|
100
110
|
return []
|
|
101
111
|
|
|
102
|
-
|
|
112
|
+
if precomputed_hashes is not None:
|
|
113
|
+
assert len(precomputed_hashes) == len(body), (
|
|
114
|
+
f"precomputed_hashes length {len(precomputed_hashes)} "
|
|
115
|
+
f"!= body length {len(body)}"
|
|
116
|
+
)
|
|
117
|
+
stmt_hashes = precomputed_hashes
|
|
118
|
+
else:
|
|
119
|
+
stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
|
|
103
120
|
|
|
104
121
|
segments: list[SegmentUnit] = []
|
|
105
122
|
|
|
@@ -344,14 +344,14 @@ class Cache:
|
|
|
344
344
|
try:
|
|
345
345
|
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
346
346
|
wire_files: dict[str, object] = {}
|
|
347
|
-
|
|
348
|
-
self.data["files"]
|
|
349
|
-
|
|
347
|
+
wire_map = {
|
|
348
|
+
rp: self._wire_filepath_from_runtime(rp) for rp in self.data["files"]
|
|
349
|
+
}
|
|
350
|
+
for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__):
|
|
350
351
|
entry = self.get_file_entry(runtime_path)
|
|
351
352
|
if entry is None:
|
|
352
353
|
continue
|
|
353
|
-
|
|
354
|
-
wire_files[wire_path] = _encode_wire_file_entry(entry)
|
|
354
|
+
wire_files[wire_map[runtime_path]] = _encode_wire_file_entry(entry)
|
|
355
355
|
|
|
356
356
|
payload: dict[str, object] = {
|
|
357
357
|
"py": current_python_tag(),
|
|
@@ -122,14 +122,14 @@ def process_file(
|
|
|
122
122
|
"""
|
|
123
123
|
|
|
124
124
|
try:
|
|
125
|
-
#
|
|
125
|
+
# Single os.stat() for both size check and cache signature
|
|
126
126
|
try:
|
|
127
|
-
|
|
128
|
-
if st_size > MAX_FILE_SIZE:
|
|
127
|
+
st = os.stat(filepath)
|
|
128
|
+
if st.st_size > MAX_FILE_SIZE:
|
|
129
129
|
return ProcessingResult(
|
|
130
130
|
filepath=filepath,
|
|
131
131
|
success=False,
|
|
132
|
-
error=f"File too large: {st_size} bytes (max {MAX_FILE_SIZE})",
|
|
132
|
+
error=f"File too large: {st.st_size} bytes (max {MAX_FILE_SIZE})",
|
|
133
133
|
error_kind="file_too_large",
|
|
134
134
|
)
|
|
135
135
|
except OSError as e:
|
|
@@ -140,6 +140,8 @@ def process_file(
|
|
|
140
140
|
error_kind="stat_error",
|
|
141
141
|
)
|
|
142
142
|
|
|
143
|
+
stat: FileStat = {"mtime_ns": st.st_mtime_ns, "size": st.st_size}
|
|
144
|
+
|
|
143
145
|
try:
|
|
144
146
|
source = Path(filepath).read_text("utf-8")
|
|
145
147
|
except UnicodeDecodeError as e:
|
|
@@ -157,7 +159,6 @@ def process_file(
|
|
|
157
159
|
error_kind="source_read_error",
|
|
158
160
|
)
|
|
159
161
|
|
|
160
|
-
stat = file_stat_signature(filepath)
|
|
161
162
|
module_name = module_name_from_path(root, filepath)
|
|
162
163
|
|
|
163
164
|
units, blocks, segments = extract_units_from_source(
|
|
@@ -355,68 +356,44 @@ def _main_impl() -> None:
|
|
|
355
356
|
return None, str(e)
|
|
356
357
|
|
|
357
358
|
# Discovery phase
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
)
|
|
359
|
+
def _discover_files() -> None:
|
|
360
|
+
nonlocal files_found, cache_hits, files_skipped
|
|
361
|
+
for fp in iter_py_files(str(root_path)):
|
|
362
|
+
files_found += 1
|
|
363
|
+
stat, cached, warn = _get_cached_entry(fp)
|
|
364
|
+
if warn:
|
|
365
|
+
console.print(warn)
|
|
366
|
+
files_skipped += 1
|
|
367
|
+
continue
|
|
368
|
+
if cached and cached.get("stat") == stat:
|
|
369
|
+
cache_hits += 1
|
|
370
|
+
all_units.extend(
|
|
371
|
+
cast(
|
|
372
|
+
list[GroupItem],
|
|
373
|
+
cast(object, cached.get("units", [])),
|
|
374
374
|
)
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
)
|
|
375
|
+
)
|
|
376
|
+
all_blocks.extend(
|
|
377
|
+
cast(
|
|
378
|
+
list[GroupItem],
|
|
379
|
+
cast(object, cached.get("blocks", [])),
|
|
380
380
|
)
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
)
|
|
381
|
+
)
|
|
382
|
+
all_segments.extend(
|
|
383
|
+
cast(
|
|
384
|
+
list[GroupItem],
|
|
385
|
+
cast(object, cached.get("segments", [])),
|
|
386
386
|
)
|
|
387
|
-
|
|
388
|
-
|
|
387
|
+
)
|
|
388
|
+
else:
|
|
389
|
+
files_to_process.append(fp)
|
|
390
|
+
|
|
391
|
+
try:
|
|
392
|
+
if args.quiet:
|
|
393
|
+
_discover_files()
|
|
389
394
|
else:
|
|
390
395
|
with console.status(ui.STATUS_DISCOVERING, spinner="dots"):
|
|
391
|
-
|
|
392
|
-
files_found += 1
|
|
393
|
-
stat, cached, warn = _get_cached_entry(fp)
|
|
394
|
-
if warn:
|
|
395
|
-
console.print(warn)
|
|
396
|
-
files_skipped += 1
|
|
397
|
-
continue
|
|
398
|
-
if cached and cached.get("stat") == stat:
|
|
399
|
-
cache_hits += 1
|
|
400
|
-
all_units.extend(
|
|
401
|
-
cast(
|
|
402
|
-
list[GroupItem],
|
|
403
|
-
cast(object, cached.get("units", [])),
|
|
404
|
-
)
|
|
405
|
-
)
|
|
406
|
-
all_blocks.extend(
|
|
407
|
-
cast(
|
|
408
|
-
list[GroupItem],
|
|
409
|
-
cast(object, cached.get("blocks", [])),
|
|
410
|
-
)
|
|
411
|
-
)
|
|
412
|
-
all_segments.extend(
|
|
413
|
-
cast(
|
|
414
|
-
list[GroupItem],
|
|
415
|
-
cast(object, cached.get("segments", [])),
|
|
416
|
-
)
|
|
417
|
-
)
|
|
418
|
-
else:
|
|
419
|
-
files_to_process.append(fp)
|
|
396
|
+
_discover_files()
|
|
420
397
|
except OSError as e:
|
|
421
398
|
console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=e)))
|
|
422
399
|
sys.exit(ExitCode.CONTRACT_ERROR)
|
|
@@ -16,6 +16,7 @@ from collections.abc import Iterator
|
|
|
16
16
|
from contextlib import contextmanager
|
|
17
17
|
from dataclasses import dataclass
|
|
18
18
|
|
|
19
|
+
from .blockhash import stmt_hash
|
|
19
20
|
from .blocks import BlockUnit, SegmentUnit, extract_blocks, extract_segments
|
|
20
21
|
from .cfg import CFGBuilder
|
|
21
22
|
from .errors import ParseError
|
|
@@ -250,28 +251,42 @@ def extract_units_from_source(
|
|
|
250
251
|
)
|
|
251
252
|
)
|
|
252
253
|
|
|
253
|
-
# Block-level units
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
)
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
254
|
+
# Block-level and segment-level units share statement hashes
|
|
255
|
+
needs_blocks = (
|
|
256
|
+
not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10
|
|
257
|
+
)
|
|
258
|
+
needs_segments = loc >= 30 and stmt_count >= 12
|
|
259
|
+
|
|
260
|
+
if needs_blocks or needs_segments:
|
|
261
|
+
body = getattr(node, "body", None)
|
|
262
|
+
hashes: list[str] | None = None
|
|
263
|
+
if isinstance(body, list):
|
|
264
|
+
hashes = [stmt_hash(stmt, cfg) for stmt in body]
|
|
265
|
+
|
|
266
|
+
if needs_blocks:
|
|
267
|
+
block_units.extend(
|
|
268
|
+
extract_blocks(
|
|
269
|
+
node,
|
|
270
|
+
filepath=filepath,
|
|
271
|
+
qualname=qualname,
|
|
272
|
+
cfg=cfg,
|
|
273
|
+
block_size=4,
|
|
274
|
+
max_blocks=15,
|
|
275
|
+
precomputed_hashes=hashes,
|
|
276
|
+
)
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
if needs_segments:
|
|
280
|
+
segment_units.extend(
|
|
281
|
+
extract_segments(
|
|
282
|
+
node,
|
|
283
|
+
filepath=filepath,
|
|
284
|
+
qualname=qualname,
|
|
285
|
+
cfg=cfg,
|
|
286
|
+
window_size=6,
|
|
287
|
+
max_segments=60,
|
|
288
|
+
precomputed_hashes=hashes,
|
|
289
|
+
)
|
|
290
|
+
)
|
|
276
291
|
|
|
277
292
|
return units, block_units, segment_units
|
|
@@ -77,8 +77,9 @@ def iter_py_files(
|
|
|
77
77
|
if root_str.startswith(sensitive + "/"):
|
|
78
78
|
raise ValidationError(f"Cannot scan under sensitive directory: {root}")
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
# Collect and filter first, then sort — avoids sorting excluded paths
|
|
81
|
+
candidates: list[Path] = []
|
|
82
|
+
for p in rootp.rglob("*.py"):
|
|
82
83
|
# Verify path is actually under root (prevent symlink attacks)
|
|
83
84
|
try:
|
|
84
85
|
p.resolve().relative_to(rootp)
|
|
@@ -90,12 +91,15 @@ def iter_py_files(
|
|
|
90
91
|
if any(ex in parts for ex in excludes):
|
|
91
92
|
continue
|
|
92
93
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
94
|
+
candidates.append(p)
|
|
95
|
+
|
|
96
|
+
if len(candidates) > max_files:
|
|
97
|
+
raise ValidationError(
|
|
98
|
+
f"File count exceeds limit of {max_files}. "
|
|
99
|
+
"Use more specific root or increase limit."
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
for p in sorted(candidates, key=lambda path: str(path)):
|
|
99
103
|
yield str(p)
|
|
100
104
|
|
|
101
105
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "codeclone"
|
|
7
|
-
version = "1.4.1"
|
|
7
|
+
version = "1.4.2"
|
|
8
8
|
description = "AST and CFG-based code clone detector for Python focused on architectural duplication"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -28,10 +28,14 @@ def test_process_file_stat_error(
|
|
|
28
28
|
src = tmp_path / "a.py"
|
|
29
29
|
src.write_text("def f():\n return 1\n", "utf-8")
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
raise OSError("nope")
|
|
31
|
+
_original_stat = os.stat
|
|
33
32
|
|
|
34
|
-
|
|
33
|
+
def _boom(path: str, *args: object, **kwargs: object) -> os.stat_result:
|
|
34
|
+
if str(path) == str(src):
|
|
35
|
+
raise OSError("nope")
|
|
36
|
+
return _original_stat(path, *args, **kwargs) # type: ignore[arg-type]
|
|
37
|
+
|
|
38
|
+
monkeypatch.setattr(os, "stat", _boom)
|
|
35
39
|
result = process_file(str(src), str(tmp_path), NormalizationConfig(), 1, 1)
|
|
36
40
|
assert result.success is False
|
|
37
41
|
assert result.error is not None
|
|
@@ -374,6 +374,111 @@ def f():
|
|
|
374
374
|
assert segments == []
|
|
375
375
|
|
|
376
376
|
|
|
377
|
+
def test_extract_generates_segments_without_blocks_when_only_segment_gate_met() -> None:
|
|
378
|
+
lines = ["def f():"]
|
|
379
|
+
for i in range(12):
|
|
380
|
+
lines.append(f" x{i} = {i}")
|
|
381
|
+
lines.append("")
|
|
382
|
+
lines.append("")
|
|
383
|
+
src = "\n".join(lines)
|
|
384
|
+
|
|
385
|
+
units, blocks, segments = extract_units_from_source(
|
|
386
|
+
source=src,
|
|
387
|
+
filepath="x.py",
|
|
388
|
+
module_name="mod",
|
|
389
|
+
cfg=NormalizationConfig(),
|
|
390
|
+
min_loc=1,
|
|
391
|
+
min_stmt=1,
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
assert units
|
|
395
|
+
assert blocks == []
|
|
396
|
+
assert segments
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def test_extract_generates_blocks_without_segments_when_only_block_gate_met() -> None:
|
|
400
|
+
lines = ["def f():"]
|
|
401
|
+
for i in range(10):
|
|
402
|
+
lines.append(f" x{i} = {i}")
|
|
403
|
+
lines.append("")
|
|
404
|
+
lines.append("")
|
|
405
|
+
lines.append("")
|
|
406
|
+
lines.append("")
|
|
407
|
+
src = "\n".join(lines)
|
|
408
|
+
|
|
409
|
+
units, blocks, segments = extract_units_from_source(
|
|
410
|
+
source=src,
|
|
411
|
+
filepath="x.py",
|
|
412
|
+
module_name="mod",
|
|
413
|
+
cfg=NormalizationConfig(),
|
|
414
|
+
min_loc=1,
|
|
415
|
+
min_stmt=1,
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
assert units
|
|
419
|
+
assert blocks
|
|
420
|
+
assert segments == []
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def test_extract_handles_non_list_function_body_for_hash_reuse(
|
|
424
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
425
|
+
) -> None:
|
|
426
|
+
lines = ["def f():"]
|
|
427
|
+
for i in range(12):
|
|
428
|
+
lines.append(f" x{i} = {i}")
|
|
429
|
+
lines.append("")
|
|
430
|
+
lines.append("")
|
|
431
|
+
tree = ast.parse("\n".join(lines))
|
|
432
|
+
func = tree.body[0]
|
|
433
|
+
assert isinstance(func, ast.FunctionDef)
|
|
434
|
+
func.body = tuple(func.body) # type: ignore[assignment]
|
|
435
|
+
|
|
436
|
+
captured_hashes: dict[str, object] = {}
|
|
437
|
+
|
|
438
|
+
def _fake_parse(_source: str, _timeout_s: int) -> ast.AST:
|
|
439
|
+
return tree
|
|
440
|
+
|
|
441
|
+
def _fake_fingerprint(
|
|
442
|
+
_node: ast.FunctionDef | ast.AsyncFunctionDef,
|
|
443
|
+
_cfg: NormalizationConfig,
|
|
444
|
+
_qualname: str,
|
|
445
|
+
) -> str:
|
|
446
|
+
return "f" * 40
|
|
447
|
+
|
|
448
|
+
def _fake_extract_segments(
|
|
449
|
+
_node: ast.FunctionDef | ast.AsyncFunctionDef,
|
|
450
|
+
filepath: str,
|
|
451
|
+
qualname: str,
|
|
452
|
+
cfg: NormalizationConfig,
|
|
453
|
+
window_size: int = 6,
|
|
454
|
+
max_segments: int = 60,
|
|
455
|
+
*,
|
|
456
|
+
precomputed_hashes: list[str] | None = None,
|
|
457
|
+
) -> list[object]:
|
|
458
|
+
del filepath, qualname, cfg, window_size, max_segments
|
|
459
|
+
captured_hashes["value"] = precomputed_hashes
|
|
460
|
+
return []
|
|
461
|
+
|
|
462
|
+
monkeypatch.setattr(extractor, "_parse_with_limits", _fake_parse)
|
|
463
|
+
monkeypatch.setattr(extractor, "_stmt_count", lambda _node: 12)
|
|
464
|
+
monkeypatch.setattr(extractor, "get_cfg_fingerprint", _fake_fingerprint)
|
|
465
|
+
monkeypatch.setattr(extractor, "extract_segments", _fake_extract_segments)
|
|
466
|
+
|
|
467
|
+
units, blocks, segments = extract_units_from_source(
|
|
468
|
+
source="def f():\n pass\n",
|
|
469
|
+
filepath="x.py",
|
|
470
|
+
module_name="mod",
|
|
471
|
+
cfg=NormalizationConfig(),
|
|
472
|
+
min_loc=1,
|
|
473
|
+
min_stmt=1,
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
assert len(units) == 1
|
|
477
|
+
assert blocks == []
|
|
478
|
+
assert segments == []
|
|
479
|
+
assert captured_hashes["value"] is None
|
|
480
|
+
|
|
481
|
+
|
|
377
482
|
def test_extract_skips_invalid_positions(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
378
483
|
tree = ast.parse(
|
|
379
484
|
"""
|
|
@@ -30,18 +30,34 @@ def test_process_file_size_limit() -> None:
|
|
|
30
30
|
|
|
31
31
|
try:
|
|
32
32
|
cfg = NormalizationConfig()
|
|
33
|
+
real_stat = os.stat(tmp_path)
|
|
33
34
|
|
|
34
|
-
# Mock os.
|
|
35
|
-
|
|
35
|
+
# Mock os.stat to return huge st_size
|
|
36
|
+
def _huge_stat(path: str, *args: object, **kwargs: object) -> os.stat_result:
|
|
37
|
+
return os.stat_result(
|
|
38
|
+
(
|
|
39
|
+
real_stat.st_mode,
|
|
40
|
+
real_stat.st_ino,
|
|
41
|
+
real_stat.st_dev,
|
|
42
|
+
real_stat.st_nlink,
|
|
43
|
+
real_stat.st_uid,
|
|
44
|
+
real_stat.st_gid,
|
|
45
|
+
MAX_FILE_SIZE + 1, # st_size
|
|
46
|
+
int(real_stat.st_atime),
|
|
47
|
+
int(real_stat.st_mtime),
|
|
48
|
+
int(real_stat.st_ctime),
|
|
49
|
+
)
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
with patch("os.stat", side_effect=_huge_stat):
|
|
36
53
|
result = process_file(tmp_path, os.path.dirname(tmp_path), cfg, 0, 0)
|
|
37
54
|
assert result.success is False
|
|
38
55
|
assert result.error is not None
|
|
39
56
|
assert "File too large" in result.error
|
|
40
57
|
|
|
41
|
-
# Normal size should pass
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
assert result.success is True
|
|
58
|
+
# Normal size should pass (no mock — real stat)
|
|
59
|
+
result = process_file(tmp_path, os.path.dirname(tmp_path), cfg, 0, 0)
|
|
60
|
+
assert result.success is True
|
|
45
61
|
|
|
46
62
|
finally:
|
|
47
63
|
os.remove(tmp_path)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|