codeclone 1.4.0__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeclone-1.4.0 → codeclone-1.4.2}/PKG-INFO +1 -1
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_summary.py +11 -1
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_html_snippets.py +9 -2
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/blocks.py +19 -2
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cache.py +5 -5
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cli.py +46 -63
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/extractor.py +38 -23
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/html_report.py +1 -2
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/scanner.py +12 -8
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/templates.py +205 -110
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/ui_messages.py +3 -3
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/PKG-INFO +1 -1
- {codeclone-1.4.0 → codeclone-1.4.2}/pyproject.toml +1 -1
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_unit.py +8 -4
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_extractor.py +105 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_html_report.py +2 -2
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_security.py +22 -6
- {codeclone-1.4.0 → codeclone-1.4.2}/LICENSE +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/README.md +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/__init__.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_args.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_meta.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_paths.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_html_escape.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_blocks.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_explain.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_explain_contract.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_grouping.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_segments.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_serialize.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_types.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/baseline.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/blockhash.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cfg.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cfg_model.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/contracts.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/errors.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/fingerprint.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/meta_markers.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/normalize.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/py.typed +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/report.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/SOURCES.txt +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/dependency_links.txt +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/entry_points.txt +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/requires.txt +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/top_level.txt +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/setup.cfg +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_baseline.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_blockhash.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_blocks.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cache.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cfg.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cfg_model.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_inprocess.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_main_guard.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_main_guard_runpy.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_smoke.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_detector_golden.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_fingerprint.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_init.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_normalize.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_report.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_report_explain.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_scanner_extra.py +0 -0
- {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_segments.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeclone
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
|
|
5
5
|
Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
6
6
|
Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
|
|
@@ -14,6 +14,14 @@ from rich.text import Text
|
|
|
14
14
|
|
|
15
15
|
from . import ui_messages as ui
|
|
16
16
|
|
|
17
|
+
_CLONE_LABELS = frozenset(
|
|
18
|
+
{
|
|
19
|
+
ui.SUMMARY_LABEL_FUNCTION,
|
|
20
|
+
ui.SUMMARY_LABEL_BLOCK,
|
|
21
|
+
ui.SUMMARY_LABEL_SEGMENT,
|
|
22
|
+
}
|
|
23
|
+
)
|
|
24
|
+
|
|
17
25
|
|
|
18
26
|
def _summary_value_style(*, label: str, value: int) -> str:
|
|
19
27
|
if value == 0:
|
|
@@ -22,7 +30,9 @@ def _summary_value_style(*, label: str, value: int) -> str:
|
|
|
22
30
|
return "bold red"
|
|
23
31
|
if label == ui.SUMMARY_LABEL_SUPPRESSED:
|
|
24
32
|
return "yellow"
|
|
25
|
-
|
|
33
|
+
if label in _CLONE_LABELS:
|
|
34
|
+
return "bold yellow"
|
|
35
|
+
return "bold"
|
|
26
36
|
|
|
27
37
|
|
|
28
38
|
def _build_summary_rows(
|
|
@@ -196,9 +196,16 @@ def _render_code_block(
|
|
|
196
196
|
rendered.append(
|
|
197
197
|
f'<div class="{cls}">{html.escape(text, quote=False)}</div>'
|
|
198
198
|
)
|
|
199
|
-
body = "
|
|
199
|
+
body = "".join(rendered)
|
|
200
200
|
else:
|
|
201
|
-
|
|
201
|
+
hit_flags = [hit for hit, _ in numbered]
|
|
202
|
+
pyg_lines = highlighted.split("\n")
|
|
203
|
+
rendered_pyg: list[str] = []
|
|
204
|
+
for i, pyg_line in enumerate(pyg_lines):
|
|
205
|
+
hit = hit_flags[i] if i < len(hit_flags) else False
|
|
206
|
+
cls = "hitline" if hit else "line"
|
|
207
|
+
rendered_pyg.append(f'<div class="{cls}">{pyg_line}</div>')
|
|
208
|
+
body = "".join(rendered_pyg)
|
|
202
209
|
|
|
203
210
|
return _Snippet(
|
|
204
211
|
filepath=filepath,
|
|
@@ -9,6 +9,7 @@ Licensed under the MIT License.
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import ast
|
|
12
|
+
from collections.abc import Sequence
|
|
12
13
|
from dataclasses import dataclass
|
|
13
14
|
|
|
14
15
|
from .blockhash import stmt_hash
|
|
@@ -45,12 +46,20 @@ def extract_blocks(
|
|
|
45
46
|
cfg: NormalizationConfig,
|
|
46
47
|
block_size: int,
|
|
47
48
|
max_blocks: int,
|
|
49
|
+
precomputed_hashes: Sequence[str] | None = None,
|
|
48
50
|
) -> list[BlockUnit]:
|
|
49
51
|
body = getattr(func_node, "body", None)
|
|
50
52
|
if not isinstance(body, list) or len(body) < block_size:
|
|
51
53
|
return []
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
if precomputed_hashes is not None:
|
|
56
|
+
assert len(precomputed_hashes) == len(body), (
|
|
57
|
+
f"precomputed_hashes length {len(precomputed_hashes)} "
|
|
58
|
+
f"!= body length {len(body)}"
|
|
59
|
+
)
|
|
60
|
+
stmt_hashes = precomputed_hashes
|
|
61
|
+
else:
|
|
62
|
+
stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
|
|
54
63
|
|
|
55
64
|
blocks: list[BlockUnit] = []
|
|
56
65
|
last_start: int | None = None
|
|
@@ -94,12 +103,20 @@ def extract_segments(
|
|
|
94
103
|
cfg: NormalizationConfig,
|
|
95
104
|
window_size: int,
|
|
96
105
|
max_segments: int,
|
|
106
|
+
precomputed_hashes: Sequence[str] | None = None,
|
|
97
107
|
) -> list[SegmentUnit]:
|
|
98
108
|
body = getattr(func_node, "body", None)
|
|
99
109
|
if not isinstance(body, list) or len(body) < window_size:
|
|
100
110
|
return []
|
|
101
111
|
|
|
102
|
-
|
|
112
|
+
if precomputed_hashes is not None:
|
|
113
|
+
assert len(precomputed_hashes) == len(body), (
|
|
114
|
+
f"precomputed_hashes length {len(precomputed_hashes)} "
|
|
115
|
+
f"!= body length {len(body)}"
|
|
116
|
+
)
|
|
117
|
+
stmt_hashes = precomputed_hashes
|
|
118
|
+
else:
|
|
119
|
+
stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
|
|
103
120
|
|
|
104
121
|
segments: list[SegmentUnit] = []
|
|
105
122
|
|
|
@@ -344,14 +344,14 @@ class Cache:
|
|
|
344
344
|
try:
|
|
345
345
|
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
346
346
|
wire_files: dict[str, object] = {}
|
|
347
|
-
|
|
348
|
-
self.data["files"]
|
|
349
|
-
|
|
347
|
+
wire_map = {
|
|
348
|
+
rp: self._wire_filepath_from_runtime(rp) for rp in self.data["files"]
|
|
349
|
+
}
|
|
350
|
+
for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__):
|
|
350
351
|
entry = self.get_file_entry(runtime_path)
|
|
351
352
|
if entry is None:
|
|
352
353
|
continue
|
|
353
|
-
|
|
354
|
-
wire_files[wire_path] = _encode_wire_file_entry(entry)
|
|
354
|
+
wire_files[wire_map[runtime_path]] = _encode_wire_file_entry(entry)
|
|
355
355
|
|
|
356
356
|
payload: dict[str, object] = {
|
|
357
357
|
"py": current_python_tag(),
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
import sys
|
|
5
|
+
import time
|
|
5
6
|
from collections.abc import Mapping, Sequence
|
|
6
7
|
from concurrent.futures import Future, ProcessPoolExecutor, as_completed
|
|
7
8
|
from dataclasses import asdict, dataclass
|
|
@@ -71,7 +72,6 @@ custom_theme = Theme(
|
|
|
71
72
|
}
|
|
72
73
|
)
|
|
73
74
|
|
|
74
|
-
|
|
75
75
|
LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser()
|
|
76
76
|
|
|
77
77
|
|
|
@@ -122,14 +122,14 @@ def process_file(
|
|
|
122
122
|
"""
|
|
123
123
|
|
|
124
124
|
try:
|
|
125
|
-
#
|
|
125
|
+
# Single os.stat() for both size check and cache signature
|
|
126
126
|
try:
|
|
127
|
-
|
|
128
|
-
if st_size > MAX_FILE_SIZE:
|
|
127
|
+
st = os.stat(filepath)
|
|
128
|
+
if st.st_size > MAX_FILE_SIZE:
|
|
129
129
|
return ProcessingResult(
|
|
130
130
|
filepath=filepath,
|
|
131
131
|
success=False,
|
|
132
|
-
error=f"File too large: {st_size} bytes (max {MAX_FILE_SIZE})",
|
|
132
|
+
error=f"File too large: {st.st_size} bytes (max {MAX_FILE_SIZE})",
|
|
133
133
|
error_kind="file_too_large",
|
|
134
134
|
)
|
|
135
135
|
except OSError as e:
|
|
@@ -140,6 +140,8 @@ def process_file(
|
|
|
140
140
|
error_kind="stat_error",
|
|
141
141
|
)
|
|
142
142
|
|
|
143
|
+
stat: FileStat = {"mtime_ns": st.st_mtime_ns, "size": st.st_size}
|
|
144
|
+
|
|
143
145
|
try:
|
|
144
146
|
source = Path(filepath).read_text("utf-8")
|
|
145
147
|
except UnicodeDecodeError as e:
|
|
@@ -157,7 +159,6 @@ def process_file(
|
|
|
157
159
|
error_kind="source_read_error",
|
|
158
160
|
)
|
|
159
161
|
|
|
160
|
-
stat = file_stat_signature(filepath)
|
|
161
162
|
module_name = module_name_from_path(root, filepath)
|
|
162
163
|
|
|
163
164
|
units, blocks, segments = extract_units_from_source(
|
|
@@ -238,6 +239,8 @@ def _main_impl() -> None:
|
|
|
238
239
|
)
|
|
239
240
|
sys.exit(ExitCode.CONTRACT_ERROR)
|
|
240
241
|
|
|
242
|
+
t0 = time.monotonic()
|
|
243
|
+
|
|
241
244
|
if not args.quiet:
|
|
242
245
|
print_banner()
|
|
243
246
|
|
|
@@ -353,68 +356,44 @@ def _main_impl() -> None:
|
|
|
353
356
|
return None, str(e)
|
|
354
357
|
|
|
355
358
|
# Discovery phase
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
)
|
|
359
|
+
def _discover_files() -> None:
|
|
360
|
+
nonlocal files_found, cache_hits, files_skipped
|
|
361
|
+
for fp in iter_py_files(str(root_path)):
|
|
362
|
+
files_found += 1
|
|
363
|
+
stat, cached, warn = _get_cached_entry(fp)
|
|
364
|
+
if warn:
|
|
365
|
+
console.print(warn)
|
|
366
|
+
files_skipped += 1
|
|
367
|
+
continue
|
|
368
|
+
if cached and cached.get("stat") == stat:
|
|
369
|
+
cache_hits += 1
|
|
370
|
+
all_units.extend(
|
|
371
|
+
cast(
|
|
372
|
+
list[GroupItem],
|
|
373
|
+
cast(object, cached.get("units", [])),
|
|
372
374
|
)
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
)
|
|
375
|
+
)
|
|
376
|
+
all_blocks.extend(
|
|
377
|
+
cast(
|
|
378
|
+
list[GroupItem],
|
|
379
|
+
cast(object, cached.get("blocks", [])),
|
|
378
380
|
)
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
)
|
|
381
|
+
)
|
|
382
|
+
all_segments.extend(
|
|
383
|
+
cast(
|
|
384
|
+
list[GroupItem],
|
|
385
|
+
cast(object, cached.get("segments", [])),
|
|
384
386
|
)
|
|
385
|
-
|
|
386
|
-
|
|
387
|
+
)
|
|
388
|
+
else:
|
|
389
|
+
files_to_process.append(fp)
|
|
390
|
+
|
|
391
|
+
try:
|
|
392
|
+
if args.quiet:
|
|
393
|
+
_discover_files()
|
|
387
394
|
else:
|
|
388
395
|
with console.status(ui.STATUS_DISCOVERING, spinner="dots"):
|
|
389
|
-
|
|
390
|
-
files_found += 1
|
|
391
|
-
stat, cached, warn = _get_cached_entry(fp)
|
|
392
|
-
if warn:
|
|
393
|
-
console.print(warn)
|
|
394
|
-
files_skipped += 1
|
|
395
|
-
continue
|
|
396
|
-
if cached and cached.get("stat") == stat:
|
|
397
|
-
cache_hits += 1
|
|
398
|
-
all_units.extend(
|
|
399
|
-
cast(
|
|
400
|
-
list[GroupItem],
|
|
401
|
-
cast(object, cached.get("units", [])),
|
|
402
|
-
)
|
|
403
|
-
)
|
|
404
|
-
all_blocks.extend(
|
|
405
|
-
cast(
|
|
406
|
-
list[GroupItem],
|
|
407
|
-
cast(object, cached.get("blocks", [])),
|
|
408
|
-
)
|
|
409
|
-
)
|
|
410
|
-
all_segments.extend(
|
|
411
|
-
cast(
|
|
412
|
-
list[GroupItem],
|
|
413
|
-
cast(object, cached.get("segments", [])),
|
|
414
|
-
)
|
|
415
|
-
)
|
|
416
|
-
else:
|
|
417
|
-
files_to_process.append(fp)
|
|
396
|
+
_discover_files()
|
|
418
397
|
except OSError as e:
|
|
419
398
|
console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=e)))
|
|
420
399
|
sys.exit(ExitCode.CONTRACT_ERROR)
|
|
@@ -900,6 +879,10 @@ def _main_impl() -> None:
|
|
|
900
879
|
if not args.update_baseline and not args.fail_on_new and new_clones_count > 0:
|
|
901
880
|
console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL)
|
|
902
881
|
|
|
882
|
+
if not args.quiet:
|
|
883
|
+
elapsed = time.monotonic() - t0
|
|
884
|
+
console.print(f"\n[dim]Done in {elapsed:.1f}s[/dim]")
|
|
885
|
+
|
|
903
886
|
|
|
904
887
|
def main() -> None:
|
|
905
888
|
try:
|
|
@@ -16,6 +16,7 @@ from collections.abc import Iterator
|
|
|
16
16
|
from contextlib import contextmanager
|
|
17
17
|
from dataclasses import dataclass
|
|
18
18
|
|
|
19
|
+
from .blockhash import stmt_hash
|
|
19
20
|
from .blocks import BlockUnit, SegmentUnit, extract_blocks, extract_segments
|
|
20
21
|
from .cfg import CFGBuilder
|
|
21
22
|
from .errors import ParseError
|
|
@@ -250,28 +251,42 @@ def extract_units_from_source(
|
|
|
250
251
|
)
|
|
251
252
|
)
|
|
252
253
|
|
|
253
|
-
# Block-level units
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
)
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
254
|
+
# Block-level and segment-level units share statement hashes
|
|
255
|
+
needs_blocks = (
|
|
256
|
+
not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10
|
|
257
|
+
)
|
|
258
|
+
needs_segments = loc >= 30 and stmt_count >= 12
|
|
259
|
+
|
|
260
|
+
if needs_blocks or needs_segments:
|
|
261
|
+
body = getattr(node, "body", None)
|
|
262
|
+
hashes: list[str] | None = None
|
|
263
|
+
if isinstance(body, list):
|
|
264
|
+
hashes = [stmt_hash(stmt, cfg) for stmt in body]
|
|
265
|
+
|
|
266
|
+
if needs_blocks:
|
|
267
|
+
block_units.extend(
|
|
268
|
+
extract_blocks(
|
|
269
|
+
node,
|
|
270
|
+
filepath=filepath,
|
|
271
|
+
qualname=qualname,
|
|
272
|
+
cfg=cfg,
|
|
273
|
+
block_size=4,
|
|
274
|
+
max_blocks=15,
|
|
275
|
+
precomputed_hashes=hashes,
|
|
276
|
+
)
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
if needs_segments:
|
|
280
|
+
segment_units.extend(
|
|
281
|
+
extract_segments(
|
|
282
|
+
node,
|
|
283
|
+
filepath=filepath,
|
|
284
|
+
qualname=qualname,
|
|
285
|
+
cfg=cfg,
|
|
286
|
+
window_size=6,
|
|
287
|
+
max_segments=60,
|
|
288
|
+
precomputed_hashes=hashes,
|
|
289
|
+
)
|
|
290
|
+
)
|
|
276
291
|
|
|
277
292
|
return units, block_units, segment_units
|
|
@@ -760,10 +760,9 @@ def build_html_report(
|
|
|
760
760
|
f'<div class="meta-panel" id="report-meta" {meta_attrs}>'
|
|
761
761
|
'<div class="meta-header">'
|
|
762
762
|
'<div class="meta-title">'
|
|
763
|
-
f"{chevron_icon}"
|
|
764
763
|
"Report Provenance"
|
|
765
764
|
"</div>"
|
|
766
|
-
'<div class="meta-toggle collapsed"
|
|
765
|
+
f'<div class="meta-toggle collapsed">{chevron_icon}</div>'
|
|
767
766
|
"</div>"
|
|
768
767
|
'<div class="meta-content collapsed">'
|
|
769
768
|
f'<div class="meta-grid">{meta_rows_html}</div>'
|
|
@@ -77,8 +77,9 @@ def iter_py_files(
|
|
|
77
77
|
if root_str.startswith(sensitive + "/"):
|
|
78
78
|
raise ValidationError(f"Cannot scan under sensitive directory: {root}")
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
# Collect and filter first, then sort — avoids sorting excluded paths
|
|
81
|
+
candidates: list[Path] = []
|
|
82
|
+
for p in rootp.rglob("*.py"):
|
|
82
83
|
# Verify path is actually under root (prevent symlink attacks)
|
|
83
84
|
try:
|
|
84
85
|
p.resolve().relative_to(rootp)
|
|
@@ -90,12 +91,15 @@ def iter_py_files(
|
|
|
90
91
|
if any(ex in parts for ex in excludes):
|
|
91
92
|
continue
|
|
92
93
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
94
|
+
candidates.append(p)
|
|
95
|
+
|
|
96
|
+
if len(candidates) > max_files:
|
|
97
|
+
raise ValidationError(
|
|
98
|
+
f"File count exceeds limit of {max_files}. "
|
|
99
|
+
"Use more specific root or increase limit."
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
for p in sorted(candidates, key=lambda path: str(path)):
|
|
99
103
|
yield str(p)
|
|
100
104
|
|
|
101
105
|
|