codeclone 1.4.0__tar.gz → 1.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66):
  1. {codeclone-1.4.0 → codeclone-1.4.2}/PKG-INFO +1 -1
  2. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_summary.py +11 -1
  3. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_html_snippets.py +9 -2
  4. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/blocks.py +19 -2
  5. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cache.py +5 -5
  6. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cli.py +46 -63
  7. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/extractor.py +38 -23
  8. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/html_report.py +1 -2
  9. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/scanner.py +12 -8
  10. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/templates.py +205 -110
  11. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/ui_messages.py +3 -3
  12. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/PKG-INFO +1 -1
  13. {codeclone-1.4.0 → codeclone-1.4.2}/pyproject.toml +1 -1
  14. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_unit.py +8 -4
  15. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_extractor.py +105 -0
  16. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_html_report.py +2 -2
  17. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_security.py +22 -6
  18. {codeclone-1.4.0 → codeclone-1.4.2}/LICENSE +0 -0
  19. {codeclone-1.4.0 → codeclone-1.4.2}/README.md +0 -0
  20. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/__init__.py +0 -0
  21. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_args.py +0 -0
  22. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_meta.py +0 -0
  23. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_cli_paths.py +0 -0
  24. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_html_escape.py +0 -0
  25. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_blocks.py +0 -0
  26. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_explain.py +0 -0
  27. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_explain_contract.py +0 -0
  28. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_grouping.py +0 -0
  29. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_segments.py +0 -0
  30. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_serialize.py +0 -0
  31. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/_report_types.py +0 -0
  32. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/baseline.py +0 -0
  33. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/blockhash.py +0 -0
  34. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cfg.py +0 -0
  35. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/cfg_model.py +0 -0
  36. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/contracts.py +0 -0
  37. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/errors.py +0 -0
  38. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/fingerprint.py +0 -0
  39. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/meta_markers.py +0 -0
  40. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/normalize.py +0 -0
  41. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/py.typed +0 -0
  42. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone/report.py +0 -0
  43. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/SOURCES.txt +0 -0
  44. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/dependency_links.txt +0 -0
  45. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/entry_points.txt +0 -0
  46. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/requires.txt +0 -0
  47. {codeclone-1.4.0 → codeclone-1.4.2}/codeclone.egg-info/top_level.txt +0 -0
  48. {codeclone-1.4.0 → codeclone-1.4.2}/setup.cfg +0 -0
  49. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_baseline.py +0 -0
  50. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_blockhash.py +0 -0
  51. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_blocks.py +0 -0
  52. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cache.py +0 -0
  53. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cfg.py +0 -0
  54. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cfg_model.py +0 -0
  55. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_inprocess.py +0 -0
  56. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_main_guard.py +0 -0
  57. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_main_guard_runpy.py +0 -0
  58. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_cli_smoke.py +0 -0
  59. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_detector_golden.py +0 -0
  60. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_fingerprint.py +0 -0
  61. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_init.py +0 -0
  62. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_normalize.py +0 -0
  63. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_report.py +0 -0
  64. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_report_explain.py +0 -0
  65. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_scanner_extra.py +0 -0
  66. {codeclone-1.4.0 → codeclone-1.4.2}/tests/test_segments.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeclone
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: AST and CFG-based code clone detector for Python focused on architectural duplication
5
5
  Author-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
6
6
  Maintainer-email: Den Rozhnovskiy <pytelemonbot@mail.ru>
@@ -14,6 +14,14 @@ from rich.text import Text
14
14
 
15
15
  from . import ui_messages as ui
16
16
 
17
+ _CLONE_LABELS = frozenset(
18
+ {
19
+ ui.SUMMARY_LABEL_FUNCTION,
20
+ ui.SUMMARY_LABEL_BLOCK,
21
+ ui.SUMMARY_LABEL_SEGMENT,
22
+ }
23
+ )
24
+
17
25
 
18
26
  def _summary_value_style(*, label: str, value: int) -> str:
19
27
  if value == 0:
@@ -22,7 +30,9 @@ def _summary_value_style(*, label: str, value: int) -> str:
22
30
  return "bold red"
23
31
  if label == ui.SUMMARY_LABEL_SUPPRESSED:
24
32
  return "yellow"
25
- return "bold green"
33
+ if label in _CLONE_LABELS:
34
+ return "bold yellow"
35
+ return "bold"
26
36
 
27
37
 
28
38
  def _build_summary_rows(
@@ -196,9 +196,16 @@ def _render_code_block(
196
196
  rendered.append(
197
197
  f'<div class="{cls}">{html.escape(text, quote=False)}</div>'
198
198
  )
199
- body = "\n".join(rendered)
199
+ body = "".join(rendered)
200
200
  else:
201
- body = highlighted
201
+ hit_flags = [hit for hit, _ in numbered]
202
+ pyg_lines = highlighted.split("\n")
203
+ rendered_pyg: list[str] = []
204
+ for i, pyg_line in enumerate(pyg_lines):
205
+ hit = hit_flags[i] if i < len(hit_flags) else False
206
+ cls = "hitline" if hit else "line"
207
+ rendered_pyg.append(f'<div class="{cls}">{pyg_line}</div>')
208
+ body = "".join(rendered_pyg)
202
209
 
203
210
  return _Snippet(
204
211
  filepath=filepath,
@@ -9,6 +9,7 @@ Licensed under the MIT License.
9
9
  from __future__ import annotations
10
10
 
11
11
  import ast
12
+ from collections.abc import Sequence
12
13
  from dataclasses import dataclass
13
14
 
14
15
  from .blockhash import stmt_hash
@@ -45,12 +46,20 @@ def extract_blocks(
45
46
  cfg: NormalizationConfig,
46
47
  block_size: int,
47
48
  max_blocks: int,
49
+ precomputed_hashes: Sequence[str] | None = None,
48
50
  ) -> list[BlockUnit]:
49
51
  body = getattr(func_node, "body", None)
50
52
  if not isinstance(body, list) or len(body) < block_size:
51
53
  return []
52
54
 
53
- stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
55
+ if precomputed_hashes is not None:
56
+ assert len(precomputed_hashes) == len(body), (
57
+ f"precomputed_hashes length {len(precomputed_hashes)} "
58
+ f"!= body length {len(body)}"
59
+ )
60
+ stmt_hashes = precomputed_hashes
61
+ else:
62
+ stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
54
63
 
55
64
  blocks: list[BlockUnit] = []
56
65
  last_start: int | None = None
@@ -94,12 +103,20 @@ def extract_segments(
94
103
  cfg: NormalizationConfig,
95
104
  window_size: int,
96
105
  max_segments: int,
106
+ precomputed_hashes: Sequence[str] | None = None,
97
107
  ) -> list[SegmentUnit]:
98
108
  body = getattr(func_node, "body", None)
99
109
  if not isinstance(body, list) or len(body) < window_size:
100
110
  return []
101
111
 
102
- stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
112
+ if precomputed_hashes is not None:
113
+ assert len(precomputed_hashes) == len(body), (
114
+ f"precomputed_hashes length {len(precomputed_hashes)} "
115
+ f"!= body length {len(body)}"
116
+ )
117
+ stmt_hashes = precomputed_hashes
118
+ else:
119
+ stmt_hashes = [stmt_hash(stmt, cfg) for stmt in body]
103
120
 
104
121
  segments: list[SegmentUnit] = []
105
122
 
@@ -344,14 +344,14 @@ class Cache:
344
344
  try:
345
345
  self.path.parent.mkdir(parents=True, exist_ok=True)
346
346
  wire_files: dict[str, object] = {}
347
- for runtime_path in sorted(
348
- self.data["files"], key=self._wire_filepath_from_runtime
349
- ):
347
+ wire_map = {
348
+ rp: self._wire_filepath_from_runtime(rp) for rp in self.data["files"]
349
+ }
350
+ for runtime_path in sorted(self.data["files"], key=wire_map.__getitem__):
350
351
  entry = self.get_file_entry(runtime_path)
351
352
  if entry is None:
352
353
  continue
353
- wire_path = self._wire_filepath_from_runtime(runtime_path)
354
- wire_files[wire_path] = _encode_wire_file_entry(entry)
354
+ wire_files[wire_map[runtime_path]] = _encode_wire_file_entry(entry)
355
355
 
356
356
  payload: dict[str, object] = {
357
357
  "py": current_python_tag(),
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  import sys
5
+ import time
5
6
  from collections.abc import Mapping, Sequence
6
7
  from concurrent.futures import Future, ProcessPoolExecutor, as_completed
7
8
  from dataclasses import asdict, dataclass
@@ -71,7 +72,6 @@ custom_theme = Theme(
71
72
  }
72
73
  )
73
74
 
74
-
75
75
  LEGACY_CACHE_PATH = Path("~/.cache/codeclone/cache.json").expanduser()
76
76
 
77
77
 
@@ -122,14 +122,14 @@ def process_file(
122
122
  """
123
123
 
124
124
  try:
125
- # Check file size
125
+ # Single os.stat() for both size check and cache signature
126
126
  try:
127
- st_size = os.path.getsize(filepath)
128
- if st_size > MAX_FILE_SIZE:
127
+ st = os.stat(filepath)
128
+ if st.st_size > MAX_FILE_SIZE:
129
129
  return ProcessingResult(
130
130
  filepath=filepath,
131
131
  success=False,
132
- error=f"File too large: {st_size} bytes (max {MAX_FILE_SIZE})",
132
+ error=f"File too large: {st.st_size} bytes (max {MAX_FILE_SIZE})",
133
133
  error_kind="file_too_large",
134
134
  )
135
135
  except OSError as e:
@@ -140,6 +140,8 @@ def process_file(
140
140
  error_kind="stat_error",
141
141
  )
142
142
 
143
+ stat: FileStat = {"mtime_ns": st.st_mtime_ns, "size": st.st_size}
144
+
143
145
  try:
144
146
  source = Path(filepath).read_text("utf-8")
145
147
  except UnicodeDecodeError as e:
@@ -157,7 +159,6 @@ def process_file(
157
159
  error_kind="source_read_error",
158
160
  )
159
161
 
160
- stat = file_stat_signature(filepath)
161
162
  module_name = module_name_from_path(root, filepath)
162
163
 
163
164
  units, blocks, segments = extract_units_from_source(
@@ -238,6 +239,8 @@ def _main_impl() -> None:
238
239
  )
239
240
  sys.exit(ExitCode.CONTRACT_ERROR)
240
241
 
242
+ t0 = time.monotonic()
243
+
241
244
  if not args.quiet:
242
245
  print_banner()
243
246
 
@@ -353,68 +356,44 @@ def _main_impl() -> None:
353
356
  return None, str(e)
354
357
 
355
358
  # Discovery phase
356
- try:
357
- if args.quiet:
358
- for fp in iter_py_files(str(root_path)):
359
- files_found += 1
360
- stat, cached, warn = _get_cached_entry(fp)
361
- if warn:
362
- console.print(warn)
363
- files_skipped += 1
364
- continue
365
- if cached and cached.get("stat") == stat:
366
- cache_hits += 1
367
- all_units.extend(
368
- cast(
369
- list[GroupItem],
370
- cast(object, cached.get("units", [])),
371
- )
359
+ def _discover_files() -> None:
360
+ nonlocal files_found, cache_hits, files_skipped
361
+ for fp in iter_py_files(str(root_path)):
362
+ files_found += 1
363
+ stat, cached, warn = _get_cached_entry(fp)
364
+ if warn:
365
+ console.print(warn)
366
+ files_skipped += 1
367
+ continue
368
+ if cached and cached.get("stat") == stat:
369
+ cache_hits += 1
370
+ all_units.extend(
371
+ cast(
372
+ list[GroupItem],
373
+ cast(object, cached.get("units", [])),
372
374
  )
373
- all_blocks.extend(
374
- cast(
375
- list[GroupItem],
376
- cast(object, cached.get("blocks", [])),
377
- )
375
+ )
376
+ all_blocks.extend(
377
+ cast(
378
+ list[GroupItem],
379
+ cast(object, cached.get("blocks", [])),
378
380
  )
379
- all_segments.extend(
380
- cast(
381
- list[GroupItem],
382
- cast(object, cached.get("segments", [])),
383
- )
381
+ )
382
+ all_segments.extend(
383
+ cast(
384
+ list[GroupItem],
385
+ cast(object, cached.get("segments", [])),
384
386
  )
385
- else:
386
- files_to_process.append(fp)
387
+ )
388
+ else:
389
+ files_to_process.append(fp)
390
+
391
+ try:
392
+ if args.quiet:
393
+ _discover_files()
387
394
  else:
388
395
  with console.status(ui.STATUS_DISCOVERING, spinner="dots"):
389
- for fp in iter_py_files(str(root_path)):
390
- files_found += 1
391
- stat, cached, warn = _get_cached_entry(fp)
392
- if warn:
393
- console.print(warn)
394
- files_skipped += 1
395
- continue
396
- if cached and cached.get("stat") == stat:
397
- cache_hits += 1
398
- all_units.extend(
399
- cast(
400
- list[GroupItem],
401
- cast(object, cached.get("units", [])),
402
- )
403
- )
404
- all_blocks.extend(
405
- cast(
406
- list[GroupItem],
407
- cast(object, cached.get("blocks", [])),
408
- )
409
- )
410
- all_segments.extend(
411
- cast(
412
- list[GroupItem],
413
- cast(object, cached.get("segments", [])),
414
- )
415
- )
416
- else:
417
- files_to_process.append(fp)
396
+ _discover_files()
418
397
  except OSError as e:
419
398
  console.print(ui.fmt_contract_error(ui.ERR_SCAN_FAILED.format(error=e)))
420
399
  sys.exit(ExitCode.CONTRACT_ERROR)
@@ -900,6 +879,10 @@ def _main_impl() -> None:
900
879
  if not args.update_baseline and not args.fail_on_new and new_clones_count > 0:
901
880
  console.print(ui.WARN_NEW_CLONES_WITHOUT_FAIL)
902
881
 
882
+ if not args.quiet:
883
+ elapsed = time.monotonic() - t0
884
+ console.print(f"\n[dim]Done in {elapsed:.1f}s[/dim]")
885
+
903
886
 
904
887
  def main() -> None:
905
888
  try:
@@ -16,6 +16,7 @@ from collections.abc import Iterator
16
16
  from contextlib import contextmanager
17
17
  from dataclasses import dataclass
18
18
 
19
+ from .blockhash import stmt_hash
19
20
  from .blocks import BlockUnit, SegmentUnit, extract_blocks, extract_segments
20
21
  from .cfg import CFGBuilder
21
22
  from .errors import ParseError
@@ -250,28 +251,42 @@ def extract_units_from_source(
250
251
  )
251
252
  )
252
253
 
253
- # Block-level units (exclude __init__)
254
- if not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10:
255
- blocks = extract_blocks(
256
- node,
257
- filepath=filepath,
258
- qualname=qualname,
259
- cfg=cfg,
260
- block_size=4,
261
- max_blocks=15,
262
- )
263
- block_units.extend(blocks)
264
-
265
- # Segment-level units (windows within functions, for internal clones)
266
- if loc >= 30 and stmt_count >= 12:
267
- segments = extract_segments(
268
- node,
269
- filepath=filepath,
270
- qualname=qualname,
271
- cfg=cfg,
272
- window_size=6,
273
- max_segments=60,
274
- )
275
- segment_units.extend(segments)
254
+ # Block-level and segment-level units share statement hashes
255
+ needs_blocks = (
256
+ not local_name.endswith("__init__") and loc >= 40 and stmt_count >= 10
257
+ )
258
+ needs_segments = loc >= 30 and stmt_count >= 12
259
+
260
+ if needs_blocks or needs_segments:
261
+ body = getattr(node, "body", None)
262
+ hashes: list[str] | None = None
263
+ if isinstance(body, list):
264
+ hashes = [stmt_hash(stmt, cfg) for stmt in body]
265
+
266
+ if needs_blocks:
267
+ block_units.extend(
268
+ extract_blocks(
269
+ node,
270
+ filepath=filepath,
271
+ qualname=qualname,
272
+ cfg=cfg,
273
+ block_size=4,
274
+ max_blocks=15,
275
+ precomputed_hashes=hashes,
276
+ )
277
+ )
278
+
279
+ if needs_segments:
280
+ segment_units.extend(
281
+ extract_segments(
282
+ node,
283
+ filepath=filepath,
284
+ qualname=qualname,
285
+ cfg=cfg,
286
+ window_size=6,
287
+ max_segments=60,
288
+ precomputed_hashes=hashes,
289
+ )
290
+ )
276
291
 
277
292
  return units, block_units, segment_units
@@ -760,10 +760,9 @@ def build_html_report(
760
760
  f'<div class="meta-panel" id="report-meta" {meta_attrs}>'
761
761
  '<div class="meta-header">'
762
762
  '<div class="meta-title">'
763
- f"{chevron_icon}"
764
763
  "Report Provenance"
765
764
  "</div>"
766
- '<div class="meta-toggle collapsed">▸</div>'
765
+ f'<div class="meta-toggle collapsed">{chevron_icon}</div>'
767
766
  "</div>"
768
767
  '<div class="meta-content collapsed">'
769
768
  f'<div class="meta-grid">{meta_rows_html}</div>'
@@ -77,8 +77,9 @@ def iter_py_files(
77
77
  if root_str.startswith(sensitive + "/"):
78
78
  raise ValidationError(f"Cannot scan under sensitive directory: {root}")
79
79
 
80
- file_count = 0
81
- for p in sorted(rootp.rglob("*.py"), key=lambda path: str(path)):
80
+ # Collect and filter first, then sort — avoids sorting excluded paths
81
+ candidates: list[Path] = []
82
+ for p in rootp.rglob("*.py"):
82
83
  # Verify path is actually under root (prevent symlink attacks)
83
84
  try:
84
85
  p.resolve().relative_to(rootp)
@@ -90,12 +91,15 @@ def iter_py_files(
90
91
  if any(ex in parts for ex in excludes):
91
92
  continue
92
93
 
93
- file_count += 1
94
- if file_count > max_files:
95
- raise ValidationError(
96
- f"File count exceeds limit of {max_files}. "
97
- "Use more specific root or increase limit."
98
- )
94
+ candidates.append(p)
95
+
96
+ if len(candidates) > max_files:
97
+ raise ValidationError(
98
+ f"File count exceeds limit of {max_files}. "
99
+ "Use more specific root or increase limit."
100
+ )
101
+
102
+ for p in sorted(candidates, key=lambda path: str(path)):
99
103
  yield str(p)
100
104
 
101
105