inspectr 0.0.4__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,16 @@ from collections import Counter, defaultdict
5
5
  from typing import List
6
6
 
7
7
 
8
- def main(files: List[pathlib.Path]) -> None:
8
+ def main(files: List[pathlib.Path], **kwargs) -> None:
9
+ for f in files:
10
+ if not f.exists():
11
+ print(f"Error: File does not exist: {f}")
12
+ return
13
+
14
+ if not f.is_file():
15
+ print(f"Error: Not a file: {f}")
16
+ return
17
+
9
18
  exception_types = Counter()
10
19
  bare_or_exception_per_file = defaultdict(int)
11
20
 
inspectr/duplicates.py CHANGED
@@ -1,6 +1,130 @@
1
1
  import sys
2
- import hashlib
2
+ import pathlib
3
3
  from collections import defaultdict
4
+ from typing import List, Tuple
5
+
6
+
7
def calculate_similarity(lines1: List[str], lines2: List[str]) -> float:
    """Return the fraction of positionally matching lines between two blocks.

    Line i of *lines1* is compared with line i of *lines2*; the match count
    is divided by the longer block's length, so 1.0 means the blocks are
    identical. Returns 0.0 when either block is empty.
    """
    if not (lines1 and lines2):
        return 0.0
    matched = 0
    for left, right in zip(lines1, lines2):
        if left == right:
            matched += 1
    return matched / max(len(lines1), len(lines2))
12
+
13
+
14
def ranges_overlap(start1, end1, start2, end2):
    """Return True when the two ranges overlap by more than 50%.

    The intersection is measured against each range's own size; it is enough
    for either range to be more than half covered by the shared span.
    """
    lo = max(start1, start2)
    hi = min(end1, end2)

    # Empty or inverted intersection means no overlap at all.
    if lo >= hi:
        return False

    shared = hi - lo
    size1 = end1 - start1
    size2 = end2 - start2

    frac1 = shared / size1 if size1 > 0 else 0
    frac2 = shared / size2 if size2 > 0 else 0

    return max(frac1, frac2) > 0.5
30
+
31
+
32
def merge_overlapping_groups(groups, block_size, file_lines):
    """Merge groups that have overlapping ranges by more than 50%, recompute similarity after merging

    Args:
        groups: list of (fname, lnum, similar_blocks) tuples; similar_blocks
            is a list of (other_file, other_line, similarity) tuples.
        block_size: number of consecutive lines in one block.
        file_lines: mapping of filename -> list of that file's lines.

    Returns:
        List of (fname, start_line, actual_size, merged_similar) tuples,
        where merged_similar is recomputed via compute_merged_similarities().
    """
    if not groups:
        return []

    # Bucket groups by their primary file so overlaps are only merged
    # within the same file.
    groups_by_file = defaultdict(list)
    for fname, lnum, similar_blocks in groups:
        groups_by_file[fname].append((lnum, similar_blocks))

    merged_groups = []

    for fname, file_groups in groups_by_file.items():
        # Process groups in ascending start-line order so overlapping ranges
        # end up adjacent.
        file_groups.sort(key=lambda x: x[0])

        # Seed the running range with the first group.
        current_start = file_groups[0][0]
        current_end = current_start + block_size - 1
        # Maps other-file name -> list of (start, end) ranges that are
        # similar to the current running range.
        current_similar_dict = {}

        for f, l, s in file_groups[0][1]:
            end = l + block_size - 1
            key = f
            if key not in current_similar_dict:
                current_similar_dict[key] = []
            current_similar_dict[key].append((l, end))

        for i in range(1, len(file_groups)):
            lnum, similar_blocks = file_groups[i]
            end_line = lnum + block_size - 1

            if ranges_overlap(current_start, current_end, lnum, end_line):
                # Overlaps the running range: extend it and absorb this
                # group's similar blocks.
                current_end = max(current_end, end_line)

                for f, l, s in similar_blocks:
                    end = l + block_size - 1
                    key = f
                    if key not in current_similar_dict:
                        current_similar_dict[key] = []
                    current_similar_dict[key].append((l, end))
            else:
                # No overlap: flush the running range (recomputing its
                # similarities over the merged span) and start a new one.
                actual_size = current_end - current_start + 1
                merged_similar = compute_merged_similarities(
                    fname, current_start, current_end,
                    current_similar_dict, file_lines
                )
                merged_groups.append((fname, current_start, actual_size, merged_similar))

                current_start = lnum
                current_end = end_line
                current_similar_dict = {}
                for f, l, s in similar_blocks:
                    end = l + block_size - 1
                    key = f
                    if key not in current_similar_dict:
                        current_similar_dict[key] = []
                    current_similar_dict[key].append((l, end))

        # Flush the final running range for this file.
        actual_size = current_end - current_start + 1
        merged_similar = compute_merged_similarities(
            fname, current_start, current_end,
            current_similar_dict, file_lines
        )
        merged_groups.append((fname, current_start, actual_size, merged_similar))

    return merged_groups
96
+
97
+
98
def compute_merged_similarities(fname, start, end, similar_dict, file_lines):
    """Compute similarity for merged ranges.

    Compares the primary file's merged span (1-based, inclusive) against the
    coalesced spans recorded for each other file, returning a list of
    (other_file, span_start, similarity) tuples.
    """
    results = []

    # No line data for the primary file -> nothing to compare against.
    if fname not in file_lines:
        return results

    reference = file_lines[fname][start - 1:end]

    for partner, spans in similar_dict.items():
        if partner not in file_lines:
            continue

        # Coalesce overlapping spans. The sort is in place, mutating the
        # caller's list, exactly as before.
        spans.sort()
        coalesced = []
        for span_start, span_end in spans:
            if coalesced and ranges_overlap(coalesced[-1][0], coalesced[-1][1], span_start, span_end):
                last_start, last_end = coalesced[-1]
                coalesced[-1] = (min(last_start, span_start), max(last_end, span_end))
            else:
                coalesced.append((span_start, span_end))

        for span_start, span_end in coalesced:
            candidate = file_lines[partner][span_start - 1:span_end]
            results.append((partner, span_start, calculate_similarity(reference, candidate)))

    return results
127
+
4
128
 
5
129
def find_duplicates(files, block_size=10, min_occur=3):
    """
    Find duplicated or highly similar blocks of code across the given files.

    Args:
        files: iterable of file names (read as UTF-8 text; unreadable
            files are reported to stderr and skipped)
        block_size: number of consecutive lines in a block
        min_occur: minimum number of occurrences to report

    Yields:
        (primary_filename, primary_line, actual_block_size, [(other_file, other_line, similarity), ...])
    """
    file_lines = {}
    for fname in files:
        try:
            with open(fname, encoding="utf-8") as f:
                file_lines[fname] = f.readlines()
        except OSError as e:
            print(f"Could not read {fname}: {e}", file=sys.stderr)
            continue

    # Slide a window of block_size lines over every file.
    all_blocks = []
    for fname in file_lines:
        lines = file_lines[fname]
        for i in range(len(lines) - block_size + 1):
            block_lines = lines[i:i + block_size]
            all_blocks.append((fname, i + 1, block_lines))

    reported = set()
    groups = []

    for i, (fname1, lnum1, block1) in enumerate(all_blocks):
        key1 = (fname1, lnum1)

        if key1 in reported:
            continue

        similar_blocks = []

        # Only compare against later blocks. The previous version iterated the
        # whole list and discarded half the pairs with `if i >= j: continue`,
        # doubling the work of the already O(n^2) scan for no benefit.
        for j in range(i + 1, len(all_blocks)):
            fname2, lnum2, block2 = all_blocks[j]

            key2 = (fname2, lnum2)
            if key2 in reported:
                continue

            similarity = calculate_similarity(block1, block2)
            if similarity > 0.8:
                similar_blocks.append((fname2, lnum2, similarity))
                reported.add(key2)

        if len(similar_blocks) + 1 >= min_occur:
            reported.add(key1)
            groups.append((fname1, lnum1, similar_blocks))

    merged = merge_overlapping_groups(groups, block_size, file_lines)

    for fname, lnum, actual_size, similar_blocks in merged:
        yield fname, lnum, actual_size, similar_blocks
39
189
 
40
def main(files: List[pathlib.Path], block_size: int = 10, min_occur: int = 3) -> None:
    """CLI entry point for the duplicates tool.

    Args:
        files: paths to inspect; all must exist and be regular files.
        block_size: number of consecutive lines per compared block.
        min_occur: minimum number of occurrences to report.

    Prints one line per merged duplicate group; returns early with an error
    message on the first missing/non-file path, or a usage message when no
    files are given.
    """
    if not files:
        print("Usage: inspectr duplicates [--block-size N] [--min-occur N] file1.py [file2.py ...]")
        return

    for f in files:
        if not f.exists():
            print(f"Error: File does not exist: {f}")
            return

        if not f.is_file():
            print(f"Error: Not a file: {f}")
            return

    file_paths = [str(f) for f in files]
    for fname, lnum, block_sz, similar_blocks in find_duplicates(file_paths, block_size=block_size, min_occur=min_occur):
        end_line = lnum + block_sz - 1

        # Join entries with " and" instead of the old
        # `"".join(...).rstrip(" and")`: rstrip strips a trailing run of the
        # CHARACTERS {' ', 'a', 'n', 'd'}, not the suffix " and", and turned
        # "occur in" into "occur i" when a group had no similar blocks.
        entries = []
        for other_file, other_line, similarity in similar_blocks:
            other_end = other_line + block_sz - 1
            similarity_pct = int(similarity * 100)
            entries.append(f" {other_file} at lines {other_line}-{other_end} ({similarity_pct}% similarity)")

        print(f"{fname}: lines {lnum}-{end_line} occur in" + " and".join(entries))
50
217
 
inspectr/size_counts.py CHANGED
@@ -11,7 +11,16 @@ FUNC_PARAMS = 5
11
11
  CLASS_METHODS = 20
12
12
 
13
13
 
14
- def main(files: List[pathlib.Path]) -> None:
14
+ def main(files: List[pathlib.Path], **kwargs) -> None:
15
+ for f in files:
16
+ if not f.exists():
17
+ print(f"Error: File does not exist: {f}")
18
+ return
19
+
20
+ if not f.is_file():
21
+ print(f"Error: Not a file: {f}")
22
+ return
23
+
15
24
  # TODO: rename these
16
25
  # Counters and diagnostics
17
26
  files_over_1000 = []
inspectr/with_open.py CHANGED
@@ -4,11 +4,20 @@ import sys
4
4
  from typing import List
5
5
 
6
6
 
7
- def main(files: List[pathlib.Path]) -> None:
7
+ def main(files: List[pathlib.Path], **kwargs) -> None:
8
8
  if not files:
9
9
  print("Usage: inspectr with_open <file1> [file2 ...]")
10
10
  sys.exit(1)
11
11
 
12
+ for f in files:
13
+ if not f.exists():
14
+ print(f"Error: File does not exist: {f}")
15
+ return
16
+
17
+ if not f.is_file():
18
+ print(f"Error: Not a file: {f}")
19
+ return
20
+
12
21
  for filepath in files:
13
22
  tree = ast.parse(filepath.read_text(), filename=str(filepath))
14
23
  for node in ast.walk(tree):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspectr
3
- Version: 0.0.4
3
+ Version: 0.1.0
4
4
  Summary: A collection of python tools to inspect code quality.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -14,6 +14,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
14
14
  Classifier: Topic :: Software Development
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
+ Requires-Dist: colorama
18
+ Provides-Extra: test
19
+ Requires-Dist: pytest; extra == "test"
17
20
  Dynamic: license-file
18
21
 
19
22
  # inspectr
@@ -29,15 +32,48 @@ pip install inspectr
29
32
  ## Usage
30
33
  Generally, the syntax goes:
31
34
  ```bash
32
- inspectr <subtool> [files...]
35
+ inspectr <subtool> [options] [files...]
33
36
  ```
34
37
  where `<subtool>` is one of the following:
35
38
 
36
39
  - `authenticity`: looks for TODO comments, empty try/except blocks, and stub functions
37
40
  - `bare_ratio`: checks for the ratio of bare excepts to meaningful exception usage
41
+ - `compare_funcs`: compares function/method names across two directory versions
42
+ - `complexity`: analyzes algorithmic complexity of Python code
38
43
  - `count_exceptions`: counts how many of each type of exception there are (including bare except)
44
+ - `duplicates`: looks for occurrences of duplicate or similar code (>80% similarity, default: 10+ lines, 3+ occurrences)
39
45
  - `size_counts`: various linecount-related code complexity checks
40
46
  - `with_open`: checks for `open` in the absence of `with` and manual calls to `close()`
41
47
 
48
+ ### Command-Line Options
49
+ All tools accept command-line options in the format `--option-name value`.
50
+ Options are passed as keyword arguments to the tool's main function. For example:
51
+ ```bash
52
+ inspectr duplicates --block-size 15 --min-occur 2 file1.py file2.py
53
+ ```
54
+
55
+ Recognized options include:
56
+ - `duplicates`:
57
+ - `--block-size N`: number of consecutive lines in a block (default: 10)
58
+ - `--min-occur N`: minimum number of occurrences to report (default: 3)
59
+
60
+ **Note**: any unrecognized options will be silently ignored.
61
+
62
+ ### Usage for compare_funcs
63
+ The `compare_funcs` tool compares functions across two directory versions:
64
+ ```bash
65
+ inspectr compare_funcs files_list.txt dir1 dir2
66
+ ```
67
+ where `files_list.txt` contains relative paths to compare, one per line.
68
+
69
+ ## Local Testing
70
+ First install in development mode with test dependencies, then run the tests:
71
+ ```bash
72
+ git clone https://github.com/ajcm474/inspectr.git
73
+ cd inspectr
74
+ pip install -e ".[test]"
75
+ pytest tests/
76
+ ```
77
+
42
78
  **Please note:** this project is in the early alpha stage, so don't expect the above subtool names
43
79
  to be stable between versions. I might even merge/split them at some point.
@@ -0,0 +1,16 @@
1
+ inspectr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ inspectr/__main__.py,sha256=-BF-0Mi7-JjNabGSUPJ6IHu21EoNO0j0-jKd3wgnoDQ,1395
3
+ inspectr/authenticity.py,sha256=GMM82Ol0Gn_Ol5W0C9UWsT3U7g1vNCRv30_oz-1gX-Q,2791
4
+ inspectr/bare_ratio.py,sha256=lJ0cKToUy9y4hfcwCt2D1_-U0XI-j35gCTIgsbxgXBk,2229
5
+ inspectr/compare_funcs.py,sha256=PHXV4XsrsPbfT_gAPi7HYPfljfZSJTCxpI13m3n8Gxg,3863
6
+ inspectr/complexity.py,sha256=ewl7SccHgD5uBJi7z9n4I6rNFbHEz1rvZDqvjQN_IpM,30887
7
+ inspectr/count_exceptions.py,sha256=Fpfq2Uyozg8rmc9fS_PAzB4WGDuzONKOjYUcbALrGwc,1820
8
+ inspectr/duplicates.py,sha256=iam-p8pA_LuYxWJ0p3wbWsGX2EYTqHGdkyLuzfw1MEM,7721
9
+ inspectr/size_counts.py,sha256=q0YZytb30XivT6W4B2HDxh2tWDox1B4Iz8VesIq7a6Y,3152
10
+ inspectr/with_open.py,sha256=ekcdVeLQT0SEt3bMpxRWoJDXO8hdctArRdWkKliLes8,838
11
+ inspectr-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
+ inspectr-0.1.0.dist-info/METADATA,sha256=TBsPAavCdBfCjj6s89BFSfTgVahkx54ad4W_pON4Bi0,2896
13
+ inspectr-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ inspectr-0.1.0.dist-info/entry_points.txt,sha256=IrOM4SpCRfFZzBWngg3ezXuX31ZqFBR-flSU5c8tbTo,52
15
+ inspectr-0.1.0.dist-info/top_level.txt,sha256=NlTFBMaWgYmxFzjQtp4Y6itVQQV8sBPtAAZvFnviT-A,9
16
+ inspectr-0.1.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- inspectr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- inspectr/__main__.py,sha256=Rojob_KTdzcHjItkGgA3TPLqPd223pIfeajpCWDP3Gc,652
3
- inspectr/authenticity.py,sha256=fgKJSE_1qYE1NgLdXmOjPQjo-dwD42sEhqpcsazIj3U,1703
4
- inspectr/bare_ratio.py,sha256=oQfag96zjGnmFHofZ5qabBGGVoKUnmRNzGq_WX_8ZZg,1997
5
- inspectr/count_exceptions.py,sha256=UhUc3ZZY8eDxLtlLX1USKzP_JEPY3clx8R8TPvtQ5LE,1588
6
- inspectr/duplicates.py,sha256=xAFNO90JVEhzZqmyHg8gXAo28e_DIrB3SYh7tONAzbU,1455
7
- inspectr/size_counts.py,sha256=Yw-k5v7lPylzfh-9SUs1PtlV3Bhvdy9X7w2Pi93xL9g,2920
8
- inspectr/with_open.py,sha256=qbCAb14cZ15yX7SbLo2C3nE1sDEKLiZ48bvFQfTAKJk,606
9
- inspectr-0.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
10
- inspectr-0.0.4.dist-info/METADATA,sha256=2YAiL5P6MPWIgR54rlkZXzt3fXxaW6CsT1vT1krn_uo,1573
11
- inspectr-0.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- inspectr-0.0.4.dist-info/entry_points.txt,sha256=IrOM4SpCRfFZzBWngg3ezXuX31ZqFBR-flSU5c8tbTo,52
13
- inspectr-0.0.4.dist-info/top_level.txt,sha256=NlTFBMaWgYmxFzjQtp4Y6itVQQV8sBPtAAZvFnviT-A,9
14
- inspectr-0.0.4.dist-info/RECORD,,