inspectr 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {inspectr-0.0.1 → inspectr-0.0.3}/PKG-INFO +1 -1
- inspectr-0.0.3/inspectr/duplicates.py +51 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/size_counts.py +0 -2
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr.egg-info/PKG-INFO +1 -1
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr.egg-info/SOURCES.txt +1 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/pyproject.toml +2 -2
- {inspectr-0.0.1 → inspectr-0.0.3}/LICENSE +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/README.md +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/__init__.py +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/__main__.py +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/authenticity.py +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/bare_ratio.py +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/count_exceptions.py +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr/with_open.py +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr.egg-info/dependency_links.txt +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr.egg-info/entry_points.txt +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/inspectr.egg-info/top_level.txt +0 -0
- {inspectr-0.0.1 → inspectr-0.0.3}/setup.cfg +0 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
# inspectr/duplicates.py
|
2
|
+
import sys
|
3
|
+
import hashlib
|
4
|
+
from collections import defaultdict
|
5
|
+
|
6
|
+
def find_duplicates(files, block_size=1, min_occur=2):
    """
    Find duplicate blocks of code across files.

    Every run of ``block_size`` consecutive lines in every file is treated
    as a block. Blocks with identical text are grouped together, whether
    they come from the same file or from different files.

    Args:
        files: list of file paths
        block_size: number of consecutive lines in a block
        min_occur: minimum number of occurrences to report

    Yields:
        (filename, line_number, count) for each location of a block that
        occurs at least ``min_occur`` times. ``line_number`` is the 1-based
        number of the block's first line and ``count`` is the total number
        of occurrences of that block.

    Files that cannot be opened, or that are not valid UTF-8, are reported
    on stderr and skipped.
    """
    blocks = defaultdict(list)  # hash -> list of (file, line)

    for fname in files:
        try:
            with open(fname, encoding="utf-8") as f:
                # Strip the line terminator so a final line without a
                # trailing newline still matches the same text elsewhere.
                lines = [line.rstrip("\n") for line in f]
        except (OSError, UnicodeDecodeError) as e:
            # UnicodeDecodeError is not an OSError; without it a single
            # badly encoded file would abort the whole scan.
            print(f"Could not read {fname}: {e}", file=sys.stderr)
            continue

        for i in range(len(lines) - block_size + 1):
            # join block of lines
            block = "\n".join(lines[i:i + block_size])
            # stable hash
            h = hashlib.sha1(block.encode("utf-8")).hexdigest()
            blocks[h].append((fname, i + 1))

    for locs in blocks.values():
        if len(locs) >= min_occur:
            for fname, lnum in locs:
                yield fname, lnum, len(locs)
|
39
|
+
|
40
|
+
|
41
|
+
def main(args=None) -> None:
    """
    Command-line entry point for the duplicate-line report.

    Args:
        args: file paths to scan; when None, ``sys.argv[1:]`` is used.

    Prints a usage line and returns when no paths are given. Otherwise
    prints one ``file:line (occurs N times)`` line for every single line
    that appears at least twice across the given files.
    """
    paths = sys.argv[1:] if args is None else args

    if len(paths) == 0:
        print("Usage: inspectr duplicates file1.py [file2.py ...]")
        return

    report = find_duplicates(paths, block_size=1, min_occur=2)
    for path, line_no, occurrences in report:
        print(f"{path}:{line_no} (occurs {occurrences} times)")
|
51
|
+
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
5
5
|
|
6
6
|
[project]
|
7
7
|
name = "inspectr"
|
8
|
-
version = "0.0.1"
|
8
|
+
version = "0.0.3"
|
9
9
|
description = "A collection of python tools to inspect code quality. "
|
10
10
|
maintainers = [
|
11
11
|
{name = "Alex Mueller", email="amueller474@gmail.com"},
|
@@ -31,4 +31,4 @@ inspectr = "inspectr.__main__:main"
|
|
31
31
|
|
32
32
|
[project.urls]
|
33
33
|
Homepage = "https://github.com/ajcm474/inspectr"
|
34
|
-
Issues = "https://github.com/ajcm474/inspectr/issues"
|
34
|
+
Issues = "https://github.com/ajcm474/inspectr/issues"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|