inspectr 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspectr/__main__.py +28 -3
- inspectr/authenticity.py +19 -4
- inspectr/bare_ratio.py +10 -1
- inspectr/compare_funcs.py +118 -0
- inspectr/complexity.py +738 -0
- inspectr/count_exceptions.py +10 -1
- inspectr/duplicates.py +188 -21
- inspectr/size_counts.py +10 -1
- inspectr/with_open.py +10 -1
- {inspectr-0.0.5.dist-info → inspectr-0.1.0.dist-info}/METADATA +38 -3
- inspectr-0.1.0.dist-info/RECORD +16 -0
- inspectr-0.0.5.dist-info/RECORD +0 -14
- {inspectr-0.0.5.dist-info → inspectr-0.1.0.dist-info}/WHEEL +0 -0
- {inspectr-0.0.5.dist-info → inspectr-0.1.0.dist-info}/entry_points.txt +0 -0
- {inspectr-0.0.5.dist-info → inspectr-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {inspectr-0.0.5.dist-info → inspectr-0.1.0.dist-info}/top_level.txt +0 -0
inspectr/count_exceptions.py
CHANGED
@@ -5,7 +5,16 @@ from collections import Counter, defaultdict
|
|
5
5
|
from typing import List
|
6
6
|
|
7
7
|
|
8
|
-
def main(files: List[pathlib.Path]) -> None:
|
8
|
+
def main(files: List[pathlib.Path], **kwargs) -> None:
|
9
|
+
for f in files:
|
10
|
+
if not f.exists():
|
11
|
+
print(f"Error: File does not exist: {f}")
|
12
|
+
return
|
13
|
+
|
14
|
+
if not f.is_file():
|
15
|
+
print(f"Error: Not a file: {f}")
|
16
|
+
return
|
17
|
+
|
9
18
|
exception_types = Counter()
|
10
19
|
bare_or_exception_per_file = defaultdict(int)
|
11
20
|
|
inspectr/duplicates.py
CHANGED
@@ -1,6 +1,130 @@
|
|
1
1
|
import sys
|
2
|
-
import hashlib
|
2
|
+
import pathlib
|
3
3
|
from collections import defaultdict
|
4
|
+
from typing import List, Tuple
|
5
|
+
|
6
|
+
|
7
|
+
def calculate_similarity(lines1: List[str], lines2: List[str]) -> float:
|
8
|
+
if not lines1 or not lines2:
|
9
|
+
return 0.0
|
10
|
+
matches = sum(1 for a, b in zip(lines1, lines2) if a == b)
|
11
|
+
return matches / max(len(lines1), len(lines2))
|
12
|
+
|
13
|
+
|
14
|
+
def ranges_overlap(start1, end1, start2, end2):
|
15
|
+
"""Check if two ranges overlap by more than 50%"""
|
16
|
+
overlap_start = max(start1, start2)
|
17
|
+
overlap_end = min(end1, end2)
|
18
|
+
|
19
|
+
if overlap_start >= overlap_end:
|
20
|
+
return False
|
21
|
+
|
22
|
+
overlap_size = overlap_end - overlap_start
|
23
|
+
range1_size = end1 - start1
|
24
|
+
range2_size = end2 - start2
|
25
|
+
|
26
|
+
overlap_pct1 = overlap_size / range1_size if range1_size > 0 else 0
|
27
|
+
overlap_pct2 = overlap_size / range2_size if range2_size > 0 else 0
|
28
|
+
|
29
|
+
return overlap_pct1 > 0.5 or overlap_pct2 > 0.5
|
30
|
+
|
31
|
+
|
32
|
+
def merge_overlapping_groups(groups, block_size, file_lines):
|
33
|
+
"""Merge groups that have overlapping ranges by more than 50%, recompute similarity after merging"""
|
34
|
+
if not groups:
|
35
|
+
return []
|
36
|
+
|
37
|
+
groups_by_file = defaultdict(list)
|
38
|
+
for fname, lnum, similar_blocks in groups:
|
39
|
+
groups_by_file[fname].append((lnum, similar_blocks))
|
40
|
+
|
41
|
+
merged_groups = []
|
42
|
+
|
43
|
+
for fname, file_groups in groups_by_file.items():
|
44
|
+
file_groups.sort(key=lambda x: x[0])
|
45
|
+
|
46
|
+
current_start = file_groups[0][0]
|
47
|
+
current_end = current_start + block_size - 1
|
48
|
+
current_similar_dict = {}
|
49
|
+
|
50
|
+
for f, l, s in file_groups[0][1]:
|
51
|
+
end = l + block_size - 1
|
52
|
+
key = f
|
53
|
+
if key not in current_similar_dict:
|
54
|
+
current_similar_dict[key] = []
|
55
|
+
current_similar_dict[key].append((l, end))
|
56
|
+
|
57
|
+
for i in range(1, len(file_groups)):
|
58
|
+
lnum, similar_blocks = file_groups[i]
|
59
|
+
end_line = lnum + block_size - 1
|
60
|
+
|
61
|
+
if ranges_overlap(current_start, current_end, lnum, end_line):
|
62
|
+
current_end = max(current_end, end_line)
|
63
|
+
|
64
|
+
for f, l, s in similar_blocks:
|
65
|
+
end = l + block_size - 1
|
66
|
+
key = f
|
67
|
+
if key not in current_similar_dict:
|
68
|
+
current_similar_dict[key] = []
|
69
|
+
current_similar_dict[key].append((l, end))
|
70
|
+
else:
|
71
|
+
actual_size = current_end - current_start + 1
|
72
|
+
merged_similar = compute_merged_similarities(
|
73
|
+
fname, current_start, current_end,
|
74
|
+
current_similar_dict, file_lines
|
75
|
+
)
|
76
|
+
merged_groups.append((fname, current_start, actual_size, merged_similar))
|
77
|
+
|
78
|
+
current_start = lnum
|
79
|
+
current_end = end_line
|
80
|
+
current_similar_dict = {}
|
81
|
+
for f, l, s in similar_blocks:
|
82
|
+
end = l + block_size - 1
|
83
|
+
key = f
|
84
|
+
if key not in current_similar_dict:
|
85
|
+
current_similar_dict[key] = []
|
86
|
+
current_similar_dict[key].append((l, end))
|
87
|
+
|
88
|
+
actual_size = current_end - current_start + 1
|
89
|
+
merged_similar = compute_merged_similarities(
|
90
|
+
fname, current_start, current_end,
|
91
|
+
current_similar_dict, file_lines
|
92
|
+
)
|
93
|
+
merged_groups.append((fname, current_start, actual_size, merged_similar))
|
94
|
+
|
95
|
+
return merged_groups
|
96
|
+
|
97
|
+
|
98
|
+
def compute_merged_similarities(fname, start, end, similar_dict, file_lines):
|
99
|
+
"""Compute similarity for merged ranges"""
|
100
|
+
merged_similar = []
|
101
|
+
|
102
|
+
if fname not in file_lines:
|
103
|
+
return merged_similar
|
104
|
+
|
105
|
+
primary_lines = file_lines[fname][start - 1:end]
|
106
|
+
|
107
|
+
for other_file, ranges in similar_dict.items():
|
108
|
+
if other_file not in file_lines:
|
109
|
+
continue
|
110
|
+
|
111
|
+
ranges.sort()
|
112
|
+
merged_ranges = []
|
113
|
+
|
114
|
+
for range_start, range_end in ranges:
|
115
|
+
if merged_ranges and ranges_overlap(merged_ranges[-1][0], merged_ranges[-1][1], range_start, range_end):
|
116
|
+
prev_start, prev_end = merged_ranges[-1]
|
117
|
+
merged_ranges[-1] = (min(prev_start, range_start), max(prev_end, range_end))
|
118
|
+
else:
|
119
|
+
merged_ranges.append((range_start, range_end))
|
120
|
+
|
121
|
+
for merged_start, merged_end in merged_ranges:
|
122
|
+
other_lines = file_lines[other_file][merged_start - 1:merged_end]
|
123
|
+
similarity = calculate_similarity(primary_lines, other_lines)
|
124
|
+
merged_similar.append((other_file, merged_start, similarity))
|
125
|
+
|
126
|
+
return merged_similar
|
127
|
+
|
4
128
|
|
5
129
|
def find_duplicates(files, block_size=10, min_occur=3):
|
6
130
|
"""
|
@@ -12,39 +136,82 @@ def find_duplicates(files, block_size=10, min_occur=3):
|
|
12
136
|
min_occur: minimum number of occurrences to report
|
13
137
|
|
14
138
|
Yields:
|
15
|
-
(
|
139
|
+
(primary_filename, primary_line, actual_block_size, [(other_file, other_line, similarity), ...])
|
16
140
|
"""
|
17
|
-
|
18
|
-
|
141
|
+
file_lines = {}
|
19
142
|
for fname in files:
|
20
143
|
try:
|
21
144
|
with open(fname, encoding="utf-8") as f:
|
22
|
-
|
145
|
+
file_lines[fname] = f.readlines()
|
23
146
|
except OSError as e:
|
24
147
|
print(f"Could not read {fname}: {e}", file=sys.stderr)
|
25
148
|
continue
|
26
149
|
|
150
|
+
all_blocks = []
|
151
|
+
for fname in file_lines:
|
152
|
+
lines = file_lines[fname]
|
27
153
|
for i in range(len(lines) - block_size + 1):
|
28
|
-
|
29
|
-
|
30
|
-
# stable hash
|
31
|
-
h = hashlib.sha1(block.encode("utf-8")).hexdigest()
|
32
|
-
blocks[h].append((fname, i + 1))
|
154
|
+
block_lines = lines[i:i + block_size]
|
155
|
+
all_blocks.append((fname, i + 1, block_lines))
|
33
156
|
|
34
|
-
|
35
|
-
|
36
|
-
for fname, lnum in locs:
|
37
|
-
yield fname, lnum, len(locs)
|
157
|
+
reported = set()
|
158
|
+
groups = []
|
38
159
|
|
160
|
+
for i, (fname1, lnum1, block1) in enumerate(all_blocks):
|
161
|
+
key1 = (fname1, lnum1)
|
162
|
+
|
163
|
+
if key1 in reported:
|
164
|
+
continue
|
165
|
+
|
166
|
+
similar_blocks = []
|
167
|
+
|
168
|
+
for j, (fname2, lnum2, block2) in enumerate(all_blocks):
|
169
|
+
if i >= j:
|
170
|
+
continue
|
171
|
+
|
172
|
+
key2 = (fname2, lnum2)
|
173
|
+
if key2 in reported:
|
174
|
+
continue
|
175
|
+
|
176
|
+
similarity = calculate_similarity(block1, block2)
|
177
|
+
if similarity > 0.8:
|
178
|
+
similar_blocks.append((fname2, lnum2, similarity))
|
179
|
+
reported.add(key2)
|
180
|
+
|
181
|
+
if len(similar_blocks) + 1 >= min_occur:
|
182
|
+
reported.add(key1)
|
183
|
+
groups.append((fname1, lnum1, similar_blocks))
|
184
|
+
|
185
|
+
merged = merge_overlapping_groups(groups, block_size, file_lines)
|
186
|
+
|
187
|
+
for fname, lnum, actual_size, similar_blocks in merged:
|
188
|
+
yield fname, lnum, actual_size, similar_blocks
|
39
189
|
|
40
|
-
def main(args=None) -> None:
|
41
|
-
if args is None:
|
42
|
-
args = sys.argv[1:]
|
43
190
|
|
44
|
-
|
45
|
-
|
191
|
+
def main(files: List[pathlib.Path], block_size: int = 10, min_occur: int = 3) -> None:
|
192
|
+
for f in files:
|
193
|
+
if not f.exists():
|
194
|
+
print(f"Error: File does not exist: {f}")
|
195
|
+
return
|
196
|
+
|
197
|
+
if not f.is_file():
|
198
|
+
print(f"Error: Not a file: {f}")
|
199
|
+
return
|
200
|
+
|
201
|
+
if not files:
|
202
|
+
print("Usage: inspectr duplicates [--block-size N] [--min-occur N] file1.py [file2.py ...]")
|
46
203
|
return
|
47
204
|
|
48
|
-
|
49
|
-
|
205
|
+
file_paths = [str(f) for f in files]
|
206
|
+
for fname, lnum, block_sz, similar_blocks in find_duplicates(file_paths, block_size=block_size, min_occur=min_occur):
|
207
|
+
end_line = lnum + block_sz - 1
|
208
|
+
output_parts = [f"{fname}: lines {lnum}-{end_line} occur in"]
|
209
|
+
|
210
|
+
for other_file, other_line, similarity in similar_blocks:
|
211
|
+
other_end = other_line + block_sz - 1
|
212
|
+
similarity_pct = int(similarity * 100)
|
213
|
+
output_parts.append(f" {other_file} at lines {other_line}-{other_end} ({similarity_pct}% similarity) and")
|
214
|
+
|
215
|
+
output = "".join(output_parts).rstrip(" and")
|
216
|
+
print(output)
|
50
217
|
|
inspectr/size_counts.py
CHANGED
@@ -11,7 +11,16 @@ FUNC_PARAMS = 5
|
|
11
11
|
CLASS_METHODS = 20
|
12
12
|
|
13
13
|
|
14
|
-
def main(files: List[pathlib.Path]) -> None:
|
14
|
+
def main(files: List[pathlib.Path], **kwargs) -> None:
|
15
|
+
for f in files:
|
16
|
+
if not f.exists():
|
17
|
+
print(f"Error: File does not exist: {f}")
|
18
|
+
return
|
19
|
+
|
20
|
+
if not f.is_file():
|
21
|
+
print(f"Error: Not a file: {f}")
|
22
|
+
return
|
23
|
+
|
15
24
|
# TODO: rename these
|
16
25
|
# Counters and diagnostics
|
17
26
|
files_over_1000 = []
|
inspectr/with_open.py
CHANGED
@@ -4,11 +4,20 @@ import sys
|
|
4
4
|
from typing import List
|
5
5
|
|
6
6
|
|
7
|
-
def main(files: List[pathlib.Path]) -> None:
|
7
|
+
def main(files: List[pathlib.Path], **kwargs) -> None:
|
8
8
|
if not files:
|
9
9
|
print("Usage: inspectr with_open <file1> [file2 ...]")
|
10
10
|
sys.exit(1)
|
11
11
|
|
12
|
+
for f in files:
|
13
|
+
if not f.exists():
|
14
|
+
print(f"Error: File does not exist: {f}")
|
15
|
+
return
|
16
|
+
|
17
|
+
if not f.is_file():
|
18
|
+
print(f"Error: Not a file: {f}")
|
19
|
+
return
|
20
|
+
|
12
21
|
for filepath in files:
|
13
22
|
tree = ast.parse(filepath.read_text(), filename=str(filepath))
|
14
23
|
for node in ast.walk(tree):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspectr
|
3
|
-
Version: 0.0.5
|
3
|
+
Version: 0.1.0
|
4
4
|
Summary: A collection of python tools to inspect code quality.
|
5
5
|
Maintainer-email: Alex Mueller <amueller474@gmail.com>
|
6
6
|
License-Expression: Apache-2.0
|
@@ -14,6 +14,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
14
|
Classifier: Topic :: Software Development
|
15
15
|
Description-Content-Type: text/markdown
|
16
16
|
License-File: LICENSE
|
17
|
+
Requires-Dist: colorama
|
18
|
+
Provides-Extra: test
|
19
|
+
Requires-Dist: pytest; extra == "test"
|
17
20
|
Dynamic: license-file
|
18
21
|
|
19
22
|
# inspectr
|
@@ -29,16 +32,48 @@ pip install inspectr
|
|
29
32
|
## Usage
|
30
33
|
Generally, the syntax goes:
|
31
34
|
```bash
|
32
|
-
inspectr <subtool> [files...]
|
35
|
+
inspectr <subtool> [options] [files...]
|
33
36
|
```
|
34
37
|
where `<subtool>` is one of the following:
|
35
38
|
|
36
39
|
- `authenticity`: looks for TODO comments, empty try/except blocks, and stub functions
|
37
40
|
- `bare_ratio`: checks for the ratio of bare excepts to meaningful exception usage
|
41
|
+
- `compare_funcs`: compares function/method names across two directory versions
|
42
|
+
- `complexity`: analyzes algorithmic complexity of Python code
|
38
43
|
- `count_exceptions`: counts how many of each type of exception there are (including bare except)
|
39
|
-
- `duplicates`: looks for occurrences of duplicate code (default: 10+ lines, 3+ occurrences)
|
44
|
+
- `duplicates`: looks for occurrences of duplicate or similar code (>80% similarity, default: 10+ lines, 3+ occurrences)
|
40
45
|
- `size_counts`: various linecount-related code complexity checks
|
41
46
|
- `with_open`: checks for `open` in the absence of `with` and manual calls to `close()`
|
42
47
|
|
48
|
+
### Command-Line Options
|
49
|
+
All tools accept command-line options in the format `--option-name value`.
|
50
|
+
Options are passed as keyword arguments to the tool's main function. For example:
|
51
|
+
```bash
|
52
|
+
inspectr duplicates --block-size 15 --min-occur 2 file1.py file2.py
|
53
|
+
```
|
54
|
+
|
55
|
+
Recognized options include:
|
56
|
+
- `duplicates`:
|
57
|
+
- `--block-size N`: number of consecutive lines in a block (default: 10)
|
58
|
+
- `--min-occur N`: minimum number of occurrences to report (default: 3)
|
59
|
+
|
60
|
+
**Note**: any unrecognized options will be silently ignored.
|
61
|
+
|
62
|
+
### Usage for compare_funcs
|
63
|
+
The `compare_funcs` tool compares functions across two directory versions:
|
64
|
+
```bash
|
65
|
+
inspectr compare_funcs files_list.txt dir1 dir2
|
66
|
+
```
|
67
|
+
where `files_list.txt` contains relative paths to compare, one per line.
|
68
|
+
|
69
|
+
## Local Testing
|
70
|
+
First install in development mode with test dependencies, then run the tests:
|
71
|
+
```bash
|
72
|
+
git clone https://github.com/ajcm474/inspectr.git
|
73
|
+
cd inspectr
|
74
|
+
pip install -e ".[test]"
|
75
|
+
pytest tests/
|
76
|
+
```
|
77
|
+
|
43
78
|
**Please note:** this project is in the early alpha stage, so don't expect the above subtool names
|
44
79
|
to be stable between versions. I might even merge/split them at some point.
|
@@ -0,0 +1,16 @@
|
|
1
|
+
inspectr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
inspectr/__main__.py,sha256=-BF-0Mi7-JjNabGSUPJ6IHu21EoNO0j0-jKd3wgnoDQ,1395
|
3
|
+
inspectr/authenticity.py,sha256=GMM82Ol0Gn_Ol5W0C9UWsT3U7g1vNCRv30_oz-1gX-Q,2791
|
4
|
+
inspectr/bare_ratio.py,sha256=lJ0cKToUy9y4hfcwCt2D1_-U0XI-j35gCTIgsbxgXBk,2229
|
5
|
+
inspectr/compare_funcs.py,sha256=PHXV4XsrsPbfT_gAPi7HYPfljfZSJTCxpI13m3n8Gxg,3863
|
6
|
+
inspectr/complexity.py,sha256=ewl7SccHgD5uBJi7z9n4I6rNFbHEz1rvZDqvjQN_IpM,30887
|
7
|
+
inspectr/count_exceptions.py,sha256=Fpfq2Uyozg8rmc9fS_PAzB4WGDuzONKOjYUcbALrGwc,1820
|
8
|
+
inspectr/duplicates.py,sha256=iam-p8pA_LuYxWJ0p3wbWsGX2EYTqHGdkyLuzfw1MEM,7721
|
9
|
+
inspectr/size_counts.py,sha256=q0YZytb30XivT6W4B2HDxh2tWDox1B4Iz8VesIq7a6Y,3152
|
10
|
+
inspectr/with_open.py,sha256=ekcdVeLQT0SEt3bMpxRWoJDXO8hdctArRdWkKliLes8,838
|
11
|
+
inspectr-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
12
|
+
inspectr-0.1.0.dist-info/METADATA,sha256=TBsPAavCdBfCjj6s89BFSfTgVahkx54ad4W_pON4Bi0,2896
|
13
|
+
inspectr-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
14
|
+
inspectr-0.1.0.dist-info/entry_points.txt,sha256=IrOM4SpCRfFZzBWngg3ezXuX31ZqFBR-flSU5c8tbTo,52
|
15
|
+
inspectr-0.1.0.dist-info/top_level.txt,sha256=NlTFBMaWgYmxFzjQtp4Y6itVQQV8sBPtAAZvFnviT-A,9
|
16
|
+
inspectr-0.1.0.dist-info/RECORD,,
|
inspectr-0.0.5.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
inspectr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
inspectr/__main__.py,sha256=Rojob_KTdzcHjItkGgA3TPLqPd223pIfeajpCWDP3Gc,652
|
3
|
-
inspectr/authenticity.py,sha256=r2nld4JZ8Cl6RWdpP-3jRcFG-5YJCWjAeEiNzFqfO4k,2202
|
4
|
-
inspectr/bare_ratio.py,sha256=oQfag96zjGnmFHofZ5qabBGGVoKUnmRNzGq_WX_8ZZg,1997
|
5
|
-
inspectr/count_exceptions.py,sha256=UhUc3ZZY8eDxLtlLX1USKzP_JEPY3clx8R8TPvtQ5LE,1588
|
6
|
-
inspectr/duplicates.py,sha256=xAFNO90JVEhzZqmyHg8gXAo28e_DIrB3SYh7tONAzbU,1455
|
7
|
-
inspectr/size_counts.py,sha256=Yw-k5v7lPylzfh-9SUs1PtlV3Bhvdy9X7w2Pi93xL9g,2920
|
8
|
-
inspectr/with_open.py,sha256=qbCAb14cZ15yX7SbLo2C3nE1sDEKLiZ48bvFQfTAKJk,606
|
9
|
-
inspectr-0.0.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
10
|
-
inspectr-0.0.5.dist-info/METADATA,sha256=tzbZCSIw8J7QTMSzLQ9hyBDXZGQzQF2gBm7xRbzWKPk,1666
|
11
|
-
inspectr-0.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
12
|
-
inspectr-0.0.5.dist-info/entry_points.txt,sha256=IrOM4SpCRfFZzBWngg3ezXuX31ZqFBR-flSU5c8tbTo,52
|
13
|
-
inspectr-0.0.5.dist-info/top_level.txt,sha256=NlTFBMaWgYmxFzjQtp4Y6itVQQV8sBPtAAZvFnviT-A,9
|
14
|
-
inspectr-0.0.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|