inspectr 0.0.4__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,16 @@ from collections import Counter, defaultdict
5
5
  from typing import List
6
6
 
7
7
 
8
- def main(files: List[pathlib.Path]) -> None:
8
+ def main(files: List[pathlib.Path], **kwargs) -> None:
9
+ for f in files:
10
+ if not f.exists():
11
+ print(f"Error: File does not exist: {f}")
12
+ return
13
+
14
+ if not f.is_file():
15
+ print(f"Error: Not a file: {f}")
16
+ return
17
+
9
18
  exception_types = Counter()
10
19
  bare_or_exception_per_file = defaultdict(int)
11
20
 
inspectr/duplicates.py CHANGED
@@ -1,6 +1,130 @@
1
1
  import sys
2
- import hashlib
2
+ import pathlib
3
3
  from collections import defaultdict
4
+ from typing import List, Tuple
5
+
6
+
7
def calculate_similarity(lines1: List[str], lines2: List[str]) -> float:
    """Return the fraction of positionally matching lines between two blocks.

    Line i of *lines1* is compared with line i of *lines2*; the match count
    is divided by the longer block's length, so 1.0 means the blocks are
    identical. Returns 0.0 when either block is empty.
    """
    if not (lines1 and lines2):
        return 0.0
    matched = 0
    for left, right in zip(lines1, lines2):
        if left == right:
            matched += 1
    return matched / max(len(lines1), len(lines2))
12
+
13
+
14
def ranges_overlap(start1, end1, start2, end2):
    """Return True when the two ranges overlap by more than 50%.

    The intersection is measured against each range's own size; it is enough
    for either range to be more than half covered by the shared span.
    """
    lo = max(start1, start2)
    hi = min(end1, end2)

    # Empty or inverted intersection means no overlap at all.
    if lo >= hi:
        return False

    shared = hi - lo
    size1 = end1 - start1
    size2 = end2 - start2

    frac1 = shared / size1 if size1 > 0 else 0
    frac2 = shared / size2 if size2 > 0 else 0

    return max(frac1, frac2) > 0.5
30
+
31
+
32
def merge_overlapping_groups(groups, block_size, file_lines):
    """Merge groups that have overlapping ranges by more than 50%, recompute similarity after merging

    Args:
        groups: list of (fname, lnum, similar_blocks) tuples; similar_blocks
            is a list of (other_file, other_line, similarity) tuples.
        block_size: number of consecutive lines in one block.
        file_lines: mapping of filename -> list of that file's lines.

    Returns:
        List of (fname, start_line, actual_size, merged_similar) tuples,
        where merged_similar is recomputed via compute_merged_similarities().
    """
    if not groups:
        return []

    # Bucket groups by their primary file so overlaps are only merged
    # within the same file.
    groups_by_file = defaultdict(list)
    for fname, lnum, similar_blocks in groups:
        groups_by_file[fname].append((lnum, similar_blocks))

    merged_groups = []

    for fname, file_groups in groups_by_file.items():
        # Process groups in ascending start-line order so overlapping ranges
        # end up adjacent.
        file_groups.sort(key=lambda x: x[0])

        # Seed the running range with the first group.
        current_start = file_groups[0][0]
        current_end = current_start + block_size - 1
        # Maps other-file name -> list of (start, end) ranges that are
        # similar to the current running range.
        current_similar_dict = {}

        for f, l, s in file_groups[0][1]:
            end = l + block_size - 1
            key = f
            if key not in current_similar_dict:
                current_similar_dict[key] = []
            current_similar_dict[key].append((l, end))

        for i in range(1, len(file_groups)):
            lnum, similar_blocks = file_groups[i]
            end_line = lnum + block_size - 1

            if ranges_overlap(current_start, current_end, lnum, end_line):
                # Overlaps the running range: extend it and absorb this
                # group's similar blocks.
                current_end = max(current_end, end_line)

                for f, l, s in similar_blocks:
                    end = l + block_size - 1
                    key = f
                    if key not in current_similar_dict:
                        current_similar_dict[key] = []
                    current_similar_dict[key].append((l, end))
            else:
                # No overlap: flush the running range (recomputing its
                # similarities over the merged span) and start a new one.
                actual_size = current_end - current_start + 1
                merged_similar = compute_merged_similarities(
                    fname, current_start, current_end,
                    current_similar_dict, file_lines
                )
                merged_groups.append((fname, current_start, actual_size, merged_similar))

                current_start = lnum
                current_end = end_line
                current_similar_dict = {}
                for f, l, s in similar_blocks:
                    end = l + block_size - 1
                    key = f
                    if key not in current_similar_dict:
                        current_similar_dict[key] = []
                    current_similar_dict[key].append((l, end))

        # Flush the final running range for this file.
        actual_size = current_end - current_start + 1
        merged_similar = compute_merged_similarities(
            fname, current_start, current_end,
            current_similar_dict, file_lines
        )
        merged_groups.append((fname, current_start, actual_size, merged_similar))

    return merged_groups
96
+
97
+
98
def compute_merged_similarities(fname, start, end, similar_dict, file_lines):
    """Compute similarity for merged ranges.

    Compares the primary file's merged span (1-based, inclusive) against the
    coalesced spans recorded for each other file, returning a list of
    (other_file, span_start, similarity) tuples.
    """
    results = []

    # No line data for the primary file -> nothing to compare against.
    if fname not in file_lines:
        return results

    reference = file_lines[fname][start - 1:end]

    for partner, spans in similar_dict.items():
        if partner not in file_lines:
            continue

        # Coalesce overlapping spans. The sort is in place, mutating the
        # caller's list, exactly as before.
        spans.sort()
        coalesced = []
        for span_start, span_end in spans:
            if coalesced and ranges_overlap(coalesced[-1][0], coalesced[-1][1], span_start, span_end):
                last_start, last_end = coalesced[-1]
                coalesced[-1] = (min(last_start, span_start), max(last_end, span_end))
            else:
                coalesced.append((span_start, span_end))

        for span_start, span_end in coalesced:
            candidate = file_lines[partner][span_start - 1:span_end]
            results.append((partner, span_start, calculate_similarity(reference, candidate)))

    return results
127
+
4
128
 
5
129
def find_duplicates(files, block_size=10, min_occur=3):
    """
    Find duplicated or highly similar blocks of code across the given files.

    Args:
        files: iterable of file names (read as UTF-8 text; unreadable
            files are reported to stderr and skipped)
        block_size: number of consecutive lines in a block
        min_occur: minimum number of occurrences to report

    Yields:
        (primary_filename, primary_line, actual_block_size, [(other_file, other_line, similarity), ...])
    """
    file_lines = {}
    for fname in files:
        try:
            with open(fname, encoding="utf-8") as f:
                file_lines[fname] = f.readlines()
        except OSError as e:
            print(f"Could not read {fname}: {e}", file=sys.stderr)
            continue

    # Slide a window of block_size lines over every file.
    all_blocks = []
    for fname in file_lines:
        lines = file_lines[fname]
        for i in range(len(lines) - block_size + 1):
            block_lines = lines[i:i + block_size]
            all_blocks.append((fname, i + 1, block_lines))

    reported = set()
    groups = []

    for i, (fname1, lnum1, block1) in enumerate(all_blocks):
        key1 = (fname1, lnum1)

        if key1 in reported:
            continue

        similar_blocks = []

        # Only compare against later blocks. The previous version iterated the
        # whole list and discarded half the pairs with `if i >= j: continue`,
        # doubling the work of the already O(n^2) scan for no benefit.
        for j in range(i + 1, len(all_blocks)):
            fname2, lnum2, block2 = all_blocks[j]

            key2 = (fname2, lnum2)
            if key2 in reported:
                continue

            similarity = calculate_similarity(block1, block2)
            if similarity > 0.8:
                similar_blocks.append((fname2, lnum2, similarity))
                reported.add(key2)

        if len(similar_blocks) + 1 >= min_occur:
            reported.add(key1)
            groups.append((fname1, lnum1, similar_blocks))

    merged = merge_overlapping_groups(groups, block_size, file_lines)

    for fname, lnum, actual_size, similar_blocks in merged:
        yield fname, lnum, actual_size, similar_blocks
39
189
 
40
def main(files: List[pathlib.Path], block_size: int = 10, min_occur: int = 3) -> None:
    """CLI entry point for the duplicates tool.

    Args:
        files: paths to inspect; all must exist and be regular files.
        block_size: number of consecutive lines per compared block.
        min_occur: minimum number of occurrences to report.

    Prints one line per merged duplicate group; returns early with an error
    message on the first missing/non-file path, or a usage message when no
    files are given.
    """
    if not files:
        print("Usage: inspectr duplicates [--block-size N] [--min-occur N] file1.py [file2.py ...]")
        return

    for f in files:
        if not f.exists():
            print(f"Error: File does not exist: {f}")
            return

        if not f.is_file():
            print(f"Error: Not a file: {f}")
            return

    file_paths = [str(f) for f in files]
    for fname, lnum, block_sz, similar_blocks in find_duplicates(file_paths, block_size=block_size, min_occur=min_occur):
        end_line = lnum + block_sz - 1

        # Join entries with " and" instead of the old
        # `"".join(...).rstrip(" and")`: rstrip strips a trailing run of the
        # CHARACTERS {' ', 'a', 'n', 'd'}, not the suffix " and", and turned
        # "occur in" into "occur i" when a group had no similar blocks.
        entries = []
        for other_file, other_line, similarity in similar_blocks:
            other_end = other_line + block_sz - 1
            similarity_pct = int(similarity * 100)
            entries.append(f" {other_file} at lines {other_line}-{other_end} ({similarity_pct}% similarity)")

        print(f"{fname}: lines {lnum}-{end_line} occur in" + " and".join(entries))
50
217
 
inspectr/size_counts.py CHANGED
@@ -11,7 +11,16 @@ FUNC_PARAMS = 5
11
11
  CLASS_METHODS = 20
12
12
 
13
13
 
14
- def main(files: List[pathlib.Path]) -> None:
14
+ def main(files: List[pathlib.Path], **kwargs) -> None:
15
+ for f in files:
16
+ if not f.exists():
17
+ print(f"Error: File does not exist: {f}")
18
+ return
19
+
20
+ if not f.is_file():
21
+ print(f"Error: Not a file: {f}")
22
+ return
23
+
15
24
  # TODO: rename these
16
25
  # Counters and diagnostics
17
26
  files_over_1000 = []
inspectr/with_open.py CHANGED
@@ -4,11 +4,20 @@ import sys
4
4
  from typing import List
5
5
 
6
6
 
7
- def main(files: List[pathlib.Path]) -> None:
7
+ def main(files: List[pathlib.Path], **kwargs) -> None:
8
8
  if not files:
9
9
  print("Usage: inspectr with_open <file1> [file2 ...]")
10
10
  sys.exit(1)
11
11
 
12
+ for f in files:
13
+ if not f.exists():
14
+ print(f"Error: File does not exist: {f}")
15
+ return
16
+
17
+ if not f.is_file():
18
+ print(f"Error: Not a file: {f}")
19
+ return
20
+
12
21
  for filepath in files:
13
22
  tree = ast.parse(filepath.read_text(), filename=str(filepath))
14
23
  for node in ast.walk(tree):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspectr
3
- Version: 0.0.4
3
+ Version: 0.1.0
4
4
  Summary: A collection of python tools to inspect code quality.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -14,6 +14,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
14
14
  Classifier: Topic :: Software Development
15
15
  Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
+ Requires-Dist: colorama
18
+ Provides-Extra: test
19
+ Requires-Dist: pytest; extra == "test"
17
20
  Dynamic: license-file
18
21
 
19
22
  # inspectr
@@ -29,15 +32,48 @@ pip install inspectr
29
32
  ## Usage
30
33
  Generally, the syntax goes:
31
34
  ```bash
32
- inspectr <subtool> [files...]
35
+ inspectr <subtool> [options] [files...]
33
36
  ```
34
37
  where `<subtool>` is one of the following:
35
38
 
36
39
  - `authenticity`: looks for TODO comments, empty try/except blocks, and stub functions
37
40
  - `bare_ratio`: checks for the ratio of bare excepts to meaningful exception usage
41
+ - `compare_funcs`: compares function/method names across two directory versions
42
+ - `complexity`: analyzes algorithmic complexity of Python code
38
43
  - `count_exceptions`: counts how many of each type of exception there are (including bare except)
44
+ - `duplicates`: looks for occurrences of duplicate or similar code (>80% similarity, default: 10+ lines, 3+ occurrences)
39
45
  - `size_counts`: various linecount-related code complexity checks
40
46
  - `with_open`: checks for `open` in the absence of `with` and manual calls to `close()`
41
47
 
48
+ ### Command-Line Options
49
+ All tools accept command-line options in the format `--option-name value`.
50
+ Options are passed as keyword arguments to the tool's main function. For example:
51
+ ```bash
52
+ inspectr duplicates --block-size 15 --min-occur 2 file1.py file2.py
53
+ ```
54
+
55
+ Recognized options include:
56
+ - `duplicates`:
57
+ - `--block-size N`: number of consecutive lines in a block (default: 10)
58
+ - `--min-occur N`: minimum number of occurrences to report (default: 3)
59
+
60
+ **Note**: any unrecognized options will be silently ignored.
61
+
62
+ ### Usage for compare_funcs
63
+ The `compare_funcs` tool compares functions across two directory versions:
64
+ ```bash
65
+ inspectr compare_funcs files_list.txt dir1 dir2
66
+ ```
67
+ where `files_list.txt` contains relative paths to compare, one per line.
68
+
69
+ ## Local Testing
70
+ First install in development mode with test dependencies, then run the tests:
71
+ ```bash
72
+ git clone https://github.com/ajcm474/inspectr.git
73
+ cd inspectr
74
+ pip install -e ".[test]"
75
+ pytest tests/
76
+ ```
77
+
42
78
  **Please note:** this project is in the early alpha stage, so don't expect the above subtool names
43
79
  to be stable between versions. I might even merge/split them at some point.
@@ -0,0 +1,16 @@
1
+ inspectr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ inspectr/__main__.py,sha256=-BF-0Mi7-JjNabGSUPJ6IHu21EoNO0j0-jKd3wgnoDQ,1395
3
+ inspectr/authenticity.py,sha256=GMM82Ol0Gn_Ol5W0C9UWsT3U7g1vNCRv30_oz-1gX-Q,2791
4
+ inspectr/bare_ratio.py,sha256=lJ0cKToUy9y4hfcwCt2D1_-U0XI-j35gCTIgsbxgXBk,2229
5
+ inspectr/compare_funcs.py,sha256=PHXV4XsrsPbfT_gAPi7HYPfljfZSJTCxpI13m3n8Gxg,3863
6
+ inspectr/complexity.py,sha256=ewl7SccHgD5uBJi7z9n4I6rNFbHEz1rvZDqvjQN_IpM,30887
7
+ inspectr/count_exceptions.py,sha256=Fpfq2Uyozg8rmc9fS_PAzB4WGDuzONKOjYUcbALrGwc,1820
8
+ inspectr/duplicates.py,sha256=iam-p8pA_LuYxWJ0p3wbWsGX2EYTqHGdkyLuzfw1MEM,7721
9
+ inspectr/size_counts.py,sha256=q0YZytb30XivT6W4B2HDxh2tWDox1B4Iz8VesIq7a6Y,3152
10
+ inspectr/with_open.py,sha256=ekcdVeLQT0SEt3bMpxRWoJDXO8hdctArRdWkKliLes8,838
11
+ inspectr-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
+ inspectr-0.1.0.dist-info/METADATA,sha256=TBsPAavCdBfCjj6s89BFSfTgVahkx54ad4W_pON4Bi0,2896
13
+ inspectr-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ inspectr-0.1.0.dist-info/entry_points.txt,sha256=IrOM4SpCRfFZzBWngg3ezXuX31ZqFBR-flSU5c8tbTo,52
15
+ inspectr-0.1.0.dist-info/top_level.txt,sha256=NlTFBMaWgYmxFzjQtp4Y6itVQQV8sBPtAAZvFnviT-A,9
16
+ inspectr-0.1.0.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- inspectr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- inspectr/__main__.py,sha256=Rojob_KTdzcHjItkGgA3TPLqPd223pIfeajpCWDP3Gc,652
3
- inspectr/authenticity.py,sha256=fgKJSE_1qYE1NgLdXmOjPQjo-dwD42sEhqpcsazIj3U,1703
4
- inspectr/bare_ratio.py,sha256=oQfag96zjGnmFHofZ5qabBGGVoKUnmRNzGq_WX_8ZZg,1997
5
- inspectr/count_exceptions.py,sha256=UhUc3ZZY8eDxLtlLX1USKzP_JEPY3clx8R8TPvtQ5LE,1588
6
- inspectr/duplicates.py,sha256=xAFNO90JVEhzZqmyHg8gXAo28e_DIrB3SYh7tONAzbU,1455
7
- inspectr/size_counts.py,sha256=Yw-k5v7lPylzfh-9SUs1PtlV3Bhvdy9X7w2Pi93xL9g,2920
8
- inspectr/with_open.py,sha256=qbCAb14cZ15yX7SbLo2C3nE1sDEKLiZ48bvFQfTAKJk,606
9
- inspectr-0.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
10
- inspectr-0.0.4.dist-info/METADATA,sha256=2YAiL5P6MPWIgR54rlkZXzt3fXxaW6CsT1vT1krn_uo,1573
11
- inspectr-0.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- inspectr-0.0.4.dist-info/entry_points.txt,sha256=IrOM4SpCRfFZzBWngg3ezXuX31ZqFBR-flSU5c8tbTo,52
13
- inspectr-0.0.4.dist-info/top_level.txt,sha256=NlTFBMaWgYmxFzjQtp4Y6itVQQV8sBPtAAZvFnviT-A,9
14
- inspectr-0.0.4.dist-info/RECORD,,