codebase-stats 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_stats/__init__.py +148 -0
- codebase_stats/core.py +240 -0
- codebase_stats/coverage.py +295 -0
- codebase_stats/duration.py +245 -0
- codebase_stats/lowcov.py +204 -0
- codebase_stats/metrics.py +393 -0
- codebase_stats/radon.py +264 -0
- codebase_stats/reporter.py +283 -0
- codebase_stats/sizes.py +100 -0
- codebase_stats/tree.py +144 -0
- codebase_stats/utils.py +86 -0
- codebase_stats-0.0.1.dist-info/METADATA +376 -0
- codebase_stats-0.0.1.dist-info/RECORD +16 -0
- codebase_stats-0.0.1.dist-info/WHEEL +5 -0
- codebase_stats-0.0.1.dist-info/entry_points.txt +2 -0
- codebase_stats-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
"""Code quality metrics analysis (complexity, maintainability, etc.)."""
|
|
2
|
+
|
|
3
|
+
import bisect
|
|
4
|
+
from .utils import percentile, format_line_ranges, ascii_histogram, blame_header
|
|
5
|
+
from .radon import ( # re-export so callers can still import from here
|
|
6
|
+
cc_rank,
|
|
7
|
+
mi_rank,
|
|
8
|
+
run_radon_json,
|
|
9
|
+
run_radon,
|
|
10
|
+
run_radon_mi,
|
|
11
|
+
run_radon_raw,
|
|
12
|
+
run_radon_hal,
|
|
13
|
+
_HAL_VOLUME,
|
|
14
|
+
_HAL_DIFFICULTY,
|
|
15
|
+
_HAL_EFFORT,
|
|
16
|
+
_HAL_BUGS,
|
|
17
|
+
_parse_hal_list,
|
|
18
|
+
_parse_hal_dict,
|
|
19
|
+
_parse_hal_entry,
|
|
20
|
+
_run_hal_chunk,
|
|
21
|
+
_collect_hal_raw,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _float_buckets(sorted_values: list, n_bins: int) -> tuple:
|
|
26
|
+
"""Build histogram edges and bucket counts for a sorted list of floats.
|
|
27
|
+
|
|
28
|
+
Returns (edges, buckets) wherOverflow values land in the last bucket.
|
|
29
|
+
"""
|
|
30
|
+
maximum = sorted_values[-1]
|
|
31
|
+
step = max((maximum + 0.001) / n_bins, 0.001)
|
|
32
|
+
edges = [i * step for i in range(n_bins + 1)]
|
|
33
|
+
buckets = [0] * n_bins
|
|
34
|
+
for v in sorted_values:
|
|
35
|
+
i = min(bisect.bisect_right(edges, v) - 1, n_bins - 1)
|
|
36
|
+
buckets[max(0, i)] += 1
|
|
37
|
+
return edges, buckets
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _cc_int_buckets(scores: list, cutoff: int) -> tuple:
|
|
41
|
+
"""Bucket integer CC scores into per-value bars up to cutoff, then a tail.
|
|
42
|
+
|
|
43
|
+
Returns (buckets, has_tail). has_tail is True when max(scores) > cutoff.
|
|
44
|
+
"""
|
|
45
|
+
has_tail = int(scores[-1]) > cutoff
|
|
46
|
+
buckets = [0] * (cutoff + (1 if has_tail else 0))
|
|
47
|
+
for v in scores:
|
|
48
|
+
iv = int(v)
|
|
49
|
+
buckets[iv - 1 if iv <= cutoff else -1] += 1
|
|
50
|
+
return buckets, has_tail
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _blame_cc(cc_files: list, blame_limit: int, width: int) -> None:
    """Print CC blame: files with avg CC above outlier threshold AND coverage < 80%.

    Args:
        cc_files: Rows of (cc_avg, cc_max, coverage_pct, path, missing_lines).
        blame_limit: Maximum rows to display; 0 means no limit.
        width: Line width for output.
    """
    # Tukey outlier fence on the per-file average CC: Q3 + 1.5 * IQR.
    file_avgs = sorted(f[0] for f in cc_files)
    q1_fa = percentile(file_avgs, 25)
    q3_fa = percentile(file_avgs, 75)
    threshold = q3_fa + 1.5 * (q3_fa - q1_fa)
    # Order by avg CC x coverage gap so "complex AND untested" sorts first.
    risky = sorted(
        [f for f in cc_files if f[0] > threshold and f[2] < 80.0],
        key=lambda x: x[0] * (100 - x[2]),
        reverse=True,
    )
    blame_header(
        f"files with avg CC > {threshold:.1f} AND coverage < 80%",
        len(risky),
        blame_limit,
        width,
    )
    # A falsy blame_limit means "show everything".
    display = risky if not blame_limit else risky[:blame_limit]
    if display:
        print(f" {'avg CC':<8} {'max CC':<8} {'Coverage':<10} File")
        print(f" {'─' * 6} {'─' * 6} {'─' * 8} {'─' * 50}")
        for cc_avg, cc_max, cov, path, missing_lines in display:
            print(
                f" {cc_avg:>5.1f}{cc_rank(cc_avg)} "
                f"{cc_max:>5}{cc_rank(cc_max)} {cov:>6.1f}% {path}"
            )
            # Show uncovered line ranges beneath the file row, if any.
            ranges = format_line_ranges(missing_lines)
            if ranges:
                print(f" {' ' * 16}📍 {ranges}")
    else:
        print(" ✅ No high-complexity low-coverage files.")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _blame_mi(mi_files: list, q1_mi: float, blame_limit: int, width: int) -> None:
    """Print MI blame: files below Q1 maintainability index.

    Args:
        mi_files: Rows of (mi, coverage_pct, path, missing_lines).
        q1_mi: First-quartile MI value used as the blame threshold.
        blame_limit: Maximum rows to display; 0 means no limit.
        width: Line width for output.
    """
    # Worst (lowest MI) first.
    blamed = sorted(
        [f for f in mi_files if f[0] < q1_mi],
        key=lambda x: x[0],
    )
    blame_header(f"MI below Q1 ({q1_mi:.1f})", len(blamed), blame_limit, width)
    # A falsy blame_limit means "show everything".
    display = blamed if not blame_limit else blamed[:blame_limit]
    if display:
        print(f" {'MI':<7} {'Coverage':<10} File")
        print(f" {'─' * 5} {'─' * 8} {'─' * 50}")
        for mi, cov, path, missing_lines in display:
            print(f" {mi:>5.1f}{mi_rank(mi)} {cov:>6.1f}% {path}")
            # Show uncovered line ranges beneath the file row, if any.
            ranges = format_line_ranges(missing_lines)
            if ranges:
                print(f" {' ' * 8}📍 {ranges}")
    else:
        print(" ✅ All files above Q1 MI.")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _blame_raw(raw_files: list, q1_ratio: float, blame_limit: int, width: int) -> None:
    """Print raw blame: files below Q1 comment ratio.

    Args:
        raw_files: Rows of (comment_ratio, sloc, coverage_pct, path, missing_lines).
        q1_ratio: First-quartile comment ratio (0..1) used as the threshold.
        blame_limit: Maximum rows to display; 0 means no limit.
    width: Line width for output.
    """
    # Least-documented first.
    blamed = sorted(
        [f for f in raw_files if f[0] < q1_ratio],
        key=lambda x: x[0],
    )
    blame_header(
        f"comment ratio below Q1 ({q1_ratio * 100:.1f}%)", len(blamed), blame_limit, width
    )
    # A falsy blame_limit means "show everything".
    display = blamed if not blame_limit else blamed[:blame_limit]
    if display:
        print(f" {'Comment%':<10} {'SLOC':<7} {'Coverage':<10} File")
        print(f" {'─' * 8} {'─' * 5} {'─' * 8} {'─' * 50}")
        for ratio, sloc, cov, path, missing_lines in display:
            print(f" {ratio * 100:>6.1f}% {sloc:>5} {cov:>6.1f}% {path}")
            # Show uncovered line ranges beneath the file row, if any.
            ranges = format_line_ranges(missing_lines)
            if ranges:
                print(f" {' ' * 16}📍 {ranges}")
    else:
        print(" ✅ All files above Q1 comment ratio.")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _blame_hal(hal_files: list, threshold: float, blame_limit: int, width: int) -> None:
    """Print Halstead blame: files above Q3+1.5×IQR bug estimate.

    Args:
        hal_files: Rows of (bugs, difficulty, coverage_pct, path, missing_lines).
        threshold: Bug-estimate outlier fence (Q3 + 1.5 * IQR), computed by the caller.
        blame_limit: Maximum rows to display; 0 means no limit.
        width: Line width for output.
    """
    # Most bug-prone first.
    blamed = sorted(
        [f for f in hal_files if f[0] > threshold],
        key=lambda x: x[0],
        reverse=True,
    )
    blame_header(
        f"Halstead bug outliers Q3+1.5×IQR > {threshold:.2f} bugs",
        len(blamed),
        blame_limit,
        width,
    )
    # A falsy blame_limit means "show everything".
    display = blamed if not blame_limit else blamed[:blame_limit]
    if display:
        print(f" {'Bugs':<8} {'Difficulty':<12} {'Coverage':<10} File")
        print(f" {'─' * 6} {'─' * 10} {'─' * 8} {'─' * 50}")
        for bugs, diff, cov, path, missing_lines in display:
            print(f" {bugs:>6.2f} {diff:>8.1f} {cov:>6.1f}% {path}")
            # Show uncovered line ranges beneath the file row, if any.
            ranges = format_line_ranges(missing_lines)
            if ranges:
                print(f" {' ' * 18}📍 {ranges}")
    else:
        print(" ✅ No Halstead bug outliers.")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _cc_rank_summary_line(all_scores: list) -> str:
    """Build rank distribution summary string (e.g. 'A:15 B:8 C:3')."""
    counts: dict[str, int] = {}
    for score in all_scores:
        rank = cc_rank(score)
        counts[rank] = counts.get(rank, 0) + 1
    # Emit ranks in fixed A..F order, skipping any rank with no scores.
    parts = [
        f"{rank}:{counts[rank]}"
        for rank in ("A", "B", "C", "D", "E", "F")
        if rank in counts
    ]
    return " ".join(parts)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _print_cc_percentile_table(all_scores: list, maximum, width: int) -> None:
    """Print the percentile table for CC scores.

    Args:
        all_scores: Sorted function-level CC values.
        maximum: Largest CC value (printed as the final row).
        width: Line width for output.
    """
    print(f"{'─' * width}")
    print(" PERCENTILES (function-level)")
    print(f"{'─' * width}")
    for pct in (25, 50, 75, 90, 95, 99):
        # Quartiles get conventional names; the rest print as p90/p95/p99.
        label = {25: "Q1", 50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        val = percentile(all_scores, pct)
        print(f" {label:<8} {val:>4} ({cc_rank(val)})")
    print(f" {'max':<8} {maximum:>4} ({cc_rank(maximum)})")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _print_mi_percentile_table(mis: list, width: int) -> None:
    """Print the percentile table for MI scores.

    Args:
        mis: Sorted per-file maintainability index values.
        width: Line width for output.
    """
    print(f"{'─' * width}")
    print(" PERCENTILES")
    print(f"{'─' * width}")
    for pct in (25, 50, 75, 90):
        # Quartiles get conventional names; the rest print as p90.
        label = {25: "Q1", 50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        val = percentile(mis, pct)
        print(f" {label:<8} {val:>5.1f} ({mi_rank(val)})")
    # mis is sorted ascending, so mis[0] is the worst (minimum) MI.
    print(f" {'min':<8} {mis[0]:>5.1f} ({mi_rank(mis[0])})")
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def show_complexity_histogram(
    stats: dict,
    bins: int = 10,
    blame_limit: int = 20,
    show_blame: bool = True,
    width: int = 80,
):
    """Display cyclomatic complexity histogram and high-complexity blame.

    Args:
        stats: Precomputed statistics (expects "cc_scores" and "file_stats" keys)
        bins: Number of histogram bins (not used here: CC uses integer bins)
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    cc_scores = stats.get("cc_scores", [])  # (cc_value, filepath) pairs
    if not cc_scores:
        print("⚠️ No CC data — run with --radon-root <dir>")
        return

    all_scores = sorted(v for v, _ in cc_scores)
    # Per-file rows: (avg CC, max CC, coverage %, path, missing line numbers).
    cc_files = [
        (f["cc_avg"], f["cc_max"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("cc_avg") is not None
    ]
    n_funcs = len(cc_scores)
    n_files = len(cc_files)
    mean_cc = sum(all_scores) / n_funcs
    maximum = all_scores[-1]

    # Integer bins: each CC value 1–N gets its own bucket, tail groups tail.
    p99_cc = int(percentile(all_scores, 99))
    # Cap per-value bars at p99 and never more than 25 bars.
    cutoff = min(p99_cc, 25, int(maximum))
    int_vals = list(range(1, cutoff + 1))
    buckets, has_tail = _cc_int_buckets(all_scores, cutoff)

    labels = [f"CC {i:<2}" for i in int_vals]
    suffixes = [cc_rank(i) for i in int_vals]
    if has_tail:
        # One extra bar for everything past the cutoff.
        labels.append(f"CC {cutoff + 1}+")
        suffixes.append(cc_rank(cutoff + 1))

    rank_summary = _cc_rank_summary_line(all_scores)

    print(f"\n{'═' * width}")
    print(" CYCLOMATIC COMPLEXITY HISTOGRAM (per function / method)")
    print(f"{'═' * width}")
    print(
        f" Functions: {n_funcs} Files: {n_files} Mean CC: {mean_cc:.1f} "
        f"Ranks: A=1-5 B=6-10 C=11-15 D=16-20 E/F=21+"
    )
    print(f" Rank distribution: {rank_summary}")
    print()
    ascii_histogram(buckets, labels, suffixes=suffixes, width=width)
    print()
    _print_cc_percentile_table(all_scores, maximum, width)

    if show_blame:
        _blame_cc(cc_files, blame_limit, width)
    print(f"\n{'═' * width}")
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def show_mi_histogram(
    stats: dict, bins: int = 10, blame_limit: int = 20, show_blame: bool = True, width: int = 80
):
    """Display maintainability index histogram and low-MI blame.

    Args:
        stats: Precomputed statistics (expects a "file_stats" key)
        bins: Number of histogram bins over the fixed 0–100 MI scale
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    # Per-file rows: (MI, coverage %, path, missing line numbers).
    mi_files = [
        (f["mi"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("mi") is not None
    ]
    if not mi_files:
        print("⚠️ No MI data — run with --radon-root <dir>")
        return

    mis = sorted(f[0] for f in mi_files)
    n = len(mis)
    mean = sum(mis) / n

    # MI range is 0–100; use a fixed scale for consistent cross-run comparison.
    step = max((100.0 + 0.001) / bins, 0.1)
    edges = [i * step for i in range(bins + 1)]
    buckets = [0] * bins
    for v in mis:
        # bisect_right locates the bin; clamp the index into [0, bins - 1].
        i = min(bisect.bisect_right(edges, v) - 1, bins - 1)
        buckets[max(0, i)] += 1

    labels = [f"{edges[i]:>5.1f}–{edges[i + 1]:<5.1f} MI" for i in range(bins)]
    # Rank each bar by its bin midpoint so the suffix reflects the band.
    suffixes = [mi_rank((edges[i] + edges[i + 1]) / 2) for i in range(bins)]

    print(f"\n{'═' * width}")
    print(" MAINTAINABILITY INDEX HISTOGRAM (higher = more maintainable)")
    print(f"{'═' * width}")
    print(f" Files: {n} Mean MI: {mean:.1f} Ranks: A=20-100 B=10-19 C=0-9")
    print()
    ascii_histogram(buckets, labels, suffixes=suffixes, width=width)
    print()
    _print_mi_percentile_table(mis, width)

    if show_blame:
        q1_mi = percentile(mis, 25)
        _blame_mi(mi_files, q1_mi, blame_limit, width)
    print(f"\n{'═' * width}")
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def show_raw_histogram(
    stats: dict, bins: int = 10, blame_limit: int = 20, show_blame: bool = True, width: int = 80
):
    """Display comment ratio histogram and under-documented blame.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        bins: Number of histogram bins
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    # Per-file rows: (comment ratio, SLOC, coverage %, path, missing lines).
    raw_files = [
        (f["comment_ratio"], f["sloc"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("comment_ratio") is not None
    ]
    if not raw_files:
        print("⚠️ No raw metrics data — run with --radon-root <dir>")
        return

    ratios = sorted(f[0] for f in raw_files)
    n = len(ratios)
    mean = sum(ratios) / n

    edges, buckets = _float_buckets(ratios, bins)
    labels = [f"{edges[i] * 100:>4.0f}–{edges[i + 1] * 100:<4.0f}% comment" for i in range(bins)]

    print(f"\n{'═' * width}")
    print(" COMMENT RATIO HISTOGRAM (comments + docstrings / SLOC)")
    print(f"{'═' * width}")
    total_sloc = sum(f[1] for f in raw_files)
    # Reconstruct absolute comment counts from ratio × SLOC per file.
    total_comments = sum(int(f[0] * f[1]) for f in raw_files)
    print(
        f" Files: {n} Total SLOC: {total_sloc:,} "
        f"Overall comment ratio: {total_comments / total_sloc * 100:.1f}%"
    )
    print()
    ascii_histogram(buckets, labels, width=width)
    print()
    print(f"{'─' * width}")
    print(" PERCENTILES")
    print(f"{'─' * width}")
    for pct in (25, 50, 75, 90):
        # Quartiles get conventional names; the rest print as p90.
        label = {25: "Q1", 50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        print(f" {label:<8} {percentile(ratios, pct) * 100:>5.1f}%")
    print(f" {'mean':<8} {mean * 100:>5.1f}%")

    if show_blame:
        q1_ratio = percentile(ratios, 25)
        _blame_raw(raw_files, q1_ratio, blame_limit, width)
    print(f"\n{'═' * width}")
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def show_hal_histogram(
    stats: dict, bins: int = 10, blame_limit: int = 20, show_blame: bool = True, width: int = 80
):
    """Display Halstead metrics histogram and bug-prone file blame.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        bins: Number of histogram bins
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    # Per-file rows: (bug estimate, difficulty, coverage %, path, missing lines).
    hal_files = [
        (f["hal_bugs"], f["hal_difficulty"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("hal_bugs") is not None
    ]
    if not hal_files:
        print("⚠️ No Halstead data — run with --radon-root <dir>")
        return

    bugs_list = sorted(f[0] for f in hal_files)
    n = len(bugs_list)
    total_bugs = sum(bugs_list)

    edges, buckets = _float_buckets(bugs_list, bins)
    labels = [f"{edges[i]:>5.2f}–{edges[i + 1]:<5.2f} bugs" for i in range(bins)]

    print(f"\n{'═' * width}")
    print(" HALSTEAD METRICS HISTOGRAM (estimated bug count per file)")
    print(f"{'═' * width}")
    print(
        f" Files: {n} Total estimated bugs: {total_bugs:.1f} "
        f"Mean per file: {total_bugs / n:.2f}"
    )
    print()
    ascii_histogram(buckets, labels, width=width)
    print()
    print(f"{'─' * width}")
    print(" PERCENTILES (bugs estimate)")
    print(f"{'─' * width}")
    for pct in (50, 75, 90, 95):
        # Median/Q3 get conventional names; the rest print as p90/p95.
        label = {50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        print(f" {label:<8} {percentile(bugs_list, pct):>6.2f} bugs")
    # bugs_list is sorted ascending, so the last entry is the maximum.
    print(f" {'max':<8} {bugs_list[-1]:>6.2f} bugs")

    if show_blame:
        # Tukey outlier fence: Q3 + 1.5 * IQR over the bug estimates.
        q1_b = percentile(bugs_list, 25)
        q3_b = percentile(bugs_list, 75)
        threshold = q3_b + 1.5 * (q3_b - q1_b)
        _blame_hal(hal_files, threshold, blame_limit, width)
    print(f"\n{'═' * width}")
|
codebase_stats/radon.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""Radon subprocess wrappers and data-parsing helpers for code quality metrics."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Cyclomatic Complexity rank thresholds (McCabe scale)
# A: 1–5 B: 6–10 C: 11–15 D: 16–20 E: 21–25 F: 26+
# Lookup table covers the A–D bands only; the E/F tail is produced by
# cc_rank()'s fallback for values outside this table.
_CC_RISK = {
    1: "A", 2: "A", 3: "A", 4: "A", 5: "A",
    6: "B", 7: "B", 8: "B", 9: "B", 10: "B",
    11: "C", 12: "C", 13: "C", 14: "C", 15: "C",
    16: "D", 17: "D", 18: "D", 19: "D", 20: "D",
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def cc_rank(cc: float) -> str:
    """Get the complexity rank (A-F) for a cyclomatic complexity value.

    Follows the module scale documented on _CC_RISK: A–D come from the
    lookup table (CC 1–20), E covers 21–25 and F covers 26+.
    """
    # BUG FIX: the previous `min(int(cc), 20)` clamp mapped every CC >= 21
    # onto the table's "D" entry, making the documented E/F bands
    # unreachable. Look up the raw integer value and let out-of-table
    # scores fall through to the E/F default.
    return _CC_RISK.get(int(cc), "E" if cc <= 25 else "F")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def mi_rank(mi: float) -> str:
    """Get the maintainability rank (A-C) for a maintainability index."""
    # Bands: A for MI >= 20, B for 10 <= MI < 20, C below 10.
    if mi >= 20:
        return "A"
    if mi >= 10:
        return "B"
    return "C"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# radon hal list-format index positions
# Positions of the metrics we read out of the 12-element per-file totals
# row that `radon hal -j` emits in its list format (see _parse_hal_list).
_HAL_VOLUME = 7
_HAL_DIFFICULTY = 8
_HAL_EFFORT = 9
_HAL_BUGS = 11
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def run_radon_json(
    subcmd: str, root: str, extra_flags: list | None = None, debug: bool = False
) -> dict:
    """Run `radon <subcmd> -j [flags] <root>`, return parsed JSON or {}.

    Args:
        subcmd: Radon subcommand (cc, mi, raw, hal)
        root: Root directory to analyze
        extra_flags: Additional flags for the radon command
        debug: Print debug information

    Returns:
        Parsed JSON output from radon or empty dict on failure
    """
    # NOTE: annotation fixed from `list = None` to `list | None = None` —
    # the default value violated the old annotation (file already uses
    # `dict | None` elsewhere, so the 3.10+ union syntax is available).
    # Invoke radon through the current interpreter so the same environment
    # (and radon install) is used regardless of PATH.
    cmd = [sys.executable, "-m", "radon", subcmd, "-j"] + (extra_flags or []) + [root]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        if result.returncode != 0:
            print(f"⚠️ radon {subcmd} exited {result.returncode}: {result.stderr.strip()[:120]}")
            return {}
        parsed = json.loads(result.stdout)
        if debug:
            sample = list(parsed.items())[:2]
            print(f" [debug] radon {subcmd}: {len(parsed)} entries")
            for k, v in sample:
                print(f" {k!r}: {str(v)[:120]}")
        return parsed
    except FileNotFoundError:
        print("⚠️ radon not found — install with: pip install radon")
        return {}
    except Exception as e:
        # Broad catch is deliberate: metrics are best-effort and must not
        # crash the report (covers timeouts and JSON decode errors).
        print(f"⚠️ radon {subcmd} failed: {e}")
        if debug:
            import traceback

            traceback.print_exc()
        return {}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def run_radon(root: str, debug: bool = False) -> dict:
    """Get cyclomatic complexity: filepath → {avg, max, n_blocks, scores}.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to complexity metrics
    """
    # --min A includes all ranks (default is C+ only, which misses most files)
    per_file = run_radon_json("cc", root, extra_flags=["--min", "A"], debug=debug)
    result = {}
    for path, blocks in per_file.items():
        if not blocks:
            continue
        cc_values = [block.get("complexity", 0) for block in blocks if isinstance(block, dict)]
        if not cc_values:
            continue
        result[path] = {
            "avg": sum(cc_values) / len(cc_values),
            "max": max(cc_values),
            "n_blocks": len(cc_values),
            "scores": cc_values,  # individual function CC values
        }
    return result
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def run_radon_mi(root: str, debug: bool = False) -> dict:
    """Get maintainability index: filepath → {mi: float, rank: str}.

    radon mi -j emits either a bare float or a dict with an "mi" key
    depending on version — handle both.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to MI metrics
    """
    parsed = run_radon_json("mi", root, debug=debug)
    result = {}
    for path, value in parsed.items():
        if isinstance(value, dict):
            # Newer radon wraps the score; fall back to "value" then 0.0.
            score = float(value.get("mi", value.get("value", 0.0)))
        elif isinstance(value, (int, float)):
            score = float(value)
        else:
            # Unrecognized payload shape — skip the file.
            continue
        result[path] = {"mi": score, "rank": mi_rank(score)}
    return result
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def run_radon_raw(root: str, debug: bool = False) -> dict:
    """Get raw metrics: filepath → {loc, sloc, comments, blank, comment_ratio}.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to raw metrics
    """
    parsed = run_radon_json("raw", root, debug=debug)
    result = {}
    for path, metrics in parsed.items():
        if not isinstance(metrics, dict):
            continue
        sloc = metrics.get("sloc", 0)
        # Single-line comments plus multi-line (docstring) blocks.
        comments = metrics.get("comments", 0) + metrics.get("multi", 0)
        ratio = comments / sloc if sloc else 0.0
        result[path] = {
            "loc": metrics.get("loc", 0),
            "sloc": sloc,
            "comments": comments,
            "blank": metrics.get("blank", 0),
            "comment_ratio": ratio,
        }
    return result
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _parse_hal_list(t: list) -> dict | None:
    """Parse radon hal list-format total into metrics dict, or None if empty."""
    # Too short to index safely, or an all-zero row (nothing measurable).
    if len(t) < 12 or all(x == 0 for x in t):
        return None
    volume, difficulty, effort, bugs = (
        float(t[idx] or 0)
        for idx in (_HAL_VOLUME, _HAL_DIFFICULTY, _HAL_EFFORT, _HAL_BUGS)
    )
    return {
        "volume": volume,
        "difficulty": difficulty,
        "effort": effort,
        "bugs": bugs,
    }
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _parse_hal_dict(t: dict) -> dict:
|
|
166
|
+
"""Parse radon hal dict-format total into metrics dict."""
|
|
167
|
+
return {
|
|
168
|
+
"volume": float(t.get("volume", 0) or 0),
|
|
169
|
+
"difficulty": float(t.get("difficulty", 0) or 0),
|
|
170
|
+
"effort": float(t.get("effort", 0) or 0),
|
|
171
|
+
"bugs": float(t.get("bugs") or 0.0),
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _parse_hal_entry(v: dict) -> dict | None:
    """Parse a single radon hal JSON entry into {volume,difficulty,effort,bugs}.

    radon hal -j may emit either a list or dict for the 'total' key depending
    on the radon version. Returns None for empty/invalid entries.
    """
    total = v.get("total", v)
    if isinstance(total, list):
        parser = _parse_hal_list
    elif isinstance(total, dict):
        parser = _parse_hal_dict
    else:
        return None
    try:
        return parser(total)
    except (IndexError, TypeError, ValueError):
        # Malformed payload — treat like an empty entry.
        return None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _run_hal_chunk(chunk: list, chunk_idx: int, debug: bool) -> dict:
    """Run one radon hal -j chunk and return its parsed JSON (or {}).

    Args:
        chunk: Explicit list of .py file paths to pass to radon.
        chunk_idx: Zero-based chunk number, used only in error messages.
        debug: Print debug information on failure.
    """
    try:
        result = subprocess.run(
            [sys.executable, "-m", "radon", "hal", "-j"] + chunk,
            capture_output=True,
            text=True,
            timeout=300,
        )
        if result.returncode != 0:
            # Non-zero exit: report only in debug mode; the chunk is skipped.
            if debug:
                print(
                    f" [debug] radon hal chunk rc={result.returncode}: "
                    f"{result.stderr.strip()[:80]}"
                )
            return {}
        if not result.stdout.strip():
            # Empty stdout with rc 0: nothing parseable in this chunk.
            if debug:
                print(f" [debug] radon hal chunk: empty stdout for files {chunk[:2]}…")
            return {}
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"⚠️ radon hal: JSON parse error in chunk {chunk_idx}: {e}")
    except Exception as e:
        # Broad catch is deliberate: one bad chunk must not abort the scan.
        print(f"⚠️ radon hal chunk failed: {e}")
    return {}
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _collect_hal_raw(py_files: list, debug: bool) -> dict:
    """Run radon hal in chunks and collect raw results."""
    chunk_size = 200
    merged: dict = {}
    for idx, start in enumerate(range(0, len(py_files), chunk_size)):
        chunk = py_files[start : start + chunk_size]
        merged.update(_run_hal_chunk(chunk, idx, debug))
    if debug:
        print(f" [debug] radon hal: {len(merged)} entries from {len(py_files)} files")
        for k, v in list(merged.items())[:2]:
            print(f" {k!r}: {str(v)[:120]}")
    return merged
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def run_radon_hal(root: str, debug: bool = False) -> dict:
    """Get Halstead metrics: filepath → {volume, difficulty, effort, bugs}.

    radon hal -j does not reliably support directory scanning across versions,
    so always use an explicit file list split into chunks of 200.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to Halstead metrics
    """
    root_path = Path(root).resolve()
    if not root_path.exists():
        print(f"⚠️ radon hal: path does not exist: {root_path}")
        return {}
    # Collect every .py file, skipping anything under a dot-directory
    # (.git, .venv, .tox, …). Sorted for deterministic chunking.
    py_files = sorted(
        str(p) for p in root_path.rglob("*.py")
        if not any(part.startswith(".") for part in p.parts)
    )
    if not py_files:
        print(f"⚠️ radon hal: no .py files found under {root_path}")
        return {}
    raw = _collect_hal_raw(py_files, debug)
    out = {}
    for fp, v in raw.items():
        if not isinstance(v, dict):
            continue
        # None means an empty/invalid entry — drop it rather than store zeros.
        entry = _parse_hal_entry(v)
        if entry is not None:
            out[fp] = entry
    return out
|