codebase-stats 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,393 @@
1
+ """Code quality metrics analysis (complexity, maintainability, etc.)."""
2
+
3
+ import bisect
4
+ from .utils import percentile, format_line_ranges, ascii_histogram, blame_header
5
+ from .radon import ( # re-export so callers can still import from here
6
+ cc_rank,
7
+ mi_rank,
8
+ run_radon_json,
9
+ run_radon,
10
+ run_radon_mi,
11
+ run_radon_raw,
12
+ run_radon_hal,
13
+ _HAL_VOLUME,
14
+ _HAL_DIFFICULTY,
15
+ _HAL_EFFORT,
16
+ _HAL_BUGS,
17
+ _parse_hal_list,
18
+ _parse_hal_dict,
19
+ _parse_hal_entry,
20
+ _run_hal_chunk,
21
+ _collect_hal_raw,
22
+ )
23
+
24
+
25
+ def _float_buckets(sorted_values: list, n_bins: int) -> tuple:
26
+ """Build histogram edges and bucket counts for a sorted list of floats.
27
+
28
+ Returns (edges, buckets) wherOverflow values land in the last bucket.
29
+ """
30
+ maximum = sorted_values[-1]
31
+ step = max((maximum + 0.001) / n_bins, 0.001)
32
+ edges = [i * step for i in range(n_bins + 1)]
33
+ buckets = [0] * n_bins
34
+ for v in sorted_values:
35
+ i = min(bisect.bisect_right(edges, v) - 1, n_bins - 1)
36
+ buckets[max(0, i)] += 1
37
+ return edges, buckets
38
+
39
+
40
+ def _cc_int_buckets(scores: list, cutoff: int) -> tuple:
41
+ """Bucket integer CC scores into per-value bars up to cutoff, then a tail.
42
+
43
+ Returns (buckets, has_tail). has_tail is True when max(scores) > cutoff.
44
+ """
45
+ has_tail = int(scores[-1]) > cutoff
46
+ buckets = [0] * (cutoff + (1 if has_tail else 0))
47
+ for v in scores:
48
+ iv = int(v)
49
+ buckets[iv - 1 if iv <= cutoff else -1] += 1
50
+ return buckets, has_tail
51
+
52
+
53
def _blame_cc(cc_files: list, blame_limit: int, width: int) -> None:
    """Print CC blame: files with avg CC above outlier threshold AND coverage < 80%."""
    # Tukey's upper fence (Q3 + 1.5*IQR) over per-file average CC.
    avgs = sorted(entry[0] for entry in cc_files)
    q1 = percentile(avgs, 25)
    q3 = percentile(avgs, 75)
    threshold = q3 + 1.5 * (q3 - q1)
    risky = [entry for entry in cc_files if entry[0] > threshold and entry[2] < 80.0]
    # Worst-first: complexity weighted by how much of the file is uncovered.
    risky.sort(key=lambda entry: entry[0] * (100 - entry[2]), reverse=True)
    blame_header(
        f"files with avg CC > {threshold:.1f} AND coverage < 80%",
        len(risky),
        blame_limit,
        width,
    )
    shown = risky[:blame_limit] if blame_limit else risky
    if not shown:
        print(" ✅ No high-complexity low-coverage files.")
        return
    print(f" {'avg CC':<8} {'max CC':<8} {'Coverage':<10} File")
    print(f" {'─' * 6} {'─' * 6} {'─' * 8} {'─' * 50}")
    for cc_avg, cc_max, cov, path, missing_lines in shown:
        print(
            f" {cc_avg:>5.1f}{cc_rank(cc_avg)} "
            f"{cc_max:>5}{cc_rank(cc_max)} {cov:>6.1f}% {path}"
        )
        ranges = format_line_ranges(missing_lines)
        if ranges:
            print(f" {' ' * 16}📍 {ranges}")
84
+
85
+
86
def _blame_mi(mi_files: list, q1_mi: float, blame_limit: int, width: int) -> None:
    """Print MI blame: files below Q1 maintainability index."""
    offenders = [entry for entry in mi_files if entry[0] < q1_mi]
    offenders.sort(key=lambda entry: entry[0])  # least maintainable first
    blame_header(f"MI below Q1 ({q1_mi:.1f})", len(offenders), blame_limit, width)
    shown = offenders[:blame_limit] if blame_limit else offenders
    if not shown:
        print(" ✅ All files above Q1 MI.")
        return
    print(f" {'MI':<7} {'Coverage':<10} File")
    print(f" {'─' * 5} {'─' * 8} {'─' * 50}")
    for mi, cov, path, missing_lines in shown:
        print(f" {mi:>5.1f}{mi_rank(mi)} {cov:>6.1f}% {path}")
        ranges = format_line_ranges(missing_lines)
        if ranges:
            print(f" {' ' * 8}📍 {ranges}")
104
+
105
+
106
def _blame_raw(raw_files: list, q1_ratio: float, blame_limit: int, width: int) -> None:
    """Print raw blame: files below Q1 comment ratio."""
    offenders = [entry for entry in raw_files if entry[0] < q1_ratio]
    offenders.sort(key=lambda entry: entry[0])  # least commented first
    blame_header(
        f"comment ratio below Q1 ({q1_ratio * 100:.1f}%)", len(offenders), blame_limit, width
    )
    shown = offenders[:blame_limit] if blame_limit else offenders
    if not shown:
        print(" ✅ All files above Q1 comment ratio.")
        return
    print(f" {'Comment%':<10} {'SLOC':<7} {'Coverage':<10} File")
    print(f" {'─' * 8} {'─' * 5} {'─' * 8} {'─' * 50}")
    for ratio, sloc, cov, path, missing_lines in shown:
        print(f" {ratio * 100:>6.1f}% {sloc:>5} {cov:>6.1f}% {path}")
        ranges = format_line_ranges(missing_lines)
        if ranges:
            print(f" {' ' * 16}📍 {ranges}")
126
+
127
+
128
def _blame_hal(hal_files: list, threshold: float, blame_limit: int, width: int) -> None:
    """Print Halstead blame: files above Q3+1.5×IQR bug estimate."""
    outliers = [entry for entry in hal_files if entry[0] > threshold]
    outliers.sort(key=lambda entry: entry[0], reverse=True)  # buggiest first
    blame_header(
        f"Halstead bug outliers Q3+1.5×IQR > {threshold:.2f} bugs",
        len(outliers),
        blame_limit,
        width,
    )
    shown = outliers[:blame_limit] if blame_limit else outliers
    if not shown:
        print(" ✅ No Halstead bug outliers.")
        return
    print(f" {'Bugs':<8} {'Difficulty':<12} {'Coverage':<10} File")
    print(f" {'─' * 6} {'─' * 10} {'─' * 8} {'─' * 50}")
    for bugs, diff, cov, path, missing_lines in shown:
        print(f" {bugs:>6.2f} {diff:>8.1f} {cov:>6.1f}% {path}")
        ranges = format_line_ranges(missing_lines)
        if ranges:
            print(f" {' ' * 18}📍 {ranges}")
152
+
153
+
154
def _cc_rank_summary_line(all_scores: list) -> str:
    """Build rank distribution summary string (e.g. 'A:15 B:8 C:3')."""
    counts: dict[str, int] = {}
    for score in all_scores:
        rank = cc_rank(score)
        counts[rank] = counts.get(rank, 0) + 1
    # Emit in fixed A→F order, skipping ranks with no functions.
    parts = [f"{rank}:{counts[rank]}" for rank in ("A", "B", "C", "D", "E", "F") if rank in counts]
    return " ".join(parts)
163
+
164
+
165
def _print_cc_percentile_table(all_scores: list, maximum, width: int) -> None:
    """Print the percentile table for CC scores."""
    divider = f"{'─' * width}"
    print(divider)
    print(" PERCENTILES (function-level)")
    print(divider)
    # Quartiles get their conventional names; the rest are plain pNN.
    names = {25: "Q1", 50: "Q2/med", 75: "Q3"}
    for pct in (25, 50, 75, 90, 95, 99):
        label = names.get(pct, f"p{pct}")
        val = percentile(all_scores, pct)
        print(f" {label:<8} {val:>4} ({cc_rank(val)})")
    print(f" {'max':<8} {maximum:>4} ({cc_rank(maximum)})")
175
+
176
+
177
def _print_mi_percentile_table(mis: list, width: int) -> None:
    """Print the percentile table for MI scores."""
    divider = f"{'─' * width}"
    print(divider)
    print(" PERCENTILES")
    print(divider)
    names = {25: "Q1", 50: "Q2/med", 75: "Q3"}
    for pct in (25, 50, 75, 90):
        label = names.get(pct, f"p{pct}")
        val = percentile(mis, pct)
        print(f" {label:<8} {val:>5.1f} ({mi_rank(val)})")
    # mis is sorted ascending, so index 0 is the worst (minimum) file.
    print(f" {'min':<8} {mis[0]:>5.1f} ({mi_rank(mis[0])})")
187
+
188
+
189
def show_complexity_histogram(
    stats: dict,
    bins: int = 10,
    blame_limit: int = 20,
    show_blame: bool = True,
    width: int = 80,
):
    """Display cyclomatic complexity histogram and high-complexity blame.

    Args:
        stats: Precomputed statistics; reads "cc_scores" and "file_stats".
        bins: Accepted for signature parity with the other show_* helpers;
            CC uses one integer bucket per value, so this parameter is not
            read in this function.
        blame_limit: Maximum blamed files to display (falsy = show all).
        show_blame: Whether to print the blame section.
        width: Line width for output.
    """
    cc_scores = stats.get("cc_scores", [])  # (cc_value, filepath) pairs
    if not cc_scores:
        print("⚠️ No CC data — run with --radon-root <dir>")
        return

    all_scores = sorted(v for v, _ in cc_scores)
    # Per-file tuples: (avg CC, max CC, coverage %, path, missing line numbers).
    cc_files = [
        (f["cc_avg"], f["cc_max"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("cc_avg") is not None
    ]
    n_funcs = len(cc_scores)
    n_files = len(cc_files)
    mean_cc = sum(all_scores) / n_funcs
    maximum = all_scores[-1]  # all_scores is sorted ascending

    # Integer bins: each CC value 1–N gets its own bucket, tail groups tail.
    # Capping at p99 (and a hard limit of 25) keeps rare outliers from
    # stretching the axis.
    p99_cc = int(percentile(all_scores, 99))
    cutoff = min(p99_cc, 25, int(maximum))
    int_vals = list(range(1, cutoff + 1))
    buckets, has_tail = _cc_int_buckets(all_scores, cutoff)

    labels = [f"CC {i:<2}" for i in int_vals]
    suffixes = [cc_rank(i) for i in int_vals]
    if has_tail:
        # One pooled bucket for everything above the cutoff.
        labels.append(f"CC {cutoff + 1}+")
        suffixes.append(cc_rank(cutoff + 1))

    rank_summary = _cc_rank_summary_line(all_scores)

    print(f"\n{'═' * width}")
    print(" CYCLOMATIC COMPLEXITY HISTOGRAM (per function / method)")
    print(f"{'═' * width}")
    print(
        f" Functions: {n_funcs} Files: {n_files} Mean CC: {mean_cc:.1f} "
        f"Ranks: A=1-5 B=6-10 C=11-15 D=16-20 E/F=21+"
    )
    print(f" Rank distribution: {rank_summary}")
    print()
    ascii_histogram(buckets, labels, suffixes=suffixes, width=width)
    print()
    _print_cc_percentile_table(all_scores, maximum, width)

    if show_blame:
        _blame_cc(cc_files, blame_limit, width)
    print(f"\n{'═' * width}")
243
+
244
+
245
def show_mi_histogram(
    stats: dict, bins: int = 10, blame_limit: int = 20, show_blame: bool = True, width: int = 80
):
    """Display maintainability index histogram and low-MI blame.

    Args:
        stats: Precomputed statistics; reads "file_stats".
        bins: Number of histogram bins across the fixed 0–100 MI scale.
        blame_limit: Maximum blamed files to display (falsy = show all).
        show_blame: Whether to print the blame section.
        width: Line width for output.
    """
    # Per-file tuples: (MI, coverage %, path, missing line numbers).
    mi_files = [
        (f["mi"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("mi") is not None
    ]
    if not mi_files:
        print("⚠️ No MI data — run with --radon-root <dir>")
        return

    mis = sorted(f[0] for f in mi_files)
    n = len(mis)
    mean = sum(mis) / n

    # MI range is 0–100; use a fixed scale for consistent cross-run comparison.
    step = max((100.0 + 0.001) / bins, 0.1)
    edges = [i * step for i in range(bins + 1)]
    buckets = [0] * bins
    for v in mis:
        # bisect_right finds the first edge strictly greater than v;
        # clamp into [0, bins - 1] so boundary values stay inside the range.
        i = min(bisect.bisect_right(edges, v) - 1, bins - 1)
        buckets[max(0, i)] += 1

    labels = [f"{edges[i]:>5.1f}–{edges[i + 1]:<5.1f} MI" for i in range(bins)]
    # Rank each bar by its bucket midpoint.
    suffixes = [mi_rank((edges[i] + edges[i + 1]) / 2) for i in range(bins)]

    print(f"\n{'═' * width}")
    print(" MAINTAINABILITY INDEX HISTOGRAM (higher = more maintainable)")
    print(f"{'═' * width}")
    print(f" Files: {n} Mean MI: {mean:.1f} Ranks: A=20-100 B=10-19 C=0-9")
    print()
    ascii_histogram(buckets, labels, suffixes=suffixes, width=width)
    print()
    _print_mi_percentile_table(mis, width)

    if show_blame:
        q1_mi = percentile(mis, 25)
        _blame_mi(mi_files, q1_mi, blame_limit, width)
    print(f"\n{'═' * width}")
286
+
287
+
288
def show_raw_histogram(
    stats: dict, bins: int = 10, blame_limit: int = 20, show_blame: bool = True, width: int = 80
):
    """Display comment ratio histogram and under-documented blame.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        bins: Number of histogram bins
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    # Per-file tuples: (comment ratio, SLOC, coverage %, path, missing lines).
    raw_files = [
        (f["comment_ratio"], f["sloc"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("comment_ratio") is not None
    ]
    if not raw_files:
        print("⚠️ No raw metrics data — run with --radon-root <dir>")
        return

    ratios = sorted(f[0] for f in raw_files)
    n = len(ratios)
    mean = sum(ratios) / n

    edges, buckets = _float_buckets(ratios, bins)
    labels = [f"{edges[i] * 100:>4.0f}–{edges[i + 1] * 100:<4.0f}% comment" for i in range(bins)]

    print(f"\n{'═' * width}")
    print(" COMMENT RATIO HISTOGRAM (comments + docstrings / SLOC)")
    print(f"{'═' * width}")
    total_sloc = sum(f[1] for f in raw_files)
    # Reconstruct absolute comment counts from ratio × SLOC (int-truncated).
    total_comments = sum(int(f[0] * f[1]) for f in raw_files)
    print(
        f" Files: {n} Total SLOC: {total_sloc:,} "
        f"Overall comment ratio: {total_comments / total_sloc * 100:.1f}%"
    )
    print()
    ascii_histogram(buckets, labels, width=width)
    print()
    print(f"{'─' * width}")
    print(" PERCENTILES")
    print(f"{'─' * width}")
    for pct in (25, 50, 75, 90):
        label = {25: "Q1", 50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        print(f" {label:<8} {percentile(ratios, pct) * 100:>5.1f}%")
    print(f" {'mean':<8} {mean * 100:>5.1f}%")

    if show_blame:
        q1_ratio = percentile(ratios, 25)
        _blame_raw(raw_files, q1_ratio, blame_limit, width)
    print(f"\n{'═' * width}")
340
+
341
+
342
def show_hal_histogram(
    stats: dict, bins: int = 10, blame_limit: int = 20, show_blame: bool = True, width: int = 80
):
    """Display Halstead metrics histogram and bug-prone file blame.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        bins: Number of histogram bins
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    # Per-file tuples: (bug estimate, difficulty, coverage %, path, missing lines).
    hal_files = [
        (f["hal_bugs"], f["hal_difficulty"], f["pct"], f["path"], f["missing_lines"])
        for f in stats["file_stats"]
        if f.get("hal_bugs") is not None
    ]
    if not hal_files:
        print("⚠️ No Halstead data — run with --radon-root <dir>")
        return

    bugs_list = sorted(f[0] for f in hal_files)
    n = len(bugs_list)
    total_bugs = sum(bugs_list)

    edges, buckets = _float_buckets(bugs_list, bins)
    labels = [f"{edges[i]:>5.2f}–{edges[i + 1]:<5.2f} bugs" for i in range(bins)]

    print(f"\n{'═' * width}")
    print(" HALSTEAD METRICS HISTOGRAM (estimated bug count per file)")
    print(f"{'═' * width}")
    print(
        f" Files: {n} Total estimated bugs: {total_bugs:.1f} "
        f"Mean per file: {total_bugs / n:.2f}"
    )
    print()
    ascii_histogram(buckets, labels, width=width)
    print()
    print(f"{'─' * width}")
    print(" PERCENTILES (bugs estimate)")
    print(f"{'─' * width}")
    for pct in (50, 75, 90, 95):
        label = {50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        print(f" {label:<8} {percentile(bugs_list, pct):>6.2f} bugs")
    # bugs_list is sorted ascending, so [-1] is the worst file.
    print(f" {'max':<8} {bugs_list[-1]:>6.2f} bugs")

    if show_blame:
        # Tukey upper fence (Q3 + 1.5*IQR) on bug estimates marks outliers.
        q1_b = percentile(bugs_list, 25)
        q3_b = percentile(bugs_list, 75)
        threshold = q3_b + 1.5 * (q3_b - q1_b)
        _blame_hal(hal_files, threshold, blame_limit, width)
    print(f"\n{'═' * width}")
@@ -0,0 +1,264 @@
1
+ """Radon subprocess wrappers and data-parsing helpers for code quality metrics."""
2
+
3
+ import json
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+
8
# Cyclomatic Complexity rank thresholds (McCabe scale)
# A: 1–5 B: 6–10 C: 11–15 D: 16–20 E: 21–25 F: 26+
_CC_RISK = {
    1: "A", 2: "A", 3: "A", 4: "A", 5: "A",
    6: "B", 7: "B", 8: "B", 9: "B", 10: "B",
    11: "C", 12: "C", 13: "C", 14: "C", 15: "C",
    16: "D", 17: "D", 18: "D", 19: "D", 20: "D",
}


def cc_rank(cc: float) -> str:
    """Get the complexity rank (A-F) for a cyclomatic complexity value.

    Args:
        cc: Cyclomatic complexity score (fractional averages allowed).

    Returns:
        "A" through "F" per the threshold table above.
    """
    # Bug fix: the previous code clamped the lookup key with min(int(cc), 20),
    # which sent every score above 20 to the table's "D" entry, so the E and F
    # ranks were unreachable. Look up the raw integer instead and let values
    # outside the table fall through to the E/F default (21–25 → E, 26+ → F).
    return _CC_RISK.get(int(cc), "E" if cc <= 25 else "F")
22
+
23
+
24
def mi_rank(mi: float) -> str:
    """Get the maintainability rank (A-C) for a maintainability index."""
    if mi >= 20:
        return "A"
    if mi >= 10:
        return "B"
    return "C"
27
+
28
+
29
# radon hal list-format index positions.
# Some radon versions emit each file's Halstead "total" as a flat list; these
# are the offsets of the four fields this module consumes (see _parse_hal_list).
_HAL_VOLUME = 7
_HAL_DIFFICULTY = 8
_HAL_EFFORT = 9
_HAL_BUGS = 11
34
+
35
+
36
def run_radon_json(subcmd: str, root: str, extra_flags: list = None, debug: bool = False) -> dict:
    """Run `radon <subcmd> -j [flags] <root>`, return parsed JSON or {}.

    Args:
        subcmd: Radon subcommand (cc, mi, raw, hal)
        root: Root directory to analyze
        extra_flags: Additional flags for the radon command
        debug: Print debug information

    Returns:
        Parsed JSON output from radon or empty dict on failure
    """
    # Run radon through the current interpreter so the active venv is used.
    cmd = [sys.executable, "-m", "radon", subcmd, "-j"] + (extra_flags or []) + [root]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        if result.returncode != 0:
            print(f"⚠️ radon {subcmd} exited {result.returncode}: {result.stderr.strip()[:120]}")
            return {}
        parsed = json.loads(result.stdout)
        if debug:
            # NOTE(review): assumes radon emits a JSON object here; a JSON
            # array would make .items() raise (caught below) — confirm
            # against the supported radon versions.
            sample = list(parsed.items())[:2]
            print(f" [debug] radon {subcmd}: {len(parsed)} entries")
            for k, v in sample:
                print(f" {k!r}: {str(v)[:120]}")
        return parsed
    except FileNotFoundError:
        print("⚠️ radon not found — install with: pip install radon")
        return {}
    except Exception as e:
        # Deliberate best-effort: any failure (timeout, bad JSON, …) degrades
        # to "no data" so the rest of the report can still be produced.
        print(f"⚠️ radon {subcmd} failed: {e}")
        if debug:
            import traceback
            traceback.print_exc()
        return {}
70
+
71
+
72
def run_radon(root: str, debug: bool = False) -> dict:
    """Get cyclomatic complexity: filepath → {avg, max, n_blocks, scores}.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to complexity metrics
    """
    # --min A includes all ranks (default is C+ only, which misses most files)
    data = run_radon_json("cc", root, extra_flags=["--min", "A"], debug=debug)
    result = {}
    for filepath, blocks in data.items():
        if not blocks:
            continue
        scores = [block.get("complexity", 0) for block in blocks if isinstance(block, dict)]
        if not scores:
            continue
        result[filepath] = {
            "avg": sum(scores) / len(scores),
            "max": max(scores),
            "n_blocks": len(scores),
            "scores": scores,  # individual function CC values
        }
    return result
98
+
99
+
100
def run_radon_mi(root: str, debug: bool = False) -> dict:
    """Get maintainability index: filepath → {mi: float, rank: str}.

    radon mi -j emits either a bare float or a dict with an "mi" key
    depending on version — handle both.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to MI metrics
    """
    out = {}
    for path, value in run_radon_json("mi", root, debug=debug).items():
        if isinstance(value, dict):
            score = float(value.get("mi", value.get("value", 0.0)))
        elif isinstance(value, (int, float)):
            score = float(value)
        else:
            # Unrecognized shape — skip rather than guess.
            continue
        out[path] = {"mi": score, "rank": mi_rank(score)}
    return out
124
+
125
+
126
def run_radon_raw(root: str, debug: bool = False) -> dict:
    """Get raw metrics: filepath → {loc, sloc, comments, blank, comment_ratio}.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to raw metrics
    """
    out = {}
    for path, metrics in run_radon_json("raw", root, debug=debug).items():
        if not isinstance(metrics, dict):
            continue
        sloc = metrics.get("sloc", 0)
        # "multi" counts docstrings; fold them in with regular comments.
        comments = metrics.get("comments", 0) + metrics.get("multi", 0)
        out[path] = {
            "loc": metrics.get("loc", 0),
            "sloc": sloc,
            "comments": comments,
            "blank": metrics.get("blank", 0),
            "comment_ratio": comments / sloc if sloc else 0.0,
        }
    return out
151
+
152
+
153
def _parse_hal_list(t: list) -> dict | None:
    """Parse radon hal list-format total into metrics dict, or None if empty."""
    # Too short to index, or all-zero (radon's placeholder for "no data").
    if len(t) < 12 or all(x == 0 for x in t):
        return None
    positions = (
        ("volume", _HAL_VOLUME),
        ("difficulty", _HAL_DIFFICULTY),
        ("effort", _HAL_EFFORT),
        ("bugs", _HAL_BUGS),
    )
    # `or 0` guards against explicit nulls before the float conversion.
    return {name: float(t[idx] or 0) for name, idx in positions}
163
+
164
+
165
+ def _parse_hal_dict(t: dict) -> dict:
166
+ """Parse radon hal dict-format total into metrics dict."""
167
+ return {
168
+ "volume": float(t.get("volume", 0) or 0),
169
+ "difficulty": float(t.get("difficulty", 0) or 0),
170
+ "effort": float(t.get("effort", 0) or 0),
171
+ "bugs": float(t.get("bugs") or 0.0),
172
+ }
173
+
174
+
175
def _parse_hal_entry(v: dict) -> dict | None:
    """Parse a single radon hal JSON entry into {volume,difficulty,effort,bugs}.

    radon hal -j may emit either a list or dict for the 'total' key depending
    on the radon version. Returns None for empty/invalid entries.
    """
    # Older radon versions put the metrics at the top level, newer ones
    # nest them under "total" — fall back to the entry itself.
    total = v.get("total", v)
    try:
        if isinstance(total, dict):
            return _parse_hal_dict(total)
        if isinstance(total, list):
            return _parse_hal_list(total)
    except (IndexError, TypeError, ValueError):
        # Malformed entry — treated the same as missing data.
        pass
    return None
190
+
191
+
192
def _run_hal_chunk(chunk: list, chunk_idx: int, debug: bool) -> dict:
    """Run one radon hal -j chunk and return its parsed JSON (or {}).

    Args:
        chunk: Explicit list of .py file paths to pass to radon.
        chunk_idx: Ordinal of this chunk (used only in error messages).
        debug: Print diagnostic details on failure paths.

    Returns:
        Parsed JSON mapping filepath → Halstead data, or {} on any failure.
    """
    try:
        # Run radon through the current interpreter so the active venv is used.
        result = subprocess.run(
            [sys.executable, "-m", "radon", "hal", "-j"] + chunk,
            capture_output=True,
            text=True,
            timeout=300,
        )
        if result.returncode != 0:
            if debug:
                print(
                    f" [debug] radon hal chunk rc={result.returncode}: "
                    f"{result.stderr.strip()[:80]}"
                )
            return {}
        if not result.stdout.strip():
            # Empty stdout would make json.loads raise; treat as "no data".
            if debug:
                print(f" [debug] radon hal chunk: empty stdout for files {chunk[:2]}…")
            return {}
        return json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"⚠️ radon hal: JSON parse error in chunk {chunk_idx}: {e}")
    except Exception as e:
        # Deliberate best-effort: a failed chunk degrades to "no data"
        # for its files instead of aborting the whole scan.
        print(f"⚠️ radon hal chunk failed: {e}")
    return {}
218
+
219
+
220
def _collect_hal_raw(py_files: list, debug: bool) -> dict:
    """Run radon hal in chunks and collect raw results."""
    chunk_size = 200  # keep each command line well under OS argv limits
    raw: dict = {}
    for idx, start in enumerate(range(0, len(py_files), chunk_size)):
        raw.update(_run_hal_chunk(py_files[start : start + chunk_size], idx, debug))
    if debug:
        print(f" [debug] radon hal: {len(raw)} entries from {len(py_files)} files")
        for k, v in list(raw.items())[:2]:
            print(f" {k!r}: {str(v)[:120]}")
    return raw
230
+
231
+
232
def run_radon_hal(root: str, debug: bool = False) -> dict:
    """Get Halstead metrics: filepath → {volume, difficulty, effort, bugs}.

    radon hal -j does not reliably support directory scanning across versions,
    so always use an explicit file list split into chunks of 200.

    Args:
        root: Root directory to analyze
        debug: Print debug information

    Returns:
        Dictionary mapping filepaths to Halstead metrics
    """
    root_path = Path(root).resolve()
    if not root_path.exists():
        print(f"⚠️ radon hal: path does not exist: {root_path}")
        return {}
    # Recursively collect .py files, skipping anything under a dot-directory
    # (.git, .venv, .tox, …); sorted for deterministic chunking.
    py_files = sorted(
        str(p) for p in root_path.rglob("*.py")
        if not any(part.startswith(".") for part in p.parts)
    )
    if not py_files:
        print(f"⚠️ radon hal: no .py files found under {root_path}")
        return {}
    raw = _collect_hal_raw(py_files, debug)
    out = {}
    for fp, v in raw.items():
        if not isinstance(v, dict):
            continue
        # _parse_hal_entry handles both list- and dict-shaped totals and
        # returns None for empty/invalid entries, which are dropped here.
        entry = _parse_hal_entry(v)
        if entry is not None:
            out[fp] = entry
    return out