codebase-stats 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,283 @@
1
+ """CodebaseStatsReporter — Programmatic report generation for code metrics."""
2
+
3
+ import sys
4
+ from io import StringIO
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from .core import (
10
+ load_coverage,
11
+ load_report,
12
+ precompute_coverage_stats,
13
+ )
14
+ from .coverage import scan_pragma_counts
15
+ from .metrics import (
16
+ run_radon,
17
+ run_radon_mi,
18
+ run_radon_raw,
19
+ run_radon_hal,
20
+ )
21
+ from .lowcov import parse_sorts
22
+ from .coverage import show_coverage_histogram
23
+ from .duration import show_duration_histogram
24
+ from .sizes import show_file_size_distribution
25
+ from .metrics import (
26
+ show_complexity_histogram,
27
+ show_mi_histogram,
28
+ show_raw_histogram,
29
+ show_hal_histogram,
30
+ )
31
+ from .tree import analyze_tree
32
+ from .lowcov import show_low_coverage
33
+
34
+
35
class CodebaseStatsReporter:
    """Generate comprehensive code quality reports.

    Section renderers in this package print to stdout; this class redirects
    stdout into an internal buffer and returns the assembled report text.
    """

    def __init__(
        self,
        coverage_file: str = "coverage.json",
        report_file: Optional[str] = None,
        radon_root: Optional[str] = None,
        fs_root: str = ".",
        tree_root: Optional[str] = None,
    ):
        """
        Initialize the reporter.

        Args:
            coverage_file: Path to coverage.json file
            report_file: Path to pytest-json-report file
            radon_root: Root directory for radon analysis (CC, MI, etc.)
            fs_root: Root directory for file size analysis
            tree_root: Root directory for tree analysis
        """
        self.coverage_file = coverage_file
        self.report_file = report_file
        self.radon_root = radon_root
        self.fs_root = fs_root
        self.tree_root = tree_root
        # Accumulates every captured section; generate_full_report() returns
        # its value, so one reporter instance = one report.
        self.output = StringIO()

    def _capture_output(self, func, *args, **kwargs):
        """Call ``func`` with stdout redirected into ``self.output``."""
        old_stdout = sys.stdout
        sys.stdout = self.output
        try:
            func(*args, **kwargs)
        finally:
            # Always restore stdout, even when the renderer raises.
            sys.stdout = old_stdout

    def _run_radon_analyses(self, debug: bool = False) -> tuple[dict, dict, dict, dict]:
        """Run all four radon analyses (CC, MI, raw, Halstead) in parallel.

        Args:
            debug: Forwarded to each radon runner.

        Returns:
            Tuple of (cc, mi, raw, hal) result maps. A failed analysis
            contributes an empty dict and prints a warning instead of
            aborting the whole report.
        """
        runners = {
            "cc": lambda: run_radon(self.radon_root, debug=debug),
            "mi": lambda: run_radon_mi(self.radon_root, debug=debug),
            "raw": lambda: run_radon_raw(self.radon_root, debug=debug),
            "hal": lambda: run_radon_hal(self.radon_root, debug=debug),
        }
        results = {key: {} for key in runners}
        with ThreadPoolExecutor(max_workers=4) as pool:
            futures = {pool.submit(fn): key for key, fn in runners.items()}
            for future in as_completed(futures):
                key = futures[future]
                try:
                    results[key] = future.result()
                except Exception as e:
                    print(f"⚠️ radon {key} failed: {e}")
        return (
            results["cc"],
            results["mi"],
            results["raw"],
            results["hal"],
        )

    def generate_full_report(
        self,
        bins: int = 10,
        blame_limit: int = 20,
        include_coverage: bool = True,
        include_duration: bool = True,
        include_sizes: bool = True,
        include_complexity: bool = True,
        include_mi: bool = True,
        include_raw: bool = True,
        include_hal: bool = True,
        include_tree: bool = True,
        include_lowcov: bool = True,
        show_blame: bool = True,
        threshold: float = 90.0,
        max_threshold: Optional[float] = None,
        top_n: int = 20,
        extension: str = "py",
        fs_percentiles: Optional[list[int]] = None,
        sort_specs: Optional[list[str]] = None,
        show_lines: bool = False,
        slow_threshold: float = 1.0,
        fs_above: Optional[int] = None,
        debug_radon: bool = False,
    ) -> str:
        """
        Generate a comprehensive code quality report.

        Args:
            bins: Number of histogram bins
            blame_limit: Maximum entries in quality blame sections
            include_*: Sections to include in report
            show_blame: Whether to show quality blame sections
            threshold: Show files below this coverage % (default: 90.0)
            max_threshold: Upper bound % (range filter)
            top_n: Max rows in file listing (0 = all)
            extension: File extension to measure
            fs_percentiles: List of percentiles for file-size histogram
            sort_specs: Sort field specifications
            show_lines: Print missing line numbers
            slow_threshold: Slow-test threshold in seconds
            fs_above: List files above this percentile
            debug_radon: Print debug info from radon

        Returns:
            Report as string
        """
        if fs_percentiles is None:
            fs_percentiles = [25, 50, 75, 90, 95, 99]
        if sort_specs is None:
            sort_specs = ["priority:desc"]

        # Load radon data if needed.
        # BUG FIX: the old chained assignment
        # ``complexity_map = mi_map = raw_map = hal_map = {}`` bound all four
        # names to the SAME dict object; use four distinct dicts instead.
        complexity_map: dict = {}
        mi_map: dict = {}
        raw_map: dict = {}
        hal_map: dict = {}
        if include_complexity or include_mi or include_raw or include_hal:
            if self.radon_root:
                complexity_map, mi_map, raw_map, hal_map = self._run_radon_analyses(debug_radon)

        # Scan pragmas (prefer the radon root; fall back to a non-default fs root).
        pragma_counts: dict = {}
        pragma_root = self.radon_root if self.radon_root else (self.fs_root if self.fs_root != "." else None)
        if pragma_root:
            pragma_counts = scan_pragma_counts(pragma_root)

        # Load coverage data and join it with the per-file metric maps.
        cov_data = load_coverage(self.coverage_file)
        stats = precompute_coverage_stats(cov_data, complexity_map, mi_map, raw_map, hal_map)

        # Coverage histogram
        if include_coverage:
            self._capture_output(
                show_coverage_histogram,
                stats,
                bins=bins,
                blame_limit=blame_limit,
                show_blame=show_blame,
                threshold=threshold,
                pragma_counts=pragma_counts,
            )

        # Duration histogram (best-effort: a bad report file degrades to a warning)
        if include_duration and self.report_file:
            try:
                report_data = load_report(self.report_file)
                self._capture_output(
                    show_duration_histogram,
                    report_data,
                    bins=bins,
                    slow_threshold=slow_threshold,
                    blame_limit=blame_limit,
                    show_blame=show_blame,
                )
            except Exception as e:
                self.output.write(f"⚠️ Could not load report file: {e}\n")

        # File size distribution
        if include_sizes:
            self._capture_output(
                show_file_size_distribution,
                root=self.fs_root,
                extension=extension,
                percentiles=fs_percentiles,
                show_above_pct=fs_above,
                bins=bins,
                blame_limit=blame_limit,
                show_blame=show_blame,
            )

        # Low coverage listing
        if include_lowcov:
            sorts = parse_sorts(sort_specs, "desc")
            self._capture_output(
                show_low_coverage,
                stats,
                threshold=threshold,
                max_threshold=max_threshold,
                top_n=top_n if top_n > 0 else None,
                sorts=sorts,
                show_lines=show_lines,
            )

        # Complexity histogram
        if include_complexity and self.radon_root:
            self._capture_output(
                show_complexity_histogram,
                stats,
                bins=bins,
                blame_limit=blame_limit,
                show_blame=show_blame,
            )

        # Maintainability index histogram
        if include_mi and self.radon_root:
            self._capture_output(
                show_mi_histogram,
                stats,
                bins=bins,
                blame_limit=blame_limit,
                show_blame=show_blame,
            )

        # Raw metrics histogram
        if include_raw and self.radon_root:
            self._capture_output(
                show_raw_histogram,
                stats,
                bins=bins,
                blame_limit=blame_limit,
                show_blame=show_blame,
            )

        # Halstead metrics histogram
        if include_hal and self.radon_root:
            self._capture_output(
                show_hal_histogram,
                stats,
                bins=bins,
                blame_limit=blame_limit,
                show_blame=show_blame,
            )

        # File tree analysis
        if include_tree and self.tree_root:
            self._capture_output(
                analyze_tree,
                self.tree_root,
                bins=bins,
                blame_limit=blame_limit,
            )

        return self.output.getvalue()

    def save_report(self, output_file: str, **kwargs) -> Path:
        """
        Generate and save report to file.

        Args:
            output_file: Path to save report to
            **kwargs: Arguments passed to generate_full_report()

        Returns:
            Path to saved file
        """
        report = self.generate_full_report(**kwargs)
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # BUG FIX: the report contains non-ASCII characters (═, █, ⚠️); an
        # explicit encoding avoids UnicodeEncodeError on non-UTF-8 locales.
        output_path.write_text(report, encoding="utf-8")
        return output_path
@@ -0,0 +1,100 @@
1
+ """File size distribution analysis."""
2
+
3
+ from pathlib import Path
4
+ from .utils import percentile, ascii_histogram, blame_header
5
+
6
+
7
def show_file_size_distribution(
    root: str = ".",
    extension: str = "py",
    percentiles=None,
    show_above_pct=None,
    bins: int = 10,
    blame_limit: int = 20,
    show_blame: bool = True,
    width: int = 80,
):
    """Display file size distribution histogram.

    Args:
        root: Root directory to scan
        extension: File extension to analyze
        percentiles: List of percentiles to display
        show_above_pct: List files above this percentile (previously this
            parameter was accepted but silently ignored)
        bins: Number of histogram bins
        blame_limit: Maximum blamed files to display (0 = show all)
        show_blame: Whether to show outlier blame
        width: Line width for output
    """
    if percentiles is None:
        percentiles = [25, 50, 75, 90, 95, 99]

    files = []
    for path in Path(root).rglob(f"*.{extension}"):
        # Skip anything inside hidden directories (".git", ".venv", ...).
        if any(part.startswith(".") for part in path.parts):
            continue
        try:
            text = path.read_text(encoding="utf-8", errors="ignore")
            # BUG FIX: splitlines() also counts a final line that lacks a
            # trailing newline; count("\n") undercounted such files by one.
            files.append((len(text.splitlines()), str(path)))
        except OSError:
            pass

    if not files:
        print(f"No .{extension} files found under '{root}'")
        return

    files.sort()
    counts = [f[0] for f in files]
    n = len(counts)
    minimum = counts[0]
    maximum = counts[-1]
    avg = sum(counts) / n

    # Equal-width buckets over [minimum, maximum]; step >= 1 guarantees the
    # top edge strictly exceeds the maximum.
    step = max((maximum - minimum + 1) / bins, 1)
    edges = [minimum + i * step for i in range(bins + 1)]
    buckets = [0] * bins
    for c in counts:
        # Direct index computation replaces the old per-count edge scan;
        # min() clamps against float rounding at the top edge.
        buckets[min(int((c - minimum) / step), bins - 1)] += 1

    labels = [f"{int(edges[i]):>5}–{int(edges[i + 1]):<5} lines" for i in range(bins)]

    print(f"\n{'═' * width}")
    print(f" FILE SIZE DISTRIBUTION (*.{extension} under '{root}')")
    print(f"{'═' * width}")
    print(f" Files: {n} Total lines: {sum(counts):,} Mean: {avg:.0f} lines")
    print()
    ascii_histogram(buckets, labels, width=width)
    print()

    print(f"{'─' * width}")
    print(" PERCENTILES")
    print(f"{'─' * width}")
    print(f" {'min':<8} {minimum:>6} lines")
    for pct in percentiles:
        label = {25: "Q1", 50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        print(f" {label:<8} {int(percentile(counts, pct)):>6} lines")
    print(f" {'max':<8} {maximum:>6} lines")
    print(f" {'mean':<8} {avg:>6.0f} lines")

    # BUG FIX: show_above_pct was documented and forwarded by the reporter
    # but never used; list files above the requested percentile.
    if show_above_pct is not None:
        cutoff = int(percentile(counts, show_above_pct))
        above = [(lc, fp) for lc, fp in files if lc > cutoff]
        print(f"\n{'─' * width}")
        print(f" FILES ABOVE p{show_above_pct} (> {cutoff} lines) {len(above)}")
        print(f"{'─' * width}")
        for lc, fp in sorted(above, reverse=True):
            print(f" {lc:>6} lines {fp}")

    if show_blame:
        q1_lines = int(percentile(counts, 25))
        q3_lines = int(percentile(counts, 75))
        # Tukey upper fence: Q3 + 1.5×IQR.
        iqr_lines = q3_lines + int(1.5 * (q3_lines - q1_lines))
        blamed_files = sorted(
            [(lc, fp) for lc, fp in files if lc > iqr_lines], key=lambda x: x[0], reverse=True
        )
        blame_header(
            f"size outliers Q3 + 1.5×IQR > {iqr_lines} lines", len(blamed_files), blame_limit, width
        )
        # blame_limit == 0 means "show everything".
        display = blamed_files if not blame_limit else blamed_files[:blame_limit]
        if display:
            for lc, fp in display:
                print(f" {lc:>6} lines {fp}")
        else:
            print(" ✅ No file size outliers.")

    print(f"\n{'═' * width}")
codebase_stats/tree.py ADDED
@@ -0,0 +1,144 @@
1
+ """File tree structure analysis."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from .utils import percentile, ascii_histogram, blame_header
6
+
7
+
8
def _count_histogram(sorted_counts: list, bins: int, unit: str, width: int) -> None:
    """Print an equal-width bucket histogram of per-directory counts."""
    top = sorted_counts[-1]
    # +0.1 keeps the maximum strictly inside the last bucket; step >= 1.
    step = max((top + 0.1) / bins, 1)
    buckets = [0] * bins
    for c in sorted_counts:
        # Direct index computation (O(n)) instead of scanning every bucket
        # per count (O(n × bins)); min() clamps the top edge.
        buckets[min(int(c / step), bins - 1)] += 1
    labels = [f"{int(i * step):>4}–{int((i + 1) * step):<4} {unit}" for i in range(bins)]
    ascii_histogram(buckets, labels, width=width)


def _count_stats(sorted_counts: list, total: int, n_dirs: int, heading: str) -> None:
    """Print min/max/mean and p50/p75/p90 for one count distribution."""
    print(f"\n STATS ({heading}):")
    print(
        f" min: {sorted_counts[0]:<6} max: {sorted_counts[-1]:<6} mean: {total / n_dirs:>5.1f}"
    )
    print(
        f" p50: {int(percentile(sorted_counts, 50)):<6} p75: {int(percentile(sorted_counts, 75)):<6} p90: {int(percentile(sorted_counts, 90)):<6}"
    )


def _blame_count_outliers(
    dir_stats: list, key: str, sorted_counts: list, noun: str, unit: str, blame_limit: int, width: int
) -> None:
    """Print directories whose count exceeds the Tukey fence (Q3 + 1.5×IQR)."""
    q1 = percentile(sorted_counts, 25)
    q3 = percentile(sorted_counts, 75)
    boundary = q3 + 1.5 * (q3 - q1)
    blamed = sorted(
        [d for d in dir_stats if d[key] > boundary],
        key=lambda x: x[key],
        reverse=True,
    )
    blame_header(f"{noun} outliers (Q3+1.5×IQR > {boundary:g})", len(blamed), blame_limit, width)
    # blame_limit == 0 means "show all", matching blame_header's hint text.
    display = blamed if not blame_limit else blamed[:blame_limit]
    if display:
        for d in display:
            print(f" {d[key]:>5} {unit} {d['path']}")
    else:
        print(f" ✅ No {noun} outliers.")


def analyze_tree(root_path: str, bins: int = 10, blame_limit: int = 20, width: int = 80):
    """Analyze file tree structure, collecting metrics per directory.

    Displays statistics and histograms about file and folder distribution,
    then blames outlier directories.

    Args:
        root_path: Root directory to analyze
        bins: Number of histogram bins
        blame_limit: Maximum blamed directories to display (0 = show all)
        width: Line width for output
    """
    root = Path(root_path).resolve()

    dir_stats = []  # one {"path", "files", "folders"} record per directory

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune __pycache__ and hidden directories in place so walk skips them.
        dirnames[:] = [d for d in dirnames if d != "__pycache__" and not d.startswith(".")]

        rel_path = os.path.relpath(dirpath, root)
        if rel_path == ".":
            rel_path = "./"

        dir_stats.append({"path": rel_path, "files": len(filenames), "folders": len(dirnames)})

    if not dir_stats:
        print(f"⚠️ No directories found under {root}")
        return

    n_dirs = len(dir_stats)
    file_counts = sorted(d["files"] for d in dir_stats)
    folder_counts = sorted(d["folders"] for d in dir_stats)

    total_files = sum(file_counts)
    total_folders = sum(folder_counts)

    print(f"\n{'═' * width}")
    print(f" FILE TREE ANALYSIS: {root}")
    print(f"{'═' * width}")
    print(
        f" Total Nodes: {n_dirs:<8} Total Files: {total_files:<8} Total Folders: {total_folders}"
    )

    print("\n DISTRIBUTION: FILES PER FOLDER")
    print(f"{'─' * width}")
    _count_histogram(file_counts, bins, "files", width)
    _count_stats(file_counts, total_files, n_dirs, "Files per Folder")

    print("\n DISTRIBUTION: SUBFOLDERS PER FOLDER")
    print(f"{'─' * width}")
    _count_histogram(folder_counts, bins, "dirs ", width)
    _count_stats(folder_counts, total_folders, n_dirs, "Subfolders per Folder")

    # BUG FIX: the previous `if blame_limit > 0` gate skipped the blame
    # sections entirely when blame_limit == 0, even though 0 is documented
    # (and handled below) as "show all outliers".
    _blame_count_outliers(dir_stats, "files", file_counts, "file count", "files", blame_limit, width)
    _blame_count_outliers(dir_stats, "folders", folder_counts, "subfolder count", "dirs", blame_limit, width)

    print(f"\n{'═' * width}\n")
@@ -0,0 +1,86 @@
1
+ """Shared utility functions for codebase analysis."""
2
+
3
+ import math
4
+
5
+
6
def percentile(sorted_vals: list, pct: int):
    """Return the value at the pth percentile of an ascending-sorted list.

    Uses the nearest-rank (floor) method — index floor((n-1)·pct/100),
    clamped into [0, n-1] — with no interpolation. The input must already
    be sorted.
    """
    last = len(sorted_vals) - 1
    idx = last * pct // 100
    return sorted_vals[min(max(idx, 0), last)]
11
+
12
+
13
def format_line_ranges(lines: list) -> str:
    """Collapse line numbers into a compact, human-readable ranges string.

    Args:
        lines: List of line numbers (duplicates and ordering don't matter)

    Returns:
        String like "1-5, 10, 15-18"; empty string for no input.
    """
    if not lines:
        return ""
    ordered = sorted(set(lines))
    pieces = []
    lo = hi = ordered[0]
    for num in ordered[1:]:
        if num == hi + 1:
            # Still contiguous — extend the current run.
            hi = num
            continue
        pieces.append(f"{lo}-{hi}" if lo != hi else str(lo))
        lo = hi = num
    pieces.append(f"{lo}-{hi}" if lo != hi else str(lo))
    return ", ".join(pieces)
34
+
35
+
36
def ascii_histogram(
    counts: list, labels: list, bar_width: int = 36, suffixes: list = None, width: int = 80
) -> None:
    """Render a fixed-width ASCII bar chart to stdout.

    Args:
        counts: Bar heights, one per row
        labels: One label per bar (padded to the longest)
        bar_width: Maximum bar length in characters
        suffixes: Optional per-row trailing annotation
        width: Total line width (kept for signature symmetry with callers)
    """
    tallest = max(counts) or 1  # avoid division by zero when all bars are 0
    total = sum(counts)
    label_w = max(len(lbl) for lbl in labels)
    for i, (label, count) in enumerate(zip(labels, counts)):
        bar = "█" * round(count / tallest * bar_width)
        pct = count / total * 100 if total else 0
        note = ""
        if suffixes and i < len(suffixes) and suffixes[i]:
            note = f" {suffixes[i]}"
        print(f" {label:<{label_w}} │{bar:<{bar_width}} {count:>4} ({pct:4.1f}%){note}")
56
+
57
+
58
def blame_header(label: str, total: int, limit: int, width: int = 80) -> None:
    """Print the divider-framed header for a quality-blame section.

    Args:
        label: Description of what's being blamed
        total: Total number of blamed items
        limit: Current display limit for blamed items (0 means "show all")
        width: Total line width
    """
    print(f"\n{'─' * width}")
    truncated = bool(limit) and total > limit
    tail = f" (showing top {limit}, --blame-limit 0 for all)" if truncated else ""
    print(f" QUALITY BLAME — {label} {total}{tail}")
    print(f"{'─' * width}")
71
+
72
+
73
def fmt_seconds(seconds: float) -> str:
    """Format a duration in seconds as a short human-readable string.

    Examples:
        0.5 -> "500ms"
        1.5 -> "1.50s"
        125 -> "2m 05.0s"
    """
    if seconds < 1:
        millis = seconds * 1000
        return f"{millis:.0f}ms"
    if seconds < 60:
        return f"{seconds:.2f}s"
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes)}m {remainder:04.1f}s"