codebase-stats 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_stats/__init__.py +148 -0
- codebase_stats/core.py +240 -0
- codebase_stats/coverage.py +295 -0
- codebase_stats/duration.py +245 -0
- codebase_stats/lowcov.py +204 -0
- codebase_stats/metrics.py +393 -0
- codebase_stats/radon.py +264 -0
- codebase_stats/reporter.py +283 -0
- codebase_stats/sizes.py +100 -0
- codebase_stats/tree.py +144 -0
- codebase_stats/utils.py +86 -0
- codebase_stats-0.0.1.dist-info/METADATA +376 -0
- codebase_stats-0.0.1.dist-info/RECORD +16 -0
- codebase_stats-0.0.1.dist-info/WHEEL +5 -0
- codebase_stats-0.0.1.dist-info/entry_points.txt +2 -0
- codebase_stats-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Codebase Statistics Analysis Library
|
|
2
|
+
|
|
3
|
+
A comprehensive library for analyzing Python codebase metrics including coverage,
|
|
4
|
+
test duration, code complexity, maintainability, and more.
|
|
5
|
+
|
|
6
|
+
Main Modules:
|
|
7
|
+
- core: Data loading and preprocessing
|
|
8
|
+
- coverage: Coverage analysis and reporting
|
|
9
|
+
- metrics: Code quality metrics (complexity, maintainability, etc.)
|
|
10
|
+
- duration: Test duration analysis
|
|
11
|
+
- sizes: File size distribution analysis
|
|
12
|
+
- tree: Directory structure analysis
|
|
13
|
+
- lowcov: Low-coverage file identification and prioritization
|
|
14
|
+
- utils: Utility functions for formatting and analysis
|
|
15
|
+
|
|
16
|
+
Example Usage:
|
|
17
|
+
from codebase_stats import load_coverage, precompute_coverage_stats
|
|
18
|
+
from codebase_stats import show_coverage_histogram
|
|
19
|
+
|
|
20
|
+
# Load and analyze coverage data
|
|
21
|
+
data = load_coverage("coverage.json")
|
|
22
|
+
stats = precompute_coverage_stats(data)
|
|
23
|
+
|
|
24
|
+
# Display coverage report
|
|
25
|
+
show_coverage_histogram(stats, bins=10, show_blame=True)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
# Core data functions
|
|
29
|
+
from .core import (
|
|
30
|
+
load_coverage,
|
|
31
|
+
load_report,
|
|
32
|
+
precompute_coverage_stats,
|
|
33
|
+
extract_layer,
|
|
34
|
+
build_suffix_index,
|
|
35
|
+
suffix_lookup,
|
|
36
|
+
LAYER_MAP,
|
|
37
|
+
LAYER_ORDER,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Coverage analysis
|
|
41
|
+
from .coverage import (
|
|
42
|
+
show_coverage_histogram,
|
|
43
|
+
scan_pragma_counts,
|
|
44
|
+
scan_pragma_intervals,
|
|
45
|
+
show_pragma_histogram,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Code quality metrics
|
|
49
|
+
from .metrics import (
|
|
50
|
+
run_radon,
|
|
51
|
+
run_radon_mi,
|
|
52
|
+
run_radon_raw,
|
|
53
|
+
run_radon_hal,
|
|
54
|
+
show_complexity_histogram,
|
|
55
|
+
show_mi_histogram,
|
|
56
|
+
show_raw_histogram,
|
|
57
|
+
show_hal_histogram,
|
|
58
|
+
cc_rank,
|
|
59
|
+
mi_rank,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Test duration
|
|
63
|
+
from .duration import (
|
|
64
|
+
show_duration_histogram,
|
|
65
|
+
test_duration,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# File sizes
|
|
69
|
+
from .sizes import (
|
|
70
|
+
show_file_size_distribution,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Tree analysis
|
|
74
|
+
from .tree import (
|
|
75
|
+
analyze_tree,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Low coverage reporting
|
|
79
|
+
from .lowcov import (
|
|
80
|
+
show_low_coverage,
|
|
81
|
+
parse_sorts,
|
|
82
|
+
priority_score,
|
|
83
|
+
VALID_SORT_FIELDS,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# Utilities
|
|
87
|
+
from .utils import (
|
|
88
|
+
percentile,
|
|
89
|
+
format_line_ranges,
|
|
90
|
+
ascii_histogram,
|
|
91
|
+
blame_header,
|
|
92
|
+
fmt_seconds,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# Reporter
|
|
96
|
+
from .reporter import (
|
|
97
|
+
CodebaseStatsReporter,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
__version__ = "0.1.0"
|
|
101
|
+
|
|
102
|
+
__all__ = [
|
|
103
|
+
# Core
|
|
104
|
+
"load_coverage",
|
|
105
|
+
"load_report",
|
|
106
|
+
"precompute_coverage_stats",
|
|
107
|
+
"extract_layer",
|
|
108
|
+
"build_suffix_index",
|
|
109
|
+
"suffix_lookup",
|
|
110
|
+
"LAYER_MAP",
|
|
111
|
+
"LAYER_ORDER",
|
|
112
|
+
# Coverage
|
|
113
|
+
"show_coverage_histogram",
|
|
114
|
+
"scan_pragma_counts",
|
|
115
|
+
"scan_pragma_intervals",
|
|
116
|
+
"show_pragma_histogram",
|
|
117
|
+
# Metrics
|
|
118
|
+
"run_radon",
|
|
119
|
+
"run_radon_mi",
|
|
120
|
+
"run_radon_raw",
|
|
121
|
+
"run_radon_hal",
|
|
122
|
+
"show_complexity_histogram",
|
|
123
|
+
"show_mi_histogram",
|
|
124
|
+
"show_raw_histogram",
|
|
125
|
+
"show_hal_histogram",
|
|
126
|
+
"cc_rank",
|
|
127
|
+
"mi_rank",
|
|
128
|
+
# Duration
|
|
129
|
+
"show_duration_histogram",
|
|
130
|
+
"test_duration",
|
|
131
|
+
# Sizes
|
|
132
|
+
"show_file_size_distribution",
|
|
133
|
+
# Tree
|
|
134
|
+
"analyze_tree",
|
|
135
|
+
# Low coverage
|
|
136
|
+
"show_low_coverage",
|
|
137
|
+
"parse_sorts",
|
|
138
|
+
"priority_score",
|
|
139
|
+
"VALID_SORT_FIELDS",
|
|
140
|
+
# Utils
|
|
141
|
+
"percentile",
|
|
142
|
+
"format_line_ranges",
|
|
143
|
+
"ascii_histogram",
|
|
144
|
+
"blame_header",
|
|
145
|
+
"fmt_seconds",
|
|
146
|
+
# Reporter
|
|
147
|
+
"CodebaseStatsReporter",
|
|
148
|
+
]
|
codebase_stats/core.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Core data loading and preprocessing functions."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_coverage(path: str) -> dict:
    """Read a coverage.json file and return its parsed contents.

    Args:
        path: Location of the coverage.json file.

    Returns:
        The parsed coverage data dictionary.

    Raises:
        SystemExit: On a missing file or invalid JSON.
        KeyError: If the document lacks the top-level 'files' key.
    """
    try:
        with open(path, encoding="utf-8") as handle:
            parsed = json.load(handle)
    except FileNotFoundError:
        sys.exit(f"❌ File not found: {path}")
    except json.JSONDecodeError:
        sys.exit(f"❌ Invalid JSON: {path}")
    # A coverage.json must carry a 'files' section; anything else is not
    # usable downstream, so fail loudly for the caller to handle.
    if "files" not in parsed:
        raise KeyError("coverage JSON missing 'files' key")
    return parsed
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_report(path: str) -> dict:
    """Read a pytest-json-report file and return its parsed contents.

    Args:
        path: Location of the pytest JSON report file.

    Returns:
        The parsed report dictionary.

    Raises:
        SystemExit: On a missing file, invalid JSON, or a document that has
            no 'tests' key (i.e. not a pytest-json-report file).
    """
    try:
        with open(path, encoding="utf-8") as handle:
            report = json.load(handle)
    except FileNotFoundError:
        sys.exit(f"❌ File not found: {path}")
    except json.JSONDecodeError:
        sys.exit(f"❌ Invalid JSON: {path}")
    # Only pytest-json-report output carries a 'tests' array.
    if "tests" not in report:
        sys.exit(f"❌ '{path}' has no 'tests' key — is this a pytest-json-report file?")
    return report
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Layer mapping and ordering constants
LAYER_MAP = {
    "domain": "Domain",
    "application": "Application",
    "infrastructure": "Infrastructure",
    "services": "Services",
    "repositories": "Repositories",
    "use_cases": "Use Cases",
    "api": "API",
}

LAYER_ORDER = {
    "Domain": 1,
    "Application": 2,
    "Services": 3,
    "Use Cases": 4,
    "Repositories": 5,
    "Infrastructure": 6,
    "API": 7,
    "Other": 8,
}


def extract_layer(path: str) -> str:
    """Determine which architectural layer a file path belongs to.

    Args:
        path: File path to inspect.

    Returns:
        A layer name such as "Domain" or "Application"; "Other" when no
        known layer directory appears anywhere in the path.
    """
    # Case-insensitive match on any single path segment. The first entry
    # in LAYER_MAP order wins when several layer names appear in one path.
    segments = {segment.lower() for segment in Path(path).parts}
    for marker, layer_name in LAYER_MAP.items():
        if marker in segments:
            return layer_name
    return "Other"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def build_suffix_index(radon_map: dict) -> dict:
    """Build a suffix-keyed index for fuzzy path matching.

    coverage.json paths and radon paths may differ in their root prefix
    (absolute vs relative, different checkout directories). To bridge that,
    each radon entry is registered under every trailing-suffix of its path:
    '/home/user/proj/app/foo/bar.py' is reachable via 'bar.py',
    'foo/bar.py', 'app/foo/bar.py', and so on. Lookups then try the longest
    suffix first (see suffix_lookup) so collisions are resolved in favour of
    the most specific match.

    Args:
        radon_map: Dictionary mapping file paths to radon analysis data.

    Returns:
        Dictionary mapping every path suffix to its radon value. When two
        paths share a suffix, the first one registered keeps the key.
    """
    index: dict[str, object] = {}  # suffix_str → radon value
    for full_path, value in radon_map.items():
        segments = Path(full_path).parts
        for start in range(len(segments)):
            # setdefault = first writer wins for each suffix key
            # (shortest path = most specific).
            index.setdefault("/".join(segments[start:]), value)
    return index
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def suffix_lookup(index: dict, coverage_path: str):
    """Look up a coverage.json path in a suffix index.

    Candidate suffixes are tried from most specific (the full path) down to
    the bare basename; the first hit wins.

    Args:
        index: Index built by build_suffix_index().
        coverage_path: Path from coverage.json to look up.

    Returns:
        The radon value for the first matching suffix, or None when no
        suffix of the path is present in the index.
    """
    segments = Path(coverage_path).parts
    candidates = ("/".join(segments[start:]) for start in range(len(segments)))
    for candidate in candidates:
        if candidate in index:
            return index[candidate]
    return None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def precompute_coverage_stats(
    data: dict,
    complexity_map: dict | None = None,
    mi_map: dict | None = None,
    raw_map: dict | None = None,
    hal_map: dict | None = None,
) -> dict:
    """Precompute statistics from coverage.json and radon data.

    One pass over data['files']. Returns everything both histogram and
    list functions need so neither has to re-iterate the JSON.

    Args:
        data: Parsed coverage.json data
        complexity_map: Radon cyclomatic complexity data (optional)
        mi_map: Radon maintainability index data (optional)
        raw_map: Radon raw metrics data (optional)
        hal_map: Radon Halstead metrics data (optional)

    Returns:
        Dictionary with precomputed statistics including file_stats, project totals,
        and coverage percentiles
    """
    t = data.get("totals", {})
    proj_total = t.get("num_statements", 0)
    proj_covered = t.get("covered_lines", 0)

    # BUGFIX: decide up front whether we must sum per-file totals ourselves.
    # The old in-loop `if proj_total == 0:` check stopped accumulating as soon
    # as the first file made proj_total non-zero, so a coverage.json without a
    # 'totals' block reported only the first file's numbers as project totals.
    accumulate_totals = proj_total == 0

    # Build suffix indexes once — O(n·depth) — so the per-file lookup is O(depth)
    _cc_idx = build_suffix_index(complexity_map) if complexity_map else {}
    _mi_idx = build_suffix_index(mi_map) if mi_map else {}
    _raw_idx = build_suffix_index(raw_map) if raw_map else {}
    _hal_idx = build_suffix_index(hal_map) if hal_map else {}

    file_stats = []  # one dict per file with non-zero statements
    for path, info in data["files"].items():
        s = info.get("summary", {})
        total = s.get("num_statements", 0)
        if total == 0:
            continue
        covered = s.get("covered_lines", 0)
        pct = s.get("percent_covered", 0.0)
        missing = info.get("missing_lines") or []
        layer = extract_layer(path)

        if accumulate_totals:  # totals block missing — accumulate per file
            proj_total += total
            proj_covered += covered

        cc = suffix_lookup(_cc_idx, path) if _cc_idx else None
        mi = suffix_lookup(_mi_idx, path) if _mi_idx else None
        raw = suffix_lookup(_raw_idx, path) if _raw_idx else None
        hal = suffix_lookup(_hal_idx, path) if _hal_idx else None
        file_stats.append(
            {
                "path": path,
                "pct": pct,
                "total": total,
                "covered": covered,
                "missing_count": len(missing),
                "missing_lines": missing,
                "layer": layer,
                "layer_order": LAYER_ORDER.get(layer, 8),
                # radon cc
                "cc_avg": cc["avg"] if cc else None,
                "cc_max": cc["max"] if cc else None,
                "cc_n_blocks": cc["n_blocks"] if cc else None,
                "cc_scores": cc["scores"] if cc else None,  # individual function scores
                # radon mi
                "mi": mi["mi"] if mi else None,
                "mi_rank": mi["rank"] if mi else None,
                # radon raw
                "comment_ratio": raw["comment_ratio"] if raw else None,
                "sloc": raw["sloc"] if raw else None,
                # radon hal
                "hal_bugs": hal["bugs"] if hal else None,
                "hal_difficulty": hal["difficulty"] if hal else None,
            }
        )

    proj_pct = proj_covered / proj_total * 100 if proj_total else 0.0
    coverages_sorted = sorted(f["pct"] for f in file_stats)

    # Flat list of (cc_score, filepath) for all functions across all files
    # used by the CC histogram to plot true function distribution.
    cc_scores = sorted(
        (score, f["path"]) for f in file_stats if f.get("cc_scores") for score in f["cc_scores"]
    )

    return {
        "file_stats": file_stats,
        "proj_total": proj_total,
        "proj_covered": proj_covered,
        "proj_pct": proj_pct,
        "coverages_sorted": coverages_sorted,
        "cc_scores": cc_scores,  # (cc_value, filepath) per function
    }
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"""Coverage analysis functions."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from .utils import percentile, format_line_ranges, ascii_histogram, blame_header
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def scan_pragma_intervals(root: str) -> dict[str, list]:
    """Find sequential # pragma: no cover blocks per .py file under root.

    Consecutive pragma rows are merged into (start, end) 1-based intervals.
    Hidden directories and __pycache__ are skipped; unreadable files are
    silently ignored.

    Args:
        root: Root directory to scan.

    Returns:
        Dictionary mapping file paths to lists of (start, end) tuples.
    """
    found: dict[str, list] = {}
    for py_file in Path(root).rglob("*.py"):
        if any(seg.startswith(".") or seg == "__pycache__" for seg in py_file.parts):
            continue
        try:
            text = py_file.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        pragma_rows = [
            lineno
            for lineno, line in enumerate(text.splitlines(), start=1)
            if "# pragma: no cover" in line
        ]
        if not pragma_rows:
            continue
        # Merge consecutive row numbers into (start, end) runs.
        runs = []
        run_start = prev = pragma_rows[0]
        for row in pragma_rows[1:]:
            if row != prev + 1:
                runs.append((run_start, prev))
                run_start = row
            prev = row
        runs.append((run_start, prev))
        found[str(py_file)] = runs
    return found
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def scan_pragma_counts(root: str) -> dict[str, int]:
    """Count '# pragma: no cover' occurrences per .py file under root.

    Hidden directories and __pycache__ are skipped; unreadable files are
    silently ignored; files with zero pragmas are omitted from the result.

    Args:
        root: Root directory to scan.

    Returns:
        Dictionary mapping file paths to pragma counts.
    """
    counts: dict[str, int] = {}
    for py_file in Path(root).rglob("*.py"):
        skip = any(seg.startswith(".") or seg == "__pycache__" for seg in py_file.parts)
        if skip:
            continue
        try:
            text = py_file.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        hits = text.count("# pragma: no cover")
        if hits:
            counts[str(py_file)] = hits
    return counts
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def show_pragma_histogram(
    pragma_counts: dict,
    bins: int = 10,
    blame_limit: int = 20,
    width: int = 80,
    root: str = ".",
) -> None:
    """Histogram of '# pragma: no cover' count per file + all-files listing.

    Also prints a secondary histogram and table for sequential blocks of
    pragmas (intervals) so that you can spot long runs of skipped code.

    Args:
        pragma_counts: Dictionary mapping files to pragma counts
        bins: Number of histogram bins
        blame_limit: Maximum blamed items to display
        width: Line width for output
        root: Directory scanned for sequential pragma intervals. Defaults to
            the current working directory, matching the previous hard-coded
            "." — pass the same root the counts were gathered from so the
            two sections agree.
    """
    # --- Sequential pragma interval histogram ---
    # Was hard-coded to scan_pragma_intervals("."), which silently disagreed
    # with pragma_counts gathered from any other root.
    pragma_intervals = scan_pragma_intervals(root)
    interval_lengths = [
        end - start + 1 for intervals in pragma_intervals.values() for (start, end) in intervals
    ]
    if interval_lengths:
        n_int = len(interval_lengths)
        total_int = sum(interval_lengths)
        int_max = max(interval_lengths)
        # +0.1 keeps the max value strictly inside the last bin edge.
        int_step = max((int_max + 0.1) / bins, 1)
        edges = [i * int_step for i in range(bins + 1)]
        int_buckets = [0] * bins
        for v in interval_lengths:
            placed = False
            for bi in range(bins - 1):
                if edges[bi] <= v < edges[bi + 1]:
                    int_buckets[bi] += 1
                    placed = True
                    break
            if not placed:  # anything past the penultimate edge lands in the last bin
                int_buckets[-1] += 1
        int_labels = [f"{int(edges[i]):>4}\u2013{int(edges[i + 1]):<4} rows" for i in range(bins)]
        print(f"\n{'═' * width}")
        print(f" PRAGMA INTERVAL LENGTH HISTOGRAM (sequential blocks, {n_int} intervals)")
        print(f" [linear scale, {bins} bins]")
        print()
        ascii_histogram(int_buckets, int_labels, width=width)
        print()
        print(
            f" p50: {int(percentile(interval_lengths, 50)):<6} "
            f"p75: {int(percentile(interval_lengths, 75)):<6} "
            f"p90: {int(percentile(interval_lengths, 90)):<6} "
            f"p99: {int(percentile(interval_lengths, 99)):<6}"
        )
        print(f" avg: {total_int / n_int:<6.1f} max: {int_max:<6} total rows: {total_int}")
        print(f"\n{'═' * width}")
        print(f" PRAGMA INTERVAL ROWS {n_int}")
        print(f"\n{'═' * width}")
        for path, intervals in pragma_intervals.items():
            for start, end in intervals:
                print(f" {path} rows {start}-{end} ({end - start + 1} rows)")
    # --- End interval histogram ---
    if not pragma_counts:
        return
    file_list = sorted(pragma_counts.items(), key=lambda x: x[1], reverse=True)
    counts_sorted = sorted(pragma_counts.values())
    n = len(counts_sorted)
    total = sum(counts_sorted)
    m_max = counts_sorted[-1]

    m_step = max((m_max + 0.1) / bins, 1)
    edges = [i * m_step for i in range(bins + 1)]
    p_buckets = [0] * bins
    for v in counts_sorted:
        placed = False
        for bi in range(bins - 1):
            if edges[bi] <= v < edges[bi + 1]:
                p_buckets[bi] += 1
                placed = True
                break
        if not placed:
            p_buckets[-1] += 1
    p_labels = [f"{int(edges[i]):>4}\u2013{int(edges[i + 1]):<4} pragmas" for i in range(bins)]

    print(f"\n{'═' * width}")
    print(f" PRAGMA: NO COVER DISTRIBUTION (files with \u22651 pragma: {n}, total: {total})")
    print(f" [linear scale, {bins} bins]")
    print()
    ascii_histogram(p_buckets, p_labels, width=width)
    print()
    print(
        f" p50: {int(percentile(counts_sorted, 50)):<6} "
        f"p75: {int(percentile(counts_sorted, 75)):<6} "
        f"p90: {int(percentile(counts_sorted, 90)):<6} "
        f"p99: {int(percentile(counts_sorted, 99)):<6}"
    )
    print(f" avg: {total / n:<6.1f} max: {m_max:<6} total pragmas: {total}")

    # blame_limit == 0 means "show everything".
    shown = file_list if not blame_limit else file_list[:blame_limit]
    tail = (
        f" (showing top {blame_limit}, --blame-limit 0 for all)"
        if blame_limit and len(file_list) > blame_limit
        else ""
    )
    print(f"\n{'═' * width}")
    print(f" PRAGMA FILES {len(file_list)}{tail}")
    print(f"\n{'═' * width}")
    for path, count in shown:
        print(f" {count:>4} pragmas {path}")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def show_coverage_histogram(
    stats: dict,
    bins: int = 10,
    blame_limit: int = 20,
    show_blame: bool = True,
    threshold: float = 90.0,
    pragma_counts: dict | None = None,
    width: int = 80,
):
    """Display coverage distribution histogram and low-coverage blame.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        bins: Number of histogram bins
        blame_limit: Maximum blamed files to display
        show_blame: Whether to show quality blame sections
        threshold: Blame ceiling (show files below Q1 or this threshold, whichever is lower)
        pragma_counts: Optional pragma count data to display
        width: Line width for output
    """
    cs = stats["coverages_sorted"]
    if not cs:
        print("No files found.")
        return

    n = len(cs)
    avg = sum(cs) / n
    below_80 = sum(1 for c in cs if c < 80)
    below_60 = sum(1 for c in cs if c < 60)

    # Fixed 0–100% bins; half-open [lo, hi) except the last bin, which also
    # captures the exact 100.0 value so fully-covered files are counted once.
    step = 100 / bins
    buckets = [0] * bins
    labels = []
    for i in range(bins):
        lo, hi = i * step, (i + 1) * step
        labels.append(f"{lo:>5.1f}–{hi:<5.1f}%")
        for c in cs:
            if lo <= c < hi or (i == bins - 1 and c == 100.0):
                buckets[i] += 1

    print(f"\n{'═' * width}")
    print(" COVERAGE DISTRIBUTION HISTOGRAM")
    print(f"{'═' * width}")
    print(f" Files: {n} Project coverage: {stats['proj_pct']:.1f}%")
    print()
    ascii_histogram(buckets, labels, width=width)
    print()
    print(f"{'─' * width}")
    print(" PERCENTILES")
    print(f"{'─' * width}")
    for pct in (25, 50, 75, 90, 95, 99):
        label = {25: "Q1", 50: "Q2/med", 75: "Q3"}.get(pct, f"p{pct}")
        print(f" {label:<8} {percentile(cs, pct):>6.1f}%")
    print(f" {'avg':<8} {avg:>6.1f}%")
    print(f" {'min':<8} {cs[0]:>6.1f}%")
    print(f" {'max':<8} {cs[-1]:>6.1f}%")
    not_100 = sum(1 for c in cs if c < 100)
    print(f" {'not-100%':<8} {not_100} files ({not_100 / n * 100:.1f}%)")
    print(f" <80% {below_80} files ({below_80 / n * 100:.1f}%)")
    print(f" <60% {below_60} files ({below_60 / n * 100:.1f}%)")

    # ── missing lines histogram (non-100% files only) ──────────────────────
    missing_counts = sorted(f["missing_count"] for f in stats["file_stats"] if f["pct"] < 100.0)
    if missing_counts:
        import math
        nm = len(missing_counts)
        total_missing = sum(missing_counts)
        m_max = missing_counts[-1]
        # Switch to log-spaced bins when the spread spans ≥2 orders of
        # magnitude, otherwise a few huge files flatten the whole chart.
        use_log = m_max > 0 and missing_counts[0] > 0 and (m_max / missing_counts[0]) >= 100
        if use_log:
            log_min = math.log10(max(missing_counts[0], 1))
            log_max = math.log10(m_max + 1)
            log_step = (log_max - log_min) / bins
            edges = [10 ** (log_min + i * log_step) for i in range(bins + 1)]
            scale_note = "log scale"
        else:
            # +0.1 keeps the max value strictly below the last edge.
            m_step = max((m_max + 0.1) / bins, 1)
            edges = [i * m_step for i in range(bins + 1)]
            scale_note = "linear scale"
        m_buckets = [0] * bins
        for v in missing_counts:
            for bi in range(bins):
                if edges[bi] <= v < edges[bi + 1] or (bi == bins - 1 and v == m_max):
                    m_buckets[bi] += 1
                    break
            else:  # pragma: no cover
                m_buckets[-1] += 1
        m_labels = [f"{int(edges[i]):>5}–{int(edges[i + 1]):<5} lines" for i in range(bins)]
        print(f"\n{'─' * width}")
        print(f" MISSING LINES DISTRIBUTION (non-100% files: {nm})")
        print(f" [{scale_note}, {bins} bins]")
        print()
        ascii_histogram(m_buckets, m_labels, width=width)
        print()
        m_avg = total_missing / nm
        print(
            f" p50: {int(percentile(missing_counts, 50)):<6} p75: {int(percentile(missing_counts, 75)):<6} "
            f"p90: {int(percentile(missing_counts, 90)):<6} p99: {int(percentile(missing_counts, 99)):<6}"
        )
        print(f" avg: {m_avg:<6.1f} max: {m_max:<6} total missing: {total_missing:,} lines")

    if pragma_counts:
        show_pragma_histogram(pragma_counts, bins=bins, blame_limit=blame_limit, width=width)

    if show_blame:
        # quality blame — cap ceiling at `threshold` so near-perfect codebases don't sweep 25% of files
        q1 = percentile(cs, 25)
        blame_ceiling = min(q1, threshold)
        boundary_note = (
            f"Q1={q1:.1f}% capped at {threshold:.0f}%" if q1 > threshold else f"Q1={q1:.1f}%"
        )
        # Worst files first (sorted ascending by coverage pct).
        blamed = sorted(
            [
                (f["pct"], f["path"], f["missing_count"], f["missing_lines"])
                for f in stats["file_stats"]
                if f["pct"] < blame_ceiling
            ],
            key=lambda x: x[0],
        )
        blame_header(f"below {blame_ceiling:.1f}% ({boundary_note})", len(blamed), blame_limit, width)
        # blame_limit == 0 means "show everything".
        display = blamed if not blame_limit else blamed[:blame_limit]
        if display:
            for pct, path, missing_count, missing_lines in display:
                print(f" {pct:>5.1f}% {missing_count:>4} lines missing {path}")
                ranges = format_line_ranges(missing_lines)
                if ranges:
                    print(f" 📍 {ranges}")
        else:
            print(" ✅ No files below quality boundary.")
        print(f"\n{'═' * width}\n")
|