codebase-stats 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ """Test duration and performance analysis."""
2
+
3
+ import math
4
+ from .utils import percentile, fmt_seconds, ascii_histogram, blame_header
5
+
6
+
7
+ def test_duration(t: dict):
8
+ """Get (total_duration, breakdown_dict) for a test.
9
+
10
+ Sum setup, call and teardown phases.
11
+
12
+ Args:
13
+ t: Test dictionary from pytest-json-report
14
+
15
+ Returns:
16
+ Tuple of (total_duration_seconds, phase_breakdown_dict)
17
+ """
18
+ breakdown = {}
19
+ total, found = 0.0, False
20
+ for phase in ("setup", "call", "teardown"):
21
+ p = t.get(phase)
22
+ if isinstance(p, dict) and "duration" in p:
23
+ val = float(p["duration"])
24
+ breakdown[phase] = val
25
+ total += val
26
+ found = True
27
+ if not found and "duration" in t:
28
+ d = float(t["duration"])
29
+ return d, {"call": d}
30
+ return (total, breakdown) if found else (None, {})
31
+
32
+
33
def render_duration_stats(ds: list, slow_threshold: float, width: int = 80):
    """Print percentile and aggregate statistics for a duration list.

    Args:
        ds: Sorted list of durations (ascending)
        slow_threshold: Durations at or above this count as slow
        width: Line width for output (currently unused; kept for API symmetry)
    """
    if not ds:
        return
    count = len(ds)
    total_time = sum(ds)
    # Booleans sum as ints: counts how many durations reach the threshold.
    slow_count = sum(d >= slow_threshold for d in ds)
    mean = total_time / count
    p50, p75, p90, p95, p99 = (percentile(ds, q) for q in (50, 75, 90, 95, 99))
    longest = ds[-1]  # list is sorted ascending

    line_one = f" p50: {fmt_seconds(p50):<8} p75: {fmt_seconds(p75):<8} p90: {fmt_seconds(p90):<8} p95: {fmt_seconds(p95):<8} p99: {fmt_seconds(p99):<8}"
    line_two = f" avg: {fmt_seconds(mean):<8} max: {fmt_seconds(longest):<8} total: {fmt_seconds(total_time):<8} slow: {slow_count:<5} n: {count}"
    print(line_one)
    print(line_two)
60
+
61
+
62
def render_duration_histogram_core(
    ds: list,
    title: str,
    bins: int,
    slow_threshold: float,
    width: int = 80,
):
    """Render the ASCII histogram for a duration list.

    Switches to a log10 x-axis when the spread between fastest and slowest
    durations reaches two orders of magnitude.

    Args:
        ds: Sorted list of durations (ascending)
        title: Title for the histogram
        bins: Number of histogram bins
        slow_threshold: Threshold for considering tests slow
        width: Line width for output
    """
    if not ds:
        return

    slowest = ds[-1]
    fastest = ds[0]

    use_log = fastest > 0 and (slowest / max(fastest, 1e-9)) >= 100
    if use_log:
        # Lower edge is clamped to 1e-6 so log10 stays finite.
        lo = math.log10(max(fastest, 1e-6))
        # *1.0001 nudges the top edge past the max so it lands in the last bin.
        hi = math.log10(slowest * 1.0001)
        step = (hi - lo) / bins
        edges = [10 ** (lo + i * step) for i in range(bins + 1)]
        scale_note = "log scale"
    else:
        step = (slowest * 1.0001) / bins
        edges = [i * step for i in range(bins + 1)]
        scale_note = "linear scale"

    buckets = [0] * bins
    for d in ds:
        for i in range(bins):
            if edges[i] <= d < edges[i + 1]:
                buckets[i] += 1
                break
        else:
            # BUGFIX: durations outside [edges[0], edges[-1]) — e.g. values
            # below the clamped log-scale lower edge — were previously always
            # counted into the SLOWEST bucket. Clamp to the nearest end instead.
            buckets[0 if d < edges[0] else -1] += 1

    p90 = percentile(ds, 90)
    labels, suffixes = [], []
    for i in range(bins):
        lo_edge, hi_edge = edges[i], edges[i + 1]
        # Annotate the bin containing p90, else the one crossing the threshold.
        if lo_edge <= p90 < hi_edge:
            tag = "← p90"
        elif lo_edge <= slow_threshold < hi_edge:
            tag = f"← {fmt_seconds(slow_threshold)} threshold"
        else:
            tag = ""
        # Fill shade conveys how close a bin is to the slow threshold.
        if hi_edge > slow_threshold:
            fill = "█"
        elif hi_edge > slow_threshold * 0.1:
            fill = "▒"
        else:
            fill = "░"
        labels.append(f"{fmt_seconds(lo_edge):>7}–{fmt_seconds(hi_edge)}")
        suffixes.append(f"{fill} {tag}" if tag else fill)

    print(f"\n {title}")
    print(f" [{scale_note}, {bins} bins, slow >= {fmt_seconds(slow_threshold)}]")
    print()
    ascii_histogram(buckets, labels, suffixes=suffixes, width=width)
    print()
    render_duration_stats(ds, slow_threshold, width)
130
+
131
+
132
def show_duration_histogram(
    report: dict,
    bins: int = 10,
    slow_threshold: float = 1.0,
    blame_limit: int = 20,
    show_blame: bool = True,
    width: int = 80,
):
    """Display test duration histogram and slow test blame.

    Args:
        report: Parsed pytest-json-report data
        bins: Number of histogram bins
        slow_threshold: Threshold in seconds for considering tests slow
        blame_limit: Maximum blamed tests to display (falsy shows all)
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    tests = report.get("tests", [])
    test_data = []

    # Store phase durations for separate histograms
    phases = {"setup": [], "call": [], "teardown": []}

    for t in tests:
        dur, breakdown = test_duration(t)
        if dur is not None:
            test_data.append({"duration": dur, "breakdown": breakdown, "test": t})
            for p_name in phases:
                if p_name in breakdown:
                    phases[p_name].append(breakdown[p_name])

    if not test_data:
        print("⚠️ No duration data found in report.")
        return

    durations = sorted([d["duration"] for d in test_data])
    outcomes: dict = {}
    for t in tests:
        k = t.get("outcome", "unknown")
        outcomes[k] = outcomes.get(k, 0) + 1

    print(f"\n{'═' * width}")
    print(" TEST DURATION ANALYSIS")
    print(f"{'═' * width}")
    n = len(durations)
    total = sum(durations)
    outcome_str = " ".join(f"{k}: {v}" for k, v in sorted(outcomes.items()))
    # Get the actual total time from report.json (includes collection, setup, call, teardown)
    report_total = report.get("duration", 0)
    collection_time = report_total - total if report_total > total else 0

    print(f" Tests: {n} Total: {fmt_seconds(total)} ({fmt_seconds(report_total)} wall time) {outcome_str}")
    if collection_time > 1:
        print(f" (includes ~{fmt_seconds(collection_time)} collection/overhead)")

    # REFACTOR: the three per-phase sections were copy-pasted; render them in
    # one loop (same order, same output), then the aggregate histogram.
    for p_name in ("setup", "call", "teardown"):
        if phases[p_name]:
            render_duration_histogram_core(
                sorted(phases[p_name]), f"PHASE: {p_name.upper()}", bins, slow_threshold, width
            )
            print(f"\n {'-' * (width - 4)}")

    render_duration_histogram_core(durations, "AGGREGATE: TOTAL DURATION", bins, slow_threshold, width)

    if show_blame:
        # Tukey fence: anything above Q3 + 1.5×IQR is flagged as an outlier.
        q1_dur = percentile(durations, 25)
        q3_dur = percentile(durations, 75)
        iqr_boundary = q3_dur + 1.5 * (q3_dur - q1_dur)
        blamed_tests = sorted(
            [d for d in test_data if d["duration"] > iqr_boundary],
            key=lambda x: x["duration"],
            reverse=True,
        )
        blame_header(
            f"duration outliers Q3 + 1.5×IQR > {fmt_seconds(iqr_boundary)}",
            len(blamed_tests),
            blame_limit,
            width,
        )
        display = blamed_tests if not blame_limit else blamed_tests[:blame_limit]
        if display:
            for item in display:
                d = item["duration"]
                t = item["test"]
                b = item["breakdown"]
                nodeid = t.get("nodeid", "?")
                icon = {"passed": "✅", "failed": "❌"}.get(t.get("outcome", ""), "⚠️ ")
                if len(nodeid) > 60:
                    # Keep the (more informative) tail of long node ids.
                    nodeid = "…" + nodeid[-59:]

                # Add breakdown info to the blame report
                # Especially useful when setup is the bottleneck
                s_val, c_val = b.get("setup", 0), b.get("call", 0)
                # Always show breakdown for outliers to immediately identify slow call phases
                breakdown_str = f" (s:{fmt_seconds(s_val)} c:{fmt_seconds(c_val)})"

                print(f" {icon} {fmt_seconds(d):>8}{breakdown_str} {nodeid}")
        else:
            print(" ✅ No duration outliers.")
    print(f"\n{'═' * width}")
@@ -0,0 +1,204 @@
1
+ """Low-coverage file listing and prioritization."""
2
+
3
+ from .utils import format_line_ranges
4
+ from .metrics import cc_rank, mi_rank
5
+
6
+
7
# Sort fields accepted by parse_sorts(); anything else falls back to "priority".
VALID_SORT_FIELDS = {"priority", "coverage", "layer", "missing", "missing_pct", "complexity"}
8
+
9
+
10
def parse_sorts(sort_specs: list, default_order: str) -> list:
    """Turn raw sort specs into (field, is_descending) tuples.

    Each spec is either ``"field"`` or ``"field:asc"``/``"field:desc"``.
    A spec whose trailing order token is unrecognized is treated as a bare
    field name; unknown fields fall back to "priority" with a warning.

    Args:
        sort_specs: List of sort specs like ["priority:desc", "coverage:asc"]
        default_order: Default sort order ("asc" or "desc")

    Returns:
        List of (field, is_descending) tuples
    """
    parsed = []
    for spec in sort_specs:
        # Start from the defaults; only split when a valid order suffix exists.
        field, order = spec, default_order
        if ":" in spec:
            head, tail = spec.rsplit(":", 1)
            if tail in ("asc", "desc"):
                field, order = head, tail
        if field not in VALID_SORT_FIELDS:
            print(f"⚠️ Unknown sort field '{field}', falling back to 'priority'")
            field = "priority"
        parsed.append((field, order == "desc"))
    return parsed
33
+
34
+
35
def priority_score(pct: float, layer: str, missing: int, cc_avg: float = None, mi: float = None) -> int:
    """Calculate priority score for a file based on coverage and metrics.

    Args:
        pct: Coverage percentage
        layer: Architectural layer
        missing: Number of missing statements
        cc_avg: Average cyclomatic complexity (optional)
        mi: Maintainability index (optional)

    Returns:
        Priority score (higher = more important to fix)
    """
    # Coverage bucket: the less covered the file, the more urgent it is.
    if pct < 10:
        score = 40
    elif pct < 30:
        score = 30
    elif pct < 50:
        score = 20
    else:
        score = 10

    # Architectural layer weight; unrecognized layers get a baseline of 10.
    score += {"Domain": 30, "Application": 25, "Services": 20}.get(layer, 10)

    if cc_avg is not None:
        # Cyclomatic complexity: more complex code is harder to test.
        if cc_avg > 20:
            score += 30
        elif cc_avg > 10:
            score += 20
        elif cc_avg > 5:
            score += 10
    else:
        # Without CC data, weigh the sheer amount of uncovered code instead.
        if missing > 50:
            score += 20
        elif missing > 20:
            score += 10

    # Low maintainability index means the file is harder to work with.
    if mi is not None:
        if mi < 10:
            score += 20
        elif mi < 20:
            score += 10
    return score
59
+
60
+
61
def show_low_coverage(
    stats: dict,
    threshold: float = 50.0,
    max_threshold=None,
    top_n=20,
    sorts=None,
    show_lines: bool = False,
    width: int = 100,
):
    """Display files with low coverage and quality metrics.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        threshold: Show files below this coverage percentage
        max_threshold: Upper bound for coverage range (optional)
        top_n: Maximum files to display (None for all)
        sorts: List of (field, is_descending) sort tuples
        show_lines: Include missing line numbers
        width: Line width for output
    """
    if sorts is None:
        sorts = [("priority", True)]

    # Select files in the requested coverage range and enrich with metrics.
    files = []
    for f in stats["file_stats"]:
        pct = f["pct"]
        in_range = (
            threshold <= pct < max_threshold if max_threshold is not None else pct < threshold
        )
        if not in_range:
            continue
        missing = f["missing_count"]
        files.append(
            {
                "path": f["path"],
                "coverage": pct,
                "missing_pct": 100.0 - pct,
                "total": f["total"],
                "missing": missing,
                "missing_lines_list": f["missing_lines"],
                "layer": f["layer"],
                "layer_order": f["layer_order"],
                "cc_avg": f.get("cc_avg"),
                "mi": f.get("mi"),
                "priority": priority_score(pct, f["layer"], missing, f.get("cc_avg"), f.get("mi")),
            }
        )

    extractors = {
        "priority": lambda x: x["priority"],
        "coverage": lambda x: x["coverage"],
        "layer": lambda x: x["layer_order"],
        "missing": lambda x: x["missing"],
        "missing_pct": lambda x: x["missing_pct"],
        "complexity": lambda x: x.get("cc_avg") or 0,
    }

    def sort_key(item):
        # Negate numeric values for descending fields so one sort pass works.
        vals = []
        for field, desc in sorts:
            v = extractors[field](item)
            vals.append(-v if desc and isinstance(v, (int, float)) else v)
        return tuple(vals)

    files.sort(key=sort_key)

    sort_label = ", ".join(f"{f}{'↓' if d else '↑'}" for f, d in sorts)
    title = (
        f"COVERAGE RANGE {threshold:.0f}%–{max_threshold:.0f}% — sorted: {sort_label}"
        if max_threshold is not None
        else f"LOW COVERAGE (< {threshold:.0f}%) — sorted: {sort_label}"
    )

    print(f"\n{'═' * width}")
    print(f" {title}")
    print(f"{'═' * width}")

    if not files:
        # BUGFIX: use `is not None` (not truthiness) so max_threshold=0 gets
        # the range-style message, consistent with the title and filter above.
        msg = (
            f"✅ No files in range {threshold:.0f}%–{max_threshold:.0f}%!"
            if max_threshold is not None
            else f"✅ No files below {threshold:.0f}%!"
        )
        print(f"\n{msg}\n{'═' * width}")
        return

    display = files if top_n is None else files[:top_n]
    print(f"\nFound {len(files)} files (showing {len(display)})\n")
    print(f"{'#':<4} {'Prio':<6} {'Coverage':<10} {'Missing':<20} {'Layer':<15} File")
    print("─" * width)

    for i, f in enumerate(display, 1):
        icon = "🔴" if f["coverage"] < 20 else "🟠" if f["coverage"] < 40 else "🟡"
        missing_disp = f"{f['missing']:>4} ({f['missing_pct']:>5.1f}%)"
        # NOTE: truthiness check means cc_avg/mi of exactly 0.0 are omitted.
        cc_str = f" CC{f['cc_avg']:>4.1f}{cc_rank(f['cc_avg'])}" if f.get("cc_avg") else ""
        mi_str = f" MI{f['mi']:>5.1f}{mi_rank(f['mi'])}" if f.get("mi") else ""
        print(
            f"{i:<4} {icon} {f['priority']:<4} "
            f"{f['coverage']:>6.1f}% {missing_disp:<18} "
            f"{f['layer']:<15}{cc_str}{mi_str} {f['path']}"
        )
        if show_lines and f["missing_lines_list"]:
            print(f"{'':48}📍 {format_line_ranges(f['missing_lines_list'])}")

    if top_n and len(files) > top_n:
        print(f"\n … and {len(files) - top_n} more files")

    total_miss = sum(f["missing"] for f in files)
    total_stmts = sum(f["total"] for f in files)
    proj_total = stats["proj_total"]
    proj_pct = stats["proj_pct"]
    projected = (stats["proj_covered"] + total_miss) / proj_total * 100 if proj_total else 0.0

    # Crude index-based quartiles of the selected files' coverage values.
    cvs = sorted(f["coverage"] for f in files)
    nn = len(cvs)
    q25, med, q75 = cvs[nn // 4], cvs[nn // 2], cvs[3 * nn // 4]

    # BUGFIX: guard the share computation — total_stmts can be 0 in degenerate
    # reports, which previously raised ZeroDivisionError.
    miss_share = total_miss / total_stmts * 100 if total_stmts else 0.0

    print(f"\n{'─' * width}")
    print("📊 SUMMARY")
    print(f"{'─' * width}")
    print(f" Files : {len(files)}")
    print(f" Coverage P25/P50/P75 : {q25:.1f}% | {med:.1f}% | {q75:.1f}%")
    print(
        f" Total missing lines : {total_miss:,} ({miss_share:.1f}% of their statements)"
    )
    print(f"\n{'─' * width}")
    print("🚀 COVERAGE PROJECTION (if these files reach 100%)")
    print(f"{'─' * width}")
    print(f" Current : {proj_pct:.1f}%")
    print(f" Projected: {projected:.1f}% (+{projected - proj_pct:.1f}pp)")
    print(f" Lines to cover: {total_miss:,} / {proj_total:,} total")

    print(f"\n{'─' * width}")
    print("🎯 TOP 5 BY PRIORITY")
    print(f"{'─' * width}")
    for i, f in enumerate(sorted(files, key=lambda x: x["priority"], reverse=True)[:5], 1):
        impact = "HIGH" if f["missing"] > 30 else "MED" if f["missing"] > 15 else "LOW"
        effort = "HARD" if f["missing"] > 50 else "MOD" if f["missing"] > 20 else "EASY"
        print(
            f" {i}. [{f['layer']}] {f['path']}\n"
            f" → {f['missing']} lines ({f['missing_pct']:.1f}%) | Impact: {impact} | Effort: {effort}"
        )

    print(f"\n{'═' * width}")