codebase-stats 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ """Test duration and performance analysis."""
2
+
3
+ import math
4
+ from .utils import percentile, fmt_seconds, ascii_histogram, blame_header
5
+
6
+
7
+ def test_duration(t: dict):
8
+ """Get (total_duration, breakdown_dict) for a test.
9
+
10
+ Sum setup, call and teardown phases.
11
+
12
+ Args:
13
+ t: Test dictionary from pytest-json-report
14
+
15
+ Returns:
16
+ Tuple of (total_duration_seconds, phase_breakdown_dict)
17
+ """
18
+ breakdown = {}
19
+ total, found = 0.0, False
20
+ for phase in ("setup", "call", "teardown"):
21
+ p = t.get(phase)
22
+ if isinstance(p, dict) and "duration" in p:
23
+ val = float(p["duration"])
24
+ breakdown[phase] = val
25
+ total += val
26
+ found = True
27
+ if not found and "duration" in t:
28
+ d = float(t["duration"])
29
+ return d, {"call": d}
30
+ return (total, breakdown) if found else (None, {})
31
+
32
+
33
def render_duration_stats(ds: list, slow_threshold: float, width: int = 80):
    """Print percentile and aggregate statistics for a duration list.

    Args:
        ds: Sorted list of durations (ascending)
        slow_threshold: Durations at or above this count as slow
        width: Line width for output (currently unused; kept for API symmetry)
    """
    if not ds:
        return
    count = len(ds)
    total_time = sum(ds)
    # Booleans sum as ints: counts how many durations reach the threshold.
    slow_count = sum(d >= slow_threshold for d in ds)
    mean = total_time / count
    p50, p75, p90, p95, p99 = (percentile(ds, q) for q in (50, 75, 90, 95, 99))
    longest = ds[-1]  # list is sorted ascending

    line_one = f" p50: {fmt_seconds(p50):<8} p75: {fmt_seconds(p75):<8} p90: {fmt_seconds(p90):<8} p95: {fmt_seconds(p95):<8} p99: {fmt_seconds(p99):<8}"
    line_two = f" avg: {fmt_seconds(mean):<8} max: {fmt_seconds(longest):<8} total: {fmt_seconds(total_time):<8} slow: {slow_count:<5} n: {count}"
    print(line_one)
    print(line_two)
60
+
61
+
62
def render_duration_histogram_core(
    ds: list,
    title: str,
    bins: int,
    slow_threshold: float,
    width: int = 80,
):
    """Render the ASCII histogram for a duration list.

    Switches to a log10 x-axis when the spread between fastest and slowest
    durations reaches two orders of magnitude.

    Args:
        ds: Sorted list of durations (ascending)
        title: Title for the histogram
        bins: Number of histogram bins
        slow_threshold: Threshold for considering tests slow
        width: Line width for output
    """
    if not ds:
        return

    slowest = ds[-1]
    fastest = ds[0]

    use_log = fastest > 0 and (slowest / max(fastest, 1e-9)) >= 100
    if use_log:
        # Lower edge is clamped to 1e-6 so log10 stays finite.
        lo = math.log10(max(fastest, 1e-6))
        # *1.0001 nudges the top edge past the max so it lands in the last bin.
        hi = math.log10(slowest * 1.0001)
        step = (hi - lo) / bins
        edges = [10 ** (lo + i * step) for i in range(bins + 1)]
        scale_note = "log scale"
    else:
        step = (slowest * 1.0001) / bins
        edges = [i * step for i in range(bins + 1)]
        scale_note = "linear scale"

    buckets = [0] * bins
    for d in ds:
        for i in range(bins):
            if edges[i] <= d < edges[i + 1]:
                buckets[i] += 1
                break
        else:
            # BUGFIX: durations outside [edges[0], edges[-1]) — e.g. values
            # below the clamped log-scale lower edge — were previously always
            # counted into the SLOWEST bucket. Clamp to the nearest end instead.
            buckets[0 if d < edges[0] else -1] += 1

    p90 = percentile(ds, 90)
    labels, suffixes = [], []
    for i in range(bins):
        lo_edge, hi_edge = edges[i], edges[i + 1]
        # Annotate the bin containing p90, else the one crossing the threshold.
        if lo_edge <= p90 < hi_edge:
            tag = "← p90"
        elif lo_edge <= slow_threshold < hi_edge:
            tag = f"← {fmt_seconds(slow_threshold)} threshold"
        else:
            tag = ""
        # Fill shade conveys how close a bin is to the slow threshold.
        if hi_edge > slow_threshold:
            fill = "█"
        elif hi_edge > slow_threshold * 0.1:
            fill = "▒"
        else:
            fill = "░"
        labels.append(f"{fmt_seconds(lo_edge):>7}–{fmt_seconds(hi_edge)}")
        suffixes.append(f"{fill} {tag}" if tag else fill)

    print(f"\n {title}")
    print(f" [{scale_note}, {bins} bins, slow >= {fmt_seconds(slow_threshold)}]")
    print()
    ascii_histogram(buckets, labels, suffixes=suffixes, width=width)
    print()
    render_duration_stats(ds, slow_threshold, width)
130
+
131
+
132
def show_duration_histogram(
    report: dict,
    bins: int = 10,
    slow_threshold: float = 1.0,
    blame_limit: int = 20,
    show_blame: bool = True,
    width: int = 80,
):
    """Display test duration histogram and slow test blame.

    Args:
        report: Parsed pytest-json-report data
        bins: Number of histogram bins
        slow_threshold: Threshold in seconds for considering tests slow
        blame_limit: Maximum blamed tests to display (falsy shows all)
        show_blame: Whether to show quality blame sections
        width: Line width for output
    """
    tests = report.get("tests", [])
    test_data = []

    # Store phase durations for separate histograms
    phases = {"setup": [], "call": [], "teardown": []}

    for t in tests:
        dur, breakdown = test_duration(t)
        if dur is not None:
            test_data.append({"duration": dur, "breakdown": breakdown, "test": t})
            for p_name in phases:
                if p_name in breakdown:
                    phases[p_name].append(breakdown[p_name])

    if not test_data:
        print("⚠️ No duration data found in report.")
        return

    durations = sorted([d["duration"] for d in test_data])
    outcomes: dict = {}
    for t in tests:
        k = t.get("outcome", "unknown")
        outcomes[k] = outcomes.get(k, 0) + 1

    print(f"\n{'═' * width}")
    print(" TEST DURATION ANALYSIS")
    print(f"{'═' * width}")
    n = len(durations)
    total = sum(durations)
    outcome_str = " ".join(f"{k}: {v}" for k, v in sorted(outcomes.items()))
    # Get the actual total time from report.json (includes collection, setup, call, teardown)
    report_total = report.get("duration", 0)
    collection_time = report_total - total if report_total > total else 0

    print(f" Tests: {n} Total: {fmt_seconds(total)} ({fmt_seconds(report_total)} wall time) {outcome_str}")
    if collection_time > 1:
        print(f" (includes ~{fmt_seconds(collection_time)} collection/overhead)")

    # REFACTOR: the three per-phase sections were copy-pasted; render them in
    # one loop (same order, same output), then the aggregate histogram.
    for p_name in ("setup", "call", "teardown"):
        if phases[p_name]:
            render_duration_histogram_core(
                sorted(phases[p_name]), f"PHASE: {p_name.upper()}", bins, slow_threshold, width
            )
            print(f"\n {'-' * (width - 4)}")

    render_duration_histogram_core(durations, "AGGREGATE: TOTAL DURATION", bins, slow_threshold, width)

    if show_blame:
        # Tukey fence: anything above Q3 + 1.5×IQR is flagged as an outlier.
        q1_dur = percentile(durations, 25)
        q3_dur = percentile(durations, 75)
        iqr_boundary = q3_dur + 1.5 * (q3_dur - q1_dur)
        blamed_tests = sorted(
            [d for d in test_data if d["duration"] > iqr_boundary],
            key=lambda x: x["duration"],
            reverse=True,
        )
        blame_header(
            f"duration outliers Q3 + 1.5×IQR > {fmt_seconds(iqr_boundary)}",
            len(blamed_tests),
            blame_limit,
            width,
        )
        display = blamed_tests if not blame_limit else blamed_tests[:blame_limit]
        if display:
            for item in display:
                d = item["duration"]
                t = item["test"]
                b = item["breakdown"]
                nodeid = t.get("nodeid", "?")
                icon = {"passed": "✅", "failed": "❌"}.get(t.get("outcome", ""), "⚠️ ")
                if len(nodeid) > 60:
                    # Keep the (more informative) tail of long node ids.
                    nodeid = "…" + nodeid[-59:]

                # Add breakdown info to the blame report
                # Especially useful when setup is the bottleneck
                s_val, c_val = b.get("setup", 0), b.get("call", 0)
                # Always show breakdown for outliers to immediately identify slow call phases
                breakdown_str = f" (s:{fmt_seconds(s_val)} c:{fmt_seconds(c_val)})"

                print(f" {icon} {fmt_seconds(d):>8}{breakdown_str} {nodeid}")
        else:
            print(" ✅ No duration outliers.")
    print(f"\n{'═' * width}")
@@ -0,0 +1,204 @@
1
+ """Low-coverage file listing and prioritization."""
2
+
3
+ from .utils import format_line_ranges
4
+ from .metrics import cc_rank, mi_rank
5
+
6
+
7
# Sort fields accepted by parse_sorts(); anything else falls back to "priority".
VALID_SORT_FIELDS = {"priority", "coverage", "layer", "missing", "missing_pct", "complexity"}
8
+
9
+
10
def parse_sorts(sort_specs: list, default_order: str) -> list:
    """Turn raw sort specs into (field, is_descending) tuples.

    Each spec is either ``"field"`` or ``"field:asc"``/``"field:desc"``.
    A spec whose trailing order token is unrecognized is treated as a bare
    field name; unknown fields fall back to "priority" with a warning.

    Args:
        sort_specs: List of sort specs like ["priority:desc", "coverage:asc"]
        default_order: Default sort order ("asc" or "desc")

    Returns:
        List of (field, is_descending) tuples
    """
    parsed = []
    for spec in sort_specs:
        # Start from the defaults; only split when a valid order suffix exists.
        field, order = spec, default_order
        if ":" in spec:
            head, tail = spec.rsplit(":", 1)
            if tail in ("asc", "desc"):
                field, order = head, tail
        if field not in VALID_SORT_FIELDS:
            print(f"⚠️ Unknown sort field '{field}', falling back to 'priority'")
            field = "priority"
        parsed.append((field, order == "desc"))
    return parsed
33
+
34
+
35
def priority_score(pct: float, layer: str, missing: int, cc_avg: float = None, mi: float = None) -> int:
    """Calculate priority score for a file based on coverage and metrics.

    Args:
        pct: Coverage percentage
        layer: Architectural layer
        missing: Number of missing statements
        cc_avg: Average cyclomatic complexity (optional)
        mi: Maintainability index (optional)

    Returns:
        Priority score (higher = more important to fix)
    """
    # Coverage bucket: the less covered the file, the more urgent it is.
    if pct < 10:
        score = 40
    elif pct < 30:
        score = 30
    elif pct < 50:
        score = 20
    else:
        score = 10

    # Architectural layer weight; unrecognized layers get a baseline of 10.
    score += {"Domain": 30, "Application": 25, "Services": 20}.get(layer, 10)

    if cc_avg is not None:
        # Cyclomatic complexity: more complex code is harder to test.
        if cc_avg > 20:
            score += 30
        elif cc_avg > 10:
            score += 20
        elif cc_avg > 5:
            score += 10
    else:
        # Without CC data, weigh the sheer amount of uncovered code instead.
        if missing > 50:
            score += 20
        elif missing > 20:
            score += 10

    # Low maintainability index means the file is harder to work with.
    if mi is not None:
        if mi < 10:
            score += 20
        elif mi < 20:
            score += 10
    return score
59
+
60
+
61
def show_low_coverage(
    stats: dict,
    threshold: float = 50.0,
    max_threshold=None,
    top_n=20,
    sorts=None,
    show_lines: bool = False,
    width: int = 100,
):
    """Display files with low coverage and quality metrics.

    Args:
        stats: Precomputed statistics from precompute_coverage_stats()
        threshold: Show files below this coverage percentage
        max_threshold: Upper bound for coverage range (optional)
        top_n: Maximum files to display (None for all)
        sorts: List of (field, is_descending) sort tuples
        show_lines: Include missing line numbers
        width: Line width for output
    """
    if sorts is None:
        sorts = [("priority", True)]

    # Select files in the requested coverage range and enrich with metrics.
    files = []
    for f in stats["file_stats"]:
        pct = f["pct"]
        in_range = (
            threshold <= pct < max_threshold if max_threshold is not None else pct < threshold
        )
        if not in_range:
            continue
        missing = f["missing_count"]
        files.append(
            {
                "path": f["path"],
                "coverage": pct,
                "missing_pct": 100.0 - pct,
                "total": f["total"],
                "missing": missing,
                "missing_lines_list": f["missing_lines"],
                "layer": f["layer"],
                "layer_order": f["layer_order"],
                "cc_avg": f.get("cc_avg"),
                "mi": f.get("mi"),
                "priority": priority_score(pct, f["layer"], missing, f.get("cc_avg"), f.get("mi")),
            }
        )

    extractors = {
        "priority": lambda x: x["priority"],
        "coverage": lambda x: x["coverage"],
        "layer": lambda x: x["layer_order"],
        "missing": lambda x: x["missing"],
        "missing_pct": lambda x: x["missing_pct"],
        "complexity": lambda x: x.get("cc_avg") or 0,
    }

    def sort_key(item):
        # Negate numeric values for descending fields so one sort pass works.
        vals = []
        for field, desc in sorts:
            v = extractors[field](item)
            vals.append(-v if desc and isinstance(v, (int, float)) else v)
        return tuple(vals)

    files.sort(key=sort_key)

    sort_label = ", ".join(f"{f}{'↓' if d else '↑'}" for f, d in sorts)
    title = (
        f"COVERAGE RANGE {threshold:.0f}%–{max_threshold:.0f}% — sorted: {sort_label}"
        if max_threshold is not None
        else f"LOW COVERAGE (< {threshold:.0f}%) — sorted: {sort_label}"
    )

    print(f"\n{'═' * width}")
    print(f" {title}")
    print(f"{'═' * width}")

    if not files:
        # BUGFIX: use `is not None` (not truthiness) so max_threshold=0 gets
        # the range-style message, consistent with the title and filter above.
        msg = (
            f"✅ No files in range {threshold:.0f}%–{max_threshold:.0f}%!"
            if max_threshold is not None
            else f"✅ No files below {threshold:.0f}%!"
        )
        print(f"\n{msg}\n{'═' * width}")
        return

    display = files if top_n is None else files[:top_n]
    print(f"\nFound {len(files)} files (showing {len(display)})\n")
    print(f"{'#':<4} {'Prio':<6} {'Coverage':<10} {'Missing':<20} {'Layer':<15} File")
    print("─" * width)

    for i, f in enumerate(display, 1):
        icon = "🔴" if f["coverage"] < 20 else "🟠" if f["coverage"] < 40 else "🟡"
        missing_disp = f"{f['missing']:>4} ({f['missing_pct']:>5.1f}%)"
        # NOTE: truthiness check means cc_avg/mi of exactly 0.0 are omitted.
        cc_str = f" CC{f['cc_avg']:>4.1f}{cc_rank(f['cc_avg'])}" if f.get("cc_avg") else ""
        mi_str = f" MI{f['mi']:>5.1f}{mi_rank(f['mi'])}" if f.get("mi") else ""
        print(
            f"{i:<4} {icon} {f['priority']:<4} "
            f"{f['coverage']:>6.1f}% {missing_disp:<18} "
            f"{f['layer']:<15}{cc_str}{mi_str} {f['path']}"
        )
        if show_lines and f["missing_lines_list"]:
            print(f"{'':48}📍 {format_line_ranges(f['missing_lines_list'])}")

    if top_n and len(files) > top_n:
        print(f"\n … and {len(files) - top_n} more files")

    total_miss = sum(f["missing"] for f in files)
    total_stmts = sum(f["total"] for f in files)
    proj_total = stats["proj_total"]
    proj_pct = stats["proj_pct"]
    projected = (stats["proj_covered"] + total_miss) / proj_total * 100 if proj_total else 0.0

    # Crude index-based quartiles of the selected files' coverage values.
    cvs = sorted(f["coverage"] for f in files)
    nn = len(cvs)
    q25, med, q75 = cvs[nn // 4], cvs[nn // 2], cvs[3 * nn // 4]

    # BUGFIX: guard the share computation — total_stmts can be 0 in degenerate
    # reports, which previously raised ZeroDivisionError.
    miss_share = total_miss / total_stmts * 100 if total_stmts else 0.0

    print(f"\n{'─' * width}")
    print("📊 SUMMARY")
    print(f"{'─' * width}")
    print(f" Files : {len(files)}")
    print(f" Coverage P25/P50/P75 : {q25:.1f}% | {med:.1f}% | {q75:.1f}%")
    print(
        f" Total missing lines : {total_miss:,} ({miss_share:.1f}% of their statements)"
    )
    print(f"\n{'─' * width}")
    print("🚀 COVERAGE PROJECTION (if these files reach 100%)")
    print(f"{'─' * width}")
    print(f" Current : {proj_pct:.1f}%")
    print(f" Projected: {projected:.1f}% (+{projected - proj_pct:.1f}pp)")
    print(f" Lines to cover: {total_miss:,} / {proj_total:,} total")

    print(f"\n{'─' * width}")
    print("🎯 TOP 5 BY PRIORITY")
    print(f"{'─' * width}")
    for i, f in enumerate(sorted(files, key=lambda x: x["priority"], reverse=True)[:5], 1):
        impact = "HIGH" if f["missing"] > 30 else "MED" if f["missing"] > 15 else "LOW"
        effort = "HARD" if f["missing"] > 50 else "MOD" if f["missing"] > 20 else "EASY"
        print(
            f" {i}. [{f['layer']}] {f['path']}\n"
            f" → {f['missing']} lines ({f['missing_pct']:.1f}%) | Impact: {impact} | Effort: {effort}"
        )

    print(f"\n{'═' * width}")