PyPI - cdfi-benchmark - Versions diffs - 0.1.0__py3-none-any.whl - Mend

cdfi-benchmark 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

cdfi_benchmark-0.1.0.dist-info/METADATA +143 -0
cdfi_benchmark-0.1.0.dist-info/RECORD +20 -0
cdfi_benchmark-0.1.0.dist-info/WHEEL +5 -0
cdfi_benchmark-0.1.0.dist-info/top_level.txt +2 -0
cdfibenchmark/__init__.py +28 -0
cdfibenchmark/data/__init__.py +0 -0
cdfibenchmark/data/fdic.py +227 -0
cdfibenchmark/data/schema.py +203 -0
cdfibenchmark/metrics/__init__.py +0 -0
cdfibenchmark/metrics/calculator.py +109 -0
cdfibenchmark/peers/__init__.py +0 -0
cdfibenchmark/peers/selector.py +93 -0
cdfibenchmark/report/__init__.py +0 -0
cdfibenchmark/report/generator.py +153 -0
tests/__init__.py +0 -0
tests/conftest.py +32 -0
tests/test_metrics.py +50 -0
tests/test_peers.py +27 -0
tests/test_report.py +34 -0
tests/test_schema.py +62 -0

cdfi_benchmark-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,143 @@
+Metadata-Version: 2.4
+Name: cdfi-benchmark
+Version: 0.1.0
+Summary: CDFI and MDI peer benchmarking tool using FDIC call report data — NIM, efficiency ratio, ROAA, CET1, and more
+License: MIT
+Project-URL: Homepage, https://github.com/Jaypatel1511/cdfi-benchmark
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: pandas>=1.4.0
+Requires-Dist: numpy>=1.21.0
+Requires-Dist: requests>=2.27.0
+# cdfi-benchmark 📊
+**CDFI and MDI peer benchmarking tool using FDIC call report data.**
+Pull call report financials for any FDIC-insured CDFI or MDI, compute key performance
+metrics, build a peer group of similar institutions, and generate a benchmarking report
+— using the free FDIC BankFind Suite API, no API key required.
+---
+## Why cdfi-benchmark?
+CDFI banks and MDIs benchmark their performance against peers manually — pulling
+call report data from FFIEC, computing ratios in Excel, and building comparison
+tables by hand. cdfi-benchmark automates the entire workflow in Python.
+---
+## Installation
+    pip install cdfi-benchmark
+---
+## Quickstart
+    from cdfibenchmark import (
+        get_financials, build_peer_group,
+        generate_report, summary_table,
+    )
+    # Pull call report data for Broadway Federal Bank (CERT 57542)
+    institution = get_financials(cert=57542)
+    # Build peer group — similar asset size, no API key needed
+    peers = build_peer_group(institution, same_state=True)
+    # Generate benchmarking report
+    report = generate_report(institution, peers)
+    print(report)
+    # Get results as DataFrame
+    df = summary_table(institution, peers)
+---
+## Sample Data (No API Required)
+    from cdfibenchmark import build_sample_peer_group, generate_report
+    from cdfibenchmark.data.schema import InstitutionProfile
+    institution = InstitutionProfile(
+        cert=57542,
+        name="Broadway Federal Bank",
+        city="Los Angeles",
+        state="CA",
+        report_date="20241231",
+        total_assets=655_000,
+        total_deposits=520_000,
+        net_loans=380_000,
+        net_income=1_950,
+        interest_income=28_000,
+        interest_expense=8_000,
+        non_interest_income=3_500,
+        non_interest_expense=22_000,
+        total_equity=48_000,
+        tier1_ratio=12.2,
+    )
+    peers = build_sample_peer_group(institution)
+    report = generate_report(institution, peers)
+    print(report)
+---
+## Metrics Computed
+| Metric | Description | Benchmark (Strong) |
+|--------|-------------|-------------------|
+| NIM | Net Interest Margin | >= 3.5% |
+| Efficiency Ratio | Non-interest expense / Revenue | <= 60% |
+| ROAA | Return on Average Assets | >= 1.0% |
+| ROAE | Return on Average Equity | >= 10% |
+| Tier 1 Capital Ratio | Regulatory capital ratio | >= 12% |
+| Loans-to-Deposits | Loan utilization | <= 80% |
+| NPL Ratio | Non-performing loans / Gross loans | <= 1.0% |
+| Reserve Coverage | Loan loss reserve / NPLs | >= 100% |
+---
+## Asset Size Buckets
+- micro — Under $50MM
+- small — $50MM to $250MM
+- medium — $250MM to $1B
+- large — $1B to $5B
+- mega — Over $5B
+---
+## Data Source
+FDIC BankFind Suite API — free public API, no authentication required.
+Data covers all FDIC-insured institutions with quarterly call report data
+since 1934.
+    https://banks.data.fdic.gov/api
+---
+## Running Tests
+    PYTHONPATH=. pytest tests/ -v
+27 tests across all modules.
+---
+## Who This Is For
+- CDFI banks and credit unions benchmarking against peers
+- MDI management teams preparing board reports
+- CDFI Fund analysts reviewing institution performance
+- Impact investors evaluating CDFI bank investments
+- Researchers studying community banking performance trends
+---
+## License
+MIT 2026 Jaypatel1511

cdfi_benchmark-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,20 @@
+cdfibenchmark/__init__.py,sha256=4vazC1tV7ZuXINA6VWbSL-eHWcXo2Rg1TysEwwtWdz8,900
+cdfibenchmark/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cdfibenchmark/data/fdic.py,sha256=miJzX1u1yO9d999eQsQj1Jy3T-pA8CpNTEwu3qG7B3k,6646
+cdfibenchmark/data/schema.py,sha256=06WSvMx7xqNQmYE98WqUZcVDzraS4vtI2DuaGbf18jM,7318
+cdfibenchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cdfibenchmark/metrics/calculator.py,sha256=GteLj6mu8cC0Znpimf1ASVukER1a_yw2f19oWTgtNOQ,3305
+cdfibenchmark/peers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cdfibenchmark/peers/selector.py,sha256=oWeh-J8nOsMK6aoifmaZBy0nFaIU-3iukxxIjWV73bY,3539
+cdfibenchmark/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cdfibenchmark/report/generator.py,sha256=m5VGb4KginCSbzKUrlNwxxA6F_bBq7ZQxygD1w7apDs,5028
+tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tests/conftest.py,sha256=SUqRhx7sTSQTHVJDVIiFx1sAh5NqESYX9q7-j6GN2Bs,869
+tests/test_metrics.py,sha256=LBheQHJqo0p3m_U3u5xz_ZqXRfQGXQGGFVwbb-WdZdA,1777
+tests/test_peers.py,sha256=iQ9mFp4D2fZ_YX6S0tXa3yVzIthM_nKJgPQq011zTRU,952
+tests/test_report.py,sha256=gvrhOVr8nrvwGEaT92btyFCoBHbhtQf3dxZwPd0gprU,1193
+tests/test_schema.py,sha256=CHEDIaX2wtKXogiebQCuV5_uBToIh3Mx_VWJl8zOWVo,1541
+cdfi_benchmark-0.1.0.dist-info/METADATA,sha256=Dxnpr8XdoXBlfvWGocVvJBEZ5bb9B-ZbpoBUnnGy4GY,3722
+cdfi_benchmark-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+cdfi_benchmark-0.1.0.dist-info/top_level.txt,sha256=_BLdlhCVUtZjAwIqqzeE11T9wg3HVoIHM2hu2Mgytjg,20
+cdfi_benchmark-0.1.0.dist-info/RECORD,,

cdfi_benchmark-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

cdfi_benchmark-0.1.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ cdfibenchmark
2	+ tests

cdfibenchmark/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+from cdfibenchmark.data.schema import (
+    InstitutionProfile, BenchmarkResult,
+    BENCHMARKS, ASSET_BUCKETS,
+)
+from cdfibenchmark.data.fdic import (
+    get_institution, get_financials,
+    search_institutions, get_peer_financials,
+)
+from cdfibenchmark.metrics.calculator import (
+    compute_peer_metrics, benchmark_institution, rank_institution,
+)
+from cdfibenchmark.peers.selector import (
+    build_peer_group, build_sample_peer_group,
+)
+from cdfibenchmark.report.generator import (
+    generate_report, summary_table,
+)
+# Package version string.
+__version__ = "0.1.0"
+# Public API: every name listed here is imported at the top of this module.
+__all__ = [
+    "InstitutionProfile", "BenchmarkResult",
+    "get_institution", "get_financials",
+    "search_institutions", "get_peer_financials",
+    "compute_peer_metrics", "benchmark_institution", "rank_institution",
+    "build_peer_group", "build_sample_peer_group",
+    "generate_report", "summary_table",
+    "BENCHMARKS", "ASSET_BUCKETS",
+]

cdfibenchmark/data/__init__.py ADDED Viewed

File without changes

cdfibenchmark/data/fdic.py ADDED Viewed

@@ -0,0 +1,227 @@
+"""
+FDIC BankFind Suite API wrapper.
+Free public API — no authentication required.
+"""
+import requests
+import pandas as pd
+from typing import Optional
+from cdfibenchmark.data.schema import (
+    InstitutionProfile, FDIC_API_BASE, FDIC_FIELDS
+)
+# Timeout, in seconds, passed to every requests.get call in this module.
+TIMEOUT = 30
+def get_institution(cert: int) -> Optional[dict]:
+    """
+    Fetch institution profile by FDIC certificate number.
+    """
+    url = f"{FDIC_API_BASE}/institutions"
+    params = {
+        "filters": f"CERT:{cert}",
+        "fields": "CERT,INSTNAME,CITY,STALP,ASSET,ACTIVE",
+        "limit": 1,
+        "format": "json",
+    }
+    try:
+        r = requests.get(url, params=params, timeout=TIMEOUT)
+        r.raise_for_status()
+        data = r.json()
+        institutions = data.get("data", [])
+        if institutions:
+            return institutions[0].get("data", {})
+    except Exception as e:
+        print(f"FDIC API error: {e}")
+    return None
+def search_institutions(
+    name: str = None,
+    state: str = None,
+    min_assets: int = None,
+    max_assets: int = None,
+    limit: int = 20,
+) -> pd.DataFrame:
+    """
+    Search for FDIC-insured institutions by name, state, or asset size.
+    Returns a DataFrame of matching institutions.
+    """
+    filters = ["ACTIVE:1"]
+    if name:
+        filters.append(f'INSTNAME:"{name}"')
+    if state:
+        filters.append(f"STALP:{state.upper()}")
+    if min_assets:
+        filters.append(f"ASSET:[{min_assets} TO *]")
+    if max_assets:
+        filters.append(f"ASSET:[* TO {max_assets}]")
+    url = f"{FDIC_API_BASE}/institutions"
+    params = {
+        "filters": " AND ".join(filters),
+        "fields": "CERT,INSTNAME,CITY,STALP,ASSET",
+        "limit": limit,
+        "sort_by": "ASSET",
+        "sort_order": "DESC",
+        "format": "json",
+    }
+    try:
+        r = requests.get(url, params=params, timeout=TIMEOUT)
+        r.raise_for_status()
+        data = r.json()
+        rows = [item.get("data", {}) for item in data.get("data", [])]
+        if rows:
+            df = pd.DataFrame(rows)
+            if "ASSET" in df.columns:
+                df["ASSET_MM"] = df["ASSET"] / 1_000
+            return df
+    except Exception as e:
+        print(f"FDIC API error: {e}")
+    return pd.DataFrame()
+def get_financials(
+    cert: int,
+    report_date: str = None,
+    limit: int = 4,
+) -> Optional[InstitutionProfile]:
+    """
+    Fetch call report financials for a single institution.
+    Args:
+        cert:        FDIC certificate number
+        report_date: Specific date e.g. "20241231" (default: most recent)
+        limit:       Number of periods to fetch
+    Returns:
+        InstitutionProfile with computed metrics
+    """
+    url = f"{FDIC_API_BASE}/financials"
+    fields = [
+        "REPDTE", "CERT", "INSTNAME", "CITY", "STALP",
+        "ASSET", "DEP", "LNLSNET", "NETINC",
+        "INTINC", "EINTEXP", "NONII", "NONIX", "EQ",
+        "RBCT1J", "LNLSGR", "NCLNLS", "LNATRES",
+    ]
+    filters = f"CERT:{cert}"
+    if report_date:
+        filters += f" AND REPDTE:{report_date}"
+    params = {
+        "filters": filters,
+        "fields": ",".join(fields),
+        "limit": limit,
+        "sort_by": "REPDTE",
+        "sort_order": "DESC",
+        "format": "json",
+    }
+    try:
+        r = requests.get(url, params=params, timeout=TIMEOUT)
+        r.raise_for_status()
+        data = r.json()
+        records = data.get("data", [])
+        if not records:
+            print(f"No financial data found for CERT {cert}")
+            return None
+        row = records[0].get("data", {})
+        return _parse_institution(row)
+    except Exception as e:
+        print(f"FDIC API error fetching financials for CERT {cert}: {e}")
+        return None
+def get_peer_financials(
+    state: str = None,
+    min_assets: int = None,
+    max_assets: int = None,
+    report_date: str = None,
+    limit: int = 100,
+) -> list:
+    """
+    Fetch call report financials for a group of peer institutions.
+    Args:
+        state:       Filter by state abbreviation e.g. "IL"
+        min_assets:  Minimum assets in thousands
+        max_assets:  Maximum assets in thousands
+        report_date: Report date e.g. "20241231"
+        limit:       Maximum number of institutions
+    Returns:
+        List of InstitutionProfile objects
+    """
+    url = f"{FDIC_API_BASE}/financials"
+    fields = [
+        "REPDTE", "CERT", "INSTNAME", "CITY", "STALP",
+        "ASSET", "DEP", "LNLSNET", "NETINC",
+        "INTINC", "EINTEXP", "NONII", "NONIX", "EQ",
+        "RBCT1J", "LNLSGR", "NCLNLS", "LNATRES",
+    ]
+    filters = ["ASSET:[1 TO *]"]
+    if state:
+        filters.append(f"STALP:{state.upper()}")
+    if min_assets:
+        filters.append(f"ASSET:[{min_assets} TO *]")
+    if max_assets:
+        filters.append(f"ASSET:[* TO {max_assets}]")
+    if report_date:
+        filters.append(f"REPDTE:{report_date}")
+    params = {
+        "filters": " AND ".join(filters),
+        "fields": ",".join(fields),
+        "limit": limit,
+        "sort_by": "ASSET",
+        "sort_order": "DESC",
+        "format": "json",
+    }
+    try:
+        r = requests.get(url, params=params, timeout=TIMEOUT)
+        r.raise_for_status()
+        data = r.json()
+        records = data.get("data", [])
+        return [_parse_institution(item.get("data", {}))
+                for item in records if item.get("data")]
+    except Exception as e:
+        print(f"FDIC API error fetching peer data: {e}")
+        return []
+def _parse_institution(row: dict) -> InstitutionProfile:
+    """Parse a raw FDIC API response row into an InstitutionProfile."""
+    def safe_float(key, default=0.0):
+        val = row.get(key)
+        try:
+            return float(val) if val is not None else default
+        except (TypeError, ValueError):
+            return default
+    return InstitutionProfile(
+        cert=int(row.get("CERT", 0)),
+        name=str(row.get("INSTNAME", "Unknown")),
+        city=str(row.get("CITY", "")),
+        state=str(row.get("STALP", "")),
+        report_date=str(row.get("REPDTE", "")),
+        total_assets=safe_float("ASSET"),
+        total_deposits=safe_float("DEP"),
+        net_loans=safe_float("LNLSNET"),
+        net_income=safe_float("NETINC"),
+        interest_income=safe_float("INTINC"),
+        interest_expense=safe_float("EINTEXP"),
+        non_interest_income=safe_float("NONII"),
+        non_interest_expense=safe_float("NONIX"),
+        total_equity=safe_float("EQ"),
+        tier1_ratio=safe_float("RBCT1J") or None,
+        gross_loans=safe_float("LNLSGR") or None,
+        non_current_loans=safe_float("NCLNLS") or None,
+        loan_loss_allowance=safe_float("LNATRES") or None,
+    )

cdfibenchmark/data/schema.py ADDED Viewed

@@ -0,0 +1,203 @@
+"""
+Core dataclasses and constants for CDFI benchmarking.
+Uses FDIC BankFind Suite API — free, no API key required.
+"""
+from dataclasses import dataclass, field
+from typing import Optional
+# ── FDIC BankFind Suite API ───────────────────────────────────────────────────
+FDIC_API_BASE = "https://banks.data.fdic.gov/api"
+# Key call report fields we pull
+FDIC_FIELDS = [
+    "REPDTE",       # Report date
+    "CERT",         # FDIC certificate number
+    "INSTNAME",     # Institution name
+    "CITY",         # City
+    "STALP",        # State abbreviation
+    "ASSET",        # Total assets
+    "DEP",          # Total deposits
+    "LNLSNET",      # Net loans and leases
+    "NETINC",       # Net income
+    "INTINC",       # Total interest income
+    "EINTEXP",      # Total interest expense
+    "NONII",        # Non-interest income
+    "NONIX",        # Non-interest expense
+    "EQ",           # Total equity capital
+    "RBCT1J",       # Tier 1 capital ratio (leverage)
+    "REPDTE",       # Report date
+    "LNLSNET",      # Net loans
+    "LNLSDEPM",     # Loans to deposits ratio
+    "NCLNLS",       # Non-current loans
+    "LNATRES",      # Loan loss allowance
+    "LNLSGR",       # Gross loans
+    "INTEXP",       # Interest expense
+    "NIM",          # Net interest margin
+    "ROA",          # Return on assets
+    "ROE",          # Return on equity
+    "LNLSDEPM",     # Loan to deposit ratio
+    "EFFRATIO",     # Efficiency ratio
+    "INTINC",       # Interest income
+]
+# Deduplicated list
+FDIC_FIELDS = list(dict.fromkeys(FDIC_FIELDS))
+# ── Asset Size Buckets ────────────────────────────────────────────────────────
+# Maps bucket name -> (low, high) bounds. InstitutionProfile.asset_bucket
+# compares total_assets (held in thousands) against these, treating each
+# range as low-inclusive and high-exclusive.
+ASSET_BUCKETS = {
+    "micro":    (0,           50_000),      # Under $50MM
+    "small":    (50_000,      250_000),     # $50MM - $250MM
+    "medium":   (250_000,     1_000_000),   # $250MM - $1B
+    "large":    (1_000_000,   5_000_000),   # $1B - $5B
+    "mega":     (5_000_000,   float("inf")),# Over $5B
+}
+# ── Benchmark Thresholds ──────────────────────────────────────────────────────
+BENCHMARKS = {
+    "nim":              {"good": 3.5, "warning": 2.5,  "unit": "%"},
+    "efficiency_ratio": {"good": 60,  "warning": 80,   "unit": "%", "lower_is_better": True},
+    "roaa":             {"good": 1.0, "warning": 0.5,  "unit": "%"},
+    "roae":             {"good": 10,  "warning": 5,    "unit": "%"},
+    "tier1_ratio":      {"good": 12,  "warning": 8,    "unit": "%"},
+    "loans_to_deposits":{"good": 80,  "warning": 95,   "unit": "%"},
+    "npl_ratio":        {"good": 1.0, "warning": 3.0,  "unit": "%", "lower_is_better": True},
+    "reserve_coverage": {"good": 100, "warning": 50,   "unit": "%"},
+}
+@dataclass
+class InstitutionProfile:
+    """Profile of a single FDIC-insured institution from call report data."""
+    cert: int
+    name: str
+    city: str
+    state: str
+    report_date: str
+    total_assets: float             # in thousands
+    total_deposits: float
+    net_loans: float
+    net_income: float
+    interest_income: float
+    interest_expense: float
+    non_interest_income: float
+    non_interest_expense: float
+    total_equity: float
+    tier1_ratio: Optional[float] = None
+    gross_loans: Optional[float] = None
+    non_current_loans: Optional[float] = None
+    loan_loss_allowance: Optional[float] = None
+    @property
+    def total_assets_mm(self) -> float:
+        return self.total_assets / 1_000
+    @property
+    def asset_bucket(self) -> str:
+        assets = self.total_assets
+        for bucket, (low, high) in ASSET_BUCKETS.items():
+            if low <= assets < high:
+                return bucket
+        return "mega"
+    @property
+    def nim(self) -> Optional[float]:
+        if self.total_assets and self.total_assets > 0:
+            return ((self.interest_income - self.interest_expense)
+                    / self.total_assets * 100)
+        return None
+    @property
+    def efficiency_ratio(self) -> Optional[float]:
+        revenue = self.interest_income + self.non_interest_income
+        if revenue and revenue > 0:
+            return (self.non_interest_expense / revenue) * 100
+        return None
+    @property
+    def roaa(self) -> Optional[float]:
+        if self.total_assets and self.total_assets > 0:
+            return (self.net_income / self.total_assets) * 100
+        return None
+    @property
+    def roae(self) -> Optional[float]:
+        if self.total_equity and self.total_equity > 0:
+            return (self.net_income / self.total_equity) * 100
+        return None
+    @property
+    def loans_to_deposits(self) -> Optional[float]:
+        if self.total_deposits and self.total_deposits > 0:
+            return (self.net_loans / self.total_deposits) * 100
+        return None
+    @property
+    def npl_ratio(self) -> Optional[float]:
+        if (self.non_current_loans is not None and
+                self.gross_loans and self.gross_loans > 0):
+            return (self.non_current_loans / self.gross_loans) * 100
+        return None
+    @property
+    def reserve_coverage(self) -> Optional[float]:
+        if (self.loan_loss_allowance is not None and
+                self.non_current_loans and self.non_current_loans > 0):
+            return (self.loan_loss_allowance / self.non_current_loans) * 100
+        return None
+    def metrics_dict(self) -> dict:
+        return {
+            "nim":               self.nim,
+            "efficiency_ratio":  self.efficiency_ratio,
+            "roaa":              self.roaa,
+            "roae":              self.roae,
+            "tier1_ratio":       self.tier1_ratio,
+            "loans_to_deposits": self.loans_to_deposits,
+            "npl_ratio":         self.npl_ratio,
+            "reserve_coverage":  self.reserve_coverage,
+        }
+@dataclass
+class BenchmarkResult:
+    """Benchmarking result for a single metric."""
+    metric: str
+    institution_value: Optional[float]
+    peer_median: Optional[float]
+    peer_25th: Optional[float]
+    peer_75th: Optional[float]
+    peer_count: int
+    unit: str = "%"
+    lower_is_better: bool = False
+    @property
+    def vs_median(self) -> Optional[float]:
+        if self.institution_value and self.peer_median:
+            return self.institution_value - self.peer_median
+        return None
+    @property
+    def status(self) -> str:
+        if self.institution_value is None:
+            return "N/A"
+        benchmark = BENCHMARKS.get(self.metric, {})
+        good = benchmark.get("good")
+        warning = benchmark.get("warning")
+        lower = benchmark.get("lower_is_better", False)
+        if good is None:
+            return "N/A"
+        if lower:
+            if self.institution_value <= good:
+                return "STRONG"
+            elif self.institution_value <= warning:
+                return "ADEQUATE"
+            return "WEAK"
+        else:
+            if self.institution_value >= good:
+                return "STRONG"
+            elif self.institution_value >= warning:
+                return "ADEQUATE"
+            return "WEAK"

cdfibenchmark/metrics/__init__.py ADDED Viewed

File without changes

cdfibenchmark/metrics/calculator.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""
+Compute benchmarking metrics across a peer group.
+"""
+import pandas as pd
+import numpy as np
+from cdfibenchmark.data.schema import (
+    InstitutionProfile, BenchmarkResult, BENCHMARKS
+)
+def compute_peer_metrics(peers: list) -> pd.DataFrame:
+    """
+    Compute all metrics for a list of InstitutionProfile objects.
+    Returns a DataFrame with one row per institution.
+    """
+    rows = []
+    for inst in peers:
+        if inst is None:
+            continue
+        row = {
+            "cert": inst.cert,
+            "name": inst.name,
+            "city": inst.city,
+            "state": inst.state,
+            "report_date": inst.report_date,
+            "total_assets_mm": inst.total_assets_mm,
+            "asset_bucket": inst.asset_bucket,
+        }
+        row.update(inst.metrics_dict())
+        rows.append(row)
+    return pd.DataFrame(rows)
+def benchmark_institution(
+    institution: InstitutionProfile,
+    peers: list,
+) -> list:
+    """
+    Benchmark an institution against a peer group.
+    Args:
+        institution: The institution to benchmark
+        peers:       List of peer InstitutionProfile objects
+    Returns:
+        List of BenchmarkResult objects, one per metric
+    """
+    peer_df = compute_peer_metrics(peers)
+    results = []
+    for metric, config in BENCHMARKS.items():
+        inst_value = institution.metrics_dict().get(metric)
+        if metric in peer_df.columns:
+            peer_values = peer_df[metric].dropna()
+            peer_median = float(peer_values.median()) if len(peer_values) else None
+            peer_25th   = float(peer_values.quantile(0.25)) if len(peer_values) else None
+            peer_75th   = float(peer_values.quantile(0.75)) if len(peer_values) else None
+            peer_count  = len(peer_values)
+        else:
+            peer_median = peer_25th = peer_75th = None
+            peer_count = 0
+        results.append(BenchmarkResult(
+            metric=metric,
+            institution_value=inst_value,
+            peer_median=peer_median,
+            peer_25th=peer_25th,
+            peer_75th=peer_75th,
+            peer_count=peer_count,
+            unit=config.get("unit", "%"),
+            lower_is_better=config.get("lower_is_better", False),
+        ))
+    return results
+def rank_institution(
+    institution: InstitutionProfile,
+    peers: list,
+    metric: str,
+) -> dict:
+    """
+    Rank an institution within its peer group for a specific metric.
+    Returns:
+        Dict with rank, percentile, and peer count
+    """
+    peer_df = compute_peer_metrics(peers)
+    inst_value = institution.metrics_dict().get(metric)
+    if inst_value is None or metric not in peer_df.columns:
+        return {"rank": None, "percentile": None, "peer_count": len(peers)}
+    peer_values = peer_df[metric].dropna().tolist()
+    peer_values_with_inst = sorted(peer_values + [inst_value], reverse=True)
+    lower_is_better = BENCHMARKS.get(metric, {}).get("lower_is_better", False)
+    if lower_is_better:
+        peer_values_with_inst = sorted(peer_values + [inst_value])
+    rank = peer_values_with_inst.index(inst_value) + 1
+    percentile = round((1 - rank / len(peer_values_with_inst)) * 100, 1)
+    return {
+        "rank": rank,
+        "percentile": percentile,
+        "peer_count": len(peer_values_with_inst),
+    }

cdfibenchmark/peers/__init__.py ADDED Viewed

File without changes

cdfibenchmark/peers/selector.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""
+Peer group selection logic for CDFI benchmarking.
+"""
+from cdfibenchmark.data.schema import InstitutionProfile, ASSET_BUCKETS
+from cdfibenchmark.data.fdic import get_peer_financials
+def build_peer_group(
+    institution: InstitutionProfile,
+    same_state: bool = False,
+    asset_tolerance: float = 0.5,
+    min_peers: int = 10,
+    max_peers: int = 50,
+    report_date: str = None,
+) -> list:
+    """
+    Build a peer group for an institution based on asset size and geography.
+    Args:
+        institution:       The institution to benchmark
+        same_state:        Restrict peers to same state
+        asset_tolerance:   +/- tolerance for asset size (0.5 = 50%)
+        min_peers:         Minimum number of peers to return
+        max_peers:         Maximum number of peers to return
+        report_date:       Report date for peer financials
+    Returns:
+        List of InstitutionProfile objects (excluding the institution itself)
+    """
+    assets = institution.total_assets
+    min_assets = int(assets * (1 - asset_tolerance))
+    max_assets = int(assets * (1 + asset_tolerance))
+    state = institution.state if same_state else None
+    peers = get_peer_financials(
+        state=state,
+        min_assets=min_assets,
+        max_assets=max_assets,
+        report_date=report_date,
+        limit=max_peers + 5,
+    )
+    # Exclude the institution itself
+    peers = [p for p in peers if p.cert != institution.cert]
+    # If not enough peers, widen the asset range
+    if len(peers) < min_peers and same_state:
+        peers = get_peer_financials(
+            min_assets=min_assets,
+            max_assets=max_assets,
+            report_date=report_date,
+            limit=max_peers + 5,
+        )
+        peers = [p for p in peers if p.cert != institution.cert]
+    return peers[:max_peers]
+def build_sample_peer_group(institution: InstitutionProfile) -> list:
+    """
+    Build a synthetic peer group for testing without API calls.
+    Generates realistic peer institutions based on the target institution.
+    """
+    import random
+    import copy
+    random.seed(42)
+    peers = []
+    for i in range(20):
+        scale = random.uniform(0.6, 1.4)
+        peer = InstitutionProfile(
+            cert=90000 + i,
+            name=f"Community Bank {i+1}",
+            city="Chicago",
+            state=institution.state,
+            report_date=institution.report_date,
+            total_assets=institution.total_assets * scale,
+            total_deposits=institution.total_deposits * scale * random.uniform(0.85, 1.1),
+            net_loans=institution.net_loans * scale * random.uniform(0.7, 1.2),
+            net_income=institution.net_income * scale * random.uniform(0.5, 1.5),
+            interest_income=institution.interest_income * scale * random.uniform(0.9, 1.1),
+            interest_expense=institution.interest_expense * scale * random.uniform(0.8, 1.2),
+            non_interest_income=institution.non_interest_income * scale * random.uniform(0.7, 1.3),
+            non_interest_expense=institution.non_interest_expense * scale * random.uniform(0.85, 1.15),
+            total_equity=institution.total_equity * scale * random.uniform(0.8, 1.2),
+            tier1_ratio=random.uniform(8.0, 18.0),
+            gross_loans=institution.net_loans * scale * 1.05,
+            non_current_loans=institution.net_loans * scale * random.uniform(0.005, 0.04),
+            loan_loss_allowance=institution.net_loans * scale * random.uniform(0.008, 0.02),
+        )
+        peers.append(peer)
+    return peers

cdfibenchmark/report/__init__.py ADDED Viewed

File without changes

cdfibenchmark/report/generator.py ADDED Viewed

@@ -0,0 +1,153 @@
+"""
+Generate CDFI peer benchmarking reports.
+"""
+import pandas as pd
+from cdfibenchmark.data.schema import (
+    InstitutionProfile, BenchmarkResult, BENCHMARKS
+)
+from cdfibenchmark.metrics.calculator import (
+    compute_peer_metrics, benchmark_institution, rank_institution
+)
+# Display names for metric keys, used for report table rows and headings.
+# Lookups in this module fall back to the raw key when a metric has no entry.
+METRIC_LABELS = {
+    "nim":               "Net Interest Margin (NIM)",
+    "efficiency_ratio":  "Efficiency Ratio",
+    "roaa":              "Return on Avg Assets (ROAA)",
+    "roae":              "Return on Avg Equity (ROAE)",
+    "tier1_ratio":       "Tier 1 Capital Ratio",
+    "loans_to_deposits": "Loans-to-Deposits",
+    "npl_ratio":         "Non-Performing Loan Ratio",
+    "reserve_coverage":  "Loan Loss Reserve Coverage",
+}
+def generate_report(
+    institution: InstitutionProfile,
+    peers: list,
+    title: str = None,
+) -> str:
+    """
+    Generate a full peer benchmarking report as a Markdown string.
+    """
+    results = benchmark_institution(institution, peers)
+    lines = [
+        f"# CDFI Peer Benchmarking Report",
+        f"## {title or institution.name}",
+        "",
+        f"**Institution:** {institution.name}",
+        f"**Location:** {institution.city}, {institution.state}",
+        f"**Total Assets:** ${institution.total_assets_mm:.1f}MM",
+        f"**Asset Bucket:** {institution.asset_bucket.title()}",
+        f"**Report Date:** {institution.report_date}",
+        f"**Peer Group Size:** {len(peers)} institutions",
+        "",
+        "---",
+        "",
+        "## Performance Summary",
+        "",
+        "| Metric | Institution | Peer Median | 25th Pctile | 75th Pctile | Status |",
+        "|--------|-------------|-------------|-------------|-------------|--------|",
+    ]
+    for result in results:
+        label = METRIC_LABELS.get(result.metric, result.metric)
+        inst_val = f"{result.institution_value:.2f}%" if result.institution_value else "N/A"
+        median = f"{result.peer_median:.2f}%" if result.peer_median else "N/A"
+        p25 = f"{result.peer_25th:.2f}%" if result.peer_25th else "N/A"
+        p75 = f"{result.peer_75th:.2f}%" if result.peer_75th else "N/A"
+        status_emoji = {
+            "STRONG": "✅ STRONG",
+            "ADEQUATE": "⚠️ ADEQUATE",
+            "WEAK": "❌ WEAK",
+            "N/A": "—",
+        }.get(result.status, result.status)
+        lines.append(
+            f"| {label} | {inst_val} | {median} | {p25} | {p75} | {status_emoji} |"
+        )
+    lines += [
+        "",
+        "---",
+        "",
+        "## Metric Detail",
+        "",
+    ]
+    for result in results:
+        label = METRIC_LABELS.get(result.metric, result.metric)
+        lines.append(f"### {label}")
+        lines.append("")
+        if result.institution_value is not None:
+            lines.append(f"**Institution Value:** {result.institution_value:.2f}%")
+        if result.peer_median is not None:
+            lines.append(f"**Peer Median:** {result.peer_median:.2f}%")
+        if result.vs_median is not None:
+            direction = "above" if result.vs_median > 0 else "below"
+            lines.append(
+                f"**vs Peer Median:** {abs(result.vs_median):.2f}% {direction} median"
+            )
+        benchmark = BENCHMARKS.get(result.metric, {})
+        good = benchmark.get("good")
+        warning = benchmark.get("warning")
+        lower = benchmark.get("lower_is_better", False)
+        if good and warning:
+            if lower:
+                lines.append(
+                    f"**Benchmark:** Strong <= {good}% | Adequate <= {warning}%"
+                )
+            else:
+                lines.append(
+                    f"**Benchmark:** Strong >= {good}% | Adequate >= {warning}%"
+                )
+        lines.append(f"**Status:** {result.status}")
+        lines.append("")
+    lines += [
+        "---",
+        "",
+        "## Peer Group Summary",
+        "",
+    ]
+    peer_df = compute_peer_metrics(peers)
+    lines.append(f"**Peer Count:** {len(peers)}")
+    if "total_assets_mm" in peer_df.columns:
+        lines.append(
+            f"**Peer Asset Range:** "
+            f"${peer_df['total_assets_mm'].min():.1f}MM – "
+            f"${peer_df['total_assets_mm'].max():.1f}MM"
+        )
+    if "state" in peer_df.columns:
+        states = peer_df["state"].nunique()
+        lines.append(f"**States Represented:** {states}")
+    lines.append("")
+    return "\n".join(lines)
+def summary_table(
+    institution: InstitutionProfile,
+    peers: list,
+) -> pd.DataFrame:
+    """Return benchmarking results as a pandas DataFrame."""
+    results = benchmark_institution(institution, peers)
+    rows = []
+    for r in results:
+        rows.append({
+            "metric": METRIC_LABELS.get(r.metric, r.metric),
+            "institution": r.institution_value,
+            "peer_median": r.peer_median,
+            "peer_25th": r.peer_25th,
+            "peer_75th": r.peer_75th,
+            "vs_median": r.vs_median,
+            "status": r.status,
+            "peer_count": r.peer_count,
+        })
+    return pd.DataFrame(rows)

tests/__init__.py ADDED Viewed

File without changes

tests/conftest.py ADDED Viewed

@@ -0,0 +1,32 @@
+import pytest
+from cdfibenchmark.data.schema import InstitutionProfile
+from cdfibenchmark.peers.selector import build_sample_peer_group
+@pytest.fixture
+def sample_institution():
+    return InstitutionProfile(
+        cert=57542,
+        name="Broadway Federal Bank",
+        city="Los Angeles",
+        state="CA",
+        report_date="20241231",
+        total_assets=655_000,
+        total_deposits=520_000,
+        net_loans=380_000,
+        net_income=1_950,
+        interest_income=28_000,
+        interest_expense=8_000,
+        non_interest_income=3_500,
+        non_interest_expense=22_000,
+        total_equity=48_000,
+        tier1_ratio=12.2,
+        gross_loans=390_000,
+        non_current_loans=5_850,
+        loan_loss_allowance=7_800,
+    )
+@pytest.fixture
+def sample_peers(sample_institution):
+    return build_sample_peer_group(sample_institution)

tests/test_metrics.py ADDED Viewed

@@ -0,0 +1,50 @@
+import pytest
+import pandas as pd
+from cdfibenchmark.metrics.calculator import (
+    compute_peer_metrics, benchmark_institution, rank_institution
+)
+from cdfibenchmark.data.schema import BenchmarkResult
+def test_peer_metrics_returns_dataframe(sample_institution, sample_peers):
+    df = compute_peer_metrics(sample_peers)
+    assert isinstance(df, pd.DataFrame)
+    assert len(df) == len(sample_peers)
+def test_peer_metrics_has_columns(sample_institution, sample_peers):
+    df = compute_peer_metrics(sample_peers)
+    assert "nim" in df.columns
+    assert "efficiency_ratio" in df.columns
+    assert "roaa" in df.columns
+def test_benchmark_returns_list(sample_institution, sample_peers):
+    results = benchmark_institution(sample_institution, sample_peers)
+    assert isinstance(results, list)
+    assert len(results) > 0
+def test_benchmark_result_type(sample_institution, sample_peers):
+    results = benchmark_institution(sample_institution, sample_peers)
+    assert all(isinstance(r, BenchmarkResult) for r in results)
+def test_benchmark_has_peer_median(sample_institution, sample_peers):
+    results = benchmark_institution(sample_institution, sample_peers)
+    nim_result = next(r for r in results if r.metric == "nim")
+    assert nim_result.peer_median is not None
+def test_benchmark_status_valid(sample_institution, sample_peers):
+    results = benchmark_institution(sample_institution, sample_peers)
+    valid_statuses = {"STRONG", "ADEQUATE", "WEAK", "N/A"}
+    for r in results:
+        assert r.status in valid_statuses
+def test_rank_institution(sample_institution, sample_peers):
+    result = rank_institution(sample_institution, sample_peers, "nim")
+    assert "rank" in result
+    assert "percentile" in result
+    assert result["peer_count"] > 0

tests/test_peers.py ADDED Viewed

@@ -0,0 +1,27 @@
+import pytest
+from cdfibenchmark.peers.selector import build_sample_peer_group
+from cdfibenchmark.data.schema import InstitutionProfile
+def test_sample_peer_group_returns_list(sample_institution):
+    peers = build_sample_peer_group(sample_institution)
+    assert isinstance(peers, list)
+    assert len(peers) > 0
+def test_sample_peers_are_institution_profiles(sample_institution):
+    peers = build_sample_peer_group(sample_institution)
+    assert all(isinstance(p, InstitutionProfile) for p in peers)
+def test_sample_peers_exclude_institution(sample_institution):
+    peers = build_sample_peer_group(sample_institution)
+    certs = [p.cert for p in peers]
+    assert sample_institution.cert not in certs
+def test_sample_peers_similar_assets(sample_institution):
+    peers = build_sample_peer_group(sample_institution)
+    for peer in peers:
+        ratio = peer.total_assets / sample_institution.total_assets
+        assert 0.1 < ratio < 5.0

tests/test_report.py ADDED Viewed

@@ -0,0 +1,34 @@
+import pytest
+import pandas as pd
+from cdfibenchmark.report.generator import generate_report, summary_table
+def test_generate_report_returns_string(sample_institution, sample_peers):
+    report = generate_report(sample_institution, sample_peers)
+    assert isinstance(report, str)
+    assert len(report) > 100
+def test_report_contains_institution_name(sample_institution, sample_peers):
+    report = generate_report(sample_institution, sample_peers)
+    assert sample_institution.name in report
+def test_report_contains_sections(sample_institution, sample_peers):
+    report = generate_report(sample_institution, sample_peers)
+    assert "Performance Summary" in report
+    assert "Metric Detail" in report
+    assert "Peer Group Summary" in report
+def test_summary_table_returns_dataframe(sample_institution, sample_peers):
+    df = summary_table(sample_institution, sample_peers)
+    assert isinstance(df, pd.DataFrame)
+    assert len(df) > 0
+    assert "metric" in df.columns
+    assert "status" in df.columns
+def test_report_contains_nim(sample_institution, sample_peers):
+    report = generate_report(sample_institution, sample_peers)
+    assert "Net Interest Margin" in report

tests/test_schema.py ADDED Viewed

@@ -0,0 +1,62 @@
+import pytest
+from cdfibenchmark.data.schema import InstitutionProfile
+def test_institution_created(sample_institution):
+    assert sample_institution.name == "Broadway Federal Bank"
+    assert sample_institution.cert == 57542
+def test_total_assets_mm(sample_institution):
+    assert sample_institution.total_assets_mm == pytest.approx(655.0)
+def test_asset_bucket(sample_institution):
+    assert sample_institution.asset_bucket == "medium"
+def test_nim_computed(sample_institution):
+    nim = sample_institution.nim
+    assert nim is not None
+    assert nim > 0
+def test_efficiency_ratio_computed(sample_institution):
+    er = sample_institution.efficiency_ratio
+    assert er is not None
+    assert 0 < er < 200
+def test_roaa_computed(sample_institution):
+    roaa = sample_institution.roaa
+    assert roaa is not None
+def test_roae_computed(sample_institution):
+    roae = sample_institution.roae
+    assert roae is not None
+def test_loans_to_deposits(sample_institution):
+    ltd = sample_institution.loans_to_deposits
+    assert ltd is not None
+    assert ltd > 0
+def test_npl_ratio(sample_institution):
+    npl = sample_institution.npl_ratio
+    assert npl is not None
+    assert npl > 0
+def test_reserve_coverage(sample_institution):
+    rc = sample_institution.reserve_coverage
+    assert rc is not None
+    assert rc > 0
+def test_metrics_dict(sample_institution):
+    metrics = sample_institution.metrics_dict()
+    assert "nim" in metrics
+    assert "efficiency_ratio" in metrics
+    assert "roaa" in metrics