cdfi-benchmark 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: cdfi-benchmark
3
+ Version: 0.1.0
4
+ Summary: CDFI and MDI peer benchmarking tool using FDIC call report data — NIM, efficiency ratio, ROAA, CET1, and more
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/Jaypatel1511/cdfi-benchmark
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pandas>=1.4.0
10
+ Requires-Dist: numpy>=1.21.0
11
+ Requires-Dist: requests>=2.27.0
12
+
13
+ # cdfi-benchmark 📊
14
+
15
+ **CDFI and MDI peer benchmarking tool using FDIC call report data.**
16
+
17
+ Pull call report financials for any FDIC-insured CDFI or MDI, compute key performance
18
+ metrics, build a peer group of similar institutions, and generate a benchmarking report
19
+ — using the free FDIC BankFind Suite API, no API key required.
20
+
21
+ ---
22
+
23
+ ## Why cdfi-benchmark?
24
+
25
+ CDFI banks and MDIs benchmark their performance against peers manually — pulling
26
+ call report data from FFIEC, computing ratios in Excel, and building comparison
27
+ tables by hand. cdfi-benchmark automates the entire workflow in Python.
28
+
29
+ ---
30
+
31
+ ## Installation
32
+
33
+ pip install cdfi-benchmark
34
+
35
+ ---
36
+
37
+ ## Quickstart
38
+
39
+ from cdfibenchmark import (
40
+ get_financials, build_peer_group,
41
+ generate_report, summary_table,
42
+ )
43
+
44
+ # Pull call report data for Broadway Federal Bank (CERT 57542)
45
+ institution = get_financials(cert=57542)
46
+
47
+ # Build peer group — similar asset size, no API key needed
48
+ peers = build_peer_group(institution, same_state=True)
49
+
50
+ # Generate benchmarking report
51
+ report = generate_report(institution, peers)
52
+ print(report)
53
+
54
+ # Get results as DataFrame
55
+ df = summary_table(institution, peers)
56
+
57
+ ---
58
+
59
+ ## Sample Data (No API Required)
60
+
61
+ from cdfibenchmark import build_sample_peer_group, generate_report
62
+ from cdfibenchmark.data.schema import InstitutionProfile
63
+
64
+ institution = InstitutionProfile(
65
+ cert=57542,
66
+ name="Broadway Federal Bank",
67
+ city="Los Angeles",
68
+ state="CA",
69
+ report_date="20241231",
70
+ total_assets=655_000,
71
+ total_deposits=520_000,
72
+ net_loans=380_000,
73
+ net_income=1_950,
74
+ interest_income=28_000,
75
+ interest_expense=8_000,
76
+ non_interest_income=3_500,
77
+ non_interest_expense=22_000,
78
+ total_equity=48_000,
79
+ tier1_ratio=12.2,
80
+ )
81
+
82
+ peers = build_sample_peer_group(institution)
83
+ report = generate_report(institution, peers)
84
+ print(report)
85
+
86
+ ---
87
+
88
+ ## Metrics Computed
89
+
90
+ | Metric | Description | Benchmark (Strong) |
91
+ |--------|-------------|-------------------|
92
+ | NIM | Net Interest Margin | >= 3.5% |
93
+ | Efficiency Ratio | Non-interest expense / Revenue | <= 60% |
94
+ | ROAA | Return on Average Assets | >= 1.0% |
95
+ | ROAE | Return on Average Equity | >= 10% |
96
+ | Tier 1 Capital Ratio | Regulatory capital ratio | >= 12% |
97
+ | Loans-to-Deposits | Loan utilization | <= 80% |
98
+ | NPL Ratio | Non-performing loans / Gross loans | <= 1.0% |
99
+ | Reserve Coverage | Loan loss reserve / NPLs | >= 100% |
100
+
101
+ ---
102
+
103
+ ## Asset Size Buckets
104
+
105
+ - micro — Under $50MM
106
+ - small — $50MM to $250MM
107
+ - medium — $250MM to $1B
108
+ - large — $1B to $5B
109
+ - mega — Over $5B
110
+
111
+ ---
112
+
113
+ ## Data Source
114
+
115
+ FDIC BankFind Suite API — free public API, no authentication required.
116
+ Data covers all FDIC-insured institutions with quarterly call report data
117
+ since 1934.
118
+
119
+ https://banks.data.fdic.gov/api
120
+
121
+ ---
122
+
123
+ ## Running Tests
124
+
125
+ PYTHONPATH=. pytest tests/ -v
126
+
127
+ 27 tests across all modules.
128
+
129
+ ---
130
+
131
+ ## Who This Is For
132
+
133
+ - CDFI banks and credit unions benchmarking against peers
134
+ - MDI management teams preparing board reports
135
+ - CDFI Fund analysts reviewing institution performance
136
+ - Impact investors evaluating CDFI bank investments
137
+ - Researchers studying community banking performance trends
138
+
139
+ ---
140
+
141
+ ## License
142
+
143
+ MIT 2026 Jaypatel1511
@@ -0,0 +1,20 @@
1
+ cdfibenchmark/__init__.py,sha256=4vazC1tV7ZuXINA6VWbSL-eHWcXo2Rg1TysEwwtWdz8,900
2
+ cdfibenchmark/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ cdfibenchmark/data/fdic.py,sha256=miJzX1u1yO9d999eQsQj1Jy3T-pA8CpNTEwu3qG7B3k,6646
4
+ cdfibenchmark/data/schema.py,sha256=06WSvMx7xqNQmYE98WqUZcVDzraS4vtI2DuaGbf18jM,7318
5
+ cdfibenchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ cdfibenchmark/metrics/calculator.py,sha256=GteLj6mu8cC0Znpimf1ASVukER1a_yw2f19oWTgtNOQ,3305
7
+ cdfibenchmark/peers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ cdfibenchmark/peers/selector.py,sha256=oWeh-J8nOsMK6aoifmaZBy0nFaIU-3iukxxIjWV73bY,3539
9
+ cdfibenchmark/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ cdfibenchmark/report/generator.py,sha256=m5VGb4KginCSbzKUrlNwxxA6F_bBq7ZQxygD1w7apDs,5028
11
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ tests/conftest.py,sha256=SUqRhx7sTSQTHVJDVIiFx1sAh5NqESYX9q7-j6GN2Bs,869
13
+ tests/test_metrics.py,sha256=LBheQHJqo0p3m_U3u5xz_ZqXRfQGXQGGFVwbb-WdZdA,1777
14
+ tests/test_peers.py,sha256=iQ9mFp4D2fZ_YX6S0tXa3yVzIthM_nKJgPQq011zTRU,952
15
+ tests/test_report.py,sha256=gvrhOVr8nrvwGEaT92btyFCoBHbhtQf3dxZwPd0gprU,1193
16
+ tests/test_schema.py,sha256=CHEDIaX2wtKXogiebQCuV5_uBToIh3Mx_VWJl8zOWVo,1541
17
+ cdfi_benchmark-0.1.0.dist-info/METADATA,sha256=Dxnpr8XdoXBlfvWGocVvJBEZ5bb9B-ZbpoBUnnGy4GY,3722
18
+ cdfi_benchmark-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
19
+ cdfi_benchmark-0.1.0.dist-info/top_level.txt,sha256=_BLdlhCVUtZjAwIqqzeE11T9wg3HVoIHM2hu2Mgytjg,20
20
+ cdfi_benchmark-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ cdfibenchmark
2
+ tests
@@ -0,0 +1,28 @@
1
+ from cdfibenchmark.data.schema import (
2
+ InstitutionProfile, BenchmarkResult,
3
+ BENCHMARKS, ASSET_BUCKETS,
4
+ )
5
+ from cdfibenchmark.data.fdic import (
6
+ get_institution, get_financials,
7
+ search_institutions, get_peer_financials,
8
+ )
9
+ from cdfibenchmark.metrics.calculator import (
10
+ compute_peer_metrics, benchmark_institution, rank_institution,
11
+ )
12
+ from cdfibenchmark.peers.selector import (
13
+ build_peer_group, build_sample_peer_group,
14
+ )
15
+ from cdfibenchmark.report.generator import (
16
+ generate_report, summary_table,
17
+ )
18
+
19
+ __version__ = "0.1.0"
20
+ __all__ = [
21
+ "InstitutionProfile", "BenchmarkResult",
22
+ "get_institution", "get_financials",
23
+ "search_institutions", "get_peer_financials",
24
+ "compute_peer_metrics", "benchmark_institution", "rank_institution",
25
+ "build_peer_group", "build_sample_peer_group",
26
+ "generate_report", "summary_table",
27
+ "BENCHMARKS", "ASSET_BUCKETS",
28
+ ]
File without changes
@@ -0,0 +1,227 @@
1
+ """
2
+ FDIC BankFind Suite API wrapper.
3
+ Free public API — no authentication required.
4
+ """
5
+ import requests
6
+ import pandas as pd
7
+ from typing import Optional
8
+ from cdfibenchmark.data.schema import (
9
+ InstitutionProfile, FDIC_API_BASE, FDIC_FIELDS
10
+ )
11
+
12
+ TIMEOUT = 30
13
+
14
+
15
def get_institution(cert: int) -> Optional[dict]:
    """
    Look up one institution's profile by FDIC certificate number.

    Queries the ``/institutions`` endpoint filtered on ``CERT`` and returns
    the ``data`` payload of the first match (the requested fields are CERT,
    INSTNAME, CITY, STALP, ASSET and ACTIVE).

    Returns:
        The institution dict, or None when nothing matched or when the
        request / response handling raised (the error is printed).
    """
    query = {
        "filters": f"CERT:{cert}",
        "fields": "CERT,INSTNAME,CITY,STALP,ASSET,ACTIVE",
        "limit": 1,
        "format": "json",
    }
    endpoint = f"{FDIC_API_BASE}/institutions"
    try:
        response = requests.get(endpoint, params=query, timeout=TIMEOUT)
        response.raise_for_status()
        matches = response.json().get("data", [])
        # Each element wraps the actual record under its own "data" key.
        return matches[0].get("data", {}) if matches else None
    except Exception as e:
        print(f"FDIC API error: {e}")
        return None
36
+
37
+
38
def search_institutions(
    name: Optional[str] = None,
    state: Optional[str] = None,
    min_assets: Optional[int] = None,
    max_assets: Optional[int] = None,
    limit: int = 20,
) -> pd.DataFrame:
    """
    Search for active FDIC-insured institutions by name, state, or asset size.

    Args:
        name:       Institution name to match (sent as a quoted INSTNAME filter)
        state:      State abbreviation; upper-cased before being sent
        min_assets: Lower bound for the ASSET filter (None = no lower bound)
        max_assets: Upper bound for the ASSET filter (None = no upper bound)
        limit:      Maximum number of rows requested from the API

    Returns:
        DataFrame of matching institutions sorted by ASSET descending, with an
        extra ASSET_MM column (ASSET / 1,000) when ASSET is present. An empty
        DataFrame is returned when nothing matches or when the request fails
        (the error is printed).
    """
    filters = ["ACTIVE:1"]
    if name:
        filters.append(f'INSTNAME:"{name}"')
    if state:
        filters.append(f"STALP:{state.upper()}")
    # Compare against None rather than truthiness so an explicit bound of 0
    # is honoured instead of being silently dropped.
    if min_assets is not None:
        filters.append(f"ASSET:[{min_assets} TO *]")
    if max_assets is not None:
        filters.append(f"ASSET:[* TO {max_assets}]")

    url = f"{FDIC_API_BASE}/institutions"
    params = {
        "filters": " AND ".join(filters),
        "fields": "CERT,INSTNAME,CITY,STALP,ASSET",
        "limit": limit,
        "sort_by": "ASSET",
        "sort_order": "DESC",
        "format": "json",
    }

    try:
        r = requests.get(url, params=params, timeout=TIMEOUT)
        r.raise_for_status()
        data = r.json()
        # Each element wraps the actual record under its own "data" key.
        rows = [item.get("data", {}) for item in data.get("data", [])]
        if rows:
            df = pd.DataFrame(rows)
            if "ASSET" in df.columns:
                df["ASSET_MM"] = df["ASSET"] / 1_000
            return df
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to the
        # empty result instead of propagating.
        print(f"FDIC API error: {e}")

    return pd.DataFrame()
83
+
84
+
85
def get_financials(
    cert: int,
    report_date: str = None,
    limit: int = 4,
) -> Optional[InstitutionProfile]:
    """
    Fetch call report financials for one institution.

    Up to ``limit`` reporting periods are requested, newest first, but only
    the first (most recent) record is parsed and returned.

    Args:
        cert: FDIC certificate number
        report_date: Specific date e.g. "20241231" (default: most recent)
        limit: Number of periods to request from the API

    Returns:
        InstitutionProfile built from the newest record, or None when no
        record exists or the request fails (a message is printed either way)
    """
    requested_fields = (
        "REPDTE", "CERT", "INSTNAME", "CITY", "STALP",
        "ASSET", "DEP", "LNLSNET", "NETINC",
        "INTINC", "EINTEXP", "NONII", "NONIX", "EQ",
        "RBCT1J", "LNLSGR", "NCLNLS", "LNATRES",
    )

    clauses = [f"CERT:{cert}"]
    if report_date:
        clauses.append(f"REPDTE:{report_date}")

    query = {
        "filters": " AND ".join(clauses),
        "fields": ",".join(requested_fields),
        "limit": limit,
        "sort_by": "REPDTE",
        "sort_order": "DESC",
        "format": "json",
    }

    try:
        response = requests.get(
            f"{FDIC_API_BASE}/financials", params=query, timeout=TIMEOUT
        )
        response.raise_for_status()
        records = response.json().get("data", [])

        if records:
            # Sorted by REPDTE descending, so index 0 is the latest period.
            return _parse_institution(records[0].get("data", {}))

        print(f"No financial data found for CERT {cert}")
        return None

    except Exception as e:
        print(f"FDIC API error fetching financials for CERT {cert}: {e}")
        return None
138
+
139
+
140
def get_peer_financials(
    state: str = None,
    min_assets: int = None,
    max_assets: int = None,
    report_date: str = None,
    limit: int = 100,
) -> list:
    """
    Fetch call report financials for a group of peer institutions.

    The financials endpoint holds one record per institution per reporting
    period, so a query without ``report_date`` can return the same CERT
    several times. Only the record with the latest REPDTE is kept for each
    CERT, so every institution appears at most once in the result.

    Args:
        state:       Filter by state abbreviation e.g. "IL"
        min_assets:  Minimum assets in thousands
        max_assets:  Maximum assets in thousands
        report_date: Report date e.g. "20241231"
        limit:       Maximum number of records requested from the API. The
                     limit is applied before de-duplication, so fewer than
                     ``limit`` institutions may come back when
                     ``report_date`` is omitted.

    Returns:
        List of InstitutionProfile objects, ordered by first appearance in
        the API response (ASSET descending). Empty list on request failure
        (the error is printed).
    """
    url = f"{FDIC_API_BASE}/financials"
    fields = [
        "REPDTE", "CERT", "INSTNAME", "CITY", "STALP",
        "ASSET", "DEP", "LNLSNET", "NETINC",
        "INTINC", "EINTEXP", "NONII", "NONIX", "EQ",
        "RBCT1J", "LNLSGR", "NCLNLS", "LNATRES",
    ]

    filters = ["ASSET:[1 TO *]"]
    if state:
        filters.append(f"STALP:{state.upper()}")
    if min_assets:
        filters.append(f"ASSET:[{min_assets} TO *]")
    if max_assets:
        filters.append(f"ASSET:[* TO {max_assets}]")
    if report_date:
        filters.append(f"REPDTE:{report_date}")

    params = {
        "filters": " AND ".join(filters),
        "fields": ",".join(fields),
        "limit": limit,
        "sort_by": "ASSET",
        "sort_order": "DESC",
        "format": "json",
    }

    try:
        r = requests.get(url, params=params, timeout=TIMEOUT)
        r.raise_for_status()
        records = r.json().get("data", [])

        rows = []
        position_by_cert = {}
        for item in records:
            row = item.get("data")
            if not row:
                continue
            cert = row.get("CERT")
            if cert is None:
                # Cannot be matched to any other row; keep it as-is.
                rows.append(row)
                continue
            seen_at = position_by_cert.get(cert)
            if seen_at is None:
                position_by_cert[cert] = len(rows)
                rows.append(row)
            elif str(row.get("REPDTE", "")) > str(rows[seen_at].get("REPDTE", "")):
                # REPDTE is YYYYMMDD, so string comparison orders by date.
                rows[seen_at] = row

        return [_parse_institution(row) for row in rows]
    except Exception as e:
        print(f"FDIC API error fetching peer data: {e}")
        return []
197
+
198
+
199
def _parse_institution(row: dict) -> InstitutionProfile:
    """
    Parse a raw FDIC API response row into an InstitutionProfile.

    Missing or unparseable numeric fields fall back to 0.0 for the required
    amounts. For non-current loans and the loan loss allowance a reported
    zero is kept as 0.0 (it is a real value: no problem loans / no reserve)
    and only a missing or unparseable value becomes None.
    """
    def safe_float(key, default=0.0):
        val = row.get(key)
        try:
            return float(val) if val is not None else default
        except (TypeError, ValueError):
            return default

    def safe_text(key, default=""):
        # A key that is present but null would otherwise become the text "None".
        val = row.get(key)
        return str(val) if val is not None else default

    return InstitutionProfile(
        # A null or garbled CERT falls back to 0 (the same default the
        # missing-key case already used) instead of raising and discarding
        # the whole batch being parsed.
        cert=int(safe_float("CERT")),
        name=safe_text("INSTNAME", "Unknown"),
        city=safe_text("CITY"),
        state=safe_text("STALP"),
        report_date=safe_text("REPDTE"),
        total_assets=safe_float("ASSET"),
        total_deposits=safe_float("DEP"),
        net_loans=safe_float("LNLSNET"),
        net_income=safe_float("NETINC"),
        interest_income=safe_float("INTINC"),
        interest_expense=safe_float("EINTEXP"),
        non_interest_income=safe_float("NONII"),
        non_interest_expense=safe_float("NONIX"),
        total_equity=safe_float("EQ"),
        # NOTE(review): confirm against the FDIC field dictionary that RBCT1J
        # is a percentage ratio and not a dollar amount before trusting the
        # tier1_ratio benchmark.
        tier1_ratio=safe_float("RBCT1J") or None,
        gross_loans=safe_float("LNLSGR") or None,
        non_current_loans=safe_float("NCLNLS", None),
        loan_loss_allowance=safe_float("LNATRES", None),
    )
@@ -0,0 +1,203 @@
1
+ """
2
+ Core dataclasses and constants for CDFI benchmarking.
3
+ Uses FDIC BankFind Suite API — free, no API key required.
4
+ """
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional
7
+
8
+
9
# ── FDIC BankFind Suite API ───────────────────────────────────────────────────
FDIC_API_BASE = "https://banks.data.fdic.gov/api"

# Call report fields of interest, each listed once, in request order.
FDIC_FIELDS = [
    # Identification
    "REPDTE",    # Report date
    "CERT",      # FDIC certificate number
    "INSTNAME",  # Institution name
    "CITY",      # City
    "STALP",     # State abbreviation
    # Balance sheet and income statement
    "ASSET",     # Total assets
    "DEP",       # Total deposits
    "LNLSNET",   # Net loans and leases
    "NETINC",    # Net income
    "INTINC",    # Total interest income
    "EINTEXP",   # Total interest expense
    "NONII",     # Non-interest income
    "NONIX",     # Non-interest expense
    "EQ",        # Total equity capital
    # Capital and credit quality
    "RBCT1J",    # Tier 1 capital ratio (leverage)
    "LNLSDEPM",  # Loans to deposits ratio
    "NCLNLS",    # Non-current loans
    "LNATRES",   # Loan loss allowance
    "LNLSGR",    # Gross loans
    "INTEXP",    # Interest expense
    # Pre-computed ratios
    "NIM",       # Net interest margin
    "ROA",       # Return on assets
    "ROE",       # Return on equity
    "EFFRATIO",  # Efficiency ratio
]
46
+
47
# ── Asset Size Buckets ────────────────────────────────────────────────────────
# Bounds are (inclusive low, exclusive high) in thousands of dollars.
ASSET_BUCKETS = {
    "micro":  (0,         50_000),        # Under $50MM
    "small":  (50_000,    250_000),       # $50MM - $250MM
    "medium": (250_000,   1_000_000),     # $250MM - $1B
    "large":  (1_000_000, 5_000_000),     # $1B - $5B
    "mega":   (5_000_000, float("inf")),  # Over $5B
}

# ── Benchmark Thresholds ──────────────────────────────────────────────────────
# "good" is the STRONG cut-off and "warning" the ADEQUATE cut-off. Metrics
# flagged lower_is_better are graded with <=, all others with >=.
BENCHMARKS = {
    "nim":              {"good": 3.5, "warning": 2.5, "unit": "%"},
    "efficiency_ratio": {"good": 60,  "warning": 80,  "unit": "%", "lower_is_better": True},
    "roaa":             {"good": 1.0, "warning": 0.5, "unit": "%"},
    "roae":             {"good": 10,  "warning": 5,   "unit": "%"},
    "tier1_ratio":      {"good": 12,  "warning": 8,   "unit": "%"},
    # Strong is <= 80% and adequate is <= 95%, so this metric must be graded
    # as lower-is-better; without the flag anything >= 80% was rated STRONG
    # and ADEQUATE could never be reached.
    "loans_to_deposits": {"good": 80, "warning": 95,  "unit": "%", "lower_is_better": True},
    "npl_ratio":        {"good": 1.0, "warning": 3.0, "unit": "%", "lower_is_better": True},
    "reserve_coverage": {"good": 100, "warning": 50,  "unit": "%"},
}
67
+
68
+
69
@dataclass
class InstitutionProfile:
    """
    One FDIC-insured institution's call report snapshot.

    The stored fields are raw amounts; every ratio is derived on access and
    is expressed in percent. A ratio is None whenever its denominator is
    missing, zero or negative.
    """
    # Identity
    cert: int
    name: str
    city: str
    state: str
    report_date: str
    # Amounts (total_assets is in thousands; the remaining amounts are
    # presumably in the same unit — confirm against the data source)
    total_assets: float
    total_deposits: float
    net_loans: float
    net_income: float
    interest_income: float
    interest_expense: float
    non_interest_income: float
    non_interest_expense: float
    total_equity: float
    # Optional inputs; ratios that need them are None when they are absent
    tier1_ratio: Optional[float] = None
    gross_loans: Optional[float] = None
    non_current_loans: Optional[float] = None
    loan_loss_allowance: Optional[float] = None

    @staticmethod
    def _pct(numerator, denominator):
        """Return numerator / denominator in percent, or None if the denominator is not positive."""
        if not (denominator and denominator > 0):
            return None
        return numerator / denominator * 100

    @property
    def total_assets_mm(self) -> float:
        """Total assets divided by 1,000."""
        return self.total_assets / 1_000

    @property
    def asset_bucket(self) -> str:
        """Name of the first ASSET_BUCKETS range containing total_assets ("mega" if none does)."""
        size = self.total_assets
        return next(
            (label for label, (floor, ceiling) in ASSET_BUCKETS.items()
             if floor <= size < ceiling),
            "mega",
        )

    @property
    def nim(self) -> Optional[float]:
        """(Interest income - interest expense) over total assets."""
        spread = self.interest_income - self.interest_expense
        return self._pct(spread, self.total_assets)

    @property
    def efficiency_ratio(self) -> Optional[float]:
        """Non-interest expense over (interest income + non-interest income)."""
        gross_revenue = self.interest_income + self.non_interest_income
        return self._pct(self.non_interest_expense, gross_revenue)

    @property
    def roaa(self) -> Optional[float]:
        """Net income over total assets."""
        return self._pct(self.net_income, self.total_assets)

    @property
    def roae(self) -> Optional[float]:
        """Net income over total equity."""
        return self._pct(self.net_income, self.total_equity)

    @property
    def loans_to_deposits(self) -> Optional[float]:
        """Net loans over total deposits."""
        return self._pct(self.net_loans, self.total_deposits)

    @property
    def npl_ratio(self) -> Optional[float]:
        """Non-current loans over gross loans; None if either input is unusable."""
        if self.non_current_loans is None:
            return None
        return self._pct(self.non_current_loans, self.gross_loans)

    @property
    def reserve_coverage(self) -> Optional[float]:
        """Loan loss allowance over non-current loans; None if either input is unusable."""
        if self.loan_loss_allowance is None:
            return None
        return self._pct(self.loan_loss_allowance, self.non_current_loans)

    def metrics_dict(self) -> dict:
        """Return every benchmarked metric keyed by its metric name."""
        metric_names = (
            "nim",
            "efficiency_ratio",
            "roaa",
            "roae",
            "tier1_ratio",
            "loans_to_deposits",
            "npl_ratio",
            "reserve_coverage",
        )
        return {metric: getattr(self, metric) for metric in metric_names}
160
+
161
+
162
@dataclass
class BenchmarkResult:
    """
    Benchmarking result for a single metric.

    Holds the institution's value next to the peer group's median and
    quartiles, and derives the gap to the median and a STRONG / ADEQUATE /
    WEAK grade from the BENCHMARKS thresholds.
    """
    metric: str
    institution_value: Optional[float]
    peer_median: Optional[float]
    peer_25th: Optional[float]
    peer_75th: Optional[float]
    peer_count: int
    unit: str = "%"
    lower_is_better: bool = False

    @property
    def vs_median(self) -> Optional[float]:
        """Institution value minus peer median, or None if either is missing."""
        # Compare against None explicitly: 0.0 is a valid metric value and
        # must not be treated as "missing".
        if self.institution_value is None or self.peer_median is None:
            return None
        return self.institution_value - self.peer_median

    @property
    def status(self) -> str:
        """
        Grade the institution value against the BENCHMARKS thresholds.

        Returns "N/A" when there is no value or no threshold for the metric,
        otherwise "STRONG", "ADEQUATE" or "WEAK".
        """
        if self.institution_value is None:
            return "N/A"
        benchmark = BENCHMARKS.get(self.metric, {})
        good = benchmark.get("good")
        warning = benchmark.get("warning")
        lower = benchmark.get("lower_is_better", False)

        if good is None:
            return "N/A"

        if lower:
            # Smaller is better: at or under the threshold passes.
            if self.institution_value <= good:
                return "STRONG"
            elif self.institution_value <= warning:
                return "ADEQUATE"
            return "WEAK"
        else:
            # Larger is better: at or over the threshold passes.
            if self.institution_value >= good:
                return "STRONG"
            elif self.institution_value >= warning:
                return "ADEQUATE"
            return "WEAK"
File without changes
@@ -0,0 +1,109 @@
1
+ """
2
+ Compute benchmarking metrics across a peer group.
3
+ """
4
+ import pandas as pd
5
+ import numpy as np
6
+ from cdfibenchmark.data.schema import (
7
+ InstitutionProfile, BenchmarkResult, BENCHMARKS
8
+ )
9
+
10
+
11
def compute_peer_metrics(peers: list) -> pd.DataFrame:
    """
    Tabulate identity fields and all metrics for a list of institutions.

    ``None`` entries in ``peers`` are skipped. Each remaining institution
    contributes one row holding its cert, name, city, state, report date,
    assets in millions and asset bucket, followed by everything in its
    ``metrics_dict()``.
    """
    records = [
        {
            "cert": peer.cert,
            "name": peer.name,
            "city": peer.city,
            "state": peer.state,
            "report_date": peer.report_date,
            "total_assets_mm": peer.total_assets_mm,
            "asset_bucket": peer.asset_bucket,
            **peer.metrics_dict(),
        }
        for peer in peers
        if peer is not None
    ]
    return pd.DataFrame(records)
32
+
33
+
34
def benchmark_institution(
    institution: InstitutionProfile,
    peers: list,
) -> list:
    """
    Compare one institution with its peer group, metric by metric.

    For every metric in BENCHMARKS the peer median and the 25th / 75th
    percentiles are computed over the peers that have a value for it.

    Args:
        institution: The institution to benchmark
        peers: List of peer InstitutionProfile objects

    Returns:
        List of BenchmarkResult objects in BENCHMARKS order. Peer statistics
        are None (and peer_count 0) when no peer has a value for the metric.
    """
    peer_table = compute_peer_metrics(peers)
    own_metrics = institution.metrics_dict()

    outcomes = []
    for metric, config in BENCHMARKS.items():
        median = lower_quartile = upper_quartile = None
        usable = 0

        if metric in peer_table.columns:
            observed = peer_table[metric].dropna()
            usable = len(observed)
            if usable:
                median = float(observed.median())
                lower_quartile = float(observed.quantile(0.25))
                upper_quartile = float(observed.quantile(0.75))

        outcomes.append(BenchmarkResult(
            metric=metric,
            institution_value=own_metrics.get(metric),
            peer_median=median,
            peer_25th=lower_quartile,
            peer_75th=upper_quartile,
            peer_count=usable,
            unit=config.get("unit", "%"),
            lower_is_better=config.get("lower_is_better", False),
        ))

    return outcomes
76
+
77
+
78
def rank_institution(
    institution: InstitutionProfile,
    peers: list,
    metric: str,
) -> dict:
    """
    Place an institution within its peer group for one metric.

    The institution's value is pooled with the peers' non-null values and
    the pool is ordered best-first (ascending for lower-is-better metrics,
    descending otherwise). Rank 1 is the best position.

    Returns:
        Dict with "rank", "percentile" and "peer_count". When the
        institution has no value or the metric is not in the peer table,
        rank and percentile are None and peer_count is len(peers);
        otherwise peer_count is the pool size, which includes the
        institution itself.
    """
    peer_table = compute_peer_metrics(peers)
    own_value = institution.metrics_dict().get(metric)

    if own_value is None or metric not in peer_table.columns:
        return {"rank": None, "percentile": None, "peer_count": len(peers)}

    pool = peer_table[metric].dropna().tolist() + [own_value]
    best_is_highest = not BENCHMARKS.get(metric, {}).get("lower_is_better", False)
    ordered = sorted(pool, reverse=best_is_highest)

    # First occurrence wins, so ties share the better position.
    position = ordered.index(own_value) + 1
    pool_size = len(ordered)

    return {
        "rank": position,
        "percentile": round((1 - position / pool_size) * 100, 1),
        "peer_count": pool_size,
    }
File without changes
@@ -0,0 +1,93 @@
1
+ """
2
+ Peer group selection logic for CDFI benchmarking.
3
+ """
4
+ from cdfibenchmark.data.schema import InstitutionProfile, ASSET_BUCKETS
5
+ from cdfibenchmark.data.fdic import get_peer_financials
6
+
7
+
8
def build_peer_group(
    institution: InstitutionProfile,
    same_state: bool = False,
    asset_tolerance: float = 0.5,
    min_peers: int = 10,
    max_peers: int = 50,
    report_date: str = None,
) -> list:
    """
    Assemble a peer group of similarly sized institutions.

    Peers are requested within +/- ``asset_tolerance`` of the institution's
    total assets, optionally limited to its state. If the same-state search
    yields fewer than ``min_peers``, the search is repeated nationwide with
    the same asset range.

    Args:
        institution: The institution to benchmark
        same_state: Restrict peers to same state
        asset_tolerance: +/- tolerance for asset size (0.5 = 50%)
        min_peers: Same-state result size below which the state filter is dropped
        max_peers: Maximum number of peers to return
        report_date: Report date for peer financials

    Returns:
        List of InstitutionProfile objects (excluding the institution itself)
    """
    base_assets = institution.total_assets
    lower_bound = int(base_assets * (1 - asset_tolerance))
    upper_bound = int(base_assets * (1 + asset_tolerance))

    def fetch_excluding_self(state_filter):
        # A few extra rows are requested so that removing the institution
        # itself still leaves room for max_peers.
        candidates = get_peer_financials(
            state=state_filter,
            min_assets=lower_bound,
            max_assets=upper_bound,
            report_date=report_date,
            limit=max_peers + 5,
        )
        return [c for c in candidates if c.cert != institution.cert]

    group = fetch_excluding_self(institution.state if same_state else None)

    # Too few in-state peers: retry without the state restriction.
    if same_state and len(group) < min_peers:
        group = fetch_excluding_self(None)

    return group[:max_peers]
58
+
59
+
60
def build_sample_peer_group(institution: InstitutionProfile) -> list:
    """
    Build a synthetic peer group for testing without API calls.

    Generates 20 peers (certs 90000-90019, all in "Chicago" and in the
    institution's state) by scaling the institution's figures with random
    factors. A private generator seeded with 42 is used, so the output is
    the same on every call and the process-wide ``random`` state is left
    untouched.

    Returns:
        List of 20 InstitutionProfile objects.
    """
    import random

    # A dedicated generator yields the same sequence as seeding the module
    # RNG with 42, without reseeding it for the rest of the program.
    rng = random.Random(42)

    peers = []
    for i in range(20):
        # NOTE: the order of the rng calls below determines the generated
        # values; keep it stable to keep the sample data reproducible.
        scale = rng.uniform(0.6, 1.4)
        peer = InstitutionProfile(
            cert=90000 + i,
            name=f"Community Bank {i+1}",
            city="Chicago",
            state=institution.state,
            report_date=institution.report_date,
            total_assets=institution.total_assets * scale,
            total_deposits=institution.total_deposits * scale * rng.uniform(0.85, 1.1),
            net_loans=institution.net_loans * scale * rng.uniform(0.7, 1.2),
            net_income=institution.net_income * scale * rng.uniform(0.5, 1.5),
            interest_income=institution.interest_income * scale * rng.uniform(0.9, 1.1),
            interest_expense=institution.interest_expense * scale * rng.uniform(0.8, 1.2),
            non_interest_income=institution.non_interest_income * scale * rng.uniform(0.7, 1.3),
            non_interest_expense=institution.non_interest_expense * scale * rng.uniform(0.85, 1.15),
            total_equity=institution.total_equity * scale * rng.uniform(0.8, 1.2),
            tier1_ratio=rng.uniform(8.0, 18.0),
            gross_loans=institution.net_loans * scale * 1.05,
            non_current_loans=institution.net_loans * scale * rng.uniform(0.005, 0.04),
            loan_loss_allowance=institution.net_loans * scale * rng.uniform(0.008, 0.02),
        )
        peers.append(peer)
    return peers
File without changes
@@ -0,0 +1,153 @@
1
+ """
2
+ Generate CDFI peer benchmarking reports.
3
+ """
4
+ import pandas as pd
5
+ from cdfibenchmark.data.schema import (
6
+ InstitutionProfile, BenchmarkResult, BENCHMARKS
7
+ )
8
+ from cdfibenchmark.metrics.calculator import (
9
+ compute_peer_metrics, benchmark_institution, rank_institution
10
+ )
11
+
12
+
13
# Display names for the metric keys used in BENCHMARKS, in report order.
METRIC_LABELS = dict(
    nim="Net Interest Margin (NIM)",
    efficiency_ratio="Efficiency Ratio",
    roaa="Return on Avg Assets (ROAA)",
    roae="Return on Avg Equity (ROAE)",
    tier1_ratio="Tier 1 Capital Ratio",
    loans_to_deposits="Loans-to-Deposits",
    npl_ratio="Non-Performing Loan Ratio",
    reserve_coverage="Loan Loss Reserve Coverage",
)
23
+
24
+
25
def generate_report(
    institution: InstitutionProfile,
    peers: list,
    title: str = None,
) -> str:
    """
    Generate a full peer benchmarking report as a Markdown string.

    The report has four parts: an institution header, a "Performance
    Summary" table (one row per benchmarked metric), a "Metric Detail"
    section and a "Peer Group Summary".

    Args:
        institution: The institution being benchmarked
        peers: List of peer InstitutionProfile objects
        title: Optional sub-heading; defaults to the institution name

    Returns:
        The report as a single newline-joined Markdown string.
    """
    def fmt_pct(value):
        # Test against None, not truthiness: 0.00% is a real value (e.g. an
        # NPL ratio of zero) and must not be shown as "N/A".
        return f"{value:.2f}%" if value is not None else "N/A"

    status_labels = {
        "STRONG": "✅ STRONG",
        "ADEQUATE": "⚠️ ADEQUATE",
        "WEAK": "❌ WEAK",
        "N/A": "—",
    }

    results = benchmark_institution(institution, peers)

    lines = [
        "# CDFI Peer Benchmarking Report",
        f"## {title or institution.name}",
        "",
        f"**Institution:** {institution.name}",
        f"**Location:** {institution.city}, {institution.state}",
        f"**Total Assets:** ${institution.total_assets_mm:.1f}MM",
        f"**Asset Bucket:** {institution.asset_bucket.title()}",
        f"**Report Date:** {institution.report_date}",
        f"**Peer Group Size:** {len(peers)} institutions",
        "",
        "---",
        "",
        "## Performance Summary",
        "",
        "| Metric | Institution | Peer Median | 25th Pctile | 75th Pctile | Status |",
        "|--------|-------------|-------------|-------------|-------------|--------|",
    ]

    for result in results:
        label = METRIC_LABELS.get(result.metric, result.metric)
        status_text = status_labels.get(result.status, result.status)
        lines.append(
            f"| {label} | {fmt_pct(result.institution_value)} "
            f"| {fmt_pct(result.peer_median)} | {fmt_pct(result.peer_25th)} "
            f"| {fmt_pct(result.peer_75th)} | {status_text} |"
        )

    lines += [
        "",
        "---",
        "",
        "## Metric Detail",
        "",
    ]

    for result in results:
        label = METRIC_LABELS.get(result.metric, result.metric)
        lines.append(f"### {label}")
        lines.append("")

        if result.institution_value is not None:
            lines.append(f"**Institution Value:** {result.institution_value:.2f}%")
        if result.peer_median is not None:
            lines.append(f"**Peer Median:** {result.peer_median:.2f}%")
        if result.vs_median is not None:
            direction = "above" if result.vs_median > 0 else "below"
            lines.append(
                f"**vs Peer Median:** {abs(result.vs_median):.2f}% {direction} median"
            )

        benchmark = BENCHMARKS.get(result.metric, {})
        good = benchmark.get("good")
        warning = benchmark.get("warning")
        lower = benchmark.get("lower_is_better", False)

        # A threshold of 0 is still a threshold, so only skip when one is absent.
        if good is not None and warning is not None:
            if lower:
                lines.append(
                    f"**Benchmark:** Strong <= {good}% | Adequate <= {warning}%"
                )
            else:
                lines.append(
                    f"**Benchmark:** Strong >= {good}% | Adequate >= {warning}%"
                )

        lines.append(f"**Status:** {result.status}")
        lines.append("")

    lines += [
        "---",
        "",
        "## Peer Group Summary",
        "",
    ]

    peer_df = compute_peer_metrics(peers)
    lines.append(f"**Peer Count:** {len(peers)}")
    # The columns are absent when the peer list is empty, hence the guards.
    if "total_assets_mm" in peer_df.columns:
        lines.append(
            f"**Peer Asset Range:** "
            f"${peer_df['total_assets_mm'].min():.1f}MM – "
            f"${peer_df['total_assets_mm'].max():.1f}MM"
        )
    if "state" in peer_df.columns:
        states = peer_df["state"].nunique()
        lines.append(f"**States Represented:** {states}")
    lines.append("")

    return "\n".join(lines)
133
+
134
+
135
def summary_table(
    institution: InstitutionProfile,
    peers: list,
) -> pd.DataFrame:
    """
    Return the benchmarking results as a pandas DataFrame.

    One row per benchmarked metric, with the display label, the
    institution's value, peer median and quartiles, the gap to the median,
    the status grade and the number of peers that had a value.
    """
    return pd.DataFrame([
        {
            "metric": METRIC_LABELS.get(outcome.metric, outcome.metric),
            "institution": outcome.institution_value,
            "peer_median": outcome.peer_median,
            "peer_25th": outcome.peer_25th,
            "peer_75th": outcome.peer_75th,
            "vs_median": outcome.vs_median,
            "status": outcome.status,
            "peer_count": outcome.peer_count,
        }
        for outcome in benchmark_institution(institution, peers)
    ])
tests/__init__.py ADDED
File without changes
tests/conftest.py ADDED
@@ -0,0 +1,32 @@
1
+ import pytest
2
+ from cdfibenchmark.data.schema import InstitutionProfile
3
+ from cdfibenchmark.peers.selector import build_sample_peer_group
4
+
5
+
6
@pytest.fixture
def sample_institution():
    """Provide an ``InstitutionProfile`` for Broadway Federal Bank (CERT 57542)."""
    # NOTE(review): monetary fields look like thousands of dollars (the schema
    # test expects total_assets_mm of about 655.0 for 655_000) - confirm
    # against the InstitutionProfile definition.
    identity = {
        "cert": 57542,
        "name": "Broadway Federal Bank",
        "city": "Los Angeles",
        "state": "CA",
        "report_date": "20241231",
    }
    financials = {
        "total_assets": 655_000,
        "total_deposits": 520_000,
        "net_loans": 380_000,
        "net_income": 1_950,
        "interest_income": 28_000,
        "interest_expense": 8_000,
        "non_interest_income": 3_500,
        "non_interest_expense": 22_000,
        "total_equity": 48_000,
        "tier1_ratio": 12.2,
        "gross_loans": 390_000,
        "non_current_loans": 5_850,
        "loan_loss_allowance": 7_800,
    }
    return InstitutionProfile(**identity, **financials)
28
+
29
+
30
@pytest.fixture
def sample_peers(sample_institution):
    """Provide the sample peer group built for ``sample_institution``."""
    peer_group = build_sample_peer_group(sample_institution)
    return peer_group
tests/test_metrics.py ADDED
@@ -0,0 +1,50 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from cdfibenchmark.metrics.calculator import (
4
+ compute_peer_metrics, benchmark_institution, rank_institution
5
+ )
6
+ from cdfibenchmark.data.schema import BenchmarkResult
7
+
8
+
9
def test_peer_metrics_returns_dataframe(sample_institution, sample_peers):
    """compute_peer_metrics yields a DataFrame with one row per peer."""
    peer_frame = compute_peer_metrics(sample_peers)
    expected_rows = len(sample_peers)
    assert isinstance(peer_frame, pd.DataFrame)
    assert len(peer_frame) == expected_rows
13
+
14
+
15
def test_peer_metrics_has_columns(sample_institution, sample_peers):
    """The peer metrics frame exposes the nim, efficiency_ratio and roaa columns."""
    available = compute_peer_metrics(sample_peers).columns
    for column in ("nim", "efficiency_ratio", "roaa"):
        assert column in available
20
+
21
+
22
def test_benchmark_returns_list(sample_institution, sample_peers):
    """benchmark_institution returns a non-empty list."""
    outcomes = benchmark_institution(sample_institution, sample_peers)
    assert isinstance(outcomes, list)
    assert len(outcomes) > 0
26
+
27
+
28
def test_benchmark_result_type(sample_institution, sample_peers):
    """Every item returned by benchmark_institution is a BenchmarkResult."""
    for outcome in benchmark_institution(sample_institution, sample_peers):
        assert isinstance(outcome, BenchmarkResult)
31
+
32
+
33
def test_benchmark_has_peer_median(sample_institution, sample_peers):
    """The "nim" benchmark result carries a peer median."""
    outcomes = benchmark_institution(sample_institution, sample_peers)
    # next() without a default raises StopIteration if no "nim" result exists.
    nim_outcome = next(filter(lambda item: item.metric == "nim", outcomes))
    assert nim_outcome.peer_median is not None
37
+
38
+
39
def test_benchmark_status_valid(sample_institution, sample_peers):
    """Each benchmark result reports one of the recognised status strings."""
    allowed = frozenset(("STRONG", "ADEQUATE", "WEAK", "N/A"))
    for outcome in benchmark_institution(sample_institution, sample_peers):
        assert outcome.status in allowed
44
+
45
+
46
def test_rank_institution(sample_institution, sample_peers):
    """rank_institution for "nim" reports rank, percentile and a positive peer count."""
    ranking = rank_institution(sample_institution, sample_peers, "nim")
    for key in ("rank", "percentile"):
        assert key in ranking
    assert ranking["peer_count"] > 0
tests/test_peers.py ADDED
@@ -0,0 +1,27 @@
1
+ import pytest
2
+ from cdfibenchmark.peers.selector import build_sample_peer_group
3
+ from cdfibenchmark.data.schema import InstitutionProfile
4
+
5
+
6
def test_sample_peer_group_returns_list(sample_institution):
    """build_sample_peer_group returns a non-empty list."""
    peer_group = build_sample_peer_group(sample_institution)
    assert isinstance(peer_group, list)
    assert len(peer_group) > 0
10
+
11
+
12
def test_sample_peers_are_institution_profiles(sample_institution):
    """Every sample peer is an InstitutionProfile instance."""
    for peer in build_sample_peer_group(sample_institution):
        assert isinstance(peer, InstitutionProfile)
15
+
16
+
17
def test_sample_peers_exclude_institution(sample_institution):
    """The subject institution's cert does not appear among its sample peers."""
    peer_group = build_sample_peer_group(sample_institution)
    peer_certs = tuple(peer.cert for peer in peer_group)
    assert sample_institution.cert not in peer_certs
21
+
22
+
23
def test_sample_peers_similar_assets(sample_institution):
    """Each peer's total assets are between 0.1x and 5x the institution's (exclusive)."""
    for candidate in build_sample_peer_group(sample_institution):
        asset_ratio = candidate.total_assets / sample_institution.total_assets
        assert 0.1 < asset_ratio < 5.0
tests/test_report.py ADDED
@@ -0,0 +1,34 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from cdfibenchmark.report.generator import generate_report, summary_table
4
+
5
+
6
def test_generate_report_returns_string(sample_institution, sample_peers):
    """generate_report returns a string longer than 100 characters."""
    rendered = generate_report(sample_institution, sample_peers)
    assert isinstance(rendered, str)
    assert len(rendered) > 100
10
+
11
+
12
def test_report_contains_institution_name(sample_institution, sample_peers):
    """The generated report mentions the institution's name."""
    rendered = generate_report(sample_institution, sample_peers)
    expected_name = sample_institution.name
    assert expected_name in rendered
15
+
16
+
17
def test_report_contains_sections(sample_institution, sample_peers):
    """The generated report includes each of the expected section headings."""
    rendered = generate_report(sample_institution, sample_peers)
    headings = ("Performance Summary", "Metric Detail", "Peer Group Summary")
    for heading in headings:
        assert heading in rendered
22
+
23
+
24
def test_summary_table_returns_dataframe(sample_institution, sample_peers):
    """summary_table returns a non-empty DataFrame with metric and status columns."""
    table = summary_table(sample_institution, sample_peers)
    assert isinstance(table, pd.DataFrame)
    assert len(table) > 0
    for column in ("metric", "status"):
        assert column in table.columns
30
+
31
+
32
def test_report_contains_nim(sample_institution, sample_peers):
    """The generated report includes the "Net Interest Margin" label."""
    rendered = generate_report(sample_institution, sample_peers)
    label = "Net Interest Margin"
    assert label in rendered
tests/test_schema.py ADDED
@@ -0,0 +1,62 @@
1
+ import pytest
2
+ from cdfibenchmark.data.schema import InstitutionProfile
3
+
4
+
5
def test_institution_created(sample_institution):
    """The fixture carries the expected name and FDIC cert number."""
    profile = sample_institution
    assert profile.name == "Broadway Federal Bank"
    assert profile.cert == 57542
8
+
9
+
10
def test_total_assets_mm(sample_institution):
    """total_assets_mm is approximately 655.0 for the fixture."""
    expected_mm = pytest.approx(655.0)
    assert sample_institution.total_assets_mm == expected_mm
12
+
13
+
14
def test_asset_bucket(sample_institution):
    """The fixture falls into the "medium" asset bucket."""
    bucket = sample_institution.asset_bucket
    assert bucket == "medium"
16
+
17
+
18
def test_nim_computed(sample_institution):
    """nim is available and positive for the fixture."""
    margin = sample_institution.nim
    assert margin is not None
    assert margin > 0
22
+
23
+
24
def test_efficiency_ratio_computed(sample_institution):
    """efficiency_ratio is available and lies strictly between 0 and 200."""
    efficiency = sample_institution.efficiency_ratio
    assert efficiency is not None
    assert 0 < efficiency < 200
28
+
29
+
30
def test_roaa_computed(sample_institution):
    """roaa is available (not None) for the fixture."""
    return_on_assets = sample_institution.roaa
    assert return_on_assets is not None
33
+
34
+
35
def test_roae_computed(sample_institution):
    """roae is available (not None) for the fixture."""
    return_on_equity = sample_institution.roae
    assert return_on_equity is not None
38
+
39
+
40
def test_loans_to_deposits(sample_institution):
    """loans_to_deposits is available and positive for the fixture."""
    loan_deposit_ratio = sample_institution.loans_to_deposits
    assert loan_deposit_ratio is not None
    assert loan_deposit_ratio > 0
44
+
45
+
46
def test_npl_ratio(sample_institution):
    """npl_ratio is available and positive for the fixture."""
    non_performing = sample_institution.npl_ratio
    assert non_performing is not None
    assert non_performing > 0
50
+
51
+
52
def test_reserve_coverage(sample_institution):
    """reserve_coverage is available and positive for the fixture."""
    coverage = sample_institution.reserve_coverage
    assert coverage is not None
    assert coverage > 0
56
+
57
+
58
def test_metrics_dict(sample_institution):
    """metrics_dict() includes the nim, efficiency_ratio and roaa keys."""
    computed = sample_institution.metrics_dict()
    for key in ("nim", "efficiency_ratio", "roaa"):
        assert key in computed