hmda-analyzer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: hmda-analyzer
3
+ Version: 0.1.0
4
+ Summary: HMDA mortgage lending disparity analyzer — denial rates, racial disparities, lending deserts, and lender benchmarking
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/Jaypatel1511/hmda-analyzer
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pandas>=1.4.0
10
+ Requires-Dist: numpy>=1.21.0
11
+ Requires-Dist: requests>=2.27.0
12
+
13
+ # hmda-analyzer 📊
14
+
15
+ **HMDA mortgage lending disparity analyzer.**
16
+
17
+ Compute denial rate disparities by race, identify lending deserts, benchmark lenders
18
+ against peers, and generate fair lending analysis reports — using CFPB HMDA LAR data.
19
+ Free public API, no authentication required.
20
+
21
+ ---
22
+
23
+ ## Why hmda-analyzer?
24
+
25
+ HMDA data covers 10+ million mortgage applications per year with borrower demographics,
26
+ denial rates, loan amounts, and census tract locations. It is the most powerful public
27
+ dataset for analyzing mortgage lending disparities — but it requires significant
28
+ engineering to use. hmda-analyzer makes it accessible in Python.
29
+
30
+ ---
31
+
32
+ ## Installation
33
+
34
+ pip install hmda-analyzer
35
+
36
+ ---
37
+
38
+ ## Quickstart
39
+
40
+ from hmdaanalyzer import (
41
+ load_sample, denial_rate_by_race, disparity_ratio,
42
+ lending_by_tract, lending_desert_score, lender_summary, generate_disparity_report,
43
+ )
44
+
45
+ # Load sample data (no API required)
46
+ df = load_sample(n=5000)
47
+
48
+ # Or load from CFPB API (real data)
49
+ # df = load_from_api(year=2023, state="IL")
50
+
51
+ # Denial rates by race
52
+ rates = denial_rate_by_race(df)
53
+ print(rates)
54
+
55
+ # Disparity ratios vs White applicants
56
+ disparities = disparity_ratio(df)
57
+ print(disparities)
58
+
59
+ # Geographic analysis
60
+ tracts = lending_by_tract(df)
61
+ deserts = lending_desert_score(df)
62
+
63
+ # Lender analysis
64
+ summary = lender_summary(df, lei="LEI000001")
65
+
66
+ # Full disparity report
67
+ report = generate_disparity_report(df, title="Illinois Mortgage Market 2023")
68
+ print(report)
69
+
70
+ ---
71
+
72
+ ## Analyses Supported
73
+
74
+ - Denial rate by race and ethnicity
75
+ - Disparity ratios vs reference group (default: White applicants)
76
+ - Denial rate by income band
77
+ - Denial reasons by race
78
+ - Lending activity by census tract, county, and state
79
+ - Lending desert identification (low application volume tracts)
80
+ - Lender vs market comparison
81
+ - Top lenders by origination volume
82
+
83
+ ---
84
+
85
+ ## Disparity Ratio Thresholds
86
+
87
+ Based on CFPB fair lending examination standards:
88
+
89
+ - >= 2.0x — HIGH disparity (triggers regulatory scrutiny)
90
+ - >= 1.5x — MODERATE disparity
91
+ - 1.0x to < 1.5x — LOW disparity
92
+ - < 1.0x — FAVORABLE (group has lower denial rate than reference)
93
+
94
+ ---
95
+
96
+ ## Data Sources
97
+
98
+ CFPB HMDA Data Browser API — free, no API key required.
99
+ 2024 data covers 4,908 institutions and millions of loan applications.
100
+
101
+ https://ffiec.cfpb.gov/data-browser/
102
+
103
+ ---
104
+
105
+ ## Running Tests
106
+
107
+ PYTHONPATH=. pytest tests/ -v
108
+
109
+ 28 tests across all modules.
110
+
111
+ ---
112
+
113
+ ## Who This Is For
114
+
115
+ - Fair lending analysts and compliance teams at banks and CDFIs
116
+ - Community reinvestment researchers studying mortgage disparities
117
+ - Journalists covering housing discrimination and redlining
118
+ - Regulators and examiners analyzing lender performance
119
+ - Academics studying racial wealth gaps and homeownership barriers
120
+
121
+ ---
122
+
123
+ ## License
124
+
125
+ MIT 2026 Jaypatel1511
@@ -0,0 +1,113 @@
1
+ # hmda-analyzer 📊
2
+
3
+ **HMDA mortgage lending disparity analyzer.**
4
+
5
+ Compute denial rate disparities by race, identify lending deserts, benchmark lenders
6
+ against peers, and generate fair lending analysis reports — using CFPB HMDA LAR data.
7
+ Free public API, no authentication required.
8
+
9
+ ---
10
+
11
+ ## Why hmda-analyzer?
12
+
13
+ HMDA data covers 10+ million mortgage applications per year with borrower demographics,
14
+ denial rates, loan amounts, and census tract locations. It is the most powerful public
15
+ dataset for analyzing mortgage lending disparities — but it requires significant
16
+ engineering to use. hmda-analyzer makes it accessible in Python.
17
+
18
+ ---
19
+
20
+ ## Installation
21
+
22
+ pip install hmda-analyzer
23
+
24
+ ---
25
+
26
+ ## Quickstart
27
+
28
+ from hmdaanalyzer import (
29
+ load_sample, denial_rate_by_race, disparity_ratio,
30
+ lending_by_tract, lending_desert_score, lender_summary, generate_disparity_report,
31
+ )
32
+
33
+ # Load sample data (no API required)
34
+ df = load_sample(n=5000)
35
+
36
+ # Or load from CFPB API (real data)
37
+ # df = load_from_api(year=2023, state="IL")
38
+
39
+ # Denial rates by race
40
+ rates = denial_rate_by_race(df)
41
+ print(rates)
42
+
43
+ # Disparity ratios vs White applicants
44
+ disparities = disparity_ratio(df)
45
+ print(disparities)
46
+
47
+ # Geographic analysis
48
+ tracts = lending_by_tract(df)
49
+ deserts = lending_desert_score(df)
50
+
51
+ # Lender analysis
52
+ summary = lender_summary(df, lei="LEI000001")
53
+
54
+ # Full disparity report
55
+ report = generate_disparity_report(df, title="Illinois Mortgage Market 2023")
56
+ print(report)
57
+
58
+ ---
59
+
60
+ ## Analyses Supported
61
+
62
+ - Denial rate by race and ethnicity
63
+ - Disparity ratios vs reference group (default: White applicants)
64
+ - Denial rate by income band
65
+ - Denial reasons by race
66
+ - Lending activity by census tract, county, and state
67
+ - Lending desert identification (low application volume tracts)
68
+ - Lender vs market comparison
69
+ - Top lenders by origination volume
70
+
71
+ ---
72
+
73
+ ## Disparity Ratio Thresholds
74
+
75
+ Based on CFPB fair lending examination standards:
76
+
77
+ - >= 2.0x — HIGH disparity (triggers regulatory scrutiny)
78
+ - >= 1.5x — MODERATE disparity
79
+ - 1.0x to < 1.5x — LOW disparity
80
+ - < 1.0x — FAVORABLE (group has lower denial rate than reference)
81
+
82
+ ---
83
+
84
+ ## Data Sources
85
+
86
+ CFPB HMDA Data Browser API — free, no API key required.
87
+ 2024 data covers 4,908 institutions and millions of loan applications.
88
+
89
+ https://ffiec.cfpb.gov/data-browser/
90
+
91
+ ---
92
+
93
+ ## Running Tests
94
+
95
+ PYTHONPATH=. pytest tests/ -v
96
+
97
+ 28 tests across all modules.
98
+
99
+ ---
100
+
101
+ ## Who This Is For
102
+
103
+ - Fair lending analysts and compliance teams at banks and CDFIs
104
+ - Community reinvestment researchers studying mortgage disparities
105
+ - Journalists covering housing discrimination and redlining
106
+ - Regulators and examiners analyzing lender performance
107
+ - Academics studying racial wealth gaps and homeownership barriers
108
+
109
+ ---
110
+
111
+ ## License
112
+
113
+ MIT 2026 Jaypatel1511
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: hmda-analyzer
3
+ Version: 0.1.0
4
+ Summary: HMDA mortgage lending disparity analyzer — denial rates, racial disparities, lending deserts, and lender benchmarking
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/Jaypatel1511/hmda-analyzer
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pandas>=1.4.0
10
+ Requires-Dist: numpy>=1.21.0
11
+ Requires-Dist: requests>=2.27.0
12
+
13
+ # hmda-analyzer 📊
14
+
15
+ **HMDA mortgage lending disparity analyzer.**
16
+
17
+ Compute denial rate disparities by race, identify lending deserts, benchmark lenders
18
+ against peers, and generate fair lending analysis reports — using CFPB HMDA LAR data.
19
+ Free public API, no authentication required.
20
+
21
+ ---
22
+
23
+ ## Why hmda-analyzer?
24
+
25
+ HMDA data covers 10+ million mortgage applications per year with borrower demographics,
26
+ denial rates, loan amounts, and census tract locations. It is the most powerful public
27
+ dataset for analyzing mortgage lending disparities — but it requires significant
28
+ engineering to use. hmda-analyzer makes it accessible in Python.
29
+
30
+ ---
31
+
32
+ ## Installation
33
+
34
+ pip install hmda-analyzer
35
+
36
+ ---
37
+
38
+ ## Quickstart
39
+
40
+ from hmdaanalyzer import (
41
+ load_sample, denial_rate_by_race, disparity_ratio,
42
+ lending_by_tract, lending_desert_score, lender_summary, generate_disparity_report,
43
+ )
44
+
45
+ # Load sample data (no API required)
46
+ df = load_sample(n=5000)
47
+
48
+ # Or load from CFPB API (real data)
49
+ # df = load_from_api(year=2023, state="IL")
50
+
51
+ # Denial rates by race
52
+ rates = denial_rate_by_race(df)
53
+ print(rates)
54
+
55
+ # Disparity ratios vs White applicants
56
+ disparities = disparity_ratio(df)
57
+ print(disparities)
58
+
59
+ # Geographic analysis
60
+ tracts = lending_by_tract(df)
61
+ deserts = lending_desert_score(df)
62
+
63
+ # Lender analysis
64
+ summary = lender_summary(df, lei="LEI000001")
65
+
66
+ # Full disparity report
67
+ report = generate_disparity_report(df, title="Illinois Mortgage Market 2023")
68
+ print(report)
69
+
70
+ ---
71
+
72
+ ## Analyses Supported
73
+
74
+ - Denial rate by race and ethnicity
75
+ - Disparity ratios vs reference group (default: White applicants)
76
+ - Denial rate by income band
77
+ - Denial reasons by race
78
+ - Lending activity by census tract, county, and state
79
+ - Lending desert identification (low application volume tracts)
80
+ - Lender vs market comparison
81
+ - Top lenders by origination volume
82
+
83
+ ---
84
+
85
+ ## Disparity Ratio Thresholds
86
+
87
+ Based on CFPB fair lending examination standards:
88
+
89
+ - >= 2.0x — HIGH disparity (triggers regulatory scrutiny)
90
+ - >= 1.5x — MODERATE disparity
91
+ - 1.0x to < 1.5x — LOW disparity
92
+ - < 1.0x — FAVORABLE (group has lower denial rate than reference)
93
+
94
+ ---
95
+
96
+ ## Data Sources
97
+
98
+ CFPB HMDA Data Browser API — free, no API key required.
99
+ 2024 data covers 4,908 institutions and millions of loan applications.
100
+
101
+ https://ffiec.cfpb.gov/data-browser/
102
+
103
+ ---
104
+
105
+ ## Running Tests
106
+
107
+ PYTHONPATH=. pytest tests/ -v
108
+
109
+ 28 tests across all modules.
110
+
111
+ ---
112
+
113
+ ## Who This Is For
114
+
115
+ - Fair lending analysts and compliance teams at banks and CDFIs
116
+ - Community reinvestment researchers studying mortgage disparities
117
+ - Journalists covering housing discrimination and redlining
118
+ - Regulators and examiners analyzing lender performance
119
+ - Academics studying racial wealth gaps and homeownership barriers
120
+
121
+ ---
122
+
123
+ ## License
124
+
125
+ MIT 2026 Jaypatel1511
@@ -0,0 +1,25 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ hmda_analyzer.egg-info/PKG-INFO
5
+ hmda_analyzer.egg-info/SOURCES.txt
6
+ hmda_analyzer.egg-info/dependency_links.txt
7
+ hmda_analyzer.egg-info/requires.txt
8
+ hmda_analyzer.egg-info/top_level.txt
9
+ hmdaanalyzer/__init__.py
10
+ hmdaanalyzer/analysis/__init__.py
11
+ hmdaanalyzer/analysis/disparity.py
12
+ hmdaanalyzer/analysis/geographic.py
13
+ hmdaanalyzer/analysis/lender.py
14
+ hmdaanalyzer/data/__init__.py
15
+ hmdaanalyzer/data/loader.py
16
+ hmdaanalyzer/data/schema.py
17
+ hmdaanalyzer/report/__init__.py
18
+ hmdaanalyzer/report/generator.py
19
+ tests/__init__.py
20
+ tests/conftest.py
21
+ tests/test_disparity.py
22
+ tests/test_geographic.py
23
+ tests/test_lender.py
24
+ tests/test_loader.py
25
+ tests/test_report.py
@@ -0,0 +1,3 @@
1
+ pandas>=1.4.0
2
+ numpy>=1.21.0
3
+ requests>=2.27.0
@@ -0,0 +1,2 @@
1
+ hmdaanalyzer
2
+ tests
@@ -0,0 +1,28 @@
1
+ from hmdaanalyzer.data.loader import (
2
+ load_from_api, load_from_file, load_sample,
3
+ )
4
+ from hmdaanalyzer.analysis.disparity import (
5
+ denial_rate_by_race, disparity_ratio,
6
+ denial_rate_by_income_band, denial_reasons_by_race,
7
+ )
8
+ from hmdaanalyzer.analysis.geographic import (
9
+ lending_by_tract, lending_by_county, lending_by_state,
10
+ lending_desert_score, racial_composition_by_tract,
11
+ )
12
+ from hmdaanalyzer.analysis.lender import (
13
+ lender_summary, lender_vs_market, top_lenders_by_volume,
14
+ )
15
+ from hmdaanalyzer.report.generator import (
16
+ generate_disparity_report, summary_table,
17
+ )
18
+
19
+ __version__ = "0.1.0"
20
+ __all__ = [
21
+ "load_from_api", "load_from_file", "load_sample",
22
+ "denial_rate_by_race", "disparity_ratio",
23
+ "denial_rate_by_income_band", "denial_reasons_by_race",
24
+ "lending_by_tract", "lending_by_county", "lending_by_state",
25
+ "lending_desert_score", "racial_composition_by_tract",
26
+ "lender_summary", "lender_vs_market", "top_lenders_by_volume",
27
+ "generate_disparity_report", "summary_table",
28
+ ]
File without changes
@@ -0,0 +1,128 @@
1
+ """
2
+ Denial rate disparity analysis.
3
+ Computes disparate impact ratios between racial/ethnic groups.
4
+ """
5
+ import pandas as pd
6
+ import numpy as np
7
+ from hmdaanalyzer.data.schema import DISPARITY_THRESHOLDS, REFERENCE_RACE
8
+
9
+
10
def denial_rate_by_race(df: pd.DataFrame, min_applications: int = 5) -> pd.DataFrame:
    """
    Compute denial rates by race for a HMDA LAR DataFrame.

    Only rows whose ``action_taken`` code is 1, 2 or 3 are counted. Groups
    with fewer than ``min_applications`` counted rows are dropped so that a
    rate is never reported from a handful of records.

    Args:
        df: Cleaned HMDA LAR DataFrame with ``derived_race``, ``is_denied``
            and ``action_taken`` columns
        min_applications: Minimum number of counted applications a race
            group needs in order to appear in the result (default: 5)

    Returns:
        DataFrame with columns ``derived_race``, ``applications``,
        ``denials`` and ``denial_rate``, sorted by ``denial_rate`` descending

    Raises:
        ValueError: If ``derived_race`` or ``is_denied`` is missing
    """
    if "derived_race" not in df.columns or "is_denied" not in df.columns:
        raise ValueError("DataFrame must have 'derived_race' and 'is_denied' columns")

    # The filtered frame is only read by groupby, so no defensive copy is needed.
    actionable = df[df["action_taken"].isin([1, 2, 3])]

    result = actionable.groupby("derived_race").agg(
        applications=("is_denied", "count"),
        denials=("is_denied", "sum"),
    ).reset_index()

    result["denial_rate"] = result["denials"] / result["applications"]
    result = result[result["applications"] >= min_applications]
    result = result.sort_values("denial_rate", ascending=False)

    return result
35
+
36
+
37
def disparity_ratio(df: pd.DataFrame, reference: str = None) -> pd.DataFrame:
    """
    Compute disparity ratios relative to a reference group.

    Disparity ratio = group denial rate / reference group denial rate.
    Each ratio is labelled, checked in this order:
      - "N/A" when the ratio is missing (reference denial rate is 0)
      - "HIGH" when ratio >= DISPARITY_THRESHOLDS["high"]
      - "MODERATE" when ratio >= DISPARITY_THRESHOLDS["moderate"]
      - "FAVORABLE" when ratio < 1.0
      - "LOW" otherwise

    Args:
        df: Cleaned HMDA LAR DataFrame (passed to ``denial_rate_by_race``)
        reference: Reference race group; falls back to ``REFERENCE_RACE``
            when omitted or empty

    Returns:
        The ``denial_rate_by_race`` table with ``reference_group``,
        ``reference_denial_rate``, ``disparity_ratio`` and
        ``disparity_level`` columns added, sorted by ``disparity_ratio``
        descending

    Raises:
        ValueError: If the reference group is absent from the denial-rate
            table
    """
    reference = reference or REFERENCE_RACE
    denial_rates = denial_rate_by_race(df)

    ref_row = denial_rates[denial_rates["derived_race"] == reference]
    if ref_row.empty:
        raise ValueError(f"Reference group '{reference}' not found in data.")

    ref_rate = ref_row["denial_rate"].iloc[0]

    result = denial_rates.copy()
    result["reference_group"] = reference
    result["reference_denial_rate"] = ref_rate
    # A zero reference rate makes the ratio undefined. NaN (rather than None)
    # keeps the column numeric so sorting and formatting still work.
    result["disparity_ratio"] = (
        result["denial_rate"] / ref_rate if ref_rate > 0 else np.nan
    )

    def classify(ratio):
        if pd.isna(ratio):
            return "N/A"
        if ratio >= DISPARITY_THRESHOLDS["high"]:
            return "HIGH"
        if ratio >= DISPARITY_THRESHOLDS["moderate"]:
            return "MODERATE"
        if ratio < 1.0:
            return "FAVORABLE"
        return "LOW"

    result["disparity_level"] = result["disparity_ratio"].apply(classify)
    result = result.sort_values("disparity_ratio", ascending=False)

    return result
80
+
81
+
82
def denial_rate_by_income_band(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute denial rates by income band to identify income-based disparities.

    Only rows whose ``action_taken`` code is 1, 2 or 3 are counted. Bands are
    left-closed so that they agree with their labels: "<$50k" is every income
    below 50 (including zero and negative values), "$50-80k" is [50, 80),
    and "$200k+" is 200 and above. Rows with a missing income are left out.
    The input DataFrame is not modified.

    Args:
        df: HMDA LAR DataFrame with ``income``, ``action_taken`` and
            ``is_denied`` columns. The band labels pair the edges 50/80/...
            with "$k", so income is presumably in thousands of dollars
            (confirm against the loader).

    Returns:
        DataFrame with columns ``income_band``, ``applications``,
        ``denials`` and ``denial_rate``; only bands that occur in the data
        are present
    """
    actionable = df[df["action_taken"].isin([1, 2, 3])].copy()

    # right=False + an open lower edge: the default right-closed bins starting
    # at 0 dropped incomes <= 0 and filed exactly-50 under "<$50k".
    actionable["income_band"] = pd.cut(
        actionable["income"],
        bins=[float("-inf"), 50, 80, 120, 200, float("inf")],
        labels=["<$50k", "$50-80k", "$80-120k", "$120-200k", "$200k+"],
        right=False,
    )

    result = actionable.groupby("income_band", observed=True).agg(
        applications=("is_denied", "count"),
        denials=("is_denied", "sum"),
    ).reset_index()

    result["denial_rate"] = result["denials"] / result["applications"]
    return result
102
+
103
+
104
def denial_reasons_by_race(df: pd.DataFrame) -> pd.DataFrame:
    """
    Break down primary denial reasons within each race group.

    Rows where ``is_denied`` equals True are labelled through the
    ``DENIAL_REASONS`` lookup (missing or unmapped codes become "Unknown"),
    then counted per (race, reason) pair and expressed as a percentage of
    that race's denied rows.

    Returns:
        DataFrame with columns ``derived_race``, ``denial_reason_label``,
        ``count``, ``total`` and ``pct``, ordered by race and then by ``pct``
        descending. An empty DataFrame is returned when the
        ``denial_reason_1`` column is absent.
    """
    from hmdaanalyzer.data.schema import DENIAL_REASONS

    def _label_for(code):
        # Missing codes cannot be cast to int, so short-circuit them.
        if pd.notna(code):
            return DENIAL_REASONS.get(int(code), "Unknown")
        return "Unknown"

    denied_rows = df[df["is_denied"] == True].copy()

    if "denial_reason_1" not in denied_rows.columns:
        return pd.DataFrame()

    denied_rows["denial_reason_label"] = denied_rows["denial_reason_1"].map(_label_for)

    pair_counts = (
        denied_rows.groupby(["derived_race", "denial_reason_label"])
        .size()
        .reset_index(name="count")
    )
    race_totals = denied_rows.groupby("derived_race").size().reset_index(name="total")

    breakdown = pair_counts.merge(race_totals, on="derived_race")
    breakdown["pct"] = breakdown["count"] / breakdown["total"] * 100

    return breakdown.sort_values(["derived_race", "pct"], ascending=[True, False])
@@ -0,0 +1,125 @@
1
+ """
2
+ Geographic analysis of HMDA lending patterns.
3
+ Identifies lending deserts and maps activity by census tract.
4
+ """
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+
9
def lending_by_tract(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise HMDA lending activity for each census tract.

    Only rows whose ``action_taken`` code is 1, 2 or 3 contribute.

    Returns:
        One row per tract with ``applications``, ``denials``,
        ``originations``, ``avg_loan_amount``, ``median_income``,
        ``denial_rate`` and ``origination_rate``, ordered by ``applications``
        descending

    Raises:
        ValueError: If the ``census_tract`` column is missing
    """
    if "census_tract" not in df.columns:
        raise ValueError("DataFrame must have 'census_tract' column")

    keep = df["action_taken"].isin([1, 2, 3])
    grouped = df[keep].groupby("census_tract")

    per_tract = grouped.agg(
        applications=("is_denied", "count"),
        denials=("is_denied", "sum"),
        originations=("is_approved", "sum"),
        avg_loan_amount=("loan_amount", "mean"),
        median_income=("income", "median"),
    ).reset_index()

    # Both rates share the per-tract application count as denominator.
    per_tract = per_tract.assign(
        denial_rate=per_tract["denials"] / per_tract["applications"],
        origination_rate=per_tract["originations"] / per_tract["applications"],
    )

    ordered = per_tract.sort_values("applications", ascending=False)
    return ordered
33
+
34
+
35
def lending_by_county(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise HMDA lending activity for each county.

    Only rows whose ``action_taken`` code is 1, 2 or 3 contribute. The
    ``state_code`` column is the first two characters of ``county_code``,
    so ``county_code`` must hold strings.

    Returns:
        One row per county with ``applications``, ``denials``,
        ``originations``, ``total_loan_volume``, ``avg_loan_amount``,
        ``denial_rate`` and ``state_code``, ordered by ``applications``
        descending

    Raises:
        ValueError: If the ``county_code`` column is missing
    """
    if "county_code" not in df.columns:
        raise ValueError("DataFrame must have 'county_code' column")

    keep = df["action_taken"].isin([1, 2, 3])

    per_county = (
        df[keep]
        .groupby("county_code")
        .agg(
            applications=("is_denied", "count"),
            denials=("is_denied", "sum"),
            originations=("is_approved", "sum"),
            total_loan_volume=("loan_amount", "sum"),
            avg_loan_amount=("loan_amount", "mean"),
        )
        .reset_index()
    )

    per_county["denial_rate"] = per_county["denials"].div(per_county["applications"])
    # Leading two characters of the county code identify the state.
    per_county["state_code"] = per_county["county_code"].str.slice(0, 2)

    return per_county.sort_values("applications", ascending=False)
56
+
57
+
58
def lending_desert_score(
    df: pd.DataFrame,
    *,
    volume_weight: float = 0.6,
    denial_weight: float = 0.4,
    max_app_percentile: float = 25,
    min_denial_rate: float = 0.15,
) -> pd.DataFrame:
    """
    Score census tracts by how much they look like a lending desert.

    Tracts are ranked against each other by application volume (no external
    housing-unit or population data is used). A tract scores higher the
    fewer applications it has relative to other tracts and the higher its
    denial rate:

        desert_score = (100 - app_percentile) * volume_weight
                       + denial_rate * 100 * denial_weight

    A tract is flagged ``is_lending_desert`` when its application percentile
    is below ``max_app_percentile`` and its denial rate is above
    ``min_denial_rate``.

    Args:
        df: HMDA LAR DataFrame accepted by ``lending_by_tract``
        volume_weight: Weight of the low-volume component (default: 0.6)
        denial_weight: Weight of the denial-rate component (default: 0.4)
        max_app_percentile: Percentile (0-100) below which a tract counts as
            low-volume (default: 25)
        min_denial_rate: Denial rate (0-1) a tract must exceed to be flagged
            (default: 0.15)

    Returns:
        The ``lending_by_tract`` table with ``app_percentile``,
        ``desert_score`` and ``is_lending_desert`` columns added, sorted by
        ``desert_score`` descending
    """
    tract_df = lending_by_tract(df)

    # Percentile rank (0-100) of each tract by application volume.
    tract_df["app_percentile"] = (
        tract_df["applications"].rank(pct=True) * 100
    ).round(1)

    # Low application volume and a high denial rate both push the score up.
    tract_df["desert_score"] = (
        (100 - tract_df["app_percentile"]) * volume_weight +
        tract_df["denial_rate"] * 100 * denial_weight
    ).round(1)

    tract_df["is_lending_desert"] = (
        (tract_df["app_percentile"] < max_app_percentile) &
        (tract_df["denial_rate"] > min_denial_rate)
    )

    return tract_df.sort_values("desert_score", ascending=False)
87
+
88
+
89
def racial_composition_by_tract(df: pd.DataFrame) -> pd.DataFrame:
    """
    Count applicants and average ``is_denied`` per (tract, race) pair.

    All rows are used; no ``action_taken`` filter is applied here.

    Returns:
        DataFrame with columns ``census_tract``, ``derived_race``,
        ``applications`` and ``denial_rate``, ordered by tract and then by
        ``applications`` descending. An empty DataFrame is returned when
        ``derived_race`` or ``census_tract`` is missing.
    """
    required = {"derived_race", "census_tract"}
    if not required.issubset(df.columns):
        return pd.DataFrame()

    pair_groups = df.groupby(["census_tract", "derived_race"])
    composition = pair_groups.agg(
        applications=("is_denied", "count"),
        denial_rate=("is_denied", "mean"),
    ).reset_index()

    ordered = composition.sort_values(
        ["census_tract", "applications"], ascending=[True, False]
    )
    return ordered
105
+
106
+
107
def lending_by_state(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise HMDA lending activity for each state.

    Only rows whose ``action_taken`` code is 1, 2 or 3 contribute.

    Returns:
        One row per ``state_code`` with ``applications``, ``denials``,
        ``originations``, ``total_volume`` and ``denial_rate``, ordered by
        ``applications`` descending. An empty DataFrame is returned when the
        ``state_code`` column is missing.
    """
    if "state_code" not in df.columns:
        return pd.DataFrame()

    in_scope = df.loc[df["action_taken"].isin([1, 2, 3])]

    per_state = (
        in_scope.groupby("state_code")
        .agg(
            applications=("is_denied", "count"),
            denials=("is_denied", "sum"),
            originations=("is_approved", "sum"),
            total_volume=("loan_amount", "sum"),
        )
        .reset_index()
    )

    per_state["denial_rate"] = per_state["denials"].div(per_state["applications"])
    return per_state.sort_values("applications", ascending=False)