nmtc-mapper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
1
+ Metadata-Version: 2.4
2
+ Name: nmtc-mapper
3
+ Version: 0.1.0
4
+ Summary: Automated NMTC eligibility checker — geocode addresses and check Low-Income Community status using CDFI Fund and Census data
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/Jaypatel1511/nmtc-mapper
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pandas>=1.4.0
10
+ Requires-Dist: numpy>=1.21.0
11
+ Requires-Dist: requests>=2.27.0
12
+ Requires-Dist: openpyxl>=3.0.0
13
+
14
+ # nmtc-mapper 🗺️
15
+
16
+ **Automated NMTC eligibility checker for addresses and census tracts.**
17
+
18
+ Pass a DataFrame of addresses and get back a boolean column for NMTC eligibility,
19
+ distress level, poverty rate, AMI ratio, and more — using official CDFI Fund and
20
+ Census Bureau data. No manual lookups required.
21
+
22
+ ---
23
+
24
+ ## Why nmtc-mapper?
25
+
26
+ The CDFI Fund provides a manual web tool (CIMS) for checking NMTC eligibility
27
+ one address at a time. nmtc-mapper automates this — pass 10,000 addresses and
28
+ get results in seconds, using the same official data source.
29
+
30
+ ---
31
+
32
+ ## Installation
33
+
34
+ pip install nmtc-mapper
35
+
36
+ ---
37
+
38
+ ## Quickstart
39
+
40
+ from nmtcmapper import NMTCMapper
41
+
42
+ mapper = NMTCMapper()
43
+
44
+ # Single address (geocodes automatically)
45
+ result = mapper.check_address("1234 S Michigan Ave, Chicago, IL 60605")
46
+ result.summary()
47
+ print(result.nmtc_eligible) # True
48
+ print(result.distress_level) # "severe"
49
+ print(result.poverty_rate) # 0.38
50
+
51
+ # Known census tract (no geocoding needed)
52
+ result = mapper.check_tract("17031840100")
53
+ print(result.nmtc_eligible) # True
54
+
55
+ # Batch — enrich a DataFrame of addresses
56
+ import pandas as pd
57
+ df = pd.read_csv("projects.csv") # must have 'address' column
58
+ df = mapper.enrich(df, address_col="address")
59
+ print(df["nmtc_eligible"].value_counts())
60
+ print(df["distress_level"].value_counts())
61
+
62
+ # If you already have census tract IDs
63
+ df = mapper.enrich(df, tract_col="tract_id")
64
+
65
+ # Summary stats
66
+ mapper.eligible_count(df)
67
+
68
+ ---
69
+
70
+ ## Eligibility Rules (2016-2020 ACS — mandatory since Sept 1, 2024)
71
+
72
+ A census tract qualifies as a Low-Income Community (LIC) if it meets ANY of:
73
+
74
+ - Poverty rate >= 20%
75
+ - Median Family Income <= 80% of metro/state AMI
76
+ - Median Family Income <= 85% of state AMI (high migration rural counties)
77
+
78
+ Distress levels:
79
+
80
+ - deep — Poverty >= 40% OR AMI <= 50% OR unemployment >= 2x national rate
81
+ - severe — Poverty >= 30% OR AMI <= 60% OR unemployment >= 1.5x national rate
82
+ - lic — NMTC eligible (meets LIC criteria)
83
+ - ineligible — Does not qualify
84
+
85
+ ---
86
+
87
+ ## Data Sources
88
+
89
+ - CDFI Fund 2016-2020 ACS Low-Income Community Eligibility File
90
+ https://www.cdfifund.gov/research-data
91
+ - US Census Bureau Geocoding API (free, no API key required)
92
+ https://geocoding.geo.census.gov
93
+
94
+ ---
95
+
96
+ ## Output Columns
97
+
98
+ After running .enrich(), your DataFrame will have:
99
+
100
+ - nmtc_eligible (bool)
101
+ - distress_level (str: deep / severe / lic / ineligible)
102
+ - poverty_rate (float)
103
+ - ami_ratio (float)
104
+ - unemployment_rate (float)
105
+ - is_non_metro (bool)
106
+ - severe_distress (bool)
107
+ - deep_distress (bool)
108
+
109
+ ---
110
+
111
+ ## Running Tests
112
+
113
+ PYTHONPATH=. pytest tests/ -v
114
+
115
+ 24 tests across all modules.
116
+
117
+ ---
118
+
119
+ ## Who This Is For
120
+
121
+ - CDEs screening project locations for NMTC eligibility
122
+ - CDFI analysts qualifying borrower locations at scale
123
+ - Researchers analyzing geographic distribution of LIC tracts
124
+ - Anyone replacing manual CIMS lookups with automated Python
125
+
126
+ ---
127
+
128
+ ## License
129
+
130
+ MIT 2026 Jaypatel1511
@@ -0,0 +1,18 @@
1
+ nmtcmapper/__init__.py,sha256=ocg4kkfGopciLzufm0rTB4vHMW_H-zXnb9keMMczzko,340
2
+ nmtcmapper/mapper.py,sha256=Sdgt0bWaBGh8rZaIxYRXSFAMfxUYBNbiJF69DdqVc3M,6068
3
+ nmtcmapper/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ nmtcmapper/data/loader.py,sha256=Qr5VwN92g7Wokb95gkZjFcGCiIY32pz-jJWaFBWCy4g,5689
5
+ nmtcmapper/data/schema.py,sha256=wGu-ZAIpGSla4Zu7PVoegV70JYCy20zK9GBbaKjpSMM,3546
6
+ nmtcmapper/eligibility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ nmtcmapper/eligibility/checker.py,sha256=3C2UkqqTV0fjj1M5LkOKFA92vqmOOTe-yf4ZXOAdyNM,4374
8
+ nmtcmapper/geocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ nmtcmapper/geocoder/census.py,sha256=KFJA3xEZX_zqYrZmAg3UksBPTrNJRYtfdRAyvhg08aE,5865
10
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ tests/conftest.py,sha256=bei5KSwvDktDp6icRhOuH6WMViqKS_FtLeIr-GYzcoU,965
12
+ tests/test_checker.py,sha256=kFuhb9xB1Ecdt5MIWRSLB1ReAX3rZPOcD4u2TkL-q6E,1239
13
+ tests/test_loader.py,sha256=yXzMJAuvWfkDl9256KtnO8DukHnYjyatiJIEL9aoiNw,2313
14
+ tests/test_mapper.py,sha256=47ekz6hBxO4RzygTizcI6ILkRqzcM4S6PezsDbQ8_Qg,1574
15
+ nmtc_mapper-0.1.0.dist-info/METADATA,sha256=81W7T8PIrE99i2my6wp5qtCrLxY6IArrLcFJ-ysRQ2s,3463
16
+ nmtc_mapper-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
17
+ nmtc_mapper-0.1.0.dist-info/top_level.txt,sha256=sk7Bw2sFRwBefFlqTEWJ3PZtemcXsSOwuUHztWwEL5k,17
18
+ nmtc_mapper-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ nmtcmapper
2
+ tests
nmtcmapper/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from nmtcmapper.mapper import NMTCMapper
2
+ from nmtcmapper.eligibility.checker import EligibilityResult
3
+ from nmtcmapper.data.loader import load_eligibility_table
4
+ from nmtcmapper.geocoder.census import geocode_address
5
+
6
+ __version__ = "0.1.0"
7
+ __all__ = [
8
+ "NMTCMapper", "EligibilityResult",
9
+ "load_eligibility_table", "geocode_address",
10
+ ]
File without changes
@@ -0,0 +1,156 @@
1
+ """
2
+ Download and cache the CDFI Fund NMTC eligibility file.
3
+ Builds a lookup table of all eligible census tracts.
4
+ """
5
+ import os
6
+ import requests
7
+ import pandas as pd
8
+ from pathlib import Path
9
+
10
+ from nmtcmapper.data.schema import (
11
+ CACHE_DIR, CDFI_FUND_LIC_URL_2020,
12
+ ELIGIBILITY_FILE_COLUMNS,
13
+ LIC_POVERTY_RATE_THRESHOLD,
14
+ LIC_AMI_RATIO_METRO_THRESHOLD,
15
+ LIC_AMI_RATIO_RURAL_THRESHOLD,
16
+ SEVERE_POVERTY_THRESHOLD, SEVERE_AMI_THRESHOLD,
17
+ SEVERE_UNEMPLOYMENT_MULTIPLIER, NATIONAL_UNEMPLOYMENT_RATE,
18
+ DEEP_POVERTY_THRESHOLD, DEEP_AMI_THRESHOLD,
19
+ DEEP_UNEMPLOYMENT_MULTIPLIER,
20
+ )
21
+
22
+
23
def get_cache_dir() -> Path:
    """Return the cache directory as a ``Path``, creating it when absent."""
    cache_dir = Path(CACHE_DIR)
    # Build any missing parents too; an already-existing directory is fine.
    cache_dir.mkdir(exist_ok=True, parents=True)
    return cache_dir
27
+
28
+
29
def _cache_path(filename: str) -> Path:
    """Return the location of *filename* inside the cache directory."""
    cache_dir = get_cache_dir()
    return cache_dir.joinpath(filename)
31
+
32
+
33
def download_eligibility_file(force: bool = False) -> Path:
    """
    Download the CDFI Fund eligibility workbook into the local cache.

    Args:
        force: Re-download even when a cached copy already exists.

    Returns:
        Path to the cached workbook, or None when the download fails
        (the failure is printed, not raised).
    """
    filename = "NMTC_LIC_Eligibility_2016_2020.xlsx"
    path = _cache_path(filename)
    if path.exists() and not force:
        print(f"Using cached eligibility file: {path}")
        return path
    print("Downloading NMTC eligibility file from CDFI Fund...")
    # Stream into a sibling ".part" file and rename only on success, so an
    # interrupted download can never be mistaken for a valid cached workbook
    # (and a forced refresh that fails leaves the previous copy intact).
    partial = path.with_name(path.name + ".part")
    try:
        with requests.get(
            CDFI_FUND_LIC_URL_2020, stream=True, timeout=120
        ) as response:
            response.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial, path)
        print(f"Saved to {path}")
        return path
    except Exception as e:
        # Deliberately best-effort: any failure is reported and signalled
        # to the caller with None rather than raised.
        print(f"Download failed: {e}")
        try:
            partial.unlink(missing_ok=True)
        except OSError:
            # A leftover ".part" file is harmless; it is never read back.
            pass
        return None
51
+
52
+
53
def load_eligibility_table(force: bool = False) -> pd.DataFrame:
    """
    Return the processed eligibility table.

    Falls back to the built-in sample table when the workbook cannot be
    downloaded or cannot be read.

    Args:
        force: Passed through to the download step to bypass the cache.
    """
    workbook = download_eligibility_file(force=force)
    have_file = workbook is not None and workbook.exists()
    if not have_file:
        print("Using built-in sample eligibility data.")
        return _build_sample_table()

    print(f"Loading eligibility table from {workbook}...")
    try:
        # Everything is read as text; numeric conversion happens later.
        raw = pd.read_excel(workbook, dtype=str)
        return _process_eligibility_table(raw)
    except Exception as e:
        print(f"Error loading file: {e}. Using sample data.")
        return _build_sample_table()
65
+
66
+
67
def _process_eligibility_table(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalise a raw eligibility frame and compute the eligibility columns.

    Column labels are trimmed and upper-cased, mapped through
    ELIGIBILITY_FILE_COLUMNS, and the result is indexed by ``tract_id``
    when that column is available.

    Args:
        df: Raw frame as read from the workbook (values read as text).

    Returns:
        A new frame; the caller's frame is left untouched.
    """
    # Work on a copy so the caller's column labels are not rewritten in place.
    df = df.copy()
    df.columns = df.columns.astype(str).str.strip().str.upper()
    col_map = {
        source: target
        for source, target in ELIGIBILITY_FILE_COLUMNS.items()
        if source in df.columns
    }
    df = df.rename(columns=col_map)

    if "tract_id" not in df.columns:
        if all(c in df.columns for c in ["state", "county", "tract"]):
            df["tract_id"] = (
                df["state"].str.zfill(2) +
                df["county"].str.zfill(3) +
                df["tract"].str.zfill(6)
            )
    if "tract_id" in df.columns:
        # GEOIDs are 11 digits; re-pad values whose leading zero was lost
        # (e.g. a cell stored as a number). Missing values stay missing.
        tract_ids = df["tract_id"]
        df["tract_id"] = tract_ids.where(
            tract_ids.isna(),
            tract_ids.astype(str).str.strip().str.zfill(11),
        )

    # NOTE(review): the thresholds are fractions (0.20 == 20%); this assumes
    # the file's rate columns use the same scale - confirm against the file.
    for col in ["poverty_rate", "ami_ratio", "unemployment_rate"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    # Compare flags case-insensitively and ignore padding, so "y", "Yes"
    # and " X " are recognised alongside the upper-case spellings.
    truthy = {"Y", "YES", "1", "TRUE", "X"}
    for col in ["is_non_metro", "is_high_migration_rural"]:
        if col in df.columns:
            df[col] = df[col].astype(str).str.strip().str.upper().isin(truthy)

    df = _compute_eligibility(df)
    if "tract_id" in df.columns:
        df = df.set_index("tract_id")
    print(f"Eligibility table loaded: {len(df):,} census tracts")
    return df
89
+
90
+
91
def _compute_eligibility(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``nmtc_eligible``, ``severe_distress``, ``deep_distress`` and
    ``distress_level`` columns to *df* (in place) and return it.

    A tract is eligible when its poverty rate meets the LIC threshold, or
    its AMI ratio is at or below the general threshold, or - for
    high-migration rural tracts - at or below the higher rural threshold.
    ``severe_distress`` / ``deep_distress`` are raw indicator flags;
    ``distress_level`` only reports "deep"/"severe"/"lic" for tracts that
    are eligible, everything else is "ineligible".

    Args:
        df: Frame with any of ``poverty_rate``, ``ami_ratio``,
            ``unemployment_rate``, ``is_non_metro``,
            ``is_high_migration_rural``. Missing columns are treated as
            unknown (NaN) / False.
    """
    def _numeric(name: str) -> pd.Series:
        # Missing columns become all-NaN series aligned to df, so every
        # comparison below is simply False for them.
        if name in df.columns:
            return pd.to_numeric(df[name], errors="coerce")
        return pd.Series(float("nan"), index=df.index, dtype=float)

    def _flag(name: str) -> pd.Series:
        # .eq(True) yields a proper boolean series even for object columns
        # holding None/NaN, where `~` and `&` would otherwise misbehave.
        if name in df.columns:
            return df[name].eq(True)
        return pd.Series(False, index=df.index, dtype=bool)

    pr = _numeric("poverty_rate")
    ami = _numeric("ami_ratio")
    unemp = _numeric("unemployment_rate")

    # The higher rural threshold belongs to high-migration rural tracts.
    # When the data carries no such flag, fall back to the non-metro flag
    # (the previous behaviour) as the best available proxy.
    if "is_high_migration_rural" in df.columns:
        rural_rule = _flag("is_high_migration_rural")
    else:
        rural_rule = _flag("is_non_metro")

    poverty_lic = pr >= LIC_POVERTY_RATE_THRESHOLD
    ami_lic = (
        (ami <= LIC_AMI_RATIO_METRO_THRESHOLD) |
        (rural_rule & (ami <= LIC_AMI_RATIO_RURAL_THRESHOLD))
    )
    eligible = poverty_lic | ami_lic
    df["nmtc_eligible"] = eligible

    severe = (
        (pr >= SEVERE_POVERTY_THRESHOLD) |
        (ami <= SEVERE_AMI_THRESHOLD) |
        (unemp >= NATIONAL_UNEMPLOYMENT_RATE * SEVERE_UNEMPLOYMENT_MULTIPLIER)
    )
    df["severe_distress"] = severe

    deep = (
        (pr >= DEEP_POVERTY_THRESHOLD) |
        (ami <= DEEP_AMI_THRESHOLD) |
        (unemp >= NATIONAL_UNEMPLOYMENT_RATE * DEEP_UNEMPLOYMENT_MULTIPLIER)
    )
    df["deep_distress"] = deep

    # Later masks win, giving the precedence deep > severe > lic. Gating on
    # `eligible` keeps the label consistent with nmtc_eligible: a tract
    # that only trips the unemployment test is not reported as distressed.
    df["distress_level"] = (
        pd.Series("ineligible", index=df.index, dtype=object)
        .mask(eligible, "lic")
        .mask(eligible & severe, "severe")
        .mask(eligible & deep, "deep")
    )
    return df
125
+
126
+
127
def _build_sample_table() -> pd.DataFrame:
    """Build the small built-in eligibility table, indexed by ``tract_id``."""
    # (tract_id, poverty_rate, ami_ratio, unemployment_rate,
    #  is_non_metro, is_high_migration_rural)
    sample_tracts = [
        ("17031840100", 0.38, 0.55, 0.12, False, False),
        ("17031839100", 0.42, 0.48, 0.15, False, False),
        ("17031010100", 0.18, 0.92, 0.04, False, False),
        ("36061015900", 0.35, 0.60, 0.11, False, False),
        ("36061019100", 0.28, 0.72, 0.09, False, False),
        ("36047052200", 0.14, 0.88, 0.05, False, False),
        ("26163518300", 0.45, 0.45, 0.18, False, False),
        ("26163520100", 0.32, 0.62, 0.13, False, False),
        ("13121010400", 0.29, 0.68, 0.10, False, False),
        ("48113010900", 0.22, 0.78, 0.07, False, False),
        ("17019000100", 0.15, 0.95, 0.03, True, True),
        ("26001010100", 0.18, 0.88, 0.06, True, False),
    ]
    field_names = [
        "tract_id", "poverty_rate", "ami_ratio", "unemployment_rate",
        "is_non_metro", "is_high_migration_rural",
    ]
    table = pd.DataFrame(sample_tracts, columns=field_names)
    # The state code is the first two characters of the tract id.
    table.insert(1, "state", table["tract_id"].str[:2])
    return _compute_eligibility(table).set_index("tract_id")
@@ -0,0 +1,68 @@
1
+ """
2
+ Column mappings, eligibility thresholds, and constants for NMTC eligibility.
3
+ Based on 2016-2020 ACS data — mandatory for QLICIs closed on or after Sept 1, 2024.
4
+ Source: https://www.cdfifund.gov/research-data
5
+ """
6
+
7
import os

# ── Eligibility Thresholds ────────────────────────────────────────────────────
# All rates and ratios below are fractions (0.20 == 20%).

# Low-Income Community (LIC) criteria — Section 45D
LIC_POVERTY_RATE_THRESHOLD = 0.20  # >= 20% poverty rate
LIC_AMI_RATIO_METRO_THRESHOLD = 0.80  # <= 80% of metro/state AMI
LIC_AMI_RATIO_RURAL_THRESHOLD = 0.85  # <= 85% of state AMI (high migration rural)

# Severe Distress thresholds
SEVERE_POVERTY_THRESHOLD = 0.30  # >= 30% poverty rate
SEVERE_AMI_THRESHOLD = 0.60  # <= 60% of AMI
SEVERE_UNEMPLOYMENT_MULTIPLIER = 1.5  # >= 1.5x national unemployment rate

# Deep Distress thresholds
DEEP_POVERTY_THRESHOLD = 0.40  # >= 40% poverty rate
DEEP_AMI_THRESHOLD = 0.50  # <= 50% of AMI
DEEP_UNEMPLOYMENT_MULTIPLIER = 2.0  # >= 2x national unemployment rate

# National unemployment rate benchmark (2016-2020 ACS)
# NOTE(review): confirm this value against the CDFI Fund's published benchmark.
NATIONAL_UNEMPLOYMENT_RATE = 0.057  # 5.7%

# ── CDFI Fund Eligibility File Column Mappings ────────────────────────────────
# Source: 2016-2020 ACS Low-Income Community Eligibility file from cdfifund.gov
# Keys are upper-cased source column labels; values are the internal names.

ELIGIBILITY_FILE_COLUMNS = {
    "GEOID": "tract_id",
    "STATE": "state",
    "COUNTY": "county",
    "TRACT": "tract",
    "POVERTY_RATE": "poverty_rate",
    "MFI_RATIO": "ami_ratio",
    "UNEMPLOYMENT_RATE": "unemployment_rate",
    "NON_METRO": "is_non_metro",
    "HIGH_MIGRATION_RURAL": "is_high_migration_rural",
    "LIC_ELIGIBLE": "lic_eligible_raw",
    "SEVERE_DISTRESS": "severe_distress_raw",
}

# ── Download URLs ─────────────────────────────────────────────────────────────
CDFI_FUND_LIC_URL_2020 = (
    "https://www.cdfifund.gov/sites/cdfi/files/2024-08/"
    "NMTC_LIC_Eligibility_2016_2020_ACS.xlsx"
)

# ── Cache ─────────────────────────────────────────────────────────────────────
# Per-user cache directory under the home directory.
CACHE_DIR = os.path.join(os.path.expanduser("~"), ".nmtcmapper", "cache")

# ── Census Geocoder API ───────────────────────────────────────────────────────
CENSUS_GEOCODER_URL = (
    "https://geocoding.geo.census.gov/geocoder/geographies/address"
)
CENSUS_GEOCODER_BATCH_URL = (
    "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
)

# ── Distress Levels ───────────────────────────────────────────────────────────
# Human-readable description for each distress_level label.
DISTRESS_LEVELS = {
    "deep": "Deep Distress — highest need, strongest NMTC application score",
    "severe": "Severe Distress — qualifies for 85% investment commitment",
    "lic": "Low-Income Community — NMTC eligible",
    "ineligible": "Not NMTC eligible",
}
File without changes
@@ -0,0 +1,128 @@
1
+ """
2
+ NMTC eligibility checker — applies eligibility rules to census tract data.
3
+ """
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+ import pandas as pd
7
+
8
+ from nmtcmapper.data.schema import DISTRESS_LEVELS
9
+
10
+
11
@dataclass
class EligibilityResult:
    """Result of a single address NMTC eligibility check."""
    address: str
    tract_id: Optional[str]  # None when no census tract was found
    nmtc_eligible: bool
    distress_level: str  # looked up in DISTRESS_LEVELS for the description
    poverty_rate: Optional[float]  # fraction; printed as a percentage
    ami_ratio: Optional[float]  # fraction; printed as a percentage
    unemployment_rate: Optional[float]  # fraction; printed as a percentage
    is_non_metro: bool
    is_high_migration_rural: bool
    severe_distress: bool
    deep_distress: bool
    geocode_success: bool

    @property
    def distress_description(self) -> str:
        """Human-readable description of ``distress_level`` ("Unknown" if unmapped)."""
        return DISTRESS_LEVELS.get(self.distress_level, "Unknown")

    def summary(self) -> None:
        """Print a formatted report of this result to stdout."""
        print("\nNMTC Eligibility Result")
        print("=" * 50)
        print(f" Address: {self.address}")
        print(f" Census Tract: {self.tract_id or 'Not found'}")
        print(f" NMTC Eligible: {'✅ YES' if self.nmtc_eligible else '❌ NO'}")
        print(f" Distress Level: {self.distress_level.upper()}")
        print(f" Description: {self.distress_description}")
        # Rates are optional: they are absent when the tract was not found.
        if self.poverty_rate is not None:
            print(f"\n Poverty Rate: {self.poverty_rate*100:.1f}%")
        if self.ami_ratio is not None:
            print(f" AMI Ratio: {self.ami_ratio*100:.1f}%")
        if self.unemployment_rate is not None:
            print(f" Unemployment: {self.unemployment_rate*100:.1f}%")
        print(f" Non-Metro: {'Yes' if self.is_non_metro else 'No'}")
        print(f" High Migration: {'Yes' if self.is_high_migration_rural else 'No'}")
        print()
48
+
49
+
50
def _optional_float(value) -> Optional[float]:
    """Return *value* as a plain float, or None when it is missing/NaN."""
    if value is None or pd.isna(value):
        return None
    return float(value)


def _flag_value(value) -> bool:
    """Return *value* as a bool, treating None/NaN as False."""
    # bool(float("nan")) is True, so missing flags must be handled first.
    if value is None or pd.isna(value):
        return False
    return bool(value)


def check_tract(
    tract_id: str,
    eligibility_table: pd.DataFrame,
) -> dict:
    """
    Check NMTC eligibility for a known census tract ID.

    Args:
        tract_id: 11-digit census tract GEOID
        eligibility_table: DataFrame indexed by tract_id

    Returns:
        Dict with eligibility fields. Unknown tracts are reported as
        ineligible with no rate data; missing (NaN) rates are returned as
        None and missing flags as False.
    """
    if tract_id not in eligibility_table.index:
        return {
            "nmtc_eligible": False,
            "distress_level": "ineligible",
            "poverty_rate": None,
            "ami_ratio": None,
            "unemployment_rate": None,
            "is_non_metro": False,
            "is_high_migration_rural": False,
            "severe_distress": False,
            "deep_distress": False,
        }

    row = eligibility_table.loc[tract_id]
    if isinstance(row, pd.DataFrame):
        # A duplicated index label yields several rows; use the first.
        row = row.iloc[0]

    level = row.get("distress_level", "ineligible")
    if level is None or pd.isna(level):
        level = "ineligible"

    return {
        "nmtc_eligible": _flag_value(row.get("nmtc_eligible", False)),
        "distress_level": str(level),
        "poverty_rate": _optional_float(row.get("poverty_rate")),
        "ami_ratio": _optional_float(row.get("ami_ratio")),
        "unemployment_rate": _optional_float(row.get("unemployment_rate")),
        "is_non_metro": _flag_value(row.get("is_non_metro", False)),
        "is_high_migration_rural": _flag_value(
            row.get("is_high_migration_rural", False)
        ),
        "severe_distress": _flag_value(row.get("severe_distress", False)),
        "deep_distress": _flag_value(row.get("deep_distress", False)),
    }
89
+
90
+
91
def enrich_dataframe(
    df: pd.DataFrame,
    eligibility_table: pd.DataFrame,
    tract_col: str = "tract_id",
) -> pd.DataFrame:
    """
    Add NMTC eligibility columns to a DataFrame that already has tract IDs.

    Tract IDs are compared as text, so integer IDs (for example from a CSV
    read without ``dtype=str``) still match; all-digit IDs that only match
    after left-padding to 11 characters are matched that way. Rows whose
    tract is missing or unknown are marked ineligible and keep None in the
    remaining eligibility columns.

    Args:
        df: DataFrame with tract_id column
        eligibility_table: Full eligibility lookup table
        tract_col: Name of the tract ID column

    Returns:
        Copy of *df* with added eligibility columns
    """
    df = df.copy()

    eligibility_cols = [
        "nmtc_eligible", "distress_level", "poverty_rate",
        "ami_ratio", "unemployment_rate", "is_non_metro",
        "is_high_migration_rural", "severe_distress", "deep_distress",
    ]

    n_rows = len(df)
    # A missing tract column behaves like a column of missing values.
    raw_ids = df[tract_col].tolist() if tract_col in df.columns else [None] * n_rows
    known = eligibility_table.index

    # Values are collected positionally so a duplicated or non-default
    # index on df cannot misdirect the assignments.
    values = {col: [None] * n_rows for col in eligibility_cols}
    lookups = {}  # tract id -> result dict; repeated tracts are looked up once

    for pos, raw in enumerate(raw_ids):
        key = None
        if pd.notna(raw):
            text = str(raw).strip()
            if text in known:
                key = text
            elif text.isdigit() and text.zfill(11) in known:
                key = text.zfill(11)

        if key is None:
            values["nmtc_eligible"][pos] = False
            values["distress_level"][pos] = "ineligible"
            continue

        if key not in lookups:
            lookups[key] = check_tract(key, eligibility_table)
        for col, val in lookups[key].items():
            values.setdefault(col, [None] * n_rows)[pos] = val

    for col, column_values in values.items():
        # Object dtype keeps None for rows without data.
        df[col] = pd.Series(column_values, index=df.index, dtype=object)

    return df
File without changes
@@ -0,0 +1,196 @@
1
+ """
2
+ Census Geocoding API wrapper.
3
+ Converts addresses to census tract GEOIDs using the free Census Bureau API.
4
+ """
5
+ import requests
6
+ import pandas as pd
7
+ import io
8
+ import time
9
+ from typing import Optional
10
+
11
+ from nmtcmapper.data.schema import (
12
+ CENSUS_GEOCODER_URL, CENSUS_GEOCODER_BATCH_URL
13
+ )
14
+
15
+
16
def geocode_address(address: str, retry: int = 2) -> Optional[str]:
    """
    Geocode a single address to an 11-digit census tract GEOID.

    Uses the free Census Bureau Geocoding API — no API key required.

    Args:
        address: Full address string e.g. "1234 S Michigan Ave, Chicago, IL 60605"
        retry: Number of retries on failure

    Returns:
        11-digit census tract GEOID (state+county+tract) or None if not found.
        None is also returned for a missing, non-string or blank address and
        when every attempt fails.
    """
    # Missing values (None/NaN) and blank strings cannot be geocoded; bail
    # out before parsing so they do not raise or cost a network round-trip.
    if not isinstance(address, str) or not address.strip():
        return None

    params = {
        "street": _parse_street(address),
        "city": _parse_city(address),
        "state": _parse_state(address),
        "zip": _parse_zip(address),
        "benchmark": "Public_AR_Current",
        "vintage": "Current_Current",
        "layers": "Census Tracts",
        "format": "json",
    }

    for attempt in range(retry + 1):
        try:
            response = requests.get(
                CENSUS_GEOCODER_URL, params=params, timeout=15
            )
            response.raise_for_status()
            data = response.json()

            matches = data.get("result", {}).get("addressMatches", [])
            if not matches:
                return None

            # Only the first match and its first tract are considered.
            geo = matches[0].get("geographies", {})
            tracts = geo.get("Census Tracts", [])
            if not tracts:
                return None

            state = tracts[0].get("STATE", "")
            county = tracts[0].get("COUNTY", "")
            tract = tracts[0].get("TRACT", "")

            if state and county and tract:
                return f"{state}{county}{tract}"
            return None

        except Exception:
            # Best-effort lookup: pause and retry, giving up with None
            # once the retries are exhausted.
            if attempt < retry:
                time.sleep(1)
            else:
                return None

    # Reached only when retry is negative and no attempt was made.
    return None
70
+
71
+
72
def geocode_batch(
    df: pd.DataFrame,
    address_col: str = "address",
    batch_size: int = 100,
    sleep_between: float = 1.0,
) -> pd.DataFrame:
    """
    Geocode a batch of addresses using the Census batch geocoder.

    Each chunk is sent to the batch endpoint; if that fails for any reason
    the chunk is geocoded one address at a time instead.

    Args:
        df: DataFrame with address column
        address_col: Name of the address column
        batch_size: Addresses per batch (max 10,000 per Census API)
        sleep_between: Seconds to sleep between batches

    Returns:
        Copy of *df* with added 'tract_id' column (None where unmatched)

    Raises:
        ValueError: If batch_size is not a positive integer.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    df = df.copy()
    df["tract_id"] = None
    # Results are written by position so a duplicated or non-default index
    # on df cannot send them to the wrong rows.
    tract_pos = df.columns.get_loc("tract_id")

    total = len(df)
    print(f"Geocoding {total:,} addresses in batches of {batch_size}...")

    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        batch = df.iloc[start:end]

        print(f" Batch {start//batch_size + 1}: rows {start}–{end}")

        try:
            tract_ids = _batch_geocode_census(batch, address_col)
            if len(tract_ids) != len(batch):
                raise ValueError(
                    f"expected {len(batch)} results, got {len(tract_ids)}"
                )
            df.iloc[start:end, tract_pos] = tract_ids
        except Exception as e:
            print(f" Batch failed: {e} — falling back to single geocoding")
            for offset, addr in enumerate(batch[address_col].tolist()):
                # Missing or non-text addresses simply stay unmatched.
                df.iat[start + offset, tract_pos] = (
                    geocode_address(addr) if isinstance(addr, str) else None
                )

        if end < total:
            time.sleep(sleep_between)

    matched = df["tract_id"].notna().sum()
    print(f"Geocoded {matched:,}/{total:,} addresses successfully")
    return df
116
+
117
+
118
def _batch_geocode_census(
    df: pd.DataFrame, address_col: str
) -> list:
    """
    Use Census batch geocoding API for a chunk of addresses.

    Each address is submitted with its position in *df* as the record id,
    and results are matched back through that id, so the returned list is
    in input order regardless of the order of the response rows.

    Returns:
        List of tract IDs (or None where unmatched), one per input row.
    """
    import csv  # local import: not among this module's top-level imports

    # Build the upload with the csv module so commas and quotes inside an
    # address component are escaped correctly.
    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    for i, raw in enumerate(df[address_col].tolist()):
        addr = str(raw)
        writer.writerow([
            i,
            _parse_street(addr),
            _parse_city(addr),
            _parse_state(addr),
            _parse_zip(addr),
        ])
    csv_content = buffer.getvalue()

    response = requests.post(
        CENSUS_GEOCODER_BATCH_URL,
        files={"addressFile": ("addresses.csv", csv_content, "text/csv")},
        data={
            "benchmark": "Public_AR_Current",
            "vintage": "Current_Current",
            "layers": "Census Tracts",
        },
        timeout=60,
    )
    response.raise_for_status()

    result_df = pd.read_csv(
        io.StringIO(response.text),
        header=None,
        names=["id", "input_address", "match", "match_type",
               "matched_address", "coords", "tiger_line_id",
               "side", "state", "county", "tract", "block"],
        dtype=str,
    )

    has_geography = result_df[["state", "county", "tract"]].notna().all(axis=1)
    matched = result_df[result_df["match"].eq("Match") & has_geography]
    geoids = matched["state"] + matched["county"] + matched["tract"]
    by_id = dict(zip(matched["id"].str.strip(), geoids))

    # Re-emit in input order; ids absent from the response stay None.
    return [by_id.get(str(i)) for i in range(len(df))]
171
+
172
+
173
+ def _parse_street(address: str) -> str:
174
+ parts = [p.strip() for p in address.split(",")]
175
+ return parts[0] if parts else address
176
+
177
+
178
+ def _parse_city(address: str) -> str:
179
+ parts = [p.strip() for p in address.split(",")]
180
+ return parts[1] if len(parts) > 1 else ""
181
+
182
+
183
+ def _parse_state(address: str) -> str:
184
+ parts = [p.strip() for p in address.split(",")]
185
+ if len(parts) > 2:
186
+ state_zip = parts[2].strip().split()
187
+ return state_zip[0] if state_zip else ""
188
+ return ""
189
+
190
+
191
+ def _parse_zip(address: str) -> str:
192
+ parts = [p.strip() for p in address.split(",")]
193
+ if len(parts) > 2:
194
+ state_zip = parts[2].strip().split()
195
+ return state_zip[1] if len(state_zip) > 1 else ""
196
+ return ""
nmtcmapper/mapper.py ADDED
@@ -0,0 +1,182 @@
1
+ """
2
+ NMTCMapper — main public API for NMTC eligibility checking.
3
+ """
4
+ import pandas as pd
5
+ from typing import Optional
6
+
7
+ from nmtcmapper.data.loader import load_eligibility_table
8
+ from nmtcmapper.geocoder.census import geocode_address, geocode_batch
9
+ from nmtcmapper.eligibility.checker import (
10
+ check_tract, enrich_dataframe, EligibilityResult
11
+ )
12
+
13
+
14
class NMTCMapper:
    """
    Check NMTC eligibility for addresses or census tracts.

    Usage:
        mapper = NMTCMapper()

        # Single address
        result = mapper.check_address("1234 S Michigan Ave, Chicago, IL 60605")
        result.summary()

        # Known census tract
        result = mapper.check_tract("17031840100")

        # Batch — DataFrame of addresses
        df = pd.read_csv("projects.csv")
        df = mapper.enrich(df, address_col="address")
    """

    def __init__(self, force_reload: bool = False):
        """
        Initialize NMTCMapper and load the eligibility table.

        Args:
            force_reload: Re-download the eligibility file even if cached
        """
        print("Loading NMTC eligibility table...")
        self._table = load_eligibility_table(force=force_reload)
        print(f"Ready. {len(self._table):,} census tracts loaded.")

    def check_address(self, address: str) -> "EligibilityResult":
        """
        Check NMTC eligibility for a single address.

        Geocodes the address to a census tract using the free
        Census Bureau API, then looks up eligibility.

        Args:
            address: Full address string e.g.
                "1234 S Michigan Ave, Chicago, IL 60605"

        Returns:
            EligibilityResult with eligibility flags and tract data.
            When geocoding fails the result is ineligible with
            ``geocode_success`` set to False.
        """
        tract_id = geocode_address(address)
        geocode_success = tract_id is not None

        if tract_id:
            data = check_tract(tract_id, self._table)
        else:
            data = {
                "nmtc_eligible": False,
                "distress_level": "ineligible",
                "poverty_rate": None,
                "ami_ratio": None,
                "unemployment_rate": None,
                "is_non_metro": False,
                "is_high_migration_rural": False,
                "severe_distress": False,
                "deep_distress": False,
            }

        return EligibilityResult(
            address=address,
            tract_id=tract_id,
            geocode_success=geocode_success,
            **data,
        )

    def check_tract(self, tract_id: str) -> "EligibilityResult":
        """
        Check NMTC eligibility for a known 11-digit census tract GEOID.

        Args:
            tract_id: 11-digit GEOID e.g. "17031840100"

        Returns:
            EligibilityResult with eligibility flags
        """
        # Inside the method body the bare name resolves to the module-level
        # check_tract function, not to this method.
        data = check_tract(tract_id, self._table)
        return EligibilityResult(
            address=f"Census Tract {tract_id}",
            tract_id=tract_id,
            geocode_success=True,
            **data,
        )

    def enrich(
        self,
        df: pd.DataFrame,
        address_col: str = "address",
        tract_col: Optional[str] = None,
        batch_size: int = 100,
    ) -> pd.DataFrame:
        """
        Add NMTC eligibility columns to a DataFrame.

        If tract_col names an existing column, those tract IDs are used
        (no geocoding). Otherwise the addresses in address_col are
        geocoded first.

        Args:
            df: DataFrame with address or tract ID column
            address_col: Column with full address strings
            tract_col: Column with 11-digit tract GEOIDs (skips geocoding)
            batch_size: Addresses per geocoding batch

        Returns:
            DataFrame with added columns:
            - nmtc_eligible (bool)
            - distress_level (str: 'deep', 'severe', 'lic', 'ineligible')
            - poverty_rate (float)
            - ami_ratio (float)
            - unemployment_rate (float)
            - is_non_metro (bool)
            - is_high_migration_rural (bool)
            - severe_distress (bool)
            - deep_distress (bool)

        Raises:
            KeyError: If geocoding is needed but address_col is not a
                column of df.
        """
        df = df.copy()

        if tract_col and tract_col in df.columns:
            print(f"Using existing tract IDs from column '{tract_col}'")
            return enrich_dataframe(df, self._table, tract_col=tract_col)

        # Fail before any network work rather than part-way through it.
        if address_col not in df.columns:
            raise KeyError(
                f"Column '{address_col}' not found; pass address_col= or "
                "tract_col= naming an existing column."
            )

        print(f"Geocoding addresses from column '{address_col}'...")
        df = geocode_batch(df, address_col=address_col, batch_size=batch_size)
        return enrich_dataframe(df, self._table, tract_col="tract_id")

    def eligible_count(self, df: pd.DataFrame) -> dict:
        """
        Summarize NMTC eligibility across a DataFrame.
        Requires df to have 'nmtc_eligible' and 'distress_level' columns.

        Returns:
            Dict with total, eligible and per-level counts; the same
            figures are printed.

        Raises:
            ValueError: If either required column is missing.
        """
        required = ("nmtc_eligible", "distress_level")
        if any(col not in df.columns for col in required):
            raise ValueError("Run .enrich() first to add eligibility columns.")

        total = len(df)
        # .eq(True) counts only genuine True values, so rows holding
        # None/NaN are treated as not eligible instead of breaking the sum.
        eligible = int(df["nmtc_eligible"].eq(True).sum())
        deep = int((df["distress_level"] == "deep").sum())
        severe = int((df["distress_level"] == "severe").sum())
        lic = int((df["distress_level"] == "lic").sum())

        result = {
            "total": total,
            "nmtc_eligible": eligible,
            "pct_eligible": round(eligible / total * 100, 1) if total else 0,
            "deep_distress": deep,
            "severe_distress": severe,
            "lic_only": lic,
            "ineligible": total - eligible,
        }

        print("\nNMTC Eligibility Summary")
        print("=" * 40)
        print(f" Total addresses: {total:,}")
        print(f" NMTC Eligible: {eligible:,} ({result['pct_eligible']}%)")
        print(f" ── Deep Distress: {deep:,}")
        print(f" ── Severe Distress: {severe:,}")
        print(f" ── LIC Only: {lic:,}")
        print(f" Not Eligible: {total - eligible:,}")
        print()
        return result

    @property
    def tract_count(self) -> int:
        """Number of census tracts in the loaded eligibility table."""
        return len(self._table)

    @property
    def eligible_tract_count(self) -> int:
        """Number of loaded tracts flagged as NMTC eligible."""
        return int(self._table["nmtc_eligible"].sum())
tests/__init__.py ADDED
File without changes
tests/conftest.py ADDED
@@ -0,0 +1,37 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from nmtcmapper.data.loader import _build_sample_table
4
+ from nmtcmapper.mapper import NMTCMapper
5
+
6
+
7
@pytest.fixture
def sample_table():
    """Provide the built-in sample eligibility table."""
    table = _build_sample_table()
    return table
10
+
11
+
12
@pytest.fixture
def mapper(monkeypatch):
    """NMTCMapper with sample data — no real download."""
    def _no_download(force=False):
        # Returning None makes the loader use its sample table.
        return None

    monkeypatch.setattr(
        "nmtcmapper.data.loader.download_eligibility_file",
        _no_download,
    )
    return NMTCMapper()
20
+
21
+
22
@pytest.fixture
def sample_df():
    """Four example projects, each paired with a census tract id."""
    projects = [
        ("Southside Health Center", "17031840100"),  # Chicago South Side — eligible
        ("North Shore Office", "17031010100"),  # Chicago North Shore — not eligible
        ("Detroit Manufacturing", "26163518300"),  # Detroit — eligible
        ("NYC Bronx Project", "36061015900"),  # NYC Bronx — eligible
    ]
    return pd.DataFrame(projects, columns=["project_name", "tract_id"])
tests/test_checker.py ADDED
@@ -0,0 +1,36 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from nmtcmapper.eligibility.checker import check_tract, enrich_dataframe
4
+
5
+
6
+ def test_check_known_eligible_tract(sample_table):
7
+ result = check_tract("17031840100", sample_table)
8
+ assert result["nmtc_eligible"] == True
9
+
10
+
11
+ def test_check_known_ineligible_tract(sample_table):
12
+ result = check_tract("17031010100", sample_table)
13
+ assert result["nmtc_eligible"] == False
14
+
15
+
16
+ def test_check_unknown_tract(sample_table):
17
+ result = check_tract("99999999999", sample_table)
18
+ assert result["nmtc_eligible"] == False
19
+ assert result["distress_level"] == "ineligible"
20
+
21
+
22
+ def test_enrich_dataframe(sample_table, sample_df):
23
+ result = enrich_dataframe(sample_df, sample_table, tract_col="tract_id")
24
+ assert "nmtc_eligible" in result.columns
25
+ assert "distress_level" in result.columns
26
+ assert len(result) == len(sample_df)
27
+
28
+
29
+ def test_enrich_eligible_count(sample_table, sample_df):
30
+ result = enrich_dataframe(sample_df, sample_table, tract_col="tract_id")
31
+ assert result["nmtc_eligible"].sum() >= 2
32
+
33
+
34
+ def test_distress_levels_present(sample_table, sample_df):
35
+ result = enrich_dataframe(sample_df, sample_table, tract_col="tract_id")
36
+ assert result["distress_level"].notna().all()
tests/test_loader.py ADDED
@@ -0,0 +1,84 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from nmtcmapper.data.loader import _build_sample_table, _compute_eligibility
4
+
5
+
6
+ def test_sample_table_returns_dataframe():
7
+ df = _build_sample_table()
8
+ assert isinstance(df, pd.DataFrame)
9
+ assert len(df) > 0
10
+
11
+
12
+ def test_sample_table_has_required_columns():
13
+ df = _build_sample_table()
14
+ required = ["nmtc_eligible", "distress_level", "poverty_rate", "ami_ratio"]
15
+ for col in required:
16
+ assert col in df.columns
17
+
18
+
19
+ def test_sample_table_has_eligible_tracts():
20
+ df = _build_sample_table()
21
+ assert df["nmtc_eligible"].any()
22
+
23
+
24
+ def test_sample_table_has_ineligible_tracts():
25
+ df = _build_sample_table()
26
+ assert (~df["nmtc_eligible"]).any()
27
+
28
+
29
+ def test_distress_levels_valid():
30
+ df = _build_sample_table()
31
+ valid = {"deep", "severe", "lic", "ineligible"}
32
+ assert set(df["distress_level"].unique()).issubset(valid)
33
+
34
+
35
+ def test_high_poverty_is_eligible():
36
+ df = pd.DataFrame([{
37
+ "tract_id": "TEST001",
38
+ "poverty_rate": 0.35,
39
+ "ami_ratio": 0.90,
40
+ "unemployment_rate": 0.05,
41
+ "is_non_metro": False,
42
+ "is_high_migration_rural": False,
43
+ }])
44
+ result = _compute_eligibility(df)
45
+ assert result["nmtc_eligible"].iloc[0] == True
46
+
47
+
48
+ def test_low_ami_is_eligible():
49
+ df = pd.DataFrame([{
50
+ "tract_id": "TEST002",
51
+ "poverty_rate": 0.10,
52
+ "ami_ratio": 0.75,
53
+ "unemployment_rate": 0.04,
54
+ "is_non_metro": False,
55
+ "is_high_migration_rural": False,
56
+ }])
57
+ result = _compute_eligibility(df)
58
+ assert result["nmtc_eligible"].iloc[0] == True
59
+
60
+
61
+ def test_affluent_tract_not_eligible():
62
+ df = pd.DataFrame([{
63
+ "tract_id": "TEST003",
64
+ "poverty_rate": 0.05,
65
+ "ami_ratio": 1.20,
66
+ "unemployment_rate": 0.02,
67
+ "is_non_metro": False,
68
+ "is_high_migration_rural": False,
69
+ }])
70
+ result = _compute_eligibility(df)
71
+ assert result["nmtc_eligible"].iloc[0] == False
72
+
73
+
74
+ def test_deep_distress_classified():
75
+ df = pd.DataFrame([{
76
+ "tract_id": "TEST004",
77
+ "poverty_rate": 0.45,
78
+ "ami_ratio": 0.45,
79
+ "unemployment_rate": 0.15,
80
+ "is_non_metro": False,
81
+ "is_high_migration_rural": False,
82
+ }])
83
+ result = _compute_eligibility(df)
84
+ assert result["distress_level"].iloc[0] == "deep"
tests/test_mapper.py ADDED
@@ -0,0 +1,54 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from nmtcmapper.eligibility.checker import EligibilityResult
4
+
5
+
6
+ def test_mapper_loads(mapper):
7
+ assert mapper.tract_count > 0
8
+
9
+
10
+ def test_mapper_eligible_tracts(mapper):
11
+ assert mapper.eligible_tract_count > 0
12
+
13
+
14
+ def test_check_tract_eligible(mapper):
15
+ result = mapper.check_tract("17031840100")
16
+ assert isinstance(result, EligibilityResult)
17
+ assert result.nmtc_eligible == True
18
+ assert result.tract_id == "17031840100"
19
+
20
+
21
+ def test_check_tract_ineligible(mapper):
22
+ result = mapper.check_tract("17031010100")
23
+ assert result.nmtc_eligible == False
24
+
25
+
26
+ def test_check_tract_unknown(mapper):
27
+ result = mapper.check_tract("99999999999")
28
+ assert result.nmtc_eligible == False
29
+ assert result.distress_level == "ineligible"
30
+
31
+
32
+ def test_enrich_with_tract_col(mapper, sample_df):
33
+ result = mapper.enrich(sample_df, tract_col="tract_id")
34
+ assert "nmtc_eligible" in result.columns
35
+ assert "distress_level" in result.columns
36
+ assert len(result) == len(sample_df)
37
+
38
+
39
+ def test_eligible_count_summary(mapper, sample_df):
40
+ enriched = mapper.enrich(sample_df, tract_col="tract_id")
41
+ summary = mapper.eligible_count(enriched)
42
+ assert "total" in summary
43
+ assert "nmtc_eligible" in summary
44
+ assert summary["total"] == len(sample_df)
45
+
46
+
47
+ def test_result_summary_runs(mapper):
48
+ result = mapper.check_tract("17031840100")
49
+ result.summary()
50
+
51
+
52
+ def test_eligible_count_raises_without_enrich(mapper, sample_df):
53
+ with pytest.raises(ValueError, match="Run .enrich()"):
54
+ mapper.eligible_count(sample_df)