PyPI - nmtc-mapper - Versions diffs - 0.1.0__py3-none-any.whl - Mend

nmtc-mapper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

nmtc_mapper-0.1.0.dist-info/METADATA +130 -0
nmtc_mapper-0.1.0.dist-info/RECORD +18 -0
nmtc_mapper-0.1.0.dist-info/WHEEL +5 -0
nmtc_mapper-0.1.0.dist-info/top_level.txt +2 -0
nmtcmapper/__init__.py +10 -0
nmtcmapper/data/__init__.py +0 -0
nmtcmapper/data/loader.py +156 -0
nmtcmapper/data/schema.py +68 -0
nmtcmapper/eligibility/__init__.py +0 -0
nmtcmapper/eligibility/checker.py +128 -0
nmtcmapper/geocoder/__init__.py +0 -0
nmtcmapper/geocoder/census.py +196 -0
nmtcmapper/mapper.py +182 -0
tests/__init__.py +0 -0
tests/conftest.py +37 -0
tests/test_checker.py +36 -0
tests/test_loader.py +84 -0
tests/test_mapper.py +54 -0

nmtc_mapper-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,130 @@
+Metadata-Version: 2.4
+Name: nmtc-mapper
+Version: 0.1.0
+Summary: Automated NMTC eligibility checker — geocode addresses and check Low-Income Community status using CDFI Fund and Census data
+License: MIT
+Project-URL: Homepage, https://github.com/Jaypatel1511/nmtc-mapper
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: pandas>=1.4.0
+Requires-Dist: numpy>=1.21.0
+Requires-Dist: requests>=2.27.0
+Requires-Dist: openpyxl>=3.0.0
+# nmtc-mapper 🗺️
+**Automated NMTC eligibility checker for addresses and census tracts.**
+Pass a DataFrame of addresses and get back a boolean column for NMTC eligibility,
+distress level, poverty rate, AMI ratio, and more — using official CDFI Fund and
+Census Bureau data. No manual lookups required.
+---
+## Why nmtc-mapper?
+The CDFI Fund provides a manual web tool (CIMS) for checking NMTC eligibility
+one address at a time. nmtc-mapper automates this — pass 10,000 addresses and
+get results in seconds, using the same official data source.
+---
+## Installation
+    pip install nmtc-mapper
+---
+## Quickstart
+    from nmtcmapper import NMTCMapper
+    mapper = NMTCMapper()
+    # Single address (geocodes automatically)
+    result = mapper.check_address("1234 S Michigan Ave, Chicago, IL 60605")
+    result.summary()
+    print(result.nmtc_eligible)    # True
+    print(result.distress_level)   # "severe"
+    print(result.poverty_rate)     # 0.38
+    # Known census tract (no geocoding needed)
+    result = mapper.check_tract("17031840100")
+    print(result.nmtc_eligible)    # True
+    # Batch — enrich a DataFrame of addresses
+    import pandas as pd
+    df = pd.read_csv("projects.csv")   # must have 'address' column
+    df = mapper.enrich(df, address_col="address")
+    print(df["nmtc_eligible"].value_counts())
+    print(df["distress_level"].value_counts())
+    # If you already have census tract IDs
+    df = mapper.enrich(df, tract_col="tract_id")
+    # Summary stats
+    mapper.eligible_count(df)
+---
+## Eligibility Rules (2016-2020 ACS — mandatory since Sept 1, 2024)
+A census tract qualifies as a Low-Income Community (LIC) if it meets ANY of:
+- Poverty rate >= 20%
+- Median Family Income <= 80% of metro/state AMI
+- Median Family Income <= 85% of state AMI (high migration rural counties)
+Distress levels:
+- deep     — Poverty >= 40% OR AMI <= 50% OR unemployment >= 2x national rate
+- severe   — Poverty >= 30% OR AMI <= 60% OR unemployment >= 1.5x national rate
+- lic      — NMTC eligible (meets LIC criteria)
+- ineligible — Does not qualify
+---
+## Data Sources
+- CDFI Fund 2016-2020 ACS Low-Income Community Eligibility File
+  https://www.cdfifund.gov/research-data
+- US Census Bureau Geocoding API (free, no API key required)
+  https://geocoding.geo.census.gov
+---
+## Output Columns
+After running .enrich(), your DataFrame will have:
+- nmtc_eligible (bool)
+- distress_level (str: deep / severe / lic / ineligible)
+- poverty_rate (float)
+- ami_ratio (float)
+- unemployment_rate (float)
+- is_non_metro (bool)
+- is_high_migration_rural (bool)
+- severe_distress (bool)
+- deep_distress (bool)
+---
+## Running Tests
+    PYTHONPATH=. pytest tests/ -v
+24 tests across all modules.
+---
+## Who This Is For
+- CDEs screening project locations for NMTC eligibility
+- CDFI analysts qualifying borrower locations at scale
+- Researchers analyzing geographic distribution of LIC tracts
+- Anyone replacing manual CIMS lookups with automated Python
+---
+## License
+MIT 2026 Jaypatel1511

nmtc_mapper-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,18 @@
+nmtcmapper/__init__.py,sha256=ocg4kkfGopciLzufm0rTB4vHMW_H-zXnb9keMMczzko,340
+nmtcmapper/mapper.py,sha256=Sdgt0bWaBGh8rZaIxYRXSFAMfxUYBNbiJF69DdqVc3M,6068
+nmtcmapper/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nmtcmapper/data/loader.py,sha256=Qr5VwN92g7Wokb95gkZjFcGCiIY32pz-jJWaFBWCy4g,5689
+nmtcmapper/data/schema.py,sha256=wGu-ZAIpGSla4Zu7PVoegV70JYCy20zK9GBbaKjpSMM,3546
+nmtcmapper/eligibility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nmtcmapper/eligibility/checker.py,sha256=3C2UkqqTV0fjj1M5LkOKFA92vqmOOTe-yf4ZXOAdyNM,4374
+nmtcmapper/geocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nmtcmapper/geocoder/census.py,sha256=KFJA3xEZX_zqYrZmAg3UksBPTrNJRYtfdRAyvhg08aE,5865
+tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tests/conftest.py,sha256=bei5KSwvDktDp6icRhOuH6WMViqKS_FtLeIr-GYzcoU,965
+tests/test_checker.py,sha256=kFuhb9xB1Ecdt5MIWRSLB1ReAX3rZPOcD4u2TkL-q6E,1239
+tests/test_loader.py,sha256=yXzMJAuvWfkDl9256KtnO8DukHnYjyatiJIEL9aoiNw,2313
+tests/test_mapper.py,sha256=47ekz6hBxO4RzygTizcI6ILkRqzcM4S6PezsDbQ8_Qg,1574
+nmtc_mapper-0.1.0.dist-info/METADATA,sha256=81W7T8PIrE99i2my6wp5qtCrLxY6IArrLcFJ-ysRQ2s,3463
+nmtc_mapper-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+nmtc_mapper-0.1.0.dist-info/top_level.txt,sha256=sk7Bw2sFRwBefFlqTEWJ3PZtemcXsSOwuUHztWwEL5k,17
+nmtc_mapper-0.1.0.dist-info/RECORD,,

nmtc_mapper-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

nmtc_mapper-0.1.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1,2 @@
+nmtcmapper
+tests

nmtcmapper/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from nmtcmapper.mapper import NMTCMapper
+from nmtcmapper.eligibility.checker import EligibilityResult
+from nmtcmapper.data.loader import load_eligibility_table
+from nmtcmapper.geocoder.census import geocode_address
+__version__ = "0.1.0"
+__all__ = [
+    "NMTCMapper", "EligibilityResult",
+    "load_eligibility_table", "geocode_address",
+]

nmtcmapper/data/__init__.py ADDED Viewed

File without changes

nmtcmapper/data/loader.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""
+Download and cache the CDFI Fund NMTC eligibility file.
+Builds a lookup table of all eligible census tracts.
+"""
+import os
+import requests
+import pandas as pd
+from pathlib import Path
+from nmtcmapper.data.schema import (
+    CACHE_DIR, CDFI_FUND_LIC_URL_2020,
+    ELIGIBILITY_FILE_COLUMNS,
+    LIC_POVERTY_RATE_THRESHOLD,
+    LIC_AMI_RATIO_METRO_THRESHOLD,
+    LIC_AMI_RATIO_RURAL_THRESHOLD,
+    SEVERE_POVERTY_THRESHOLD, SEVERE_AMI_THRESHOLD,
+    SEVERE_UNEMPLOYMENT_MULTIPLIER, NATIONAL_UNEMPLOYMENT_RATE,
+    DEEP_POVERTY_THRESHOLD, DEEP_AMI_THRESHOLD,
+    DEEP_UNEMPLOYMENT_MULTIPLIER,
+)
+def get_cache_dir() -> Path:
+    path = Path(CACHE_DIR)
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+def _cache_path(filename: str) -> Path:
+    return get_cache_dir() / filename
+def download_eligibility_file(force: bool = False) -> Path:
+    filename = "NMTC_LIC_Eligibility_2016_2020.xlsx"
+    path = _cache_path(filename)
+    if path.exists() and not force:
+        print(f"Using cached eligibility file: {path}")
+        return path
+    print("Downloading NMTC eligibility file from CDFI Fund...")
+    try:
+        response = requests.get(CDFI_FUND_LIC_URL_2020, stream=True, timeout=120)
+        response.raise_for_status()
+        with open(path, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print(f"Saved to {path}")
+        return path
+    except Exception as e:
+        print(f"Download failed: {e}")
+        return None
+def load_eligibility_table(force: bool = False) -> pd.DataFrame:
+    path = download_eligibility_file(force=force)
+    if path is None or not path.exists():
+        print("Using built-in sample eligibility data.")
+        return _build_sample_table()
+    print(f"Loading eligibility table from {path}...")
+    try:
+        df = pd.read_excel(path, dtype=str)
+        return _process_eligibility_table(df)
+    except Exception as e:
+        print(f"Error loading file: {e}. Using sample data.")
+        return _build_sample_table()
+def _process_eligibility_table(df: pd.DataFrame) -> pd.DataFrame:
+    df.columns = df.columns.str.strip().str.upper()
+    col_map = {k: v for k, v in ELIGIBILITY_FILE_COLUMNS.items() if k in df.columns}
+    df = df.rename(columns=col_map)
+    if "tract_id" not in df.columns:
+        if all(c in df.columns for c in ["state", "county", "tract"]):
+            df["tract_id"] = (
+                df["state"].str.zfill(2) +
+                df["county"].str.zfill(3) +
+                df["tract"].str.zfill(6)
+            )
+    for col in ["poverty_rate", "ami_ratio", "unemployment_rate"]:
+        if col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+    for col in ["is_non_metro", "is_high_migration_rural"]:
+        if col in df.columns:
+            df[col] = df[col].isin({"Y", "YES", "1", "True", "TRUE", "X"})
+    df = _compute_eligibility(df)
+    if "tract_id" in df.columns:
+        df = df.set_index("tract_id")
+    print(f"Eligibility table loaded: {len(df):,} census tracts")
+    return df
+def _compute_eligibility(df: pd.DataFrame) -> pd.DataFrame:
+    pr = df.get("poverty_rate", pd.Series(dtype=float))
+    ami = df.get("ami_ratio", pd.Series(dtype=float))
+    unemp = df.get("unemployment_rate", pd.Series(dtype=float))
+    non_metro = df.get("is_non_metro", pd.Series(False, index=df.index))
+    poverty_lic = pr >= LIC_POVERTY_RATE_THRESHOLD
+    ami_lic = (
+        (non_metro & (ami <= LIC_AMI_RATIO_RURAL_THRESHOLD)) |
+        (~non_metro & (ami <= LIC_AMI_RATIO_METRO_THRESHOLD))
+    )
+    df["nmtc_eligible"] = poverty_lic | ami_lic
+    sev_poverty = pr >= SEVERE_POVERTY_THRESHOLD
+    sev_ami = ami <= SEVERE_AMI_THRESHOLD
+    sev_unemp = unemp >= (NATIONAL_UNEMPLOYMENT_RATE * SEVERE_UNEMPLOYMENT_MULTIPLIER)
+    df["severe_distress"] = sev_poverty | sev_ami | sev_unemp
+    deep_poverty = pr >= DEEP_POVERTY_THRESHOLD
+    deep_ami = ami <= DEEP_AMI_THRESHOLD
+    deep_unemp = unemp >= (NATIONAL_UNEMPLOYMENT_RATE * DEEP_UNEMPLOYMENT_MULTIPLIER)
+    df["deep_distress"] = deep_poverty | deep_ami | deep_unemp
+    def distress_label(row):
+        if row.get("deep_distress"):
+            return "deep"
+        elif row.get("severe_distress"):
+            return "severe"
+        elif row.get("nmtc_eligible"):
+            return "lic"
+        return "ineligible"
+    df["distress_level"] = df.apply(distress_label, axis=1)
+    return df
+def _build_sample_table() -> pd.DataFrame:
+    sample_tracts = [
+        ("17031840100", 0.38, 0.55, 0.12, False, False),
+        ("17031839100", 0.42, 0.48, 0.15, False, False),
+        ("17031010100", 0.18, 0.92, 0.04, False, False),
+        ("36061015900", 0.35, 0.60, 0.11, False, False),
+        ("36061019100", 0.28, 0.72, 0.09, False, False),
+        ("36047052200", 0.14, 0.88, 0.05, False, False),
+        ("26163518300", 0.45, 0.45, 0.18, False, False),
+        ("26163520100", 0.32, 0.62, 0.13, False, False),
+        ("13121010400", 0.29, 0.68, 0.10, False, False),
+        ("48113010900", 0.22, 0.78, 0.07, False, False),
+        ("17019000100", 0.15, 0.95, 0.03, True,  True),
+        ("26001010100", 0.18, 0.88, 0.06, True,  False),
+    ]
+    rows = []
+    for tid, pr, ami, unemp, non_metro, high_migration in sample_tracts:
+        rows.append({
+            "tract_id": tid,
+            "state": tid[:2],
+            "poverty_rate": pr,
+            "ami_ratio": ami,
+            "unemployment_rate": unemp,
+            "is_non_metro": non_metro,
+            "is_high_migration_rural": high_migration,
+        })
+    df = pd.DataFrame(rows)
+    df = _compute_eligibility(df)
+    df = df.set_index("tract_id")
+    return df

nmtcmapper/data/schema.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""
+Column mappings, eligibility thresholds, and constants for NMTC eligibility.
+Based on 2016-2020 ACS data — mandatory for QLICIs closed on or after Sept 1, 2024.
+Source: https://www.cdfifund.gov/research-data
+"""
+# ── Eligibility Thresholds ────────────────────────────────────────────────────
+# Low-Income Community (LIC) criteria — Section 45D
+LIC_POVERTY_RATE_THRESHOLD     = 0.20   # >= 20% poverty rate
+LIC_AMI_RATIO_METRO_THRESHOLD  = 0.80   # <= 80% of metro/state AMI
+LIC_AMI_RATIO_RURAL_THRESHOLD  = 0.85   # <= 85% of state AMI (high migration rural)
+# Severe Distress thresholds
+SEVERE_POVERTY_THRESHOLD       = 0.30   # >= 30% poverty rate
+SEVERE_AMI_THRESHOLD           = 0.60   # <= 60% of AMI
+SEVERE_UNEMPLOYMENT_MULTIPLIER = 1.5    # >= 1.5x national unemployment rate
+# Deep Distress thresholds
+DEEP_POVERTY_THRESHOLD         = 0.40   # >= 40% poverty rate
+DEEP_AMI_THRESHOLD             = 0.50   # <= 50% of AMI
+DEEP_UNEMPLOYMENT_MULTIPLIER   = 2.0    # >= 2x national unemployment rate
+# National unemployment rate benchmark (2016-2020 ACS)
+NATIONAL_UNEMPLOYMENT_RATE     = 0.057  # 5.7%
+# ── CDFI Fund Eligibility File Column Mappings ────────────────────────────────
+# Source: 2016-2020 ACS Low-Income Community Eligibility file from cdfifund.gov
+# Keys are source headers as they appear after the loader strips and
+# upper-cases them; values are the internal column names used by the package.
+# The "*_raw" columns are carried through under those names; the loader shown
+# in this diff recomputes eligibility from the thresholds and does not read them.
+ELIGIBILITY_FILE_COLUMNS = {
+    "GEOID":                    "tract_id",
+    "STATE":                    "state",
+    "COUNTY":                   "county",
+    "TRACT":                    "tract",
+    "POVERTY_RATE":             "poverty_rate",
+    "MFI_RATIO":                "ami_ratio",
+    "UNEMPLOYMENT_RATE":        "unemployment_rate",
+    "NON_METRO":                "is_non_metro",
+    "HIGH_MIGRATION_RURAL":     "is_high_migration_rural",
+    "LIC_ELIGIBLE":             "lic_eligible_raw",
+    "SEVERE_DISTRESS":          "severe_distress_raw",
+}
+# ── Download URLs ─────────────────────────────────────────────────────────────
+CDFI_FUND_LIC_URL_2020 = (
+    "https://www.cdfifund.gov/sites/cdfi/files/2024-08/"
+    "NMTC_LIC_Eligibility_2016_2020_ACS.xlsx"
+)
+# ── Cache ─────────────────────────────────────────────────────────────────────
+# Per-user cache location: <home>/.nmtcmapper/cache. The directory itself is
+# created on demand by the loader, not at import time.
+import os
+CACHE_DIR = os.path.join(os.path.expanduser("~"), ".nmtcmapper", "cache")
+# ── Census Geocoder API ───────────────────────────────────────────────────────
+CENSUS_GEOCODER_URL = (
+    "https://geocoding.geo.census.gov/geocoder/geographies/address"
+)
+CENSUS_GEOCODER_BATCH_URL = (
+    "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
+)
+# ── Distress Levels ───────────────────────────────────────────────────────────
+DISTRESS_LEVELS = {
+    "deep":     "Deep Distress — highest need, strongest NMTC application score",
+    "severe":   "Severe Distress — qualifies for 85% investment commitment",
+    "lic":      "Low-Income Community — NMTC eligible",
+    "ineligible": "Not NMTC eligible",
+}

nmtcmapper/eligibility/__init__.py ADDED Viewed

File without changes

nmtcmapper/eligibility/checker.py ADDED Viewed

@@ -0,0 +1,128 @@
+"""
+NMTC eligibility checker — applies eligibility rules to census tract data.
+"""
+from dataclasses import dataclass
+from typing import Optional
+import pandas as pd
+from nmtcmapper.data.schema import DISTRESS_LEVELS
+@dataclass
+class EligibilityResult:
+    """Result of a single address NMTC eligibility check."""
+    address: str
+    tract_id: Optional[str]
+    nmtc_eligible: bool
+    distress_level: str
+    poverty_rate: Optional[float]
+    ami_ratio: Optional[float]
+    unemployment_rate: Optional[float]
+    is_non_metro: bool
+    is_high_migration_rural: bool
+    severe_distress: bool
+    deep_distress: bool
+    geocode_success: bool
+    @property
+    def distress_description(self) -> str:
+        return DISTRESS_LEVELS.get(self.distress_level, "Unknown")
+    def summary(self) -> None:
+        print(f"\nNMTC Eligibility Result")
+        print(f"{'='*50}")
+        print(f"  Address:          {self.address}")
+        print(f"  Census Tract:     {self.tract_id or 'Not found'}")
+        print(f"  NMTC Eligible:    {'✅ YES' if self.nmtc_eligible else '❌ NO'}")
+        print(f"  Distress Level:   {self.distress_level.upper()}")
+        print(f"  Description:      {self.distress_description}")
+        if self.poverty_rate is not None:
+            print(f"\n  Poverty Rate:     {self.poverty_rate*100:.1f}%")
+        if self.ami_ratio is not None:
+            print(f"  AMI Ratio:        {self.ami_ratio*100:.1f}%")
+        if self.unemployment_rate is not None:
+            print(f"  Unemployment:     {self.unemployment_rate*100:.1f}%")
+        print(f"  Non-Metro:        {'Yes' if self.is_non_metro else 'No'}")
+        print(f"  High Migration:   {'Yes' if self.is_high_migration_rural else 'No'}")
+        print()
+def check_tract(
+    tract_id: str,
+    eligibility_table: pd.DataFrame,
+) -> dict:
+    """
+    Check NMTC eligibility for a known census tract ID.
+    Args:
+        tract_id:          11-digit census tract GEOID
+        eligibility_table: DataFrame indexed by tract_id
+    Returns:
+        Dict with eligibility fields
+    """
+    if tract_id not in eligibility_table.index:
+        return {
+            "nmtc_eligible": False,
+            "distress_level": "ineligible",
+            "poverty_rate": None,
+            "ami_ratio": None,
+            "unemployment_rate": None,
+            "is_non_metro": False,
+            "is_high_migration_rural": False,
+            "severe_distress": False,
+            "deep_distress": False,
+        }
+    row = eligibility_table.loc[tract_id]
+    return {
+        "nmtc_eligible":         bool(row.get("nmtc_eligible", False)),
+        "distress_level":        str(row.get("distress_level", "ineligible")),
+        "poverty_rate":          row.get("poverty_rate"),
+        "ami_ratio":             row.get("ami_ratio"),
+        "unemployment_rate":     row.get("unemployment_rate"),
+        "is_non_metro":          bool(row.get("is_non_metro", False)),
+        "is_high_migration_rural": bool(row.get("is_high_migration_rural", False)),
+        "severe_distress":       bool(row.get("severe_distress", False)),
+        "deep_distress":         bool(row.get("deep_distress", False)),
+    }
+def enrich_dataframe(
+    df: pd.DataFrame,
+    eligibility_table: pd.DataFrame,
+    tract_col: str = "tract_id",
+) -> pd.DataFrame:
+    """
+    Add NMTC eligibility columns to a DataFrame that already has tract IDs.
+    Args:
+        df:                DataFrame with tract_id column
+        eligibility_table: Full eligibility lookup table
+        tract_col:         Name of the tract ID column
+    Returns:
+        DataFrame with added eligibility columns
+    """
+    df = df.copy()
+    eligibility_cols = [
+        "nmtc_eligible", "distress_level", "poverty_rate",
+        "ami_ratio", "unemployment_rate", "is_non_metro",
+        "is_high_migration_rural", "severe_distress", "deep_distress",
+    ]
+    for col in eligibility_cols:
+        df[col] = None
+    for idx, row in df.iterrows():
+        tract_id = row.get(tract_col)
+        if pd.notna(tract_id) and tract_id in eligibility_table.index:
+            result = check_tract(str(tract_id), eligibility_table)
+            for col, val in result.items():
+                df.at[idx, col] = val
+        else:
+            df.at[idx, "nmtc_eligible"] = False
+            df.at[idx, "distress_level"] = "ineligible"
+    return df

nmtcmapper/geocoder/__init__.py ADDED Viewed

File without changes

nmtcmapper/geocoder/census.py ADDED Viewed

@@ -0,0 +1,196 @@
+"""
+Census Geocoding API wrapper.
+Converts addresses to census tract GEOIDs using the free Census Bureau API.
+"""
+import requests
+import pandas as pd
+import io
+import time
+from typing import Optional
+from nmtcmapper.data.schema import (
+    CENSUS_GEOCODER_URL, CENSUS_GEOCODER_BATCH_URL
+)
+def geocode_address(address: str, retry: int = 2) -> Optional[str]:
+    """
+    Geocode a single address to an 11-digit census tract GEOID.
+    Uses the free Census Bureau Geocoding API — no API key required.
+    Args:
+        address: Full address string e.g. "1234 S Michigan Ave, Chicago, IL 60605"
+        retry:   Number of retries on failure
+    Returns:
+        11-digit census tract GEOID (state+county+tract) or None if not found
+    """
+    params = {
+        "street":       _parse_street(address),
+        "city":         _parse_city(address),
+        "state":        _parse_state(address),
+        "zip":          _parse_zip(address),
+        "benchmark":    "Public_AR_Current",
+        "vintage":      "Current_Current",
+        "layers":       "Census Tracts",
+        "format":       "json",
+    }
+    for attempt in range(retry + 1):
+        try:
+            response = requests.get(
+                CENSUS_GEOCODER_URL, params=params, timeout=15
+            )
+            response.raise_for_status()
+            data = response.json()
+            matches = data.get("result", {}).get("addressMatches", [])
+            if not matches:
+                return None
+            geo = matches[0].get("geographies", {})
+            tracts = geo.get("Census Tracts", [])
+            if not tracts:
+                return None
+            state  = tracts[0].get("STATE", "")
+            county = tracts[0].get("COUNTY", "")
+            tract  = tracts[0].get("TRACT", "")
+            if state and county and tract:
+                return f"{state}{county}{tract}"
+            return None
+        except Exception as e:
+            if attempt < retry:
+                time.sleep(1)
+            else:
+                return None
+def geocode_batch(
+    df: pd.DataFrame,
+    address_col: str = "address",
+    batch_size: int = 100,
+    sleep_between: float = 1.0,
+) -> pd.DataFrame:
+    """
+    Geocode a batch of addresses using the Census batch geocoder.
+    Args:
+        df:             DataFrame with address column
+        address_col:    Name of the address column
+        batch_size:     Addresses per batch (max 10,000 per Census API)
+        sleep_between:  Seconds to sleep between batches
+    Returns:
+        DataFrame with added 'tract_id' column
+    """
+    df = df.copy()
+    df["tract_id"] = None
+    total = len(df)
+    print(f"Geocoding {total:,} addresses in batches of {batch_size}...")
+    for start in range(0, total, batch_size):
+        end = min(start + batch_size, total)
+        batch = df.iloc[start:end]
+        print(f"  Batch {start//batch_size + 1}: rows {start}–{end}")
+        try:
+            tract_ids = _batch_geocode_census(batch, address_col)
+            df.loc[batch.index, "tract_id"] = tract_ids
+        except Exception as e:
+            print(f"  Batch failed: {e} — falling back to single geocoding")
+            for idx, row in batch.iterrows():
+                df.at[idx, "tract_id"] = geocode_address(row[address_col])
+        if end < total:
+            time.sleep(sleep_between)
+    matched = df["tract_id"].notna().sum()
+    print(f"Geocoded {matched:,}/{total:,} addresses successfully")
+    return df
+def _batch_geocode_census(
+    df: pd.DataFrame, address_col: str
+) -> list:
+    """
+    Use Census batch geocoding API for a chunk of addresses.
+    Returns list of tract IDs in same order as input.
+    """
+    # Build CSV for batch API
+    rows = []
+    for i, (idx, row) in enumerate(df.iterrows()):
+        addr = str(row[address_col])
+        street = _parse_street(addr)
+        city   = _parse_city(addr)
+        state  = _parse_state(addr)
+        zip_   = _parse_zip(addr)
+        rows.append(f'{i},"{street}","{city}","{state}","{zip_}"')
+    csv_content = "\n".join(rows)
+    response = requests.post(
+        CENSUS_GEOCODER_BATCH_URL,
+        files={"addressFile": ("addresses.csv", csv_content, "text/csv")},
+        data={
+            "benchmark": "Public_AR_Current",
+            "vintage": "Current_Current",
+            "layers": "Census Tracts",
+        },
+        timeout=60,
+    )
+    response.raise_for_status()
+    result_df = pd.read_csv(
+        io.StringIO(response.text),
+        header=None,
+        names=["id", "input_address", "match", "match_type",
+               "matched_address", "coords", "tiger_line_id",
+               "side", "state", "county", "tract", "block"],
+        dtype=str,
+    )
+    tract_ids = []
+    for _, row in result_df.iterrows():
+        if (row.get("match") == "Match" and
+                pd.notna(row.get("state")) and
+                pd.notna(row.get("county")) and
+                pd.notna(row.get("tract"))):
+            tract_ids.append(
+                f"{row['state']}{row['county']}{row['tract']}"
+            )
+        else:
+            tract_ids.append(None)
+    return tract_ids
+def _parse_street(address: str) -> str:
+    parts = [p.strip() for p in address.split(",")]
+    return parts[0] if parts else address
+def _parse_city(address: str) -> str:
+    parts = [p.strip() for p in address.split(",")]
+    return parts[1] if len(parts) > 1 else ""
+def _parse_state(address: str) -> str:
+    parts = [p.strip() for p in address.split(",")]
+    if len(parts) > 2:
+        state_zip = parts[2].strip().split()
+        return state_zip[0] if state_zip else ""
+    return ""
+def _parse_zip(address: str) -> str:
+    parts = [p.strip() for p in address.split(",")]
+    if len(parts) > 2:
+        state_zip = parts[2].strip().split()
+        return state_zip[1] if len(state_zip) > 1 else ""
+    return ""

nmtcmapper/mapper.py ADDED Viewed

@@ -0,0 +1,182 @@
+"""
+NMTCMapper — main public API for NMTC eligibility checking.
+"""
+import pandas as pd
+from typing import Optional
+from nmtcmapper.data.loader import load_eligibility_table
+from nmtcmapper.geocoder.census import geocode_address, geocode_batch
+from nmtcmapper.eligibility.checker import (
+    check_tract, enrich_dataframe, EligibilityResult
+)
+class NMTCMapper:
+    """
+    Check NMTC eligibility for addresses or census tracts.
+    Usage:
+        mapper = NMTCMapper()
+        # Single address
+        result = mapper.check_address("1234 S Michigan Ave, Chicago, IL 60605")
+        result.summary()
+        # Known census tract
+        result = mapper.check_tract("17031840100")
+        # Batch — DataFrame of addresses
+        df = pd.read_csv("projects.csv")
+        df = mapper.enrich(df, address_col="address")
+    """
+    def __init__(self, force_reload: bool = False):
+        """
+        Initialize NMTCMapper and load the eligibility table.
+        Args:
+            force_reload: Re-download the eligibility file even if cached
+        """
+        print("Loading NMTC eligibility table...")
+        self._table = load_eligibility_table(force=force_reload)
+        print(f"Ready. {len(self._table):,} census tracts loaded.")
+    def check_address(self, address: str) -> EligibilityResult:
+        """
+        Check NMTC eligibility for a single address.
+        Geocodes the address to a census tract using the free
+        Census Bureau API, then looks up eligibility.
+        Args:
+            address: Full address string e.g.
+                     "1234 S Michigan Ave, Chicago, IL 60605"
+        Returns:
+            EligibilityResult with eligibility flags and tract data
+        """
+        tract_id = geocode_address(address)
+        geocode_success = tract_id is not None
+        if tract_id:
+            data = check_tract(tract_id, self._table)
+        else:
+            data = {
+                "nmtc_eligible": False,
+                "distress_level": "ineligible",
+                "poverty_rate": None,
+                "ami_ratio": None,
+                "unemployment_rate": None,
+                "is_non_metro": False,
+                "is_high_migration_rural": False,
+                "severe_distress": False,
+                "deep_distress": False,
+            }
+        return EligibilityResult(
+            address=address,
+            tract_id=tract_id,
+            geocode_success=geocode_success,
+            **data,
+        )
+    def check_tract(self, tract_id: str) -> EligibilityResult:
+        """
+        Check NMTC eligibility for a known 11-digit census tract GEOID.
+        Args:
+            tract_id: 11-digit GEOID e.g. "17031840100"
+        Returns:
+            EligibilityResult with eligibility flags
+        """
+        data = check_tract(tract_id, self._table)
+        return EligibilityResult(
+            address=f"Census Tract {tract_id}",
+            tract_id=tract_id,
+            geocode_success=True,
+            **data,
+        )
+    def enrich(
+        self,
+        df: pd.DataFrame,
+        address_col: str = "address",
+        tract_col: str = None,
+        batch_size: int = 100,
+    ) -> pd.DataFrame:
+        """
+        Add NMTC eligibility columns to a DataFrame.
+        If tract_col is provided, uses existing tract IDs (no geocoding).
+        If address_col is provided, geocodes addresses first.
+        Args:
+            df:          DataFrame with address or tract ID column
+            address_col: Column with full address strings
+            tract_col:   Column with 11-digit tract GEOIDs (skips geocoding)
+            batch_size:  Addresses per geocoding batch
+        Returns:
+            DataFrame with added columns:
+            - nmtc_eligible (bool)
+            - distress_level (str: 'deep', 'severe', 'lic', 'ineligible')
+            - poverty_rate (float)
+            - ami_ratio (float)
+            - unemployment_rate (float)
+            - is_non_metro (bool)
+            - severe_distress (bool)
+            - deep_distress (bool)
+        """
+        df = df.copy()
+        if tract_col and tract_col in df.columns:
+            print(f"Using existing tract IDs from column '{tract_col}'")
+            return enrich_dataframe(df, self._table, tract_col=tract_col)
+        print(f"Geocoding addresses from column '{address_col}'...")
+        df = geocode_batch(df, address_col=address_col, batch_size=batch_size)
+        return enrich_dataframe(df, self._table, tract_col="tract_id")
+    def eligible_count(self, df: pd.DataFrame) -> dict:
+        """
+        Summarize NMTC eligibility across a DataFrame.
+        Requires df to have 'nmtc_eligible' and 'distress_level' columns.
+        """
+        if "nmtc_eligible" not in df.columns:
+            raise ValueError("Run .enrich() first to add eligibility columns.")
+        total = len(df)
+        eligible = df["nmtc_eligible"].sum()
+        deep = (df["distress_level"] == "deep").sum()
+        severe = (df["distress_level"] == "severe").sum()
+        lic = (df["distress_level"] == "lic").sum()
+        result = {
+            "total": total,
+            "nmtc_eligible": int(eligible),
+            "pct_eligible": round(eligible / total * 100, 1) if total else 0,
+            "deep_distress": int(deep),
+            "severe_distress": int(severe),
+            "lic_only": int(lic),
+            "ineligible": int(total - eligible),
+        }
+        print(f"\nNMTC Eligibility Summary")
+        print(f"{'='*40}")
+        print(f"  Total addresses:    {total:,}")
+        print(f"  NMTC Eligible:      {eligible:,} ({result['pct_eligible']}%)")
+        print(f"  ── Deep Distress:   {deep:,}")
+        print(f"  ── Severe Distress: {severe:,}")
+        print(f"  ── LIC Only:        {lic:,}")
+        print(f"  Not Eligible:       {total - eligible:,}")
+        print()
+        return result
+    @property
+    def tract_count(self) -> int:
+        return len(self._table)
+    @property
+    def eligible_tract_count(self) -> int:
+        return int(self._table["nmtc_eligible"].sum())

tests/__init__.py ADDED Viewed

File without changes

tests/conftest.py ADDED Viewed

@@ -0,0 +1,37 @@
+import pytest
+import pandas as pd
+from nmtcmapper.data.loader import _build_sample_table
+from nmtcmapper.mapper import NMTCMapper
+@pytest.fixture
+def sample_table():
+    """Provide the table returned by ``_build_sample_table``."""
+    table = _build_sample_table()
+    return table
+@pytest.fixture
+def mapper(monkeypatch):
+    """NMTCMapper with sample data — no real download."""
+    # Replace the downloader with a no-op returning None.
+    # NOTE(review): presumably the loader falls back to the sample table
+    # when the download yields None — confirm in nmtcmapper.data.loader.
+    def _skip_download(force=False):
+        return None
+    monkeypatch.setattr(
+        "nmtcmapper.data.loader.download_eligibility_file",
+        _skip_download,
+    )
+    return NMTCMapper()
+@pytest.fixture
+def sample_df():
+    """Four-row frame with 'project_name' and 'tract_id' columns."""
+    rows = [
+        # Chicago South Side — eligible
+        ("Southside Health Center", "17031840100"),
+        # Chicago North Shore — not eligible
+        ("North Shore Office", "17031010100"),
+        # Detroit — eligible
+        ("Detroit Manufacturing", "26163518300"),
+        # NYC Bronx — eligible
+        ("NYC Bronx Project", "36061015900"),
+    ]
+    return pd.DataFrame(rows, columns=["project_name", "tract_id"])

tests/test_checker.py ADDED Viewed

@@ -0,0 +1,36 @@
+import pytest
+import pandas as pd
+from nmtcmapper.eligibility.checker import check_tract, enrich_dataframe
+def test_check_known_eligible_tract(sample_table):
+    """Tract 17031840100 must be reported as NMTC eligible."""
+    outcome = check_tract("17031840100", sample_table)
+    eligible = outcome["nmtc_eligible"]
+    assert eligible == True
+def test_check_known_ineligible_tract(sample_table):
+    """Tract 17031010100 must be reported as not NMTC eligible."""
+    outcome = check_tract("17031010100", sample_table)
+    eligible = outcome["nmtc_eligible"]
+    assert eligible == False
+def test_check_unknown_tract(sample_table):
+    """Tract 99999999999 must come back not eligible and 'ineligible'."""
+    outcome = check_tract("99999999999", sample_table)
+    eligible = outcome["nmtc_eligible"]
+    level = outcome["distress_level"]
+    assert eligible == False
+    assert level == "ineligible"
+def test_enrich_dataframe(sample_table, sample_df):
+    """Enrichment adds both eligibility columns and keeps the row count."""
+    enriched = enrich_dataframe(sample_df, sample_table, tract_col="tract_id")
+    columns = enriched.columns
+    assert "nmtc_eligible" in columns
+    assert "distress_level" in columns
+    assert len(enriched) == len(sample_df)
+def test_enrich_eligible_count(sample_table, sample_df):
+    """At least two rows of the enriched sample frame are eligible."""
+    enriched = enrich_dataframe(sample_df, sample_table, tract_col="tract_id")
+    eligible_total = enriched["nmtc_eligible"].sum()
+    assert eligible_total >= 2
+def test_distress_levels_present(sample_table, sample_df):
+    """Every enriched row carries a non-null distress level."""
+    enriched = enrich_dataframe(sample_df, sample_table, tract_col="tract_id")
+    has_level = enriched["distress_level"].notna()
+    assert has_level.all()

tests/test_loader.py ADDED Viewed

@@ -0,0 +1,84 @@
+import pytest
+import pandas as pd
+from nmtcmapper.data.loader import _build_sample_table, _compute_eligibility
+def test_sample_table_returns_dataframe():
+    """The sample table is a non-empty pandas DataFrame."""
+    table = _build_sample_table()
+    assert isinstance(table, pd.DataFrame)
+    row_count = len(table)
+    assert row_count > 0
+def test_sample_table_has_required_columns():
+    """The sample table exposes the four columns checked below."""
+    table = _build_sample_table()
+    expected = ("nmtc_eligible", "distress_level", "poverty_rate", "ami_ratio")
+    missing = [name for name in expected if name not in table.columns]
+    assert not missing
+def test_sample_table_has_eligible_tracts():
+    """At least one sample tract is flagged eligible."""
+    flags = _build_sample_table()["nmtc_eligible"]
+    assert flags.any()
+def test_sample_table_has_ineligible_tracts():
+    """At least one sample tract is flagged not eligible."""
+    flags = _build_sample_table()["nmtc_eligible"]
+    not_eligible = ~flags
+    assert not_eligible.any()
+def test_distress_levels_valid():
+    """Only the four known distress labels appear in the sample table."""
+    allowed = {"deep", "severe", "lic", "ineligible"}
+    observed = set(_build_sample_table()["distress_level"].unique())
+    assert observed <= allowed
+def test_high_poverty_is_eligible():
+    """A tract with poverty_rate 0.35 and ami_ratio 0.90 is eligible."""
+    tract = {
+        "tract_id": "TEST001",
+        "poverty_rate": 0.35,
+        "ami_ratio": 0.90,
+        "unemployment_rate": 0.05,
+        "is_non_metro": False,
+        "is_high_migration_rural": False,
+    }
+    computed = _compute_eligibility(pd.DataFrame([tract]))
+    first_flag = computed["nmtc_eligible"].iloc[0]
+    assert first_flag == True
+def test_low_ami_is_eligible():
+    """A tract with poverty_rate 0.10 and ami_ratio 0.75 is eligible."""
+    tract = {
+        "tract_id": "TEST002",
+        "poverty_rate": 0.10,
+        "ami_ratio": 0.75,
+        "unemployment_rate": 0.04,
+        "is_non_metro": False,
+        "is_high_migration_rural": False,
+    }
+    computed = _compute_eligibility(pd.DataFrame([tract]))
+    first_flag = computed["nmtc_eligible"].iloc[0]
+    assert first_flag == True
+def test_affluent_tract_not_eligible():
+    """A tract with poverty_rate 0.05 and ami_ratio 1.20 is not eligible."""
+    tract = {
+        "tract_id": "TEST003",
+        "poverty_rate": 0.05,
+        "ami_ratio": 1.20,
+        "unemployment_rate": 0.02,
+        "is_non_metro": False,
+        "is_high_migration_rural": False,
+    }
+    computed = _compute_eligibility(pd.DataFrame([tract]))
+    first_flag = computed["nmtc_eligible"].iloc[0]
+    assert first_flag == False
+def test_deep_distress_classified():
+    """poverty 0.45 / AMI 0.45 / unemployment 0.15 is labelled 'deep'."""
+    tract = {
+        "tract_id": "TEST004",
+        "poverty_rate": 0.45,
+        "ami_ratio": 0.45,
+        "unemployment_rate": 0.15,
+        "is_non_metro": False,
+        "is_high_migration_rural": False,
+    }
+    computed = _compute_eligibility(pd.DataFrame([tract]))
+    first_level = computed["distress_level"].iloc[0]
+    assert first_level == "deep"

tests/test_mapper.py ADDED Viewed

@@ -0,0 +1,54 @@
+import pytest
+import pandas as pd
+from nmtcmapper.eligibility.checker import EligibilityResult
+def test_mapper_loads(mapper):
+    """The mapper fixture reports a positive tract count."""
+    loaded = mapper.tract_count
+    assert loaded > 0
+def test_mapper_eligible_tracts(mapper):
+    """The mapper fixture reports a positive eligible-tract count."""
+    eligible_total = mapper.eligible_tract_count
+    assert eligible_total > 0
+def test_check_tract_eligible(mapper):
+    """check_tract returns an eligible EligibilityResult echoing the ID."""
+    tract = "17031840100"
+    outcome = mapper.check_tract(tract)
+    assert isinstance(outcome, EligibilityResult)
+    assert outcome.nmtc_eligible == True
+    assert outcome.tract_id == tract
+def test_check_tract_ineligible(mapper):
+    """Tract 17031010100 must be reported as not NMTC eligible."""
+    outcome = mapper.check_tract("17031010100")
+    eligible = outcome.nmtc_eligible
+    assert eligible == False
+def test_check_tract_unknown(mapper):
+    """Tract 99999999999 must come back not eligible and 'ineligible'."""
+    outcome = mapper.check_tract("99999999999")
+    eligible = outcome.nmtc_eligible
+    level = outcome.distress_level
+    assert eligible == False
+    assert level == "ineligible"
+def test_enrich_with_tract_col(mapper, sample_df):
+    """enrich() with tract_col adds both columns and keeps the row count."""
+    enriched = mapper.enrich(sample_df, tract_col="tract_id")
+    columns = enriched.columns
+    assert "nmtc_eligible" in columns
+    assert "distress_level" in columns
+    assert len(enriched) == len(sample_df)
+def test_eligible_count_summary(mapper, sample_df):
+    """The summary has 'total' and 'nmtc_eligible'; total is the row count."""
+    enriched = mapper.enrich(sample_df, tract_col="tract_id")
+    report = mapper.eligible_count(enriched)
+    for key in ("total", "nmtc_eligible"):
+        assert key in report
+    assert report["total"] == len(sample_df)
+def test_result_summary_runs(mapper):
+    """Smoke test: summary() on a check_tract result does not raise."""
+    outcome = mapper.check_tract("17031840100")
+    # No assertion: an exception from summary() fails the test.
+    outcome.summary()
+def test_eligible_count_raises_without_enrich(mapper, sample_df):
+    """eligible_count() on a frame without eligibility columns raises."""
+    # `match` is applied as a regular expression, so the literal "." and
+    # "()" in the message must be escaped; unescaped, "()" is an empty
+    # group and the parentheses would never be checked.
+    with pytest.raises(ValueError, match=r"Run \.enrich\(\)"):
+        mapper.eligible_count(sample_df)