PyPI - microunit - Versions diffs - 0.1.0__py3-none-any.whl - Mend

microunit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

microunit/__init__.py +61 -0
microunit/core.py +189 -0
microunit/data/dependent_gross_income_limit.yaml +88 -0
microunit/diagnostics.py +82 -0
microunit/py.typed +1 -0
microunit/registry.py +51 -0
microunit/rule_helpers.py +155 -0
microunit/tax_unit_construction.py +891 -0
microunit/units/__init__.py +26 -0
microunit/units/_helpers.py +22 -0
microunit/units/medicaid.py +44 -0
microunit/units/passthrough.py +27 -0
microunit/units/programs.py +56 -0
microunit/units/snap.py +53 -0
microunit/units/spm.py +57 -0
microunit/units/tax.py +88 -0
microunit-0.1.0.dist-info/METADATA +183 -0
microunit-0.1.0.dist-info/RECORD +20 -0
microunit-0.1.0.dist-info/WHEEL +4 -0
microunit-0.1.0.dist-info/licenses/LICENSE +21 -0

microunit/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""Microdata unit assignment primitives."""
+from microunit.core import EgoUnitMembership, UnitPartition
+from microunit.diagnostics import PartitionMatchReport, partition_match_report
+from microunit.registry import UnitKind, UnitScheme, get_scheme, list_schemes
+from microunit.rule_helpers import (
+    REFERENCE_PERSON_CODES,
+    REFERENCE_QUALIFYING_CHILD_CODES,
+    REFERENCE_QUALIFYING_RELATIVE_CODES,
+    REFERENCE_SPOUSE_CODES,
+    CPSRelationshipCode,
+    dependent_gross_income_limit,
+    qualifying_child_age_test,
+    reference_relationship_allows_qualifying_child,
+    reference_relationship_allows_qualifying_relative,
+    related_to_head_or_spouse,
+)
+from microunit.tax_unit_construction import (
+    CENSUS_DOCUMENTED_MODE,
+    DEPENDENT,
+    HEAD,
+    POLICYENGINE_MODE,
+    SPOUSE,
+    SUPPORTED_TAX_UNIT_CONSTRUCTION_MODES,
+    construct_tax_units,
+    estimate_dependent_gross_income,
+)
+# Package version string; it is also exported through ``__all__`` below.
+__version__ = "0.1.0"
+# Public names of the package, i.e. what ``from microunit import *`` exposes.
+# Every entry other than ``__version__`` is imported at the top of this module.
+__all__ = [
+    "__version__",
+    # Core containers (microunit.core), partition diagnostics
+    # (microunit.diagnostics) and the scheme registry (microunit.registry)
+    "EgoUnitMembership",
+    "PartitionMatchReport",
+    "UnitKind",
+    "UnitPartition",
+    "UnitScheme",
+    "get_scheme",
+    "list_schemes",
+    "partition_match_report",
+    # Rules-based tax-unit construction engine
+    # (microunit.tax_unit_construction) and its rule helpers
+    # (microunit.rule_helpers)
+    "construct_tax_units",
+    "estimate_dependent_gross_income",
+    "HEAD",
+    "SPOUSE",
+    "DEPENDENT",
+    "POLICYENGINE_MODE",
+    "CENSUS_DOCUMENTED_MODE",
+    "SUPPORTED_TAX_UNIT_CONSTRUCTION_MODES",
+    "CPSRelationshipCode",
+    "REFERENCE_PERSON_CODES",
+    "REFERENCE_SPOUSE_CODES",
+    "REFERENCE_QUALIFYING_CHILD_CODES",
+    "REFERENCE_QUALIFYING_RELATIVE_CODES",
+    "dependent_gross_income_limit",
+    "qualifying_child_age_test",
+    "reference_relationship_allows_qualifying_child",
+    "reference_relationship_allows_qualifying_relative",
+    "related_to_head_or_spouse",
+]

microunit/core.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""Core unit assignment containers."""
+from __future__ import annotations
+from collections.abc import Hashable, Iterable, Mapping
+from dataclasses import dataclass
+import pandas as pd
+def _series(values: pd.Series | Iterable[Hashable], name: str) -> pd.Series:
+    """Coerce ``values`` to a ``pd.Series`` whose name is ``name``.
+    A Series input is renamed (``rename`` returns a new object, so the
+    caller's Series keeps its own name and its index is carried over). Any
+    other iterable is materialised into a list and wrapped in a new Series.
+    """
+    if not isinstance(values, pd.Series):
+        materialised = list(values)
+        return pd.Series(materialised, name=name)
+    return values.rename(name)
+@dataclass(frozen=True, eq=False)
+class UnitPartition:
+    """A policy-unit assignment with exactly one unit per person.
+    ``person_id``, ``unit_id`` and the optional ``role`` are parallel,
+    row-aligned columns. Any iterable is accepted at construction; each one
+    is stored as a ``pd.Series`` with a fresh ``0..n-1`` index.
+    Equality is by value (see ``__eq__``). Because ``__eq__`` is defined
+    without ``__hash__``, instances are not hashable.
+    Raises:
+        ValueError: if ``person_id`` and ``unit_id`` (or ``role``) differ in
+            length, if either ID column contains missing values, or if
+            ``person_id`` contains duplicates.
+    """
+    # Label for the kind of unit; not validated by this class.
+    unit_type: str
+    # One entry per person; must be unique and non-missing.
+    person_id: pd.Series
+    # Unit label of the person in the same row; must be non-missing.
+    unit_id: pd.Series
+    # Optional per-person role, row-aligned with ``person_id``.
+    role: pd.Series | None = None
+    # Optional label, presumably the origin of the assignment; stored as given.
+    source: str | None = None
+    def __post_init__(self) -> None:
+        """Validate the columns and normalise them to clean-indexed Series."""
+        person_id = _series(self.person_id, "person_id")
+        unit_id = _series(self.unit_id, "unit_id")
+        if len(person_id) != len(unit_id):
+            raise ValueError("person_id and unit_id must have the same length")
+        if person_id.isna().any():
+            raise ValueError("person_id cannot contain missing values")
+        if unit_id.isna().any():
+            raise ValueError("unit_id cannot contain missing values")
+        if person_id.duplicated().any():
+            duplicates = person_id[person_id.duplicated()].unique().tolist()
+            raise ValueError(
+                f"person_id must be unique, found duplicates: {duplicates}"
+            )
+        # The dataclass is frozen, so normalised values have to be written
+        # through object.__setattr__.
+        object.__setattr__(self, "person_id", person_id.reset_index(drop=True))
+        object.__setattr__(self, "unit_id", unit_id.reset_index(drop=True))
+        if self.role is not None:
+            role = _series(self.role, "role")
+            if len(role) != len(person_id):
+                raise ValueError("role must have the same length as person_id")
+            object.__setattr__(self, "role", role.reset_index(drop=True))
+    def __eq__(self, other: object) -> bool:
+        """Return whether ``other`` is a partition with identical contents.
+        A dataclass-generated ``__eq__`` would compare the Series fields with
+        ``==``; that yields element-wise results with no single truth value
+        and raises for multi-row partitions. ``Series.equals`` is used
+        instead, so row order and dtype both matter.
+        """
+        if other.__class__ is not self.__class__:
+            return NotImplemented
+        if (self.unit_type, self.source) != (other.unit_type, other.source):
+            return False
+        if (self.role is None) != (other.role is None):
+            return False
+        return (
+            self.person_id.equals(other.person_id)
+            and self.unit_id.equals(other.unit_id)
+            and (self.role is None or self.role.equals(other.role))
+        )
+    @classmethod
+    def from_frame(
+        cls,
+        frame: pd.DataFrame,
+        unit_type: str,
+        person_col: str = "person_id",
+        unit_col: str = "unit_id",
+        role_col: str | None = None,
+        source: str | None = None,
+    ) -> UnitPartition:
+        """Build a partition from columns in a person-level frame.
+        ``role_col`` is only read when it is not ``None``.
+        """
+        role = frame[role_col] if role_col is not None else None
+        return cls(
+            unit_type=unit_type,
+            person_id=frame[person_col],
+            unit_id=frame[unit_col],
+            role=role,
+            source=source,
+        )
+    @property
+    def n_persons(self) -> int:
+        """Number of rows, i.e. persons, in the partition."""
+        return len(self.person_id)
+    @property
+    def n_units(self) -> int:
+        """Number of distinct unit IDs."""
+        return int(self.unit_id.nunique())
+    def to_frame(self) -> pd.DataFrame:
+        """Return person-level unit assignments.
+        The frame has ``person_id`` and ``unit_id`` columns, plus ``role``
+        when roles were supplied.
+        """
+        frame = pd.DataFrame(
+            {
+                "person_id": self.person_id,
+                "unit_id": self.unit_id,
+            }
+        )
+        if self.role is not None:
+            frame["role"] = self.role
+        return frame
+    def members(self) -> dict[Hashable, tuple[Hashable, ...]]:
+        """Return unit members keyed by unit ID.
+        ``sort=False`` keeps units in first-encounter order.
+        """
+        frame = self.to_frame()
+        grouped = frame.groupby("unit_id", sort=False)["person_id"]
+        return {unit_id: tuple(group.tolist()) for unit_id, group in grouped}
+    def unit_sizes(self) -> pd.Series:
+        """Return the number of people in each unit."""
+        return self.unit_id.value_counts(sort=False)
+    def relabel(self, prefix: str = "unit_") -> UnitPartition:
+        """Return a copy with dense, stable unit IDs in encounter order.
+        New IDs are ``f"{prefix}1"``, ``f"{prefix}2"``, ... assigned in the
+        order each original unit ID first appears.
+        """
+        # factorize codes start at 0; IDs are numbered from 1.
+        codes = pd.factorize(self.unit_id, sort=False)[0]
+        unit_id = pd.Series([f"{prefix}{code + 1}" for code in codes])
+        return UnitPartition(
+            unit_type=self.unit_type,
+            person_id=self.person_id,
+            unit_id=unit_id,
+            role=self.role,
+            source=self.source,
+        )
+@dataclass(frozen=True, eq=False)
+class EgoUnitMembership:
+    """A possibly-overlapping unit assignment for each focal person.
+    Each row is one ``(focal_person_id, member_person_id)`` pair; the same
+    member may appear under several focal persons. Any iterable is accepted
+    at construction; each column is stored as a ``pd.Series`` with a fresh
+    ``0..n-1`` index.
+    Equality is by value (see ``__eq__``). Because ``__eq__`` is defined
+    without ``__hash__``, instances are not hashable.
+    Raises:
+        ValueError: if the columns differ in length, contain missing IDs,
+            or contain a repeated (focal, member) pair.
+    """
+    # Label for the kind of unit; not validated by this class.
+    unit_type: str
+    # Focal person of each membership row.
+    focal_person_id: pd.Series
+    # Person belonging to the focal person's unit in the same row.
+    member_person_id: pd.Series
+    # Optional per-row role, row-aligned with the two ID columns.
+    role: pd.Series | None = None
+    # Optional label, presumably the origin of the assignment; stored as given.
+    source: str | None = None
+    def __post_init__(self) -> None:
+        """Validate the columns and normalise them to clean-indexed Series."""
+        focal = _series(self.focal_person_id, "focal_person_id")
+        member = _series(self.member_person_id, "member_person_id")
+        if len(focal) != len(member):
+            raise ValueError("focal_person_id and member_person_id must align")
+        if focal.isna().any() or member.isna().any():
+            raise ValueError("ego unit memberships cannot contain missing IDs")
+        pairs = pd.DataFrame({"focal": focal, "member": member})
+        if pairs.duplicated().any():
+            raise ValueError("ego unit memberships cannot contain duplicate pairs")
+        # The dataclass is frozen, so normalised values have to be written
+        # through object.__setattr__.
+        object.__setattr__(self, "focal_person_id", focal.reset_index(drop=True))
+        object.__setattr__(self, "member_person_id", member.reset_index(drop=True))
+        if self.role is not None:
+            role = _series(self.role, "role")
+            if len(role) != len(focal):
+                raise ValueError("role must have the same length as memberships")
+            object.__setattr__(self, "role", role.reset_index(drop=True))
+    def __eq__(self, other: object) -> bool:
+        """Return whether ``other`` holds identical membership rows.
+        A dataclass-generated ``__eq__`` would compare the Series fields with
+        ``==``; that yields element-wise results with no single truth value
+        and raises for multi-row inputs. ``Series.equals`` is used instead,
+        so row order and dtype both matter.
+        """
+        if other.__class__ is not self.__class__:
+            return NotImplemented
+        if (self.unit_type, self.source) != (other.unit_type, other.source):
+            return False
+        if (self.role is None) != (other.role is None):
+            return False
+        return (
+            self.focal_person_id.equals(other.focal_person_id)
+            and self.member_person_id.equals(other.member_person_id)
+            and (self.role is None or self.role.equals(other.role))
+        )
+    @classmethod
+    def from_mapping(
+        cls,
+        unit_type: str,
+        memberships: Mapping[Hashable, Iterable[Hashable]],
+        source: str | None = None,
+    ) -> EgoUnitMembership:
+        """Build overlapping units from focal-person membership sets.
+        Each ``focal -> members`` item is expanded into one row per member,
+        in the mapping's iteration order. No roles are attached.
+        """
+        focal_ids: list[Hashable] = []
+        member_ids: list[Hashable] = []
+        for focal, members in memberships.items():
+            for member in members:
+                focal_ids.append(focal)
+                member_ids.append(member)
+        return cls(
+            unit_type, pd.Series(focal_ids), pd.Series(member_ids), source=source
+        )
+    def to_frame(self) -> pd.DataFrame:
+        """Return membership rows keyed by focal person and member person.
+        The ``role`` column is included only when roles were supplied.
+        """
+        frame = pd.DataFrame(
+            {
+                "focal_person_id": self.focal_person_id,
+                "member_person_id": self.member_person_id,
+            }
+        )
+        if self.role is not None:
+            frame["role"] = self.role
+        return frame
+    def members_for(self, focal_person_id: Hashable) -> tuple[Hashable, ...]:
+        """Return the members listed for ``focal_person_id``, in row order.
+        An unknown focal person yields an empty tuple.
+        """
+        # Both columns share the same 0..n-1 index, so the boolean mask can
+        # be applied directly without building an intermediate frame.
+        is_focal = self.focal_person_id == focal_person_id
+        return tuple(self.member_person_id[is_focal].tolist())

microunit/data/dependent_gross_income_limit.yaml ADDED Viewed

@@ -0,0 +1,88 @@
+description: >-
+  Personal and dependent exemption amount under IRC 151(d). TCJA set the
+  deduction to $0 from 2018 (made permanent by OBBB), but the underlying
+  amount continues to be inflation-adjusted and published in annual Rev. Proc.
+  for other provisions that reference it, such as the qualifying relative
+  gross income test under IRC 152(d)(1)(B). The deduction suspension is
+  represented separately in gov.irs.income.exemption.suspended.
+metadata:
+  unit: currency-USD
+  uprating: gov.irs.uprating
+  period: year
+  reference:
+    - title: 26 U.S. Code § 151(d)(1) - Exemption amount
+      href: https://www.law.cornell.edu/uscode/text/26/151#d_1
+    - title: IRS Notice 2018-70 - Guidance on qualifying relative exemption amount
+      href: https://www.irs.gov/pub/irs-drop/n-18-70.pdf
+values:
+  2013-01-01:
+    value: 3_900
+    reference:
+      - title: Rev. Proc. 2013-15
+        href: https://www.irs.gov/pub/irs-drop/rp-13-15.pdf
+  2014-01-01:
+    value: 3_950
+    reference:
+      - title: Rev. Proc. 2013-35
+        href: https://www.irs.gov/pub/irs-drop/rp-13-35.pdf
+  2015-01-01:
+    value: 4_000
+    reference:
+      - title: Rev. Proc. 2014-61
+        href: https://www.irs.gov/pub/irs-drop/rp-14-61.pdf
+  2016-01-01:
+    value: 4_050
+    reference:
+      - title: Rev. Proc. 2015-53
+        href: https://www.irs.gov/pub/irs-drop/rp-15-53.pdf
+  2017-01-01:
+    value: 4_050
+    reference:
+      - title: Rev. Proc. 2016-55
+        href: https://www.irs.gov/pub/irs-drop/rp-16-55.pdf
+  2018-01-01:
+    value: 4_150
+    reference:
+      - title: Rev. Proc. 2017-58
+        href: https://www.irs.gov/pub/irs-drop/rp-17-58.pdf
+  2019-01-01:
+    value: 4_200
+    reference:
+      - title: Rev. Proc. 2018-57
+        href: https://www.irs.gov/pub/irs-drop/rp-18-57.pdf
+  2020-01-01:
+    value: 4_300
+    reference:
+      - title: Rev. Proc. 2019-44
+        href: https://www.irs.gov/pub/irs-drop/rp-19-44.pdf
+  2021-01-01:
+    value: 4_300
+    reference:
+      - title: Rev. Proc. 2020-45
+        href: https://www.irs.gov/pub/irs-drop/rp-20-45.pdf
+  2022-01-01:
+    value: 4_400
+    reference:
+      - title: Rev. Proc. 2021-45
+        href: https://www.irs.gov/pub/irs-drop/rp-21-45.pdf
+  2023-01-01:
+    value: 4_700
+    reference:
+      - title: Rev. Proc. 2022-38
+        href: https://www.irs.gov/pub/irs-drop/rp-22-38.pdf
+  2024-01-01:
+    value: 5_050
+    reference:
+      - title: Rev. Proc. 2023-34
+        href: https://www.irs.gov/pub/irs-drop/rp-23-34.pdf
+  2025-01-01:
+    value: 5_200
+    reference:
+      - title: Rev. Proc. 2024-40
+        href: https://www.irs.gov/pub/irs-drop/rp-24-40.pdf
+  2026-01-01:
+    value: 5_300
+    reference:
+      - title: Rev. Proc. 2025-32
+        href: https://www.irs.gov/pub/irs-drop/rp-25-32.pdf

microunit/diagnostics.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""Diagnostics for comparing unit assignments."""
+from __future__ import annotations
+from collections.abc import Hashable
+from dataclasses import dataclass
+import pandas as pd
+from microunit.core import UnitPartition
+@dataclass(frozen=True)
+class PartitionMatchReport:
+    """Summary counts from a group-by-group comparison of two partitions."""
+    # Number of groups that were compared.
+    group_count: int
+    # Number of those groups counted as matched.
+    matched_group_count: int
+    # Number of persons across all compared groups.
+    person_count: int
+    # Number of persons that belong to matched groups.
+    persons_in_matched_groups: int
+    @staticmethod
+    def _share(part: int, whole: int) -> float:
+        """Return ``part / whole``, or ``0.0`` when ``whole`` is zero."""
+        return part / whole if whole != 0 else 0.0
+    @property
+    def group_match_rate(self) -> float:
+        """Matched groups as a share of all groups (``0.0`` if none)."""
+        return self._share(self.matched_group_count, self.group_count)
+    @property
+    def person_match_rate(self) -> float:
+        """Persons in matched groups as a share of all persons (``0.0`` if none)."""
+        return self._share(self.persons_in_matched_groups, self.person_count)
+def _signature(
+    person_id: pd.Series, unit_id: pd.Series
+) -> frozenset[frozenset[Hashable]]:
+    """Return the set of member sets that ``unit_id`` induces on ``person_id``.
+    The unit labels themselves are discarded, so two assignments that group
+    the same people together produce equal signatures even when their unit
+    IDs differ.
+    """
+    paired = pd.DataFrame({"person_id": person_id, "unit_id": unit_id})
+    member_sets = []
+    for _, persons in paired.groupby("unit_id", sort=False)["person_id"]:
+        member_sets.append(frozenset(persons.tolist()))
+    return frozenset(member_sets)
+def partition_match_report(
+    reference: UnitPartition,
+    candidate: UnitPartition,
+    group_id: pd.Series,
+) -> PartitionMatchReport:
+    """Compare two partitions within household-like groups.
+    Unit IDs are arbitrary labels, so this compares each group's partition of
+    people rather than literal unit ID values. A group counts as matched when
+    both partitions split its people into exactly the same member sets.
+    Args:
+        reference: Partition treated as the baseline.
+        candidate: Partition to compare; it must cover exactly the same
+            person IDs as ``reference`` (row order may differ).
+        group_id: Group label per person, aligned by position with the rows
+            of ``reference``; its index is ignored.
+    Returns:
+        Counts of groups and persons, overall and within matched groups.
+    Raises:
+        ValueError: if ``group_id`` and ``reference`` differ in length, or if
+            the two partitions do not contain the same person IDs.
+    """
+    group_id = group_id.rename("group_id")
+    if len(group_id) != reference.n_persons:
+        raise ValueError("group_id must have the same length as the reference")
+    ref = reference.to_frame().rename(columns={"unit_id": "reference_unit_id"})
+    cand = candidate.to_frame().rename(columns={"unit_id": "candidate_unit_id"})
+    frame = ref.merge(cand, on="person_id", how="inner", validate="one_to_one")
+    # The inner join alone only proves that every reference person appears in
+    # the candidate. Comparing the candidate's size as well rejects a
+    # candidate that carries extra people.
+    same_people = (
+        len(frame) == reference.n_persons
+        and candidate.n_persons == reference.n_persons
+    )
+    if not same_people:
+        raise ValueError("reference and candidate must contain the same person IDs")
+    # Positional assignment: relies on the inner merge keeping the rows in
+    # the reference's order.
+    frame["group_id"] = group_id.reset_index(drop=True)
+    matched_groups = 0
+    persons_in_matched_groups = 0
+    for _, group in frame.groupby("group_id", sort=False):
+        ref_sig = _signature(group["person_id"], group["reference_unit_id"])
+        cand_sig = _signature(group["person_id"], group["candidate_unit_id"])
+        if ref_sig == cand_sig:
+            matched_groups += 1
+            persons_in_matched_groups += len(group)
+    return PartitionMatchReport(
+        group_count=int(frame["group_id"].nunique()),
+        matched_group_count=matched_groups,
+        person_count=len(frame),
+        persons_in_matched_groups=persons_in_matched_groups,
+    )

microunit/py.typed ADDED Viewed

	@@ -0,0 +1 @@
1	+

microunit/registry.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Metadata for known policy-unit schemes."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Literal
+# The two kinds a scheme can declare: "partition" or "ego".
+UnitKind = Literal["partition", "ego"]
+@dataclass(frozen=True)
+class UnitScheme:
+    """Immutable metadata record describing one policy-unit scheme."""
+    # Identifier of the scheme.
+    name: str
+    # One of the ``UnitKind`` literals.
+    kind: UnitKind
+    # Human-readable summary of what the scheme represents.
+    description: str
+# Registry of the known schemes, keyed by each scheme's own ``name``.
+_SCHEMES: dict[str, UnitScheme] = {
+    scheme.name: scheme
+    for scheme in (
+        UnitScheme(
+            "spm",
+            "partition",
+            "Supplemental Poverty Measure resource unit.",
+        ),
+        UnitScheme(
+            "tax",
+            "partition",
+            "Federal income tax filing/dependency unit.",
+        ),
+        UnitScheme(
+            "snap",
+            "partition",
+            "SNAP household assignment within a physical household.",
+        ),
+        UnitScheme(
+            "medicaid_magi",
+            "ego",
+            "Focal-person Medicaid MAGI household.",
+        ),
+    )
+}
+def get_scheme(name: str) -> UnitScheme:
+    """Return the registered scheme called ``name``.
+    Raises:
+        KeyError: if ``name`` is not registered. The message lists the known
+            scheme names in sorted order and the original lookup error is
+            chained as the cause.
+    """
+    try:
+        scheme = _SCHEMES[name]
+    except KeyError as exc:
+        known = ", ".join(sorted(_SCHEMES))
+        message = f"Unknown unit scheme {name!r}. Known schemes: {known}"
+        raise KeyError(message) from exc
+    else:
+        return scheme
+def list_schemes() -> tuple[UnitScheme, ...]:
+    """Return every registered scheme, ordered by registry key."""
+    by_key = sorted(_SCHEMES.items(), key=lambda item: item[0])
+    return tuple(scheme for _, scheme in by_key)

microunit/rule_helpers.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""Rules-based helpers for tax-unit construction.
+These helpers encode the federal dependency and filing rules used to assign
+people into tax units: the qualifying-child age test, the
+relationship-to-reference-person tests for qualifying children and qualifying
+relatives, and the qualifying-relative gross income limit (the personal- and
+dependent-exemption amount under IRC 151(d), used by the IRC 152(d)(1)(B)
+gross income test).
+The CPS relationship codes mirror the Census ``A_EXPRRP`` recode used in the
+ASEC. Consumers that start from a different relationship coding (for example
+ACS ``RELSHIPP``) are expected to map onto these codes before calling
+:func:`microunit.construct_tax_units`.
+The gross income limit is read from package data
+(``data/dependent_gross_income_limit.yaml``) so the package is self-contained
+and does not depend on ``policyengine-us`` being installed.
+"""
+from __future__ import annotations
+from enum import IntEnum
+from functools import cache
+from importlib import resources
+import yaml
+class CPSRelationshipCode(IntEnum):
+    """CPS ASEC relationship-to-reference-person recode (``A_EXPRRP``)."""
+    # Reference person; the two codes differ in whether relatives are present.
+    REFERENCE_PERSON_WITH_RELATIVES = 1
+    REFERENCE_PERSON_WITHOUT_RELATIVES = 2
+    # Spouse of the reference person.
+    HUSBAND = 3
+    WIFE = 4
+    # Relatives of the reference person.
+    # NOTE(review): no member is defined for value 6 here; confirm against
+    # the A_EXPRRP codebook whether that gap is intentional.
+    OWN_CHILD = 5
+    GRANDCHILD = 7
+    PARENT = 8
+    SIBLING = 9
+    OTHER_RELATIVE = 10
+    FOSTER_CHILD = 11
+    # Nonrelatives of the reference person.
+    NONRELATIVE_WITH_RELATIVES = 12
+    PARTNER_OR_ROOMMATE = 13
+    NONRELATIVE_WITHOUT_RELATIVES = 14
+# The two reference-person codes.
+REFERENCE_PERSON_CODES = frozenset(
+    {
+        CPSRelationshipCode.REFERENCE_PERSON_WITH_RELATIVES,
+        CPSRelationshipCode.REFERENCE_PERSON_WITHOUT_RELATIVES,
+    }
+)
+# The two spouse codes.
+REFERENCE_SPOUSE_CODES = frozenset(
+    {
+        CPSRelationshipCode.HUSBAND,
+        CPSRelationshipCode.WIFE,
+    }
+)
+# Codes accepted by ``reference_relationship_allows_qualifying_child``.
+REFERENCE_QUALIFYING_CHILD_CODES = frozenset(
+    {
+        CPSRelationshipCode.OWN_CHILD,
+        CPSRelationshipCode.GRANDCHILD,
+        CPSRelationshipCode.SIBLING,
+        CPSRelationshipCode.FOSTER_CHILD,
+    }
+)
+# Codes accepted by ``reference_relationship_allows_qualifying_relative``:
+# the qualifying-child codes plus PARENT and OTHER_RELATIVE.
+REFERENCE_QUALIFYING_RELATIVE_CODES = frozenset(
+    {
+        CPSRelationshipCode.OWN_CHILD,
+        CPSRelationshipCode.GRANDCHILD,
+        CPSRelationshipCode.PARENT,
+        CPSRelationshipCode.SIBLING,
+        CPSRelationshipCode.OTHER_RELATIVE,
+        CPSRelationshipCode.FOSTER_CHILD,
+    }
+)
+def qualifying_child_age_test(
+    age: int | float,
+    is_full_time_student: bool = False,
+    is_permanently_disabled: bool = False,
+    non_student_age_limit: int = 19,
+    student_age_limit: int = 24,
+) -> bool:
+    """Return whether ``age`` passes the qualifying-child age test.
+    A permanently disabled person passes at any age. Otherwise the age must
+    be strictly below ``student_age_limit`` for a full-time student, or
+    strictly below ``non_student_age_limit`` for anyone else.
+    """
+    if not is_permanently_disabled:
+        if is_full_time_student:
+            return float(age) < student_age_limit
+        return float(age) < non_student_age_limit
+    return True
+def _relationship_from_code(
+    relationship_code: int | None,
+) -> CPSRelationshipCode | None:
+    """Convert a raw relationship code to ``CPSRelationshipCode``.
+    Returns ``None`` when the code is ``None``, cannot be converted to an
+    integer, or does not correspond to any enum member. Callers can then
+    treat every unrecognised value as "no known relationship".
+    """
+    if relationship_code is None:
+        return None
+    try:
+        return CPSRelationshipCode(int(relationship_code))
+    except (TypeError, ValueError, OverflowError):
+        # ValueError: NaN, a non-numeric string, or an integer with no enum
+        # member. TypeError: an object ``int()`` cannot convert.
+        # OverflowError: an infinite float.
+        return None
+def reference_relationship_allows_qualifying_child(
+    relationship_code: int | None,
+) -> bool:
+    """Return whether the code is one of ``REFERENCE_QUALIFYING_CHILD_CODES``.
+    Codes that ``_relationship_from_code`` maps to ``None`` yield ``False``.
+    """
+    return (
+        _relationship_from_code(relationship_code)
+        in REFERENCE_QUALIFYING_CHILD_CODES
+    )
+def reference_relationship_allows_qualifying_relative(
+    relationship_code: int | None,
+) -> bool:
+    """Return whether the code is one of ``REFERENCE_QUALIFYING_RELATIVE_CODES``.
+    Codes that ``_relationship_from_code`` maps to ``None`` yield ``False``.
+    """
+    return (
+        _relationship_from_code(relationship_code)
+        in REFERENCE_QUALIFYING_RELATIVE_CODES
+    )
+def related_to_head_or_spouse(relationship_code: int | None) -> bool:
+    """Return whether the code is a reference-person, spouse or relative code.
+    That is, whether it belongs to any of ``REFERENCE_PERSON_CODES``,
+    ``REFERENCE_SPOUSE_CODES`` or ``REFERENCE_QUALIFYING_RELATIVE_CODES``.
+    """
+    code = _relationship_from_code(relationship_code)
+    code_groups = (
+        REFERENCE_PERSON_CODES,
+        REFERENCE_SPOUSE_CODES,
+        REFERENCE_QUALIFYING_RELATIVE_CODES,
+    )
+    return any(code in group for group in code_groups)
+@cache
+def _gross_income_limit_values() -> dict:
+    """Load the ``values`` mapping from the packaged gross-income-limit YAML.
+    The file is read once; ``@cache`` hands the same dict object back on
+    every later call.
+    """
+    data_dir = resources.files("microunit").joinpath("data")
+    parameter_file = data_dir.joinpath("dependent_gross_income_limit.yaml")
+    document = yaml.safe_load(parameter_file.read_text(encoding="utf-8"))
+    return document["values"]
+@cache
+def dependent_gross_income_limit(year: int) -> float:
+    """Return the dependent gross income limit in effect for ``year``.
+    The packaged table is keyed by period. The entry whose start year is the
+    latest one not after ``year`` is used, so a year past the end of the
+    table reuses the last configured amount.
+    Raises:
+        ValueError: if ``year`` precedes every configured period.
+    """
+    def _start_year(period) -> int:
+        # Keys may be date-like objects (with ``.year``) or strings that
+        # begin with a four-digit year.
+        return int(period.year) if hasattr(period, "year") else int(str(period)[:4])
+    entry_by_year: dict[int, dict] = {}
+    for period, entry in _gross_income_limit_values().items():
+        # setdefault keeps the first entry seen for a given start year.
+        entry_by_year.setdefault(_start_year(period), entry)
+    eligible_years = [start for start in entry_by_year if start <= year]
+    if not eligible_years:
+        raise ValueError(f"No dependent gross income limit configured for {year}.")
+    return float(entry_by_year[max(eligible_years)]["value"])