policyengine 3.1.6__tar.gz → 3.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {policyengine-3.1.6 → policyengine-3.1.8}/CHANGELOG.md +14 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/PKG-INFO +1 -1
- {policyengine-3.1.6 → policyengine-3.1.8}/changelog.yaml +10 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/pyproject.toml +1 -1
- policyengine-3.1.8/src/policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/parameter.py +4 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/parameter_value.py +4 -2
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/datasets.py +27 -4
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/PKG-INFO +1 -1
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/SOURCES.txt +1 -6
- {policyengine-3.1.6 → policyengine-3.1.8}/uv.lock +1 -1
- policyengine-3.1.6/src/policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
- policyengine-3.1.6/tests/test_get_parameter_variable.py +0 -141
- policyengine-3.1.6/tests/test_uk_dataset.py +0 -112
- policyengine-3.1.6/tests/test_us_datasets.py +0 -109
- policyengine-3.1.6/tests/test_us_entity_mapping.py +0 -334
- policyengine-3.1.6/tests/test_us_simulation.py +0 -249
- {policyengine-3.1.6 → policyengine-3.1.8}/.claude/policyengine-guide.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.claude/quick-reference.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/CONTRIBUTING.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/changelog_template.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/fetch_version.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/get-changelog-diff.sh +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/has-functional-changes.sh +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/is-version-number-acceptable.sh +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/publish-git-tag.sh +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/code_changes.yaml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/docs.yml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/pr_code_changes.yaml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/pr_docs_changes.yaml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/versioning.yaml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/.gitignore +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/CLAUDE.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/LICENSE +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/Makefile +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/README.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/changelog_entry.yaml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/.gitignore +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/core-concepts.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/country-models-uk.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/country-models-us.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/dev.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/index.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/myst.yml +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/docs/visualisation.md +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/examples/employment_income_variation_uk.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/examples/employment_income_variation_us.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/examples/income_bands_uk.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/examples/income_distribution_us.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/examples/policy_change_uk.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/examples/speedtest_us_simulation.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/setup.cfg +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/__init__.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/__init__.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/dataset.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/dataset_version.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/dynamic.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/output.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/policy.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/simulation.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/tax_benefit_model.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/tax_benefit_model_version.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/variable.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/__init__.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/aggregate.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/change_aggregate.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/decile_impact.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/__init__.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/analysis.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/model.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/outputs.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/__init__.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/analysis.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/datasets.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/model.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/outputs.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/__init__.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/dates.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/parametric_reforms.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/plotting.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/dependency_links.txt +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/requires.txt +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/top_level.txt +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/tests/test_aggregate.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/tests/test_change_aggregate.py +0 -0
- {policyengine-3.1.6 → policyengine-3.1.8}/tests/test_entity_mapping.py +0 -0
|
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [3.1.8] - 2025-12-02 00:20:11
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Dataset speedup with better handling of string cols.
|
|
13
|
+
|
|
14
|
+
## [3.1.7] - 2025-11-24 16:34:53
|
|
15
|
+
|
|
16
|
+
### Fixed
|
|
17
|
+
|
|
18
|
+
- Build error
|
|
19
|
+
|
|
8
20
|
## [3.1.6] - 2025-11-24 16:23:57
|
|
9
21
|
|
|
10
22
|
### Fixed
|
|
@@ -233,6 +245,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
233
245
|
|
|
234
246
|
|
|
235
247
|
|
|
248
|
+
[3.1.8]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.7...3.1.8
|
|
249
|
+
[3.1.7]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.6...3.1.7
|
|
236
250
|
[3.1.6]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.5...3.1.6
|
|
237
251
|
[3.1.5]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.4...3.1.5
|
|
238
252
|
[3.1.4]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.3...3.1.4
|
|
@@ -190,3 +190,13 @@
|
|
|
190
190
|
fixed:
|
|
191
191
|
- Parameter values now accessible from models.
|
|
192
192
|
date: 2025-11-24 16:23:57
|
|
193
|
+
- bump: patch
|
|
194
|
+
changes:
|
|
195
|
+
fixed:
|
|
196
|
+
- Build error
|
|
197
|
+
date: 2025-11-24 16:34:53
|
|
198
|
+
- bump: patch
|
|
199
|
+
changes:
|
|
200
|
+
fixed:
|
|
201
|
+
- Dataset speedup with better handling of string cols.
|
|
202
|
+
date: 2025-12-02 00:20:11
|
|
Binary file
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
1
2
|
from uuid import uuid4
|
|
2
3
|
|
|
3
4
|
from pydantic import BaseModel, Field
|
|
@@ -5,6 +6,9 @@ from pydantic import BaseModel, Field
|
|
|
5
6
|
from .parameter_value import ParameterValue
|
|
6
7
|
from .tax_benefit_model_version import TaxBenefitModelVersion
|
|
7
8
|
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from .parameter_value import ParameterValue
|
|
11
|
+
|
|
8
12
|
|
|
9
13
|
class Parameter(BaseModel):
|
|
10
14
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
2
3
|
from uuid import uuid4
|
|
3
4
|
|
|
4
5
|
from pydantic import BaseModel, Field
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from .parameter import Parameter
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class ParameterValue(BaseModel):
|
|
10
12
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
|
11
|
-
parameter: Parameter | None = None
|
|
13
|
+
parameter: "Parameter | None" = None
|
|
12
14
|
value: float | int | str | bool | list | None = None
|
|
13
15
|
start_date: datetime
|
|
14
16
|
end_date: datetime | None = None
|
{policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/datasets.py
RENAMED
|
@@ -40,14 +40,37 @@ class PolicyEngineUKDataset(Dataset):
|
|
|
40
40
|
self.load()
|
|
41
41
|
|
|
42
42
|
def save(self) -> None:
|
|
43
|
-
"""Save dataset to HDF5 file.
|
|
43
|
+
"""Save dataset to HDF5 file.
|
|
44
|
+
|
|
45
|
+
Converts object columns to categorical dtype to avoid slow pickle serialization.
|
|
46
|
+
"""
|
|
44
47
|
filepath = Path(self.filepath)
|
|
45
48
|
if not filepath.parent.exists():
|
|
46
49
|
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
50
|
+
|
|
51
|
+
# Convert DataFrames and optimize object columns to categorical
|
|
52
|
+
person_df = pd.DataFrame(self.data.person)
|
|
53
|
+
benunit_df = pd.DataFrame(self.data.benunit)
|
|
54
|
+
household_df = pd.DataFrame(self.data.household)
|
|
55
|
+
|
|
56
|
+
# Convert object columns to categorical to avoid pickle serialization
|
|
57
|
+
for col in person_df.columns:
|
|
58
|
+
if person_df[col].dtype == "object":
|
|
59
|
+
person_df[col] = person_df[col].astype("category")
|
|
60
|
+
|
|
61
|
+
for col in benunit_df.columns:
|
|
62
|
+
if benunit_df[col].dtype == "object":
|
|
63
|
+
benunit_df[col] = benunit_df[col].astype("category")
|
|
64
|
+
|
|
65
|
+
for col in household_df.columns:
|
|
66
|
+
if household_df[col].dtype == "object":
|
|
67
|
+
household_df[col] = household_df[col].astype("category")
|
|
68
|
+
|
|
47
69
|
with pd.HDFStore(filepath, mode="w") as store:
|
|
48
|
-
|
|
49
|
-
store
|
|
50
|
-
store
|
|
70
|
+
# Use format='table' to support categorical dtypes
|
|
71
|
+
store.put("person", person_df, format="table")
|
|
72
|
+
store.put("benunit", benunit_df, format="table")
|
|
73
|
+
store.put("household", household_df, format="table")
|
|
51
74
|
|
|
52
75
|
def load(self) -> None:
|
|
53
76
|
"""Load dataset from HDF5 file into this instance."""
|
|
@@ -77,9 +77,4 @@ src/policyengine/utils/parametric_reforms.py
|
|
|
77
77
|
src/policyengine/utils/plotting.py
|
|
78
78
|
tests/test_aggregate.py
|
|
79
79
|
tests/test_change_aggregate.py
|
|
80
|
-
tests/test_entity_mapping.py
|
|
81
|
-
tests/test_get_parameter_variable.py
|
|
82
|
-
tests/test_uk_dataset.py
|
|
83
|
-
tests/test_us_datasets.py
|
|
84
|
-
tests/test_us_entity_mapping.py
|
|
85
|
-
tests/test_us_simulation.py
|
|
80
|
+
tests/test_entity_mapping.py
|
|
Binary file
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
"""Tests for get_parameter and get_variable methods on TaxBenefitModelVersion."""
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from policyengine.tax_benefit_models.uk import uk_latest
|
|
6
|
-
from policyengine.tax_benefit_models.us import us_latest
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def test_uk_get_variable():
|
|
10
|
-
"""Test getting a variable by name from UK model."""
|
|
11
|
-
# Get a known variable
|
|
12
|
-
var = uk_latest.get_variable("income_tax")
|
|
13
|
-
|
|
14
|
-
assert var is not None
|
|
15
|
-
assert var.name == "income_tax"
|
|
16
|
-
assert var.entity == "person"
|
|
17
|
-
assert var.tax_benefit_model_version == uk_latest
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def test_uk_get_variable_not_found():
|
|
21
|
-
"""Test error handling when variable doesn't exist."""
|
|
22
|
-
with pytest.raises(
|
|
23
|
-
ValueError, match="Variable 'nonexistent_variable' not found"
|
|
24
|
-
):
|
|
25
|
-
uk_latest.get_variable("nonexistent_variable")
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def test_uk_get_parameter():
|
|
29
|
-
"""Test getting a parameter by name from UK model."""
|
|
30
|
-
# Get a known parameter
|
|
31
|
-
param = uk_latest.get_parameter(
|
|
32
|
-
"gov.hmrc.income_tax.allowances.personal_allowance.amount"
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
assert param is not None
|
|
36
|
-
assert (
|
|
37
|
-
param.name
|
|
38
|
-
== "gov.hmrc.income_tax.allowances.personal_allowance.amount"
|
|
39
|
-
)
|
|
40
|
-
assert param.tax_benefit_model_version == uk_latest
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def test_uk_get_parameter_not_found():
|
|
44
|
-
"""Test error handling when parameter doesn't exist."""
|
|
45
|
-
with pytest.raises(
|
|
46
|
-
ValueError, match="Parameter 'nonexistent.parameter' not found"
|
|
47
|
-
):
|
|
48
|
-
uk_latest.get_parameter("nonexistent.parameter")
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def test_us_get_variable():
|
|
52
|
-
"""Test getting a variable by name from US model."""
|
|
53
|
-
# Get a known variable
|
|
54
|
-
var = us_latest.get_variable("income_tax")
|
|
55
|
-
|
|
56
|
-
assert var is not None
|
|
57
|
-
assert var.name == "income_tax"
|
|
58
|
-
assert var.entity == "tax_unit"
|
|
59
|
-
assert var.tax_benefit_model_version == us_latest
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def test_us_get_variable_not_found():
|
|
63
|
-
"""Test error handling when variable doesn't exist."""
|
|
64
|
-
with pytest.raises(
|
|
65
|
-
ValueError, match="Variable 'nonexistent_variable' not found"
|
|
66
|
-
):
|
|
67
|
-
us_latest.get_variable("nonexistent_variable")
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def test_us_get_parameter():
|
|
71
|
-
"""Test getting a parameter by name from US model."""
|
|
72
|
-
# Get a known parameter
|
|
73
|
-
param = us_latest.get_parameter(
|
|
74
|
-
"gov.irs.investment.net_investment_income_tax.rate"
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
assert param is not None
|
|
78
|
-
assert param.name == "gov.irs.investment.net_investment_income_tax.rate"
|
|
79
|
-
assert param.tax_benefit_model_version == us_latest
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def test_us_get_parameter_not_found():
|
|
83
|
-
"""Test error handling when parameter doesn't exist."""
|
|
84
|
-
with pytest.raises(
|
|
85
|
-
ValueError, match="Parameter 'nonexistent.parameter' not found"
|
|
86
|
-
):
|
|
87
|
-
us_latest.get_parameter("nonexistent.parameter")
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def test_uk_multiple_variables():
|
|
91
|
-
"""Test getting multiple different variables."""
|
|
92
|
-
vars_to_test = [
|
|
93
|
-
"income_tax",
|
|
94
|
-
"national_insurance",
|
|
95
|
-
"universal_credit",
|
|
96
|
-
"household_net_income",
|
|
97
|
-
]
|
|
98
|
-
|
|
99
|
-
for var_name in vars_to_test:
|
|
100
|
-
var = uk_latest.get_variable(var_name)
|
|
101
|
-
assert var.name == var_name
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def test_us_multiple_variables():
|
|
105
|
-
"""Test getting multiple different variables."""
|
|
106
|
-
vars_to_test = [
|
|
107
|
-
"income_tax",
|
|
108
|
-
"employee_payroll_tax",
|
|
109
|
-
"eitc",
|
|
110
|
-
"household_net_income",
|
|
111
|
-
]
|
|
112
|
-
|
|
113
|
-
for var_name in vars_to_test:
|
|
114
|
-
var = us_latest.get_variable(var_name)
|
|
115
|
-
assert var.name == var_name
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def test_uk_multiple_parameters():
|
|
119
|
-
"""Test getting multiple different parameters."""
|
|
120
|
-
params_to_test = [
|
|
121
|
-
"gov.hmrc.income_tax.allowances.personal_allowance.amount",
|
|
122
|
-
"gov.hmrc.income_tax.rates.uk[0].rate",
|
|
123
|
-
"gov.dwp.universal_credit.means_test.reduction_rate",
|
|
124
|
-
]
|
|
125
|
-
|
|
126
|
-
for param_name in params_to_test:
|
|
127
|
-
param = uk_latest.get_parameter(param_name)
|
|
128
|
-
assert param.name == param_name
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def test_us_multiple_parameters():
|
|
132
|
-
"""Test getting multiple different parameters."""
|
|
133
|
-
params_to_test = [
|
|
134
|
-
"gov.irs.investment.net_investment_income_tax.rate",
|
|
135
|
-
"gov.irs.self_employment.rate.social_security",
|
|
136
|
-
"gov.irs.vita.eligibility.income_limit",
|
|
137
|
-
]
|
|
138
|
-
|
|
139
|
-
for param_name in params_to_test:
|
|
140
|
-
param = us_latest.get_parameter(param_name)
|
|
141
|
-
assert param.name == param_name
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import tempfile
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
from microdf import MicroDataFrame
|
|
6
|
-
|
|
7
|
-
from policyengine.core import Dataset, TaxBenefitModel
|
|
8
|
-
from policyengine.tax_benefit_models.uk import (
|
|
9
|
-
PolicyEngineUKDataset,
|
|
10
|
-
UKYearData,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def test_imports():
|
|
15
|
-
"""Test that basic imports work."""
|
|
16
|
-
# Verify classes are importable
|
|
17
|
-
assert PolicyEngineUKDataset is not None
|
|
18
|
-
assert UKYearData is not None
|
|
19
|
-
assert Dataset is not None
|
|
20
|
-
assert TaxBenefitModel is not None
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def test_uk_latest_instantiation():
|
|
24
|
-
"""Test that uk_latest can be instantiated without errors."""
|
|
25
|
-
from policyengine.tax_benefit_models.uk import uk_latest
|
|
26
|
-
|
|
27
|
-
assert uk_latest is not None
|
|
28
|
-
assert uk_latest.version is not None
|
|
29
|
-
assert uk_latest.model is not None
|
|
30
|
-
assert uk_latest.created_at is not None
|
|
31
|
-
assert (
|
|
32
|
-
len(uk_latest.variables) > 0
|
|
33
|
-
) # Should have variables from policyengine-uk
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def test_save_and_load_single_year():
|
|
37
|
-
"""Test saving and loading a dataset with a single year."""
|
|
38
|
-
# Create sample data
|
|
39
|
-
person_df = MicroDataFrame(
|
|
40
|
-
pd.DataFrame(
|
|
41
|
-
{
|
|
42
|
-
"person_id": [1, 2, 3],
|
|
43
|
-
"age": [25, 30, 35],
|
|
44
|
-
"income": [30000, 45000, 60000],
|
|
45
|
-
"person_weight": [1.0, 1.0, 1.0],
|
|
46
|
-
}
|
|
47
|
-
),
|
|
48
|
-
weights="person_weight",
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
benunit_df = MicroDataFrame(
|
|
52
|
-
pd.DataFrame(
|
|
53
|
-
{
|
|
54
|
-
"benunit_id": [1, 2],
|
|
55
|
-
"size": [2, 1],
|
|
56
|
-
"total_income": [75000, 60000],
|
|
57
|
-
"benunit_weight": [1.0, 1.0],
|
|
58
|
-
}
|
|
59
|
-
),
|
|
60
|
-
weights="benunit_weight",
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
household_df = MicroDataFrame(
|
|
64
|
-
pd.DataFrame(
|
|
65
|
-
{
|
|
66
|
-
"household_id": [1],
|
|
67
|
-
"num_people": [3],
|
|
68
|
-
"rent": [1200],
|
|
69
|
-
"household_weight": [1.0],
|
|
70
|
-
}
|
|
71
|
-
),
|
|
72
|
-
weights="household_weight",
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
# Create dataset
|
|
76
|
-
with tempfile.TemporaryDirectory() as tmpdir:
|
|
77
|
-
filepath = os.path.join(tmpdir, "test_dataset.h5")
|
|
78
|
-
|
|
79
|
-
dataset = PolicyEngineUKDataset(
|
|
80
|
-
name="Test Dataset",
|
|
81
|
-
description="A test dataset",
|
|
82
|
-
filepath=filepath,
|
|
83
|
-
year=2025,
|
|
84
|
-
data=UKYearData(
|
|
85
|
-
person=person_df, benunit=benunit_df, household=household_df
|
|
86
|
-
),
|
|
87
|
-
)
|
|
88
|
-
|
|
89
|
-
# Save to file
|
|
90
|
-
dataset.save()
|
|
91
|
-
|
|
92
|
-
# Load it back
|
|
93
|
-
loaded = PolicyEngineUKDataset(
|
|
94
|
-
name="Loaded Dataset",
|
|
95
|
-
description="Loaded from file",
|
|
96
|
-
filepath=filepath,
|
|
97
|
-
year=2025,
|
|
98
|
-
)
|
|
99
|
-
loaded.load()
|
|
100
|
-
|
|
101
|
-
# Verify data
|
|
102
|
-
assert loaded.year == 2025
|
|
103
|
-
# Convert to DataFrame for comparison (MicroDataFrame inherits from DataFrame)
|
|
104
|
-
pd.testing.assert_frame_equal(
|
|
105
|
-
pd.DataFrame(loaded.data.person), pd.DataFrame(person_df)
|
|
106
|
-
)
|
|
107
|
-
pd.testing.assert_frame_equal(
|
|
108
|
-
pd.DataFrame(loaded.data.benunit), pd.DataFrame(benunit_df)
|
|
109
|
-
)
|
|
110
|
-
pd.testing.assert_frame_equal(
|
|
111
|
-
pd.DataFrame(loaded.data.household), pd.DataFrame(household_df)
|
|
112
|
-
)
|
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
"""Tests for US dataset creation from HuggingFace paths."""
|
|
2
|
-
|
|
3
|
-
import shutil
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
from policyengine.tax_benefit_models.us import (
|
|
9
|
-
PolicyEngineUSDataset,
|
|
10
|
-
create_datasets,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def test_create_datasets_from_enhanced_cps():
|
|
15
|
-
"""Test creating datasets from enhanced CPS HuggingFace path."""
|
|
16
|
-
# Clean up data directory if it exists
|
|
17
|
-
data_dir = Path("./data")
|
|
18
|
-
if data_dir.exists():
|
|
19
|
-
shutil.rmtree(data_dir)
|
|
20
|
-
|
|
21
|
-
# Create datasets for a single year to test
|
|
22
|
-
datasets = ["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"]
|
|
23
|
-
years = [2024]
|
|
24
|
-
|
|
25
|
-
create_datasets(datasets=datasets, years=years)
|
|
26
|
-
|
|
27
|
-
# Verify the dataset was created
|
|
28
|
-
dataset_file = data_dir / "enhanced_cps_2024_year_2024.h5"
|
|
29
|
-
assert dataset_file.exists(), f"Dataset file {dataset_file} should exist"
|
|
30
|
-
|
|
31
|
-
# Load and verify dataset structure
|
|
32
|
-
dataset = PolicyEngineUSDataset(
|
|
33
|
-
name="test",
|
|
34
|
-
description="test",
|
|
35
|
-
filepath=str(dataset_file),
|
|
36
|
-
year=2024,
|
|
37
|
-
)
|
|
38
|
-
dataset.load()
|
|
39
|
-
|
|
40
|
-
# Check all entity types exist
|
|
41
|
-
assert dataset.data is not None
|
|
42
|
-
assert dataset.data.person is not None
|
|
43
|
-
assert dataset.data.household is not None
|
|
44
|
-
assert dataset.data.marital_unit is not None
|
|
45
|
-
assert dataset.data.family is not None
|
|
46
|
-
assert dataset.data.spm_unit is not None
|
|
47
|
-
assert dataset.data.tax_unit is not None
|
|
48
|
-
|
|
49
|
-
# Check person data has required columns
|
|
50
|
-
person_df = pd.DataFrame(dataset.data.person)
|
|
51
|
-
assert "person_id" in person_df.columns
|
|
52
|
-
assert "person_household_id" in person_df.columns
|
|
53
|
-
assert "person_weight" in person_df.columns
|
|
54
|
-
assert len(person_df) > 0
|
|
55
|
-
|
|
56
|
-
# Check household data
|
|
57
|
-
household_df = pd.DataFrame(dataset.data.household)
|
|
58
|
-
assert "household_id" in household_df.columns
|
|
59
|
-
assert "household_weight" in household_df.columns
|
|
60
|
-
assert len(household_df) > 0
|
|
61
|
-
|
|
62
|
-
# Check all group entities have weight columns
|
|
63
|
-
for entity_name in [
|
|
64
|
-
"marital_unit",
|
|
65
|
-
"family",
|
|
66
|
-
"spm_unit",
|
|
67
|
-
"tax_unit",
|
|
68
|
-
]:
|
|
69
|
-
entity_df = pd.DataFrame(getattr(dataset.data, entity_name))
|
|
70
|
-
assert f"{entity_name}_id" in entity_df.columns
|
|
71
|
-
assert f"{entity_name}_weight" in entity_df.columns
|
|
72
|
-
assert len(entity_df) > 0
|
|
73
|
-
|
|
74
|
-
# Clean up
|
|
75
|
-
shutil.rmtree(data_dir)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def test_create_datasets_multiple_years():
|
|
79
|
-
"""Test creating datasets for multiple years."""
|
|
80
|
-
# Clean up data directory if it exists
|
|
81
|
-
data_dir = Path("./data")
|
|
82
|
-
if data_dir.exists():
|
|
83
|
-
shutil.rmtree(data_dir)
|
|
84
|
-
|
|
85
|
-
datasets = ["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"]
|
|
86
|
-
years = [2024, 2025]
|
|
87
|
-
|
|
88
|
-
create_datasets(datasets=datasets, years=years)
|
|
89
|
-
|
|
90
|
-
# Verify both year datasets were created
|
|
91
|
-
for year in years:
|
|
92
|
-
dataset_file = data_dir / f"enhanced_cps_2024_year_{year}.h5"
|
|
93
|
-
assert dataset_file.exists(), (
|
|
94
|
-
f"Dataset file for year {year} should exist"
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
# Load and verify
|
|
98
|
-
dataset = PolicyEngineUSDataset(
|
|
99
|
-
name=f"test-{year}",
|
|
100
|
-
description=f"test {year}",
|
|
101
|
-
filepath=str(dataset_file),
|
|
102
|
-
year=year,
|
|
103
|
-
)
|
|
104
|
-
dataset.load()
|
|
105
|
-
assert dataset.data is not None
|
|
106
|
-
assert len(pd.DataFrame(dataset.data.person)) > 0
|
|
107
|
-
|
|
108
|
-
# Clean up
|
|
109
|
-
shutil.rmtree(data_dir)
|