policyengine 3.1.6__tar.gz → 3.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. {policyengine-3.1.6 → policyengine-3.1.8}/CHANGELOG.md +14 -0
  2. {policyengine-3.1.6 → policyengine-3.1.8}/PKG-INFO +1 -1
  3. {policyengine-3.1.6 → policyengine-3.1.8}/changelog.yaml +10 -0
  4. {policyengine-3.1.6 → policyengine-3.1.8}/pyproject.toml +1 -1
  5. policyengine-3.1.8/src/policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
  6. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/parameter.py +4 -0
  7. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/parameter_value.py +4 -2
  8. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/datasets.py +27 -4
  9. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/PKG-INFO +1 -1
  10. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/SOURCES.txt +1 -6
  11. {policyengine-3.1.6 → policyengine-3.1.8}/uv.lock +1 -1
  12. policyengine-3.1.6/src/policyengine/__pycache__/__init__.cpython-313.pyc +0 -0
  13. policyengine-3.1.6/tests/test_get_parameter_variable.py +0 -141
  14. policyengine-3.1.6/tests/test_uk_dataset.py +0 -112
  15. policyengine-3.1.6/tests/test_us_datasets.py +0 -109
  16. policyengine-3.1.6/tests/test_us_entity_mapping.py +0 -334
  17. policyengine-3.1.6/tests/test_us_simulation.py +0 -249
  18. {policyengine-3.1.6 → policyengine-3.1.8}/.claude/policyengine-guide.md +0 -0
  19. {policyengine-3.1.6 → policyengine-3.1.8}/.claude/quick-reference.md +0 -0
  20. {policyengine-3.1.6 → policyengine-3.1.8}/.github/CONTRIBUTING.md +0 -0
  21. {policyengine-3.1.6 → policyengine-3.1.8}/.github/changelog_template.md +0 -0
  22. {policyengine-3.1.6 → policyengine-3.1.8}/.github/fetch_version.py +0 -0
  23. {policyengine-3.1.6 → policyengine-3.1.8}/.github/get-changelog-diff.sh +0 -0
  24. {policyengine-3.1.6 → policyengine-3.1.8}/.github/has-functional-changes.sh +0 -0
  25. {policyengine-3.1.6 → policyengine-3.1.8}/.github/is-version-number-acceptable.sh +0 -0
  26. {policyengine-3.1.6 → policyengine-3.1.8}/.github/publish-git-tag.sh +0 -0
  27. {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/code_changes.yaml +0 -0
  28. {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/docs.yml +0 -0
  29. {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/pr_code_changes.yaml +0 -0
  30. {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/pr_docs_changes.yaml +0 -0
  31. {policyengine-3.1.6 → policyengine-3.1.8}/.github/workflows/versioning.yaml +0 -0
  32. {policyengine-3.1.6 → policyengine-3.1.8}/.gitignore +0 -0
  33. {policyengine-3.1.6 → policyengine-3.1.8}/CLAUDE.md +0 -0
  34. {policyengine-3.1.6 → policyengine-3.1.8}/LICENSE +0 -0
  35. {policyengine-3.1.6 → policyengine-3.1.8}/Makefile +0 -0
  36. {policyengine-3.1.6 → policyengine-3.1.8}/README.md +0 -0
  37. {policyengine-3.1.6 → policyengine-3.1.8}/changelog_entry.yaml +0 -0
  38. {policyengine-3.1.6 → policyengine-3.1.8}/docs/.gitignore +0 -0
  39. {policyengine-3.1.6 → policyengine-3.1.8}/docs/core-concepts.md +0 -0
  40. {policyengine-3.1.6 → policyengine-3.1.8}/docs/country-models-uk.md +0 -0
  41. {policyengine-3.1.6 → policyengine-3.1.8}/docs/country-models-us.md +0 -0
  42. {policyengine-3.1.6 → policyengine-3.1.8}/docs/dev.md +0 -0
  43. {policyengine-3.1.6 → policyengine-3.1.8}/docs/index.md +0 -0
  44. {policyengine-3.1.6 → policyengine-3.1.8}/docs/myst.yml +0 -0
  45. {policyengine-3.1.6 → policyengine-3.1.8}/docs/visualisation.md +0 -0
  46. {policyengine-3.1.6 → policyengine-3.1.8}/examples/employment_income_variation_uk.py +0 -0
  47. {policyengine-3.1.6 → policyengine-3.1.8}/examples/employment_income_variation_us.py +0 -0
  48. {policyengine-3.1.6 → policyengine-3.1.8}/examples/income_bands_uk.py +0 -0
  49. {policyengine-3.1.6 → policyengine-3.1.8}/examples/income_distribution_us.py +0 -0
  50. {policyengine-3.1.6 → policyengine-3.1.8}/examples/policy_change_uk.py +0 -0
  51. {policyengine-3.1.6 → policyengine-3.1.8}/examples/speedtest_us_simulation.py +0 -0
  52. {policyengine-3.1.6 → policyengine-3.1.8}/setup.cfg +0 -0
  53. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/__init__.py +0 -0
  54. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/__init__.py +0 -0
  55. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/dataset.py +0 -0
  56. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/dataset_version.py +0 -0
  57. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/dynamic.py +0 -0
  58. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/output.py +0 -0
  59. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/policy.py +0 -0
  60. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/simulation.py +0 -0
  61. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/tax_benefit_model.py +0 -0
  62. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/tax_benefit_model_version.py +0 -0
  63. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/core/variable.py +0 -0
  64. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/__init__.py +0 -0
  65. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/aggregate.py +0 -0
  66. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/change_aggregate.py +0 -0
  67. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/outputs/decile_impact.py +0 -0
  68. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/__init__.py +0 -0
  69. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/analysis.py +0 -0
  70. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/model.py +0 -0
  71. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk/outputs.py +0 -0
  72. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/uk.py +0 -0
  73. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/__init__.py +0 -0
  74. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/analysis.py +0 -0
  75. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/datasets.py +0 -0
  76. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/model.py +0 -0
  77. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us/outputs.py +0 -0
  78. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/tax_benefit_models/us.py +0 -0
  79. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/__init__.py +0 -0
  80. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/dates.py +0 -0
  81. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/parametric_reforms.py +0 -0
  82. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine/utils/plotting.py +0 -0
  83. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/dependency_links.txt +0 -0
  84. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/requires.txt +0 -0
  85. {policyengine-3.1.6 → policyengine-3.1.8}/src/policyengine.egg-info/top_level.txt +0 -0
  86. {policyengine-3.1.6 → policyengine-3.1.8}/tests/test_aggregate.py +0 -0
  87. {policyengine-3.1.6 → policyengine-3.1.8}/tests/test_change_aggregate.py +0 -0
  88. {policyengine-3.1.6 → policyengine-3.1.8}/tests/test_entity_mapping.py +0 -0
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [3.1.8] - 2025-12-02 00:20:11
9
+
10
+ ### Fixed
11
+
12
+ - Dataset speedup with better handling of string cols.
13
+
14
+ ## [3.1.7] - 2025-11-24 16:34:53
15
+
16
+ ### Fixed
17
+
18
+ - Build error
19
+
8
20
  ## [3.1.6] - 2025-11-24 16:23:57
9
21
 
10
22
  ### Fixed
@@ -233,6 +245,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
233
245
 
234
246
 
235
247
 
248
+ [3.1.8]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.7...3.1.8
249
+ [3.1.7]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.6...3.1.7
236
250
  [3.1.6]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.5...3.1.6
237
251
  [3.1.5]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.4...3.1.5
238
252
  [3.1.4]: https://github.com/PolicyEngine/policyengine.py/compare/3.1.3...3.1.4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: policyengine
3
- Version: 3.1.6
3
+ Version: 3.1.8
4
4
  Summary: A package to conduct policy analysis using PolicyEngine tax-benefit models.
5
5
  Author-email: PolicyEngine <hello@policyengine.org>
6
6
  License: GNU AFFERO GENERAL PUBLIC LICENSE
@@ -190,3 +190,13 @@
190
190
  fixed:
191
191
  - Parameter values now accessible from models.
192
192
  date: 2025-11-24 16:23:57
193
+ - bump: patch
194
+ changes:
195
+ fixed:
196
+ - Build error
197
+ date: 2025-11-24 16:34:53
198
+ - bump: patch
199
+ changes:
200
+ fixed:
201
+ - Dataset speedup with better handling of string cols.
202
+ date: 2025-12-02 00:20:11
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "policyengine"
7
- version = "3.1.6"
7
+ version = "3.1.8"
8
8
  description = "A package to conduct policy analysis using PolicyEngine tax-benefit models."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -1,3 +1,4 @@
1
+ from typing import TYPE_CHECKING
1
2
  from uuid import uuid4
2
3
 
3
4
  from pydantic import BaseModel, Field
@@ -5,6 +6,9 @@ from pydantic import BaseModel, Field
5
6
  from .parameter_value import ParameterValue
6
7
  from .tax_benefit_model_version import TaxBenefitModelVersion
7
8
 
9
+ if TYPE_CHECKING:
10
+ from .parameter_value import ParameterValue
11
+
8
12
 
9
13
  class Parameter(BaseModel):
10
14
  id: str = Field(default_factory=lambda: str(uuid4()))
@@ -1,14 +1,16 @@
1
1
  from datetime import datetime
2
+ from typing import TYPE_CHECKING
2
3
  from uuid import uuid4
3
4
 
4
5
  from pydantic import BaseModel, Field
5
6
 
6
- from .parameter import Parameter
7
+ if TYPE_CHECKING:
8
+ from .parameter import Parameter
7
9
 
8
10
 
9
11
  class ParameterValue(BaseModel):
10
12
  id: str = Field(default_factory=lambda: str(uuid4()))
11
- parameter: Parameter | None = None
13
+ parameter: "Parameter | None" = None
12
14
  value: float | int | str | bool | list | None = None
13
15
  start_date: datetime
14
16
  end_date: datetime | None = None
@@ -40,14 +40,37 @@ class PolicyEngineUKDataset(Dataset):
40
40
  self.load()
41
41
 
42
42
  def save(self) -> None:
43
- """Save dataset to HDF5 file."""
43
+ """Save dataset to HDF5 file.
44
+
45
+ Converts object columns to categorical dtype to avoid slow pickle serialization.
46
+ """
44
47
  filepath = Path(self.filepath)
45
48
  if not filepath.parent.exists():
46
49
  filepath.parent.mkdir(parents=True, exist_ok=True)
50
+
51
+ # Convert DataFrames and optimize object columns to categorical
52
+ person_df = pd.DataFrame(self.data.person)
53
+ benunit_df = pd.DataFrame(self.data.benunit)
54
+ household_df = pd.DataFrame(self.data.household)
55
+
56
+ # Convert object columns to categorical to avoid pickle serialization
57
+ for col in person_df.columns:
58
+ if person_df[col].dtype == "object":
59
+ person_df[col] = person_df[col].astype("category")
60
+
61
+ for col in benunit_df.columns:
62
+ if benunit_df[col].dtype == "object":
63
+ benunit_df[col] = benunit_df[col].astype("category")
64
+
65
+ for col in household_df.columns:
66
+ if household_df[col].dtype == "object":
67
+ household_df[col] = household_df[col].astype("category")
68
+
47
69
  with pd.HDFStore(filepath, mode="w") as store:
48
- store["person"] = pd.DataFrame(self.data.person)
49
- store["benunit"] = pd.DataFrame(self.data.benunit)
50
- store["household"] = pd.DataFrame(self.data.household)
70
+ # Use format='table' to support categorical dtypes
71
+ store.put("person", person_df, format="table")
72
+ store.put("benunit", benunit_df, format="table")
73
+ store.put("household", household_df, format="table")
51
74
 
52
75
  def load(self) -> None:
53
76
  """Load dataset from HDF5 file into this instance."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: policyengine
3
- Version: 3.1.6
3
+ Version: 3.1.8
4
4
  Summary: A package to conduct policy analysis using PolicyEngine tax-benefit models.
5
5
  Author-email: PolicyEngine <hello@policyengine.org>
6
6
  License: GNU AFFERO GENERAL PUBLIC LICENSE
@@ -77,9 +77,4 @@ src/policyengine/utils/parametric_reforms.py
77
77
  src/policyengine/utils/plotting.py
78
78
  tests/test_aggregate.py
79
79
  tests/test_change_aggregate.py
80
- tests/test_entity_mapping.py
81
- tests/test_get_parameter_variable.py
82
- tests/test_uk_dataset.py
83
- tests/test_us_datasets.py
84
- tests/test_us_entity_mapping.py
85
- tests/test_us_simulation.py
80
+ tests/test_entity_mapping.py
@@ -1080,7 +1080,7 @@ wheels = [
1080
1080
 
1081
1081
  [[package]]
1082
1082
  name = "policyengine"
1083
- version = "3.0.0"
1083
+ version = "3.1.7"
1084
1084
  source = { editable = "." }
1085
1085
  dependencies = [
1086
1086
  { name = "microdf-python" },
@@ -1,141 +0,0 @@
1
- """Tests for get_parameter and get_variable methods on TaxBenefitModelVersion."""
2
-
3
- import pytest
4
-
5
- from policyengine.tax_benefit_models.uk import uk_latest
6
- from policyengine.tax_benefit_models.us import us_latest
7
-
8
-
9
- def test_uk_get_variable():
10
- """Test getting a variable by name from UK model."""
11
- # Get a known variable
12
- var = uk_latest.get_variable("income_tax")
13
-
14
- assert var is not None
15
- assert var.name == "income_tax"
16
- assert var.entity == "person"
17
- assert var.tax_benefit_model_version == uk_latest
18
-
19
-
20
- def test_uk_get_variable_not_found():
21
- """Test error handling when variable doesn't exist."""
22
- with pytest.raises(
23
- ValueError, match="Variable 'nonexistent_variable' not found"
24
- ):
25
- uk_latest.get_variable("nonexistent_variable")
26
-
27
-
28
- def test_uk_get_parameter():
29
- """Test getting a parameter by name from UK model."""
30
- # Get a known parameter
31
- param = uk_latest.get_parameter(
32
- "gov.hmrc.income_tax.allowances.personal_allowance.amount"
33
- )
34
-
35
- assert param is not None
36
- assert (
37
- param.name
38
- == "gov.hmrc.income_tax.allowances.personal_allowance.amount"
39
- )
40
- assert param.tax_benefit_model_version == uk_latest
41
-
42
-
43
- def test_uk_get_parameter_not_found():
44
- """Test error handling when parameter doesn't exist."""
45
- with pytest.raises(
46
- ValueError, match="Parameter 'nonexistent.parameter' not found"
47
- ):
48
- uk_latest.get_parameter("nonexistent.parameter")
49
-
50
-
51
- def test_us_get_variable():
52
- """Test getting a variable by name from US model."""
53
- # Get a known variable
54
- var = us_latest.get_variable("income_tax")
55
-
56
- assert var is not None
57
- assert var.name == "income_tax"
58
- assert var.entity == "tax_unit"
59
- assert var.tax_benefit_model_version == us_latest
60
-
61
-
62
- def test_us_get_variable_not_found():
63
- """Test error handling when variable doesn't exist."""
64
- with pytest.raises(
65
- ValueError, match="Variable 'nonexistent_variable' not found"
66
- ):
67
- us_latest.get_variable("nonexistent_variable")
68
-
69
-
70
- def test_us_get_parameter():
71
- """Test getting a parameter by name from US model."""
72
- # Get a known parameter
73
- param = us_latest.get_parameter(
74
- "gov.irs.investment.net_investment_income_tax.rate"
75
- )
76
-
77
- assert param is not None
78
- assert param.name == "gov.irs.investment.net_investment_income_tax.rate"
79
- assert param.tax_benefit_model_version == us_latest
80
-
81
-
82
- def test_us_get_parameter_not_found():
83
- """Test error handling when parameter doesn't exist."""
84
- with pytest.raises(
85
- ValueError, match="Parameter 'nonexistent.parameter' not found"
86
- ):
87
- us_latest.get_parameter("nonexistent.parameter")
88
-
89
-
90
- def test_uk_multiple_variables():
91
- """Test getting multiple different variables."""
92
- vars_to_test = [
93
- "income_tax",
94
- "national_insurance",
95
- "universal_credit",
96
- "household_net_income",
97
- ]
98
-
99
- for var_name in vars_to_test:
100
- var = uk_latest.get_variable(var_name)
101
- assert var.name == var_name
102
-
103
-
104
- def test_us_multiple_variables():
105
- """Test getting multiple different variables."""
106
- vars_to_test = [
107
- "income_tax",
108
- "employee_payroll_tax",
109
- "eitc",
110
- "household_net_income",
111
- ]
112
-
113
- for var_name in vars_to_test:
114
- var = us_latest.get_variable(var_name)
115
- assert var.name == var_name
116
-
117
-
118
- def test_uk_multiple_parameters():
119
- """Test getting multiple different parameters."""
120
- params_to_test = [
121
- "gov.hmrc.income_tax.allowances.personal_allowance.amount",
122
- "gov.hmrc.income_tax.rates.uk[0].rate",
123
- "gov.dwp.universal_credit.means_test.reduction_rate",
124
- ]
125
-
126
- for param_name in params_to_test:
127
- param = uk_latest.get_parameter(param_name)
128
- assert param.name == param_name
129
-
130
-
131
- def test_us_multiple_parameters():
132
- """Test getting multiple different parameters."""
133
- params_to_test = [
134
- "gov.irs.investment.net_investment_income_tax.rate",
135
- "gov.irs.self_employment.rate.social_security",
136
- "gov.irs.vita.eligibility.income_limit",
137
- ]
138
-
139
- for param_name in params_to_test:
140
- param = us_latest.get_parameter(param_name)
141
- assert param.name == param_name
@@ -1,112 +0,0 @@
1
- import os
2
- import tempfile
3
-
4
- import pandas as pd
5
- from microdf import MicroDataFrame
6
-
7
- from policyengine.core import Dataset, TaxBenefitModel
8
- from policyengine.tax_benefit_models.uk import (
9
- PolicyEngineUKDataset,
10
- UKYearData,
11
- )
12
-
13
-
14
- def test_imports():
15
- """Test that basic imports work."""
16
- # Verify classes are importable
17
- assert PolicyEngineUKDataset is not None
18
- assert UKYearData is not None
19
- assert Dataset is not None
20
- assert TaxBenefitModel is not None
21
-
22
-
23
- def test_uk_latest_instantiation():
24
- """Test that uk_latest can be instantiated without errors."""
25
- from policyengine.tax_benefit_models.uk import uk_latest
26
-
27
- assert uk_latest is not None
28
- assert uk_latest.version is not None
29
- assert uk_latest.model is not None
30
- assert uk_latest.created_at is not None
31
- assert (
32
- len(uk_latest.variables) > 0
33
- ) # Should have variables from policyengine-uk
34
-
35
-
36
- def test_save_and_load_single_year():
37
- """Test saving and loading a dataset with a single year."""
38
- # Create sample data
39
- person_df = MicroDataFrame(
40
- pd.DataFrame(
41
- {
42
- "person_id": [1, 2, 3],
43
- "age": [25, 30, 35],
44
- "income": [30000, 45000, 60000],
45
- "person_weight": [1.0, 1.0, 1.0],
46
- }
47
- ),
48
- weights="person_weight",
49
- )
50
-
51
- benunit_df = MicroDataFrame(
52
- pd.DataFrame(
53
- {
54
- "benunit_id": [1, 2],
55
- "size": [2, 1],
56
- "total_income": [75000, 60000],
57
- "benunit_weight": [1.0, 1.0],
58
- }
59
- ),
60
- weights="benunit_weight",
61
- )
62
-
63
- household_df = MicroDataFrame(
64
- pd.DataFrame(
65
- {
66
- "household_id": [1],
67
- "num_people": [3],
68
- "rent": [1200],
69
- "household_weight": [1.0],
70
- }
71
- ),
72
- weights="household_weight",
73
- )
74
-
75
- # Create dataset
76
- with tempfile.TemporaryDirectory() as tmpdir:
77
- filepath = os.path.join(tmpdir, "test_dataset.h5")
78
-
79
- dataset = PolicyEngineUKDataset(
80
- name="Test Dataset",
81
- description="A test dataset",
82
- filepath=filepath,
83
- year=2025,
84
- data=UKYearData(
85
- person=person_df, benunit=benunit_df, household=household_df
86
- ),
87
- )
88
-
89
- # Save to file
90
- dataset.save()
91
-
92
- # Load it back
93
- loaded = PolicyEngineUKDataset(
94
- name="Loaded Dataset",
95
- description="Loaded from file",
96
- filepath=filepath,
97
- year=2025,
98
- )
99
- loaded.load()
100
-
101
- # Verify data
102
- assert loaded.year == 2025
103
- # Convert to DataFrame for comparison (MicroDataFrame inherits from DataFrame)
104
- pd.testing.assert_frame_equal(
105
- pd.DataFrame(loaded.data.person), pd.DataFrame(person_df)
106
- )
107
- pd.testing.assert_frame_equal(
108
- pd.DataFrame(loaded.data.benunit), pd.DataFrame(benunit_df)
109
- )
110
- pd.testing.assert_frame_equal(
111
- pd.DataFrame(loaded.data.household), pd.DataFrame(household_df)
112
- )
@@ -1,109 +0,0 @@
1
- """Tests for US dataset creation from HuggingFace paths."""
2
-
3
- import shutil
4
- from pathlib import Path
5
-
6
- import pandas as pd
7
-
8
- from policyengine.tax_benefit_models.us import (
9
- PolicyEngineUSDataset,
10
- create_datasets,
11
- )
12
-
13
-
14
- def test_create_datasets_from_enhanced_cps():
15
- """Test creating datasets from enhanced CPS HuggingFace path."""
16
- # Clean up data directory if it exists
17
- data_dir = Path("./data")
18
- if data_dir.exists():
19
- shutil.rmtree(data_dir)
20
-
21
- # Create datasets for a single year to test
22
- datasets = ["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"]
23
- years = [2024]
24
-
25
- create_datasets(datasets=datasets, years=years)
26
-
27
- # Verify the dataset was created
28
- dataset_file = data_dir / "enhanced_cps_2024_year_2024.h5"
29
- assert dataset_file.exists(), f"Dataset file {dataset_file} should exist"
30
-
31
- # Load and verify dataset structure
32
- dataset = PolicyEngineUSDataset(
33
- name="test",
34
- description="test",
35
- filepath=str(dataset_file),
36
- year=2024,
37
- )
38
- dataset.load()
39
-
40
- # Check all entity types exist
41
- assert dataset.data is not None
42
- assert dataset.data.person is not None
43
- assert dataset.data.household is not None
44
- assert dataset.data.marital_unit is not None
45
- assert dataset.data.family is not None
46
- assert dataset.data.spm_unit is not None
47
- assert dataset.data.tax_unit is not None
48
-
49
- # Check person data has required columns
50
- person_df = pd.DataFrame(dataset.data.person)
51
- assert "person_id" in person_df.columns
52
- assert "person_household_id" in person_df.columns
53
- assert "person_weight" in person_df.columns
54
- assert len(person_df) > 0
55
-
56
- # Check household data
57
- household_df = pd.DataFrame(dataset.data.household)
58
- assert "household_id" in household_df.columns
59
- assert "household_weight" in household_df.columns
60
- assert len(household_df) > 0
61
-
62
- # Check all group entities have weight columns
63
- for entity_name in [
64
- "marital_unit",
65
- "family",
66
- "spm_unit",
67
- "tax_unit",
68
- ]:
69
- entity_df = pd.DataFrame(getattr(dataset.data, entity_name))
70
- assert f"{entity_name}_id" in entity_df.columns
71
- assert f"{entity_name}_weight" in entity_df.columns
72
- assert len(entity_df) > 0
73
-
74
- # Clean up
75
- shutil.rmtree(data_dir)
76
-
77
-
78
- def test_create_datasets_multiple_years():
79
- """Test creating datasets for multiple years."""
80
- # Clean up data directory if it exists
81
- data_dir = Path("./data")
82
- if data_dir.exists():
83
- shutil.rmtree(data_dir)
84
-
85
- datasets = ["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"]
86
- years = [2024, 2025]
87
-
88
- create_datasets(datasets=datasets, years=years)
89
-
90
- # Verify both year datasets were created
91
- for year in years:
92
- dataset_file = data_dir / f"enhanced_cps_2024_year_{year}.h5"
93
- assert dataset_file.exists(), (
94
- f"Dataset file for year {year} should exist"
95
- )
96
-
97
- # Load and verify
98
- dataset = PolicyEngineUSDataset(
99
- name=f"test-{year}",
100
- description=f"test {year}",
101
- filepath=str(dataset_file),
102
- year=year,
103
- )
104
- dataset.load()
105
- assert dataset.data is not None
106
- assert len(pd.DataFrame(dataset.data.person)) > 0
107
-
108
- # Clean up
109
- shutil.rmtree(data_dir)